コード例 #1
0
    def test_uris_recognize_highwire_pdf(self):
        """A link typed ``application/pdf`` is expected as a ``highwire-pdf`` URI.

        The expected list also holds the claimant's ``self-claim`` entry.
        """
        claimant = 'http://example.com'
        pdf_link = {'href': 'pdf-uri', 'type': 'application/pdf'}
        document = Document({'link': [pdf_link]}, claimant=claimant)

        pdf_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'pdf-uri',
            'type': 'highwire-pdf',
            'content_type': 'application/pdf',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.document_uris) == sorted([pdf_uri, self_claim])
コード例 #2
0
    def test_uris_prefix_type_when_rel(self):
        """A link with ``rel: canonical`` is expected with type ``rel-canonical``.

        The expected list also holds the claimant's ``self-claim`` entry.
        """
        claimant = 'http://example.com'
        canonical_link = {'href': 'https://example.com', 'rel': 'canonical'}
        document = Document({'link': [canonical_link]}, claimant=claimant)

        canonical_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'https://example.com',
            'type': 'rel-canonical',
            'content_type': None,
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.document_uris) == sorted(
            [canonical_uri, self_claim])
コード例 #3
0
ファイル: elastic_test.py プロジェクト: hashin/h
    def test_uris_only_one_self_claim(self):
        """A link equal to the claimant is expected to give one ``self-claim``."""
        claimant = 'http://example.com'
        link = {'href': 'http://example.com'}
        document = Document({'link': [link]}, claimant=claimant)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        assert document.document_uris == [self_claim]
コード例 #4
0
ファイル: elastic_test.py プロジェクト: hashin/h
    def test_uris_str_link(self):
        """A plain-string ``link`` equal to the claimant gives one ``self-claim``.

        The expected entry carries the ``created`` and ``updated`` values
        that were passed to the document.
        """
        claimant = 'http://example.com'
        created_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        document = Document({'link': 'http://example.com'},
                            claimant=claimant,
                            created=created_at,
                            updated=updated_at)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': created_at,
            'updated': updated_at,
        })

        assert document.document_uris == [self_claim]
コード例 #5
0
 def test_uris_discard_self_claim_when_claimant_is_missing(self):
     """Without a claimant, only the link itself is expected as a URI.

     The single expected entry has ``None`` for claimant, type and content
     type; no ``self-claim`` entry is expected.
     """
     link = {'href': 'http://example.com'}
     document = Document({'link': [link]})

     unclaimed_uri = DocumentURI({
         'claimant': None,
         'uri': 'http://example.com',
         'type': None,
         'content_type': None,
         'created': None,
         'updated': None,
     })

     assert document.document_uris == [unclaimed_uri]
コード例 #6
0
    def test_meta(self):
        """``doc.meta`` is expected to hold ``og.title``, ``og.url`` and ``title``.

        Each expected entry carries the document's claimant and timestamps.
        The expected list has no entry for the ``link`` key.
        """
        claimant = 'http://example.com'
        created_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        raw = {
            'og': {
                'title': ['Example Page'],
                'url': ['http://example.com'],
            },
            'title': ['Example Page'],
            'link': ['http://example.com', 'https://example.com'],
        }
        document = Document(raw,
                            claimant=claimant,
                            created=created_at,
                            updated=updated_at)

        # (type, value) pairs for the entries the test expects.
        type_value_pairs = (
            ('og.title', ['Example Page']),
            ('og.url', ['http://example.com']),
            ('title', ['Example Page']),
        )
        wanted = [
            DocumentMeta({
                'type': meta_type,
                'value': meta_value,
                'claimant': claimant,
                'created': created_at,
                'updated': updated_at,
            })
            for meta_type, meta_value in type_value_pairs
        ]

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.meta) == sorted(wanted)
コード例 #7
0
 def test_updated(self):
     """A datetime passed as ``updated`` is readable from ``doc.updated``."""
     stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
     document = Document({}, updated=stamp)

     wanted = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
     assert document.updated == wanted
コード例 #8
0
 def test_uris_disregard_doi_links(self):
     """A lone ``doi:`` link is expected to produce no document URIs."""
     doi_link = {'href': 'doi:foobar'}
     document = Document({'link': [doi_link]})

     uris = document.document_uris
     assert len(uris) == 0
コード例 #9
0
 def test_title_empty_array(self):
     """An empty ``title`` list is expected to give a ``None`` title."""
     document = Document({'title': []})
     title = document.title
     assert title is None
コード例 #10
0
 def test_title(self):
     """A string ``title`` is expected back unchanged from ``doc.title``."""
     raw = {'title': 'Example Page'}
     document = Document(raw)
     assert document.title == 'Example Page'
コード例 #11
0
 def test_title_array(self):
     """A one-element ``title`` list is expected to give that element."""
     raw = {'title': ['Example Page']}
     document = Document(raw)
     assert document.title == 'Example Page'
コード例 #12
0
 def test_init_id(self):
     """It passes other keyword arguments through to the super class.

     Here the extra ``id`` keyword is expected to be readable as an item.
     """
     document = Document({}, id='id-value')
     assert document['id'] == 'id-value'
コード例 #13
0
ファイル: elastic_test.py プロジェクト: hashin/h
 def test_uris_disregard_doi_links(self):
     """A lone ``doi:`` link is expected to leave exactly one document URI."""
     doi_link = {'href': 'doi:foobar'}
     document = Document({'link': [doi_link]})

     # A self-claim is always included; if the doi link were not removed
     # the length would be 2.
     uris = document.document_uris
     assert len(uris) == 1
コード例 #14
0
 def test_init_updated(self):
     """The ``updated`` keyword is expected back from ``doc.updated``."""
     document = Document({}, updated='updated-value')
     updated = document.updated
     assert updated == 'updated-value'
コード例 #15
0
 def test_init_created(self):
     """The ``created`` keyword is expected back from ``doc.created``."""
     document = Document({}, created='created-value')
     created = document.created
     assert created == 'created-value'
コード例 #16
0
 def test_init_claimant(self):
     """The ``claimant`` keyword is expected back from ``doc.claimant``."""
     document = Document({}, claimant='http://example.com')
     claimant = document.claimant
     assert claimant == 'http://example.com'
コード例 #17
0
 def test_init(self):
     """A document built from a dict is expected to equal that dict."""
     raw = {'foo': 'bar'}
     document = Document(raw)
     assert document == {'foo': 'bar'}
コード例 #18
0
class TestDocument(object):
    """Tests for ``Document``: constructor keywords, title, meta and URIs."""

    def test_init(self):
        """A document built from a dict is expected to equal that dict."""
        raw = {'foo': 'bar'}
        document = Document(raw)
        assert document == {'foo': 'bar'}

    def test_init_claimant(self):
        """The ``claimant`` keyword is expected back from ``doc.claimant``."""
        document = Document({}, claimant='http://example.com')
        claimant = document.claimant
        assert claimant == 'http://example.com'

    def test_init_created(self):
        """The ``created`` keyword is expected back from ``doc.created``."""
        document = Document({}, created='created-value')
        created = document.created
        assert created == 'created-value'

    def test_init_updated(self):
        """The ``updated`` keyword is expected back from ``doc.updated``."""
        document = Document({}, updated='updated-value')
        updated = document.updated
        assert updated == 'updated-value'

    def test_init_id(self):
        """It passes other keyword arguments through to the super class.

        Here the extra ``id`` keyword is expected to be readable as an item.
        """
        document = Document({}, id='id-value')
        assert document['id'] == 'id-value'

    def test_created(self):
        """A datetime passed as ``created`` is readable from ``doc.created``."""
        stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        document = Document({}, created=stamp)

        wanted = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        assert document.created == wanted

    def test_updated(self):
        """A datetime passed as ``updated`` is readable from ``doc.updated``."""
        stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        document = Document({}, updated=stamp)

        wanted = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        assert document.updated == wanted

    def test_title(self):
        """A string ``title`` is expected back unchanged from ``doc.title``."""
        raw = {'title': 'Example Page'}
        document = Document(raw)
        assert document.title == 'Example Page'

    def test_title_array(self):
        """A one-element ``title`` list is expected to give that element."""
        raw = {'title': ['Example Page']}
        document = Document(raw)
        assert document.title == 'Example Page'

    def test_title_empty_array(self):
        """An empty ``title`` list is expected to give a ``None`` title."""
        document = Document({'title': []})
        title = document.title
        assert title is None

    def test_meta(self):
        """``doc.meta`` is expected to hold ``og.title``, ``og.url``, ``title``.

        Each expected entry carries the document's claimant and timestamps.
        The expected list has no entry for the ``link`` key.
        """
        claimant = 'http://example.com'
        created_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        raw = {
            'og': {
                'title': ['Example Page'],
                'url': ['http://example.com'],
            },
            'title': ['Example Page'],
            'link': ['http://example.com', 'https://example.com'],
        }
        document = Document(raw,
                            claimant=claimant,
                            created=created_at,
                            updated=updated_at)

        # (type, value) pairs for the entries the test expects.
        type_value_pairs = (
            ('og.title', ['Example Page']),
            ('og.url', ['http://example.com']),
            ('title', ['Example Page']),
        )
        wanted = [
            DocumentMeta({
                'type': meta_type,
                'value': meta_value,
                'claimant': claimant,
                'created': created_at,
                'updated': updated_at,
            })
            for meta_type, meta_value in type_value_pairs
        ]

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.meta) == sorted(wanted)

    def test_uris_only_one_self_claim(self):
        """A link equal to the claimant is expected to give one ``self-claim``."""
        claimant = 'http://example.com'
        link = {'href': 'http://example.com'}
        document = Document({'link': [link]}, claimant=claimant)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        assert document.document_uris == [self_claim]

    def test_uris_discard_self_claim_when_claimant_is_missing(self):
        """Without a claimant, only the link itself is expected as a URI.

        The single expected entry has ``None`` for claimant, type and content
        type; no ``self-claim`` entry is expected.
        """
        link = {'href': 'http://example.com'}
        document = Document({'link': [link]})

        unclaimed_uri = DocumentURI({
            'claimant': None,
            'uri': 'http://example.com',
            'type': None,
            'content_type': None,
            'created': None,
            'updated': None,
        })

        assert document.document_uris == [unclaimed_uri]

    def test_uris_disregard_doi_links(self):
        """A lone ``doi:`` link is expected to produce no document URIs."""
        doi_link = {'href': 'doi:foobar'}
        document = Document({'link': [doi_link]})

        uris = document.document_uris
        assert len(uris) == 0

    def test_uris_str_link(self):
        """A plain-string ``link`` equal to the claimant gives one ``self-claim``.

        The expected entry carries the ``created`` and ``updated`` values
        that were passed to the document.
        """
        claimant = 'http://example.com'
        created_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated_at = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        document = Document({'link': 'http://example.com'},
                            claimant=claimant,
                            created=created_at,
                            updated=updated_at)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': created_at,
            'updated': updated_at,
        })

        assert document.document_uris == [self_claim]

    def test_uris_recognize_highwire_pdf(self):
        """A link typed ``application/pdf`` is expected as ``highwire-pdf``.

        The expected list also holds the claimant's ``self-claim`` entry.
        """
        claimant = 'http://example.com'
        pdf_link = {'href': 'pdf-uri', 'type': 'application/pdf'}
        document = Document({'link': [pdf_link]}, claimant=claimant)

        pdf_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'pdf-uri',
            'type': 'highwire-pdf',
            'content_type': 'application/pdf',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.document_uris) == sorted([pdf_uri, self_claim])

    def test_uris_prefix_type_when_rel(self):
        """A link with ``rel: canonical`` is expected as ``rel-canonical``.

        The expected list also holds the claimant's ``self-claim`` entry.
        """
        claimant = 'http://example.com'
        canonical_link = {'href': 'https://example.com', 'rel': 'canonical'}
        document = Document({'link': [canonical_link]}, claimant=claimant)

        canonical_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'https://example.com',
            'type': 'rel-canonical',
            'content_type': None,
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(document.document_uris) == sorted(
            [canonical_uri, self_claim])

    @pytest.mark.parametrize('doc', [
        Document({'highwire': {'doi': ['foobar']}},
                 claimant='http://example.com'),
        Document({'highwire': {'doi': ['doi:foobar']}},
                 claimant='http://example.com'),
    ])
    def test_uris_generates_doi_uri_from_highwire_meta(self, doc):
        """A highwire ``doi`` value is expected as a ``highwire-doi`` URI.

        Both parametrized inputs, with and without the ``doi:`` prefix, are
        expected to give the URI ``doi:foobar`` plus a ``self-claim``.
        """
        claimant = 'http://example.com'
        doi_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'doi:foobar',
            'type': 'highwire-doi',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(doc.document_uris) == sorted([doi_uri, self_claim])

    @pytest.mark.parametrize('doc', [
        Document({'dc': {'identifier': ['foobar']}},
                 claimant='http://example.com'),
        Document({'dc': {'identifier': ['doi:foobar']}},
                 claimant='http://example.com'),
    ])
    def test_uris_generates_doi_uri_from_dc_meta(self, doc):
        """A dc ``identifier`` value is expected as a ``dc-doi`` URI.

        Both parametrized inputs, with and without the ``doi:`` prefix, are
        expected to give the URI ``doi:foobar`` plus a ``self-claim``.
        """
        claimant = 'http://example.com'
        doi_uri = DocumentURI({
            'claimant': claimant,
            'uri': 'doi:foobar',
            'type': 'dc-doi',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        # Both sides are sorted so the comparison ignores list order.
        assert sorted(doc.document_uris) == sorted([doi_uri, self_claim])