def test_uris_recognize_highwire_pdf(self):
    """A link typed ``application/pdf`` is reported as ``highwire-pdf``.

    The expected list also contains the claimant's own self-claim entry.
    """
    claimant = 'http://example.com'
    pdf_link = {'href': 'pdf-uri', 'type': 'application/pdf'}
    doc = Document({'link': [pdf_link]}, claimant=claimant)

    highwire_pdf = DocumentURI({
        'claimant': claimant,
        'uri': 'pdf-uri',
        'type': 'highwire-pdf',
        'content_type': 'application/pdf',
        'created': None,
        'updated': None,
    })
    self_claim = DocumentURI({
        'claimant': claimant,
        'uri': 'http://example.com',
        'type': 'self-claim',
        'created': None,
        'updated': None,
    })
    expected = [highwire_pdf, self_claim]

    # Order is not part of the contract, so compare sorted copies.
    assert sorted(doc.document_uris) == sorted(expected)
def test_uris_prefix_type_when_rel(self):
    """A link carrying ``rel`` gets a URI type of ``rel-<value>``.

    The expected list also contains the claimant's own self-claim entry.
    """
    claimant = 'http://example.com'
    canonical_link = {'href': 'https://example.com', 'rel': 'canonical'}
    doc = Document({'link': [canonical_link]}, claimant=claimant)

    rel_canonical = DocumentURI({
        'claimant': claimant,
        'uri': 'https://example.com',
        'type': 'rel-canonical',
        'content_type': None,
        'created': None,
        'updated': None,
    })
    self_claim = DocumentURI({
        'claimant': claimant,
        'uri': 'http://example.com',
        'type': 'self-claim',
        'created': None,
        'updated': None,
    })
    expected = [rel_canonical, self_claim]

    # Order is not part of the contract, so compare sorted copies.
    assert sorted(doc.document_uris) == sorted(expected)
def test_uris_only_one_self_claim(self):
    """A link equal to the claimant yields exactly one self-claim URI."""
    claimant = 'http://example.com'
    doc = Document({'link': [{'href': 'http://example.com'}]},
                   claimant=claimant)

    self_claim = DocumentURI({
        'claimant': claimant,
        'uri': 'http://example.com',
        'type': 'self-claim',
        'created': None,
        'updated': None,
    })

    assert doc.document_uris == [self_claim]
def test_uris_str_link(self):
    """A ``link`` given as a plain string is accepted.

    The resulting self-claim carries the document's created/updated
    timestamps.
    """
    claimant = 'http://example.com'
    created = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
    updated = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
    doc = Document({'link': 'http://example.com'},
                   claimant=claimant,
                   created=created,
                   updated=updated)

    self_claim = DocumentURI({
        'claimant': claimant,
        'uri': 'http://example.com',
        'type': 'self-claim',
        'created': created,
        'updated': updated,
    })

    assert doc.document_uris == [self_claim]
def test_uris_discard_self_claim_when_claimant_is_missing(self):
    """Without a claimant only the link's own URI entry is expected."""
    doc = Document({'link': [{'href': 'http://example.com'}]})

    link_uri = DocumentURI({
        'claimant': None,
        'uri': 'http://example.com',
        'type': None,
        'content_type': None,
        'created': None,
        'updated': None,
    })

    assert doc.document_uris == [link_uri]
def test_meta(self):
    """``meta`` holds one entry per ``og.*`` key and one for ``title``.

    No entry is expected for the ``link`` key. Each entry carries the
    document's claimant and timestamps.
    """
    claimant = 'http://example.com'
    created = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
    updated = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
    raw = {
        'og': {
            'title': ['Example Page'],
            'url': ['http://example.com'],
        },
        'title': ['Example Page'],
        'link': ['http://example.com', 'https://example.com'],
    }
    doc = Document(raw, claimant=claimant, created=created, updated=updated)

    og_title = DocumentMeta({
        'type': 'og.title',
        'value': ['Example Page'],
        'claimant': claimant,
        'created': created,
        'updated': updated,
    })
    og_url = DocumentMeta({
        'type': 'og.url',
        'value': ['http://example.com'],
        'claimant': claimant,
        'created': created,
        'updated': updated,
    })
    title = DocumentMeta({
        'type': 'title',
        'value': ['Example Page'],
        'claimant': claimant,
        'created': created,
        'updated': updated,
    })
    expected = [og_title, og_url, title]

    # Order is not part of the contract, so compare sorted copies.
    assert sorted(doc.meta) == sorted(expected)
def test_updated(self):
    """A datetime passed as ``updated`` is exposed via ``doc.updated``."""
    stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
    doc = Document({}, updated=stamp)

    assert doc.updated == datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
def test_uris_disregard_doi_links(self):
    """A ``doi:`` link with no claimant produces no document URIs."""
    doi_link = {'href': 'doi:foobar'}
    doc = Document({'link': [doi_link]})

    assert len(doc.document_uris) == 0
def test_title_empty_array(self):
    """An empty ``title`` list results in a ``None`` title."""
    doc = Document({'title': []})

    assert doc.title is None
def test_title(self):
    """A plain string ``title`` is returned unchanged."""
    doc = Document({'title': 'Example Page'})

    assert doc.title == 'Example Page'
def test_title_array(self):
    """A one-element ``title`` list yields that element as the title."""
    doc = Document({'title': ['Example Page']})

    assert doc.title == 'Example Page'
def test_init_id(self):
    """It passes other keyword arguments through to the super class."""
    doc = Document({}, id='id-value')

    # The extra keyword is readable as a mapping key on the document.
    assert doc['id'] == 'id-value'
def test_uris_disregard_doi_links(self):
    """A ``doi:`` link is dropped, leaving a single document URI."""
    doi_link = {'href': 'doi:foobar'}
    doc = Document({'link': [doi_link]})

    # it always includes a self-claim, not removing doi links would result
    # in a length of 2
    # NOTE(review): no claimant is passed here, and another test with this
    # same name in the file expects a length of 0 -- confirm which
    # expectation matches the current Document.document_uris behaviour.
    assert len(doc.document_uris) == 1
def test_init_updated(self):
    """The ``updated`` keyword is stored on the ``updated`` attribute."""
    doc = Document({}, updated='updated-value')

    assert doc.updated == 'updated-value'
def test_init_created(self):
    """The ``created`` keyword is stored on the ``created`` attribute."""
    doc = Document({}, created='created-value')

    assert doc.created == 'created-value'
def test_init_claimant(self):
    """The ``claimant`` keyword is stored on the ``claimant`` attribute."""
    claimant = 'http://example.com'
    doc = Document({}, claimant=claimant)

    assert doc.claimant == 'http://example.com'
def test_init(self):
    """A document compares equal to the plain dict it was built from."""
    doc = Document({'foo': 'bar'})

    assert doc == {'foo': 'bar'}
class TestDocument(object):
    """Tests for ``Document``: construction, title, meta and document URIs."""

    def test_init(self):
        """A document compares equal to the plain dict it was built from."""
        doc = Document({'foo': 'bar'})

        assert doc == {'foo': 'bar'}

    def test_init_claimant(self):
        """The ``claimant`` keyword is stored on the ``claimant`` attribute."""
        doc = Document({}, claimant='http://example.com')

        assert doc.claimant == 'http://example.com'

    def test_init_created(self):
        """The ``created`` keyword is stored on the ``created`` attribute."""
        doc = Document({}, created='created-value')

        assert doc.created == 'created-value'

    def test_init_updated(self):
        """The ``updated`` keyword is stored on the ``updated`` attribute."""
        doc = Document({}, updated='updated-value')

        assert doc.updated == 'updated-value'

    def test_init_id(self):
        """It passes other keyword arguments through to the super class."""
        doc = Document({}, id='id-value')

        # The extra keyword is readable as a mapping key on the document.
        assert doc['id'] == 'id-value'

    def test_created(self):
        """A datetime passed as ``created`` is exposed via ``doc.created``."""
        stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        doc = Document({}, created=stamp)

        assert doc.created == datetime.datetime(2016, 2, 25, 16, 45, 23,
                                                371848)

    def test_updated(self):
        """A datetime passed as ``updated`` is exposed via ``doc.updated``."""
        stamp = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        doc = Document({}, updated=stamp)

        assert doc.updated == datetime.datetime(2016, 2, 25, 16, 45, 23,
                                                371848)

    def test_title(self):
        """A plain string ``title`` is returned unchanged."""
        doc = Document({'title': 'Example Page'})

        assert doc.title == 'Example Page'

    def test_title_array(self):
        """A one-element ``title`` list yields that element as the title."""
        doc = Document({'title': ['Example Page']})

        assert doc.title == 'Example Page'

    def test_title_empty_array(self):
        """An empty ``title`` list results in a ``None`` title."""
        doc = Document({'title': []})

        assert doc.title is None

    def test_meta(self):
        """``meta`` holds one entry per ``og.*`` key and one for ``title``.

        No entry is expected for the ``link`` key. Each entry carries the
        document's claimant and timestamps.
        """
        claimant = 'http://example.com'
        created = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        raw = {
            'og': {
                'title': ['Example Page'],
                'url': ['http://example.com'],
            },
            'title': ['Example Page'],
            'link': ['http://example.com', 'https://example.com'],
        }
        doc = Document(raw,
                       claimant=claimant,
                       created=created,
                       updated=updated)

        og_title = DocumentMeta({
            'type': 'og.title',
            'value': ['Example Page'],
            'claimant': claimant,
            'created': created,
            'updated': updated,
        })
        og_url = DocumentMeta({
            'type': 'og.url',
            'value': ['http://example.com'],
            'claimant': claimant,
            'created': created,
            'updated': updated,
        })
        title = DocumentMeta({
            'type': 'title',
            'value': ['Example Page'],
            'claimant': claimant,
            'created': created,
            'updated': updated,
        })
        expected = [og_title, og_url, title]

        # Order is not part of the contract, so compare sorted copies.
        assert sorted(doc.meta) == sorted(expected)

    def test_uris_only_one_self_claim(self):
        """A link equal to the claimant yields exactly one self-claim URI."""
        claimant = 'http://example.com'
        doc = Document({'link': [{'href': 'http://example.com'}]},
                       claimant=claimant)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })

        assert doc.document_uris == [self_claim]

    def test_uris_discard_self_claim_when_claimant_is_missing(self):
        """Without a claimant only the link's own URI entry is expected."""
        doc = Document({'link': [{'href': 'http://example.com'}]})

        link_uri = DocumentURI({
            'claimant': None,
            'uri': 'http://example.com',
            'type': None,
            'content_type': None,
            'created': None,
            'updated': None,
        })

        assert doc.document_uris == [link_uri]

    def test_uris_disregard_doi_links(self):
        """A ``doi:`` link with no claimant produces no document URIs."""
        doi_link = {'href': 'doi:foobar'}
        doc = Document({'link': [doi_link]})

        assert len(doc.document_uris) == 0

    def test_uris_str_link(self):
        """A ``link`` given as a plain string is accepted.

        The resulting self-claim carries the document's created/updated
        timestamps.
        """
        claimant = 'http://example.com'
        created = datetime.datetime(2016, 2, 25, 16, 45, 23, 371848)
        updated = datetime.datetime(2016, 2, 25, 16, 45, 23, 371849)
        doc = Document({'link': 'http://example.com'},
                       claimant=claimant,
                       created=created,
                       updated=updated)

        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': created,
            'updated': updated,
        })

        assert doc.document_uris == [self_claim]

    def test_uris_recognize_highwire_pdf(self):
        """A link typed ``application/pdf`` is reported as ``highwire-pdf``.

        The expected list also contains the claimant's self-claim entry.
        """
        claimant = 'http://example.com'
        pdf_link = {'href': 'pdf-uri', 'type': 'application/pdf'}
        doc = Document({'link': [pdf_link]}, claimant=claimant)

        highwire_pdf = DocumentURI({
            'claimant': claimant,
            'uri': 'pdf-uri',
            'type': 'highwire-pdf',
            'content_type': 'application/pdf',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })
        expected = [highwire_pdf, self_claim]

        assert sorted(doc.document_uris) == sorted(expected)

    def test_uris_prefix_type_when_rel(self):
        """A link carrying ``rel`` gets a URI type of ``rel-<value>``.

        The expected list also contains the claimant's self-claim entry.
        """
        claimant = 'http://example.com'
        canonical_link = {'href': 'https://example.com', 'rel': 'canonical'}
        doc = Document({'link': [canonical_link]}, claimant=claimant)

        rel_canonical = DocumentURI({
            'claimant': claimant,
            'uri': 'https://example.com',
            'type': 'rel-canonical',
            'content_type': None,
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })
        expected = [rel_canonical, self_claim]

        assert sorted(doc.document_uris) == sorted(expected)

    # Both parametrized documents (DOI with and without the "doi:" prefix)
    # are checked against the same expected "doi:foobar" URI.
    @pytest.mark.parametrize('doc', [
        Document({'highwire': {'doi': ['foobar']}},
                 claimant='http://example.com'),
        Document({'highwire': {'doi': ['doi:foobar']}},
                 claimant='http://example.com'),
    ])
    def test_uris_generates_doi_uri_from_highwire_meta(self, doc):
        """Highwire ``doi`` metadata yields a ``highwire-doi`` URI."""
        claimant = 'http://example.com'

        highwire_doi = DocumentURI({
            'claimant': claimant,
            'uri': 'doi:foobar',
            'type': 'highwire-doi',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })
        expected = [highwire_doi, self_claim]

        assert sorted(doc.document_uris) == sorted(expected)

    # Both parametrized documents (identifier with and without the "doi:"
    # prefix) are checked against the same expected "doi:foobar" URI.
    @pytest.mark.parametrize('doc', [
        Document({'dc': {'identifier': ['foobar']}},
                 claimant='http://example.com'),
        Document({'dc': {'identifier': ['doi:foobar']}},
                 claimant='http://example.com'),
    ])
    def test_uris_generates_doi_uri_from_dc_meta(self, doc):
        """Dublin Core ``identifier`` metadata yields a ``dc-doi`` URI."""
        claimant = 'http://example.com'

        dc_doi = DocumentURI({
            'claimant': claimant,
            'uri': 'doi:foobar',
            'type': 'dc-doi',
            'created': None,
            'updated': None,
        })
        self_claim = DocumentURI({
            'claimant': claimant,
            'uri': 'http://example.com',
            'type': 'self-claim',
            'created': None,
            'updated': None,
        })
        expected = [dc_doi, self_claim]

        assert sorted(doc.document_uris) == sorted(expected)