def test_it_merges_documents_when_duplicates_found(req):
    """After URI normalization only a single Document row should remain."""
    # Two URIs that differ only by scheme, each attached to its own Document.
    http_uri = models.DocumentURI(
        claimant='http://example.org/',
        uri='http://example.org/',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        claimant='https://example.net/',
        uri='https://example.org/',
        type='rel-canonical',
    )
    first_document = models.Document(document_uris=[http_uri])
    second_document = models.Document(document_uris=[https_uri])
    req.db.add_all([first_document, second_document])
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_documents = req.db.query(models.Document).count()
    assert remaining_documents == 1
class TestDocumentSearchIndexPresenter:
    """Checks the dict returned by DocumentSearchIndexPresenter.asdict()."""

    @pytest.mark.parametrize(
        "document,expected",
        [
            # Title only: rendered as a one-element list, omitted when falsy.
            (models.Document(title="Foo"), {"title": ["Foo"]}),
            (models.Document(title=""), {}),
            (models.Document(title=None), {}),
            # web_uri only: rendered as a plain string, omitted when falsy.
            (models.Document(web_uri="http://foo.org"), {"web_uri": "http://foo.org"}),
            (models.Document(web_uri=""), {}),
            (models.Document(web_uri=None), {}),
            # Both fields set.
            (
                models.Document(title="Foo", web_uri="http://foo.org"),
                {"title": ["Foo"], "web_uri": "http://foo.org"},
            ),
            # No document at all.
            (None, {}),
        ],
    )
    def test_asdict(self, document, expected):
        presenter = DocumentSearchIndexPresenter(document)
        actual = presenter.asdict()
        assert expected == actual
class TestDocumentSearchIndexPresenter:
    """Checks the dict returned by DocumentSearchIndexPresenter.asdict()."""

    @pytest.mark.parametrize(
        "document,expected",
        [
            (models.Document(title="Foo"), {"title": ["Foo"]}),
            (models.Document(title=""), {}),
            (models.Document(title=None), {}),
            # *Searching* for an annotation by `annotation.document` (e.g. by
            # document `title` or `web_uri`) isn't enabled. But you can
            # retrieve an annotation by ID, or by searching on other
            # field(s), and then access its `document`. Bouncer
            # (https://github.com/hypothesis/bouncer) accesses h's
            # Elasticsearch index directly and uses this `document` field.
            (models.Document(web_uri="http://foo.org"), {"web_uri": "http://foo.org"}),
            (models.Document(web_uri=""), {}),
            (models.Document(web_uri=None), {}),
            (
                models.Document(title="Foo", web_uri="http://foo.org"),
                {"title": ["Foo"], "web_uri": "http://foo.org"},
            ),
            (None, {}),
        ],
    )
    def test_asdict(self, document, expected):
        presenter = DocumentSearchIndexPresenter(document)
        rendered = presenter.asdict()
        assert expected == rendered
def test_it_deletes_duplicate_document_meta_objects(req):
    """Only one DocumentMeta row should be left after normalization."""
    http_meta = models.DocumentMeta(
        _claimant="http://example.org/",
        type="title",
        value=["Test Title"],
    )
    # NOTE(review): assigned after construction, presumably to control the
    # stored normalized value directly -- confirm against the model.
    http_meta._claimant_normalized = "http://example.org"

    https_meta = models.DocumentMeta(
        _claimant="https://example.org/",
        type="title",
        value=["Test Title"],
    )
    https_meta._claimant_normalized = "https://example.org"

    # Each meta object belongs to its own Document.
    documents = [
        models.Document(meta=[http_meta]),
        models.Document(meta=[https_meta]),
    ]
    req.db.add_all(documents)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    remaining_meta = req.db.query(models.DocumentMeta).count()
    assert remaining_meta == 1
def _annotation(**kwargs):
    """Return a models.Annotation built from defaults overridden by kwargs."""
    defaults = {
        "userid": "acct:[email protected]",
        "target_selectors": [],
        "created": datetime.datetime.utcnow(),
        "updated": datetime.datetime.utcnow(),
        "document": models.Document(),
    }
    # Caller-supplied values win over the defaults above.
    fields = {**defaults, **kwargs}
    return models.Annotation(**fields)
def _annotation(**kwargs):
    """Return a models.Annotation built from defaults overridden by kwargs."""
    defaults = {
        'userid': 'acct:[email protected]',
        'target_selectors': [],
        'created': datetime.datetime.utcnow(),
        'updated': datetime.datetime.utcnow(),
        'document': models.Document(),
    }
    # Caller-supplied values take precedence over the defaults.
    fields = dict(defaults, **kwargs)
    return models.Annotation(**fields)
def document(session, uri):
    """Create a Document and a DocumentURI for ``uri``; return the Document.

    Both objects are added to ``session``. The DocumentURI uses ``uri`` as
    both its claimant and its uri.
    """
    new_document = models.Document()
    session.add(new_document)

    # The id is only generated on flush, and the DocumentURI below needs it.
    session.flush()

    document_uri = models.DocumentURI(
        claimant=uri,
        document_id=new_document.id,
        uri=uri,
    )
    session.add(document_uri)

    return new_document
class TestDocumentSearchIndexPresenter(object):
    """Checks the dict returned by DocumentSearchIndexPresenter.asdict()."""

    @pytest.mark.parametrize(
        'document,expected',
        [
            # title
            (models.Document(title='Foo'), {'title': ['Foo']}),
            (models.Document(title=''), {}),
            (models.Document(title=None), {}),
            # web_uri
            (
                models.Document(web_uri='http://foo.org'),
                {'web_uri': 'http://foo.org'},
            ),
            (models.Document(web_uri=''), {}),
            (models.Document(web_uri=None), {}),
            # title and web_uri together
            (
                models.Document(title='Foo', web_uri='http://foo.org'),
                {'title': ['Foo'], 'web_uri': 'http://foo.org'},
            ),
            # missing document
            (None, {}),
        ],
    )
    def test_asdict(self, document, expected):
        presenter = DocumentSearchIndexPresenter(document)
        result = presenter.asdict()
        assert expected == result
def test_asdict(self, db_session):
    """asdict() should contain only the title for a document with URIs."""
    plain_uri = models.DocumentURI(
        uri='http://foo.com',
        claimant='http://foo.com',
    )
    canonical_uri = models.DocumentURI(
        uri='http://foo.org',
        claimant='http://foo.com',
        type='rel-canonical',
    )
    document = models.Document(
        title='Foo',
        document_uris=[plain_uri, canonical_uri],
    )
    db_session.add(document)
    db_session.flush()

    rendered = DocumentJSONPresenter(document).asdict()

    assert {'title': ['Foo']} == rendered
def test_it_normalizes_document_meta_claimant(req):
    """normalize_document_meta should rewrite each meta's normalized claimant."""
    org_meta = models.DocumentMeta(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        type='title',
        value=['Test Title'],
    )
    net_meta = models.DocumentMeta(
        _claimant='http://example.net/',
        _claimant_normalized='http://example.net',
        type='title',
        value=['Test Title'],
    )
    owner = models.Document(meta=[org_meta, net_meta])
    req.db.add(owner)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    # NOTE(review): the 'httpx' scheme presumably comes from a stubbed
    # normalizer in the test setup -- confirm against the fixtures.
    assert org_meta.claimant_normalized == 'httpx://example.org'
    assert net_meta.claimant_normalized == 'httpx://example.net'
def test_it_deletes_duplicate_document_uri_objects(req):
    """Only one DocumentURI row should be left after normalization."""
    http_uri = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        _claimant='https://example.org/',
        _claimant_normalized='https://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='self-claim',
    )
    # Both URIs hang off the same Document.
    owner = models.Document(document_uris=[http_uri, https_uri])
    req.db.add(owner)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_uris = req.db.query(models.DocumentURI).count()
    assert remaining_uris == 1
def test_asdict_does_not_render_other_meta_than_title(self, db_session):
    """Non-title meta entries must not show up in the presenter output."""
    title_meta = models.DocumentMeta(
        type='title',
        value=['Foo'],
        claimant='http://foo.com',
    )
    twitter_meta = models.DocumentMeta(
        type='twitter.url',
        value=['http://foo.com'],
        claimant='http://foo.com',
    )
    facebook_meta = models.DocumentMeta(
        type='facebook.title',
        value=['FB Title'],
        claimant='http://foo.com',
    )
    document = models.Document(
        title='Foo',
        meta=[title_meta, twitter_meta, facebook_meta],
    )
    db_session.add(document)
    db_session.flush()

    rendered = DocumentJSONPresenter(document).asdict()

    assert {'title': ['Foo']} == rendered
def test_asdict(self, db_session):
    """asdict() should contain only the title for a document with URIs."""
    uris = [
        models.DocumentURI(uri="http://foo.com", claimant="http://foo.com"),
        models.DocumentURI(
            uri="http://foo.org",
            claimant="http://foo.com",
            type="rel-canonical",
        ),
    ]
    document = models.Document(title="Foo", document_uris=uris)
    db_session.add(document)
    db_session.flush()

    presenter = DocumentJSONPresenter(document)
    actual = presenter.asdict()

    expected = {"title": ["Foo"]}
    assert expected == actual
def test_it_normalizes_document_uris_claimant(req):
    """normalize_document_uris should rewrite each URI's normalized claimant."""
    self_claim = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    canonical = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='rel-canonical',
    )
    owner = models.Document(document_uris=[self_claim, canonical])
    req.db.add(owner)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    # NOTE(review): the 'httpx' scheme presumably comes from a stubbed
    # normalizer in the test setup -- confirm against the fixtures.
    assert self_claim.claimant_normalized == 'httpx://example.org'
    assert canonical.claimant_normalized == 'httpx://example.org'
def test_it_normalizes_document_meta_claimant(req):
    """normalize_document_meta should rewrite each meta's normalized claimant."""
    org_meta = models.DocumentMeta(
        _claimant="http://example.org/",
        _claimant_normalized="http://example.org",
        type="title",
        value=["Test Title"],
    )
    net_meta = models.DocumentMeta(
        _claimant="http://example.net/",
        _claimant_normalized="http://example.net",
        type="title",
        value=["Test Title"],
    )
    req.db.add(models.Document(meta=[org_meta, net_meta]))
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    # NOTE(review): the "httpx" scheme presumably comes from a stubbed
    # normalizer in the test setup -- confirm against the fixtures.
    expectations = (
        (org_meta, "httpx://example.org"),
        (net_meta, "httpx://example.net"),
    )
    for meta, expected_claimant in expectations:
        assert meta.claimant_normalized == expected_claimant
def test_asdict_does_not_render_other_meta_than_title(self, db_session):
    """Non-title meta entries must not show up in the presenter output."""
    meta_entries = [
        models.DocumentMeta(
            type="title",
            value=["Foo"],
            claimant="http://foo.com",
        ),
        models.DocumentMeta(
            type="twitter.url",
            value=["http://foo.com"],
            claimant="http://foo.com",
        ),
        models.DocumentMeta(
            type="facebook.title",
            value=["FB Title"],
            claimant="http://foo.com",
        ),
    ]
    document = models.Document(title="Foo", meta=meta_entries)
    db_session.add(document)
    db_session.flush()

    presenter = DocumentJSONPresenter(document)
    actual = presenter.asdict()

    assert {"title": ["Foo"]} == actual
def test_it_normalizes_document_uris_claimant(req):
    """normalize_document_uris should rewrite each URI's normalized claimant."""
    self_claim = models.DocumentURI(
        _claimant="http://example.org/",
        _claimant_normalized="http://example.org",
        _uri="http://example.org/",
        _uri_normalized="http://example.org",
        type="self-claim",
    )
    canonical = models.DocumentURI(
        _claimant="http://example.org/",
        _claimant_normalized="http://example.org",
        _uri="https://example.org/",
        _uri_normalized="https://example.org",
        type="rel-canonical",
    )
    req.db.add(models.Document(document_uris=[self_claim, canonical]))
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    # NOTE(review): the "httpx" scheme presumably comes from a stubbed
    # normalizer in the test setup -- confirm against the fixtures.
    for docuri in (self_claim, canonical):
        assert docuri.claimant_normalized == "httpx://example.org"