def test_it_calls_first(self, annotation, session, Document):
    """When exactly one document is found, first() is called on the result."""
    found = mock.Mock(count=mock.Mock(return_value=1))
    Document.find_or_create_by_uris.return_value = found

    document.update_document_metadata(session, annotation, [], [])

    found.first.assert_called_once_with()
def test_it_calls_first(self, annotation, session, Document):
    """first() is called once if the lookup reports a single document."""
    find_or_create = Document.find_or_create_by_uris
    find_or_create.return_value = mock.Mock(count=mock.Mock(return_value=1))

    document.update_document_metadata(session, annotation, [], [])

    find_or_create.return_value.first.assert_called_once_with()
def make_anno(data, dbdocs):
    """Build an Annotation from ``data`` and point it at an existing document.

    ``data`` is expanded as keyword arguments to ``models.Annotation``.
    ``dbdocs`` is indexed with the normalized ``target_uri`` of the new
    annotation; the ``id`` of the entry found there is stored as
    ``annotation.document_id``.

    Returns the new annotation. Whatever ``dbdocs[...]`` raises for a missing
    normalized URI propagates to the caller.
    """
    # FIXME for batch the overhead here is stupid beyond belief
    annotation = models.Annotation(**data)

    # NOTE(review): update_document_metadata() is not called here. The old
    # call sat after the return (so it never ran) and was marked as very
    # slow; the document is resolved through ``dbdocs`` instead.
    annotation.document_id = dbdocs[uri_normalize(annotation.target_uri)].id
    return annotation
def test_it_updates_document_web_uri(self, annotation, Document, factories, session):
    """update_web_uri() is called once on the document returned by first()."""
    found_document = mock.Mock(web_uri=None)
    query = Document.find_or_create_by_uris.return_value
    query.count.return_value = 1
    query.first.return_value = found_document

    call_args = (
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )
    document.update_document_metadata(*call_args)

    found_document.update_web_uri.assert_called_once_with()
def create_anno(self, row):
    """Create an Annotation from ``row`` and commit it on ``self.session``.

    ``row`` is run through ``validate``; the ``document`` entry of the result
    is split off and fed to ``update_document_metadata``, and the remaining
    keys become the keyword arguments of ``models.Annotation``. The ``id``,
    ``created`` and ``updated`` values of ``row`` and the validated
    ``target_uri`` are then written onto the annotation before it is added,
    flushed and committed.
    """
    datum = validate(row)
    doc_info = datum.pop('document')
    uri_dicts = doc_info['document_uri_dicts']
    meta_dicts = doc_info['document_meta_dicts']

    target_uri = datum['target_uri']
    anno_id, created, updated = row['id'], row['created'], row['updated']

    annotation = models.Annotation(**datum)

    # TODO update normalization rules
    # FIXME passing created here doesn't quite seem right, would klobber
    document = update_document_metadata(
        self.session,
        target_uri,
        meta_dicts,
        uri_dicts,
        created=created,
        updated=updated)
    print(anno_id)

    annotation.document = document
    # Assigned after construction, in the same order as before.
    for attr, value in (('id', anno_id),
                        ('target_uri', target_uri),
                        ('created', created),
                        ('updated', updated)):
        setattr(annotation, attr, value)

    session = self.session
    session.add(annotation)
    session.flush()
    session.commit()  # FIXME hypothesis doesn't call this
def test_it_updates_document_updated(self, annotation, Document, merge_documents, session):
    """The document's ``updated`` value is replaced by the one passed in."""
    stale_document = mock.Mock(updated="yesterday")
    merge_documents.return_value = stale_document
    Document.find_or_create_by_uris.return_value.first.return_value = stale_document

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )

    assert stale_document.updated == annotation.updated
def test_it_updates_document_updated(
    self, annotation, Document, merge_documents, session
):
    """After the call the document carries the ``updated`` value given."""
    old_updated = "yesterday"
    doc = mock.Mock(updated=old_updated)
    # The same mock is handed out by both the merge and the single-hit path.
    merge_documents.return_value = doc
    Document.find_or_create_by_uris.return_value.first.return_value = doc

    call_args = (
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )
    document.update_document_metadata(*call_args)

    assert doc.updated == annotation.updated
def test_it_updates_document_web_uri(
    self, annotation, Document, factories, session
):
    """The document found via first() gets update_web_uri() called once."""
    doc = mock.Mock(web_uri=None)
    find_or_create = Document.find_or_create_by_uris
    find_or_create.return_value.count.return_value = 1
    find_or_create.return_value.first.return_value = doc

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )

    doc.update_web_uri.assert_called_once_with()
def test_it_saves_all_the_document_uris(
    self, session, annotation, Document, create_or_update_document_uri
):
    """Each document URI dict is passed to create_or_update_document_uri()."""
    Document.find_or_create_by_uris.return_value.count.return_value = 1
    claimant = "http://example.com/claimant"
    document_uri_dicts = [
        {"uri": uri, "claimant": claimant, "type": "type", "content_type": None}
        for uri in (
            "http://example.com/example_1",
            "http://example.com/example_2",
            "http://example.com/example_3",
        )
    ]

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        document_uri_dicts,
        annotation.created,
        annotation.updated,
    )

    assert create_or_update_document_uri.call_count == 3
    expected_document = Document.find_or_create_by_uris.return_value.first.return_value
    for uri_dict in document_uri_dicts:
        create_or_update_document_uri.assert_any_call(
            session=session,
            document=expected_document,
            created=annotation.created,
            updated=annotation.updated,
            **uri_dict
        )
def test_it_saves_all_the_document_uris(self, session, annotation, Document,
                                        create_or_update_document_uri):
    """A DocumentURI is created or updated for every document URI dict."""
    query = Document.find_or_create_by_uris.return_value
    query.count.return_value = 1

    document_uri_dicts = [
        dict(
            uri="http://example.com/example_1",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
        dict(
            uri="http://example.com/example_2",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
        dict(
            uri="http://example.com/example_3",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
    ]

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        document_uri_dicts,
        annotation.created,
        annotation.updated,
    )

    assert create_or_update_document_uri.call_count == 3
    for uri_dict in document_uri_dicts:
        expected_kwargs = dict(
            uri_dict,
            session=session,
            document=Document.find_or_create_by_uris.return_value.first.return_value,
            created=annotation.created,
            updated=annotation.updated,
        )
        create_or_update_document_uri.assert_any_call(**expected_kwargs)
def test_it_uses_the_target_uri_to_get_the_document(
    self, annotation, Document, session
):
    """The target URI and all document URIs are used to look up the document."""
    claimant = "http://example.com/claimant"
    uris = [
        "http://example.com/example_1",
        "http://example.com/example_2",
        "http://example.com/example_3",
    ]
    document_uri_dicts = [
        {"uri": uri, "claimant": claimant, "type": "type", "content_type": None}
        for uri in uris
    ]

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        document_uri_dicts,
        annotation.created,
        annotation.updated,
    )

    Document.find_or_create_by_uris.assert_called_once_with(
        session,
        annotation.target_uri,
        uris,
        created=annotation.created,
        updated=annotation.updated,
    )
def test_it_uses_the_target_uri_to_get_the_document(
        self, annotation, Document, session):
    """find_or_create_by_uris() receives the target URI plus every dict URI."""
    document_uri_dicts = [
        dict(
            uri="http://example.com/example_1",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
        dict(
            uri="http://example.com/example_2",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
        dict(
            uri="http://example.com/example_3",
            claimant="http://example.com/claimant",
            type="type",
            content_type=None,
        ),
    ]

    call_args = (
        session,
        annotation.target_uri,
        [],
        document_uri_dicts,
        annotation.created,
        annotation.updated,
    )
    document.update_document_metadata(*call_args)

    expected_uris = [
        "http://example.com/example_1",
        "http://example.com/example_2",
        "http://example.com/example_3",
    ]
    Document.find_or_create_by_uris.assert_called_once_with(
        session,
        annotation.target_uri,
        expected_uris,
        created=annotation.created,
        updated=annotation.updated,
    )
def test_it_saves_all_the_document_metas(
    self, annotation, create_or_update_document_meta, Document, session
):
    """Each document meta dict is passed to create_or_update_document_meta()."""
    Document.find_or_create_by_uris.return_value.count.return_value = 1
    claimant = "http://example.com/claimant"
    document_meta_dicts = [
        {"claimant": claimant, "type": "title", "value": "foo"},
        {"type": "article title", "value": "bar", "claimant": claimant},
        {"type": "site title", "value": "gar", "claimant": claimant},
    ]

    document.update_document_metadata(
        session,
        annotation.target_uri,
        document_meta_dicts,
        [],
        annotation.created,
        annotation.updated,
    )

    assert create_or_update_document_meta.call_count == 3
    expected_document = Document.find_or_create_by_uris.return_value.first.return_value
    for meta_dict in document_meta_dicts:
        create_or_update_document_meta.assert_any_call(
            session=session,
            document=expected_document,
            created=annotation.created,
            updated=annotation.updated,
            **meta_dict
        )
def test_it_saves_all_the_document_metas(self, annotation,
                                         create_or_update_document_meta,
                                         Document, session):
    """A DocumentMeta is created or updated for every document meta dict."""
    query = Document.find_or_create_by_uris.return_value
    query.count.return_value = 1

    document_meta_dicts = [
        dict(claimant="http://example.com/claimant", type="title", value="foo"),
        dict(
            type="article title",
            value="bar",
            claimant="http://example.com/claimant",
        ),
        dict(
            type="site title",
            value="gar",
            claimant="http://example.com/claimant",
        ),
    ]

    document.update_document_metadata(
        session,
        annotation.target_uri,
        document_meta_dicts,
        [],
        annotation.created,
        annotation.updated,
    )

    assert create_or_update_document_meta.call_count == 3
    for meta_dict in document_meta_dicts:
        expected_kwargs = dict(
            meta_dict,
            session=session,
            document=Document.find_or_create_by_uris.return_value.first.return_value,
            created=annotation.created,
            updated=annotation.updated,
        )
        create_or_update_document_meta.assert_any_call(**expected_kwargs)
def test_if_there_are_multiple_documents_it_merges_them_into_one(
        self, annotation, Document, merge_documents, session):
    """merge_documents() is called when the lookup reports several documents."""
    found = mock.Mock(count=mock.Mock(return_value=3))
    Document.find_or_create_by_uris.return_value = found

    document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )

    merge_documents.assert_called_once_with(
        session, found, updated=annotation.updated
    )
def test_if_there_are_multiple_documents_it_merges_them_into_one(
    self, annotation, Document, merge_documents, session
):
    """With a count of three, the found documents go to merge_documents()."""
    find_or_create = Document.find_or_create_by_uris
    find_or_create.return_value = mock.Mock(count=mock.Mock(return_value=3))

    call_args = (
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )
    document.update_document_metadata(*call_args)

    merge_documents.assert_called_once_with(
        session,
        find_or_create.return_value,
        updated=annotation.updated,
    )
def test_it_returns_a_document(
    self, annotation, create_or_update_document_meta, Document, session
):
    """The value returned is the document produced by first()."""
    query = Document.find_or_create_by_uris.return_value
    query.count.return_value = 1

    returned = document.update_document_metadata(
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )

    assert returned == query.first.return_value
def test_it_returns_a_document(self, annotation,
                               create_or_update_document_meta, Document,
                               session):
    """With a single match, the document from first() is handed back."""
    Document.find_or_create_by_uris.return_value.count.return_value = 1

    call_args = (
        session,
        annotation.target_uri,
        [],
        [],
        annotation.created,
        annotation.updated,
    )
    result = document.update_document_metadata(*call_args)

    expected = Document.find_or_create_by_uris.return_value.first.return_value
    assert result == expected
def make_metadata(self, create, extracted, **kwargs):
    """Attach randomly generated document metadata to this annotation.

    Nothing happens unless ``create`` is truthy: the metadata objects would
    be added to the db, so other strategies simply get none. Otherwise
    between one and three DocumentURI dicts and between one and three
    DocumentMeta dicts (plus a ``title`` one if none was generated) are
    passed to ``update_document_metadata`` and the result is stored on
    ``self.document``.
    """
    if not create:
        return

    def document_uri_dict():
        """
        Build a random DocumentURI and return its fields as a dict.

        Nothing is added to the database session at this point.
        """
        built = DocumentURI.build(
            document=None, claimant=self.target_uri, uri=self.target_uri
        )
        return {
            "claimant": built.claimant,
            "uri": built.uri,
            "type": built.type,
            "content_type": built.content_type,
        }

    def document_meta_dict(type_=None):
        """
        Build a random DocumentMeta and return its fields as a dict.

        ``type_`` is forwarded as the meta's type when given. Nothing is
        added to the database session at this point.
        """
        build_args = {"document": None, "claimant": self.target_uri}
        if type_ is not None:
            build_args["type"] = type_
        built = DocumentMeta.build(**build_args)
        return {
            "claimant": built.claimant,
            "type": built.type,
            "value": built.value,
        }

    # URIs are generated before metas, matching the original draw order.
    uri_count = random.randint(1, 3)
    document_uri_dicts = [document_uri_dict() for _ in range(uri_count)]

    meta_count = random.randint(1, 3)
    document_meta_dicts = [document_meta_dict() for _ in range(meta_count)]

    # Guarantee one DocumentMeta with type='title' so that
    # annotation.document.title is never None.
    if not any(meta["type"] == "title" for meta in document_meta_dicts):
        document_meta_dicts.append(document_meta_dict(type_="title"))

    self.document = update_document_metadata(
        orm.object_session(self),
        self.target_uri,
        document_meta_dicts=document_meta_dicts,
        document_uri_dicts=document_uri_dicts,
        created=self.created,
        updated=self.updated,
    )