def accept_changes(context, nodes, username):
    """Build a change set from `nodes` on behalf of `username` and accept it.

    The user is looked up by username, the nodes are loaded into a
    MutableGraph and passed through ``Regulator().regulate``, and a
    NormalizedData created with that user as its source is handed to the
    ChangeSetBuilder (with disambiguation enabled).

    Returns whatever ``change_set.accept()`` returns, or None when the
    builder produced a falsy change set. `context` is not used here.
    """
    share_user = models.ShareUser.objects.get(username=username)

    node_graph = MutableGraph.from_jsonld(nodes)
    regulator = Regulator()
    regulator.regulate(node_graph)

    normalized_datum = NormalizedDataFactory(source=share_user)
    builder = ChangeSetBuilder(node_graph, normalized_datum, disambiguate=True)
    change_set = builder.build_change_set()

    if not change_set:
        return None
    return change_set.accept()
def test_update_dependencies_accept(self, john_doe, normalized_data):
    '''
    Accept a graph that changes an existing person's given name and links
    that person, through a Creator node, to a preprint node.
    '''
    person_ref = IDObfuscator.encode(john_doe)

    person_node = {
        '@id': person_ref,
        '@type': 'person',
        'given_name': 'Jane',
    }
    creator_node = {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': person_ref, '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint_node = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
    }
    graph = MutableGraph.from_jsonld([person_node, creator_node, preprint_node])

    # The encoded id is mapped directly to the existing instance.
    builder = ChangeSetBuilder(graph, normalized_data, matches={person_ref: john_doe})
    builder.build_change_set().accept()

    john_doe.refresh_from_db()
    assert john_doe.given_name == 'Jane'

    linked_preprints = models.Preprint.objects.filter(agent_relations__agent=john_doe)
    assert linked_preprints.count() == 1
    assert linked_preprints.first().title == 'All About Cats'
def test_can_delete_work(self, john_doe, normalized_data):
    '''
    Accept a preprint with an identifier, then accept a second graph with
    the same identifier whose preprint node sets `is_deleted`. The stored
    preprint should be flagged as deleted afterwards.
    '''
    def identifier_node():
        # A fresh dict per graph, so the two graphs never share objects.
        return {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': 'http://osf.io/faq',
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }

    creation_graph = MutableGraph.from_jsonld([
        identifier_node(),
        {'@id': '_:789', '@type': 'preprint', 'title': 'All About Cats'},
    ])
    creation = ChangeSetBuilder(creation_graph, normalized_data, disambiguate=True).build_change_set()
    preprint, _identifier = creation.accept()
    assert preprint.is_deleted is False

    deletion_graph = MutableGraph.from_jsonld([
        identifier_node(),
        {'@id': '_:789', 'is_deleted': True, '@type': 'preprint'},
    ])
    deletion = ChangeSetBuilder(deletion_graph, normalized_data, disambiguate=True).build_change_set()
    deletion.accept()

    preprint.refresh_from_db()
    assert preprint.is_deleted is True
def test_create_dependencies_accept(self, normalized_data, create_graph_dependencies):
    '''
    Build a change set from the `create_graph_dependencies` fixture, check
    the order and content of its changes, then accept it and check the
    types of the returned objects.
    '''
    builder = ChangeSetBuilder(create_graph_dependencies, normalized_data)
    change_set = builder.build_change_set()

    assert change_set.changes.count() == 3
    for position, node_id in enumerate(('_:123', '_:789', '_:456')):
        assert change_set.changes.all()[position].node_id == node_id

    expected_last_change = {
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    assert change_set.changes.last().change == expected_last_change

    changed = change_set.accept()
    assert len(changed) == 3

    expected_models = (models.Person, models.Preprint, models.Creator)
    for position, model in enumerate(expected_models):
        assert isinstance(changed[position], model)

    # Every accepted object must have a primary key.
    primary_keys = [obj.pk for obj in changed]
    assert None not in primary_keys
def test_add_relation_related(self, normalized_data):
    '''
    An article with an identifier exists. Accept a preprint that cites a
    work carrying the same identifier URI. No generic creative work should
    be left over, and the `cites` relation should be reachable from both
    the preprint (outgoing) and the article (incoming).
    '''
    uri = 'http://osf.io/special-snowflake'

    article_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }])
    ChangeSetBuilder(article_graph, normalized_data).build_change_set().accept()

    assert models.Article.objects.count() == 1

    citing_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    citing_builder = ChangeSetBuilder(citing_graph, normalized_data, disambiguate=True)
    citing_builder.build_change_set().accept()

    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    outgoing = dog.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == cat

    incoming = cat.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == dog
def _ingest(graph, disambiguate=True, regulate=True, user=None, save=True):
    """Build a change set from `graph` and optionally accept it.

    Args:
        graph: graph handed to the regulator and to ChangeSetBuilder.
        disambiguate: forwarded to ChangeSetBuilder.
        regulate: when truthy, run ``Regulator().regulate(graph)`` first.
        user: when truthy, a NormalizedData is created through the factory
            with this user as source; otherwise the `normalized_data` name
            from the surrounding scope is used.
        save: when truthy (and a change set was built), accept it.

    Returns:
        The built change set, which may be None.
    """
    if regulate:
        Regulator().regulate(graph)

    if user:
        nd = factories.NormalizedDataFactory(source=user)
    else:
        nd = normalized_data

    builder = ChangeSetBuilder(graph, nd, disambiguate=disambiguate)
    cs = builder.build_change_set()

    if not save or cs is None:
        return cs
    cs.accept()
    return cs
def _ingest(graph, disambiguate=True, regulate=True, user=None, save=True):
    """Turn `graph` into a change set, accepting it unless told otherwise.

    The graph is first regulated when `regulate` is truthy. The change set
    is built against a factory-made NormalizedData whose source is `user`
    when a user is given, and against the `normalized_data` name from the
    surrounding scope otherwise. With `save` truthy and a non-None change
    set, the change set is accepted before being returned.
    """
    if regulate:
        regulator = Regulator()
        regulator.regulate(graph)

    if user:
        datum = factories.NormalizedDataFactory(source=user)
    else:
        datum = normalized_data

    change_set = ChangeSetBuilder(graph, datum, disambiguate=disambiguate).build_change_set()

    if save:
        if change_set is not None:
            change_set.accept()
    return change_set
def test_change_work_type(self, normalized_data):
    '''
    A Project with an Identifier exists. Accept a new changeset with a
    Preprint that has the same Identifier. The preprint should
    disambiguate to the existing work, the work's type should become
    Preprint, and its title should be kept.
    '''
    title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    project_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'project',
        'title': title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'project'},
    }])
    project_builder = ChangeSetBuilder(project_graph, normalized_data, disambiguate=True)
    work, identifier = project_builder.build_change_set().accept()
    work_id = work.id

    assert identifier.uri == uri
    assert models.Project.objects.count() == 1
    assert models.Preprint.objects.count() == 0
    assert models.CreativeWork.objects.count() == 1
    assert models.Project.objects.all()[0].changes.count() == 1

    preprint_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }])
    preprint_builder = ChangeSetBuilder(preprint_graph, normalized_data, disambiguate=True)
    preprint_builder.build_change_set().accept()

    assert models.Project.objects.count() == 0
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.count() == 1
    assert models.Preprint.objects.get(id=work_id).title == title
    assert models.Preprint.objects.all()[0].changes.count() == 2
def test_generic_creative_work(self, normalized_data):
    '''
    A Preprint with an Identifier exists. Accept a changeset with a
    generic creativework node that has the same Identifier and a new
    title. The Preprint's title should change, its type should not.
    '''
    old_title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    preprint_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': old_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }])
    original_change_set = ChangeSetBuilder(preprint_graph, normalized_data).build_change_set()
    preprint, identifier = original_change_set.accept()
    preprint_id = preprint.id

    assert identifier.uri == uri
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=preprint_id).title == old_title

    new_title = 'Ambidextrous Earthquakes'

    generic_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'creativework',
        'title': new_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'creativework'},
    }])
    update_builder = ChangeSetBuilder(generic_graph, normalized_data, disambiguate=True)
    update_builder.build_change_set().accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=preprint_id).title == new_title
def test_related_works(self, normalized_data):
    '''
    Accept one graph holding a preprint, a generic creative work and a
    `cites` relation from the former to the latter, then check the
    relation from both ends.
    '''
    uri = 'http://osf.io/special-snowflake'

    graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'title': 'Cats, tho',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    ChangeSetBuilder(graph, normalized_data).build_change_set().accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1

    p = models.Preprint.objects.first()
    c = models.AbstractCreativeWork.objects.get(title='Cats, tho')

    related = p.related_works
    assert related.count() == 1
    assert related.first() == c

    outgoing = p.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == c

    incoming = c.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == p
def test_can_delete_work(self, john_doe, normalized_data):
    '''
    Create a preprint through a change set, then accept a second change
    set for the same identifier with `is_deleted` set on the preprint
    node, and check that the stored preprint is now deleted.
    '''
    faq_uri = 'http://osf.io/faq'

    first_graph = MutableGraph.from_jsonld([
        {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': faq_uri,
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        },
        {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        },
    ])
    first_builder = ChangeSetBuilder(first_graph, normalized_data, disambiguate=True)
    preprint, _identifier = first_builder.build_change_set().accept()
    assert preprint.is_deleted is False

    second_graph = MutableGraph.from_jsonld([
        {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': faq_uri,
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        },
        {
            '@id': '_:789',
            'is_deleted': True,
            '@type': 'preprint',
        },
    ])
    second_builder = ChangeSetBuilder(second_graph, normalized_data, disambiguate=True)
    second_builder.build_change_set().accept()

    # Reload so the assertion sees the persisted state.
    preprint.refresh_from_db()
    assert preprint.is_deleted is True
def _apply_changes(self, job, graph, normalized_datum):
    """Build a change set for `graph`, accept it when permitted, and list touched works.

    The change set is built and accepted inside one transaction. It is only
    accepted when it is truthy and the datum's user has a source, is a
    robot, or is trusted.

    On IngestConflict or OperationalError the job's retry counter is
    incremented and saved. The exception is re-raised once the counter
    exceeds ``self.MAX_RETRIES``; otherwise the job is rescheduled and
    None is returned.

    Returns:
        None when nothing was updated (or the job was rescheduled);
        otherwise a list of ids of the AbstractCreativeWork instances found
        among the accepted objects and the builder's matches.
    """
    updated = None
    matches = None

    try:
        # Load all relevant ContentTypes in a single query
        ContentType.objects.get_for_models(*apps.get_models('share'), for_concrete_models=False)

        with transaction.atomic():
            builder = ChangeSetBuilder(graph, normalized_datum, disambiguate=True)
            change_set = builder.build_change_set()

            user = normalized_datum.source  # "source" here is a user...
            source = user.source
            if change_set and (source or user.is_robot or user.is_trusted):
                updated = change_set.accept()
                matches = builder.matches
    except (exceptions.IngestConflict, OperationalError):
        # Retry if it was just the wrong place at the wrong time
        job.retries = (job.retries or 0) + 1
        job.save(update_fields=('retries',))
        if job.retries > self.MAX_RETRIES:
            raise
        job.reschedule()
        return None

    if not updated:
        return None

    def work_ids(candidates):
        # Keep only creative works; other accepted/matched objects are ignored.
        return {obj.id for obj in candidates if isinstance(obj, AbstractCreativeWork)}

    updated_works = work_ids(updated)
    existing_works = work_ids((matches or {}).values())
    return list(updated_works | existing_works)
def test_create_dependencies_accept(self, normalized_data, create_graph_dependencies):
    '''
    The change set built from `create_graph_dependencies` should contain
    three changes in a fixed node order, and accepting it should return a
    Person, a Preprint and a Creator, all with primary keys.
    '''
    change_set = ChangeSetBuilder(create_graph_dependencies, normalized_data).build_change_set()

    assert change_set.changes.count() == 3

    expected_node_ids = ['_:123', '_:789', '_:456']
    for index, expected_id in enumerate(expected_node_ids):
        assert change_set.changes.all()[index].node_id == expected_id

    last_change = change_set.changes.last().change
    assert last_change == {
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }

    changed = change_set.accept()
    assert len(changed) == 3

    expected_types = [models.Person, models.Preprint, models.Creator]
    for index, expected_type in enumerate(expected_types):
        assert isinstance(changed[index], expected_type)

    pks = [instance.pk for instance in changed]
    assert None not in pks
def test_update_dependencies_accept(self, john_doe, normalized_data):
    '''
    Accept a change set that updates `john_doe`'s given name and attaches
    him, via a Creator node, to a preprint node; then check both the
    person and the preprint reachable through his agent relations.
    '''
    john_doe_id = IDObfuscator.encode(john_doe)
    agent_ref = {'@id': john_doe_id, '@type': 'person'}
    work_ref = {'@id': '_:789', '@type': 'preprint'}

    nodes = [
        {
            '@id': john_doe_id,
            '@type': 'person',
            'given_name': 'Jane',
        },
        {
            '@id': '_:456',
            '@type': 'Creator',
            'agent': agent_ref,
            'creative_work': work_ref,
        },
        {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        },
    ]
    graph = MutableGraph.from_jsonld(nodes)

    known_matches = {john_doe_id: john_doe}
    change_set = ChangeSetBuilder(graph, normalized_data, matches=known_matches).build_change_set()
    change_set.accept()

    john_doe.refresh_from_db()
    assert john_doe.given_name == 'Jane'

    preprints = models.Preprint.objects.filter(agent_relations__agent=john_doe)
    assert preprints.count() == 1
    assert preprints.first().title == 'All About Cats'
def from_graph(self, jsonld, disambiguate=False):
    """Store `jsonld` as a NormalizedData row and build a change set from it.

    The NormalizedData is created with `jsonld` as its data and the
    `share_user` name from the surrounding scope as its source. The same
    `jsonld` is then loaded into a MutableGraph for the ChangeSetBuilder.

    Args:
        jsonld: JSON-LD nodes, used both as stored data and as graph input.
        disambiguate: forwarded to ChangeSetBuilder.

    Returns:
        The result of ``build_change_set()``.
    """
    normalized_datum = NormalizedData.objects.create(data=jsonld, source=share_user)
    builder = ChangeSetBuilder(
        MutableGraph.from_jsonld(jsonld),
        normalized_datum,
        disambiguate=disambiguate,
    )
    return builder.build_change_set()
def test_change_work_type(self, normalized_data):
    '''
    A Project with an Identifier exists. Accept a changeset holding a
    Preprint with the same Identifier. The existing work should be
    reused: it becomes a Preprint, keeps its title, and gains a second
    change.
    '''
    title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'
    identifier_ref = {'@id': '_:2345', '@type': 'workidentifier'}

    cg = MutableGraph.from_jsonld([
        {
            '@id': '_:1234',
            '@type': 'project',
            'title': title,
            'identifiers': [identifier_ref],
        },
        {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'project'},
        },
    ])
    original_change_set = ChangeSetBuilder(cg, normalized_data, disambiguate=True).build_change_set()
    work, identifier = original_change_set.accept()
    existing_id = work.id

    assert identifier.uri == uri
    assert models.Project.objects.count() == 1
    assert models.Preprint.objects.count() == 0
    assert models.CreativeWork.objects.count() == 1
    assert models.Project.objects.all()[0].changes.count() == 1

    cg = MutableGraph.from_jsonld([
        {
            '@id': '_:1234',
            '@type': 'preprint',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'},
        },
    ])
    retype_change_set = ChangeSetBuilder(cg, normalized_data, disambiguate=True).build_change_set()
    retype_change_set.accept()

    assert models.Project.objects.count() == 0
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.count() == 1
    assert models.Preprint.objects.get(id=existing_id).title == title
    assert models.Preprint.objects.all()[0].changes.count() == 2
def test_add_work_with_existing_relation(self, normalized_data):
    '''
    Accept a preprint that cites a work known only by one identifier; the
    cited work is stored as a generic creative work. Then accept an
    article with the same identifier URI. The generic work should become
    the article instead of a new work being created, and the `cites`
    relation should connect the preprint and the article.
    '''
    uri = 'http://osf.io/special-snowflake'

    citing_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    ChangeSetBuilder(citing_graph, normalized_data).build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.Article.objects.count() == 0

    article_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }])
    article_builder = ChangeSetBuilder(article_graph, normalized_data, disambiguate=True)
    article_builder.build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    outgoing = dog.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == cat

    incoming = cat.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == dog
def test_add_relation_related(self, normalized_data):
    '''
    A work (an article) exists. Add a preprint with a `cites` relation to
    a work sharing the article's identifier URI. The article should carry
    the matching incoming relation from the preprint.
    '''
    uri = 'http://osf.io/special-snowflake'

    existing_nodes = [
        {
            '@id': '_:1234',
            '@type': 'article',
            'title': 'All About Cats',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'article'},
        },
    ]
    existing_graph = MutableGraph.from_jsonld(existing_nodes)
    ChangeSetBuilder(existing_graph, normalized_data).build_change_set().accept()

    assert models.Article.objects.count() == 1

    incoming_nodes = [
        {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay too',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}],
        },
        {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': {'@id': '_:1234', '@type': 'preprint'},
            'related': {'@id': '_:2345', '@type': 'creativework'},
        },
        {
            '@id': '_:2345',
            '@type': 'creativework',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'},
        },
    ]
    graph = MutableGraph.from_jsonld(incoming_nodes)
    change_set = ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set()
    change_set.accept()

    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1
    generic_works = models.CreativeWork.objects.filter(type='share.creativework')
    assert generic_works.count() == 0

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    dog_relations = dog.outgoing_creative_work_relations
    assert dog_relations.count() == 1
    assert dog_relations.first()._meta.model_name == 'cites'
    assert dog_relations.first().related == cat

    cat_relations = cat.incoming_creative_work_relations
    assert cat_relations.count() == 1
    assert cat_relations.first()._meta.model_name == 'cites'
    assert cat_relations.first().subject == dog
def test_related_works(self, normalized_data):
    '''
    Create two works with a `cites` relation between them in a single
    change set, then verify the relation from the citing preprint and
    from the cited work.
    '''
    uri = 'http://osf.io/special-snowflake'
    preprint_ref = {'@id': '_:1234', '@type': 'preprint'}
    cited_ref = {'@id': '_:2345', '@type': 'creativework'}

    nodes = [
        {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay too',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}],
        },
        {
            '@id': '_:2345',
            '@type': 'creativework',
            'title': 'Cats, tho',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': preprint_ref,
            'related': cited_ref,
        },
        {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'},
        },
    ]
    change_set = ChangeSetBuilder(MutableGraph.from_jsonld(nodes), normalized_data).build_change_set()
    change_set.accept()

    assert models.Preprint.objects.count() == 1
    generic_works = models.CreativeWork.objects.filter(type='share.creativework')
    assert generic_works.count() == 1

    p = models.Preprint.objects.first()
    c = models.AbstractCreativeWork.objects.get(title='Cats, tho')

    assert p.related_works.count() == 1
    assert p.related_works.first() == c

    p_outgoing = p.outgoing_creative_work_relations
    assert p_outgoing.count() == 1
    assert p_outgoing.first()._meta.model_name == 'cites'
    assert p_outgoing.first().related == c

    c_incoming = c.incoming_creative_work_relations
    assert c_incoming.count() == 1
    assert c_incoming.first()._meta.model_name == 'cites'
    assert c_incoming.first().subject == p
def test_generic_creative_work(self, normalized_data):
    '''
    A Preprint with an Identifier exists. Accept a changeset whose work
    node is a generic creativework with the same Identifier and another
    title. The stored Preprint should take the new title while remaining
    a Preprint.
    '''
    old_title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    original_nodes = [
        {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': old_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'},
        },
    ]
    original_graph = MutableGraph.from_jsonld(original_nodes)
    original_change_set = ChangeSetBuilder(original_graph, normalized_data).build_change_set()
    preprint, identifier = original_change_set.accept()
    stored_id = preprint.id

    assert identifier.uri == uri
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=stored_id).title == old_title

    new_title = 'Ambidextrous Earthquakes'
    updated_nodes = [
        {
            '@id': '_:1234',
            '@type': 'creativework',
            'title': new_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
        },
        {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'creativework'},
        },
    ]
    graph = MutableGraph.from_jsonld(updated_nodes)
    change_set = ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set()
    change_set.accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=stored_id).title == new_title