Example #1
0
    def do_run(self, *args, **kwargs):
        '''
        Turn this task's NormalizedData into a ChangeSet and, when the source
        is a robot or trusted, accept it.

        The graph processing, change set creation and acceptance all happen
        inside one transaction. Any exception is logged and converted into a
        retry with a 10 second countdown.
        '''
        # Fetch the ContentTypes of every 'share' model up front, in one query
        share_models = apps.get_models('share')
        ContentType.objects.get_for_models(*share_models,
                                           for_concrete_models=False)

        started_at = datetime.datetime.utcnow().isoformat()
        logger.info('%s started make JSON patches for NormalizedData %s at %s',
                    self.started_by, self.normalized.id, started_at)

        try:
            with transaction.atomic():
                change_graph = ChangeGraph(
                    self.normalized.data['@graph'],
                    namespace=self.normalized.source.username)
                change_graph.process()
                change_set = ChangeSet.objects.from_graph(
                    change_graph, self.normalized.id)
                if change_set and (self.source.is_robot
                                   or self.source.is_trusted):
                    # TODO: verify change set is not overwriting user created object
                    change_set.accept()
        except Exception as e:
            logger.info(
                'Failed make JSON patches for NormalizedData %s with exception %s. Retrying...',
                self.normalized.id, e)
            raise self.retry(countdown=10, exc=e)

        finished_at = datetime.datetime.utcnow().isoformat()
        logger.info(
            'Finished make JSON patches for NormalizedData %s by %s at %s',
            self.normalized.id, self.started_by, finished_at)
Example #2
0
    def _apply_changes(self, job, normalized_datum):
        '''
        Process the datum's graph, accept the resulting change set when its
        source qualifies, and return the ids of creative works to index.

        Returns None when the job was rescheduled after a conflict or when
        nothing was accepted. On IngestConflict/OperationalError the job's
        retry counter is bumped and saved; once it exceeds MAX_RETRIES the
        exception is re-raised instead of rescheduling.
        '''
        accepted = None

        try:
            # Load all relevant ContentTypes in a single query
            share_models = apps.get_models('share')
            ContentType.objects.get_for_models(*share_models, for_concrete_models=False)

            with transaction.atomic():
                change_graph = ChangeGraph(
                    normalized_datum.data['@graph'],
                    namespace=normalized_datum.source.username,
                )
                change_graph.process()
                change_set = ChangeSet.objects.from_graph(change_graph, normalized_datum.id)
                if change_set and (
                    normalized_datum.source.is_robot
                    or normalized_datum.source.is_trusted
                    or Source.objects.filter(user=normalized_datum.source).exists()
                ):
                    accepted = change_set.accept()

        # Retry if it was just the wrong place at the wrong time
        except (exceptions.IngestConflict, OperationalError):
            job.retries = (job.retries or 0) + 1
            job.save(update_fields=('retries',))
            if job.retries > self.MAX_RETRIES:
                raise
            job.reschedule()
            return

        if not accepted:
            return  # Nothing to index

        # Works that were added or directly updated ...
        touched_ids = {
            obj.id for obj in accepted
            if isinstance(obj, AbstractCreativeWork)
        }
        # ... plus works that matched without changing, in case any related objects did
        matched_ids = {
            node.instance.id for node in change_graph.nodes
            if isinstance(node.instance, AbstractCreativeWork)
        }

        return list(touched_ids | matched_ids)
Example #3
0
    def test_accept_subject(self, normalized_data_id):
        '''
        With a 'Felines' subject already stored, accept a graph holding a
        preprint tagged with a subject of that name. Afterwards exactly one
        preprint must be reachable through subjects named 'Felines'.
        '''
        models.Subject.objects.bulk_create([models.Subject(name='Felines')])

        stored_subjects = models.Subject.objects.filter(name='Felines')
        assert stored_subjects.count() == 1

        subject_node = {
            '@id': '_:987',
            '@type': 'subject',
            'name': 'Felines'
        }
        through_node = {
            '@id': '_:678',
            '@type': 'throughsubjects',
            'subject': {'@id': '_:987', '@type': 'subject'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph([subject_node, through_node, preprint_node])

        graph.process()
        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        tagged_preprints = models.Preprint.objects.filter(subjects__name='Felines')
        assert tagged_preprints.count() == 1
        assert tagged_preprints.first().title == 'All About Cats'
Example #4
0
 def test_normalize_workidentifier(self, input, output, Graph):
     '''
     Process a graph holding one WorkIdentifier built from `input` (without
     disambiguation) and compare its serialization with the identifier built
     from `output`, or with an empty list when `output` is falsy.
     '''
     change_graph = ChangeGraph(
         Graph(WorkIdentifier(uri=input, creative_work=None)))
     change_graph.process(disambiguate=False)

     # Serialize before building the expectation, keeping the Graph calls in order
     serialized = change_graph.serialize()
     if output:
         expected = Graph(
             WorkIdentifier(uri=output, parse=True, creative_work=None))
     else:
         expected = []

     assert serialized == expected
Example #5
0
    def test_add_relation_related(self, normalized_data_id):
        '''
        An article with a known URI exists. Accept a preprint that cites a
        generic creative work carrying that same URI. No extra work may be
        created: the preprint ends up with one outgoing 'cites' relation to
        the article, and the article with the matching incoming relation.
        '''

        uri = 'http://osf.io/special-snowflake'

        article_node = {
            '@id': '_:1234',
            '@type': 'article',
            'title': 'All About Cats',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        article_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'article'}
        }
        seed_graph = ChangeGraph([article_node, article_identifier])
        models.ChangeSet.objects.from_graph(seed_graph, normalized_data_id).accept()

        assert models.Article.objects.count() == 1

        preprint_node = {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay too',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}]
        }
        cites_node = {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': {'@id': '_:1234', '@type': 'preprint'},
            'related': {'@id': '_:2345', '@type': 'creativework'},
        }
        cited_work_node = {
            '@id': '_:2345',
            '@type': 'creativework',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}]
        }
        cited_identifier = {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'}
        }
        graph = ChangeGraph([preprint_node, cites_node, cited_work_node, cited_identifier])
        graph.process()
        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1
        generic_works = models.CreativeWork.objects.filter(type='share.creativework')
        assert generic_works.count() == 0

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        outgoing = dog.outgoing_creative_work_relations
        assert outgoing.count() == 1
        assert outgoing.first()._meta.model_name == 'cites'
        assert outgoing.first().related == cat

        incoming = cat.incoming_creative_work_relations
        assert incoming.count() == 1
        assert incoming.first()._meta.model_name == 'cites'
        assert incoming.first().subject == dog
Example #6
0
    def test_is_blank(self):
        '''
        Once a node's `_id` is overwritten with '1234', `is_blank` must be
        exactly False.
        '''
        person = {'@id': '_:1234', '@type': 'person'}
        graph = ChangeGraph([person])
        node = graph.nodes[0]

        node._id = '1234'
        assert node.is_blank is False
Example #7
0
    def test_is_blank(self):
        '''
        Build a single-person graph, replace the node's `_id` with '1234' and
        check that the node reports `is_blank` as False.
        '''
        nodes = ChangeGraph([
            {'@id': '_:1234', '@type': 'person'},
        ]).nodes
        first_node = nodes[0]

        first_node._id = '1234'
        assert first_node.is_blank is False
Example #8
0
 def test_create_extra(self):
     '''
     A tag node created through a graph built with namespace 'testing' must
     expose that namespace.
     '''
     tag_attrs = {
         'name': 'Foo',
         'extra': {
             'tag': 'Foo'
         }
     }
     graph = ChangeGraph([], namespace='testing')
     created = graph.create(None, 'tag', tag_attrs)
     assert created.namespace == 'testing'
Example #9
0
    def test_delete_cascade(self, queryset, deltas, Graph):
        '''
        Accept the class's initial graph, delete `queryset`, and check that
        each model's row count changed by the delta given in `deltas`.
        '''
        graph = ChangeGraph(Graph(*self.initial))
        graph.process(disambiguate=False)
        normalized_id = factories.NormalizedDataFactory().id
        ChangeSet.objects.from_graph(graph, normalized_id).accept()

        # Snapshot the row count of every model we have an expectation for
        counts_before = {}
        for model in deltas:
            counts_before[model] = model.objects.count()

        queryset.delete()

        for model, expected_delta in deltas.items():
            assert model.objects.count() - counts_before[model] == expected_delta
Example #10
0
 def test_external_reference(self):
     '''
     Building a graph whose contributor points at a person with the
     non-blank id 8 must not raise, even though that person is not in the
     graph.
     '''
     contributor = {
         '@id': '_:5678',
         '@type': 'contributor',
         'person': {
             '@id': 8,
             '@type': 'person'
         }
     }
     payload = {'@graph': [contributor]}
     ChangeGraph.from_jsonld(payload, disambiguate=False)
Example #11
0
 def test_unresolved_reference(self):
     '''
     A contributor pointing at the blank person id '_:1234', which is not
     defined in the graph, must raise UnresolvableReference.
     '''
     contributor = {
         '@id': '_:5678',
         '@type': 'contributor',
         'person': {
             '@id': '_:1234',
             '@type': 'person'
         }
     }
     payload = {'@graph': [contributor]}
     with pytest.raises(UnresolvableReference):
         ChangeGraph.from_jsonld(payload, disambiguate=False)
Example #12
0
    def test_delete_cascade(self, queryset, deltas, Graph):
        '''
        After accepting the initial graph and deleting `queryset`, every
        model listed in `deltas` must have gained or lost exactly the stated
        number of rows.
        '''
        seed_graph = ChangeGraph(Graph(*self.initial))
        seed_graph.process(disambiguate=False)
        seed_change_set = ChangeSet.objects.from_graph(
            seed_graph, factories.NormalizedDataFactory().id)
        seed_change_set.accept()

        row_counts = {m: m.objects.count() for m in deltas}

        queryset.delete()

        for model, delta in deltas.items():
            observed = model.objects.count() - row_counts[model]
            assert observed == delta
Example #13
0
    def test_generic_creative_work(self, normalized_data_id):
        '''
        Start from a Preprint that owns an identifier. Accept a second
        changeset describing a plain CreativeWork with the same identifier
        URI and a new title. The stored work must stay a Preprint and pick
        up the new title.
        '''
        uri = 'http://osf.io/special-snowflake'
        old_title = 'Ambiguous Earthquakes'

        preprint_node = {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': old_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        preprint_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'}
        }
        original_graph = ChangeGraph([preprint_node, preprint_identifier])
        original_change_set = models.ChangeSet.objects.from_graph(
            original_graph, normalized_data_id)

        preprint, identifier = original_change_set.accept()
        preprint_id = preprint.id

        assert identifier.uri == uri
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_id).title == old_title

        new_title = 'Ambidextrous Earthquakes'

        generic_node = {
            '@id': '_:1234',
            '@type': 'creativework',
            'title': new_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        generic_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'creativework'}
        }
        graph = ChangeGraph([generic_node, generic_identifier])

        graph.process()
        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_id).title == new_title
Example #14
0
    def test_no_timetraveling(self, Graph):
        '''
        Accept a publication update, then process an update for the same
        publication with an earlier `date_updated`. The older graph's first
        node must carry only the title in its change, not `is_deleted`.
        '''
        newer_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
            is_deleted=False,
        )
        newer_graph = ChangeGraph(Graph(newer_publication))

        newer_graph.process()
        newer_change_set = ChangeSet.objects.from_graph(
            newer_graph, NormalizedDataFactory().id)
        newer_change_set.accept()

        older_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
            is_deleted=True,
            title='Not Previously Changed',
        )
        older_graph = ChangeGraph(Graph(older_publication))

        older_graph.process()
        expected_change = {'title': 'Not Previously Changed'}
        assert older_graph.nodes[0].change == expected_change
Example #15
0
    def test_can_delete_work(self, john_doe, normalized_data_id):
        '''
        Create a preprint with an identifier, then accept a second graph for
        the same identifier that sets `is_deleted`. The stored preprint must
        end up flagged as deleted.
        '''
        identifier_node = {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': 'http://osf.io/faq',
            'creative_work': {'@id': '_:789', '@type': 'preprint'}
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph([identifier_node, preprint_node])

        graph.process()
        change_set = models.ChangeSet.objects.from_graph(graph, normalized_data_id)

        preprint, identifier = change_set.accept()

        assert preprint.is_deleted is False

        deleting_identifier_node = {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': 'http://osf.io/faq',
            'creative_work': {'@id': '_:789', '@type': 'preprint'}
        }
        deleted_preprint_node = {
            '@id': '_:789',
            'is_deleted': True,
            '@type': 'preprint',
        }
        graph = ChangeGraph([deleting_identifier_node, deleted_preprint_node])
        graph.process()

        deleting_change_set = models.ChangeSet.objects.from_graph(graph, normalized_data_id)
        deleting_change_set.accept()

        preprint.refresh_from_db()
        assert preprint.is_deleted is True
Example #16
0
    def test_no_merge_on_blank_value(self, Graph):
        '''
        Two publisher relations that both have an empty `cited_as` but point
        at different organizations must stay separate: after the second
        changeset there are two publishers and two organizations on the one
        publication.
        '''
        first_publication = Publication(
            identifiers=[WorkIdentifier(1)],
            agent_relations=[
                Publisher(cited_as='', agent=Organization(1)),
            ])
        initial_cg = ChangeGraph(Graph(first_publication))
        initial_cg.process()
        initial_change_set = ChangeSet.objects.from_graph(
            initial_cg, NormalizedDataFactory().id)
        initial_change_set.accept()

        assert models.Publication.objects.count() == 1
        assert models.Publisher.objects.count() == 1
        assert models.Organization.objects.count() == 1

        second_publication = Publication(
            identifiers=[WorkIdentifier(1)],
            agent_relations=[
                Publisher(cited_as='', agent=Organization(1)),
                Publisher(cited_as='', agent=Organization(2)),
            ])

        next_cg = ChangeGraph(Graph(second_publication))
        next_cg.process()
        next_change_set = ChangeSet.objects.from_graph(
            next_cg, NormalizedDataFactory().id)
        next_change_set.accept()

        assert models.Publication.objects.count() == 1
        assert models.Publisher.objects.count() == 2
        assert models.Organization.objects.count() == 2
Example #17
0
def change_node():
    '''Return the first node of a graph holding a single blank person.'''
    person = {
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'No',
        'family_name': 'Matter',
    }
    graph = ChangeGraph([person])
    return graph.nodes[0]
Example #18
0
    def test_subject_accept(self, normalized_data_id):
        '''
        With a 'Felines' subject stored, accept a JSON-LD graph containing a
        preprint tagged with a subject of that name; exactly one preprint
        must then be found through subjects named 'Felines'.
        '''
        Subject.objects.bulk_create([Subject(name='Felines', lineages=[])])

        assert Subject.objects.filter(name='Felines').count() == 1

        subject_node = {
            '@id': '_:987',
            '@type': 'subject',
            'name': 'Felines'
        }
        through_node = {
            '@id': '_:678',
            '@type': 'throughsubjects',
            'subject': {'@id': '_:987', '@type': 'subject'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph.from_jsonld({
            '@graph': [subject_node, through_node, preprint_node]
        })

        ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        tagged_preprints = Preprint.objects.filter(subjects__name='Felines')
        assert tagged_preprints.count() == 1
        assert tagged_preprints.first().title == 'All About Cats'
Example #19
0
def create_graph_dependencies():
    '''
    Return a graph of a person, a preprint, and the Creator relation that
    links the two.
    '''
    person = {
        '@id': '_:123',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    creator = {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
        'related_agents': [{'@id': '_:456', '@type': 'Creator'}]
    }
    return ChangeGraph([person, creator, preprint])
Example #20
0
    def test_topological_sort_many_to_many(self):
        '''
        For a preprint -> contributor -> person chain, the graph's nodes must
        come out as preprint, person, contributor.
        '''
        preprint = {
            '@id': '_:91011',
            '@type': 'preprint',
            'contributors': [{'@id': '_:5678', '@type': 'contributor'}]
        }
        contributor = {
            '@id': '_:5678',
            '@type': 'contributor',
            'person': {'@id': '_:1234', '@type': 'person'}
        }
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
        }
        graph = ChangeGraph.from_jsonld(
            {'@graph': [preprint, contributor, person]},
            disambiguate=False)

        assert len(graph.nodes) == 3
        assert graph.nodes[0].id == '_:91011'
        assert graph.nodes[1].id == '_:1234'
        assert graph.nodes[2].id == '_:5678'
Example #21
0
def create_graph_dependencies():
    '''
    Return a non-disambiguated graph of a person, a preprint, and the
    contributor record joining them.
    '''
    person = {
        '@id': '_:123',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    contributor = {
        '@id': '_:456',
        '@type': 'contributor',
        'person': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
    }
    payload = {'@graph': [person, contributor, preprint]}
    return ChangeGraph.from_jsonld(payload, disambiguate=False)
Example #22
0
    def test_update_dependencies_accept(self, john_doe, normalized_data_id):
        '''
        Accept a graph that renames an existing person and adds a preprint
        they contribute to; both the rename and the new preprint must be
        stored.
        '''
        person_node = {
            '@id': john_doe.pk,
            '@type': 'person',
            'given_name': 'Jane',
        }
        contributor_node = {
            '@id': '_:456',
            '@type': 'contributor',
            'person': {'@id': john_doe.pk, '@type': 'person'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph.from_jsonld({
            '@graph': [person_node, contributor_node, preprint_node]
        })

        ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        john_doe.refresh_from_db()

        assert john_doe.given_name == 'Jane'
        contributed = Preprint.objects.filter(contributor__person=john_doe)
        assert contributed.count() == 1
        assert contributed.first().title == 'All About Cats'
Example #23
0
    def test_update_dependencies_accept(self, john_doe, normalized_data_id):
        '''
        Refer to an existing person by their encoded id, rename them, and add
        a preprint they are Creator of; the rename and the new preprint must
        both be stored.
        '''
        person_node = {
            '@id': IDObfuscator.encode(john_doe),
            '@type': 'person',
            'given_name': 'Jane',
        }
        creator_node = {
            '@id': '_:456',
            '@type': 'Creator',
            'agent': {
                '@id': IDObfuscator.encode(john_doe),
                '@type': 'person'
            },
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph([person_node, creator_node, preprint_node])

        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        john_doe.refresh_from_db()

        assert john_doe.given_name == 'Jane'
        authored = models.Preprint.objects.filter(agent_relations__agent=john_doe)
        assert authored.count() == 1
        assert authored.first().title == 'All About Cats'
Example #24
0
def create_graph():
    '''Return a graph containing one blank person, Jane Doe.'''
    jane = {
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    return ChangeGraph([jane])
Example #25
0
    def test_topological_sort_many_to_many(self):
        '''
        For a preprint, a person and the contributor joining them, the
        contributor must be the last of the graph's three nodes.
        '''
        preprint = {
            '@id': '_:91011',
            '@type': 'preprint',
            'contributors': [{'@id': '_:5678', '@type': 'contributor'}]
        }
        contributor = {
            '@id': '_:5678',
            '@type': 'contributor',
            'agent': {'@id': '_:1234', '@type': 'person'},
            'creative_work': {'@id': '_:91011', '@type': 'preprint'},
        }
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
        }
        graph = ChangeGraph([preprint, contributor, person])

        nodes = graph.nodes
        assert len(nodes) == 3
        # The checks on the first two positions were already disabled in the
        # original test; only the last position is asserted.
        # assert graph.nodes[0].id == '_:1234'
        # assert graph.nodes[1].id == '_:91011'
        assert graph.nodes[2].id == '_:5678'
Example #26
0
    def test_no_changes(self, Graph):
        '''
        Accepting the `initial` graph and then submitting the very same graph
        again must yield no change set at all.
        '''
        first_graph = ChangeGraph(Graph(*initial))
        first_graph.process()
        first_change_set = ChangeSet.objects.from_graph(
            first_graph, NormalizedDataFactory().id)
        first_change_set.accept()

        Graph.discarded_ids.clear()
        repeat_graph = ChangeGraph(Graph(*initial))
        repeat_graph.process()
        repeat_change_set = ChangeSet.objects.from_graph(
            repeat_graph, NormalizedDataFactory().id)
        assert repeat_change_set is None
Example #27
0
    def test_can_delete_work(self, john_doe, normalized_data_id):
        '''
        Create a preprint attached to a link, then accept a second JSON-LD
        graph for the same link that sets `is_deleted` on the preprint. The
        stored preprint must end up flagged as deleted.
        '''
        link_node = {
            '@id': '_:abc',
            '@type': 'link',
            'url': 'https://share.osf.io/faq',
            'type': 'provider',
        }
        through_node = {
            '@id': '_:456',
            '@type': 'throughlinks',
            'link': {'@id': '_:abc', '@type': 'link'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }
        graph = ChangeGraph.from_jsonld({
            '@graph': [link_node, through_node, preprint_node]
        })

        change_set = ChangeSet.objects.from_graph(graph, normalized_data_id)

        link, preprint, _ = change_set.accept()

        assert preprint.is_deleted is False

        deleting_link_node = {
            '@id': '_:abc',
            '@type': 'link',
            'type': 'provider',
            'url': 'https://share.osf.io/faq',
        }
        deleting_through_node = {
            '@id': '_:456',
            '@type': 'throughlinks',
            'link': {'@id': '_:abc', '@type': 'link'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        deleted_preprint_node = {
            '@id': '_:789',
            'is_deleted': True,
            '@type': 'preprint',
            'links': [{'@id': '_:456', '@type': 'throughlinks'}]
        }
        deleting_graph = ChangeGraph.from_jsonld({
            '@graph': [deleting_link_node, deleting_through_node, deleted_preprint_node]
        })
        ChangeSet.objects.from_graph(deleting_graph, normalized_data_id).accept()

        preprint.refresh_from_db()
        assert preprint.is_deleted is True
Example #28
0
def disambiguate(self, normalized_id):
    '''
    Load a NormalizedData row, build and (for qualifying sources) accept its
    change set, then send the affected creative works to the search indexer.

    Any failure while building or accepting the change set triggers a task
    retry with a jittered, capped exponential countdown. Indexing failures
    are logged with their traceback and re-raised.
    '''
    normalized = NormalizedData.objects.select_related('source__source').get(
        pk=normalized_id)

    if self.request.id:
        source_title = normalized.source.source.long_title
        self.update_state(meta={'source': source_title})

    # Load all relevant ContentTypes in a single query
    share_models = apps.get_models('share')
    ContentType.objects.get_for_models(*share_models,
                                       for_concrete_models=False)

    accepted = None

    try:
        with transaction.atomic():
            change_graph = ChangeGraph(normalized.data['@graph'],
                                       namespace=normalized.source.username)
            change_graph.process()
            change_set = ChangeSet.objects.from_graph(change_graph,
                                                      normalized.id)
            if change_set and (
                    normalized.source.is_robot
                    or normalized.source.is_trusted
                    or Source.objects.filter(user=normalized.source).exists()):
                # TODO: verify change set is not overwriting user created object
                accepted = change_set.accept()
    except Exception as e:
        # Random factor in [1, 2) times the backoff, which is capped at 15 minutes
        jitter = random.random() + 1
        backoff = min(
            settings.CELERY_RETRY_BACKOFF_BASE**self.request.retries,
            60 * 15)
        raise self.retry(exc=e, countdown=jitter * backoff)

    if not accepted:
        return

    # Only index creativeworks on the fly, for the moment.
    touched_ids = {
        obj.id for obj in accepted
        if isinstance(obj, AbstractCreativeWork)
    }
    matched_ids = {
        node.instance.id for node in change_graph.nodes
        if isinstance(node.instance, AbstractCreativeWork)
    }
    ids = list(touched_ids | matched_ids)

    try:
        SearchIndexer(self.app).index('creativework', *ids)
    except Exception:
        logger.exception('Could not add results from %r to elasticqueue',
                         normalized)
        raise
Example #29
0
    def test_all_disambiguate(self, input, Graph, normalized_data_id):
        '''
        After a graph's change set is accepted, its nodes still have no
        instance; running GraphDisambiguator.find_instances must give every
        node an instance whose model name equals the node's type.
        '''
        graph = ChangeGraph(Graph(*input))
        ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        for node in graph.nodes:
            assert node.instance is None

        GraphDisambiguator().find_instances(graph)

        for node in graph.nodes:
            assert node.instance
        for node in graph.nodes:
            assert node.instance._meta.model_name == node.type
Example #30
0
 def from_graph(self, graph, disambiguate=False):
     '''
     Store `graph` as a NormalizedData row for `share_source` and return the
     ChangeSet built from the same JSON-LD graph.
     '''
     nd = NormalizedData.objects.create(normalized_data=graph,
                                        source=share_source)
     change_graph = ChangeGraph.from_jsonld(graph, disambiguate=disambiguate)
     return ChangeSet.objects.from_graph(change_graph, nd.pk)
Example #31
0
    def test_change_work_type(self, normalized_data_id):
        '''
        A Project with an identifier exists. Accept a new changeset with a
        Preprint carrying the same identifier URI. The preprint should
        resolve to the existing work, whose type becomes Preprint while its
        title is kept and a second change is recorded on it.
        '''
        uri = 'http://osf.io/special-snowflake'
        title = 'Ambiguous Earthquakes'

        project_node = {
            '@id': '_:1234',
            '@type': 'project',
            'title': title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        project_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'project'}
        }
        cg = ChangeGraph([project_node, project_identifier])

        cg.process()

        original_change_set = models.ChangeSet.objects.from_graph(cg, normalized_data_id)

        work, identifier = original_change_set.accept()
        work_id = work.id

        assert identifier.uri == uri
        assert models.Project.objects.count() == 1
        assert models.Preprint.objects.count() == 0
        assert models.CreativeWork.objects.count() == 1
        assert models.Project.objects.all()[0].changes.count() == 1

        preprint_node = {
            '@id': '_:1234',
            '@type': 'preprint',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        preprint_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'}
        }
        cg = ChangeGraph([preprint_node, preprint_identifier])

        cg.process()
        retyping_change_set = models.ChangeSet.objects.from_graph(cg, normalized_data_id)

        retyping_change_set.accept()

        assert models.Project.objects.count() == 0
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.count() == 1
        assert models.Preprint.objects.get(id=work_id).title == title
        assert models.Preprint.objects.all()[0].changes.count() == 2
Example #32
0
def update_graph(jane_doe):
    '''
    Build a ChangeGraph from a JSON-LD document holding a single person node
    whose '@id' is ``jane_doe.pk`` and whose family name is 'Dough'.
    '''
    person_node = {
        '@id': jane_doe.pk,
        '@type': 'person',
        'family_name': 'Dough',
    }
    document = {'@graph': [person_node]}
    return ChangeGraph.from_jsonld(document)
Example #33
0
    def test_relationships(self):
        '''
        Build a two-node graph (a contributor pointing at a person) and check
        the node at index 1: it is the contributor, it has no plain
        attributes, and its single relation 'agent' points at the person.
        '''
        contributor_node = {
            '@id': '_:5678',
            '@type': 'contributor',
            'agent': {'@id': '_:1234', '@type': 'person'},
        }
        person_node = {'@id': '_:1234', '@type': 'person'}

        graph = ChangeGraph([contributor_node, person_node])
        node = graph.nodes[1]

        assert node.type == 'contributor'
        assert node.attrs == {}
        assert len(node.related()) == 1
        assert node.related('agent').related.id == '_:1234'
        assert node.related('agent').related.type == 'person'
Example #34
0
    def do_run(self, *args, **kwargs):
        '''
        Build a ChangeSet from this task's NormalizedData and accept it when
        the source is a robot or is trusted.

        The graph processing, ChangeSet creation and acceptance all run inside
        one ``transaction.atomic()`` block. Any exception raised there is
        logged and re-raised through ``self.retry`` with a 10 second countdown.
        '''
        # Load all relevant ContentTypes in a single query
        share_models = apps.get_models('share')
        ContentType.objects.get_for_models(*share_models, for_concrete_models=False)

        logger.info(
            '%s started make JSON patches for NormalizedData %s at %s',
            self.started_by,
            self.normalized.id,
            datetime.datetime.utcnow().isoformat(),
        )

        try:
            with transaction.atomic():
                change_graph = ChangeGraph(
                    self.normalized.data['@graph'],
                    namespace=self.normalized.source.username,
                )
                change_graph.process()
                change_set = ChangeSet.objects.from_graph(change_graph, self.normalized.id)
                if change_set:
                    # TODO: verify change set is not overwriting user created object
                    if self.source.is_robot or self.source.is_trusted:
                        change_set.accept()
        except Exception as exc:
            logger.info(
                'Failed make JSON patches for NormalizedData %s with exception %s. Retrying...',
                self.normalized.id,
                exc,
            )
            raise self.retry(countdown=10, exc=exc)

        logger.info(
            'Finished make JSON patches for NormalizedData %s by %s at %s',
            self.normalized.id,
            self.started_by,
            datetime.datetime.utcnow().isoformat(),
        )
Example #35
0
    def test_reaccept(self, input, Graph):
        '''
        Seed the database from ``initial``, then submit the graph built from
        ``input`` twice. The first submission must yield a ChangeSet (which is
        accepted); the second must yield ``None``.
        '''
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process()
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        Graph.reseed()  # Force new values to be generated

        # First submission of the input: expected to produce changes.
        accepted_graph = ChangeGraph(Graph(*input))
        accepted_graph.process()
        accepted_change_set = ChangeSet.objects.from_graph(accepted_graph, NormalizedDataFactory().id)
        assert accepted_change_set is not None
        accepted_change_set.accept()

        # Second submission of the same input: nothing left to change.
        repeated_graph = ChangeGraph(Graph(*input))
        repeated_graph.process()
        repeated_change_set = ChangeSet.objects.from_graph(repeated_graph, NormalizedDataFactory().id)
        assert repeated_change_set is None
Example #36
0
    def test_no_merge_on_blank_value(self, Graph):
        '''
        Accept a Publication with one Publisher relation whose ``cited_as`` is
        blank, then accept the same Publication with a second blank-``cited_as``
        Publisher for a different Organization.

        The counts asserted afterwards expect one Publication, two Publishers
        and two Organizations, i.e. the two relations stay separate.
        '''
        single_publisher = Publication(
            identifiers=[WorkIdentifier(1)],
            agent_relations=[Publisher(cited_as='', agent=Organization(1))],
        )
        first_graph = ChangeGraph(Graph(single_publisher))
        first_graph.process()
        first_change_set = ChangeSet.objects.from_graph(first_graph, NormalizedDataFactory().id)
        first_change_set.accept()

        assert models.Publication.objects.count() == 1
        assert models.Publisher.objects.count() == 1
        assert models.Organization.objects.count() == 1

        two_publishers = Publication(
            identifiers=[WorkIdentifier(1)],
            agent_relations=[
                Publisher(cited_as='', agent=Organization(1)),
                Publisher(cited_as='', agent=Organization(2)),
            ],
        )
        second_graph = ChangeGraph(Graph(two_publishers))
        second_graph.process()
        second_change_set = ChangeSet.objects.from_graph(second_graph, NormalizedDataFactory().id)
        second_change_set.accept()

        assert models.Publication.objects.count() == 1
        assert models.Publisher.objects.count() == 2
        assert models.Organization.objects.count() == 2
Example #37
0
    def test_no_timetraveling(self, Graph):
        '''
        Accept a sparse Publication update, then process a second sparse
        update for the same work carrying an earlier ``date_updated``.

        The earlier-dated graph sets ``is_deleted`` and a title; the assertion
        expects only the title to show up in the node's computed change.
        '''
        def sparse_publication(**overrides):
            # Same work each time: id 1, sparse, identified by WorkIdentifier(1).
            return Publication(id=1, sparse=True, identifiers=[WorkIdentifier(1)], **overrides)

        later_graph = ChangeGraph(Graph(sparse_publication(
            date_updated='2017-02-03T18:07:53.385000',
            is_deleted=False,
        )))
        later_graph.process()
        later_change_set = ChangeSet.objects.from_graph(later_graph, NormalizedDataFactory().id)
        later_change_set.accept()

        earlier_graph = ChangeGraph(Graph(sparse_publication(
            date_updated='2017-02-03T18:07:50.000000',
            is_deleted=True,
            title='Not Previously Changed',
        )))
        earlier_graph.process()

        expected_change = {'title': 'Not Previously Changed'}
        assert earlier_graph.nodes[0].change == expected_change
Example #38
0
    def test_no_changes(self, Graph):
        '''
        Accept the ``initial`` graph, then rebuild and process the very same
        graph again. The second pass must not produce a ChangeSet.
        '''
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process()
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        Graph.discarded_ids.clear()

        repeated_graph = ChangeGraph(Graph(*initial))
        repeated_graph.process()
        repeated_change_set = ChangeSet.objects.from_graph(repeated_graph, NormalizedDataFactory().id)
        assert repeated_change_set is None
Example #39
0
    def test_split_brain(self, Graph):
        '''
        After accepting ``initial``, process a Preprint that carries two
        WorkIdentifiers. Processing is expected to raise NotImplementedError
        with a "Multiple ... found" message.
        '''
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process()
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        # Multiple matches found for a thing should break
        ambiguous_preprint = Preprint(identifiers=[WorkIdentifier(1), WorkIdentifier(2)])
        ambiguous_graph = ChangeGraph(Graph(ambiguous_preprint))
        with pytest.raises(NotImplementedError) as excinfo:
            ambiguous_graph.process()

        message = excinfo.value.args[0]
        assert message == "Multiple <class 'share.models.creative.Preprint'>s found"
Example #40
0
    def test_disambiguate(self, input, model, delta, Graph):
        '''
        Seed the database from ``initial`` (processed without disambiguation),
        accept the graph built from ``input`` if it yields a ChangeSet, and
        check that the number of ``model`` rows grew by exactly ``delta``.
        '''
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process(disambiguate=False)
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        Graph.reseed()

        # Only rows with a change attached are counted; the original note
        # describes this as a hack to avoid fuzzy counting (presumably by
        # Postgres).
        count_before = model.objects.exclude(change=None).count()

        incoming_graph = ChangeGraph(Graph(*input))
        incoming_graph.process()
        incoming_change_set = ChangeSet.objects.from_graph(incoming_graph, NormalizedDataFactory().id)
        if incoming_change_set is not None:
            incoming_change_set.accept()

        count_after = model.objects.exclude(change=None).count()
        assert (count_after - count_before) == delta
    def test_disambiguate(self, input, model_delta, Graph):
        '''
        Seed the database from ``initial`` (processed without disambiguation),
        accept the graph built from ``input`` if it yields a ChangeSet, and
        check each model in ``model_delta`` grew by its mapped amount.

        Rows are counted per model by filtering on ``type`` equal to the
        model's ``_meta.label_lower``.
        '''
        def typed_count(model):
            return model.objects.filter(type=model._meta.label_lower).count()

        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process(disambiguate=False)
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        Graph.reseed()
        counts_before = {model: typed_count(model) for model in model_delta}

        incoming_graph = ChangeGraph(Graph(*input))
        incoming_graph.process()
        incoming_change_set = ChangeSet.objects.from_graph(incoming_graph, NormalizedDataFactory().id)
        if incoming_change_set is not None:
            incoming_change_set.accept()

        for model, expected_delta in model_delta.items():
            assert typed_count(model) - counts_before[model] == expected_delta
Example #42
0
 def test_normalize_contributor_creator_relation(self, input, output, Graph):
     '''
     Process a CreativeWork whose agent relations are ``input`` (without
     disambiguation) and compare the serialized graph to the same work
     with ``output`` as its agent relations.
     '''
     change_graph = ChangeGraph(Graph(CreativeWork(agent_relations=input)))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     expected = Graph(CreativeWork(agent_relations=output))
     assert actual == expected
Example #43
0
 def test_normalize_person_relation(self, input, output, Graph):
     '''
     Process the graph built from the ``input`` nodes (without
     disambiguation) and compare its serialization to the graph built
     from the ``output`` nodes.
     '''
     change_graph = ChangeGraph(Graph(*input))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     expected = Graph(*output)
     assert actual == expected
Example #44
0
 def test_normalize_agentidentifier(self, input, output, Graph):
     '''
     Process a lone AgentIdentifier whose uri is ``input`` (without
     disambiguation). When ``output`` is truthy the serialized graph must
     match a parsed AgentIdentifier with that uri; otherwise it must be
     an empty list.
     '''
     change_graph = ChangeGraph(Graph(AgentIdentifier(uri=input, agent=None)))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     if output:
         expected = Graph(AgentIdentifier(uri=output, parse=True, agent=None))
     else:
         expected = []
     assert actual == expected
Example #45
0
 def test_create_extra(self):
     '''
     Create a tag node carrying an ``extra`` attribute on an empty graph
     with namespace 'testing' and check the node reports that namespace.
     '''
     empty_graph = ChangeGraph([], namespace='testing')
     tag_attrs = {'name': 'Foo', 'extra': {'tag': 'Foo'}}
     tag_node = empty_graph.create(None, 'tag', tag_attrs)
     assert tag_node.namespace == 'testing'
Example #46
0
 def test_normalize_agentworkrelation(self, input, output, Graph):
     '''
     Process the graph built from ``input`` (without disambiguation) and
     compare its serialization to the graph built from ``output``.
     '''
     change_graph = ChangeGraph(Graph(input))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     expected = Graph(output)
     assert actual == expected
Example #47
0
 def test_normalize_tags_on_work(self, input, output, Graph):
     '''
     Normalize and prune a CreativeWork tagged with ``input``, then
     compare the serialized nodes (sorted by type followed by id) to the
     graph for the same work tagged with ``output``.
     '''
     def type_then_id(node):
         return node.type + str(node.id)

     change_graph = ChangeGraph(Graph(CreativeWork(tags=input)))
     change_graph.normalize()
     change_graph.prune()

     ordered_nodes = sorted(change_graph.nodes, key=type_then_id)
     actual = [node.serialize() for node in ordered_nodes]
     expected = Graph(CreativeWork(tags=output))
     assert actual == expected
Example #48
0
 def from_graph(self, graph, disambiguate=False):
     '''
     Store ``graph`` as a NormalizedData row with ``share_source`` as its
     source, process its '@graph' nodes as a ChangeGraph, and return
     whatever ``ChangeSet.objects.from_graph`` produces for it.

     ``disambiguate`` is forwarded to ``ChangeGraph.process``.
     '''
     normalized = NormalizedData.objects.create(data=graph, source=share_source)

     change_graph = ChangeGraph(graph['@graph'])
     change_graph.process(disambiguate=disambiguate)

     change_set = ChangeSet.objects.from_graph(change_graph, normalized.pk)
     return change_set
Example #49
0
 def test_normalize_organization_institution_name(self, input, output, Graph):
     '''
     Process the graph built from the ``input`` nodes (without
     disambiguation) and compare its serialization to the graph built
     from the ``output`` nodes.
     '''
     change_graph = ChangeGraph(Graph(*input))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     expected = Graph(*output)
     assert actual == expected
Example #50
0
 def test_normalize_agent(self, input, output, Graph):
     '''
     Process the graph built from ``input`` (without disambiguation).
     When ``output`` is truthy the serialization must match the graph
     built from it; otherwise it must be an empty list.
     '''
     change_graph = ChangeGraph(Graph(input))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     if output:
         expected = Graph(output)
     else:
         expected = []
     assert actual == expected
Example #51
0
 def test_normalize_workidentifier(self, input, output, Graph):
     '''
     Process a lone WorkIdentifier whose uri is ``input`` (without
     disambiguation). When ``output`` is truthy the serialized graph must
     match a parsed WorkIdentifier with that uri; otherwise it must be an
     empty list.
     '''
     change_graph = ChangeGraph(Graph(WorkIdentifier(uri=input, creative_work=None)))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     if output:
         expected = Graph(WorkIdentifier(uri=output, parse=True, creative_work=None))
     else:
         expected = []
     assert actual == expected
Example #52
0
    def test_no_timetraveling_many(self, Graph):
        '''
        Accept three sparse updates to the same Publication with increasing
        ``date_updated`` values, then process a fourth whose date falls
        between the first and the second.

        The fourth graph carries ``is_deleted``, a title and a description;
        the assertion expects only the description in its computed change.
        '''
        def sparse_publication_graph(**fields):
            # Every update targets the same work: id 1, sparse.
            return ChangeGraph(Graph(Publication(id=1, sparse=True, **fields)))

        def process_and_accept(change_graph):
            change_graph.process()
            ChangeSet.objects.from_graph(change_graph, NormalizedDataFactory().id).accept()

        # 2016: sets is_deleted, title and description.
        process_and_accept(sparse_publication_graph(
            is_deleted=True,
            title='The first title',
            description='The first description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2016-02-03T18:07:50.000000',
        ))

        # 2017-02-03 18:07:50: flips is_deleted back.
        process_and_accept(sparse_publication_graph(
            is_deleted=False,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
        ))

        # 2017-02-03 18:07:53: replaces the title.
        process_and_accept(sparse_publication_graph(
            title='The final title',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
        ))

        # 2017-01-01: dated after the first update but before the other two.
        in_between_graph = sparse_publication_graph(
            is_deleted=True,
            title='The second title',
            description='The final description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-01-01T18:00:00.000000',
        )
        in_between_graph.process()

        expected_change = {'description': 'The final description'}
        assert in_between_graph.nodes[0].change == expected_change
Example #53
0
    def test_normalize_tag(self, input, output, Graph):
        '''
        Process a CreativeWork carrying the single tag ``input`` (without
        disambiguation) and compare the serialized graph to the same work
        tagged with ``output``.
        '''
        tagged_work = CreativeWork(tags=[input])
        change_graph = ChangeGraph(Graph(tagged_work))
        change_graph.process(disambiguate=False)

        actual = change_graph.serialize()
        expected = Graph(CreativeWork(tags=output))
        assert actual == expected
Example #54
0
 def test_normalize_creativework(self, input, output, Graph):
     '''
     Process a CreativeWork built from the ``input`` keyword arguments
     (without disambiguation) and compare the serialized graph to a
     CreativeWork built from the ``output`` keyword arguments.
     '''
     change_graph = ChangeGraph(Graph(CreativeWork(**input)))
     change_graph.process(disambiguate=False)

     actual = change_graph.serialize()
     expected = Graph(CreativeWork(**output))
     assert actual == expected
Example #55
0
    def test_add_work_with_existing_relation(self, normalized_data_id):
        '''
        Harvest a work that has a relation to some work identified by a DOI.
        The related work should be a CreativeWork with no information except
        the one Identifier.
        Then harvest a work with the same DOI. It should update the
        CreativeWork's type and attributes instead of creating a new work.
        '''

        uri = 'http://osf.io/special-snowflake'

        # First harvest: a preprint that 'cites' a bare creativework, which in
        # turn is known only by a single workidentifier with `uri`.
        # NOTE(review): unlike the second graph below, this ChangeGraph is
        # passed to from_graph without a prior process() call -- confirm that
        # is intentional.
        models.ChangeSet.objects.from_graph(ChangeGraph([{
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}]
        }, {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': {'@id': '_:1234', '@type': 'preprint'},
            'related': {'@id': '_:2345', '@type': 'creativework'},
        }, {
            '@id': '_:2345',
            '@type': 'creativework',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}]
        }, {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'}
        }]), normalized_data_id).accept()

        # After the first harvest: one plain creativework placeholder, one
        # preprint, and no article yet.
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1
        assert models.Preprint.objects.count() == 1
        assert models.Article.objects.count() == 0

        # Second harvest: an article carrying the same identifier uri as the
        # placeholder work above.
        change = ChangeGraph([{
            '@id': '_:1234',
            '@type': 'article',
            'title': 'All About Cats',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'article'}
        }])
        change.process()

        models.ChangeSet.objects.from_graph(change, normalized_data_id).accept()

        # The placeholder is expected to have become the article rather than
        # a new row being added; the preprint is untouched.
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        # The 'cites' relation created by the first harvest must now link the
        # preprint (subject) to the article (related), seen from both ends.
        assert dog.outgoing_creative_work_relations.count() == 1
        assert dog.outgoing_creative_work_relations.first()._meta.model_name == 'cites'
        assert dog.outgoing_creative_work_relations.first().related == cat
        assert cat.incoming_creative_work_relations.count() == 1
        assert cat.incoming_creative_work_relations.first()._meta.model_name == 'cites'
        assert cat.incoming_creative_work_relations.first().subject == dog