Ejemplo n.º 1
0
 def test_insert_claim(self):
     """
     It should be able to create a series of claims.

     Three claims for the same bibcode / ORCID iD pair are handed to
     ``importer.insert_claims``: two as plain dicts (one carrying only a
     ``provenance``, one carrying only a ``status``) and one built with
     ``importer.create_claim``. The call must return three items, and
     three ``ClaimsLog`` rows must exist for that bibcode afterwards.
     """
     r = importer.insert_claims([
                 {'bibcode': 'b123456789123456789',
                  'orcidid': '0000-0000-0000-0001',
                  'provenance' : 'ads test'},
                 {'bibcode': 'b123456789123456789',
                  'orcidid': '0000-0000-0000-0001',
                  'status' : 'updated'},
                 importer.create_claim(bibcode='b123456789123456789',
                                       orcidid='0000-0000-0000-0001',
                                       status='removed')
             ])
     # assertEqual instead of the deprecated assertEquals alias.
     self.assertEqual(len(r), 3)

     # Compare the count directly so a failure reports the actual number
     # of rows rather than just "False is not true".
     stored = (self.app.session.query(ClaimsLog)
               .filter_by(bibcode='b123456789123456789').all())
     self.assertEqual(len(stored), 3)
Ejemplo n.º 2
0
         self.logger.warning('Error processing a record: '
             '{0} ({1})'.format(w,
                                traceback.format_exc()))
         continue
     except TypeError, e:
         self.logger.warning('Error processing a record: '
             '{0} ({1})'.format(w,
                                traceback.format_exc()))
         continue
 
 
 #always insert a record that marks the beginning of a full-import
 #TODO: record orcid's last-modified-date
 to_claim.append(importer.create_claim(bibcode='', 
                                           orcidid=orcidid, 
                                           provenance=self.__class__.__name__, 
                                           status='#full-import',
                                           date=updt
                                           ))
 
 # find difference between what we have and what orcid has
 claims_we_have = set(updated.keys()).difference(set(removed.keys()))
 claims_orcid_has = set(orcid_present.keys())
 
 # those guys will be added (with ORCID date signature)
 for c in claims_orcid_has.difference(claims_we_have):
     claim = orcid_present[c]
     to_claim.append(importer.create_claim(bibcode=claim[0], 
                                           orcidid=orcidid, 
                                           provenance=claim[2], 
                                           status='claimed', 
                                           date=claim[1])
Ejemplo n.º 3
0
    def test_ingester_logic(self, updater_retrieve_metadata):
        """Has to be able to diff an ORCID profile against the existing
        claims log in the database.

        The three HTTP endpoints (profile export, ORCID updates, Solr query)
        are stubbed with httpretty from files under
        ``TEST_UNIT_DIR/stub_data``. The test then walks through four stages:

        1. a first import, which must log one ``#full-import`` marker
           followed by seven ``claimed`` records;
        2. a repeated import, which must leave the log at eight records;
        3. an import after the stored ``#full-import`` marker was shifted by
           1000 microseconds, which must append a new marker plus one
           ``unchanged`` record per existing claim;
        4. an import after claim id 5 was deleted and a ``removed`` record was
           inserted for another bibcode, which must re-claim both of those
           bibcodes and mark the remaining five as ``unchanged``.

        :param updater_retrieve_metadata: not used in the body; presumably a
            mock injected by a patch decorator outside this block -- TODO
            confirm against the decorator on this method.
        """
        # self.maxDiff = None
        orcidid = "0000-0003-3041-2092"
        publish_target = "ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish"
        first_start = "1974-11-09T22:56:52.518002+00:00"
        last_check = "2015-11-05T11:37:36.381000+00:00"
        claim_date = "2015-09-16T10:59:01.721000+00:00"
        import_date = "2015-11-05T16:37:33.381000+00:00"
        shifted_import_date = "2015-11-05T16:37:33.382000+00:00"

        def stub_body(suffix):
            """Read the stub file for this ORCID iD, closing the handle afterwards."""
            path = os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data", orcidid + suffix)
            with open(path) as stub:
                return stub.read()

        def record(rec_id, status, bibcode, provenance, created=claim_date):
            """Build one expected ``ClaimsLog.toJSON()`` dict."""
            return {
                "status": status,
                "bibcode": bibcode,
                "created": created,
                "provenance": provenance,
                "orcidid": orcidid,
                "id": rec_id,
            }

        def logged_claims(session):
            """Return every ClaimsLog row in query order, serialized with toJSON()."""
            return [x.toJSON() for x in session.query(ClaimsLog).all()]

        def last_check_value(session):
            """Return the value stored under the ``last.check`` key."""
            return session.query(KeyValue).filter(KeyValue.key == "last.check").first().value

        httpretty.register_uri(
            httpretty.GET,
            self.app.config["API_ORCID_EXPORT_PROFILE"] % orcidid,
            content_type="application/json",
            body=stub_body(".ads.json"),
        )
        httpretty.register_uri(
            httpretty.GET,
            re.compile(self.app.config["API_ORCID_UPDATES_ENDPOINT"] % ".*"),
            content_type="application/json",
            body=stub_body(".orcid-updates.json"),
        )
        httpretty.register_uri(
            httpretty.GET,
            re.compile(self.app.config["API_SOLR_QUERY_ENDPOINT"] + ".*"),
            content_type="application/json",
            body=stub_body(".solr.json"),
        )

        # Records with ids 2-8, as written by the very first import.
        first_claims = [
            record(2, u"claimed", u"2015arXiv150304194A", u"NASA ADS"),
            record(3, u"claimed", u"2015AAS...22533655A", u"NASA ADS"),
            record(4, u"claimed", u"2014arXiv1406.4542H", u"NASA ADS"),
            record(5, u"claimed", u"2015arXiv150305881C", u"Roman Chyla"),
            record(6, u"claimed", u"2015ASPC..492..150T", u"NASA ADS"),
            record(7, u"claimed", u"2015ASPC..492..208G", u"NASA ADS"),
            record(8, u"claimed", u"2014AAS...22325503A", u"NASA ADS"),
        ]

        # --- stage 1: first import ------------------------------------------
        with mock.patch(publish_target):
            worker = OrcidImporter.OrcidImporter()
            worker.check_orcid_updates()
            worker.publish.assert_called_with(
                {"orcidid": u"0000-0003-3041-2092", "start": first_start},
                topic="ads.orcid.fresh-claims",
            )
            worker.publish.reset_mock()

            worker.process_payload({"orcidid": u"0000-0003-3041-2092", "start": first_start})
            with app.session_scope() as session:
                self.assertEqual(last_check, last_check_value(session))
                self.assertEqual(
                    logged_claims(session),
                    [record(1, u"#full-import", u"", u"OrcidImporter", created=import_date)] + first_claims,
                )
                # Blank the stored last.check value before repeating the run.
                kv = session.query(KeyValue).filter(KeyValue.key == "last.check").first()
                kv.value = ""
                session.commit()

        # --- stage 2: do the same stuff again (it should not bother with new recs)
        with mock.patch(publish_target):
            worker.check_orcid_updates()
            # A unittest assertion rather than a bare ``assert`` so the check
            # is not stripped when running under ``python -O``.
            self.assertNotEqual(worker.publish.call_args[0][0]["start"], first_start)
            worker.publish.reset_mock()

            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})
            with app.session_scope() as session:
                self.assertEqual(len(session.query(ClaimsLog).all()), 8)
                self.assertEqual(last_check, last_check_value(session))

                # now change the date of the #full-import (this will force the logic to re-evaluate the batch against the
                # existing claims)
                c = session.query(ClaimsLog).filter(ClaimsLog.status == "#full-import").first()
                c.created = c.created + datetime.timedelta(microseconds=1000)

            # --- stage 3: import after the marker was shifted -----------------
            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

            with app.session_scope() as session:
                self.assertEqual(
                    logged_claims(session),
                    [record(1, u"#full-import", u"", u"OrcidImporter", created=shifted_import_date)]
                    + first_claims
                    + [
                        record(9, u"#full-import", u"", u"OrcidImporter", created=import_date),
                        record(10, u"unchanged", u"2015arXiv150304194A", u"OrcidImporter"),
                        record(11, u"unchanged", u"2015AAS...22533655A", u"OrcidImporter"),
                        record(12, u"unchanged", u"2014arXiv1406.4542H", u"OrcidImporter"),
                        record(13, u"unchanged", u"2015arXiv150305881C", u"OrcidImporter"),
                        record(14, u"unchanged", u"2015ASPC..492..150T", u"OrcidImporter"),
                        record(15, u"unchanged", u"2015ASPC..492..208G", u"OrcidImporter"),
                        record(16, u"unchanged", u"2014AAS...22325503A", u"OrcidImporter"),
                    ],
                )

            # --- stage 4: now let's pretend that we have one extra claim and
            # there was one deletion
            with app.session_scope() as session:
                session.query(ClaimsLog).filter(ClaimsLog.id > 8).delete()  # clean up
                session.query(ClaimsLog).filter_by(id=5).delete()
                importer.insert_claims(
                    [
                        importer.create_claim(
                            bibcode="2014AAS...22325503A",
                            orcidid=orcidid,
                            status="removed",
                            date="2015-11-05 11:37:33.381000+00:00",
                        )
                    ]
                )

            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

        with app.session_scope() as session:
            self.assertEqual(
                logged_claims(session),
                [record(1, u"#full-import", u"", u"OrcidImporter", created=shifted_import_date)]
                # id 5 was deleted above, so it no longer appears in the log
                + [claim for claim in first_claims if claim["id"] != 5]
                + [
                    record(
                        9,
                        u"removed",
                        u"2014AAS...22325503A",
                        u"None",
                        created="2015-11-05T11:37:33.381000+00:00",
                    ),
                    record(10, u"#full-import", u"", u"OrcidImporter", created=import_date),
                    record(11, u"claimed", u"2015arXiv150305881C", u"Roman Chyla"),
                    record(12, u"claimed", u"2014AAS...22325503A", u"NASA ADS"),
                    record(13, u"unchanged", u"2014arXiv1406.4542H", u"OrcidImporter"),
                    record(14, u"unchanged", u"2015ASPC..492..150T", u"OrcidImporter"),
                    record(15, u"unchanged", u"2015ASPC..492..208G", u"OrcidImporter"),
                    record(16, u"unchanged", u"2015arXiv150304194A", u"OrcidImporter"),
                    record(17, u"unchanged", u"2015AAS...22533655A", u"OrcidImporter"),
                ],
            )
Ejemplo n.º 4
0
                        orcid_present[bibc.lower().strip()] = (bibc.strip(), get_date(ts.isoformat()), provenance)
                    else:
                        self.logger.warning("Found no bibcode for {0}".format(ids))

                except KeyError, e:
                    self.logger.warning("Error processing a record: " "{0} ({1})".format(w, traceback.format_exc()))
                    continue
                except TypeError, e:
                    self.logger.warning("Error processing a record: " "{0} ({1})".format(w, traceback.format_exc()))
                    continue

            # always insert a record that marks the beginning of a full-import
            # TODO: record orcid's last-modified-date
            to_claim.append(
                importer.create_claim(
                    bibcode="", orcidid=orcidid, provenance=self.__class__.__name__, status="#full-import", date=updt
                )
            )

            # find difference between what we have and what orcid has
            claims_we_have = set(updated.keys()).difference(set(removed.keys()))
            claims_orcid_has = set(orcid_present.keys())

            # those guys will be added (with ORCID date signature)
            for c in claims_orcid_has.difference(claims_we_have):
                claim = orcid_present[c]
                to_claim.append(
                    importer.create_claim(
                        bibcode=claim[0], orcidid=orcidid, provenance=claim[2], status="claimed", date=claim[1]
                    )
                )