Example #1
0
    def test_all_authors_get_viaf_lookup(self):
        # TODO: process_item() covers far more than this test needs, and
        # the code it calls could stand some refactoring. For the moment,
        # though, this seems to be the only test of process_item().

        # Wire the coverage provider up to mock OCLC and VIAF clients.
        mock_oclc = MockOCLCLinkedDataAPI()
        mock_viaf = MockVIAFClient()
        provider = LinkedDataCoverageProvider(
            self._db, api=mock_oclc, viaf_api=mock_viaf
        )

        # Start from a placeholder edition whose existing contributions
        # have been deleted; the contributors are expected to come from
        # OCLC Linked Data.
        edition = self._edition()
        for contribution in edition.contributions:
            self._db.delete(contribution)
        self._db.commit()
        identifier = edition.primary_identifier

        # OCLC Linked Data will mention two authors: the first has only
        # a VIAF ID, the second has a VIAF ID plus a sort name.
        viaf_only = ContributorData(viaf="1")
        viaf_and_sort_name = ContributorData(
            viaf="2", sort_name="Jordan, Robert"
        )
        identifier_data = IdentifierData(
            type=identifier.type, identifier=identifier.identifier
        )
        oclc_metadata = Metadata(
            DataSource.OCLC_LINKED_DATA,
            contributors=[viaf_only, viaf_and_sort_name],
            primary_identifier=identifier_data,
            title=u"foo",
        )
        mock_oclc.queue_info_for(oclc_metadata)

        # Queue one VIAF answer per contributor, in the order the
        # contributors were listed above.
        first_answer = ContributorData(
            viaf="1",
            display_name="Display Name",
            family_name="Family",
            sort_name="Name, Sort",
            wikipedia_name="Wikipedia_Name",
        )
        second_answer = ContributorData(
            viaf="2",
            wikipedia_name="Robert_Jordan_(Author)",
            biography="That guy.",
        )
        lookup1 = (first_answer, None, None)
        lookup2 = (second_answer, None, None)
        mock_viaf.queue_lookup(lookup1, lookup2)

        provider.process_item(identifier)

        # Both authors should now carry the information VIAF returned.
        filled_in = [
            (c.sort_name, c.display_name, c.viaf, c.wikipedia_name,
             c.biography)
            for c in edition.contributors
        ]
        filled_in.sort()
        expected = [
            (u'Jordan, Robert', None, u'2', u'Robert_Jordan_(Author)',
             u'That guy.'),
            (u'Name, Sort', u'Display Name', u'1', u'Wikipedia_Name', None),
        ]
        eq_(expected, filled_in)
Example #2
0
class RedoOCLC(Explain):
    """Redo the OCLC Linked Data coverage for a single identifier.

    Deletes the OCLC Linked Data equivalencies found for the identifier's
    equivalent IDs, runs the identifier through
    LinkedDataCoverageProvider again, and prints an explanation of the
    affected editions before and after.
    """

    def __init__(self):
        # NOTE(review): self._db is read here but never assigned in this
        # class; presumably Explain supplies it -- confirm.
        self.coverage = LinkedDataCoverageProvider(self._db)

    @property
    def oclcld(self):
        """The DataSource looked up for OCLC Linked Data."""
        return DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)

    def run(self):
        """Fix the identifier named on the command line.

        Expects exactly two command-line arguments: the identifier type
        and the identifier itself. Any other count makes the unpacking
        below raise ValueError.
        """
        id_type, identifier = sys.argv[1:]
        identifier, ignore = Identifier.for_foreign_id(
            self._db, id_type, identifier
        )
        self.fix_identifier(identifier)

    def fix_identifier(self, primary_identifier):
        """Collect the identifier's equivalent IDs and redo its coverage.

        :param primary_identifier: the Identifier to fix.
        :return: whatever fix_identifier_with_equivalents() returns.
        """
        equivalent_ids = primary_identifier.equivalent_identifier_ids(
            levels=6, threshold=0)
        return self.fix_identifier_with_equivalents(
            primary_identifier, equivalent_ids
        )

    def fix_identifier_with_equivalents(self, primary_identifier, equivalent_ids):
        """Delete stale OCLC equivalencies, then reprocess the identifier.

        :param primary_identifier: the Identifier to run back through the
            coverage provider.
        :param equivalent_ids: IDs whose OCLC Linked Data equivalencies
            (matched on Equivalency.input_id) are deleted first.
        """
        for edition in primary_identifier.primarily_identifies:
            print("BEFORE")
            self.explain(self._db, edition)
            print("-" * 80)

        # Do the deletions inside a nested transaction.
        t1 = self._db.begin_nested()

        equivalencies = self._db.query(Equivalency).filter(
            Equivalency.data_source == self.oclcld).filter(
                Equivalency.input_id.in_(equivalent_ids)
            )
        print("DELETING %d" % equivalencies.count())
        for e in equivalencies:
            # Every matched equivalency is deleted; only the
            # zero-strength ones are announced individually.
            if e.strength == 0:
                print("DELETING %r" % e)
            self._db.delete(e)
        t1.commit()

        self.coverage.process_item(primary_identifier)

        # The equivalent IDs and the equivalency query used to be
        # recomputed here, but the results were never read, so that
        # unused work has been dropped.
        for edition in primary_identifier.primarily_identifies:
            if edition.work:
                edition.work.calculate_presentation()
            self.explain(self._db, edition)
        print("I WOULD NOW EXPECT EVERYTHING TO BE FINE.")
Example #3
0
class RedoOCLC(Explain):
    """Wipe and regenerate one identifier's OCLC Linked Data equivalencies.

    Prints an explanation of the identifier's editions before the stale
    equivalencies are deleted and again after the identifier has been
    run back through LinkedDataCoverageProvider.
    """

    def __init__(self):
        # NOTE(review): self._db is used but not assigned in this class;
        # presumably it comes from Explain -- confirm.
        self.coverage = LinkedDataCoverageProvider(self._db)

    @property
    def oclcld(self):
        """Return the DataSource looked up for OCLC Linked Data."""
        source_name = DataSource.OCLC_LINKED_DATA
        return DataSource.lookup(self._db, source_name)

    def run(self):
        """Read an identifier type and value from sys.argv and fix it."""
        id_type, foreign_id = sys.argv[1:]
        lookup = Identifier.for_foreign_id(self._db, id_type, foreign_id)
        # for_foreign_id() yields a pair; only the first item is needed.
        identifier, _unused = lookup
        self.fix_identifier(identifier)

    def fix_identifier(self, primary_identifier):
        """Gather the identifier's equivalent IDs, then redo its coverage."""
        related_ids = primary_identifier.equivalent_identifier_ids(
            levels=6, threshold=0
        )
        return self.fix_identifier_with_equivalents(
            primary_identifier, related_ids
        )

    def fix_identifier_with_equivalents(self, primary_identifier,
                                        equivalent_ids):
        """Delete matching OCLC equivalencies and reprocess the identifier.

        :param primary_identifier: Identifier handed back to the coverage
            provider once the old equivalencies are gone.
        :param equivalent_ids: IDs matched against Equivalency.input_id
            to choose which equivalencies to delete.
        """
        # Show the state of each edition before anything changes.
        for edition in primary_identifier.primarily_identifies:
            print("BEFORE")
            self.explain(self._db, edition)
            print("-" * 80)

        savepoint = self._db.begin_nested()

        from_oclc = Equivalency.data_source == self.oclcld
        about_these_ids = Equivalency.input_id.in_(equivalent_ids)
        stale = self._db.query(Equivalency).filter(from_oclc).filter(
            about_these_ids
        )
        print("DELETING %d" % stale.count())
        for equivalency in stale:
            # All matches are deleted; only zero-strength ones are
            # printed one by one.
            if equivalency.strength == 0:
                print("DELETING %r" % equivalency)
            self._db.delete(equivalency)
        savepoint.commit()

        self.coverage.process_item(primary_identifier)

        # Recalculate presentation for editions that have a work, then
        # show the state of every edition again.
        for edition in primary_identifier.primarily_identifies:
            work = edition.work
            if work:
                work.calculate_presentation()
            self.explain(self._db, edition)
        print("I WOULD NOW EXPECT EVERYTHING TO BE FINE.")
Example #4
0
    def test_process_item_exception(self):
        # An OCLC Linked Data API whose lookup always raises.
        class DoomedOCLCLinkedData(OCLCLinkedData):
            def info_for(self, identifier):
                raise IOError("Exception!")

        doomed_api = DoomedOCLCLinkedData(self._db)
        provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

        placeholder = self._edition()
        identifier = placeholder.primary_identifier

        # The error should come back as a CoverageFailure that carries
        # the original message.
        failure = provider.process_item(identifier)
        assert isinstance(failure, CoverageFailure)
        assert "Exception!" in failure.exception
Example #5
0
    def test_process_item_exception_missing_isbn(self):
        # An OCLC Linked Data API whose lookup always fails with the
        # "couldn't find location" error.
        class DoomedOCLCLinkedData(OCLCLinkedData):
            def info_for(self, identifier):
                raise IOError("Tried, but couldn't find location")

        doomed_api = DoomedOCLCLinkedData(self._db)
        provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

        placeholder = self._edition()
        identifier = placeholder.primary_identifier

        # The failure is expected to be reported with a friendlier
        # message about the ISBN being unknown to OCLC.
        failure = provider.process_item(identifier)
        assert isinstance(failure, CoverageFailure)
        assert "OCLC doesn't know about this ISBN" in failure.exception
Example #6
0
    def test_process_item_exception_missing_isbn(self):
        # Simulate OCLC failing with its "couldn't find location" error.
        class DoomedOCLCLinkedData(OCLCLinkedData):
            def info_for(self, identifier):
                raise IOError("Tried, but couldn't find location")

        broken_client = DoomedOCLCLinkedData(self._db)
        coverage_provider = LinkedDataCoverageProvider(
            self._db, api=broken_client
        )

        edition = self._edition()
        target = edition.primary_identifier

        outcome = coverage_provider.process_item(target)

        # process_item() should hand back a CoverageFailure whose
        # message says OCLC does not know the ISBN.
        assert isinstance(outcome, CoverageFailure)
        assert "OCLC doesn't know about this ISBN" in outcome.exception
Example #7
0
    def test_process_item_exception(self):
        # Simulate an OCLC client that raises on every lookup.
        class DoomedOCLCLinkedData(OCLCLinkedData):
            def info_for(self, identifier):
                raise IOError("Exception!")

        broken_client = DoomedOCLCLinkedData(self._db)
        coverage_provider = LinkedDataCoverageProvider(
            self._db, api=broken_client
        )

        edition = self._edition()
        target = edition.primary_identifier

        outcome = coverage_provider.process_item(target)

        # process_item() should hand back a CoverageFailure that
        # includes the text of the raised error.
        assert isinstance(outcome, CoverageFailure)
        assert "Exception!" in outcome.exception
Example #8
0
    def test_viaf_authors_get_viaf_lookup(self):
        # TODO: process_item() covers far more than this test needs, and
        # the code it calls could stand some refactoring. For the moment,
        # though, this seems to be the only test of process_item().

        # Wire the coverage provider up to mock OCLC and VIAF clients.
        mock_oclc = MockOCLCLinkedDataAPI()
        mock_viaf = MockVIAFClient()
        provider = LinkedDataCoverageProvider(
            self._db, api=mock_oclc, viaf_api=mock_viaf
        )

        # Start from a placeholder edition whose existing contributions
        # have been deleted; the contributors are expected to come from
        # OCLC Linked Data.
        edition = self._edition()
        for contribution in edition.contributions:
            self._db.delete(contribution)
        self._db.commit()
        identifier = edition.primary_identifier

        # OCLC Linked Data will mention three authors: one with only a
        # VIAF ID, one with a VIAF ID and a sort name, and one with
        # names but no VIAF ID.
        viaf_only = ContributorData(viaf="1")
        viaf_and_sort_name = ContributorData(
            viaf="2", sort_name="Jordan, Robert"
        )
        no_viaf = ContributorData(
            sort_name="Rice, Anne", display_name="Anne Rice"
        )
        identifier_data = IdentifierData(
            type=identifier.type, identifier=identifier.identifier
        )
        oclc_metadata = Metadata(
            DataSource.OCLC_LINKED_DATA,
            contributors=[viaf_only, viaf_and_sort_name, no_viaf],
            primary_identifier=identifier_data,
            title=u"foo",
        )
        mock_oclc.queue_info_for(oclc_metadata)

        # Queue a VIAF answer for each contributor that has a VIAF ID,
        # plus a marker value that should never be consumed.
        first_answer = ContributorData(
            viaf="1",
            display_name="Display Name",
            family_name="Family",
            sort_name="Name, Sort",
            wikipedia_name="Wikipedia_Name",
        )
        second_answer = ContributorData(
            viaf="2",
            wikipedia_name="Robert_Jordan_(Author)",
            biography="That guy.",
        )
        lookup1 = (first_answer, None, None)
        lookup2 = (second_answer, None, None)
        mock_viaf.queue_lookup(lookup1, lookup2, "Unrequested lookup")

        provider.process_item(identifier)

        # The two VIAF-identified authors picked up the VIAF results;
        # the third author is unchanged.
        filled_in = [
            (c.sort_name, c.display_name, c.viaf, c.wikipedia_name,
             c.biography)
            for c in edition.contributors
        ]
        filled_in.sort()
        expected = [
            (u'Jordan, Robert', None, u'2', u'Robert_Jordan_(Author)',
             u'That guy.'),
            (u'Name, Sort', u'Display Name', u'1', u'Wikipedia_Name', None),
            (u'Rice, Anne', u'Anne Rice', None, None, None),
        ]
        eq_(expected, filled_in)

        # No VIAF lookup was made for the author without a VIAF ID, so
        # the marker value is still sitting in the mock's queue.
        eq_(mock_viaf.results, ["Unrequested lookup"])