def test_all_authors_get_viaf_lookup(self):
    # TODO: The code exercised here could be refactored quite a bit --
    # running all of process_item() isn't really necessary. For the
    # moment, though, this seems to be the only test of process_item().
    mock_oclc = MockOCLCLinkedDataAPI()
    mock_viaf = MockVIAFClient()
    provider = LinkedDataCoverageProvider(
        self._db, api=mock_oclc, viaf_api=mock_viaf
    )

    # Start from a placeholder edition stripped of its contributions;
    # they will be filled in from OCLC Linked Data.
    edition = self._edition()
    for contribution in edition.contributions:
        self._db.delete(contribution)
    self._db.commit()
    identifier = edition.primary_identifier

    # OCLC Linked Data will mention two authors: one carrying only a
    # VIAF ID, the other carrying a VIAF ID and a sort name.
    viaf_only = ContributorData(viaf="1")
    viaf_and_sort_name = ContributorData(viaf="2", sort_name="Jordan, Robert")
    oclc_metadata = Metadata(
        DataSource.OCLC_LINKED_DATA,
        contributors=[viaf_only, viaf_and_sort_name],
        primary_identifier=IdentifierData(
            type=identifier.type, identifier=identifier.identifier
        ),
        title=u"foo",
    )
    mock_oclc.queue_info_for(oclc_metadata)

    # Queue one VIAF answer per contributor. Each queued entry is a
    # 3-tuple whose first element is the ContributorData to hand back.
    first_answer = ContributorData(
        viaf="1",
        display_name="Display Name",
        family_name="Family",
        sort_name="Name, Sort",
        wikipedia_name="Wikipedia_Name",
    )
    second_answer = ContributorData(
        viaf="2",
        wikipedia_name="Robert_Jordan_(Author)",
        biography="That guy.",
    )
    mock_viaf.queue_lookup(
        (first_answer, None, None),
        (second_answer, None, None),
    )

    provider.process_item(identifier)

    # Both authors should now carry the details VIAF supplied.
    expected = [
        (u'Jordan, Robert', None, u'2', u'Robert_Jordan_(Author)',
         u'That guy.'),
        (u'Name, Sort', u'Display Name', u'1', u'Wikipedia_Name', None),
    ]
    observed = sorted(
        (c.sort_name, c.display_name, c.viaf, c.wikipedia_name, c.biography)
        for c in edition.contributors
    )
    eq_(expected, observed)
class RedoOCLC(Explain):
    """Redo the OCLC Linked Data coverage of a single identifier.

    The OCLC Linked Data equivalencies reachable from the identifier are
    deleted, the coverage provider is run on the identifier again, and
    every edition it primarily identifies is passed to ``self.explain()``
    before and after so the two states can be compared by eye.
    """

    def __init__(self):
        # NOTE(review): self._db is not assigned anywhere in this class;
        # presumably the Explain base class provides it -- confirm.
        self.coverage = LinkedDataCoverageProvider(self._db)

    @property
    def oclcld(self):
        """The DataSource looked up for ``DataSource.OCLC_LINKED_DATA``."""
        return DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)

    def run(self):
        """Fix the identifier named on the command line.

        Exactly two command-line arguments are expected: the identifier
        type followed by the identifier itself. Any other number makes
        the tuple unpacking below raise ValueError.
        """
        id_type, identifier = sys.argv[1:]
        # for_foreign_id() returns a pair; only the first element is used.
        identifier, ignore = Identifier.for_foreign_id(
            self._db, id_type, identifier
        )
        self.fix_identifier(identifier)

    def fix_identifier(self, primary_identifier):
        """Gather the identifier's equivalent IDs, then redo its coverage.

        :param primary_identifier: The identifier to fix.
        :return: Whatever fix_identifier_with_equivalents() returns.
        """
        equivalent_ids = primary_identifier.equivalent_identifier_ids(
            levels=6, threshold=0)
        return self.fix_identifier_with_equivalents(
            primary_identifier, equivalent_ids
        )

    def fix_identifier_with_equivalents(self, primary_identifier, equivalent_ids):
        """Delete old OCLC Linked Data equivalencies and reprocess.

        :param primary_identifier: The identifier whose coverage is redone.
        :param equivalent_ids: IDs of identifiers considered equivalent to
            `primary_identifier`; equivalencies whose input is one of
            these are the ones deleted.
        """
        # Show the starting state of every edition.
        for edition in primary_identifier.primarily_identifies:
            print("BEFORE")
            self.explain(self._db, edition)
            print("-" * 80)

        # Do the deletions inside a nested transaction.
        savepoint = self._db.begin_nested()
        equivalencies = self._db.query(Equivalency).filter(
            Equivalency.data_source == self.oclcld
        ).filter(
            Equivalency.input_id.in_(equivalent_ids)
        )
        print("DELETING %d" % equivalencies.count())
        # NOTE(review): every matching equivalency is deleted, but only
        # the zero-strength ones are echoed individually -- confirm this
        # nesting is the intended one.
        for equivalency in equivalencies:
            if equivalency.strength == 0:
                print("DELETING %r" % equivalency)
            self._db.delete(equivalency)
        savepoint.commit()

        self.coverage.process_item(primary_identifier)

        # Show the resulting state, recalculating the presentation of any
        # edition that has a work.
        for edition in primary_identifier.primarily_identifies:
            if edition.work:
                edition.work.calculate_presentation()
            self.explain(self._db, edition)
        print("I WOULD NOW EXPECT EVERYTHING TO BE FINE.")
class RedoOCLC(Explain):
    """Wipe and regenerate one identifier's OCLC Linked Data coverage.

    Each edition the identifier primarily identifies is handed to
    ``self.explain()`` before the wipe and again after reprocessing.
    """

    def __init__(self):
        # NOTE(review): self._db is read here but never set by this
        # class; presumably it comes from the Explain base -- confirm.
        self.coverage = LinkedDataCoverageProvider(self._db)

    @property
    def oclcld(self):
        """Look up the ``DataSource.OCLC_LINKED_DATA`` data source."""
        return DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)

    def run(self):
        """Take an identifier type and value from sys.argv and fix it.

        Unpacking requires exactly two arguments after the script name;
        otherwise ValueError is raised.
        """
        id_type, identifier = sys.argv[1:]
        # Only the first element of the returned pair is needed.
        identifier, ignore = Identifier.for_foreign_id(
            self._db, id_type, identifier
        )
        self.fix_identifier(identifier)

    def fix_identifier(self, primary_identifier):
        """Collect equivalent identifier IDs and delegate the real work.

        :param primary_identifier: The identifier to reprocess.
        :return: The result of fix_identifier_with_equivalents().
        """
        return self.fix_identifier_with_equivalents(
            primary_identifier,
            primary_identifier.equivalent_identifier_ids(
                levels=6, threshold=0
            ),
        )

    def fix_identifier_with_equivalents(self, primary_identifier, equivalent_ids):
        """Remove stale equivalencies, then run coverage again.

        :param primary_identifier: The identifier being reprocessed.
        :param equivalent_ids: IDs used to select which OCLC Linked Data
            equivalencies (by input ID) get deleted.
        """
        # Report the state of each edition before anything changes.
        for before in primary_identifier.primarily_identifies:
            print("BEFORE")
            self.explain(self._db, before)
            print("-" * 80)

        nested = self._db.begin_nested()

        # Build the query one filter at a time.
        stale = self._db.query(Equivalency)
        stale = stale.filter(Equivalency.data_source == self.oclcld)
        stale = stale.filter(Equivalency.input_id.in_(equivalent_ids))

        print("DELETING %d" % stale.count())
        # NOTE(review): all matching rows are deleted; only rows with
        # strength 0 are printed one by one -- confirm the nesting.
        for row in stale:
            if row.strength == 0:
                print("DELETING %r" % row)
            self._db.delete(row)
        nested.commit()

        self.coverage.process_item(primary_identifier)

        # Report the state of each edition afterwards.
        for after in primary_identifier.primarily_identifies:
            work = after.work
            if work:
                work.calculate_presentation()
            self.explain(self._db, after)
        print("I WOULD NOW EXPECT EVERYTHING TO BE FINE.")
def test_process_item_exception(self):
    # An API stand-in whose info_for() always raises IOError.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Exception!")

    doomed_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

    identifier = self._edition().primary_identifier
    failure = provider.process_item(identifier)

    # The error comes back as a CoverageFailure carrying the original
    # message rather than propagating out of process_item().
    assert isinstance(failure, CoverageFailure)
    assert "Exception!" in failure.exception
def test_process_item_exception_missing_isbn(self):
    # An API stand-in whose info_for() always raises the IOError message
    # used here to represent a missing ISBN.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Tried, but couldn't find location")

    doomed_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

    identifier = self._edition().primary_identifier
    failure = provider.process_item(identifier)

    # The resulting CoverageFailure is expected to carry a friendlier
    # explanation than the raw IOError text.
    assert isinstance(failure, CoverageFailure)
    assert "OCLC doesn't know about this ISBN" in failure.exception
def test_process_item_exception_missing_isbn(self):
    # Simulate an OCLC Linked Data API that fails every lookup with the
    # "couldn't find location" IOError.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Tried, but couldn't find location")

    failing_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=failing_api)

    placeholder = self._edition()
    outcome = provider.process_item(placeholder.primary_identifier)

    # process_item() should report a CoverageFailure explaining that
    # OCLC doesn't know the ISBN.
    assert isinstance(outcome, CoverageFailure)
    assert "OCLC doesn't know about this ISBN" in outcome.exception
def test_process_item_exception(self):
    # Simulate an OCLC Linked Data API that fails every lookup.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Exception!")

    failing_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=failing_api)

    placeholder = self._edition()
    outcome = provider.process_item(placeholder.primary_identifier)

    # process_item() should hand back a CoverageFailure that includes
    # the IOError's message.
    assert isinstance(outcome, CoverageFailure)
    assert "Exception!" in outcome.exception
def test_viaf_authors_get_viaf_lookup(self):
    # TODO: The code exercised here could be refactored quite a bit --
    # running all of process_item() isn't really necessary. For the
    # moment, though, this seems to be the only test of process_item().
    mock_oclc = MockOCLCLinkedDataAPI()
    mock_viaf = MockVIAFClient()
    provider = LinkedDataCoverageProvider(
        self._db, api=mock_oclc, viaf_api=mock_viaf
    )

    # Start from a placeholder edition stripped of its contributions;
    # they will be filled in from OCLC Linked Data.
    edition = self._edition()
    for contribution in edition.contributions:
        self._db.delete(contribution)
    self._db.commit()
    identifier = edition.primary_identifier

    # OCLC Linked Data will mention three authors: one with only a VIAF
    # ID, one with a VIAF ID and a sort name, and one with names but no
    # VIAF ID at all.
    viaf_only = ContributorData(viaf="1")
    viaf_and_sort_name = ContributorData(viaf="2", sort_name="Jordan, Robert")
    no_viaf = ContributorData(
        sort_name="Rice, Anne", display_name="Anne Rice"
    )
    oclc_metadata = Metadata(
        DataSource.OCLC_LINKED_DATA,
        contributors=[viaf_only, viaf_and_sort_name, no_viaf],
        primary_identifier=IdentifierData(
            type=identifier.type, identifier=identifier.identifier
        ),
        title=u"foo",
    )
    mock_oclc.queue_info_for(oclc_metadata)

    # Queue one VIAF answer for each contributor that has a VIAF ID,
    # plus a trailing marker that nobody should ask for.
    first_answer = ContributorData(
        viaf="1",
        display_name="Display Name",
        family_name="Family",
        sort_name="Name, Sort",
        wikipedia_name="Wikipedia_Name",
    )
    second_answer = ContributorData(
        viaf="2",
        wikipedia_name="Robert_Jordan_(Author)",
        biography="That guy.",
    )
    mock_viaf.queue_lookup(
        (first_answer, None, None),
        (second_answer, None, None),
        "Unrequested lookup",
    )

    provider.process_item(identifier)

    # The two VIAF-identified authors now carry what VIAF supplied; the
    # third keeps only the names OCLC Linked Data gave it.
    expected = [
        (u'Jordan, Robert', None, u'2', u'Robert_Jordan_(Author)',
         u'That guy.'),
        (u'Name, Sort', u'Display Name', u'1', u'Wikipedia_Name', None),
        (u'Rice, Anne', u'Anne Rice', None, None, None),
    ]
    observed = sorted(
        (c.sort_name, c.display_name, c.viaf, c.wikipedia_name, c.biography)
        for c in edition.contributors
    )
    eq_(expected, observed)

    # No lookup was made for the author lacking a VIAF ID, so the
    # marker is still sitting in the mock VIAF queue.
    eq_(mock_viaf.results, ["Unrequested lookup"])