def __init__(self, test_session=None):
    """Create the coverage providers and the VIAF client this object uses.

    :param test_session: Optional database session. When truthy it is
        stored as ``self._session``, which allows tests to run without
        db session overlap.
    """
    if test_session:
        self._session = test_session

    # Each helper is built against this object's database session.
    self.coverage = LinkedDataCoverageProvider(self._db)
    self.oclc_classify = OCLCClassifyCoverageProvider(self._db)
    self.viaf = VIAFClient(self._db)
def test_process_item_exception_missing_isbn(self):
    # When the OCLC Linked Data API raises an IOError saying it
    # couldn't find a location, process_item() is expected to return
    # a CoverageFailure explaining that OCLC doesn't know the ISBN.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Tried, but couldn't find location")

    doomed_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

    identifier = self._edition().primary_identifier
    failure = provider.process_item(identifier)

    assert isinstance(failure, CoverageFailure)
    assert "OCLC doesn't know about this ISBN" in failure.exception
def test_process_item_exception(self):
    # An arbitrary IOError raised by the OCLC Linked Data API is
    # expected to come back from process_item() as a CoverageFailure
    # whose exception text contains the original message.
    class DoomedOCLCLinkedData(OCLCLinkedData):
        def info_for(self, identifier):
            raise IOError("Exception!")

    doomed_api = DoomedOCLCLinkedData(self._db)
    provider = LinkedDataCoverageProvider(self._db, api=doomed_api)

    identifier = self._edition().primary_identifier
    failure = provider.process_item(identifier)

    assert isinstance(failure, CoverageFailure)
    assert "Exception!" in failure.exception
def setup(self):
    # Build the fixtures shared by the
    # IdentifierResolutionCoverageProvider tests.
    super(TestIdentifierResolutionCoverageProvider, self).setup()

    # An Overdrive identifier that is part of the default
    # collection's catalog, plus the matching license source.
    self.identifier = self._identifier(Identifier.OVERDRIVE_ID)
    self._default_collection.catalog_identifier(self._db, self.identifier)
    self.source = DataSource.license_source_for(self._db, self.identifier)

    # Mock out the collections and APIs that
    # IdentifierResolutionCoverageProvider relies on.
    resolver_class = IdentifierResolutionCoverageProvider
    overdrive_collection = MockOverdriveAPI.mock_collection(self._db)
    overdrive_collection.name = (
        resolver_class.DEFAULT_OVERDRIVE_COLLECTION_NAME
    )

    self.viaf = MockVIAFClient(self._db)
    self.linked_data_client = MockOCLCLinkedData(self._db)
    self.linked_data_coverage_provider = LinkedDataCoverageProvider(
        self._db, None, self.viaf, api=self.linked_data_client
    )
    self.uploader = DummyS3Uploader()

    # Keep the constructor arguments around so a test can build a
    # different kind of resolver from the same ingredients.
    self.provider_kwargs = {
        "uploader": self.uploader,
        "viaf_client": self.viaf,
        "overdrive_api_class": MockOverdriveAPI,
        "linked_data_coverage_provider": self.linked_data_coverage_provider,
    }

    # The resolver most tests will exercise.
    self.resolver = MockIdentifierResolutionCoverageProvider(
        self._default_collection, **self.provider_kwargs
    )

    # Handy CoverageProviders for tests to drop into
    # self.resolver.required_coverage_providers and
    # self.resolver.optional_coverage_providers.
    self.always_successful = AlwaysSuccessfulCoverageProvider(self._db)
    self.never_successful = NeverSuccessfulCoverageProvider(self._db)
    self.broken = BrokenCoverageProvider(self._db)
def __init__(self, collection, uploader=None, viaf_client=None,
             linked_data_coverage_provider=None, content_cafe_api=None,
             overdrive_api_class=OverdriveAPI, **kwargs):
    """Wire up the APIs and sub-providers used to resolve identifiers.

    :param collection: Passed through to the parent constructor.
    :param uploader: Uploader used for mirroring; when falsy one is
        built with ``S3Uploader.from_config``.
    :param viaf_client: Client for VIAF lookups; when falsy a
        ``VIAFClient`` is created.
    :param linked_data_coverage_provider: Provider for OCLC Linked
        Data; when falsy a ``LinkedDataCoverageProvider`` is created
        around the VIAF client.
    :param content_cafe_api: Stored as-is on ``self.content_cafe_api``.
    :param overdrive_api_class: Handed to ``create_overdrive_api``.
    :param kwargs: Forwarded to the parent constructor.
    """
    super(IdentifierResolutionCoverageProvider, self).__init__(
        collection, **kwargs
    )

    # We are the metadata wrangler, so every resource we find gets
    # mirrored to S3.
    uploader = uploader or S3Uploader.from_config(self._db)
    self.uploader = uploader

    # Recalculate presentation aggressively: either the work isn't
    # set up at all yet, or an earlier attempt to set it up failed.
    self.policy = PresentationCalculationPolicy(
        regenerate_opds_entries=True
    )

    self.overdrive_api = self.create_overdrive_api(overdrive_api_class)
    self.content_cafe_api = content_cafe_api

    # Work out the required and optional coverage providers. Every
    # Identifier in this Collection's catalog is run through all of
    # the relevant ones.
    required, optional = self.providers()
    self.required_coverage_providers = required
    self.optional_coverage_providers = optional

    # Client used whenever a contributor has to be looked up in VIAF.
    if not viaf_client:
        viaf_client = VIAFClient(self._db)
    self.viaf_client = viaf_client

    # Books aren't looked up in OCLC Linked Data directly, because no
    # Collection identifies a book by its OCLC Number. A lookup
    # through OCLC Classify may associate OCLC Numbers with a book,
    # though, and _those_ numbers can be run through OCLC Linked Data.
    #
    # TODO: Many books arrive identified by ISBN, and those _could_
    # go through a LinkedDataCoverageProvider if it worked a little
    # differently. That is unlikely to be very useful, since those
    # books get looked up through OCLC Classify, which will probably
    # lead us to the same ISBN via OCLC Number.
    if not linked_data_coverage_provider:
        linked_data_coverage_provider = LinkedDataCoverageProvider(
            self._db, viaf_api=self.viaf_client
        )
    self.oclc_linked_data = linked_data_coverage_provider

    # The ordinary OverdriveBibliographicCoverageProvider doesn't
    # upload images, so we build our own mirror and scaler.
    #
    # TODO: This class would be neater if it subclassed
    # OverdriveBibliographicCoverageProvider to do the scaling and
    # uploading.
    overdrive_mirror = OverdriveCoverImageMirror(
        self._db, uploader=uploader
    )
    self.image_mirrors = {DataSource.OVERDRIVE: overdrive_mirror}
    self.image_scaler = ImageScaler(
        self._db, self.image_mirrors.values(), uploader=uploader
    )
def test_viaf_authors_get_viaf_lookup(self):
    # TODO: The code under test could be refactored quite a bit --
    # exercising all of process_item() isn't really necessary here.
    # But ATM this does seem to be our only test of process_item().
    oclc = MockOCLCLinkedDataAPI()
    viaf = MockVIAFClient()
    provider = LinkedDataCoverageProvider(
        self._db, api=oclc, viaf_api=viaf
    )

    # A placeholder edition, stripped of its contributions, that will
    # be filled in with information from OCLC Linked Data.
    edition = self._edition()
    for contribution in edition.contributions:
        self._db.delete(contribution)
    self._db.commit()
    identifier = edition.primary_identifier

    # OCLC Linked Data is going to mention three contributors: one
    # with only a VIAF number, one with a VIAF number and a sort
    # name, and one with names but no VIAF number.
    viaf_only = ContributorData(viaf="1")
    viaf_and_sort_name = ContributorData(
        viaf="2", sort_name="Jordan, Robert"
    )
    no_viaf = ContributorData(
        sort_name="Rice, Anne", display_name="Anne Rice"
    )

    primary = IdentifierData(
        type=identifier.type, identifier=identifier.identifier
    )
    metadata = Metadata(
        DataSource.OCLC_LINKED_DATA,
        contributors=[viaf_only, viaf_and_sort_name, no_viaf],
        primary_identifier=primary,
        title=u"foo",
    )
    oclc.queue_info_for(metadata)

    # The provider is going to ask VIAF about the contributors that
    # have VIAF data, and not about the one that doesn't.
    lookup1 = (
        ContributorData(
            viaf="1", display_name="Display Name", family_name="Family",
            sort_name="Name, Sort", wikipedia_name="Wikipedia_Name"
        ),
        None,
        None,
    )
    lookup2 = (
        ContributorData(
            viaf="2", wikipedia_name="Robert_Jordan_(Author)",
            biography="That guy."
        ),
        None,
        None,
    )
    viaf.queue_lookup(lookup1, lookup2, "Unrequested lookup")

    provider.process_item(identifier)

    # Both VIAF-identified contributors have been updated with the
    # VIAF results; the third is unchanged.
    filled_in = sorted(
        (
            person.sort_name, person.display_name, person.viaf,
            person.wikipedia_name, person.biography
        )
        for person in edition.contributors
    )
    expected = [
        (u'Jordan, Robert', None, u'2', u'Robert_Jordan_(Author)',
         u'That guy.'),
        (u'Name, Sort', u'Display Name', u'1', u'Wikipedia_Name', None),
        (u'Rice, Anne', u'Anne Rice', None, None, None),
    ]
    eq_(expected, filled_in)

    # The contributor without VIAF data never triggered a VIAF
    # lookup, so that queued result is still in the mock VIAF queue.
    eq_(viaf.results, ["Unrequested lookup"])
def setup(self):
    # Run the parent fixture setup, then give every test a
    # LinkedDataCoverageProvider bound to the test database session.
    super(TestLinkedDataCoverageProvider, self).setup()
    self.provider = LinkedDataCoverageProvider(
        self._db
    )
def __init__(self):
    """Create the LinkedDataCoverageProvider stored on ``self.coverage``.

    The provider is constructed with this object's ``_db``.
    """
    self.coverage = LinkedDataCoverageProvider(
        self._db
    )