def test_replacement_policy_uses_provided_mirror(self):
    # A ReplacementPolicy handed to the Overdrive coverage provider must
    # be the one it uses, so cover images end up in the supplied mirror.
    overdrive_collection = MockOverdriveAPI.mock_collection(self._db)
    uploader = MockS3Uploader()
    policy = ReplacementPolicy.from_metadata_source(mirror=uploader)
    mock_api = MockOverdriveAPI(self._db, overdrive_collection)
    mock_api.queue_collection_token()
    provider = OverdriveBibliographicCoverageProvider(
        overdrive_collection,
        replacement_policy=policy,
        api_class=mock_api,
    )

    # The provider kept the mirror we gave it; any resources discovered
    # by Overdrive will be sent through it.
    eq_(uploader, provider.replacement_policy.mirror)

    # Route the policy's HTTP fetches to a canned client.
    fake_http = DummyHTTPClient()
    provider.replacement_policy.http_get = fake_http.do_get

    # Look up one specific identifier through 'Overdrive'.
    overdrive_id = self._identifier(
        Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5"
    )
    metadata_json = self.data_file("overdrive/overdrive_metadata.json")
    provider.api.queue_response(200, {}, metadata_json)

    full_cover = self.data_file("covers/test-book-cover.png")
    tiny_cover = self.data_file("covers/tiny-image-cover.png")

    # Queue Overdrive's full-sized image first -- our own thumbnail is
    # created from it -- followed by Overdrive's thumbnail, which we
    # will not be using.
    for image in (full_cover, tiny_cover):
        fake_http.queue_response(200, "image/jpeg", {}, image)

    record = provider.ensure_coverage(overdrive_id)
    eq_("success", record.status)

    # Both the full image and a thumbnail were uploaded to the fake S3.
    uploaded_full, uploaded_thumbnail = uploader.uploaded
    eq_(full_cover, uploaded_full.content)

    # The mirror URLs are our S3 URLs, not Overdrive's URLs.
    url_fragment = "Overdrive/Overdrive+ID/%s" % overdrive_id.identifier
    full_url = uploaded_full.mirror_url
    thumbnail_url = uploaded_thumbnail.mirror_url
    assert url_fragment in full_url
    assert url_fragment in thumbnail_url
    assert "/scaled/" in thumbnail_url
    assert "/scaled/" not in full_url

    # The thumbnail is a newly created image: it matches neither
    # Overdrive's thumbnail nor the full-sized cover.
    assert uploaded_thumbnail.content != tiny_cover
    assert uploaded_thumbnail.content != full_cover
def setup(self):
    # Build the fixtures shared by every test in this class: an
    # OPDS-for-distributors collection and a cover image coverage
    # provider whose replacement policy mirrors to a mock S3 uploader.
    super(TestIntegrationClientCoverImageCoverageProvider, self).setup()

    policy = ReplacementPolicy.from_metadata_source(mirror=MockS3Uploader())
    self.collection = self._collection(
        protocol=ExternalIntegration.OPDS_FOR_DISTRIBUTORS
    )
    self.provider = IntegrationClientCoverImageCoverageProvider(
        replacement_policy=policy,
        collection=self.collection,
    )
def test_replacement_policy_uses_provided_mirror(self):
    # The coverage provider must use the ReplacementPolicy passed in,
    # including that policy's mirror, rather than one of its own.
    s3 = MockS3Uploader()
    coll = MockOverdriveAPI.mock_collection(self._db)
    # NOTE: the mock collection is created before the policy, as in the
    # original ordering of DB-touching calls; the uploader itself does
    # not take the session.
    mirror_policy = ReplacementPolicy.from_metadata_source(mirror=s3)

    overdrive = MockOverdriveAPI(self._db, coll)
    overdrive.queue_collection_token()
    provider = OverdriveBibliographicCoverageProvider(
        coll, replacement_policy=mirror_policy, api_class=overdrive
    )

    # Any resources discovered by Overdrive will be sent through
    # this mirror.
    eq_(s3, provider.replacement_policy.mirror)

    client = DummyHTTPClient()
    provider.replacement_policy.http_get = client.do_get

    # Now look up a specific identifier through 'Overdrive'.
    target = self._identifier(
        Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5"
    )
    provider.api.queue_response(
        200, {}, self.data_file("overdrive/overdrive_metadata.json")
    )

    big_image = self.data_file("covers/test-book-cover.png")
    small_image = self.data_file("covers/tiny-image-cover.png")

    # Overdrive's full-sized image -- the source for our own thumbnail.
    client.queue_response(200, "image/jpeg", {}, big_image)
    # Overdrive's thumbnail image -- this one goes unused.
    client.queue_response(200, "image/jpeg", {}, small_image)

    coverage = provider.ensure_coverage(target)
    eq_("success", coverage.status)

    # The fake S3 received exactly two uploads: the full image and
    # the thumbnail.
    full, thumbnail = s3.uploaded
    eq_(big_image, full.content)

    # Both mirror URLs are our S3 URLs, not Overdrive's URLs.
    expected_path = "Overdrive/Overdrive+ID/%s" % target.identifier
    for mirrored in [full.mirror_url, thumbnail.mirror_url]:
        assert expected_path in mirrored
    assert "/scaled/" in thumbnail.mirror_url
    assert "/scaled/" not in full.mirror_url

    # The thumbnail was freshly generated, so it equals neither of the
    # two images that were served.
    assert thumbnail.content != small_image
    assert thumbnail.content != big_image
def setup(self):
    # Per-test fixtures: a mock-S3-backed replacement policy, an
    # OPDS-for-distributors collection, and the provider under test.
    super(TestIntegrationClientCoverImageCoverageProvider, self).setup()

    uploader = MockS3Uploader()
    policy = ReplacementPolicy.from_metadata_source(mirror=uploader)

    collection = self._collection(
        protocol=ExternalIntegration.OPDS_FOR_DISTRIBUTORS
    )
    self.collection = collection

    self.provider = IntegrationClientCoverImageCoverageProvider(
        replacement_policy=policy, collection=self.collection
    )
class MetadataWranglerBibliographicCoverageProvider(
        BibliographicCoverageProvider):
    """A BibliographicCoverageProvider whose default ReplacementPolicy
    is a metadata-source policy that carries the sitewide
    MirrorUploader whenever one is configured.
    """

    def _default_replacement_policy(self, _db):
        """In general, data used by the metadata wrangler is a reliable
        source of metadata but not of licensing information.

        We always provide the MirrorUploader in case a data
        source has cover images available.

        :param _db: Passed to MirrorUploader.sitewide to look up the
            sitewide uploader.
        :return: The result of ReplacementPolicy.from_metadata_source,
            called with the sitewide mirror, or with mirror=None if
            no mirror could be loaded.
        """
        try:
            mirror = MirrorUploader.sitewide(_db)
        except CannotLoadConfiguration:
            # It's not a problem if there's no MirrorUploader
            # configured -- it just means we can't mirror cover images
            # when they show up.
            mirror = None
        return ReplacementPolicy.from_metadata_source(mirror=mirror)
def __init__(self, _db, batch_size=10, cutoff_time=None,
             uploader=None, providers=None, **kwargs):
    """Set up the resolver and the coverage providers it delegates to.

    :param _db: Used to look up the INTERNAL_PROCESSING DataSource and
        to build the Overdrive coverage provider.
    :param batch_size: Passed through to the superclass constructor.
    :param cutoff_time: Accepted, but not referenced in this constructor.
    :param uploader: Mirror for any resources found. When falsy, a new
        S3Uploader is created for the replacement policy; the image
        mirror and image scaler receive this argument as given.
    :param providers: Optional 2-tuple of (required providers,
        optional providers). For testing purposes: initializing the
        real coverage providers during tests can cause requests to
        third parties.
    :param kwargs: Accepted, but not referenced in this constructor.
    """
    internal_source, _ = get_one_or_create(
        _db, DataSource, name=DataSource.INTERNAL_PROCESSING
    )
    # Other components don't treat INTERNAL_PROCESSING as offering
    # licenses, but we do, because we're responsible for managing
    # LicensePools.
    internal_source.offers_licenses = True

    super(IdentifierResolutionCoverageProvider, self).__init__(
        service_name="Identifier Resolution Coverage Provider",
        input_identifier_types=[Identifier.OVERDRIVE_ID, Identifier.ISBN],
        output_source=internal_source,
        batch_size=batch_size,
        operation=CoverageRecord.RESOLVE_IDENTIFIER_OPERATION,
    )

    # Since we are the metadata wrangler, any resources we find, we
    # mirror to S3. We also recalculate presentation aggressively:
    # either the work is currently not set up at all, or something went
    # wrong trying to set it up.
    replacement_policy = ReplacementPolicy.from_metadata_source(
        mirror=uploader or S3Uploader(),
        even_if_not_apparently_updated=True,
        presentation_calculation_policy=PresentationCalculationPolicy(
            regenerate_opds_entries=True, update_search_index=True
        ),
    )

    if not providers:
        # No providers were injected, so build the real ones.
        self.required_coverage_providers = [
            OverdriveBibliographicCoverageProvider(
                _db, metadata_replacement_policy=replacement_policy
            ),
            ContentCafeCoverageProvider(self._db),
            ContentServerCoverageProvider(self._db),
            OCLCClassifyCoverageProvider(self._db),
        ]
        self.optional_coverage_providers = []
    else:
        required, optional = providers
        self.required_coverage_providers = required
        self.optional_coverage_providers = optional

    self.viaf = VIAFClient(self._db)

    overdrive_mirror = OverdriveCoverImageMirror(
        self._db, uploader=uploader
    )
    self.image_mirrors = {DataSource.OVERDRIVE: overdrive_mirror}
    self.image_scaler = ImageScaler(
        self._db, self.image_mirrors.values(), uploader=uploader
    )
    self.oclc_linked_data = LinkedDataCoverageProvider(self._db)
class IdentifierResolutionCoverageProvider(CatalogCoverageProvider):
    """Make sure all Identifiers associated with some Collection become
    Works.

    Coverage happens by running the Identifier through _other_
    CoverageProviders, which fill in the blanks with data from
    third-party entities.

    This CoverageProvider may force those other CoverageProviders to
    do their work for each Identifier immediately, or it may simply
    register its Identifiers with those CoverageProviders and allow
    them to complete the work at their own pace.

    Unlike most CoverageProviders, which are invoked from a script,
    this CoverageProvider is invoked from
    URNLookupController.process_urns, and only when a client expresses
    a desire that we look into a specific identifier.
    """

    SERVICE_NAME = "Identifier Resolution Coverage Provider"
    DATA_SOURCE_NAME = DataSource.INTERNAL_PROCESSING

    # These are the only identifier types we have any hope of providing
    # insight into.
    INPUT_IDENTIFIER_TYPES = [
        Identifier.OVERDRIVE_ID, Identifier.ISBN, Identifier.URI,
    ]
    OPERATION = CoverageRecord.RESOLVE_IDENTIFIER_OPERATION

    # We cover all Collections, regardless of their protocol.
    PROTOCOL = None

    def __init__(self, collection, mirror=None, http_get=None, viaf=None,
                 provide_coverage_immediately=False, force=False,
                 provider_kwargs=None, **kwargs):
        """Constructor.

        :param collection: Handle all Identifiers from this Collection
            that were previously registered with this CoverageProvider.

        :param mirror: A MirrorUploader to use if coverage requires
            uploading any cover images to external storage. If none is
            given, the sitewide MirrorUploader is used when one can be
            loaded.

        :param http_get: A drop-in replacement for
            Representation.simple_http_get, to be used if any information
            (such as a book cover) needs to be obtained from the public
            Internet.

        :param viaf: A VIAFClient to use if coverage requires
            gathering information about authors from VIAF. If none is
            given, a new VIAFClient is created.

        :param force: Force CoverageProviders to cover identifiers
            even if they believe they have already done the work.
            Implied by provide_coverage_immediately.

        :param provide_coverage_immediately: If this is True, then
            resolving an identifier means registering it with all of
            its other CoverageProviders *and then attempting to provide
            coverage*. Registration is considered a success even if the
            other CoverageProviders fail, but the attempt must be made
            immediately.

            If this is False (the default), then resolving an
            identifier just means registering it with all other
            relevant CoverageProviders.

        :param provider_kwargs: Pass this object in as provider_kwargs
            when calling gather_providers at the end of the
            constructor. Used only in testing.

        :param kwargs: Passed through to the superclass constructor.
        """
        _db = Session.object_session(collection)

        # Since we are the metadata wrangler, any resources we find,
        # we mirror using the sitewide MirrorUploader.
        if not mirror:
            try:
                mirror = MirrorUploader.sitewide(_db)
            except CannotLoadConfiguration as e:
                # Best effort: without a storage integration we carry
                # on with whatever (falsy) mirror value we were given.
                logging.error(
                    "No storage integration is configured. Cover images will not be stored anywhere.",
                    exc_info=e
                )
        self.mirror = mirror

        # We're going to be aggressive about recalculating the presentation
        # for this work because either the work is currently not set up
        # at all, or something went wrong trying to set it up.
        presentation = PresentationCalculationPolicy(
            regenerate_opds_entries=True
        )
        replacement_policy = ReplacementPolicy.from_metadata_source(
            presentation_calculation_policy=presentation,
            mirror=self.mirror,
            http_get=http_get,
        )
        super(IdentifierResolutionCoverageProvider, self).__init__(
            collection, replacement_policy=replacement_policy, **kwargs
        )

        self.provide_coverage_immediately = provide_coverage_immediately
        # Providing coverage immediately implies forcing the providers.
        self.force = force or provide_coverage_immediately

        self.viaf = viaf or VIAFClient(self._db)

        # Instantiate the coverage providers that may be relevant to
        # any given Identifier.
        #
        # Each Identifier in this Collection's catalog will be registered
        # with all relevant providers (if provide_coverage_immediately
        # is False) or immediately covered by all relevant providers
        # (if provide_coverage_immediately is True).
        self.providers = self.gather_providers(provider_kwargs)