def test_items_that_need_coverage(self):
    source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    # An item that hasn't been covered by the provider yet.
    cr = self._coverage_record(self._edition(), other_source)

    # An item that has been covered by the reaper operation already.
    reaper_cr = self._coverage_record(
        self._edition(), source, operation=CoverageRecord.REAP_OPERATION
    )

    # An item that has been covered by the reaper operation, but has
    # had its license repurchased.
    relicensed_edition, relicensed_licensepool = self._edition(with_license_pool=True)
    relicensed_coverage_record = self._coverage_record(
        relicensed_edition, source, operation=CoverageRecord.REAP_OPERATION
    )
    relicensed_licensepool.update_availability(1, 0, 0, 0)

    items = self.provider.items_that_need_coverage().all()

    # The provider ignores anything that has been reaped and doesn't
    # have licenses.
    assert reaper_cr.identifier not in items

    # But it picks up anything that hasn't been covered at all and anything
    # that's been licensed anew, even if it's already been reaped.
    eq_(2, len(items))
    assert relicensed_licensepool.identifier in items
    assert cr.identifier in items

    # The Wrangler Reaper coverage record is removed from the db
    # when it's committed.
    assert relicensed_coverage_record in relicensed_licensepool.identifier.coverage_records
    self._db.commit()
    assert relicensed_coverage_record not in relicensed_licensepool.identifier.coverage_records
def __init__(self, _db, overdrive=None, threem=None, axis=None):
    self._db = _db
    self.overdrive = overdrive
    self.threem = threem
    self.axis = axis
    self.apis = [x for x in (overdrive, threem, axis) if x]
    self.log = logging.getLogger("Circulation API")

    # When we get our view of a patron's loans and holds, we need
    # to include loans from all licensed data sources. We do not
    # need to include loans from open-access sources because we
    # are the authorities on those.
    data_sources_for_sync = []
    if self.overdrive:
        data_sources_for_sync.append(
            DataSource.lookup(_db, DataSource.OVERDRIVE)
        )
    if self.threem:
        data_sources_for_sync.append(
            DataSource.lookup(_db, DataSource.THREEM)
        )
    if self.axis:
        data_sources_for_sync.append(
            DataSource.lookup(_db, DataSource.AXIS_360)
        )

    self.identifier_type_to_data_source_name = dict(
        (ds.primary_identifier_type, ds.name)
        for ds in data_sources_for_sync
    )
    self.data_source_ids_for_sync = [
        x.id for x in data_sources_for_sync
    ]
def test_items_that_need_coverage(self):
    source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    # An item that hasn't been covered by the provider yet.
    cr = self._coverage_record(self._edition(), other_source)

    # An item that has been covered by the reaper operation already.
    reaper_cr = self._coverage_record(
        self._edition(), source, operation=CoverageRecord.REAP_OPERATION
    )

    # An item that has been covered by the reaper operation, but has
    # had its license repurchased.
    relicensed, relicensed_lp = self._edition(with_license_pool=True)
    self._coverage_record(
        relicensed, source, operation=CoverageRecord.REAP_OPERATION
    )
    relicensed_lp.update_availability(1, 0, 0, 0)

    with temp_config() as config:
        config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
            Configuration.URL : "http://url.gov"
        }
        provider = MetadataWranglerCoverageProvider(self._db)
        items = provider.items_that_need_coverage().all()

    # The provider ignores anything that has been reaped and doesn't
    # have licenses.
    assert reaper_cr.identifier not in items

    # But it picks up anything that hasn't been covered at all and anything
    # that's been licensed anew, even if it's already been reaped.
    eq_(2, len(items))
    assert relicensed_lp.identifier in items
    assert cr.identifier in items

    # The Wrangler Reaper coverage record is removed from the db
    # when it's committed.
    self._db.commit()
    eq_([], relicensed_lp.identifier.coverage_records)
def test_run_once(self):
    # Set up authentication and Metadata Wrangler details.
    lp = self._licensepool(
        None, data_source_name=DataSource.BIBLIOTHECA,
        collection=self.collection
    )
    lp.identifier.type = Identifier.BIBLIOTHECA_ID
    isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
    lp.identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
    )
    eq_([], lp.identifier.links)
    eq_([], lp.identifier.measurements)

    # Queue some data to be found.
    responses = (
        'metadata_updates_response.opds',
        'metadata_updates_empty_response.opds',
    )
    for filename in responses:
        data = sample_data(filename, 'opds')
        self.lookup.queue_response(
            200, {'content-type' : OPDSFeed.ACQUISITION_FEED_TYPE}, data
        )

    timestamp = self.ts
    new_timestamp = self.monitor.run_once(timestamp)

    # We have a new value to use for the Monitor's timestamp -- the
    # earliest date seen in the last OPDS feed that contained
    # any entries.
    eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp.finish)
    eq_("Editions processed: 1", new_timestamp.achievements)

    # Normally run_once() doesn't update the monitor's timestamp,
    # but this implementation does, so that work isn't redone if
    # run_once() crashes or the monitor is killed.
    eq_(new_timestamp.finish, self.monitor.timestamp().finish)

    # The original Identifier has information from the
    # mock Metadata Wrangler.
    mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    eq_(3, len(lp.identifier.links))
    [quality] = lp.identifier.measurements
    eq_(mw_source, quality.data_source)

    # Check the URLs we processed.
    url1, url2 = [x[0] for x in self.lookup.requests]

    # The first URL processed was the default one for the
    # MetadataWranglerOPDSLookup.
    eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

    # The second URL processed was whatever we saw in the 'next' link.
    eq_("http://next-link/", url2)
def test_load_cover_link(self):
    # Create a directory import script with an empty mock filesystem.
    script = MockDirectoryImportScript(self._db, {})

    identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    mirror = MockS3Uploader()
    args = (identifier, gutenberg, "covers", mirror)

    # There is nothing on the mock filesystem, so in this case
    # load_cover_link returns None.
    eq_(None, script.load_cover_link(*args))

    # But we tried.
    eq_(
        ('2345', 'covers', Representation.COMMON_IMAGE_EXTENSIONS, 'cover image'),
        script._locate_file_args
    )

    # Try another script that has a populated mock filesystem.
    mock_filesystem = {
        'covers' : (
            'acover.jpeg', Representation.JPEG_MEDIA_TYPE, "I'm an image."
        )
    }
    script = MockDirectoryImportScript(self._db, mock_filesystem)
    link = script.load_cover_link(*args)
    eq_(Hyperlink.IMAGE, link.rel)
    assert link.href.endswith(
        '/test.cover.bucket/Gutenberg/Gutenberg+ID/2345/2345.jpg'
    )
    eq_(Representation.JPEG_MEDIA_TYPE, link.media_type)
    eq_("I'm an image.", link.content)
def __init__(self, _db, data_directory):
    self._db = _db
    self.collection = Collection.by_protocol(
        self._db, ExternalIntegration.GUTENBERG
    ).one()
    self.source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    self.data_directory = data_directory
    self.catalog_path = os.path.join(self.data_directory, self.FILENAME)
    self.log = logging.getLogger("Gutenberg API")
def __init__(self, _db, lookup=None, input_identifier_types=None,
             operation=None, **kwargs):
    if not input_identifier_types:
        input_identifier_types = [
            Identifier.OVERDRIVE_ID,
            Identifier.THREEM_ID,
            Identifier.GUTENBERG_ID,
            Identifier.AXIS_360_ID,
        ]
    output_source = DataSource.lookup(
        _db, DataSource.METADATA_WRANGLER
    )
    super(MetadataWranglerCoverageProvider, self).__init__(
        lookup=lookup or SimplifiedOPDSLookup.from_config(),
        service_name=self.SERVICE_NAME,
        input_identifier_types=input_identifier_types,
        output_source=output_source,
        operation=operation or self.OPERATION,
        **kwargs
    )
    if not self.lookup.authenticated:
        self.log.warn(
            "Authentication for the Library Simplified Metadata Wrangler "
            "is not set up. You can still use the metadata wrangler, but "
            "it will not know which collection you're asking about."
        )
def metadata_needed_for(self, collection_details):
    """Returns identifiers in the collection that could benefit from
    distributor metadata on the circulation manager.
    """
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection = collection_from_details(
        self._db, client, collection_details
    )

    resolver = IdentifierResolutionCoverageProvider
    unresolved_identifiers = collection.unresolved_catalog(
        self._db, resolver.DATA_SOURCE_NAME, resolver.OPERATION
    )

    # Omit identifiers that currently have metadata pending for
    # the IntegrationClientCoverImageCoverageProvider.
    data_source = DataSource.lookup(
        self._db, collection.name, autocreate=True
    )
    is_awaiting_metadata = self._db.query(
        CoverageRecord.id, CoverageRecord.identifier_id
    ).filter(
        CoverageRecord.data_source_id==data_source.id,
        CoverageRecord.status==CoverageRecord.REGISTERED,
        CoverageRecord.operation==IntegrationClientCoverImageCoverageProvider.OPERATION,
    ).subquery()

    unresolved_identifiers = unresolved_identifiers.outerjoin(
        is_awaiting_metadata,
        Identifier.id==is_awaiting_metadata.c.identifier_id
    ).filter(is_awaiting_metadata.c.id==None)

    # Add a message for each unresolved identifier.
    pagination = load_pagination_from_request(default_size=25)
    feed_identifiers = pagination.apply(unresolved_identifiers).all()
    messages = list()
    for identifier in feed_identifiers:
        messages.append(OPDSMessage(
            identifier.urn, HTTP_ACCEPTED, "Metadata needed."
        ))

    title = "%s Metadata Requests for %s" % (collection.protocol, client.url)
    metadata_request_url = self.collection_feed_url(
        'metadata_needed_for', collection
    )

    request_feed = AcquisitionFeed(
        self._db, title, metadata_request_url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    self.add_pagination_links_to_feed(
        pagination, unresolved_identifiers, request_feed,
        'metadata_needed_for', collection
    )

    return feed_response(request_feed)
def test_handle_import_messages(self):
    data_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    provider = OPDSImportCoverageProvider("name", [], data_source)

    message = StatusMessage(201, "try again later")
    message2 = StatusMessage(404, "we're doomed")
    message3 = StatusMessage(200, "everything's fine")

    identifier = self._identifier()
    identifier2 = self._identifier()
    identifier3 = self._identifier()

    messages_by_id = {
        identifier.urn: message,
        identifier2.urn: message2,
        identifier3.urn: message3,
    }

    [f1, f2] = sorted(
        list(provider.handle_import_messages(messages_by_id)),
        key=lambda x: x.exception
    )

    eq_(identifier, f1.obj)
    eq_("201: try again later", f1.exception)
    eq_(True, f1.transient)

    eq_(identifier2, f2.obj)
    eq_("404: we're doomed", f2.exception)
    eq_(False, f2.transient)
def _provider(self, presentation_ready_on_success=True):
    """Create a generic MockOPDSImportCoverageProvider for testing purposes."""
    source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
    return MockOPDSImportCoverageProvider(
        "mock provider", [], source,
        presentation_ready_on_success=presentation_ready_on_success
    )
def test_items_that_need_coverage_respects_cutoff(self):
    """Verify that this coverage provider respects the cutoff_time
    argument.
    """
    source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    edition = self._edition()
    cr = self._coverage_record(edition, source, operation='sync')

    # We have a coverage record already, so this book doesn't show
    # up in items_that_need_coverage.
    items = self.provider.items_that_need_coverage().all()
    eq_([], items)

    # But if we send a cutoff_time that's later than the time
    # associated with the coverage record...
    one_hour_from_now = (
        datetime.datetime.utcnow() + datetime.timedelta(seconds=3600)
    )
    provider_with_cutoff = self.create_provider(
        cutoff_time=one_hour_from_now
    )

    # The book starts showing up in items_that_need_coverage.
    eq_(
        [edition.primary_identifier],
        provider_with_cutoff.items_that_need_coverage().all()
    )
def test_finalize_edition(self):
    provider_no_presentation_ready = self._provider(
        presentation_ready_on_success=False
    )
    provider_presentation_ready = self._provider(
        presentation_ready_on_success=True
    )
    identifier = self._identifier()
    source = DataSource.lookup(self._db, DataSource.GUTENBERG)

    # Here's an Edition with no LicensePool.
    edition, is_new = Edition.for_foreign_id(
        self._db, source, identifier.type, identifier.identifier
    )
    edition.title = self._str

    # This will effectively do nothing.
    provider_no_presentation_ready.finalize_edition(edition)

    # No Works have been created.
    eq_(0, self._db.query(Work).count())

    # But if there's also a LicensePool...
    pool, is_new = LicensePool.for_foreign_id(
        self._db, source, identifier.type, identifier.identifier
    )

    # finalize_edition() will create a Work.
    provider_no_presentation_ready.finalize_edition(edition)

    work = pool.work
    eq_(work, edition.work)
    eq_(False, work.presentation_ready)

    # If the provider is configured to do so, finalize_edition()
    # will also set the Work as presentation-ready.
    provider_presentation_ready.finalize_edition(edition)
    eq_(True, work.presentation_ready)
def __init__(self, _db, input_identifier_types=None, metadata_lookup=None,
             cutoff_time=None, operation=None):
    self._db = _db
    if not input_identifier_types:
        input_identifier_types = [
            Identifier.OVERDRIVE_ID,
            Identifier.THREEM_ID,
            Identifier.GUTENBERG_ID,
            Identifier.AXIS_360_ID,
        ]
    self.output_source = DataSource.lookup(
        self._db, DataSource.METADATA_WRANGLER
    )

    if not metadata_lookup:
        metadata_lookup = SimplifiedOPDSLookup.from_config()
    self.lookup = metadata_lookup

    if not operation:
        operation = CoverageRecord.SYNC_OPERATION
    self.operation = operation

    super(MetadataWranglerCoverageProvider, self).__init__(
        self.service_name,
        input_identifier_types,
        self.output_source,
        workset_size=20,
        cutoff_time=cutoff_time,
        operation=self.operation,
    )
def generate_mock_api(self):
    """Prep an empty NoveList result."""
    source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    metadata = Metadata(source)

    mock_api = MockNoveListAPI(self._db)
    mock_api.setup(metadata)
    return mock_api
def setup(self):
    super(TestMetadataWranglerCollectionReaper, self).setup()
    self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    with temp_config() as config:
        config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
            Configuration.URL : "http://url.gov"
        }
        self.reaper = MetadataWranglerCollectionReaper(self._db)
def __init__(self, _db, testing=False, api=None):
    super(ThreeMCirculationSweep, self).__init__(
        _db, "3M Circulation Sweep", batch_size=25
    )
    self._db = _db
    if not api:
        api = ThreeMAPI(self._db, testing=testing)
    self.api = api
    self.data_source = DataSource.lookup(self._db, DataSource.THREEM)
def __init__(self, db, mirrors, uploader=None):
    self._db = db
    self.data_source_ids = []
    self.uploader = uploader or S3Uploader()
    self.log = logging.getLogger("Cover Image Scaler")
    for mirror in mirrors:
        data_source_name = mirror.DATA_SOURCE
        data_source = DataSource.lookup(self._db, data_source_name)
        self.data_source_ids.append(data_source.id)
def __init__(self, _db, authenticator, node_value,
             temporary_token_duration=None):
    self._db = _db
    self.authenticator = authenticator
    self.data_source = DataSource.lookup(_db, DataSource.ADOBE)
    self.temporary_token_duration = (
        temporary_token_duration or datetime.timedelta(minutes=10)
    )
    if isinstance(node_value, basestring):
        node_value = int(node_value, 16)
    self.node_value = node_value
def setup(self):
    super(TestFeedbooksOPDSImporter, self).setup()
    self.http = DummyHTTPClient()
    self.metadata = DummyMetadataClient()
    self.mirror = MockS3Uploader()
    self.data_source = DataSource.lookup(self._db, DataSource.FEEDBOOKS)

    # Create a default importer that's good enough for most tests.
    self.collection, self.importer = self._importer()
def test_lookup_equivalent_isbns(self):
    identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
    api = MockNoveListAPI.from_config(self._default_library)

    # If there are no ISBN equivalents, it returns None.
    eq_(None, api.lookup_equivalent_isbns(identifier))

    source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    identifier.equivalent_to(source, self._identifier(), strength=1)
    self._db.commit()
    eq_(None, api.lookup_equivalent_isbns(identifier))

    # If there's an ISBN equivalent, but it doesn't result in metadata,
    # it returns None.
    isbn = self._identifier(identifier_type=Identifier.ISBN)
    identifier.equivalent_to(source, isbn, strength=1)
    self._db.commit()
    api.responses.append(None)
    eq_(None, api.lookup_equivalent_isbns(identifier))

    # Create an API class that can mock out NoveListAPI.choose_best_metadata.
    class MockBestMetadataAPI(MockNoveListAPI):
        choose_best_metadata_return = None
        def choose_best_metadata(self, *args, **kwargs):
            return self.choose_best_metadata_return
    api = MockBestMetadataAPI.from_config(self._default_library)

    # Give the identifier another ISBN equivalent.
    isbn2 = self._identifier(identifier_type=Identifier.ISBN)
    identifier.equivalent_to(source, isbn2, strength=1)
    self._db.commit()

    # Queue metadata responses for each ISBN lookup.
    metadatas = [object(), object()]
    api.responses.extend(metadatas)

    # If choose_best_metadata returns None, the lookup returns None.
    api.choose_best_metadata_return = (None, None)
    eq_(None, api.lookup_equivalent_isbns(identifier))

    # The lookup was performed for both ISBNs.
    eq_([], api.responses)

    # If choose_best_metadata returns a low-confidence metadata, the
    # lookup returns None.
    api.responses.extend(metadatas)
    api.choose_best_metadata_return = (metadatas[0], 0.33)
    eq_(None, api.lookup_equivalent_isbns(identifier))

    # If choose_best_metadata returns a high-confidence metadata, the
    # lookup returns the metadata.
    api.responses.extend(metadatas)
    api.choose_best_metadata_return = (metadatas[1], 0.67)
    eq_(metadatas[1], api.lookup_equivalent_isbns(identifier))
def test_facets(self):
    # Normally we yield one FeaturedFacets object for each of the
    # library's enabled entry points.
    library = self._default_library
    script = CacheOPDSGroupFeedPerLane(
        self._db, manager=object(), cmd_args=[]
    )
    setting = library.setting(EntryPoint.ENABLED_SETTING)
    setting.value = json.dumps(
        [AudiobooksEntryPoint.INTERNAL_NAME, EbooksEntryPoint.INTERNAL_NAME]
    )
    lane = self._lane()
    audio_facets, ebook_facets = script.facets(lane)
    eq_(AudiobooksEntryPoint, audio_facets.entrypoint)
    eq_(EbooksEntryPoint, ebook_facets.entrypoint)

    # The first entry point in the library's list of enabled entry
    # points is treated as the default.
    eq_(True, audio_facets.entrypoint_is_default)
    eq_(audio_facets.entrypoint, list(library.entrypoints)[0])
    eq_(False, ebook_facets.entrypoint_is_default)

    for facets in (audio_facets, ebook_facets):
        # The FeaturedFacets object knows to feature works at the
        # library's minimum quality level.
        eq_(library.minimum_featured_quality, facets.minimum_featured_quality)

        # The FeaturedFacets object knows that custom lists are
        # not in play.
        eq_(False, facets.uses_customlists)

    # The first entry point is treated as the default only for WorkLists
    # that have no parent. When the WorkList has a parent, the selected
    # entry point is treated as an explicit choice -- navigating downward
    # in the lane hierarchy ratifies the default value.
    sublane = self._lane(parent=lane)
    f1, f2 = script.facets(sublane)
    for f in f1, f2:
        eq_(False, f.entrypoint_is_default)

    # Make it look like the lane uses custom lists.
    lane.list_datasource = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    # If the library has no enabled entry points, we yield one
    # FeaturedFacets object with no particular entry point.
    setting.value = json.dumps([])
    no_entry_point, = script.facets(lane)
    eq_(None, no_entry_point.entrypoint)

    # The FeaturedFacets object knows that custom lists are in
    # play.
    eq_(True, no_entry_point.uses_customlists)
def test_feed_includes_staff_rating(self):
    work = self._work(with_open_access_download=True)
    lp = work.license_pools[0]
    staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
    lp.identifier.add_measurement(
        staff_data_source, Measurement.RATING, 3, weight=1000
    )

    feed = AcquisitionFeed(
        self._db, "test", "url", [work],
        AdminAnnotator(None, self._default_library, test_mode=True)
    )
    [entry] = feedparser.parse(unicode(feed))['entries']
    rating = entry['schema_rating']
    eq_(3, float(rating['schema:ratingvalue']))
    eq_(Measurement.RATING, rating['additionaltype'])
def __init__(self, _db, api=None, **kwargs):
    input_identifier_types = [Identifier.GUTENBERG_ID, Identifier.URI]
    output_source = DataSource.lookup(_db, DataSource.OCLC)
    super(OCLCClassifyCoverageProvider, self).__init__(
        "OCLC Classify Coverage Provider",
        input_identifier_types, output_source
    )
    self._db = _db
    self.api = api or OCLCClassifyAPI(self._db)
def setup(self):
    super(TestVendorIDModel, self).setup()
    self.authenticator = DummyMilleniumPatronAPI()
    self.model = AdobeVendorIDModel(
        self._db, self.authenticator, self.TEST_NODE_VALUE
    )
    self.data_source = DataSource.lookup(self._db, DataSource.ADOBE)

    # Normally this test patron doesn't have an authorization identifier.
    # Let's make sure there is one so it'll show up as the label.
    self.bob_patron = self.authenticator.authenticated_patron(
        self._db, dict(username="******", password="******")
    )
    self.bob_patron.authorization_identifier = "5"
def process_item(self, identifier):
    data_source = DataSource.lookup(
        self._db, self.importer.data_source_name
    )
    try:
        response = self.content_server.lookup([identifier])
    except BadResponseException, e:
        return CoverageFailure(
            identifier, e.message, data_source
        )
def setup(self):
    super(TestMetadataUploadCoverageProvider, self).setup()
    self.integration = self._external_integration(
        ExternalIntegration.METADATA_WRANGLER,
        goal=ExternalIntegration.METADATA_GOAL,
        url=self._url, username=u'abc', password=u'def'
    )
    self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    self.collection = self._collection(
        protocol=ExternalIntegration.BIBLIOTHECA,
        external_account_id=u'lib'
    )
    self.provider = self.create_provider()
def test_new_isbns(self):
    existing_id = self._identifier()
    metadata = Metadata(
        DataSource.lookup(self._db, DataSource.GUTENBERG),
        identifiers=[
            IdentifierData(type=Identifier.OCLC_WORK, identifier="abra"),
            IdentifierData(type=existing_id.type, identifier=existing_id.identifier),
            IdentifierData(type=Identifier.ISBN, identifier="kadabra"),
        ]
    )

    eq_(2, self.provider.new_isbns(metadata))
def test_load_circulation_data(self):
    # Create a directory import script with an empty mock filesystem.
    script = MockDirectoryImportScript(self._db, {})

    identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    mirror = MockS3Uploader()
    args = (identifier, gutenberg, "ebooks", mirror, "Name of book",
            "rights URI")

    # There is nothing on the mock filesystem, so in this case
    # load_circulation_data returns None.
    eq_(None, script.load_circulation_data(*args))

    # But we tried.
    eq_(
        ('2345', 'ebooks', Representation.COMMON_EBOOK_EXTENSIONS, 'ebook file'),
        script._locate_file_args
    )

    # Try another script that has a populated mock filesystem.
    mock_filesystem = {
        'ebooks' : (
            'book.epub', Representation.EPUB_MEDIA_TYPE, "I'm an EPUB."
        )
    }
    script = MockDirectoryImportScript(self._db, mock_filesystem)

    # Now _locate_file finds something on the mock filesystem, and
    # load_circulation_data loads it into a fully populated
    # CirculationData object.
    circulation = script.load_circulation_data(*args)
    eq_(identifier, circulation.primary_identifier(self._db))
    eq_(gutenberg, circulation.data_source(self._db))
    eq_("rights URI", circulation.default_rights_uri)

    # The CirculationData has an open-access link associated with it.
    [link] = circulation.links
    eq_(Hyperlink.OPEN_ACCESS_DOWNLOAD, link.rel)
    assert link.href.endswith(
        '/test.content.bucket/Gutenberg/Gutenberg+ID/2345/Name+of+book.epub'
    )
    eq_(Representation.EPUB_MEDIA_TYPE, link.media_type)
    eq_("I'm an EPUB.", link.content)

    # This open-access link will be made available through a
    # delivery mechanism described by this FormatData.
    [format] = circulation.formats
    eq_(link, format.link)
    eq_(link.media_type, format.content_type)
    eq_(DeliveryMechanism.NO_DRM, format.drm_scheme)
def test_related_books(self):
    # A book with no related books returns a ProblemDetail.
    with temp_config() as config:
        config['integrations'][Configuration.NOVELIST_INTEGRATION] = {}
        with self.app.test_request_context('/'):
            response = self.manager.work_controller.related(
                self.datasource, self.identifier.type, self.identifier.identifier
            )
    eq_(404, response.status_code)
    eq_("http://librarysimplified.org/terms/problem/unknown-lane", response.uri)

    # Prep a book with a book in its series and a recommendation.
    self.lp.presentation_edition.series = "Around the World"
    self.french_1.presentation_edition.series = "Around the World"
    SessionManager.refresh_materialized_views(self._db)

    source = DataSource.lookup(self._db, self.datasource)
    metadata = Metadata(source)
    mock_api = MockNoveListAPI()
    metadata.recommendations = [self.english_2.license_pools[0].identifier]
    mock_api.setup(metadata)

    # A grouped feed is returned with both of these related books.
    with self.app.test_request_context('/'):
        response = self.manager.work_controller.related(
            self.datasource, self.identifier.type, self.identifier.identifier,
            novelist_api=mock_api
        )
    eq_(200, response.status_code)
    feed = feedparser.parse(response.data)
    eq_(3, len(feed['entries']))

    # One book is in the recommendations feed.
    [e1] = [e for e in feed['entries'] if e['title'] == self.english_2.title]
    [collection_link] = [link for link in e1['links'] if link['rel'] == 'collection']
    eq_("Recommended Books", collection_link['title'])
    work_url = "/works/%s/%s/%s/" % (
        self.datasource, self.identifier.type, self.identifier.identifier
    )
    expected = urllib.quote(work_url + 'recommendations')
    eq_(True, collection_link['href'].endswith(expected))

    # Two books are in the series feed: the original work and its companion.
    [e2] = [e for e in feed['entries'] if e['title'] == self.french_1.title]
    [collection_link] = [link for link in e2['links'] if link['rel'] == 'collection']
    eq_("Around the World", collection_link['title'])
    expected = urllib.quote(work_url + 'series')
    eq_(True, collection_link['href'].endswith(expected))

    [e3] = [e for e in feed['entries'] if e['title'] == self.english_1.title]
    [collection_link] = [link for link in e3['links'] if link['rel'] == 'collection']
    eq_("Around the World", collection_link['title'])
    expected = urllib.quote(work_url + 'series')
    eq_(True, collection_link['href'].endswith(expected))
def __init__(self, _db, api=None, viaf_api=None):
    self._db = _db
    self.api = api or OCLCLinkedData(self._db)
    self.viaf = viaf_api or VIAFClient(self._db)
    output_source = DataSource.lookup(_db, DataSource.OCLC_LINKED_DATA)
    input_identifier_types = [
        Identifier.OCLC_WORK,
        Identifier.OCLC_NUMBER,
        Identifier.OVERDRIVE_ID,
        Identifier.THREEM_ID,
    ]
    super(LinkedDataCoverageProvider, self).__init__(
        "OCLC Linked Data Coverage Provider",
        input_identifier_types,
        output_source,
        batch_size=10
    )
def opds_feed_identifiers(self):
    """Creates three Identifiers to use for testing with a sample OPDS file."""

    # Straightforward identifier that's represented in the OPDS response.
    valid_id = self._identifier(foreign_id=u'2020110')

    # Mapped identifier.
    source = DataSource.lookup(self._db, DataSource.AXIS_360)
    mapped_id = self._identifier(
        identifier_type=Identifier.AXIS_360_ID, foreign_id=u'0015187876'
    )
    equivalent_id = self._identifier(
        identifier_type=Identifier.ISBN, foreign_id=self._isbn
    )
    mapped_id.equivalent_to(source, equivalent_id, 1)

    # An identifier that's not represented in the OPDS response.
    lost_id = self._identifier()

    return valid_id, mapped_id, lost_id
def test_target_age_errs_towards_wider_span(self):
    i = self._identifier()
    source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    c1 = i.classify(source, Subject.AGE_RANGE, "8-9", weight=1)
    c2 = i.classify(source, Subject.AGE_RANGE, "6-7", weight=1)

    overdrive_edition, lp = self._edition(
        data_source_name=source.name,
        with_license_pool=True,
        identifier_id=i.identifier,
    )
    self.classifier.work = self._work(presentation_edition=overdrive_edition)
    for classification in i.classifications:
        self.classifier.add(classification)
    genres, fiction, audience, target_age = self.classifier.classify()

    assert Classifier.AUDIENCE_CHILDREN == audience
    assert (6, 9) == target_age
def test_mirror_open_access_link_fetch_failure(self):
    mirrors = dict(books_mirror=MockS3Uploader())
    h = DummyHTTPClient()

    edition, pool = self._edition(with_license_pool=True)

    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    policy = ReplacementPolicy(mirrors=mirrors, http_get=h.do_get)

    circulation_data = CirculationData(
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
    )

    link_obj, ignore = edition.primary_identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        media_type=link.media_type,
        content=link.content,
    )

    h.queue_response(403)

    circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

    representation = link_obj.resource.representation

    # Fetch failed, so we should have a fetch exception but no mirror url.
    assert representation.fetch_exception != None
    assert None == representation.mirror_exception
    assert None == representation.mirror_url
    assert link.href == representation.url
    assert representation.fetched_at != None
    assert None == representation.mirrored_at

    # The license pool is suppressed when fetch fails.
    assert True == pool.suppressed
    assert representation.fetch_exception in pool.license_exception
def test_isbn_covers_are_imported_from_mapped_identifiers(self):
    # Now that we pass ISBN equivalents instead of Bibliotheca identifiers
    # to the Metadata Wrangler, they're not getting covers. Let's confirm
    # that the problem isn't on the Circulation Manager import side of things.

    # Create a Bibliotheca identifier with a license pool.
    source = DataSource.lookup(self._db, DataSource.BIBLIOTHECA)
    identifier = self._identifier(identifier_type=Identifier.BIBLIOTHECA_ID)
    LicensePool.for_foreign_id(
        self._db, source, identifier.type, identifier.identifier,
        collection=self.provider.collection
    )

    # Create an ISBN and set it equivalent.
    isbn = self._identifier(identifier_type=Identifier.ISBN)
    isbn.identifier = '9781594632556'
    identifier.equivalent_to(source, isbn, 1)

    opds = sample_data('metadata_isbn_response.opds', 'opds')
    self.provider.lookup_client.queue_response(
        200,
        {'content-type': 'application/atom+xml;profile=opds-catalog;kind=acquisition'},
        opds
    )

    result = self.provider.process_item(identifier)

    # The lookup is successful.
    eq_(result, identifier)

    # The appropriate cover links are transferred.
    identifier_uris = [
        l.resource.url for l in identifier.links
        if l.rel in [Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE]
    ]
    expected = [
        'http://book-covers.nypl.org/Content%20Cafe/ISBN/9781594632556/cover.jpg',
        'http://book-covers.nypl.org/scaled/300/Content%20Cafe/ISBN/9781594632556/cover.jpg'
    ]
    eq_(sorted(identifier_uris), sorted(expected))

    # The ISBN doesn't get any information.
    eq_(isbn.links, [])
def test_process_patron(self):
    patron = self._patron()

    # This patron has old-style and new-style Credentials that link
    # them to Adobe account IDs (hopefully the same ID, though that
    # doesn't matter here).
    def set_value(credential):
        credential.value = "a credential"

    # Data source doesn't matter -- even if it's incorrect, a Credential
    # of the appropriate type will be deleted.
    data_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    # Create two Credentials that will be deleted and one that will be
    # left alone.
    for type in (
        AdobeVendorIDModel.VENDOR_ID_UUID_TOKEN_TYPE,
        AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
        "Some other type"
    ):
        credential = Credential.lookup(
            self._db, data_source, type, patron, set_value, True
        )

    eq_(3, len(patron.credentials))

    # Run the patron through the script.
    script = AdobeAccountIDResetScript(self._db)

    # A dry run does nothing.
    script.delete = False
    script.process_patron(patron)
    self._db.commit()
    eq_(3, len(patron.credentials))

    # Now try it for real.
    script.delete = True
    script.process_patron(patron)
    self._db.commit()

    # The two Adobe-related credentials are gone. The other one remains.
    [credential] = patron.credentials
    eq_("Some other type", credential.type)
def test_collect_event_without_work(self):
    integration, ignore = create(
        self._db, ExternalIntegration,
        goal=ExternalIntegration.ANALYTICS_GOAL,
        protocol="api.google_analytics_provider",
    )
    integration.url = self._str
    ConfigurationSetting.for_library_and_externalintegration(
        self._db, GoogleAnalyticsProvider.TRACKING_ID,
        self._default_library, integration
    ).value = "faketrackingid"
    ga = MockGoogleAnalyticsProvider(integration, self._default_library)

    identifier = self._identifier()
    source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    pool, is_new = get_one_or_create(
        self._db, LicensePool,
        identifier=identifier,
        data_source=source,
        collection=self._default_collection
    )

    now = datetime.datetime.utcnow()
    ga.collect_event(
        self._default_library, pool,
        CirculationEvent.DISTRIBUTOR_CHECKIN, now
    )
    params = urlparse.parse_qs(ga.params)

    eq_(1, ga.count)
    eq_(integration.url, ga.url)
    eq_("faketrackingid", params['tid'][0])
    eq_("event", params['t'][0])
    eq_("circulation", params['ec'][0])
    eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, params['ea'][0])
    eq_(str(now), params['cd1'][0])
    eq_(pool.identifier.identifier, params['cd2'][0])
    eq_(pool.identifier.type, params['cd3'][0])
    eq_(None, params.get('cd4'))
    eq_(None, params.get('cd5'))
    eq_(None, params.get('cd6'))
    eq_(None, params.get('cd7'))
    eq_(None, params.get('cd8'))
    eq_(None, params.get('cd9'))
    eq_(None, params.get('cd10'))
    eq_(None, params.get('cd11'))
    eq_(None, params.get('cd12'))
def __init__(self, _db, service_name=None, lookup=None, **kwargs):
    service_name = service_name or self.DEFAULT_SERVICE_NAME
    if not lookup:
        content_server_url = (
            Configuration.integration_url(
                Configuration.CONTENT_SERVER_INTEGRATION
            )
        )
        lookup = SimplifiedOPDSLookup(content_server_url)
    output_source = DataSource.lookup(
        _db, DataSource.OA_CONTENT_SERVER
    )
    kwargs['input_identifier_types'] = None
    super(ContentServerBibliographicCoverageProvider, self).__init__(
        service_name,
        output_source=output_source,
        lookup=lookup,
        expect_license_pool=True,
        presentation_ready_on_success=True,
        **kwargs
    )
def test_feed_includes_staff_rating(self):
    work = self._work(with_open_access_download=True)
    lp = work.license_pools[0]
    staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
    lp.identifier.add_measurement(
        staff_data_source, Measurement.RATING, 3, weight=1000
    )

    feed = AcquisitionFeed(
        self._db,
        "test",
        "url",
        [work],
        AdminAnnotator(None, self._default_library, test_mode=True),
    )
    [entry] = feedparser.parse(str(feed))["entries"]
    rating = entry["schema_rating"]
    assert 3 == float(rating["schema:ratingvalue"])
    assert Measurement.RATING == rating["additionaltype"]
def opds_feed_identifiers(self):
    """Creates three Identifiers to use for testing with sample OPDS files."""

    # An identifier directly represented in the OPDS response.
    valid_id = self._identifier(foreign_id=u'2020110')

    # An identifier mapped to an identifier represented in the OPDS
    # response.
    source = DataSource.lookup(self._db, DataSource.AXIS_360)
    mapped_id = self._identifier(
        identifier_type=Identifier.AXIS_360_ID, foreign_id=u'0015187876'
    )
    equivalent_id = self._identifier(
        identifier_type=Identifier.ISBN, foreign_id='9781936460236'
    )
    mapped_id.equivalent_to(source, equivalent_id, 1)

    # An identifier that's not represented in the OPDS response.
    lost_id = self._identifier()

    return valid_id, mapped_id, lost_id
def run_once(self, start, cutoff):
    _db = self._db
    added_books = 0
    overdrive_data_source = DataSource.lookup(_db, DataSource.OVERDRIVE)

    total_books = 0
    consecutive_unchanged_books = 0
    for i, book in enumerate(self.recently_changed_ids(start, cutoff)):
        total_books += 1
        if not total_books % 100:
            self.log.info("%s books processed", total_books)
        if not book:
            continue
        license_pool, is_new, is_changed = self.api.update_licensepool(book)

        # Log a circulation event for this work.
        if is_new:
            for library in self.collection.libraries:
                self.analytics.collect_event(
                    library, license_pool,
                    CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                    license_pool.last_checked
                )

        _db.commit()

        if is_changed:
            consecutive_unchanged_books = 0
        else:
            consecutive_unchanged_books += 1
            if (self.maximum_consecutive_unchanged_books
                and consecutive_unchanged_books >=
                    self.maximum_consecutive_unchanged_books):
                # We're supposed to stop this run after finding a
                # run of books that have not changed, and we have
                # in fact seen that many consecutive unchanged
                # books.
                self.log.info(
                    "Stopping at %d unchanged books.",
                    consecutive_unchanged_books
                )
                break

    if total_books:
        self.log.info("Processed %d books total.", total_books)
def test_smuggled_authdata_credential_success(self):
    # Bob's client has created a persistent token to authenticate him.
    now = datetime.datetime.utcnow()
    token, ignore = Credential.persistent_token_create(
        self._db, self.data_source, self.model.AUTHDATA_TOKEN_TYPE,
        self.bob_patron
    )

    # But Bob's client can't trigger the operation that will cause
    # Adobe to authenticate him via that token, so it passes in
    # the token credential as the 'username' and leaves the
    # password blank.
    urn, label = self.model.standard_lookup(
        dict(username=token.credential)
    )

    # There is now an anonymized identifier associated with Bob's
    # patron account.
    internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
    bob_anonymized_identifier = Credential.lookup(
        self._db, internal,
        AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
        self.bob_patron, None
    )

    # That anonymized identifier is associated with a
    # DelegatedPatronIdentifier whose delegated_identifier is a
    # UUID.
    [bob_delegated_patron_identifier] = self._db.query(
        DelegatedPatronIdentifier).filter(
            DelegatedPatronIdentifier.patron_identifier
            == bob_anonymized_identifier.credential
        ).all()

    # That UUID is the one returned by standard_lookup.
    eq_(urn, bob_delegated_patron_identifier.delegated_identifier)

    # A future attempt to authenticate with the token will succeed.
    urn, label = self.model.standard_lookup(
        dict(username=token.credential)
    )
    eq_(urn, bob_delegated_patron_identifier.delegated_identifier)
def test_checkout(self):
    patron = self._patron()
    data_source = DataSource.lookup(self._db, "Biblioboard", autocreate=True)
    edition, pool = self._edition(
        identifier_type=Identifier.URI,
        data_source_name=data_source.name,
        with_license_pool=True,
        collection=self.collection,
    )
    loan_info = self.api.checkout(
        patron, "1234", pool, Representation.EPUB_MEDIA_TYPE
    )
    eq_(self.collection.id, loan_info.collection_id)
    eq_(data_source.name, loan_info.data_source_name)
    eq_(Identifier.URI, loan_info.identifier_type)
    eq_(pool.identifier.identifier, loan_info.identifier)
    eq_(None, loan_info.end_date)
def test_add_isbn(self):
    isbn = self._identifier(identifier_type=Identifier.ISBN)
    record = Record()
    Annotator.add_isbn(record, isbn)
    self._check_field(record, "020", {"a": isbn.identifier})

    # If the identifier isn't an ISBN, but has an equivalent that is,
    # it still works.
    equivalent = self._identifier()
    data_source = DataSource.lookup(self._db, DataSource.OCLC)
    equivalent.equivalent_to(data_source, isbn, 1)
    record = Record()
    Annotator.add_isbn(record, equivalent)
    self._check_field(record, "020", {"a": isbn.identifier})

    # If there is no ISBN, the field is left out.
    non_isbn = self._identifier()
    record = Record()
    Annotator.add_isbn(record, non_isbn)
    assert [] == record.get_fields("020")
def test_create_identifier_mapping(self):
    # Most identifiers map to themselves.
    overdrive = self._identifier(Identifier.OVERDRIVE_ID)

    # But Axis 360 and 3M identifiers map to equivalent ISBNs.
    axis = self._identifier(Identifier.AXIS_360_ID)
    threem = self._identifier(Identifier.THREEM_ID)
    isbn_axis = self._identifier(Identifier.ISBN)
    isbn_threem = self._identifier(Identifier.ISBN)

    who_says = DataSource.lookup(self._db, DataSource.AXIS_360)

    axis.equivalent_to(who_says, isbn_axis, 1)
    threem.equivalent_to(who_says, isbn_threem, 1)

    mapping = self.provider.create_identifier_mapping(
        [overdrive, axis, threem]
    )
    eq_(overdrive, mapping[overdrive])
    eq_(axis, mapping[isbn_axis])
    eq_(threem, mapping[isbn_threem])
def test_no_children_or_ya_signal_from_distributor_implies_book_is_for_adults(self):
    # Create some classifications that end up in
    # direct_from_license_source, but don't imply that the book is
    # for children or YA.
    # classifier.audience_weights[AUDIENCE_ADULT] will be set to 500.
    i = self.identifier
    source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    for subject in ("Nonfiction", "Science Fiction", "History"):
        c = i.classify(source, Subject.OVERDRIVE, subject, weight=1000)
        self.classifier.add(c)

    # There's a little bit of evidence that it's a children's book,
    # but not enough to outweigh the distributor's silence.
    c2 = self.identifier.classify(
        source, Subject.TAG, "Children's books", weight=1
    )
    self.classifier.add(c2)
    self.classifier.prepare_to_classify()

    # Overdrive classifications are regarded as 50 times more reliable
    # than their actual weight, as per Classification.scaled_weight.
    assert 50000 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULT]
def test_cover_image_root(
    self,
    name,
    bucket,
    data_source_name,
    expected_result,
    scaled_size=None,
    region=None,
):
    # Arrange
    uploader = self._create_s3_uploader(region=region)
    data_source = DataSource.lookup(self._db, data_source_name)

    # Act
    result = uploader.cover_image_root(bucket, data_source, scaled_size=scaled_size)

    # Assert
    assert result == expected_result
def test_calculate_quality(self):
    w = self._work(with_open_access_download=True)

    # This book used to be incredibly popular.
    identifier = w.presentation_edition.primary_identifier
    old_popularity = identifier.add_measurement(
        self.source, Measurement.POPULARITY, 6000
    )

    # Now it's just so-so.
    popularity = identifier.add_measurement(
        self.source, Measurement.POPULARITY, 59
    )

    # This measurement is irrelevant because "Test Data Source"
    # doesn't have a mapping from number of editions to a
    # percentile range.
    irrelevant = identifier.add_measurement(
        self.source, Measurement.PUBLISHED_EDITIONS, 42
    )

    # If we calculate the quality based solely on the primary
    # identifier, only the most recent popularity is considered,
    # and the book ends up in the middle of the road in terms of
    # quality.
    w.calculate_quality([identifier.id])
    assert 0.5 == w.quality
    old_quality = w.quality

    # But let's say there's another identifier that's equivalent,
    # and it has a number of editions that was obtained from
    # OCLC Classify, which _does_ have a mapping from number
    # of editions to a percentile range.
    wi = self._identifier()
    oclc = DataSource.lookup(self._db, DataSource.OCLC)
    wi.add_measurement(oclc, Measurement.PUBLISHED_EDITIONS, 800)

    # Now the quality is higher -- the large OCLC PUBLISHED_EDITIONS
    # measurement bumped it up.
    w.calculate_quality([identifier.id, wi.id])
    assert w.quality > old_quality
def test_patron_activity(self):
    # The patron has two loans from this API's collection and
    # one from a different collection.
    patron = self._patron()
    data_source = DataSource.lookup(self._db, "Biblioboard", autocreate=True)

    e1, p1 = self._edition(
        identifier_type=Identifier.URI,
        data_source_name=data_source.name,
        with_license_pool=True,
        collection=self.collection,
    )
    p1.loan_to(patron)

    e2, p2 = self._edition(
        identifier_type=Identifier.URI,
        data_source_name=data_source.name,
        with_license_pool=True,
        collection=self.collection,
    )
    p2.loan_to(patron)

    other_collection = self._collection(
        protocol=ExternalIntegration.OVERDRIVE
    )
    e3, p3 = self._edition(
        identifier_type=Identifier.OVERDRIVE_ID,
        data_source_name=DataSource.OVERDRIVE,
        with_license_pool=True,
        collection=other_collection,
    )
    p3.loan_to(patron)

    activity = self.api.patron_activity(patron, "1234")
    eq_(2, len(activity))
    [l1, l2] = activity
    eq_(l1.collection_id, self.collection.id)
    eq_(l2.collection_id, self.collection.id)
    eq_(
        set([l1.identifier, l2.identifier]),
        set([p1.identifier.identifier, p2.identifier.identifier])
    )
def test_process_item_creates_license_pool(self):
    self.resolver.required_coverage_providers = [self.always_successful]

    self.resolver.process_item(self.identifier)
    [lp] = self.identifier.licensed_through
    eq_(True, isinstance(lp, LicensePool))
    eq_(lp.collection, self.resolver.collection)
    eq_(lp.data_source, self.resolver.data_source)

    # Prepare an identifier that already has a LicensePool through
    # another source.
    licensed = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
    other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    lp = LicensePool.for_foreign_id(
        self._db, other_source, licensed.type, licensed.identifier,
        collection=self._default_collection
    )[0]

    self.resolver.process_item(licensed)
    eq_([lp], licensed.licensed_through)
def test_ensure_isbn_identifier(self):
    self.script.oclc_classify = DummyCoverageProvider()
    eq_(0, self.script.oclc_classify.hit_count)

    # When there are no equivalent identifiers, both identifiers go to the
    # OCLCClassify coverage provider.
    identifiers = [
        self.edition1.primary_identifier, self.edition2.primary_identifier
    ]
    self.script.ensure_isbn_identifier(identifiers)
    eq_(2, self.script.oclc_classify.hit_count)

    # If an edition already has an ISBN identifier it doesn't go to the
    # coverage provider.
    self.script.oclc_classify.hit_count = 0
    self.edition1.primary_identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.GUTENBERG),
        self._identifier(identifier_type=Identifier.ISBN), 1
    )
    self._db.commit()

    self.script.ensure_isbn_identifier(identifiers)
    eq_(1, self.script.oclc_classify.hit_count)
def do_run(self):
    self.api = NYTBestSellerAPI.from_config(self._db)
    self.data_source = DataSource.lookup(self._db, DataSource.NYT)

    # For every best-seller list...
    names = self.api.list_of_lists()
    for l in sorted(names['results'], key=lambda x: x['list_name_encoded']):
        name = l['list_name_encoded']
        self.log.info("Handling list %s" % name)
        best = self.api.best_seller_list(l)

        if self.include_history:
            self.api.fill_in_history(best)
        else:
            self.api.update(best)

        # Mirror the list to the database.
        customlist = best.to_customlist(self._db)
        self.log.info(
            "Now %s entries in the list.", len(customlist.entries)
        )
        self._db.commit()
def test_process_batch(self):
    provider = self._provider()

    # Here are an Edition and a LicensePool for the same identifier but
    # from different data sources. We would expect this to happen
    # when talking to the open-access content server.
    edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
    identifier = edition.primary_identifier
    license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    pool, is_new = LicensePool.for_foreign_id(
        self._db, license_source, identifier.type, identifier.identifier
    )
    eq_(None, pool.work)

    # Here's a second identifier that's doomed to failure.
    identifier = self._identifier()
    messages_by_id = {
        identifier.urn: CoverageFailure(identifier, "201: try again later")
    }

    provider.queue_import_results([edition], [pool], [], messages_by_id)

    fake_batch = [object()]
    success, failure = provider.process_batch(fake_batch)

    # The batch was provided to lookup_and_import_batch.
    eq_([fake_batch], provider.batches)

    # The Edition and LicensePool have been knitted together into
    # a Work.
    eq_(edition, pool.presentation_edition)
    assert pool.work != None

    # The license pool was finalized.
    eq_([pool], provider.finalized)

    # The failure stayed a CoverageFailure object.
    eq_(identifier, failure.obj)
    eq_(True, failure.transient)
def test_adults_only_indication_from_distributor_has_no_implication_for_audience(
    self,
):
    # Create some classifications that end up in
    # direct_from_license_source, one of which implies the book is
    # for adults only.
    i = self.identifier
    source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    for subject in ("Erotic Literature", "Science Fiction", "History"):
        c = i.classify(source, Subject.OVERDRIVE, subject, weight=1)
        self.classifier.add(c)

    self.classifier.prepare_to_classify()

    # Again, Overdrive classifications are regarded as 50 times
    # more reliable than their actual weight, as per
    # Classification.scaled_weight.
    assert 50 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULTS_ONLY]

    # No boost was given to AUDIENCE_ADULT, because a distributor
    # classification implied AUDIENCE_ADULTS_ONLY.
    assert 0 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULT]
def test_process_batch_with_identifier_mapping(self):
    """Test that internal identifiers are mapped to and from the form used
    by the external service.
    """
    # Unlike other tests in this class, we are using a real
    # implementation of OPDSImportCoverageProvider.process_batch.
    class TestProvider(OPDSImportCoverageProvider):
        # Mock the identifier mapping.
        def create_identifier_mapping(self, batch):
            return self.mapping

    # This means we need to mock the lookup client instead.
    lookup = MockSimplifiedOPDSLookup(self._url)

    source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
    provider = TestProvider("test provider", [], source, lookup=lookup)

    # Create a hard-coded mapping. We use id1 internally, but the
    # foreign data source knows the book as id2.
    id1 = self._identifier()
    id2 = self._identifier()
    provider.mapping = {id2: id1}

    feed = (
        "<feed><entry><id>%s</id><title>Here's your title!</title></entry></feed>"
        % id2.urn
    )
    headers = {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}
    lookup.queue_response(200, headers=headers, content=feed)
    [identifier] = provider.process_batch([id1])

    # We wanted to process id1. We sent id2 to the server, the
    # server responded with an <entry> for id2, and it was used to
    # modify the Edition associated with id1.
    eq_(id1, identifier)

    [edition] = id1.primarily_identifies
    eq_("Here's your title!", edition.title)
    eq_(id1, edition.primary_identifier)
def test_username_password_lookup_success(self):
    urn, label = self.model.standard_lookup(self.credentials)

    # There is now an anonymized identifier associated with Bob's
    # patron account.
    internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
    bob_anonymized_identifier = Credential.lookup(
        self._db, internal,
        AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
        self.bob_patron, None
    )

    # That anonymized identifier is associated with a
    # DelegatedPatronIdentifier whose delegated_identifier is a
    # UUID.
    [bob_delegated_patron_identifier] = self._db.query(
        DelegatedPatronIdentifier).filter(
            DelegatedPatronIdentifier.patron_identifier
            == bob_anonymized_identifier.credential
        ).all()

    eq_("Delegated account ID %s" % urn, label)
    eq_(urn, bob_delegated_patron_identifier.delegated_identifier)
    assert urn.startswith("urn:uuid:0")
    assert urn.endswith('685b35c00f05')
def test_juvenile_classification_is_split_between_children_and_ya(self):
    # LCC files both children's and YA works under 'PZ'.
    # Here's how we deal with that.
    i = self.identifier
    source = DataSource.lookup(self._db, DataSource.OCLC)
    c = i.classify(source, Subject.LCC, "PZ", weight=100)
    self.classifier.add(c)

    # (This classification has no bearing on audience and its
    # weight will be ignored.)
    c2 = i.classify(source, Subject.TAG, "Pets", weight=1000)
    self.classifier.add(c2)
    self.classifier.prepare_to_classify()
    genres, fiction, audience, target_age = self.classifier.classify()

    # Young Adult wins because we err on the side of showing books
    # to kids who are too old, rather than too young.
    assert Classifier.AUDIENCE_YOUNG_ADULT == audience

    # But behind the scenes, more is going on. The weight of the
    # classifier has been split 60/40 between YA and children.
    weights = self.classifier.audience_weights
    assert 60 == weights[Classifier.AUDIENCE_YOUNG_ADULT]
    assert 40 == weights[Classifier.AUDIENCE_CHILDREN]

    # If this is in fact a children's book, this will make it
    # relatively easy for data from some other source to come in
    # and tip the balance.

    # The adult audiences have been reduced, to reduce the chance
    # that splitting up the weight between YA and Children will
    # cause the work to be mistakenly classified as Adult.
    assert -50 == weights[Classifier.AUDIENCE_ADULT]
    assert -50 == weights[Classifier.AUDIENCE_ADULTS_ONLY]

    # The juvenile classification doesn't make the 'all ages'
    # audience any less likely.
    assert 0 == weights[Classifier.AUDIENCE_ALL_AGES]
def test_reaper(self):
    feed = self.get_data("biblioboard_mini_feed.opds")

    class MockOPDSForDistributorsReaperMonitor(OPDSForDistributorsReaperMonitor):
        """An OPDSForDistributorsReaperMonitor that overrides _get."""
        def _get(self, url, headers):
            return (200, {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE}, feed)

    data_source = DataSource.lookup(self._db, "Biblioboard", autocreate=True)
    collection = MockOPDSForDistributorsAPI.mock_collection(self._db)
    collection.external_integration.set_setting(
        Collection.DATA_SOURCE_NAME_SETTING, data_source.name
    )
    monitor = MockOPDSForDistributorsReaperMonitor(
        self._db, collection, OPDSForDistributorsImporter,
        metadata_client=object()
    )

    # There's a license pool in the database that isn't in the feed anymore.
    edition, pool = self._edition(
        identifier_type=Identifier.URI,
        data_source_name=data_source.name,
        with_license_pool=True,
        collection=collection,
    )
    pool.licenses_owned = 1
    pool.licenses_available = 1

    monitor.run_once(None, None)

    eq_(0, pool.licenses_owned)
    eq_(0, pool.licenses_available)
class JustAddMetadata(object):
    """A mock CoverageProvider that puts some data in place, but
    for whatever reason neglects to create a presentation-ready
    Work.
    """
    # Note: this class is defined inside a test method, so 'self' below
    # refers to the enclosing test fixture; register() names its own
    # instance parameter 's' to avoid shadowing it.
    COVERAGE_COUNTS_FOR_EVERY_COLLECTION = True
    STATUS = CoverageRecord.SUCCESS
    SOURCE = DataSource.lookup(self._db, DataSource.GUTENBERG)
    TITLE = "A great book"

    def can_cover(self, *args, **kwargs):
        return True

    def register(s, identifier, *args, **kwargs):
        # They only told us to register, but we're going to
        # actually do the work.
        edition = self._edition(
            identifier_type=identifier.type,
            identifier_id=identifier.identifier,
            title=s.TITLE
        )
        return self._coverage_record(
            identifier, coverage_source=s.SOURCE, status=s.STATUS
        ), True
def test_authdata_token_credential_lookup_success(self):
    # Create an authdata token Credential for Bob.
    now = datetime.datetime.utcnow()
    token, ignore = Credential.persistent_token_create(
        self._db, self.data_source, self.model.AUTHDATA_TOKEN_TYPE,
        self.bob_patron
    )

    # The token is persistent.
    eq_(None, token.expires)

    # Use that token to perform a lookup of Bob's Adobe Vendor ID
    # UUID.
    urn, label = self.model.authdata_lookup(token.credential)

    # There is now an anonymized identifier associated with Bob's
    # patron account.
    internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
    bob_anonymized_identifier = Credential.lookup(
        self._db, internal,
        AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
        self.bob_patron, None
    )

    # That anonymized identifier is associated with a
    # DelegatedPatronIdentifier whose delegated_identifier is a
    # UUID.
    [bob_delegated_patron_identifier] = self._db.query(
        DelegatedPatronIdentifier).filter(
            DelegatedPatronIdentifier.patron_identifier
            == bob_anonymized_identifier.credential
        ).all()

    # That UUID is the one returned by authdata_lookup.
    eq_(urn, bob_delegated_patron_identifier.delegated_identifier)