Example #1
    def test_items_that_need_coverage(self):
        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        
        # An item that hasn't been covered by the provider yet
        cr = self._coverage_record(self._edition(), other_source)
        
        # An item that has been covered by the reaper operation already
        reaper_cr = self._coverage_record(
            self._edition(), source, operation=CoverageRecord.REAP_OPERATION
        )
        
        # An item that has been covered by the reaper operation, but has
        # had its license repurchased.
        relicensed_edition, relicensed_licensepool = self._edition(with_license_pool=True)
        relicensed_coverage_record = self._coverage_record(
            relicensed_edition, source, operation=CoverageRecord.REAP_OPERATION
        )
        relicensed_licensepool.update_availability(1, 0, 0, 0)

        items = self.provider.items_that_need_coverage().all()
        # Provider ignores anything that has been reaped and doesn't have
        # licenses.
        assert reaper_cr.identifier not in items
        # But it picks up anything that hasn't been covered at all and anything
        # that's been licensed anew even if it's already been reaped.
        eq_(2, len(items))
        assert relicensed_licensepool.identifier in items
        assert cr.identifier in items
        # The Wrangler Reaper coverage record is removed from the db
        # when it's committed.
        assert relicensed_coverage_record in relicensed_licensepool.identifier.coverage_records
        self._db.commit()
        assert relicensed_coverage_record not in relicensed_licensepool.identifier.coverage_records
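A note on the commit-time cleanup asserted above: behavior like this is
typically wired up with a session event hook. The sketch below shows one
plausible shape for it, assuming the CoverageRecord/REAP_OPERATION model
from these tests and the identifier.licensed_through collection seen later
in this document; it illustrates the pattern, not the project's confirmed
implementation.

from sqlalchemy import event
from sqlalchemy.orm import Session

@event.listens_for(Session, "before_commit")
def reap_records_for_relicensed_pools(session):
    # Hypothetical hook: drop REAP coverage records whose identifier has
    # regained licenses, so the provider will pick the item up again.
    query = session.query(CoverageRecord).filter(
        CoverageRecord.operation == CoverageRecord.REAP_OPERATION
    )
    for record in query:
        pools = record.identifier.licensed_through
        if any(pool.licenses_owned > 0 for pool in pools):
            session.delete(record)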
    def __init__(self, _db, overdrive=None, threem=None, axis=None):
        self._db = _db
        self.overdrive = overdrive
        self.threem = threem
        self.axis = axis
        self.apis = [x for x in (overdrive, threem, axis) if x]
        self.log = logging.getLogger("Circulation API")

        # When we get our view of a patron's loans and holds, we need
        # to include loans from all licensed data sources.  We do not
        # need to include loans from open-access sources because we
        # are the authorities on those.
        data_sources_for_sync = []
        if self.overdrive:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.OVERDRIVE)
            )
        if self.threem:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.THREEM)
            )
        if self.axis:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.AXIS_360)
            )

        self.identifier_type_to_data_source_name = dict(
            (ds.primary_identifier_type, ds.name) 
            for ds in data_sources_for_sync)
        self.data_source_ids_for_sync = [
            x.id for x in data_sources_for_sync
        ]
    def test_items_that_need_coverage(self):
        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        # An item that hasn't been covered by the provider yet
        cr = self._coverage_record(self._edition(), other_source)
        # An item that has been covered by the reaper operation already
        reaper_cr = self._coverage_record(
            self._edition(), source, operation=CoverageRecord.REAP_OPERATION
        )
        # An item that has been covered by the reaper operation, but has
        # had its license repurchased.
        relicensed, relicensed_lp = self._edition(with_license_pool=True)
        self._coverage_record(
            relicensed, source, operation=CoverageRecord.REAP_OPERATION
        )
        relicensed_lp.update_availability(1, 0, 0, 0)

        with temp_config() as config:
            config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
                Configuration.URL : "http://url.gov"
            }
            provider = MetadataWranglerCoverageProvider(self._db)
        items = provider.items_that_need_coverage().all()
        # Provider ignores anything that has been reaped and doesn't have
        # licenses.
        assert reaper_cr.identifier not in items
        # But it picks up anything that hasn't been covered at all and anything
        # that's been licensed anew even if it's already been reaped.
        eq_(2, len(items))
        assert relicensed_lp.identifier in items
        assert cr.identifier in items
        # The Wrangler Reaper coverage record is removed from the db
        # when it's committed.
        self._db.commit()
        eq_([], relicensed_lp.identifier.coverage_records)
    def test_run_once(self):
        # Setup authentication and Metadata Wrangler details.
        lp = self._licensepool(
            None, data_source_name=DataSource.BIBLIOTHECA,
            collection=self.collection
        )
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
        )
        eq_([], lp.identifier.links)
        eq_([], lp.identifier.measurements)

        # Queue some data to be found.
        responses = (
            'metadata_updates_response.opds',
            'metadata_updates_empty_response.opds',
        )
        for filename in responses:
            data = sample_data(filename, 'opds')
            self.lookup.queue_response(
                200, {'content-type' : OPDSFeed.ACQUISITION_FEED_TYPE}, data
            )

        timestamp = self.ts
        new_timestamp = self.monitor.run_once(timestamp)

        # We have a new value to use for the Monitor's timestamp -- the
        # earliest date seen in the last OPDS feed that contained
        # any entries.
        eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp.finish)
        eq_("Editions processed: 1", new_timestamp.achievements)

        # Normally run_once() doesn't update the monitor's timestamp,
        # but this implementation does, so that work isn't redone if
        # run_once() crashes or the monitor is killed.
        eq_(new_timestamp.finish, self.monitor.timestamp().finish)

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        eq_(3, len(lp.identifier.links))
        [quality] = lp.identifier.measurements
        eq_(mw_source, quality.data_source)

        # Check the URLs we processed.
        url1, url2 = [x[0] for x in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

        # The second URL processed was whatever we saw in the 'next' link.
        eq_("http://next-link/", url2)
    def test_load_cover_link(self):
        # Create a directory import script with an empty mock filesystem.
        script = MockDirectoryImportScript(self._db, {})

        identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        mirror = MockS3Uploader()
        args = (identifier, gutenberg, "covers", mirror)

        # There is nothing on the mock filesystem, so in this case
        # load_cover_link returns None.
        eq_(None, script.load_cover_link(*args))

        # But we tried.
        eq_(
            ('2345', 'covers', Representation.COMMON_IMAGE_EXTENSIONS,
             'cover image'),
            script._locate_file_args
        )

        # Try another script that has a populated mock filesystem.
        mock_filesystem = {
            'covers' : (
                'acover.jpeg', Representation.JPEG_MEDIA_TYPE, "I'm an image."
            )
        }
        script = MockDirectoryImportScript(self._db, mock_filesystem)
        link = script.load_cover_link(*args)
        eq_(Hyperlink.IMAGE, link.rel)
        assert link.href.endswith(
            '/test.cover.bucket/Gutenberg/Gutenberg+ID/2345/2345.jpg'
        )
        eq_(Representation.JPEG_MEDIA_TYPE, link.media_type)
        eq_("I'm an image.", link.content)
 def __init__(self, _db, data_directory):
     self._db = _db
     self.collection = Collection.by_protocol(self._db, ExternalIntegration.GUTENBERG).one()
     self.source = DataSource.lookup(self._db, DataSource.GUTENBERG)
     self.data_directory = data_directory
     self.catalog_path = os.path.join(self.data_directory, self.FILENAME)
     self.log = logging.getLogger("Gutenberg API")
Example #7
    def __init__(self, _db, lookup=None, input_identifier_types=None, 
                 operation=None, **kwargs):
        if not input_identifier_types:
            input_identifier_types = [
                Identifier.OVERDRIVE_ID, 
                Identifier.THREEM_ID,
                Identifier.GUTENBERG_ID, 
                Identifier.AXIS_360_ID,
            ]
        output_source = DataSource.lookup(
            _db, DataSource.METADATA_WRANGLER
        )
        super(MetadataWranglerCoverageProvider, self).__init__(
            lookup=lookup or SimplifiedOPDSLookup.from_config(),
            service_name=self.SERVICE_NAME,
            input_identifier_types=input_identifier_types,
            output_source=output_source,
            operation=operation or self.OPERATION,
            **kwargs
        )

        if not self.lookup.authenticated:
            self.log.warn(
                "Authentication for the Library Simplified Metadata Wrangler "
                "is not set up. You can still use the metadata wrangler, but "
                "it will not know which collection you're asking about."
            )
    def metadata_needed_for(self, collection_details):
        """Returns identifiers in the collection that could benefit from
        distributor metadata on the circulation manager.
        """
        client = authenticated_client_from_request(self._db)
        if isinstance(client, ProblemDetail):
            return client

        collection = collection_from_details(
            self._db, client, collection_details
        )

        resolver = IdentifierResolutionCoverageProvider
        unresolved_identifiers = collection.unresolved_catalog(
            self._db, resolver.DATA_SOURCE_NAME, resolver.OPERATION
        )

        # Omit identifiers that currently have metadata pending for
        # the IntegrationClientCoverImageCoverageProvider.
        data_source = DataSource.lookup(
            self._db, collection.name, autocreate=True
        )
        is_awaiting_metadata = self._db.query(
            CoverageRecord.id, CoverageRecord.identifier_id
        ).filter(
            CoverageRecord.data_source_id==data_source.id,
            CoverageRecord.status==CoverageRecord.REGISTERED,
            CoverageRecord.operation==IntegrationClientCoverImageCoverageProvider.OPERATION,
        ).subquery()

        unresolved_identifiers = unresolved_identifiers.outerjoin(
            is_awaiting_metadata,
            Identifier.id==is_awaiting_metadata.c.identifier_id
        ).filter(is_awaiting_metadata.c.id==None)

        # Add a message for each unresolved identifier
        pagination = load_pagination_from_request(default_size=25)
        feed_identifiers = pagination.apply(unresolved_identifiers).all()
        messages = list()
        for identifier in feed_identifiers:
            messages.append(OPDSMessage(
                identifier.urn, HTTP_ACCEPTED, "Metadata needed."
            ))

        title = "%s Metadata Requests for %s" % (collection.protocol, client.url)
        metadata_request_url = self.collection_feed_url(
            'metadata_needed_for', collection
        )

        request_feed = AcquisitionFeed(
            self._db, title, metadata_request_url, [], VerboseAnnotator,
            precomposed_entries=messages
        )

        self.add_pagination_links_to_feed(
            pagination, unresolved_identifiers, request_feed,
            'metadata_needed_for', collection
        )

        return feed_response(request_feed)
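The outerjoin/filter pair above is an anti-join: an identifier survives
only when the subquery of pending coverage records finds no match for it.
Here is a self-contained sketch of the same pattern against stand-in
tables (the schema below is illustrative, not the real one):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Identifier(Base):
    __tablename__ = "identifiers"
    id = Column(Integer, primary_key=True)
    urn = Column(String)

class CoverageRecord(Base):
    __tablename__ = "coverage_records"
    id = Column(Integer, primary_key=True)
    identifier_id = Column(Integer)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

session.add_all([
    Identifier(id=1, urn="covered"), Identifier(id=2, urn="uncovered"),
    CoverageRecord(id=10, identifier_id=1),
])
session.commit()

pending = session.query(
    CoverageRecord.id, CoverageRecord.identifier_id
).subquery()
uncovered = session.query(Identifier).outerjoin(
    pending, Identifier.id == pending.c.identifier_id
).filter(pending.c.id == None)

print([i.urn for i in uncovered.all()])  # ['uncovered']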
    def test_handle_import_messages(self):
        data_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        provider = OPDSImportCoverageProvider("name", [], data_source)

        message = StatusMessage(201, "try again later")
        message2 = StatusMessage(404, "we're doomed")
        message3 = StatusMessage(200, "everything's fine")

        identifier = self._identifier()
        identifier2 = self._identifier()
        identifier3 = self._identifier()

        messages_by_id = { identifier.urn: message,
                           identifier2.urn: message2,
                           identifier3.urn: message3,
        }

        [f1, f2] = sorted(list(provider.handle_import_messages(messages_by_id)),
                          key=lambda x: x.exception)
        eq_(identifier, f1.obj)
        eq_("201: try again later", f1.exception)
        eq_(True, f1.transient)

        eq_(identifier2, f2.obj)
        eq_("404: we're doomed", f2.exception)
        eq_(False, f2.transient)
Example #10
 def _provider(self, presentation_ready_on_success=True):
     """Create a generic MockOPDSImportCoverageProvider for testing purposes."""
     source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
     return MockOPDSImportCoverageProvider(
         "mock provider", [], source,
         presentation_ready_on_success=presentation_ready_on_success
     )
Example #11
    def test_items_that_need_coverage_respects_cutoff(self):
        """Verify that this coverage provider respects the cutoff_time
        argument.
        """

        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        edition = self._edition()
        cr = self._coverage_record(edition, source, operation='sync')

        # We have a coverage record already, so this book doesn't show
        # up in items_that_need_coverage
        items = self.provider.items_that_need_coverage().all()
        eq_([], items)

        # But if we send a cutoff_time that's later than the time
        # associated with the coverage record...
        one_hour_from_now = (
            datetime.datetime.utcnow() + datetime.timedelta(seconds=3600)
        )
        provider_with_cutoff = self.create_provider(
            cutoff_time=one_hour_from_now
        )

        # The book starts showing up in items_that_need_coverage.
        eq_([edition.primary_identifier], 
            provider_with_cutoff.items_that_need_coverage().all())
Example #12
    def test_finalize_edition(self):

        provider_no_presentation_ready = self._provider(presentation_ready_on_success=False)
        provider_presentation_ready = self._provider(presentation_ready_on_success=True)
        identifier = self._identifier()
        source = DataSource.lookup(self._db, DataSource.GUTENBERG)

        # Here's an Edition with no LicensePool.
        edition, is_new = Edition.for_foreign_id(
            self._db, source, identifier.type, identifier.identifier
        )
        edition.title = self._str

        # This will effectively do nothing.
        provider_no_presentation_ready.finalize_edition(edition)

        # No Works have been created.
        eq_(0, self._db.query(Work).count())

        # But if there's also a LicensePool...
        pool, is_new = LicensePool.for_foreign_id(
            self._db, source, identifier.type, identifier.identifier
        )

        # finalize_edition() will create a Work.
        provider_no_presentation_ready.finalize_edition(edition)

        work = pool.work
        eq_(work, edition.work)
        eq_(False, work.presentation_ready)

        # If the provider is configured to do so, finalize_edition()
        # will also set the Work as presentation-ready.
        provider_presentation_ready.finalize_edition(edition)
        eq_(True, work.presentation_ready)
Example #13
    def __init__(self, _db, input_identifier_types=None, metadata_lookup=None,
                 cutoff_time=None, operation=None):
        self._db = _db
        if not input_identifier_types:
            input_identifier_types = [
                Identifier.OVERDRIVE_ID, 
                Identifier.THREEM_ID,
                Identifier.GUTENBERG_ID, 
                Identifier.AXIS_360_ID,
            ]
        self.output_source = DataSource.lookup(
            self._db, DataSource.METADATA_WRANGLER
        )

        if not metadata_lookup:
            metadata_lookup = SimplifiedOPDSLookup.from_config()
        self.lookup = metadata_lookup

        if not operation:
            operation = CoverageRecord.SYNC_OPERATION
        self.operation = operation

        super(MetadataWranglerCoverageProvider, self).__init__(
            self.service_name,
            input_identifier_types,
            self.output_source,
            workset_size=20,
            cutoff_time=cutoff_time,
            operation=self.operation,
        )
Example #14
    def generate_mock_api(self):
        """Prep an empty NoveList result."""
        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        metadata = Metadata(source)

        mock_api = MockNoveListAPI(self._db)
        mock_api.setup(metadata)
        return mock_api
 def setup(self):
     super(TestMetadataWranglerCollectionReaper, self).setup()
     self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
     with temp_config() as config:
         config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
             Configuration.URL : "http://url.gov"
         }
         self.reaper = MetadataWranglerCollectionReaper(self._db)
Example #16
 def __init__(self, _db, testing=False, api=None):
     super(ThreeMCirculationSweep, self).__init__(
         _db, "3M Circulation Sweep", batch_size=25)
     self._db = _db
     if not api:
         api = ThreeMAPI(self._db, testing=testing)
     self.api = api
     self.data_source = DataSource.lookup(self._db, DataSource.THREEM)
Example #17
    def __init__(self, db, mirrors, uploader=None):
        self._db = db
        self.data_source_ids = []
        self.uploader = uploader or S3Uploader()
        self.log = logging.getLogger("Cover Image Scaler")

        for mirror in mirrors:
            data_source_name = mirror.DATA_SOURCE
            data_source = DataSource.lookup(self._db, data_source_name)
            self.data_source_ids.append(data_source.id)
 def __init__(self, _db, authenticator, node_value,
              temporary_token_duration=None):
     self._db = _db
     self.authenticator = authenticator
     self.data_source = DataSource.lookup(_db, DataSource.ADOBE)
     self.temporary_token_duration = (
         temporary_token_duration or datetime.timedelta(minutes=10))
     if isinstance(node_value, basestring):
         node_value = int(node_value, 16)
     self.node_value = node_value
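The basestring check above lets the node value arrive from configuration
as a hex string and be normalized to an integer. A small illustration
(the literal is an arbitrary example, not a real node value):

node_value = "685b35c00f05"       # hex string from configuration
node_value = int(node_value, 16)  # same quantity as the int 0x685b35c00f05
assert node_value == 0x685b35c00f05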
    def setup(self):
        super(TestFeedbooksOPDSImporter, self).setup()
        self.http = DummyHTTPClient()
        self.metadata = DummyMetadataClient()
        self.mirror = MockS3Uploader()

        self.data_source = DataSource.lookup(self._db, DataSource.FEEDBOOKS)

        # Create a default importer that's good enough for most tests.
        self.collection, self.importer = self._importer()
    def test_lookup_equivalent_isbns(self):
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        api = MockNoveListAPI.from_config(self._default_library)

        # If there are no ISBN equivalents, it returns None.
        eq_(None, api.lookup_equivalent_isbns(identifier))

        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        identifier.equivalent_to(source, self._identifier(), strength=1)
        self._db.commit()
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # If there's an ISBN equivalent, but it doesn't result in metadata,
        # it returns None.
        isbn = self._identifier(identifier_type=Identifier.ISBN)
        identifier.equivalent_to(source, isbn, strength=1)
        self._db.commit()
        api.responses.append(None)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # Create an API class that can mock out NoveListAPI.choose_best_metadata.
        class MockBestMetadataAPI(MockNoveListAPI):
            choose_best_metadata_return = None
            def choose_best_metadata(self, *args, **kwargs):
                return self.choose_best_metadata_return
        api = MockBestMetadataAPI.from_config(self._default_library)

        # Give the identifier another ISBN equivalent.
        isbn2 = self._identifier(identifier_type=Identifier.ISBN)
        identifier.equivalent_to(source, isbn2, strength=1)
        self._db.commit()

        # Queue metadata responses for each ISBN lookup.
        metadatas = [object(), object()]
        api.responses.extend(metadatas)

        # If choose_best_metadata returns None, the lookup returns None.
        api.choose_best_metadata_return = (None, None)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # Lookup was performed for both ISBNs.
        eq_([], api.responses)

        # If choose_best_metadata returns a low confidence metadata, the
        # lookup returns None.
        api.responses.extend(metadatas)
        api.choose_best_metadata_return = (metadatas[0], 0.33)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # If choose_best_metadata returns a high confidence metadata, the
        # lookup returns the metadata.
        api.responses.extend(metadatas)
        api.choose_best_metadata_return = (metadatas[1], 0.67)
        eq_(metadatas[1], api.lookup_equivalent_isbns(identifier))
    def test_facets(self):
        # Normally we yield one FeaturedFacets object for each of the
        # library's enabled entry points.
        library = self._default_library
        script = CacheOPDSGroupFeedPerLane(
            self._db, manager=object(), cmd_args=[]
        )
        setting = library.setting(EntryPoint.ENABLED_SETTING)
        setting.value = json.dumps(
            [AudiobooksEntryPoint.INTERNAL_NAME,
             EbooksEntryPoint.INTERNAL_NAME]
        )

        lane = self._lane()
        audio_facets, ebook_facets = script.facets(lane)
        eq_(AudiobooksEntryPoint, audio_facets.entrypoint)
        eq_(EbooksEntryPoint, ebook_facets.entrypoint)

        # The first entry point in the library's list of enabled entry
        # points is treated as the default.
        eq_(True, audio_facets.entrypoint_is_default)
        eq_(audio_facets.entrypoint, list(library.entrypoints)[0])
        eq_(False, ebook_facets.entrypoint_is_default)

        for facets in (audio_facets, ebook_facets):
            # The FeaturedFacets object knows to feature works at the
            # library's minimum quality level.
            eq_(library.minimum_featured_quality,
                facets.minimum_featured_quality)
            # The FeaturedFacets object knows that custom lists are
            # not in play.
            eq_(False, facets.uses_customlists)

        # The first entry point is treated as the default only for WorkLists
        # that have no parent. When the WorkList has a parent, the selected
        # entry point is treated as an explicit choice -- navigating downward
        # in the lane hierarchy ratifies the default value.
        sublane = self._lane(parent=lane)
        f1, f2 = script.facets(sublane)
        for f in f1, f2:
            eq_(False, f.entrypoint_is_default)

        # Make it look like the lane uses custom lists.
        lane.list_datasource = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        # If the library has no enabled entry points, we yield one
        # FeaturedFacets object with no particular entry point.
        setting.value = json.dumps([])
        no_entry_point, = script.facets(lane)
        eq_(None, no_entry_point.entrypoint)

        # The FeaturedFacets object knows that custom lists are in
        # play.
        eq_(True, no_entry_point.uses_customlists)
Example #22
    def test_feed_includes_staff_rating(self):
        work = self._work(with_open_access_download=True)
        lp = work.license_pools[0]
        staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        lp.identifier.add_measurement(staff_data_source, Measurement.RATING, 3, weight=1000)

        feed = AcquisitionFeed(self._db, "test", "url", [work], AdminAnnotator(None, self._default_library, test_mode=True))
        [entry] = feedparser.parse(unicode(feed))['entries']
        rating = entry['schema_rating']
        eq_(3, float(rating['schema:ratingvalue']))
        eq_(Measurement.RATING, rating['additionaltype'])
Example #23
    def __init__(self, _db, api=None, **kwargs):
        input_identifier_types = [
            Identifier.GUTENBERG_ID, Identifier.URI
        ]
        output_source = DataSource.lookup(_db, DataSource.OCLC)
        super(OCLCClassifyCoverageProvider, self).__init__(
            "OCLC Classify Coverage Provider", input_identifier_types,
            output_source)

        self._db = _db
        self.api = api or OCLCClassifyAPI(self._db)
Example #24
 def setup(self):
     super(TestVendorIDModel, self).setup()
     self.authenticator = DummyMilleniumPatronAPI()
     self.model = AdobeVendorIDModel(self._db, self.authenticator,
                                     self.TEST_NODE_VALUE)
     self.data_source = DataSource.lookup(self._db, DataSource.ADOBE)
     # Normally this test patron doesn't have an authorization identifier.
     # Let's make sure there is one so it'll show up as the label.
     self.bob_patron = self.authenticator.authenticated_patron(
         self._db, dict(username="******", password="******"))
     self.bob_patron.authorization_identifier = "5"
Example #25
 def process_item(self, identifier):
     data_source = DataSource.lookup(
         self._db, self.importer.data_source_name
     )
     try:
         response = self.content_server.lookup([identifier])
      except BadResponseException as e:
         return CoverageFailure(
             identifier,
             e.message,
             data_source
         )
 def setup(self):
     super(TestMetadataUploadCoverageProvider, self).setup()
     self.integration = self._external_integration(
         ExternalIntegration.METADATA_WRANGLER,
         goal=ExternalIntegration.METADATA_GOAL, url=self._url,
         username=u'abc', password=u'def'
     )
     self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
     self.collection = self._collection(
         protocol=ExternalIntegration.BIBLIOTHECA, external_account_id=u'lib'
     )
     self.provider = self.create_provider()
Example #27
    def test_new_isbns(self):
        existing_id = self._identifier()
        metadata = Metadata(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            identifiers=[
                IdentifierData(type=Identifier.OCLC_WORK, identifier="abra"),
                IdentifierData(type=existing_id.type, identifier=existing_id.identifier),
                IdentifierData(type=Identifier.ISBN, identifier="kadabra"),
            ]
        )

        eq_(2, self.provider.new_isbns(metadata))
    def test_load_circulation_data(self):
        # Create a directory import script with an empty mock filesystem.
        script = MockDirectoryImportScript(self._db, {})

        identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        mirror = MockS3Uploader()
        args = (identifier, gutenberg, "ebooks", mirror, "Name of book",
                "rights URI")

        # There is nothing on the mock filesystem, so in this case
        # load_circulation_data returns None.
        eq_(None, script.load_circulation_data(*args))

        # But we tried.
        eq_(
            ('2345', 'ebooks', Representation.COMMON_EBOOK_EXTENSIONS,
             'ebook file'),
            script._locate_file_args
        )

        # Try another script that has a populated mock filesystem.
        mock_filesystem = {
            'ebooks' : (
                'book.epub', Representation.EPUB_MEDIA_TYPE, "I'm an EPUB."
            )
        }
        script = MockDirectoryImportScript(self._db, mock_filesystem)

        # Now _locate_file finds something on the mock filesystem, and
        # load_circulation_data loads it into a fully populated
        # CirculationData object.
        circulation = script.load_circulation_data(*args)
        eq_(identifier, circulation.primary_identifier(self._db))
        eq_(gutenberg, circulation.data_source(self._db))
        eq_("rights URI", circulation.default_rights_uri)

        # The CirculationData has an open-access link associated with it.
        [link] = circulation.links
        eq_(Hyperlink.OPEN_ACCESS_DOWNLOAD, link.rel)
        assert link.href.endswith(
            '/test.content.bucket/Gutenberg/Gutenberg+ID/2345/Name+of+book.epub'
        )
        eq_(Representation.EPUB_MEDIA_TYPE, link.media_type)
        eq_("I'm an EPUB.", link.content)

        # This open-access link will be made available through a
        # delivery mechanism described by this FormatData.
        [format] = circulation.formats
        eq_(link, format.link)
        eq_(link.media_type, format.content_type)
        eq_(DeliveryMechanism.NO_DRM, format.drm_scheme)
Example #29
    def test_related_books(self):
        # A book with no related books returns a ProblemDetail.
        with temp_config() as config:
            config['integrations'][Configuration.NOVELIST_INTEGRATION] = {}
            with self.app.test_request_context('/'):
                response = self.manager.work_controller.related(
                    self.datasource, self.identifier.type, self.identifier.identifier
                )
        eq_(404, response.status_code)
        eq_("http://librarysimplified.org/terms/problem/unknown-lane", response.uri)

        # Prep the book with a series companion and a recommendation.
        self.lp.presentation_edition.series = "Around the World"
        self.french_1.presentation_edition.series = "Around the World"
        SessionManager.refresh_materialized_views(self._db)

        source = DataSource.lookup(self._db, self.datasource)
        metadata = Metadata(source)
        mock_api = MockNoveListAPI()
        metadata.recommendations = [self.english_2.license_pools[0].identifier]
        mock_api.setup(metadata)

        # A grouped feed is returned with both of these related books.
        with self.app.test_request_context('/'):
            response = self.manager.work_controller.related(
                self.datasource, self.identifier.type, self.identifier.identifier,
                novelist_api=mock_api
            )
        eq_(200, response.status_code)
        feed = feedparser.parse(response.data)
        eq_(3, len(feed['entries']))

        # One book is in the recommendations feed.
        [e1] = [e for e in feed['entries'] if e['title'] == self.english_2.title]
        [collection_link] = [link for link in e1['links'] if link['rel']=='collection']
        eq_("Recommended Books", collection_link['title'])
        work_url = "/works/%s/%s/%s/" % (self.datasource, self.identifier.type, self.identifier.identifier)
        expected = urllib.quote(work_url + 'recommendations')
        eq_(True, collection_link['href'].endswith(expected))

        # Two books are in the series feed: the original work and its companion.
        [e2] = [e for e in feed['entries'] if e['title'] == self.french_1.title]
        [collection_link] = [link for link in e2['links'] if link['rel']=='collection']
        eq_("Around the World", collection_link['title'])
        expected = urllib.quote(work_url + 'series')
        eq_(True, collection_link['href'].endswith(expected))

        [e3] = [e for e in feed['entries'] if e['title'] == self.english_1.title]
        [collection_link] = [link for link in e3['links'] if link['rel']=='collection']
        eq_("Around the World", collection_link['title'])
        expected = urllib.quote(work_url + 'series')
        eq_(True, collection_link['href'].endswith(expected))
Example #30
 def __init__(self, _db, api=None, viaf_api=None):
     self._db = _db
     self.api = api or OCLCLinkedData(self._db)
     self.viaf = viaf_api or VIAFClient(self._db)
     output_source = DataSource.lookup(_db, DataSource.OCLC_LINKED_DATA)
     input_identifier_types = [
         Identifier.OCLC_WORK, Identifier.OCLC_NUMBER,
         Identifier.OVERDRIVE_ID, Identifier.THREEM_ID
     ]
     super(LinkedDataCoverageProvider, self).__init__(
         "OCLC Linked Data Coverage Provider", input_identifier_types,
         output_source, batch_size=10
     )
Example #31
    def opds_feed_identifiers(self):
        """Creates three Identifiers to use for testing with a sample OPDS file."""

        # Straightforward identifier that's represented in the OPDS response.
        valid_id = self._identifier(foreign_id=u'2020110')

        # Mapped identifier.
        source = DataSource.lookup(self._db, DataSource.AXIS_360)
        mapped_id = self._identifier(identifier_type=Identifier.AXIS_360_ID,
                                     foreign_id=u'0015187876')
        equivalent_id = self._identifier(identifier_type=Identifier.ISBN,
                                         foreign_id=self._isbn)
        mapped_id.equivalent_to(source, equivalent_id, 1)

        # An identifier that's not represented in the OPDS response.
        lost_id = self._identifier()

        return valid_id, mapped_id, lost_id
    def test_target_age_errs_towards_wider_span(self):
        i = self._identifier()
        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        c1 = i.classify(source, Subject.AGE_RANGE, "8-9", weight=1)
        c2 = i.classify(source, Subject.AGE_RANGE, "6-7", weight=1)

        overdrive_edition, lp = self._edition(
            data_source_name=source.name,
            with_license_pool=True,
            identifier_id=i.identifier,
        )
        self.classifier.work = self._work(presentation_edition=overdrive_edition)
        for classification in i.classifications:
            self.classifier.add(classification)
        genres, fiction, audience, target_age = self.classifier.classify()

        assert Classifier.AUDIENCE_CHILDREN == audience
        assert (6, 9) == target_age
Example #33
    def test_mirror_open_access_link_fetch_failure(self):
        mirrors = dict(books_mirror=MockS3Uploader())
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirrors=mirrors, http_get=h.do_get)
        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(403)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        assert representation.fetch_exception != None
        assert None == representation.mirror_exception
        assert None == representation.mirror_url
        assert link.href == representation.url
        assert representation.fetched_at != None
        assert None == representation.mirrored_at

        # The license pool is suppressed when fetch fails.
        assert True == pool.suppressed
        assert representation.fetch_exception in pool.license_exception
Example #34
    def test_isbn_covers_are_imported_from_mapped_identifiers(self):
        # Now that ISBN equivalents are passed to the Metadata Wrangler
        # instead of Bibliotheca identifiers, those identifiers aren't getting
        # covers. Let's confirm that the problem isn't on the Circulation
        # Manager's import side of things.

        # Create a Bibliotheca identifier with a license pool.
        source = DataSource.lookup(self._db, DataSource.BIBLIOTHECA)
        identifier = self._identifier(
            identifier_type=Identifier.BIBLIOTHECA_ID)
        LicensePool.for_foreign_id(self._db,
                                   source,
                                   identifier.type,
                                   identifier.identifier,
                                   collection=self.provider.collection)

        # Create an ISBN and set it equivalent.
        isbn = self._identifier(identifier_type=Identifier.ISBN)
        isbn.identifier = '9781594632556'
        identifier.equivalent_to(source, isbn, 1)

        opds = sample_data('metadata_isbn_response.opds', 'opds')
        self.provider.lookup_client.queue_response(
            200, {
                'content-type':
                'application/atom+xml;profile=opds-catalog;kind=acquisition'
            }, opds)

        result = self.provider.process_item(identifier)
        # The lookup is successful
        eq_(result, identifier)
        # The appropriate cover links are transferred.
        identifier_uris = [
            l.resource.url for l in identifier.links
            if l.rel in [Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE]
        ]
        expected = [
            'http://book-covers.nypl.org/Content%20Cafe/ISBN/9781594632556/cover.jpg',
            'http://book-covers.nypl.org/scaled/300/Content%20Cafe/ISBN/9781594632556/cover.jpg'
        ]

        eq_(sorted(identifier_uris), sorted(expected))

        # The ISBN doesn't get any information.
        eq_(isbn.links, [])
    def test_process_patron(self):
        patron = self._patron()

        # This patron has old-style and new-style Credentials that link
        # them to Adobe account IDs (hopefully the same ID, though that
        # doesn't matter here).
        def set_value(credential):
            credential.value = "a credential"

        # Data source doesn't matter -- even if it's incorrect, a Credential
        # of the appropriate type will be deleted.
        data_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        # Create two Credentials that will be deleted and one that will be
        # left alone.
        for type in (AdobeVendorIDModel.VENDOR_ID_UUID_TOKEN_TYPE,
                     AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
                     "Some other type"
        ):

            credential = Credential.lookup(
                self._db, data_source, type, patron,
                set_value, True
            )

        eq_(3, len(patron.credentials))

        # Run the patron through the script.
        script = AdobeAccountIDResetScript(self._db)

        # A dry run does nothing.
        script.delete = False
        script.process_patron(patron)
        self._db.commit()
        eq_(3, len(patron.credentials))

        # Now try it for real.
        script.delete = True
        script.process_patron(patron)
        self._db.commit()

        # The two Adobe-related credentials are gone. The other one remains.
        [credential] = patron.credentials
        eq_("Some other type", credential.type)
    def test_collect_event_without_work(self):
        integration, ignore = create(
            self._db,
            ExternalIntegration,
            goal=ExternalIntegration.ANALYTICS_GOAL,
            protocol="api.google_analytics_provider",
        )
        integration.url = self._str
        ConfigurationSetting.for_library_and_externalintegration(
            self._db, GoogleAnalyticsProvider.TRACKING_ID,
            self._default_library, integration).value = "faketrackingid"
        ga = MockGoogleAnalyticsProvider(integration, self._default_library)

        identifier = self._identifier()
        source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, is_new = get_one_or_create(self._db,
                                         LicensePool,
                                         identifier=identifier,
                                         data_source=source,
                                         collection=self._default_collection)

        now = datetime.datetime.utcnow()
        ga.collect_event(self._default_library, pool,
                         CirculationEvent.DISTRIBUTOR_CHECKIN, now)
        params = urlparse.parse_qs(ga.params)

        eq_(1, ga.count)
        eq_(integration.url, ga.url)
        eq_("faketrackingid", params['tid'][0])
        eq_("event", params['t'][0])
        eq_("circulation", params['ec'][0])
        eq_(CirculationEvent.DISTRIBUTOR_CHECKIN, params['ea'][0])
        eq_(str(now), params['cd1'][0])
        eq_(pool.identifier.identifier, params['cd2'][0])
        eq_(pool.identifier.type, params['cd3'][0])
        eq_(None, params.get('cd4'))
        eq_(None, params.get('cd5'))
        eq_(None, params.get('cd6'))
        eq_(None, params.get('cd7'))
        eq_(None, params.get('cd8'))
        eq_(None, params.get('cd9'))
        eq_(None, params.get('cd10'))
        eq_(None, params.get('cd11'))
        eq_(None, params.get('cd12'))
Example #37
 def __init__(self, _db, service_name=None, lookup=None, **kwargs):
     service_name = service_name or self.DEFAULT_SERVICE_NAME
     if not lookup:
         content_server_url = (
             Configuration.integration_url(
                 Configuration.CONTENT_SERVER_INTEGRATION
             )
         )
         lookup = SimplifiedOPDSLookup(content_server_url)
     output_source = DataSource.lookup(
         _db, DataSource.OA_CONTENT_SERVER
     )
     kwargs['input_identifier_types'] = None
     super(ContentServerBibliographicCoverageProvider, self).__init__(
         service_name,
         output_source=output_source, lookup=lookup,
         expect_license_pool=True, presentation_ready_on_success=True,
         **kwargs
     )
Example #38
    def test_feed_includes_staff_rating(self):
        work = self._work(with_open_access_download=True)
        lp = work.license_pools[0]
        staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        lp.identifier.add_measurement(
            staff_data_source, Measurement.RATING, 3, weight=1000
        )

        feed = AcquisitionFeed(
            self._db,
            "test",
            "url",
            [work],
            AdminAnnotator(None, self._default_library, test_mode=True),
        )
        [entry] = feedparser.parse(str(feed))["entries"]
        rating = entry["schema_rating"]
        assert 3 == float(rating["schema:ratingvalue"])
        assert Measurement.RATING == rating["additionaltype"]
Example #39
    def opds_feed_identifiers(self):
        """Creates three Identifiers to use for testing with sample OPDS files."""

        # An identifier directly represented in the OPDS response.
        valid_id = self._identifier(foreign_id=u'2020110')

        # An identifier mapped to an identifier represented in the OPDS
        # response.
        source = DataSource.lookup(self._db, DataSource.AXIS_360)
        mapped_id = self._identifier(identifier_type=Identifier.AXIS_360_ID,
                                     foreign_id=u'0015187876')
        equivalent_id = self._identifier(identifier_type=Identifier.ISBN,
                                         foreign_id='9781936460236')
        mapped_id.equivalent_to(source, equivalent_id, 1)

        # An identifier that's not represented in the OPDS response.
        lost_id = self._identifier()

        return valid_id, mapped_id, lost_id
Example #40
    def run_once(self, start, cutoff):
        _db = self._db
        added_books = 0
        overdrive_data_source = DataSource.lookup(_db, DataSource.OVERDRIVE)

        total_books = 0
        consecutive_unchanged_books = 0
        for i, book in enumerate(self.recently_changed_ids(start, cutoff)):
            total_books += 1
            if not total_books % 100:
                self.log.info("%s books processed", total_books)
            if not book:
                continue
            license_pool, is_new, is_changed = self.api.update_licensepool(
                book)
            # Log a circulation event for this work.
            if is_new:
                for library in self.collection.libraries:
                    self.analytics.collect_event(
                        library, license_pool,
                        CirculationEvent.DISTRIBUTOR_TITLE_ADD,
                        license_pool.last_checked)

            _db.commit()

            if is_changed:
                consecutive_unchanged_books = 0
            else:
                consecutive_unchanged_books += 1
                if (self.maximum_consecutive_unchanged_books
                        and consecutive_unchanged_books >=
                        self.maximum_consecutive_unchanged_books):
                    # We're supposed to stop this run after finding a
                    # run of books that have not changed, and we have
                    # in fact seen that many consecutive unchanged
                    # books.
                    self.log.info("Stopping at %d unchanged books.",
                                  consecutive_unchanged_books)
                    break

        if total_books:
            self.log.info("Processed %d books total.", total_books)
Example #41
    def test_smuggled_authdata_credential_success(self):
        # Bob's client has created a persistent token to authenticate him.
        now = datetime.datetime.utcnow()
        token, ignore = Credential.persistent_token_create(
            self._db, self.data_source, self.model.AUTHDATA_TOKEN_TYPE,
            self.bob_patron
        )

        # But Bob's client can't trigger the operation that will cause
        # Adobe to authenticate him via that token, so it passes in
        # the token credential as the 'username' and leaves the
        # password blank.
        urn, label = self.model.standard_lookup(
            dict(username=token.credential)
        )

        # There is now an anonymized identifier associated with Bob's
        # patron account.
        internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
        bob_anonymized_identifier = Credential.lookup(
            self._db, internal,
            AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
            self.bob_patron, None
        )

        # That anonymized identifier is associated with a
        # DelegatedPatronIdentifier whose delegated_identifier is a
        # UUID.
        [bob_delegated_patron_identifier] = self._db.query(
            DelegatedPatronIdentifier).filter(
                DelegatedPatronIdentifier.patron_identifier
                ==bob_anonymized_identifier.credential
            ).all()

        # That UUID is the one returned by standard_lookup.
        eq_(urn, bob_delegated_patron_identifier.delegated_identifier)

        # A future attempt to authenticate with the token will succeed.
        urn, label = self.model.standard_lookup(
            dict(username=token.credential)
        )
        eq_(urn, bob_delegated_patron_identifier.delegated_identifier)
Example #42
    def test_checkout(self):
        patron = self._patron()

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        edition, pool = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=self.collection,
        )

        loan_info = self.api.checkout(patron, "1234", pool,
                                      Representation.EPUB_MEDIA_TYPE)
        eq_(self.collection.id, loan_info.collection_id)
        eq_(data_source.name, loan_info.data_source_name)
        eq_(Identifier.URI, loan_info.identifier_type)
        eq_(pool.identifier.identifier, loan_info.identifier)
        eq_(None, loan_info.end_date)
Example #43
    def test_add_isbn(self):
        isbn = self._identifier(identifier_type=Identifier.ISBN)
        record = Record()
        Annotator.add_isbn(record, isbn)
        self._check_field(record, "020", {"a": isbn.identifier})

        # If the identifier isn't an ISBN, but has an equivalent that is, it still
        # works.
        equivalent = self._identifier()
        data_source = DataSource.lookup(self._db, DataSource.OCLC)
        equivalent.equivalent_to(data_source, isbn, 1)
        record = Record()
        Annotator.add_isbn(record, equivalent)
        self._check_field(record, "020", {"a": isbn.identifier})

        # If there is no ISBN, the field is left out.
        non_isbn = self._identifier()
        record = Record()
        Annotator.add_isbn(record, non_isbn)
        assert [] == record.get_fields("020")
Example #44
    def test_create_identifier_mapping(self):
        # Most identifiers map to themselves.
        overdrive = self._identifier(Identifier.OVERDRIVE_ID)

        # But Axis 360 and 3M identifiers map to equivalent ISBNs.
        axis = self._identifier(Identifier.AXIS_360_ID)
        threem = self._identifier(Identifier.THREEM_ID)
        isbn_axis = self._identifier(Identifier.ISBN)
        isbn_threem = self._identifier(Identifier.ISBN)

        who_says = DataSource.lookup(self._db, DataSource.AXIS_360)

        axis.equivalent_to(who_says, isbn_axis, 1)
        threem.equivalent_to(who_says, isbn_threem, 1)

        mapping = self.provider.create_identifier_mapping(
            [overdrive, axis, threem])
        eq_(overdrive, mapping[overdrive])
        eq_(axis, mapping[isbn_axis])
        eq_(threem, mapping[isbn_threem])
    def test_no_children_or_ya_signal_from_distributor_implies_book_is_for_adults(self):
        # Create some classifications that end up in
        # direct_from_license_source, but don't imply that the book is
        # for children or YA.
        # classifier.audience_weights[AUDIENCE_ADULT] will be set to 500.
        i = self.identifier
        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        for subject in ("Nonfiction", "Science Fiction", "History"):
            c = i.classify(source, Subject.OVERDRIVE, subject, weight=1000)
            self.classifier.add(c)

        # There's a little bit of evidence that it's a children's book,
        # but not enough to outweigh the distributor's silence.
        c2 = self.identifier.classify(source, Subject.TAG, "Children's books", weight=1)
        self.classifier.add(c2)
        self.classifier.prepare_to_classify()
        # Overdrive classifications are regarded as 50 times more reliable
        # than their actual weight, as per Classification.scaled_weight
        assert 50000 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULT]
Example #46
    def test_cover_image_root(
        self,
        name,
        bucket,
        data_source_name,
        expected_result,
        scaled_size=None,
        region=None,
    ):
        # Arrange
        uploader = self._create_s3_uploader(region=region)
        data_source = DataSource.lookup(self._db, data_source_name)

        # Act
        result = uploader.cover_image_root(bucket,
                                           data_source,
                                           scaled_size=scaled_size)

        # Assert
        assert result == expected_result
    def test_calculate_quality(self):
        w = self._work(with_open_access_download=True)

        # This book used to be incredibly popular.
        identifier = w.presentation_edition.primary_identifier
        old_popularity = identifier.add_measurement(self.source,
                                                    Measurement.POPULARITY,
                                                    6000)

        # Now it's just so-so.
        popularity = identifier.add_measurement(self.source,
                                                Measurement.POPULARITY, 59)

        # This measurement is irrelevant because "Test Data Source"
        # doesn't have a mapping from number of editions to a
        # percentile range.
        irrelevant = identifier.add_measurement(self.source,
                                                Measurement.PUBLISHED_EDITIONS,
                                                42)

        # If we calculate the quality based solely on the primary
        # identifier, only the most recent popularity is considered,
        # and the book ends up in the middle of the road in terms of
        # quality.
        w.calculate_quality([identifier.id])
        assert 0.5 == w.quality

        old_quality = w.quality

        # But let's say there's another identifier that's equivalent,
        # and it has a number of editions that was obtained from
        # OCLC Classify, which _does_ have a mapping from number
        # of editions to a percentile range.
        wi = self._identifier()
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        wi.add_measurement(oclc, Measurement.PUBLISHED_EDITIONS, 800)

        # Now the quality is higher--the large OCLC PUBLISHED_EDITIONS
        # measurement bumped it up.
        w.calculate_quality([identifier.id, wi.id])
        assert w.quality > old_quality
Example #48
    def test_patron_activity(self):
        # The patron has two loans from this API's collection and
        # one from a different collection.
        patron = self._patron()

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        e1, p1 = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=self.collection,
        )
        p1.loan_to(patron)

        e2, p2 = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=self.collection,
        )
        p2.loan_to(patron)

        other_collection = self._collection(
            protocol=ExternalIntegration.OVERDRIVE)
        e3, p3 = self._edition(
            identifier_type=Identifier.OVERDRIVE_ID,
            data_source_name=DataSource.OVERDRIVE,
            with_license_pool=True,
            collection=other_collection,
        )
        p3.loan_to(patron)

        activity = self.api.patron_activity(patron, "1234")
        eq_(2, len(activity))
        [l1, l2] = activity
        eq_(l1.collection_id, self.collection.id)
        eq_(l2.collection_id, self.collection.id)
        eq_(set([l1.identifier, l2.identifier]),
            set([p1.identifier.identifier, p2.identifier.identifier]))
Example #49
    def test_process_item_creates_license_pool(self):
        self.resolver.required_coverage_providers = [self.always_successful]

        self.resolver.process_item(self.identifier)
        [lp] = self.identifier.licensed_through
        eq_(True, isinstance(lp, LicensePool))
        eq_(lp.collection, self.resolver.collection)
        eq_(lp.data_source, self.resolver.data_source)

        # Prepare an identifier that already has a LicensePool through
        # another source.
        licensed = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        lp = LicensePool.for_foreign_id(self._db,
                                        other_source,
                                        licensed.type,
                                        licensed.identifier,
                                        collection=self._default_collection)[0]

        self.resolver.process_item(licensed)
        eq_([lp], licensed.licensed_through)
Example #50
    def test_ensure_isbn_identifier(self):
        self.script.oclc_classify = DummyCoverageProvider()
        eq_(0, self.script.oclc_classify.hit_count)

        # When there are no equivalent identifiers, both identifiers go to the
        # OCLCClassify coverage provider.
        identifiers = [
            self.edition1.primary_identifier, self.edition2.primary_identifier
        ]
        self.script.ensure_isbn_identifier(identifiers)
        eq_(2, self.script.oclc_classify.hit_count)

        # If an edition's primary identifier is already equivalent to an
        # ISBN, it doesn't go to the coverage provider.
        self.script.oclc_classify.hit_count = 0
        self.edition1.primary_identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            self._identifier(identifier_type=Identifier.ISBN), 1)
        self._db.commit()
        self.script.ensure_isbn_identifier(identifiers)
        eq_(1, self.script.oclc_classify.hit_count)
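A self-contained sketch of the skip-if-ISBN-equivalent behavior this test verifies. DummyProvider and the set of ISBN equivalents are hypothetical stand-ins for the real coverage provider and the equivalency graph.

class DummyProvider(object):
    """Counts registrations, like the DummyCoverageProvider above."""
    def __init__(self):
        self.hit_count = 0
    def register(self, identifier):
        self.hit_count += 1

def ensure_isbn_identifier(identifiers, isbn_equivalents, provider):
    # Only identifiers with no known ISBN equivalent need OCLC coverage.
    for identifier in identifiers:
        if identifier not in isbn_equivalents:
            provider.register(identifier)

provider = DummyProvider()
ensure_isbn_identifier(["id1", "id2"], isbn_equivalents={"id1"}, provider=provider)
assert provider.hit_count == 1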
Example #51
    def do_run(self):
        self.api = NYTBestSellerAPI.from_config(self._db)
        self.data_source = DataSource.lookup(self._db, DataSource.NYT)
        # For every best-seller list...
        names = self.api.list_of_lists()
        for l in sorted(names['results'], key=lambda x: x['list_name_encoded']):

            name = l['list_name_encoded']
            self.log.info("Handling list %s" % name)
            best = self.api.best_seller_list(l)

            if self.include_history:
                self.api.fill_in_history(best)
            else:
                self.api.update(best)

            # Mirror the list to the database.
            customlist = best.to_customlist(self._db)
            self.log.info(
                "Now %s entries in the list.", len(customlist.entries))
            self._db.commit()
    def test_process_batch(self):
        provider = self._provider()

        # Here are an Edition and a LicensePool for the same identifier but
        # from different data sources. We would expect this to happen
        # when talking to the open-access content server.
        edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
        identifier = edition.primary_identifier

        license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, is_new = LicensePool.for_foreign_id(self._db, license_source,
                                                  identifier.type,
                                                  identifier.identifier)
        eq_(None, pool.work)

        # Here's a second identifier that's doomed to failure.
        identifier = self._identifier()
        messages_by_id = {
            identifier.urn: CoverageFailure(identifier, "201: try again later")
        }

        provider.queue_import_results([edition], [pool], [], messages_by_id)

        fake_batch = [object()]
        success, failure = provider.process_batch(fake_batch)

        # The batch was provided to lookup_and_import_batch.
        eq_([fake_batch], provider.batches)

        # The Edition and LicensePool have been knitted together into
        # a Work.
        eq_(edition, pool.presentation_edition)
        assert pool.work is not None

        # The license pool was finalized.
        eq_([pool], provider.finalized)

        # The failure stayed a CoverageFailure object.
        eq_(identifier, failure.obj)
        eq_(True, failure.transient)
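A sketch of the bookkeeping process_batch is expected to do with the queued results: imported identifiers count as successes, anything with a queued message stays a CoverageFailure. CoverageFailure is re-declared locally so the snippet runs on its own; this is a simplified stand-in, not the real provider.

class CoverageFailure(object):
    def __init__(self, obj, message, transient=True):
        self.obj = obj
        self.message = message
        self.transient = transient

def sort_batch_results(batch, imported, failures_by_identifier):
    """Partition a processed batch into successes and failures."""
    results = []
    for identifier in batch:
        if identifier in failures_by_identifier:
            results.append(failures_by_identifier[identifier])
        elif identifier in imported:
            results.append(identifier)
    return results

failure = CoverageFailure("doomed", "201: try again later")
results = sort_batch_results(
    ["ok", "doomed"], imported={"ok"}, failures_by_identifier={"doomed": failure}
)
assert results == ["ok", failure] and failure.transient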
    def test_adults_only_indication_from_distributor_has_no_implication_for_audience(
        self,
    ):
        # Create some classifications that end up in
        # direct_from_license_source, one of which implies the book is
        # for adults only.
        i = self.identifier
        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        for subject in ("Erotic Literature", "Science Fiction", "History"):
            c = i.classify(source, Subject.OVERDRIVE, subject, weight=1)
            self.classifier.add(c)

        self.classifier.prepare_to_classify()

        # Again, Overdrive classifications are trusted at 50 times
        # their nominal weight, per Classification.scaled_weight.
        assert 50 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULTS_ONLY]

        # No boost was given to AUDIENCE_ADULT, because a distributor
        # classification implied AUDIENCE_ADULTS_ONLY.
        assert 0 == self.classifier.audience_weights[Classifier.AUDIENCE_ADULT]
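The 50x scaling and the withheld adult boost can be illustrated with plain arithmetic. This is a hand-rolled sketch of what the test asserts, not the real WorkClassifier or Classification.scaled_weight:

DISTRIBUTOR_SCALE = 50  # a license source's classifications count 50x

def audience_weights(classifications):
    """Accumulate audience weights: distributor classifications are
    scaled up, and a distributor adults-only signal suppresses the
    usual boost for plain Adult."""
    weights = {"Adult": 0, "Adults Only": 0}
    adults_only_from_distributor = False
    for audience, weight, from_license_source in classifications:
        scaled = weight * DISTRIBUTOR_SCALE if from_license_source else weight
        if audience in weights:
            weights[audience] += scaled
        if audience == "Adults Only" and from_license_source:
            adults_only_from_distributor = True
    if not adults_only_from_distributor:
        weights["Adult"] += DISTRIBUTOR_SCALE  # the default adult boost
    return weights

w = audience_weights([("Adults Only", 1, True)])
assert w["Adults Only"] == 50 and w["Adult"] == 0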
    def test_process_batch_with_identifier_mapping(self):
        """Test that internal identifiers are mapped to and from the form used
        by the external service.
        """

        # Unlike other tests in this class, we are using a real
        # implementation of OPDSImportCoverageProvider.process_batch.
        class TestProvider(OPDSImportCoverageProvider):

            # Mock the identifier mapping
            def create_identifier_mapping(self, batch):
                return self.mapping

        # This means we need to mock the lookup client instead.
        lookup = MockSimplifiedOPDSLookup(self._url)

        source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
        provider = TestProvider("test provider", [], source, lookup=lookup)

        # Create a hard-coded mapping. We use id1 internally, but the
        # foreign data source knows the book as id2.
        id1 = self._identifier()
        id2 = self._identifier()
        provider.mapping = {id2: id1}

        feed = "<feed><entry><id>%s</id><title>Here's your title!</title></entry></feed>" % id2.urn
        headers = {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}
        lookup.queue_response(200, headers=headers, content=feed)
        [identifier] = provider.process_batch([id1])

        # We wanted to process id1. We sent id2 to the server, the
        # server responded with an <entry> for id2, and it was used to
        # modify the Edition associated with id1.
        eq_(id1, identifier)

        [edition] = id1.primarily_identifies
        eq_("Here's your title!", edition.title)
        eq_(id1, edition.primary_identifier)
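The mapping round trip can be shown without the OPDS machinery. In this sketch `mapping` maps the foreign identifier to the internal one, as create_identifier_mapping does above, and the lookup client is a stub that echoes its input:

def process_with_mapping(batch, mapping, lookup):
    """Round-trip identifiers through a service that knows them
    under different names."""
    reverse = dict((internal, foreign) for foreign, internal in mapping.items())
    # Send the foreign form of each identifier to the server...
    foreign_batch = [reverse.get(i, i) for i in batch]
    responses = lookup(foreign_batch)
    # ...and translate each response back into the internal form.
    return [mapping.get(i, i) for i in responses]

mapping = {"id2": "id1"}
echo = lambda ids: list(ids)  # a stub lookup client that echoes its input
assert process_with_mapping(["id1"], mapping, echo) == ["id1"]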
    def test_username_password_lookup_success(self):
        urn, label = self.model.standard_lookup(self.credentials)

        # There is now an anonymized identifier associated with Bob's
        # patron account.
        internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
        bob_anonymized_identifier = Credential.lookup(
            self._db, internal,
            AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
            self.bob_patron, None)

        # That anonymized identifier is associated with a
        # DelegatedPatronIdentifier whose delegated_identifier is a
        # UUID.
        [bob_delegated_patron_identifier
         ] = self._db.query(DelegatedPatronIdentifier).filter(
             DelegatedPatronIdentifier.patron_identifier ==
             bob_anonymized_identifier.credential).all()

        eq_("Delegated account ID %s" % urn, label)
        eq_(urn, bob_delegated_patron_identifier.delegated_identifier)
        assert urn.startswith("urn:uuid:0")
        assert urn.endswith('685b35c00f05')
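A self-contained sketch of the two-step lookup this test walks through: patron, then anonymized credential, then delegated UUID. Plain dicts stand in for the Credential and DelegatedPatronIdentifier tables.

import uuid

def standard_lookup(patron_id, credentials_by_patron, delegated_by_credential):
    """Sketch of the lookup: the anonymized credential shields the
    patron's identity, and maps to a stable delegated UUID."""
    # Step 1: an anonymized, random credential for this patron.
    credential = credentials_by_patron.setdefault(patron_id, uuid.uuid4().hex)
    # Step 2: the credential maps to a delegated identifier that is a UUID URN.
    urn = delegated_by_credential.setdefault(
        credential, "urn:uuid:%s" % uuid.uuid4()
    )
    return urn, "Delegated account ID %s" % urn

creds, delegated = {}, {}
urn1, label = standard_lookup("bob", creds, delegated)
urn2, _ = standard_lookup("bob", creds, delegated)
assert urn1 == urn2 and urn1.startswith("urn:uuid:")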
    def test_juvenile_classification_is_split_between_children_and_ya(self):

        # LCC files both children's and YA works under 'PZ'.
        # Here's how we deal with that.
        i = self.identifier
        source = DataSource.lookup(self._db, DataSource.OCLC)
        c = i.classify(source, Subject.LCC, "PZ", weight=100)
        self.classifier.add(c)

        # (This classification has no bearing on audience and its
        # weight will be ignored.)
        c2 = i.classify(source, Subject.TAG, "Pets", weight=1000)
        self.classifier.add(c2)
        self.classifier.prepare_to_classify()
        genres, fiction, audience, target_age = self.classifier.classify()

        # Young Adult wins because we err on the side of showing books
        # to kids who are too old, rather than too young.
        assert Classifier.AUDIENCE_YOUNG_ADULT == audience

        # But behind the scenes, more is going on. The weight of the
        # classifier has been split 60/40 between YA and children.
        weights = self.classifier.audience_weights
        assert 60 == weights[Classifier.AUDIENCE_YOUNG_ADULT]
        assert 40 == weights[Classifier.AUDIENCE_CHILDREN]
        # If this is in fact a children's book, this will make it
        # relatively easy for data from some other source to come in
        # and tip the balance.

        # The adult audiences have been reduced, to reduce the chance
        # that splitting up the weight between YA and Children will
        # cause the work to be mistakenly classified as Adult.
        assert -50 == weights[Classifier.AUDIENCE_ADULT]
        assert -50 == weights[Classifier.AUDIENCE_ADULTS_ONLY]
        # The juvenile classification doesn't make AUDIENCE_ALL_AGES any less likely.
        assert 0 == weights[Classifier.AUDIENCE_ALL_AGES]
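The 60/40 split and the -50 adult penalties are simple arithmetic. A sketch mirroring the numbers asserted above (a hypothetical function, not the real classifier):

def split_juvenile_weight(weight):
    """Split a generic 'juvenile' classification 60/40 between YA and
    Children, and penalize the adult audiences."""
    return {
        "Young Adult": weight * 0.6,
        "Children": weight * 0.4,
        "Adult": -weight * 0.5,
        "Adults Only": -weight * 0.5,
        "All Ages": 0,
    }

weights = split_juvenile_weight(100)
assert weights["Young Adult"] == 60 and weights["Children"] == 40
assert weights["Adult"] == -50 and weights["Adults Only"] == -50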
Example #57
    def test_reaper(self):
        feed = self.get_data("biblioboard_mini_feed.opds")

        class MockOPDSForDistributorsReaperMonitor(
                OPDSForDistributorsReaperMonitor):
            """An OPDSForDistributorsReaperMonitor that overrides _get."""
            def _get(self, url, headers):
                return (200, {
                    'content-type': OPDSFeed.ACQUISITION_FEED_TYPE
                }, feed)

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        collection = MockOPDSForDistributorsAPI.mock_collection(self._db)
        collection.external_integration.set_setting(
            Collection.DATA_SOURCE_NAME_SETTING, data_source.name)
        monitor = MockOPDSForDistributorsReaperMonitor(
            self._db,
            collection,
            OPDSForDistributorsImporter,
            metadata_client=object())

        # There's a license pool in the database that isn't in the feed anymore.
        edition, pool = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=collection,
        )
        pool.licenses_owned = 1
        pool.licenses_available = 1

        monitor.run_once(None, None)

        eq_(0, pool.licenses_owned)
        eq_(0, pool.licenses_available)
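A sketch of the reaping step itself: any pool whose identifier no longer appears in the distributor's feed loses its licenses. Dicts stand in for LicensePool rows; the feed parsing is omitted.

def reap_missing_pools(pools, identifiers_in_feed):
    """Zero out availability for pools absent from the feed."""
    reaped = []
    for pool in pools:
        if pool["identifier"] not in identifiers_in_feed:
            pool["licenses_owned"] = 0
            pool["licenses_available"] = 0
            reaped.append(pool)
    return reaped

pools = [{"identifier": "gone", "licenses_owned": 1, "licenses_available": 1}]
reap_missing_pools(pools, identifiers_in_feed=set())
assert pools[0]["licenses_owned"] == 0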
        class JustAddMetadata(object):
            """A mock CoverageProvider that puts some data in place, but for
            whatever reason neglects to create a presentation-ready
            Work.
            """
            COVERAGE_COUNTS_FOR_EVERY_COLLECTION = True
            STATUS = CoverageRecord.SUCCESS
            SOURCE = DataSource.lookup(self._db, DataSource.GUTENBERG)
            TITLE = "A great book"
            def can_cover(self, *args, **kwargs):
                return True

            def register(s, identifier, *args, **kwargs):
                # ('s' rather than 'self' so the enclosing test's 'self'
                # stays visible inside this nested class.)
                # They only told us to register, but we're going to
                # actually do the work.
                edition = self._edition(
                    identifier_type=identifier.type,
                    identifier_id=identifier.identifier,
                    title=s.TITLE
                )
                return self._coverage_record(
                    identifier, coverage_source=s.SOURCE,
                    status=s.STATUS
                ), True
    def test_items_that_need_coverage_respects_cutoff(self):
        """Verify that this coverage provider respects the cutoff_time
        argument.
        """

        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        edition = self._edition()
        cr = self._coverage_record(edition, source, operation='sync')

        # We have a coverage record already, so this book doesn't show
        # up in items_that_need_coverage.
        items = self.provider.items_that_need_coverage().all()
        eq_([], items)

        # But if we send a cutoff_time that's later than the time
        # associated with the coverage record...
        one_hour_from_now = (datetime.datetime.utcnow() +
                             datetime.timedelta(seconds=3600))
        provider_with_cutoff = self.create_provider(
            cutoff_time=one_hour_from_now)

        # The book starts showing up in items_that_need_coverage.
        eq_([edition.primary_identifier],
            provider_with_cutoff.items_that_need_coverage().all())
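The cutoff semantics can be sketched independently of the ORM: an identifier needs coverage when it has no record at all, or when its record predates cutoff_time. Here timestamps in a dict stand in for CoverageRecords.

import datetime

def needs_coverage(records_by_identifier, identifiers, cutoff_time=None):
    """Return identifiers with no coverage record, or whose record is
    older than cutoff_time."""
    needed = []
    for identifier in identifiers:
        timestamp = records_by_identifier.get(identifier)
        if timestamp is None or (cutoff_time and timestamp < cutoff_time):
            needed.append(identifier)
    return needed

now = datetime.datetime.utcnow()
records = {"covered": now}
assert needs_coverage(records, ["covered"]) == []
later = now + datetime.timedelta(hours=1)
assert needs_coverage(records, ["covered"], cutoff_time=later) == ["covered"]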
Example #60
    def test_authdata_token_credential_lookup_success(self):
        
        # Create an authdata token Credential for Bob.
        now = datetime.datetime.utcnow()
        token, ignore = Credential.persistent_token_create(
            self._db, self.data_source, self.model.AUTHDATA_TOKEN_TYPE,
            self.bob_patron
        )

        # The token is persistent.
        eq_(None, token.expires)

        # Use that token to perform a lookup of Bob's Adobe Vendor ID
        # UUID.
        urn, label = self.model.authdata_lookup(token.credential)

        # There is now an anonymized identifier associated with Bob's
        # patron account.
        internal = DataSource.lookup(self._db, DataSource.INTERNAL_PROCESSING)
        bob_anonymized_identifier = Credential.lookup(
            self._db, internal,
            AuthdataUtility.ADOBE_ACCOUNT_ID_PATRON_IDENTIFIER,
            self.bob_patron, None
        )

        # That anonymized identifier is associated with a
        # DelegatedPatronIdentifier whose delegated_identifier is a
        # UUID.
        [bob_delegated_patron_identifier] = self._db.query(
            DelegatedPatronIdentifier).filter(
                DelegatedPatronIdentifier.patron_identifier
                == bob_anonymized_identifier.credential
            ).all()

        # That UUID is the one returned by authdata_lookup.
        eq_(urn, bob_delegated_patron_identifier.delegated_identifier)
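Finally, a sketch of the persistent-token behavior asserted at the top of this test: the token is minted once, has no expiration date, and is returned unchanged on later calls. A dict stands in for the Credential table.

import uuid

def persistent_token_create(store, data_source, token_type, patron):
    """Mint a persistent credential on first use; reuse it afterwards."""
    key = (data_source, token_type, patron)
    is_new = key not in store
    if is_new:
        store[key] = {"credential": uuid.uuid4().hex, "expires": None}
    return store[key], is_new

store = {}
token, is_new = persistent_token_create(store, "internal", "authdata", "bob")
assert is_new and token["expires"] is None
token2, is_new2 = persistent_token_create(store, "internal", "authdata", "bob")
assert not is_new2 and token2 is token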