def test_gather_providers_overdrive(self):
        # Checks which subproviders get gathered for an Overdrive
        # collection.
        #
        # Build the Overdrive collection, and ask for the Overdrive
        # subprovider to be constructed with the mock API class.
        collection = MockOverdriveAPI.mock_collection(self._db)
        kwargs_by_provider = {
            OverdriveBibliographicCoverageProvider: dict(
                api_class=MockOverdriveAPI
            ),
        }
        resolver = IdentifierResolutionCoverageProvider(
            collection, provider_kwargs=kwargs_by_provider
        )

        # On top of the already-configured OCLC provider there is now
        # exactly one OverdriveBibliographicCoverageProvider.
        eq_(len(resolver.providers), 2)
        overdrive_matches = [
            candidate for candidate in resolver.providers
            if isinstance(candidate, OverdriveBibliographicCoverageProvider)
        ]
        [overdrive] = overdrive_matches
        assert overdrive

        # The API class handed over through provider_kwargs was
        # instantiated while the Overdrive subprovider was being built.
        assert isinstance(overdrive.api, MockOverdriveAPI)

        # The Overdrive subprovider consults VIAF once author data has
        # been added to the database, so it shares our VIAF client.
        eq_(resolver.viaf, overdrive.viaf)

        # Every subprovider works on the main provider's collection and
        # uses its replacement policy (and therefore the same
        # MirrorUploader).
        for sub in resolver.providers:
            eq_(resolver.collection, sub.collection)
            eq_(resolver.replacement_policy, sub.replacement_policy)
    def test_providers_overdrive(self):
        # Checks the provider lineup built for an Overdrive collection.
        overdrive_collection = MockOverdriveAPI.mock_collection(self._db)

        # There is no proper mock for this API, so build one that will
        # blow up if anything attempts a real HTTP request through it.
        content_cafe_api = ContentCafeAPI(
            self._db, None, object(), object(), self.uploader
        )
        resolver = IdentifierResolutionCoverageProvider(
            overdrive_collection,
            overdrive_api_class=MockOverdriveAPI,
            content_cafe_api=content_cafe_api,
            uploader=self.uploader
        )

        # No optional providers; the three required ones are, in order,
        # Content Cafe, OCLC Classify, and Overdrive.
        optional, required = resolver.providers()
        [content_cafe, oclc_classify, overdrive] = required
        eq_([], optional)
        assert isinstance(content_cafe, ContentCafeCoverageProvider)
        assert isinstance(oclc_classify, OCLCClassifyCoverageProvider)
        assert isinstance(overdrive, OverdriveBibliographicCoverageProvider)
    def test_gather_providers_content_cafe(self):
        # Register a Content Cafe metadata integration, credentials
        # included.
        integration = self._external_integration(
            goal=ExternalIntegration.METADATA_GOAL,
            protocol=ExternalIntegration.CONTENT_CAFE,
            username="******",
            password="******"
        )

        # Besides the already-configured OCLC provider, exactly one
        # ContentCafeCoverageProvider is gathered.
        resolver = IdentifierResolutionCoverageProvider(
            self._default_collection
        )
        eq_(len(resolver.providers), 2)
        content_cafe_matches = [
            candidate for candidate in resolver.providers
            if isinstance(candidate, ContentCafeCoverageProvider)
        ]
        [content_cafe] = content_cafe_matches
        assert content_cafe
    def test_providers_opds(self):
        # Point the default collection at the open-access content
        # server, making it an OPDS collection.
        integration = self._default_collection.external_integration
        integration.set_setting(
            Collection.DATA_SOURCE_NAME_SETTING, DataSource.OA_CONTENT_SERVER
        )
        placeholder_uploader = object()

        # There is no proper mock for this API, so build one that will
        # blow up if anything attempts a real HTTP request through it.
        content_cafe_api = ContentCafeAPI(
            self._db, None, object(), object(), self.uploader
        )
        resolver = IdentifierResolutionCoverageProvider(
            self._default_collection,
            content_cafe_api=content_cafe_api,
            uploader=placeholder_uploader
        )

        # No optional providers; the three required ones are, in order,
        # Content Cafe, OCLC Classify, and the OPDS lookup client.
        optional, required = resolver.providers()
        [content_cafe, oclc_classify, opds] = required
        eq_([], optional)
        assert isinstance(content_cafe, ContentCafeCoverageProvider)
        assert isinstance(oclc_classify, OCLCClassifyCoverageProvider)
        assert isinstance(opds, LookupClientCoverageProvider)

        # The Content Cafe provider holds the API object we supplied,
        # and the OPDS provider is tied to the default collection.
        eq_(content_cafe_api, content_cafe.content_cafe)
        eq_(self._default_collection, opds.collection)
    def test_gather_providers_opds_for_distributors(self):
        # Switch the default collection to the OPDS For Distributors
        # protocol before building the resolver.
        distributor_collection = self._default_collection
        distributor_collection.protocol = ExternalIntegration.OPDS_FOR_DISTRIBUTORS
        resolver = IdentifierResolutionCoverageProvider(distributor_collection)

        # Besides the already-configured OCLC provider, exactly one
        # IntegrationClientCoverImageCoverageProvider is gathered.
        eq_(len(resolver.providers), 2)
        cover_image_matches = [
            candidate for candidate in resolver.providers
            if isinstance(candidate, IntegrationClientCoverImageCoverageProvider)
        ]
        [integration_client] = cover_image_matches
        assert integration_client

        # Every subprovider works on the main provider's collection and
        # uses its replacement policy (and therefore the same
        # MirrorUploader).
        for sub in resolver.providers:
            eq_(resolver.collection, sub.collection)
            eq_(resolver.replacement_policy, sub.replacement_policy)
    def test_all(self):
        # A resolver subclass that gathers no subproviders at all.
        class Mock(IdentifierResolutionCoverageProvider):
            def gather_providers(self, provider_kwargs):
                return []

        # Start from the 'unaffiliated' collection, then create three
        # more collections, for four in total.
        unaffiliated, ignore = IdentifierResolutionCoverageProvider.unaffiliated_collection(self._db)
        for _index in range(3):
            self._collection()

        # all() yields the providers in random order (not tested here),
        # except that the unaffiliated collection always comes last.
        providers = list(Mock.all(self._db, mirror=object()))
        eq_(4, len(providers))
        eq_(unaffiliated, providers[-1].collection)
Beispiel #7
0
    def setup(self):
        # Prepare an Overdrive identifier, its license data source, a
        # resolver with no subproviders, and three stub providers.
        super(TestIdentifierResolutionCoverageProvider, self).setup()
        self.identifier = self._identifier(Identifier.OVERDRIVE_ID)
        self.source = DataSource.license_source_for(self._db, self.identifier)

        # The resolver starts with empty provider lists; individual
        # tests are expected to supply their own.
        self.coverage_provider = IdentifierResolutionCoverageProvider(
            self._db, uploader=DummyS3Uploader(), providers=([], [])
        )

        def make_stub(provider_class, service_name):
            # Every stub covers only this identifier's type and uses
            # the identifier's license source.
            return provider_class(
                service_name, [self.identifier.type], self.source
            )

        self.always_successful = make_stub(
            AlwaysSuccessfulCoverageProvider, "Always"
        )
        self.never_successful = make_stub(
            NeverSuccessfulCoverageProvider, "Never"
        )
        self.broken = make_stub(BrokenCoverageProvider, "Broken")
 def default_collection(self):
     """Return the 'unaffiliated' Collection, remembering its ID.

     The first call asks IdentifierResolutionCoverageProvider for the
     unaffiliated collection and stores its ID on this object. Every
     call then loads the Collection by that ID through get_one().
     """
     collection_id = getattr(self, '_default_collection_id', None)
     if collection_id is None:
         unaffiliated, ignore = IdentifierResolutionCoverageProvider.unaffiliated_collection(self._db)
         collection_id = unaffiliated.id
         self._default_collection_id = collection_id
     return get_one(self._db, Collection, id=collection_id)
Beispiel #9
0
    def process_urns(self, urns, collection_details=None, **kwargs):
        """Catalog and process the URNs sent in a lookup request.

        Authenticated requests must name a collection to catalog the
        URNs under, and may send up to 30 URNs at a time. This is how a
        client first records that certain URNs belong to a collection
        and obtains a baseline set of metadata; later updates should
        come from the CatalogController.

        Unauthenticated requests exist for testing. They need not name
        a collection (the "Unaffiliated" collection is used), but they
        are limited to a single URN.

        When the request carries a 'resolve_now' argument, the limit is
        a single URN regardless of authentication.

        :param urns: The URNs to process.
        :param collection_details: Passed to collection_from_details()
            to find the collection to catalog under.
        :param kwargs: Accepted but not used by this method.
        :return: None or ProblemDetail

        """
        client = authenticated_client_from_request(self._db, required=False)
        if isinstance(client, ProblemDetail):
            return client

        # Only the presence of the argument matters, not its value.
        resolve_now = request.args.get('resolve_now', None) is not None

        collection = collection_from_details(
            self._db, client, collection_details
        )

        if not client:
            # Anonymous access.
            collection = self.default_collection
            limit = 1
        elif not collection:
            # Authenticated access, but nothing to catalog under.
            return INVALID_INPUT.detailed(_("No collection provided."))
        else:
            # Authenticated access.
            limit = 30

        if resolve_now:
            # You can't force-resolve more than one Identifier at a time.
            limit = 1

        urn_count = len(urns)
        if urn_count > limit:
            message = _("The maximum number of URNs you can provide at once is %d. (You sent %d)")
            return INVALID_INPUT.detailed(message % (limit, urn_count))

        identifiers_by_urn, failures = Identifier.parse_urns(
            self._db, urns, allowed_types=self.VALID_TYPES
        )
        self.add_urn_failure_messages(failures)

        # Catalog all identifiers.
        collection.catalog_identifiers(identifiers_by_urn.values())

        # Fetch every coverage record with one query, which speeds up
        # the status reporting for Identifiers that aren't ready yet.
        self.bulk_load_coverage_records(identifiers_by_urn.values())

        resolver = IdentifierResolutionCoverageProvider(
            collection,
            provide_coverage_immediately=resolve_now,
            **self.coverage_provider_kwargs
        )
        for original_urn, parsed_identifier in identifiers_by_urn.items():
            self.process_identifier(
                parsed_identifier, original_urn, resolver=resolver
            )
Beispiel #10
0
 def default_collection(self):
     """Look up the 'unaffiliated' Collection by its remembered ID.

     If no ID has been stored on this object yet, it is obtained from
     IdentifierResolutionCoverageProvider.unaffiliated_collection() and
     stored. The Collection itself is loaded with get_one() each time.
     """
     known_id = getattr(self, '_default_collection_id', None)
     if known_id is None:
         unaffiliated, ignore = IdentifierResolutionCoverageProvider.unaffiliated_collection(
             self._db
         )
         known_id = unaffiliated.id
         self._default_collection_id = known_id
     return get_one(self._db, Collection, id=known_id)
Beispiel #11
0
class TestIdentifierResolutionCoverageProvider(DatabaseTest):
    # Tests for how IdentifierResolutionCoverageProvider picks items and
    # how process_item() reacts to its required and optional
    # subproviders succeeding, failing, or raising.

    def setup(self):
        # Prepare an Overdrive identifier, its license data source, a
        # resolver with no subproviders, and three stub providers.
        super(TestIdentifierResolutionCoverageProvider, self).setup()
        self.identifier = self._identifier(Identifier.OVERDRIVE_ID)
        self.source = DataSource.license_source_for(self._db, self.identifier)

        # The resolver starts with empty provider lists; each test
        # assigns the providers it needs.
        self.coverage_provider = IdentifierResolutionCoverageProvider(
            self._db, uploader=DummyS3Uploader(), providers=([], [])
        )

        def make_stub(provider_class, service_name):
            # Every stub covers only this identifier's type and uses
            # the identifier's license source.
            return provider_class(
                service_name, [self.identifier.type], self.source
            )

        self.always_successful = make_stub(
            AlwaysSuccessfulCoverageProvider, "Always"
        )
        self.never_successful = make_stub(
            NeverSuccessfulCoverageProvider, "Never"
        )
        self.broken = make_stub(BrokenCoverageProvider, "Broken")

    def test_items_that_need_coverage(self):
        # Coverage is only needed by items that already carry a
        # transient-failure record for the resolve operation.
        self._coverage_record(
            self.identifier,
            self.coverage_provider.output_source,
            operation=CoverageRecord.RESOLVE_IDENTIFIER_OPERATION,
            status=CoverageRecord.TRANSIENT_FAILURE
        )

        # An identifier with no coverage record at all is ignored.
        self._identifier(identifier_type=Identifier.ISBN)

        needing_coverage = self.coverage_provider.items_that_need_coverage()
        eq_([self.identifier], needing_coverage.all())

    def test_process_item_creates_license_pool(self):
        resolver = self.coverage_provider
        resolver.required_coverage_providers = [self.always_successful]

        resolver.process_item(self.identifier)

        # The identifier is now licensed through a pool that belongs to
        # the resolver's output source.
        pool = self.identifier.licensed_through
        eq_(True, isinstance(pool, LicensePool))
        eq_(pool.data_source, resolver.output_source)

    def test_process_item_succeeds_if_all_required_coverage_providers_succeed(self):
        resolver = self.coverage_provider
        resolver.required_coverage_providers = [
            self.always_successful, self.always_successful
        ]

        # Success is signalled by getting the identifier back.
        outcome = resolver.process_item(self.identifier)
        eq_(outcome, self.identifier)

    def test_process_item_fails_if_any_required_coverage_providers_fail(self):
        resolver = self.coverage_provider
        resolver.required_coverage_providers = [
            self.always_successful, self.never_successful
        ]

        first_attempt = resolver.process_item(self.identifier)

        eq_(True, isinstance(first_attempt, CoverageFailure))
        eq_("500: What did you expect?", first_attempt.exception)
        eq_(False, first_attempt.transient)

        # The resolver's failure takes on the failure type of the
        # required provider: once that provider fails transiently, so
        # does the resolver.
        self.never_successful.transient = True
        second_attempt = resolver.process_item(self.identifier)
        eq_(True, isinstance(second_attempt, CoverageFailure))
        eq_(True, second_attempt.transient)

    def test_process_item_fails_when_required_provider_raises_exception(self):
        resolver = self.coverage_provider
        resolver.required_coverage_providers = [self.broken]

        # An exception inside a required provider becomes a transient
        # CoverageFailure.
        outcome = resolver.process_item(self.identifier)

        eq_(True, isinstance(outcome, CoverageFailure))
        eq_(True, outcome.transient)

    def test_process_item_fails_when_finalize_raises_exception(self):
        class FinalizeAlwaysFails(IdentifierResolutionCoverageProvider):
            def finalize(self, unresolved_identifier):
                raise Exception("Oh no!")

        failing_resolver = FinalizeAlwaysFails(
            self._db, uploader=DummyS3Uploader(), providers=([], [])
        )
        outcome = failing_resolver.process_item(self.identifier)

        # The exception text is carried along in a transient failure.
        eq_(True, isinstance(outcome, CoverageFailure))
        assert "Oh no!" in outcome.exception
        eq_(True, outcome.transient)

    def test_process_item_succeeds_when_optional_provider_fails(self):
        resolver = self.coverage_provider
        resolver.required_coverage_providers = [
            self.always_successful, self.always_successful
        ]
        resolver.optional_coverage_providers = [
            self.always_successful, self.never_successful
        ]

        outcome = resolver.process_item(self.identifier)

        # The failing optional provider does not prevent an overall
        # success.
        eq_(outcome, self.identifier)

        # The failure was nevertheless recorded in a coverage record.
        presentation_edition = DataSource.lookup(
            self._db, DataSource.PRESENTATION_EDITION
        )
        failure_records = self._db.query(CoverageRecord).filter(
            CoverageRecord.identifier==self.identifier,
            CoverageRecord.data_source!=presentation_edition
        )
        record = failure_records.one()
        eq_("What did you expect?", record.exception)
    def test_process_one_provider(self):
        """Check how IdentifierResolutionCoverageProvider hands an
        Identifier over to one of its subproviders.
        """
        default_collection = self._default_collection

        resolver = IdentifierResolutionCoverageProvider(
            default_collection, force=object()
        )

        # A subprovider that says it can't cover the Identifier is left
        # alone: ensure_coverage() would raise if it were called.
        class CantCoverAnything(object):
            def can_cover(self, identifier):
                return False
            def ensure_coverage(self, identifier, force):
                raise Exception("I'll never be called")
        resolver.process_one_provider(object(), CantCoverAnything())

        # Next, a subprovider whose coverage counts for every
        # collection at once.
        class OnlyOnce(CollectionCoverageProvider):
            SERVICE_NAME = "Do it once, it's done for every collection"
            COVERAGE_COUNTS_FOR_EVERY_COLLECTION = True
            DATA_SOURCE_NAME = DataSource.OVERDRIVE

            def register(self, identifier, collection, force):
                self.register_called_with = [identifier, collection, force]
                return None, None

            def ensure_coverage(self, identifier, force):
                self.ensure_coverage_called_with = [identifier, force]

        identifier = self._identifier()
        only_once = OnlyOnce(default_collection)
        resolver.process_one_provider(identifier, only_once)

        # register() was called, and no collection was passed to it.
        eq_([identifier, None, resolver.force],
            only_once.register_called_with)

        # When the resolver must provide coverage immediately,
        # ensure_coverage() is what gets called.
        resolver.provide_coverage_immediately = True
        resolver.process_one_provider(identifier, only_once)
        eq_([identifier, resolver.force],
            only_once.ensure_coverage_called_with)

        # Finally, a subprovider that _does_ have to cover each
        # collection separately.
        class EveryTime(CollectionCoverageProvider):
            SERVICE_NAME = "Every collection must be covered separately"
            COVERAGE_COUNTS_FOR_EVERY_COLLECTION = False
            DATA_SOURCE_NAME = DataSource.OVERDRIVE

            def register(self, identifier, collection, force):
                self.register_called_with = [identifier, collection, force]
                return None, None

            def ensure_coverage(self, identifier, force):
                self.ensure_coverage_called_with = [identifier, force]

        every_time = EveryTime(default_collection)
        resolver.provide_coverage_immediately = False
        resolver.process_one_provider(identifier, every_time)

        # register() was called, this time with the collection the
        # resolver is covering.
        eq_([identifier, resolver.collection, resolver.force],
            every_time.register_called_with)

        # When the resolver must provide coverage immediately,
        # ensure_coverage() is what gets called.
        resolver.provide_coverage_immediately = True
        resolver.process_one_provider(identifier, every_time)
        eq_([identifier, resolver.force],
            every_time.ensure_coverage_called_with)
 def test_gather_providers_no_credentials(self):
     # With no credentials configured, only the OCLC provider is
     # available from the start. The other CoverageProviders need
     # credentials, so the resolver cannot set them up.
     resolver = IdentifierResolutionCoverageProvider(self._default_collection)
     [only_provider] = resolver.providers
     assert isinstance(only_provider, IdentifierLookupCoverageProvider)