Esempio n. 1
0
    def test_apply_removes_old_formats_based_on_replacement_policy(self):
        # CirculationData.apply() should only discard a pool's existing
        # delivery mechanisms when the ReplacementPolicy has formats=True.
        edition, pool = self._edition(with_license_pool=True)

        # Wipe whatever delivery mechanisms the pool came with, then give
        # it a single Adobe-DRM PDF.
        for existing in pool.delivery_mechanisms:
            self._db.delete(existing)
        old_lpdm = pool.set_delivery_mechanism(
            Representation.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM,
            RightsStatus.IN_COPYRIGHT, None)

        # A patron holds a loan that is fulfilled through that mechanism.
        patron = self._patron()
        loan, ignore = pool.loan_to(patron, fulfillment=old_lpdm)
        assert old_lpdm == loan.fulfillment

        # The incoming circulation data advertises an EPUB instead.
        epub_format = FormatData(
            content_type=Representation.EPUB_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation_data = CirculationData(
            formats=[epub_format],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        # With formats=False the EPUB is added next to the PDF and the
        # loan keeps pointing at the PDF mechanism.
        keep_formats = ReplacementPolicy(formats=False)
        circulation_data.apply(self._db, pool.collection, keep_formats)

        assert 2 == len(pool.delivery_mechanisms)
        expected_types = {
            Representation.PDF_MEDIA_TYPE,
            Representation.EPUB_MEDIA_TYPE,
        }
        actual_types = {
            lpdm.delivery_mechanism.content_type
            for lpdm in pool.delivery_mechanisms
        }
        assert expected_types == actual_types
        assert old_lpdm == loan.fulfillment

        # With formats=True the PDF is deleted and detached from the loan.
        replace_formats = ReplacementPolicy(formats=True)
        circulation_data.apply(self._db, pool.collection, replace_formats)

        remaining = pool.delivery_mechanisms
        assert 1 == len(remaining)
        assert (Representation.EPUB_MEDIA_TYPE ==
                remaining[0].delivery_mechanism.content_type)
        assert None == loan.fulfillment
Esempio n. 2
0
    def test_rights_status_open_access_link_with_rights(self):
        # An open-access download link carrying its own rights URI should
        # produce an open-access pool whose only delivery mechanism
        # reports that same URI.
        identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        oa_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )
        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[oa_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, is_new = circulation_data.license_pool(
            self._db, self._default_collection)
        circulation_data.apply(self._db, pool.collection, policy)

        assert True == pool.open_access
        mechanisms = pool.delivery_mechanisms
        assert 1 == len(mechanisms)
        assert RightsStatus.CC_BY_ND == mechanisms[0].rights_status.uri
Esempio n. 3
0
    def test_rights_status_commercial_link_with_rights(self):
        # A DRM-encrypted link plus an explicit in-copyright format must
        # leave the pool non-open-access, with the in-copyright URI on its
        # single delivery mechanism.
        identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        drm_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        drm_format = FormatData(
            content_type=drm_link.media_type,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
            link=drm_link,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[drm_link],
            formats=[drm_format],
        )
        policy = ReplacementPolicy(formats=True)

        pool, is_new = circulation_data.license_pool(
            self._db, self._default_collection)
        circulation_data.apply(self._db, pool.collection, policy)

        assert False == pool.open_access
        mechanisms = pool.delivery_mechanisms
        assert 1 == len(mechanisms)
        assert RightsStatus.IN_COPYRIGHT == mechanisms[0].rights_status.uri
Esempio n. 4
0
    def process_book(self, bibliographic):
        """Apply one book's remote record to the local database.

        The bibliographic data is applied to the book's edition using a
        policy that replaces subjects, contributions and formats but not
        identifiers. When the edition is new, a DISTRIBUTOR_TITLE_ADD
        analytics event is collected for every library of this
        collection.

        :param bibliographic: A Metadata object with attached
            CirculationData (read from its ``circulation`` attribute).

        :return: A 2-tuple ``(edition, license_pool)``, in that order.
        """
        circulation = bibliographic.circulation
        edition, is_new_edition = bibliographic.edition(self._db)
        # Captured before apply() so every event shares one timestamp.
        now = utc_now()
        replacement = ReplacementPolicy(
            identifiers=False,
            subjects=True,
            contributions=True,
            formats=True,
        )
        bibliographic.apply(edition, self.collection, replace=replacement)
        license_pool, ignore = circulation.license_pool(
            self._db, self.collection
        )

        if not is_new_edition:
            return edition, license_pool

        # First time we have seen this title: announce it to each library.
        for library in self.collection.libraries:
            self.analytics.collect_event(
                library, license_pool, CirculationEvent.DISTRIBUTOR_TITLE_ADD, now
            )
        return edition, license_pool
Esempio n. 5
0
    def test_rights_status_default_rights_from_data_source(self):
        # When neither the link nor the CirculationData names a rights
        # URI, the rights status falls back to the data source's default.
        identifier = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        circulation_data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=identifier,
            links=[link],
        )
        policy = ReplacementPolicy(formats=True)

        # Before anything is applied, the pool is not open-access.
        pool, is_new = circulation_data.license_pool(
            self._db, self._default_collection)
        assert False == pool.open_access

        circulation_data.apply(self._db, pool.collection, policy)

        # Applying a link that came from the OA content server flips the
        # pool to open-access.
        assert True == pool.open_access
        mechanisms = pool.delivery_mechanisms
        assert 1 == len(mechanisms)
        # The rights status is the OA content server's default.
        assert (RightsStatus.GENERIC_OPEN_ACCESS ==
                mechanisms[0].rights_status.uri)
Esempio n. 6
0
    def process_book(self, bibliographic, availability):
        """Apply one book's availability and bibliographic data locally.

        Availability is always applied; bibliographic data is applied
        only when the edition is new. When either the pool or the
        edition is new, the identifier is registered as covered with
        ``self.bibliographic_coverage_provider``.

        :param bibliographic: Object supplying ``edition()`` and
            ``apply()`` for the book's bibliographic data.
        :param availability: Object supplying ``license_pool()`` and
            ``apply()`` for the book's circulation data.
        :return: A 2-tuple ``(edition, license_pool)``.
        """
        analytics = Analytics(self._db)
        license_pool, is_new_pool = availability.license_pool(
            self._db, self.collection, analytics)
        edition, is_new_edition = bibliographic.edition(self._db)
        license_pool.edition = edition

        replacement = ReplacementPolicy(
            identifiers=False,
            subjects=True,
            contributions=True,
            formats=True,
            analytics=analytics,
        )
        availability.apply(self._db, self.collection, replace=replacement)
        if new_edition_needs_metadata(is_new_edition):
            bibliographic.apply(edition, self.collection, replace=replacement)

        if not (is_new_pool or is_new_edition):
            return edition, license_pool

        # What has been done so far is equivalent to the work of the
        # Axis360BibliographicCoverageProvider, so record it as done and
        # avoid repeating it later.
        identifier = edition.primary_identifier
        self.bibliographic_coverage_provider.handle_success(identifier)
        self.bibliographic_coverage_provider.add_coverage_record_for(
            identifier)
        return edition, license_pool
Esempio n. 7
0
    def test_explicit_formatdata(self):
        # An edition created with an open-access download gets a
        # delivery mechanism for free.
        edition, pool = self._edition(with_open_access_download=True)

        # Layer an Adobe-DRM PDF on top of it.
        adobe_pdf = FormatData(
            content_type=Representation.PDF_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation_data = CirculationData(
            formats=[adobe_pdf],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )
        circulation_data.apply(self._db, pool.collection)

        def content_type_of(lpdm):
            return lpdm.delivery_mechanism.content_type

        # Sorted by content type, the EPUB comes before the PDF.
        [epub, pdf] = sorted(pool.delivery_mechanisms, key=content_type_of)
        assert epub.resource == pool.best_open_access_resource

        pdf_mechanism = pdf.delivery_mechanism
        assert Representation.PDF_MEDIA_TYPE == pdf_mechanism.content_type
        assert DeliveryMechanism.ADOBE_DRM == pdf_mechanism.drm_scheme

        # Re-applying with formats=True replaces the whole format list,
        # leaving only the PDF we supplied explicitly.
        policy = ReplacementPolicy(formats=True)
        circulation_data.apply(self._db, pool.collection, replace=policy)
        [pdf] = pool.delivery_mechanisms
        assert Representation.PDF_MEDIA_TYPE == pdf.delivery_mechanism.content_type
Esempio n. 8
0
    def __init__(self, collection, api_class=OdiloAPI, **kwargs):
        """Set up bibliographic coverage for a Collection of Odilo books.

        :param collection: The Collection whose Odilo books receive
            bibliographic coverage.
        :param api_class: Either an OdiloAPI instance, which is used
            as-is, or a callable invoked as ``api_class(_db, collection)``
            to build the API object.
        :param kwargs: Passed through to the superclass constructor.
        """
        super(OdiloBibliographicCoverageProvider,
              self).__init__(collection, **kwargs)

        if not isinstance(api_class, OdiloAPI):
            # Build a fresh API object. A web application should not take
            # this path, since it brings a non-scoped session into play.
            session = Session.object_session(collection)
            self.api = api_class(session, collection)
        else:
            # The caller handed us a ready-made OdiloAPI; reuse it.
            self.api = api_class

        analytics = Analytics(self._db)
        self.replacement_policy = ReplacementPolicy(
            identifiers=True,
            subjects=True,
            contributions=True,
            links=True,
            formats=True,
            rights=True,
            link_content=True,
            analytics=analytics,
        )
Esempio n. 9
0
    def test_apply_creates_work_and_presentation_edition_if_needed(self):
        # Start from a pool that has neither a presentation edition nor
        # a work.
        edition = self._edition()
        pool = self._licensepool(edition)

        # Fresh circulation data for that pool, with no formats.
        circulation_data = CirculationData(
            formats=[],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        # Applying it gives the pool both a presentation edition and a
        # work.
        policy = ReplacementPolicy()
        circulation_data.apply(self._db, pool.collection, policy)

        assert edition == pool.presentation_edition
        assert pool.work != None

        # A second pool for the same book, in a different collection,
        # ends up sharing the first pool's work.
        other_collection = self._collection()
        pool2 = self._licensepool(edition, collection=other_collection)
        circulation_data.apply(self._db, pool2.collection, policy)
        assert edition == pool2.presentation_edition
        assert pool.work == pool2.work
Esempio n. 10
0
    def process_book(self, bibliographic, availability):
        """Apply one book's availability and bibliographic data locally.

        Availability is always applied; bibliographic data is applied
        only for a new edition. When the pool or the edition is new, the
        identifier is registered as covered and a DISTRIBUTOR_TITLE_ADD
        event is collected for each library of this collection.

        :param bibliographic: Object supplying ``edition()`` and
            ``apply()`` for the book's bibliographic data.
        :param availability: Object supplying ``license_pool()`` and
            ``apply()`` for the book's circulation data.
        :return: A 2-tuple ``(edition, license_pool)``.
        """
        license_pool, is_new_pool = availability.license_pool(
            self._db, self.collection)
        # Taken up front so every analytics event carries the same time.
        now = datetime.datetime.utcnow()
        edition, is_new_edition = bibliographic.edition(self._db)
        license_pool.edition = edition

        replacement = ReplacementPolicy(
            identifiers=False,
            subjects=True,
            contributions=True,
            formats=True,
        )
        availability.apply(
            self._db, license_pool.collection, replace=replacement)
        if is_new_edition:
            bibliographic.apply(edition, self.collection, replace=replacement)

        if not (is_new_pool or is_new_edition):
            return edition, license_pool

        # The steps above match what the EnkiBibliographicCoverageProvider
        # would have done, so mark the identifier as covered to avoid
        # doing the work a second time.
        identifier = edition.primary_identifier
        self.bibliographic_coverage_provider.handle_success(identifier)
        self.bibliographic_coverage_provider.add_coverage_record_for(
            identifier)
        for library in self.collection.libraries:
            self.analytics.collect_event(
                library, license_pool,
                CirculationEvent.DISTRIBUTOR_TITLE_ADD, now)
        return edition, license_pool
Esempio n. 11
0
    def test_rights_status_default_rights_passed_in(self):
        # A default_rights_uri given to CirculationData is the rights
        # status used when the link does not name one itself.
        identifier = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        circulation_data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=identifier,
            default_rights_uri=RightsStatus.CC_BY,
            links=[link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, is_new = circulation_data.license_pool(
            self._db, self._default_collection)
        circulation_data.apply(self._db, pool.collection, policy)

        assert True == pool.open_access
        mechanisms = pool.delivery_mechanisms
        assert 1 == len(mechanisms)
        # The URI handed to CirculationData wins.
        assert RightsStatus.CC_BY == mechanisms[0].rights_status.uri
Esempio n. 12
0
    def _reap(self, identifier):
        """Update our local circulation information to reflect the fact that
        the identified book has been removed from the remote
        collection.

        Nothing happens when this collection has no license pool for the
        identifier (a warning is logged) or when the pool already owns
        zero licenses. Otherwise all four circulation counts on the pool
        are set to zero by applying a CirculationData.

        :param identifier: The Identifier of the removed book; it must
            provide ``licensed_through_collection()``.
        """
        collection = self.collection
        pool = identifier.licensed_through_collection(collection)
        if not pool:
            # Logger.warn is a deprecated alias; use warning().
            self.log.warning(
                "Was about to reap %r but no local license pool in this collection.",
                identifier)
            return
        if pool.licenses_owned == 0:
            # Already reaped.
            return
        self.log.info("Reaping %r", identifier)

        availability = CirculationData(
            data_source=pool.data_source,
            primary_identifier=identifier,
            licenses_owned=0,
            licenses_available=0,
            licenses_reserved=0,
            patrons_in_hold_queue=0,
        )
        availability.apply(self._db, collection,
                           ReplacementPolicy.from_license_source(self._db))
    def test_replacement_policy_uses_provided_mirror(self):
        # A ReplacementPolicy handed to the provider is kept, so covers
        # found while ensuring coverage are uploaded through its mirror.
        collection = MockOverdriveAPI.mock_collection(self._db)
        s3 = MockS3Uploader()
        policy = ReplacementPolicy.from_metadata_source(mirror=s3)
        api = MockOverdriveAPI(self._db, collection)
        api.queue_collection_token()
        provider = OverdriveBibliographicCoverageProvider(
            collection, replacement_policy=policy, api_class=api)

        # The provider is using the mirror we supplied.
        eq_(s3, provider.replacement_policy.mirror)

        # Send the policy's HTTP requests to a canned client.
        http = DummyHTTPClient()
        provider.replacement_policy.http_get = http.do_get

        # The identifier we will ask 'Overdrive' about.
        identifier = self._identifier(
            Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5")

        metadata_json = self.data_file("overdrive/overdrive_metadata.json")
        provider.api.queue_response(200, {}, metadata_json)

        test_cover = self.data_file("covers/test-book-cover.png")
        test_small_cover = self.data_file("covers/tiny-image-cover.png")

        # First response: Overdrive's full-sized image, from which our
        # own thumbnail is generated.
        http.queue_response(200, "image/jpeg", {}, test_cover)

        # Second response: Overdrive's own thumbnail, which goes unused.
        http.queue_response(200, "image/jpeg", {}, test_small_cover)

        record = provider.ensure_coverage(identifier)
        eq_("success", record.status)

        # Both the full image and a thumbnail landed in the fake S3.
        full, thumbnail = mirror_uploads = s3.uploaded
        eq_(test_cover, full.content)

        # The mirror URLs point at our S3 layout, not at Overdrive.
        expect = "Overdrive/Overdrive+ID/%s" % identifier.identifier
        assert expect in full.mirror_url
        assert expect in thumbnail.mirror_url
        assert "/scaled/" in thumbnail.mirror_url
        assert "/scaled/" not in full.mirror_url

        # The uploaded thumbnail was freshly generated: it matches
        # neither Overdrive's thumbnail nor the full-sized cover.
        assert thumbnail.content != test_small_cover
        assert thumbnail.content != test_cover
Esempio n. 14
0
 def default_circulation_replacement_policy(self):
     """Build the ReplacementPolicy used for circulation updates.

     The policy replaces subjects, contributions and formats, leaves
     identifiers alone, and carries an Analytics object bound to
     ``self._db``.
     """
     policy_settings = dict(
         identifiers=False,
         subjects=True,
         contributions=True,
         formats=True,
         analytics=Analytics(self._db),
     )
     return ReplacementPolicy(**policy_settings)
Esempio n. 15
0
    def test_implicit_format_for_open_access_link(self):
        # A format is a delivery mechanism. This checks that an
        # open-access link implicitly yields a DRM-free format, and that
        # replacing links and formats with nothing removes every format.

        edition, pool = self._edition(with_license_pool=True)

        # _edition() gives the pool one default mechanism: Adobe-DRM EPUB.
        [epub] = pool.delivery_mechanisms
        default_mechanism = epub.delivery_mechanism
        assert Representation.EPUB_MEDIA_TYPE == default_mechanism.content_type
        assert DeliveryMechanism.ADOBE_DRM == default_mechanism.drm_scheme

        oa_pdf_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.PDF_MEDIA_TYPE,
            href=self._url,
        )
        with_link = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=edition.primary_identifier,
            links=[oa_pdf_link],
        )
        replace_formats = ReplacementPolicy(formats=True)
        with_link.apply(self._db, pool.collection, replace_formats)

        # The default mechanism is gone; an open-access PDF took its
        # place.
        [pdf] = pool.delivery_mechanisms
        pdf_mechanism = pdf.delivery_mechanism
        assert Representation.PDF_MEDIA_TYPE == pdf_mechanism.content_type
        assert DeliveryMechanism.NO_DRM == pdf_mechanism.drm_scheme

        without_links = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=edition.primary_identifier,
            links=[],
        )
        replace_formats_and_links = ReplacementPolicy(formats=True, links=True)
        without_links.apply(
            self._db, pool.collection, replace_formats_and_links)

        # Nothing is left to deliver.
        assert 0 == len(pool.delivery_mechanisms)
    def setup(self):
        # Build a provider for an OPDS-for-distributors collection whose
        # replacement policy mirrors through a mock S3 uploader.
        super(TestIntegrationClientCoverImageCoverageProvider, self).setup()
        policy = ReplacementPolicy.from_metadata_source(
            mirror=MockS3Uploader())
        self.collection = self._collection(
            protocol=ExternalIntegration.OPDS_FOR_DISTRIBUTORS)
        self.provider = IntegrationClientCoverImageCoverageProvider(
            collection=self.collection, replacement_policy=policy)
    def test_replacement_policy_uses_provided_mirror(self):
        # The provider must keep the ReplacementPolicy it is given and
        # push cover images through that policy's mirror.
        overdrive_collection = MockOverdriveAPI.mock_collection(self._db)
        uploader = MockS3Uploader()
        given_policy = ReplacementPolicy.from_metadata_source(
            mirror=uploader
        )
        mock_api = MockOverdriveAPI(self._db, overdrive_collection)
        mock_api.queue_collection_token()
        provider = OverdriveBibliographicCoverageProvider(
            overdrive_collection,
            replacement_policy=given_policy,
            api_class=mock_api,
        )

        # Resources Overdrive tells us about go through this uploader.
        eq_(uploader, provider.replacement_policy.mirror)

        client = DummyHTTPClient()
        provider.replacement_policy.http_get = client.do_get

        # Look up one specific identifier through 'Overdrive'.
        overdrive_id = "3896665d-9d81-4cac-bd43-ffc5066de1f5"
        identifier = self._identifier(Identifier.OVERDRIVE_ID, overdrive_id)

        body = self.data_file("overdrive/overdrive_metadata.json")
        provider.api.queue_response(200, {}, body)

        full_cover = self.data_file("covers/test-book-cover.png")
        small_cover = self.data_file("covers/tiny-image-cover.png")

        # Queue, in order: the full-sized image (our thumbnail is made
        # from it) and Overdrive's thumbnail (which is not used).
        for image in (full_cover, small_cover):
            client.queue_response(200, "image/jpeg", {}, image)

        coverage_record = provider.ensure_coverage(identifier)
        eq_("success", coverage_record.status)

        # The fake S3 received the full image followed by a thumbnail.
        full, thumbnail = uploader.uploaded
        eq_(full_cover, full.content)

        # Each Resource URL is one of our S3 URLs rather than an
        # Overdrive URL.
        expected_fragment = "Overdrive/Overdrive+ID/%s" % identifier.identifier
        for mirrored_url in [full.mirror_url, thumbnail.mirror_url]:
            assert expected_fragment in mirrored_url
        assert "/scaled/" in thumbnail.mirror_url
        assert "/scaled/" not in full.mirror_url

        # The thumbnail is new content, different from both source
        # images.
        assert thumbnail.content != small_cover
        assert thumbnail.content != full_cover
Esempio n. 18
0
    def update_licensepool_for_identifier(self, isbn, availability):
        """Update availability information for a single book.

        The LicensePool for the ISBN is looked up in this collection and
        created if it does not exist yet. For a newly created pool,
        bibliographic coverage is ensured for its identifier. The pool
        then receives approximate circulation information: only whether
        the book is lendable is known, not how many copies exist.

        :param isbn: the identifier OneClick uses
        :param availability: boolean denoting if book can be lent to patrons
        :return: A 3-tuple ``(license_pool, is_new_pool,
            circulation_changed)``; the first and last elements come
            from ``CirculationData.apply()``.
        """
        # Locate (or create) the pool for this ISBN; a brand-new pool
        # still needs bibliographic coverage.
        license_pool, is_new_pool = LicensePool.for_foreign_id(
            self._db, DataSource.ONECLICK, Identifier.ONECLICK_ID, isbn,
            collection=self.collection
        )
        if is_new_pool:
            self.bibliographic_coverage_provider.ensure_coverage(
                license_pool.identifier
            )

        replacement = ReplacementPolicy(
            identifiers=False,
            subjects=True,
            contributions=True,
            formats=True,
            analytics=Analytics(self._db),
        )

        # The copy count is a stand-in: 999 for a lendable book, 0 for
        # one that cannot be lent.
        licenses_available = 999 if availability else 0

        circulation_data = CirculationData(
            data_source=DataSource.ONECLICK,
            primary_identifier=license_pool.identifier,
            licenses_available=licenses_available,
        )
        license_pool, circulation_changed = circulation_data.apply(
            self._db, self.collection, replace=replacement)

        return license_pool, is_new_pool, circulation_changed
Esempio n. 19
0
    def update_formats(self, licensepool):
        """Refresh the format list of one book's LicensePool.

        Format-only metadata is fetched for the pool's identifier and
        its circulation data is applied with a policy that replaces
        formats.

        :param licensepool: The LicensePool whose formats are refreshed.
        """
        book_info = self.metadata_lookup(licensepool.identifier)
        format_metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            book_info, include_bibliographic=False, include_formats=True)

        policy = ReplacementPolicy(formats=True)
        format_metadata.circulation.apply(licensepool, policy)
Esempio n. 20
0
    def test_mirror_open_access_link_mirror_failure(self):
        # When the book is fetched successfully but the books mirror
        # fails, the representation records a mirror exception and the
        # license pool ends up suppressed.
        failing_mirrors = dict(
            books_mirror=MockS3Uploader(fail=True),
            covers_mirror=None,
        )
        http = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirrors=failing_mirrors, http_get=http.do_get)

        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=gutenberg,
            media_type=link.media_type,
            content=link.content,
        )

        http.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

        circulation_data.mirror_link(pool, gutenberg, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetching worked.
        assert None == representation.fetch_exception
        assert representation.fetched_at != None

        # Mirroring did not.
        assert representation.mirror_exception != None
        assert None == representation.mirrored_at
        assert link.media_type == representation.media_type
        assert link.href == representation.url

        # No mirror URL was ever recorded.
        assert None == representation.mirror_url

        # The unmirrored book content is still held locally.
        assert representation.content != None

        # A failed mirror suppresses the pool and the reason is recorded
        # in its license exception.
        assert True == pool.suppressed
        assert representation.mirror_exception in pool.license_exception
    def setup(self):
        # Prepare an OPDS-for-distributors collection and a provider
        # whose replacement policy uploads through a mock S3 mirror.
        super(TestIntegrationClientCoverImageCoverageProvider, self).setup()
        uploader = MockS3Uploader()
        provider_kwargs = dict(
            replacement_policy=ReplacementPolicy.from_metadata_source(
                mirror=uploader),
        )
        self.collection = self._collection(
            protocol=ExternalIntegration.OPDS_FOR_DISTRIBUTORS)
        provider_kwargs['collection'] = self.collection

        self.provider = IntegrationClientCoverImageCoverageProvider(
            **provider_kwargs)
    def __init__(self, collection, *args, **kwargs):
        """Set up the provider, filling in defaults the caller left out.

        :param collection: The Collection to cover; its database session
            is used to find the sitewide mirror when no policy is given.
        :param args: Passed through to the superclass constructor.
        :param kwargs: Passed through to the superclass constructor.
            ``replacement_policy`` defaults to a policy that replaces
            links and mirrors through ``MirrorUploader.sitewide()``;
            ``registered_only`` defaults to True.
        """
        _db = Session.object_session(collection)

        policy = kwargs.pop('replacement_policy', None)
        if not policy:
            sitewide_mirror = MirrorUploader.sitewide(_db)
            policy = ReplacementPolicy(mirror=sitewide_mirror, links=True)
        kwargs['replacement_policy'] = policy

        # Unless told otherwise, only identifiers already registered for
        # coverage are processed.
        kwargs.setdefault('registered_only', True)

        super(IntegrationClientCoverImageCoverageProvider,
              self).__init__(collection, *args, **kwargs)
Esempio n. 23
0
    def test_format_change_may_change_open_access_status(self):
        # Every apply() call in this test uses a policy that throws away
        # the old format list and rebuilds it.
        policy = ReplacementPolicy(formats=True)

        # Begin with an ordinary pool that is not open-access.
        edition, pool = self._edition(with_license_pool=True)
        assert False == pool.open_access

        # News arrives of an open-access delivery mechanism for it.
        oa_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )
        open_access_data = CirculationData(
            data_source=pool.data_source,
            primary_identifier=pool.identifier,
            links=[oa_link],
        )

        # Once applied, the pool counts as open-access.
        open_access_data.apply(self._db, pool.collection, replace=policy)
        assert True == pool.open_access

        # That turns out to be wrong: the book is in copyright.
        in_copyright_format = FormatData(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        corrected_data = CirculationData(
            data_source=pool.data_source,
            primary_identifier=pool.identifier,
            formats=[in_copyright_format],
        )
        corrected_data.apply(self._db, pool.collection, replace=policy)

        # The open-access mechanism is gone and a single mechanism is
        # left, so the pool is no longer open-access.
        assert False == pool.open_access
        assert 1 == len(pool.delivery_mechanisms)
Esempio n. 24
0
    def update_licensepools_for_identifiers(self, identifiers):
        """Update availability information for a list of books.

        If the book has never been seen before, a new LicensePool
        will be created for the book.

        The book's LicensePool will be updated with current
        circulation information.

        Any requested identifier that the remote service does not
        mention in its response is treated as removed from the
        collection: if this collection still has a pool for it with
        licenses owned, all of that pool's circulation counts are set to
        zero.

        :param identifiers: An iterable of Identifier objects to refresh.
        """
        identifier_strings = self.create_identifier_strings(identifiers)
        response = self.availability(title_ids=identifier_strings)
        collection = self.collection
        parser = BibliographicParser(collection)
        remainder = set(identifiers)
        for bibliographic, availability in parser.process_all(
                response.content):
            identifier, ignore = bibliographic.primary_identifier.load(
                self._db)
            # discard() is a no-op for identifiers we did not ask about.
            remainder.discard(identifier)
            pool, ignore = availability.license_pool(self._db, collection)
            availability.apply(self._db, pool.collection)

        # We asked Axis about n books. It sent us n-k responses. Those
        # k books are the identifiers in `remainder`. These books have
        # been removed from the collection without us being notified.
        for removed_identifier in remainder:
            # Look the pool up on the identifier being reaped, not on the
            # `identifier` variable left over from the loop above.
            pool = removed_identifier.licensed_through_collection(collection)
            if not pool:
                self.log.warning(
                    "Was about to reap %r but no local license pool in this collection.",
                    removed_identifier)
                continue
            if pool.licenses_owned == 0:
                # Already reaped.
                continue
            self.log.info("Reaping %r", removed_identifier)

            availability = CirculationData(
                data_source=pool.data_source,
                primary_identifier=removed_identifier,
                licenses_owned=0,
                licenses_available=0,
                licenses_reserved=0,
                patrons_in_hold_queue=0,
            )
            # Same (session, collection, policy) calling convention as the
            # apply() call in the first loop.
            availability.apply(
                self._db, collection,
                ReplacementPolicy.from_license_source(self._db))
class MetadataWranglerBibliographicCoverageProvider(
        BibliographicCoverageProvider):
    """A BibliographicCoverageProvider whose default replacement policy
    is built with ReplacementPolicy.from_metadata_source.
    """

    def _default_replacement_policy(self, _db):
        """In general, data used by the metadata wrangler is a reliable source
        of metadata but not of licensing information. We always
        provide the MirrorUploader in case a data source has cover
        images available.

        :param _db: Passed to MirrorUploader.sitewide to locate the
            sitewide uploader.
        :return: The result of ReplacementPolicy.from_metadata_source,
            called with the sitewide mirror, or with mirror=None when
            the mirror's configuration cannot be loaded.
        """
        try:
            mirror = MirrorUploader.sitewide(_db)
        except CannotLoadConfiguration:
            # It's not a problem if there's no MirrorUploader
            # configured -- it just means we can't mirror cover images
            # when they show up.
            mirror = None
        return ReplacementPolicy.from_metadata_source(mirror=mirror)
Esempio n. 26
0
    def update_formats(self, licensepool):
        """Refresh the format information recorded for one book.

        The book is looked up through ``metadata_lookup``; only format
        information is extracted from the result, and the resulting
        CirculationData is applied to the pool's collection with a
        ReplacementPolicy created with ``formats=True``.

        :param licensepool: The LicensePool whose formats are refreshed.
        """
        book_info = self.metadata_lookup(licensepool.identifier)
        extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
            book_info,
            include_bibliographic=False,
            include_formats=True,
        )
        circulation = extracted.circulation

        # Point the CirculationData at the LicensePool's own identifier.
        # If the two differed, applying the data would create a brand
        # new LicensePool instead of updating this one.
        pool_identifier = licensepool.identifier
        circulation._primary_identifier.identifier = pool_identifier.identifier

        format_policy = ReplacementPolicy(formats=True)
        session = Session.object_session(licensepool)
        circulation.apply(session, licensepool.collection, format_policy)
Esempio n. 27
0
    def process_item(self, identifier):
        """Reap a title that the remote API no longer reports.

        Asks ``self.api.get_item`` about the identifier. If the API
        returns nothing, and this collection has a license pool for the
        identifier that still owns licenses, a CirculationData with
        zero licenses owned, zero licenses available and an empty hold
        queue is applied to the collection.

        :param identifier: The identifier to check. Its ``type`` and
            ``identifier`` attributes and its
            ``licensed_through_collection`` method are used.
        :return: The applied CirculationData, or None when the title is
            still in the collection or there is nothing left to reap.
        """
        self.log.debug(
            "Seeing if %s needs reaping", identifier.identifier
        )
        if self.api.get_item(identifier.identifier):
            # This title is still in the collection. Do nothing.
            return

        # Get this collection's license pool for this identifier.
        # We'll reap it by setting its licenses_owned to 0.
        pool = identifier.licensed_through_collection(self.collection)

        if not pool or pool.licenses_owned == 0:
            # There is no pool, or it's already been reaped.
            return

        # Logger.warn is a deprecated alias of Logger.warning.
        if pool.presentation_edition:
            self.log.warning(
                "Removing %r from circulation",
                pool.presentation_edition
            )
        else:
            self.log.warning(
                "Removing unknown title %s from circulation.",
                identifier.identifier
            )

        now = datetime.datetime.utcnow()
        circulationdata = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=IdentifierData(
                identifier.type, identifier.identifier
            ),
            licenses_owned=0,
            licenses_available=0,
            patrons_in_hold_queue=0,
            last_checked=now,
        )

        circulationdata.apply(
            self._db,
            self.collection,
            replace=ReplacementPolicy.from_license_source(self._db)
        )
        return circulationdata
Esempio n. 28
0
    def process_item(self, identifier):
        """Check whether a title has left the collection and reap it if so.

        ``self.api.get_item`` is asked about the identifier. A falsy
        answer means the title is gone; if this collection's license
        pool for it still owns licenses, a CirculationData that zeroes
        out ownership, availability and the hold queue is applied.

        :param identifier: The identifier to check. Its ``type`` and
            ``identifier`` attributes and its
            ``licensed_through_collection`` method are used.
        :return: The applied CirculationData, or None when no reaping
            was necessary.
        """
        self.log.debug(
            "Seeing if %s needs reaping", identifier.identifier
        )
        still_present = self.api.get_item(identifier.identifier)
        if still_present:
            # This title is still in the collection. Do nothing.
            return

        # Get this collection's license pool for this identifier.
        # We'll reap it by setting its licenses_owned to 0.
        pool = identifier.licensed_through_collection(self.collection)

        if not pool or pool.licenses_owned == 0:
            # There is no pool, or it's already been reaped.
            return

        # Use Logger.warning; Logger.warn is a deprecated alias.
        if pool.presentation_edition:
            self.log.warning(
                "Removing %r from circulation",
                pool.presentation_edition
            )
        else:
            self.log.warning(
                "Removing unknown title %s from circulation.",
                identifier.identifier
            )

        now = datetime.datetime.utcnow()
        circulationdata = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=IdentifierData(
                identifier.type, identifier.identifier
            ),
            licenses_owned=0,
            licenses_available=0,
            patrons_in_hold_queue=0,
            last_checked=now,
        )

        circulationdata.apply(
            self._db,
            self.collection,
            replace=ReplacementPolicy.from_license_source(self._db)
        )
        return circulationdata
Esempio n. 29
0
    def update_formats(self, licensepool):
        """Refresh the format information for one book.

        Bibliographic metadata is refreshed along the way, in case
        Overdrive has changed it.

        :param licensepool: The LicensePool whose book is refreshed.
        """
        extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
            self.metadata_lookup(licensepool.identifier),
            include_bibliographic=True,
            include_formats=True,
        )
        if extracted:
            edition, _ = self._edition(licensepool)
            license_policy = ReplacementPolicy.from_license_source(self._db)
            extracted.apply(edition, self.collection, replace=license_policy)
        # Otherwise nothing was extracted, so there is no work to do.
Esempio n. 30
0
    def update_formats(self, licensepool):
        """Bring one book's format information up to date.

        As a side effect the book's metadata is updated too, in case
        Overdrive has changed it.

        :param licensepool: The LicensePool whose book is refreshed.
        """
        book_info = self.metadata_lookup(licensepool.identifier)
        fresh_metadata = (
            OverdriveRepresentationExtractor.book_info_to_metadata(
                book_info, include_bibliographic=True, include_formats=True
            )
        )
        if not fresh_metadata:
            # Nothing was extracted; nothing to apply.
            return

        edition, _unused = self._edition(licensepool)
        policy = ReplacementPolicy.from_license_source(self._db)
        fresh_metadata.apply(edition, self.collection, replace=policy)
Esempio n. 31
0
    def test_mirror_open_access_link_fetch_failure(self):
        """When fetching an open-access link fails, mirror_link records
        the fetch failure, mirrors nothing, and suppresses the pool.
        """
        mirrors = dict(books_mirror=MockS3Uploader())
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirrors=mirrors, http_get=h.do_get)
        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            media_type=link.media_type,
            content=link.content,
        )

        # The HTTP client will answer the fetch with a 403.
        h.queue_response(403)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        # None is compared by identity rather than with == / !=.
        assert representation.fetch_exception is not None
        assert representation.mirror_exception is None
        assert representation.mirror_url is None
        assert link.href == representation.url
        assert representation.fetched_at is not None
        assert representation.mirrored_at is None

        # The license pool is suppressed when fetch fails.
        assert True == pool.suppressed
        assert representation.fetch_exception in pool.license_exception
Esempio n. 32
0
    def reaper_request(self, identifier):
        """Ask Enki about one item and reap it if Enki no longer has it.

        A "getItem" request is sent for the identifier. If the response
        body parses as JSON the book still exists and nothing changes.
        If it does not parse, a CirculationData with zero licenses
        owned, zero licenses available and an empty hold queue is
        applied to this collection.

        :param identifier: The identifier to check; its ``identifier``
            attribute and ``licensed_through_collection`` method are
            used.
        :return: The applied CirculationData when the book was reaped,
            None when the book is kept.
        """
        self.log.debug("Checking availability for %s", identifier.identifier)
        now = datetime.datetime.utcnow()
        url = str(self.base_url) + str(self.item_endpoint)
        args = {
            'method': "getItem",
            'recordid': identifier.identifier,
            'size': "small",
            'lib': self.library_id,
        }
        response = self.request(url, method='get', params=args)

        try:
            # If a book doesn't exist in Enki, we'll just get an HTML
            # page saying we did something wrong, which won't parse.
            # Only the parse is guarded, and only against ValueError
            # (which json raises for undecodable documents), so that
            # unrelated errors can never cause a book to be reaped.
            json.loads(response.content)
        except ValueError:
            pass
        else:
            self.log.debug("Keeping existing book: %s", identifier)
            return None

        # Get this collection's license pool for this identifier.
        pool = identifier.licensed_through_collection(self.collection)
        if pool and (pool.licenses_owned > 0):
            if pool.presentation_edition:
                self.log.warning(
                    "Removing %s (%s) from circulation",
                    pool.presentation_edition.title,
                    pool.presentation_edition.author
                )
            else:
                self.log.warning(
                    "Removing unknown work %s from circulation.",
                    identifier.identifier
                )

        # The zeroed-out circulation data is applied whether or not a
        # pool with licenses was found above.
        circulationdata = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=IdentifierData(
                EnkiAPI.ENKI_ID, identifier.identifier
            ),
            licenses_owned=0,
            licenses_available=0,
            patrons_in_hold_queue=0,
            last_checked=now,
        )

        circulationdata.apply(
            self._db,
            self.collection,
            replace=ReplacementPolicy.from_license_source(self._db)
        )

        return circulationdata
Esempio n. 33
0
    def update_consolidated_copy(self, _db, copy_info, analytics=None):
        """Apply one entry of the consolidated copies feed to its pool.

        :param _db: Database session handed to ``self.collection`` and
            to ``CirculationData.apply``.
        :param copy_info: Mapping read with ``.get`` for the keys
            "identifier", "licenses" and "available".
        :param analytics: Passed through to the ReplacementPolicy.
        """
        uri = copy_info.get("identifier")
        owned = copy_info.get("licenses")
        on_shelf = copy_info.get("available")

        primary_identifier = IdentifierData(Identifier.URI, uri)
        circulation = CirculationData(
            data_source=self.data_source_name,
            primary_identifier=primary_identifier,
            licenses_owned=owned,
            licenses_available=on_shelf,
        )

        policy = ReplacementPolicy(analytics=analytics)
        pool, _ = circulation.apply(_db, self.collection(_db), policy)

        # Update licenses reserved if there are holds and something
        # is available.
        if len(pool.holds) > 0:
            if pool.licenses_available > 0:
                self.update_hold_queue(pool)
Esempio n. 34
0
    def test_work_from_metadata(self):
        """Validate the ability to create a new Work from appropriate metadata.
        """

        class Mock(MockDirectoryImportScript):
            """In this test we need to verify that annotate_metadata
            was called but did nothing.
            """
            def annotate_metadata(self, metadata, *args, **kwargs):
                # Leave a marker on the metadata so the test can tell
                # this method ran, then defer to the real implementation.
                metadata.annotated = True
                return super(Mock, self).annotate_metadata(
                    metadata, *args, **kwargs
                )

        identifier = IdentifierData(Identifier.GUTENBERG_ID, "1003")
        identifier_obj, ignore = identifier.load(self._db)
        metadata = Metadata(
            DataSource.GUTENBERG,
            primary_identifier=identifier,
            title=u"A book"
        )
        metadata.annotated = False
        datasource = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy.from_license_source(self._db)
        mirror = MockS3Uploader()
        policy.mirror = mirror

        # Here, work_from_metadata calls annotate_metadata, but does
        # not actually import anything because there are no files 'on
        # disk' and thus no way to actually get the book.
        collection = self._default_collection
        args = (collection, metadata, policy, "cover directory",
                "ebook directory", RightsStatus.CC0)
        script = Mock(self._db)
        eq_(None, script.work_from_metadata(*args))
        eq_(True, metadata.annotated)

        # Now let's try it with some files 'on disk'.
        # The cover is binary image data, so it's read in binary mode;
        # text mode may translate or try to decode the bytes.
        with open(self.sample_cover_path('test-book-cover.png'), 'rb') as fh:
            image = fh.read()
        mock_filesystem = {
            'cover directory' : (
                'cover.jpg', Representation.JPEG_MEDIA_TYPE, image
            ),
            'ebook directory' : (
                'book.epub', Representation.EPUB_MEDIA_TYPE, "I'm an EPUB."
            )
        }
        script = MockDirectoryImportScript(
            self._db, mock_filesystem=mock_filesystem
        )
        work = script.work_from_metadata(*args)

        # We have created a book. It has a cover image, which has a
        # thumbnail.
        eq_("A book", work.title)
        assert work.cover_full_url.endswith(
            '/test.cover.bucket/Gutenberg/Gutenberg+ID/1003/1003.jpg'
        )
        assert work.cover_thumbnail_url.endswith(
            '/test.cover.bucket/scaled/300/Gutenberg/Gutenberg+ID/1003/1003.png'
        )
        [pool] = work.license_pools
        assert pool.open_access_download_url.endswith(
            '/test.content.bucket/Gutenberg/Gutenberg+ID/1003/A+book.epub'
        )

        eq_(RightsStatus.CC0,
            pool.delivery_mechanisms[0].rights_status.uri)

        # The mock S3Uploader has a record of 'uploading' all these files
        # to S3.
        epub, full, thumbnail = mirror.uploaded
        eq_(epub.url, pool.open_access_download_url)
        eq_(full.url, work.cover_full_url)
        eq_(thumbnail.url, work.cover_thumbnail_url)

        # The EPUB Representation was cleared out after the upload, to
        # save database space.
        eq_("I'm an EPUB.", mirror.content[0])
        eq_(None, epub.content)
Esempio n. 35
0
    def __init__(self, _db, batch_size=10, cutoff_time=None,
                 uploader=None, providers=None, **kwargs):
        """Set up the provider, its replacement policy and helper services.

        :param _db: Database session used to find or create the
            INTERNAL_PROCESSING DataSource and to build the Overdrive
            coverage provider.
        :param batch_size: Forwarded to the superclass constructor.
        :param cutoff_time: Accepted but not used by this constructor.
        :param uploader: Uploader used for mirroring; when falsy, a new
            S3Uploader is created for the replacement policy.
        :param providers: Optional pair of (required, optional) coverage
            provider lists. For testing purposes.
        :param kwargs: Accepted but not used by this constructor.
        """
        source, _ = get_one_or_create(
            _db, DataSource,
            name=DataSource.INTERNAL_PROCESSING
        )
        # Other components don't treat INTERNAL_PROCESSING as a source
        # of licenses, but this one does, since it is responsible for
        # managing LicensePools.
        source.offers_licenses = True

        super(IdentifierResolutionCoverageProvider, self).__init__(
            service_name="Identifier Resolution Coverage Provider",
            input_identifier_types=[Identifier.OVERDRIVE_ID, Identifier.ISBN],
            output_source=source,
            batch_size=batch_size,
            operation=CoverageRecord.RESOLVE_IDENTIFIER_OPERATION,
        )

        # As the metadata wrangler, we mirror any resources we find
        # to S3.
        mirror = uploader or S3Uploader()

        # Recalculate presentation aggressively: the work is either
        # not set up at all yet, or an earlier attempt to set it up
        # went wrong.
        calculation_policy = PresentationCalculationPolicy(
            regenerate_opds_entries=True,
            update_search_index=True
        )
        policy = ReplacementPolicy.from_metadata_source(
            mirror=mirror, even_if_not_apparently_updated=True,
            presentation_calculation_policy=calculation_policy
        )

        if not providers:
            self.required_coverage_providers = [
                OverdriveBibliographicCoverageProvider(
                    _db, metadata_replacement_policy=policy
                ),
                ContentCafeCoverageProvider(self._db),
                ContentServerCoverageProvider(self._db),
                OCLCClassifyCoverageProvider(self._db),
            ]
            self.optional_coverage_providers = []
        else:
            # For testing purposes. Initializing the real coverage
            # providers during tests can cause requests to third-parties.
            (self.required_coverage_providers,
             self.optional_coverage_providers) = providers

        self.viaf = VIAFClient(self._db)
        overdrive_mirror = OverdriveCoverImageMirror(
            self._db, uploader=uploader
        )
        self.image_mirrors = {DataSource.OVERDRIVE: overdrive_mirror}
        self.image_scaler = ImageScaler(
            self._db, self.image_mirrors.values(), uploader=uploader
        )
        self.oclc_linked_data = LinkedDataCoverageProvider(self._db)
Esempio n. 36
0
    def test_open_access_content_mirrored(self):
        # Make sure that open access material links are translated to our S3 buckets, and that
        # commercial material links are left as is.
        # Note: Mirroring tests passing does not guarantee that all code now
        # correctly calls on CirculationData, as well as Metadata.  This is a risk.

        # Only a books mirror is configured; there is no covers mirror.
        mirrors = dict(books_mirror=MockS3Uploader(), covers_mirror=None)
        # mirror_type is used below as the key into `mirrors`.
        # NOTE(review): this presumably equals "books_mirror" -- confirm
        # against ExternalIntegrationLink.
        mirror_type = ExternalIntegrationLink.OPEN_ACCESS_BOOKS
        # Here's a book.
        edition, pool = self._edition(with_license_pool=True)

        # Here's a link to the content of the book, which will be mirrored.
        link_mirrored = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            href="http://example.com/",
            media_type=Representation.EPUB_MEDIA_TYPE,
            content="i am a tiny book",
        )

        # This link will not be mirrored.
        link_unmirrored = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            href="http://example.com/2",
            media_type=Representation.EPUB_MEDIA_TYPE,
            content="i am a pricy book",
        )

        # Apply the metadata.
        policy = ReplacementPolicy(mirrors=mirrors)

        metadata = Metadata(
            data_source=edition.data_source,
            links=[link_mirrored, link_unmirrored],
        )
        metadata.apply(edition, pool.collection, replace=policy)
        # make sure the refactor is done right, and metadata does not upload
        assert 0 == len(mirrors[mirror_type].uploaded)

        # The same two links, applied through CirculationData this time.
        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
            links=[link_mirrored, link_unmirrored],
        )
        circulation_data.apply(self._db, pool.collection, replace=policy)

        # make sure the refactor is done right, and circulation does upload
        assert 1 == len(mirrors[mirror_type].uploaded)

        # Only the open-access link has been 'mirrored'.
        [book] = mirrors[mirror_type].uploaded

        # It's remained an open-access link.
        assert [Hyperlink.OPEN_ACCESS_DOWNLOAD
                ] == [x.rel for x in book.resource.links]

        # It's been 'mirrored' to the appropriate S3 bucket.
        assert book.mirror_url.startswith(
            "https://test-content-bucket.s3.amazonaws.com/")
        expect = "/%s/%s.epub" % (edition.primary_identifier.identifier,
                                  edition.title)
        assert book.mirror_url.endswith(expect)

        # make sure the mirrored link is safely on edition
        # The unpacking below relies on there being exactly two links
        # and on the DRM-encrypted rel sorting before the open-access
        # rel. NOTE(review): confirm against the Hyperlink constants.
        sorted_edition_links = sorted(pool.identifier.links,
                                      key=lambda x: x.rel)
        unmirrored_representation, mirrored_representation = [
            edlink.resource.representation for edlink in sorted_edition_links
        ]
        assert mirrored_representation.mirror_url.startswith(
            "https://test-content-bucket.s3.amazonaws.com/")

        # make sure the unmirrored link is safely on edition
        assert "http://example.com/2" == unmirrored_representation.url
        # make sure the unmirrored link has not been translated to an S3 URL
        assert None == unmirrored_representation.mirror_url