Exemple #1
0
 def __init__(self,
              _db,
              api,
              datasource,
              batch_size=10,
              metadata_replacement_policy=None,
              circulationdata_replacement_policy=None,
              cutoff_time=None):
     self._db = _db
     self.api = api
     output_source = DataSource.lookup(_db, datasource)
     input_identifier_types = [output_source.primary_identifier_type]
     service_name = "%s Bibliographic Coverage Provider" % datasource
     metadata_replacement_policy = (
         metadata_replacement_policy
         or ReplacementPolicy.from_metadata_source())
     circulationdata_replacement_policy = (
         circulationdata_replacement_policy
         or ReplacementPolicy.from_license_source())
     self.metadata_replacement_policy = metadata_replacement_policy
     self.circulationdata_replacement_policy = circulationdata_replacement_policy
     super(BibliographicCoverageProvider,
           self).__init__(service_name,
                          input_identifier_types,
                          output_source,
                          batch_size=batch_size,
                          cutoff_time=cutoff_time)
Exemple #2
0
    def test_apply_removes_old_formats_based_on_replacement_policy(self):
        edition, pool = self._edition(with_license_pool=True)

        # Start with one delivery mechanism for this pool.
        for lpdm in pool.delivery_mechanisms:
            self._db.delete(lpdm)

        old_lpdm = pool.set_delivery_mechanism(Representation.PDF_MEDIA_TYPE,
                                               DeliveryMechanism.ADOBE_DRM,
                                               RightsStatus.IN_COPYRIGHT, None)

        # And it has been loaned.
        patron = self._patron()
        loan, ignore = pool.loan_to(patron, fulfillment=old_lpdm)
        eq_(old_lpdm, loan.fulfillment)

        # We have new circulation data that has a different format.
        format = FormatData(
            content_type=Representation.EPUB_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation_data = CirculationData(
            formats=[format],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        # If we apply the new CirculationData with formats false in the policy,
        # we'll add the new format, but keep the old one as well.
        replacement_policy = ReplacementPolicy(formats=False)
        circulation_data.apply(self._db, pool.collection, replacement_policy)

        eq_(2, pool.delivery_mechanisms.count())
        eq_(
            set([
                Representation.PDF_MEDIA_TYPE, Representation.EPUB_MEDIA_TYPE
            ]),
            set([
                lpdm.delivery_mechanism.content_type
                for lpdm in pool.delivery_mechanisms
            ]))
        eq_(old_lpdm, loan.fulfillment)

        # But if we make formats true in the policy, we'll delete the old format
        # and remove it from its loan.
        replacement_policy = ReplacementPolicy(formats=True)
        circulation_data.apply(self._db, pool.collection, replacement_policy)

        eq_(1, pool.delivery_mechanisms.count())
        eq_(Representation.EPUB_MEDIA_TYPE,
            pool.delivery_mechanisms[0].delivery_mechanism.content_type)
        eq_(None, loan.fulfillment)
Exemple #3
0
    def test_implicit_format_for_open_access_link(self):
        # A format is a delivery mechanism.  We handle delivery on open access 
        # pools from our mirrored content in S3.  
        # Tests that when a link is open access, a pool can be delivered.
        
        edition, pool = self._edition(with_license_pool=True)

        # This is the delivery mechanism created by default when you
        # create a book with _edition().
        [epub] = pool.delivery_mechanisms
        eq_(Representation.EPUB_MEDIA_TYPE, epub.delivery_mechanism.content_type)
        eq_(DeliveryMechanism.ADOBE_DRM, epub.delivery_mechanism.drm_scheme)


        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.PDF_MEDIA_TYPE,
            href=self._url
        )
        circulation_data = CirculationData(
            data_source=DataSource.GUTENBERG, 
            primary_identifier=edition.primary_identifier, 
            links=[link], 
        )

        replace = ReplacementPolicy(
                formats=True,
            )
        circulation_data.apply(pool, replace)

        # We destroyed the default delivery format and added a new,
        # open access delivery format.
        [pdf] = pool.delivery_mechanisms
        eq_(Representation.PDF_MEDIA_TYPE, pdf.delivery_mechanism.content_type)
        eq_(DeliveryMechanism.NO_DRM, pdf.delivery_mechanism.drm_scheme)

        circulation_data = CirculationData(
            data_source=DataSource.GUTENBERG, 
            primary_identifier=edition.primary_identifier, 
            links=[]
        )
        replace = ReplacementPolicy(
                formats=True,
                links=True,
            )
        circulation_data.apply(pool, replace)

        # Now we have no formats at all.
        eq_([], pool.delivery_mechanisms)
Exemple #4
0
    def test_rights_status_default_rights_from_data_source(self):
        identifier = IdentifierData(
            Identifier.GUTENBERG_ID,
            "abcd",
        )
        link = LinkData(rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href=self._url)

        circulation_data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=identifier,
            links=[link],
        )

        replace = ReplacementPolicy(formats=True, )

        # This pool starts off as not being open-access.
        pool, ignore = circulation_data.license_pool(self._db,
                                                     self._default_collection)
        eq_(False, pool.open_access)

        circulation_data.apply(self._db, pool.collection, replace)

        # The pool became open-access because it was given a
        # link that came from the OS content server.
        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())
        # The rights status is the default for the OA content server.
        eq_(RightsStatus.GENERIC_OPEN_ACCESS,
            pool.delivery_mechanisms[0].rights_status.uri)
    def test_mirror_404_error(self):
        mirror = DummyS3Uploader()
        h = DummyHTTPClient()
        h.queue_response(404)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)

        link = LinkData(
            rel=Hyperlink.IMAGE,
            media_type=Representation.JPEG_MEDIA_TYPE,
            href="http://example.com/",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        m = Metadata(data_source=data_source)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        # Since we got a 404 error, the cover image was not mirrored.
        eq_(404, link_obj.resource.representation.status_code)
        eq_(None, link_obj.resource.representation.mirror_url)
        eq_([], mirror.uploaded)
Exemple #6
0
    def __init__(self, collection, api_class=OdiloAPI, **kwargs):
        """Constructor.

        :param collection: Provide bibliographic coverage to all
            Odilo books in the given Collection.
        :param api_class: Instantiate this class with the given Collection,
            rather than instantiating OdiloAPI.
        """
        super(OdiloBibliographicCoverageProvider,
              self).__init__(collection, **kwargs)
        if isinstance(api_class, OdiloAPI):
            # Use a previously instantiated OdiloAPI instance
            # rather than creating a new one.
            self.api = api_class
        else:
            # A web application should not use this option because it
            # will put a non-scoped session in the mix.
            _db = Session.object_session(collection)
            self.api = api_class(_db, collection)

        self.replacement_policy = ReplacementPolicy(
            identifiers=True,
            subjects=True,
            contributions=True,
            links=True,
            formats=True,
            rights=True,
            link_content=True,
            # even_if_not_apparently_updated=False,
            analytics=Analytics(self._db))
    def test_non_open_access_book_not_mirrored(self):
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        content = "foo"
        link = LinkData(rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href="http://example.com/",
                        content=content,
                        rights_uri=RightsStatus.IN_COPYRIGHT)

        identifier = self._identifier()
        link_obj, is_new = identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            media_type=link.media_type,
            content=link.content,
        )

        # The Hyperlink object makes it look like an open-access book,
        # but the context we have from the OPDS feed says that it's
        # not.
        m.mirror_link(None, data_source, link, link_obj, policy)

        # No HTTP requests were made.
        eq_([], h.requests)

        # Nothing was uploaded.
        eq_([], mirror.uploaded)
Exemple #8
0
    def import_edition_from_metadata(
            self, metadata, even_if_no_author, immediately_presentation_ready
    ):
        """ For the passed-in Metadata object, see if can find or create an Edition 
            in the database.  Do not set the edition's pool or work, yet.
        """

        # Locate or create an Edition for this book.
        edition, is_new_edition = metadata.edition(self._db)

        policy = ReplacementPolicy(
            subjects=True,
            links=True,
            contributions=True,
            rights=True,
            link_content=True,
            even_if_not_apparently_updated=True,
            mirror=self.mirror,
            content_modifier=self.content_modifier,
            http_get=self.http_get,
        )
        metadata.apply(
            edition, self.metadata_client, replace=policy
        )

        return edition
Exemple #9
0
    def test_rights_status_open_access_link_no_rights_uses_data_source_default(
            self):
        identifier = IdentifierData(
            Identifier.GUTENBERG_ID,
            "abcd",
        )

        # Here's a CirculationData that will create an open-access
        # LicensePoolDeliveryMechanism.
        link = LinkData(rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href=self._url)
        circulation_data = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=identifier,
            links=[link],
        )
        replace_formats = ReplacementPolicy(formats=True, )

        pool, ignore = circulation_data.license_pool(self._db,
                                                     self._default_collection)
        pool.open_access = False

        # Applying this CirculationData to a LicensePool makes it
        # open-access.
        circulation_data.apply(self._db, pool.collection, replace_formats)
        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())

        # The delivery mechanism's rights status is the default for
        # the data source.
        eq_(RightsStatus.PUBLIC_DOMAIN_USA,
            pool.delivery_mechanisms[0].rights_status.uri)

        # Even if a commercial source like Overdrive should offer a
        # link with rel="open access", unless we know it's an
        # open-access link we will give it a RightsStatus of
        # IN_COPYRIGHT.
        identifier = IdentifierData(
            Identifier.OVERDRIVE_ID,
            "abcd",
        )
        link = LinkData(rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href=self._url)

        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[link],
        )

        pool, ignore = circulation_data.license_pool(self._db,
                                                     self._default_collection)
        pool.open_access = False
        circulation_data.apply(self._db, pool.collection, replace_formats)
        eq_(RightsStatus.IN_COPYRIGHT,
            pool.delivery_mechanisms[0].rights_status.uri)

        eq_(False, pool.open_access)
Exemple #10
0
    def populate_all_catalog(self):
        """ Call get_all_catalog to get all of library's book info from OneClick.
        Create Work, Edition, LicensePool objects in our database.
        """
        catalog_list = self.get_all_catalog()
        items_transmitted = len(catalog_list)
        items_created = 0
        coverage_provider = OneClickBibliographicCoverageProvider(_db=self._db)
        # the default policy doesn't update delivery mechanisms, which we do want to do
        metadata_replacement_policy = ReplacementPolicy.from_metadata_source()
        metadata_replacement_policy.formats = True

        for catalog_item in catalog_list:
            result = coverage_provider.update_metadata(
                catalog_item=catalog_item,
                metadata_replacement_policy=metadata_replacement_policy)
            if not isinstance(result, CoverageFailure):
                items_created += 1

                if isinstance(result, Identifier):
                    # calls work.set_presentation_ready() for us
                    coverage_provider.handle_success(result)

                    # We're populating the catalog, so we can assume the list OneClick
                    # sent us is of books we own licenses to.
                    # NOTE:  TODO later:  For the 4 out of 2000 libraries that chose to display
                    # books they don't own, we'd need to call the search endpoint to get
                    # the interest field, and then deal with licenses_owned.
                    if result.licensed_through:
                        result.licensed_through.licenses_owned = 1

        # stay data, stay!
        self._db.commit()

        return items_transmitted, items_created
Exemple #11
0
    def test_rights_status_open_access_link_with_rights(self):
        identifier = IdentifierData(
            Identifier.OVERDRIVE_ID,
            "abcd",
        )
        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )

        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[link],
        )
        replace = ReplacementPolicy(
            formats=True,
        )

        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        eq_(RightsStatus.CC_BY_ND, pool.delivery_mechanisms[0].rights_status.uri)
Exemple #12
0
    def test_rights_status_commercial_link_with_rights(self):
        identifier = IdentifierData(
            Identifier.OVERDRIVE_ID,
            "abcd",
        )
        link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        format = FormatData(
            content_type=link.media_type,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
            link=link,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )

        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[link],
            formats=[format],
        )

        replace = ReplacementPolicy(
            formats=True,
        )

        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(False, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        eq_(RightsStatus.IN_COPYRIGHT, pool.delivery_mechanisms[0].rights_status.uri)
Exemple #13
0
    def test_rights_status_default_rights_from_data_source(self):
        identifier = IdentifierData(
            Identifier.GUTENBERG_ID,
            "abcd",
        )
        link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url
        )

        circulation_data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=identifier,
            links=[link],
        )

        replace = ReplacementPolicy(
            formats=True,
        )

        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        # The rights status is the default for the OA content server.
        eq_(RightsStatus.GENERIC_OPEN_ACCESS, pool.delivery_mechanisms[0].rights_status.uri)
Exemple #14
0
    def test_rights_status_open_access_link_no_rights(self):
        identifier = IdentifierData(
            Identifier.OVERDRIVE_ID,
            "abcd",
        )
        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url
        )

        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[link],
        )
        replace = ReplacementPolicy(
            formats=True,
        )

        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        # Rights status is generic open access because there's an open access
        # link but no other rights info.
        eq_(RightsStatus.GENERIC_OPEN_ACCESS, pool.delivery_mechanisms[0].rights_status.uri)
Exemple #15
0
    def test_explicit_formatdata(self):
        # Creating an edition with an open-access download will
        # automatically create a delivery mechanism.
        edition, pool = self._edition(with_open_access_download=True)

        # Let's also add a DRM format.
        drm_format = FormatData(
            content_type=Representation.PDF_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )

        circulation_data = CirculationData(formats=[drm_format],
                            data_source=edition.data_source, 
                            primary_identifier=edition.primary_identifier)
        circulation_data.apply(pool)

        [epub, pdf] = sorted(pool.delivery_mechanisms, 
                             key=lambda x: x.delivery_mechanism.content_type)
        eq_(epub.resource, edition.license_pool.best_open_access_link)

        eq_(Representation.PDF_MEDIA_TYPE, pdf.delivery_mechanism.content_type)
        eq_(DeliveryMechanism.ADOBE_DRM, pdf.delivery_mechanism.drm_scheme)

        # If we tell Metadata to replace the list of formats, we only
        # have the one format we manually created.
        replace = ReplacementPolicy(
                formats=True,
            )
        circulation_data.apply(pool, replace=replace)
        [pdf] = pool.delivery_mechanisms
        eq_(Representation.PDF_MEDIA_TYPE, pdf.delivery_mechanism.content_type)
Exemple #16
0
    def test_rights_status_default_rights_passed_in(self):
        identifier = IdentifierData(
            Identifier.GUTENBERG_ID,
            "abcd",
        )
        link = LinkData(rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href=self._url)

        circulation_data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=identifier,
            default_rights_uri=RightsStatus.CC_BY,
            links=[link],
        )

        replace = ReplacementPolicy(formats=True, )

        pool, ignore = circulation_data.license_pool(self._db,
                                                     self._default_collection)
        circulation_data.apply(self._db, pool.collection, replace)
        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())
        # The rights status is the one that was passed in to CirculationData.
        eq_(RightsStatus.CC_BY, pool.delivery_mechanisms[0].rights_status.uri)
    def test_rights_status_open_access_link_no_rights_uses_data_source_default(self):
        identifier = IdentifierData(
            Identifier.GUTENBERG_ID,
            "abcd",
        )
        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url
        )

        circulation_data = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=identifier,
            links=[link],
        )
        replace = ReplacementPolicy(
            formats=True,
        )

        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))

        # The delivery mechanism's rights status is the default for
        # the data source.
        eq_(RightsStatus.PUBLIC_DOMAIN_USA, pool.delivery_mechanisms[0].rights_status.uri)

        # Even if a commercial source like Overdrive should offer a
        # link with rel="open access", unless we know it's an
        # open-access link we will give it a RightsStatus of
        # IN_COPYRIGHT.
        identifier = IdentifierData(
            Identifier.OVERDRIVE_ID,
            "abcd",
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url
        )

        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=identifier,
            links=[link],
        )
        
        pool, ignore = circulation_data.license_pool(self._db)
        circulation_data.apply(pool, replace)
        eq_(RightsStatus.IN_COPYRIGHT,
            pool.delivery_mechanisms[0].rights_status.uri)

        # This will cause the work to be treated as a non-open-access
        # work.
        eq_(False, pool.open_access)
    def test_mirror_open_access_link_mirror_failure(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        content = open(self.sample_cover_path("test-book-cover.png")).read()
        link = LinkData(rel=Hyperlink.IMAGE,
                        media_type=Representation.JPEG_MEDIA_TYPE,
                        href="http://example.com/",
                        content=content)

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.JPEG_MEDIA_TYPE)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # But mirroing failed.
        assert representation.mirror_exception != None
        eq_(None, representation.mirrored_at)
        eq_(link.media_type, representation.media_type)
        eq_(link.href, representation.url)

        # The mirror url should still be set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith(
            "%s/cover.jpg" % edition.primary_identifier.identifier)

        # Book content is still there since it wasn't mirrored.
        assert representation.content != None

        # the edition's identifier-associated license pool should not be
        # suppressed just because fetch failed on getting image.
        eq_(False, pool.suppressed)

        # the license pool only gets its license_exception column filled in
        # if fetch failed on getting an Hyperlink.OPEN_ACCESS_DOWNLOAD-type epub.
        eq_(None, pool.license_exception)
    def test_mirror_with_content_modifier(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader()

        def dummy_content_modifier(representation):
            representation.content = "Replaced Content"

        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror,
                                   content_modifier=dummy_content_modifier,
                                   http_get=h.do_get)

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href="http://example.com/test.epub",
            content="I'm an epub",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # The mirror url is set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith(
            "%s/%s.epub" %
            (edition.primary_identifier.identifier, edition.title))

        # Content isn't there since it was mirrored.
        eq_(None, representation.content)

        # The representation was mirrored, with the modified content.
        eq_([representation], mirror.uploaded)
        eq_(["Replaced Content"], mirror.content)
Exemple #20
0
    def test_mirror_open_access_link_mirror_failure(self):
        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # But mirroing failed.
        assert representation.mirror_exception != None
        eq_(None, representation.mirrored_at)
        eq_(link.media_type, representation.media_type)
        eq_(link.href, representation.url)

        # The mirror url should still be set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith("%s.epub" % edition.title)

        # Book content is still there since it wasn't mirrored.
        assert representation.content != None

        # The license pool is suppressed when mirroring fails.
        eq_(True, pool.suppressed)
        assert representation.mirror_exception in pool.license_exception
    def test_mirror_open_access_link_fetch_failure(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader()
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        link = LinkData(
            rel=Hyperlink.IMAGE,
            media_type=Representation.JPEG_MEDIA_TYPE,
            href="http://example.com/",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )
        h.queue_response(403)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        assert representation.fetch_exception != None
        eq_(None, representation.mirror_exception)
        eq_(None, representation.mirror_url)
        eq_(link.href, representation.url)
        assert representation.fetched_at != None
        eq_(None, representation.mirrored_at)

        # the edition's identifier-associated license pool should not be
        # suppressed just because fetch failed on getting image.
        eq_(False, pool.suppressed)

        # the license pool only gets its license_exception column filled in
        # if fetch failed on getting an Hyperlink.OPEN_ACCESS_DOWNLOAD-type epub.
        eq_(None, pool.license_exception)
Exemple #22
0
    def test_mirror_open_access_link_fetch_failure(self):
        mirror = DummyS3Uploader()
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)
        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(403)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        assert representation.fetch_exception != None
        eq_(None, representation.mirror_exception)
        eq_(None, representation.mirror_url)
        eq_(link.href, representation.url)
        assert representation.fetched_at != None
        eq_(None, representation.mirrored_at)

        # The license pool is suppressed when fetch fails.
        eq_(True, pool.suppressed)
        assert representation.fetch_exception in pool.license_exception
Exemple #23
0
    def test_format_change_may_change_open_access_status(self):

        # In this test, whenever we call CirculationData.apply(), we
        # want to destroy the old list of formats and recreate it.
        replace_formats = ReplacementPolicy(formats=True)

        # Here's a seemingly ordinary non-open-access LicensePool.
        edition, pool = self._edition(with_license_pool=True)
        eq_(False, pool.open_access)

        # One day, we learn that it has an open-access delivery mechanism.
        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )

        circulation_data = CirculationData(
            data_source=pool.data_source,
            primary_identifier=pool.identifier,
            links=[link],
        )

        # Applying this information turns the pool into an open-access pool.
        circulation_data.apply(self._db,
                               pool.collection,
                               replace=replace_formats)
        eq_(True, pool.open_access)

        # Then we find out it was a mistake -- the book is in copyright.
        format = FormatData(Representation.EPUB_MEDIA_TYPE,
                            DeliveryMechanism.NO_DRM,
                            rights_uri=RightsStatus.IN_COPYRIGHT)
        circulation_data = CirculationData(data_source=pool.data_source,
                                           primary_identifier=pool.identifier,
                                           formats=[format])
        circulation_data.apply(self._db,
                               pool.collection,
                               replace=replace_formats)

        # The original LPDM has been removed and only the new one remains.
        eq_(False, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())
Exemple #24
0
    def _set_circulationdata(self,
                             identifier,
                             circulationdata,
                             circulationdata_replacement_policy=None):
        """Finds or creates the LicensePool for an Identifier, updates it
        with the given circulationdata, then creates a Work for the book.

        TODO:  Makes assumption of one license pool per identifier.  In a 
        later branch, this will change.

        :return: The Identifier (if successful) or an appropriate
        CoverageFailure (if not).
        """
        circulationdata_replacement_policy = circulationdata_replacement_policy or (
            ReplacementPolicy.from_license_source())

        pool = self.license_pool(identifier)
        if isinstance(pool, CoverageFailure):
            return pool

        if not circulationdata:
            e = "Did not receive circulationdata from input source"
            return CoverageFailure(identifier,
                                   e,
                                   data_source=self.output_source,
                                   transient=True)

        try:
            circulationdata.apply(
                pool,
                replace=circulationdata_replacement_policy,
            )
        except Exception as e:
            self.log.warn("Error applying circulationdata to pool %d: %s",
                          pool.id,
                          e,
                          exc_info=e)
            return CoverageFailure(identifier,
                                   repr(e),
                                   data_source=self.output_source,
                                   transient=True)

        return identifier
    def test_classifications_from_another_source_not_updated(self):

        # Set up an edition whose primary identifier has two
        # classifications.
        source1 = DataSource.lookup(self._db, DataSource.AXIS_360)
        source2 = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        edition = self._edition()
        identifier = edition.primary_identifier
        c1 = identifier.classify(source1, Subject.TAG, "i will persist")
        c2 = identifier.classify(source2, Subject.TAG, "i will perish")

        # Now we get some new metadata from source #2.
        subjects = [SubjectData(type=Subject.TAG, identifier="i will conquer")]
        metadata = Metadata(subjects=subjects, data_source=source2)
        replace = ReplacementPolicy(subjects=True)
        metadata.apply(edition, replace=replace)

        # The old classification from source #2 has been destroyed.
        # The old classification from source #1 is still there.
        eq_(['i will conquer', 'i will persist'],
            sorted([x.subject.identifier for x in identifier.classifications]))
Exemple #26
0
    def _set_metadata(self,
                      identifier,
                      metadata,
                      metadata_replacement_policy=None):
        """Finds or creates the Edition for an Identifier, updates it
        with the given metadata.

        :return: The Identifier (if successful) or an appropriate
        CoverageFailure (if not).
        """
        metadata_replacement_policy = metadata_replacement_policy or (
            ReplacementPolicy.from_metadata_source())

        edition = self.edition(identifier)
        if isinstance(edition, CoverageFailure):
            return edition

        if not metadata:
            e = "Did not receive metadata from input source"
            return CoverageFailure(identifier,
                                   e,
                                   data_source=self.output_source,
                                   transient=True)

        try:
            metadata.apply(
                edition,
                replace=metadata_replacement_policy,
            )
        except Exception as e:
            self.log.warn("Error applying metadata to edition %d: %s",
                          edition.id,
                          e,
                          exc_info=e)
            return CoverageFailure(identifier,
                                   repr(e),
                                   data_source=self.output_source,
                                   transient=True)

        return identifier
    def test_set_metadata_incorporates_replacement_policy(self):
        """Make sure that if a ReplacementPolicy is passed in to
        set_metadata(), the policy's settings (and those of its
        .presentation_calculation_policy) are respected.
        """

        edition, pool = self._edition(with_license_pool=True)
        identifier = edition.primary_identifier

        # All images and open-access content should be uploaded to
        # this 'mirror'.
        mirror = DummyS3Uploader()
        http = DummyHTTPClient()
        http.queue_response(
            200,
            content='I am an epub.',
            media_type=Representation.EPUB_MEDIA_TYPE,
        )

        class Tripwire(PresentationCalculationPolicy):
            # This class sets a variable if one of its properties is
            # accessed.
            def __init__(self, *args, **kwargs):
                self.tripped = False

            def __getattr__(self, name):
                self.tripped = True
                return True

        presentation_calculation_policy = Tripwire()

        metadata_replacement_policy = ReplacementPolicy(
            mirror=mirror,
            http_get=http.do_get,
            presentation_calculation_policy=presentation_calculation_policy)

        circulationdata_replacement_policy = ReplacementPolicy(
            mirror=mirror,
            http_get=http.do_get,
        )

        output_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        provider = CoverageProvider("service", [identifier.type],
                                    output_source)

        metadata = Metadata(output_source)
        # We've got a CirculationData object that includes an open-access download.
        link = LinkData(rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                        href="http://foo.com/")
        circulationdata = CirculationData(
            output_source,
            primary_identifier=metadata.primary_identifier,
            links=[link])

        provider.set_metadata_and_circulation_data(
            identifier,
            metadata,
            circulationdata,
            metadata_replacement_policy=metadata_replacement_policy,
            circulationdata_replacement_policy=
            circulationdata_replacement_policy,
        )

        # The open-access download was 'downloaded' and 'mirrored'.
        [mirrored] = mirror.uploaded
        eq_("http://foo.com/", mirrored.url)
        assert mirrored.mirror_url.endswith(
            "/%s/%s.epub" % (identifier.identifier, edition.title))

        # The book content was removed from the db after it was
        # mirrored successfully.
        eq_(None, mirrored.content)

        # Our custom PresentationCalculationPolicy was used when
        # determining whether to recalculate the work's
        # presentation. We know this because the tripwire was
        # triggered.
        eq_(True, presentation_calculation_policy.tripped)
Exemple #28
0
    def test_open_access_content_mirrored(self):
        # Make sure that open access material links are translated to our S3 buckets, and that 
        # commercial material links are left as is.
        # Note: Mirroring tests passing does not guarantee that all code now 
        # correctly calls on CirculationData, as well as Metadata.  This is a risk.

        mirror = DummyS3Uploader()
        # Here's a book.
        edition, pool = self._edition(with_license_pool=True)

        # Here's a link to the content of the book, which will be mirrored.
        link_mirrored = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD, href="http://example.com/",
            media_type=Representation.EPUB_MEDIA_TYPE,
            content="i am a tiny book"
        )

        # This link will not be mirrored.
        link_unmirrored = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD, href="http://example.com/2",
            media_type=Representation.EPUB_MEDIA_TYPE,
            content="i am a pricy book"
        )

        # Apply the metadata.
        policy = ReplacementPolicy(mirror=mirror)

        metadata = Metadata(data_source=edition.data_source, 
        	links=[link_mirrored, link_unmirrored],
    	)
        metadata.apply(edition, replace=policy)
        # make sure the refactor is done right, and metadata does not upload
        eq_(0, len(mirror.uploaded))


        circulation_data = CirculationData(
        	data_source=edition.data_source, 
        	primary_identifier=edition.primary_identifier,
        	links=[link_mirrored, link_unmirrored],
        )
        circulation_data.apply(pool, replace=policy)
        
        # make sure the refactor is done right, and circulation does upload 
        eq_(1, len(mirror.uploaded))

        # Only the open-access link has been 'mirrored'.
        [book] = mirror.uploaded

        # It's remained an open-access link.
        eq_(
            [Hyperlink.OPEN_ACCESS_DOWNLOAD], 
            [x.rel for x in book.resource.links]
        )


        # It's been 'mirrored' to the appropriate S3 bucket.
        assert book.mirror_url.startswith('http://s3.amazonaws.com/test.content.bucket/')
        expect = '/%s/%s.epub' % (
            edition.primary_identifier.identifier,
            edition.title
        )
        assert book.mirror_url.endswith(expect)

        # make sure the mirrored link is safely on edition
        sorted_edition_links = sorted(edition.license_pool.identifier.links, key=lambda x: x.rel)
        unmirrored_representation, mirrored_representation = [edlink.resource.representation for edlink in sorted_edition_links]
        assert mirrored_representation.mirror_url.startswith('http://s3.amazonaws.com/test.content.bucket/')

        # make sure the unmirrored link is safely on edition
        eq_('http://example.com/2', unmirrored_representation.url)
        # make sure the unmirrored link has not been translated to an S3 URL
        eq_(None, unmirrored_representation.mirror_url)
Exemple #29
0
class TitleFromExternalList(object):
    """This class helps you convert data from external lists into Simplified
    Edition and CustomListEntry objects.
    """
    def __init__(self, metadata, first_appearance, most_recent_appearance,
                 annotation):
        self.log = logging.getLogger("Title from external list")
        self.metadata = metadata
        self.first_appearance = first_appearance or most_recent_appearance
        self.most_recent_appearance = (most_recent_appearance
                                       or datetime.datetime.now())
        self.annotation = annotation

    def to_custom_list_entry(self,
                             custom_list,
                             metadata_client,
                             overwrite_old_data=False):
        """Turn this object into a CustomListEntry with associated Edition."""
        _db = Session.object_session(custom_list)
        edition = self.to_edition(_db, metadata_client, overwrite_old_data)

        list_entry, is_new = get_one_or_create(_db,
                                               CustomListEntry,
                                               edition=edition,
                                               customlist=custom_list)

        if (not list_entry.first_appearance
                or list_entry.first_appearance > self.first_appearance):
            if list_entry.first_appearance:
                self.log.info(
                    "I thought %s first showed up at %s, but then I saw it earlier, at %s!",
                    self.metadata.title, list_entry.first_appearance,
                    self.first_appearance)
            list_entry.first_appearance = self.first_appearance

        if (not list_entry.most_recent_appearance
                or list_entry.most_recent_appearance <
                self.most_recent_appearance):
            if list_entry.most_recent_appearance:
                self.log.info(
                    "I thought %s most recently showed up at %s, but then I saw it later, at %s!",
                    self.metadata.title, list_entry.most_recent_appearance,
                    self.most_recent_appearance)
            list_entry.most_recent_appearance = self.most_recent_appearance

        list_entry.annotation = self.annotation

        list_entry.set_work(self.metadata, metadata_client)
        return list_entry, is_new

    def to_edition(self, _db, metadata_client, overwrite_old_data=False):
        """Create or update an Edition object for this list item.

        We have two goals here:

        1. Make sure there is an Edition representing the list's view
        of the data.

        2. If at all possible, connect the Edition's primary
        identifier to other identifiers in the system, identifiers
        which may have associated LicensePools. This can happen in two
        ways:

        2a. The Edition's primary identifier, or other identifiers
        associated with the Edition, may be directly associated with
        LicensePools. This can happen if a book's list entry includes
        (e.g.) an Overdrive ID.

        2b. The Edition's permanent work ID may identify it as the
        same work as other Editions in the system. In that case this
        Edition's primary identifier may be associated with the other
        Editions' primary identifiers. (p=0.85)
        """
        self.log.info("Converting %s to an Edition object.",
                      self.metadata.title)

        # Make sure the Metadata object's view of the book is present
        # as an Edition. This will also associate all its identifiers
        # with its primary identifier, and calculate the permanent work
        # ID if possible.
        try:
            edition, is_new = self.metadata.edition(_db)
        except ValueError, e:
            self.log.info("Ignoring %s, no corresponding edition.",
                          self.metadata.title)
            return None
        if overwrite_old_data:
            policy = ReplacementPolicy.from_metadata_source(
                even_if_not_apparently_updated=True)
        else:
            policy = ReplacementPolicy.append_only(
                even_if_not_apparently_updated=True)
        self.metadata.apply(
            edition=edition,
            metadata_client=metadata_client,
            replace=policy,
        )
        self.metadata.associate_with_identifiers_based_on_permanent_work_id(
            _db)
        return edition
    def test_image_scale_and_mirror(self):
        # Make sure that open access material links are translated to our S3 buckets, and that
        # commercial material links are left as is.
        # Note: mirroring links is now also CirculationData's job.  So the unit tests
        # that test for that have been changed to call to mirror cover images.
        # However, updated tests passing does not guarantee that all code now
        # correctly calls on CirculationData, too.  This is a risk.

        mirror = DummyS3Uploader()
        edition, pool = self._edition(with_license_pool=True)
        content = open(self.sample_cover_path("test-book-cover.png")).read()
        l1 = LinkData(rel=Hyperlink.IMAGE,
                      href="http://example.com/",
                      media_type=Representation.JPEG_MEDIA_TYPE,
                      content=content)
        thumbnail_content = open(
            self.sample_cover_path("tiny-image-cover.png")).read()
        l2 = LinkData(rel=Hyperlink.THUMBNAIL_IMAGE,
                      href="http://example.com/thumb.jpg",
                      media_type=Representation.JPEG_MEDIA_TYPE,
                      content=content)

        # When we call metadata.apply, all image links will be scaled and
        # 'mirrored'.
        policy = ReplacementPolicy(mirror=mirror)
        metadata = Metadata(links=[l1, l2], data_source=edition.data_source)
        metadata.apply(edition, replace=policy)

        # Two Representations were 'mirrored'.
        image, thumbnail = mirror.uploaded

        # The image...
        [image_link] = image.resource.links
        eq_(Hyperlink.IMAGE, image_link.rel)

        # And its thumbnail.
        eq_(image, thumbnail.thumbnail_of)

        # The original image is too big to be a thumbnail.
        eq_(600, image.image_height)
        eq_(400, image.image_width)

        # The thumbnail is the right height.
        eq_(Edition.MAX_THUMBNAIL_HEIGHT, thumbnail.image_height)
        eq_(Edition.MAX_THUMBNAIL_WIDTH, thumbnail.image_width)

        # The thumbnail is newly generated from the full-size
        # image--the thumbnail that came in from the OPDS feed was
        # ignored.
        assert thumbnail.url != l2.href
        assert thumbnail.content != l2.content

        # Both images have been 'mirrored' to Amazon S3.
        assert image.mirror_url.startswith(
            'http://s3.amazonaws.com/test.cover.bucket/')
        assert image.mirror_url.endswith('cover.jpg')

        # The thumbnail image has been converted to PNG.
        assert thumbnail.mirror_url.startswith(
            'http://s3.amazonaws.com/test.cover.bucket/scaled/300/')
        assert thumbnail.mirror_url.endswith('cover.png')