def setup(self):
     super(TestCoverageProvider, self).setup()
     gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
     self.input_identifier_types = gutenberg.primary_identifier_type
     self.output_source = DataSource.lookup(self._db, DataSource.OCLC)
     self.edition = self._edition(gutenberg.name)
     self.identifier = self.edition.primary_identifier
 def setup(self):
     super(TestPresentationReadyMonitor, self).setup()
     self.gutenberg = Identifier.GUTENBERG_ID
     self.oclc = DataSource.lookup(self._db, DataSource.OCLC)
     self.overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)
     self.edition, self.edition_license_pool = self._edition(
         DataSource.GUTENBERG, with_license_pool=True)
     self.work = self._work(DataSource.GUTENBERG, with_license_pool=True)
     # Don't fake that the work is presentation ready, as we usually do,
     # because presentation readiness is what we're trying to test.
     self.work.presentation_ready = False
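Both setup fixtures above resolve well-known DataSource rows by name before building editions. For reference, a minimal sketch of that lookup pattern, assuming a session obtained from this codebase's model module (variable names are illustrative):

# Sketch only: DataSource.lookup returns the singleton row for a
# well-known source name.
from model import DataSource, production_session

_db = production_session()
gutenberg = DataSource.lookup(_db, DataSource.GUTENBERG)
oclc = DataSource.lookup(_db, DataSource.OCLC)

# Each source records the identifier type it uses for foreign records,
# e.g. Gutenberg IDs for Gutenberg.
print gutenberg.primary_identifier_type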
Example #3
 def test_cover_image_root(self):
     with self.temp_config():
         gutenberg_illustrated = DataSource.lookup(
             self._db, DataSource.GUTENBERG_COVER_GENERATOR)
         overdrive = DataSource.lookup(
             self._db, DataSource.OVERDRIVE)
         eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/Gutenberg%20Illustrated/",
             S3Uploader.cover_image_root(gutenberg_illustrated))
         eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/Overdrive/",
             S3Uploader.cover_image_root(overdrive))
         eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/scaled/300/Overdrive/", 
             S3Uploader.cover_image_root(overdrive, 300))
Example #4
    def test_book_url(self):
        identifier = self._identifier(foreign_id="ABOOK")
        buckets = {S3Uploader.OA_CONTENT_BUCKET_KEY: 'thebooks'}
        uploader = self._uploader(**buckets)
        m = uploader.book_url

        eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.epub',
            m(identifier))

        # The default extension is .epub, but a custom extension can
        # be specified.
        eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.pdf',
            m(identifier, extension='pdf'))

        eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.pdf',
            m(identifier, extension='.pdf'))

        # If a data source is provided, the book is stored underneath the
        # data source.
        unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
        eq_(
            u'https://s3.amazonaws.com/thebooks/unglue.it/Gutenberg+ID/ABOOK.epub',
            m(identifier, data_source=unglueit))

        # If a title is provided, the book's filename incorporates the
        # title, for the benefit of people who download the book onto
        # their hard drive.
        eq_(
            u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK/On+Books.epub',
            m(identifier, title="On Books"))

        # Non-open-access content can't be stored.
        assert_raises(NotImplementedError, m, identifier, open_access=False)
Example #5
    def test_cover_image_root(self):
        bucket = u'test-book-covers-s3-bucket'
        m = S3Uploader.cover_image_root

        gutenberg_illustrated = DataSource.lookup(
            self._db, DataSource.GUTENBERG_COVER_GENERATOR)
        overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        eq_(
            "https://s3.amazonaws.com/test-book-covers-s3-bucket/Gutenberg+Illustrated/",
            m(bucket, gutenberg_illustrated))
        eq_("https://s3.amazonaws.com/test-book-covers-s3-bucket/Overdrive/",
            m(bucket, overdrive))
        eq_(
            "https://s3.amazonaws.com/test-book-covers-s3-bucket/scaled/300/Overdrive/",
            m(bucket, overdrive, 300))
Example #6
    def _customlist(self,
                    foreign_identifier=None,
                    name=None,
                    data_source_name=DataSource.NYT,
                    num_entries=1,
                    entries_exist_as_works=True):
        data_source = DataSource.lookup(self._db, data_source_name)
        foreign_identifier = foreign_identifier or self._str
        now = datetime.utcnow()
        customlist, ignore = get_one_or_create(
            self._db,
            CustomList,
            create_method_kwargs=dict(
                created=now,
                updated=now,
                name=name or self._str,
                description=self._str,
            ),
            data_source=data_source,
            foreign_identifier=foreign_identifier)

        editions = []
        for i in range(num_entries):
            if entries_exist_as_works:
                work = self._work(with_open_access_download=True)
                edition = work.presentation_edition
            else:
                edition = self._edition(data_source_name, title="Item %s" % i)
                edition.permanent_work_id = "Permanent work ID %s" % self._str
            customlist.add_entry(edition,
                                 "Annotation %s" % i,
                                 first_appearance=now)
            editions.append(edition)
        return customlist, editions
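A usage sketch for the helper above, assuming it is called from a test class in this suite (argument values are illustrative):

# Sketch only: build a three-entry NYT list whose entries exist as Works.
custom_list, editions = self._customlist(
    name="Test List", num_entries=3, entries_exist_as_works=True)
assert len(editions) == 3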
Example #7
    def test_extract_data_from_feedparser_handles_exception(self):
        class DoomedFeedparserOPDSImporter(OPDSImporter):
            """An importer that can't extract metadata from feedparser."""
            @classmethod
            def _data_detail_for_feedparser_entry(cls, entry, data_source):
                raise Exception("Utter failure!")

        data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

        values, failures = DoomedFeedparserOPDSImporter.extract_data_from_feedparser(
            self.content_server_mini_feed, data_source)

        # No metadata was extracted.
        eq_(0, len(values.keys()))

        # There are 2 failures, both from exceptions. The 202 message
        # found in content_server_mini.opds is not extracted
        # here--it's extracted by extract_metadata_from_elementtree.
        eq_(2, len(failures))

        # The first error message became a CoverageFailure.
        failure = failures[
            'urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441']
        assert isinstance(failure, CoverageFailure)
        eq_(True, failure.transient)
        assert "Utter failure!" in failure.exception

        # The second error message became a CoverageFailure.
        failure = failures[
            'urn:librarysimplified.org/terms/id/Gutenberg%20ID/10557']
        assert isinstance(failure, CoverageFailure)
        eq_(True, failure.transient)
        assert "Utter failure!" in failure.exception
    def test_mirror_404_error(self):
        mirror = DummyS3Uploader()
        h = DummyHTTPClient()
        h.queue_response(404)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)

        link = LinkData(
            rel=Hyperlink.IMAGE,
            media_type=Representation.JPEG_MEDIA_TYPE,
            href="http://example.com/",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        m = Metadata(data_source=data_source)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        # Since we got a 404 error, the cover image was not mirrored.
        eq_(404, link_obj.resource.representation.status_code)
        eq_(None, link_obj.resource.representation.mirror_url)
        eq_([], mirror.uploaded)
    def _customlist(self, foreign_identifier=None,
                    name=None,
                    data_source_name=DataSource.NYT, num_entries=1,
                    entries_exist_as_works=True
    ):
        data_source = DataSource.lookup(self._db, data_source_name)
        foreign_identifier = foreign_identifier or self._str
        now = datetime.utcnow()
        customlist, ignore = get_one_or_create(
            self._db, CustomList,
            create_method_kwargs=dict(
                created=now,
                updated=now,
                name=name or self._str,
                description=self._str,
                ),
            data_source=data_source,
            foreign_identifier=foreign_identifier
        )

        editions = []
        for i in range(num_entries):
            if entries_exist_as_works:
                work = self._work(with_open_access_download=True)
                edition = work.presentation_edition
            else:
                edition = self._edition(
                    data_source_name, title="Item %s" % i)
                edition.permanent_work_id = "Permanent work ID %s" % self._str
            customlist.add_entry(
                edition, "Annotation %s" % i, first_appearance=now)
            editions.append(edition)
        return customlist, editions
    def to_customlist(self, _db, dictreader):
        """Turn the CSV file in `dictreader` into a CustomList.

        TODO: Keep track of the list's current members. If any item
        was on the list but is no longer on the list, set its
        last_appeared date to its most recent appearance.
        """
        data_source = DataSource.lookup(_db, self.data_source_name)
        now = datetime.datetime.utcnow()

        # Find or create the CustomList object itself.
        custom_list, was_new = get_one_or_create(
            _db,
            CustomList,
            data_source=data_source,
            foreign_identifier=self.foreign_identifier,
            create_method_kwargs = dict(
                created=now,
            )
        )
        custom_list.updated = now

        # Turn the rows of the CSV file into a sequence of Metadata
        # objects, then turn each Metadata into a CustomListEntry object.
        for metadata in self.to_metadata(dictreader):
            entry = self.metadata_to_list_entry(
                custom_list, data_source, now, metadata)
Example #11
    def test_staff_picks_and_best_sellers_sublane(self):
        staff_picks, ignore = self._customlist(
            foreign_identifier=u"Staff Picks",
            name=u"Staff Picks!",
            data_source_name=DataSource.LIBRARY_STAFF,
            num_entries=0)
        best_sellers, ignore = self._customlist(
            foreign_identifier=u"NYT Best Sellers",
            name=u"Best Sellers!",
            data_source_name=DataSource.NYT,
            num_entries=0)
        lane = Lane(self._db,
                    "Everything",
                    include_staff_picks=True,
                    include_best_sellers=True)

        # A staff picks sublane and a best-sellers sublane have been
        # created for us.
        best, picks = lane.sublanes.lanes
        eq_("Best Sellers", best.display_name)
        eq_("Everything - Best Sellers", best.name)
        nyt = DataSource.lookup(self._db, DataSource.NYT)
        eq_(nyt.id, best.list_data_source_id)

        eq_("Staff Picks", picks.display_name)
        eq_("Everything - Staff Picks", picks.name)
        eq_([staff_picks.id], picks.list_ids)
Example #12
    def _edition(self, data_source_name=DataSource.GUTENBERG,
                    identifier_type=Identifier.GUTENBERG_ID,
                    with_license_pool=False, with_open_access_download=False,
                    title=None, language="eng", authors=None, identifier_id=None):
        id = identifier_id or self._str
        source = DataSource.lookup(self._db, data_source_name)
        wr = Edition.for_foreign_id(
            self._db, source, identifier_type, id)[0]
        if not title:
            title = self._str
        wr.title = unicode(title)
        if language:
            wr.language = language
        if authors is None:
            authors = self._str
        if isinstance(authors, basestring):
            authors = [authors]
        if authors != []:
            wr.add_contributor(unicode(authors[0]), Contributor.PRIMARY_AUTHOR_ROLE)
            wr.author = unicode(authors[0])
        for author in authors[1:]:
            wr.add_contributor(unicode(author), Contributor.AUTHOR_ROLE)
            
        if with_license_pool or with_open_access_download:
            pool = self._licensepool(wr, data_source_name=data_source_name,
                                     with_open_access_download=with_open_access_download)  

            pool.set_presentation_edition()
            return wr, pool
        return wr
Example #13
 def __init__(self,
              _db,
              api,
              datasource,
              batch_size=10,
              metadata_replacement_policy=None,
              circulationdata_replacement_policy=None,
              cutoff_time=None):
     self._db = _db
     self.api = api
     output_source = DataSource.lookup(_db, datasource)
     input_identifier_types = [output_source.primary_identifier_type]
     service_name = "%s Bibliographic Coverage Provider" % datasource
     metadata_replacement_policy = (
         metadata_replacement_policy
         or ReplacementPolicy.from_metadata_source())
     circulationdata_replacement_policy = (
         circulationdata_replacement_policy
         or ReplacementPolicy.from_license_source())
     self.metadata_replacement_policy = metadata_replacement_policy
     self.circulationdata_replacement_policy = circulationdata_replacement_policy
     super(BibliographicCoverageProvider,
           self).__init__(service_name,
                          input_identifier_types,
                          output_source,
                          batch_size=batch_size,
                          cutoff_time=cutoff_time)
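A hedged instantiation sketch for the constructor above; the api object and the keyword values are illustrative stand-ins:

# Sketch only: the datasource argument is a source *name*, which the
# constructor resolves via DataSource.lookup.
provider = BibliographicCoverageProvider(
    _db, api=my_api, datasource=DataSource.OVERDRIVE, batch_size=25)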
    def test_non_open_access_book_not_mirrored(self):
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        content = "foo"
        link = LinkData(rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                        media_type=Representation.EPUB_MEDIA_TYPE,
                        href="http://example.com/",
                        content=content,
                        rights_uri=RightsStatus.IN_COPYRIGHT)

        identifier = self._identifier()
        link_obj, is_new = identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            media_type=link.media_type,
            content=link.content,
        )

        # The Hyperlink object makes it look like an open-access book,
        # but the context we have from the OPDS feed says that it's
        # not.
        m.mirror_link(None, data_source, link, link_obj, policy)

        # No HTTP requests were made.
        eq_([], h.requests)

        # Nothing was uploaded.
        eq_([], mirror.uploaded)
Example #15
 def data_source(self):
     """Look up or create a DataSource object representing the
     source of this OPDS feed.
     """
     return DataSource.lookup(
         self._db, self.data_source_name, autocreate=True,
         offers_licenses=self.data_source_offers_licenses
     )
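The autocreate flag above lets an importer register a feed-specific source on first contact instead of failing the lookup. A small sketch, with a hypothetical feed name:

# Sketch only: created on first lookup, returned unchanged afterwards.
source = DataSource.lookup(
    _db, "Example OPDS Feed", autocreate=True, offers_licenses=False)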
    def test_mirror_open_access_link_mirror_failure(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        content = open(self.sample_cover_path("test-book-cover.png")).read()
        link = LinkData(rel=Hyperlink.IMAGE,
                        media_type=Representation.JPEG_MEDIA_TYPE,
                        href="http://example.com/",
                        content=content)

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.JPEG_MEDIA_TYPE)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # But mirroring failed.
        assert representation.mirror_exception != None
        eq_(None, representation.mirrored_at)
        eq_(link.media_type, representation.media_type)
        eq_(link.href, representation.url)

        # The mirror url should still be set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith(
            "%s/cover.jpg" % edition.primary_identifier.identifier)

        # Book content is still there since it wasn't mirrored.
        assert representation.content != None

        # The edition's identifier-associated license pool should not be
        # suppressed just because the cover image failed to mirror.
        eq_(False, pool.suppressed)

        # The license pool's license_exception column is only filled in
        # when a Hyperlink.OPEN_ACCESS_DOWNLOAD-type epub fails, not a
        # cover image.
        eq_(None, pool.license_exception)
Example #17
 def _credential(self, data_source_name=DataSource.GUTENBERG,
                 type=None, patron=None):
     data_source = DataSource.lookup(self._db, data_source_name)
     type = type or self._str
     patron = patron or self._patron()
     credential, is_new = Credential.persistent_token_create(
         self._db, data_source, type, patron
     )
     return credential
Example #19
    def data_source(self):
        """Look up the DataSource object corresponding to the
        service we're running this data through.

        Out of an excess of caution, we look up the DataSource every
        time, rather than storing it, in case a CoverageProvider is
        ever used in an environment where the database session is
        scoped (e.g. the circulation manager).
        """
        return DataSource.lookup(self._db, self.DATA_SOURCE_NAME)
    def test_mirror_with_content_modifier(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader()

        def dummy_content_modifier(representation):
            representation.content = "Replaced Content"

        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror,
                                   content_modifier=dummy_content_modifier,
                                   http_get=h.do_get)

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href="http://example.com/test.epub",
            content="I'm an epub",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # The mirror url is set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith(
            "%s/%s.epub" %
            (edition.primary_identifier.identifier, edition.title))

        # Content isn't there since it was mirrored.
        eq_(None, representation.content)

        # The representation was mirrored, with the modified content.
        eq_([representation], mirror.uploaded)
        eq_(["Replaced Content"], mirror.content)
Example #21
    def test_mirror_open_access_link_mirror_failure(self):
        mirror = DummyS3Uploader(fail=True)
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # The representation was fetched successfully.
        eq_(None, representation.fetch_exception)
        assert representation.fetched_at != None

        # But mirroring failed.
        assert representation.mirror_exception != None
        eq_(None, representation.mirrored_at)
        eq_(link.media_type, representation.media_type)
        eq_(link.href, representation.url)

        # The mirror url should still be set.
        assert "Gutenberg" in representation.mirror_url
        assert representation.mirror_url.endswith("%s.epub" % edition.title)

        # Book content is still there since it wasn't mirrored.
        assert representation.content != None

        # The license pool is suppressed when mirroring fails.
        eq_(True, pool.suppressed)
        assert representation.mirror_exception in pool.license_exception
Example #22
    def _licensepool(self, edition, open_access=True,
                     data_source_name=DataSource.GUTENBERG,
                     with_open_access_download=False,
                     set_edition_as_presentation=False,
                     collection=None):
        source = DataSource.lookup(self._db, data_source_name)
        if not edition:
            edition = self._edition(data_source_name)
        collection = collection or self._default_collection
        pool, ignore = get_one_or_create(
            self._db, LicensePool,
            create_method_kwargs=dict(
                open_access=open_access),
            identifier=edition.primary_identifier,
            data_source=source,
            collection=collection,
            availability_time=datetime.utcnow()
        )

        if set_edition_as_presentation:
            pool.presentation_edition = edition

        if with_open_access_download:
            pool.open_access = True
            url = "http://foo.com/" + self._str
            media_type = MediaTypes.EPUB_MEDIA_TYPE
            link, new = pool.identifier.add_link(
                Hyperlink.OPEN_ACCESS_DOWNLOAD, url,
                source, media_type
            )

            # Add a DeliveryMechanism for this download
            pool.set_delivery_mechanism(
                media_type,
                DeliveryMechanism.NO_DRM,
                RightsStatus.GENERIC_OPEN_ACCESS,
                link.resource,
            )

            representation, is_new = self._representation(
                url, media_type, "Dummy content", mirrored=True)
            link.resource.representation = representation
        else:

            # Add a DeliveryMechanism for this licensepool
            pool.set_delivery_mechanism(
                MediaTypes.EPUB_MEDIA_TYPE,
                DeliveryMechanism.ADOBE_DRM,
                RightsStatus.UNKNOWN,
                None
            )
            pool.licenses_owned = pool.licenses_available = 1

        return pool
Example #23
    def test_cover_image_url(self):
        identifier = self._identifier(foreign_id="ABOOK")
        buckets = {S3Uploader.BOOK_COVERS_BUCKET_KEY: 'thecovers'}
        uploader = self._uploader(**buckets)
        m = uploader.cover_image_url

        unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
        identifier = self._identifier(foreign_id="ABOOK")
        eq_(
            u'https://s3.amazonaws.com/thecovers/scaled/601/unglue.it/Gutenberg+ID/ABOOK/filename',
            m(unglueit, identifier, "filename", scaled_size=601))
Example #24
    def test_import_one_feed(self):
        # Check coverage records are created.

        monitor = OPDSImportMonitor(self._db, "http://url",
                                    DataSource.OA_CONTENT_SERVER,
                                    DoomedOPDSImporter)
        data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

        feed = self.content_server_mini_feed

        monitor.import_one_feed(feed, "http://root-url/")

        editions = self._db.query(Edition).all()

        # One edition has been imported
        eq_(1, len(editions))
        [edition] = editions

        # That edition has a CoverageRecord.
        record = CoverageRecord.lookup(
            editions[0].primary_identifier,
            data_source,
            operation=CoverageRecord.IMPORT_OPERATION)
        eq_(CoverageRecord.SUCCESS, record.status)
        eq_(None, record.exception)

        # The edition's primary identifier has a cover link whose
        # relative URL has been resolved relative to the URL we passed
        # into import_one_feed.
        [cover] = [
            x.resource.url for x in editions[0].primary_identifier.links
            if x.rel == Hyperlink.IMAGE
        ]
        eq_("http://root-url/full-cover-image.png", cover)

        # The 202 status message in the feed caused a transient failure.
        # The exception caused a persistent failure.

        coverage_records = self._db.query(CoverageRecord).filter(
            CoverageRecord.operation == CoverageRecord.IMPORT_OPERATION,
            CoverageRecord.status != CoverageRecord.SUCCESS)
        eq_(
            sorted([
                CoverageRecord.TRANSIENT_FAILURE,
                CoverageRecord.PERSISTENT_FAILURE
            ]), sorted([x.status for x in coverage_records]))

        identifier, ignore = Identifier.parse_urn(
            self._db,
            "urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441")
        failure = CoverageRecord.lookup(
            identifier, data_source, operation=CoverageRecord.IMPORT_OPERATION)
        assert "Utter failure!" in failure.exception
    def test_classifications_from_another_source_not_updated(self):

        # Set up an edition whose primary identifier has two
        # classifications.
        source1 = DataSource.lookup(self._db, DataSource.AXIS_360)
        source2 = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        edition = self._edition()
        identifier = edition.primary_identifier
        c1 = identifier.classify(source1, Subject.TAG, "i will persist")
        c2 = identifier.classify(source2, Subject.TAG, "i will perish")

        # Now we get some new metadata from source #2.
        subjects = [SubjectData(type=Subject.TAG, identifier="i will conquer")]
        metadata = Metadata(subjects=subjects, data_source=source2)
        replace = ReplacementPolicy(subjects=True)
        metadata.apply(edition, replace=replace)

        # The old classification from source #2 has been destroyed.
        # The old classification from source #1 is still there.
        eq_(['i will conquer', 'i will persist'],
            sorted([x.subject.identifier for x in identifier.classifications]))
Example #26
 def to_customlist(self, _db):
     """Turn this NYTBestSeller list into a CustomList object."""
     data_source = DataSource.lookup(_db, DataSource.NYT)
     l, was_new = get_one_or_create(
         _db,
         CustomList,
         data_source=data_source,
         foreign_identifier=self.foreign_identifier,
         create_method_kwargs=dict(created=self.created, ))
     l.name = self.name
     l.updated = self.updated
     self.update_custom_list(l)
     return l
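A usage sketch for the method above, assuming an NYTBestSellerList instance named best_seller_list (illustrative):

# Sketch only: materialize the scraped best-seller list as a CustomList.
custom_list = best_seller_list.to_customlist(_db)
print custom_list.name, custom_list.updated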
    def test_parse_list_as_identifiers_with_data_source(self):
        lp1 = self._licensepool(None, data_source_name=DataSource.UNGLUE_IT)
        lp2 = self._licensepool(None, data_source_name=DataSource.FEEDBOOKS)
        lp3 = self._licensepool(None, data_source_name=DataSource.FEEDBOOKS)

        i1, i2, i3 = [lp.identifier for lp in [lp1, lp2, lp3]]
        i1.type = i2.type = Identifier.URI
        source = DataSource.lookup(self._db, DataSource.FEEDBOOKS)

        # Only URIs with a FeedBooks LicensePool are selected.
        identifiers = IdentifierInputScript.parse_identifier_list(
            self._db, Identifier.URI, source, [])
        eq_([i2], identifiers)
 def __init__(self, manager_class, data_source_name, list_identifier,
              list_name, primary_language, description, **manager_kwargs):
     data_source = DataSource.lookup(self._db, data_source_name)
     self.custom_list, is_new = get_one_or_create(
         self._db,
         CustomList,
         data_source_id=data_source.id,
         foreign_identifier=list_identifier,
     )
     self.custom_list.primary_language = primary_language
     self.custom_list.description = description
     self.membership_manager = manager_class(self.custom_list,
                                             **manager_kwargs)
Example #29
 def setup(self):
     super(TestCustomListFromCSV, self).setup()
     self.data_source = DataSource.lookup(self._db,
                                          DataSource.LIBRARY_STAFF)
     self.metadata = DummyMetadataClient()
     self.metadata.lookups['Octavia Butler'] = 'Butler, Octavia'
     self.l = CustomListFromCSV(self.data_source.name,
                                "Test list",
                                metadata_client=self.metadata,
                                display_author_field='author',
                                identifier_fields={Identifier.ISBN: "isbn"})
     self.custom_list, ignore = self._customlist(
         data_source_name=self.data_source.name, num_entries=0)
     self.now = datetime.datetime.utcnow()
    def test_register_equivalency(self):
        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        id = "549"

        # We've got a record.
        record, was_new = Edition.for_foreign_id(self._db, data_source,
                                                 Identifier.GUTENBERG_ID, id)

        # Then we look it up and discover another identifier for it.
        data_source_2 = DataSource.lookup(self._db, DataSource.OCLC)
        record2, was_new = Edition.for_foreign_id(self._db, data_source_2,
                                                  Identifier.OCLC_NUMBER, "22")

        eq = record.primary_identifier.equivalent_to(
            data_source_2, record2.primary_identifier, 1)

        eq_(eq.input, record.primary_identifier)
        eq_(eq.output, record2.primary_identifier)
        eq_(eq.data_source, data_source_2)

        eq_([eq], record.primary_identifier.equivalencies)

        eq_(set([record, record2]), set(record.equivalent_editions().all()))
    def test_mirror_open_access_link_fetch_failure(self):
        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        m = Metadata(data_source=data_source)

        mirror = DummyS3Uploader()
        h = DummyHTTPClient()

        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

        link = LinkData(
            rel=Hyperlink.IMAGE,
            media_type=Representation.JPEG_MEDIA_TYPE,
            href="http://example.com/",
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )
        h.queue_response(403)

        m.mirror_link(edition, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        assert representation.fetch_exception != None
        eq_(None, representation.mirror_exception)
        eq_(None, representation.mirror_url)
        eq_(link.href, representation.url)
        assert representation.fetched_at != None
        eq_(None, representation.mirrored_at)

        # The edition's identifier-associated license pool should not be
        # suppressed just because the image fetch failed.
        eq_(False, pool.suppressed)

        # The license pool's license_exception column is only filled in
        # when fetching a Hyperlink.OPEN_ACCESS_DOWNLOAD-type epub fails.
        eq_(None, pool.license_exception)
Example #32
    def test_mirror_open_access_link_fetch_failure(self):
        mirror = DummyS3Uploader()
        h = DummyHTTPClient()

        edition, pool = self._edition(with_license_pool=True)

        data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)
        circulation_data = CirculationData(
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )

        link_obj, ignore = edition.primary_identifier.add_link(
            rel=link.rel,
            href=link.href,
            data_source=data_source,
            license_pool=pool,
            media_type=link.media_type,
            content=link.content,
        )

        h.queue_response(403)

        circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

        representation = link_obj.resource.representation

        # Fetch failed, so we should have a fetch exception but no mirror url.
        assert representation.fetch_exception != None
        eq_(None, representation.mirror_exception)
        eq_(None, representation.mirror_url)
        eq_(link.href, representation.url)
        assert representation.fetched_at != None
        eq_(None, representation.mirrored_at)

        # The license pool is suppressed when fetch fails.
        eq_(True, pool.suppressed)
        assert representation.fetch_exception in pool.license_exception
 def __init__(self, _db=None, cmd_args=None):
     _db = _db or self._db
     args = self.parse_command_line(_db, cmd_args=cmd_args)
     self.identifier_type = args.identifier_type
     self.identifiers = args.identifiers
     subject_type = args.subject_type
     subject_identifier = args.subject_identifier
     subject_name = args.subject_name
     if not subject_name and not subject_identifier:
         raise ValueError(
             "Either subject-name or subject-identifier must be provided.")
     self.data_source = DataSource.lookup(_db, args.data_source)
     self.weight = args.weight
     self.subject, ignore = Subject.lookup(_db,
                                           subject_type,
                                           subject_identifier,
                                           subject_name,
                                           autocreate=args.create_subject)
    def _edition(self, data_source_name=DataSource.GUTENBERG,
                 identifier_type=Identifier.GUTENBERG_ID,
                 with_license_pool=False, with_open_access_download=False,
                 title=None, language="eng", authors=None, identifier_id=None,
                 series=None, collection=None, publicationDate=None
    ):
        id = identifier_id or self._str
        source = DataSource.lookup(self._db, data_source_name)
        wr = Edition.for_foreign_id(
            self._db, source, identifier_type, id)[0]
        if not title:
            title = self._str
        wr.title = unicode(title)
        wr.medium = Edition.BOOK_MEDIUM
        if series:
            wr.series = series
        if language:
            wr.language = language
        if authors is None:
            authors = self._str
        if isinstance(authors, basestring):
            authors = [authors]
        if authors != []:
            wr.add_contributor(unicode(authors[0]), Contributor.PRIMARY_AUTHOR_ROLE)
            wr.author = unicode(authors[0])
        for author in authors[1:]:
            wr.add_contributor(unicode(author), Contributor.AUTHOR_ROLE)
        if publicationDate:
            wr.published = publicationDate

        if with_license_pool or with_open_access_download:
            pool = self._licensepool(
                wr, data_source_name=data_source_name,
                with_open_access_download=with_open_access_download,
                collection=collection
            )

            pool.set_presentation_edition()
            return wr, pool
        return wr
import os
import site
import sys
from nose.tools import set_trace

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from model import DataSource, LicensePool, SessionManager, Work, Identifier
from model import production_session

if __name__ == "__main__":
    session = production_session()

    data_source_name = sys.argv[1]
    identifier = sys.argv[2]
    data_source = DataSource.lookup(session, data_source_name)
    wid, ignore = Identifier.for_foreign_id(session, data_source.primary_identifier_type, identifier, False)
    pool = (
        session.query(LicensePool)
        .filter(LicensePool.data_source == data_source)
        .filter(LicensePool.identifier == wid)
        .one()
    )
    primary_edition = pool.edition()
    old_work = primary_edition.work
    if old_work:
        old_work.license_pools.remove(pool)
    primary_edition.work = None
    pool.calculate_work()
    work = pool.work
    work.calculate_presentation()
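    # Assumption: persist the recalculated work; without an explicit
    # commit the changes above are discarded when the session closes.
    session.commit()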
import os
import site
from nose.tools import set_trace

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))
from model import (
    Edition,
    production_session,
    DataSource,
    Work
)
from sqlalchemy.orm import joinedload

a = 0
db = production_session()
start = 0
batch_size = 1000
source = DataSource.lookup(db, DataSource.THREEM)
base_query = (
    db.query(Work)
    .join(Work.primary_edition)
    .filter(Edition.data_source == source)
    .order_by(Work.id)
    .options(joinedload('summary'), joinedload('primary_edition', 'cover'))
    .limit(batch_size)
)
batch = base_query.offset(start).all()
while batch:
    for work in batch:
        if not work.primary_edition:
            continue
        if work.primary_edition.cover:
            work.primary_edition.set_cover(work.primary_edition.cover)
            print work.primary_edition.cover_thumbnail_url
        else:
            print "!COVER %s" % work.primary_edition.primary_identifier
        if work.summary:
            work.set_summary(work.summary)
            print work.summary.content[:70]
            genre = classification.genre.name
            genredata = classifier.genres[genre]
            parentage = [x.name for x in genredata.parents] + [genre]
            parentage.reverse()
            while len(parentage) < 3:
                parentage.append("")
            stats[tuple(parentage)][source] += 1
    return stats

if __name__ == '__main__':

    _db = production_session()

    out = csv.writer(sys.stdout)

    sources = [DataSource.lookup(_db, x) for x in [
        DataSource.GUTENBERG, DataSource.OVERDRIVE, DataSource.THREEM]]
    out.writerow(["Classification", "Parent", "Grandparent"] + [x.name for x in sources] + ["Total"])

    for audience in "Adult", "Young Adult", "Children":
        base_query = _db.query(Work).filter(Work.audience==audience)
        by_source = count_for_each_data_source(base_query, sources)
        
        row = [by_source[source] for source in sources]
        row += [sum(row)]
        row = [audience, "", ""] + row
        out.writerow(row)

    out.writerow([])
    for fiction, name in (True, "Fiction"), (False, "Nonfiction"), (None, "No Fiction Status"):
        base_query = _db.query(Work).filter(Work.fiction==fiction)
    modified = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    data = open(path).read()
    representation, ignore = get_one_or_create(db, Representation,
        url=url, data_source=data_source)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/xml'
    representation.fetched_at = modified
    print url

if __name__ == '__main__':
    data_dir = sys.argv[1]
    
    template = "http://cloudlibraryapi.3m.com/cirrus/library/a4tmf/data/cloudevents?startdate=%s&enddate=%s"

    db = production_session()
    threem = DataSource.lookup(db, DataSource.THREEM)

    cache_path = os.path.join(data_dir, "3M", "cache", "events")
    a = 0
    for filename in os.listdir(cache_path):
        path = os.path.join(cache_path, filename)
        start_date = filename[:19]
        end_date = filename[20:]
        url = template % (start_date, end_date)
        imp(db, threem, path, url)
        a += 1
        if not a % 10:
            db.commit()
    db.commit()
from model import (
    CirculationEvent,
    DataSource,
    CoverageRecord,
    production_session,
    Identifier,
    Measurement,
    LicensePool,
)
import json
import gzip

database = production_session()
data_dir = sys.argv[1]
OVERDRIVE = DataSource.lookup(database, DataSource.OVERDRIVE)

TIME_FORMAT = "%Y-%m-%dT%H:%M:%S+00:00"

def process_item(_db, item):
    overdrive_id = item['id']
    event_name = item['event']
    old_value = item.get('old_value', 0)
    new_value = item.get('new_value', 0)
    if event_name in ('check_out', 'check_in'):
        x = new_value
        new_value = old_value
        old_value = x
    elif event_name in ('hold_release', 'hold_place', 'license_remove'):
        pass
    representation.status_code = status_code
    representation.content = data
    representation.location = location
    representation.media_type = media_type
    representation.fetched_at = modified
    return True

if __name__ == '__main__':
    data_dir = sys.argv[1]

    db = production_session()
    oclc = OCLCLinkedData(db)
    d = os.path.join(data_dir, "OCLC Linked Data", "cache", "OCLC Number")
    cache = FakeCache(d, 4, False)

    source = DataSource.lookup(db, DataSource.OCLC_LINKED_DATA)
    min_oclc = 1284796
    max_oclc = 2052405
    batch_size = 10000
    type = Identifier.OCLC_NUMBER

    cursor = min_oclc
    while cursor < max_oclc:
        first_time = time.time()
        processed = 0
        max_batch = cursor + batch_size
        q = (
            db.query(Identifier)
            .filter(Identifier.type == Identifier.OCLC_NUMBER)
            .filter(Identifier.id >= cursor)
            .filter(Identifier.id < max_batch)
        )

        for identifier in q:
            if imp(db, source, identifier, cache):
                processed += 1
    fn = cache._filename(fn)
    modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime)
    data = cache.open(fn).read()
    a = dict(collection_token=library['collectionToken'],
             item_id=i)
    url = OverdriveAPI.METADATA_ENDPOINT % a
    representation, ignore = get_one_or_create(db, Representation,
        url=url, data_source=data_source, identifier=identifier)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/json'
    representation.fetched_at = modified
    print identifier

if __name__ == '__main__':
    data_dir = sys.argv[1]

    overdrive = OverdriveAPI(data_dir)
    library = overdrive.get_library()
    db = production_session()
    b = overdrive.bibliographic_cache

    source = DataSource.lookup(db, DataSource.OVERDRIVE)
    q = db.query(Identifier).filter(Identifier.type==Identifier.OVERDRIVE_ID)
    a = 0
    for i in q:
        imp(db, source, i, b, library)
        a += 1
        if not a % 1000:
            db.commit()
 def __init__(self, db):
     self._db = db
     self.overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)