def test_explicit_formatdata(self):
    """Explicit FormatData is added to a pool's formats, or replaces them."""
    # An edition created with an open-access download automatically
    # gets a delivery mechanism.
    edition, pool = self._edition(with_open_access_download=True)

    # Add a DRM-protected format on top of it.
    adobe_pdf = FormatData(
        content_type=Representation.PDF_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation_data = CirculationData(
        formats=[adobe_pdf],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )
    circulation_data.apply(self._db, pool.collection)

    def by_content_type(lpdm):
        return lpdm.delivery_mechanism.content_type

    [epub, pdf] = sorted(pool.delivery_mechanisms, key=by_content_type)
    assert epub.resource == pool.best_open_access_resource
    assert pdf.delivery_mechanism.content_type == Representation.PDF_MEDIA_TYPE
    assert pdf.delivery_mechanism.drm_scheme == DeliveryMechanism.ADOBE_DRM

    # If we tell CirculationData to replace the list of formats, only
    # the one format we created by hand is left.
    policy = ReplacementPolicy(formats=True)
    circulation_data.apply(self._db, pool.collection, replace=policy)
    [pdf] = pool.delivery_mechanisms
    assert pdf.delivery_mechanism.content_type == Representation.PDF_MEDIA_TYPE
def test_rights_status_default_rights_from_data_source(self):
    """With no rights given, the data source's default rights status is used."""
    identifier = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
    drm_link = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
    )
    circulation_data = CirculationData(
        data_source=DataSource.OA_CONTENT_SERVER,
        primary_identifier=identifier,
        links=[drm_link],
    )
    policy = ReplacementPolicy(formats=True)

    # The pool is not open-access to begin with.
    pool, _ = circulation_data.license_pool(
        self._db, self._default_collection
    )
    assert pool.open_access == False

    circulation_data.apply(self._db, pool.collection, policy)

    # The pool became open-access because it was given a link that
    # came from the OA content server.
    assert pool.open_access == True
    assert len(pool.delivery_mechanisms) == 1

    # The rights status is the default for the OA content server.
    [lpdm] = pool.delivery_mechanisms[:1]
    assert lpdm.rights_status.uri == RightsStatus.GENERIC_OPEN_ACCESS
def test_apply_with_licenses_overrides_availability(self):
    """When licenses are supplied, availability is calculated from them."""
    edition, pool = self._edition(with_license_pool=True)

    exhausted_license = LicenseData(
        identifier="8c5fdbfe-c26e-11e8-8706-5254009434c4",
        checkout_url="https://borrow2",
        status_url="https://status2",
        checkouts_available=0,
        terms_concurrency=1,
        status=LicenseStatus.available,
    )

    # CirculationData is given both explicit availability numbers and
    # licenses. The explicit numbers are ignored; availability is
    # derived from the licenses instead.
    bogus_count = 999
    circulation_data = CirculationData(
        licenses=[exhausted_license],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
        licenses_owned=bogus_count,
        licenses_available=bogus_count,
        licenses_reserved=bogus_count,
        patrons_in_hold_queue=bogus_count,
    )
    circulation_data.apply(self._db, pool.collection)

    assert len(pool.licenses) == 1
    assert pool.licenses_available == 0
    assert pool.licenses_owned == 1
    assert pool.licenses_reserved == 0
    assert pool.patrons_in_hold_queue == 0
def test_rights_status_open_access_link_with_rights(self):
    """An open-access link's own rights URI ends up on the delivery mechanism."""
    identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
    open_access_link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.CC_BY_ND,
    )
    circulation_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=identifier,
        links=[open_access_link],
    )
    policy = ReplacementPolicy(formats=True)

    pool, _ = circulation_data.license_pool(
        self._db, self._default_collection
    )
    circulation_data.apply(self._db, pool.collection, policy)

    assert pool.open_access == True
    assert len(pool.delivery_mechanisms) == 1
    first_mechanism = pool.delivery_mechanisms[0]
    assert first_mechanism.rights_status.uri == RightsStatus.CC_BY_ND
def _reap(self, identifier):
    """Update our local circulation information to reflect the fact
    that the identified book has been removed from the remote
    collection.

    Nothing happens if this collection has no license pool for the
    identifier, or if the pool already owns zero licenses. Otherwise
    a CirculationData with all counts set to zero is applied.

    :param identifier: The Identifier of the removed book.
    """
    collection = self.collection
    pool = identifier.licensed_through_collection(collection)
    if not pool:
        # `Logger.warn` is a deprecated alias (gone in Python 3.13);
        # `warning` behaves the same on every version.
        self.log.warning(
            "Was about to reap %r but no local license pool in this collection.",
            identifier
        )
        return
    if pool.licenses_owned == 0:
        # Already reaped.
        return

    self.log.info("Reaping %r", identifier)
    availability = CirculationData(
        data_source=pool.data_source,
        primary_identifier=identifier,
        licenses_owned=0,
        licenses_available=0,
        licenses_reserved=0,
        patrons_in_hold_queue=0,
    )
    availability.apply(
        self._db, collection,
        ReplacementPolicy.from_license_source(self._db)
    )
def test_apply_creates_work_and_presentation_edition_if_needed(self):
    """apply() gives a bare pool a presentation edition and a work."""
    edition = self._edition()

    # This pool has neither a presentation edition nor a work yet.
    first_pool = self._licensepool(edition)

    # New circulation data arrives for the pool.
    circulation_data = CirculationData(
        formats=[],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    # Applying it gives the pool both a presentation edition and a work.
    policy = ReplacementPolicy()
    circulation_data.apply(self._db, first_pool.collection, policy)
    assert first_pool.presentation_edition == edition
    assert first_pool.work != None

    # A second pool for the same book, in a different collection,
    # ends up sharing that work.
    other_collection = self._collection()
    second_pool = self._licensepool(edition, collection=other_collection)
    circulation_data.apply(self._db, second_pool.collection, policy)
    assert second_pool.presentation_edition == edition
    assert second_pool.work == first_pool.work
def test_rights_status_default_rights_passed_in(self):
    """A default_rights_uri passed to CirculationData is the one that gets used."""
    identifier = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
    drm_link = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
    )
    circulation_data = CirculationData(
        data_source=DataSource.OA_CONTENT_SERVER,
        primary_identifier=identifier,
        default_rights_uri=RightsStatus.CC_BY,
        links=[drm_link],
    )
    policy = ReplacementPolicy(formats=True)

    pool, _ = circulation_data.license_pool(
        self._db, self._default_collection
    )
    circulation_data.apply(self._db, pool.collection, policy)

    assert pool.open_access == True
    assert len(pool.delivery_mechanisms) == 1

    # The rights status is the one that was handed to CirculationData.
    first_mechanism = pool.delivery_mechanisms[0]
    assert first_mechanism.rights_status.uri == RightsStatus.CC_BY
def test_rights_status_commercial_link_with_rights(self):
    """An in-copyright DRM format keeps its rights and is not open-access."""
    identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
    drm_link = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    drm_format = FormatData(
        content_type=drm_link.media_type,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
        link=drm_link,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    circulation_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=identifier,
        links=[drm_link],
        formats=[drm_format],
    )
    policy = ReplacementPolicy(formats=True)

    pool, _ = circulation_data.license_pool(
        self._db, self._default_collection
    )
    circulation_data.apply(self._db, pool.collection, policy)

    assert pool.open_access == False
    assert len(pool.delivery_mechanisms) == 1
    first_mechanism = pool.delivery_mechanisms[0]
    assert first_mechanism.rights_status.uri == RightsStatus.IN_COPYRIGHT
def test_apply_updates_existing_licenses(self):
    """LicenseData with a known identifier updates the existing License."""
    edition, pool = self._edition(with_license_pool=True)

    # The pool starts with a single license.
    old_license = self._license(
        pool,
        expires=None,
        checkouts_left=2,
        checkouts_available=3,
    )

    # New data for that same license: no checkouts left, unavailable.
    updated_license_data = LicenseData(
        identifier=old_license.identifier,
        expires=old_license.expires,
        checkouts_left=0,
        checkouts_available=3,
        status=LicenseStatus.unavailable,
        checkout_url=old_license.checkout_url,
        status_url=old_license.status_url,
    )
    circulation_data = CirculationData(
        licenses=[updated_license_data],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    circulation_data.apply(self._db, pool.collection)
    self._db.commit()

    # Still one license, and it is the same database row as before.
    assert len(pool.licenses) == 1
    new_license = pool.licenses[0]
    assert new_license.id == old_license.id
    assert old_license.status == LicenseStatus.unavailable
def test_apply_removes_old_formats_based_on_replacement_policy(self):
    """Old formats survive apply() unless the policy says formats=True."""
    edition, pool = self._edition(with_license_pool=True)

    # Reduce the pool to exactly one delivery mechanism of our choosing.
    for existing in pool.delivery_mechanisms:
        self._db.delete(existing)
    old_lpdm = pool.set_delivery_mechanism(
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT,
        None,
    )

    # That mechanism has been used to fulfill a loan.
    patron = self._patron()
    loan, _ = pool.loan_to(patron, fulfillment=old_lpdm)
    assert loan.fulfillment == old_lpdm

    # New circulation data arrives, carrying a different format.
    epub_format = FormatData(
        content_type=Representation.EPUB_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation_data = CirculationData(
        formats=[epub_format],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    # With formats=False in the policy, the new format is added and
    # the old one is kept.
    keep_policy = ReplacementPolicy(formats=False)
    circulation_data.apply(self._db, pool.collection, keep_policy)

    assert len(pool.delivery_mechanisms) == 2
    content_types = {
        lpdm.delivery_mechanism.content_type
        for lpdm in pool.delivery_mechanisms
    }
    assert content_types == {
        Representation.PDF_MEDIA_TYPE,
        Representation.EPUB_MEDIA_TYPE,
    }
    assert loan.fulfillment == old_lpdm

    # With formats=True, the old format is deleted and detached from
    # its loan.
    replace_policy = ReplacementPolicy(formats=True)
    circulation_data.apply(self._db, pool.collection, replace_policy)

    assert len(pool.delivery_mechanisms) == 1
    remaining = pool.delivery_mechanisms[0]
    assert (
        remaining.delivery_mechanism.content_type
        == Representation.EPUB_MEDIA_TYPE
    )
    assert loan.fulfillment == None
def update_licensepools_for_identifiers(self, identifiers):
    """Update availability information for a list of books.

    If the book has never been seen before, a new LicensePool
    will be created for the book.

    The book's LicensePool will be updated with current
    circulation information.

    Any requested identifier that is missing from the response is
    reaped: if this collection has a pool for it that still owns
    licenses, all of its circulation counts are set to zero.

    :param identifiers: The Identifiers to ask the remote API about.
    """
    identifier_strings = self.create_identifier_strings(identifiers)
    response = self.availability(title_ids=identifier_strings)
    collection = self.collection
    parser = BibliographicParser(collection)
    remainder = set(identifiers)
    for bibliographic, availability in parser.process_all(response.content):
        identifier, is_new = bibliographic.primary_identifier.load(self._db)
        remainder.discard(identifier)
        pool, is_new = availability.license_pool(self._db, collection)
        availability.apply(self._db, pool.collection)

    # We asked Axis about n books. It sent us n-k responses. Those
    # k books are the identifiers in `remainder`. These books have
    # been removed from the collection without us being notified.
    for removed_identifier in remainder:
        # Look up the pool of the identifier being reaped, not of
        # whatever identifier the loop above happened to end on.
        pool = removed_identifier.licensed_through_collection(collection)
        if not pool:
            # `Logger.warn` is a deprecated alias; use `warning`.
            self.log.warning(
                "Was about to reap %r but no local license pool in this collection.",
                removed_identifier
            )
            continue
        if pool.licenses_owned == 0:
            # Already reaped.
            continue
        self.log.info("Reaping %r", removed_identifier)

        availability = CirculationData(
            data_source=pool.data_source,
            primary_identifier=removed_identifier,
            licenses_owned=0,
            licenses_available=0,
            licenses_reserved=0,
            patrons_in_hold_queue=0,
        )
        # Same (_db, collection, ...) calling convention as the
        # apply() call in the first loop.
        availability.apply(
            self._db, collection,
            ReplacementPolicy.from_license_source(self._db)
        )
def test_format_change_may_change_open_access_status(self):
    """Replacing a pool's formats can flip its open-access status both ways."""
    # Every apply() call in this test destroys the old list of
    # formats and recreates it.
    replace_formats = ReplacementPolicy(formats=True)

    # An apparently ordinary pool that is not open-access.
    edition, pool = self._edition(with_license_pool=True)
    assert pool.open_access == False

    # We learn that it has an open-access delivery mechanism.
    open_access_link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.CC_BY_ND,
    )
    open_access_data = CirculationData(
        data_source=pool.data_source,
        primary_identifier=pool.identifier,
        links=[open_access_link],
    )

    # Applying that turns the pool into an open-access pool.
    open_access_data.apply(
        self._db, pool.collection, replace=replace_formats
    )
    assert pool.open_access == True

    # Then it turns out to be a mistake -- the book is in copyright.
    in_copyright_format = FormatData(
        Representation.EPUB_MEDIA_TYPE,
        DeliveryMechanism.NO_DRM,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    in_copyright_data = CirculationData(
        data_source=pool.data_source,
        primary_identifier=pool.identifier,
        formats=[in_copyright_format],
    )
    in_copyright_data.apply(
        self._db, pool.collection, replace=replace_formats
    )

    # The original delivery mechanism is gone; only the new one is left.
    assert pool.open_access == False
    assert len(pool.delivery_mechanisms) == 1
def update_licensepools_for_identifiers(self, identifiers):
    """Update availability information for a list of books.

    If the book has never been seen before, a new LicensePool
    will be created for the book.

    The book's LicensePool will be updated with current
    circulation information.

    Any requested identifier that is missing from the response is
    reaped: if it has a pool that still owns licenses, all of that
    pool's circulation counts are set to zero.

    :param identifiers: The Identifiers to ask the remote API about.
    """
    identifier_strings = self.create_identifier_strings(identifiers)
    response = self.availability(title_ids=identifier_strings)
    parser = BibliographicParser()
    remainder = set(identifiers)
    for bibliographic, availability in parser.process_all(response.content):
        identifier, is_new = bibliographic.primary_identifier.load(self._db)
        remainder.discard(identifier)
        pool, is_new = availability.license_pool(self._db)
        availability.apply(pool)

    # We asked Axis about n books. It sent us n-k responses. Those
    # k books are the identifiers in `remainder`. These books have
    # been removed from the collection without us being notified.
    for removed_identifier in remainder:
        pool = removed_identifier.licensed_through
        if not pool:
            # `Logger.warn` is a deprecated alias; use `warning`.
            self.log.warning(
                "Was about to reap %r but no local license pool.",
                removed_identifier
            )
            continue
        if pool.licenses_owned == 0:
            # Already reaped.
            continue
        self.log.info("Reaping %r", removed_identifier)

        availability = CirculationData(
            data_source=pool.data_source,
            primary_identifier=removed_identifier,
            licenses_owned=0,
            licenses_available=0,
            licenses_reserved=0,
            patrons_in_hold_queue=0,
        )
        availability.apply(pool, False)
def process_item(self, identifier):
    """Reap a title if the remote API no longer returns it.

    :param identifier: The Identifier to check. Its `identifier` and
        `type` attributes are read.
    :return: The CirculationData that was applied to zero out the
        title's licenses, or None if nothing had to change.
    """
    self.log.debug("Seeing if %s needs reaping", identifier.identifier)
    metadata = self.api.get_item(identifier.identifier)
    if metadata:
        # This title is still in the collection. Do nothing.
        return None

    # Get this collection's license pool for this identifier.
    # We'll reap it by setting its licenses_owned to 0.
    pool = identifier.licensed_through_collection(self.collection)
    if not pool or pool.licenses_owned == 0:
        # It's already been reaped.
        return None

    # `Logger.warn` is a deprecated alias (gone in Python 3.13);
    # `warning` behaves the same on every version.
    if pool.presentation_edition:
        self.log.warning(
            "Removing %r from circulation",
            pool.presentation_edition
        )
    else:
        self.log.warning(
            "Removing unknown title %s from circulation.",
            identifier.identifier
        )

    now = datetime.datetime.utcnow()
    circulationdata = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=IdentifierData(
            identifier.type, identifier.identifier
        ),
        licenses_owned=0,
        licenses_available=0,
        patrons_in_hold_queue=0,
        last_checked=now,
    )
    circulationdata.apply(
        self._db,
        self.collection,
        replace=ReplacementPolicy.from_license_source(self._db)
    )
    return circulationdata
def test_implicit_format_for_open_access_link(self):
    """An open-access link implies a DRM-free format for the pool."""
    # A format is a delivery mechanism. This checks that when a link
    # is open access, the pool gets a way to be delivered.
    edition, pool = self._edition(with_license_pool=True)

    # _edition() creates this delivery mechanism by default.
    [default_epub] = pool.delivery_mechanisms
    assert (
        default_epub.delivery_mechanism.content_type
        == Representation.EPUB_MEDIA_TYPE
    )
    assert (
        default_epub.delivery_mechanism.drm_scheme
        == DeliveryMechanism.ADOBE_DRM
    )

    pdf_link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.PDF_MEDIA_TYPE,
        href=self._url,
    )
    with_link = CirculationData(
        data_source=DataSource.GUTENBERG,
        primary_identifier=edition.primary_identifier,
        links=[pdf_link],
    )
    formats_only = ReplacementPolicy(formats=True)
    with_link.apply(self._db, pool.collection, formats_only)

    # The default delivery format is gone, replaced by a new
    # open-access one.
    [pdf] = pool.delivery_mechanisms
    assert pdf.delivery_mechanism.content_type == Representation.PDF_MEDIA_TYPE
    assert pdf.delivery_mechanism.drm_scheme == DeliveryMechanism.NO_DRM

    without_links = CirculationData(
        data_source=DataSource.GUTENBERG,
        primary_identifier=edition.primary_identifier,
        links=[],
    )
    formats_and_links = ReplacementPolicy(formats=True, links=True)
    without_links.apply(self._db, pool.collection, formats_and_links)

    # Now there are no formats at all.
    assert len(pool.delivery_mechanisms) == 0
def process_item(self, identifier):
    """Reap a title if the remote API no longer returns it.

    :param identifier: The Identifier to check. Its `identifier` and
        `type` attributes are read.
    :return: The CirculationData that was applied to zero out the
        title's licenses, or None if nothing had to change.
    """
    self.log.debug("Seeing if %s needs reaping", identifier.identifier)
    metadata = self.api.get_item(identifier.identifier)
    if metadata:
        # This title is still in the collection. Do nothing.
        return None

    # Get this collection's license pool for this identifier.
    # We'll reap it by setting its licenses_owned to 0.
    pool = identifier.licensed_through_collection(self.collection)
    if not pool or pool.licenses_owned == 0:
        # It's already been reaped.
        return None

    # `Logger.warn` is a deprecated alias (gone in Python 3.13);
    # `warning` behaves the same on every version.
    if pool.presentation_edition:
        self.log.warning(
            "Removing %r from circulation",
            pool.presentation_edition
        )
    else:
        self.log.warning(
            "Removing unknown title %s from circulation.",
            identifier.identifier
        )

    now = datetime.datetime.utcnow()
    circulationdata = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=IdentifierData(
            identifier.type, identifier.identifier
        ),
        licenses_owned=0,
        licenses_available=0,
        patrons_in_hold_queue=0,
        last_checked=now,
    )
    circulationdata.apply(
        self._db,
        self.collection,
        replace=ReplacementPolicy.from_license_source(self._db)
    )
    return circulationdata
def test_apply_adds_new_licenses(self):
    """LicenseData with an unknown identifier adds a License and keeps the old."""
    edition, pool = self._edition(with_license_pool=True)

    # The pool starts with a single license.
    old_license = self._license(
        pool,
        expires=None,
        checkouts_left=2,
        checkouts_available=3,
    )

    # That license is out on loan.
    patron = self._patron()
    loan, _ = old_license.loan_to(patron)
    assert loan.license == old_license

    # New circulation data arrives describing a different license.
    one_week_from_now = utc_now() + datetime.timedelta(days=7)
    license_data = LicenseData(
        identifier="8c5fdbfe-c26e-11e8-8706-5254009434c4",
        checkout_url="https://borrow2",
        status_url="https://status2",
        expires=one_week_from_now,
        checkouts_left=None,
        checkouts_available=1,
        terms_concurrency=1,
        status=LicenseStatus.available,
    )
    circulation_data = CirculationData(
        licenses=[license_data],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    # Applying it adds the new license but keeps the old one as well.
    circulation_data.apply(self._db, pool.collection)
    self._db.commit()

    assert len(pool.licenses) == 2
    known_identifiers = {lic.identifier for lic in pool.licenses}
    assert known_identifiers == {
        old_license.identifier,
        license_data.identifier,
    }
    assert loan.license == old_license
def reaper_request(self, identifier):
    """Ask Enki whether a book still exists, and reap it if it does not.

    A "getItem" request is made for the identifier. If the response
    body parses as JSON the book is kept. If it cannot be parsed, the
    book is treated as removed and a CirculationData with zeroed
    license counts is applied to this collection.

    :param identifier: The Identifier to check. Its `identifier`
        attribute is sent as the record id.
    :return: The applied CirculationData if the book was reaped,
        otherwise None.
    """
    self.log.debug("Checking availability for %s", identifier.identifier)
    now = datetime.datetime.utcnow()
    url = str(self.base_url) + str(self.item_endpoint)
    args = {
        'method': "getItem",
        'recordid': identifier.identifier,
        'size': "small",
        'lib': self.library_id,
    }
    response = self.request(url, method='get', params=args)

    try:
        # If a book doesn't exist in Enki, we'll just get an HTML page
        # saying we did something wrong.
        json.loads(response.content)
    except (ValueError, TypeError):
        # Only an unparseable body counts as "book is gone". Any other
        # failure propagates instead of silently zeroing out licenses.
        pass
    else:
        self.log.debug("Keeping existing book: %s", identifier)
        return None

    # Get this collection's license pool for this identifier.
    pool = identifier.licensed_through_collection(self.collection)
    if pool and (pool.licenses_owned > 0):
        # `Logger.warn` is a deprecated alias; use `warning`.
        if pool.presentation_edition:
            self.log.warning(
                "Removing %s (%s) from circulation",
                pool.presentation_edition.title,
                pool.presentation_edition.author
            )
        else:
            self.log.warning(
                "Removing unknown work %s from circulation.",
                identifier.identifier
            )

    circulationdata = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=IdentifierData(
            EnkiAPI.ENKI_ID, identifier.identifier
        ),
        licenses_owned=0,
        licenses_available=0,
        patrons_in_hold_queue=0,
        last_checked=now,
    )
    circulationdata.apply(
        self._db,
        self.collection,
        replace=ReplacementPolicy.from_license_source(self._db)
    )
    return circulationdata
def update_licensepool_for_identifier(self, isbn, availability):
    """Update availability information for a single book.

    The LicensePool for the ISBN is looked up in this collection, or
    created if the book has never been seen before. A newly created
    pool has bibliographic coverage ensured for its identifier.

    The pool is then updated with approximate circulation
    information: we can tell whether the book is available, but not
    how many copies there are.

    :param isbn: The identifier OneClick uses.
    :param availability: Boolean denoting if the book can be lent to
        patrons.
    :return: A 3-tuple (license_pool, is_new_pool, circulation_changed).
    """
    # Find a license pool matching the ISBN, noting whether it is new.
    license_pool, is_new_pool = LicensePool.for_foreign_id(
        self._db,
        DataSource.ONECLICK,
        Identifier.ONECLICK_ID,
        isbn,
        collection=self.collection,
    )

    if is_new_pool:
        # First time we've seen this book. Make sure its identifier
        # has bibliographic coverage.
        self.bibliographic_coverage_provider.ensure_coverage(
            license_pool.identifier
        )

    # Now tell the license pool whether it's lendable.
    policy = ReplacementPolicy(
        identifiers=False,
        subjects=True,
        contributions=True,
        formats=True,
        analytics=Analytics(self._db),
    )

    # licenses_available is either 0 or 999, depending on whether the
    # book is lendable.
    licenses_available = 999 if availability else 0

    circulation_data = CirculationData(
        data_source=DataSource.ONECLICK,
        primary_identifier=license_pool.identifier,
        licenses_available=licenses_available,
    )
    license_pool, circulation_changed = circulation_data.apply(
        self._db,
        self.collection,
        replace=policy,
    )

    return license_pool, is_new_pool, circulation_changed
def test_apply_without_licenses_sets_availability(self):
    """Without licenses, explicit availability numbers go onto the pool."""
    edition, pool = self._edition(with_license_pool=True)

    # CirculationData is given availability numbers but no licenses,
    # so the numbers are used to set the LicensePool's values.
    count = 999
    circulation_data = CirculationData(
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
        licenses_owned=count,
        licenses_available=count,
        licenses_reserved=count,
        patrons_in_hold_queue=count,
    )
    circulation_data.apply(self._db, pool.collection)

    assert len(pool.licenses) == 0
    assert pool.licenses_available == 999
    assert pool.licenses_owned == 999
    assert pool.licenses_reserved == 999
    assert pool.patrons_in_hold_queue == 999
def test_circulationdata_may_require_collection(self):
    """Depending on the information provided in a CirculationData
    object, it might or might not be possible to call apply()
    without providing a Collection.
    """
    identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
    drm_free_epub = FormatData(
        Representation.EPUB_MEDIA_TYPE,
        DeliveryMechanism.NO_DRM,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    circdata = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=identifier,
        formats=[drm_free_epub],
    )
    circdata.apply(self._db, collection=None)

    # apply() created a LicensePoolDeliveryMechanism for this title
    # even though the title has no LicensePools.
    identifier_obj, _ = identifier.load(self._db)
    assert identifier_obj.licensed_through == []
    [lpdm] = identifier_obj.delivery_mechanisms
    assert lpdm.data_source.name == DataSource.OVERDRIVE
    assert lpdm.rights_status.uri == RightsStatus.IN_COPYRIGHT

    mechanism = lpdm.delivery_mechanism
    assert mechanism.content_type == Representation.EPUB_MEDIA_TYPE
    assert mechanism.drm_scheme == DeliveryMechanism.NO_DRM

    # Information that can only be stored in a LicensePool cannot be
    # applied without a Collection.
    circdata.licenses_owned = 0
    with pytest.raises(ValueError) as excinfo:
        circdata.apply(self._db, collection=None)
    expected_message = (
        "Cannot store circulation information because no Collection was provided."
    )
    assert expected_message in str(excinfo.value)
def update_consolidated_copy(self, _db, copy_info, analytics=None):
    """Process information about the current status of a consolidated
    copy from the consolidated copies feed.

    :param _db: Database session passed to CirculationData.apply().
    :param copy_info: Mapping read for the "identifier", "licenses"
        and "available" keys.
    :param analytics: Optional analytics object handed to the
        ReplacementPolicy.
    """
    identifier_data = IdentifierData(
        Identifier.URI, copy_info.get("identifier")
    )
    circulation_data = CirculationData(
        data_source=self.data_source_name,
        primary_identifier=identifier_data,
        licenses_owned=copy_info.get("licenses"),
        licenses_available=copy_info.get("available"),
    )

    policy = ReplacementPolicy(analytics=analytics)
    pool, _ = circulation_data.apply(_db, self.collection(_db), policy)

    # Update licenses reserved if there are holds.
    has_holds = len(pool.holds) > 0
    if has_holds and pool.licenses_available > 0:
        self.update_hold_queue(pool)
def test_open_access_content_mirrored(self):
    """Open-access links are mirrored by CirculationData; commercial ones are not."""
    # Open access material links should be translated to our S3
    # buckets, and commercial material links left as they are.

    # Note: these mirroring tests passing does not guarantee that all
    # code now correctly calls on CirculationData as well as Metadata.
    # This is a risk.

    mirror_type = ExternalIntegrationLink.OPEN_ACCESS_BOOKS
    mirrors = dict(books_mirror=MockS3Uploader(), covers_mirror=None)

    # A book.
    edition, pool = self._edition(with_license_pool=True)

    # A link to the book's content, which will be mirrored.
    link_mirrored = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        href="http://example.com/",
        media_type=Representation.EPUB_MEDIA_TYPE,
        content="i am a tiny book",
    )

    # A link that will not be mirrored.
    link_unmirrored = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        href="http://example.com/2",
        media_type=Representation.EPUB_MEDIA_TYPE,
        content="i am a pricy book",
    )
    both_links = [link_mirrored, link_unmirrored]

    # Apply the metadata.
    policy = ReplacementPolicy(mirrors=mirrors)
    metadata = Metadata(
        data_source=edition.data_source,
        links=both_links,
    )
    metadata.apply(edition, pool.collection, replace=policy)

    # Metadata must not upload anything.
    assert len(mirrors[mirror_type].uploaded) == 0

    circulation_data = CirculationData(
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
        links=both_links,
    )
    circulation_data.apply(self._db, pool.collection, replace=policy)

    # CirculationData does upload.
    assert len(mirrors[mirror_type].uploaded) == 1

    # Only the open-access link has been 'mirrored'.
    [book] = mirrors[mirror_type].uploaded

    # It is still an open-access link.
    rels = [hyperlink.rel for hyperlink in book.resource.links]
    assert rels == [Hyperlink.OPEN_ACCESS_DOWNLOAD]

    # It has been 'mirrored' to the appropriate S3 bucket.
    bucket_prefix = "https://test-content-bucket.s3.amazonaws.com/"
    assert book.mirror_url.startswith(bucket_prefix)
    expect = "/%s/%s.epub" % (
        edition.primary_identifier.identifier,
        edition.title,
    )
    assert book.mirror_url.endswith(expect)

    # The mirrored link is safely on the edition.
    links_by_rel = sorted(pool.identifier.links, key=lambda x: x.rel)
    unmirrored_representation, mirrored_representation = [
        link.resource.representation for link in links_by_rel
    ]
    assert mirrored_representation.mirror_url.startswith(bucket_prefix)

    # The unmirrored link is safely on the edition too...
    assert unmirrored_representation.url == "http://example.com/2"
    # ...and has not been translated to an S3 URL.
    assert unmirrored_representation.mirror_url == None
def update_licensepool_for_identifier(self, isbn, availability, medium,
                                      policy=None):
    """Update availability information for a single book.

    The LicensePool for the ISBN is looked up in this collection, or
    created if the book has never been seen before. A newly created
    pool has bibliographic coverage ensured for its identifier.

    The pool is then updated with approximate circulation
    information: we can tell whether the book is available, but not
    how many copies there are.

    :param isbn: The identifier OneClick uses.
    :param availability: Boolean denoting if the book can be lent to
        patrons.
    :param medium: The name OneClick uses for the book's medium.
    :param policy: Optional ReplacementPolicy; when not given,
        `self.default_circulation_replacement_policy` is used.
    :return: A 3-tuple (license_pool, is_new_pool, circulation_changed).
    """
    # Find a license pool matching the ISBN, noting whether it is new.
    license_pool, is_new_pool = LicensePool.for_foreign_id(
        self._db,
        DataSource.RB_DIGITAL,
        Identifier.RB_DIGITAL_ID,
        isbn,
        collection=self.collection,
    )
    if is_new_pool:
        # First time we've seen this book. Make sure its identifier
        # has bibliographic coverage.
        self.bibliographic_coverage_provider.ensure_coverage(
            license_pool.identifier
        )

    # We don't know exactly how many licenses are available, only
    # whether it is zero (not lendable) or more than zero (lendable).
    licenses_available = 1 if availability else 0

    # The book showed up in availability, so we own at least one
    # license to it.
    licenses_owned = 1

    unchanged = (
        not is_new_pool
        and license_pool.licenses_owned == licenses_owned
        and license_pool.licenses_available == licenses_available
    )
    if unchanged:
        # Optimization: nothing has changed, so don't even bother
        # calling CirculationData.apply().
        return license_pool, is_new_pool, False

    # If possible, describe how the book is available with a
    # FormatData object.
    #
    # Note that these strings are different from the similar strings
    # found in "fileFormat" when looking at a patron's loans:
    # "ebook" (a medium) versus "EPUB" (a format). Unfortunately we
    # don't get the file format when checking the book's availability
    # before a patron has checked it out.
    format_for_medium = {
        # OneClick doesn't tell us the DRM scheme at this point, but
        # some of their EPUBs do have Adobe DRM. Also, their DRM
        # usage may change in the future.
        'ebook': (
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
        ),
        # TODO: we can't deliver on this promise yet, but this is
        # how we will be delivering audiobook manifests.
        'eaudio': (
            Representation.AUDIOBOOK_MANIFEST_MEDIA_TYPE,
            None,
        ),
    }
    medium = medium.lower()
    delivery_type, drm_scheme = format_for_medium.get(medium, (None, None))

    formats = []
    if delivery_type:
        formats.append(FormatData(delivery_type, drm_scheme))

    circulation_data = CirculationData(
        data_source=DataSource.RB_DIGITAL,
        primary_identifier=license_pool.identifier,
        licenses_owned=licenses_owned,
        licenses_available=licenses_available,
        formats=formats,
    )

    policy = policy or self.default_circulation_replacement_policy
    license_pool, circulation_changed = circulation_data.apply(
        self._db,
        self.collection,
        replace=policy,
    )

    return license_pool, is_new_pool, circulation_changed
def parse_book(cls, collection, g, uri, title):
    """Turn the RDF description of a Gutenberg book into an Edition
    and a LicensePool.

    :param collection: The collection the LicensePool is created in.
        Its database session is the one used throughout.
    :param g: The RDF graph holding the book's description.
    :param uri: The URI of the book inside `g`. The Gutenberg ID is
        pulled out of it with `cls.ID_IN_URI`.
    :param title: The book's title. Everything after the first line
        break is treated as the subtitle.
    :return: A 3-tuple (edition, license pool, second value returned
        by `Metadata.edition()` -- presumably whether the Edition
        was newly created).
    """
    id_match = cls.ID_IN_URI.search(uri)
    source_id = unicode(id_match.groups()[0])
    primary_identifier = IdentifierData(Identifier.GUTENBERG_ID, source_id)

    # The first line of the title is the real title; any remaining
    # lines become the subtitle.
    title = unicode(title)
    subtitle = None
    for separator in ("\r\n", "\n"):
        if separator not in title:
            continue
        pieces = title.split(separator)
        title, subtitle = pieces[0], "\n".join(pieces[1:])
        break

    issued_value = cls._value(g, (uri, cls.dcterms.issued, None))
    issued = datetime.datetime.strptime(
        issued_value, cls.DATE_FORMAT
    ).date()

    rights = cls._value(g, (uri, cls.dcterms.rights, None))
    rights = str(rights) if rights else ''
    rights_uri = RightsStatus.rights_uri_from_string(rights)

    # Gutenberg descriptions are deliberately not extracted: as far
    # as anyone can tell they are useless for our purposes, even
    # when no other description is available.

    publisher = cls._value(g, (uri, cls.dcterms.publisher, None))

    # Collect the three-letter code of every language we recognize.
    languages = []
    language_triples = g.triples((uri, cls.dcterms.language, None))
    for _subject, _predicate, language_uri in language_triples:
        two_letter = str(
            cls._value(g, (language_uri, cls.rdf.value, None))
        )
        three_letter = LanguageCodes.two_to_three[two_letter]
        if three_letter:
            languages.append(three_letter)

    # English wins if present; otherwise take the first language.
    if 'eng' in languages:
        language = 'eng'
    else:
        language = languages[0] if languages else None

    # Every dcterms:creator is recorded as an author.
    contributors = [
        ContributorData(
            sort_name=cls._value(g, (author_uri, cls.gutenberg.name, None)),
            aliases=cls._values(g, (author_uri, cls.gutenberg.alias, None)),
            roles=[Contributor.AUTHOR_ROLE],
        )
        for _subject, _predicate, author_uri
        in g.triples((uri, cls.dcterms.creator, None))
    ]

    subjects = []
    for subject_node in cls._values(g, (uri, cls.dcterms.subject, None)):
        value = cls._value(g, (subject_node, cls.rdf.value, None))
        vocabulary_uri = cls._value(
            g, (subject_node, cls.dcam.memberOf, None)
        )
        vocabulary = Subject.by_uri[str(vocabulary_uri)]
        subjects.append(SubjectData(vocabulary, value))

    medium = Edition.BOOK_MEDIUM

    download_links = cls._values(g, (uri, cls.dcterms.hasFormat, None))

    # The only Hyperlink attached here is the canonical link.
    links = [LinkData(rel=Hyperlink.CANONICAL, href=str(uri))]

    # Gutenberg won't allow us to use any of its download or image
    # links -- we have to make our own from an rsynced mirror --
    # but the advertised formats still tell us which medium to
    # assign to this book.
    formats = []
    for href in download_links:
        format_uris = cls._values(g, (href, cls.dcterms['format'], None))
        for format_uri in format_uris:
            media_type = unicode(
                cls._value(g, (format_uri, cls.rdf.value, None))
            )
            if media_type.startswith('audio/'):
                medium = Edition.AUDIO_MEDIUM
                formats.append(FormatData(
                    content_type=Representation.MP3_MEDIA_TYPE,
                    drm_scheme=DeliveryMechanism.NO_DRM,
                ))
                continue
            if media_type.startswith('video/'):
                # Video changes the medium but adds no format.
                medium = Edition.VIDEO_MEDIUM
                continue
            formats.append(FormatData(
                content_type=Representation.EPUB_MEDIA_TYPE,
                drm_scheme=DeliveryMechanism.NO_DRM,
                rights_uri=rights_uri,
            ))

    _db = Session.object_session(collection)

    metadata = Metadata(
        data_source=DataSource.GUTENBERG,
        title=title,
        subtitle=subtitle,
        language=language,
        publisher=publisher,
        issued=issued,
        medium=medium,
        primary_identifier=primary_identifier,
        subjects=subjects,
        contributors=contributors,
        links=links,
    )
    edition, new = metadata.edition(_db)
    metadata.apply(edition, collection)

    # Ensure that an open-access LicensePool exists for this book.
    circulation_data = CirculationData(
        data_source=DataSource.GUTENBERG,
        primary_identifier=primary_identifier,
        formats=formats,
        default_rights_uri=rights_uri,
        links=links,
    )
    license_pool, _is_new_pool = circulation_data.license_pool(
        _db, collection
    )
    circulation_data.apply(
        _db, collection, replace=ReplacementPolicy(formats=True)
    )
    license_pool.calculate_work()

    return edition, license_pool, new
def test_rights_status_open_access_link_no_rights_uses_data_source_default(
        self):
    # An open-access link that carries no rights information of its
    # own ends up with the default rights status of its data source.
    replace_formats = ReplacementPolicy(formats=True)

    def apply_open_access_link(identifier_type, data_source):
        # Build a CirculationData holding a single open-access EPUB
        # link, apply it to a pool that starts out as not
        # open-access, and hand back that pool.
        identifier = IdentifierData(identifier_type, "abcd")
        link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        circulation_data = CirculationData(
            data_source=data_source,
            primary_identifier=identifier,
            links=[link],
        )
        pool, ignore = circulation_data.license_pool(
            self._db, self._default_collection
        )
        pool.open_access = False
        circulation_data.apply(self._db, pool.collection, replace_formats)
        return pool

    # Applying a Gutenberg CirculationData to a LicensePool makes
    # the pool open-access.
    gutenberg_pool = apply_open_access_link(
        Identifier.GUTENBERG_ID, DataSource.GUTENBERG
    )
    assert True == gutenberg_pool.open_access
    assert 1 == len(gutenberg_pool.delivery_mechanisms)

    # The delivery mechanism's rights status is the default for
    # the data source.
    gutenberg_rights = gutenberg_pool.delivery_mechanisms[0].rights_status
    assert RightsStatus.PUBLIC_DOMAIN_USA == gutenberg_rights.uri

    # Even if a commercial source like Overdrive should offer a
    # link with rel="open access", unless we know it's an
    # open-access link we will give it a RightsStatus of
    # IN_COPYRIGHT.
    overdrive_pool = apply_open_access_link(
        Identifier.OVERDRIVE_ID, DataSource.OVERDRIVE
    )
    overdrive_rights = overdrive_pool.delivery_mechanisms[0].rights_status
    assert RightsStatus.IN_COPYRIGHT == overdrive_rights.uri
    assert False == overdrive_pool.open_access