def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None):
    """Add formats described by ``odl:license`` tags to an entry's detail.

    The detail dictionary is first built by
    ``OPDSImporter._detail_for_elementtree_entry``. Each ``odl:license``
    tag under the entry then contributes one FormatData per
    ``odl:protection`` tag it contains, or a single FormatData with
    ``drm_scheme=None`` when it contains none. Every FormatData created
    here uses ``RightsStatus.IN_COPYRIGHT`` as its rights URI.

    :param parser: Object providing ``_xpath`` and
        ``text_of_optional_subtag``.
    :param entry_tag: The entry element to inspect.
    :param feed_url: Passed through to the OPDSImporter implementation.
    :return: The detail dictionary, with the new formats appended to
        ``data['circulation']['formats']``.
    """
    text_of = parser.text_of_optional_subtag
    data = OPDSImporter._detail_for_elementtree_entry(
        parser, entry_tag, feed_url
    )

    new_formats = []
    for license_tag in parser._xpath(entry_tag, 'odl:license') or []:
        media_type = text_of(license_tag, 'dcterms:format')
        schemes = [
            text_of(protection_tag, 'dcterms:format')
            for protection_tag in (
                parser._xpath(license_tag, 'odl:protection') or []
            )
        ]
        # With no protection tags we still record the content type once,
        # with no DRM scheme attached.
        new_formats.extend(
            FormatData(
                content_type=media_type,
                drm_scheme=scheme,
                rights_uri=RightsStatus.IN_COPYRIGHT,
            )
            for scheme in schemes or [None]
        )

    # Make sure there is a 'circulation' dict with a 'formats' list to
    # extend, replacing missing or empty values.
    circulation = data.get('circulation')
    if not circulation:
        circulation = data['circulation'] = dict()
    if not circulation.get('formats'):
        circulation['formats'] = []
    circulation['formats'].extend(new_formats)
    return data
def extract_circulation(self, primary_identifier, availability, formattype):
    """Turn the 'availability' portion of an Enki API response into
    a CirculationData.

    :param primary_identifier: Identifier information passed through
        unchanged to the CirculationData.
    :param availability: Dictionary-like object. The keys read are
        "totalCopies", "availableCopies", "onHold" (each defaulting to 0
        when absent) and "accessType".
    :param formattype: "PDF" or "EPUB". Any other value is logged as an
        error and the resulting CirculationData has no formats.
    :return: A CirculationData, or None when `availability` is empty
        or missing.
    """
    if not availability:
        return None

    licenses_owned = availability.get("totalCopies", 0)
    licenses_available = availability.get("availableCopies", 0)
    hold = availability.get("onHold", 0)

    # Only the "acs" access type is treated as Adobe DRM; anything else
    # is recorded as having no DRM.
    drm_type = EnkiAPI.no_drm
    if availability.get("accessType") == "acs":
        drm_type = EnkiAPI.adobe_drm

    content_type = None
    if formattype == "PDF":
        content_type = Representation.PDF_MEDIA_TYPE
    elif formattype == "EPUB":
        content_type = Representation.EPUB_MEDIA_TYPE

    formats = []
    # Identity comparison is the correct test against the None singleton.
    if content_type is not None:
        formats.append(FormatData(content_type, drm_scheme=drm_type))
    else:
        self.log.error("Unrecognized formattype: %s", formattype)

    circulationdata = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=primary_identifier,
        formats=formats,
        licenses_owned=int(licenses_owned),
        licenses_available=int(licenses_available),
        licenses_reserved=0,
        patrons_in_hold_queue=int(hold),
    )
    return circulationdata
def test_circulationdata_can_be_deepcopied(self):
    # A CirculationData must not hold anything that prevents it from
    # being copied (e.g., self.log).

    # These two are constructed but not passed to the CirculationData.
    subject = SubjectData(Subject.TAG, "subject")
    contributor = ContributorData()

    gutenberg_identifier = IdentifierData(Identifier.GUTENBERG_ID, "1")
    download_link = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    epub_format = FormatData(
        Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM
    )
    open_access_rights = RightsStatus.GENERIC_OPEN_ACCESS

    original = CirculationData(
        DataSource.GUTENBERG,
        primary_identifier=gutenberg_identifier,
        links=[download_link],
        licenses_owned=5,
        licenses_available=5,
        licenses_reserved=None,
        patrons_in_hold_queue=None,
        formats=[epub_format],
        default_rights_uri=open_access_rights,
    )

    duplicate = deepcopy(original)

    # Reaching this point without an exception is the real check.
    assert duplicate is not None
def test_rights_status_commercial_link_with_rights(self):
    # A DRM-encrypted link and format that both declare IN_COPYRIGHT
    # should give a non-open-access pool whose single delivery mechanism
    # carries that rights status.
    overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")

    encrypted_link = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    adobe_format = FormatData(
        content_type=encrypted_link.media_type,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
        link=encrypted_link,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    circulation_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=overdrive_identifier,
        links=[encrypted_link],
        formats=[adobe_format],
    )

    replace_formats = ReplacementPolicy(formats=True)

    pool, _ = circulation_data.license_pool(
        self._db, self._default_collection
    )
    circulation_data.apply(self._db, pool.collection, replace_formats)

    assert False == pool.open_access
    assert 1 == len(pool.delivery_mechanisms)
    only_mechanism_rights = pool.delivery_mechanisms[0].rights_status.uri
    assert RightsStatus.IN_COPYRIGHT == only_mechanism_rights
def test_license_pool_sets_default_license_values(self):
    """A LicensePool can be created from a CirculationData that says
    nothing about how many copies are licensed, so that format
    information has somewhere to live.
    """
    overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
    pdf_with_drm = FormatData(
        content_type=Representation.PDF_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=overdrive_identifier,
        formats=[pdf_with_drm],
    )

    collection = self._default_collection
    pool, is_new = circulation.license_pool(self._db, collection)

    assert True == is_new
    assert collection == pool.collection

    # With no license counts supplied, every count on the new pool
    # starts out at zero.
    assert 0 == pool.licenses_owned
    assert 0 == pool.licenses_available
    assert 0 == pool.licenses_reserved
    assert 0 == pool.patrons_in_hold_queue
def test_explicit_formatdata(self):
    # An edition created with an open-access download already has a
    # delivery mechanism for it.
    edition, pool = self._edition(with_open_access_download=True)

    # Add a DRM-protected PDF on top of that.
    pdf_with_drm = FormatData(
        content_type=Representation.PDF_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation_data = CirculationData(
        formats=[pdf_with_drm],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )
    circulation_data.apply(self._db, pool.collection)

    # Sorted by content type, the two mechanisms unpack as EPUB then PDF.
    epub, pdf = sorted(
        pool.delivery_mechanisms,
        key=lambda lpdm: lpdm.delivery_mechanism.content_type,
    )
    assert epub.resource == pool.best_open_access_resource
    assert Representation.PDF_MEDIA_TYPE == pdf.delivery_mechanism.content_type
    assert DeliveryMechanism.ADOBE_DRM == pdf.delivery_mechanism.drm_scheme

    # Applying again with a policy that replaces formats leaves only the
    # format we supplied explicitly.
    replace_formats = ReplacementPolicy(formats=True)
    circulation_data.apply(
        self._db, pool.collection, replace=replace_formats
    )

    [pdf] = pool.delivery_mechanisms
    assert Representation.PDF_MEDIA_TYPE == pdf.delivery_mechanism.content_type
def _add_format_data(cls, circulation):
    """Append a bearer-token FormatData for each supported acquisition link.

    A link qualifies when its rel is ``Hyperlink.GENERIC_OPDS_ACQUISITION``
    and its media type is listed in
    ``OPDSForDistributorsAPI.SUPPORTED_MEDIA_TYPES``. Qualifying links are
    added to ``circulation.formats`` in place; nothing is returned.

    :param circulation: Object exposing ``links`` (iterable of links with
        ``rel`` and ``media_type``) and a mutable ``formats`` list.
    """
    for link in circulation.links:
        if link.rel != Hyperlink.GENERIC_OPDS_ACQUISITION:
            continue
        if link.media_type not in OPDSForDistributorsAPI.SUPPORTED_MEDIA_TYPES:
            continue
        bearer_token_format = FormatData(
            content_type=link.media_type,
            drm_scheme=DeliveryMechanism.BEARER_TOKEN,
            link=link,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        circulation.formats.append(bearer_token_format)
def test_apply_removes_old_formats_based_on_replacement_policy(self):
    # Whether apply() keeps or deletes pre-existing delivery mechanisms
    # depends on ReplacementPolicy.formats.
    edition, pool = self._edition(with_license_pool=True)

    # Clear whatever mechanisms the pool came with, then give it exactly
    # one: an Adobe-DRM PDF.
    for existing in pool.delivery_mechanisms:
        self._db.delete(existing)
    old_lpdm = pool.set_delivery_mechanism(
        Representation.PDF_MEDIA_TYPE,
        DeliveryMechanism.ADOBE_DRM,
        RightsStatus.IN_COPYRIGHT,
        None,
    )

    # A patron has a loan fulfilled through that mechanism.
    patron = self._patron()
    loan, _ = pool.loan_to(patron, fulfillment=old_lpdm)
    assert old_lpdm == loan.fulfillment

    # New circulation data arrives describing a different format.
    epub_format = FormatData(
        content_type=Representation.EPUB_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation_data = CirculationData(
        formats=[epub_format],
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    # formats=False: the new format is added alongside the old one.
    keep_old_formats = ReplacementPolicy(formats=False)
    circulation_data.apply(self._db, pool.collection, keep_old_formats)

    assert 2 == len(pool.delivery_mechanisms)
    expected_types = {
        Representation.PDF_MEDIA_TYPE,
        Representation.EPUB_MEDIA_TYPE,
    }
    actual_types = {
        lpdm.delivery_mechanism.content_type
        for lpdm in pool.delivery_mechanisms
    }
    assert expected_types == actual_types
    assert old_lpdm == loan.fulfillment

    # formats=True: the old format is deleted and detached from the loan.
    drop_old_formats = ReplacementPolicy(formats=True)
    circulation_data.apply(self._db, pool.collection, drop_old_formats)

    assert 1 == len(pool.delivery_mechanisms)
    remaining_type = pool.delivery_mechanisms[0].delivery_mechanism.content_type
    assert Representation.EPUB_MEDIA_TYPE == remaining_type
    assert None == loan.fulfillment
def extract_bibliographic(self, element):
    """Build a Metadata object, with CirculationData attached, from one
    Enki title record.

    :param element: Dictionary-like record. The keys read are "isbn",
        "author", "id", "large_image", "title", "publisher" and
        "availability" (itself read for "totalCopies", "availableCopies",
        "onHold" and "accessType").
    :return: A Metadata whose ``circulation`` attribute holds the
        CirculationData built from the "availability" entry.
    """
    isbn = IdentifierData(Identifier.ISBN, element["isbn"])

    # An empty author falls back to the generic unknown-author name.
    author_sort_name = element["author"] or Edition.UNKNOWN_AUTHOR
    author = ContributorData(sort_name=author_sort_name)

    primary_identifier = IdentifierData(EnkiAPI.ENKI_ID, element["id"])

    # The same "large_image" URL is used for both the full image and
    # the thumbnail.
    image_url = element["large_image"]
    thumbnail_url = element["large_image"]
    thumbnail_link = LinkData(
        rel=Hyperlink.THUMBNAIL_IMAGE,
        href=thumbnail_url,
        media_type=Representation.PNG_MEDIA_TYPE,
    )
    image_link = LinkData(
        rel=Hyperlink.IMAGE,
        href=image_url,
        media_type=Representation.PNG_MEDIA_TYPE,
    )

    metadata = Metadata(
        data_source=DataSource.ENKI,
        title=element["title"],
        language="eng",
        medium=Edition.BOOK_MEDIUM,
        publisher=element["publisher"],
        primary_identifier=primary_identifier,
        identifiers=[isbn],
        contributors=[author],
        links=[thumbnail_link, image_link],
    )

    availability = element["availability"]
    licenses_owned = availability["totalCopies"]
    licenses_available = availability["availableCopies"]
    hold = availability["onHold"]
    if availability["accessType"] == 'acs':
        drm_type = EnkiAPI.adobe_drm
    else:
        drm_type = EnkiAPI.no_drm

    # This method always records the title as an EPUB.
    epub_format = FormatData(
        content_type=Representation.EPUB_MEDIA_TYPE,
        drm_scheme=drm_type,
    )

    metadata.circulation = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=primary_identifier,
        formats=[epub_format],
        licenses_owned=int(licenses_owned),
        licenses_available=int(licenses_available),
        patrons_in_hold_queue=int(hold),
    )
    return metadata
def test_format_change_may_change_open_access_status(self):
    # Every apply() call in this test uses a policy that throws away the
    # old list of formats and rebuilds it.
    replace_formats = ReplacementPolicy(formats=True)

    # Begin with a pool that is not open-access.
    edition, pool = self._edition(with_license_pool=True)
    assert False == pool.open_access

    # We learn about an open-access download for it.
    open_access_link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.CC_BY_ND,
    )
    open_access_data = CirculationData(
        data_source=pool.data_source,
        primary_identifier=pool.identifier,
        links=[open_access_link],
    )

    # Applying that makes the pool open-access.
    open_access_data.apply(
        self._db, pool.collection, replace=replace_formats
    )
    assert True == pool.open_access

    # It turns out to be a mistake: the book is in copyright.
    in_copyright_format = FormatData(
        Representation.EPUB_MEDIA_TYPE,
        DeliveryMechanism.NO_DRM,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    corrected_data = CirculationData(
        data_source=pool.data_source,
        primary_identifier=pool.identifier,
        formats=[in_copyright_format],
    )
    corrected_data.apply(
        self._db, pool.collection, replace=replace_formats
    )

    # Only the new delivery mechanism is left, and the pool is no longer
    # open-access.
    assert False == pool.open_access
    assert 1 == len(pool.delivery_mechanisms)
def test_circulationdata_may_require_collection(self):
    """Whether apply() can be called without a Collection depends on
    what information the CirculationData carries.
    """
    overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
    epub_format = FormatData(
        Representation.EPUB_MEDIA_TYPE,
        DeliveryMechanism.NO_DRM,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    circdata = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=overdrive_identifier,
        formats=[epub_format],
    )
    circdata.apply(self._db, collection=None)

    # A LicensePoolDeliveryMechanism now exists for the title even
    # though no LicensePool does.
    identifier_obj, _ = overdrive_identifier.load(self._db)
    assert [] == identifier_obj.licensed_through
    [lpdm] = identifier_obj.delivery_mechanisms
    assert DataSource.OVERDRIVE == lpdm.data_source.name
    assert RightsStatus.IN_COPYRIGHT == lpdm.rights_status.uri

    mechanism = lpdm.delivery_mechanism
    assert Representation.EPUB_MEDIA_TYPE == mechanism.content_type
    assert DeliveryMechanism.NO_DRM == mechanism.drm_scheme

    # Information that can only live in a LicensePool makes apply()
    # refuse to run without a Collection.
    circdata.licenses_owned = 0
    with pytest.raises(ValueError) as excinfo:
        circdata.apply(self._db, collection=None)
    expected_message = (
        "Cannot store circulation information because no Collection was provided."
    )
    assert expected_message in str(excinfo.value)
def set_format(cls, format_received, formats):
    """Record the FormatData for an Odilo format and return its medium.

    :param format_received: The format name as received from Odilo; used
        as a key into ``cls.format_data_for_odilo_format`` and
        ``cls.odilo_medium_to_simplified_medium``.
    :param formats: List to which the matching FormatData is appended
        in place.
    :return: The medium mapped to `format_received`, or None when there
        is no mapping for it.
    """
    format_data = cls.format_data_for_odilo_format.get(format_received)
    # An unknown format has no (content type, DRM scheme) pair. Skip it
    # rather than failing with a TypeError while unpacking None.
    if format_data is not None:
        content_type, drm_scheme = format_data
        formats.append(FormatData(content_type, drm_scheme))
    return cls.odilo_medium_to_simplified_medium.get(format_received)
def _extract_publication_metadata(self, feed, publication, data_source_name):
    """Extract a Metadata object from webpub-manifest-parser's publication,
    adding the licenses and formats described by its ODL licenses.

    :param feed: Feed object
    :type feed: opds2_ast.OPDS2Feed

    :param publication: Publication object
    :type publication: opds2_ast.OPDS2Publication

    :param data_source_name: Data source's name
    :type data_source_name: str

    :return: Publication's metadata
    :rtype: Metadata
    """
    metadata = super(ODL2Importer, self)._extract_publication_metadata(
        feed, publication, data_source_name
    )

    collected_formats = []
    collected_licenses = []
    medium = None

    with self._get_configuration(self._db) as configuration:
        skipped_license_formats = configuration.skipped_license_formats
        if skipped_license_formats:
            skipped_license_formats = set(skipped_license_formats)

    for odl_license in publication.licenses or []:
        license_metadata = odl_license.metadata
        identifier = license_metadata.identifier

        # Reduce each link to its href, leaving the value untouched when
        # no link with that relation was found.
        borrow_link = first_or_default(
            odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
        )
        checkout_url = borrow_link.href if borrow_link else borrow_link

        self_link = first_or_default(
            odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
        )
        info_document_url = self_link.href if self_link else self_link

        terms = license_metadata.terms
        if terms:
            expires = to_utc(terms.expires)
            concurrency = int(terms.concurrency)
        else:
            expires = None
            concurrency = None

        # A license can only be parsed when it links to its License
        # Info Document.
        parsed_license = None
        if info_document_url:
            parsed_license = ODLImporter.get_license_data(
                info_document_url,
                checkout_url,
                identifier,
                expires,
                concurrency,
                self.http_get,
            )
        if parsed_license is not None:
            collected_licenses.append(parsed_license)

        # The DPLA feed has no information about the DRM protection used
        # for audiobooks, so content types found in the License Info
        # Document (when one was parsed) are merged in as well.
        license_formats = set(license_metadata.formats)
        if parsed_license and parsed_license.content_types:
            license_formats |= set(parsed_license.content_types)

        for license_format in license_formats:
            if (
                skipped_license_formats
                and license_format in skipped_license_formats
            ):
                continue

            # The first format that is not skipped decides the medium.
            if not medium:
                medium = Edition.medium_from_media_type(license_format)

            if license_format in ODLImporter.LICENSE_FORMATS:
                # Special case to handle DeMarque audiobooks which
                # include the protection in the content type.
                known_format = ODLImporter.LICENSE_FORMATS[license_format]
                drm_schemes = [known_format[ODLImporter.DRM_SCHEME]]
                content_type = known_format[ODLImporter.CONTENT_TYPE]
            else:
                content_type = license_format
                protection = license_metadata.protection
                drm_schemes = protection.formats if protection else []

            # With no DRM schemes, record the content type once with no
            # DRM scheme attached.
            collected_formats.extend(
                FormatData(
                    content_type=content_type,
                    drm_scheme=drm_scheme,
                    rights_uri=RightsStatus.IN_COPYRIGHT,
                )
                for drm_scheme in drm_schemes or [None]
            )

    circulation = metadata.circulation
    circulation.licenses = collected_licenses
    circulation.licenses_owned = None
    circulation.licenses_available = None
    circulation.licenses_reserved = None
    circulation.patrons_in_hold_queue = None
    circulation.formats.extend(collected_formats)
    metadata.medium = medium

    return metadata
def update_licensepool_for_identifier(self, isbn, availability, medium, policy=None):
    """Update availability information for a single book.

    A LicensePool is looked up (or created) for the identifier. When the
    pool is new, bibliographic coverage is ensured for its identifier.
    Circulation numbers are approximate: we only know whether the book
    can be lent, not how many copies exist.

    :param isbn: The identifier the vendor uses for the book; looked up
        as an ``Identifier.RB_DIGITAL_ID``.
    :param availability: Truthy when the book can be lent to patrons.
    :param medium: The vendor's name for the book's medium. It is
        lower-cased; 'ebook' and 'eaudio' are the values that produce a
        format.
    :param policy: Optional replacement policy for
        ``CirculationData.apply``; a falsy value selects
        ``self.default_circulation_replacement_policy``.
    :return: A 3-tuple ``(license_pool, is_new_pool, circulation_changed)``.
    """
    # Find (or create) the pool that matches this identifier.
    license_pool, is_new_pool = LicensePool.for_foreign_id(
        self._db,
        DataSource.RB_DIGITAL,
        Identifier.RB_DIGITAL_ID,
        isbn,
        collection=self.collection,
    )
    if is_new_pool:
        # First sighting of this book: make sure its identifier has
        # bibliographic coverage.
        self.bibliographic_coverage_provider.ensure_coverage(
            license_pool.identifier
        )

    # The exact number of available licenses is unknown; all we can say
    # is zero (not lendable) or more than zero (lendable).
    licenses_available = 1 if availability else 0

    # The book showed up in availability, so at least one license is owned.
    licenses_owned = 1

    nothing_changed = (
        not is_new_pool
        and license_pool.licenses_owned == licenses_owned
        and license_pool.licenses_available == licenses_available
    )
    if nothing_changed:
        # Optimization: skip CirculationData.apply() entirely.
        return license_pool, is_new_pool, False

    # These medium strings ("ebook") are not the file formats ("EPUB")
    # reported in "fileFormat" on a patron's loans; the file format is
    # not available when checking availability before a checkout.
    delivery_type = None
    drm_scheme = None
    medium = medium.lower()
    if medium == 'ebook':
        delivery_type = Representation.EPUB_MEDIA_TYPE
        # The DRM scheme is not reported at this point, but some of the
        # vendor's EPUBs do have Adobe DRM, and that may change later.
        drm_scheme = DeliveryMechanism.ADOBE_DRM
    elif medium == 'eaudio':
        # TODO: we can't deliver on this promise yet, but this is
        # how we will be delivering audiobook manifests.
        delivery_type = Representation.AUDIOBOOK_MANIFEST_MEDIA_TYPE

    # Describe how the book is available, when the medium was recognized.
    formats = [FormatData(delivery_type, drm_scheme)] if delivery_type else []

    circulation_data = CirculationData(
        data_source=DataSource.RB_DIGITAL,
        primary_identifier=license_pool.identifier,
        licenses_owned=licenses_owned,
        licenses_available=licenses_available,
        formats=formats,
    )

    license_pool, circulation_changed = circulation_data.apply(
        self._db,
        self.collection,
        replace=policy or self.default_circulation_replacement_policy,
    )
    return license_pool, is_new_pool, circulation_changed