def test_rights_status_open_access_link_no_rights_uses_data_source_default(
        self):
    # An open-access link that carries no rights information of its
    # own gets whatever rights status is the default for its data
    # source.
    policy = ReplacementPolicy(formats=True)

    def apply_open_access_link(identifier_type, source_name):
        # Build a CirculationData holding a single open-access EPUB
        # link, apply it to a pool that starts out non-open-access,
        # and hand back that pool.
        data = CirculationData(
            data_source=source_name,
            primary_identifier=IdentifierData(identifier_type, "abcd"),
            links=[
                LinkData(
                    rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                    media_type=Representation.EPUB_MEDIA_TYPE,
                    href=self._url,
                )
            ],
        )
        license_pool, _ = data.license_pool(
            self._db, self._default_collection
        )
        license_pool.open_access = False
        data.apply(self._db, license_pool.collection, policy)
        return license_pool

    # Applying Gutenberg data makes the LicensePool open-access and
    # creates exactly one LicensePoolDeliveryMechanism.
    gutenberg_pool = apply_open_access_link(
        Identifier.GUTENBERG_ID, DataSource.GUTENBERG
    )
    eq_(True, gutenberg_pool.open_access)
    eq_(1, gutenberg_pool.delivery_mechanisms.count())

    # Its rights status is the Gutenberg default.
    eq_(
        RightsStatus.PUBLIC_DOMAIN_USA,
        gutenberg_pool.delivery_mechanisms[0].rights_status.uri,
    )

    # A commercial source such as Overdrive may offer a link with
    # rel="open access", but unless we know the link really is
    # open-access it gets a RightsStatus of IN_COPYRIGHT and the pool
    # stays non-open-access.
    overdrive_pool = apply_open_access_link(
        Identifier.OVERDRIVE_ID, DataSource.OVERDRIVE
    )
    eq_(
        RightsStatus.IN_COPYRIGHT,
        overdrive_pool.delivery_mechanisms[0].rights_status.uri,
    )
    eq_(False, overdrive_pool.open_access)
def test_apply_identifier_equivalency(self):
    # Start from an edition whose primary identifier is described
    # twice: once as the real Identifier and once as IdentifierData.
    edition, _pool = self._edition(with_license_pool=True)
    primary = edition.primary_identifier
    same_as_primary = IdentifierData(
        type=primary.type, identifier=primary.identifier
    )
    unrelated = IdentifierData(type=u"abc", identifier=u"def")

    metadata = Metadata(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=primary,
        identifiers=[same_as_primary, unrelated],
    )

    # Metadata appends the primary identifier to the identifiers it
    # was given, so there are three entries.
    eq_(3, len(metadata.identifiers))
    assert primary in metadata.identifiers

    metadata.apply(edition)

    # Only the unrelated identifier became an equivalency; neither the
    # primary identifier nor its IdentifierData twin did.
    eq_(1, len(primary.equivalencies))
    (only_equivalency,) = primary.equivalencies
    linked_identifier = only_equivalency.output
    eq_(linked_identifier.type, u"abc")
    eq_(linked_identifier.identifier, u"def")
def test_rights_status_open_access_link_no_rights_uses_data_source_default(self):
    # An open-access link with no rights information of its own gets
    # the default rights status of its data source.
    policy = ReplacementPolicy(formats=True)

    def apply_open_access_link(identifier_type, source_name):
        # Build a CirculationData with one open-access EPUB link,
        # apply it to its own license pool and return that pool.
        data = CirculationData(
            data_source=source_name,
            primary_identifier=IdentifierData(identifier_type, "abcd"),
            links=[
                LinkData(
                    rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                    media_type=Representation.EPUB_MEDIA_TYPE,
                    href=self._url,
                )
            ],
        )
        license_pool, _ = data.license_pool(self._db)
        data.apply(license_pool, policy)
        return license_pool

    gutenberg_pool = apply_open_access_link(
        Identifier.GUTENBERG_ID, DataSource.GUTENBERG
    )
    eq_(True, gutenberg_pool.open_access)
    eq_(1, len(gutenberg_pool.delivery_mechanisms))

    # The delivery mechanism picked up the Gutenberg default.
    eq_(
        RightsStatus.PUBLIC_DOMAIN_USA,
        gutenberg_pool.delivery_mechanisms[0].rights_status.uri,
    )

    # A commercial source such as Overdrive may offer a link with
    # rel="open access", but unless we know the link really is
    # open-access it gets a RightsStatus of IN_COPYRIGHT.
    overdrive_pool = apply_open_access_link(
        Identifier.OVERDRIVE_ID, DataSource.OVERDRIVE
    )
    eq_(
        RightsStatus.IN_COPYRIGHT,
        overdrive_pool.delivery_mechanisms[0].rights_status.uri,
    )

    # As a result the work is treated as non-open-access.
    eq_(False, overdrive_pool.open_access)
def extract_identifier(cls, identifier_tag):
    """Convert a <dcterms:identifier> tag into an IdentifierData.

    The tag's text is lower-cased and parsed as a URN. Returns None
    when a ValueError is raised while parsing the URN or building the
    IdentifierData.
    """
    urn = identifier_tag.text.lower()
    try:
        identifier_type, identifier_value = (
            Identifier.type_and_identifier_for_urn(urn)
        )
        return IdentifierData(identifier_type, identifier_value)
    except ValueError:
        return None
def test_rights_status_default_rights_from_data_source(self):
    # A link with no rights information of its own inherits the
    # default rights status of its data source.
    oa_data = CirculationData(
        data_source=DataSource.OA_CONTENT_SERVER,
        primary_identifier=IdentifierData(Identifier.GUTENBERG_ID, "abcd"),
        links=[
            LinkData(
                rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
                media_type=Representation.EPUB_MEDIA_TYPE,
                href=self._url,
            )
        ],
    )
    policy = ReplacementPolicy(formats=True)

    license_pool, _ = oa_data.license_pool(self._db)
    oa_data.apply(license_pool, policy)

    eq_(True, license_pool.open_access)
    eq_(1, len(license_pool.delivery_mechanisms))

    # The OA content server's default rights status was used.
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.GENERIC_OPEN_ACCESS, only_mechanism.rights_status.uri)
def test_rights_status_default_rights_from_data_source(self):
    # A link with no rights information of its own inherits the
    # default rights status of its data source.
    oa_data = CirculationData(
        data_source=DataSource.OA_CONTENT_SERVER,
        primary_identifier=IdentifierData(Identifier.GUTENBERG_ID, "abcd"),
        links=[
            LinkData(
                rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
                media_type=Representation.EPUB_MEDIA_TYPE,
                href=self._url,
            )
        ],
    )
    policy = ReplacementPolicy(formats=True)

    # Before the data is applied the pool is not open-access.
    license_pool, _ = oa_data.license_pool(
        self._db, self._default_collection
    )
    eq_(False, license_pool.open_access)

    oa_data.apply(self._db, license_pool.collection, policy)

    # Being given a link from the OA content server turned the pool
    # open-access.
    eq_(True, license_pool.open_access)
    eq_(1, license_pool.delivery_mechanisms.count())

    # The OA content server's default rights status was used.
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.GENERIC_OPEN_ACCESS, only_mechanism.rights_status.uri)
def test_rights_status_default_rights_passed_in(self):
    # A default_rights_uri handed to CirculationData is the rights
    # status given to a link that has none of its own.
    cc_by_data = CirculationData(
        data_source=DataSource.OA_CONTENT_SERVER,
        primary_identifier=IdentifierData(Identifier.GUTENBERG_ID, "abcd"),
        default_rights_uri=RightsStatus.CC_BY,
        links=[
            LinkData(
                rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
                media_type=Representation.EPUB_MEDIA_TYPE,
                href=self._url,
            )
        ],
    )
    policy = ReplacementPolicy(formats=True)

    license_pool, _ = cc_by_data.license_pool(
        self._db, self._default_collection
    )
    cc_by_data.apply(self._db, license_pool.collection, policy)

    eq_(True, license_pool.open_access)
    eq_(1, license_pool.delivery_mechanisms.count())

    # The explicitly supplied default was used.
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.CC_BY, only_mechanism.rights_status.uri)
def test_links_filtered(self):
    # Metadata keeps only the links that are relevant to metadata.
    download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    plain_image = LinkData(rel=Hyperlink.IMAGE, href="http://example.com/")
    description = LinkData(rel=Hyperlink.DESCRIPTION, content="foo")
    thumbnail = LinkData(
        rel=Hyperlink.THUMBNAIL_IMAGE,
        href="http://thumbnail.com/",
        media_type=Representation.JPEG_MEDIA_TYPE,
    )
    image_with_thumbnail = LinkData(
        rel=Hyperlink.IMAGE,
        href="http://example.com/",
        thumbnail=thumbnail,
        media_type=Representation.JPEG_MEDIA_TYPE,
    )

    metadata = Metadata(
        data_source=DataSource.GUTENBERG,
        primary_identifier=IdentifierData(Identifier.GUTENBERG_ID, "1"),
        links=[
            download, plain_image, description, thumbnail,
            image_with_thumbnail,
        ],
    )

    # Order by rel so the comparison is deterministic.
    kept = list(metadata.links)
    kept.sort(key=lambda link: link.rel)
    eq_([plain_image, image_with_thumbnail, thumbnail, description], kept)
def test_rights_status_open_access_link_with_rights(self):
    # An open-access link that names its own rights URI keeps it.
    licensed_link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.CC_BY_ND,
    )
    overdrive_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=IdentifierData(Identifier.OVERDRIVE_ID, "abcd"),
        links=[licensed_link],
    )
    policy = ReplacementPolicy(formats=True)

    license_pool, _ = overdrive_data.license_pool(self._db)
    overdrive_data.apply(license_pool, policy)

    eq_(True, license_pool.open_access)
    eq_(1, len(license_pool.delivery_mechanisms))
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.CC_BY_ND, only_mechanism.rights_status.uri)
def test_links_filtered(self):
    # CirculationData keeps only the links that are relevant to it.
    download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    plain_image = LinkData(rel=Hyperlink.IMAGE, href="http://example.com/")
    description = LinkData(rel=Hyperlink.DESCRIPTION, content="foo")
    thumbnail = LinkData(
        rel=Hyperlink.THUMBNAIL_IMAGE,
        href="http://thumbnail.com/",
        media_type=Representation.JPEG_MEDIA_TYPE,
    )
    image_with_thumbnail = LinkData(
        rel=Hyperlink.IMAGE,
        href="http://example.com/",
        thumbnail=thumbnail,
        media_type=Representation.JPEG_MEDIA_TYPE,
    )

    circulation_data = CirculationData(
        DataSource.GUTENBERG,
        primary_identifier=IdentifierData(Identifier.GUTENBERG_ID, "1"),
        links=[
            download, plain_image, description, thumbnail,
            image_with_thumbnail,
        ],
    )

    # Order by rel so the comparison is deterministic.
    kept = list(circulation_data.links)
    kept.sort(key=lambda link: link.rel)
    eq_([download], kept)
def test_success(self):
    shared_pwid = 'pwid1'

    # A print book with the shared permanent work ID.
    print_edition = self._edition()
    print_edition.medium = Edition.BOOK_MEDIUM
    print_edition.permanent_work_id = shared_pwid

    # An audiobook with the same permanent work ID.
    audio_edition = self._edition()
    audio_edition.medium = Edition.AUDIO_MEDIUM
    audio_edition.permanent_work_id = shared_pwid

    # A Metadata object describing a second print book, again with
    # the same permanent work ID.
    second_print_identifier = self._identifier()
    metadata = Metadata(
        DataSource.GUTENBERG,
        primary_identifier=IdentifierData(
            type=second_print_identifier.type,
            identifier=second_print_identifier.identifier,
        ),
        medium=Edition.BOOK_MEDIUM,
    )
    metadata.permanent_work_id = shared_pwid

    # Exercise the method under test.
    metadata.associate_with_identifiers_based_on_permanent_work_id(self._db)

    # The second print book's identifier is now equivalent to the
    # first print book's identifier, but not to the audiobook's.
    linked = [
        equivalency.output
        for equivalency in second_print_identifier.equivalencies
    ]
    eq_([print_edition.primary_identifier], linked)
def test_rights_status_commercial_link_with_rights(self):
    # A DRM-encrypted link with an explicit in-copyright rights URI
    # produces a non-open-access pool with that rights status.
    drm_link = LinkData(
        rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    drm_format = FormatData(
        content_type=drm_link.media_type,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
        link=drm_link,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    overdrive_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=IdentifierData(Identifier.OVERDRIVE_ID, "abcd"),
        links=[drm_link],
        formats=[drm_format],
    )
    policy = ReplacementPolicy(formats=True)

    license_pool, _ = overdrive_data.license_pool(self._db)
    overdrive_data.apply(license_pool, policy)

    eq_(False, license_pool.open_access)
    eq_(1, len(license_pool.delivery_mechanisms))
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.IN_COPYRIGHT, only_mechanism.rights_status.uri)
def test_circulationdata_can_be_deepcopied(self):
    # Guard against storing something on CirculationData (such as
    # self.log) that cannot be deep-copied.
    subject = SubjectData(Subject.TAG, "subject")
    contributor = ContributorData()
    gutenberg_identifier = IdentifierData(Identifier.GUTENBERG_ID, "1")
    download_link = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    epub_format = FormatData(
        Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM
    )

    original = CirculationData(
        DataSource.GUTENBERG,
        primary_identifier=gutenberg_identifier,
        links=[download_link],
        licenses_owned=5,
        licenses_available=5,
        licenses_reserved=None,
        patrons_in_hold_queue=None,
        formats=[epub_format],
        default_rights_uri=RightsStatus.GENERIC_OPEN_ACCESS,
    )

    duplicate = deepcopy(original)

    # Reaching this point without an exception is the real test.
    assert duplicate is not None
def test_rights_status_open_access_link_no_rights(self):
    # An open-access link with no rights information ends up with the
    # generic open-access rights status.
    overdrive_data = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=IdentifierData(Identifier.OVERDRIVE_ID, "abcd"),
        links=[
            LinkData(
                rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
                media_type=Representation.EPUB_MEDIA_TYPE,
                href=self._url,
            )
        ],
    )
    policy = ReplacementPolicy(formats=True)

    license_pool, _ = overdrive_data.license_pool(self._db)
    overdrive_data.apply(license_pool, policy)

    eq_(True, license_pool.open_access)
    eq_(1, len(license_pool.delivery_mechanisms))

    # There is an open-access link and nothing else to go on, so the
    # rights status is generic open access.
    only_mechanism = license_pool.delivery_mechanisms[0]
    eq_(RightsStatus.GENERIC_OPEN_ACCESS, only_mechanism.rights_status.uri)
def test_license_pool_sets_default_license_values(self):
    """A LicensePool can be created without any licensing numbers,
    which lets us store format information; its counts start at zero.
    """
    adobe_pdf = FormatData(
        content_type=Representation.PDF_MEDIA_TYPE,
        drm_scheme=DeliveryMechanism.ADOBE_DRM,
    )
    circulation = CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=IdentifierData(Identifier.OVERDRIVE_ID, "1"),
        formats=[adobe_pdf],
    )

    license_pool, created = circulation.license_pool(self._db)
    eq_(True, created)

    # The conservative starting assumption is that no licenses are
    # owned, available or reserved and nobody is waiting.
    for counter in (
        'licenses_owned',
        'licenses_available',
        'licenses_reserved',
        'patrons_in_hold_queue',
    ):
        eq_(0, getattr(license_pool, counter))
def test_make_thumbnail_assigns_pool(self):
    # A thumbnail link created while applying Metadata is attached to
    # the license pool of the Metadata's CirculationData.
    identifier_data = IdentifierData(Identifier.GUTENBERG_ID, "1")
    edition = self._edition(identifier_id=identifier_data.identifier)
    source = edition.data_source

    thumbnail = LinkData(
        rel=Hyperlink.THUMBNAIL_IMAGE,
        href="http://thumbnail.com/",
        media_type=Representation.JPEG_MEDIA_TYPE,
    )
    metadata = Metadata(
        data_source=source,
        primary_identifier=identifier_data,
        links=[thumbnail],
    )
    circulation = CirculationData(
        data_source=source,
        primary_identifier=identifier_data,
    )
    metadata.circulation = circulation

    metadata.apply(edition)

    stored_link = edition.primary_identifier.links[0]
    expected_pool, _is_new = circulation.license_pool(self._db)
    eq_(stored_link.license_pool, expected_pool)
def test_availability_needs_update(self):
    """Exercise the rule deciding whether a LicensePool's availability
    information should be overwritten by a CirculationData.
    """
    identifier = IdentifierData(Identifier.GUTENBERG_ID, "1")
    now = datetime.datetime.utcnow()
    one_day = datetime.timedelta(days=1)

    # With no explicit value, last_checked is set to (about) the
    # current time.
    fresh = CirculationData(DataSource.GUTENBERG, identifier)
    assert (fresh.last_checked - now).total_seconds() < 10

    stale = CirculationData(
        DataSource.GUTENBERG, identifier, last_checked=now - one_day
    )

    _edition, license_pool = self._edition(with_license_pool=True)

    # A pool that has never been checked always needs an update.
    license_pool.last_checked = None
    for data in (fresh, stale):
        eq_(True, data._availability_needs_update(license_pool))

    # Once the pool has been checked, only information at least as
    # new as the previous check counts.
    license_pool.last_checked = now
    eq_(True, fresh._availability_needs_update(license_pool))
    eq_(False, stale._availability_needs_update(license_pool))
def test_has_open_access_link(self):
    # has_open_access_link depends on both the link's rel and its
    # rights URI.
    circulation = CirculationData(
        DataSource.GUTENBERG,
        IdentifierData(Identifier.GUTENBERG_ID, "1"),
    )

    # With no links at all there is no open-access link.
    eq_(False, circulation.has_open_access_link)

    open_access = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        href=self._url,
    )
    circulation.links = [open_access]

    # An open-access rel with no explicit rights URI counts.
    eq_(True, circulation.has_open_access_link)

    # A rights URI that contradicts the rel disqualifies the link.
    open_access.rights_uri = RightsStatus.IN_COPYRIGHT
    eq_(False, circulation.has_open_access_link)

    # A rights URI that agrees with the rel counts again.
    open_access.rights_uri = RightsStatus.GENERIC_OPEN_ACCESS
    eq_(True, circulation.has_open_access_link)
def extract_availability(self, circulation_data, element, ns):
    """Copy the license counts under `element` onto a CirculationData.

    :param circulation_data: CirculationData to update; when falsy a
        new one is created for the title's Theta identifier.
    :param element: element containing the 'axis:titleId' and
        'axis:availability' subtags.
    :param ns: namespace mapping passed through to the lookup helpers.
    :return: the updated (or newly created) CirculationData.
    :raises ValueError: if an 'axis:updateDate' value is present but
        does not match FULL_DATE_FORMAT after normalisation.
    """
    title_id = self.text_of_subtag(element, 'axis:titleId', ns)
    primary_identifier = IdentifierData(Identifier.THETA_ID, title_id)
    if not circulation_data:
        circulation_data = CirculationData(
            data_source=DataSource.THETA,
            primary_identifier=primary_identifier,
        )

    availability = self._xpath1(element, 'axis:availability', ns)
    owned = self.int_of_subtag(availability, 'axis:totalCopies', ns)
    available = self.int_of_subtag(
        availability, 'axis:availableCopies', ns)
    hold_queue_size = self.int_of_subtag(
        availability, 'axis:holdsQueueSize', ns)

    updated_text = self.text_of_optional_subtag(
        availability, 'axis:updateDate', ns)
    if updated_text:
        try:
            datetime.datetime.strptime(
                updated_text, self.FULL_DATE_FORMAT_IMPLICIT_UTC)
        except ValueError:
            # Not in the implicit-UTC form; leave the text as it is.
            pass
        else:
            # The timestamp has no offset, so spell out UTC.
            updated_text += ' +00:00'
        # The parsed date is not stored on circulation_data; the call
        # still raises ValueError for an unparseable timestamp.
        datetime.datetime.strptime(updated_text, self.FULL_DATE_FORMAT)

    circulation_data.licenses_owned = owned
    circulation_data.licenses_available = available
    circulation_data.licenses_reserved = 0
    circulation_data.patrons_in_hold_queue = hold_queue_size
    return circulation_data
def test_metadata_can_be_deepcopied(self):
    # Guard against storing something on Metadata (such as self.log)
    # that cannot be deep-copied.
    tag_subject = SubjectData(Subject.TAG, "subject")
    blank_contributor = ContributorData()
    gutenberg_identifier = IdentifierData(Identifier.GUTENBERG_ID, "1")
    download_link = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    rating = MeasurementData(Measurement.RATING, 5)
    circulation = CirculationData(
        data_source=DataSource.GUTENBERG,
        primary_identifier=gutenberg_identifier,
        licenses_owned=0,
        licenses_available=0,
        licenses_reserved=0,
        patrons_in_hold_queue=0,
    )
    same_as_primary = IdentifierData(
        type=gutenberg_identifier.type,
        identifier=gutenberg_identifier.identifier,
    )
    unrelated = IdentifierData(type=u"abc", identifier=u"def")

    original = Metadata(
        DataSource.GUTENBERG,
        subjects=[tag_subject],
        contributors=[blank_contributor],
        primary_identifier=gutenberg_identifier,
        links=[download_link],
        measurements=[rating],
        circulation=circulation,

        title="Hello Title",
        subtitle="Subtle Hello",
        sort_title="Sorting Howdy",
        language="US English",
        medium=Edition.BOOK_MEDIUM,
        series="1",
        series_position=1,
        publisher="Hello World Publishing House",
        imprint=u"Follywood",
        issued=datetime.datetime.utcnow(),
        published=datetime.datetime.utcnow(),

        identifiers=[same_as_primary, unrelated],
        data_source_last_updated=datetime.datetime.utcnow(),
    )

    duplicate = deepcopy(original)

    # Reaching this point without an exception is the real test.
    assert duplicate is not None
def process_one(self, tag, namespaces):
    """Turn an <item> tag into a Metadata and an encompassed
    CirculationData objects, and return the Metadata.

    NOTE(review): the portion of this method visible here ends after
    the publication-date parsing loop, before any Metadata is built or
    returned -- confirm against the complete source before editing.

    :param tag: the <item> element whose subtags are read.
    :param namespaces: not used in the visible portion of the method.
    """
    def value(threem_key):
        # Text of the named optional subtag of `tag`.
        return self.text_of_optional_subtag(tag, threem_key)
    links = dict()
    # `identifiers` and `subjects` are rebound below; these first
    # values are never read in the visible code.
    identifiers = dict()
    subjects = []

    primary_identifier = IdentifierData(
        Identifier.THREEM_ID, value("ItemId")
    )

    # Collect whichever of the two ISBN fields have a value.
    identifiers = []
    for key in ('ISBN13', 'PhysicalISBN'):
        v = value(key)
        if v:
            identifiers.append(
                IdentifierData(Identifier.ISBN, v)
            )

    subjects = self.parse_genre_string(value("Genre"))

    title = value("Title")
    subtitle = value("SubTitle")
    publisher = value("Publisher")
    language = value("Language")

    contributors = list(self.contributors_from_string(value('Authors')))

    # Prefer PubDate (tried against both formats); otherwise fall back
    # to PubYear, which is only tried against the year format.
    published_date = None
    published = value("PubDate")
    if published:
        formats = [self.DATE_FORMAT, self.YEAR_FORMAT]
    else:
        published = value("PubYear")
        formats = [self.YEAR_FORMAT]

    # Every format is attempted (there is no break), so the last
    # format that parses wins; only ValueError is swallowed.
    # NOTE(review): if neither PubDate nor PubYear has a value,
    # `published` is presumably None here -- confirm how strptime is
    # expected to behave in that case.
    for format in formats:
        try:
            published_date = datetime.strptime(published, format)
        except ValueError, e:
            pass
def test_filter_recommendations(self):
    # filter_recommendations drops identifiers the database doesn't
    # know about.
    metadata = Metadata(DataSource.OVERDRIVE)
    in_database = self._identifier()
    not_in_database = IdentifierData(Identifier.ISBN, "hey there")

    # The unknown identifier is filtered out.
    metadata.recommendations += [in_database, not_in_database]
    metadata.filter_recommendations(self._db)
    eq_([in_database], metadata.recommendations)

    # The same happens when the known identifier is supplied as
    # IdentifierData.
    in_database_as_data = IdentifierData(
        in_database.type, in_database.identifier
    )
    metadata.recommendations = [in_database_as_data, not_in_database]
    metadata.filter_recommendations(self._db)
    (survivor,) = metadata.recommendations

    # The IdentifierData was swapped for a real Identifier...
    eq_(True, isinstance(survivor, Identifier))

    # ...namely the one already in the database.
    eq_(in_database, survivor)
def extract_bibliographic(self, element, ns):
    """Build a Metadata (with attached CirculationData) from one Enki
    record.

    :param element: mapping read for the "isbn", "author", "id",
        "title" and "publisher" keys.
    :param ns: not used by this method.
    :return: a Metadata whose `circulation` attribute is set.
    """
    identifiers = [IdentifierData(Identifier.ISBN, element["isbn"])]

    # An empty author falls back to a placeholder sort name.
    author_sort_name = element["author"] or "Unknown"
    contributors = [ContributorData(sort_name=author_sort_name)]

    primary_identifier = IdentifierData(Identifier.ENKI_ID, element["id"])
    metadata = Metadata(
        data_source=DataSource.ENKI,
        title=element["title"],
        language="ENGLISH",
        medium=Edition.BOOK_MEDIUM,
        publisher=element["publisher"],
        primary_identifier=primary_identifier,
        identifiers=identifiers,
        contributors=contributors,
    )

    # TODO: This should parse the content type and look it up in the
    # Enki Delivery Data. Currently we assume everything is an ePub
    # that uses Adobe DRM, which is a safe assumption only for now.
    formats = [
        FormatData(
            content_type=Representation.EPUB_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
    ]

    metadata.circulation = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=primary_identifier,
        formats=formats,
    )
    return metadata
def extract_availability(self, circulation_data, element, ns):
    """Fill in fixed availability numbers for one Enki record.

    :param circulation_data: CirculationData to update; when falsy a
        new one is created for the record's Enki identifier.
    :param element: mapping read for the "id" key.
    :param ns: not used by this method.
    :return: the updated (or newly created) CirculationData.
    """
    enki_identifier = IdentifierData(Identifier.ENKI_ID, element["id"])
    if not circulation_data:
        circulation_data = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=enki_identifier,
        )

    # For now, assume there is one license, currently available, for
    # each item, with nothing reserved and nobody waiting.
    assumed_counts = (
        ('licenses_owned', 1),
        ('licenses_available', 1),
        ('licenses_reserved', 0),
        ('patrons_in_hold_queue', 0),
    )
    for attribute, count in assumed_counts:
        setattr(circulation_data, attribute, count)
    return circulation_data
def test_circulationdata_may_require_collection(self):
    """Whether apply() can be called without a Collection depends on
    what information the CirculationData carries.
    """
    identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
    epub_format = FormatData(
        Representation.EPUB_MEDIA_TYPE,
        DeliveryMechanism.NO_DRM,
        rights_uri=RightsStatus.IN_COPYRIGHT,
    )
    circdata = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=identifier,
        formats=[epub_format],
    )
    circdata.apply(self._db, collection=None)

    # apply() created a LicensePoolDeliveryMechanism for the title
    # even though no LicensePool exists for it.
    stored_identifier, _ = identifier.load(self._db)
    eq_([], stored_identifier.licensed_through)
    (lpdm,) = stored_identifier.delivery_mechanisms
    eq_(DataSource.OVERDRIVE, lpdm.data_source.name)
    eq_(RightsStatus.IN_COPYRIGHT, lpdm.rights_status.uri)

    delivery = lpdm.delivery_mechanism
    eq_(Representation.EPUB_MEDIA_TYPE, delivery.content_type)
    eq_(DeliveryMechanism.NO_DRM, delivery.drm_scheme)

    # Information that can only live in a LicensePool makes a
    # Collection mandatory.
    circdata.licenses_owned = 0
    assert_raises_regexp(
        ValueError,
        'Cannot store circulation information because no Collection was provided.',
        circdata.apply, self._db, collection=None
    )
def book_info_to_circulation(cls, book):
    """Turn Overdrive's JSON availability document for a book into a
    CirculationData object.

    Note: The json data passed into this method is from a different
    file/stream from the json data that goes into the
    book_info_to_metadata() method.

    :param book: dict read for the 'id', 'isOwnedByCollections' and
        'collections' keys; each collection may carry 'copiesOwned',
        'copiesAvailable' and 'numberOfHolds'.
    :return: a CirculationData, or None if `book` has no 'id'. A count
        is left as None when no collection reported it.
    """
    if 'id' not in book:
        return None
    primary_identifier = IdentifierData(
        Identifier.OVERDRIVE_ID, book['id']
    )

    # In Overdrive, 'reserved' books show up as books on
    # hold. There is no separate notion of reserved books.
    licenses_reserved = 0

    # None means "not reported"; a count only becomes a number once at
    # least one collection mentions it.
    licenses_owned = None
    licenses_available = None
    patrons_in_hold_queue = None

    if book.get('isOwnedByCollections') is not False:
        # We own this book. A missing or null 'collections' entry is
        # treated as "no collections" instead of raising.
        for collection in book.get('collections') or []:
            if 'copiesOwned' in collection:
                licenses_owned = (
                    (licenses_owned or 0) + int(collection['copiesOwned'])
                )
            if 'copiesAvailable' in collection:
                licenses_available = (
                    (licenses_available or 0)
                    + int(collection['copiesAvailable'])
                )
            if 'numberOfHolds' in collection:
                # Coerced with int() like the other two counters.
                patrons_in_hold_queue = (
                    (patrons_in_hold_queue or 0)
                    + int(collection['numberOfHolds'])
                )

    return CirculationData(
        data_source=DataSource.OVERDRIVE,
        primary_identifier=primary_identifier,
        licenses_owned=licenses_owned,
        licenses_available=licenses_available,
        licenses_reserved=licenses_reserved,
        patrons_in_hold_queue=patrons_in_hold_queue,
    )
def record_info_to_circulation(cls, availability):
    """Turn an Odilo availability document into a CirculationData.

    Note: The json data passed into this method is from a different
    file/stream from the json data that goes into the
    record_info_to_metadata() method.

    :param availability: dict read for 'recordId', 'totalCopies',
        'availableCopies' and, optionally, 'notifiedHolds' and
        'holdsQueueSize'.
    :return: a CirculationData, or None if there is no 'recordId'.
    """
    if 'recordId' not in availability:
        return None

    primary_identifier = IdentifierData(
        Identifier.ODILO_ID, availability['recordId']
    )

    # 'notifiedHolds' becomes licenses_reserved: patrons who placed a
    # hold earlier, have reached the front of the queue and could get
    # the book right now if they wanted to. 'holdsQueueSize' becomes
    # patrons_in_hold_queue: patrons currently waiting for a copy.
    # Both default to zero when absent.
    return CirculationData(
        data_source=DataSource.ODILO,
        primary_identifier=primary_identifier,
        licenses_owned=int(availability['totalCopies']),
        licenses_available=int(availability['availableCopies']),
        licenses_reserved=int(availability.get('notifiedHolds', 0)),
        patrons_in_hold_queue=int(availability.get('holdsQueueSize', 0)),
    )
class TestBibliographicCoverageProvider(DatabaseTest):
    """Tests of the bibliographic coverage provider, run against
    MockBibliographicCoverageProvider and
    MockFailureBibliographicCoverageProvider.
    """

    # Shared fixtures. These are class-level objects: tests must not
    # mutate them in place (work on a copy instead), or the change
    # leaks into every test that runs afterwards.
    BIBLIOGRAPHIC_DATA = Metadata(
        DataSource.OVERDRIVE,
        publisher=u'Perfection Learning',
        language='eng',
        title=u'A Girl Named Disaster',
        published=datetime.datetime(1998, 3, 1, 0, 0),
        primary_identifier=IdentifierData(
            type=Identifier.OVERDRIVE_ID,
            identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
        identifiers=[
            IdentifierData(
                type=Identifier.OVERDRIVE_ID,
                identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
            IdentifierData(
                type=Identifier.ISBN,
                identifier=u'9781402550805')
        ],
        contributors=[
            ContributorData(sort_name=u"Nancy Farmer",
                            roles=[Contributor.PRIMARY_AUTHOR_ROLE])
        ],
        subjects=[
            SubjectData(type=Subject.TOPIC,
                        identifier=u'Action & Adventure'),
            SubjectData(type=Subject.FREEFORM_AUDIENCE,
                        identifier=u'Young Adult'),
            SubjectData(type=Subject.PLACE, identifier=u'Africa')
        ],
    )

    CIRCULATION_DATA = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=BIBLIOGRAPHIC_DATA.primary_identifier,
    )

    def test_edition(self):
        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool and none can be created.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns an Edition otherwise, creating it if necessary.
        edition, lp = self._edition(with_license_pool=True)
        identifier = edition.primary_identifier
        eq_(edition, provider.edition(identifier))

        # The Edition will be created if necessary.
        lp.identifier.primarily_identifies = []
        e2 = provider.edition(identifier)
        assert edition != e2
        assert isinstance(e2, Edition)

    def test_work(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        provider.CAN_CREATE_LICENSE_POOLS = False

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns a CoverageFailure if there's no work available.
        edition, lp = self._edition(with_license_pool=True)
        # Remove edition so that the work won't be calculated
        lp.identifier.primarily_identifies = []
        result = provider.work(lp.identifier)
        assert isinstance(result, CoverageFailure)
        eq_("Work could not be calculated", result.exception)

        # Returns the work if it can be created or found.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.work(lp.identifier)
        eq_(result, lp.work)

    def test_set_metadata(self):
        # Imported here so the test does not depend on the module's
        # top-level imports.
        from copy import deepcopy

        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # This test changes the metadata's title and primary
        # identifier, so it works on a private copy of the shared
        # class-level fixture.
        test_metadata = deepcopy(self.BIBLIOGRAPHIC_DATA)
        test_circulationdata = self.CIRCULATION_DATA

        # If there is no LicensePool and it can't be autocreated, a
        # CoverageFailure results.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        edition, lp = self._edition(
            data_source_name=DataSource.OVERDRIVE,
            identifier_type=Identifier.OVERDRIVE_ID,
            identifier_id=self.BIBLIOGRAPHIC_DATA.primary_identifier.identifier,
            with_license_pool=True)

        # If no metadata is passed in, a CoverageFailure results.
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, None, None)
        assert isinstance(result, CoverageFailure)
        eq_("Received neither metadata nor circulation data from input source",
            result.exception)

        # If no work can be created (in this case, because there's no
        # title), a CoverageFailure results.
        edition.title = None
        old_title = test_metadata.title
        test_metadata.title = None
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, test_metadata, test_circulationdata)
        assert isinstance(result, CoverageFailure)
        eq_("Work could not be calculated", result.exception)
        test_metadata.title = old_title

        # Test success
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, test_metadata, test_circulationdata)
        eq_(result, edition.primary_identifier)

        # If there's an exception setting the metadata, a
        # CoverageFailure results. This call raises a ValueError
        # because the primary identifier & the edition's primary
        # identifier don't match.
        test_metadata.primary_identifier = self._identifier(
            identifier_type=Identifier.OVERDRIVE_ID)
        result = provider.set_metadata_and_circulation_data(
            lp.identifier, test_metadata, test_circulationdata)
        assert isinstance(result, CoverageFailure)
        assert "ValueError" in result.exception

    def test_autocreate_licensepool(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # If this constant is set to False, the coverage provider cannot
        # autocreate LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = False
        eq_(None, provider.license_pool(identifier))

        # If it's set to True, the coverage provider can autocreate
        # LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = True
        pool = provider.license_pool(identifier)
        eq_(pool.data_source, provider.output_source)
        eq_(pool.identifier, identifier)

    def test_set_presentation_ready(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # If the work can't be found, it can't be made presentation ready.
        provider.CAN_CREATE_LICENSE_POOLS = False
        result = provider.set_presentation_ready(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Test success.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.set_presentation_ready(ed.primary_identifier)
        eq_(result, ed.primary_identifier)

    def test_process_batch_sets_work_presentation_ready(self):
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        eq_(result, identifier)
        eq_(True, work.presentation_ready)

        # ensure_coverage does the same thing.
        work.presentation_ready = False
        result = provider.ensure_coverage(identifier)
        assert isinstance(result, CoverageRecord)
        eq_(result.identifier, identifier)
        eq_(True, work.presentation_ready)

    def test_failure_does_not_set_work_presentation_ready(self):
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockFailureBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        assert isinstance(result, CoverageFailure)
        eq_(False, work.presentation_ready)
def book_info_to_metadata(cls, book, include_bibliographic=True,
                          include_formats=True):
    """Build a Metadata object from Overdrive's JSON description of a book.

    Note: the JSON handed to this method comes from a different
    file/stream than the JSON consumed by book_info_to_circulation().

    `book` is the parsed JSON dictionary. When `include_bibliographic`
    is true the Metadata carries title information, contributors,
    subjects, extra identifiers, measurements and links; otherwise it
    carries only the data source and the primary identifier. When
    `include_formats` is true, a CirculationData listing the
    recognized formats is attached as `metadata.circulation`.

    Returns None if `book` has no 'id' key.
    """
    if 'id' not in book:
        return None
    overdrive_id = book['id']
    primary_identifier = IdentifierData(
        Identifier.OVERDRIVE_ID, overdrive_id)

    if include_bibliographic:
        title = book.get('title')
        sort_title = book.get('sortTitle')
        subtitle = book.get('subtitle')
        series = book.get('series')
        publisher = book.get('publisher')
        imprint = book.get('imprint')

        # Only the first ten characters of the publication date are
        # parsed.
        published = None
        if 'publishDate' in book:
            published = datetime.datetime.strptime(
                book['publishDate'][:10], cls.DATE_FORMAT)

        # English wins whenever it is listed or nothing is listed;
        # otherwise take the alphabetically first language code.
        language_codes = [
            entry['code'] for entry in book.get('languages', [])
        ]
        if language_codes and 'eng' not in language_codes:
            language = sorted(language_codes)[0]
        else:
            language = 'eng'

        contributors = [
            ContributorData(
                sort_name=creator['fileAs'],
                display_name=creator['name'],
                roles=(cls.parse_roles(overdrive_id, creator['role'])
                       or [Contributor.UNKNOWN_ROLE]),
                biography=creator.get('bioText'),
            )
            for creator in book.get('creators', [])
        ]

        # Overdrive's own subjects are weighted heavily, free-form
        # keywords barely at all.
        subjects = [
            SubjectData(type=Subject.OVERDRIVE,
                        identifier=topic['value'], weight=100)
            for topic in book.get('subjects', [])
        ]
        subjects.extend(
            SubjectData(type=Subject.TAG,
                        identifier=keyword['value'], weight=1)
            for keyword in book.get('keywords', [])
        )

        # NOTE: `extra` is filled in below but is not passed on to the
        # Metadata constructed in this method.
        extra = {}
        if 'grade_levels' in book:
            # n.b. Grade levels are measurements of reading level, not
            # age appropriateness. We can use them as a measure of age
            # appropriateness in a pinch, but we weight them less
            # heavily than other information from Overdrive.
            subjects.extend(
                SubjectData(type=Subject.GRADE_LEVEL,
                            identifier=level['value'], weight=10)
                for level in book['grade_levels']
            )

        # An unrecognized medium is logged and then treated as a book.
        medium_map = cls.overdrive_medium_to_simplified_medium
        overdrive_medium = book.get('mediaType')
        if overdrive_medium and overdrive_medium not in medium_map:
            cls.log.error("Could not process medium %s for %s",
                          overdrive_medium, overdrive_id)
        medium = medium_map.get(overdrive_medium, Edition.BOOK_MEDIUM)

        measurements = []
        if 'awards' in book:
            awards = book.get('awards', [])
            extra['awards'] = awards
            measurements.append(
                MeasurementData(Measurement.AWARDS, str(len(awards))))

        # Reading-level scores, when present, become subjects too.
        score_subject_types = (
            ('ATOS', Subject.ATOS_SCORE),
            ('lexileScore', Subject.LEXILE_SCORE),
            ('interestLevel', Subject.INTEREST_LEVEL),
        )
        subjects.extend(
            SubjectData(type=subject_type, identifier=str(book[key]),
                        weight=100)
            for key, subject_type in score_subject_types
            if key in book
        )
        subjects.extend(
            SubjectData(type=Subject.GRADE_LEVEL,
                        identifier=grade_info.get('value'), weight=100)
            for grade_info in book.get('gradeLevels', [])
        )

        # Identifier types we keep. Anything else (including
        # 'PublisherCatalogNumber') is skipped.
        identifier_types = {
            'ASIN': Identifier.ASIN,
            'ISBN': Identifier.ISBN,
            'DOI': Identifier.DOI,
            'UPC': Identifier.UPC,
        }
        identifiers = []
        links = []
        for overdrive_format in book.get('formats', []):
            for alternate in overdrive_format.get('identifiers', []):
                id_type = alternate['type']
                value = alternate['value']
                type_key = identifier_types.get(id_type)
                if id_type == 'ISBN':
                    original_value = value
                    if len(value) == 10:
                        value = isbnlib.to_isbn13(value)
                    if value is None or not isbnlib.is_isbn13(value):
                        # Overdrive sometimes uses invalid values
                        # like "n/a" as placeholders. Ignore such
                        # values to avoid a situation where hundreds of
                        # books appear to have the same ISBN. ISBNs
                        # which fail check digit checks or are invalid
                        # also can occur. Log them for review.
                        cls.log.info("Bad ISBN value provided: %s",
                                     original_value)
                        continue
                if type_key and value:
                    identifiers.append(IdentifierData(type_key, value, 1))

            # Samples become links.
            if 'samples' not in overdrive_format:
                continue
            if overdrive_format['id'] not in cls.format_data_for_overdrive_format:
                # Useless to us.
                continue
            content_type, drm_scheme = cls.format_data_for_overdrive_format.get(
                overdrive_format['id'])
            if not Representation.is_media_type(content_type):
                continue
            for sample in overdrive_format['samples']:
                links.append(
                    LinkData(rel=Hyperlink.SAMPLE, href=sample['url'],
                             media_type=content_type))

        # A cover and its thumbnail become a single LinkData.
        if 'images' in book:
            images = book['images']
            cover_link = cls.image_link_to_linkdata(
                images.get('cover'), Hyperlink.IMAGE)
            for size_name in ('cover300Wide', 'cover150Wide', 'thumbnail'):
                # Try to get a thumbnail that's as close as possible
                # to the size we use. If there was no full-size cover,
                # the same image doubles as the cover.
                candidate = images.get(size_name)
                thumbnail_link = cls.image_link_to_linkdata(
                    candidate, Hyperlink.THUMBNAIL_IMAGE)
                if not cover_link:
                    cover_link = cls.image_link_to_linkdata(
                        candidate, Hyperlink.IMAGE)
                if thumbnail_link:
                    break
            if cover_link:
                if thumbnail_link:
                    cover_link.thumbnail = thumbnail_link
                links.append(cover_link)

        # Descriptions become links. The short description is dropped
        # when it is merely the opening of the full one.
        short_description = book.get('shortDescription')
        full_description = book.get('fullDescription')
        if full_description:
            links.append(
                LinkData(
                    rel=Hyperlink.DESCRIPTION,
                    content=full_description,
                    media_type="text/html",
                ))
        if short_description and not (
                full_description
                and full_description.startswith(short_description)):
            links.append(
                LinkData(
                    rel=Hyperlink.SHORT_DESCRIPTION,
                    content=short_description,
                    media_type="text/html",
                ))

        # Add measurements: rating and popularity
        star_rating = book.get('starRating')
        if star_rating is not None and star_rating > 0:
            measurements.append(
                MeasurementData(quantity_measured=Measurement.RATING,
                                value=star_rating))
        popularity = book.get('popularity')
        if popularity:
            measurements.append(
                MeasurementData(quantity_measured=Measurement.POPULARITY,
                                value=popularity))

        metadata = Metadata(
            data_source=DataSource.OVERDRIVE,
            title=title,
            subtitle=subtitle,
            sort_title=sort_title,
            language=language,
            medium=medium,
            series=series,
            publisher=publisher,
            imprint=imprint,
            published=published,
            primary_identifier=primary_identifier,
            identifiers=identifiers,
            subjects=subjects,
            contributors=contributors,
            measurements=measurements,
            links=links,
        )
    else:
        metadata = Metadata(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
        )

    if include_formats:
        formats = []
        for overdrive_format in book.get('formats', []):
            format_id = overdrive_format['id']
            if format_id in cls.format_data_for_overdrive_format:
                content_type, drm_scheme = cls.format_data_for_overdrive_format.get(
                    format_id)
                formats.append(FormatData(content_type, drm_scheme))
                continue
            if format_id in cls.ignorable_overdrive_formats:
                continue
            cls.log.error(
                "Could not process Overdrive format %s for %s",
                format_id, overdrive_id)

        # Also make a CirculationData so we can write the formats.
        metadata.circulation = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
            formats=formats,
        )

    return metadata
def test_constructor(self):
    # The three positional constructor arguments surface as the
    # type, identifier and weight attributes.
    expected_type = Identifier.ISBN
    expected_identifier = "foo"
    expected_weight = 0.5

    identifier_data = IdentifierData(
        expected_type, expected_identifier, expected_weight)

    eq_(expected_type, identifier_data.type)
    eq_(expected_identifier, identifier_data.identifier)
    eq_(expected_weight, identifier_data.weight)