def test_book_info_with_sample(self):
    # The has_sample.json fixture must produce exactly one link whose
    # rel is Hyperlink.SAMPLE, pointing at the expected excerpt.
    _raw, book_info = self.sample_json("has_sample.json")
    extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info)

    sample_links = [
        link for link in extracted.links if link.rel == Hyperlink.SAMPLE
    ]
    # Unpacking fails unless there is exactly one sample link.
    [sample_link] = sample_links

    expected_href = "http://excerpts.contentreserve.com/FormatType-410/1071-1/9BD/24F/82/BridesofConvenienceBundle9781426803697.epub"
    eq_(expected_href, sample_link.href)
def process_identifier(self, identifier):
    """Work out the correct medium for an identifier's edition and store it.

    The medium is taken from the delivery mechanisms of the identifier's
    license pool (the first truthy ``implicit_medium`` wins). If none of
    them provides one, it is taken from a metadata lookup through
    ``self.overdrive`` or ``self.threem``, depending on the identifier
    type. When the result differs from the edition's current medium, the
    edition is updated, using ``Edition.BOOK_MEDIUM`` if no medium could
    be determined.

    :param identifier: The identifier to process. Its ``licensed_through``
        license pool is read, and that pool's edition may be modified.
    """
    import logging

    pool = identifier.licensed_through

    # What is the correct medium?
    correct_medium = None
    for lpdm in pool.delivery_mechanisms:
        correct_medium = lpdm.delivery_mechanism.implicit_medium
        if correct_medium:
            break

    if not correct_medium and identifier.type == Identifier.OVERDRIVE_ID:
        content = self.overdrive.metadata_lookup(identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            content)
        # Other callers of book_info_to_metadata guard against a falsy
        # result, so don't dereference it blindly here.
        if metadata:
            correct_medium = metadata.medium

    if not correct_medium and identifier.type == Identifier.THREEM_ID:
        metadata = self.threem.bibliographic_lookup(identifier)
        # Defensive: treat a missing lookup result like a missing medium.
        if metadata:
            correct_medium = metadata.medium

    if not correct_medium:
        # This used to drop into the debugger, which stalls an unattended
        # run. Record the problem and carry on instead.
        logging.warning(
            "Could not determine the medium for %r.", identifier)

    edition = pool.edition
    if edition.medium != correct_medium:
        print("%s is actually %s, not %s" % (
            edition.title, correct_medium, edition.medium))
        edition.medium = correct_medium or Edition.BOOK_MEDIUM
def test_availability_info(self):
    # Every entry returned by availability_link_list must carry all
    # three of the keys checked below.
    _first, book_list = self.sample_json("overdrive_book_list.json")
    entries = OverdriveRepresentationExtractor.availability_link_list(
        book_list)

    required_keys = ('availability_link', 'id', 'title')
    for entry in entries:
        for required in required_keys:
            assert required in entry
def test_availability_info_missing_data(self):
    _first, book_list = self.sample_json(
        "overdrive_book_list_missing_data.json")
    entries = OverdriveRepresentationExtractor.availability_link_list(
        book_list)

    # We got a data structure for the item that has an ID but no title.
    # We did not get a data structure for the item that has a title
    # but no ID. (Unpacking fails unless exactly one entry came back.)
    [entry] = entries
    eq_('title is missing', entry['id'])
    eq_(None, entry['title'])
def test_book_info_with_awards(self):
    # The has_awards.json fixture must produce exactly one AWARDS
    # measurement, with value 1 and weight 1.
    _raw, book_info = self.sample_json("has_awards.json")
    extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info)

    award_measurements = [
        measurement for measurement in extracted.measurements
        if Measurement.AWARDS == measurement.quantity_measured
    ]
    [awards] = award_measurements

    eq_(1, awards.value)
    eq_(1, awards.weight)
def test_book_info_with_grade_levels(self):
    # The GRADE_LEVEL subjects extracted from has_grade_levels.json,
    # once sorted, must be Grade 4 through Grade 8.
    _raw, book_info = self.sample_json("has_grade_levels.json")
    extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info)

    grade_levels = [
        subject.identifier for subject in extracted.subjects
        if subject.type == Subject.GRADE_LEVEL
    ]
    grade_levels.sort()

    expected = [u'Grade 4', u'Grade 5', u'Grade 6', u'Grade 7', u'Grade 8']
    eq_(expected, grade_levels)
def test_book_info_with_circulationdata(self):
    # Tests that can convert an overdrive json block into a
    # CirculationData object.
    _raw, availability_info = self.sample_json(
        "overdrive_availability_information.json")
    circulation = OverdriveRepresentationExtractor.book_info_to_circulation(
        availability_info)

    # Related IDs.
    primary = circulation.primary_identifier(self._db)
    expected = (
        Identifier.OVERDRIVE_ID, '2a005d55-a417-4053-b90d-7a38ca6d2065')
    eq_(expected, (primary.type, primary.identifier))
def process_identifier(self, identifier):
    """Set a delivery mechanism on the license pool for each known format.

    Looks up the identifier through ``self.api``, converts the result to
    metadata, and calls ``set_delivery_mechanism`` on the identifier's
    license pool once per entry in ``metadata.formats``. Does nothing if
    no metadata could be extracted.

    :param identifier: The identifier whose formats should be recorded.
    """
    book_info = self.api.metadata_lookup(identifier)
    metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info)
    if metadata:
        pool = identifier.licensed_through
        for fmt in metadata.formats:
            # Report each format as it is recorded.
            print("%s: %s - %s" % (
                identifier.identifier, fmt.content_type, fmt.drm_scheme))
            pool.set_delivery_mechanism(
                fmt.content_type, fmt.drm_scheme, fmt.link)
def process_identifier(self, identifier):
    """Record the formats found for an identifier on its license pool.

    The identifier is looked up via ``self.api`` and the response is
    turned into metadata. For every format listed there,
    ``set_delivery_mechanism`` is called on ``identifier.licensed_through``
    with the format's content type, DRM scheme and link.

    :param identifier: The identifier to look up and update.
    """
    response = self.api.metadata_lookup(identifier)
    extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
        response)
    if not extracted:
        # Nothing could be extracted, so there are no formats to record.
        return

    target_pool = identifier.licensed_through
    for entry in extracted.formats:
        content_type = entry.content_type
        drm_scheme = entry.drm_scheme
        print("%s: %s - %s" % (
            identifier.identifier, content_type, drm_scheme))
        target_pool.set_delivery_mechanism(
            content_type, drm_scheme, entry.link)
def process_identifier(self, identifier):
    """Work out the correct medium for an identifier's edition and store it.

    The medium is taken from the delivery mechanisms of the identifier's
    license pool (the first truthy ``implicit_medium`` wins). If none of
    them provides one, it is taken from a metadata lookup through
    ``self.overdrive`` or ``self.threem``, depending on the identifier
    type. When the result differs from the edition's current medium, the
    edition is updated, using ``Edition.BOOK_MEDIUM`` if no medium could
    be determined.

    :param identifier: The identifier to process. Its ``licensed_through``
        license pool is read, and that pool's edition may be modified.
    """
    import logging

    pool = identifier.licensed_through

    # What is the correct medium?
    correct_medium = None
    for lpdm in pool.delivery_mechanisms:
        correct_medium = lpdm.delivery_mechanism.implicit_medium
        if correct_medium:
            break

    if not correct_medium and identifier.type == Identifier.OVERDRIVE_ID:
        content = self.overdrive.metadata_lookup(identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            content)
        # Other callers of book_info_to_metadata guard against a falsy
        # result, so don't dereference it blindly here.
        if metadata:
            correct_medium = metadata.medium

    if not correct_medium and identifier.type == Identifier.THREEM_ID:
        metadata = self.threem.bibliographic_lookup(identifier)
        # Defensive: treat a missing lookup result like a missing medium.
        if metadata:
            correct_medium = metadata.medium

    if not correct_medium:
        # This used to drop into the debugger, which stalls an unattended
        # run. Record the problem and carry on instead.
        logging.warning(
            "Could not determine the medium for %r.", identifier)

    edition = pool.edition
    if edition.medium != correct_medium:
        print("%s is actually %s, not %s" % (
            edition.title, correct_medium, edition.medium))
        edition.medium = correct_medium or Edition.BOOK_MEDIUM
def test_book_info_with_metadata(self):
    # Tests that can convert an overdrive json block into a Metadata object.

    def check_kindle_and_pdf(formats):
        # Exactly two formats are expected; sorted by content type they
        # must be the Kindle format followed by the Adobe-DRM PDF.
        [kindle, pdf] = sorted(formats, key=lambda fmt: fmt.content_type)
        eq_(DeliveryMechanism.KINDLE_CONTENT_TYPE, kindle.content_type)
        eq_(DeliveryMechanism.KINDLE_DRM, kindle.drm_scheme)
        eq_(Representation.PDF_MEDIA_TYPE, pdf.content_type)
        eq_(DeliveryMechanism.ADOBE_DRM, pdf.drm_scheme)

    _raw, book_info = self.sample_json("overdrive_metadata.json")
    metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info)

    # Basic bibliographic fields.
    eq_("Agile Documentation", metadata.title)
    eq_(
        "Agile Documentation A Pattern Guide to Producing Lightweight Documents for Software Projects",
        metadata.sort_title)
    eq_(
        "A Pattern Guide to Producing Lightweight Documents for Software Projects",
        metadata.subtitle)
    eq_(Edition.BOOK_MEDIUM, metadata.medium)
    eq_("Wiley Software Patterns", metadata.series)
    eq_("eng", metadata.language)
    eq_("Wiley", metadata.publisher)
    eq_("John Wiley & Sons, Inc.", metadata.imprint)
    eq_(2005, metadata.published.year)
    eq_(1, metadata.published.month)
    eq_(31, metadata.published.day)

    # Exactly one contributor, credited as author.
    [author] = metadata.contributors
    eq_(u"Rüping, Andreas", author.sort_name)
    eq_("Andreas Rüping", author.display_name)
    eq_([Contributor.AUTHOR_ROLE], author.roles)

    # Subjects, ordered by identifier.
    ordered_subjects = sorted(
        metadata.subjects, key=lambda subject: subject.identifier)
    expected_subjects = [
        ("Computer Technology", Subject.OVERDRIVE, 100),
        ("Nonfiction", Subject.OVERDRIVE, 100),
        ('Object Technologies - Miscellaneous', 'tag', 1),
    ]
    actual_subjects = [
        (subject.identifier, subject.type, subject.weight)
        for subject in ordered_subjects
    ]
    eq_(expected_subjects, actual_subjects)

    # Related IDs.
    primary = metadata.primary_identifier
    eq_((Identifier.OVERDRIVE_ID, '3896665d-9d81-4cac-bd43-ffc5066de1f5'),
        (primary.type, primary.identifier))

    ids = [
        (identifier.type, identifier.identifier)
        for identifier in metadata.identifiers
    ]

    # The original data contains a blank ASIN in addition to the
    # actual ASIN, but it doesn't show up here.
    expected_ids = [
        (Identifier.ASIN, "B000VI88N2"),
        (Identifier.ISBN, "9780470856246"),
        (Identifier.OVERDRIVE_ID, '3896665d-9d81-4cac-bd43-ffc5066de1f5'),
    ]
    eq_(expected_ids, sorted(ids))

    # Available formats.
    check_kindle_and_pdf(metadata.circulation.formats)

    # Links to various resources.
    shortd, image, longd = sorted(
        metadata.links, key=lambda link: link.rel)

    eq_(Hyperlink.DESCRIPTION, longd.rel)
    assert longd.content.startswith("<p>Software documentation")

    eq_(Hyperlink.SHORT_DESCRIPTION, shortd.rel)
    assert shortd.content.startswith("<p>Software documentation")
    assert len(shortd.content) < len(longd.content)

    eq_(Hyperlink.IMAGE, image.rel)
    eq_(
        'http://images.contentreserve.com/ImageType-100/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg100.jpg',
        image.href)

    thumbnail = image.thumbnail
    eq_(Hyperlink.THUMBNAIL_IMAGE, thumbnail.rel)
    eq_(
        'http://images.contentreserve.com/ImageType-200/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg200.jpg',
        thumbnail.href)

    # Measurements associated with the book.
    measurements = metadata.measurements
    popularity_measurements = [
        measurement for measurement in measurements
        if measurement.quantity_measured == Measurement.POPULARITY
    ]
    popularity = popularity_measurements[0]
    eq_(2, popularity.value)

    rating_measurements = [
        measurement for measurement in measurements
        if measurement.quantity_measured == Measurement.RATING
    ]
    rating = rating_measurements[0]
    eq_(1, rating.value)

    # Request only the bibliographic information.
    metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info, include_bibliographic=True, include_formats=False)
    eq_("Agile Documentation", metadata.title)
    eq_(None, metadata.circulation)

    # Request only the format information.
    metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
        book_info, include_bibliographic=False, include_formats=True)
    eq_(None, metadata.title)
    check_kindle_and_pdf(metadata.circulation.formats)
def test_link(self):
    # The "first" link extracted from the book list must equal the
    # known URL after it has been passed through make_link_safe.
    _first, book_list = self.sample_json("overdrive_book_list.json")

    unsafe_url = "http://api.overdrive.com/v1/collections/collection-id/products?limit=300&offset=0&lastupdatetime=2014-04-28%2009:25:09&sort=popularity:desc&formats=ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open"
    expected_link = OverdriveAPI.make_link_safe(unsafe_url)

    actual_link = OverdriveRepresentationExtractor.link(book_list, "first")
    eq_(expected_link, actual_link)
from threem import ThreeMAPI
from core.opds_import import SimplifiedOPDSLookup

# One-off repair script: re-fetch contributor information for editions
# whose author field is the empty string, then recalculate presentation.

# Passed to update_contributions() below as its metadata client.
lookup = SimplifiedOPDSLookup("http://metadata.alpha.librarysimplified.org/")

_db = production_session()
overdrive = OverdriveAPI(_db)
threem = ThreeMAPI(_db)

# Editions from the Overdrive data source with an empty author.
q = (
    _db.query(Edition)
    .join(Edition.data_source)
    .filter(DataSource.name.in_([DataSource.OVERDRIVE]))
    .filter(Edition.author == '')
)
print("Fixing %s books." % q.count())

for edition in q:
    if edition.data_source.name == DataSource.OVERDRIVE:
        data = overdrive.metadata_lookup(edition.primary_identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            data)
    else:
        # Not reachable with the filter above, which selects Overdrive
        # only; kept so widening the filter keeps working.
        metadata = threem.bibliographic_lookup(edition.primary_identifier)

    metadata.update_contributions(
        _db, edition, metadata_client=lookup, replace_contributions=True)

    # Recalculate presentation on the work if there is one, otherwise
    # on the edition itself.
    if edition.work:
        edition.work.calculate_presentation()
    else:
        edition.calculate_presentation()

    for c in edition.contributions:
        print("%s = %s (%s)" % (
            c.role, c.contributor.display_name, c.contributor.name))
    print("%s %s" % (edition.author, edition.sort_author))

# Commit the session so the updated contributions are actually saved.
_db.commit()
OverdriveRepresentationExtractor ) from threem import ThreeMAPI from core.opds_import import SimplifiedOPDSLookup lookup = SimplifiedOPDSLookup("http://metadata.alpha.librarysimplified.org/") _db = production_session() overdrive = OverdriveAPI(_db) threem = ThreeMAPI(_db) q = _db.query(Edition).join(Edition.data_source).filter(DataSource.name.in_([DataSource.OVERDRIVE])).filter(Edition.author=='') print "Fixing %s books." % q.count() for edition in q: if edition.data_source.name==DataSource.OVERDRIVE: data = overdrive.metadata_lookup(edition.primary_identifier) metadata = OverdriveRepresentationExtractor.book_info_to_metadata(data) else: metadata = threem.bibliographic_lookup(edition.primary_identifier) metadata.update_contributions(_db, edition, metadata_client=lookup, replace_contributions=True) if edition.work: edition.work.calculate_presentation() else: edition.calculate_presentation() for c in edition.contributions: print "%s = %s (%s)" % ( c.role, c.contributor.display_name, c.contributor.name ) print edition.author, edition.sort_author _db.commit()