def test_get_metaxml(monkeypatch, mock_memcache):
    """Check ``ia.get_meta_xml`` against canned JSON metadata responses.

    ``urllib2.urlopen`` is monkeypatched with a stub that returns the current
    value of ``metadata_json`` wrapped in a ``StringIO``.
    """
    import StringIO
    import urllib2

    # Rebound before each scenario; the stub reads it at call time.
    metadata_json = None

    def urlopen(url):
        return StringIO.StringIO(metadata_json)

    monkeypatch.setattr(urllib2, "urlopen", urlopen)

    # test with a well-formed JSON metadata response
    metadata_json = """{ "metadata": { "title": "Foo", "identifier": "foo00bar", "collection": ["printdisabled", "inlibrary"] } } """

    # Parenthesised so the line parses under both Python 2 and Python 3.
    print(ia.get_meta_xml("foo00bar"))
    assert ia.get_meta_xml("foo00bar") == {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        "access-restricted": False,
        "_filenames": [],
    }

    # test with metadata errors
    metadata_json = "{}"
    assert ia.get_meta_xml("foo02bar") == {}
def test_get_metaxml(monkeypatch, mock_memcache):
    """Exercise ``ia.get_meta_xml`` with JSON served by a fake ``urlopen``."""
    import StringIO
    import urllib2

    # Body the fake urlopen serves; reassigned ahead of every scenario.
    canned_body = None

    def fake_urlopen(url):
        return StringIO.StringIO(canned_body)

    monkeypatch.setattr(urllib2, "urlopen", fake_urlopen)

    # test with correct xml
    canned_body = (
        '{ "metadata": { "title": "Foo", "identifier": "foo00bar", '
        '"collection": ["printdisabled", "inlibrary"] } } '
    )
    expected = {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        "access-restricted": False,
        "_filenames": [],
    }
    print(ia.get_meta_xml("foo00bar"))
    assert ia.get_meta_xml("foo00bar") == expected

    # test with metadata errors
    canned_body = "{}"
    assert ia.get_meta_xml("foo02bar") == {}
def format_book_data(book):
    """Build a ``web.storage`` summary of ``book``.

    The result always carries ``key``, ``url``, ``title``, ``ocaid`` and
    ``authors``.  ``cover_url`` is set when a cover is found or, failing
    that, when the book has an ocaid.  When the book has an ocaid, exactly
    one of ``borrow_url`` / ``read_url`` is set, depending on the item's
    collections as reported by ``ia.get_meta_xml``.
    """
    d = web.storage()
    d.key = book.get('key')
    d.url = book.url()
    d.title = book.title or None
    d.ocaid = book.get("ocaid")

    def get_authors(doc):
        return [web.storage(key=a.key, name=a.name or None) for a in doc.get_authors()]

    # Authors come from the first work when there is one, else the book.
    work = book.works and book.works[0]
    d.authors = get_authors(work if work else book)

    # Prefer the work's cover and fall back to the book's.  The work cover is
    # looked up once instead of once for the test and again for the value.
    cover = work.get_cover() if work else None
    if not cover:
        cover = book.get_cover()

    if cover:
        d.cover_url = cover.url("M")
    elif d.ocaid:
        d.cover_url = 'https://archive.org/services/img/%s' % d.ocaid

    if d.ocaid:
        collections = ia.get_meta_xml(d.ocaid).get("collection", [])
        if 'lendinglibrary' in collections or 'inlibrary' in collections:
            d.borrow_url = book.url("/borrow")
        else:
            d.read_url = book.url("/borrow")
    return d
def format_book_data(book):
    """Summarise ``book`` as a ``web.storage`` object.

    Sets ``key``, ``url``, ``title``, ``ocaid`` and ``authors`` on every
    result.  ``cover_url`` points at the cover when one exists, otherwise at
    the archive.org image service when the book has an ocaid.  Books with an
    ocaid also get either ``borrow_url`` or ``read_url``.
    """
    def get_authors(doc):
        return [
            web.storage(key=a.key, name=a.name or None)
            for a in doc.get_authors()
        ]

    d = web.storage()
    d.key = book.get('key')
    d.url = book.url()
    d.title = book.title or None
    d.ocaid = book.get("ocaid")

    work = book.works and book.works[0]
    d.authors = get_authors(work if work else book)

    # Single lookup of the work's cover; a falsy result falls back to the
    # book's own cover (previously work.get_cover() was evaluated twice).
    cover = (work.get_cover() if work else None) or book.get_cover()

    if cover:
        d.cover_url = cover.url("M")
    elif d.ocaid:
        d.cover_url = 'https://archive.org/services/img/%s' % d.ocaid

    if d.ocaid:
        collections = ia.get_meta_xml(d.ocaid).get("collection", [])
        if 'lendinglibrary' in collections or 'inlibrary' in collections:
            d.borrow_url = book.url("/borrow")
        else:
            d.read_url = book.url("/borrow")
    return d
def format_book_data(book):
    """Return a ``web.storage`` with display fields for ``book``.

    ``key``, ``url``, ``title`` and ``authors`` are always set; ``cover_url``
    only when the book has a cover.  Books with an ocaid additionally get
    ``daisy_url`` (printdisabled or lendinglibrary items) and one of
    ``borrow_url``, ``inlibrary_borrow_url`` or ``read_url``.
    """
    def author_summaries(doc):
        return [web.storage(key=a.key, name=a.name or None) for a in doc.get_authors()]

    summary = web.storage()
    summary.key = book.key
    summary.url = book.url()
    summary.title = book.title or None

    # Use the first work's authors when the book has works.
    first_work = book.works and book.works[0]
    summary.authors = author_summaries(first_work if first_work else book)

    cover = book.get_cover()
    if cover:
        summary.cover_url = cover.url("M")

    ia_id = book.get("ocaid")
    if not ia_id:
        return summary

    collections = ia.get_meta_xml(ia_id).get("collection", [])
    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        summary.daisy_url = book.url("/daisy")

    if 'lendinglibrary' in collections:
        summary.borrow_url = book.url("/borrow")
    elif 'inlibrary' in collections:
        summary.inlibrary_borrow_url = book.url("/borrow")
    else:
        summary.read_url = book.url("/borrow")
    return summary
def get_ia_availability(itemid):
    """Classify an item by the collections ``ia.get_meta_xml`` reports.

    Returns 'borrow' when the collections include 'lendinglibrary',
    otherwise 'restricted' when they include 'printdisabled', otherwise
    'full'.
    """
    collections = ia.get_meta_xml(itemid).get("collection", [])

    # Checked in order: the first matching collection decides the answer.
    precedence = (
        ('lendinglibrary', 'borrow'),
        ('printdisabled', 'restricted'),
    )
    for collection, availability in precedence:
        if collection in collections:
            return availability
    return 'full'
def test_get_metaxml(monkeypatch, mock_memcache):
    """Exercise ``ia.get_meta_xml`` with good XML, an HTML error page and
    truncated XML, all served by a fake ``urlopen``."""
    import StringIO
    import urllib2

    # Document the fake urlopen serves; reassigned ahead of every scenario.
    served_document = None

    def fake_urlopen(url):
        return StringIO.StringIO(served_document)

    monkeypatch.setattr(urllib2, "urlopen", fake_urlopen)

    # test with correct xml
    served_document = (
        '<?xml version="1.0" encoding="UTF-8"?> <metadata> '
        '<title>Foo</title> <identifier>foo00bar</identifier> '
        '<collection>printdisabled</collection> '
        '<collection>inlibrary</collection> </metadata> '
    )
    expected = {
        "title": "Foo",
        "identifier": "foo00bar",
        "collection": ["printdisabled", "inlibrary"],
        'external-identifier': [],
    }
    assert ia.get_meta_xml("foo00bar") == expected

    # test with html errors
    served_document = "<html>\n<head>\n <title>Internet Archive: Error</title>..."
    assert ia.get_meta_xml("foo01bar") == {}

    # test with bad xml
    served_document = (
        '<?xml version="1.0" encoding="UTF-8"?> <metadata> '
        '<title>Foo</title> <identifier>foo00bar '
    )
    assert ia.get_meta_xml("foo02bar") == {}
def get_ia_meta_fields(self):
    """Return the item's metadata dict, remembering it on the instance.

    A truthy ``self._ia_meta_fields`` is returned as-is.  Otherwise the
    metadata is fetched with ``ia.get_meta_xml(self.ocaid)`` (or starts as an
    empty dict when there is no ocaid), gets empty-list defaults for
    "external-identifier" and "collection", and is stored on the instance.
    """
    # $$$ _ia_meta_fields has not been assigned on the first call, but
    # apparently some magic lets it be read this way (and the same magic
    # breaks using hasattr to test whether it is defined).
    if self._ia_meta_fields:
        return self._ia_meta_fields

    meta = ia.get_meta_xml(self.ocaid) if self.get('ocaid', None) else {}
    for field in ("external-identifier", "collection"):
        meta.setdefault(field, [])

    self._ia_meta_fields = meta
    return self._ia_meta_fields
def format_book_data(book):
    """Collect display fields for ``book`` into a ``web.storage``.

    Always sets ``key``, ``url``, ``title`` and ``authors``.  Optionally sets
    ``cover_url``, ``overdrive_url`` (when the book has an 'overdrive'
    identifier) and, for books with an ocaid, ``daisy_url`` plus one of
    ``borrow_url`` / ``inlibrary_borrow_url`` / ``read_url``.
    """
    def author_summaries(doc):
        summaries = []
        for author in doc.get_authors():
            summaries.append(web.storage(key=author.key, name=author.name or None))
        return summaries

    data = web.storage()
    data.key = book.key
    data.url = book.url()
    data.title = book.title or None

    # Authors are taken from the first work when there is one.
    first_work = book.works and book.works[0]
    data.authors = author_summaries(first_work or book)

    cover = book.get_cover()
    if cover:
        data.cover_url = cover.url("M")

    overdrive = book.get("identifiers", {}).get('overdrive')
    if overdrive:
        data.overdrive_url = "http://search.overdrive.com/SearchResults.aspx?ReserveID={%s}" % overdrive

    ia_id = book.get("ocaid")
    if not ia_id:
        return data

    collections = ia.get_meta_xml(ia_id).get("collection", [])
    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        data.daisy_url = book.url("/daisy")

    # Exactly one of these attributes receives the /borrow URL.
    if 'lendinglibrary' in collections:
        link_attr = 'borrow_url'
    elif 'inlibrary' in collections:
        link_attr = 'inlibrary_borrow_url'
    else:
        link_attr = 'read_url'
    setattr(data, link_attr, book.url("/borrow"))
    return data
def format_edition(edition):
    """This should be moved to a books or carousel model

    Returns a plain dict describing ``edition``: ocaid, title, key, url,
    authors, collections and a ``protected`` flag, plus optional
    ``cover_url``, ``daisy_url`` and one of ``borrow_url`` /
    ``inlibrary_borrow_url`` / ``read_url``.
    """
    collections = ia.get_meta_xml(edition.get('ocaid')).get("collection", [])
    lending_collections = ['lendinglibrary', 'browserlending', 'inlibrary']

    book = {}
    book['ocaid'] = edition.get('ocaid')
    book['title'] = edition.title or None
    book['key'] = edition.key
    book['url'] = edition.url()
    book['authors'] = [
        web.storage(key=author.key, name=author.name or None)
        for author in edition.get_authors()
    ]
    book['collections'] = collections
    book['protected'] = any(name in collections for name in lending_collections)

    cover = edition.get_cover()
    if cover:
        book['cover_url'] = cover.url(u'M')

    if 'printdisabled' in collections or 'lendinglibrary' in collections:
        book['daisy_url'] = edition.url("/daisy")

    if 'lendinglibrary' in collections:
        book['borrow_url'] = edition.url("/borrow")
    elif 'inlibrary' in collections:
        book['inlibrary_borrow_url'] = edition.url("/borrow")
    else:
        book['read_url'] = "//archive.org/stream/" + book['ocaid']
    return book
def format_book_data(book):
    """Produce the ``web.storage`` record describing ``book``.

    ``key``, ``url``, ``title`` and ``authors`` are always present.
    ``cover_url`` and ``overdrive_url`` appear when the book has a cover or
    an 'overdrive' identifier.  A book with an ocaid also gets ``daisy_url``
    (printdisabled / lendinglibrary) and a single borrow-or-read link.
    """
    d = web.storage()
    d.key = book.key
    d.url = book.url()
    d.title = book.title or None

    def get_authors(doc):
        return [
            web.storage(key=person.key, name=person.name or None)
            for person in doc.get_authors()
        ]

    work = book.works and book.works[0]
    author_source = work if work else book
    d.authors = get_authors(author_source)

    cover = book.get_cover()
    if cover:
        d.cover_url = cover.url("M")

    overdrive = book.get("identifiers", {}).get('overdrive')
    if overdrive:
        d.overdrive_url = "http://search.overdrive.com/SearchResults.aspx?ReserveID={%s}" % overdrive

    ia_id = book.get("ocaid")
    if ia_id:
        collections = ia.get_meta_xml(ia_id).get("collection", [])
        in_lending_library = 'lendinglibrary' in collections

        if 'printdisabled' in collections or in_lending_library:
            d.daisy_url = book.url("/daisy")

        if in_lending_library:
            d.borrow_url = book.url("/borrow")
        elif 'inlibrary' in collections:
            d.inlibrary_borrow_url = book.url("/borrow")
        else:
            d.read_url = book.url("/borrow")
    return d
def format_book_data(book):
    """Describe ``book`` as a ``web.storage`` of display fields.

    Every result has ``key``, ``url``, ``title`` and ``authors``.  A cover
    adds ``cover_url``.  An ocaid adds ``ocaid``, possibly ``daisy_url``,
    and one of ``borrow_url`` / ``inlibrary_borrow_url`` / ``read_url``.
    """
    def to_author_storage(author):
        return web.storage(key=author.key, name=author.name or None)

    record = web.storage()
    record.key = book.key
    record.url = book.url()
    record.title = book.title or None

    # The first work, when present, supplies the authors.
    work = book.works and book.works[0]
    authored_doc = work if work else book
    record.authors = [to_author_storage(a) for a in authored_doc.get_authors()]

    cover = book.get_cover()
    if cover:
        record.cover_url = cover.url("M")

    ia_id = book.get("ocaid")
    if not ia_id:
        return record

    record.ocaid = ia_id
    collections = ia.get_meta_xml(ia_id).get("collection", [])

    if any(name in collections for name in ('printdisabled', 'lendinglibrary')):
        record.daisy_url = book.url("/daisy")

    if 'lendinglibrary' in collections:
        record.borrow_url = book.url("/borrow")
    elif 'inlibrary' in collections:
        record.inlibrary_borrow_url = book.url("/borrow")
    else:
        record.read_url = book.url("/borrow")
    return record
def process(self, req):
    """Resolve a multi-request string into a dict of per-request records.

    ``req`` holds one or more requests separated by '|'.  Each request is a
    ';'-separated list of bib keys, optionally led by an 'id:<name>' item
    that names the request's entry in the result.

    Side effects: populates ``self.docs``, ``self.detailss``, ``self.datas``,
    ``self.works``, ``self.wkey_to_iaids``, ``self.iaid_to_meta`` and
    ``self.iaid_to_ed`` before ``self.make_record`` is called.

    Returns a dict mapping each request's result key to the truthy value
    ``self.make_record`` produced for it.  With the 'debug_items' option set,
    the dict also carries 'ekeys', 'eds' and 'iaids'.
    """
    requests = req.split('|')
    # Flatten with a comprehension; sum(lists, []) re-copies the accumulator
    # on every addition and is quadratic in the number of keys.
    bib_keys = [key for r in requests for key in r.split(';')]

    # filter out 'id:foo' before passing to dynlinks
    bib_keys = [k for k in bib_keys if k[:3].lower() != 'id:']

    self.docs = dynlinks.query_docs(bib_keys)
    if not self.options.get('no_details'):
        self.detailss = dynlinks.process_result_for_details(self.docs)
    else:
        self.detailss = {}
    dp = dynlinks.DataProcessor()
    self.datas = dp.process(self.docs)
    self.works = dp.works

    # XXX control costs below with [:iaid_limit] - note that this may result
    # in no 'exact' item match, even if one exists
    # Note that it's available thru above works/docs
    iaid_limit = 500
    self.wkey_to_iaids = dict((wkey, get_work_iaids(wkey)[:iaid_limit])
                              for wkey in self.works)
    # Linear flatten (see note on bib_keys above).
    iaids = [iaid
             for work_iaids in self.wkey_to_iaids.values()
             for iaid in work_iaids]
    self.iaid_to_meta = dict((iaid, ia.get_meta_xml(iaid)) for iaid in iaids)

    def lookup_iaids(iaids):
        # Query edition keys in batches of `step` ids, unless the
        # 'debug_things' option asks for a single query.
        step = 10
        if len(iaids) > step and not self.options.get('debug_things'):
            result = []
            for start in range(0, len(iaids), step):
                result += lookup_iaids(iaids[start:start + step])
            return result
        query = {
            'type': '/type/edition',
            'ocaid': iaids,
        }
        return web.ctx.site.things(query)

    ekeys = lookup_iaids(iaids)

    # If returned order were reliable, I could skip the below.
    eds = dynlinks.ol_get_many_as_dict(ekeys)
    self.iaid_to_ed = dict((ed['ocaid'], ed) for ed in eds.values())
    # self.iaid_to_ekey = dict((iaid, ed['key'])
    #                          for iaid, ed in self.iaid_to_ed.items())

    # Work towards building a dict of iaid loanability,
    # def has_lending_collection(meta):
    #     collections = meta.get("collection", [])
    #     return 'lendinglibrary' in collections or 'inlibrary' in collections
    # in case site.store supports get_many (unclear)
    # maybe_loanable_iaids = [iaid for iaid in iaids
    #                         if has_lending_collection(self.iaid_to_meta.get(iaid, {}))]
    # loanable_ekeys = [self.iaid_to_ekey.get(iaid) for iaid in maybe_loanable_iaids]
    # loanstatus = web.ctx.site.store.get('ebooks' + ekey, {'borrowed': 'false'})

    result = {}
    for r in requests:
        bib_keys = r.split(';')
        if r.lower().startswith('id:'):
            # The leading 'id:<name>' item names the result entry and is
            # not itself a bib key.
            result_key = bib_keys.pop(0)[3:]
        else:
            result_key = r
        sub_result = self.make_record(bib_keys)
        if sub_result:
            result[result_key] = sub_result

    if self.options.get('debug_items'):
        result['ekeys'] = ekeys
        result['eds'] = eds
        result['iaids'] = iaids
    return result
def get_readable_edition_item(edition, work, user_inlibrary, initial_edition):
    """Describe how ``edition`` can be read online, or return None.

    Returns None when the edition has no ocaid, when the work has no
    subjects, when a lending collection is not matched by the corresponding
    work subject ('Lending library' / 'In library'), when the item is
    in-library only and ``user_inlibrary`` is falsy, or when the item is only
    in 'printdisabled'.

    Otherwise returns a dict with 'match' ('exact' when ``edition`` and
    ``initial_edition`` share a key, else 'similar'), 'status'
    ('full access', 'lendable' or 'checked out'), 'itemURL', identifiers
    and, when the edition has covers, a 'cover' dict of image URLs.
    """
    ocaid = edition.get('ocaid', False)
    if not ocaid:
        return None
    subjects = work.get_subjects()
    if not subjects:
        return None

    metaxml = ia.get_meta_xml(ocaid)
    collections = metaxml.get("collection", [])

    if 'lendinglibrary' in collections:
        if 'Lending library' not in subjects:
            return None
        status = 'lendable'
    elif 'inlibrary' in collections:
        if 'In library' not in subjects:
            return None
        if not user_inlibrary:
            return None
        status = 'lendable'
    elif 'printdisabled' in collections:
        # Items that are only print-disabled are not returned at all.
        return None
    else:
        status = 'full access'

    if status == 'full access':
        itemURL = "http://www.archive.org/stream/%s" % (ocaid)
    else:
        itemURL = u"http://openlibrary.org%s/%s/borrow" % (
            edition['key'],
            helpers.urlsafe(edition.get("title", "untitled")))

    if status == 'lendable':
        # The store holds the string 'true' / 'false', not a boolean.
        loanstatus = web.ctx.site.store.get('ebooks' + edition['key'],
                                            {'borrowed': 'false'})
        if loanstatus['borrowed'] == 'true':
            status = 'checked out'

    if edition['key'] == initial_edition['key']:
        match = 'exact'
    else:
        match = 'similar'

    result = {
        'enumcron': False,
        # 'orig': 'University of California'
        # 'htid': ''
        # 'lastUpdate: "" # XXX from edition.last_modified (datetime)
        'match': match,
        'status': status,
        'fromRecord': initial_edition['key'],
        'ol-edition-id': key_to_olid(edition['key']),
        'ol-work-id': key_to_olid(work['key']),
        'contributor': 'contributor',
        'itemURL': itemURL,
    }

    if edition.get('covers'):
        cover_id = edition['covers'][0]
        # XXX covers url from yaml?
        result['cover'] = {
            "small": "http://covers.openlibrary.org/b/id/%s-S.jpg" % cover_id,
            "medium": "http://covers.openlibrary.org/b/id/%s-M.jpg" % cover_id,
            "large": "http://covers.openlibrary.org/b/id/%s-L.jpg" % cover_id,
        }

    return result
def get_readable_edition_item(edition, work, user_inlibrary, initial_edition):
    """Build the readable-item record for ``edition``, or return None.

    None is returned when ``edition`` has no ocaid, when ``work`` has no
    subjects, when the item is in 'lendinglibrary' / 'inlibrary' but the work
    lacks the 'Lending library' / 'In library' subject, when the item is
    in-library only and ``user_inlibrary`` is falsy, and when the item is
    only in 'printdisabled'.

    The returned dict reports 'status' ('full access', 'lendable' or
    'checked out'), 'match' ('exact' or 'similar' relative to
    ``initial_edition``), 'itemURL', edition/work ids and, when the edition
    lists covers, a 'cover' dict with small/medium/large image URLs.
    """
    ocaid = edition.get('ocaid', False)
    if not ocaid:
        return None

    subjects = work.get_subjects()
    if not subjects:
        return None

    collections = ia.get_meta_xml(ocaid).get("collection", [])

    if 'lendinglibrary' in collections:
        if 'Lending library' not in subjects:
            return None
        status = 'lendable'
    elif 'inlibrary' in collections:
        if 'In library' not in subjects or not user_inlibrary:
            return None
        status = 'lendable'
    elif 'printdisabled' in collections:
        # No record is produced for print-disabled-only items.
        return None
    else:
        status = 'full access'

    if status == 'full access':
        itemURL = "http://www.archive.org/stream/%s" % (ocaid)
    else:
        itemURL = u"http://openlibrary.org%s/%s/borrow" % (
            edition['key'],
            helpers.urlsafe(edition.get("title", "untitled")))
        # Only the lending branches reach here, so status is 'lendable'.
        # The store holds the string 'true' / 'false', not a boolean.
        loanstatus = web.ctx.site.store.get('ebooks' + edition['key'],
                                            {'borrowed': 'false'})
        if loanstatus['borrowed'] == 'true':
            status = 'checked out'

    match = 'exact' if edition['key'] == initial_edition['key'] else 'similar'

    result = {
        'enumcron': False,
        # 'orig': 'University of California'
        # 'htid': ''
        # 'lastUpdate: "" # XXX from edition.last_modified (datetime)
        'match': match,
        'status': status,
        'fromRecord': initial_edition['key'],
        'ol-edition-id': key_to_olid(edition['key']),
        'ol-work-id': key_to_olid(work['key']),
        'contributor': 'contributor',
        'itemURL': itemURL,
    }

    if edition.get('covers'):
        cover_id = edition['covers'][0]
        # XXX covers url from yaml?
        sizes = (("small", "S"), ("medium", "M"), ("large", "L"))
        result['cover'] = dict(
            (name, "http://covers.openlibrary.org/b/id/%s-%s.jpg" % (cover_id, letter))
            for name, letter in sizes)

    return result