def save_many(self, docs, comment=None):
    """Write template and macro documents to files below ``self.root``.

    Every document in ``marshal(docs)`` is mapped to a path made of
    ``self.root`` plus the document key without its first character
    (presumably the leading ``/``, so that ``os.path.join`` keeps the path
    under the root).

    * ``/type/template``: ``.tmpl`` in the path is replaced by ``.html`` and
      the document's ``body`` is written.
    * ``/type/macro``: ``.html`` is appended to the path and the document's
      ``macro`` is written.
    * Any other type is ignored.

    :param docs: documents to save; passed through ``marshal`` first.
    :param comment: not used by this implementation.
    """

    def write(path, text):
        """Write ``text`` to ``path``, creating the parent directory if needed."""
        parent = os.path.dirname(path)
        if not os.path.exists(parent):
            os.makedirs(parent)

        # Values may arrive wrapped as {'value': ...}; unwrap them.
        if isinstance(text, dict):
            text = text['value']

        try:
            print("writing", path)
            # The context manager closes the file even when write() fails.
            with open(path, "w") as f:
                f.write(text)
        except IOError:
            # Best effort: report the failure and carry on with the next doc.
            print("failed", path)

    for doc in marshal(docs):
        path = os.path.join(self.root, doc['key'][1:])
        if doc['type']['key'] == '/type/template':
            path = path.replace(".tmpl", ".html")
            write(path, doc['body'])
        elif doc['type']['key'] == '/type/macro':
            path = path + ".html"
            write(path, doc['macro'])
def save_many(self, docs, comment=None):
    """Write documents to files below ``self.root``.

    Every document in ``marshal(docs)`` is mapped to a path made of
    ``self.root`` plus the document key without its first character
    (presumably the leading ``/``, so that ``os.path.join`` keeps the path
    under the root).

    * ``/type/template``: ``.tmpl`` in the path is replaced by ``.html`` and
      the document's ``body`` is written.
    * ``/type/macro``: ``.html`` is appended and the document's ``macro`` is
      written.
    * Anything else: ``.json`` is appended and the whole document is written
      as indented JSON.

    :param typing.List[dict or web.storage] docs:
    :param str or None comment: only here to match the signature of OpenLibrary api
    """

    def write(path, text):
        """Write ``text`` to ``path``, creating the parent directory if needed."""
        parent = os.path.dirname(path)
        if not os.path.exists(parent):
            os.makedirs(parent)

        # Values may arrive wrapped as {'value': ...}; unwrap them.
        if isinstance(text, dict):
            text = text['value']

        try:
            print("writing", path)
            # The context manager closes the file even when write() fails.
            with open(path, "w") as f:
                f.write(text)
        except OSError:
            # Best effort: report the failure and carry on with the next doc.
            print("failed", path)

    for doc in marshal(docs):
        path = os.path.join(self.root, doc['key'][1:])
        if doc['type']['key'] == '/type/template':
            path = path.replace(".tmpl", ".html")
            write(path, doc['body'])
        elif doc['type']['key'] == '/type/macro':
            path = path + ".html"
            write(path, doc['macro'])
        else:
            path = path + ".json"
            write(path, json.dumps(doc, indent=2))
def get_many(keys):
    """Fetch the documents for ``keys`` from ``src`` and strip unneeded fields.

    ``excerpts`` is removed from every document, and ``authors`` is removed
    from every ``/type/edition`` document. The marshalled documents are
    returned.
    """
    fetched = src.get_many(keys)
    records = marshal(fetched.values())

    for record in records:
        # Work records may contain excerpts, which reference the author of
        # the excerpt. Dropping them prevents loading those users.
        record.pop('excerpts', None)

        # Authors now live on works, so editions do not need them.
        is_edition = record['type']['key'] == '/type/edition'
        if is_edition:
            record.pop('authors', None)

    return records
def ol_save2(self, key, record, message):
    """Buffer ``record`` under ``key`` for a later save with ``message``.

    Records are grouped per message in ``self.savebuffer``. When adding to an
    existing group brings it to 100 or more records, ``self.flush(message)``
    is called.

    :param key: key the record is stored under within its message group.
    :param record: record to buffer; passed through ``marshal`` first.
    :param message: message the record is grouped by; must not be ``None``.
    :raises Exception: if ``message`` is ``None``.
    """
    if message is None:
        raise Exception("Message for saving is missing!")

    record = marshal(record)
    if message in self.savebuffer:
        self.savebuffer[message][key] = record
        if len(self.savebuffer[message]) >= 100:
            self.flush(message)
    else:
        # First record for this message: start a new group.
        self.savebuffer[message] = {key: record}

    # NOTE(review): the original indentation was lost; this assumes every
    # buffered save is logged, not only the first one per message -- confirm.
    self.flog(key, "buffer save", message)
def fetch(keys):
    """Return the documents for ``keys``, using ``cache`` where possible.

    Cached documents come first, in the order of ``keys``. Keys missing from
    ``cache`` are fetched from ``src`` in one ``get_many`` call, added to
    ``cache`` under their ``'key'`` field and appended to the result.

    :param keys: iterable of document keys.
    :return: list of documents.
    """
    # Materialise once: keys is scanned twice below, which would silently
    # drop every uncached key if a one-shot iterator were passed in.
    keys = list(keys)

    docs = [cache[k] for k in keys if k in cache]
    missing = [k for k in keys if k not in cache]

    if missing:
        print("fetching", missing)
        fetched = marshal(src.get_many(missing).values())
        cache.update((doc['key'], doc) for doc in fetched)
        docs.extend(fetched)
    return docs
def main():
    """Mirror templates and macros matching the command-line patterns.

    Parses the options (stored in the module-level ``options``), queries the
    OpenLibrary server for up to 1000 documents per key pattern and, for each
    document:

    * ``/type/template``: writes its ``body`` to ``make_path(doc)``;
    * ``/type/macro``: writes its ``macro`` to ``make_path(doc)``;
    * any other type: deletes ``make_path(doc)``.
    """
    global options
    options, args = parse_options()
    ol = OpenLibrary(options.server)

    for pattern in args:
        docs = ol.query({"key~": pattern, "*": None}, limit=1000)
        for doc in marshal(docs):
            if doc['type']['key'] == '/type/template':
                write(make_path(doc), get_value(doc, 'body'))
            # This branch used to test '/type/template' a second time, which
            # made it unreachable and sent every macro to delete() below.
            elif doc['type']['key'] == '/type/macro':
                write(make_path(doc), get_value(doc, 'macro'))
            else:
                delete(make_path(doc))
def fix_toc(doc):
    """Return ``marshal(doc)`` with ``/type/text`` table-of-contents entries fixed.

    Each entry of ``table_of_contents`` shaped like
    ``{'type': '/type/text', 'value': v}`` is replaced by ``{'title': v}``.
    Other entries are kept unchanged. A missing, empty or non-list
    ``table_of_contents`` is left as it is.

    :param doc: document to fix; passed through ``marshal`` first.
    :return: the marshalled document.
    """
    doc = marshal(doc)

    def f(d):
        """function to fix one toc entry."""
        if d.get('type') == '/type/text':
            return dict(title=d['value'])
        else:
            return d

    toc = doc.get('table_of_contents')
    # The old guard was `type(toc) == dict`: it skipped list-shaped tocs (the
    # only shape this loop can process) and iterated the keys of dict-shaped
    # ones, on which f() fails. Entries that are not dicts are passed through.
    if toc and isinstance(toc, list):
        doc['table_of_contents'] = [
            f(x) if isinstance(x, dict) else x for x in toc
        ]
    return doc
def get_many(keys):
    """Load the documents for ``keys`` from ``src`` and trim them.

    Every document loses its ``excerpts`` field, and ``/type/edition``
    documents also lose ``authors``. Returns the marshalled documents.
    """
    loaded = src.get_many(keys)
    documents = marshal(loaded.values())

    for document in documents:
        # Work records may contain excerpts, which reference the author of
        # the excerpt. Deleting them prevents loading the users.
        document.pop('excerpts', None)

        # Dropping 'covers' and 'photos' (they don't show up in the dev
        # instance) is currently disabled, so both fields are kept.

        # Authors are now with works; editions do not need them.
        doc_type = document['type']['key']
        if doc_type == '/type/edition':
            document.pop('authors', None)

    return documents
def main():
    """Mirror templates and macros matching the command-line patterns.

    The parsed options are stored in the module-level ``options``. For every
    key pattern, up to 1000 documents are queried from the OpenLibrary
    server. Documents whose key contains ``--duplicate`` are skipped.
    Templates have their ``body`` written, macros their ``macro``, and the
    path of any other document is deleted.
    """
    global options
    options, patterns = parse_options()
    site = OpenLibrary(options.server)

    for pattern in patterns:
        matches = site.query({"key~": pattern, "*": None}, limit=1000)
        for doc in marshal(matches):
            # Anand: special care to ignore bad documents in the database.
            if "--duplicate" in doc['key']:
                continue

            type_key = doc['type']['key']
            if type_key == '/type/template':
                field = 'body'
            elif type_key == '/type/macro':
                field = 'macro'
            else:
                # Neither a template nor a macro: remove its file.
                delete(make_path(doc))
                continue

            write(make_path(doc), get_value(doc, field))
def query(**q):
    """Run the query ``q`` against ``src`` and return the matching keys.

    The query is printed before it is sent. The result is the list of
    ``'key'`` values of the marshalled matches.
    """
    print("query", q)
    matches = marshal(src.query(q))
    return [match['key'] for match in matches]
def get(key):
    """Fetch the document ``key`` from ``src`` and return it marshalled.

    The key is printed before the fetch.
    """
    print("get", key)
    # This used to fetch the free variable `list_key`, ignoring the argument,
    # so the printed key and the fetched document could disagree.
    return marshal(src.get(key))
def add_source_records(key, ia, v=None):
    """Record the Internet Archive item ``ia`` on edition ``key`` and save it.

    Steps, in order:

    1. Load the edition with ``ol.get(key, v=v)`` and set ``ocaid`` to ``ia``
       if the edition has none.
    2. Make sure ``source_records`` contains ``'ia:' + ia``. If it is already
       there and ``ocaid`` did not need setting, return without saving. When
       the edition has no ``source_records`` yet, the list is built from
       ``get_mc(key)``.
    3. Tidy the record: apply ``fix_toc``, strip the final character from
       subjects flagged by ``has_dot``, and replace redirected authors by
       their targets.
    4. Save with ``ol.save`` and finally call ``add_cover_image(key, ia)``.

    :param key: key of the edition to update.
    :param ia: Internet Archive identifier to record.
    :param v: optional value forwarded to ``ol.get`` as ``v``.
    :return: ``None``.
    """
    new = 'ia:' + ia
    e = ol.get(key, v=v)

    need_update = False
    if 'ocaid' not in e:
        need_update = True
        e['ocaid'] = ia

    if 'source_records' in e:
        if new in e['source_records']:
            if not need_update:
                return  # already recorded and nothing else changed
            # ocaid was just added, so a save is still needed, but the entry
            # must not be appended again (this used to add a duplicate).
        else:
            e['source_records'].append(new)
    else:
        existing = get_mc(key)
        amazon = 'amazon:'
        if existing is None:
            sr = []
        elif existing.startswith('ia:'):
            sr = [existing]
        elif existing.startswith(amazon):
            sr = amazon_source_records(existing[len(amazon):]) or [existing]
        else:
            m = re_meta_mrc.match(existing)
            sr = ['ia:' + m.group(1) if m else 'marc:' + existing]

        if 'ocaid' in e and 'ia:' + e['ocaid'] not in sr:
            sr.append('ia:' + e['ocaid'])
        if new not in sr:
            sr.append(new)
        # Always store the list. It used to be assigned only when `new` was
        # missing from `sr`, which dropped the computed records whenever the
        # existing record or the ocaid already matched `new`.
        e['source_records'] = sr

    # fix other bits of the record as well
    new_toc = fix_toc(e)
    if new_toc:
        e['table_of_contents'] = new_toc

    if e.get('subjects', None) and any(has_dot(s) for s in e['subjects']):
        e['subjects'] = [s[:-1] if has_dot(s) else s for s in e['subjects']]

    if 'authors' in e:
        assert not any(a == 'None' for a in e['authors'])
        print(e['authors'])
        authors = [ol.get(akey) for akey in e['authors']]
        # Follow author redirects to the record they point at.
        authors = [
            ol.get(a['location']) if a['type'] == '/type/redirect' else a
            for a in authors
        ]
        e['authors'] = [{'key': a['key']} for a in authors]
        undelete_authors(authors)

    print('saving', key)
    print(marshal(e))
    # A disabled retry loop around this save (50 attempts, 30 s apart, on
    # URLError) used to be kept here as commented-out code.
    print(ol.save(key, e, 'found a matching MARC record'))

    if new_toc:
        new_edition = ol.get(key)
        # [{u'type': <ref: u'/type/toc_item'>}, ...]
        assert 'title' in new_edition['table_of_contents'][0]

    # NOTE(review): the original indentation was lost; this assumes the cover
    # image is added for every saved edition, not only when the table of
    # contents was fixed -- confirm.
    add_cover_image(key, ia)
def query(**q):
    """Run the query ``q`` against ``src`` and return the matching keys.

    The query is printed before it is sent. The result is the list of
    ``'key'`` values of the marshalled matches.
    """
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3.
    print("query", q)
    return [x['key'] for x in marshal(src.query(q))]
def get(key):
    """Fetch the document ``key`` from ``src`` and return it marshalled.

    The key is printed before the fetch.
    """
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3.
    print("get", key)
    # This used to fetch the free variable `list_key`, ignoring the argument,
    # so the printed key and the fetched document could disagree.
    return marshal(src.get(key))