def get(self, page_key=None):
    """Render the HTML view of one page, or the all-pages listing.

    page_key -- key of the page to show; it is handed to
        scan_data.read_page. When it is missing or empty, the listing of
        every stored page is written instead.

    The response carries the document name, the 1-based position of the
    page inside its document, previous/next links where a neighbour
    exists, and the page-sized image returned by
    make_pagesized_thumbnail.

    Raises IndexError when the page is not among the pages of its own
    document.
    """
    if not page_key:
        # Calling self.get() here re-entered this very branch (page_key
        # defaults to None) and recursed without bound, so the listing is
        # produced by a dedicated helper instead.
        return self._write_all_pages()

    page = scan_data.read_page(page_key, db)
    doc = page.document()
    filename = page.filename
    this_key = page.key()

    # Fetch the page list once: it feeds the position lookup, the page
    # count and both neighbour links.
    doc_pages = doc.pages()
    n_pages = len(doc_pages)

    # where are we in the document? (1-based; first match wins)
    positions = [i for i, p in enumerate(doc_pages) if this_key == p.key()]
    index = positions[0] + 1

    prev_link = ""
    next_link = ""
    if index > 1:
        # index is 1-based, so the previous page sits at list offset index - 2
        prev_link = "<a href=\"/page/%s\"><<</a>" % doc_pages[index - 2].key()
    if index != n_pages:
        # ...and the next page sits at list offset index
        next_link = "<a href=\"/page/%s\">>></a>" % doc_pages[index].key()

    # build a page sized image if necessary
    pagesized_path = make_pagesized_thumbnail(this_key, filename)

    # NOTE(review): the values below are interpolated into HTML without
    # escaping -- confirm that document names and file names are trusted.
    controls = """
<a href="/document/%(doc_key)s">Back to Document</a><br/>
<a href="/%(filename)s">Full resolution image</a><br/>
%(prev_link)s         %(next_link)s<br/>
""" % {
        "doc_key": doc.key(),
        "filename": filename,
        "prev_link": prev_link,
        "next_link": next_link,
    }

    self.set_header("Content-Type", "text/html")
    self.write("""
<html><head><title>%(doc_name)s page %(index)d of %(n_pages)d</title></head>
<body>
<h1>%(doc_name)s</h1>
<h2>Page %(index)d of %(n_pages)d</h2>
%(controls)s
<hr/>
<img src="/%(pagesized_path)s" />
<hr/>
%(controls)s
</body>
</html>""" % {
        "doc_name": doc.name,
        "index": index,
        "n_pages": n_pages,
        "controls": controls,
        "pagesized_path": pagesized_path,
    })


def _write_all_pages(self):
    """Write an HTML listing with one image tag per page stored in db."""
    image_tags = []
    for key in db.keys():
        # only keys with the "page-" prefix are read as pages
        if key.startswith("page-"):
            stored = scan_data.read_page(key, db)
            image_tags.append("<img src=\"%s\" width=400 />" % stored.filename)

    self.set_header("Content-Type", "text/html")
    self.write("""
<html><head><title>All pages</title></head>
<body>
<h1>All Pages</h1>
%(pages_html)s
</body>
</html>""" % {"pages_html": "\n".join(image_tags)})
def get(self, page_key=None):
    """Render the HTML view of one page, or the all-pages listing.

    page_key -- key of the page to show; it is handed to
        scan_data.read_page. When it is missing or empty, the listing of
        every stored page is written instead.

    The response carries the document name, the 1-based position of the
    page inside its document, previous/next links where a neighbour
    exists, and the page-sized image returned by
    make_pagesized_thumbnail.

    Raises IndexError when the page is not among the pages of its own
    document.
    """
    if not page_key:
        # Calling self.get() here re-entered this very branch (page_key
        # defaults to None) and recursed without bound, so the listing is
        # produced by a dedicated helper instead.
        return self._write_all_pages()

    page = scan_data.read_page(page_key, db)
    doc = page.document()
    filename = page.filename
    this_key = page.key()

    # Fetch the page list once: it feeds the position lookup, the page
    # count and both neighbour links.
    doc_pages = doc.pages()
    n_pages = len(doc_pages)

    # where are we in the document? (1-based; first match wins)
    positions = [i for i, p in enumerate(doc_pages) if this_key == p.key()]
    index = positions[0] + 1

    prev_link = ""
    next_link = ""
    if index > 1:
        # index is 1-based, so the previous page sits at list offset index - 2
        prev_link = "<a href=\"/page/%s\"><<</a>" % doc_pages[index - 2].key()
    if index != n_pages:
        # ...and the next page sits at list offset index
        next_link = "<a href=\"/page/%s\">>></a>" % doc_pages[index].key()

    # build a page sized image if necessary
    pagesized_path = make_pagesized_thumbnail(this_key, filename)

    # NOTE(review): the values below are interpolated into HTML without
    # escaping -- confirm that document names and file names are trusted.
    controls = """
<a href="/document/%(doc_key)s">Back to Document</a><br/>
<a href="/%(filename)s">Full resolution image</a><br/>
%(prev_link)s         %(next_link)s<br/>
""" % {
        "doc_key": doc.key(),
        "filename": filename,
        "prev_link": prev_link,
        "next_link": next_link,
    }

    self.set_header("Content-Type", "text/html")
    self.write("""
<html><head><title>%(doc_name)s page %(index)d of %(n_pages)d</title></head>
<body>
<h1>%(doc_name)s</h1>
<h2>Page %(index)d of %(n_pages)d</h2>
%(controls)s
<hr/>
<img src="/%(pagesized_path)s" />
<hr/>
%(controls)s
</body>
</html>""" % {
        "doc_name": doc.name,
        "index": index,
        "n_pages": n_pages,
        "controls": controls,
        "pagesized_path": pagesized_path,
    })


def _write_all_pages(self):
    """Write an HTML listing with one image tag per page stored in db."""
    image_tags = []
    for key in db.keys():
        # only keys with the "page-" prefix are read as pages
        if key.startswith("page-"):
            stored = scan_data.read_page(key, db)
            image_tags.append("<img src=\"%s\" width=400 />" % stored.filename)

    self.set_header("Content-Type", "text/html")
    self.write("""
<html><head><title>All pages</title></head>
<body>
<h1>All Pages</h1>
%(pages_html)s
</body>
</html>""" % {"pages_html": "\n".join(image_tags)})
def get(self):
    """Write an HTML listing of every stored page.

    Each key in db that starts with "page-" is loaded through
    scan_data.read_page, and the filename of the resulting page becomes
    the source of an <img> tag with width=400.
    """
    # Load all pages first, then turn them into markup.
    stored_pages = [
        scan_data.read_page(key, db)
        for key in db.keys()
        if key.startswith("page-")
    ]
    image_tags = [
        "<img src=\"%s\" width=400 />" % stored.filename
        for stored in stored_pages
    ]
    listing = "\n".join(image_tags)

    self.set_header("Content-Type", "text/html")
    self.write("""
<html><head><title>All pages</title></head>
<body>
<h1>All Pages</h1>
%(pages_html)s
</body>
</html>""" % {"pages_html": listing})
def main(args):
    """Copy documents and pages from the dbm store into MongoDB.

    Every document returned by scan_data.get_documents is inserted into
    the scanserver.documents collection, and every page key referenced by
    those documents is read and inserted into scanserver.pages.

    Afterwards the top level of the "static" directory is scanned for
    files whose name (without extension) starts with "page-", and the
    number of those names that are not among the collected page keys is
    printed.

    args -- accepted but not used.
    """
    db = dbm.open("scan_data", "r")
    try:
        # print(...) with a single parenthesised argument emits the same
        # text under the Python 2 print statement and the Python 3 function.
        print("loaded database")
        mdb = pymongo.Connection()
        print("connected to mongo")
        docs = scan_data.get_documents(db)
        print("loaded %d documents" % len(docs))

        # go through each document and build a set of pagekeys
        mdb_docs = mdb.scanserver.documents
        pagekeys = set()
        for doc in docs:
            doc_record = scan_data.doc2json(doc)
            pagekeys.update(page.key() for page in doc.pages())
            mdb_docs.insert(doc_record)
        print("found %d pages" % len(pagekeys))

        mdb_pages = mdb.scanserver.pages
        for pk in pagekeys:
            page = scan_data.read_page(pk, db)
            mdb_pages.insert(scan_data.page2json(page))
    finally:
        # the dbm handle is not needed for the directory scan below
        db.close()

    dirpks = set()
    for base, dirs, files in os.walk("static", topdown=True):
        # don't recurse any further: with topdown=True, os.walk only
        # descends into what is left in dirs, so the list has to be emptied
        # in place. The former `del dirs[0:-1]` kept the last entry and
        # therefore still walked into one subdirectory per level.
        del dirs[:]
        for fname in files:
            pk = os.path.splitext(fname)[0]
            if pk.startswith("page-"):
                dirpks.add(pk)

    notindb = dirpks - pagekeys
    print("found %d page keys on disk that aren't in the database"
          % len(notindb))
def main(args):
    """Copy documents and pages from the dbm store into MongoDB.

    Every document returned by scan_data.get_documents is inserted into
    the scanserver.documents collection, and every page key referenced by
    those documents is read and inserted into scanserver.pages.

    Afterwards the top level of the "static" directory is scanned for
    files whose name (without extension) starts with "page-", and the
    number of those names that are not among the collected page keys is
    printed.

    args -- accepted but not used.
    """
    db = dbm.open("scan_data", "r")
    try:
        # print(...) with a single parenthesised argument emits the same
        # text under the Python 2 print statement and the Python 3 function.
        print("loaded database")
        mdb = pymongo.Connection()
        print("connected to mongo")
        docs = scan_data.get_documents(db)
        print("loaded %d documents" % len(docs))

        # go through each document and build a set of pagekeys
        mdb_docs = mdb.scanserver.documents
        pagekeys = set()
        for doc in docs:
            doc_record = scan_data.doc2json(doc)
            pagekeys.update(page.key() for page in doc.pages())
            mdb_docs.insert(doc_record)
        print("found %d pages" % len(pagekeys))

        mdb_pages = mdb.scanserver.pages
        for pk in pagekeys:
            page = scan_data.read_page(pk, db)
            mdb_pages.insert(scan_data.page2json(page))
    finally:
        # the dbm handle is not needed for the directory scan below
        db.close()

    dirpks = set()
    for base, dirs, files in os.walk("static", topdown=True):
        # don't recurse any further: with topdown=True, os.walk only
        # descends into what is left in dirs, so the list has to be emptied
        # in place. The former `del dirs[0:-1]` kept the last entry and
        # therefore still walked into one subdirectory per level.
        del dirs[:]
        for fname in files:
            pk = os.path.splitext(fname)[0]
            if pk.startswith("page-"):
                dirpks.add(pk)

    notindb = dirpks - pagekeys
    print("found %d page keys on disk that aren't in the database"
          % len(notindb))