def write_mongo(db, tree):
    """Convert the OAI records found in *tree* to articles and store them.

    A connection is opened with ``schema.connect(db)``.  Every record yielded
    by ``arxiv.oai_records(tree)`` is turned into an article through
    ``transforms.arxiv_to_article``, then validated and saved.  When
    validation or saving raises, the exception is printed and the loop moves
    on to the next record.
    """
    schema.connect(db)
    for record in arxiv.oai_records(tree):
        article = transforms.arxiv_to_article(record)
        try:
            article.validate()
            article.save()
        except Exception as err:
            # Best effort: report the failure and continue with the next record.
            print(err)
def main(uri=schema.default_uri):
    """Assemble the resource providers and launch the CherryPy application.

    uri -- passed to ``schema.connect``; defaults to ``schema.default_uri``.

    The application root is a ``StaticUnderlayServer`` over the ``ui``
    directory located next to this module, with a ``ResourceServer`` wrapping
    all providers attached as ``root.resources``.  Host and port are read
    from ``config``; gzip is switched on for ``text/*`` and
    ``application/json`` responses.
    """
    # cherrypy is imported only when main() actually runs.
    import cherrypy as cp

    db = schema.connect(uri)

    # Every provider is built from the same database handle, in this order.
    provider_factories = (
        transit_routes,
        coordinate_shape,
        coordinate_shapes,
        departure_traces,
        RouteStatisticsProvider,
        route_graph_edges,
        available_date_range,
    )
    providers = [build(db) for build in provider_factories]
    resource_server = session_server.ResourceServer(providers)

    ui_dir = os.path.join(os.path.dirname(__file__), 'ui')
    root = session_server.StaticUnderlayServer(ui_dir)
    root.resources = resource_server

    global_settings = {
        'server.socket_host': config.server_host,
        'server.socket_port': config.server_port,
        'tools.gzip.on': True,
        'tools.gzip.mime_types': ['text/*', 'application/json'],
    }
    cp.quickstart(root, config={'global': global_settings})
def check_db():
    """Connect to the database file inside ``movies_dir``.

    Rebinds the module-level names ``db``, ``Movie`` and ``Line`` to the
    three values returned by ``connect`` for ``<movies_dir>/<DB_NAME>``.

    Raises:
        SystemExit: with status 1 when ``movies_dir`` does not exist; a
            message naming the missing path is printed first.
    """
    global db, Movie, Line

    if not os.path.exists(movies_dir):
        print("Movie directory not found (%s). Please correct path and restart!" % movies_dir)
        # A missing directory is a failure: exit with a non-zero status so
        # that shells and calling scripts do not see it as success.
        sys.exit(1)

    db, Movie, Line = connect(os.path.join(movies_dir, DB_NAME))
def test_arxiv_to_article():
    """Convert a sample of arxiv documents to articles; check speed and count.

    Copies the first documents of ``myxiv.arxiv`` into a scratch ``testdb``
    database on 127.0.0.1:27017, maps ``transforms.arxiv_to_article`` over
    them, and asserts that

    * the conversion rate is above the minimum records-per-second threshold,
    * ``transforms.mongo_map`` returned an empty list, and
    * the ``article`` collection holds as many documents as ``arxiv``.
    """
    sample_size = 100
    min_rate = 20

    # Take the first `sample_size` documents from the source collection.
    source = mongo_conn().myxiv
    sample = list(source.arxiv.find().limit(sample_size))

    # Begin with empty collections in the scratch database, then load the sample.
    testdb = schema.connect("testdb", host="127.0.0.1", port=27017)
    for collection in ("arxiv", "article"):
        testdb.drop_collection(collection)
    testdb.arxiv.insert(sample)

    def convert(doc):
        return transforms.arxiv_to_article(doc, True)

    # Time the conversion of the whole sample.
    started = time.time()
    failed = transforms.mongo_map(convert, testdb.arxiv.find())
    elapsed = time.time() - started
    assert sample_size / elapsed > min_rate, (
        sample_size / elapsed, " rec/sec too slow, min is ", min_rate)

    # Every sampled document must have been converted.
    assert failed == [], "No fails"
    assert testdb.article.count() == testdb.arxiv.count(), (
        "arxiv count ",
        testdb.arxiv.count(),
        ", got article count ",
        testdb.article.count(),
    )
def drop_pitches():
    """Write a lightweight copy of the database with every pitch cleared.

    ``<movies_dir>/<DB_NAME>`` is copied to a sibling file whose name has
    ``_light`` inserted in front of the extension (``name.ext`` becomes
    ``name_light.ext``).  The copy is then opened with ``connect``, ``pitch``
    is set to ``None`` through an unfiltered update on the returned line
    model, and ``VACUUM`` is executed on the copy.
    """
    print("Creating lightweight DB copy...")

    # Insert the suffix before the LAST dot only.  Replacing every dot would
    # mangle names with several dots, and a name without any dot would stay
    # unchanged, making the copy target identical to the source file.
    stem, dot, extension = DB_NAME.rpartition('.')
    if dot:
        light_name = stem + '_light.' + extension
    else:
        light_name = DB_NAME + '_light'

    source_path = os.path.join(movies_dir, DB_NAME)
    light_path = os.path.join(movies_dir, light_name)
    copyfile(source_path, light_path)

    # All further work happens on the copy.
    light_db, _, LightLine = connect(light_path)
    LightLine.update(pitch=None).execute()
    light_db.execute_sql('VACUUM')
from __future__ import division import os import peewee as pw import sys import subprocess import progressbar import schema DB_NAME = 'lines.db' movies_dir = sys.argv[1] db, Movie, Line = schema.connect(os.path.join(movies_dir, DB_NAME)) def _full_path(target): if target == None: target = '' return os.path.join(movies_dir, target) print "{:20s} | Video Sub Lines Wav None False".format( str(Movie.select().count()) + " Movies") print "---------------------------------------------+-----------------------------------" for movie in Movie.select(): video_ok = '✓' if os.path.isfile(_full_path(movie.video)) else '-' srt_ok = '✓' if os.path.isfile(_full_path(movie.subtitles)) else '-'