def load_date_multi(conn, date):
    """ Load info for a date using multiple threads """
    print "Loading info for " + str(date)
    scoreboard_url = fetch.make_scoreboard_url(date)
    if fetch.validate_url(scoreboard_url):
        data = retrieve_and_dump_data(conn, scoreboard_url)
        # look for games
        game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
        if game_header:
            game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
            if not game_ids:
                print "No Games"
            else:
                print "Found " + str(len(game_ids)) + " games."
                urls = []
                for game_id in game_ids:
                    urls += fetch.make_urls_for_game_id(game_id)
                pool = workerpool.WorkerPool(size=10)
                jobs = []
                for url in urls:
                    # Enqueue the ETL jobs
                    job = ETLJob(url, conn)
                    pool.put(job)
                    jobs.append(job)
                pool.shutdown()
                pool.wait()
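# ETLJob is defined elsewhere in this project; the class below is a hypothetical
# sketch of the workerpool Job pattern it appears to follow (subclass
# workerpool.Job, do the work in run()), assuming the job simply fetches one URL
# and keeps the parsed result on the instance. The name ETLJobSketch and the
# conn handling are illustrative, not the project's real implementation.
import workerpool

class ETLJobSketch(workerpool.Job):
    def __init__(self, url, conn=None):
        self.url = url
        self.conn = conn
        self.data = None

    def run(self):
        # retrieve_and_dump_data is the project helper used above. Note that the
        # variant of load_date_multi that passes conn into each job shares one
        # database connection across worker threads, which not every DB driver
        # allows (sqlite3 does not by default).
        self.data = retrieve_and_dump_data(self.conn, self.url)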
def load_date(conn, date):
    """ Retrieve and load all info for a given date """
    print "Loading info for " + str(date)
    scoreboard_url = fetch.make_scoreboard_url(date)
    if fetch.validate_url(scoreboard_url):
        data = retrieve_and_dump_data(conn, scoreboard_url)
        # look for games
        game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
        if game_header:
            game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
            if not game_ids:
                print "No Games"
            else:
                print "Found " + str(len(game_ids)) + " games."
                for game_id in game_ids:
                    game_urls = fetch.make_urls_for_game_id(game_id)
                    for url in game_urls:
                        #print url
                        retrieve_and_dump_data(conn, url)
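# Minimal usage sketch for the single-threaded loader, assuming a sqlite3
# database file and that retrieve_and_dump_data writes each raw response into
# it. The function name, database path, and date range below are invented for
# illustration only.
import datetime
import sqlite3

def load_date_range(db_path, start, end):
    """Run load_date for every day from start to end inclusive."""
    conn = sqlite3.connect(db_path)
    day = start
    while day <= end:
        load_date(conn, day)
        day += datetime.timedelta(days=1)
    conn.close()

# e.g. load_date_range("nba.db", datetime.date(2013, 10, 29), datetime.date(2013, 11, 4))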
def load_date_multi(conn, date):
    """ Load info for a date using multiple threads """
    cursor = conn.cursor()
    print "Loading info for " + str(date)
    scoreboard_url = fetch.make_scoreboard_url(date)
    if fetch.validate_url(scoreboard_url):
        data = retrieve_and_dump_data(conn, scoreboard_url)
        # look for games
        game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
        if game_header:
            game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
            if not game_ids:
                print "No Games"
            else:
                print "Found " + str(len(game_ids)) + " games."
                urls = []
                for game_id in game_ids:
                    urls += fetch.make_urls_for_game_id(game_id)
                pool = workerpool.WorkerPool(size=10)
                jobs = []
                for url in urls:
                    # Enqueue the ETL jobs
                    job = ETLJob(url)
                    pool.put(job)
                    jobs.append(job)
                pool.shutdown()
                pool.wait()
                #print jobs[0].data
                da = aggregator.DataAggregator()
                for job in jobs:
                    da.add(job.data)
                sql = da.makeSQL()
                for s in sql:
                    #print s
                    cursor.execute(s)
                conn.commit()
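# aggregator.DataAggregator is a project class; below is a hypothetical sketch of
# the interface load_date_multi relies on: add() collects each job's parsed
# payload and makeSQL() turns the combined rows into INSERT statements for the
# cursor above to execute. The table/value handling here is deliberately naive
# and invented for illustration (no real quoting or escaping).
class DataAggregatorSketch(object):
    def __init__(self):
        self.rows = []  # list of (result set name, row values) pairs

    def add(self, data):
        # data is assumed to have the same payload shape used above: a
        # "resultSets" list of {"name": ..., "headers": ..., "rowSet": ...} dicts.
        for result_set in data["resultSets"]:
            for row in result_set["rowSet"]:
                self.rows.append((result_set["name"], row))

    def makeSQL(self):
        statements = []
        for table, row in self.rows:
            values = ", ".join("'" + str(v) + "'" for v in row)
            statements.append("INSERT INTO %s VALUES (%s);" % (table, values))
        return statements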
if __name__ == "__main__":
    # Argument parser setup
    parser = argparse.ArgumentParser(description='Generate ebook files from web sources.')
    parser.add_argument('url', metavar='URL', nargs=1, help='url of novel (only supports lightnovel.cn urls for now), supports short-hand such as ln:<id> for lightnovel.cn')
    parser.add_argument('output', metavar='OUTPUT', nargs=1, help='output file name')
    parser.add_argument('--clear-cache', action='store_true')
    parser.add_argument('-u', '--user', help='user account for the resource')
    parser.add_argument('-k', '--kindlegen', help='path to kindlegen executable (for MOBI only)')
    args = parser.parse_args()

    # Get the appropriate handler for the resource url
    handlers, url = validate_url(args.url[0])
    res_handler, login_handler = handlers

    # User authentication
    username = args.user
    if username:
        login = login_handler(username)
        logged_in = False
        while not logged_in:
            logged_in = login.login()

    # Parsing and output
    resource = res_handler(url)
    out_fn = args.output[0]
    out_cls = output_classes.get(out_fn.split('.')[-1], output_classes['epub'])
    resource.output_book(out_cls, out_fn, args)
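# Example invocations (the script name below is assumed; use whatever this file
# is actually called). The output format is chosen from the output filename's
# extension via output_classes, falling back to the epub writer when the
# extension is not recognised:
#
#   python make_ebook.py ln:1234 my_novel.epub
#   python make_ebook.py ln:1234 my_novel.mobi -u someuser -k /usr/local/bin/kindlegen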