def load_date_multi(conn, date):
    """
    Load info for a date using multiple threads.

    The scoreboard URL for ``date`` is built and validated through ``fetch``,
    then passed through ``retrieve_and_dump_data``.  Game ids are read from
    index 2 of every row in the first result set named "GameHeader".  Every
    URL produced by ``fetch.make_urls_for_game_id`` is wrapped in an
    ``ETLJob`` and queued on a ``workerpool.WorkerPool`` of size 10; the
    function returns once the pool has been shut down and waited on.

    Nothing is queued when the scoreboard URL does not validate, when there
    is no "GameHeader" result set, or when that result set has no rows.

    :param conn: connection handed to ``retrieve_and_dump_data`` and to
        every ``ETLJob``.
    :param date: date to load; only ``str(date)`` and
        ``fetch.make_scoreboard_url(date)`` are applied to it here.
    :returns: None

    NOTE(review): a second definition named ``load_date_multi`` appears later
    in this source; if both live in one module the later one shadows this
    one -- confirm which is intended.
    NOTE(review): the same ``conn`` object is given to every job; confirm it
    is safe to share across the pool's workers.
    """
    # Single-argument print(...) emits the same text whether print is a
    # statement or a function.
    print("Loading info for " + str(date))

    scoreboard_url = fetch.make_scoreboard_url(date)
    if not fetch.validate_url(scoreboard_url):
        return

    data = retrieve_and_dump_data(conn, scoreboard_url)

    # Real lists (not filter/map results) so that indexing, truth-testing
    # and len() below do not depend on the builtins returning lists.
    game_header = [rs for rs in data["resultSets"] if rs["name"] == "GameHeader"]
    if not game_header:
        return

    game_ids = [row[2] for row in game_header[0]["rowSet"]]
    if not game_ids:
        print("No Games")
        return

    print("Found " + str(len(game_ids)) + " games.")

    urls = []
    for game_id in game_ids:
        urls.extend(fetch.make_urls_for_game_id(game_id))

    pool = workerpool.WorkerPool(size=10)
    for url in urls:
        # The job objects are not needed after queuing, so no list of them
        # is kept.
        pool.put(ETLJob(url, conn))
    pool.shutdown()
    pool.wait()
def load_date(conn, date):
    """
    Retrieve and load all info for a given date, one URL at a time.

    The scoreboard URL for ``date`` is built and validated through ``fetch``
    and passed through ``retrieve_and_dump_data``.  Game ids are taken from
    index 2 of each row of the first result set named "GameHeader"; every URL
    returned by ``fetch.make_urls_for_game_id`` for those ids is then passed
    through ``retrieve_and_dump_data`` in order.

    :param conn: connection forwarded to ``retrieve_and_dump_data``.
    :param date: date to load.
    :returns: None
    """
    print("Loading info for " + str(date))

    scoreboard_url = fetch.make_scoreboard_url(date)
    if not fetch.validate_url(scoreboard_url):
        return

    scoreboard = retrieve_and_dump_data(conn, scoreboard_url)

    # Keep only the result sets that describe the day's games.
    headers = [rs for rs in scoreboard["resultSets"] if rs["name"] == "GameHeader"]
    if not headers:
        return

    game_ids = [row[2] for row in headers[0]["rowSet"]]
    if not game_ids:
        print("No Games")
        return

    print("Found " + str(len(game_ids)) + " games.")
    for game_id in game_ids:
        for url in fetch.make_urls_for_game_id(game_id):
            retrieve_and_dump_data(conn, url)
def load_date_multi(conn, date):
    """
    Load info for a date using multiple threads.

    The scoreboard URL for ``date`` is built and validated through ``fetch``
    and passed through ``retrieve_and_dump_data``.  Game ids are read from
    index 2 of every row in the first result set named "GameHeader".  Each
    URL from ``fetch.make_urls_for_game_id`` becomes an ``ETLJob`` queued on
    a ``workerpool.WorkerPool`` of size 10.  After the pool has been shut
    down and waited on, every job's ``data`` attribute is added to an
    ``aggregator.DataAggregator``; the statements returned by its
    ``makeSQL()`` are executed on a cursor from ``conn`` and committed.

    No jobs run and no SQL is executed here when the scoreboard URL does not
    validate, when there is no "GameHeader" result set, or when that result
    set has no rows.

    :param conn: connection forwarded to ``retrieve_and_dump_data`` and used
        for ``cursor()`` / ``commit()``.
    :param date: date to load.
    :returns: None
    """
    # Single-argument print(...) emits the same text whether print is a
    # statement or a function.
    print("Loading info for " + str(date))

    scoreboard_url = fetch.make_scoreboard_url(date)
    if not fetch.validate_url(scoreboard_url):
        return

    data = retrieve_and_dump_data(conn, scoreboard_url)

    # Real lists (not filter/map results) so that indexing, truth-testing
    # and len() below do not depend on the builtins returning lists.
    game_header = [rs for rs in data["resultSets"] if rs["name"] == "GameHeader"]
    if not game_header:
        return

    game_ids = [row[2] for row in game_header[0]["rowSet"]]
    if not game_ids:
        print("No Games")
        return

    print("Found " + str(len(game_ids)) + " games.")

    urls = []
    for game_id in game_ids:
        urls.extend(fetch.make_urls_for_game_id(game_id))

    pool = workerpool.WorkerPool(size=10)
    jobs = []
    for url in urls:
        job = ETLJob(url)
        pool.put(job)
        jobs.append(job)
    pool.shutdown()
    pool.wait()

    # job.data is read only after pool.wait() has returned.
    da = aggregator.DataAggregator()
    for job in jobs:
        da.add(job.data)

    # The cursor is opened only on the path that executes SQL, and is always
    # closed, including when a statement or the commit raises.
    cursor = conn.cursor()
    try:
        for statement in da.makeSQL():
            cursor.execute(statement)
        conn.commit()
    finally:
        cursor.close()