Example #1
import fetch       # project-local helpers for building and validating stats URLs (assumed)
import workerpool  # thread-pool library: https://pypi.org/project/workerpool/

def load_date_multi(conn, date):
  """
  Load info for a date using multiple threads.
  """
  print("Loading info for " + str(date))
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # Look for the "GameHeader" result set, which lists the day's games.
    game_header = [r for r in data["resultSets"] if r["name"] == "GameHeader"]
    if game_header:
      # Column 2 of each GameHeader row holds the game ID.
      game_ids = [r[2] for r in game_header[0]["rowSet"]]
      if not game_ids:
        print("No Games")
      else:
        print("Found " + str(len(game_ids)) + " games.")
        urls = []
        for game_id in game_ids:
          urls += fetch.make_urls_for_game_id(game_id)
        # Fan the per-game URLs out to a pool of 10 worker threads.
        pool = workerpool.WorkerPool(size=10)
        jobs = []
        for url in urls:  # enqueue the ETL jobs
          job = ETLJob(url, conn)
          pool.put(job)
          jobs.append(job)
        pool.shutdown()
        pool.wait()
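
Neither pooled example shows the ETLJob class itself. workerpool jobs are subclasses of workerpool.Job that implement run(), so a minimal sketch matching Example #1's ETLJob(url, conn) signature might look like the following; the run() body is an assumption that simply delegates to the same retrieve_and_dump_data helper the examples already use (Example #3's variant takes only a url and keeps the result in self.data instead of writing it).

import workerpool

class ETLJob(workerpool.Job):
  """One fetch-and-store task; the pool calls run() on a worker thread."""
  def __init__(self, url, conn):
    self.url = url
    self.conn = conn
    self.data = None

  def run(self):
    # Hypothetical body: fetch the URL and dump it through the shared helper,
    # keeping the parsed result around for callers that want it.
    self.data = retrieve_and_dump_data(self.conn, self.url)

Note that handing one conn to many threads assumes the underlying DB driver is thread-safe; Example #3 sidesteps this by having jobs only fetch, deferring all writes to the main thread.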
Example #2
import fetch  # project-local helpers for building and validating stats URLs (assumed)

def load_date(conn, date):
  """
  Retrieve and load all info for a given date.
  """
  print("Loading info for " + str(date))
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # Look for the "GameHeader" result set, which lists the day's games.
    game_header = [r for r in data["resultSets"] if r["name"] == "GameHeader"]
    if game_header:
      # Column 2 of each GameHeader row holds the game ID.
      game_ids = [r[2] for r in game_header[0]["rowSet"]]
      if not game_ids:
        print("No Games")
      else:
        print("Found " + str(len(game_ids)) + " games.")
        # Single-threaded variant: fetch and store each game's URLs in sequence.
        for game_id in game_ids:
          game_urls = fetch.make_urls_for_game_id(game_id)
          for url in game_urls:
            retrieve_and_dump_data(conn, url)
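
No call site for load_date is shown. As an illustrative sketch, assuming conn is any DB-API connection (sqlite3 here) and the date range is hypothetical, a driver loop could look like this:

import sqlite3
from datetime import date, timedelta

conn = sqlite3.connect("games.db")  # illustrative; any DB-API connection should work
day = date(2015, 11, 1)             # hypothetical start of the range
while day <= date(2015, 11, 7):
  load_date(conn, day)
  day += timedelta(days=1)
conn.close()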
Example #3
import aggregator  # project-local module that batches fetched data into SQL (assumed)
import fetch       # project-local helpers for building and validating stats URLs (assumed)
import workerpool  # thread-pool library: https://pypi.org/project/workerpool/

def load_date_multi(conn, date):
  """
  Load info for a date using multiple threads.
  """
  cursor = conn.cursor()
  print("Loading info for " + str(date))
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # Look for the "GameHeader" result set, which lists the day's games.
    game_header = [r for r in data["resultSets"] if r["name"] == "GameHeader"]
    if game_header:
      # Column 2 of each GameHeader row holds the game ID.
      game_ids = [r[2] for r in game_header[0]["rowSet"]]
      if not game_ids:
        print("No Games")
      else:
        print("Found " + str(len(game_ids)) + " games.")
        urls = []
        for game_id in game_ids:
          urls += fetch.make_urls_for_game_id(game_id)
        # Fan the per-game URLs out to a pool of 10 worker threads. Unlike
        # Example #1, these jobs only fetch; each keeps its result in .data.
        pool = workerpool.WorkerPool(size=10)
        jobs = []
        for url in urls:  # enqueue the ETL jobs
          job = ETLJob(url)
          pool.put(job)
          jobs.append(job)
        pool.shutdown()
        pool.wait()
        # Merge every job's fetched data and write it out in one transaction.
        da = aggregator.DataAggregator()
        for job in jobs:
          da.add(job.data)
        sql = da.makeSQL()
        for s in sql:
          cursor.execute(s)
        conn.commit()
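
The aggregator.DataAggregator class is also not shown; from its use above, it only needs an add(data) method and a makeSQL() that returns executable statement strings. The stub below is purely hypothetical and only illustrates that contract; the real class's SQL generation depends on the project's schema, and production code should escape or parameterize values rather than interpolate them.

class DataAggregator(object):
  """Collects per-job payloads and turns them into SQL statement strings."""

  def __init__(self):
    self.payloads = []

  def add(self, data):
    # Each job's .data payload is collected as-is.
    self.payloads.append(data)

  def makeSQL(self):
    # Hypothetical: one INSERT per payload into an assumed raw_data table.
    return ["INSERT INTO raw_data (payload) VALUES ('%s')" % p
            for p in self.payloads]

Batching all writes behind a single commit, as Example #3 does, keeps database access out of the worker threads and makes each date's load atomic.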