Example #1
0
def load_date_multi(conn, date):
  """
  Load info for a date using multiple threads
  """
  print "Loading info for " + str(date)
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # look for games
    game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
    if game_header:
      game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
      if not game_ids:
        print "No Games"
      else:
        print "Found " + str(len(game_ids)) + " games."
        urls = []
        for game_id in game_ids:
          urls += fetch.make_urls_for_game_id(game_id)
        pool = workerpool.WorkerPool(size=10)
        jobs = []
        for url in urls: # Enqueue the ETL jobs
          job = ETLJob(url, conn)
          pool.put(job)
          jobs.append(job)
        pool.shutdown()
        pool.wait()
Example #2
0
def load_date(conn, date):
  """
  Retrieve and load all info for a given date
  """
  print "Loading info for " + str(date)
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # look for games
    game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
    if game_header:
      game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
      if not game_ids:
        print "No Games"
      else:
        print "Found " + str(len(game_ids)) + " games."
        for game_id in game_ids:
          game_urls = fetch.make_urls_for_game_id(game_id)
          for url in game_urls:
            #print url
            retrieve_and_dump_data(conn, url)
Example #3
0
def load_date_multi(conn, date):
  """
  Load info for a date using multiple threads
  """
  cursor = conn.cursor()
  print "Loading info for " + str(date)
  scoreboard_url = fetch.make_scoreboard_url(date)
  if fetch.validate_url(scoreboard_url):
    data = retrieve_and_dump_data(conn, scoreboard_url)
    # look for games
    game_header = filter((lambda r: r["name"] == "GameHeader"), data["resultSets"])
    if game_header:
      game_ids = map((lambda r: r[2]), game_header[0]["rowSet"])
      if not game_ids:
        print "No Games"
      else:
        print "Found " + str(len(game_ids)) + " games."
        urls = []
        for game_id in game_ids:
          urls += fetch.make_urls_for_game_id(game_id)
        pool = workerpool.WorkerPool(size=10)
        jobs = []
        for url in urls: # Enqueue the ETL jobs
          job = ETLJob(url)
          pool.put(job)
          jobs.append(job)
        pool.shutdown()
        pool.wait()
        #print jobs[0].data
        da = aggregator.DataAggregator()
        for job in jobs:
          da.add(job.data)
        sql = da.makeSQL()
        for s in sql:
          #print s
          cursor.execute(s)
        conn.commit()
Example #4
0
if __name__ == "__main__":
    # Command-line interface definition.
    parser = argparse.ArgumentParser(description='Generate ebook files from web sources.')
    parser.add_argument('url', metavar='URL', nargs=1,
                        help='url of novel (only supports lightnovel.cn urls for now), support short-hand such as ln:<id> for lightnovel.cn')
    parser.add_argument('output', metavar='OUTPUT', nargs=1,
                        help='output file name')
    parser.add_argument('--clear-cache', action='store_true')
    parser.add_argument('-u', '--user',
                        help='user account for the resource')
    parser.add_argument('-k', '--kindlegen',
                        help='path to kindlegen executable (for MOBI only)')
    args = parser.parse_args()

    # Resolve the url to its (resource handler, login handler) pair.
    (res_handler, login_handler), url = validate_url(args.url[0])

    # Authenticate when a username was supplied; retry until login succeeds.
    if args.user:
        session = login_handler(args.user)
        while not session.login():
            pass

    # Parse the resource and emit it in the format implied by the output
    # file's extension, falling back to epub for unknown extensions.
    resource = res_handler(url)
    out_fn = args.output[0]
    extension = out_fn.split('.')[-1]
    out_cls = output_classes.get(extension, output_classes['epub'])
    resource.output_book(out_cls, out_fn, args)