import logging
import os
import sys
import traceback
from datetime import date
from multiprocessing import Lock, Pool, cpu_count
from os import listdir
from os.path import isfile, join
from timeit import default_timer as timer

from tqdm import tqdm

# Project-local helper modules (assumed to live alongside this script).
import db_func
import sif


def insert_odds():
    """Stage the odds CSVs in the imports table and move them into the bet tables."""
    conn = db_func.get_conn()
    conn.autocommit = True
    cur = conn.cursor()

    # Seed the bet_type lookup table if it is empty.
    query = '''SELECT * FROM bet_type'''
    cur.execute(query)
    if len(cur.fetchall()) == 0:
        sif.insert_bet_type('db_src/bet_type.csv', cur)
        conn.commit()

    db_func.truncate_imports(cur)
    csvs = [
        f'csv/odds/{f}' for f in listdir('csv/odds')
        if isfile(join('csv/odds', f))
    ]
    for csv in tqdm(csvs, colour='red', position=1):
        sif.insert_to_imports(csv)
    conn.commit()

    query = '''ANALYZE imports'''
    db_func.exec_query(conn, query)
    sif.imports_to_bets(cur)
    conn.commit()

    query = '''ANALYZE odds'''
    db_func.exec_query(conn, query)
    sif.imports_to_bets_total(cur)
    conn.commit()
    conn.close()
def main(): team = "DAL" #date = "201910230" match_id = '1' #shd.save_boxscore_html(team, date) #html_path = "bs4_html/boxscores/201910230DAL.html" #shd.save_player_data(html_path) #shd.save_match_html('2021') #shd.save_match_data('bs4_html/match_list/2021.html') #shd.save_all_player_performances('2021') # conn=db_func.get_conn() # cur = conn.cursor() # res = imp.get_all_matches(cur) #print(res) if not os.path.isdir("logs"): os.makedirs("logs") logging.basicConfig( level=logging.DEBUG, format= '[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', handlers=[ logging.FileHandler('logs/' + date.today().strftime("%Y-%m-%d") + '.log'), logging.StreamHandler() ]) try: conn = db_func.get_conn() cur = conn.cursor() seasons = ['2021', '2020'] html = 'bs4_html/players/l/luety01.html' bbref_endpoint = '/players/l/luety01.html' player_name = 'ty' db_func.truncate_imports(cur) conn.commit() sif.insert_to_imports('csv/boxscores/200710300GSW.csv') conn.commit() sif.imports_to_player_performance(cur) conn.commit() conn.close() #err = shd.player_data_to_csv(html, bbref_endpoint, player_name) #print(err) #matches = sbrd.get_matches() #print(matches[0:10]) #matches = sbrd.get_matches() #print(len(matches)) #shd.boxscore_to_csv('bs4_html/boxscores/201212150CHI.html') #sif.insert_to_imports('csv/boxscores/201312070UTA.csv') #The page that contains the start of playoff table #only needs rows after it modified #While all pages after only contain playoff games except Exception as err: logging.exception(traceback.print_exception(*sys.exc_info())) sys.exit()
def process_matches(cur, seasons):
    """Process each season's matches in parallel, then move the staged rows from imports into the match table."""
    db_func.truncate_imports(cur)
    lock = Lock()
    pool_size = cpu_count() - 1
    print(f'starting computations on {pool_size} cores')
    with Pool(pool_size, initializer=init_child, initargs=(lock, )) as pool:
        pool.map(mproc_insert_matches, seasons)
    sif.imports_to_match(cur)
def fill_missing_odds():
    """Pre-process the SBRO odds CSVs, stage them in imports, and fill in missing odds."""
    conn = db_func.get_conn()
    conn.autocommit = True
    cur = conn.cursor()
    db_func.truncate_imports(cur)

    if not os.path.isdir('csv/sbro_odds/modified'):
        os.makedirs('csv/sbro_odds/modified')

    # Pre-process the raw SBRO odds files (the results are read back from
    # csv/sbro_odds/modified below).
    csvs = [
        f'csv/sbro_odds/{f}' for f in listdir('csv/sbro_odds')
        if isfile(join('csv/sbro_odds', f))
    ]
    for csv in tqdm(csvs, colour='red', position=1):
        modify_sbro_odds(csv)

    # Stage the modified files, then fill the gaps in the odds data.
    modified_csvs = [
        f'csv/sbro_odds/modified/{f}' for f in listdir('csv/sbro_odds/modified')
        if isfile(join('csv/sbro_odds/modified', f))
    ]
    for csv in tqdm(modified_csvs, colour='red', position=1):
        sif.insert_to_imports(csv)
    conn.commit()

    sif.fill_missing_odds(cur)
    conn.commit()
    conn.close()
def run_scraper(): if not os.path.isdir("logs"): os.makedirs("logs") logging.basicConfig( level=logging.DEBUG, format= '[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', handlers=[ logging.FileHandler('logs/' + date.today().strftime("%Y-%m-%d") + '.log'), logging.StreamHandler() ]) try: start = timer() conn = db_func.get_conn() cur = conn.cursor() conn.autocommit = True team_list_path = 'db_src/NBA_Teams.csv' seasons_path = 'db_src/seasons.csv' seasons = get_all_seasons(cur) if len(seasons) == 0: db_func.truncate_imports(cur) sif.insert_seasons(seasons_path, cur) seasons = get_all_seasons(cur) seasons = [(str(seasons[i][0]), ) for i in range(len(seasons))] if DEBUG: print(seasons) print(len(get_teams(seasons[0][0]))) if len(get_teams(seasons[0][0])) <= 0: db_func.truncate_imports(cur) sif.insert_to_imports(team_list_path) sif.imports_to_team(cur) sif.imports_to_team_name(cur) conn.commit() db_func.truncate_imports(cur) process_players(cur, seasons) conn.commit() db_func.truncate_imports(cur) process_matches(cur, seasons) conn.commit() db_func.truncate_imports(cur) process_boxscores(cur) sif.player_performance_to_injury(cur) conn.commit() logging.info("All players inserted") end = timer() print(f'elapsed time: {end - start}') except Exception as err: logging.exception(traceback.print_exception(*sys.exc_info())) conn.rollback() sys.exit() finally: if (conn): conn.close() print("PostgreSQL connection is closed")