def store_stat(sportsVu, season, player_or_team, measure_type,
               is_regular_season, table, connection):
    """Fetch one SportVU stat and insert it into ``table``.

    The data is requested with
    ``sportsVu.get_sportvu_data_for_stat(player_or_team, measure_type)``.
    When that call yields ``None`` nothing is written.  Otherwise the rows
    are passed through ``utils.add_keys(stat_data, season,
    is_regular_season)`` and inserted over ``connection``.

    Ordinary errors are logged and swallowed so one failing stat does not
    abort the caller; the function always returns ``None``.
    """
    try:
        stat_data = sportsVu.get_sportvu_data_for_stat(player_or_team,
                                                       measure_type)
        if stat_data is None:
            return None
        rows = utils.add_keys(stat_data, season, is_regular_season)
        connection.execute(table.insert().values(rows))
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must still be able to stop a long-running scrape.
        logging.error(utils.LogException())
    return None
def store_stat(season, season_type, player_or_team, measure_type,
               is_regular_season, table, connection):
    """Fetch one SportVU stat for a season and insert it into ``table``.

    The data comes from ``sportvu_stats.get_sportvu_data_for_stat(season,
    season_type, player_or_team, measure_type)``.  Before inserting, the
    rows are passed through ``utils.add_keys`` together with today's date
    (``YYYY-MM-DD``) and ``is_regular_season``.

    Ordinary errors are logged and swallowed; the function always returns
    ``None``.
    """
    try:
        stat_data = sportvu_stats.get_sportvu_data_for_stat(
            season, season_type, player_or_team, measure_type)
        today = time.strftime("%Y-%m-%d")
        # NOTE(review): ``replace_string`` is not a stock SQLAlchemy insert()
        # argument -- presumably a project-specific compiler hook; confirm.
        connection.execute(
            table.insert(replace_string=""),
            utils.add_keys(stat_data, today, is_regular_season))
    except Exception:
        # Not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
        logging.error(utils.LogException())
    return None
async def store_data(pool, query, data):
    """Run ``query`` once per parameter set in ``data`` and commit.

    A connection is acquired from ``pool``, a cursor is opened on it,
    ``cursor.executemany(query, data)`` is awaited and the connection is
    committed.  Both the connection and the cursor are released by their
    ``async with`` blocks.

    Ordinary errors are logged and the failing query is printed; nothing is
    re-raised.  Cancellation is *not* swallowed, so cancelling the
    surrounding task still works.
    """
    try:
        async with pool.acquire() as conn:
            async with conn.cursor() as cur:
                await cur.executemany(query, data)
                await conn.commit()
    except Exception:
        # ``asyncio.CancelledError`` derives from BaseException on Python
        # 3.8+, so it is intentionally not caught here: a bare ``except:``
        # would turn task cancellation into a logged "error".
        logging.error(utils.LogException())
        print('\n')
        print(query)
    return
def store_data(connection, is_team, playtype, scheme, synergy_data, table):
    """Fetch one Synergy stat, rename its keys and insert it into ``table``.

    The rows come from ``synergy_data.get_synergy_data_for_stat(is_team,
    playtype, scheme)``; when that returns ``None`` nothing is written.
    Each row dict is modified in place: ``FGMG`` -> ``FG_MG``, ``FGM`` ->
    ``FG_M``, ``TeamIDSID`` -> ``TEAM_ID`` and, only when ``is_team == 0``,
    ``PlayerIDSID`` -> ``PLAYER_ID``.

    Ordinary errors (including a missing source key) are logged and
    swallowed; the function always returns ``None``.
    """
    try:
        stat_data = synergy_data.get_synergy_data_for_stat(
            is_team, playtype, scheme)
        if stat_data is None:
            return None

        # (source key, destination key) pairs, applied in a single pass.
        renames = [
            ('FGMG', 'FG_MG'),
            ('FGM', 'FG_M'),
            ('TeamIDSID', 'TEAM_ID'),
        ]
        if is_team == 0:
            # Only applied for is_team == 0 (presumably player-level data).
            renames.append(('PlayerIDSID', 'PLAYER_ID'))

        for row in stat_data:
            for old_key, new_key in renames:
                row[new_key] = row.pop(old_key)

        connection.execute(table.insert().values(stat_data))
    except Exception:
        # Not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
        logging.error(utils.LogException())
    return None
def store_team_stat(season, season_type, measure_type, request_date,
                    game_date, game_ids, game_team_map, is_regular_season,
                    table, connection):
    """Fetch one day of team SportVU data, tag it with game ids and store it.

    Team data for ``measure_type`` is requested from
    ``sportvu_stats.get_sportvu_data_for_stat`` with ``request_date`` used
    as both start and end date.  The result is passed through
    ``sportvu_stats.add_game_id_to_game_log_for_team`` (with ``game_date``,
    ``game_ids`` and ``game_team_map``) and ``utils.add_keys`` (with
    ``game_date`` and ``is_regular_season``) before being inserted into
    ``table`` over ``connection``.

    Ordinary errors are logged and swallowed; the function always returns
    ``None``.
    """
    try:
        team_stats = sportvu_stats.get_sportvu_data_for_stat(
            season, season_type, "Team", measure_type,
            start_date=request_date, end_date=request_date)
        team_stats_with_game_id = (
            sportvu_stats.add_game_id_to_game_log_for_team(
                team_stats, game_date, game_ids, game_team_map))
        # NOTE(review): ``replace_string`` is not a stock SQLAlchemy insert()
        # argument -- presumably a project-specific compiler hook; confirm.
        connection.execute(
            table.insert(replace_string=""),
            utils.add_keys(team_stats_with_game_id, game_date,
                           is_regular_season))
    except Exception:
        # Not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
        logging.error(utils.LogException())
    return None
def main():
    """Add on-floor lineups to play-by-play games that are missing them.

    Database credentials are read from ``config.json``.  Every distinct
    ``GAME_ID`` in the pbp table that has a row with a NULL
    ``HOME_PLAYER1`` is reprocessed: its play-by-play rows are loaded into
    a DataFrame, ``pbp.Lineups(...).get_players_on_floor_for_game()``
    computes the lineups, and the result is written back to the pbp table.

    A failure on one game is logged and the remaining games are still
    processed.
    """
    logging.basicConfig(filename='logs/process_pbp.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')

    # ``with`` closes the config file instead of leaking the handle.
    with open('config.json') as config_file:
        config = json.load(config_file)

    # connect to database
    username = config['username']
    password = config['password']
    host = config['host']
    database = config['database']
    engine = create_engine('mysql://' + username + ':' + password +
                           '@' + host + '/' + database)
    conn = engine.connect()
    try:
        # get game_ids to process
        # ``== None`` is intentional: SQLAlchemy renders it as ``IS NULL``.
        missing_games_query = select(
            [distinct(schema.pbp.c.GAME_ID)]
        ).where(schema.pbp.c.HOME_PLAYER1 == None)  # noqa: E711
        games_to_process = [game.GAME_ID
                            for game in conn.execute(missing_games_query)]

        # add players on floor to database
        for game_id in games_to_process:
            try:
                pbp_query = select([schema.pbp]).where(
                    schema.pbp.c.GAME_ID == game_id)
                results = conn.execute(pbp_query)
                pbp_data = DataFrame(results.fetchall())
                pbp_data.columns = results.keys()

                game_data = pbp.Lineups(pbp_data)
                pbp_with_lineups = game_data.get_players_on_floor_for_game()
                # NOTE(review): ``replace_string`` is not a stock SQLAlchemy
                # insert() argument -- presumably project-specific; confirm.
                conn.execute(schema.pbp.insert(replace_string=""),
                             pbp_with_lineups)
            except Exception:
                # Keep going with the next game, but let Ctrl-C and
                # SystemExit stop the run (a bare ``except:`` would not).
                logging.error(utils.LogException())
    finally:
        conn.close()
def store_data(connection, table, data):
    """Insert ``data`` into ``table`` over ``connection``.

    Executes ``table.insert().values(data)``.  Ordinary errors are logged
    and swallowed so a failed insert does not abort the caller; the
    function always returns ``None``.
    """
    try:
        connection.execute(table.insert().values(data))
    except Exception:
        # Not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
        logging.error(utils.LogException())
    return None
def store_data(connection, data, table):
    """Insert ``data`` into ``table`` over ``connection``.

    Executes ``table.insert(replace_string="")`` with ``data`` as the
    statement parameters.  Ordinary errors are logged and swallowed so a
    failed insert does not abort the caller; the function always returns
    ``None``.
    """
    try:
        # NOTE(review): ``replace_string`` is not a stock SQLAlchemy insert()
        # argument -- presumably a project-specific compiler hook; confirm.
        statement = table.insert(replace_string="")
        connection.execute(statement, data)
    except Exception:
        # Not a bare ``except:`` so Ctrl-C / SystemExit still propagate.
        logging.error(utils.LogException())
    return None
def main():
    """Rebuild on-floor lineups for every game played in a date range.

    Usage: ``script [START END]`` with both dates as ``YYYY-MM-DD``.  With
    fewer than two date arguments the range is yesterday only; with more
    than two the script prints a message and exits.

    For each day in the range the game ids are looked up with
    ``scrape.helper.get_game_ids_for_date``; games whose id starts with
    ``002`` or ``004`` have their play-by-play rows loaded, lineups computed
    with ``pbp.Lineups(...).get_players_on_floor_for_game()`` and the result
    written back to the pbp table.  A failure on one game is logged and the
    remaining games are still processed.
    """
    logging.basicConfig(filename='logs/process_pbp.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')

    # ``with`` closes the config file instead of leaking the handle.
    with open('config.json') as config_file:
        config = json.load(config_file)

    if len(sys.argv) < 3:
        # Evaluate "yesterday" once so start and end cannot straddle midnight.
        yesterday = datetime.date.today() - datetime.timedelta(1)
        start_date = yesterday
        end_date = yesterday
    elif len(sys.argv) > 3:
        print("Too many arguments. Enter a start and end date with format YYYY-MM-DD")
        sys.exit(0)
    else:
        # Validate and parse in one step; strptime raises ValueError on a
        # malformed or impossible date.
        try:
            start_date = datetime.datetime.strptime(
                sys.argv[1], '%Y-%m-%d').date()
        except ValueError:
            print('invalid format for start date')
            sys.exit(0)
        try:
            end_date = datetime.datetime.strptime(
                sys.argv[2], '%Y-%m-%d').date()
        except ValueError:
            print('invalid format for end date')
            sys.exit(0)

    season = config["season"]
    # make sure season is valid format
    season_pattern = re.compile(r'\d{4}[-]\d{2}$')
    if season_pattern.match(season) is None:
        print("Invalid Season format. Example format: 2014-15")
        sys.exit(0)

    # connect to database
    username = config['username']
    password = config['password']
    host = config['host']
    database = config['database']
    engine = create_engine('mysql://' + username + ':' + password +
                           '@' + host + '/' + database)
    conn = engine.connect()
    try:
        for dt in rrule(DAILY, dtstart=start_date, until=end_date):
            games = scrape.helper.get_game_ids_for_date(
                dt.strftime("%Y-%m-%d"))
            for game_id in games:
                # Only ids with these two prefixes are processed
                # (presumably regular season and playoffs -- confirm).
                if game_id[:3] not in ("002", "004"):
                    continue
                try:
                    pbp_query = select([schema.pbp]).where(
                        schema.pbp.c.GAME_ID == game_id)
                    results = conn.execute(pbp_query)
                    pbp_data = DataFrame(results.fetchall())
                    pbp_data.columns = results.keys()

                    game_data = pbp.Lineups(pbp_data)
                    pbp_with_lineups = (
                        game_data.get_players_on_floor_for_game())
                    # NOTE(review): ``replace_string`` is not a stock
                    # SQLAlchemy insert() argument -- presumably
                    # project-specific; confirm.
                    conn.execute(schema.pbp.insert(replace_string=""),
                                 pbp_with_lineups)
                except Exception:
                    # Keep going with the next game, but let Ctrl-C and
                    # SystemExit stop the run (a bare ``except:`` would not).
                    logging.error(utils.LogException())
    finally:
        conn.close()
def _store_new_tracking_logs(conn, player_id, logs, log_table, event_filter,
                             combine):
    """Insert the (game, period) chunks of ``logs`` not yet in ``log_table``.

    ``logs`` is turned into a DataFrame.  Its distinct ``GAME_ID`` /
    ``PERIOD`` pairs are reduced by the pairs already stored in
    ``log_table`` for ``player_id``.  For every remaining pair,
    ``combine(logs_df, pbp_data, player_id, period, game_id)`` builds the
    rows to insert, where ``pbp_data`` holds the player's play-by-play rows
    matching ``event_filter``.  Exceptions propagate to the caller.
    """
    logs_df = pd.DataFrame(logs)
    if len(logs_df.index) == 0:
        return

    pending = logs_df[['GAME_ID', 'PERIOD']].drop_duplicates()

    # Drop every (game, period) pair that is already in the database.
    already_in_db_query = select(
        [log_table.c.GAME_ID, log_table.c.PERIOD]
    ).where(log_table.c.PLAYER_ID == player_id).distinct()
    for stored in conn.execute(already_in_db_query):
        already_in = ((pending.GAME_ID == stored.GAME_ID) &
                      (pending.PERIOD == stored.PERIOD))
        pending = pending[~already_in]

    if len(pending.index) == 0:
        return

    # Load the player's matching play-by-play events once, then merge them
    # with the tracking logs one (game, period) at a time.
    pbp_query = select([schema.pbp]).where(
        and_(schema.pbp.c.PLAYER1_ID == player_id, event_filter))
    results = conn.execute(pbp_query)
    pbp_data = pd.DataFrame(results.fetchall())
    pbp_data.columns = results.keys()

    for _, row in pending.iterrows():
        merged = combine(logs_df, pbp_data, player_id,
                         row['PERIOD'], row['GAME_ID'])
        # NOTE(review): ``replace_string`` is not a stock SQLAlchemy insert()
        # argument -- presumably project-specific; confirm.
        conn.execute(log_table.insert(replace_string=""), merged)


def main():
    """Update player-tracking shot logs, rebound logs and pass data.

    ``config.json`` supplies the season, ``is_regular_season`` (1 selects
    "Regular Season" / game prefix ``002``, 0 selects "Playoffs" / prefix
    ``004``) and the database credentials.

    Players are taken from traditional boxscores of games with the selected
    prefix that have no rows in the shot-log table yet; if that yields
    nobody, every player with ``FGA > 0`` in such games is used instead.
    For each player the new shot logs and rebound logs are merged with
    play-by-play data and stored, followed by passes made and passes
    received.  Each of the four steps is attempted independently: a failure
    is logged and the next step still runs.
    """
    logging.basicConfig(filename='logs/players.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')

    # ``with`` closes the config file instead of leaking the handle.
    with open('config.json') as config_file:
        config = json.load(config_file)

    season = config["season"]
    is_regular_season = config["is_regular_season"]

    # make sure season is valid format
    season_pattern = re.compile(r'\d{4}[-]\d{2}$')
    if season_pattern.match(season) is None:
        print("Invalid Season format. Example format: 2014-15")
        sys.exit(0)

    if is_regular_season == 0:
        season_type = "Playoffs"
        game_prefix = "004"
    elif is_regular_season == 1:
        season_type = "Regular Season"
        game_prefix = "002"
    else:
        # The previous message had the two meanings swapped relative to the
        # branches above.
        print("Invalid is_regular_season value. Use 0 for playoffs, 1 for regular season")
        sys.exit(0)

    # connect to database
    username = config['username']
    password = config['password']
    host = config['host']
    database = config['database']
    engine = create_engine('mysql://' + username + ':' + password +
                           '@' + host + '/' + database)
    conn = engine.connect()
    try:
        boxscores = schema.traditional_boxscores
        shot_logs = schema.player_tracking_shot_logs
        prefix_pattern = game_prefix + "%"

        # get player_ids to update
        players_to_update = {}
        games_in_db_query = select(
            [distinct(shot_logs.c.GAME_ID)]
        ).where(shot_logs.c.GAME_ID.ilike(prefix_pattern))
        players_to_update_query = select(
            [boxscores.c.PLAYER_ID, boxscores.c.PLAYER_NAME]
        ).where(and_(
            boxscores.c.GAME_ID.notin_(games_in_db_query),
            boxscores.c.GAME_ID.ilike(prefix_pattern),
        )).distinct()
        for player in conn.execute(players_to_update_query):
            players_to_update[player.PLAYER_ID] = player.PLAYER_NAME

        if not players_to_update:
            # No game is missing from the shot logs: fall back to every
            # player who attempted a field goal.
            players_to_update_query = select(
                [boxscores.c.PLAYER_ID, boxscores.c.PLAYER_NAME]
            ).where(and_(
                boxscores.c.FGA > 0,
                boxscores.c.GAME_ID.ilike(prefix_pattern),
            )).distinct()
            for player in conn.execute(players_to_update_query):
                players_to_update[player.PLAYER_ID] = player.PLAYER_NAME

        # get and update data
        for player_id, player_name in players_to_update.items():
            # 2147483647 is the largest signed 32-bit integer; ids outside
            # (0, 2**31 - 1) are skipped (presumably to fit an INT column --
            # confirm against the schema).
            if not 0 < int(player_id) < 2147483647:
                continue

            player_data = player_stats.PlayerData(player_id, player_name,
                                                  season, season_type)

            try:
                # shot logs are merged with pbp events of type 1 or 2
                _store_new_tracking_logs(
                    conn, player_id, player_data.shot_logs(),
                    schema.player_tracking_shot_logs,
                    or_(schema.pbp.c.EVENTMSGTYPE == 1,
                        schema.pbp.c.EVENTMSGTYPE == 2),
                    combine_pbp_shot_logs.combine_pbp_and_shot_logs_for_player_for_period)
            except Exception:
                logging.error(utils.LogException())

            try:
                # rebound logs are merged with pbp events of type 4
                _store_new_tracking_logs(
                    conn, player_id, player_data.rebound_logs(),
                    schema.player_tracking_rebound_logs,
                    schema.pbp.c.EVENTMSGTYPE == 4,
                    combine_pbp_rebounds_logs.combine_pbp_and_rebound_logs_for_player_for_period)
            except Exception:
                logging.error(utils.LogException())

            try:
                conn.execute(
                    schema.player_tracking_passes_made.insert(
                        replace_string=""),
                    utils.add_keys(player_data.passes_made(),
                                   time.strftime("%Y-%m-%d"),
                                   is_regular_season))
            except Exception:
                logging.error(utils.LogException())

            try:
                conn.execute(
                    schema.player_tracking_passes_received.insert(
                        replace_string=""),
                    utils.add_keys(player_data.passes_received(),
                                   time.strftime("%Y-%m-%d"),
                                   is_regular_season))
            except Exception:
                logging.error(utils.LogException())
    finally:
        conn.close()