def team_as_id(team):
    """
    A helper method. If team entered is a number (as judged by helpers.check_number), returns it as an int.
    If team is str, looks it up by full name or abbreviation in the team info file and returns its integer ID.

    :param team: int, or str (team name or abbreviation)

    :return: int, the team ID. None if no team matches or if team is neither a number nor a str.
        When several rows match, a warning is printed and the first match is returned.
    """
    if helpers.check_number(team):
        return int(team)

    if not isinstance(team, str):
        # type objects reject the 's' format spec (TypeError), so convert with !s instead
        print('Specified wrong type for team: {0!s}'.format(type(team)))
        return None

    info = get_team_info_file()
    # Boolean masks instead of a string-built query, so a quote in the input cannot break the expression
    df = info[(info.Name == team) | (info.Abbreviation == team)]

    if len(df) == 0:
        print('Could not find ID for {0:s}'.format(team))
        return None
    if len(df) > 1:
        print('Multiple results when searching for {0:s}; returning first result'.format(team))
        print(df.to_string())
    return df.ID.iloc[0]
def player_as_id(playername, filterids=None, dob=None):
    """
    A helper method. If player entered is a number (as judged by helpers.check_number), returns it as an int.
    If player is str, returns integer id of that player.

    String lookups are tried in this order: exact name match, literal substring match, then fuzzy match
    (via helpers.fuzzy_match_player, whose result is looked up again through this function).

    :param playername: int, or str, the player whose ID you want to retrieve
    :param filterids: a tuple of players to choose from. Needs to be tuple else caching won't work.
    :param dob: yyyy-mm-dd, use to help when multiple players have the same name

    :return: int, the player ID. None if playername is neither a number nor a str.
        When several players match, a message is printed and either the default from
        check_default_player_id (exact matches only) or the first match is returned.
    """
    # Numeric input needs no lookup, so answer before loading and filtering the player ID file
    if helpers.check_number(playername):
        return int(playername)

    if not isinstance(playername, str):
        # type objects reject the 's' format spec (TypeError), so convert with !s instead
        print('Specified wrong type for player: {0!s}'.format(type(playername)))
        return None

    filterdf = get_player_ids_file()
    if filterids is not None:
        filterdf = filterdf.merge(pd.DataFrame({'ID': filterids}), how='inner', on='ID')

    pids = filterdf
    if dob is not None:
        pids = pids.query('DOB == "{0:s}"'.format(dob))

    tip = 'You can specify a tuple of acceptable IDs to scrapenhl2.scrape.players.player_as_id'

    # 1) Exact match (boolean mask, so quotes in the name cannot break a query string)
    df = pids[pids.Name == playername]
    if len(df) == 1:
        return df.ID.iloc[0]
    if len(df) > 1:
        default = check_default_player_id(playername)
        choice = 'first result' if default is None else 'default'
        print('Multiple results when searching for {0:s}; returning {1:s}'.format(playername, choice))
        print(tip)
        print(df.to_string())
        return df.ID.iloc[0] if default is None else default

    # 2) Literal substring match: regex=False so characters like '(' or '.' are not treated as a pattern,
    #    na=False so rows with a missing name simply do not match
    df = pids[pids.Name.str.contains(playername, regex=False, na=False)]
    if len(df) == 0:
        # 3) Fuzzy match against the candidate names, then resolve the matched name to an ID.
        #    dob is passed along so the recursive lookup searches the same candidate set.
        name = helpers.fuzzy_match_player(playername, pids.Name)
        return player_as_id(name, tuple(filterdf.ID.values), dob)
    if len(df) > 1:
        print('Multiple results when searching for {0:s}; returning first result'.format(playername))
        print(tip)
        print(df.to_string())
    return df.ID.iloc[0]
def update_player_log_file(playerids, seasons, games, teams, statuses):
    """
    Appends rows to the player log file, which records which players appeared in which games and
    whether they played or were scratched.

    Scalar arguments are repeated so that every column has one entry per player.

    :param playerids: int or str or list of int
    :param seasons: int, the season, or list of int the same length as playerids
    :param games: int, the game, or list of int the same length as playerids
    :param teams: str or int, the team, or list of int the same length as playerids
    :param statuses: str, or list of str the same length as playerids

    :return: nothing
    """

    def _repeated(value):
        """Return a list holding value once per player."""
        return [value] * len(playerids)

    # A single player (ID or name) becomes a one-element list of IDs
    if isinstance(playerids, (int, str)):
        playerids = [player_as_id(playerids)]

    if helpers.check_number(seasons):
        seasons = _repeated(seasons)
    if helpers.check_number(games):
        games = _repeated(games)
    if helpers.check_types(teams):
        teams = _repeated(team_info.team_as_id(teams))
    if isinstance(statuses, str):
        statuses = _repeated(statuses)

    new_rows = pd.DataFrame({
        'ID': playerids,  # Player ID
        'Team': teams,  # Team
        'Status': statuses,  # P for played, S for scratch.
        'Season': seasons,  # Season
        'Game': games,  # Game
    })

    if len(get_player_log_file()) == 1:
        # Per the original author's note: a one-row log holds only the seed entry (for Ovi) that
        # sets the datatypes, so the new rows replace it rather than being appended
        write_player_log_file(new_rows)
        return

    write_player_log_file(pd.concat([get_player_log_file(), new_rows]))
def get_team_combo_corsi(season, team, games, n_players=2):
    """
    Gets combo Corsi for team for specified games

    The team's play-by-play is filtered (via manip.filter_for_five_on_five and manip.filter_for_corsi),
    on-ice players are attached, and each event is counted as CF when its Team equals the focus team's ID
    and as CA otherwise. Counts are summed per group of n_players players.

    :param season: int, the season
    :param team: int or str, team
    :param games: int or iterable of int, games
    :param n_players: int. E.g. 1 gives you player Corsi, 2 gives you 2-player group Corsi,
        3 makes 3-player groups, etc

    :return: dataframe, the output of manip.convert_to_all_combos applied to the per-group CF/CA sums

    :raises ValueError: if n_players is less than 1
    """
    if n_players < 1:
        # Without at least one player column there is nothing to group on
        raise ValueError('n_players must be at least 1, got {0!s}'.format(n_players))

    if helpers.check_number(games):
        games = [games]

    teamid = team_info.team_as_id(team)

    corsi = teams.get_team_pbp(season, team)
    corsi = (
        corsi.assign(_Secs=corsi.Time)
        .merge(pd.DataFrame({'Game': games}), how='inner', on='Game')
        .pipe(manip.filter_for_five_on_five)
        .pipe(manip.filter_for_corsi)
        [['Game', 'Time', 'Team', '_Secs']]
        .pipe(onice.add_onice_players_to_df, focus_team=team, season=season, gamecol='Game')
    )

    # Drop the six Opp columns and the focus team's sixth slot, then go long: one row per (event, player)
    cols_to_drop = ['Opp{0:d}'.format(i) for i in range(1, 7)] + ['{0:s}6'.format(team_info.team_as_str(team))]
    corsi = (
        corsi.drop(cols_to_drop, axis=1)
        .pipe(helpers.melt_helper, id_vars=['Game', 'Time', 'Team'], var_name='P', value_name='PlayerID')
        .drop('P', axis=1)
    )

    # Self-merge on the event keys once per extra player to enumerate every on-ice player combination
    groupcols = ['PlayerID' + str(i + 1) for i in range(n_players)]
    corsi2 = None
    for col in groupcols:
        corsitemp = corsi.rename(columns={'PlayerID': col})
        if corsi2 is None:
            corsi2 = corsitemp
        else:
            corsi2 = corsi2.merge(corsitemp, how='inner', on=['Game', 'Time', 'Team'])

    # Assign CF and CA: an event by the focus team is a Corsi-for, anything else a Corsi-against
    is_for = corsi2.Team == teamid
    corsi2 = corsi2.assign(CF=is_for.astype('int64'), CA=(~is_for).astype('int64')) \
        .drop(['Game', 'Time', 'Team'], axis=1)

    # Group by players and count
    grouped = corsi2.groupby(groupcols, as_index=False).sum()

    # Convert to all columns
    return manip.convert_to_all_combos(grouped, 0, *groupcols)
def player_as_str(playerid, filterids=None):
    """
    A helper method. If player is a number, returns string name of that player. If player is a str,
    resolves it to an ID with player_as_id and returns the name stored for that ID.

    :param playerid: int, or str, player whose name you want to retrieve
    :param filterids: a tuple of players to choose from. Needs to be tuple else caching won't work.
        Probably not needed but you can use this method to go from part of the name to full name,
        in which case it may be helpful.

    :return: str, the player name. None if no player has that ID or if playerid is neither a str
        nor a number. When several rows match, a warning is printed and the first name is returned.
    """
    if isinstance(playerid, str):
        # Resolve the name to an ID first; filterids is passed through so it restricts the
        # name search as well as the final ID lookup
        realid = player_as_id(playerid, filterids)
        return player_as_str(realid, filterids)

    if not helpers.check_number(playerid):
        # type objects reject format specs such as 'd' (TypeError), so convert with !s instead
        print('Specified wrong type for player: {0!s}'.format(type(playerid)))
        return None

    filterdf = get_player_ids_file()
    if filterids is not None:
        filterdf = filterdf.merge(pd.DataFrame({'ID': filterids}), how='inner', on='ID')

    df = filterdf.query('ID == {0:.0f}'.format(playerid))
    if len(df) == 0:
        print('Could not find name for {0:.0f}'.format(playerid))
        return None
    if len(df) > 1:
        # .0f rather than d so float-valued IDs format the same way as in the query above
        print('Multiple results when searching for {0:.0f}; returning first result'.format(playerid))
        print(df.to_string())
    return df.Name.iloc[0]
def get_team_combo_toi(season, team, games, n_players=2):
    """
    Gets 5v5 combo TOI for team for specified games

    The team's TOI data is restricted to the requested games, filtered with
    manip.filter_for_five_on_five, and melted so each row is one (Game, Time, player) entry taken from
    columns Team1 through Team5. The number of shared (Game, Time) rows is then counted per group of
    n_players players and reported as Secs.

    :param season: int, the season
    :param team: int or str, team
    :param games: int or iterable of int, games
    :param n_players: int. E.g. 1 gives you player TOI, 2 gives you 2-player group TOI,
        3 makes 3-player groups, etc

    :return: dataframe, the output of manip.convert_to_all_combos applied to the per-group counts

    :raises ValueError: if n_players is less than 1
    """
    if n_players < 1:
        # Without at least one player column there is nothing to group on
        raise ValueError('n_players must be at least 1, got {0!s}'.format(n_players))

    if helpers.check_number(games):
        games = [games]

    toi = (
        teams.get_team_toi(season, team)
        .merge(pd.DataFrame({'Game': games}), how='inner', on='Game')
        .pipe(manip.filter_for_five_on_five)
        [['Game', 'Time', 'Team1', 'Team2', 'Team3', 'Team4', 'Team5']]
        .pipe(helpers.melt_helper, id_vars=['Game', 'Time'], var_name='P', value_name='PlayerID')
        .drop('P', axis=1)
    )

    # Self-merge on (Game, Time) once per extra player to enumerate every on-ice player combination
    groupcols = ['PlayerID' + str(i + 1) for i in range(n_players)]
    toi2 = None
    for col in groupcols:
        toitemp = toi.rename(columns={'PlayerID': col})
        if toi2 is None:
            toi2 = toitemp
        else:
            toi2 = toi2.merge(toitemp, how='inner', on=['Game', 'Time'])

    # Group by players and count; the count of Time rows per group becomes Secs
    grouped = toi2.drop('Game', axis=1) \
        .groupby(groupcols, as_index=False) \
        .count() \
        .rename(columns={'Time': 'Secs'})

    # Convert to all columns
    return manip.convert_to_all_combos(grouped, 0, *groupcols)
def team_as_str(team, abbreviation=True):
    """
    A helper method. If team entered is str, returns that (after fix_variants has been applied).
    If team is a number, returns string name of that team.

    If the ID is not in the team info file, add_team_to_info_file is called and the abbreviation or
    name is taken from its result.

    :param team: int, or str
    :param abbreviation: bool, whether to return 3-letter abbreviation or full name

    :return: str, the team name. None if the team could not be found or added, or if team is neither
        a str nor a number. When several rows match, a warning is printed and the first is returned.
    """
    team = fix_variants(team)

    if isinstance(team, str):
        return team

    if not helpers.check_number(team):
        # type objects reject the 's' format spec (TypeError), so convert with !s instead
        print('Specified wrong type for team: {0!s}'.format(type(team)))
        return None

    # Plain int so the 'd' format spec below also works for float-valued IDs
    teamid = int(team)
    col_to_access = 'Abbreviation' if abbreviation else 'Name'

    df = get_team_info_file().query('ID == {0:d}'.format(teamid))
    if len(df) == 0:
        # Unknown ID: try to add the team to the info file. Best-effort by design, so any failure is
        # reported and turned into None rather than raised.
        try:
            result = add_team_to_info_file(team)
            return result[1] if abbreviation else result[2]
        except Exception as e:
            print('Could not find name for {0:d} {1:s}'.format(teamid, str(e)))
            return None

    if len(df) > 1:
        print('Multiple results when searching for {0:d}; returning first result'.format(teamid))
        print(df.to_string())
    return df[col_to_access].iloc[0]