def get_min_max_game_date_for_player(player):
    """
    Looks up the first and last dates on which a player appears in their game log.

    Only rows whose Game value lies between 20001 and 30417 (inclusive) are considered.

    :param player: player identifier, passed straight through to get_log_for_player
    :return: tuple (earliest Date, latest Date)
    """
    game_log = get_log_for_player(player)

    # Attach dates from the schedule, then keep only the accepted game-number range
    dated_log = schedules.attach_game_dates_to_dateframe(game_log)
    dated_log = dated_log.query('Game >= 20001 & Game <= 30417')

    dates = dated_log.Date
    return dates.min(), dates.max()
def get_date_options(player):
    """
    Builds one option dict per row of the player's game log, ordered by date.

    Each option looks like ``{'label': '<date> (<team>)', 'value': '<date>'}``, where the team text is produced by
    team_info.team_as_str. Only rows whose Game value lies between 20001 and 30417 (inclusive) are used.

    :param player: player identifier, passed straight through to get_log_for_player
    :return: list of dicts with keys 'label' and 'value'
    """
    df = get_log_for_player(player)
    df = schedules.attach_game_dates_to_dateframe(df) \
        .query('Game >= 20001 & Game <= 30417') \
        .sort_values('Date')

    # Labels are built once, here; a previous vectorised version of the same strings was computed and then
    # immediately overwritten, which doubled the team-name lookups for no effect.
    return [{'label': '{0:s} ({1:s})'.format(date, team_info.team_as_str(int(team))),
             'value': date}
            for date, team in zip(df.Date, df.Team)]
def make_5v5_rolling_days(df, **kwargs):
    """
    Takes rolling sums based on roll_len_days kwarg. E.g. 30 for a ~monthly rolling sum.

    The log is expanded to one row per player per calendar day between the first and last game date, numeric
    columns (other than Game, Season and Team) are summed over a window of roll_len_days rows per player, and
    the sums are attached as new columns named '<roll_len_days>-day <column>'. Days without a game are dropped
    again before returning, as is the Date column.

    :param df: dataframe; must contain a numeric PlayerID column plus whatever
        schedules.attach_game_dates_to_dateframe needs to attach a Date
    :param kwargs: the relevant one is roll_len_days, int
    :return: dataframe with extra columns, or df itself (untouched) when roll_len_days is not supplied
    """
    if 'roll_len_days' not in kwargs:
        return df
    roll_len = kwargs['roll_len_days']

    # Join to schedules to get game dates
    df2 = schedules.attach_game_dates_to_dateframe(df)

    # Join to a dataframe full of days.
    # pd.date_range is used directly (rather than asfreq on a two-row [min, max] frame) so that a log whose
    # first and last date coincide does not fail on a duplicated index.
    # TODO use grouper to speed this up
    daysdf = pd.DataFrame({'Date': pd.date_range(df2.Date.min(), df2.Date.max(), freq='D')}) \
        .assign(JoinKey=1)
    playersdf = df2[['PlayerID']].drop_duplicates() \
        .assign(JoinKey=1) \
        .merge(daysdf, how='inner', on='JoinKey') \
        .drop('JoinKey', axis=1)

    # Replace the column outright: an in-place .loc[:, 'Date'] assignment can leave the column as datetime,
    # which would then not match the string dates this code expects in df2 when merging.
    playersdf['Date'] = playersdf['Date'].dt.strftime('%Y-%m-%d')

    # Sort by player then date so the row order matches the groupby-rolling output (which is ordered by group
    # key); the two frames are glued together positionally below.
    fulldf = playersdf.merge(df2, how='left', on=['PlayerID', 'Date']) \
        .sort_values(['PlayerID', 'Date']) \
        .reset_index(drop=True)

    # Don't want to sum these, even though they're numeric
    to_exclude = ['Game', 'Season', 'Team']
    numeric_df = df.select_dtypes(include=[np.number]) \
        .drop(to_exclude, axis=1, errors='ignore')

    # errors='ignore': the grouping column is not always present in the rolled output, depending on the
    # pandas version in use.
    rolling_df = fulldf[numeric_df.columns] \
        .groupby('PlayerID').rolling(roll_len, min_periods=1).sum() \
        .drop('PlayerID', axis=1, errors='ignore') \
        .reset_index()
    assert len(rolling_df) == len(fulldf)

    # Rename columns
    columnnames = {col: '{0:d}-day {1:s}'.format(roll_len, col) for col in numeric_df.columns}
    rolling_df = rolling_df.rename(columns=columnnames)

    # Rows with no Game are the filler days added above; drop them again
    finaldf = pd.concat([fulldf, rolling_df], axis=1) \
        .dropna(subset=['Game']) \
        .drop('Date', axis=1)
    return finaldf
def make_5v5_rolling_gp(df, **kwargs):
    """
    Takes rolling sums of numeric columns and concatenates onto the dataframe. Will exclude season, game, player,
    and team.

    The window is roll_len rows (games) per player, in date order. New columns are named
    '<roll_len>-game <column>'.

    :param df: dataframe; must contain a numeric PlayerID column plus whatever
        schedules.attach_game_dates_to_dateframe needs to attach a Date
    :param kwargs: the relevant one is roll_len
    :return: dataframe with extra columns, or df itself (untouched) when roll_len is not supplied
    """
    if 'roll_len' not in kwargs:
        return df
    roll_len = kwargs['roll_len']

    # Need this to be in order, else the groupby-cumsum below won't work right
    df = schedules.attach_game_dates_to_dateframe(df) \
        .sort_values(['PlayerID', 'Date']) \
        .drop('Date', axis=1)

    # Get df and roll
    # NOTE: PlayerID has to be numeric at this point. If it was converted to object dtype upstream,
    # select_dtypes drops it and the groupby below cannot find it.
    to_exclude = ['Game', 'Season', 'Team']
    numeric_df = df.select_dtypes(include=[np.number]) \
        .drop(to_exclude, axis=1, errors='ignore')

    # errors='ignore': the grouping column is not always present in the rolled output, depending on the
    # pandas version in use.
    rollingdf = numeric_df.groupby('PlayerID') \
        .rolling(roll_len, min_periods=1).sum() \
        .drop('PlayerID', axis=1, errors='ignore') \
        .reset_index() \
        .drop('level_1', axis=1)

    # Rename columns
    columnnames = {col: '{0:d}-game {1:s}'.format(roll_len, col)
                   for col in numeric_df.columns if not col == 'PlayerID'}
    rollingdf = rollingdf.rename(columns=columnnames)

    # Add back to original
    # Order of players can change, so we'll assign row numbers in each player group
    df.loc[:, '_Row'] = 1
    df.loc[:, '_Row'] = df[['PlayerID', '_Row']].groupby('PlayerID').cumsum()
    rollingdf.loc[:, '_Row'] = 1
    rollingdf.loc[:, '_Row'] = rollingdf[['PlayerID', '_Row']].groupby('PlayerID').cumsum()

    df2 = df.merge(rollingdf, how='left', on=['PlayerID', '_Row']).drop('_Row', axis=1)
    return df2
def insert_missing_team_games(df, **kwargs):
    """
    Expands a 5v5 player log to every (Season, Game) x PlayerID combination and orders the result by date.

    Nothing happens unless kwargs contains 'team' and 'add_missing_games' is exactly True; in every other case
    df is returned as-is.

    :param df: dataframe, 5v5 player log or part of it
    :param kwargs: relevant ones are 'team' and 'add_missing_games'
    :return: dataframe with added rows (filled via manip.convert_to_all_combos using NaN), sorted by Date;
        or df itself when the kwargs above are not set
    """
    if not ('team' in kwargs and kwargs.get('add_missing_games') is True):
        return df

    # NOTE(review): the start/end dates are not used below; the call is kept in case it validates kwargs.
    get_startdate_enddate_from_kwargs(**kwargs)

    # np.nan rather than the np.NaN alias, which no longer exists in NumPy 2.0
    df2 = manip.convert_to_all_combos(df, np.nan, ('Season', 'Game'), 'PlayerID')
    df2 = schedules.attach_game_dates_to_dateframe(df2).sort_values('Date')

    # Don't use the team kwarg here but this will obviously be messy if we bring in multiple teams' games
    # And get_and_filter_5v5_log does filter for team up above
    return df2
def animated_usage_chart(**kwargs):
    """
    Shows (and optionally saves) an animated scatter of forward QoC against defense QoC, one frame per game date.

    FQoC, FQoT, DQoC and DQoT are computed as the corresponding *Sum column divided by its *N column. Each frame
    plots FQoC (x) against DQoC (y) for the rows on that date.

    :param kwargs: passed on to vhelper.get_and_filter_5v5_log. Also read here:
        roll_len_days (int, defaults to 30; used in the plot title) and
        save_file (if present, the animation is saved to this path before being shown)
    :return: nothing
    """
    kwargs.setdefault('roll_len_days', 30)

    qocqot = vhelper.get_and_filter_5v5_log(**kwargs)
    # Copy so that adding columns below works on an independent frame rather than a slice of the log
    qocqot = qocqot[['PlayerID', 'TOION', 'TOIOFF', 'Game', 'Season',
                     'FCompSum', 'FCompN', 'DCompSum', 'DCompN',
                     'FTeamSum', 'FTeamN', 'DTeamSum', 'DTeamN']].copy()
    qocqot['FQoC'] = qocqot.FCompSum / qocqot.FCompN
    qocqot['FQoT'] = qocqot.FTeamSum / qocqot.FTeamN
    qocqot['DQoC'] = qocqot.DCompSum / qocqot.DCompN
    qocqot['DQoT'] = qocqot.DTeamSum / qocqot.DTeamN
    # Share of on-ice time out of on-ice plus off-ice time
    qocqot['TOI60'] = qocqot.TOION / (qocqot.TOION + qocqot.TOIOFF)

    qocqot = schedules.attach_game_dates_to_dateframe(qocqot).sort_values('Date')

    # Frame number -> date shown in that frame
    alldates = {i: date for i, date in enumerate(qocqot.Date.unique())}

    temp = qocqot.query('Date == "{0:s}"'.format(alldates[0]))
    scat = plt.scatter(temp.FQoC, temp.DQoC)

    def update(frame_number):
        """Moves the scatter points to the given frame's date and refreshes the title."""
        temp = qocqot.query('Date == "{0:s}"'.format(alldates[frame_number]))
        # .values instead of .as_matrix(), which has been removed from pandas
        data = temp[['FQoC', 'DQoC']].values
        scat.set_offsets(data)
        plt.title('{0:d}-day rolling usage as of {1:s}'.format(kwargs['roll_len_days'], alldates[frame_number]))
        return scat,

    # frames bounds the frame numbers to the dates we actually have; without it the counter keeps growing
    # past the last date and the alldates lookup in update() fails.
    anim = FuncAnimation(plt.gcf(), update, frames=len(alldates), blit=False, interval=1000)
    if 'save_file' in kwargs:
        anim.save(kwargs['save_file'])
    plt.show()
def _rolling_player_f(player, gfcf, **kwargs):
    """
    Creates a graph with CF% or GF% (on plus off). Use gfcf to indicate which one.

    Two lines are drawn: the '<gfcf>F%' series (solid) and the '<gfcf>F% Off' series (dashed). The y-axis is fixed
    to 30%-70%. A second x-axis along the top labels the first row of each season as 'YYYY-YY -->'.

    All open matplotlib figures are closed before drawing. The 'player' and (unless x='Date') 'x' entries of
    kwargs are set by this function before kwargs is passed on to the title and save helpers.

    :param player: str or int, player to generate for
    :param gfcf: str. Use 'G' for GF% and GF% Off and 'C' for CF% and CF% Off
    :param kwargs: other filters. See scrapenhl2.plot.visualization_helper.get_and_filter_5v5_log for more
        information. Use x='Date' to index on date instead of game number
    :return: whatever vhelper.savefilehelper(**kwargs) returns (nothing, or figure)
    """

    kwargs['player'] = player
    fa = vhelper.get_and_filter_5v5_log(**kwargs)

    # Season and Game from the log, side by side with the rate columns from _calculate_f_rates
    df = pd.concat([fa[['Season', 'Game']], _calculate_f_rates(fa, gfcf)], axis=1)

    # Maps the part of each '%' column name after its first space to the full column name,
    # so columns can be looked up below by 'GF%', 'CF% Off', etc.
    col_dict = {col[col.index(' ') + 1:]: col for col in df.columns if '%' in col}

    plt.close('all')

    # Running row count (1, 2, 3, ...) used as the default x-axis; kept as a column as well as the index
    df.loc[:, 'Game Number'] = 1
    df.loc[:, 'Game Number'] = df['Game Number'].cumsum()
    df = df.set_index('Game Number', drop=False)

    if 'x' in kwargs and kwargs['x'] == 'Date':
        # Index on game date instead, with tick labels like Jan'18
        df = schedules.attach_game_dates_to_dateframe(df)
        df.loc[:, 'Date'] = pd.to_datetime(df.Date)
        #df.loc[:, 'Date'] = pd.to_datetime(df.Date).dt.strftime('%b/%y')
        df = df.set_index(pd.DatetimeIndex(df['Date']))
        plt.gca().xaxis_date()
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b\'%y'))
        plt.xlabel('Date')
    else:
        plt.xlabel('Game')
        # Record the x column actually used; it is read again below for the top-axis tick positions
        kwargs['x'] = 'Game Number'

    series = gfcf + 'F%'
    series2 = gfcf + 'F% Off'

    # Avoid the long lines in offseason by setting first value in each season to None
    # (the first row has no previous row, so it is given Season - 1 and is blanked as well)
    df.loc[:, 'PrevSeason'] = df.Season.shift(1)
    df.loc[:, 'PrevSeason'] = df.PrevSeason.fillna(df.Season - 1)
    df.loc[df.Season != df.PrevSeason, col_dict[series]] = None
    df.loc[df.Season != df.PrevSeason, col_dict[series2]] = None

    # Add YY-YY for top axis
    df.loc[:, 'TopLabel'] = df.Season.apply(lambda x: '{0:d}-{1:s} -->'.format(x, str(x+1)[2:]))

    plt.plot(df.index, df[col_dict[series]].values, label=series)
    plt.plot(df.index, df[col_dict[series2]].values, label=series2, ls='--')
    plt.legend(loc=1, fontsize=10)

    # Add seasons at top
    # A twin x-axis with the same limits carries one tick per season start, labelled with TopLabel
    ax1 = plt.gca()
    ax2 = ax1.twiny()
    ax2.set_xlim(*ax1.get_xlim())
    temp = df[df.Season != df.PrevSeason][[kwargs['x'], 'TopLabel']]
    ax2.tick_params(length=0, labelsize=8)
    ax2.set_xticks(temp.iloc[:, 0].values)
    ax2.set_xticklabels(temp.iloc[:, 1].values)
    for label in ax2.xaxis.get_majorticklabels():
        label.set_horizontalalignment('left')
    for tick in ax2.xaxis.get_major_ticks():
        tick.set_pad(-10)

    plt.title(_get_rolling_f_title(gfcf, **kwargs))

    # axes
    plt.ylabel(gfcf + 'F%')
    plt.ylim(0.3, 0.7)
    plt.xlim(df.index.min(), df.index.max())
    # y ticks every 5 percentage points from 30% to 70%, shown as whole percentages
    ticks = list(np.arange(0.3, 0.71, 0.05))
    plt.yticks(ticks, ['{0:.0f}%'.format(100 * tick) for tick in ticks])

    return vhelper.savefilehelper(**kwargs)