Esempio n. 1
0
def get_min_max_game_date_for_player(player):
    """
    Looks up the first and last game dates found in a player's game log.

    :param player: passed straight to get_log_for_player

    :return: tuple of (smallest Date, largest Date) among rows with 20001 <= Game <= 30417
    """
    game_log = get_log_for_player(player)
    # The log itself has no dates; the schedule join supplies the Date column
    dated_log = schedules.attach_game_dates_to_dateframe(game_log)
    in_range = dated_log.query('Game >= 20001 & Game <= 30417')
    game_dates = in_range.Date
    return game_dates.min(), game_dates.max()
Esempio n. 2
0
def get_date_options(player):
    """
    Builds one selectable option per game in the player's log, ordered by date.

    :param player: passed straight to get_log_for_player

    :return: list of dicts, one per row with 20001 <= Game <= 30417, each of the form
        {'label': '<Date> (<team name>)', 'value': <Date>}
    """
    df = get_log_for_player(player)
    df = schedules.attach_game_dates_to_dateframe(df) \
        .query('Game >= 20001 & Game <= 30417') \
        .sort_values('Date')
    # Team is cast to int before the name lookup; the label format requires Date to be a str
    return [{'label': '{0:s} ({1:s})'.format(date,
                                             team_info.team_as_str(int(team))), 'value': date}
            for date, team in zip(df.Date, df.Team)]
def make_5v5_rolling_days(df, **kwargs):
    """
    Adds per-player rolling sums over a window of calendar days, e.g. 30 for a ~monthly rolling sum.

    Each player is expanded to one row per day between the first and last game date in df, the numeric
    columns are summed over the trailing roll_len_days rows, and the day rows without a game are
    dropped again before returning.

    :param df: dataframe; must have a PlayerID column among its numeric columns
    :param kwargs: the relevant one is roll_len_days, int. Without it, df is returned untouched.

    :return: dataframe with extra columns named '<roll_len_days>-day <column>'
    """
    if 'roll_len_days' not in kwargs:
        return df

    window = kwargs['roll_len_days']

    # Join to schedules to get game dates
    dated = schedules.attach_game_dates_to_dateframe(df)

    # Build a frame with one row per calendar day between the first and last game
    # TODO use grouper to speed this up
    endpoints = pd.DataFrame({'Date': [dated.Date.min(), dated.Date.max()]})
    calendar = endpoints.assign(JoinKey=1) \
        .set_index('Date') \
        .asfreq('1D').reset_index()
    # JoinKey is assigned again after asfreq so that every generated day row carries the key
    calendar = calendar.assign(JoinKey=1)

    # Cross join players x days through the constant key
    players = dated[['PlayerID']].drop_duplicates().assign(JoinKey=1)
    player_days = players.merge(calendar, how='inner', on='JoinKey').drop('JoinKey', axis=1)
    # Dates go back to 'YYYY-MM-DD' text so they can be matched against the schedule dates
    player_days.loc[:, 'Date'] = player_days.Date.dt.strftime('%Y-%m-%d')
    expanded = player_days.merge(dated, how='left', on=['PlayerID', 'Date'])

    # Numeric columns of the input, minus identifiers we don't want to sum even though they're numeric
    summable = df.select_dtypes(include=[np.number]) \
        .drop(['Game', 'Season', 'Team'], axis=1, errors='ignore') \
        .columns

    per_player = expanded[summable].groupby('PlayerID')
    rolled = per_player.rolling(window, min_periods=1).sum()
    rolled = rolled.drop('PlayerID', axis=1).reset_index()

    assert len(rolled) == len(expanded)

    # Rename columns
    renames = {}
    for col in summable:
        renames[col] = '{0:d}-day {1:s}'.format(window, col)
    rolled = rolled.rename(columns=renames)

    combined = pd.concat([expanded, rolled], axis=1)
    # Rows with no Game are the filler days added above
    return combined.dropna(subset={'Game'}).drop('Date', axis=1)
def make_5v5_rolling_gp(df, **kwargs):
    """
    Adds per-player rolling sums over a window of games played.

    Numeric columns other than Season, Game and Team are summed over each player's trailing
    roll_len rows (in date order) and joined back onto the log.

    :param df: dataframe; must have a PlayerID column of numeric dtype
    :param kwargs: the relevant one is roll_len, int. Without it, df is returned untouched.

    :return: dataframe with extra columns named '<roll_len>-game <column>'
    """
    if 'roll_len' not in kwargs:
        return df

    window = kwargs['roll_len']

    def _number_rows_per_player(frame):
        # Writes a 1-based running row counter within each PlayerID group into '_Row' (in place)
        frame.loc[:, '_Row'] = 1
        frame.loc[:, '_Row'] = frame[['PlayerID', '_Row']].groupby('PlayerID').cumsum()

    # Rows need to be in date order within each player, else the row numbering below won't line up
    dated = schedules.attach_game_dates_to_dateframe(df)
    ordered = dated.sort_values(['PlayerID', 'Date']).drop('Date', axis=1)

    # Only numeric columns are rolled. Note PlayerID has to survive this filter: if it was converted
    # to object dtype somewhere upstream it is not selected and the groupby below cannot find it.
    summable = ordered.select_dtypes(include=[np.number]) \
        .drop(['Game', 'Season', 'Team'], axis=1, errors='ignore')

    rolled = summable.groupby('PlayerID').rolling(window, min_periods=1).sum()
    rolled = rolled.drop('PlayerID', axis=1).reset_index().drop('level_1', axis=1)

    # Rename columns
    renames = {}
    for col in summable.columns:
        if col != 'PlayerID':
            renames[col] = '{0:d}-game {1:s}'.format(window, col)
    rolled = rolled.rename(columns=renames)

    # Add back to original
    # Order of players can change, so rows are matched on (PlayerID, row number within player)
    _number_rows_per_player(ordered)
    _number_rows_per_player(rolled)
    merged = ordered.merge(rolled, how='left', on=['PlayerID', '_Row'])
    return merged.drop('_Row', axis=1)
def insert_missing_team_games(df, **kwargs):
    """
    Expands a player log across every (Season, Game) / PlayerID combination and sorts it by game date.

    Only acts when kwargs contains 'team' and 'add_missing_games' is exactly True (truthy values
    such as 1 do not count); in every other case df is returned unchanged.

    :param df: dataframe, 5v5 player log or part of it
    :param kwargs: relevant ones are 'team' and 'add_missing_games'

    :return: dataframe with added rows
    """
    if 'team' not in kwargs or kwargs.get('add_missing_games') is not True:
        return df

    # Result is not used here; the call is kept so any checking of the date kwargs done by the
    # helper still happens. NOTE(review): drop the call if the helper has no such effect.
    get_startdate_enddate_from_kwargs(**kwargs)

    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    # NOTE(review): presumably np.nan is the fill value for the rows added per combination -- confirm
    # against manip.convert_to_all_combos.
    df2 = manip.convert_to_all_combos(df, np.nan, ('Season', 'Game'), 'PlayerID')
    df2 = schedules.attach_game_dates_to_dateframe(df2).sort_values('Date')
    # Don't use the team kwarg here but this will obviously be messy if we bring in multiple teams' games
    # And get_and_filter_5v5_log does filter for team up above
    return df2
Esempio n. 6
0
def animated_usage_chart(**kwargs):
    """
    Shows an animated scatter of FQoC (x axis) against DQoC (y axis), one frame per game date.

    :param kwargs: filters, passed through to vhelper.get_and_filter_5v5_log. Two keys are also read here:
        roll_len_days, int, set to 30 when not given and used in the plot title;
        save_file, optional, if present the animation is saved to it before being shown.

    :return: nothing
    """

    if 'roll_len_days' not in kwargs:
        kwargs['roll_len_days'] = 30

    qocqot = vhelper.get_and_filter_5v5_log(**kwargs)
    # Copy so the column assignments below act on an independent frame rather than on a slice
    qocqot = qocqot[['PlayerID', 'TOION', 'TOIOFF', 'Game', 'Season',
                     'FCompSum', 'FCompN', 'DCompSum', 'DCompN',
                     'FTeamSum', 'FTeamN', 'DTeamSum', 'DTeamN']].copy()
    # Each quality measure is a sum divided by its count
    qocqot.loc[:, 'FQoC'] = qocqot.FCompSum / qocqot.FCompN
    qocqot.loc[:, 'FQoT'] = qocqot.FTeamSum / qocqot.FTeamN
    qocqot.loc[:, 'DQoC'] = qocqot.DCompSum / qocqot.DCompN
    qocqot.loc[:, 'DQoT'] = qocqot.DTeamSum / qocqot.DTeamN
    qocqot.loc[:, 'TOI60'] = qocqot.TOION / (qocqot.TOION + qocqot.TOIOFF)

    qocqot = schedules.attach_game_dates_to_dateframe(qocqot).sort_values('Date')

    # Frame number -> date, in ascending date order
    alldates = dict(enumerate(qocqot.Date.unique()))

    temp = qocqot.query('Date == "{0:s}"'.format(alldates[0]))
    scat = plt.scatter(temp.FQoC, temp.DQoC)

    def update(frame_number):
        # Move the existing scatter points to that date's values and refresh the title
        temp = qocqot.query('Date == "{0:s}"'.format(alldates[frame_number]))
        # .values instead of .as_matrix(), which was removed in pandas 1.0
        data = temp[['FQoC', 'DQoC']].values
        scat.set_offsets(data)
        plt.title('{0:d}-day rolling usage as of {1:s}'.format(kwargs['roll_len_days'], alldates[frame_number]))
        return scat,

    # frames bounds the frame counter to the available dates; without it the counter keeps
    # growing and update() fails on the first frame number past the last date
    animation = FuncAnimation(plt.gcf(), update, frames=len(alldates), blit=False, interval=1000)
    if 'save_file' in kwargs:
        animation.save(kwargs['save_file'])
    plt.show()
Esempio n. 7
0
def _rolling_player_f(player, gfcf, **kwargs):
    """
    Creates a graph with CF% or GF% (on plus off). Use gfcf to indicate which one.

    The on-ice series is drawn as a solid line and the off-ice series as a dashed line, with the
    y axis fixed to 30%-70%. A second x axis along the top labels the point where each season starts.
    Any open figures are closed before plotting.

    :param player: str or int, player to generate for
    :param gfcf: str. Use 'G' for GF% and GF% Off and 'C' for CF% and CF% Off
    :param kwargs: other filters. See scrapenhl2.plot.visualization_helper.get_and_filter_5v5_log for more information.
        Use x='Date' to index on date instead of game number. Note that kwargs['player'] is overwritten
        with player, and kwargs['x'] is set to 'Game Number' unless it was 'Date'.

    :return: whatever vhelper.savefilehelper returns for these kwargs (nothing, or figure)
    """

    # The player is passed to the log filter as just another kwarg
    kwargs['player'] = player
    fa = vhelper.get_and_filter_5v5_log(**kwargs)

    df = pd.concat([fa[['Season', 'Game']], _calculate_f_rates(fa, gfcf)], axis=1)
    # Maps the part of each percentage column's name after its first space to the full column name,
    # so the columns can be looked up below as e.g. 'CF%' / 'CF% Off'.
    # NOTE(review): presumably the full names carry a rolling-window prefix -- confirm in _calculate_f_rates.
    # A '%' column with no space in its name would raise ValueError here.
    col_dict = {col[col.index(' ') + 1:]: col for col in df.columns if '%' in col}

    plt.close('all')

    # Running 1..n counter over the rows, used as the default x axis
    df.loc[:, 'Game Number'] = 1
    df.loc[:, 'Game Number'] = df['Game Number'].cumsum()
    df = df.set_index('Game Number', drop=False)

    if 'x' in kwargs and kwargs['x'] == 'Date':
        # Index on game date instead, with month'year tick labels on the x axis
        df = schedules.attach_game_dates_to_dateframe(df)
        df.loc[:, 'Date'] = pd.to_datetime(df.Date)
        #df.loc[:, 'Date'] = pd.to_datetime(df.Date).dt.strftime('%b/%y')
        df = df.set_index(pd.DatetimeIndex(df['Date']))
        plt.gca().xaxis_date()
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b\'%y'))
        plt.xlabel('Date')
    else:
        plt.xlabel('Game')
        # Recorded so the season-label lookup further down can always read kwargs['x']
        kwargs['x'] = 'Game Number'

    # Keys into col_dict for the on-ice and off-ice series
    series = gfcf + 'F%'
    series2 = gfcf + 'F% Off'

    # Avoid the long lines in offseason by setting first value in each season to None
    df.loc[:, 'PrevSeason'] = df.Season.shift(1)
    # The first row has no previous row; Season - 1 makes it count as a season start too
    df.loc[:, 'PrevSeason'] = df.PrevSeason.fillna(df.Season - 1)
    df.loc[df.Season != df.PrevSeason, col_dict[series]] = None
    df.loc[df.Season != df.PrevSeason, col_dict[series2]] = None

    # Add YY-YY for top axis
    # e.g. Season 2016 gives '2016-17 -->'
    df.loc[:, 'TopLabel'] = df.Season.apply(lambda x: '{0:d}-{1:s} -->'.format(x, str(x+1)[2:]))

    plt.plot(df.index, df[col_dict[series]].values, label=series)
    plt.plot(df.index, df[col_dict[series2]].values, label=series2, ls='--')

    plt.legend(loc=1, fontsize=10)

    # Add seasons at top
    # A twin x axis sharing the same limits; its only ticks are the first row of each season
    ax1 = plt.gca()
    ax2 = ax1.twiny()
    ax2.set_xlim(*ax1.get_xlim())
    # Column 0 is the x position (Game Number or Date), column 1 the label text
    temp = df[df.Season != df.PrevSeason][[kwargs['x'], 'TopLabel']]
    ax2.tick_params(length=0, labelsize=8)
    ax2.set_xticks(temp.iloc[:, 0].values)
    ax2.set_xticklabels(temp.iloc[:, 1].values)
    # Left-align so each label starts at its season's first game
    for label in ax2.xaxis.get_majorticklabels():
        label.set_horizontalalignment('left')
    # Negative pad moves the tick labels toward the axes
    for tick in ax2.xaxis.get_major_ticks():
        tick.set_pad(-10)

    plt.title(_get_rolling_f_title(gfcf, **kwargs))

    # axes

    plt.ylabel(gfcf + 'F%')
    plt.ylim(0.3, 0.7)
    plt.xlim(df.index.min(), df.index.max())
    # Ticks every 0.05 from 0.3 to 0.7, labelled as whole percentages
    ticks = list(np.arange(0.3, 0.71, 0.05))
    plt.yticks(ticks, ['{0:.0f}%'.format(100 * tick) for tick in ticks])

    return vhelper.savefilehelper(**kwargs)