예제 #1
0
def _rolling_player_f(player, gfcf, **kwargs):
    """
    Plots a player's rolling on-ice and off-ice GF% or CF% against game number.

    :param player: str or int, player to generate for
    :param gfcf: str. 'G' plots GF% and GF% Off; 'C' plots CF% and CF% Off
    :param kwargs: other filters. See scrapenhl2.plot.visualization_helper.get_and_filter_5v5_log for more information.

    :return: the result of vhelper.savefilehelper (nothing, or figure)
    """
    kwargs['player'] = player
    game_log = vhelper.get_and_filter_5v5_log(**kwargs)
    rates = _calculate_f_rates(game_log, gfcf)
    df = pd.concat([game_log[['Season', 'Game']], rates], axis=1)

    # Map the short series name (text after the first space) to the full column name
    col_dict = {}
    for colname in df.columns:
        if '%' in colname:
            col_dict[colname[colname.index(' ') + 1:]] = colname

    plt.clf()

    # Index rows 1..n so the x-axis is the game count
    df.index = pd.RangeIndex(start=1, stop=len(df) + 1, name='Game Number')

    # On-ice series is solid; off-ice series is dashed
    for suffix, line_style in (('F%', {}), ('F% Off', {'ls': '--'})):
        series_name = gfcf + suffix
        plt.plot(df.index, df[col_dict[series_name]], label=series_name, **line_style)
    plt.legend(loc=1, fontsize=10)

    plt.title(_get_rolling_f_title(gfcf, **kwargs))

    # axes
    plt.xlabel('Game')
    plt.ylabel(gfcf + 'F%')
    plt.ylim(0.3, 0.7)
    plt.xlim(0, len(df))
    ticks = list(np.arange(0.3, 0.71, 0.05))
    tick_labels = ['{0:.0f}%'.format(100 * tick) for tick in ticks]
    plt.yticks(ticks, tick_labels)

    return vhelper.savefilehelper(**kwargs)
예제 #2
0
def team_lineup_cf_graph(team, **kwargs):
    """
    Builds a 4x5 grid of rolling CF% line graphs, one small chart per player. Players are placed in the order
    l1, p1, l2, p2, l3, p3, l4, so each row holds a forward line (three charts) followed by a defense pair (two charts);
    the fourth row has only the forward line.

    :param team: str or id, team to build this graph for
    :param kwargs: need to specify the following as iterables of names: l1, l2, l3, l4, p1, p2, p3.
        Three players for each of the 'l's and two for each of the 'p's. If any of the seven is missing, no player
        charts are drawn. 'roll_len' defaults to 25 when not supplied.

    :return: figure, or nothing
    """
    # This order is also the order of the subplots below
    lineup_keys = ['l1', 'p1', 'l2', 'p2', 'l3', 'p3', 'l4']

    allplayers = []
    if all(key in kwargs for key in lineup_keys):
        # Change all to IDs
        for key in lineup_keys:
            kwargs[key] = [players.player_as_id(x) for x in kwargs[key]]
            allplayers.extend(kwargs[key])
    else:
        # TODO Find most common lines
        # Edit get_line_combos etc from manip, and the method to get player order from game_h2h, to work at team level
        pass

    # Get data
    kwargs.update(add_missing_games=True, team=team, players=allplayers)
    kwargs.setdefault('roll_len', 25)
    data = vhelper.get_and_filter_5v5_log(**kwargs)
    cf_rates = rolling_cfgf._calculate_f_rates(data, 'C')
    df = pd.concat([data[['Season', 'Game', 'PlayerID']], cf_rates], axis=1)

    # Map the short series name (text after the first space) to the full column name
    col_dict = {}
    for colname in df.columns:
        if '%' in colname:
            col_dict[colname[colname.index(' ') + 1:]] = colname

    # Set up figure to share x and y
    fig, axes = plt.subplots(4, 5, sharex=True, sharey=True, figsize=[12, 8])

    # Number each distinct (Season, Game) 1..n in order of appearance and attach that to every row
    gamenums = (df[['Season', 'Game']]
                .drop_duplicates()
                .assign(GameNum=1)
                .assign(GameNum=lambda frame: frame.GameNum.cumsum()))
    df = df.merge(gamenums, how='left', on=['Season', 'Game'])

    # Make chart for each player
    axes = axes.flatten()
    for i, pid in enumerate(allplayers):
        ax = axes[i]
        ax.set_title(players.player_as_str(pid), fontsize=10)
        player_rows = df.query('PlayerID == {0:d}'.format(int(pid)))
        x = player_rows.GameNum.values
        on_ice = player_rows[col_dict['CF%']].values
        off_ice = player_rows[col_dict['CF% Off']].values
        # Two separate fills so the on>off and off>on regions get different colours
        ax.fill_between(x, on_ice, off_ice, where=on_ice > off_ice, alpha=0.5)
        ax.fill_between(x, on_ice, off_ice, where=off_ice > on_ice, alpha=0.5)
        ax.plot(x, on_ice)
        ax.plot(x, off_ice, ls='--')
        # 50% reference line
        ax.plot(x, [0.5] * len(x), color='k')

    # Strip the frame and tick marks from every panel, including unused ones
    for ax in axes:
        for side in ('right', 'top', 'bottom', 'left'):
            ax.spines[side].set_visible(False)
        ax.xaxis.set_ticks_position('none')
        ax.yaxis.set_ticks_position('none')

    # Set title and axis labels (axes are shared, so configuring the first one is enough)
    first_ax = axes[0]
    first_ax.set_ylim(0.35, 0.65)
    first_ax.set_yticks([0.4, 0.5, 0.6])
    first_ax.set_yticklabels(['40%', '50%', '60%'])
    first_ax.set_xlim(1, df.GameNum.max())

    plt.annotate('Game', xy=(0.5, 0.05), ha='center', va='top', xycoords='figure fraction')

    fig.suptitle(_team_lineup_cf_graph_title(**kwargs), fontsize=16, y=0.95)

    # Return
    return vhelper.savefilehelper(**kwargs)
예제 #3
0
def team_fline_shot_rates_scatter(team, min_line_toi=50, **kwargs):
    """
    Creates a scatterplot of team forward line shot attempt rates.

    Each player gets one fixed-size marker (taken from that player's highest-TOI row) and one additional marker per
    qualifying line combination, sized by _get_point_sizes_for_fline_scatter.

    :param team: int or str, team
    :param min_line_toi: int, number of minutes for a line to qualify
    :param kwargs: Use season- or date-range-related kwargs only.

    :return: the result of vhelper.savefilehelper
    """

    kwargs['team'] = team

    startdate, enddate = vhelper.get_startdate_enddate_from_kwargs(**kwargs)
    rates = get_fline_shot_rates(team, startdate, enddate)
    lines = drop_duplicate_lines(rates)
    xy = _add_xy_names_for_fline_graph(lines)

    xy = _get_colors_markers_for_fline_scatter(xy)

    # Remove players who didn't have at least one line combination above minimum
    # Remove total TOI rows first, then filter
    # Get indiv toi by finding index of max TOI of each group. Then anti-join lines onto indiv toi
    # idxmax yields the index *label* of each player's max-TOI row, so select with .loc
    # (DataFrame.ix no longer exists in current pandas).
    top_row_labels = xy.groupby(['Name', 'PlayerID'])['TOI'].idxmax()
    indiv_cols = ['Name', 'PlayerID', 'TOI', 'X', 'Y', 'Color', 'Marker']
    indivtoi = xy.loc[top_row_labels.values, indiv_cols].sort_values('TOI', ascending=False)

    # TOI is compared in seconds; int() keeps the '{:d}' format valid if a float threshold is passed
    min_line_seconds = int(60 * min_line_toi)
    xy = helper.anti_join(xy.query('TOI >= {0:d}'.format(min_line_seconds)),
                          indivtoi[['Name', 'PlayerID', 'TOI']],
                          on=['Name', 'PlayerID', 'TOI'])

    # Now get sizes. Scaling is too poor if I do it earlier
    xy = _get_point_sizes_for_fline_scatter(xy)

    # Plot individuals
    # Ordinarily would filter for players with a qualifying line combo again
    # But this would eliminate some fourth liners who are lineup constants
    # Instead, make sure anybody with at least as much TOI as anybody on a qualifying line is in
    qualifying_ids = pd.DataFrame({'PlayerID': xy.PlayerID.unique()})
    mintoi = indivtoi[['PlayerID', 'TOI']] \
        .merge(qualifying_ids, how='inner', on='PlayerID') \
        .TOI.min()
    # NOTE(review): if no line qualifies, mintoi is NaN and int() raises ValueError, as it did before this change.
    indivtoi = indivtoi.query('TOI >= {0:d}'.format(int(mintoi)))

    plt.figure(figsize=[8, 6])
    ax = plt.gca()
    for name, _, _, x, y, color, marker in indivtoi.itertuples(index=False, name=None):
        # Size gets too crazy, so fix it
        ax.scatter([x], [y],
                   marker=marker,
                   s=200,
                   c=color,
                   label=helper.get_lastname(name))

    # Now plot lines
    line_names = xy.Name.unique()
    for name in line_names:
        temp = xy.query('Name == "{0:s}"'.format(name)).sort_values('TOI', ascending=False)
        if temp.empty:
            continue
        ax.scatter(temp.X.values,
                   temp.Y.values,
                   marker=temp.Marker.values[0],
                   s=temp.Size.values,
                   c=temp.Color.values)

    ax.set_xlabel('CF60')
    ax.set_ylabel('CA60')
    # Spread the legend over enough columns to keep it to roughly three rows
    num_players = len(line_names)
    plt.legend(loc='upper center', fontsize=6, ncol=num_players // 3 + 1)
    vhelper.add_good_bad_fast_slow()
    vhelper.add_cfpct_ref_lines_to_plot(ax)

    ax.set_title(', '.join(
        vhelper.generic_5v5_log_graph_title('F line shot rates', **kwargs)))

    return vhelper.savefilehelper(**kwargs)
예제 #4
0
def _rolling_player_f(player, gfcf, **kwargs):
    """
    Plots a player's rolling on-ice and off-ice GF% or CF%, with season labels along the top axis.

    :param player: str or int, player to generate for
    :param gfcf: str. 'G' plots GF% and GF% Off; 'C' plots CF% and CF% Off
    :param kwargs: other filters. See scrapenhl2.plot.visualization_helper.get_and_filter_5v5_log for more information.
        Use x='Date' to index on date instead of game number; any other value of x is replaced by 'Game Number'.

    :return: the result of vhelper.savefilehelper (nothing, or figure)
    """

    def _season_label(season):
        # e.g. 2016 -> '2016-17 -->'
        return '{0:d}-{1:s} -->'.format(season, str(season + 1)[2:])

    kwargs['player'] = player
    game_log = vhelper.get_and_filter_5v5_log(**kwargs)
    rates = _calculate_f_rates(game_log, gfcf)
    df = pd.concat([game_log[['Season', 'Game']], rates], axis=1)

    # Map the short series name (text after the first space) to the full column name
    col_dict = {}
    for colname in df.columns:
        if '%' in colname:
            col_dict[colname[colname.index(' ') + 1:]] = colname

    plt.close('all')

    # Number the games 1..n; keep the column because it may be needed for the top-axis ticks
    df['Game Number'] = np.arange(1, len(df) + 1, dtype='int64')
    df = df.set_index('Game Number', drop=False)

    if kwargs.get('x') == 'Date':
        df = schedules.attach_game_dates_to_dateframe(df)
        df.loc[:, 'Date'] = pd.to_datetime(df.Date)
        df = df.set_index(pd.DatetimeIndex(df['Date']))
        date_axis = plt.gca()
        date_axis.xaxis_date()
        date_axis.xaxis.set_major_formatter(mdates.DateFormatter('%b\'%y'))
        plt.xlabel('Date')
    else:
        plt.xlabel('Game')
        kwargs['x'] = 'Game Number'

    series = gfcf + 'F%'
    series2 = gfcf + 'F% Off'

    # Avoid the long lines in offseason by setting first value in each season to None
    # (the very first row counts as a season start because its PrevSeason is filled with Season - 1)
    df['PrevSeason'] = df.Season.shift(1).fillna(df.Season - 1)
    season_start = df.Season != df.PrevSeason
    for name in (series, series2):
        df.loc[season_start, col_dict[name]] = None

    # Add YY-YY for top axis
    df['TopLabel'] = df.Season.apply(_season_label)

    plt.plot(df.index, df[col_dict[series]].values, label=series)
    plt.plot(df.index, df[col_dict[series2]].values, label=series2, ls='--')

    plt.legend(loc=1, fontsize=10)

    # Add seasons at top: a twin x-axis whose ticks sit at the first game of each season
    ax1 = plt.gca()
    ax2 = ax1.twiny()
    ax2.set_xlim(*ax1.get_xlim())
    season_ticks = df.loc[season_start, [kwargs['x'], 'TopLabel']]
    ax2.tick_params(length=0, labelsize=8)
    ax2.set_xticks(season_ticks.iloc[:, 0].values)
    ax2.set_xticklabels(season_ticks.iloc[:, 1].values)
    for ticklabel in ax2.xaxis.get_majorticklabels():
        ticklabel.set_horizontalalignment('left')
    for tick in ax2.xaxis.get_major_ticks():
        tick.set_pad(-10)

    plt.title(_get_rolling_f_title(gfcf, **kwargs))

    # axes
    plt.ylabel(gfcf + 'F%')
    plt.ylim(0.3, 0.7)
    plt.xlim(df.index.min(), df.index.max())
    ticks = list(np.arange(0.3, 0.71, 0.05))
    tick_labels = ['{0:.0f}%'.format(100 * tick) for tick in ticks]
    plt.yticks(ticks, tick_labels)

    return vhelper.savefilehelper(**kwargs)
예제 #5
0
def parallel_usage_chart(**kwargs):
    """
    Draws a 2x2 grid of parallel-coordinates usage charts (forward/defense quality of competition and teammates):
    top centers, top wingers, top defense, and other players. "Top" means the first six rows after sorting by
    on-ice TOI, descending.

    :param kwargs: Defaults to take last month of games for all teams (last_n_days=30 is set when none of startdate,
        enddate, startseason, endseason is given).

    :return: nothing, or figure
    """

    def _lastname(pid):
        return helpers.get_lastname(players.player_as_str(pid))

    date_keys = ('startdate', 'enddate', 'startseason', 'endseason')
    if not any(key in kwargs for key in date_keys):
        kwargs['last_n_days'] = 30

    # Sum the per-game totals for each player
    sum_cols = ['PlayerID', 'TOION', 'TOIOFF',
                'FCompSum', 'FCompN', 'DCompSum', 'DCompN',
                'FTeamSum', 'FTeamN', 'DTeamSum', 'DTeamN']
    qocqot = vhelper.get_and_filter_5v5_log(**kwargs)[sum_cols]
    qocqot = qocqot.groupby('PlayerID').sum().reset_index()

    # Each quality measure is a sum divided by its count
    ratio_specs = [('FQoC', 'FCompSum', 'FCompN'),
                   ('FQoT', 'FTeamSum', 'FTeamN'),
                   ('DQoC', 'DCompSum', 'DCompN'),
                   ('DQoT', 'DTeamSum', 'DTeamN')]
    for target, numerator, denominator in ratio_specs:
        qocqot[target] = qocqot[numerator] / qocqot[denominator]
    # Despite the name, this is the share of total TOI spent on the ice
    qocqot['TOI60'] = qocqot.TOION / (qocqot.TOION + qocqot.TOIOFF)
    qocqot = qocqot.dropna().sort_values('TOI60', ascending=False)  # In case I have zeroes

    qocqot['PlayerName'] = qocqot.PlayerID.apply(_lastname)
    # NOTE(review): PlayerInitials is filled with last names, same as PlayerName -- confirm whether initials were
    # intended here.
    qocqot['PlayerInitials'] = qocqot.PlayerID.apply(_lastname)
    qocqot['Position'] = qocqot.PlayerID.apply(players.get_player_position)

    # Order rows by on-ice TOI, then keep only the plot columns, in parallel-coordinates order
    plot_cols = ['FQoT', 'FQoC', 'DQoC', 'DQoT', 'PlayerName', 'PlayerInitials', 'Position']
    qocqot = qocqot.sort_values('TOION', ascending=False)[plot_cols]

    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=[11, 7])
    panels = axes.flatten()

    forwards = qocqot.query('Position != "D"')
    centers = forwards.query('Position == "C"').drop('Position', axis=1).iloc[:6, :]
    wingers = forwards.query('Position != "C"').drop('Position', axis=1).iloc[:6, :]
    forwards = forwards.drop('Position', axis=1)
    vhelper.parallel_coords(forwards, centers, 'PlayerInitials', 'PlayerName', panels[0])
    vhelper.parallel_coords(forwards, wingers, 'PlayerInitials', 'PlayerName', panels[1])

    alldefense = qocqot.query('Position == "D"').drop('Position', axis=1)
    defense = alldefense.iloc[:6, :]
    vhelper.parallel_coords(alldefense, defense, 'PlayerInitials', 'PlayerName', panels[2])

    # Rows that appear exactly once after stacking everyone with the three "top" groups are the leftovers
    everyone = qocqot.drop('Position', axis=1)
    other_players = pd.concat([everyone, centers, wingers, defense]) \
        .drop_duplicates(keep=False).iloc[:6, :]
    # NOTE(review): the first argument here uses only the top-six defense, not alldefense -- confirm this is intended.
    vhelper.parallel_coords(pd.concat([forwards, defense]), other_players, 'PlayerInitials', 'PlayerName', panels[3])

    fig.text(0.5, 0.04, 'Statistic (based on TOI/60)', ha='center')
    fig.text(0.04, 0.5, 'Minutes', va='center', rotation='vertical')
    for panel, panel_title in zip(panels, ['Top centers', 'Top wingers', 'Top defense', 'Others']):
        panel.set_title(panel_title)

    fig.suptitle(_parallel_usage_chart_title(**kwargs))

    return vhelper.savefilehelper(**kwargs)
예제 #6
0
def team_dpair_shot_rates_scatter(team, min_pair_toi=50, **kwargs):
    """
    Creates a scatterplot of team defense pair shot attempt rates.

    Each player is drawn once on their own (the mean of their two highest-TOI rows, with a legend entry), and then
    once per remaining row. Left-handed players use a '<' marker and everyone else a '>' marker.

    :param team: int or str, team
    :param min_pair_toi: int, number of minutes for pair to qualify
    :param kwargs: Use season- or date-range-related kwargs only.

    :return: the result of vhelper.savefilehelper
    """

    def _marker_for(player_id):
        # Triangle points left for left-handed players, right otherwise
        return '<' if players.get_player_handedness(player_id) == 'L' else '>'

    def _rows_for(player_name):
        # All of this player's rows, highest TOI first
        return xy.query('Name == "{0:s}"'.format(player_name)).sort_values('TOI', ascending=False)

    kwargs['team'] = team

    startdate, enddate = vhelper.get_startdate_enddate_from_kwargs(**kwargs)
    rates = get_dpair_shot_rates(team, startdate, enddate)
    # TOI is compared in seconds, min_pair_toi is in minutes
    toi_filter = 'TOI >= {0:d}'.format(60 * min_pair_toi)
    pairs = drop_duplicate_pairs(rates).query(toi_filter)
    xy = _add_xy_names_for_dpair_graph(pairs)

    plt.figure(figsize=[8, 6])
    ax = plt.gca()

    xy = _get_point_sizes_for_dpair_scatter(xy)
    xy = _get_colors_for_dpair_scatter(xy)

    player_names = xy.Name.unique()

    # First plot players on their own
    for name in player_names:
        # Get first two rows, which are this player adjusted a bit. Take average
        top_two = _rows_for(name).iloc[:2, :]
        solo = top_two.groupby(['Name', 'PlayerID', 'Color'], as_index=False).mean()
        ax.scatter(solo.X.values,
                   solo.Y.values,
                   label=name,
                   marker=_marker_for(solo.PlayerID.iloc[0]),
                   s=solo.Size.values,
                   c=solo.Color.values)

    # Now plot pairs
    for name in player_names:
        pair_rows = _rows_for(name).iloc[2:, :]
        if pair_rows.empty:
            continue
        ax.scatter(pair_rows.X.values,
                   pair_rows.Y.values,
                   marker=_marker_for(pair_rows.PlayerID.iloc[0]),
                   s=pair_rows.Size.values,
                   c=pair_rows.Color.values)

    ax.set_xlabel('CF60')
    ax.set_ylabel('CA60')
    plt.legend(loc='best', fontsize=10)
    vhelper.add_good_bad_fast_slow()
    vhelper.add_cfpct_ref_lines_to_plot(ax)

    title_parts = vhelper.generic_5v5_log_graph_title('D pair shot rates', **kwargs)
    ax.set_title(', '.join(title_parts))

    return vhelper.savefilehelper(**kwargs)
예제 #7
0
def rolling_player_boxcars(player, **kwargs):
    """
    A method to generate the rolling boxcars graph: stacked per-60 bands for goals, first assists, second assists and
    other on-ice goals for, plotted against game number.

    For defensemen ('D') and forwards ('C', 'R', 'L', 'F') three dotted reference lines are drawn on a twin y-axis.
    For any other position value the reference lines are skipped instead of failing.

    :param player: str or int, player to generate for
    :param kwargs: other filters. See scrapenhl2.plot.visualization_helper.get_and_filter_5v5_log for more information.
        'roll_len' defaults to 25 when not supplied.

    :return: nothing, or figure
    """

    kwargs['player'] = player
    kwargs.setdefault('roll_len', 25)
    boxcars = vhelper.get_and_filter_5v5_log(**kwargs)

    boxcars = pd.concat(
        [boxcars[['Season', 'Game']],
         calculate_boxcar_rates(boxcars)], axis=1)

    # Map the stat name (text between the first space and the '/') to the full '.../60' column name
    col_dict = {
        col[col.index(' ') + 1:col.index('/')]: col
        for col in boxcars.columns if col[-3:] == '/60'
    }

    plt.clf()

    # Set an index
    # TODO allow for datetime index
    boxcars.index = pd.RangeIndex(start=1, stop=len(boxcars) + 1, name='Game Number')

    # Stacked bands: each band runs from the previous cumulative series up to the next one
    bands = [
        (0, boxcars[col_dict['iG']], 'G', {'color': 'k'}),
        (boxcars[col_dict['iG']], boxcars[col_dict['iP1']], 'A1', {'color': 'b'}),
        (boxcars[col_dict['iP1']], boxcars[col_dict['iP']], 'A2', {'color': 'dodgerblue'}),
        (boxcars[col_dict['iP']], boxcars[col_dict['GFON']], 'Other\nGFON', {'color': 'c', 'alpha': 0.3}),
    ]
    for lower, upper, band_label, band_style in bands:
        plt.fill_between(boxcars.index, lower, upper, label=band_label, **band_style)

    plt.xlabel('Game')
    plt.ylabel('Per 60')
    plt.xlim(0, len(boxcars))
    plt.ylim(0, 4)

    # Reference levels and their tick labels, by position group
    position = players.get_player_position(player)
    if position == 'D':
        ypos = [0.17, 0.84, 2.5]
        ytext = ['P1\nG', 'P1\nP', 'P1\nGF']
    elif position in {'C', 'R', 'L', 'F'}:
        ypos = [0.85, 1.94, 2.7]
        ytext = ['L1\nG', 'L1\nP', 'L1\nGF']
    else:
        # No reference levels are defined for this position; previously this left ypos/ytext unbound
        ypos = ytext = None

    tempaxis = None
    if ypos is not None:
        xlimits = plt.xlim()
        tempaxis = plt.twinx()
        tempaxis.tick_params(axis='y', which='major', pad=2)
        tempaxis.set_yticks(ypos)
        tempaxis.set_yticklabels(ytext, fontsize=8)
        # Pass the flag positionally: the keyword was `b` in older matplotlib and `visible` in newer releases
        tempaxis.grid(False)
        for level, line_color in zip(ypos, ['k', 'dodgerblue', 'c']):
            tempaxis.plot(xlimits, [level, level], color=line_color, ls=':')

    plt.legend(loc=2, bbox_to_anchor=(1.05, 1), fontsize=10)
    if tempaxis is not None:
        tempaxis.set_ylim(0, 4)
    plt.xlim(0, len(boxcars))
    plt.ylim(0, 4)

    plt.title(_get_rolling_boxcars_title(**kwargs))

    return vhelper.savefilehelper(**kwargs)