Beispiel #1
0
def shot_chart(x, y, title="", kind="scatter", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", outer_lines=False, court_lw=1,
               flip_court=False, kde_shade=True, hex_gridsize=None,
               ax=None, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vector
        The x and y coordinates of the shots taken.
    title : str, optional
        The title for the plot.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    color : matplotlib color, optional
        Color used for the scatter markers. It also seeds the default
        colormap when ``cmap`` is not given.
    cmap : matplotlib Colormap object or name, optional
        Colormap used by the KDE and hexbin plots. When ``None`` it is built
        with ``sns.light_palette(color, as_cmap=True)``.
    xlim, ylim : two-tuples, optional
        The axis limits of the plot.
    court_color, outer_lines, court_lw : optional
        Forwarded to ``draw_court`` as ``color``, ``outer_lines`` and ``lw``.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed before being applied.
    kde_shade : boolean, optional
        Forwarded to ``sns.kdeplot`` as ``shade``.
    hex_gridsize : int, optional
        ``gridsize`` for ``Axes.hexbin``. When ``None`` it is the integer mean
        of the Freedman-Diaconis bin counts of ``x`` and ``y``.
    ax : Axes, optional
        The Axes to draw on. Defaults to ``plt.gca()``.
    kwargs : key, value pairs
        Extra keyword arguments forwarded to the plotting call selected by
        ``kind``.

    Returns
    -------
    ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """

    # Validate first so that a bad ``kind`` does not leave a half-configured
    # Axes (limits, title and court already drawn) behind.
    if kind not in ("scatter", "kde", "hex"):
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if flip_court:
        xlim, ylim = xlim[::-1], ylim[::-1]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # Use real booleans: the string "off" is truthy, so current matplotlib
    # releases would keep the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap,
                    ax=ax, **kwargs)
        # kdeplot may label the axes; the chart is meant to have none
        ax.set_xlabel('')
        ax.set_ylabel('')

    else:  # kind == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=hex_gridsize, cmap=cmap, **kwargs)

    return ax
Beispiel #2
0
def shot_chart(x, y, title="", kind="scatter", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", outer_lines=False, court_lw=1,
               flip_court=False, kde_shade=True, hex_gridsize=None,
               ax=None, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vector
        The x and y coordinates of the shots taken.
    title : str, optional
        The title for the plot.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    color : matplotlib color, optional
        Color used for the scatter markers. It also seeds the default
        colormap when ``cmap`` is not given.
    cmap : matplotlib Colormap object or name, optional
        Colormap used by the KDE and hexbin plots. When ``None`` it is built
        with ``sns.light_palette(color, as_cmap=True)``.
    xlim, ylim : two-tuples, optional
        The axis limits of the plot.
    court_color, outer_lines, court_lw : optional
        Forwarded to ``draw_court`` as ``color``, ``outer_lines`` and ``lw``.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed before being applied.
    kde_shade : boolean, optional
        Forwarded to ``sns.kdeplot`` as ``shade``.
    hex_gridsize : int, optional
        ``gridsize`` for ``Axes.hexbin``. When ``None`` it is the integer mean
        of the Freedman-Diaconis bin counts of ``x`` and ``y``.
    ax : Axes, optional
        The Axes to draw on. Defaults to ``plt.gca()``.
    kwargs : key, value pairs
        Extra keyword arguments forwarded to the plotting call selected by
        ``kind``.

    Returns
    -------
    ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """

    # Reject an unknown ``kind`` before touching the Axes, so a failed call
    # has no visible side effects on the figure.
    if kind not in ("scatter", "kde", "hex"):
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if flip_court:
        xlim, ylim = xlim[::-1], ylim[::-1]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # Booleans are required here: "off" is a non-empty string and therefore
    # truthy, which leaves the labels switched on in current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap,
                    ax=ax, **kwargs)
        # kdeplot may label the axes; the chart is meant to have none
        ax.set_xlabel('')
        ax.set_ylabel('')

    else:  # kind == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=hex_gridsize, cmap=cmap, **kwargs)

    return ax
Beispiel #3
0
def shot_chart_jointgrid(x, y, data=None, title="", joint_type="scatter",
                         marginals_type="both", cmap=None, joint_color="b",
                         marginals_color="b", xlim=(-250, 250),
                         ylim=(422.5, -47.5), joint_kde_shade=True,
                         marginals_kde_shade=True, hex_gridsize=None, space=0,
                         size=(12, 11), court_color="gray", outer_lines=False,
                         court_lw=1, flip_court=False, joint_kws=None,
                         marginal_kws=None, **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken, either as vectors or as
        column names of ``data``.
    data : DataFrame, optional
        DataFrame passed to ``sns.JointGrid`` together with ``x`` and ``y``.
    title : str, optional
        The title for the plot.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    marginals_type : { "both", "hist", "kde" }, optional
        The type of plot for the marginal plots.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the KDE and hexbin joint plots. When ``None`` it is
        built with ``sns.light_palette(joint_color, as_cmap=True)``.
    joint_color, marginals_color : matplotlib color, optional
        Colors used on the joint and marginal plots.
    xlim, ylim : two-tuples, optional
        The axis limits of the plot.
    joint_kde_shade, marginals_kde_shade : boolean, optional
        Forwarded as ``shade`` to ``sns.kdeplot`` on the joint / marginal
        plots.
    hex_gridsize : int, optional
        ``gridsize`` for the hexbin joint plot. When ``None`` it is the
        integer mean of the Freedman-Diaconis bin counts of the grid's x and
        y data.
    space : numeric, optional
        Forwarded to ``sns.JointGrid``.
    size : tuple, optional
        The width and height of the figure in inches.
    court_color, outer_lines, court_lw : optional
        Forwarded to ``draw_court`` as ``color``, ``outer_lines`` and ``lw``.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed.
    joint_kws, marginal_kws : dicts, optional
        Additional keyword arguments for the joint and marginal plots. The
        dictionaries passed in are not modified.
    kwargs : key, value pairs
        Merged into the joint plot keyword arguments (overriding
        ``joint_kws`` on key clashes).

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not a supported value.
    """

    # Validate before sns.JointGrid creates a figure, so an invalid argument
    # does not leave an orphaned, half-drawn figure open.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on a copy so the caller's dictionary is never mutated.
    joint_kws = dict(joint_kws) if joint_kws is not None else {}
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    else:  # joint_type == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the grid's resolved data: ``x``/``y`` may be column names
            # when ``data`` is given, which cannot be binned directly.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=hex_gridsize, cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    else:  # marginals_type == "kde"
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # The joint axes is the main plot of the shot chart; draw the court on it
    ax = grid.ax_joint
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans are required: the string "off" is
    # truthy and would leave the labels visible in current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
Beispiel #4
0
def shot_chart_jointgrid(x,
                         y,
                         data=None,
                         joint_type="scatter",
                         title="",
                         joint_color="b",
                         cmap=None,
                         xlim=(-250, 250),
                         ylim=(422.5, -47.5),
                         court_color="gray",
                         court_lw=1,
                         outer_lines=False,
                         flip_court=False,
                         joint_kde_shade=True,
                         gridsize=None,
                         marginals_color="b",
                         marginals_type="both",
                         marginals_kde_shade=True,
                         size=(12, 11),
                         space=0,
                         despine=False,
                         joint_kws=None,
                         marginal_kws=None,
                         **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    This function allows for more flexibility in customizing your shot chart
    than the ``shot_chart_jointplot`` function.

    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as columns from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the shot
        location coordinates.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    title : str, optional
        The title for the plot.
    joint_color : matplotlib color, optional
        Color used to plot the shots on the joint plot.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``joint_color``. Used
        for KDE and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default is
        ``False``, which orients the court where the hoop is towards the top of
        the plot.
    joint_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the joint plot.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated using
        the Freedman-Diaconis method.
    marginals_color : matplotlib color, optional
        Color used to plot the shots on the marginal plots.
    marginals_type : { "both", "hist", "kde"}, optional
        The type of plot for the marginal plots.
    marginals_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the marginal
        plots.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, hides the four spines of the joint plot.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
        The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn
        plots. They are merged into the joint plot keyword arguments.

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not a supported value.
    """

    # Validate before sns.JointGrid creates a figure, so an invalid argument
    # does not leave an orphaned, half-drawn figure open.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on a copy so the caller's dictionary is never mutated.
    joint_kws = dict(joint_kws) if joint_kws is not None else {}
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x,
                         y=y,
                         data=data,
                         xlim=xlim,
                         ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot,
                               cmap=cmap,
                               shade=joint_kde_shade,
                               **joint_kws)

    else:  # joint_type == "hex"
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the grid's resolved data: ``x``/``y`` may be column names
            # when ``data`` is given, which cannot be binned directly.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin,
                               gridsize=gridsize,
                               cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   kde=False,
                                   **marginal_kws)

    else:  # marginals_type == "kde"
        grid = grid.plot_marginals(sns.kdeplot,
                                   color=marginals_color,
                                   shade=marginals_kde_shade,
                                   **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # The joint axes is the main plot of the shot chart; draw the court on it
    ax = grid.ax_joint
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans are required: the string "off" is
    # truthy and would leave the labels visible in current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines of the joint and marginal axes to match the rest of the
    # court lines, which makes outer_lines somewhat unnecessary
    for spine in ax.spines:
        for axes in (ax, grid.ax_marg_x, grid.ax_marg_y):
            axes.spines[spine].set_lw(court_lw)
            axes.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
Beispiel #5
0
def shot_chart(x,
               y,
               kind="scatter",
               title="",
               color="b",
               cmap=None,
               xlim=(-250, 250),
               ylim=(422.5, -47.5),
               court_color="gray",
               court_lw=1,
               outer_lines=False,
               flip_court=False,
               kde_shade=True,
               gridsize=None,
               ax=None,
               despine=False,
               **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vector
        The x and y coordinates of the shots taken (such as pandas Series).
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours.
    gridsize : int, optional
        Number of hexagons in the x-direction.  The default is calculated using
        the Freedman-Diaconis method.
    ax : Axes, optional
        The Axes object to plot the court onto. Defaults to ``plt.gca()``.
    despine : boolean, optional
        If ``True``, removes the spines.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn plots.

    Returns
    -------
    ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """

    # Validate first so that a bad ``kind`` does not leave a half-configured
    # Axes (limits, title and court already drawn) behind.
    if kind not in ("scatter", "kde", "hex"):
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if flip_court:
        xlim, ylim = xlim[::-1], ylim[::-1]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # Use real booleans: the string "off" is truthy, so current matplotlib
    # releases would keep the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        # kdeplot may label the axes; the chart is meant to have none
        ax.set_xlabel('')
        ax.set_ylabel('')

    else:  # kind == "hex"
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=gridsize, cmap=cmap, **kwargs)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return ax
def shot_chart_jointgrid(x,
                         y,
                         data=None,
                         title="",
                         joint_type="scatter",
                         marginals_type="both",
                         cmap=None,
                         joint_color="b",
                         marginals_color="b",
                         xlim=(-250, 250),
                         ylim=(422.5, -47.5),
                         joint_kde_shade=True,
                         marginals_kde_shade=True,
                         hex_gridsize=None,
                         space=0,
                         size=(12, 11),
                         court_color="gray",
                         outer_lines=False,
                         court_lw=1,
                         flip_court=False,
                         joint_kws=None,
                         marginal_kws=None,
                         **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    Parameters
    ----------
    x, y : strings or vector
        The x and y coordinates of the shots taken, either as vectors or as
        column names of ``data``.
    data : DataFrame, optional
        DataFrame passed to ``sns.JointGrid`` together with ``x`` and ``y``.
    title : str, optional
        The title for the plot.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    marginals_type : { "both", "hist", "kde" }, optional
        The type of plot for the marginal plots.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the KDE and hexbin joint plots. When ``None`` it is
        built with ``sns.light_palette(joint_color, as_cmap=True)``.
    joint_color, marginals_color : matplotlib color, optional
        Colors used on the joint and marginal plots.
    xlim, ylim : two-tuples, optional
        The axis limits of the plot.
    joint_kde_shade, marginals_kde_shade : boolean, optional
        Forwarded as ``shade`` to ``sns.kdeplot`` on the joint / marginal
        plots.
    hex_gridsize : int, optional
        ``gridsize`` for the hexbin joint plot. When ``None`` it is the
        integer mean of the Freedman-Diaconis bin counts of the grid's x and
        y data.
    space : numeric, optional
        Forwarded to ``sns.JointGrid``.
    size : tuple, optional
        The width and height of the figure in inches.
    court_color, outer_lines, court_lw : optional
        Forwarded to ``draw_court`` as ``color``, ``outer_lines`` and ``lw``.
    flip_court : boolean, optional
        If ``True`` both axis limits are reversed.
    joint_kws, marginal_kws : dicts, optional
        Additional keyword arguments for the joint and marginal plots. The
        dictionaries passed in are not modified.
    kwargs : key, value pairs
        Merged into the joint plot keyword arguments (overriding
        ``joint_kws`` on key clashes).

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not a supported value.
    """

    # Validate before sns.JointGrid creates a figure, so an invalid argument
    # does not leave an orphaned, half-drawn figure open.
    if joint_type not in ("scatter", "kde", "hex"):
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")
    if marginals_type not in ("both", "hist", "kde"):
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on a copy so the caller's dictionary is never mutated.
    joint_kws = dict(joint_kws) if joint_kws is not None else {}
    joint_kws.update(kwargs)
    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x,
                         y=y,
                         data=data,
                         xlim=xlim,
                         ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot,
                               cmap=cmap,
                               shade=joint_kde_shade,
                               **joint_kws)

    else:  # joint_type == "hex"
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the grid's resolved data: ``x``/``y`` may be column names
            # when ``data`` is given, which cannot be binned directly.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin,
                               gridsize=hex_gridsize,
                               cmap=cmap,
                               **joint_kws)

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot,
                                   color=marginals_color,
                                   kde=False,
                                   **marginal_kws)

    else:  # marginals_type == "kde"
        grid = grid.plot_marginals(sns.kdeplot,
                                   color=marginals_color,
                                   shade=marginals_kde_shade,
                                   **marginal_kws)

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # The joint axes is the main plot of the shot chart; draw the court on it
    ax = grid.ax_joint
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans are required: the string "off" is
    # truthy and would leave the labels visible in current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
Beispiel #7
0
def _top3_rates(rates):
    """Condense a value -> relative-frequency mapping into at most 3 entries.

    ``rates`` is iterated in its own order (callers pass the result of
    ``value_counts(normalize=True).to_dict()``). The first two values are
    reported individually; when there are not exactly one or two values, the
    remainder is folded into a single '其他' ("other") entry.
    """
    keys = list(rates)
    if len(keys) == 1:
        return [{'key': keys[0], 'value': 1.00}]
    leading = keys[:2]
    entries = [{'key': key, 'value': rates[key]} for key in leading]
    if len(keys) != 2:
        entries.append({
            'key': '其他',
            'value': 1 - sum(rates[key] for key in leading)
        })
    return entries


def _single_row_frame(columns, values, label):
    """Build a one-row DataFrame labelled ``label`` pairing columns/values."""
    return pd.DataFrame(dict(zip(columns, values)), index=[label])


def _extra_summary_rows(hr, columns, feature_ranges):
    """Return the featureName, top3, featureRange and featureValueCounts rows.

    Each row is a one-row DataFrame over ``columns``; list/dict cell values
    are wrapped in a single-element list so they land in one cell.
    """
    top3 = [[_top3_rates(hr[col].value_counts(normalize=True).to_dict())]
            for col in columns]
    value_counts = [[hr[col].value_counts().to_dict()] for col in columns]
    return [
        _single_row_frame(columns, columns.tolist(), 'featureName'),
        _single_row_frame(columns, top3, 'top3'),
        _single_row_frame(columns, feature_ranges, 'featureRange'),
        _single_row_frame(columns, value_counts, 'featureValueCounts'),
    ]


def _numeric_distribution(hr, feature, hr_flag):
    """Return the per-bin positive/negative frequencies and KDE of a column.

    NOTE: ``hr[feature]`` is cast to float64 in place, as the original
    implementation did, so the caller's frame is modified.
    """
    hr[feature] = hr[feature].astype(np.float64)
    values = hr[feature]

    kde_x, kde_y = univariate_kdeplot(values)
    kde_list = list(zip(kde_x.tolist(), kde_y.tolist()))

    # Only the bin edges are needed; the histogram heights are discarded.
    n_bins = min(_freedman_diaconis_bins(values), 50)
    _, edges = np.histogram(values, bins=n_bins, density=True)
    edges = edges.tolist()
    # bin width * sample count: turns per-bin counts into density-like values
    scale = (edges[1] - edges[0]) * len(values)

    # Accumulators keyed by the left edge of every bin, in bin order
    positive = collections.OrderedDict((edge, 0) for edge in edges[:-1])
    negative = collections.OrderedDict((edge, 0) for edge in edges[:-1])

    for value in list(values.value_counts().to_dict()):
        flag_counts = hr[hr[feature] == value][hr_flag].value_counts().to_dict()
        bin_key = find_bin(edges, value)
        if 0 in flag_counts:
            negative[bin_key] += flag_counts[0]
        if 1 in flag_counts:
            positive[bin_key] += flag_counts[1]

    return [{
        'feature_name': feature,
        'feature_details': {
            'positive': [(edge, count / scale)
                         for edge, count in positive.items()],
            'negative': [(edge, count / scale)
                         for edge, count in negative.items()],
            'kde': kde_list
        }
    }]


def _category_distribution(hr, feature, hr_flag):
    """Return the per-value positive/negative counts of a categorical column."""
    positive, negative = [], []
    for value in list(hr[feature].value_counts().to_dict()):
        flag_counts = hr[hr[feature] == value][hr_flag].value_counts().to_dict()
        if 0 in flag_counts:
            negative.append((value, flag_counts[0]))
        if 1 in flag_counts:
            positive.append((value, flag_counts[1]))
    return [{
        'feature_name': feature,
        'feature_details': {
            'positive': positive,
            'negative': negative
        }
    }]


def data_describe(hr, hr_flag):
    """Build extended per-feature summaries for numeric and object columns.

    Starting from ``describe(hr, include=...)``, extra rows are appended for
    every feature: variance (numeric only), name, top-3 value rates, value
    range, raw value counts and the distribution of each value split by the
    0/1 values found in the ``hr_flag`` column.

    Rows are combined with ``pd.concat`` because ``DataFrame.append`` is no
    longer available in pandas 2.x.

    Parameters
    ----------
    hr : DataFrame
        The data to summarise. NOTE: its numeric columns are cast to float64
        in place while the distributions are computed.
    hr_flag : column label
        Column of ``hr`` whose 0/1 values split counts into
        negative/positive.

    Returns
    -------
    dict
        ``{'describe_numeric': DataFrame, 'describe_category': DataFrame}``.
    """
    ret_dict = {}

    # ---- numeric features -------------------------------------------------
    describe_numeric = describe(hr, include='number')
    describe_numeric.loc['featureVar', :] = describe_numeric.loc['std', :]**2
    numeric_cols = describe_numeric.columns.values

    numeric_ranges = [
        '[' + str(low) + ', ' + str(high) + ']'
        for low, high in zip(describe_numeric.loc['min', :].tolist(),
                             describe_numeric.loc['max', :].tolist())
    ]

    # The extra rows are computed before the distributions below cast the
    # numeric columns to float, so value counts keep their original keys.
    describe_numeric = pd.concat(
        [describe_numeric] +
        _extra_summary_rows(hr, numeric_cols, numeric_ranges))

    describe_numeric = describe_numeric.rename(
        index={
            'count': "featureCount",
            'mean': "featureMean",
            'std': "featureStd",
            'min': "featureMin",
            '25%': "featurePer25",
            '50%': "featurePer50",
            '75%': "featurePer75",
            'max': "featureMax",
        })

    numeric_distributions = _single_row_frame(
        numeric_cols,
        [_numeric_distribution(hr, feature, hr_flag)
         for feature in numeric_cols],
        'featureFreqs')

    describe_numeric = pd.concat([describe_numeric, numeric_distributions])
    ret_dict['describe_numeric'] = describe_numeric

    # ---- categorical (object) features --------------------------------------
    describe_category = describe(hr, include='object')
    category_cols = describe_category.columns.values

    # The "range" of a categorical feature is the list of its distinct values
    category_ranges = [
        ', '.join(map(str, hr[col].value_counts().index))
        for col in category_cols
    ]

    describe_category = pd.concat(
        [describe_category] +
        _extra_summary_rows(hr, category_cols, category_ranges))

    describe_category = describe_category.rename(
        index={
            'count': "featureCount",
            'unique': "featureUnique",
            'top': "featureTop",
            'freq': "featureFreq"
        })

    category_distributions = _single_row_frame(
        category_cols,
        [_category_distribution(hr, feature, hr_flag)
         for feature in category_cols],
        'featureFreqs')

    describe_category = pd.concat([describe_category, category_distributions])
    ret_dict['describe_category'] = describe_category

    return ret_dict
            'mult':
            est.vmnu[np.arange(est.N),
                     est.qun.argmax(axis=1), :].argmax(axis=1) + 1
        })
        mut_table = mut_table.assign(vaf=mut_table.var_counts /
                                     mut_table.depth)
        mut_table = mut_table.assign(vaf_cn=mut_table.vaf *
                                     mut_table['total_cn'] / mut_table['mult'])
        mut_table = mut_table.assign(
            vaf_purity=mut_table.apply(lambda x: x['vaf'] / est.p * (
                (1 - est.p) * 2 + est.p * x['total_cn']) / x['mult'],
                                       axis=1))
        mut_table = mut_table.assign(trinucleotide=pd.Categorical(
            mut_table.trinucleotide, ordered=True, categories=range(96)))

        nb_bins = min(_freedman_diaconis_bins(mut_table.vaf_purity) * 2, 50)
        final_bins = np.linspace(min(mut_table.vaf_purity),
                                 max(mut_table.vaf_purity), nb_bins)
        # fig, ax = plt.subplots(1, figsize=(8, 28), sharex=False, gridspec_kw={'hspace': 0.08, 'wspace': 0, 'height_ratios': [1, 6, 1]})

        clone_cols = sns.husl_palette(mut_table.clone.nunique(), l=0.8, s=.7)
        est_sigs = [
            s for s in selected_sigs if s in mut_table.signature.unique()
        ]
        mylist = [color_dict[s] for s in est_sigs]
        my_palette = sns.color_palette(mylist)
        #cols = sns.color_palette("Set2", len(est_sigs))
        cols = sns.color_palette(my_palette, len(est_sigs))

        clone_cols = sns.husl_palette(mut_table.clone.nunique(), l=0.8, s=0.7)
        fig = plt.figure(figsize=(23, 10), dpi=80)
Beispiel #9
0
def shot_chart_jointgrid(x, y, data=None, joint_type="scatter", title="",
                         joint_color="b", cmap=None,  xlim=(-250, 250),
                         ylim=(422.5, -47.5), court_color="gray", court_lw=1,
                         outer_lines=False, flip_court=False,
                         joint_kde_shade=True, gridsize=None,
                         marginals_color="b", marginals_type="both",
                         marginals_kde_shade=True, size=(12, 11), space=0,
                         despine=False, joint_kws=None, marginal_kws=None,
                         **kwargs):

    """
    Returns a JointGrid object containing the shot chart.

    This function allows for more flexibility in customizing your shot chart
    than the ``shot_chart_jointplot`` function.

    Parameters
    ----------

    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as columns from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the shot
        location coordinates.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    title : str, optional
        The title for the plot.
    joint_color : matplotlib color, optional
        Color used to plot the shots on the joint plot.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``joint_color``. Used
        for KDE and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default is
        ``False``, which orients the court where the hoop is towards the top of
        the plot.
    joint_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the joint plot.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated using
        the Freedman-Diaconis method.
    marginals_color : matplotlib color, optional
        Color used to plot the shots on the marginal plots.
    marginals_type : { "both", "hist", "kde"}, optional
        The type of plot for the marginal plots.
    marginals_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the marginal
        plots.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, hides the spines of the joint plot.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
        The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn
        plots. They are merged into ``joint_kws`` and take precedence over
        it.

    Returns
    -------
     grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not one of the supported
        values.

    """

    # The joint_kws and marginal_kws idea was taken from seaborn.
    # Work on a copy so the caller's joint_kws dict is never mutated when the
    # extra kwargs are merged in.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    elif joint_type == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the vectors stored on the grid rather than the raw
            # arguments: when ``data`` is given, ``x`` and ``y`` are column
            # names and cannot be binned directly.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=gridsize, cmap=cmap,
                               **joint_kws)

    else:
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    elif marginals_type == "kde":
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    else:
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # The joint axes is the main plot of the joint shot chart;
    # draw the court onto it
    ax = grid.ax_joint
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Booleans are required here: the string
    # "off" is truthy and leaves the labels visible on current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)
        # set the marginal spines to be the same as the rest of the spines
        grid.ax_marg_x.spines[spine].set_lw(court_lw)
        grid.ax_marg_x.spines[spine].set_color(court_color)
        grid.ax_marg_y.spines[spine].set_lw(court_lw)
        grid.ax_marg_y.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
Beispiel #10
0
def shot_chart(x, y, kind="scatter", title="", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", court_lw=1, outer_lines=False,
               flip_court=False, kde_shade=True, gridsize=None, ax=None,
               despine=False, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------

    x, y : vectors
        The x and y coordinates of the shots taken, passed in as vectors
        (such as a pandas Series).
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours.
    gridsize : int, optional
        Number of hexagons in the x-direction.  The default is calculated using
        the Freedman-Diaconis method.
    ax : Axes, optional
        The Axes object to plot the court onto. Defaults to the current
        Axes (``plt.gca()``).
    despine : boolean, optional
        If ``True``, hides the spines.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn plots.

    Returns
    -------
     ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not one of "scatter", "kde", or "hex".

    """

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    # Reversing the limits flips the court so the hoop sits at the bottom
    if flip_court:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])
    else:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

    # Hide the tick labels. Booleans are required here: the string "off" is
    # truthy and leaves the labels visible on current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        # Clear the axis labels after the KDE plot is drawn
        ax.set_xlabel('')
        ax.set_ylabel('')

    elif kind == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=gridsize, cmap=cmap, **kwargs)

    else:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return ax