Example #1
0
def plot_activity_matrix(df,
                         cmap,
                         normalized=False,
                         annotate=True,
                         out_path='',
                         title=''):
    """
    Plot activity matrix showing area of land transitioning between land-use types

    The matrix is rendered as a seaborn heatmap with rows labelled 'FROM' and
    columns labelled 'TO', written to ``out_path`` and then closed. The
    seaborn/matplotlib styling changed here is reverted before returning, even
    when plotting or saving fails.

    :param df: 2D table of transition values (presumably a pandas DataFrame;
        it only needs to support scalar multiplication, ``np.nanmin`` /
        ``np.nanmax`` and ``sns.heatmap``)
    :param cmap: colormap handed to ``sns.heatmap``
    :param normalized: if True, values are multiplied by 100 and the colorbar
        uses plain (non-scientific) tick labels
    :param annotate: if True, write the value of each cell inside the cell
    :param out_path: destination passed to ``plt.savefig``
    :param title: title placed above the heatmap
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)

    try:
        formatter = tkr.ScalarFormatter(useMathText=True)
        # normalized scale is from 0 - 100, does not need scientific scale
        if not normalized:
            formatter.set_scientific(True)
            formatter.set_powerlimits((-2, 2))

        df = df * 100.0 if normalized else df * 1.0
        # Round the colour limits outwards so that no cell falls outside
        # [vmin, vmax]; rounding the minimum *up* would clip the lowest cells.
        vmin = math.floor(np.nanmin(df))  # minimum value on colorbar
        vmax = math.ceil(np.nanmax(df))  # maximum value on colorbar
        ax = sns.heatmap(df,
                         cbar_kws={'format': formatter},
                         cmap=cmap,
                         linewidths=.5,
                         linecolor='lightgray',
                         annot=annotate,
                         fmt='.2g',
                         annot_kws={'size': 6},
                         vmin=vmin,
                         vmax=vmax)
        try:
            ax.set_ylabel('FROM')
            ax.set_xlabel('TO')
            ax.set_title(title)

            # Keep tick labels horizontal on both axes
            plt.setp(ax.get_xticklabels(), rotation=0)
            plt.setp(ax.get_yticklabels(), rotation=0)

            plt.savefig(out_path, dpi=constants.DPI)
        finally:
            # Release the heatmap figure even if saving failed
            plt.close()
    finally:
        # revert matplotlib params
        sns.reset_orig()
        set_matplotlib_params()
        get_colors(palette='tableau')
Example #2
0
def plot_activity_matrix(df, cmap, normalized=False, annotate=True, out_path='', title=''):
    """
    Plot activity matrix showing area of land transitioning between land-use types

    Draws ``df`` as a seaborn heatmap ('FROM' on the y-axis, 'TO' on the
    x-axis), saves it to ``out_path`` and closes the figure. Styling applied
    for this plot is always rolled back afterwards, including on failure.

    :param df: 2D table of transition values (presumably a pandas DataFrame)
    :param cmap: colormap forwarded to ``sns.heatmap``
    :param normalized: when True the values are scaled by 100 and the colorbar
        is not switched to scientific notation
    :param annotate: when True each cell is annotated with its value
    :param out_path: file path given to ``plt.savefig``
    :param title: heatmap title
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)

    try:
        formatter = tkr.ScalarFormatter(useMathText=True)
        # normalized scale is from 0 - 100, does not need scientific scale
        if not normalized:
            formatter.set_scientific(True)
            formatter.set_powerlimits((-2, 2))

        df = df * 100.0 if normalized else df * 1.0
        # Lower limit is rounded down and upper limit rounded up so the colour
        # range always encloses the data (ceil on the minimum clipped low cells).
        vmin = math.floor(np.nanmin(df))
        vmax = math.ceil(np.nanmax(df))
        ax = sns.heatmap(df, cbar_kws={'format': formatter}, cmap=cmap,
                         linewidths=.5, linecolor='lightgray', annot=annotate,
                         fmt='.2g', annot_kws={'size': 6}, vmin=vmin, vmax=vmax)
        try:
            ax.set_ylabel('FROM')
            ax.set_xlabel('TO')
            ax.set_title(title)

            # Tick labels stay horizontal on both axes
            for tick_labels in (ax.get_xticklabels(), ax.get_yticklabels()):
                plt.setp(tick_labels, rotation=0)

            plt.savefig(out_path, dpi=constants.DPI)
        finally:
            # Always free the figure, even when savefig raised
            plt.close()
    finally:
        # revert matplotlib params
        sns.reset_orig()
        set_matplotlib_params()
        get_colors(palette='tableau')
Example #3
0
def plot_qq(clf, X, y, figsize=(7, 7)):
    """Draw a quantile-quantile plot of a linear model's raw residuals.

    The raw residuals are compared against a fitted Student's t distribution
    and shown together with a 45-degree reference line.

    Parameters
    ----------
    clf : sklearn.linear_model
        A scikit-learn linear model classifier with a `predict()` method.
        Must be an instance of one of `_utils.supported_linear_models`.
    X : numpy.ndarray
        Training data used to fit the classifier.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    figsize : tuple
        Size of the figure to create, as (x-axis, y-axis).
        Defaults to (7, 7).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `clf` is not one of the supported linear model types.
    """
    # Refuse model types that have not been tested with the residual helpers
    assert isinstance(clf, _utils.supported_linear_models), (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    raw_resids = stats.residuals(clf, X, y, r_type='raw')
    quantile_plot = sm.ProbPlot(raw_resids, scipy.stats.t, fit=True)

    # Temporary seaborn styling; undone in the `finally` clause below
    sns.set_style("darkgrid")
    sns.set(font_scale=1.2)
    try:
        # qqplot ignores figure size, so hand it axes from a pre-sized figure
        fig, axis = plt.subplots(figsize=figsize)
        quantile_plot.qqplot(line='45', ax=axis)
        plt.title("Normal Quantile Plot")
        plt.xlabel("Theoretical Standardized Residuals")
        plt.ylabel("Actual Standardized Residuals")
        plt.show()
        return fig
    finally:
        # Restore the default matplotlib look whether or not plotting worked
        sns.reset_orig()
Example #4
0
def _legend_proxy(color, marker, label, markersize):
    """Build an empty Line2D that exists only to serve as a legend handle."""
    return mlines.Line2D([], [],
                         color=color,
                         marker=marker,
                         label=label,
                         linestyle='',
                         markersize=markersize)


def _new_stacked_axes():
    """Open a 13x9 figure with two vertically stacked axes sharing the x-axis."""
    fig = plt.figure(figsize=(13, 9))
    fig.subplots_adjust(hspace=0.001)
    top = fig.add_subplot(211)
    bottom = fig.add_subplot(212, sharex=top)
    return top, bottom


def _draw_panel(ax, x, y, yerr, xlabel, ylabel, xerr=None, layered=True):
    """Plot every object's error bar and marker on *ax* and label its axes."""
    # With layered=True the error bars are pushed behind the markers;
    # otherwise matplotlib's default stacking order is left untouched.
    bar_order = {'zorder': -1} if layered else {}
    point_order = {'zorder': 1} if layered else {}
    # Points are drawn one at a time because each has its own marker shape,
    # and scatter() accepts only a single marker per call.
    for n in range(len(names)):
        ax.errorbar(x[n],
                    y[n],
                    xerr=None if xerr is None else xerr[n],
                    yerr=yerr[n],
                    fmt='none',
                    alpha=0.5,
                    linestyle='None',
                    ecolor='k',
                    elinewidth=2,
                    **bar_order)
        ax.scatter(x[n],
                   y[n],
                   alpha=0.9,
                   s=marker_size[n],
                   c=color_value[n],
                   marker=marker_value[n],
                   **point_order)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)


def _add_legends(ax, *groups):
    """Attach one legend per (handles, loc) pair to *ax*, keeping all of them."""
    last = len(groups) - 1
    for position, (handles, loc) in enumerate(groups):
        legend = ax.legend(handles, [handle.get_label() for handle in handles],
                           fontsize=11,
                           loc=loc,
                           numpoints=1)
        # A later ax.legend() call replaces the axes' current legend, so every
        # legend except the final one is re-added as an ordinary artist.
        if position != last:
            ax.add_artist(legend)


def plots():
    """Plot the local minimum/maximum results as four two-panel figures.

    Each figure has two vertically stacked panels sharing the x-axis: the
    local minimum on top and the local maximum below. The x quantity is, in
    order, the J-K deviation, the H-K deviation, the J-H deviation and the
    optical spectral type.

    The data are read from names that this function does not define
    (``names``, ``JK_dev``, ``JK_dev_unc``, ``HK_dev``, ``HK_dev_unc``,
    ``JH_dev``, ``JH_dev_unc``, ``opt_spt``, ``lmin``, ``lmin_unc``,
    ``lmax``, ``lmax_unc``, ``marker_size``, ``color_value``,
    ``marker_value``) and indexed by position for ``len(names)`` objects;
    presumably they are loaded from the results CSV elsewhere in the module.

    The figures are left open; nothing is shown, saved or returned.
    """
    sns.reset_orig()

    # Legend handles: empty lines that only carry a marker style and a label
    colour_handles = (
        _legend_proxy('grey', '*', "Field", 12),
        _legend_proxy('k', '*', "Standard", 12),
        _legend_proxy('#0000b3', 'o',
                      r"Extremely Blue $(\Delta J-K_{s})\geq 2 \sigma$", 8),
        _legend_proxy('#0080ff', 'o',
                      r"Bluer than avg $(\Delta J-K_{s})< 2 \sigma$", 8),
        _legend_proxy('#b30000', 'o',
                      r"Extremely Red $(\Delta J-K_{s})\geq 2 \sigma$", 8),
        _legend_proxy('#ff5600', 'o',
                      r"Redder than avg $(\Delta J-K_{s})< 2 \sigma$", 8),
    )
    shape_handles = (_legend_proxy('white', '^', "Young or Subdwarf", 8),)

    min_label = r"Local Minimum ($\lambda$)"
    max_label = r"Local Maximum ($\lambda$)"

    # LMIN/LMAX vs J-K (error bars keep matplotlib's default stacking order)
    jk_label = "$J-K-(J-K_{s})_{avg}$"
    top, bottom = _new_stacked_axes()
    _draw_panel(top, JK_dev, lmin, lmin_unc, jk_label, min_label,
                xerr=JK_dev_unc, layered=False)
    _add_legends(top, (colour_handles, 3))
    _draw_panel(bottom, JK_dev, lmax, lmax_unc, jk_label, max_label,
                xerr=JK_dev_unc, layered=False)
    bottom.set_xlim(-1, .8)
    bottom.set_ylim(1.23, 1.33)
    _add_legends(bottom, (shape_handles, 4))
    plt.setp(top.get_xticklabels(), visible=False)

    # LMIN/LMAX vs H-K
    hk_label = "$H-K-(H-K_{s})_{avg}$"
    top, bottom = _new_stacked_axes()
    _draw_panel(top, HK_dev, lmin, lmin_unc, hk_label, min_label,
                xerr=HK_dev_unc)
    top.set_ylim(1.145, 1.195)
    _draw_panel(bottom, HK_dev, lmax, lmax_unc, hk_label, max_label,
                xerr=HK_dev_unc)
    bottom.set_ylim(1.24, 1.315)
    bottom.set_xlim(-.4, .7)
    _add_legends(bottom, (colour_handles, 3), (shape_handles, 4))
    plt.setp(top.get_xticklabels(), visible=False)

    # LMIN/LMAX vs J-H
    jh_label = "$J-H-(J-H)_{avg}$"
    top, bottom = _new_stacked_axes()
    _draw_panel(top, JH_dev, lmin, lmin_unc, jh_label, min_label,
                xerr=JH_dev_unc)
    top.set_ylim(1.145, 1.195)
    _add_legends(top, (colour_handles, 4), (shape_handles, 3))
    _draw_panel(bottom, JH_dev, lmax, lmax_unc, jh_label, max_label,
                xerr=JH_dev_unc)
    bottom.set_ylim(1.24, 1.325)
    bottom.set_xlim(-1, 1.5)
    plt.setp(top.get_xticklabels(), visible=False)

    # LMIN/LMAX vs optical spectral type (no x uncertainties)
    spt_ticks = np.arange(9, 20, 1)
    spt_labels = [
        '', 'L0', 'L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'L7', 'L8', 'L9'
    ]
    top, bottom = _new_stacked_axes()
    _draw_panel(top, opt_spt, lmin, lmin_unc, "Spectral Type", min_label)
    top.set_xticks(spt_ticks)
    top.set_xticklabels(spt_labels)
    top.set_ylim(1.14, 1.2)
    _add_legends(top, (colour_handles, 3), (shape_handles, 4))
    _draw_panel(bottom, opt_spt, lmax, lmax_unc, "Spectral Type", max_label)
    bottom.set_xticklabels(spt_labels)
    bottom.set_ylim(1.23, 1.325)
    plt.setp(top.get_xticklabels(), visible=False)
Example #5
0
def plot_scree(clf_pca,
               xlim=[-1, 10],
               ylim=[-0.1, 1.0],
               required_var=0.90,
               figsize=(10, 5)):
    """Create side-by-side scree plots for analyzing variance of principal
    components from PCA.

    The left panel shows the proportion of variance explained by each
    component; the right panel shows the cumulative proportion. Both are
    plotted against 1-based component numbers, so the required-variance
    marker lines up with the point it refers to.

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    xlim : list
        X-axis range. If `required_var` is supplied, the maximum x-axis value
        of the cumulative panel is widened when needed so that the required
        variance line is visible. Defaults to [-1, 10]. Never modified.
    ylim : list
        Y-axis range. Defaults to [-0.1, 1.0]. Never modified.
    required_var : float, int, None
        A value of variance to distinguish on the scree plot. Set to None to
        not include on the plot. Defaults to 0.90. If the fitted components
        never reach this value, no marker is drawn and a warning is issued.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 5).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `clf_pca` is not a `sklearn.decomposition.PCA` instance.
    """
    import warnings

    # Ensure we have a PCA model
    assert isinstance(clf_pca, decomposition.PCA), (
        "Models of type {0} are not supported. Only models of type "
        "sklearn.decomposition.PCA are supported.".format(type(clf_pca)))
    # Extract variances from the model
    variances = clf_pca.explained_variance_ratio_
    cumsum = np.cumsum(variances)  # Cumulative sum of variances explained
    component_numbers = np.arange(1, len(variances) + 1)
    # Set plot style and scale up font size in ONE call: a separate
    # sns.set(font_scale=...) would reset the style to seaborn's default.
    sns.set(style="whitegrid", font_scale=1.2)
    # Set up figure and generate subplots
    try:
        fig = plt.figure('scree', figsize=figsize)
        # First plot (in subplot)
        plt.subplot(1, 2, 1)
        plt.xlabel("Component Number")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(component_numbers, variances, marker='o', linestyle='--')
        # Second plot (in subplot)
        plt.subplot(1, 2, 2)
        plt.xlabel("Number of Components")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(component_numbers, cumsum, marker='o', linestyle='--')
        # Add marker for required variance line
        if required_var is not None:
            reached = cumsum >= required_var
            if reached.any():
                # argmax gives the first True index; +1 converts to a count
                required_var_components = int(np.argmax(reached)) + 1
                # Update xlim if it is too small to see the marker
                if xlim[1] <= required_var_components:
                    plt.xlim([xlim[0], required_var_components + 1])
                # Add the marker and legend to the plot
                plt.axvline(x=required_var_components,
                            c='r',
                            linestyle='dashed',
                            label="> {0:.0f}% Var. Explained: {1} "
                            "components".format(required_var * 100,
                                                required_var_components))
                legend = plt.legend(loc='lower right', frameon=True)
                legend.get_frame().set_facecolor('#FFFFFF')
            else:
                # argmax on an all-False mask returns 0, which would have
                # wrongly claimed that a single component is sufficient.
                warnings.warn(
                    "The fitted components never reach the required "
                    "variance of {0}; no marker drawn.".format(required_var))
        plt.show()
    finally:
        sns.reset_orig()
    return fig
Example #6
0
def plot_pca_pairs(clf_pca,
                   x_train,
                   y=None,
                   n_components=3,
                   diag='kde',
                   cmap=None,
                   figsize=(10, 10)):
    """
    Create pairwise plots of principal components from x data.

    Colors the components according to the `y` values.

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    x_train : numpy.ndarray
        Training data used to fit `clf_pca`, either scaled or un-scaled,
        depending on how `clf_pca` was fit.
    y : numpy.ndarray, optional
        Target training values, of shape = [n_samples]. When omitted, every
        point is drawn in a single fixed color.
    n_components: int
        Desired number of principal components to plot. Defaults to 3.
    diag : str
        Type of plot to display on the diagonals. Default is 'kde'.

        * 'kde': density curves
        * 'hist': histograms

    cmap : str
        A string representation of a Seaborn color map. Defaults to "Greys"
        when None. See available maps:
        https://stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 10).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `y` is given and its first dimension differs from `x_train`'s.
    """
    # scatter_matrix lives in pandas.plotting in current pandas; the old
    # pandas.tools.plotting location is only tried for legacy installations.
    try:
        from pandas.plotting import scatter_matrix
    except ImportError:
        from pandas.tools.plotting import scatter_matrix

    if y is not None:
        assert y.shape[0] == x_train.shape[0], (
            "Dimensions of y {0} do not match dimensions of x_train {1}".
            format(y.shape[0], x_train.shape[0]))
    # Obtain the projections of x_train
    x_projection = clf_pca.transform(x_train)
    # Create a data frame to hold the projections of n_components PCs
    col_names = ["PC{0}".format(i + 1) for i in range(n_components)]
    df = pd.DataFrame(x_projection[:, 0:n_components], columns=col_names)
    # Generate the plot
    cmap = "Greys" if cmap is None else cmap
    color = "#55A969" if y is None else y
    sns.set_style("white", {"axes.linewidth": "0.8", "image.cmap": cmap})
    sns.set_context("notebook")
    try:
        # Create figure instance with subplot and populate the subplot with
        # the scatter matrix. You need to do this so you can access the figure
        # properties later to increase distance between subplots. If you don't,
        # Pandas will create its own figure with a tight layout.
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(1, 1, 1)
        axes = scatter_matrix(df,
                              ax=ax,
                              alpha=0.7,
                              figsize=figsize,
                              diagonal=diag,
                              marker='o',
                              c=color,
                              density_kwds={'c': '#6283B9'},
                              hist_kwds={
                                  'facecolor': '#5A76A4',
                                  'edgecolor': '#3D3D3D'
                              })
        # Increase space between subplots
        fig.subplots_adjust(hspace=0.1, wspace=0.1)
        # Remove the top and right spines from every subplot
        for sub_ax in np.ravel(axes):
            sub_ax.spines['top'].set_visible(False)
            sub_ax.spines['right'].set_visible(False)
        plt.show()
    finally:
        # Restore default matplotlib styles exactly once, success or failure
        sns.reset_orig()
    return fig
Example #7
0
def plot_residuals(clf, X, y, r_type='standardized', figsize=(10, 8)):
    """Scatter a linear model's residuals against its predictions.

    A dotted horizontal line at zero spans the range of the predictions.

    Parameters
    ----------
    clf : sklearn.linear_model
        A scikit-learn linear model classifier with a `predict()` method.
        Must be an instance of one of `_utils.supported_linear_models`.
    X : numpy.ndarray
        Training data used to fit the classifier.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    r_type : str
        Which residuals to plot: 'raw', 'standardized' or 'studentized'.
        Defaults to 'standardized'. The value is forwarded to
        `stats.residuals` and also selects the y-axis label.

        * 'raw' plots the raw residuals.
        * 'standardized' plots the standardized residuals, also known as
          internally studentized residuals, which is calculated as the
          residuals divided by the square root of MSE (or the STD of the
          residuals).
        * 'studentized' plots the externally studentized residuals, which
          is calculated as the raw residuals divided by sqrt(LOO-MSE * (1 -
          leverage_score)).
    figsize : tuple
        Size of the figure to create, as (x-axis, y-axis).
        Defaults to (10, 8).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `clf` is not one of the supported linear model types.
    """
    # Refuse model types that have not been tested with the residual helpers
    assert isinstance(clf, _utils.supported_linear_models), (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    residual_values = stats.residuals(clf, X, y, r_type)
    fitted = clf.predict(X)

    # y-axis caption for each supported residual type
    axis_captions = {
        'raw': 'Residuals',
        'standardized': 'Standardized Residuals',
        'studentized': 'Studentized Residuals'
    }

    # Temporary seaborn styling; "talk" context enlarges the fonts
    sns.set_style("whitegrid")
    sns.set_context("talk")
    try:
        fig = plt.figure('residuals', figsize=figsize)
        plt.scatter(fitted, residual_values, s=14, c='gray', alpha=0.7)
        plt.hlines(y=0,
                   xmin=fitted.min(),
                   xmax=fitted.max(),
                   linestyle='dotted')
        plt.title("Residuals Plot")
        plt.xlabel("Predictions")
        plt.ylabel(axis_captions[r_type])
        plt.show()
        return fig
    finally:
        # Always reset back to default matplotlib styles
        sns.reset_orig()