Example #1
0
def plot2Dbyclass(X,
                  y,
                  feat1=None,
                  feat2=None,
                  single_out=[],
                  alpha=0.7,
                  marker_size=100,
                  logXscale=False,
                  logYscale=False,
                  fontsize=20,
                  legend_location="upper center",
                  bbox_to_anchor=(0.5, 1.25),
                  fig_dir=None,
                  fig_format=".png"):
    """ Plot the 2D scatter plot of two features of interest
        Add a different color for each class

    The plot is shown in interactive mode, the user is asked to inspect it,
    and it is then saved in fig_dir as
    ``<feat2>_vs_<feat1>_byclass<fig_format>`` (or, when single_out is
    given, ``<feat2>_vs_<feat1>_byclass_<classes>_vs _rest<fig_format>``).

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)
        For a DataFrame, the columns feat1 and feat2 are selected.
        An array must already have exactly two columns.

    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)

    feat1/feat2 : str, name of the features to plot against each other.
        Always required: they are used as axis labels and in the file name

    single_out : list, class labels to plot as one group against all the
        other classes. Empty (default) => one color per class.
        The argument is only read, never modified

    alpha : float, set the transparency of the scatter plot

    marker_size : int, size of the scatter plot marker

    logX/Yscale : bool, whether to use a log scale

    fontsize : int, the size of the axis labels

    legend_location : str, see matplotlib documentation for possible values

    bbox_to_anchor : tuple of floats, where to anchor the legend box

    fig_dir : str, directory to save figure. Specify the path relative to
        current directory (e.g. "./Figures/")
        or an absolute path  (e.g. "~/Desktop/").
        Asked interactively when not provided; created if missing

    fig_format : str, choose your favorite (.eps, .png)

    Raises
    ------
    AssertionError : if feat1/feat2 is missing, or if an array X does not
        have exactly two columns
    """

    if isinstance(X, (pd.DataFrame, pd.Series)):
        assert feat1 is not None, "Provide feat1"
        assert feat2 is not None, "Provide feat2"
        X = X[[feat1, feat2]].values

    elif isinstance(X, np.ndarray):
        assert X.shape[1] == 2, "X should be of dim (n_samples, 2)"
        assert feat1 is not None, "Provide feat1"
        assert feat2 is not None, "Provide feat2"

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A single-column DataFrame gives an (n, 1) array and a list gives no
    # array at all; both must become 1D arrays to serve as boolean masks.
    y = np.ravel(y)
    single_out = list(single_out)

    # The prompt function is raw_input on Python 2 and input on Python 3
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Interactive plotting
    plt.ion()

    # Remove possible NaNs
    print("Removing possible NaN")
    keep = ~(np.isnan(X[:, 0]) | np.isnan(X[:, 1]))
    X = X[keep]
    y = y[keep]

    # Get list of labels and list of color.
    # At least two colors are requested because the single_out mode always
    # draws two groups; the first color is the same either way.
    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, max(len(labels), 2)))

    # Describe what to draw as (boolean mask, legend label) pairs
    if not single_out:
        groups = [(y == label, "Class %s" % label) for label in labels]
        name_suffix = "_byclass"
        pause_prompt = "Inspect plot then press any key: "
    else:
        # Build the "singled out" mask directly instead of writing 0/1 into
        # a copy of y: that copy keeps y's dtype, so with string labels the
        # later comparison against the integers 0 and 1 never matched.
        singled = np.zeros(len(y), dtype=bool)
        for label in labels:
            if label in single_out:
                singled |= (y == label)
        not_single = [label for label in labels if label not in single_out]
        groups = [
            (~singled, "Classe(s) " + "-".join(map(str, not_single))),
            (singled, "Classe(s) " + "-".join(map(str, single_out))),
        ]
        name_suffix = ("_byclass_" + "-".join(map(str, single_out)) +
                       "_vs _rest")
        pause_prompt = "Inspect plot then press any key :"

    for (mask, legend_label), color in zip(groups, list_color):
        # The color is wrapped in a list (one RGBA row): a bare RGBA
        # sequence passed as c can be mistaken for values to colormap.
        plt.scatter(X[mask, 0],
                    X[mask, 1],
                    c=[color],
                    label=legend_label,
                    s=marker_size,
                    alpha=alpha)

    # Add legend and labels (ncol must be an integer >= 1)
    plt.legend(loc=legend_location,
               bbox_to_anchor=bbox_to_anchor,
               ncol=max(1, len(labels) // 2),
               fancybox=True)
    plt.xlabel(feat1, fontsize=fontsize)
    plt.ylabel(feat2, fontsize=fontsize)
    # Adjust white space for better visibility
    plt.subplots_adjust(top=0.8)

    if logXscale:
        plt.xscale("log")
    if logYscale:
        plt.yscale("log")

    plt.show()
    read_line(pause_prompt)

    if not fig_dir:
        fig_dir = read_line("Enter figure directory: ")
    # Expand "~" so that the documented "~/Desktop/" form works; an empty
    # answer falls back to the current directory.
    fig_dir = os.path.expanduser(fig_dir) or "."
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    plt.savefig(os.path.join(
        fig_dir, feat2 + "_vs_" + feat1 + name_suffix + fig_format))
Example #2
0
def plotPCA(X,
            y=None,
            n_components=None,
            plot_indices=(0, 1),
            fig_dir=None,
            fig_title=None,
            fig_format=".png",
            marker_size=50,
            alpha=1,
            legend_location="best",
            verbose=False):
    """ Plot the PCA of data provided as input

    Columns of X containing NaN are dropped, a PCA is fitted, and two of
    the resulting components are plotted against each other. The figure is
    shown in interactive mode, the user is asked to inspect it, and it is
    then saved as ``<fig_dir>/<fig_title><fig_format>``.

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)

    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        optional class labels. When given, each class gets its own color
        and a legend entry; when None (default) all points are black

    n_components : int, (default None => keep all components).
        number of components to keep.

    plot_indices : tuple of int : indices of princ components to plot

    fig_dir : str, directory to save figure. Specify the path relative to
        current directory (e.g. "./Figures/")
        or an absolute path  (e.g. "~/Desktop/").
        Asked interactively when not provided; created if missing

    fig_title : str,  figure title (used as the file name).
        Asked interactively when not provided

    fig_format : str, choose your favorite (.eps, .png)

    marker_size : int, size of scatter plot markers

    alpha : float (0 to 1) transparency of the plot markers

    legend_location : see matplotlib doc for possible values, default = best

    verbose : bool , if True, will show explained variance ratio

    """

    if isinstance(X, (pd.DataFrame, pd.Series)):
        X = X.values

    # labels stays None when no target is given. It used to be assigned
    # only for pandas targets, so the default y=None raised a NameError.
    labels = None
    if y is not None:
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values
        # Flatten so that a single-column DataFrame or a list can be used
        # to build boolean row masks
        y = np.ravel(y)
        labels = np.unique(y)

    # The prompt function is raw_input on Python 2 and input on Python 3
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Interactive plotting mode
    plt.ion()

    # Deal with possible NaN
    print("Removing columns with NaN")
    X = X[:, ~np.isnan(X).any(axis=0)]

    print("Fitting PCA...")
    pca = PCA(n_components=n_components)
    X_r = pca.fit(X).transform(X)

    # Percentage of variance explained for each components
    if verbose:
        print('explained variance ratio (first two components): %s' %
              str(pca.explained_variance_ratio_))

    # Set up the matplotlib figure
    fig, ax = plt.subplots(figsize=(15, 15))
    x_index, y_index = plot_indices[0], plot_indices[1]

    if labels is not None:
        list_color = cm.Accent(np.linspace(0, 1, len(labels)))
        for color, label in zip(list_color, labels):
            mask = (y == label)
            # The color is wrapped in a list (one RGBA row): a bare RGBA
            # sequence passed as c can be mistaken for values to colormap.
            ax.scatter(X_r[mask, x_index],
                       X_r[mask, y_index],
                       c=[color],
                       label="Class " + str(label),
                       s=marker_size,
                       alpha=alpha)
        # A legend only makes sense when there are labelled classes
        ax.legend(loc=legend_location)
    else:
        ax.scatter(X_r[:, x_index],
                   X_r[:, y_index],
                   c="k",
                   s=marker_size,
                   alpha=alpha)

    ax.set_xlabel("Princ Comp # %s" % x_index)
    ax.set_ylabel("Princ Comp # %s" % y_index)
    plt.show()
    read_line("Inspect figure then press any key: ")

    # Saving used to be nested under "if not fig_dir", so a figure was
    # never written when the caller supplied the directory.
    if not fig_dir:
        fig_dir = read_line("Enter figure directory location: ")
    # Expand "~" so that the documented "~/Desktop/" form works; an empty
    # answer falls back to the current directory.
    fig_dir = os.path.expanduser(fig_dir) or "."
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)
    if not fig_title:
        fig_title = read_line("Enter fig title (without .XXX): ")
    plt.savefig(os.path.join(fig_dir, fig_title + fig_format))
Example #3
0
def plot1Dbyclass(X,
                  y,
                  list_feat=None,
                  single_out=[],
                  bins=100,
                  alpha=0.7,
                  histtype="stepfilled",
                  stacked=True,
                  logscale=True,
                  fontsize=20,
                  legend_location="upper center",
                  bbox_to_anchor=(0.5, 1.25),
                  fig_dir=None,
                  fig_format=".png"):
    """ Plot 1D histogram for each column in X, with a class separation

    One figure is saved per column, in fig_dir, as
    ``<feature>_byclass<fig_format>`` (or, when single_out is given,
    ``<feature>_byclass_<classes>_vs _rest<fig_format>``).

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)

    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)

    list_feat : list of str, the name of each column of X. No need
        to specify it if X is a pd.DataFrame

    single_out : list of int, a list of classes to single out in the plot
        (i.e. plot all other classes vs singled out classes).
        The argument is only read, never modified

    bins : int : binning for the matplotlib histogram

    alpha : float, transparency for the histogram

    histtype : str, matplotlib histogram type

    stacked : bool, whether or not to stack the histograms

    logscale : bool, if True use logscale on Y axis

    fontsize : int, fontsize for X axis label

    legend_location : str, see matplotlib documentation for possible values
        best is to keep it at its default parameters

    bbox_to_anchor : tuple of floats, where to anchor the legend box

    fig_dir : str, directory to save figure. Specify the path relative to
        current directory (e.g. "./Figures/")
        or an absolute path  (e.g. "~/Desktop/").
        Asked interactively when not provided; created if missing

    fig_format : str, choose your favorite (.eps, .png)

    Raises
    ------
    AssertionError : if X is an array and len(list_feat) != X.shape[1]
    SystemExit : if X is an array and list_feat is not provided
    """

    if isinstance(X, np.ndarray):
        try:
            assert len(list_feat) == X.shape[1], \
                "Length of list_feat does not match X.shape[1]"
            X = pd.DataFrame(X, columns=list_feat)
        except TypeError:
            # len(None) lands here when list_feat was not given
            sys.exit("Please fill in the list_feat argument")

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A single-column DataFrame gives an (n, 1) array and a list gives no
    # array at all; both must become 1D arrays to serve as boolean masks.
    y = np.ravel(y)
    single_out = list(single_out)

    # The prompt function is raw_input on Python 2 and input on Python 3
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Define list of features
    list_feat = X.columns.values

    # At least two colors are requested because the single_out mode always
    # draws two histograms; the first color is the same either way.
    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, max(len(labels), 2)))

    # Describe the histograms to draw: one row mask + legend label each
    if not single_out:
        masks = [y == label for label in labels]
        legend_labels = ["Class %s" % label for label in labels]
        name_suffix = "_byclass"
    else:
        # Build the "singled out" mask directly instead of writing 0/1 into
        # a copy of y: that copy keeps y's dtype, so with string labels the
        # later comparison against the integers 0 and 1 never matched.
        singled = np.zeros(len(y), dtype=bool)
        for label in labels:
            if label in single_out:
                singled |= (y == label)
        not_single = [label for label in labels if label not in single_out]
        masks = [~singled, singled]
        legend_labels = ["Class " + "-".join(map(str, not_single)),
                         "Class " + "-".join(map(str, single_out))]
        name_suffix = ("_byclass_" + "-".join(map(str, single_out)) +
                       "_vs _rest")
    colors = list_color[:len(masks)]

    # The output directory does not depend on the feature: resolve it once
    if not fig_dir:
        fig_dir = read_line("Enter figure directory: ")
    # Expand "~" so that the documented "~/Desktop/" form works; an empty
    # answer falls back to the current directory.
    fig_dir = os.path.expanduser(fig_dir) or "."
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    for feat in list_feat:
        column = X[feat]
        values = column.values
        # Split by class and remove possible NaN
        list_hist = [values[mask] for mask in masks]
        list_hist = [x[~np.isnan(x)] for x in list_hist]
        # alpha is now forwarded; it used to be accepted but ignored
        n, _, _ = plt.hist(list_hist,
                           bins=bins,
                           histtype=histtype,
                           color=colors,
                           stacked=stacked,
                           alpha=alpha,
                           label=legend_labels)

        # Improve plot boundaries
        n = np.ravel(n)
        x_margin = np.abs(np.std(column))
        plt.xlim([np.min(column) - x_margin, np.max(column) + x_margin])
        # The lower bound is the smallest non-empty bin count; leave the
        # default limits alone when every bin is empty.
        if n.any():
            plt.ylim([np.min(n[np.nonzero(n)]),
                      np.max(n) + np.abs(np.std(n))])

        # Add legend and labels (ncol must be an integer >= 1)
        plt.legend(loc=legend_location,
                   bbox_to_anchor=bbox_to_anchor,
                   ncol=max(1, len(labels) // 2),
                   fancybox=True)
        plt.xlabel(feat, fontsize=fontsize)

        # Adjust white space for better visibility
        plt.subplots_adjust(top=0.8)

        if logscale:
            plt.yscale("log")

        plt.savefig(os.path.join(fig_dir, feat + name_suffix + fig_format))
        plt.clf()
        plt.close()
Example #4
0
def plotViolinbyclass(X,
                      y,
                      list_feat=None,
                      alpha=1,
                      fontsize=20,
                      logscale=False,
                      showmeans=False,
                      showmedians=False,
                      showextrema=False,
                      fig_dir=None,
                      fig_format=".png"):
    """ Violin Plot for each column of X with a class separation

    One figure is saved per column, in fig_dir, as
    ``<feature>_violinbyclass<fig_format>``. NaN values are dropped before
    plotting; a class left with no value for a feature is omitted from that
    figure, and a feature with no value at all is skipped.

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)

    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)

    list_feat : list of str, the name of each column of X. No need
        to specify it if X is a pd.DataFrame

    alpha : float, transparency of the violin bodies

    logscale : bool, if True use logscale on Y axis

    fontsize : int, fontsize for the axis labels

    showmeans/showmedians/showextrema : bool, whether to show
        means/medians/extrema

    fig_dir : str, directory to save figure. Specify the path relative to
        current directory (e.g. "./Figures/")
        or an absolute path  (e.g. "~/Desktop/").
        Asked interactively when not provided; created if missing

    fig_format : str, choose your favorite (.eps, .png)

    Raises
    ------
    AssertionError : if X is an array and len(list_feat) != X.shape[1]
    SystemExit : if X is an array and list_feat is not provided
    """

    if isinstance(X, np.ndarray):
        try:
            assert len(list_feat) == X.shape[1], \
                "Length of list_feat does not match X.shape[1]"
            X = pd.DataFrame(X, columns=list_feat)
        except TypeError:
            # len(None) lands here when list_feat was not given
            sys.exit("Please fill in the list_feat argument")

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A single-column DataFrame gives an (n, 1) array and a list gives no
    # array at all; both must become 1D arrays to serve as boolean masks.
    y = np.ravel(y)

    # The prompt function is raw_input on Python 2 and input on Python 3
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Define list of features
    list_feat = X.columns.values

    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, len(labels)))

    # The output directory does not depend on the feature: resolve it once
    if not fig_dir:
        fig_dir = read_line("Enter figure directory: ")
    # Expand "~" so that the documented "~/Desktop/" form works; an empty
    # answer falls back to the current directory.
    fig_dir = os.path.expanduser(fig_dir) or "."
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    # Plot with different color for each class
    for feat in list_feat:
        values = X[feat].values

        # Gather (label, color, values) per class, dropping NaN first:
        # NaN would otherwise poison both the KDE and the axis limits.
        groups = []
        for label, color in zip(labels, list_color):
            class_values = values[y == label]
            class_values = class_values[~np.isnan(class_values)]
            if len(class_values):
                groups.append((label, color, class_values))
        if not groups:
            print("Skipping %s: no non-NaN value to plot" % feat)
            continue

        # Set up the matplotlib figure
        fig, ax = plt.subplots(figsize=(15, 12))
        # plot violin plot
        print("Computing KDEs...")
        violin_parts = ax.violinplot([group[2] for group in groups],
                                     showmeans=showmeans,
                                     showmedians=showmedians,
                                     showextrema=showextrema)
        # Change color; alpha is now applied (it used to be ignored)
        for (_, color, _), body in zip(groups, violin_parts['bodies']):
            body.set_facecolor(color)
            body.set_edgecolor('black')
            body.set_alpha(alpha)

        # Adjust plot boundaries: data range padded by one std on each side
        finite = values[~np.isnan(values)]
        margin = np.abs(np.std(finite))
        ax.set_ylim([np.min(finite) - margin, np.max(finite) + margin])

        # adding horizontal grid lines
        ax.yaxis.grid(True)
        ax.set_xlabel('Class', fontsize=fontsize)
        ax.set_ylabel(feat + " values", fontsize=fontsize)

        # add x-tick labels: violins sit at positions 1..n. A real list is
        # passed because map() returns an iterator on Python 3.
        ax.set_xticks(list(range(1, len(groups) + 1)))
        ax.set_xticklabels([str(group[0]) for group in groups])

        if logscale:
            ax.set_yscale("log")

        plt.savefig(os.path.join(fig_dir,
                                 feat + "_violinbyclass" + fig_format))
        plt.clf()
        plt.close()
def plot_results():
    """
    Utility to compare the results of several experiments (in terms of loss)

    (WIP)

    Every entry of ./Log/ is expected to hold an "experiment_log.json"
    (keys read here: nb_epoch, batch_size, prob, augmentator_config,
    train_loss, test_loss) and a "SFCNN_archi.json" describing the layers.
    The experiments are sorted into four groups, each drawn in its own
    panel of a 2x2 grid:

    - batch size       : 10 epochs, no augmentation, conv depth of 4
    - augmentation     : 10 epochs, with augmentation, conv depth of 4
    - CNN depth        : 10 epochs, no augmentation, batch size of 32
    - more epochs      : more than 10 epochs

    The figure is saved to ./Figures/training_results.png, shown, and the
    function then waits for the user to press Enter.
    """
    # Local import: os is only needed here to create the output directory
    import os

    # The prompt function is raw_input on Python 2 and input on Python 3
    try:
        read_line = raw_input  # noqa: F821
    except NameError:
        read_line = input

    # Sorted so that the color given to each experiment is reproducible
    list_d_log = []
    for exp_dir in sorted(glob.glob("./Log/*")):
        with open(exp_dir + "/experiment_log.json", "r") as f:
            d_log = json.load(f)
        with open(exp_dir + "/SFCNN_archi.json", "r") as f:
            d_archi = json.load(f)
        list_conv = [layer["config"]["name"] for layer in d_archi["config"]
                     if layer["class_name"] == "Convolution2D"]
        # Depth = largest numeric suffix of the convolution layer names
        # (0 when the architecture has no Convolution2D layer)
        list_depth = [int(name.split("_")[-1]) for name in list_conv]
        d_log["max_conv"] = max(list_depth) if list_depth else 0
        list_d_log.append(d_log)

    list_c = cm.Accent(np.linspace(0, 1, 10))

    list_by_batch = []
    list_by_aug = []
    list_by_epoch = []
    list_by_depth = []

    # An experiment may belong to several groups
    for d in list_d_log:
        if d["nb_epoch"] == 10 and \
                d["augmentator_config"]["transforms"] == {} and \
                d["max_conv"] == 4:
            list_by_batch.append(d)
        if d["nb_epoch"] > 10:
            list_by_epoch.append(d)
        if d["nb_epoch"] == 10 and \
                d["augmentator_config"]["transforms"] != {} and \
                d["max_conv"] == 4:
            list_by_aug.append(d)
        if d["nb_epoch"] == 10 and \
                d["augmentator_config"]["transforms"] == {} and \
                d["batch_size"] == 32:
            list_by_depth.append(d)

    # One panel per group: (legend prefix, log key appended to it, curves)
    list_panels = [
        ("Batch size: ", "batch_size", list_by_batch),
        ("Data augmentation prob: ", "prob", list_by_aug),
        ("CNN depth: ", "max_conv", list_by_depth),
        ("More epochs", None, list_by_epoch),
    ]

    gs = gridspec.GridSpec(2, 2)
    fig = plt.figure(figsize=(15, 15))
    for i, (prefix, key, list_d) in enumerate(list_panels):
        ax = plt.subplot(gs[i])
        for c_counter, d in enumerate(list_d):
            label = prefix if key is None else prefix + str(d[key])
            # Cycle through the palette rather than overflowing it
            color = list_c[c_counter % len(list_c)]
            ax.plot(d["train_loss"], "--",
                    label=label,
                    color=color,
                    linewidth=3)
            ax.plot(d["test_loss"],
                    label=label,
                    color=color,
                    linewidth=3)

        # Decorate each panel once. This used to run once per curve, which
        # stacked identical text boxes and left empty panels bare.
        ax.set_xlabel("Number of epochs", fontsize=18)
        ax.set_ylabel("Logloss", fontsize=18)
        if list_d:
            ax.legend(loc="best")
        ax.set_ylim([0.1, 0.8])
        ax.text(0.05, 0.05,
                "Dashed: Training sample\nContinuous: Test sample",
                transform=ax.transAxes,
                fontsize=18,
                bbox=dict(boxstyle='round', facecolor="white"))

    gs.tight_layout(fig)
    # savefig does not create missing directories
    if not os.path.exists("./Figures"):
        os.makedirs("./Figures")
    plt.savefig("./Figures/training_results.png")
    plt.show()
    read_line()