def plot2Dbyclass(X, y, feat1=None, feat2=None, single_out=[],
                  alpha=0.7, marker_size=100,
                  logXscale=False, logYscale=False,
                  fontsize=20, legend_location="upper center",
                  bbox_to_anchor=(0.5, 1.25),
                  fig_dir=None, fig_format=".png"):
    """
    Scatter-plot two features against each other, one color per class.

    Rows where either feature is NaN are dropped. The figure is shown in
    interactive mode, the function waits for user input, and the figure
    is then saved in ``fig_dir`` (the user is prompted for a directory
    when none is given; the directory is created if needed).

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)
        For a DataFrame, the columns feat1 and feat2 are selected.
        An ndarray must already have exactly two columns.
    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)
    feat1/feat2 : str, name of the features to plot against each other
        (feat1 on the X axis, feat2 on the Y axis). Also used for the
        axis labels and the name of the saved file.
    single_out : list, classes to single out in the plot. When non
        empty, only two groups are drawn : all other classes vs the
        singled out classes. The default list is never mutated.
    alpha : float, set the transparency of the scatter plot
    marker_size : int, size of the scatter plot marker
    logX/Yscale : bool, whether to use a log scale
    fontsize : int, the size of the axis labels
    legend_location : str, see matplotlib documentation
    bbox_to_anchor : tuple of floats, where to anchor the legend box
    fig_dir : str, directory to save figure.
        Specify the path relative to current directory
        (e.g. "./Figures/") or a path in the home directory
        (e.g. "~/Desktop/", "~" is expanded)
    fig_format : str, choose your favorite (.eps, .png)
    """

    if isinstance(X, (pd.DataFrame, pd.Series)):
        assert feat1 is not None, "Provide feat1"
        assert feat2 is not None, "Provide feat2"
        X = X[[feat1, feat2]].values
    elif isinstance(X, np.ndarray):
        assert X.shape[1] == 2, "X should be of dim (n_samples, 2)"
        assert feat1 is not None, "Provide feat1"
        assert feat2 is not None, "Provide feat2"

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A one-column DataFrame yields an (n_samples, 1) array, which
    # cannot be used to build 1D boolean masks : flatten it.
    y = np.ravel(y)

    # Work on a copy so that tuples / arrays are accepted as well
    single_out = list(single_out) if single_out is not None else []

    # Interactive plotting
    plt.ion()

    # Remove possible NaNs
    print("Removing possible NaN")
    keep = ~(np.isnan(X[:, 0]) | np.isnan(X[:, 1]))
    X = X[keep]
    y = y[keep]

    # Get list of labels and list of color
    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, len(labels)))

    # Build the groups to draw as (row mask, legend text, color)
    if not single_out:
        # One group per class
        groups = [(y == label, "Class %s" % label, list_color[i])
                  for i, label in enumerate(labels)]
        fig_name = feat2 + "_vs_" + feat1 + "_byclass" + fig_format
    else:
        # Two groups : singled out classes vs all the other classes.
        # A boolean mask is used (rather than writing 0/1 into a copy of
        # y) so that non numeric labels are handled correctly.
        singled = np.zeros(len(y), dtype=bool)
        for label in labels:
            if label in single_out:
                singled |= (y == label)
        not_single = [label for label in labels
                      if label not in single_out]
        # Two colors are needed even when a single class is present
        if len(list_color) < 2:
            list_color = cm.Accent(np.linspace(0, 1, 2))
        groups = [(~singled,
                   "Classe(s) " + "-".join(map(str, not_single)),
                   list_color[0]),
                  (singled,
                   "Classe(s) " + "-".join(map(str, single_out)),
                   list_color[1])]
        fig_name = feat2 + "_vs_" + feat1 + "_byclass_" \
            + "-".join(map(str, single_out)) + "_vs _rest" + fig_format

    for mask, text, color in groups:
        # `color` (not `c`) : a single RGBA sequence passed as `c` is
        # ambiguous with an array of values to be colormapped
        plt.scatter(X[mask, 0], X[mask, 1],
                    color=tuple(color),
                    label=text,
                    s=marker_size,
                    alpha=alpha)

    # Add legend and labels. ncol must be an integer >= 1
    plt.legend(loc=legend_location,
               bbox_to_anchor=bbox_to_anchor,
               ncol=max(1, len(labels) // 2),
               fancybox=True)
    plt.xlabel(feat1, fontsize=fontsize)
    plt.ylabel(feat2, fontsize=fontsize)
    # Adjust white space for better visibility
    plt.subplots_adjust(top=0.8)
    if logXscale:
        plt.xscale("log")
    if logYscale:
        plt.yscale("log")
    plt.show()
    raw_input("Inspect plot then press any key: ")

    if not fig_dir:
        fig_dir = raw_input("Enter figure directory: ")
    fig_dir = os.path.expanduser(fig_dir)
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)
    plt.savefig(os.path.join(fig_dir, fig_name))
def plotPCA(X, y=None, n_components=None, plot_indices=(0, 1),
            fig_dir=None, fig_title=None, fig_format=".png",
            marker_size=50, alpha=1, legend_location="best",
            verbose=False):
    """
    Fit a PCA on the data provided as input and plot two components.

    Columns of X containing NaN are dropped before fitting. The figure
    is shown in interactive mode, the function waits for user input and
    the figure is then saved as fig_dir/fig_title + fig_format (the user
    is prompted for the directory and the title when they are missing).

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)
    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        optional class labels. When given, each class is drawn with its
        own color and a legend is added; when None, all the points are
        drawn in black.
    n_components : int, (default None => keep all components).
                    number of components to keep.
    plot_indices : tuple of int : indices of princ components to plot
    fig_dir : str, directory to save figure.
        Specify the path relative to current directory
        (e.g. "./Figures/") or a path in the home directory
        (e.g. "~/Desktop/", "~" is expanded)
    fig_title : str, figure title (used as file name, no extension)
    fig_format : str, choose your favorite (.eps, .png)
    marker_size : int, size of scatter plot markers
    alpha : float (0 to 1) transparency of the plot markers
    legend_location : see matplotlib doc for possible values,
        default = best
    verbose : bool , if True, will show explained variance ratio
    """

    if isinstance(X, (pd.DataFrame, pd.Series)):
        X = X.values
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    if y is not None:
        # A one-column DataFrame yields an (n_samples, 1) array, which
        # cannot be used as a row mask : flatten it.
        y = np.ravel(y)

    # Interactive plotting mode
    plt.ion()

    # Deal with possible NaN
    print("Removing columns with NaN")
    X = X[:, ~np.isnan(X).any(axis=0)]

    print("Fitting PCA...")
    pca = PCA(n_components=n_components)
    X_r = pca.fit(X).transform(X)

    # Percentage of variance explained for each components
    if verbose:
        print('explained variance ratio (first two components): %s'
              % str(pca.explained_variance_ratio_))

    # Set up the matplotlib figure
    fig, ax = plt.subplots(figsize=(15, 15))
    comp_x, comp_y = plot_indices[0], plot_indices[1]

    # Test y itself : np.unique(None) returns an ndarray, so checking
    # the type of the unique labels can never detect missing labels.
    if y is not None:
        labels = np.unique(y)
        list_color = cm.Accent(np.linspace(0, 1, len(labels)))
        for index, label in enumerate(labels):
            mask = (y == label)
            # `color` (not `c`) : a single RGBA sequence passed as `c`
            # is ambiguous with an array of values to be colormapped
            plt.scatter(X_r[mask, comp_x],
                        X_r[mask, comp_y],
                        color=tuple(list_color[index]),
                        label="Class " + str(label),
                        s=marker_size,
                        alpha=alpha)
    else:
        plt.scatter(X_r[:, comp_x],
                    X_r[:, comp_y],
                    c="k",
                    s=marker_size,
                    alpha=alpha)

    plt.xlabel("Princ Comp # %s" % comp_x)
    plt.ylabel("Princ Comp # %s" % comp_y)
    # A legend only makes sense when there are labeled classes
    if y is not None:
        plt.legend(loc=legend_location)
    plt.show()
    raw_input("Inspect figure then press any key: ")

    if not fig_dir:
        fig_dir = raw_input("Enter figure directory location: ")
    fig_dir = os.path.expanduser(fig_dir)
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)
    if not fig_title:
        fig_title = raw_input("Enter fig title (without .XXX): ")
    plt.savefig(os.path.join(fig_dir, fig_title + fig_format))
def plot1Dbyclass(X, y, list_feat=None, single_out=[], bins=100, alpha=0.7,
                  histtype="stepfilled", stacked=True, logscale=True,
                  fontsize=20, legend_location="upper center",
                  bbox_to_anchor=(0.5, 1.25),
                  fig_dir=None, fig_format=".png"):
    """
    Plot 1D histogram for each column in X, with a class separation

    One figure is saved per feature in ``fig_dir`` (the user is prompted
    for a directory when none is given; the directory is created if
    needed). NaN values are removed before computing the histograms.

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)
    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)
    list_feat : list of str, the name of each column of X.
            No need to specify it if X is a pd.DataFrame
            (the DataFrame columns are always used in that case)
    single_out : list of int, a list of classes to single out in the
        plot (i.e. plot all other classes vs singled out classes).
        The default list is never mutated.
    bins : int : binning for the matplotlib histogram
    alpha : float, transparency for the histogram.
        NOTE: accepted for API compatibility but currently not passed
        to the histogram call.
    histtype : str, matplotlib histogram type
    stacked : bool, whether or not to stack the histograms
    logscale : bool, if True use logscale on Y axis
    fontsize : int, fontsize for X axis label
    legend_location : str, see matplotlib documentation for possible
        values, best is to keep it at its default parameters
    bbox_to_anchor : tuple of floats, where to anchor the legend box
    fig_dir : str, directory to save figure.
        Specify the path relative to current directory
        (e.g. "./Figures/") or a path in the home directory
        (e.g. "~/Desktop/", "~" is expanded)
    fig_format : str, choose your favorite (.eps, .png)
    """

    if isinstance(X, np.ndarray):
        # Only the len() call is guarded, so that an unrelated TypeError
        # raised while building the DataFrame is not reported as a
        # missing list_feat.
        try:
            n_names = len(list_feat)
        except TypeError:
            sys.exit("Please fill in the list_feat argument")
        assert n_names == X.shape[1], \
            "Length of list_feat does not match X.shape[1]"
        X = pd.DataFrame(X, columns=list_feat)

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A one-column DataFrame yields an (n_samples, 1) array, which
    # cannot be used to build 1D boolean masks : flatten it.
    y = np.ravel(y)

    # Work on a copy so that tuples / arrays are accepted as well
    single_out = list(single_out) if single_out is not None else []

    # Define list of features
    list_feat = X.columns.values
    if len(list_feat) == 0:
        return

    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, len(labels)))

    # Everything below is the same for all the features : row mask,
    # legend text and color of each group, and file name suffix.
    if not single_out:
        # One group per class
        masks = [y == label for label in labels]
        legend_labels = ["Class %s" % i for i in labels]
        colors = list_color
        suffix = "_byclass" + fig_format
    else:
        # Two groups : all the other classes vs singled out classes.
        # A boolean mask is used (rather than writing 0/1 into a copy of
        # y) so that non numeric labels are handled correctly.
        singled = np.zeros(len(y), dtype=bool)
        for label in labels:
            if label in single_out:
                singled |= (y == label)
        not_single = [label for label in labels
                      if label not in single_out]
        masks = [~singled, singled]
        legend_labels = ["Class " + "-".join(map(str, not_single)),
                         "Class " + "-".join(map(str, single_out))]
        # Two colors are needed even when a single class is present
        if len(list_color) < 2:
            list_color = cm.Accent(np.linspace(0, 1, 2))
        colors = list_color[:2]
        suffix = "_byclass_" + "-".join(map(str, single_out)) \
            + "_vs _rest" + fig_format

    if not fig_dir:
        fig_dir = raw_input("Enter figure directory: ")
    fig_dir = os.path.expanduser(fig_dir)
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    for feat in list_feat:
        column = X[feat]
        values = column.values
        list_hist = [values[mask] for mask in masks]
        # Remove possible NaN
        list_hist = [x[np.logical_not(np.isnan(x))] for x in list_hist]
        n, _, _ = plt.hist(list_hist,
                           bins=bins,
                           histtype=histtype,
                           color=colors,
                           stacked=stacked,
                           label=legend_labels)

        # Improve plot boundaries (skipped when they are not finite,
        # e.g. for a feature made only of NaN)
        spread = np.abs(np.std(column))
        x_low = np.min(column) - spread
        x_high = np.max(column) + spread
        if np.isfinite(x_low) and np.isfinite(x_high):
            plt.xlim([x_low, x_high])
        n = np.ravel(n)
        filled = n[np.nonzero(n)]
        # np.min raises on an empty array : no limits if all bins empty
        if filled.size > 0:
            plt.ylim([np.min(filled), np.max(n) + np.abs(np.std(n))])

        # Add legend and labels. ncol must be an integer >= 1
        plt.legend(loc=legend_location,
                   bbox_to_anchor=bbox_to_anchor,
                   ncol=max(1, len(labels) // 2),
                   fancybox=True)
        plt.xlabel(feat, fontsize=fontsize)
        # Adjust white space for better visibility
        plt.subplots_adjust(top=0.8)
        if logscale:
            plt.yscale("log")

        plt.savefig(os.path.join(fig_dir, feat + suffix))
        plt.clf()
        plt.close()
def plotViolinbyclass(X, y, list_feat=None, alpha=1, fontsize=20,
                      logscale=False, showmeans=False, showmedians=False,
                      showextrema=False, fig_dir=None, fig_format=".png"):
    """
    Violin Plot for each column of X with a class separation

    One figure is saved per feature in ``fig_dir`` (the user is prompted
    for a directory when none is given; the directory is created if
    needed). NaN values are removed before drawing the violins; a class
    without any remaining value keeps its tick but gets no violin.

    Parameters
    ----------
    X : pd.DataFrame or np.ndarray, shape = (n_samples, n_features)
    y : pd.Series or pd.DataFrame or np.ndarray, shape = (n_samples,)
        the array of target values for a classification task
        (flattened to 1D before use)
    list_feat : list of str, the name of each column of X.
            No need to specify it if X is a pd.DataFrame
            (the DataFrame columns are always used in that case)
    alpha : float, transparency for the violins.
        NOTE: accepted for API compatibility but currently not applied
        to the plot.
    logscale : bool, if True use logscale on Y axis
    fontsize : int, fontsize for the axis labels
    showmeans/showmedians/showextrema : bool,
            whether to show means/medians/extrema
    fig_dir : str, directory to save figure.
        Specify the path relative to current directory
        (e.g. "./Figures/") or a path in the home directory
        (e.g. "~/Desktop/", "~" is expanded)
    fig_format : str, choose your favorite (.eps, .png)
    """

    if isinstance(X, np.ndarray):
        # Only the len() call is guarded, so that an unrelated TypeError
        # raised while building the DataFrame is not reported as a
        # missing list_feat.
        try:
            n_names = len(list_feat)
        except TypeError:
            sys.exit("Please fill in the list_feat argument")
        assert n_names == X.shape[1], \
            "Length of list_feat does not match X.shape[1]"
        X = pd.DataFrame(X, columns=list_feat)

    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.values
    # A one-column DataFrame yields an (n_samples, 1) array, which
    # cannot be used to build 1D boolean masks : flatten it.
    y = np.ravel(y)

    # Define list of features
    list_feat = X.columns.values
    if len(list_feat) == 0:
        return

    labels = np.unique(y)
    list_color = cm.Accent(np.linspace(0, 1, len(labels)))
    masks = [y == label for label in labels]
    # One tick per class, at 1, 2, ..., n_classes
    tick_positions = [c + 1 for c in range(len(labels))]

    if not fig_dir:
        fig_dir = raw_input("Enter figure directory: ")
    fig_dir = os.path.expanduser(fig_dir)
    # Create directory if needed
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    # Plot with different color for each class
    for feat in list_feat:
        values = X[feat].values

        # Keep, for each class, the values that are not NaN. Classes
        # left without any value are not drawn but keep their position.
        data, positions, colors = [], [], []
        for index, mask in enumerate(masks):
            class_values = values[mask]
            class_values = class_values[~np.isnan(class_values)]
            if class_values.size == 0:
                continue
            data.append(class_values)
            positions.append(tick_positions[index])
            colors.append(list_color[index])

        if not data:
            print("Skipping %s: no finite values" % feat)
            continue

        # Set up the matplotlib figure
        fig, ax = plt.subplots(figsize=(15, 12))

        # plot violin plot
        print("Computing KDEs...")
        violin_parts = ax.violinplot(data,
                                     positions=positions,
                                     showmeans=showmeans,
                                     showmedians=showmedians,
                                     showextrema=showextrema)
        # Change color
        for pc, color in zip(violin_parts['bodies'], colors):
            pc.set_facecolor(color)
            pc.set_edgecolor('black')

        # Adjust plot boundaries. NaN-aware reductions : plain min/max
        # would return NaN, which is not a valid axis limit.
        spread = np.abs(np.nanstd(values))
        ax.set_ylim([np.nanmin(values) - spread,
                     np.nanmax(values) + spread])

        # adding horizontal grid lines
        ax.yaxis.grid(True)
        ax.set_xlabel('Class', fontsize=fontsize)
        ax.set_ylabel(feat + " values", fontsize=fontsize)

        # add x-ticks and x-tick labels (a real list, not an iterator)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels([str(label) for label in labels])

        if logscale:
            ax.set_yscale("log")

        plt.savefig(os.path.join(fig_dir,
                                 feat + "_violinbyclass" + fig_format))
        plt.clf()
        plt.close()
def plot_results():
    """
    Utility to compare the results of several experiments in terms of
    loss (WIP)

    Every directory matching "./Log/*" is expected to hold an
    "experiment_log.json" file and a "SFCNN_archi.json" file. The
    experiments are sorted in four groups, each drawn in its own panel
    of a 2 x 2 figure (train loss dashed, test loss continuous) :

    - batch size   : 10 epochs, no augmentation, 4 conv layers
    - augmentation : 10 epochs, with augmentation, 4 conv layers
    - depth        : 10 epochs, no augmentation, batch size of 32
    - more epochs  : more than 10 epochs

    The figure is saved as "./Figures/training_results.png" (the
    directory is created if needed), shown, and the function then waits
    for user input.
    """
    # Local import : `os` is not used by the rest of this function's
    # original code, so do not rely on a module level import.
    import os

    # Sorted so that colors / legend order are reproducible
    list_d_log = []
    for exp_dir in sorted(glob.glob("./Log/*")):
        with open(os.path.join(exp_dir, "experiment_log.json"), "r") as f:
            d_log = json.load(f)
        with open(os.path.join(exp_dir, "SFCNN_archi.json"), "r") as f:
            d_archi = json.load(f)

        # The depth is read from the numeric suffix of the conv layer
        # names (highest suffix found); 0 when there is no such layer.
        list_conv = [l["config"]["name"] for l in d_archi["config"]
                     if l["class_name"] == "Convolution2D"]
        list_depth = [int(name.split("_")[-1]) for name in list_conv]
        d_log["max_conv"] = max(list_depth) if list_depth else 0
        list_d_log.append(d_log)

    list_c = cm.Accent(np.linspace(0, 1, 10))

    list_by_batch = []
    list_by_aug = []
    list_by_epoch = []
    list_by_depth = []

    for d in list_d_log:
        if d["nb_epoch"] > 10:
            list_by_epoch.append(d)
        if d["nb_epoch"] == 10:
            no_aug = d["augmentator_config"]["transforms"] == {}
            if d["max_conv"] == 4:
                if no_aug:
                    list_by_batch.append(d)
                else:
                    list_by_aug.append(d)
            if no_aug and d["batch_size"] == 32:
                list_by_depth.append(d)

    gs = gridspec.GridSpec(2, 2)
    fig = plt.figure(figsize=(15, 15))

    # One panel per group : (legend prefix, key of the value appended
    # to the prefix or None, experiments of the group)
    list_panels = [("Batch size: ", "batch_size", list_by_batch),
                   ("Data augmentation prob: ", "prob", list_by_aug),
                   ("CNN depth: ", "max_conv", list_by_depth),
                   ("More epochs", None, list_by_epoch)]

    for i, (prefix, key, list_d) in enumerate(list_panels):
        ax = plt.subplot(gs[i])
        for c_counter, d in enumerate(list_d):
            label = prefix if key is None else prefix + str(d[key])
            # Cycle through the palette : only 10 colors are available
            color = list_c[c_counter % len(list_c)]
            ax.plot(d["train_loss"], "--",
                    label=label,
                    color=color,
                    linewidth=3)
            ax.plot(d["test_loss"],
                    label=label,
                    color=color,
                    linewidth=3)
        ax.set_xlabel("Number of epochs", fontsize=18)
        ax.set_ylabel("Logloss", fontsize=18)
        # No legend for an empty panel (nothing to describe)
        if list_d:
            ax.legend(loc="best")
        ax.set_ylim([0.1, 0.8])
        ax.text(0.05, 0.05,
                "Dashed: Training sample\nContinuous: Test sample",
                transform=ax.transAxes,
                fontsize=18,
                bbox=dict(boxstyle='round', facecolor="white"))
    gs.tight_layout(fig)

    # Create directory if needed
    if not os.path.exists("./Figures"):
        os.makedirs("./Figures")
    plt.savefig("./Figures/training_results.png")
    plt.show()
    raw_input()