def stripplot_(self, adj_left=.1, adj_bottom=.1):
    """Overlay a seaborn point plot on a jittered strip plot, one row per feature.

    ``self.df`` is melted to long form with ``self.key`` kept as the
    identifier column; the melted variable column is named ``'Features'``.

    Args:
        adj_left: Forwarded to ``customplot`` as ``adj_left``.
        adj_bottom: Forwarded to ``customplot`` as ``adj_bottom``.

    Returns:
        The figure owning the axes returned by ``sns.pointplot``.
    """
    long_df = pd.melt(self.df, [self.key], var_name='Features')
    customplot(adj_bottom=adj_bottom, adj_left=adj_left)

    # Arguments common to both seaborn layers.
    shared = dict(x="value", y="Features", hue=self.key, data=long_df)

    # Background layer: translucent, jittered raw observations.
    sns.stripplot(dodge=True, jitter=True, alpha=.25, zorder=1, **shared)
    # Foreground layer: unconnected diamond markers in the dark palette.
    axes = sns.pointplot(dodge=.532, join=False, palette="dark",
                         markers="d", scale=.75, ci=None, **shared)

    # Keep only the first n legend entries, n being the number of distinct
    # values found in self.y.
    legend_handles, legend_labels = axes.get_legend_handles_labels()
    class_count = len(np.unique(self.y))
    axes.legend(legend_handles[:class_count], legend_labels[:class_count],
                loc='best', handletextpad=0, columnspacing=1, frameon=True)
    return axes.get_figure()
def plotregression_(self, **options):
    """Scatter predicted values against actual values.

    Keyword Args:
        adj_left: Forwarded to ``customplot`` (default 0.12).
        adj_bottom: Forwarded to ``customplot`` (default 0.12).
        cross_val_predict: When truthy the 'Training' series is taken from
            ``self.predictCVDF`` instead of ``self.predictTrainDF``
            (default False).

    Returns:
        The figure returned by ``customplot``.
    """
    fig, ax = customplot(
        adj_left=options.get('adj_left', 0.12),
        adj_bottom=options.get('adj_bottom', 0.12),
    )

    use_cv = options.get('cross_val_predict', False)
    train_df = self.predictCVDF if use_cv else self.predictTrainDF
    ax.scatter(train_df.Actual, train_df.Prediction,
               s=70, c='b', marker='o', label='Training')

    # The test series is only drawn when self.intercv is falsy.
    if not self.intercv:
        test_df = self.predictTestDF
        ax.scatter(test_df.Actual, test_df.Prediction,
                   s=70, c='r', marker='v', label='Testing')

    ax.legend()
    # abline is defined elsewhere on this object; presumably it draws the
    # slope-1 / intercept-0 reference line -- confirm against its definition.
    self.abline(1, 0)
    plt.xlabel('Actual')
    plt.ylabel('Prediction')
    return fig
def plot_(self, adj=None, yerr=True):
    """Draw a red bar chart of ``self.important`` ordered by ``self.indices``.

    Args:
        adj: Optional indexable whose first two items are used as
            ``adj_left`` and ``adj_bottom`` for ``customplot``; both default
            to 0.12 when ``adj`` is None.
        yerr: When truthy, ``self.std[self.indices]`` is drawn as error bars.

    Returns:
        The figure returned by ``customplot``.
    """
    from torclib2 import customplot
    import matplotlib.pyplot as plt

    aleft, abottom = (.12, .12) if adj is None else (adj[0], adj[1])
    fig, _ = customplot(adj_left=aleft, adj_bottom=abottom)

    # One bar per column of self.X.
    positions = range(self.X.shape[1])
    heights = self.important[self.indices]

    bar_options = {'color': 'r'}
    if yerr:
        bar_options['yerr'] = self.std[self.indices]
        bar_options['align'] = 'center'
    plt.bar(positions, heights, **bar_options)

    # Tick labels are the entries of self.indices, in plotting order.
    plt.xticks(positions, self.indices)
    plt.xlim([-1, self.X.shape[1]])
    plt.xlabel('Feature')
    plt.ylabel('Score')
    return fig
def boxplot_(self, separate=False, adj_left=.1, adj_bottom=.1):
    """Draw box plots of the features, grouped by ``self.key``.

    Args:
        separate: When truthy, draw one panel per group through
            ``DataFrame.groupby(...).boxplot()``. Otherwise draw a single
            seaborn box plot with ``self.key`` on the x axis and one hue per
            feature.
        adj_left: Forwarded to ``customplot`` (combined plot only).
        adj_bottom: Forwarded to ``customplot`` (combined plot only).

    Returns:
        The matplotlib figure holding the plot, in both modes.

    Note:
        ``separate=True`` updates ``matplotlib.rcParams`` in place; the
        settings stay in effect after this method returns.
    """
    if separate:
        params = {
            'font.family': 'serif',
            'font.serif': 'DejaVu Serif',
            'xtick.labelsize': 20,
            'ytick.labelsize': 20,
            'axes.labelsize': 28,
            'figure.figsize': [10.72, 8.205],
            'legend.loc': 'best',
            'legend.fontsize': 18,
            'legend.fancybox': False,
        }
        matplotlib.rcParams.update(params)
        self.df.groupby(self.key).boxplot()
        # The grouped boxplot call hands back axes rather than a figure, so
        # pick up the figure it has just drawn on; previously this branch
        # gave the caller no figure at all.
        return matplotlib.pyplot.gcf()

    customplot(adj_bottom=adj_bottom, adj_left=adj_left)
    # Only the first self.feature columns are treated as features.
    feature_columns = list(self.df)[0:self.feature]
    dd = pd.melt(self.df, id_vars=[self.key], value_vars=feature_columns,
                 var_name='Features')
    ax = sns.boxplot(x=self.key, y='value', data=dd, hue='Features')
    return ax.get_figure()
def pointplot_(self, **options):
    """Plot actual versus predicted values sample by sample.

    Samples are sorted by their ``Actual`` value. When ``self.intercv`` is
    truthy only the training data is drawn; otherwise training and test data
    are drawn in two stacked subplots.

    Keyword Args:
        adj_left: Forwarded to ``customplot`` (default 0.12).
        adj_bottom: Forwarded to ``customplot`` (default 0.12).
        cross_val_predict: When truthy the training series comes from
            ``self.predictCVDF`` instead of ``self.predictTrainDF``
            (default False).

    Returns:
        The figure returned by ``customplot``.
    """
    import math

    # range() accepts integers only. The upper bound was already ceiled;
    # floor the lower bound as well so float-valued labels do not raise.
    yint = range(math.floor(min(self.y_train)),
                 math.ceil(max(self.y_train)) + 1)
    a = options.get('adj_left', 0.12)
    b = options.get('adj_bottom', 0.12)
    use_cv = options.get('cross_val_predict', False)

    train_df = self.predictCVDF if use_cv else self.predictTrainDF
    train_df = train_df.sort_values(by='Actual')

    def _draw(frame, ylabel):
        # Dashed red line: actual values; blue line with dots: predictions.
        plt.plot(frame.Actual.values, '--r')
        plt.plot(frame.Prediction.values, '-bo')
        plt.ylabel(ylabel)
        plt.yticks(yint)

    if self.intercv:
        fig, _ = customplot(adj_left=a, adj_bottom=b)
        _draw(train_df, 'Class prediction')
        plt.xlabel('Samples')
        return fig

    test_df = self.predictTestDF.sort_values(by='Actual')
    fig, _ = customplot(adj_bottom=b, adj_left=a)
    plt.subplot(2, 1, 1)
    _draw(train_df, 'Train prediction')
    plt.subplot(2, 1, 2)
    _draw(test_df, 'Test prediction')
    plt.xlabel('Samples')
    return fig
def plotcm(self, external=False, intercv=False, **options):
    """Plot a confusion matrix for the fitted classifier.

    Args:
        external: When truthy, evaluate ``self.clf`` on
            ``self.X_test`` / ``self.y_test``.
        intercv: Only used when ``external`` is falsy. When truthy the
            predictions are read from ``self.crossValPredict``; otherwise
            ``self.clf`` predicts on ``self.X_train``.

    Keyword Args:
        adj_left: Forwarded to ``customplot`` (default 0.1).
        adj_bottom: Forwarded to ``customplot`` (default 0.2).
        print_stats: When truthy, print ``self.CM`` (default False).
        title: When truthy, add a title naming the data source
            (default False).
        figsize: Forwarded to ``customplot`` (default ``[5, 5]``).
        axes_size: Forwarded to ``customplot`` (default 22).

    Returns:
        The figure returned by ``customplot``, or None (after printing a
        notice) when ``self.mode`` is not ``'classification'``.

    Side effects:
        Stores a ``ConfusionMatrix`` in ``self.CM``, sets NumPy's global
        print precision to 2 and calls ``plt.show()``.
    """
    if self.mode != 'classification':
        print('Just for classification')
        return None

    # Imported only once it is known the plot will actually be drawn.
    from nanolib.utils import plot_confusion_matrix

    a = options.get('adj_left', 0.1)
    b = options.get('adj_bottom', 0.2)
    ps = options.get('print_stats', False)
    title = options.get('title', False)
    figsize = options.get('figsize', [5, 5])
    axes_size = options.get('axes_size', 22)

    if external:
        y_true = self.y_test
        y_pred = self.clf.predict(self.X_test)
    else:
        y_true = self.y_train
        if intercv:
            y_pred = self.crossValPredict
        else:
            y_pred = self.clf.predict(self.X_train)

    # Use the union of true and predicted labels for both the matrix and its
    # tick labels. Taking the names from y_true alone misaligns the ticks
    # whenever a class is predicted that never occurs in y_true.
    class_names = np.union1d(y_true, y_pred)
    cm_ = confusion_matrix(y_true, y_pred, labels=class_names)
    np.set_printoptions(precision=2)

    self.CM = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)
    if ps:
        print(self.CM)

    fig, _ = customplot(adj_bottom=b, adj_left=a, figsize=figsize,
                        axes_size=axes_size)
    if title:
        if external:
            heading = 'External-Validation'
        elif intercv:
            heading = 'Internal-Validation'
        else:
            heading = 'Training Results'
        plt.title(heading)

    plot_confusion_matrix(cm_, classes=class_names)
    plt.show()
    return fig
def hierarchical_clustering(self, max_d=0):
    """Draw a Ward-linkage dendrogram of the rows of ``self.X``.

    Args:
        max_d: Height at which a black horizontal line is drawn across the
            dendrogram.

    Returns:
        The figure owning the axes returned by ``customplot``.
    """
    from scipy.cluster.hierarchy import dendrogram, linkage

    samples = self.X
    n_samples = samples.shape[0]
    merge_tree = linkage(samples, 'ward')
    # Leaves are labelled with their row position in self.X.
    leaf_labels = np.arange(0, n_samples)

    _, axes = customplot()
    dendrogram(
        merge_tree,
        truncate_mode='lastp',
        p=n_samples,
        orientation='top',
        labels=leaf_labels,
        distance_sort='descending',
        show_leaf_counts=True,
    )
    plt.axhline(y=max_d, c='k')
    return axes.get_figure()
def screenplot(self, **options):
    """Bar chart of the ``'Var (%)'`` column of ``self.vardf`` against ``'PC'``.

    Keyword Args:
        adj_left: Forwarded to ``customplot`` (default 0.1).
        adj_bottom: Forwarded to ``customplot`` (default 0.2).
        PC: Optional index label; when given, only the rows selected by
            ``self.vardf.loc[:PC, :]`` are plotted (default None, all rows).

    Returns:
        The figure returned by ``customplot``.
    """
    left = options.get('adj_left', 0.1)
    bottom = options.get('adj_bottom', 0.2)
    last_row = options.get('PC', None)

    # .loc slicing is label based, so the row labelled `last_row` is kept.
    table = self.vardf if last_row is None else self.vardf.loc[:last_row, :]

    fig, _ = customplot(adj_bottom=bottom, adj_left=left)
    plt.bar(x='PC', height='Var (%)', data=table)
    plt.xticks(rotation='vertical')
    plt.xlabel('Principal Component')
    plt.ylabel('Percentage of Variance')
    return fig
def plotpc(self, **options):
    """Scatter two columns of ``self.pcadf`` against each other, one style per label.

    Keyword Args:
        PC: Two column names of ``self.pcadf`` to plot as x and y
            (default ``['PC1', 'PC2']``).
        adj_left: Forwarded to ``customplot`` (default 0.1).
        adj_bottom: Forwarded to ``customplot`` (default 0.15).
        size: Marker size passed to ``scatter`` (default 90).
        ascending: Sort direction applied to the ``'label'`` column
            (default True).

    Returns:
        The figure returned by ``customplot``.

    Raises:
        TooMuchUnique: If there are more than 10 distinct labels (only 10
            colour/marker pairs are defined).
        ValueError: If a requested component does not match exactly one row
            of ``self.vardf``.

    Side effects:
        ``self.pcadf`` is replaced by a copy sorted by ``'label'``.
    """
    PC = options.get('PC', ['PC1', 'PC2'])
    a = options.get('adj_left', 0.1)
    b = options.get('adj_bottom', 0.15)
    s = options.get('size', 90)
    ascending = options.get('ascending', True)

    self.pcadf = self.pcadf.sort_values(by=['label'], ascending=ascending)

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'C0', 'C1', 'C2']
    markers = ["o", "v", "s", "p", "P", "*", "h", "H", "X", "D"]
    targets = list(self.pcadf['label'].unique())
    if len(targets) > 10:
        raise TooMuchUnique(str(targets))

    def _axis_label(component):
        # Column 0 of self.vardf holds the value shown in the axis label.
        matches = self.vardf.values[(self.vardf["PC"] == component).values, 0]
        if matches.size != 1:
            raise ValueError(
                f'expected exactly one row for {component!r} in vardf, '
                f'found {matches.size}'
            )
        # Index the single element before converting: float() on a 1-D
        # array is deprecated by NumPy.
        return f'{component} ({float(matches[0])}%)'

    xlabs = _axis_label(PC[0])
    ylabs = _axis_label(PC[1])

    fig, ax = customplot(adj_left=a, adj_bottom=b)
    # zip stops at the shortest input, so unused colours/markers are ignored.
    for target, color, mark in zip(targets, colors, markers):
        rows = self.pcadf['label'] == target
        ax.scatter(
            self.pcadf.loc[rows, PC[0]],
            self.pcadf.loc[rows, PC[1]],
            c=color,
            marker=mark,
            s=s,
        )
    plt.xlabel(xlabs)
    plt.ylabel(ylabs)
    # Legend entries follow the order in which the groups were drawn.
    plt.legend(targets)
    return fig
def plotlda(self, **options):
    """Plot the ``LD1`` / ``LD2`` columns of ``self.ldadf`` grouped by label.

    With fewer than three distinct values in ``self.y`` a seaborn strip plot
    of ``LD1`` per label is drawn; otherwise ``LD1`` is scattered against
    ``LD2`` with one colour/marker pair per label. When ``self.dual`` is
    truthy the rows of ``self.ldaval`` are drawn as well.

    Keyword Args:
        adj_left: Forwarded to ``customplot`` (default 0.1).
        adj_bottom: Forwarded to ``customplot`` (default 0.15).
        ascending: Sort direction applied to the ``'label'`` column
            (default True).
        size: Marker size; defaults to 10 for the strip plot and 90 for the
            scatter plot.

    Returns:
        The figure returned by ``customplot``.

    Raises:
        TooMuchUnique: In scatter mode, if there are more than 10 distinct
            labels.

    Side effects:
        ``self.ldadf`` (and ``self.ldaval`` when ``self.dual``) are replaced
        by copies sorted by ``'label'``.
    """
    import seaborn as sns

    left = options.get('adj_left', 0.1)
    bottom = options.get('adj_bottom', 0.15)
    ascending = options.get('ascending', True)
    self.ldadf = self.ldadf.sort_values(by=['label'], ascending=ascending)

    if len(np.unique(self.y)) < 3:
        fig, _ = customplot(adj_left=left, adj_bottom=bottom)
        size = options.get('size', 10)
        if self.dual:
            self.ldaval = self.ldaval.sort_values(by=['label'],
                                                  ascending=ascending)
            sns.stripplot(x="label", y="LD1", color='k', size=size,
                          data=self.ldadf)
            sns.stripplot(x="label", y="LD1", marker='^', color='red',
                          size=size, data=self.ldaval)
        else:
            sns.stripplot(x="label", y="LD1", size=size, data=self.ldadf)
        plt.xlabel('Classes')
        plt.axhline(y=0, linewidth=1.5, color='black', linestyle='--')
        return fig

    palette = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'C0', 'C1', 'C2']
    shapes = ["o", "v", "s", "p", "P", "*", "h", "H", "X", "D"]
    targets = list(self.ldadf['label'].unique())
    size = options.get('size', 90)
    if len(targets) > 10:
        raise TooMuchUnique(str(targets))

    # zip stops at the shortest input, pairing each label with one style.
    styled_targets = list(zip(targets, palette, shapes))
    xlabs = f'LD1 ({self.vardf.values[0, 0]}%)'
    ylabs = f'LD2 ({self.vardf.values[1, 0]}%)'

    fig, ax = customplot(adj_left=left, adj_bottom=bottom)
    for target, color, mark in styled_targets:
        rows = self.ldadf['label'] == target
        ax.scatter(
            self.ldadf.loc[rows, 'LD1'],
            self.ldadf.loc[rows, 'LD2'],
            c=color,
            marker=mark,
            s=size,
        )

    if self.dual:
        # The plot came out unordered, so this sort was added (14/05).
        self.ldaval = self.ldaval.sort_values(by=['label'],
                                              ascending=ascending)
        for target, color, mark in styled_targets:
            rows = self.ldaval['label'] == target
            # Hollow markers: no face colour, edge in the label's colour.
            ax.scatter(
                self.ldaval.loc[rows, 'LD1'],
                self.ldaval.loc[rows, 'LD2'],
                marker=mark,
                s=size,
                facecolors='none',
                edgecolors=color,
            )

    plt.legend(targets)
    plt.xlabel(xlabs)
    plt.ylabel(ylabs)
    return fig
def corrplot_(self, adj_left=.1, adj_bottom=.1, size=20):
    """Draw an annotated heatmap of the Pearson correlations of ``self.df``.

    Args:
        adj_left: Forwarded to ``customplot`` as ``adj_left``.
        adj_bottom: Forwarded to ``customplot`` as ``adj_bottom``.
        size: Font size of the per-cell annotations.

    Returns:
        The figure owning the axes returned by ``sns.heatmap``.
    """
    pearson = self.df.corr(method='pearson')
    customplot(adj_bottom=adj_bottom, adj_left=adj_left)

    heatmap_options = {
        # Colour scale pinned to [-1, 1].
        'vmin': -1,
        'vmax': 1,
        'cmap': 'YlGnBu',
        'annot': True,
        'annot_kws': {"size": size},
    }
    axes = sns.heatmap(pearson, **heatmap_options)
    return axes.get_figure()
def swarmplot_(self, adj_left=.1, adj_bottom=.1):
    """Draw a seaborn swarm plot of every feature, coloured by ``self.key``.

    Args:
        adj_left: Forwarded to ``customplot`` as ``adj_left``.
        adj_bottom: Forwarded to ``customplot`` as ``adj_bottom``.

    Returns:
        The figure owning the axes returned by ``sns.swarmplot``.
    """
    customplot(adj_bottom=adj_bottom, adj_left=adj_left)
    # Long form: self.key stays as identifier, the remaining columns are
    # stacked under 'Features' / 'value'.
    long_df = pd.melt(self.df, [self.key], var_name='Features')
    axes = sns.swarmplot(data=long_df, x='Features', y='value', hue=self.key)
    return axes.get_figure()
def matrixplot_(self, adj_left=.1, adj_bottom=.1):
    """Draw a seaborn pair plot of ``self.df`` coloured by ``self.key``.

    Args:
        adj_left: Forwarded to ``customplot`` as ``adj_left``.
        adj_bottom: Forwarded to ``customplot`` as ``adj_bottom``.

    Returns:
        The object returned by ``sns.pairplot`` (not the ``customplot``
        figure).
    """
    # customplot's own figure is not used: the current figure is closed right
    # after the call. NOTE(review): presumably the call is kept for the
    # settings it applies -- confirm against customplot.
    _discarded_fig, _ = customplot(adj_bottom=adj_bottom, adj_left=adj_left)
    matplotlib.pyplot.close()
    return sns.pairplot(self.df, hue=self.key)
def plot_learning(self):
    """Plot learning-curve diagnostics for ``self.grid.best_estimator_``.

    ``learning_curve`` is run on ``self.X`` / ``self.y`` with ``self.cv`` at
    five training-set fractions from 0.6 to 1.0, and three figures are drawn:
    score versus training size, fit time versus training size, and score
    versus fit time. Shaded bands span one standard deviation either side of
    the mean across folds.

    Returns:
        Tuple ``(fig1, fig2, fig3)`` of the three figures, in that order.
    """
    from sklearn.model_selection import learning_curve

    sizes, train_scores, cv_scores, fit_times, _ = learning_curve(
        self.grid.best_estimator_,
        self.X,
        self.y,
        cv=self.cv,
        n_jobs=-1,
        train_sizes=np.linspace(.6, 1.0, 5),
        return_times=True,
    )

    def _mean_and_std(per_fold):
        # Collapse the fold axis (axis 1), leaving one value per train size.
        return np.mean(per_fold, axis=1), np.std(per_fold, axis=1)

    train_mu, train_sd = _mean_and_std(train_scores)
    cv_mu, cv_sd = _mean_and_std(cv_scores)
    time_mu, time_sd = _mean_and_std(fit_times)

    # Figure 1: score against number of training examples.
    fig1, _ = customplot(adj_bottom=.13, adj_left=.12)
    plt.xlabel('Training examples')
    plt.ylabel('Score')
    plt.fill_between(sizes, train_mu - train_sd, train_mu + train_sd,
                     alpha=0.1, color="r")
    plt.fill_between(sizes, cv_mu - cv_sd, cv_mu + cv_sd,
                     alpha=0.1, color="g")
    plt.plot(sizes, train_mu, 'o-', color="r", lw=2,
             label="Training score")
    plt.plot(sizes, cv_mu, 'o-', color="g", lw=2,
             label="Cross-validation score")
    plt.legend(loc="best")

    # Figure 2: fit time against number of training examples.
    fig2, _ = customplot(adj_bottom=.13, adj_left=.15)
    plt.xlabel('Training examples')
    plt.ylabel('Fit times')
    plt.plot(sizes, time_mu, 'o-', lw=2)
    plt.fill_between(sizes, time_mu - time_sd, time_mu + time_sd, alpha=0.1)

    # Figure 3: cross-validation score against fit time.
    fig3, _ = customplot(adj_bottom=.13, adj_left=.12)
    plt.xlabel('Fit times')
    plt.ylabel('Score')
    plt.plot(time_mu, cv_mu, 'o-', lw=2)
    plt.fill_between(time_mu, cv_mu - cv_sd, cv_mu + cv_sd, alpha=0.1)

    return fig1, fig2, fig3
def plot_validation_curve(self, ylim=None):
    """Plot validation curves for the SVM hyper-parameters of ``self.grid``.

    For each plotted parameter, ``validation_curve`` is run with
    ``self.grid.best_estimator_`` on ``self.X`` / ``self.y`` over the values
    listed in ``self.params[0]``, scored by accuracy with ``self.cv``. The
    best grid-search value is marked with a dashed vertical line and a dot
    annotated with ``best_score_``.

    Args:
        ylim: Two-item sequence with the lower and upper y-axis limits;
            defaults to ``[0.5, 1.1]``.

    Returns:
        A single figure (curve over ``svm__C``) when the best kernel is
        ``'linear'``; otherwise a tuple ``(fig1, fig2)`` with the curves over
        ``svm__C`` and ``svm__gamma``.
    """
    from sklearn.model_selection import validation_curve

    if ylim is None:
        ylim = [0.5, 1.1]
    best = self.grid.best_params_
    lw = 2

    def _draw_curve(param_name, xlabel):
        # One figure: mean +/- std of train and CV accuracy over the values
        # of `param_name`, on a logarithmic x axis.
        param_range = self.params[0].get(param_name)
        train_scores, test_scores = validation_curve(
            self.grid.best_estimator_, self.X, self.y,
            param_name=param_name, param_range=param_range,
            scoring='accuracy', cv=self.cv, n_jobs=-1)
        train_mean = np.mean(train_scores, axis=1)
        train_std = np.std(train_scores, axis=1)
        test_mean = np.mean(test_scores, axis=1)
        test_std = np.std(test_scores, axis=1)

        fig, ax = customplot(adj_bottom=.13, adj_left=.12)
        plt.xlabel(xlabel)
        plt.ylabel("Score")
        plt.ylim(ylim[0], ylim[1])
        plt.semilogx(param_range, train_mean, label="Training score",
                     color="r", lw=lw)
        plt.fill_between(param_range, train_mean - train_std,
                         train_mean + train_std, alpha=0.1, color="r", lw=lw)
        plt.semilogx(param_range, test_mean, label="Cross-validation score",
                     color="g", lw=lw)
        plt.fill_between(param_range, test_mean - test_std,
                         test_mean + test_std, alpha=0.1, color="g", lw=lw)

        # Mark the value selected by the grid search and its score.
        best_value = best[param_name]
        plt.axvline(x=best_value, color='k', linestyle='--')
        plt.plot(best_value, self.grid.best_score_, 'ok')
        ax.text(best_value, self.grid.best_score_,
                f' {round(self.grid.best_score_, 2)}', fontsize=20)
        plt.legend(loc="best")
        return fig

    if best['svm__kernel'] == 'linear':
        print('linear')
        # Mention gamma only when the winning parameter set carries one,
        # instead of raising KeyError when it does not.
        if 'svm__gamma' in best:
            cost_label = f"Cost (with Gamma: {best['svm__gamma']})"
        else:
            cost_label = "Cost"
        return _draw_curve('svm__C', cost_label)

    print('radial')
    fig1 = _draw_curve('svm__C', f"Cost (with Gamma: {best['svm__gamma']})")
    fig2 = _draw_curve('svm__gamma', f"Gamma (with Cost: {best['svm__C']})")
    return fig1, fig2