def plt_hist(pkfile):
    """Plot a five-bin histogram of the coefficients stored in a pickle file.

    The pickle is read with ``pkload`` and must unpack into ``(params, nc)``
    where ``nc`` is a mapping (the original comment calls it "name_coef");
    only its values are plotted. The figure is handed to ``pltsave`` using
    the pickle's location, with everything after the first dot of the file
    name replaced by ``.png``.

    Args:
        pkfile: Path of the pickle file to read.

    Raises:
        ValueError: If the mapping holds no coefficients.
    """
    import os  # local import: the file-level import block is not visible here

    _, nc = pkload(pkfile)  # nc - name_coef
    coefs = list(nc.values())
    if not coefs:
        raise ValueError('no coefficients found in %r' % (pkfile,))

    # Round the data range outwards to two decimals so every value lies
    # inside [down, up].
    up = math.ceil(max(coefs) * 100) / 100.0
    down = math.floor(min(coefs) * 100) / 100.0
    if up == down:
        # All coefficients equal the same two-decimal number: widen the
        # range so the bins have a non-zero width.
        down -= 0.01
        up += 0.01

    # Exactly five equal-width bins whose last edge is `up`, whatever the
    # sign of `up` (an arange-based stop of `up + 0.1 * up` truncates the
    # top bin for non-positive `up`).
    edges = np.linspace(down, up, 6)

    print('Preparing Hist...')
    _, ax = plt.subplots()
    plt.title('Coefficient Count ' + str(len(coefs)) + ' in total')
    plt.xlabel('Coefficient')
    plt.ylabel('Number')

    # Use the counts of the histogram that is actually drawn so the y ticks
    # always cover the tallest bar.
    counts, _, _ = ax.hist(coefs, bins=edges)
    ax.set_xticks(edges)
    ax.set_xlim(down, up)
    ax.set_yticks(np.arange(0, max(counts) + 2, 2))

    print('Save fig...')
    # Split only the file name on '.', so dots in the directory part
    # (e.g. '../x.pk') do not wipe out the path.
    stem = os.path.basename(pkfile).split('.')[0]
    pltsave(os.path.join(os.path.dirname(pkfile), stem + '.png'))
    print('Success...')
def plt_bep():
    """Draw the six-panel 'BEP for Methane Activation' figure and save it.

    Every panel loads one pickle through ``pkload`` and unpacks it as
    ``(y, y_p, r2, mse)``. It scatters ``y_p`` against ``y``, adds a dashed
    line running from ``y.min()`` to ``y.max()`` on both axes, and writes
    the R2 / MSE values as text at fixed data coordinates. The finished
    figure is passed to ``pltsave`` as ``bep.png``.
    """
    print('Plotting...')
    _, grid = plt.subplots(nrows=2, ncols=3, figsize=(12, 8))
    plt.suptitle('BEP for Methane Activation')
    plt.tight_layout(pad=2.0, w_pad=2.0, h_pad=2.0)
    plt.subplots_adjust(
        left=None, bottom=None, right=None, top=0.9,
        wspace=None, hspace=0.2)

    # (axes, pickle file, panel title, text anchor) in drawing order.
    # Panels (e) and (f) occupy the right-hand column of the grid.
    panels = (
        (grid[0][0], 'ts_Hab2_CH3ab.pk',
         '(a) $E_{H^{{sp}^2}} + E_{{{CH}_3}^v}$', (-0.5, 1)),
        (grid[0][1], 'ts_Hab2_CH3ab2.pk',
         '(b) $E_{H^{{sp}^2}} + E_{{{CH}_3}^p}$', (-0.5, 1)),
        (grid[1][0], 'ts_Hab3_CH3ab.pk',
         '(c) $E_{H^{{sp}^3}} + E_{{{CH}_3}^v}$', (-0.5, 1)),
        (grid[1][1], 'ts_Hab3_CH3ab2.pk',
         '(d) $E_{H^{{sp}^3}} + E_{{{CH}_3}^p}$', (-0.5, 1)),
        (grid[0][2], 'tsra_Hab2.pk',
         '(e) $E_{H^{{sp}^2}}$', (0.4, 1.4)),
        (grid[1][2], 'tsra_Hab3.pk',
         '(f) $E_{H^{{sp}^3}}$', (0.4, 1.4)),
    )

    for axis, pk_name, heading, (text_x, text_y) in panels:
        y, y_p, r2, mse = pkload(pk_name)
        lo, hi = y.min(), y.max()
        axis.set_title(heading)
        axis.scatter(y, y_p, edgecolors=(0, 0, 0))
        axis.plot([lo, hi], [lo, hi], 'k--', lw=4)
        axis.text(text_x, text_y, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    #plt.show()
    pltsave('bep.png')
    print('Success...')
def PltCurve(outs):
    """Plot the LASSO summary figure for one pathway and save it.

    ``GetAN(outs)`` supplies nine values; seven are used here: ``a``,
    ``c``, ``n`` (annotated on the left panel), ``alphas`` and ``Ns``
    (histogrammed on the right) and ``x``, ``y`` (the curve on the left).
    The figure is passed to ``pltsave`` as ``'PerLas_' + outs + '.png'``.

    Args:
        outs: Either ``'Ets500'`` or ``'Etsra500'``; any other key raises
            ``KeyError`` when the title is looked up.
    """
    a, c, n, alphas, Ns, x, y, _x_s, _y_s = GetAN(outs)

    plt.figure(figsize=(16, 6))
    plt.tight_layout(pad=2.0, w_pad=2.0, h_pad=2.0)
    plt.subplots_adjust(
        left=None, bottom=None, right=None, top=None,
        wspace=None, hspace=0.4)

    out_title = {
        'Ets500': ['(a)', 'Surface'],
        'Etsra500': ['(b)', 'Radical'],
    }
    tag, pathway = out_title[outs]
    plt.suptitle(tag + ' LASSO for ' + pathway + ' Pathway', fontsize=16)

    # Left half: number of non-zero coefficients against alpha.
    curve_ax = plt.subplot(1, 2, 1)
    curve_ax.set_title(r'Number of Nonzero Coefficient v.s. $\alpha$')
    curve_ax.set_xlabel(r'$\alpha$')
    curve_ax.set_ylabel('Number')
    curve_ax.set_xlim(0, max(alphas) + 0.01)
    curve_ax.set_ylim(0, max(Ns) + 1)
    curve_ax.scatter(x, y, edgecolor=(0, 0, 0))
    curve_ax.plot(x, y)
    # Vertical marker at alpha = a, from 0 up to n, with its annotation.
    curve_ax.plot([a, a], [0, n], linestyle='-.', color='b', marker='x')
    note = r'$\alpha$=%0.3f' % a
    note += '\n' + r'$\theta$=%0.2f' % c
    note += '\nn=%d' % n
    curve_ax.text(a, n + 3, note)

    # Upper right: histogram of the alphas.
    alpha_ax = plt.subplot(2, 2, 2)
    alpha_ax.set_title(r'Hist for $\alpha$', fontsize=10)
    alpha_ax.set_xlabel(r'$\alpha$', fontsize=10)
    alpha_ax.set_ylabel('Frequency', fontsize=10)
    alpha_ax.hist(alphas)

    # Lower right: histogram of the non-zero coefficient counts.
    count_ax = plt.subplot(2, 2, 4)
    count_ax.set_title('Hist for Numbers', fontsize=10)
    count_ax.set_xlabel('Number of Nonzero Coefficient', fontsize=10)
    count_ax.set_ylabel('Frequency', fontsize=10)
    count_ax.hist(Ns)

    pltsave('PerLas_' + outs + '.png')
def plt_curve(pkfile):
    """Plot mean MSE against the number of features used and mark the minimum.

    The pickle is read with ``pkload`` and must unpack into
    ``(feas, best_feas, mean_MSEs, std_MSEs)``. ``best_feas`` is printed,
    ``len(feas.keys())`` sets the x range and ``mean_MSEs`` is the curve;
    ``std_MSEs`` is not used. Drawing happens on the current pyplot axes.
    The figure is handed to ``pltsave`` using the pickle's location, with
    everything after the first dot of the file name replaced by ``.png``.

    Args:
        pkfile: Path of the pickle file. The third ``_``-separated part of
            the file name (before the first dot) is appended to the title;
            if the name has fewer parts the whole stem is used instead.
    """
    import os  # local import: the file-level import block is not visible here

    feas, best_feas, mean_MSEs, _ = pkload(pkfile)
    print(best_feas)

    # A plain list supports .index() whether the pickle stored a list or an
    # array-like.
    scores = list(mean_MSEs)

    # Split only the file name, so dots or underscores in the directory
    # part do not disturb the title or the output path.
    stem = os.path.basename(pkfile).split('.')[0]
    parts = stem.split('_')
    label = parts[2] if len(parts) > 2 else stem

    print('Plotting Learning Curve...')
    plt.title('Feature Selection from LASSO ' + label, fontsize=16)
    plt.xlabel('Number of Feature Used')
    plt.ylabel('Mean MSE')

    ax = plt.gca()
    x = range(1, len(feas.keys()) + 1)
    y_top = max(scores) * 1.1
    ax.set_xlim(0, len(x) + 1)
    ax.set_xticks(np.arange(0, len(x) + 1, 2))
    ax.set_ylim(0, y_top)
    ax.set_yticks(np.arange(0, y_top, 0.5))
    ax.plot(x, scores)

    best_score = min(scores)
    best_index = scores.index(best_score)  # first occurrence of the minimum
    best_x = x[best_index]
    # Plot a dotted vertical line at the best score, marked by x
    ax.plot([best_x, best_x], [0, best_score],
            linestyle='-.', color='b', marker='x', markeredgewidth=3, ms=8)
    # Annotate the best score
    ax.annotate("%0.2f" % best_score, (best_x, best_score + 0.005))

    print('Saving fig...')
    pltsave(os.path.join(os.path.dirname(pkfile), stem + '.png'))
    print('Success...')
def Clf():
    """Plot E_ts against E_tsra for true and fitted energies, coloured by mtype.

    Reads ``../Data/CH4_neo.csv``, drops rows whose ``E_ts`` or ``E_tsra``
    cell equals the text ``'np.nan'``, and draws three scatter panels:
    the values from the file, the values from the "BEP relations" and the
    values from the "Geo relations" (linear expressions hard-coded below).
    Rows labelled ``'ts'`` in the ``mtype`` column are drawn in royalblue
    and rows labelled ``'tsra'`` in salmon. Every panel gets the same
    dashed diagonal spanning the range of the true ``E_tsra`` values. The
    figure is passed to ``pltsave`` as ``clf.png``.

    The first five columns of the CSV (after the index column) must include
    ``mtype``, ``E_ts`` and ``E_tsra``.
    """
    df = pd.read_csv('../Data/CH4_neo.csv', index_col=0)
    # NOTE(review): the comparison is against the literal string 'np.nan',
    # presumably how missing energies are written in the CSV; genuine NaN
    # cells are not removed by these two filters. Confirm against the data.
    df = df.loc[df.loc[:, 'E_ts'] != 'np.nan', :]
    df = df.loc[df.loc[:, 'E_tsra'] != 'np.nan', :]

    #n_feas = ['E_Hab3', 'E_CH3ab', 'h_O5-M2-O6-M1_hab3']
    n_feas = ['E_Hab3', 'E_CH3ab',
              'h_O1-O2-O3-M1_hab3', 'a_O2-O6-M2_hab3']
    indexs_cols = df.iloc[:, range(5)]

    Etype = indexs_cols.loc[:, 'mtype'].values
    energies = indexs_cols.loc[:, ['E_ts', 'E_tsra']].values.astype(np.float64)
    Ets, Etsra = energies.T
    features = df.loc[:, n_feas].values.astype(np.float64)
    Eh, Ech3, Dihe, Ange = features.T

    def tsclf(Ets, Etsra):
        """Split two per-row energy sequences into the 'ts' and 'tsra' groups.

        Returns two dicts (for rows labelled 'ts' and 'tsra' respectively),
        each with keys 'ts' and 'tsra' holding the matching entries of the
        first and second argument. Rows with any other label are skipped.
        """
        groups = {
            'ts': {'ts': [], 'tsra': []},
            'tsra': {'ts': [], 'tsra': []},
        }
        for kind, e_ss, e_ra in zip(Etype, Ets, Etsra):
            if kind in groups:
                groups[kind]['ts'].append(e_ss)
                groups[kind]['tsra'].append(e_ra)
        return groups['ts'], groups['tsra']

    # BEP relations
    #Ets_bep = 0.39*(Eh+Ech3)+2.11
    Ets_bep = 0.40 * (Eh + Ech3) + 2.13
    Etsra_bep = 0.90 * Eh + 3.73
    # Geo relations
    #Ets_geo = 0.30*Ech3+14*np.sin(Dihe)+0.63
    Ets_geo = 0.37 * Ech3 + 4.69 * np.sin(Dihe) + 11.54 * np.sin(Ange) + 0.52
    Etsra_geo = 0.90 * Eh + 3.73

    _, ax = plt.subplots(1, 3, figsize=(14, 4))
    plt.suptitle('Classification for Methane Activation')
    plt.tight_layout(pad=2.0, w_pad=4.0, h_pad=2.0)
    plt.subplots_adjust(left=0.08, bottom=None, right=0.95, top=0.88,
                        wspace=None, hspace=0.5)

    # Same dashed reference diagonal on every panel, taken from the true
    # E_tsra range.
    diag = [Etsra.min(), Etsra.max()]

    panels = (
        ('(a) True Values', Ets, Etsra),
        ('(b) BEP Relations', Ets_bep, Etsra_bep),
        ('(c) Relations including Geometrical Descriptors',
         Ets_geo, Etsra_geo),
    )
    for idx, (axis, (title, e_ss, e_ra)) in enumerate(zip(ax, panels)):
        ts, tsra = tsclf(e_ss, e_ra)
        if idx == 0:
            # Console dump of the true 'tsra' group, as in the original.
            print(tsra)
        axis.scatter(ts['tsra'], ts['ts'], color='royalblue', marker='o')
        axis.scatter(tsra['tsra'], tsra['ts'], color='salmon', marker='o')
        axis.plot(diag, diag, 'k--', lw=2)
        axis.set_title(title, fontsize=10)
        # Raw strings: '\ ' is not a valid escape in a normal literal and
        # triggers a SyntaxWarning on recent Python versions. The text
        # handed to matplotlib is unchanged.
        axis.set_xlabel(r'$E_{TS-ra}\ /\ ev$')
        axis.set_ylabel(r'$E_{TS-ss}\ /\ ev$')

    pltsave('clf.png')
def _plot_cv_metric(axis, results, X_axis, scorer, color):
    """Draw one scorer's train/test mean curves, test std band and best-test marker."""
    for sample, style in (('train', '--'), ('test', '-')):
        is_test = sample == 'test'
        score_mean = results['mean_%s_%s' % (sample, scorer)]
        score_std = results['std_%s_%s' % (sample, scorer)]
        # The band is only visible for the test split (alpha 0 hides it
        # for train).
        axis.fill_between(X_axis, score_mean - score_std,
                          score_mean + score_std,
                          alpha=0.1 if is_test else 0, color=color)
        axis.plot(X_axis, score_mean, style, color=color,
                  alpha=1 if is_test else 0.7,
                  label="%s (%s)" % (scorer, sample))

    # First entry ranked 1 on the test split.
    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]
    # Plot a dotted vertical line at the best score for that scorer marked by x
    axis.plot([X_axis[best_index]] * 2, [0, best_score],
              linestyle='-.', color=color, marker='x',
              markeredgewidth=3, ms=8)
    # Annotate the best score for that scorer
    axis.annotate("%0.2f" % best_score,
                  (X_axis[best_index], best_score + 0.005))


def plt_cv():
    """Plot R2 and MSE against alpha from the search stored in las_Ea.pk.

    The object returned by ``pkload('las_Ea.pk')`` must expose
    ``cv_results_`` with a ``param_alpha`` entry plus ``mean_*``, ``std_*``
    and ``rank_test_*`` entries for the scorer names ``'R2'`` and ``'MSE'``
    (presumably a fitted scikit-learn grid search -- confirm against the
    code that writes the pickle). R2 is drawn in green on the left y axis,
    MSE in black on a twin right y axis, and the figure is passed to
    ``pltsave`` as ``LearnCurve.png``.
    """
    print('Loading Pickle...')
    gslas = pkload('las_Ea.pk')
    results = gslas.cv_results_

    print('Plotting Learning Curve...')
    plt.figure(figsize=(12, 8))  # figsize in inch, 1inch=2.54cm
    plt.title("GridSearchCV for LASSO", fontsize=16)
    plt.xlabel("Alpha")

    # Get the regular numpy array from the MaskedArray
    X_axis = np.array(results['param_alpha'].data, dtype=float)

    # R2 on the primary (left) axis.
    ax = plt.gca()
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_ylabel('R2')
    _plot_cv_metric(ax, results, X_axis, 'R2', 'g')
    ax.legend(loc=2)

    # MSE on a secondary (right) axis sharing the same x axis.
    ax2 = ax.twinx()
    ax2.set_xlim(0, 1)
    ax2.set_ylim(-1, 2)
    ax2.set_ylabel('MSE')
    _plot_cv_metric(ax2, results, X_axis, 'MSE', 'k')
    ax2.legend(loc=1)

    # Pass a real boolean: the string "off" is truthy, so it can end up
    # enabling the grid instead of hiding it.
    plt.grid(False)
    pltsave('LearnCurve.png')