def clf_yx():
    """Load the final LASSO results for both energy sets and print their
    name->coefficient dictionaries."""
    for tag, pk in (('Ets', 'Ets_final.pk'), ('Etsra', 'Etsra_final.pk')):
        _, name_coef = pkload(pk)  # (estimator, {feature: coefficient})
        print('--' + tag + '--')
        print_dict(name_coef)
def cmp_etype():
    """Print the best-feature coefficient tables for the Ets and Etsra sets
    so the two energy types can be compared side by side."""
    _, Ets_best, _, _ = pkload('Best_las_Ets.pk')
    _, Etsra_best, _, _ = pkload('Best_las_Etsra.pk')

    def show(best):
        # Aligned "name -> coefficient" listing, coefficients rounded to 4 dp.
        for fea_name, coefficient in best.items():
            print('{:<20} -> {:<10}'.format(fea_name, round(coefficient, 4)))

    print('Ets')
    show(Ets_best)
    print('Etsra')
    show(Etsra_best)
def plt_hist(pkfile):
    """Plot and save a histogram of the non-zero LASSO coefficients stored in
    *pkfile*; the figure is written as '<pkfile stem>.png'.

    Parameters
    ----------
    pkfile : str
        Pickle holding (estimator_params, {feature_name: coefficient}).
    """
    params, nc = pkload(pkfile)  # nc - name_coef

    # Axis bounds: round the extremes outward to the nearest 0.01.
    coef_max, coef_min = max(nc.values()), min(nc.values())
    up = math.ceil(coef_max * 100) / 100.0
    # ceil(x * -100) / -100 == floor to the nearest 0.01, which is correct for
    # either sign of coef_min.  BUG FIX: the original if/else had two
    # byte-identical branches, so the dead duplicate was collapsed to one line.
    down = math.ceil(coef_min * -100) / -100.0

    print('Preparing Hist...')
    fig, ax = plt.subplots()
    plt.title('Coefficient Count ' + str(len(nc.keys())) + ' in total')
    plt.xlabel('Coefficient')
    plt.ylabel('Number')

    # Five equal-width bins spanning [down, up]; tick marks on the bin edges.
    ax.hist(nc.values(), bins=np.arange(down, up + 0.1 * up, (up - down) / 5))
    counts, edges = np.histogram(list(nc.values()), bins=5)
    ax.set_xticks(np.arange(down, up + 0.1 * up, (up - down) / 5))
    ax.set_xlim(down, up)
    ax.set_yticks(np.arange(0, max(counts) + 2, 2))

    print('Save fig...')
    pltsave(pkfile.split('.')[0] + '.png')
    print('Success...')
def FeaSelection(pkfile):
    """Greedy forward feature selection over the LASSO-ranked features in
    *pkfile* and dump the best prefix to 'Best<suffix>.pk'.

    Feature prefixes of increasing length are scored with reg_assemble
    (mean/std cross-validated MSE); the prefix with the lowest mean MSE wins.

    NOTE(review): a zero-argument FeaSelection defined later in this module
    shadows this definition at import time -- confirm which one is intended.
    """
    # --- Prepare data ---
    print('Preparing Data...')
    params, feas = pkload(pkfile)
    DS = GetDS('Ets', feas.keys())
    y = DS['target']
    X = DS['features']

    # --- Score each feature-count prefix ---
    print('Selecting Features...')
    mean_MSEs = []
    std_MSEs = []
    feas_number = len(X[0])
    for i in range(feas_number):
        # Columns 0..i of X.  Direct column slicing replaces the original
        # O(n^2) row-by-row rebuild via repeated X.T[j] appends.
        X_slice = np.array(X[:, :i + 1])
        mean_MSE, std_MSE = reg_assemble(y, X_slice)
        mean_MSEs.append(mean_MSE)
        std_MSEs.append(std_MSE)

    # --- Keep the prefix with the lowest mean MSE ---
    names = list(feas.keys())
    best_index = mean_MSEs.index(min(mean_MSEs))
    best_feas = {name: feas[name] for name in names[:best_index + 1]}

    # --- Dump pickle ---
    print('Saving pk...')
    # BUG FIX: str.strip('PosCoef') removes any leading/trailing characters
    # from the SET {P,o,s,C,e,f}; it only happened to work for
    # 'PosCoef_*.pk' names.  Remove the literal prefix instead.
    suffix = pkfile[len('PosCoef'):] if pkfile.startswith('PosCoef') else pkfile
    bestreg_pk = (feas, best_feas, mean_MSEs, std_MSEs)
    pkdump('Best' + suffix, bestreg_pk)
    print('Success...')
def plt_bar():
    """Show a bar chart of the best-feature coefficients stored in
    'BestReg-test.pk'."""
    feas, best_feas, best_coefs, meanMSEs, stdMSEs = pkload('BestReg-test.pk')

    # One bar per selected feature, labelled by feature name.
    positions = range(len(best_feas))
    fig, ax = plt.subplots()
    plt.bar(positions, best_coefs)
    plt.xticks(positions, best_feas)
    plt.show()
def plt_bep():
    """Draw the 2x3 BEP panel figure for methane activation and save it as
    'bep.png'.

    Each panel is a parity plot (DFT value vs. model prediction) for one
    model pickle, annotated with its R^2 and MSE.
    """
    def _panel(ax, pkfile, title, text_x, text_y):
        # One parity plot: scatter, y=x reference line, and R^2/MSE label.
        y, y_p, r2, mse = pkload(pkfile)
        ax.set_title(title)
        ax.scatter(y, y_p, edgecolors=(0, 0, 0))
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
        ax.text(text_x, text_y, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    print('Plotting...')
    fig, ((ax1, ax2, ax5), (ax3, ax4, ax6)) = plt.subplots(
        nrows=2, ncols=3, figsize=(12, 8))
    plt.suptitle('BEP for Methane Activation')
    plt.tight_layout(pad=2.0, w_pad=2.0, h_pad=2.0)
    plt.subplots_adjust(left=None, bottom=None, right=None, top=0.9,
                        wspace=None, hspace=0.2)

    # Panels (a)-(d): combined H + CH3 descriptors; (e)-(f): H-only descriptors.
    _panel(ax1, 'ts_Hab2_CH3ab.pk', '(a) $E_{H^{{sp}^2}} + E_{{{CH}_3}^v}$', -0.5, 1)
    _panel(ax2, 'ts_Hab2_CH3ab2.pk', '(b) $E_{H^{{sp}^2}} + E_{{{CH}_3}^p}$', -0.5, 1)
    _panel(ax3, 'ts_Hab3_CH3ab.pk', '(c) $E_{H^{{sp}^3}} + E_{{{CH}_3}^v}$', -0.5, 1)
    _panel(ax4, 'ts_Hab3_CH3ab2.pk', '(d) $E_{H^{{sp}^3}} + E_{{{CH}_3}^p}$', -0.5, 1)
    _panel(ax5, 'tsra_Hab2.pk', '(e) $E_{H^{{sp}^2}}$', 0.4, 1.4)
    _panel(ax6, 'tsra_Hab3.pk', '(f) $E_{H^{{sp}^3}}$', 0.4, 1.4)

    pltsave('bep.png')
    print('Success...')
def FeaSelection():
    """Forward feature selection on the Ets set after per-column feature
    transforms, dumping the best prefix to 'Best_Ets.pk'.

    Columns whose name starts with 'h' are mapped through sin((x/2)**2) and
    columns starting with 'a' are squared before scoring.

    NOTE(review): this redefines the one-argument FeaSelection above; only
    this zero-argument version survives at import time -- confirm intent.
    """
    # --- Prepare data ---
    print('Preparing Data...')
    las, nc = pkload('Ets_final.pk')
    DS = GetDS('Ets', nc.keys())
    y = DS['target']
    X = DS['features']

    # In-place per-column transforms keyed on the feature-name prefix.
    names = list(nc.keys())
    for i, name in enumerate(names):
        prefix = name.split('_')[0]
        if prefix == 'h':
            X.T[i] = np.sin((X.T[i] / 2) ** 2)
        elif prefix == 'a':
            X.T[i] = X.T[i] ** 2

    # --- Score each feature-count prefix ---
    # (the original's unused `r2s` accumulator was removed)
    print('Selecting Features...')
    mean_MSEs = []
    std_MSEs = []
    for i in range(len(X[0])):
        # Columns 0..i.  Direct slicing replaces the original O(n^2)
        # row-by-row rebuild via repeated X.T[j] appends.
        X_slice = np.array(X[:, :i + 1])
        mean_MSE, std_MSE = reg_assemble(y, X_slice)
        mean_MSEs.append(mean_MSE)
        std_MSEs.append(std_MSE)

    # --- Keep the prefix with the lowest mean MSE ---
    best_index = mean_MSEs.index(min(mean_MSEs))
    best_feas = {name: nc[name] for name in names[:best_index + 1]}

    # --- Dump pickle ---
    print('Saving pk...')
    pkdump('Best_Ets.pk', (nc, best_feas, mean_MSEs, std_MSEs))
    print('Success...')
def PreSelection(pkfile):
    """Collect the non-zero coefficients from a fitted estimator pickle,
    sort them by absolute magnitude (largest first), and dump the resulting
    {name: coefficient} dict as 'PosCoef_<pkfile>'."""
    print('-' * 20)

    # Load the dataset whose tag is embedded in the pickle file name.
    print('Loading DataSet...')
    DS = GetDS(pkfile.split('.')[0].split('_')[1])

    # 'lsr.pk' stores the estimator directly; grid-search pickles store it
    # under best_estimator_.
    print('Loading ' + pkfile + '...')
    gs = pkload(pkfile)
    best_gs = gs if pkfile == 'lsr.pk' else gs.best_estimator_

    # Pair feature names with their coefficients, keeping non-zero ones only.
    print('Collecting Coefficients...')
    name_coef = {name: coef
                 for name, coef in zip(DS['fea_names'], best_gs.coef_)
                 if abs(coef) > 0}

    # Insertion order of the result follows descending |coefficient|.
    nc_dict = dict(sorted(name_coef.items(),
                          key=lambda item: abs(item[1]), reverse=True))

    print('Dumping Pickle...')
    pkdump('PosCoef_' + pkfile, nc_dict)
    print('Success...')
    print('-' * 20)
def plt_curve(pkfile):
    """Plot mean cross-validated MSE versus the number of features used,
    mark the minimum, and save the figure as '<pkfile stem>.png'."""
    feas, best_feas, mean_MSEs, std_MSEs = pkload(pkfile)
    print(best_feas)

    print('Plotting Learning Curve...')
    plt.title('Feature Selection from LASSO '
              + pkfile.split('.')[0].split('_')[2], fontsize=16)
    plt.xlabel('Number of Feature Used')
    plt.ylabel('Mean MSE')

    ax = plt.gca()
    x = range(1, len(feas.keys()) + 1)
    ax.set_xlim(0, len(x) + 1)
    ax.set_xticks(np.arange(0, len(x) + 1, 2))
    ax.set_ylim(0, max(mean_MSEs) * 1.1)
    ax.set_yticks(np.arange(0, max(mean_MSEs) * 1.1, 0.5))
    ax.plot(x, mean_MSEs)

    # Dash-dot vertical line (marked by x) at the feature count with the
    # lowest mean MSE, annotated with that score.
    best_score = min(mean_MSEs)
    best_index = mean_MSEs.index(best_score)
    ax.plot([x[best_index]] * 2, [0, best_score],
            linestyle='-.', color='b', marker='x', markeredgewidth=3, ms=8)
    ax.annotate("%0.2f" % best_score, (x[best_index], best_score + 0.005))

    print('Saving fig...')
    pltsave(pkfile.split('.')[0] + '.png')
    print('Success...')
def plt_cv():
    """Plot the GridSearchCV train/test curves over alpha for the LASSO model
    in 'las_Ea.pk' -- R2 on the left axis, MSE on a twinned right axis --
    and save the figure as 'LearnCurve.png'."""
    def _plot_scorer(ax, results, X_axis, scorer, color):
        # Train/test mean +- std bands for one scorer, plus a dash-dot
        # vertical marker at the best (rank-1) alpha annotated with its score.
        for sample, style in (('train', '--'), ('test', '-')):
            score_mean = results['mean_%s_%s' % (sample, scorer)]
            score_std = results['std_%s_%s' % (sample, scorer)]
            ax.fill_between(X_axis, score_mean - score_std,
                            score_mean + score_std,
                            alpha=0.1 if sample == 'test' else 0, color=color)
            ax.plot(X_axis, score_mean, style, color=color,
                    alpha=1 if sample == 'test' else 0.7,
                    label="%s (%s)" % (scorer, sample))
        best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
        best_score = results['mean_test_%s' % scorer][best_index]
        ax.plot([X_axis[best_index]] * 2, [0, best_score],
                linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)
        ax.annotate("%0.2f" % best_score,
                    (X_axis[best_index], best_score + 0.005))

    print('Loading Pickle...')
    gslas = pkload('las_Ea.pk')
    results = gslas.cv_results_
    scoring = gslas.scorer_

    print('Plotting Learning Curve...')
    plt.figure(figsize=(12, 8))  # figsize in inch, 1 inch = 2.54 cm
    plt.title("GridSearchCV for LASSO", fontsize=16)
    plt.xlabel("Alpha")
    # Get the regular numpy array from the MaskedArray of grid parameters.
    X_axis = np.array(results['param_alpha'].data, dtype=float)

    # R2 on the primary (left) axis.
    ax = plt.gca()
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_ylabel('R2')
    _plot_scorer(ax, results, X_axis, 'R2', 'g')
    ax.legend(loc=2)

    # MSE on a twinned (right) axis with its own scale.
    ax2 = ax.twinx()
    ax2.set_xlim(0, 1)
    ax2.set_ylim(-1, 2)
    ax2.set_ylabel('MSE')
    _plot_scorer(ax2, results, X_axis, 'MSE', 'k')
    ax2.legend(loc=1)

    # BUG FIX: plt.grid("off") relied on matplotlib's long-deprecated (and
    # since removed) 'on'/'off' string conversion; on current matplotlib the
    # truthy string would ENABLE the grid.  Pass the bool explicitly.
    plt.grid(False)
    pltsave('LearnCurve.png')