def scatter_plot_dict():
    # Predicted vs. ground-truth T1 (left) and T2 (right) for each sampling density.
    fig10, axs10 = plt.subplots(3, 2, figsize=(10, 15))
    for k in range(3):
        t = times[density[k]]
        axs10[k, 0].scatter(times_max[0] * relaxation_times[:, 0] * 1e3, t[:, 0] * 1e3,
                            c='b', marker='.', alpha=0.1)
        axs10[k, 0].plot(times_max[0] * relaxation_times[:, 0] * 1e3,
                         times_max[0] * relaxation_times[:, 0] * 1e3, 'g--')
        r2_t1 = r2(times_max[0] * relaxation_times[:, 0] * 1e3, t[:, 0] * 1e3)
        axs10[k, 0].text(1, 3550, r'R2 = {:.5f}'.format(r2_t1))
        axs10[k, 0].set_title(r'\textbf{T1, }' + '1/{}'.format(density[k]) + r'\textbf{ density}',
                              weight='bold')
        axs10[k, 0].set_ylabel(r'Predictions (ms)')
        axs10[k, 0].set_xlabel(r'Ground truth (ms)')
        axs10[k, 1].scatter(times_max[1] * relaxation_times[:, 1] * 1e3, t[:, 1] * 1e3,
                            c='r', marker='.', alpha=0.1)
        axs10[k, 1].plot(times_max[1] * relaxation_times[:, 1] * 1e3,
                         times_max[1] * relaxation_times[:, 1] * 1e3, 'g--')
        r2_t2 = r2(times_max[1] * relaxation_times[:, 1] * 1e3, t[:, 1] * 1e3)
        axs10[k, 1].text(1, 550, r'R2 = {:.5f}'.format(r2_t2))
        axs10[k, 1].set_title(r'\textbf{T2, }' + '1/{}'.format(density[k]) + r'\textbf{ density}',
                              weight='bold')
        axs10[k, 1].set_ylabel(r'Predictions (ms)')
        axs10[k, 1].set_xlabel(r'Ground truth (ms)')
    fig10.show()
    return fig10
def scatter_plot_noise(level):
    # Predicted vs. ground-truth T1 (left) and T2 (right) for each noise level.
    fig10, axs10 = plt.subplots(4, 2, figsize=(10, 20))
    for k in range(4):
        axs10[k, 0].scatter(times_max[0] * relaxation_times[:, 0] * 1e3,
                            times[level][noise_levels[k]][:, 0] * 1e3,
                            c='b', marker='.', alpha=0.1)
        axs10[k, 0].plot(times_max[0] * relaxation_times[:, 0] * 1e3,
                         times_max[0] * relaxation_times[:, 0] * 1e3, 'g--')
        r2_t1 = r2(times_max[0] * relaxation_times[:, 0] * 1e3,
                   times[level][noise_levels[k]][:, 0] * 1e3)
        axs10[k, 0].text(1, 3550, r'R2 = {:.5f}'.format(r2_t1))
        axs10[k, 0].set_title(r'\textbf{T1, }' + '{}'.format(noise_levels[k]) + r'\textbf{\% noise}',
                              weight='bold')
        axs10[k, 0].set_ylabel(r'Predictions (ms)')
        axs10[k, 0].set_xlabel(r'Ground truth (ms)')
        axs10[k, 1].scatter(times_max[1] * relaxation_times[:, 1] * 1e3,
                            times[level][noise_levels[k]][:, 1] * 1e3,
                            c='r', marker='.', alpha=0.1)
        axs10[k, 1].plot(times_max[1] * relaxation_times[:, 1] * 1e3,
                         times_max[1] * relaxation_times[:, 1] * 1e3, 'g--')
        r2_t2 = r2(times_max[1] * relaxation_times[:, 1] * 1e3,
                   times[level][noise_levels[k]][:, 1] * 1e3)
        axs10[k, 1].text(1, 550, r'R2 = {:.5f}'.format(r2_t2))
        axs10[k, 1].set_title(r'\textbf{T2, }' + '{}'.format(noise_levels[k]) + r'\textbf{\% noise}',
                              weight='bold')
        axs10[k, 1].set_ylabel(r'Predictions (ms)')
        axs10[k, 1].set_xlabel(r'Ground truth (ms)')
    fig10.show()
    return fig10
def train(self, X, y, val_size=0.2):
    from sklearn.model_selection import train_test_split
    if val_size:
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_size)
    else:
        # train_test_split rejects test_size=0, so train on everything.
        X_train, y_train = X, y
    self.model.fit(X_train, y_train)
    if self.model_type.split("_")[-1] == "Regressor":
        from sklearn.metrics import mean_squared_error as mse
        from sklearn.metrics import r2_score as r2
        y_pred_train = self.model.predict(X_train)
        print("Training Scores:")
        print("MSE : " + str(mse(y_train, y_pred_train)))
        print("R-Squared-Score : " + str(r2(y_train, y_pred_train)))
        if val_size != 0:
            y_pred_val = self.model.predict(X_val)
            print("Validation Scores:")
            print("MSE : " + str(mse(y_val, y_pred_val)))
            print("R-Squared-Score : " + str(r2(y_val, y_pred_val)))
    else:
        from sklearn.metrics import classification_report as cr
        y_pred_train = self.model.predict(X_train)
        print("Training Scores:")
        print(cr(y_train, y_pred_train))
        if val_size != 0:
            y_pred_val = self.model.predict(X_val)
            print("Validation Scores:")
            print(cr(y_val, y_pred_val))
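# A minimal usage sketch for train() above. The host object is a stand-in for
# whatever class owns the method; the only assumptions are a `model` attribute
# holding a sklearn estimator and a `model_type` string ending in "Regressor"
# or "Classifier".
def _demo_train():
    from types import SimpleNamespace
    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor

    X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)
    host = SimpleNamespace(model=RandomForestRegressor(n_estimators=50, random_state=0),
                           model_type="RandomForest_Regressor")
    train(host, X, y, val_size=0.2)  # prints train/validation MSE and R2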
def plot_simulated_len(length):
    # Predicted vs. ground-truth T1 (left) and T2 (right) for a given sequence length.
    fig10, axs10 = plt.subplots(1, 2, figsize=(10, 5))
    axs10[0].scatter(times_max[0] * relaxation_times[:, 0] * 1e3,
                     times[length][:, 0] * 1e3, c='b', marker='.', alpha=0.1)
    axs10[0].plot(times_max[0] * relaxation_times[:, 0] * 1e3,
                  times_max[0] * relaxation_times[:, 0] * 1e3, 'g--')
    r2_t1 = r2(times_max[0] * relaxation_times[:, 0] * 1e3, times[length][:, 0] * 1e3)
    axs10[0].text(1, 3550, r'R2 = {:.5f}'.format(r2_t1))
    axs10[0].set_title(r'\textbf{T1, }' + '{}'.format(length + 1) + r'\textbf{ time steps}',
                       weight='bold')
    axs10[0].set_ylabel(r'Predictions (ms)')
    axs10[0].set_xlabel(r'Ground truth (ms)')
    axs10[1].scatter(times_max[1] * relaxation_times[:, 1] * 1e3,
                     times[length][:, 1] * 1e3, c='r', marker='.', alpha=0.1)
    axs10[1].plot(times_max[1] * relaxation_times[:, 1] * 1e3,
                  times_max[1] * relaxation_times[:, 1] * 1e3, 'g--')
    r2_t2 = r2(times_max[1] * relaxation_times[:, 1] * 1e3, times[length][:, 1] * 1e3)
    axs10[1].text(1, 550, r'R2 = {:.5f}'.format(r2_t2))
    axs10[1].set_title(r'\textbf{T2, }' + '{}'.format(length + 1) + r'\textbf{ time steps}',
                       weight='bold')
    axs10[1].set_ylabel(r'Predictions (ms)')
    axs10[1].set_xlabel(r'Ground truth (ms)')
    fig10.show()
    return fig10, r2_t1, r2_t2
def plot_simulated_tr(cell):
    # Predicted vs. ground-truth T1 and T2 when targeting a single repetition (time step).
    fig_tr, ax_tr = plt.subplots(1, 2, figsize=(10, 5))
    ax_tr[0].scatter(times_max[0] * relaxation_times[:, 0] * 1e3,
                     times[cell][:, 0] * 1e3, c='b', marker='.', alpha=0.1)
    r2_t1 = r2(times_max[0] * relaxation_times[:, 0] * 1e3, times[cell][:, 0] * 1e3)
    ax_tr[0].text(1, 3550, r'R2 = {:.5f}'.format(r2_t1))
    ax_tr[0].plot(range(4000), range(4000), 'g--')  # identity line
    ax_tr[1].scatter(times_max[1] * relaxation_times[:, 1] * 1e3,
                     times[cell][:, 1] * 1e3, c='r', marker='.', alpha=0.1)
    r2_t2 = r2(times_max[1] * relaxation_times[:, 1] * 1e3, times[cell][:, 1] * 1e3)
    ax_tr[1].text(1, 550, r'R2 = {:.5f}'.format(r2_t2))
    ax_tr[1].plot(range(600), range(600), 'g--')  # identity line
    ax_tr[0].set_title(r'\textbf{T1, target repetition - time step \#}' + '{}'.format(cell + 1))
    ax_tr[0].set_xlabel(r'Ground truth (ms)')
    ax_tr[0].set_ylabel(r'Predictions (ms)')
    ax_tr[1].set_title(r'\textbf{T2, target repetition - time step \#}' + '{}'.format(cell + 1))
    ax_tr[1].set_xlabel(r'Ground truth (ms)')
    ax_tr[1].set_ylabel(r'Predictions (ms)')
    fig_tr.show()
    return fig_tr, r2_t1, r2_t2
def RFR(x_train, y_train, x_test, y_test):
    # Random forest baseline, then refit on the features selected by importance.
    estimator = RandomForestRegressor(n_estimators=1000, random_state=0, n_jobs=-1)
    estimator.fit(x_train, y_train)
    y_pred = estimator.predict(x_test)
    mse_score = mse(y_test, y_pred)
    print("mse_score: " + str(mse_score))
    r2_score = r2(y_test, y_pred)
    print("r2_score: " + str(r2_score))
    for feature in zip(labels, estimator.feature_importances_):
        print(feature)
    # Keep only features whose importance exceeds the threshold.
    sfm = SelectFromModel(estimator, threshold=0.05)
    sfm.fit(x_train, y_train)
    x_train_important = sfm.transform(x_train)
    x_test_important = sfm.transform(x_test)
    estimator_important = RandomForestRegressor(n_estimators=1000, random_state=0, n_jobs=-1)
    estimator_important.fit(x_train_important, y_train)
    y_pred_important = estimator_important.predict(x_test_important)
    mse_important = mse(y_test, y_pred_important)
    print("mse_score_important: " + str(mse_important))
    r2_important = r2(y_test, y_pred_important)
    print("r2_score_important: " + str(r2_important))
def GBR(x_train, y_train, x_test, y_test):
    estimator = GradientBoostingRegressor(n_estimators=1000, random_state=0)
    estimator.fit(x_train, y_train)
    y_pred = estimator.predict(x_test)
    mse_score = mse(y_test, y_pred)
    print("mse_score: " + str(mse_score))
    r2_score = r2(y_test, y_pred)
    print("r2_score: " + str(r2_score))
    # Export predictions next to the true targets; predictions are made on the
    # test features and wrapped in a Series so concat can align them
    # (assumes y_test is a pandas Series).
    pred = pd.Series(estimator.predict(x_test), index=y_test.index, name='prediction')
    pred2 = pd.concat([y_test, pred], axis=1)
    pred2.to_csv(r"C:\Users\tulincakmak\Desktop\data2.csv", index=False)
    for feature in zip(labels, estimator.feature_importances_):
        print(feature)
    # Refit on the features selected by importance.
    sfm = SelectFromModel(estimator, threshold=0.05)
    sfm.fit(x_train, y_train)
    x_train_important = sfm.transform(x_train)
    x_test_important = sfm.transform(x_test)
    estimator_important = GradientBoostingRegressor(n_estimators=1000, random_state=0)
    estimator_important.fit(x_train_important, y_train)
    y_pred_important = estimator_important.predict(x_test_important)
    mse_important = mse(y_test, y_pred_important)
    print("mse_score_important: " + str(mse_important))
    r2_important = r2(y_test, y_pred_important)
    print("r2_score_important: " + str(r2_important))
def evaluate(df, num_points, test=False):
    print('\n ----------------- MODEL EVALUATION ----------------- \n')
    df = df.fillna(0)  # fillna returns a copy, so the result must be reassigned
    open_true = df['open_next_day']
    open_pred = df['pred_open_next_day']
    close_true = df['close_next_day']
    close_pred = df['pred_close_next_day']
    if test:
        open_true = open_true[:-1]
        open_pred = open_pred[:-1]
        close_true = close_true[:-1]
        close_pred = close_pred[:-1]
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(16, 8))
    ax[0, 0].plot(open_true[-num_points:], open_pred[-num_points:], 'go')
    ax[0, 0].set_title('Open')
    ax[0, 1].plot(close_true[-num_points:], close_pred[-num_points:], 'r^')
    ax[0, 1].set_title('Close')
    ax[1, 0].plot(open_true[-num_points:])
    ax[1, 0].plot(open_pred[-num_points:])
    ax[1, 0].legend(['true', 'prediction'])  # legend(), not set_label(), draws the labels
    ax[1, 1].plot(close_true[-num_points:])
    ax[1, 1].plot(close_pred[-num_points:])
    ax[1, 1].legend(['true', 'prediction'])
    fig.suptitle('Model Price Predictions')
    plt.show()
    plt.close()
    mae_open = mae(open_true, open_pred)
    mae_close = mae(close_true, close_pred)
    mse_open = mse(open_true, open_pred)
    mse_close = mse(close_true, close_pred)
    r2_open = r2(open_true, open_pred)
    r2_close = r2(close_true, close_pred)
    print('OPEN PRICES')
    print('\t Mean Absolute Error: {}'.format(mae_open))
    print('\t Mean Squared Error: {}'.format(mse_open))
    print('\t R2 Score: {}'.format(r2_open))
    print('CLOSE PRICES')
    print('\t Mean Absolute Error: {}'.format(mae_close))
    print('\t Mean Squared Error: {}'.format(mse_close))
    print('\t R2 Score: {}'.format(r2_close))
    print('')
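# A minimal sketch of calling evaluate() above on a synthetic frame. The column
# names are the ones the function reads; the values are made up for illustration.
def _demo_evaluate():
    import numpy as np
    import pandas as pd

    n = 50
    true_open = 100 + np.cumsum(np.random.randn(n))
    df = pd.DataFrame({
        'open_next_day': true_open,
        'pred_open_next_day': true_open + np.random.randn(n),
        'close_next_day': true_open + 1.0,
        'pred_close_next_day': true_open + 1.0 + np.random.randn(n),
    })
    evaluate(df, num_points=30)  # plots and prints MAE/MSE/R2 for open and close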
def searchKLMS_GMM(u, d, sgm, eps, testName):
    import KAF
    from sklearn.metrics import r2_score as r2
    from sklearn.metrics import mean_squared_error as mse
    import pandas as pd
    print("GMM KLMS search...")
    # Initialization with the first (sigma, epsilon) pair
    kf = KAF.GMM_KLMS(epsilon=eps[0], sigma=sgm[0])
    out = kf.evaluate(u, d)
    best_r2 = r2(d[1:], out)
    best_mse = mse(d[1:], out)
    best_r2_cb = len(kf.CB)
    best_r2_ep = eps[0]
    best_r2_sgm = sgm[0]
    best_mse_cb = len(kf.CB)
    best_mse_ep = eps[0]
    best_mse_sgm = sgm[0]
    # Exhaustive grid over all (sigma, epsilon) pairs, using GMM_KLMS
    # consistently for every grid point; the first pair is skipped because
    # it was already evaluated above.
    for i in sgm:
        for j in eps:
            if i == sgm[0] and j == eps[0]:
                continue
            kf = KAF.GMM_KLMS(epsilon=j, sigma=i)
            out = kf.evaluate(u, d)
            partial_r2 = r2(d[1:], out)
            partial_mse = mse(d[1:], out)
            if partial_r2 > best_r2:
                best_r2 = partial_r2
                best_r2_cb = len(kf.CB)
                best_r2_sgm = i
                best_r2_ep = j
            if partial_mse < best_mse:
                best_mse = partial_mse
                best_mse_cb = len(kf.CB)
                best_mse_ep = j
                best_mse_sgm = i
    results = {"Best_R2": best_r2,
               "Best_R2_CB_size": best_r2_cb,
               "Best_R2_epsilon": best_r2_ep,
               "Best_R2_sigma": best_r2_sgm,
               "Best_MSE": best_mse,
               "Best_MSE_CB_size": best_mse_cb,
               "Best_MSE_epsilon": best_mse_ep,
               "Best_MSE_sigma": best_mse_sgm}
    return pd.DataFrame(data=results, index=[testName])
def searchKLMS_BGMM(u=None, d=None, wcp=None, testName="Test", batchSize=100):
    import KAF
    from sklearn.metrics import r2_score as r2
    from sklearn.metrics import mean_squared_error as mse
    import pandas as pd
    cl = batchSize
    print("KLMS BGMM in " + testName + " | Search...")
    u_train = u.reshape(-1, batchSize, u.shape[1])
    d_train = d.reshape(-1, batchSize, d.shape[1])
    # Initialization with the first weight-concentration prior
    kf = KAF.BGMM_KLMS(clusters=cl, wcp=wcp[0])
    for u_, d_ in zip(u_train, d_train):
        kf.fit(u_, d_)
    out = kf.predict(u)
    best_r2 = r2(d, out)
    best_mse = mse(d, out)
    best_r2_cb = len(kf.CB)
    best_r2_wcp = wcp[0]
    best_mse_cb = len(kf.CB)
    best_mse_wcp = wcp[0]
    for wcp_ in wcp[1:]:
        m = KAF.BGMM_KLMS(clusters=cl, wcp=wcp_)
        for u_, d_ in zip(u_train, d_train):
            m.fit(u_, d_)
        out = m.predict(u)
        partial_r2 = r2(d, out)
        partial_mse = mse(d, out)
        if partial_r2 > best_r2:
            best_r2 = partial_r2
            best_r2_cb = len(m.CB)  # codebook size of the candidate model
            best_r2_wcp = wcp_
        if partial_mse < best_mse:
            best_mse = partial_mse
            best_mse_cb = len(m.CB)
            best_mse_wcp = wcp_
    results = {"Best_R2": best_r2,
               "Best_R2_CB_size": best_r2_cb,
               "Best_R2_wcp": best_r2_wcp,
               "Best_MSE": best_mse,
               "Best_MSE_CB_size": best_mse_cb,
               "Best_MSE_wcp": best_mse_wcp}
    print("Finished")
    return pd.DataFrame(data=results, index=[testName])
def score(self, X, T):
    # Note: despite the attribute name, this stores the R2 score, not accuracy.
    self.predict(X)
    self.acc = r2(T, self.P)
    return self.acc
def test_python_explainer():
    # Load the diabetes dataset
    columns = 'age sex bmi map tc ldl hdl tch ltg glu'.split()
    diabetes = load_diabetes()
    X = np.array(pd.DataFrame(diabetes.data, columns=columns))
    y = diabetes.target
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Model training
    rf_model = RandomForestRegressor().fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    # Regression evaluation: R2 score
    r2_eval = r2(y_test, y_pred)
    print('r2 of the fitted model is:', r2_eval)
    # Prediction explanation generation
    expl = EnsembleTreeExplainer(rf_model)
    contributions, contrib_intercept = expl.predict(X_test)
    average_contribs = zip(columns, np.mean(contributions, axis=0))
    print('Average feature contributions: \n', list(average_contribs))
    # Per-feature contributions plus the intercept should reconstruct the predictions.
    assert ((np.abs(np.sum(contributions, axis=1) + contrib_intercept - y_pred)) < .01).all()
def test_python_explainer_transformer():
    # Load the diabetes dataset
    columns = 'age sex bmi map tc ldl hdl tch ltg glu'.split()
    diabetes = load_diabetes()
    X = pd.DataFrame(diabetes.data, columns=columns)
    y = diabetes.target
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Model training
    rf_model = RandomForestRegressor().fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)
    # Regression evaluation: R2 score
    r2_eval = r2(y_test, y_pred)
    print(r2_eval)
    X_test2 = X_test.copy()
    expl = EnsembleTreeExplainerTransformer(rf_model)
    expl.fit()
    X_test2 = expl.transform(X_test2)
    assert 'feature_contributions' in X_test2.columns
    assert 'intercept_contribution' in X_test2.columns
    # Contributions plus intercept should reconstruct the stored predictions.
    assert ((np.abs(
        np.array(X_test2['feature_contributions'].apply(lambda x: sum(x[0]))
                 + X_test2['intercept_contribution'])
        - X_test2['prediction']) < .01).all())
def metric(actual, predicted):
    # AGM combines RMSE and MAE, scaled by (1 - R2): lower is better.
    e_mse = mse(actual, predicted)
    e_mae = mae(actual, predicted)
    e_r2 = r2(actual, predicted)
    e_agm = ((sqrt(e_mse) + e_mae) / 2) * (1 - e_r2)
    return e_mse, sqrt(e_mse), e_mae, e_r2, e_agm
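# Worked example for metric() above, assuming the sklearn mse/mae/r2 aliases
# and math.sqrt are in scope as in the rest of this module. With a perfect
# fit, R2 = 1 and the AGM term collapses to 0.
def _demo_metric():
    actual = [3.0, -0.5, 2.0, 7.0]
    predicted = [2.5, 0.0, 2.0, 8.0]
    e_mse, e_rmse, e_mae, e_r2, e_agm = metric(actual, predicted)
    print(f"MSE={e_mse:.3f} RMSE={e_rmse:.3f} MAE={e_mae:.3f} "
          f"R2={e_r2:.3f} AGM={e_agm:.3f}")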
def score(self, X, y, all_three: bool = False):
    n, d = X.shape
    # TODO: check the size to be either train or test
    if n == self.train_size:
        return self.train_score
    elif n == len(self.dates) - self.train_size:
        x = np.arange(self.train_size, len(self.dates))
        test_pred = self.__log_sin_2(x, self.model.predict(X))
        test_score = r2(y, test_pred)
        if all_three:
            return pd.Series(
                [self.sin_param_score, self.train_score, test_score],
                index=['sin_params', 'train', 'test']
            ).round(3)
        return test_score
    raise Exception("unexpected size of X")
def prediction_eval(prediction, real_data):
    '''
    This function computes and prints four different metrics (MSE, MAE,
    median absolute error and R2) to evaluate the accuracy of the model.
    prediction and real_data need to have the same size.

    Parameters
    ----------
    prediction : array
        Predicted values.
    real_data : array
        Real data.

    Returns
    -------
    None.
    '''
    from sklearn.metrics import mean_absolute_error as mae
    from sklearn.metrics import mean_squared_error as mse
    from sklearn.metrics import median_absolute_error as medae
    from sklearn.metrics import r2_score as r2
    print("mean_absolute_error : ", mae(real_data, prediction))
    print("mean_squared_error : ", mse(real_data, prediction))
    print("median_absolute_error : ", medae(real_data, prediction))
    print("r2_score : ", r2(real_data, prediction))
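# Quick usage sketch for prediction_eval(); the arrays are illustrative only.
def _demo_prediction_eval():
    import numpy as np
    rng = np.random.default_rng(0)
    real = np.linspace(0.0, 10.0, 100)
    pred = real + rng.normal(scale=0.5, size=real.shape)
    prediction_eval(pred, real)  # prints MAE, MSE, median AE and R2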
def performance_indicators(y, y_true, modelname, verbose=False, plot_scatter=False):
    # Calculate different accuracy scores; sklearn's r2_score expects (y_true, y_pred).
    r2_score = r2(y_true, y)
    spearman_corr = spearmanr(y, y_true)[0]
    rms_error = np.sqrt(mean_squared_error(y, y_true))
    pearson_corr = pearsonr(y, y_true)[0]
    if verbose:
        print(f"prediction accuracy for {modelname}")
        print(f"R^2 score: \t {r2_score}")
        print(f"RMS error: \t {rms_error}")
        print(f"Pearson: \t {pearson_corr}")
        print(f"Spearman: \t {spearman_corr}")
    if plot_scatter:
        data = pd.DataFrame({'true_values': y_true.reshape(-1),
                             'predictions': y.reshape(-1)})
        joint_grid = sns.jointplot(x="true_values", y="predictions", data=data,
                                   kind="scatter",
                                   xlim=(min(y_true), max(y_true)),
                                   ylim=(min(y_true), max(y_true)),
                                   height=7)
        # Identity line: a perfect model would put every point on it.
        joint_grid.ax_joint.plot([min(y_true), max(y_true)],
                                 [min(y_true), max(y_true)], 'r')
    summary_dict = {"rmse": rms_error, "r2": r2_score,
                    "pearson": pearson_corr, "spearman": spearman_corr}
    return summary_dict
def find_accurracy_on_testset(self, model, X_test, Y_test, clip=False, plot=True):
    results = model.predict(X_test)
    print("-----------------------------------------------------------")
    # R2 is reported directly; taking its square root mislabels the value
    # and fails whenever R2 is negative.
    print("MSE: " + str(mse(Y_test, results)),
          "MAE: " + str(mae(Y_test, results)),
          "R2: " + str(r2(Y_test, results)))
    print("-----------------------------------------------------------")
    if plot:
        # clip=True shows only the first 100 points for readability.
        n = 100 if clip else len(results)
        fig, ax = plt.subplots(figsize=(16, 5))
        ax.plot(Y_test.values[:n], label='True Value')
        ax.plot(results[:n], label='Predicted Value')
        ax.set_xticks([])
        ax.legend()
        plt.show()
    return None
def performance_metric(labels, prediction):
    """Calculates and returns the performance score between true and
    predicted values based on the metric chosen."""
    # Calculate the performance score between 'y_true' and 'y_predict'
    score = r2(labels, prediction)
    # Return the score
    return score
def Lasso(x_train, y_train, x_test, y_test):
    # Note: despite the name, this fits LassoLars (LARS-based lasso),
    # and the function name shadows sklearn's own Lasso.
    estimator = LassoLars()
    estimator.fit(x_train, y_train)
    y_pred = estimator.predict(x_test)
    mse_score = mse(y_test, y_pred)
    print("mse_score: " + str(mse_score))
    r2_score = r2(y_test, y_pred)
    print("r2_score: " + str(r2_score))
def reconstruction_metrics(XT, XE, Fold: dict, subset_list=['train', 'val', 'test']):
    """Calculates R2 and MSE for train/validation/test subsets for within- and
    cross-modal reconstructions.

    Args:
        XT: transcriptomic data, cell x features
        XE: electrophysiological data, cell x features
        Fold (dict): Summary file, containing indices for different subsets,
            and within- and cross-modal reconstructions
        subset_list (list, optional): Defaults to ['train', 'val', 'test']

    Returns:
        result_df: dataframe with results
    """
    XT = deepcopy(XT)
    XE = deepcopy(XE)
    result = {}

    # Within-modality reconstructions
    T_se = (XT - Fold['XrT'])**2
    E_se = (XE - Fold['XrE'])**2
    for subset in subset_list:
        ind = Fold[subset + '_ind']
        result['XT_from_XT_' + subset] = np.mean(T_se[ind, :])
        result['XE_from_XE_' + subset] = np.nanmean(E_se[ind, :])
        result['XT_from_XT_R2_' + subset] = r2(XT[ind, :], Fold['XrT'][ind, :])
        result['XE_from_XE_R2_' + subset] = r2(XE[ind, :], Fold['XrE'][ind, :])

    # Cross-modality reconstructions
    T_se = (XT - Fold['XrT_from_XE'])**2
    E_se = (XE - Fold['XrE_from_XT'])**2
    for subset in subset_list:
        ind = Fold[subset + '_ind']
        result['XT_from_XE_' + subset] = np.mean(T_se[ind, :])
        result['XE_from_XT_' + subset] = np.nanmean(E_se[ind, :])
        result['XT_from_XE_R2_' + subset] = r2(XT[ind, :], Fold['XrT_from_XE'][ind, :])
        result['XE_from_XT_R2_' + subset] = r2(XE[ind, :], Fold['XrE_from_XT'][ind, :])

    # Pass index=[0] because only one row is expected.
    result_df = pd.DataFrame(result, index=[0])
    return result_df
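# Hedged sketch of driving reconstruction_metrics() with a synthetic Fold dict.
# The keys below ('XrT', 'XrE', 'XrT_from_XE', 'XrE_from_XT', '<subset>_ind')
# are exactly the ones the function reads; the data itself is random.
def _demo_reconstruction_metrics():
    import numpy as np
    rng = np.random.default_rng(0)
    n_cells = 30
    XT = rng.normal(size=(n_cells, 5))
    XE = rng.normal(size=(n_cells, 4))

    def noisy(A):
        return A + 0.1 * rng.normal(size=A.shape)

    ind = np.arange(n_cells)
    Fold = {'XrT': noisy(XT), 'XrE': noisy(XE),
            'XrT_from_XE': noisy(XT), 'XrE_from_XT': noisy(XE),
            'train_ind': ind[:20], 'val_ind': ind[20:25], 'test_ind': ind[25:]}
    print(reconstruction_metrics(XT, XE, Fold).T)  # one metric per row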
def modela_kvalitate(y_test, resultats):
    # Quality above 0.6 is OK
    print(cl('Explained Variance Score (dispersion): {}'.format(
        evs(y_test, resultats)), attrs=['bold']))
    print(cl('R-Squared (squared deviation): {}'.format(
        r2(y_test, resultats)), attrs=['bold']))
def performance_metric(y_true, y_predict):
    """Calculates and returns the performance score between true and
    predicted values based on the metric chosen."""
    # Calculate the performance score between 'y_true' and 'y_predict'
    score = r2(y_true, y_predict)
    # Return the score
    return score
def models(model, x_train, y_train, x_test, y_test):
    estimator = model()
    estimator.fit(x_train, y_train)
    train_score = estimator.score(x_train, y_train)
    y_pred = estimator.predict(x_test)
    mse_score = mse(y_test, y_pred)
    print("mse_score: " + str(mse_score))
    r2_score = r2(y_test, y_pred)
    print("r2_score: " + str(r2_score))
    print(train_score)
def get_errors(y_true, y_pred):
    err_mae = mae(y_true, y_pred)
    err_rmse = np.sqrt(mse(y_true, y_pred))
    err_r2 = r2(y_true, y_pred)
    print("Ensemble MAE:" + str(err_mae) +
          " RMSE:" + str(err_rmse) +
          " R2:" + str(err_r2))
    return err_mae, err_rmse, err_r2
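# Example call for get_errors(); plain lists work because the sklearn metrics
# accept any array-like input.
def _demo_get_errors():
    y_true = [1.0, 2.0, 3.0, 4.0]
    y_pred = [1.1, 1.9, 3.2, 3.8]
    err_mae, err_rmse, err_r2 = get_errors(y_true, y_pred)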
def Bagging(x_train, y_train, x_test, y_test):
    estimator = BaggingRegressor(n_estimators=1000, random_state=0, n_jobs=-1)
    estimator.fit(x_train, y_train)
    train_score = estimator.score(x_train, y_train)
    y_pred = estimator.predict(x_test)
    mse_score = mse(y_test, y_pred)
    print("mse_score: " + str(mse_score))
    r2_score = r2(y_test, y_pred)
    print("r2_score: " + str(r2_score))
    print(train_score)
def get_cv_scores(model, X, y):
    """Leave-one-out cross-validation; calculates and returns RMSE, MAE and R2."""
    y_pred = cross_val_predict(model, X, y, cv=LeaveOneOut(), n_jobs=-1)
    # squared=False returns RMSE (available in sklearn >= 0.22; newer releases
    # also offer root_mean_squared_error).
    rmse_score = mse(y, y_pred, squared=False)
    mae_score = mae(y, y_pred)
    r2_score = r2(y, y_pred)
    return rmse_score, mae_score, r2_score
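# Leave-one-out CV is expensive (one fit per sample), so this sketch uses a
# small slice of a toy dataset and a cheap linear model; any sklearn estimator
# would do.
def _demo_get_cv_scores():
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import LinearRegression

    X, y = load_diabetes(return_X_y=True)
    rmse_score, mae_score, r2_score = get_cv_scores(LinearRegression(), X[:100], y[:100])
    print(f"RMSE={rmse_score:.2f} MAE={mae_score:.2f} R2={r2_score:.2f}")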
def evaluate(self, model):
    if self.model_type == 'regression':
        y_pred_train = model.predict(self.X_train)
        y_pred_test = model.predict(self.X_test)
        print("\n{} Regression report on Train Data and Test Data {}".format('*' * 40, '*' * 40))
        print("\n", model)
        print("\n\t\tMetrics Train Data\t\tTest Data")
        print("\t\t", "-" * 54)
        print("\t\tMean Absolute Error {:.3f}\t\t{:.3f}".format(
            mae(self.Y_train, y_pred_train), mae(self.Y_test, y_pred_test)))
        print("\t\tMean Squared Error {:.3f}\t\t{:.3f}".format(
            mse(self.Y_train, y_pred_train), mse(self.Y_test, y_pred_test)))
        print("\t\tMean Absolute Percentage Error {:.3f}\t\t{:.3f}".format(
            self.mape(self.Y_train, y_pred_train), self.mape(self.Y_test, y_pred_test)))
        print("\t\tR2 Score {:.3f}\t\t{:.3f}".format(
            r2(self.Y_train, y_pred_train), r2(self.Y_test, y_pred_test)))
        try:
            # msle raises ValueError on negative values, so report it only when defined;
            # the second column is the test score.
            print("\t\tMean Squared Log Error {:.3f}\t\t{:.3f}".format(
                msle(self.Y_train, y_pred_train), msle(self.Y_test, y_pred_test)))
        except ValueError:
            pass
    elif self.model_type == 'classification':
        y_pred_train = model.predict(self.X_train)
        print("\n{} classification report on Train Data {}".format('*' * 40, '*' * 40))
        print(classification_report(self.Y_train, y_pred_train))
        y_pred_test = model.predict(self.X_test)
        print("\n{} classification report on Test Data {}".format('*' * 40, '*' * 40))
        print(classification_report(self.Y_test, y_pred_test))
def partA(y_df, pred):
    rsquared = r2(y_df, pred)
    meanse = mse(y_df, pred)
    plt.figure(1)
    data = ['R2', 'MSE']
    xs = range(len(data))
    ys = [rsquared, meanse]
    plt.bar(xs, ys, 0.3)
    plt.xticks(xs, data)
    plt.suptitle("Fig1. Bar plot of R2 and MSE for question Q3a")
    plt.show()
    return rsquared, meanse
def evaluate_classic_model(model, str_model, features, targets, log_output, predict_model=False):
    if predict_model:
        # features = cudf.DataFrame.from_pandas(pd.DataFrame(features))
        output = model.predict(features, predict_model='GPU')
    else:
        output = model.predict(features)
    mse_error = mse(targets, output)
    r2_error = r2(targets, output)
    print(f'MSE {mse_error}', f'R2 {r2_error}')
    with open('test_results_others2.txt', 'a') as logs:
        logs.write(f'{str_model} {log_output} MSE {mse_error} R2 {r2_error}\n')