def predict_with_dwt(dataset, testnum, featurenum):
    """Forecast the last `testnum` samples of `dataset` via DWT decomposition.

    Lowpass (approximation) coefficients are predicted with a DBN; highpass
    (detail) coefficients are estimated by `generateData`.  The two streams are
    recombined with the inverse DWT and plotted against the real tail.

    :param dataset: 1-D sequence to forecast (indexed with numpy slicing —
        assumed to be a numpy array; TODO confirm at the caller)
    :param testnum: number of trailing samples held out as the test segment
    :param featurenum: number of lagged features used by generate_data
    :return: tuple (dataset_pred, mse)
    """
    # Single-level DWT: ca = lowpass/approximation, cd = highpass/detail.
    ca, cd = dwt.dwt(dataset)
    ca_matrix = ca[np.newaxis, :]  # generate_data expects a 2-D row matrix
    print('DWT finish.')
    # The DWT halves the length, so only testnum/2 coefficients are held out.
    x_train, x_test, y_train, y_test = generate_data(ca_matrix,
                                                     int(testnum / 2),
                                                     featurenum)
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    # Reuse the training scale on the test features (no re-fitting).
    x_test = min_max_scaler.transform(x_test)
    dbn1 = dbn.DBN(x_train=x_train, y_train=y_train, x_test=x_test,
                   y_test=y_test, hidden_layer=[250], learning_rate_rbm=0.0005,
                   batch_size_rbm=150, n_epochs_rbm=200, verbose_rbm=1,
                   random_seed_rbm=500, activation_function_nn='tanh',
                   learning_rate_nn=0.005, batch_size_nn=150,
                   n_epochs_nn=1500, verbose_nn=1, decay_rate=0)
    dbn1.pretraining()
    dbn1.finetuning()
    ca_pred = dbn1.result[:, 0]  # first column of the DBN output = predictions
    print('Lowpass coefficient estimation finish.')
    # Highpass coefficients are estimated statistically from the training part.
    mu, sigma_2, cd_pred = generateData(cd[0:len(cd) - int(testnum / 2)],
                                        outputnum=int(testnum / 2))
    print('Highpass coefficient estimation finish.')
    # Recombine both predicted coefficient streams into a time-domain forecast.
    dataset_pred = dwt.idwt(ca_pred, cd_pred)
    print('IDWT finish.')
    dataset_test = dataset[len(dataset) - testnum:len(dataset)]
    # Transform the real tail too, so coefficients can be compared directly.
    ca_test, cd_test = dwt.dwt(dataset_test)
    # Three stacked panels: lowpass, highpass, and reconstructed sequence.
    plt.figure(figsize=(12, 9), dpi=100)
    plt.subplot(3, 1, 1)
    plt.plot(ca_test)
    plt.plot(ca_pred)
    plt.legend(['lowpass_real', 'lowpass_prediction'], loc='upper right')
    plt.title('lowpass coefficient prediction result', fontsize=16)
    plt.subplot(3, 1, 2)
    plt.plot(cd_test)
    plt.plot(cd_pred)
    plt.legend(['highpass_real', 'highpass_prediction'], loc='upper right')
    plt.title('highpass coefficient prediction result', fontsize=16)
    plt.subplot(3, 1, 3)
    mse = mean_squared_error(dataset_pred, dataset_test)
    plt.plot(dataset_test)
    plt.plot(dataset_pred)
    plt.legend(['dataset_real', 'dataset_prediction'], loc='upper right')
    plt.title('sequence prediction result', fontsize=16)
    plt.xlabel('MSE = %f' % mse)
    plt.draw()
    #plt.show()
    return dataset_pred, mse
def train_model_func(learning_rate_rbm, learning_rate, batch_size, feature,
                     label, path_out_png, pred_num, train_deep):
    """Sweep the training-window depth and plot the average test RMSE per depth.

    For each depth in 1..train_deep, `pred_num` models are trained on sliding
    windows taken from the end of the training split; each run happens in a
    child process and sends its predictions back through `message_queue`
    (presumably so TensorFlow state is released per run — TODO confirm).
    The RMSE-vs-depth curve is saved to `path_out_png`.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(feature, label,
                                                        test_size=0.2,
                                                        shuffle=False)
    print("Training model...")
    print("RMSE (on training data):")
    root_mean_squared_errors = []
    message_queue = Queue()
    for deep in range(1, train_deep + 1):
        RMSE_total = 0
        for i in range(0, pred_num):
            starttime = datetime.datetime.now()
            # Window of `deep` samples ending i samples before the split point.
            x_train = np.array(
                X_train[X_train.shape[0] - i - deep:X_train.shape[0] - i])
            # NOTE: 'y_trian' typo kept — train_model expects this positional arg.
            y_trian = np.array(
                Y_train[Y_train.shape[0] - i - deep:Y_train.shape[0] - i])
            x_test = np.array(X_test)
            y_test = np.array(Y_test)
            # Run training in a separate process; join blocks until it ends.
            _process = Process(target=train_model,
                               args=(learning_rate_rbm, learning_rate,
                                     batch_size, x_train, y_trian, x_test,
                                     message_queue))
            _process.start()
            _process.join()
            predictions = message_queue.get()
            root_mean_squared_error = math.sqrt(
                mean_squared_error(y_test, predictions))
            endtime = datetime.datetime.now()
            print("\t\ti:\t", root_mean_squared_error,
                  "\t\tusing seconds:\t", (endtime - starttime).seconds)
            RMSE_total += root_mean_squared_error
        RMSE_avg = RMSE_total / pred_num
        root_mean_squared_errors.append(RMSE_avg)
        print("train_deep:", deep, "\tRMSE_avg:", RMSE_avg)
    # Output a graph of loss metrics over periods.
    # plt.subplot(1, 2, 2)
    plt.ylabel('RMSE')
    plt.xlabel('train_deep')
    plt.title("Root Mean Squared Error vs. Train Deep")
    plt.tight_layout()
    plt.plot(root_mean_squared_errors)
    plt.savefig(path_out_png)
    print("finished.")
def _print_regressionMetrics(_linear, _X, _y, _predict):
    """Print a table of fit metrics for a fitted linear-regression estimator.

    :param _linear: fitted sklearn LinearRegression-like model (has coef_,
        intercept_, score)
    :param _X: feature matrix used for scoring
    :param _y: true target values
    :param _predict: model predictions for _X
    """
    # Label/value rows; labels are user-facing Spanish text (kept verbatim).
    metrics = [['Regresión Lineal', 'Datos obtenidos'],
               ['Coeficiente', _linear.coef_],
               ['Interceptación', _linear.intercept_],
               ['Calificación (score)', _linear.score(_X, _y)],
               ['Variance Score', r2_score(_y, _predict)],
               ['Explained Variance Score', explained_variance_score(_y, _predict)],
               ['Mean Squared Error', mean_squared_error(_y, _predict)],
               ['Mean Absolute Error', mean_absolute_error(_y, _predict)],
               ]
    print('\nMinería de Datos - Regresión Lineal - <VORT>', '\n')
    print(_linear, '\n')
    # `look` renders the table — defined elsewhere in the project.
    print(look(metrics))
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    """Train an sklearn MLP classifier and print MSE/accuracy on test and val.

    :param train/test/val: feature matrices for each split
    :param train_out/test_out/val_out: labels for each split
    :param BigSigma_inv: kept for signature compatibility (unused here)

    Fix: validation accuracy was computed against `predict_test` instead of
    `predict_val` (copy-paste bug), so the printed VAL accuracy was wrong.
    """
    clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 1),
                        activation='logistic', batch_size=BATCH_HUMAN,
                        shuffle=True, max_iter=5000)
    scaler = StandardScaler()
    scaler.fit(train)  # fit scaling statistics on training data only
    train1 = scaler.transform(train)
    # apply same transformation to test data
    test = scaler.transform(test)
    # NOTE(review): `val` is NOT scaled before predict — looks inconsistent
    # with test handling; confirm intent before changing behavior.
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    # FIX: use predict_val here (was predict_test).
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
def run_gradient_desc_gsc_con():
    """Logistic regression via batch gradient descent on concatenated GSC data.

    Trains weights batch-by-batch, then evaluates on the last training batch,
    the test split, and the validation split, printing ERMS and accuracy.

    Fix: the printed test accuracy used `Gx` (train-batch predictions) instead
    of `Gx_test`, so the reported test accuracy was wrong.
    """
    train, test, val, train_out, test_out, val_out, BigSigma_inv = process_gsc__data_con(
    )
    Weight_final = np.ones((31, ), dtype=float64)  # 30 features + bias term
    erms = []
    for start in range(0, len(train), BATCH):
        end = start + BATCH
        # Radial-basis features for this batch, with a prepended bias column.
        phi_train = getphi(train[start:end], BigSigma_inv)
        phi_train = np.insert(phi_train, 0, 1, 1)
        hx = phi_train.dot(Weight_final)
        hx = hx.astype(np.int64)
        Weight_final = gradient_desc(phi_train, hx, Weight_final,
                                     train_out[start:end])
    # Evaluation below reuses phi_train/start/end from the final batch.
    hx = phi_train.dot(Weight_final)
    hx = hx.astype(np.int64)
    Gx = getGX(hx)
    phi_test = getphi(test, BigSigma_inv)
    phi_test = np.insert(phi_test, 0, 1, 1)
    hx_test = phi_test.dot(Weight_final)
    hx_test = hx_test.astype(np.int64)
    Gx_test = getGX(hx_test)
    phi_val = getphi(val, BigSigma_inv)
    phi_val = np.insert(phi_val, 0, 1, 1)
    hx_val = phi_val.dot(Weight_final)
    hx_val = hx_val.astype(np.int64)
    Gx_val = getGX(hx_val)
    print(
        "=========================Logistic Regression using Gradient Descent for Concatenated GSC Data==========================="
    )
    print("ERMS Train", mean_squared_error(train_out[start:end], Gx),
          "Accuracy", acc_manual(train_out[start:end], Gx))
    # FIX: accuracy must use Gx_test (was Gx, the train-batch predictions).
    print("ERMS Test", mean_squared_error(test_out, Gx_test), "Accuracy",
          acc_manual(test_out, Gx_test))
    print("ERMS VAL", mean_squared_error(val_out, Gx_val), "Accuracy",
          acc_manual(val_out, Gx_val))
def run_logistic_human_sub(train, test, val, train_out, test_out, val_out,
                           BigSigma_inv):
    """Logistic regression via batch gradient descent on subtracted HOD data.

    Same pipeline as the GSC variant: batch-wise weight updates, then ERMS and
    accuracy on the last training batch, test split, and validation split.

    Fix: the printed test accuracy used `Gx` (train-batch predictions) instead
    of `Gx_test`, so the reported test accuracy was wrong.
    """
    Weight_final = np.ones((CENTERS + 1, ), dtype=float64)  # +1 for bias
    erms = []
    for start in range(0, len(train), BATCH_HUMAN):
        end = start + BATCH_HUMAN
        # Radial-basis features for this batch, with a prepended bias column.
        phi_train = getphi(train[start:end], BigSigma_inv)
        phi_train = np.insert(phi_train, 0, 1, 1)
        hx = phi_train.dot(Weight_final)
        hx = hx.astype(np.int64)
        Weight_final = gradient_desc(phi_train, hx, Weight_final,
                                     train_out[start:end])
    # Evaluation below reuses phi_train/start/end from the final batch.
    hx = phi_train.dot(Weight_final)
    hx = hx.astype(np.int64)
    Gx = getGX(hx)
    phi_test = getphi(test, BigSigma_inv)
    phi_test = np.insert(phi_test, 0, 1, 1)
    hx_test = phi_test.dot(Weight_final)
    hx_test = hx_test.astype(np.int64)
    Gx_test = getGX(hx_test)
    phi_val = getphi(val, BigSigma_inv)
    phi_val = np.insert(phi_val, 0, 1, 1)
    hx_val = phi_val.dot(Weight_final)
    hx_val = hx_val.astype(np.int64)
    Gx_val = getGX(hx_val)
    print(
        "=========================Logistic Regression using Gradient Descent for Subtracted HOD Data==========================="
    )
    print("ERMS Train", mean_squared_error(train_out[start:end], Gx),
          "Accuracy", acc_manual(train_out[start:end], Gx))
    # FIX: accuracy must use Gx_test (was Gx, the train-batch predictions).
    print("ERMS Test", mean_squared_error(test_out, Gx_test), "Accuracy",
          acc_manual(test_out, Gx_test))
    print("ERMS VAL", mean_squared_error(val_out, Gx_val), "Accuracy",
          acc_manual(val_out, Gx_val))
def score(self, curr_sk_ids=None):
    """Score previous applications for the given current applicant ids.

    Loads previous_application.csv, keeps only the given SK_ID_CURR values with
    FLAG_LAST_APPL_PER_CONTRACT == 'Y' and non-cancelled contracts, maps the
    contract status via self.target_map, and predicts with self.model.
    Prints the MSE against the mapped status and returns the predictions
    adjusted for duplicate ids.

    Fix: replaced the mutable default argument `curr_sk_ids=[]` with the
    None-sentinel idiom (behavior unchanged; the list was only read).
    """
    if curr_sk_ids is None:
        curr_sk_ids = []
    orig_data = pd.read_csv(self.path_to_data_store +
                            '/previous_application.csv')
    # Keep last application per contract, drop cancelled contracts.
    orig_data = orig_data.loc[
        (orig_data['SK_ID_CURR'].isin(curr_sk_ids))
        & (orig_data['FLAG_LAST_APPL_PER_CONTRACT'] == 'Y')
        & (orig_data['NAME_CONTRACT_STATUS'] != 'Canceled')]
    sk_ids_from_payments = orig_data['SK_ID_CURR']
    # Map contract status strings to numeric targets in place.
    orig_data['NAME_CONTRACT_STATUS'].replace(self.target_map, inplace=True)
    test_data = self.__curate__(orig_data)
    y_test = test_data['NAME_CONTRACT_STATUS']
    X_test = test_data.drop(['NAME_CONTRACT_STATUS'], axis=1)
    preds = self.model.predict(X_test)
    print('MSE Score : ', mean_squared_error(y_test, preds))
    return self.__adjustDuplicates__(preds, sk_ids_from_payments, curr_sk_ids)
def evaluate(ytrue, ypred):
    """Compute regression evaluation metrics for a set of predictions.

    :param ytrue: true value of the dependent variable, numpy array
    :param ypred: predictions for the dependent variable, numpy array
    :return: tuple (r2, mse, mae, variance_explained, spearman) — R squared,
        mean squared error, mean absolute error, fraction of variance
        explained, and the Spearman rank-correlation coefficient
    """
    # Spearman returns (rho, p-value); only the coefficient is reported.
    rho, _ = spearmanr(ytrue, ypred)
    return (
        r2_score(ytrue, ypred),
        mean_squared_error(ytrue, ypred),
        mean_absolute_error(ytrue, ypred),
        explained_variance_score(ytrue, ypred),
        rho,
    )
def test_DBN(finetune_lr=0.1, pretraining_epochs=100, pretrain_lr=0.01, k=1,
             training_epochs=100, dataset=3, batch_size=10,
             layers=[1000, 1000, 1000]):
    """Train and evaluate a supervised DBN regressor on one omics dataset.

    :param dataset: 1-based index selecting the dataset (see temp_title)
    :param layers: hidden layer sizes for the DBN (read-only default)
    Prints R-squared and MSE on the held-out 20% split.
    """
    # title
    temp_title = [
        "DNA Methylation", "Gene Expression HTSeq", "miRNA Expression"
    ]
    print("\nSurvival Rate Regression with " + temp_title[dataset - 1] +
          " (Tensorflow)\n")
    # Loading dataset
    X, Y = load_data(dataset)
    # Splitting data (fixed seed for reproducibility)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                        random_state=1337)
    # Data scaling — fit on training data only
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    # Training
    regressor = SupervisedDBNRegression(hidden_layers_structure=layers,
                                        learning_rate_rbm=pretrain_lr,
                                        learning_rate=finetune_lr,
                                        n_epochs_rbm=pretraining_epochs,
                                        n_iter_backprop=training_epochs,
                                        batch_size=batch_size,
                                        activation_function='relu')
    regressor.fit(X_train, Y_train)
    # Test — apply the training scale to the test set
    X_test = min_max_scaler.transform(X_test)
    Y_pred = regressor.predict(X_test)
    try:
        print('Done.\nR-squared: %f\nMSE: %f' %
              (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
    except Exception as e:
        # Metrics can overflow float32 when the DBN diverges.
        print(
            "Infinity or a value too large for dtype('float32'). Please try different layer settings."
        )
def update_mse(tmp_input_element, tmp_list):
    """Train a DBN regressor from an HS element's hyperparameters and record MSE.

    Builds train/test splits from `tmp_list`, fits a SupervisedDBNRegression
    with the element's hyperparameters, stores train/test MSE back on the
    element (clamped to 1000 on NaN/inf predictions), appends a result row to
    TensorGlobal.followHs, resets the TF graph, and returns the element.
    """
    data_train, label_train, data_test, label_test = \
        HSMemory.create_train_and_test_data(tmp_list,
                                            tmp_input_element.number_visible_input)
    tmp_regression = SupervisedDBNRegression(
        hidden_layers_structure=[
            tmp_input_element.number_visible_input,
            tmp_input_element.number_hidden_input
        ],
        learning_rate_rbm=tmp_input_element.learning_rate_rbm,
        learning_rate=tmp_input_element.learning_rate,
        n_epochs_rbm=tmp_input_element.n_epochs_rbm,
        n_iter_backprop=tmp_input_element.n_iter_back_prop,
        contrastive_divergence_iter=tmp_input_element.
        contrastive_divergence_iter,
        batch_size=tmp_input_element.batch_size,
        activation_function=tmp_input_element.activation_function,
        n_hidden_layers_mlp=tmp_input_element.n_hidden_layers_mlp,
        cost_function_name=tmp_input_element.cost_function_name)
    tmp_regression.fit(data_train, label_train)  # train data
    # Average backprop loss over all iterations = training MSE proxy.
    tmp_input_element.train_mse = sum(
        tmp_regression.train_loss) / HSElement.config_n_iter_back_prop
    y_pred_test = tmp_regression.predict(data_test)
    check_nan = np.isnan(y_pred_test).any()
    if check_nan:
        # Sentinel penalty value when the model diverged to NaN.
        tmp_input_element.test_mse = 1000
    else:
        tmp_input_element.test_mse = mean_squared_error(
            label_test, y_pred_test)
    if np.isnan(tmp_input_element.train_mse) or np.isinf(
            tmp_input_element.train_mse):
        tmp_input_element.train_mse = 1000
    # add to export result (trailing blanks pad the spreadsheet columns)
    tmp_result_data = [
        tmp_input_element.learning_rate_rbm, tmp_input_element.learning_rate,
        tmp_input_element.number_visible_input,
        tmp_input_element.number_hidden_input, tmp_input_element.train_mse,
        tmp_input_element.test_mse, '', '', '', '', '', '', '', ''
    ]
    TensorGlobal.followHs.append(tmp_result_data)
    TensorGlobal.sessFlg = True
    # Free the TensorFlow graph so repeated calls don't accumulate state.
    tf.reset_default_graph()
    del tmp_regression
    return tmp_input_element
def get_regression_metrics(ground_truth_value, predicted_value):
    """Return a dict of sklearn regression metrics comparing truth to prediction.

    :param ground_truth_value: true target values
    :param predicted_value: model predictions
    :return: dict mapping metric name to its value
    """
    metric_funcs = {
        'r2_score': r2_score,
        'mean_squared_error': mean_squared_error,
        #'mean_squared_log_error': mean_squared_log_error,
        'mean_absolute_error': mean_absolute_error,
        'explained_variance_score': explained_variance_score,
        'median_absolute_error': median_absolute_error,
        'max_error': max_error,
    }
    return {
        name: metric(ground_truth_value, predicted_value)
        for name, metric in metric_funcs.items()
    }
def read_input_data():
    """Train a DBN on the normalized dataset and print test-set metrics.

    Fixes: sklearn metrics expect (y_true, y_pred); the original passed the
    predictions first, which changes r2_score (it is not symmetric).  Also
    renamed the misspelled local 'resutls' to 'results'.
    """
    X_train, Y_train, X_test, Y_test = normalize_data()
    rng = numpy.random.RandomState(123)
    # print(normalized_X_train.values, normalized_Y_train)
    # print(normalized_X_train.shape, normalized_Y_train.shape)
    dbn = DBN(input=X_train, label=Y_train, n_ins=X_train.shape[1],
              hidden_layer_sizes=[80] * 10, n_outs=1, rng=rng)
    dbn.pretrain(lr=0.001, k=1, epochs=1000)
    dbn.finetune(lr=0.001, epochs=200)
    results = dbn.predict(X_test)
    print("results", results, Y_test)
    print(Y_test.shape)
    # y_true first, y_pred second (the order matters for r2_score).
    print(r2_score(Y_test, results), mean_squared_error(Y_test, results))
    print(mean_absolute_error(Y_test, results))
def get_resutls_column(model, trainfolds_dfs, testfolds_dfs, train_set,
                       test_set, feature_set, target_col_name):
    """Cross-validate `model` on the given folds and evaluate on the test set.

    Returns one result column: [test MAE, test MSE, test Pearson, test
    Spearman, CV-average MAE/MSE/Pearson/Spearman, feature_set,
    len(feature_set)] followed by the test-set predictions.

    Fix: the test-set evaluation referenced an undefined name `target_col`
    (NameError at runtime); it must be the `target_col_name` parameter.
    """
    MSEs = [None] * len(testfolds_dfs)
    MAEs = [None] * len(testfolds_dfs)
    SPs = [None] * len(testfolds_dfs)
    PNs = [None] * len(testfolds_dfs)
    # Per-fold fit/evaluate for cross-validated averages.
    for i in range(len(testfolds_dfs)):
        train_X = trainfolds_dfs[i].loc[:, feature_set].values
        train_Y = trainfolds_dfs[i].loc[:, target_col_name].values
        test_X = testfolds_dfs[i].loc[:, feature_set].values
        test_Y = testfolds_dfs[i].loc[:, target_col_name].values
        model.fit(train_X, train_Y)
        test_pred = model.predict(test_X)
        MAEs[i] = MAE(test_pred, test_Y)
        MSEs[i] = MSE(test_pred, test_Y)
        SPs[i] = SPC(test_pred, test_Y)
        PNs[i] = PNC(test_pred, test_Y)
    train_cvavg_MAE = numpy.mean(MAEs)
    train_cvavg_MSE = numpy.mean(MSEs)
    train_cvavg_PN = numpy.mean(PNs)
    train_cvavg_SP = numpy.mean(SPs)
    # Final fit on the full training set, evaluated on the held-out test set.
    # FIX: was `target_col` (undefined) — use the target_col_name parameter.
    test_Y = test_set.loc[:, target_col_name].values
    test_X = test_set.loc[:, feature_set].values
    train_X = train_set.loc[:, feature_set].values
    train_Y = train_set.loc[:, target_col_name].values
    model.fit(train_X, train_Y)
    test_pred = model.predict(test_X)
    testset_pn, _ = pearsonr(test_Y, test_pred)
    testset_sp, _ = spearmanr(test_Y, test_pred)
    testset_mae = mean_absolute_error(test_Y, test_pred)
    testset_mse = mean_squared_error(test_Y, test_pred)
    column = [
        testset_mae, testset_mse, testset_pn, testset_sp, train_cvavg_MAE,
        train_cvavg_MSE, train_cvavg_PN, train_cvavg_SP, feature_set,
        len(feature_set)
    ]
    column += list(test_pred)
    return column
def score(self):
    """Predict with the ensemble on application_test.csv and write submission.csv.

    When self.score_type != 'actual', the csv's TARGET column is also used to
    print the MSE of the predictions.  NOTE(review): reading 'TARGET' from
    application_test.csv implies that file carries labels in this setup —
    confirm against the data pipeline.
    """
    test_target = None
    test_data = pd.read_csv(self.path_to_data_store + '/application_test.csv')
    sk_id_curr = test_data[['SK_ID_CURR']]
    if self.score_type == 'actual':
        # No labels available: build features from the ids only.
        test_data = self.createEnsembleData(test_data[['SK_ID_CURR']])
    else:
        # Keep labels for scoring, then build the same feature set.
        test_target = test_data['TARGET']
        test_data = self.createEnsembleData(test_data[['SK_ID_CURR']])
    score = self.ensemble.predict(test_data)
    if test_target is not None:
        print(mean_squared_error(test_target.values, score))
        output = self.format_output(sk_id_curr, score, test_target)
        output.to_csv('submission.csv', index=False)
    else:
        output = self.format_output(sk_id_curr, score)
        output.to_csv('submission.csv', index=False)
def test_DBN(finetune_lr=0.1, pretraining_epochs=100, pretrain_lr=0.01,
             training_epochs=100, dataset=6, batch_size=10,
             layers=[1000, 1000, 1000], dropout=0.2, pca=2, optimizer=1):
    """Train a supervised DBN regressor (TensorFlow) and print train/test metrics.

    :param dataset: 1-based index selecting the dataset (see temp_title)
    :param pca: forwarded to load_data (dimensionality reduction setting)
    :param dropout/optimizer: accepted but not forwarded to the regressor here
    """
    # title
    temp_title = ["DNA Methylation Platform GPL8490",
                  "DNA Methylation Platform GPL16304",
                  "Gene Expression HTSeq Count",
                  "Gene Expression HTSeq FPKM",
                  "Gene Expression HTSeq FPKM-UQ",
                  "miRNA Expression"]
    print("\nSurvival Rate Regression with " + temp_title[dataset-1] +
          " (Tensorflow)\n")
    # Loading dataset
    X, Y = load_data(dataset, pca)
    # Splitting data (fixed seed for reproducibility)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                        random_state=1337)
    # Training
    regressor = SupervisedDBNRegression(hidden_layers_structure=layers,
                                        learning_rate_rbm=pretrain_lr,
                                        learning_rate=finetune_lr,
                                        n_epochs_rbm=pretraining_epochs,
                                        n_iter_backprop=training_epochs,
                                        batch_size=batch_size,
                                        activation_function='relu',
                                        dropout_p=dropout)
    regressor.fit(X_train, Y_train)
    # Test — predictions come back as a column; transpose to a flat vector.
    Y_pred = regressor.predict(X_test)
    Y_pred = numpy.transpose(Y_pred)[0]
    Y_pred_train = regressor.predict(X_train)
    Y_pred_train = numpy.transpose(Y_pred_train)[0]
    print('Done.\nR-squared: %f\nMSE: %f' %
          (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred)))
    print('Done. \ntraining R-squared: %f\nMSE: %f' %
          (r2_score(Y_train, Y_pred_train),
           mean_squared_error(Y_train, Y_pred_train)))
def test_data(self):
    """Evaluate the final model on the stratified test set.

    Prints the test RMSE and a 95% confidence interval for the RMSE derived
    from a t-interval on the squared errors.
    """
    housing_data = self.strat_test_set
    X = housing_data.drop("median_house_value", axis=1)
    y = housing_data["median_house_value"].copy()
    # Use the already-fitted pipeline (transform only, no re-fitting).
    X_prepared = self.full_pipeline.transform(X)
    final_predictions = self.final_model.predict(X_prepared)
    final_mse = mean_squared_error(y, final_predictions)
    final_rmse = np.sqrt(final_mse)
    print('\n')
    print('final root mean squared error (RMSE):\n{0}\n'.format(final_rmse))
    confidence = 0.95
    squared_errors = (final_predictions - y) ** 2
    mean = squared_errors.mean()
    scale = stats.sem(squared_errors)  # standard error of the mean
    m = len(squared_errors)
    # t-interval on the mean squared error, then sqrt to get an RMSE interval.
    interval95 = np.sqrt(stats.t.interval(confidence, m-1, loc=mean, scale=scale))
    print('95% confidence interval for the RMSE:\n{0}\n'.format(interval95))
def score(self, X, y, step=1, method="r2"):
    """
    Produce multi-step prediction of y, and compute the metrics against y.
    Nan is ignored when computing the metrics.

    :param array-like X: exogenous input time series, shape = (n_samples, n_exog_inputs)
    :param array-like y: target time series to predict, shape = (n_samples)
    :param int step: prediction step.
    :param string method: could be "r2" (R Square) or "mse" (Mean Square Error).

    :return: prediction metric. Nan is ignored when computing the metrics.
    """
    ypred = self.predict(X, y, step=step)
    # Keep only positions where both the target and the prediction are finite.
    valid = ~(np.isnan(y) | np.isnan(ypred))
    y_valid = y[valid]
    ypred_valid = ypred[valid]
    if method == "r2":
        return r2_score(y_valid, ypred_valid)
    if method == "mse":
        return mean_squared_error(y_valid, ypred_valid)
def _get_error(self, used, used_bounds):
    """Return (oob, cve): out-of-bag error and cross-validated RMSE for a
    feature subset.

    :param used: index/mask selecting the feature columns to evaluate
    :param used_bounds: index/mask selecting the matching bounds entries
    :return: tuple (oob_error, cv_rmse); cv_rmse is inf when self.cv is off
    """
    if self.cv:
        cv_errors = []
        for train_idx, test_idx, in self.kf.split(self.X):
            self._refit_model(
                self.types[used], self.bounds[used_bounds],
                self.X[train_idx.reshape(-1, 1), used],
                self.y[train_idx])  # refit the model every round
            # predict(...) returns a tuple; [0] is the mean prediction —
            # presumably (mean, var) as in SMAC-style models; TODO confirm.
            pred = self.model.predict(self.X[test_idx.reshape(-1, 1),
                                             used])[0]
            cv_errors.append(
                np.sqrt(mean_squared_error(self.y[test_idx], pred)))
        cve = np.mean(cv_errors)
    else:
        # NOTE(review): np.float is removed in NumPy >= 1.24; float('inf')
        # would be the portable spelling.
        cve = np.float('inf')
    # Final refit on all data so the OOB error reflects the full dataset.
    self._refit_model(self.types[used], self.bounds[used_bounds],
                      self.X[:, used], self.y)  # refit the model every round
    oob = self.model.rf.out_of_bag_error()
    return oob, cve
def train(self, data, training): 'En esta funcion se realiza 10-Fold CV para entrenar la red con una expansion de entre 20-75%.' 'El algoritmo de entrenamiento es Descenso por Gradiente Estocastico o Extreme Learning Machine.' # 10-Fold Cross Validation folds = 10; iters = 10; kf = KFold(data.shape[0], n_folds=folds) hiddenNodes = arange(2*data.shape[1])+1 Error_HNodes = [] Nets_HNodes = [] for j in hiddenNodes: self.setHiddenNodes([j]) Mean_error_iter = [] Mean_nets_iter = [] for train_index, val_index in kf: X, Xval = data[train_index], data[val_index] Error_iter = [] Nets_iter = [] for i in np.arange(iters): self.initialization() # Inicializaciones comunes if training == 'elm': Out,H,N = self.sim(X) H = H[-1] pseudoinverse = pinv(H) beta = np.dot(pseudoinverse,X) self.Weights[-1] = beta # Validation Out_val,H_val,N_val = self.sim(Xval) # Se guarda el error y la red MSE = [mean_squared_error(Xval,Out_val)] Networks = [self.Weights] Error_iter.append(np.min(MSE)) Nets_iter.append(Networks[np.argmin(MSE)]) Mean_error_iter.append(np.mean(Error_iter)) Mean_nets_iter.append(Nets_iter[np.argmin(Error_iter)]) Error_HNodes.append(np.mean(Mean_error_iter)) Nets_HNodes.append(Mean_nets_iter[np.argmin(Mean_error_iter)]) self.Weights = Nets_HNodes[np.argmin(Error_HNodes)] Final_Error = np.min(Error_HNodes) selected_Nodes = hiddenNodes[np.argmin(Error_HNodes)] self.setHiddenNodes([selected_Nodes]) return Final_Error
def fnLinearRegression(TrainData, Target, Title):
    """Fit ordinary least squares on (TrainData, Target), plot the fit, and
    return the metrics.

    :param TrainData: feature values (plotted on the x axis)
    :param Target: target values
    :param Title: title for the scatter plot
    :return: tuple (MSE, coefficients, intercept, R2 score)
    """
    model = linear_model.LinearRegression()
    model.fit(TrainData, Target)
    fitted = model.predict(TrainData)
    # Visualize the raw points and the fitted line.
    plt.figure(1)
    plt.title(Title)
    plt.scatter(TrainData, Target)
    plt.plot(TrainData, fitted, color='blue')
    plt.show()
    # Mean squared error and R^2 between prediction and target.
    mse_value = mean_squared_error(Target, fitted)
    r2_value = r2_score(Target, fitted)
    return mse_value, model.coef_, model.intercept_, r2_value
def score(self, X, y, method="r2", verbose=False):
    """
    Produce multi-step prediction of y, and compute the metrics against y.
    Nan is ignored when computing the metrics.

    :param array-like X: exogenous input time series, shape = (n_samples, n_exog_inputs)
    :param array-like y: target time series to predict, shape = (n_samples)
    :param string method: could be "r2" (R Square) or "mse" (Mean Square Error).
    :param bool verbose: when True, report how many points were evaluated.

    :return: prediction metric. Nan is ignored when computing the metrics.
    """
    ypred = self.predict(X, y)
    # Keep only positions where both the target and the prediction are finite.
    valid = ~(np.isnan(y) | np.isnan(ypred))
    if verbose:
        print('Evaluating {} score, {} of {} data points are evaluated.'.
              format(method, np.sum(valid), y.shape[0]))
    y_valid = y[valid]
    ypred_valid = ypred[valid]
    if method == "r2":
        return r2_score(y_valid, ypred_valid)
    if method == "mse":
        return mean_squared_error(y_valid, ypred_valid)
def predict_without_dwt(dataset, testnum, featurenum):
    """Forecast the last `testnum` samples of `dataset` directly with a DBN
    (baseline without wavelet decomposition — compare predict_with_dwt).

    :param dataset: 1-D sequence to forecast (numpy array — sliced with
        np.newaxis below)
    :param testnum: number of trailing samples held out for testing
    :param featurenum: number of lagged features used by generate_data
    :return: tuple (dataset_pred, mse)
    """
    dataset = dataset[np.newaxis, :]  # generate_data expects a 2-D row matrix
    x_train, x_test, y_train, y_test = generate_data(dataset, testnum,
                                                     featurenum)
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    # Reuse the training scale on the test features (no re-fitting).
    x_test = min_max_scaler.transform(x_test)
    dbn1 = dbn.DBN(x_train=x_train, y_train=y_train, x_test=x_test,
                   y_test=y_test, hidden_layer=[250], learning_rate_rbm=0.0005,
                   batch_size_rbm=150, n_epochs_rbm=200, verbose_rbm=1,
                   random_seed_rbm=500, activation_function_nn='tanh',
                   learning_rate_nn=0.005, batch_size_nn=150,
                   n_epochs_nn=1500, verbose_nn=1, decay_rate=0)
    dbn1.pretraining()
    dbn1.finetuning()
    dataset_pred = dbn1.result[:, 0]  # first column of the DBN output
    dataset_test = dataset[0, dataset.shape[1] - testnum:dataset.shape[1]]
    mse = mean_squared_error(dataset_pred, dataset_test)
    plt.figure(figsize=(12, 9), dpi=100)
    plt.plot(dataset_test)
    plt.plot(dataset_pred)
    plt.legend(['dataset_real', 'dataset_prediction'], loc='upper right')
    plt.title('sequence prediction result', fontsize=16)
    plt.xlabel('MSE = %f' % mse)
    plt.draw()
    #plt.show()
    return dataset_pred, mse
def regressionSummary(y_true, y_pred):
    """Print a summary of regression performance metrics.

    Input:
        y_true: actual values
        y_pred: predicted values
    Prints ME, RMSE, MAE, MPE and MAPE in a right-aligned table.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    residuals = actual - predicted
    n = len(residuals)
    rows = [
        ('Mean Error (ME)', sum(residuals) / n),
        ('Root Mean Squared Error (RMSE)',
         math.sqrt(regression.mean_squared_error(actual, predicted))),
        ('Mean Absolute Error (MAE)', sum(abs(residuals)) / n),
        ('Mean Percentage Error (MPE)', 100 * sum(residuals / actual) / n),
        ('Mean Absolute Percentage Error (MAPE)',
         100 * sum(abs(residuals / actual) / n)),
    ]
    # Right-align the metric names to the longest label.
    template = '{{:>{}}} : {{:.4f}}'.format(max(len(row[0]) for row in rows))
    print('\nRegression statistics\n')
    for name, value in rows:
        print(template.format(name, value))
def train_model_func(learning_rate_rbm, learning_rate, batch_size, feature,
                     label, path_out_png, pred_num, train_deep):
    """Sweep the training-window depth and plot the average test RMSE per depth.

    Serial variant: for each depth in 1..train_deep, trains `pred_num` models
    on sliding windows taken from the end of the training split, averages
    their test RMSE, and saves the RMSE-vs-depth curve to `path_out_png`.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(feature, label,
                                                        test_size=0.2,
                                                        shuffle=False)
    print("Training model...")
    print("RMSE (on training data):")
    root_mean_squared_errors = []
    for deep in range(1, train_deep + 1):
        RMSE_total = 0
        for i in range(0, pred_num):
            # Window of `deep` samples ending i samples before the split point.
            x_train = np.array(
                X_train[X_train.shape[0] - i - deep:X_train.shape[0] - i])
            # NOTE: 'y_trian' typo kept — train_model expects this keyword.
            y_trian = np.array(
                Y_train[Y_train.shape[0] - i - deep:Y_train.shape[0] - i])
            x_test = np.array(X_test)
            y_test = np.array(Y_test)
            predictions = train_model(learning_rate_rbm=learning_rate_rbm,
                                      learning_rate=learning_rate,
                                      batch_size=batch_size,
                                      x_train=x_train,
                                      y_trian=y_trian,
                                      x_test=x_test)
            root_mean_squared_error = math.sqrt(
                mean_squared_error(y_test, predictions))
            print("\t\ti:\t", root_mean_squared_error)
            RMSE_total += root_mean_squared_error
        RMSE_avg = RMSE_total / pred_num
        root_mean_squared_errors.append(RMSE_avg)
        print("train_deep:", deep, "\tRMSE_avg:", RMSE_avg)
    # Output a graph of loss metrics over periods.
    # plt.subplot(1, 2, 2)
    plt.ylabel('RMSE')
    plt.xlabel('train_deep')
    plt.title("Root Mean Squared Error vs. Train Deep")
    plt.tight_layout()
    plt.plot(root_mean_squared_errors)
    plt.savefig(path_out_png)
    print("finished.")
# Script: train a supervised DBN regressor on the Boston housing dataset.
# NOTE(review): uses the Python 2 print statement — this chunk targets Python 2.
from sklearn.preprocessing import MinMaxScaler
from dbn.tensorflow import SupervisedDBNRegression

# Loading dataset
boston = load_boston()
X, Y = boston.data, boston.target

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                    random_state=0)

# Data scaling — fit on training data only
min_max_scaler = MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)

# Training
regressor = SupervisedDBNRegression(hidden_layers_structure=[100],
                                    learning_rate_rbm=0.01,
                                    learning_rate=0.01,
                                    n_epochs_rbm=20,
                                    n_iter_backprop=200,
                                    batch_size=16,
                                    activation_function='relu')
regressor.fit(X_train, Y_train)

# Test — apply the training scale to the test set
X_test = min_max_scaler.transform(X_test)
Y_pred = regressor.predict(X_test)
print 'Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred),
                                         mean_squared_error(Y_test, Y_pred))
X_train = min_max_scaler.fit_transform(X_train) #print('x_train number: ', X_train[0]) #print('x_test number: ', X_test.shape[1]) #print('Y_train number: ', Y_train[0]) #print('y_test number: ', Y_test.shape[0]) # Training regressor = SupervisedDBNRegression(hidden_layers_structure=[100], learning_rate_rbm=0.01, learning_rate=0.01, n_epochs_rbm=20, n_iter_backprop=200, batch_size=16, activation_function='relu') #regressor.fit(X_train, Y_train) # Save the model #regressor.save('model_regression_128.pkl') # Restore it regressor = SupervisedDBNRegression.load('models/model_regression.pkl') # Test X_test = min_max_scaler.transform(X_test) Y_pred = regressor.predict(X_test) print('Done.\nR-squared: %f\nMSE: %f' % (r2_score(Y_test, Y_pred), mean_squared_error(Y_test, Y_pred))) #print(Y_pred)
#Computing the RMSE def ComputeRMSE(result, predicted) :
# NOTE(review): 'tpr thresholds' looks like the tail of a roc_curve unpacking
# cut off above this chunk — confirm against the preceding section.
tpr
thresholds
# =============================================================================
# REGRESSION EVALUATION METRICS
# =============================================================================
# NOTE(review): sklearn.metrics.regression is a private module path removed in
# modern sklearn; the public path is sklearn.metrics.
from sklearn.metrics.regression import (r2_score, explained_variance_score,
                                        mean_absolute_error,
                                        median_absolute_error,
                                        mean_squared_error,
                                        mean_squared_log_error)

# Single-output example (results are discarded — demo/REPL style).
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
mean_squared_error(y_true, y_pred)
r2_score(y_true, y_pred)
# Multi-output example.
y_true = [[0.5, 1], [-1, 1], [7, -6]]
y_pred = [[0, 2], [-1, 2], [8, -5]]
mean_squared_error(y_true, y_pred)
r2_score(y_true, y_pred)
# =============================================================================
# CLUSTERING EVALUATION METRICS
# =============================================================================
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
# Script chunk: fit the DBN regressor, time training and testing, record
# train/test MSE (clamped to 1000 on NaN/inf), and queue the result row.
regressor.fit(data_train, label_train)
# Average backprop loss over all iterations = training MSE proxy.
tmp_train_mse = sum(regressor.train_loss) / RandomRegression.number_iter_backprop
tmp_min_mse_label = 'MORE BAD'
stop_time = time.time()
print("THE TIME FOR TRAINING: " + str((stop_time - start_time)) + ' second')
# Test
start_time = time.time()
Y_pred_test = regressor.predict(data_test)
print("Begin to call mean_squared_error")
check_nan = np.isnan(Y_pred_test).any()
if check_nan:
    # Sentinel penalty value when the model diverged to NaN.
    tmp_test_mse = 1000
else:
    tmp_test_mse = mean_squared_error(label_test, Y_pred_test)
if np.isnan(tmp_train_mse) or np.isinf(tmp_train_mse):
    tmp_train_mse = 1000
stop_time = time.time()
print("THE TIME FOR TEST: " + str((stop_time - start_time)) + ' second')
TensorGlobal.sessFlg = True
# Free the TensorFlow graph so repeated runs don't accumulate state.
tf.reset_default_graph()
del regressor
tmp_element_data = [tmp_lrr, tmp_lr, RandomRegression.number_visible_input,
                    RandomRegression.number_hidden_input, tmp_train_mse,
                    tmp_test_mse]
result_data.append(tmp_element_data)
# Export result to excel file
print("Begin to print result")
now = datetime.now()
def build_predictor(fname):
    """Build a runtime predictor from a whitespace-separated data file.

    Python 2 code (print statements, izip, list-returning map).  Reads rows of
    floats where column 0 is the target, deduplicates rows with identical
    features by averaging their targets, expands features with polynomial and
    interaction terms, fits a linear regression on an 80% split, and prints
    MSE plus a "well estimated" rate on the remaining 20%.
    NOTE(review): ends with exit(0); the loop after it is unreachable.
    """
    s = []
    d1, d2 = 0, 0
    for line in open(fname):
        s.append(map(float, line.split()))
    # s = s[5000:]
    # print '--- Error on complete set'
    # y = map(lambda u: u[0], s)
    # for idx, val in enumerate(["reqtime", "tsafrir", "sgd"]):
    #     xi = map(lambda u: u[idx+1], s)
    #     print regression.mean_squared_error(y, xi), val
    #     accuracy(y, xi)
    # s = []
    # for i in range(-100,100,3):
    #     # s.append([7*(i**3)-5*i*i-17*i+8, i,i*i,i*i*i])
    #     s.append([7*(i**3)-5*i*i-17*i+8, i])
    import random
    random.shuffle(s)
    # s=[ [1, 6,8,9], [5, 6,8,9], [7, 6,8,9], [2, 7,8,9]]
    # keyfunc = lambda u: u[0]
    # s.sort(key=keyfunc)
    print "original data size", len(s)
    # (An earlier dedup-by-target variant was commented out here; the active
    # version below dedups by the feature tuple instead.)
    # mp = {}
    # for r in s:
    #     key, val = r[0], r[1:]
    #     if not key in mp:
    #         mp[key] = []
    #     mp[key].append(val)
    # s = []
    # for key in mp:
    #     l = mp[key]
    #     if len (l) > 1:
    #         t = reduce(lambda u,v: [i+j for i,j in izip(u,v)], l)
    #         t = map(lambda u: 1.0*u/len(l), t)
    #         mp[key] = t
    #     else:
    #         mp[key] = l[0]
    #     s.append([key] + mp[key])
    # print "data size later", len(s)
    # Deduplicate: rows sharing the same feature tuple get the mean target.
    mp = {}
    for r in s:
        key, val = r[0], tuple(r[1:])
        if not val in mp:
            mp[val] = []
        mp[val].append(key)
    s = []
    for key in mp:
        l = mp[key]
        mp[key] = sum(l)/len(l)
        s.append([mp[key]] + list(key))
    print "data size later", len(s)
    X = map(lambda u: u[1:], s)
    # X = map(lambda u: [u[1]], s)
    y = map(lambda u: u[0], s)
    # print len(X), len(set(tuple(u) for u in X))
    import itertools
    # Feature expansion: squares, cubes, and pairwise/triple interactions.
    for x in X:
        t = []
        for i in x:
            t.extend([i*i,i**3])
        for a,b in itertools.combinations(x, 2):
            t.append(a*b)
        for a,b,c in itertools.combinations(x, 3):
            t.append(a*b*c)
        x.extend(t)
    # prepare the training and testing data for the model
    nCases = len(y)
    nTrain = int(np.floor(nCases * 0.8))
    trainX = X[:nTrain]
    trainY = y[:nTrain]
    testX = X[nTrain:]
    testY = y[nTrain:]
    # print type(X), X[0]
    # Second assignment overrides the first; only the rbf SVR would be used
    # (and its fit is commented out anyway).
    svr = SVR(kernel='linear', C=1.0, epsilon=0.2)
    svr = SVR(kernel='rbf', C=1.0, epsilon=0.2, gamma=.0001)
    log = LinearRegression(normalize=True)
    # train both models
    # svr.fit(trainX, trainY)
    log.fit(trainX, trainY)
    # predict test labels from both models
    predLog = log.predict(testX)
    # predSvr = svr.predict(testX)
    # show it on the plot
    # plt.plot(testY, testY, label='true data')
    # # plt.plot(testY, predSvr, 'co', label='SVR')
    # plt.plot(testY, predLog, 'mo', label='LogReg')
    # plt.legend()
    # plt.show()
    print '--- Error on test set'
    meta_mse = regression.mean_squared_error(testY, predLog)
    print int(meta_mse), "meta predictor"
    # print regression.mean_squared_error(testY, predSvr)
    # A prediction is "well estimated" if within a factor-2 band of the truth.
    well_estimated = sum([1 if abs(u-v)<2*u else 0 for u,v in izip(testY, predLog)])
    print "well estimated, all, percent: ", well_estimated, len(testY), int(100.0*well_estimated/len(testY)), "%"
    exit(0)
    # Unreachable: baseline comparison against raw feature columns.
    for idx, val in enumerate(["reqtime", "tsafrir", "sgd"]):
        mse = regression.mean_squared_error(testY, map(lambda u: u[idx], testX))
        print "%d %.2f %% %s" % (mse, 100*(mse-meta_mse)/mse, val)
# Script chunk: restore a trained DBN regressor from disk, evaluate it on the
# current test split, and append the metrics to a per-training-run CSV.
path3 = 'modelo_treinamento_2019_11_01_14_10_48_modelo_1_[90, 90, 90].pkl'
pathCompleto = path1 + path2 + path3
regressor = SupervisedDBNRegression.load(pathCompleto)

# Test
Y_pred = regressor.predict(X_test)
# if conjTreino == 'degrauUnitario.csv':
#     Y_pred = Y_pred / 4.6  # 4.62073146825719
r2Score = r2_score(Y_test, Y_pred)
MSE = mean_squared_error(Y_test, Y_pred)
print('\nDone.\nR-squared: %f\nMSE: %f' % (r2Score, MSE))
# One result row per run; mode='a' appends, header only on the first run.
arquivoResultados = pd.DataFrame(data={
    "Arquivo": [conjTreino],
    "r2Score": [r2Score],
    "MSE": [MSE]
})
arquivoResultados.to_csv(r'./Resultados/resultados_teste_' +
                         indiceTreinamento + '.csv', sep=',', index=False,
                         mode='a', header=primeiraExecucao)
def test_DBN(finetune_lr=0.1, pretraining_epochs=100, pretrain_lr=0.01, k=1,
             training_epochs=100, dataset=6, batch_size=10, layers=None,
             dropout=0.2, pca=2, optimizer=1):
    """Pretrain, finetune and evaluate a DBN survival-rate regressor (Theano).

    Args:
        finetune_lr: learning rate for supervised finetuning.
        pretraining_epochs: epochs of unsupervised RBM pretraining per layer.
        pretrain_lr: learning rate for CD-k pretraining.
        k: number of Gibbs steps in contrastive divergence.
        training_epochs: epochs of supervised finetuning.
        dataset: 1-based index selecting one of the six omics datasets.
        batch_size: minibatch size for both pretraining and finetuning.
        layers: hidden layer sizes; defaults to [1000, 1000, 1000].
        dropout: dropout rate used in finetuning and prediction.
        pca: PCA setting forwarded to load_data.
        optimizer: optimizer selector forwarded to build_finetune_functions.

    Prints the test-set MSE and R2. Relies on module-level helpers
    load_data, shared_dataset, train_test_split, DBN, mean_squared_error,
    r2_score (imported elsewhere in this file).
    """
    # FIX: avoid a shared mutable default argument; effective default unchanged.
    if layers is None:
        layers = [1000, 1000, 1000]

    # Title of the selected dataset (1-based `dataset` index)
    temp_title = ["DNA Methylation Platform GPL8490",
                  "DNA Methylation Platform GPL16304",
                  "Gene Expression HTSeq Count",
                  "Gene Expression HTSeq FPKM",
                  "Gene Expression HTSeq FPKM-UQ",
                  "miRNA Expression"]
    # FIX: original used "\S...", an invalid escape sequence; the trailing
    # "\n" in the same string shows a leading newline was intended.
    print("\nSurvival Rate Regression with " + temp_title[dataset-1] + " (Theano)\n")

    #########################
    #### PREPARE DATASET ####
    #########################
    # Load datasets: datasets[0] = inputs, datasets[1] = labels
    datasets = load_data(dataset, pca)
    # Split dataset into training and test set (fixed seed for reproducibility)
    train_input_set, test_input_set, train_label_set, test_label_set = \
        train_test_split(datasets[0], datasets[1], test_size=0.25, random_state=100)
    # Size of input layer
    _, nr_in = train_input_set.shape
    # Number of training batches (incomplete trailing batch is dropped)
    n_train_batches = train_input_set.shape[0] // batch_size
    # Cast inputs and labels as shared variables to accelerate computation
    train_set_x, train_set_y = shared_dataset(data_xy=(train_input_set, train_label_set))
    test_set_x, test_set_y = shared_dataset(data_xy=(test_input_set, test_label_set))

    #########################
    ##### BUILD NN MODEL ####
    #########################
    print('Build NN Model')
    numpy_rng = numpy.random.RandomState(123)
    dbn = DBN(numpy_rng=numpy_rng, n_ins=nr_in,
              hidden_layers_sizes=layers, n_outs=1)

    #########################
    ### PRETRAIN NN MODEL ###
    #########################
    print('Pretrain NN Model')
    # One pretraining function per RBM layer
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size, k=k)
    # Greedy layer-wise pretraining
    for i in range(dbn.n_layers):
        for epoch in range(pretraining_epochs):
            # c collects the monitoring cost per batch for RBM[i]
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr))
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
            print(numpy.mean(c, dtype='float64'))

    #########################
    ### FINETUNE NN MODEL ###
    #########################
    print('Train NN Model')
    train_fn = dbn.build_finetune_functions(train_set_x=train_set_x,
                                            train_set_y=train_set_y,
                                            batch_size=batch_size,
                                            learning_rate=finetune_lr,
                                            dropout=dropout,
                                            optimizer=optimizer)
    # Supervised finetuning over all minibatches
    for j in range(training_epochs):
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)

    #########################
    ##### TEST NN MODEL #####
    #########################
    print('Test NN Model')
    test_model = dbn.predict(test_set_x=test_set_x, dropout=dropout)
    # Run the compiled prediction function on the whole test set
    test_predicted_label_set = test_model()
    print(test_label_set)
    print(test_predicted_label_set)
    # Regression metrics on the test set
    mse = mean_squared_error(test_label_set, test_predicted_label_set)
    r2 = r2_score(test_label_set, test_predicted_label_set)
    print("MSE = " + str(mse))
    print("R2 = " + str(r2))
momentum = 1e-4 # do weight updates in imperative for pname, W, G in zip(cost_classification.list_arguments(), executor.arg_arrays, executor.grad_arrays): # Don't update inputs # MXNet makes no distinction between weights and data. if pname in ['data', 'lro']: continue # what ever fancy update to modify the parameters auto_momentum = mx.nd.minimum(momentum, mx.nd.power(mx.nd.sum(G), 2.0)) auto_k = mx.nd.minimum(0, mx.nd.minimum(1, 1 - auto_momentum)) vw = W * auto_momentum - .001 * G vn = W * momentum - .001 * G # print(auto_momentum.asnumpy(), auto_k.asnumpy()) W[:] = W + auto_k * vn + (1 - auto_k) * vw # Evaluation at each epoch output = [] for x in range(0, len(teIdx), batch_size): batchX = teIdx[x:x + batch_size] batchY = teIdy[x:x + batch_size] if batchX.shape[0] != batch_size: continue # use the test executor as we don't care about gradients executor_test.arg_dict['data'][:] = batchX executor_test.forward() output.extend(executor_test.outputs[0].asnumpy().tolist()) # print (str(num_correct) + ",") print(mean_squared_error(teIdy[:len(output)], output))
#rd = lm.LogisticRegression(penalty='l2', dual=True, tol=0.0001, # C=1, fit_intercept=True, intercept_scaling=1.0, # class_weight=None, random_state=None) rd=SVR(kernel='linear', degree=3, gamma='auto', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1) print "Training data" rd.fit(data.toarray(),y_train.values) scores1 = cross_val_score(rd, data , y_train, cv=cv, scoring='mean_squared_error') print scores1 pred = rd.predict(tfv.transform(list(np.asarray(x_test))).toarray()) print pred print sqrt(mean_squared_error(pred,y_test)) pred=np.round(pred) pred=pred.astype(int) print 'predicting actual test set...' predicted=rd.predict(tfv.transform(list(np.asarray(test))).toarray()) #for p in predicted: #print p predicted=np.round(predicted) predicted=predicted.astype(int) test["publication year"]=predicted test["record Id"]=record_id test.to_csv(submission, columns=['record Id','publication year'],index=False,sep='\t')
''' norm1 = np.linalg.norm(y_train) if norm1 != 0: y_train, y_test = y_train/norm1, y_test/norm1 print norm1 ''' print y_train.shape model = SVR(C=1.0, gamma=1.0) model = LinearRegression() lasso = Lasso(alpha=0.1).fit(X_train, y_train) enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train) y_pred = lasso.predict(X_test) print "MSE", mean_squared_error(y_test, y_pred) m = np.mean(y_test) print "MSE (Mean)",mean_squared_error(y_test, m*np.ones(len(y_test))) print "r^2 on test data", r2_score(y_test, y_pred) plt.plot(enet.coef_, label='Elastic net coefficients') plt.plot(lasso.coef_, label='Lasso coefficients') plt.legend(loc='best') plt.title("Lasso R^2: %f, Elastic Net R^2: %f" % (r2_score(y_test, lasso.predict(X_test)), r2_score(y_test, enet.predict(X_test)))) plt.show()