def calculate_rmse(grid_df):
    """Score ``larc`` against ``depth`` overall and within three x-ranges.

    The ranges are ``x < 180``, ``180 <= x < 230`` and ``x >= 230``.
    Scoring is delegated to the module-level ``rmse`` callable, always
    invoked with ``squared=False``.

    Args:
        grid_df: Object exposing ``larc``, ``depth`` and ``x`` attributes
            that support element-wise comparison and boolean-mask indexing
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        A list ``[overall, seg_1, seg_2, seg_3]`` of the four scores.
    """
    larc_values = grid_df.larc
    depths = grid_df.depth
    x = grid_df.x

    # Build each range mask once and reuse it for both series.
    segment_masks = (
        x < 180,
        (x >= 180) & (x < 230),
        x >= 230,
    )

    scores = [rmse(larc_values, depths, squared=False)]
    for mask in segment_masks:
        scores.append(rmse(larc_values[mask], depths[mask], squared=False))
    return scores
def train_score_predict(clf, X, y, X_predict, y_predict, record_performance,
                        sh_logger, type='classification'):
    """Fit ``clf``, score it on the prediction set and record timings.

    Args:
        clf: Estimator exposing ``fit(X, y)``; the object returned by
            ``fit`` must expose ``predict``.
        X: Training features.
        y: Training targets.
        X_predict: Features used for scoring.
        y_predict: True targets matching ``X_predict``.
        record_performance: Mapping whose ``'TrainTime'``,
            ``'PredictAccuracyScore'`` and ``'PredictTime'`` entries are
            lists; one value is appended to each.
        sh_logger: Object whose ``debug`` attribute is tested for
            truthiness (it is not called) to decide whether timings are
            printed.
        type: ``'classification'`` records the accuracy score; any other
            value records the mean squared error. The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        The object returned by ``clf.fit(X, y)``.
    """
    import time

    from sklearn.metrics import accuracy_score
    from sklearn.metrics import mean_squared_error

    # Train
    started = time.perf_counter()
    model = clf.fit(X, y)
    train_seconds = time.perf_counter() - started
    if sh_logger.debug:
        print('Training time: ')
        show_time(train_seconds)

    # save performance variables
    record_performance['TrainTime'].append(train_seconds)

    # Predict
    started = time.perf_counter()
    predictions = model.predict(X_predict)
    if type == 'classification':
        score = accuracy_score(y_predict, predictions)
    else:
        # Score against the function's own prediction set. The previous
        # code referenced y_test / X_test, which are not defined here.
        # NOTE: this is the plain mean squared error (no square root),
        # exactly what the former `rmse` alias computed.
        score = mean_squared_error(y_predict, predictions)
    record_performance['PredictAccuracyScore'].append(score)
    predict_seconds = time.perf_counter() - started

    record_performance['PredictTime'].append(predict_seconds)
    if sh_logger.debug:
        print('\nPrediction time: ')
        show_time(predict_seconds)
    return model
def optimize_k(df, target, exclude=None):
    """Evaluate KNN imputation for every odd neighbour count from 1 to 19.

    For each ``k`` the data is imputed with ``KNNImputer``, split 80/20
    with a fixed ``random_state`` of 42, a default
    ``RandomForestClassifier`` is fitted on the training part, and its
    test predictions are scored with the module-level ``rmse`` callable.

    Args:
        df: Input frame; the ``exclude`` columns are dropped before
            imputation.
        target: Name of the column to predict.
        exclude: Columns to drop; defaults to ``["patient_ID"]``.

    Returns:
        A list of ``{'K': k, 'RMSE': score}`` dicts, one per ``k``.
    """
    excluded_columns = ["patient_ID"] if exclude is None else exclude
    features = df.drop(excluded_columns, axis=1)
    raw_values = features.to_numpy()

    results = []
    for n_neighbors in range(1, 20, 2):
        filled = KNNImputer(n_neighbors=n_neighbors).fit_transform(raw_values)
        filled_df = pd.DataFrame(filled, columns=features.columns)

        X_train, X_test, y_train, y_test = train_test_split(
            filled_df.drop(target, axis=1),
            filled_df[target],
            test_size=0.2,
            random_state=42,
        )

        classifier = RandomForestClassifier()
        classifier.fit(X_train, y_train)
        score = rmse(y_test, classifier.predict(X_test))
        results.append({'K': n_neighbors, 'RMSE': score})
    return results
def show_results(true_values, predicted_values):
    """Print every true/estimated pair and the overall RMSE line.

    Rows are numbered from 1 with a zero-padded two-digit decimal index.
    The values are printed with five decimals, so they must be numeric.

    Args:
        true_values: Iterable of reference values.
        predicted_values: Iterable of estimates, paired positionally with
            ``true_values``.
    """
    row_template = ('True value {0:02d}: {1:0.5f}\t'
                    'Estimated value {0:02d}: {2:0.5f}')

    print('Algorithm finished.\n')
    # The index used to be formatted with 'x' (hexadecimal), which printed
    # row 10 as "0a"; 'd' keeps the numbering decimal.
    for position, (truth, estimate) in enumerate(
            zip(true_values, predicted_values), start=1):
        print(row_template.format(position, truth, estimate))
    print('\nRMSE : ' + str(rmse(true_values, predicted_values)))
def _get_error_(self, T, P):
    """Score predictions ``P`` with the metric named by ``self.eval_error``.

    The reference values are the ``self.model.target`` column of ``T``,
    selected with a one-element list (``T[[...]]``).

    Args:
        T: Table-like object indexable with a list of column names.
        P: Predictions passed unchanged to the metric function.

    Returns:
        The value returned by ``mae``, ``rmse``, ``log_loss`` or
        ``r2_score`` for ``'mae'``, ``'rmse'``, ``'logloss'`` and ``'r2'``
        respectively.

    Raises:
        Exception: If ``self.eval_error`` is none of the names above.
    """
    Y = T[[self.model.target]]
    metric = self.eval_error

    if metric == 'mae':
        return mae(Y, P)
    if metric == 'rmse':
        return rmse(Y, P)
    if metric == 'logloss':
        return log_loss(Y, P)
    if metric == 'r2':
        return r2_score(Y, P)
    raise Exception('Unknown error type')
# Turn each accumulated level total into a rounded per-user average.
# Cells whose count is zero are left untouched to avoid dividing by zero.
for u, user in enumerate(profile):
    for l, level in enumerate(user):
        if count[u][l] != 0:
            profile[u][l] = round(level / count[u][l])

# First-occurrence lookup tables. They replace the per-submission linear
# scans of problem_data and unique_users, and make an unknown id
# detectable instead of silently reusing the previous query_level.
problem_by_id = {}
for problem in problem_data:
    problem_by_id.setdefault(problem[0], problem)
user_row = {}
for row, user_id in enumerate(unique_users):
    user_row.setdefault(user_id, row)

# predicting the ratings and doing in-sample validation
pred = []
actual = []
skipped = 0
for subs in train_subs:
    problem = problem_by_id.get(subs[1])
    if problem is None:
        # No level is known for this problem, so the submission cannot be
        # scored; leave it out rather than score it against a stale level.
        skipped += 1
        continue
    # problem[1] is a letter; 'A' maps to column 0, 'B' to column 1, ...
    query_level = ord(problem[1]) - ord('A')
    pred.append(profile[user_row[subs[0]]][query_level])
    actual.append(int(subs[2]))
if skipped:
    print('skipped', skipped, 'submissions with unknown problem ids')
print('rmse for our prediction(using in sample validation) :', end=' ')
print(rmse(actual, pred))

# prediction for the user and problem queried
query_user = '******' + input('user_id(number) : ')
query_problem = 'prob_' + input('prob_id(number) : ')
if query_user not in user_row:
    raise ValueError('unknown user id: ' + query_user)
if query_problem not in problem_by_id:
    raise ValueError('unknown problem id: ' + query_problem)
query_level = ord(problem_by_id[query_problem][1]) - ord('A')
print('prediction by our system :', end=' ')
print(profile[user_row[query_user]][query_level])
# NOTE(review): the three definitions below take `self` and are used as
# reg.fit / reg.predict further down, so they are presumably methods of
# Linear_Regression; the class header lies outside this fragment.

# takes inputs on which regressor is to be trained
def fit(self, trainX, trainy):
    """Store the training data and fit the coefficients.

    Coefficients start as a ``(1, n_features)`` row of zeros and are then
    replaced by the result of ``gradient_descent`` run with
    ``self.n_runs`` iterations and step size ``self.alpha``.

    Args:
        trainX: Training features; must expose ``shape``.
        trainy: Training targets.
    """
    self.trainX = trainX
    self.trainy = trainy
    # gradient_descent reads self.coef as its starting point.
    self.coef = np.zeros((1, trainX.shape[1]))
    self.coef = self.gradient_descent(trainX, trainy, self.n_runs, self.alpha)


# the gradient descent optimisation algorithm
def gradient_descent(self, trainX, trainy, n_runs, alpha):
    """Run ``n_runs`` gradient-descent steps starting from ``self.coef``.

    Args:
        trainX: Training features, converted with ``np.asarray``.
        trainy: Training targets; a 1-D input is treated as one column.
        n_runs: Number of update steps to perform.
        alpha: Step size applied to every update.

    Returns:
        The updated coefficient array.
    """
    features = np.asarray(trainX)
    targets = np.asarray(trainy)
    if targets.ndim == 1:
        # A flat target would broadcast (n, 1) - (n,) into an (n, n)
        # residual; make it a column so the residual stays (n, 1).
        targets = targets.reshape(-1, 1)

    beta = self.coef
    n = len(targets)
    # Honour the n_runs argument (the loop used to read self.n_runs and
    # ignore the parameter).
    for _ in range(n_runs):
        residual = features.dot(beta.transpose()) - targets
        beta = beta - alpha * ((np.dot(features.T, residual)).T) * (1 / n)
    return beta


# function which returns predictions based on the regressor fitted on the
# training data
def predict(self, testX):
    """Return ``testX`` multiplied by the transposed coefficients."""
    return np.dot(testX, self.coef.T)


# instantiation and prediction steps of our regressor
reg = Linear_Regression(3000000, 0.00000001)
reg.fit(trainX, trainy)
# Predict once and reuse the result for both the printout and the score.
predictions = reg.predict(testX)
print(predictions)
print(rmse(np.array(predictions), np.array(testy)))
# Load the ERA series: the file's own header row is replaced by the names
# below and the 'datetime' column becomes the (date-parsed) index.
era = pd.read_csv(
    fERA,
    header=0,
    parse_dates=True,
    index_col='datetime',
    names=['datetime', 'iERA'],
)

# selecting the same time slice (label-based, both endpoints included)
noaa = noaa.loc['1979-01-01':'2020-12-01']

# Join the two series on their indexes (pd.merge defaults to an inner join).
indices = pd.merge(noaa, era, left_index=True, right_index=True)
describe = indices.describe()

# Summary statistics comparing the iNOAA and iERA columns.
stats = {
    'mean': indices.mean(),
    'std': indices.std(),
    'pearson': indices.corr(method='pearson'),
    'RMSE': rmse(indices.iNOAA, indices.iERA),
    # Mean signed difference, NOAA minus ERA.
    'bias': sum(indices.iNOAA - indices.iERA) / len(indices),
    'variance': indices.var(),
}

# x and y are passed by keyword: newer seaborn releases no longer accept
# them positionally, and the keyword form also works on older releases.
sns.regplot(
    x='iERA',
    y='iNOAA',
    data=indices,
    scatter=True,
    fit_reg=True,
    ci=95,
    color='darkslateblue',
)
# `attradder`, `housing`, `housing_num` and `housing_labels` are defined
# outside this fragment.
housing_extraattrib = attradder.transform(housing.values)

# Numeric pipeline: median imputation, ComAttAdder, then standard scaling.
numpipe = Pipeline([
    ('i', SimpleImputer(strategy="median")),
    ('attradder', ComAttAdder()),
    ('sscal', StandardScaler()),
])
housing_num_tr = numpipe.fit_transform(housing_num)

# Full preprocessing: numeric pipeline on the columns of housing_num,
# one-hot encoding on "ocean_proximity".
nattr = list(housing_num)
cattr = ["ocean_proximity"]
fp = ColumnTransformer([
    ("num", numpipe, nattr),
    ("cat", OneHotEncoder(), cattr),
])
housing_final = fp.fit_transform(housing)

# Linear regression, with a spot check on the first five rows.
lr = LinearRegression()
lr.fit(housing_final, housing_labels)
sd = housing.iloc[:5]
sl = housing_labels.iloc[:5]
sdp = fp.transform(sd)
print("Predictions:", lr.predict(sdp))
print("Labels:", list(sl))

# Training-set error of the linear model. The square root is applied to
# the value returned by `rmse`, which presumably is a mean squared error
# despite its name -- confirm against the import.
housing_predictions = lr.predict(housing_final)
le = rmse(housing_labels, housing_predictions)
lre = np.sqrt(le)
print(lre)

# Training-set error of a decision tree, computed the same way.
tr = DecisionTreeRegressor()
tr.fit(housing_final, housing_labels)
housing_predictions = tr.predict(housing_final)
tmse = rmse(housing_labels, housing_predictions)
trmse = np.sqrt(tmse)
print(trmse)

# 10-fold cross-validation of the tree. The scorer returns negated MSE,
# so the sign is flipped before taking the square root.
scores = cvs(
    tr,
    housing_final,
    housing_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)
trmses = np.sqrt(-scores)
print("DecisionTreeRegressor")
print("Scores:", trmses)
print("Mean:", trmses.mean())
print("Standard Deviation:", trmses.std())

# Same cross-validation for the linear model.
lscores = cvs(
    lr,
    housing_final,
    housing_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)