def calculate_rmse(grid_df):
    """Return the overall error and the error in three ``x`` ranges.

    The ``larc`` column is compared against the ``depth`` column of
    ``grid_df`` using the module's ``rmse`` callable with ``squared=False``.

    Returns:
        A list ``[overall, seg_1, seg_2, seg_3]`` where the segments cover
        ``x < 180``, ``180 <= x < 230`` and ``x >= 230`` respectively.
    """
    larc_grid = grid_df.larc
    depth = grid_df.depth
    position = grid_df.x

    scores = [rmse(larc_grid, depth, squared=False)]

    # Segment boundaries along x; rows with a NaN position match no segment.
    near = position < 180
    scores.append(rmse(larc_grid[near], depth[near], squared=False))

    middle = (position >= 180) & (position < 230)
    scores.append(rmse(larc_grid[middle], depth[middle], squared=False))

    far = position >= 230
    scores.append(rmse(larc_grid[far], depth[far], squared=False))

    return scores
def train_score_predict(clf, X, y, X_predict, y_predict, record_performance, sh_logger, type='classification'):
    """Fit ``clf``, score it on the prediction set and record timings.

    Args:
        clf: Estimator exposing ``fit(X, y)``; the value returned by ``fit``
            is used as the trained model and must expose ``predict``.
        X, y: Training features and targets.
        X_predict, y_predict: Features and true targets used for scoring.
        record_performance: Mapping whose ``'TrainTime'``,
            ``'PredictAccuracyScore'`` and ``'PredictTime'`` entries support
            ``append``; one value is appended to each.
        sh_logger: Object whose ``debug`` attribute is tested for truthiness
            to decide whether timings are printed.
            NOTE(review): if this is a ``logging.Logger``, ``debug`` is a
            bound method and therefore always truthy -- confirm it is a flag.
        type: ``'classification'`` scores with ``accuracy_score``; any other
            value scores with ``mean_squared_error``.

    Returns:
        The fitted model (whatever ``clf.fit`` returned).
    """
    import time
    from sklearn.metrics import accuracy_score
    # NOTE(review): despite the alias, mean_squared_error with default
    # arguments yields the MSE, not its square root -- confirm the intent.
    from sklearn.metrics import mean_squared_error as rmse

    # Train
    start = time.time()
    model = clf.fit(X, y)
    end = time.time()
    train_time = end - start
    if sh_logger.debug:
        print('Training time: ')
        show_time(train_time)

    # save performance variables
    record_performance['TrainTime'].append(train_time)

    # Predict (the timed section covers both prediction and scoring)
    start = time.time()
    predictions = model.predict(X_predict)
    if type == 'classification':
        score = accuracy_score(y_predict, predictions)
    else:
        # Score against the data passed in by the caller; the previous code
        # referenced undefined names (y_test / X_test) in this branch.
        score = rmse(y_predict, predictions)
    record_performance['PredictAccuracyScore'].append(score)

    end = time.time()
    predict_time = end - start
    record_performance['PredictTime'].append(predict_time)

    if sh_logger.debug:
        print('\nPrediction time: ')
        show_time(predict_time)

    return model
# Example no. 3
# 0
def optimize_k(df, target, exclude=None):
    """Score odd KNN-imputation neighbour counts from 1 to 19.

    For every ``k`` the data is imputed with ``KNNImputer``, split 80/20 with
    a fixed ``random_state`` of 42, and a default ``RandomForestClassifier``
    is fitted on the training part. The value returned by the module's
    ``rmse`` callable on the held-out part is recorded.

    Args:
        df: Input frame; columns listed in ``exclude`` are dropped first.
        target: Name of the column to predict.
        exclude: Columns to drop; defaults to ``["patient_ID"]``.

    Returns:
        A list of ``{'K': k, 'RMSE': score}`` dictionaries, one per ``k``.
    """
    dropped = ["patient_ID"] if exclude is None else exclude
    features = df.drop(columns=dropped)
    raw = features.to_numpy()

    results = []
    for n_neighbors in range(1, 20, 2):
        filled = KNNImputer(n_neighbors=n_neighbors).fit_transform(raw)
        frame = pd.DataFrame(filled, columns=features.columns)

        inputs = frame.drop(columns=target)
        labels = frame[target]
        split = train_test_split(inputs, labels, test_size=0.2, random_state=42)
        X_train, X_test, y_train, y_test = split

        forest = RandomForestClassifier()
        forest.fit(X_train, y_train)
        score = rmse(y_test, forest.predict(X_test))
        results.append({'K': n_neighbors, 'RMSE': score})

    return results
def show_results(true_values, predicted_values):
    """Print every true/estimated pair and then the overall error.

    Each pair is printed on one line with a 1-based, zero-padded decimal
    position and both values rounded to five decimals. The final line shows
    whatever the module's ``rmse`` callable returns for the two sequences.

    Args:
        true_values: Iterable of reference values.
        predicted_values: Iterable of estimates, in the same order.
    """
    print('Algorithm finished.\n')
    pairs = zip(true_values, predicted_values)
    for position, (actual, estimate) in enumerate(pairs, start=1):
        # Decimal index: the previous '{:02x}' rendered item 10 as '0a'.
        label = '{:02d}'.format(position)
        print('True value ' + label + ': ' +
              '{:0.5f}'.format(actual) + '\t' + 'Estimated value ' +
              label + ': ' +
              '{:0.5f}'.format(estimate))

    print('\nRMSE  : ' + str(rmse(true_values, predicted_values)))
# Example no. 5
# 0
 def _get_error_(self, T, P):
     """Score predictions ``P`` against the target column of ``T``.

     The target is selected as ``T[[self.model.target]]`` and the metric is
     chosen by ``self.eval_error``: ``'mae'``, ``'rmse'``, ``'logloss'`` or
     ``'r2'``.

     Raises:
         Exception: if ``self.eval_error`` is none of the names above.
     """
     Y = T[[self.model.target]]
     metric = self.eval_error
     if metric == 'mae':
         return mae(Y, P)
     if metric == 'rmse':
         return rmse(Y, P)
     if metric == 'logloss':
         return log_loss(Y, P)
     if metric == 'r2':
         return r2_score(Y, P)
     raise Exception('Unknown error type')
# Example no. 6
# 0
# Replace each profile entry by its value divided by the matching count,
# rounded to the nearest integer. Entries whose count is zero are left as is.
for u, user in enumerate(profile):
    for lvl, level in enumerate(user):
        if count[u][lvl] != 0:
            profile[u][lvl] = round(level / count[u][lvl])

# Index problems and users once so every lookup below is a dictionary access
# instead of a linear scan. setdefault keeps the FIRST occurrence, which is
# what a first-match search over the same sequences would return.
problem_letter = {}
for problem in problem_data:
    problem_letter.setdefault(problem[0], problem[1])

user_row = {}
for row, user_id in enumerate(unique_users):
    user_row.setdefault(user_id, row)


def level_of(problem_id):
    """Return the 0-based level index of a problem ('A' -> 0, 'B' -> 1, ...).

    Raises ValueError for an unknown problem instead of silently reusing the
    level of a previously looked-up problem.
    """
    if problem_id not in problem_letter:
        raise ValueError('unknown problem id: {!r}'.format(problem_id))
    # 65 is ord('A'), so the level letter maps onto a column index.
    return ord(problem_letter[problem_id]) - 65


def row_of(user_id):
    """Return the profile row of a user; raise ValueError if unknown."""
    if user_id not in user_row:
        raise ValueError('unknown user id: {!r}'.format(user_id))
    return user_row[user_id]


# predicting the ratings and doing in-sample validation
pred = []
actual = []

for subs in train_subs:
    # subs is indexed as (user id, problem id, rating).
    query_level = level_of(subs[1])
    pred.append(profile[row_of(subs[0])][query_level])
    actual.append(int(subs[2]))

print('rmse for our prediction(using in sample validation) :', end=' ')
print(rmse(actual, pred))

# prediction for the user and problem queried
query_user = '******' + input('user_id(number) : ')
query_problem = 'prob_' + input('prob_id(number) : ')

# Resolve both lookups before printing so a bad id fails cleanly.
query_level = level_of(query_problem)
query_row = row_of(query_user)
print('prediction by our system :', end=' ')
print(profile[query_row][query_level])
# Example no. 7
# 0
    #takes inputs on which regressor is to be trained
    def fit(self, trainX, trainy):
        """Store the training data and learn ``self.coef`` from it.

        The coefficients start as a ``(1, n_features)`` row of zeros and are
        then replaced by the result of ``self.gradient_descent`` run with
        ``self.n_runs`` and ``self.alpha``.
        """
        self.trainX, self.trainy = trainX, trainy
        n_features = trainX.shape[1]
        self.coef = np.zeros((1, n_features))
        self.coef = self.gradient_descent(
            trainX, trainy, self.n_runs, self.alpha)

    #the gradient descent optimisation algorithm
    def gradient_descent(self, trainX, trainy, n_runs, alpha):
        """Run batch gradient descent and return the updated coefficients.

        Starting from ``self.coef``, each of the ``n_runs`` iterations
        subtracts ``alpha`` times the averaged product of the transposed
        inputs and the residual ``trainX . beta^T - trainy``.

        Args:
            trainX: Training inputs exposing ``dot`` and ``T``.
            trainy: Training targets. A 1-D target is treated as a single
                column so the residual stays ``(n_samples, 1)``.
            n_runs: Number of update steps to perform.
            alpha: Step size.

        Returns:
            The coefficient array after the final update; ``self.coef`` is
            not modified here.
        """
        beta = self.coef
        target = np.asarray(trainy)
        if target.ndim == 1:
            # Without this, (n, 1) - (n,) broadcasts to an (n, n) residual
            # and the coefficients take on the wrong shape.
            target = target.reshape(-1, 1)
        n = len(target)
        # Honour the n_runs argument rather than self.n_runs.
        for _ in range(n_runs):
            residual = trainX.dot(beta.transpose()) - target
            beta = beta - alpha * ((np.dot(trainX.T, residual)).T) * (1 / n)

        return beta

    #function which returns predictions based on the regressor fitted on the training data
    def predict(self, testX):
        """Return the dot product of ``testX`` with the transposed ``self.coef``."""
        weights = self.coef.T
        return np.dot(testX, weights)


# Build the regressor, train it, then print its test-set predictions followed
# by the value of the module's ``rmse`` callable for those predictions.
# The two constructor arguments are presumably (n_runs, alpha) -- confirm
# against the class definition.
reg = Linear_Regression(3000000, 1e-8)
reg.fit(trainX, trainy)

test_predictions = reg.predict(testX)
print(test_predictions)
print(rmse(np.array(test_predictions), np.array(testy)))
# Load the ERA series: the file's own header row is replaced by the names
# below and the 'datetime' column becomes a parsed date index.
era = pd.read_csv(fERA,
                  header=0,
                  parse_dates=True,
                  index_col='datetime',
                  names=['datetime', 'iERA'])

# selecting the same time slice
noaa = noaa['1979-01-01':'2020-12-01']

# Inner join on the index keeps only the timestamps present in both series.
indices = pd.merge(noaa, era, left_index=True, right_index=True)

describe = indices.describe()

# 'RMSE' holds whatever the module's ``rmse`` callable returns for the pair;
# 'bias' is the mean difference iNOAA - iERA over the merged rows.
stats = {
    'mean': indices.mean(),
    'std': indices.std(),
    'pearson': indices.corr(method='pearson'),
    'RMSE': rmse(indices.iNOAA, indices.iERA),
    'bias': sum(indices.iNOAA - indices.iERA) / len(indices),
    'variance': indices.var()
}

# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while older releases accept the keyword form too.
sns.regplot(x='iERA',
            y='iNOAA',
            data=indices,
            scatter=True,
            fit_reg=True,
            ci=95,
            color='darkslateblue')
# Apply the attribute adder to the raw housing values.
housing_extraattrib = attradder.transform(housing.values)

# Numeric preprocessing: median imputation, attribute adder, then scaling.
numpipe = Pipeline([
    ('i', SimpleImputer(strategy="median")),
    ('attradder', ComAttAdder()),
    ('sscal', StandardScaler()),
])
housing_num_tr = numpipe.fit_transform(housing_num)

# Full preprocessing: numeric pipeline on the numeric columns and one-hot
# encoding on the categorical column.
nattr = list(housing_num)
cattr = ["ocean_proximity"]
fp = ColumnTransformer([
    ("num", numpipe, nattr),
    ("cat", OneHotEncoder(), cattr),
])
housing_final = fp.fit_transform(housing)

# Linear regression: fit, then compare predictions and labels on five rows.
lr = LinearRegression()
lr.fit(housing_final, housing_labels)
sd = housing.iloc[:5]
sl = housing_labels.iloc[:5]
sdp = fp.transform(sd)
print("Predictions:", lr.predict(sdp))
print("Labels:", list(sl))

# Training-set error of the linear model. The square root is applied to the
# value returned by ``rmse``, so that callable presumably yields a squared
# error here -- confirm against its import.
housing_predictions = lr.predict(housing_final)
le = rmse(housing_labels, housing_predictions)
lre = np.sqrt(le)
print(lre)

# Decision tree: same training-set error computation.
tr = DecisionTreeRegressor()
tr.fit(housing_final, housing_labels)
housing_predictions = tr.predict(housing_final)
tmse = rmse(housing_labels, housing_predictions)
trmse = np.sqrt(tmse)
print(trmse)

# 10-fold scores for the tree; the scorer returns negated squared errors,
# hence the sign flip before the square root.
scores = cvs(
    tr,
    housing_final,
    housing_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)
trmses = np.sqrt(-scores)
print("DecisionTreeRegressor")
print("Scores:", trmses)
print("Mean:", trmses.mean())
print("Standard Deviation:", trmses.std())

# 10-fold scores for the linear model.
lscores = cvs(
    lr,
    housing_final,
    housing_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)