def r2_score(X, y):
    # relies on module-level names: the rff helper module, val_blocks, and the
    # fold-specific fitted model pairs rfbc1..rfbc5
    y_rfbc1 = rff.rfbc_predict(rfbc1['rf1'], rfbc1['rf2'], X[val_blocks == 0])
    y_rfbc2 = rff.rfbc_predict(rfbc2['rf1'], rfbc2['rf2'], X[val_blocks == 1])
    y_rfbc3 = rff.rfbc_predict(rfbc3['rf1'], rfbc3['rf2'], X[val_blocks == 2])
    y_rfbc4 = rff.rfbc_predict(rfbc4['rf1'], rfbc4['rf2'], X[val_blocks == 3])
    y_rfbc5 = rff.rfbc_predict(rfbc5['rf1'], rfbc5['rf2'], X[val_blocks == 4])
    y_obs = np.hstack(
        (y[val_blocks == 0], y[val_blocks == 1], y[val_blocks == 2],
         y[val_blocks == 3], y[val_blocks == 4]))
    y_mod = np.hstack((y_rfbc1, y_rfbc2, y_rfbc3, y_rfbc4, y_rfbc5))
    _, _, r, _, _ = stats.linregress(y_obs, y_mod)
    return r**2
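# The rff helper module is not shown on this page. Below is a minimal sketch of
# what rfbc_fit / rfbc_predict might look like, ASSUMING the common two-stage
# residual-correction scheme for random forest regression (a second forest is
# fitted to the residuals of the first and its prediction added back); the
# real module may implement the correction differently.
from sklearn.base import clone

def rfbc_fit(rf, X, y):
    # hypothetical two-stage fit: rf1 models y, rf2 models rf1's residuals
    rf1 = clone(rf)
    rf1.fit(X, y)
    rf2 = clone(rf)
    rf2.fit(X, y - rf1.predict(X))
    return rf1, rf2

def rfbc_predict(rf1, rf2, X):
    # bias-corrected prediction: first-stage estimate plus modelled residual
    return rf1.predict(X) + rf2.predict(X)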
Example 2
def f(params):
    global best_mse
    global fail_count
    # check the hyperparameter set is sensible
    # - check 1: min_samples_split must not be smaller than min_samples_leaf
    if params['min_samples_split'] < params['min_samples_leaf']:
        fail_count += 1
        #print("INVALID HYPERPARAMETER SELECTION",params)
        return {'loss': None, 'status': STATUS_FAIL}

    rf = RandomForestRegressor(**params)
    r2_scores = np.zeros(k)
    MSE_scores = np.zeros(k)
    for kk in range(k):
        train_mask = cal_blocks != kk
        test_mask = val_blocks == kk

        rf1, rf2 = rff.rfbc_fit(rf, X[train_mask], y[train_mask])
        y_rf = rff.rfbc_predict(rf1, rf2, X[test_mask])

        _, _, r, _, _ = stats.linregress(y[test_mask], y_rf)
        r2_scores[kk] = r**2
        MSE_scores[kk] = np.mean((y[test_mask] - y_rf)**2)
    r2_score = r2_scores.mean()
    mse = MSE_scores.mean()
    # - if error reduced, then update best model accordingly
    if mse < best_mse:
        best_mse = mse
        print('new best r^2: ', r2_score, '; best RMSE: ', np.sqrt(mse), params)
    return {'loss': mse, 'status': STATUS_OK}
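# For context, a minimal sketch of how an objective like f() is typically
# driven with hyperopt; this search space is illustrative only, not the one
# used in this project.
from hyperopt import fmin, tpe, hp, Trials

space = {
    'n_estimators': hp.choice('n_estimators', [100, 200, 500]),
    'max_features': hp.uniform('max_features', 0.1, 1.0),
    'min_samples_leaf': hp.choice('min_samples_leaf', list(range(1, 20))),
    'min_samples_split': hp.choice('min_samples_split', list(range(2, 40))),
}
trials = Trials()
best = fmin(fn=f, space=space, algo=tpe.suggest, max_evals=200, trials=trials)
print(best)  # NB: hp.choice entries are reported as indices into the option lists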
Example 3
def r2_score(X, y):
    y_obs = np.zeros(y.size)
    y_mod = np.zeros(y.size)
    index = 0
    for ii in range(0, k):
        n = val_blocks['iter%i' % (ii + 1)].sum()
        y_mod[index:index + n] = rff.rfbc_predict(
            rfbc_k['rfbc%i' % (ii + 1)]['rf1'],
            rfbc_k['rfbc%i' % (ii + 1)]['rf2'],
            X[val_blocks['iter%i' % (ii + 1)]])
        y_obs[index:index + n] = y[val_blocks['iter%i' % (ii + 1)]]
        index += n
    _, _, r, _, _ = stats.linregress(y_obs, y_mod)
    return r**2
def f(params):
    global best_mse
    global fail_count
    # check the hyperparameter set is sensible
    # - check 1: min_samples_split must not be smaller than min_samples_leaf
    if params['min_samples_split'] < params['min_samples_leaf']:
        fail_count += 1
        return {'loss': None, 'status': STATUS_FAIL}

    params['random_state'] = int(np.random.random() * 10**6)
    rf = RandomForestRegressor(**params)
    r2_scores = np.zeros((k, 2))
    MSE_scores = np.zeros((k, 2))
    grad_scores = np.zeros((k, 2))
    for kk in range(k):
        train_mask = cal_blocks != kk
        test_mask = val_blocks == kk

        rf1, rf2 = rff.rfbc_fit(rf, X[train_mask], y[train_mask])
        y_rf = rff.rfbc_predict(rf1, rf2, X)

        m, _, r, _, _ = stats.linregress(y[test_mask], y_rf[test_mask])
        r2_scores[kk, 0] = r**2
        MSE_scores[kk, 0] = np.mean((y[test_mask] - y_rf[test_mask])**2)
        grad_scores[kk, 0] = m

        m, _, r, _, _ = stats.linregress(y[train_mask], y_rf[train_mask])
        r2_scores[kk, 1] = r**2
        MSE_scores[kk, 1] = np.mean((y[train_mask] - y_rf[train_mask])**2)
        grad_scores[kk, 1] = m

    r2_score = r2_scores[:, 0].mean()
    mse = MSE_scores[:, 0].mean()
    # - if error reduced, then update best model accordingly
    if mse < best_mse:
        best_mse = mse
        print('new best r^2: %.5f; best RMSE: %.5f' % (r2_score, np.sqrt(mse)))
        print(params)
    return {
        'loss': mse,
        'status': STATUS_OK,
        'mse_test': MSE_scores[:, 0].mean(),
        'mse_train': MSE_scores[:, 1].mean(),
        'gradient_test': grad_scores[:, 0].mean(),
        'gradient_train': grad_scores[:, 1].mean(),
        'r2_test': r2_scores[:, 0].mean(),
        'r2_train': r2_scores[:, 1].mean()
    }
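# Because this version of the objective returns the train/test diagnostics
# alongside 'loss', they can be recovered from the hyperopt Trials object
# after the search (assumes fmin(..., trials=trials) has been run):
best_result = trials.best_trial['result']
print('best test MSE: %.5f' % best_result['loss'])
print('train/test r^2: %.5f / %.5f' % (best_result['r2_train'],
                                       best_result['r2_test']))
# a large test-train MSE gap flags overfitted hyperparameter sets
gaps = [t['mse_test'] - t['mse_train'] for t in trials.results
        if t['status'] == STATUS_OK]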
Example 5
# Take best hyperparameter set and apply cal-val on full training set
print('Applying buffered k-fold cross validation')
rfbc_k = {}
y_obs = np.zeros(y.size)
y_mod = np.zeros(y.size)
index = 0
for ii in range(0, k):
    key = 'rfbc%i' % (ii + 1)
    it = 'iter%i' % (ii + 1)
    n = val_blocks[it].sum()
    print(n)
    rfbc_k[key] = {}
    rfbc_k[key]['rf1'], rfbc_k[key]['rf2'] = rff.rfbc_fit(
        rf, X[cal_blocks[it]], y[cal_blocks[it]])
    y_mod[index:index + n] = rff.rfbc_predict(
        rfbc_k[key]['rf1'], rfbc_k[key]['rf2'], X[val_blocks[it]])
    y_obs[index:index + n] = y[val_blocks[it]]
    index += n
_, _, r, _, _ = stats.linregress(y_obs, y_mod)
r2 = r**2
rmse = np.sqrt(np.mean((y_mod - y_obs)**2))
rel_rmse = rmse / np.mean(y_obs)
print("Validation\n\tR^2 = %.02f" % r2)
print("\tRMSE = %.02f" % rmse)
print("\trelative RMSE = %.02f" % rel_rmse)
annotation = 'R$^2$ = %.2f\nRMSE = %.1f Mg ha$^{-1}$\nrelative RMSE = %.1f%s' % (
    r2, rmse, rel_rmse * 100, '%')
fig1, axes1 = gplt.plot_validation(y_obs, y_mod, annotation=annotation)
fig1.savefig('%s%s_%s_%sm_validation_blocked_kfold.png' %
             (path2fig, site_id, version, resolution.zfill(3)))
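# The cal_blocks['iter%i'] / val_blocks['iter%i'] masks are built upstream of
# this snippet. A simplified, hypothetical sketch of the buffered blocked
# k-fold idea: spatial blocks are assigned to folds, and calibration samples
# within a buffer distance of the validation fold are discarded to limit
# leakage through spatial autocorrelation.
import numpy as np
from scipy.spatial import cKDTree

def buffered_kfold_masks(xy, block_id, k, buffer_dist):
    # xy: (n, 2) sample coordinates; block_id: spatial block label per sample
    rng = np.random.default_rng(42)
    blocks = np.unique(block_id)
    fold = rng.integers(0, k, blocks.size)[np.searchsorted(blocks, block_id)]
    cal_blocks, val_blocks = {}, {}
    for kk in range(k):
        val = fold == kk
        # distance from every sample to the nearest validation sample
        dist_to_val = cKDTree(xy[val]).query(xy, k=1)[0]
        cal_blocks['iter%i' % (kk + 1)] = (~val) & (dist_to_val > buffer_dist)
        val_blocks['iter%i' % (kk + 1)] = val
    return cal_blocks, val_blocks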
Example 6
if sensor == 'sentinel':
    axes.set_title('Sentinel 2 only')
elif sensor == 'alos':
    axes.set_title('ALOS only')
axes.set_xlim(0, 0.5)
fig5.tight_layout()
fig5.savefig(
    '%s%s_%s_permutation_importances_by_texture_single_sensor_%s.png' %
    (path2fig, site_id, version, sensor))
 """
 #===============================================================================
 PART C: VALIDATION
 - Observed vs. modelled biomass using the blocked buffered strategy to avoid
   bias due to spatial autocorrelation
 #-------------------------------------------------------------------------------
 """
y_rfbc1 = rff.rfbc_predict(rfbc1['rf1'], rfbc1['rf2'], X[val_blocks == 0])
y_rfbc2 = rff.rfbc_predict(rfbc2['rf1'], rfbc2['rf2'], X[val_blocks == 1])
y_rfbc3 = rff.rfbc_predict(rfbc3['rf1'], rfbc3['rf2'], X[val_blocks == 2])
y_rfbc4 = rff.rfbc_predict(rfbc4['rf1'], rfbc4['rf2'], X[val_blocks == 3])
y_rfbc5 = rff.rfbc_predict(rfbc5['rf1'], rfbc5['rf2'], X[val_blocks == 4])
y_obs = np.hstack(
    (y[val_blocks == 0], y[val_blocks == 1], y[val_blocks == 2],
     y[val_blocks == 3], y[val_blocks == 4]))
y_mod = np.hstack((y_rfbc1, y_rfbc2, y_rfbc3, y_rfbc4, y_rfbc5))
_, _, r, _, _ = stats.linregress(y_obs, y_mod)
r2 = r**2
rmse = np.sqrt(np.mean((y_mod - y_obs)**2))
rel_rmse = rmse / np.mean(y_obs)
print("Validation\n\tR^2 = %.02f" % r2)
print("\tRMSE = %.02f" % rmse)
print("\trelative RMSE = %.02f" % rel_rmse)
Example 7
"""
#===============================================================================
PART C: MONTE CARLO UPSCALING
Fit RF model for 100 AGB maps
Save RFs for future reference
#-------------------------------------------------------------------------------
"""
# We'll load in an existing dataset to get the georeferencing information
template = io.load_geotiff(agb_list[0], option=1)
rows, cols = template.shape
agb_stack = np.zeros((N_iter, rows, cols))
#pca_predictors = pca.transform(predictors)
#predictors=None
for ii, agb_file in enumerate(agb_list):
    print('Iteration %i of %i' % (ii + 1, N_iter))
    rf_dict = joblib.load('%s%s_%s_optimised_rfbc_sentinel_alos_lidar_%s.pkl' %
                          (path2alg, site_id, version, str(ii + 1).zfill(3)))
    agb_mod = rff.rfbc_predict(rf_dict['rf1'], rf_dict['rf2'], predictors)

    # copy the predictions into a new georeferenced xarray for the AGB map
    agb = io.copy_xarray_template(template)
    agb.values[forest_mask] = agb_mod.copy()
    agb.values[agb.values == -9999] = np.nan
    agb.values[agb.values < 0] = 0

    outfile_prefix = '%s%s_%s_rfbc_agb_upscaled_%s' % (
        path2output, site_id, version, str(ii + 1).zfill(3))
    io.write_xarray_to_GeoTiff(agb, outfile_prefix)

    agb_stack[ii] = agb.values

# summary arrays
agb_med = io.copy_xarray_template(template)
agb_med.values = np.median(agb_stack, axis=0)
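# The stack can also be summarised into pixel-wise uncertainty bounds, e.g. a
# 95% interval across the Monte Carlo ensemble; a short follow-on sketch using
# the same io helpers as above (output file names here are illustrative).
agb_lower = io.copy_xarray_template(template)
agb_lower.values = np.percentile(agb_stack, 2.5, axis=0)
agb_upper = io.copy_xarray_template(template)
agb_upper.values = np.percentile(agb_stack, 97.5, axis=0)
io.write_xarray_to_GeoTiff(agb_med, '%s%s_%s_rfbc_agb_upscaled_median' %
                           (path2output, site_id, version))
io.write_xarray_to_GeoTiff(agb_lower, '%s%s_%s_rfbc_agb_upscaled_p025' %
                           (path2output, site_id, version))
io.write_xarray_to_GeoTiff(agb_upper, '%s%s_%s_rfbc_agb_upscaled_p975' %
                           (path2output, site_id, version))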