def _record_four_injector_crmp_run(results, producer_number, crmp, p0, y_hat, y_true):
    """Append one CRMP evaluation row to ``results`` in place.

    Args:
        results: dict mapping column name to a list. It must already contain
            the 'Producer', 'Model', 'tau_initial', 'tau_final',
            'f1_initial'..'f4_final', 'r2' and 'MSE' keys.
        producer_number: value stored in the 'Producer' column.
        crmp: fitted model exposing ``tau_`` and ``gains_``.
        p0: initial guess the model was built from; ``p0[0]`` is stored as
            the initial tau and ``p0[1]``..``p0[4]`` as the initial f1..f4.
        y_hat: model output to score.
        y_true: observed values that ``y_hat`` is scored against.
    """
    gain_columns = (
        ('f1_initial', 'f1_final'),
        ('f2_initial', 'f2_final'),
        ('f3_initial', 'f3_final'),
        ('f4_initial', 'f4_final'),
    )
    r2, mse = fit_statistics(y_hat, y_true)
    results['Producer'].append(producer_number)
    results['Model'].append(model_namer(crmp))
    results['tau_initial'].append(p0[0])
    results['tau_final'].append(crmp.tau_)
    for k, (initial_key, final_key) in enumerate(gain_columns):
        # p0 carries tau at index 0, so the k-th gain guess sits at k + 1.
        results[initial_key].append(p0[k + 1])
        results[final_key].append(crmp.gains_[k])
    results['r2'].append(r2)
    results['MSE'].append(mse)


def convergence_sensitivity_analysis():
    """Fit CRMP for every producer and initial guess, then save the scores.

    For each producer the producer and injector series are trimmed to start
    at that producer's entry in ``producer_starting_indicies``, turned into a
    dataset with ``production_rate_dataset`` and split in half without
    shuffling. A CRMP model is fitted on the first half for every ``p0`` in
    ``p0s``; its score on the training half is appended to ``fit_data`` and
    its score on the held-out half to ``predict_data``.

    Both result dicts are finally written as CSV to ``fit_ouput_file`` and
    ``predict_output_file``. All of these names are read from the enclosing
    module; the function takes no arguments and returns ``None``.
    """
    for i, _ in enumerate(producer_names):
        starting_index = producer_starting_indicies[i]
        producer = producers[i][starting_index:]
        injectors_tmp = [injector[starting_index:] for injector in injectors]
        X, y = production_rate_dataset(producer, *injectors_tmp)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.5, shuffle=False
        )
        for p0 in p0s:
            # The model gets its own copy of p0; the values recorded below
            # are read from the caller's p0.
            crmp = CRMP(p0=deepcopy(p0))
            crmp = crmp.fit(X_train, y_train)

            # Fitting: score on the data the model was trained on.
            _record_four_injector_crmp_run(
                fit_data, i + 1, crmp, p0, crmp.predict(X_train), y_train
            )

            # Prediction: score on the held-out second half.
            _record_four_injector_crmp_run(
                predict_data, i + 1, crmp, p0, crmp.predict(X_test), y_test
            )

    # Fitting
    # NOTE: 'fit_ouput_file' (sic) is the name defined by the module.
    fit_df = pd.DataFrame(fit_data)
    fit_df.to_csv(fit_ouput_file)

    # Prediction
    predict_df = pd.DataFrame(predict_data)
    predict_df.to_csv(predict_output_file)
# Koval fitting script fragment (the trailing `if` has no body in this view).
#   1. Builds the Koval dataset from W_t and f_w, holds out the last 20%
#      without shuffling, fits a Koval model on the first 80% and stores its
#      training-set predictions in koval_fitting_data under a 1-based 't_i'.
#   2. Pickles the fitted model to the path from serialized_model_path.
#   3. Rebuilds X, y with production_rate_dataset(f_w, W_t), keeps the rows
#      before the index returned by forward_walk_splitter as training data,
#      and starts iterating over candidate regression models.
# NOTE(review): ElasticNetCV and LassoCV are listed as classes while the
#   other two entries are instances; presumably is_CV_model accounts for
#   this, confirm.
# NOTE(review): `time` and `koval_fitting_file` are assigned but not used in
#   the visible part of the script.
from src.helpers.features import koval_dataset, production_rate_dataset from src.helpers.models import model_namer, serialized_model_path, is_CV_model koval_fitting_file = INPUTS['wfsim']['koval_fitting'] koval_fitting_data = {'Model': [], 't_i': [], 'Fit': []} X, y = koval_dataset(W_t, f_w) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=1, shuffle=False) koval = Koval().fit(X=X_train, y=y_train) y_hat = koval.predict(X_train) time = np.linspace(1, len(y_hat), num=len(y_hat)) for k in range(len(y_hat)): koval_fitting_data['Model'].append(model_namer(koval)) koval_fitting_data['t_i'].append(k + 1) koval_fitting_data['Fit'].append(y_hat[k]) pickled_model = serialized_model_path('koval', koval) with open(pickled_model, 'wb') as f: pickle.dump(koval, f) X, y = production_rate_dataset(f_w, W_t) train_split, test_split, train_test_seperation_idx = forward_walk_splitter( X, y, 2) X_train = X[:train_test_seperation_idx] y_train = y[:train_test_seperation_idx] models = [BayesianRidge(), ElasticNetCV, LassoCV, LinearRegression()] for model in models: if is_CV_model(model):
def _record_two_injector_crmp_run(results, producer_number, crmp, p0, y_hat, y_true):
    """Append one CRMP evaluation row to ``results`` in place.

    Args:
        results: dict mapping column name to a list. It must already contain
            the 'Producer', 'Model', 'tau_initial', 'tau_final',
            'f1_initial', 'f1_final', 'f2_initial', 'f2_final', 'r2' and
            'MSE' keys.
        producer_number: value stored in the 'Producer' column.
        crmp: fitted model exposing ``tau_`` and ``gains_``.
        p0: initial guess the model was built from; ``p0[0]`` is stored as
            the initial tau, ``p0[1]`` and ``p0[2]`` as the initial f1/f2.
        y_hat: model output to score.
        y_true: observed values that ``y_hat`` is scored against.
    """
    r2, mse = fit_statistics(y_hat, y_true)
    results['Producer'].append(producer_number)
    results['Model'].append(model_namer(crmp))
    results['tau_initial'].append(p0[0])
    results['tau_final'].append(crmp.tau_)
    results['f1_initial'].append(p0[1])
    results['f1_final'].append(crmp.gains_[0])
    results['f2_initial'].append(p0[2])
    results['f2_final'].append(crmp.gains_[1])
    results['r2'].append(r2)
    results['MSE'].append(mse)


def convergence_sensitivity_analysis():
    """Fit CRMP for every producer and initial guess, then save the scores.

    For each of the ``number_of_producers`` producers a dataset is built from
    that producer and all ``injectors`` and split in half without shuffling.
    A CRMP model is fitted on the first half for every entry of
    ``param_grid['p0']``; its score on the training half is appended to
    ``fit_data`` and its score on the held-out half to ``predict_data``.
    A running count of fitted models is printed after each one.

    Both result dicts are finally written as CSV to ``fit_ouput_file`` and
    ``predict_output_file``. All of these names are read from the enclosing
    module; the function takes no arguments and returns ``None``.
    """
    iterations = 0
    for i in range(number_of_producers):
        X, y = production_rate_dataset(producers[i], *injectors)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=0.5, shuffle=False
        )
        for p0 in param_grid['p0']:
            # The model gets its own copy of p0; the values recorded below
            # are read from the caller's p0.
            crmp = CRMP(p0=deepcopy(p0))
            crmp = crmp.fit(X_train, y_train)

            # Fitting: score on the data the model was trained on.
            _record_two_injector_crmp_run(
                fit_data, i + 1, crmp, p0, crmp.predict(X_train), y_train
            )

            # Prediction: score on the held-out second half.
            _record_two_injector_crmp_run(
                predict_data, i + 1, crmp, p0, crmp.predict(X_test), y_test
            )

            # Progress indicator: number of models fitted so far.
            iterations += 1
            print(iterations)

    # Fitting
    # NOTE: 'fit_ouput_file' (sic) is the name defined by the module.
    fit_df = pd.DataFrame(fit_data)
    fit_df.to_csv(fit_ouput_file)

    # Prediction
    predict_df = pd.DataFrame(predict_data)
    predict_df.to_csv(predict_output_file)
def test_model_namer(self):
    """model_namer maps a BayesianRidge instance to the string 'BayesianRidge'."""
    assert model_namer(BayesianRidge()) == 'BayesianRidge'
# Fragment: the first tokens close a dict of per-prediction columns that was
# opened above this view (presumably koval_predictions, confirm).
# For each entry of step_sizes the data from koval_dataset(W_t, f_w) is split
# with forward_walk_splitter (training_split=0.8) and the `koval` model is
# evaluated by test_model. The returned r2/MSE go into
# koval_predictions_metrics, and every predicted window is expanded into rows
# of koval_predictions.
# NOTE(review): every time index returned by test_model is shifted by +2
#   before being stored; the reason is not visible here, confirm.
'Step size': [], 't_start': [], 't_end': [], 't_i': [], 'Prediction': [] } koval_predictions_metrics = {'Model': [], 'Step size': [], 'r2': [], 'MSE': []} # Koval Predictions X, y = koval_dataset(W_t, f_w) for step_size in step_sizes: train_split, test_split, train_test_seperation_idx = forward_walk_splitter( X, y, step_size, training_split=0.8) r2, mse, y_hat, time_step = test_model(X, y, koval, test_split) koval_predictions_metrics['Model'].append(model_namer(koval)) koval_predictions_metrics['Step size'].append(step_size) koval_predictions_metrics['r2'].append(r2) koval_predictions_metrics['MSE'].append(mse) for i in range(len(y_hat)): y_hat_i = y_hat[i] time_step_i = time_step[i] t_start = time_step_i[0] + 2 t_end = time_step_i[-1] + 2 for k in range(len(y_hat_i)): y_i = y_hat_i[k] t_i = time_step_i[k] + 2 koval_predictions['Model'].append(model_namer(koval)) koval_predictions['Step size'].append(step_size) koval_predictions['t_start'].append(t_start)
def evaluate_crmp_bhp_model():
    """Fit CrmpBHP per producer and initial guess and save prediction scores.

    For every name in ``producer_names`` a dataset is built from that
    producer's data (fetched with ``bhp=True``), the 'Name'/'Date'/'Water Vol'
    injector columns and the producer's 'delta_p' column. The first 40% of
    the rows (unshuffled) are used for training. For each ``p0`` in ``p0s`` a
    CrmpBHP model is fitted and scored on the first 30 held-out rows, with the
    first feature column dropped for prediction. The scores are appended to
    ``predict_data``.

    ``fit_data`` is not appended to here, so the fit CSV holds whatever
    ``fit_data`` contained on entry. Both dicts are written as CSV to
    ``fit_output_file`` and ``predict_output_file``. Returns ``None``.
    """
    run_number = 0
    horizon = 30  # number of held-out rows that are scored

    for name in producer_names:
        print('Producer Name: ', name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(
            producer[['Date', name]], injectors, producer['delta_p']
        )
        X_train, X_test, y_train, y_test = (
            part.to_numpy()
            for part in train_test_split(X, y, train_size=0.40, shuffle=False)
        )

        for p0 in p0s:
            run_number += 1
            print('Iteration: {}'.format(run_number))

            # The model gets its own copy of p0; the values recorded below
            # are read from the caller's p0.
            crmpbhp = CrmpBHP(p0=deepcopy(p0)).fit(X_train, y_train)

            # Fitting: training-set statistics are not recorded here.

            # Prediction
            y_hat = crmpbhp.predict(X_test[:horizon, 1:])
            r2, mse = fit_statistics(y_hat, y_test[:horizon], shutin=True)
            row = {
                'Producer': name,
                'Model': model_namer(crmpbhp),
                'tau_initial': p0[0],
                'tau_final': crmpbhp.tau_,
                'f1_initial': p0[1],
                'f1_final': crmpbhp.gains_[0],
                'f2_initial': p0[2],
                'f2_final': crmpbhp.gains_[1],
                'f3_initial': p0[3],
                'f3_final': crmpbhp.gains_[2],
                'f4_initial': p0[4],
                'f4_final': crmpbhp.gains_[3],
                'r2': r2,
                'MSE': mse,
            }
            for column, value in row.items():
                predict_data[column].append(value)

    # Fitting
    pd.DataFrame(fit_data).to_csv(fit_output_file)

    # Prediction
    pd.DataFrame(predict_data).to_csv(predict_output_file)