コード例 #1
0
ファイル: clair_data.py プロジェクト: akhilpotla/CRM
def convergence_sensitivity_analysis():
    """Tabulate CRMP results for every producer and every initial guess.

    For each producer index, the producer series and all injector series are
    trimmed to begin at that producer's starting index, turned into a
    dataset, and split in half without shuffling.  One CRMP model is fitted
    per entry of ``p0s``.  Statistics on the training half are appended to
    ``fit_data`` and statistics on the held-out half to ``predict_data``;
    both tables are written to CSV once all producers are processed.
    """
    # (initial-guess column, fitted-value column) for each of the four gains.
    gain_columns = (
        ('f1_initial', 'f1_final'),
        ('f2_initial', 'f2_final'),
        ('f3_initial', 'f3_final'),
        ('f4_initial', 'f4_final'),
    )

    def record(table, producer_number, guess, model, r2, mse):
        """Append one result row to ``table`` (a dict of column lists)."""
        table['Producer'].append(producer_number)
        table['Model'].append(model_namer(model))
        table['tau_initial'].append(guess[0])
        table['tau_final'].append(model.tau_)
        for k, (initial_key, final_key) in enumerate(gain_columns):
            # guess[0] is tau, so gain k sits at guess[k + 1].
            table[initial_key].append(guess[k + 1])
            table[final_key].append(model.gains_[k])
        table['r2'].append(r2)
        table['MSE'].append(mse)

    for idx in range(len(producer_names)):
        start = producer_starting_indicies[idx]
        trimmed_producer = producers[idx][start:]
        trimmed_injectors = [series[start:] for series in injectors]
        X, y = production_rate_dataset(trimmed_producer, *trimmed_injectors)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.5, shuffle=False)

        for guess in p0s:
            # deepcopy so the entry stored in p0s is never handed to CRMP.
            fitted = CRMP(p0=deepcopy(guess)).fit(X_train, y_train)

            # Statistics on the data the model was fitted to.
            train_hat = fitted.predict(X_train)
            r2, mse = fit_statistics(train_hat, y_train)
            record(fit_data, idx + 1, guess, fitted, r2, mse)

            # Statistics on the held-out half.
            test_hat = fitted.predict(X_test)
            r2, mse = fit_statistics(test_hat, y_test)
            record(predict_data, idx + 1, guess, fitted, r2, mse)

    # Persist the fitting table, then the prediction table.
    pd.DataFrame(fit_data).to_csv(fit_ouput_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)
コード例 #2
0
def objective_function():
    """Evaluate test-set r2/MSE over a grid of CRMP parameters per producer.

    A single CRMP instance is fitted once, on the training half of the first
    producer's dataset, and then reused: for every producer and every entry
    of ``param_grid['p0']`` its ``tau_`` and ``gains_`` attributes are
    overwritten with the grid values before predicting on that producer's
    test half.  One row per (producer, grid point) is appended to
    ``objective_function_data`` and the table is written to
    ``objective_function_file``.
    """
    # Initial fit; its parameters are overwritten inside the loop below.
    X0, y0 = production_rate_dataset(producers[0], *injectors)
    X0_train, _, y0_train, _ = train_test_split(
        X0, y0, test_size=0.5, shuffle=False)
    crmp = CRMP().fit(X0_train, y0_train)

    for idx in range(number_of_producers):
        X, y = production_rate_dataset(producers[idx], *injectors)
        # Only the held-out half is used for the evaluation.
        _, X_test, _, y_test = train_test_split(
            X, y, test_size=0.5, shuffle=False)

        for params in param_grid['p0']:
            # params[0] is tau; the remaining entries are the gains.
            crmp.tau_ = params[0]
            crmp.gains_ = params[1:]
            prediction = crmp.predict(X_test)
            r2, mse = fit_statistics(prediction, y_test)

            objective_function_data['Producer'].append(idx + 1)
            objective_function_data['tau'].append(params[0])
            objective_function_data['f1'].append(params[1])
            objective_function_data['f2'].append(params[2])
            objective_function_data['r2'].append(r2)
            objective_function_data['MSE'].append(mse)

    pd.DataFrame(objective_function_data).to_csv(objective_function_file)
コード例 #3
0
def fit_individual_initial_guesses():
    """Fit CRMP to ``producers[2]`` from one fixed initial guess and print it.

    The dataset is split in half without shuffling; the model is fitted on
    the first half and evaluated on that same half.  Prints the fitted
    ``tau_``, the fitted ``gains_`` and the training MSE, in that order.
    """
    X, y = production_rate_dataset(producers[2], *injectors)
    # Only the training half is needed here.
    X_train, _, y_train, _ = train_test_split(
        X, y, train_size=0.5, shuffle=False)

    fitted = CRMP(p0=[1e-03, 0.4, 0.6]).fit(X_train, y_train)
    _, mse = fit_statistics(fitted.predict(X_train), y_train)

    print(fitted.tau_)
    print(fitted.gains_)
    print(mse)
コード例 #4
0
def minimum_train_size():
    """Print the smallest training size whose test log-MSE is under a threshold.

    Training sizes 1 through 148 (absolute sample counts) are tried in
    increasing order on the dataset built from ``producers[3]``.  For each
    size a CRMP model is fitted from the initial guess ``[5, 0.5, 0.5]`` on
    the leading ``data_size`` samples and scored on the remaining ones.  The
    first size whose ``log(MSE)`` falls below the threshold is printed
    together with its MSE and the search stops.

    Returns:
        None.  Nothing is printed if no size meets the threshold.
    """
    # NOTE(review): the origin of this threshold is not visible in this
    # block -- confirm what reference value it corresponds to.
    log_mse_threshold = 11.0581424463

    # The dataset does not depend on the training size, so build it once
    # instead of on every iteration.
    X, y = production_rate_dataset(producers[3], *injectors)

    for data_size in range(1, 149):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=data_size, shuffle=False)
        crmp = CRMP(p0=[5, 0.5, 0.5]).fit(X_train, y_train)
        y_hat = crmp.predict(X_test)
        r2, mse = fit_statistics(y_hat, y_test)
        if np.log(mse) < log_mse_threshold:
            print(data_size)
            print(mse)
            return
コード例 #5
0
def test_model(X, y, model, test_split):
    """Refit and score ``model`` on each train/test split, averaging the scores.

    Args:
        X: Feature container; indexed with each ``train`` and ``test`` entry.
        y: Target container; indexed the same way as ``X``.
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.  It is
            refitted in place for every split.
        test_split: Iterable of ``(train, test)`` index pairs.  It is
            consumed exactly once, so a one-shot generator is accepted as
            well as a list.

    Returns:
        Tuple ``(r2, mse, y_hat, time_step)`` where ``r2`` and ``mse`` are
        the means over all splits, ``y_hat`` is the list of per-split
        predictions and ``time_step`` the list of per-split test indices.

    Raises:
        ZeroDivisionError: If ``test_split`` yields no splits.
    """
    r2_sum, mse_sum = 0, 0
    # Count folds while iterating rather than calling len(), which is not
    # available on generators.
    n_splits = 0
    y_hat = []
    time_step = []
    for train, test in test_split:
        x_train, x_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        model.fit(x_train, y_train)
        y_hat_i = model.predict(x_test)
        y_hat.append(y_hat_i)
        time_step.append(test)
        r2_i, mse_i = fit_statistics(y_hat_i, y_test)
        r2_sum += r2_i
        mse_sum += mse_i
        n_splits += 1
    r2 = r2_sum / n_splits
    mse = mse_sum / n_splits
    return (r2, mse, y_hat, time_step)
コード例 #6
0
ファイル: lstm_best_plot.py プロジェクト: UT-DIRECT/CRM
# Fit the network on the scaled training data with verbose output disabled;
# validation_split=0.1 is passed through to the model's fit method.
history = model.fit(X_train_scaled,
                    y_train_scaled,
                    epochs=epoch,
                    batch_size=batch,
                    validation_split=0.1,
                    verbose=0)

# Recursive 30-step forecast: each one-step prediction is written into the
# next test sample so that it feeds the following prediction.  This modifies
# X_test_scaled in place.
y_hat_lstm = []
for j in range(30):
    y_hat_j = model.predict(X_test_scaled[j:(j + 1)])[0][0]
    # NOTE(review): if X_test_scaled is an ndarray this scalar assignment
    # fills every element of sample j + 1, not a single feature -- confirm
    # that this is intended.
    X_test_scaled[j + 1] = y_hat_j
    y_hat_lstm.append(y_hat_j)

# Stack the predictions into a column and undo the scaling before scoring
# against the first 30 test targets.
y_hat_lstm = np.array(y_hat_lstm).reshape(-1, 1)
y_hat_lstm = scaler.inverse_transform(y_hat_lstm)
r2, mse = fit_statistics(y_hat_lstm, y_test[:30])
print(mse)

# CRMP comparison: fitted on the unscaled training data and asked to predict
# from the first 30 test rows with their first column dropped.
crmp = CRMP().fit(X_train, y_train)
y_hat_crmp = crmp.predict(X_test[:30, 1:])
r2, mse = fit_statistics(y_hat_crmp, y_test[:30])
print(mse)

# Time axis for the plot: 30 evenly spaced values from 76 to 105 inclusive.
t = np.linspace(76, 105, 30)
plt.plot(t, y_test[:30], color='k', label='True Value', linewidth=2)
plt.plot(t, y_hat_crmp, alpha=0.5, label='CRMP', linewidth=2)
plt.plot(t, y_hat_lstm, alpha=0.5, label='LSTM Neural Network', linewidth=2)
plt.tight_layout()
plot_helper(
    FIG_DIR,
    title='{}: 30 Days Prediction for CRMP and LSTM Neural Network'.format(
コード例 #7
0
ファイル: mses_with_noise.py プロジェクト: UT-DIRECT/CRM
    return np.log(column + 1)


# Standard deviation of the Gaussian noise added to each producer series.
std = 25
for estimator in estimators:
    print(estimator)
    for i in range(number_of_producers):
        # Build a noisy *copy* of the producer series.  Adding the noise in
        # place would modify producers[i] itself, so every later estimator
        # would see the noise of all earlier passes stacked on top.
        noise = np.random.normal(loc=0.0, scale=std, size=len(producers[i]))
        producer = producers[i] + noise
        X, y = production_rate_dataset(producer, *injectors)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            train_size=0.5,
                                                            shuffle=False)
        model = estimator.fit(X_train, y_train)

        # Recursive 30-step forecast seeded with the last training target:
        # each prediction replaces the first feature of the next test row.
        y_hat = []
        y_hat_i = y_train[-1]
        for step in range(30):
            # Work on a copy of the row so that X_test is left untouched.
            X_test_i = X_test[step, :].copy()
            X_test_i[0] = y_hat_i
            y_hat_i = model.predict(X_test_i.reshape(1, -1))
            y_hat.append(y_hat_i)
        r2, mse = fit_statistics(y_hat, y_test[:30])
        print(mse)
        print(min(y_hat))
    print()
print()
print()
コード例 #8
0
 def test_not_enough_data(self):
     """fit_statistics on a single sample is expected to give a NaN r2."""
     y_hat = [1]
     y = [1]
     r2, mse = fit_statistics(y_hat, y)
     # NaN never compares equal to itself, so ``r2 in [np.nan]`` only passes
     # when r2 is the very same object as np.nan.  np.isnan accepts any NaN.
     assert np.isnan(r2)
コード例 #9
0
 def test_fit_statistics(self):
     """fit_statistics yields exactly two values and neither is None."""
     result = fit_statistics([1, 2, 3], [1, 3, 4])
     assert len(result) == 2
     assert None not in result
コード例 #10
0
def convergence_sensitivity_analysis():
    """Record how CRMP fits depend on the initial guess, for every producer.

    For each producer the dataset is split in half without shuffling and one
    CRMP model is fitted per entry of ``param_grid['p0']``.  Statistics on
    the training half are appended to ``fit_data`` and statistics on the
    held-out half to ``predict_data``.  A running count of completed fits is
    printed as progress output, and both tables are written to CSV at the
    end.
    """
    def record(table, producer_number, p0, crmp, r2, mse):
        """Append one result row to ``table`` (a dict of column lists)."""
        table['Producer'].append(producer_number)
        table['Model'].append(model_namer(crmp))
        table['tau_initial'].append(p0[0])
        table['tau_final'].append(crmp.tau_)
        table['f1_initial'].append(p0[1])
        table['f1_final'].append(crmp.gains_[0])
        table['f2_initial'].append(p0[2])
        table['f2_final'].append(crmp.gains_[1])
        table['r2'].append(r2)
        table['MSE'].append(mse)

    iterations = 0
    for i in range(number_of_producers):
        X, y = production_rate_dataset(producers[i], *injectors)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            train_size=0.5,
                                                            shuffle=False)
        for p0 in param_grid['p0']:
            # deepcopy so the grid entry itself is never handed to CRMP.
            crmp = CRMP(p0=deepcopy(p0)).fit(X_train, y_train)

            # Fitting: score on the data the model was fitted to.
            r2, mse = fit_statistics(crmp.predict(X_train), y_train)
            record(fit_data, i + 1, p0, crmp, r2, mse)

            # Prediction: score on the held-out half.
            r2, mse = fit_statistics(crmp.predict(X_test), y_test)
            record(predict_data, i + 1, p0, crmp, r2, mse)

            iterations += 1
            print(iterations)

    # NOTE(review): `fit_ouput_file` (sic) is spelled as the original block
    # references it; the name is defined outside this block.
    pd.DataFrame(fit_data).to_csv(fit_ouput_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)
コード例 #11
0
ファイル: north_sea.py プロジェクト: UT-DIRECT/CRM
def train_bagging_regressor_with_crmp():
    """Fit a bagged CrmpBHP ensemble per producer and plot a 30-step forecast.

    For each selected producer the function:

    1. builds the dataset and splits it, without shuffling, at that
       producer's entry in ``train_sizes``;
    2. fits an ``MBBaggingRegressor`` of ``CrmpBHP`` estimators;
    3. computes the ensemble-average fit over the training rows and its r2;
    4. predicts the first 30 test rows with every estimator and derives the
       per-step average, 10th and 90th percentiles;
    5. plots the last 100 training points, the fit, the individual
       predictions and the summary curves, and hands the figure to
       ``plot_helper`` with ``save=True``.
    """
    train_sizes = [0.33, 0.735, 0.49, 0.45, 0.52, 0.66, 0.54]
    n_estimators = 100
    delta_t = 1
    # Producer index 5 is not part of this run.
    for i in [0, 1, 2, 3, 4, 6]:
        # Constructing dataset
        name = producer_names[i]
        print(name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(producer,
                                                      injectors,
                                                      delta_t=delta_t)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_sizes[i], shuffle=False)
        X_train = X_train.to_numpy()
        X_test = X_test.to_numpy()
        y_train = y_train.to_numpy()
        y_test = y_test.to_numpy()
        train_length = len(X_train)
        t_fit = np.linspace(0, train_length - 1, train_length)
        t_test = np.linspace(train_length, (train_length + 29), 30)

        # Setting up estimator
        bgr = MBBaggingRegressor(base_estimator=CrmpBHP(delta_t=delta_t),
                                 n_estimators=n_estimators,
                                 block_size=7,
                                 bootstrap=True,
                                 n_jobs=-1,
                                 random_state=0)
        bgr.fit(X_train, y_train)

        # One-step fit of every estimator on every training row: q0 is set
        # from the row's first column and the remaining columns are passed
        # to predict.  The row index gets its own name so that it does not
        # overwrite the producer index `i`.
        y_fits = []
        for e in bgr.estimators_:
            y_hat_i = []
            for row in range(len(y_train)):
                e.q0 = X_train[row, 0]
                y_hat_i.append(e.predict(np.array([X_train[row, 1:]])))
            y_fits.append(y_hat_i)
        # Rearrange so that each row holds all estimators' values for one
        # time step, then average across estimators.
        y_fits_by_time = np.asarray(y_fits).T.reshape(-1, n_estimators)
        y_fits_average = [np.average(values) for values in y_fits_by_time]

        r2, _ = fit_statistics(y_fits_average, y_train)

        # Getting all bootstrapped predictions, each started from the last
        # training target.
        y_hats = []
        for e in bgr.estimators_:
            e.q0 = y_train[-1]
            y_hats.append(e.predict(X_test[:30, 1:]))
        y_hats_by_time = np.asarray(y_hats).T
        p10s = [np.percentile(values, 10) for values in y_hats_by_time]
        averages = [np.average(values) for values in y_hats_by_time]
        p90s = [np.percentile(values, 90) for values in y_hats_by_time]

        # Tallest plotted value; used for the annotation and the divider.
        max_train = np.amax(y_train[-100:])
        max_fit = np.amax(y_fits_average[-100:])
        max_realization = np.amax(y_hats)
        height = max(max_train, max_fit, max_realization)

        # Plotting
        plt.plot(t_fit[-100:], y_train[-100:], color='k')
        plt.plot(t_fit[-100:],
                 y_fits_average[-100:],
                 color='g',
                 label='Fitting')
        plt.plot(t_test, y_test[:30], color='k', label='True Value')
        plt.plot(t_test, averages, color='b', label='Average')
        plt.plot(t_test, p10s, color='r', alpha=0.5, label='P10 & P90')
        plt.plot(t_test, p90s, color='r', alpha=0.5)
        for hat in y_hats:
            plt.plot(t_test, hat, color='k', alpha=0.02)
        plt.annotate('r-squared = {:.4f}'.format(r2),
                     xy=(train_length - 60, height))
        # Dashed divider at the last training step.
        plt.vlines(train_length - 1,
                   0,
                   height,
                   linewidth=2,
                   colors='k',
                   linestyles='dashed',
                   alpha=0.8)
        plot_helper(FIG_DIR,
                    title='{}: 30 Days Prediction'.format(name),
                    xlabel='Days',
                    ylabel='Production Rate [bbls/day]',
                    legend=True,
                    save=True)
コード例 #12
0
ファイル: north_sea_data.py プロジェクト: UT-DIRECT/CRM
def evaluate_crmp_bhp_model():
    """Score CrmpBHP 30-step predictions for every producer and initial guess.

    For each name in ``producer_names`` the dataset is built from the
    producer's rate column, the injector water volumes and the producer's
    ``delta_p`` column, then split 40/60 without shuffling.  One CrmpBHP
    model is fitted per entry of ``p0s`` and scored on the first 30 test
    rows (with the first feature column dropped); a row per fit is appended
    to ``predict_data``.  Progress is printed per producer and per fit.
    ``fit_data`` and ``predict_data`` are both written to CSV at the end.
    """
    # (initial-guess column, fitted-value column) for each of the four gains.
    gain_columns = (
        ('f1_initial', 'f1_final'),
        ('f2_initial', 'f2_final'),
        ('f3_initial', 'f3_final'),
        ('f4_initial', 'f4_final'),
    )

    iteration = 0
    for name in producer_names:
        print('Producer Name: ', name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(
            producer[['Date', name]], injectors, producer['delta_p'])
        parts = train_test_split(X, y, train_size=0.40, shuffle=False)
        # Convert each split piece to a NumPy array.
        X_train, X_test, y_train, y_test = (
            part.to_numpy() for part in parts)

        for guess in p0s:
            iteration += 1
            print('Iteration: {}'.format(iteration))
            # deepcopy so the entry stored in p0s is never handed to CrmpBHP.
            fitted = CrmpBHP(p0=deepcopy(guess)).fit(X_train, y_train)

            # Training-set statistics are not recorded by this function, so
            # nothing is appended to fit_data here.

            # Prediction
            forecast = fitted.predict(X_test[:30, 1:])
            r2, mse = fit_statistics(forecast, y_test[:30], shutin=True)
            predict_data['Producer'].append(name)
            predict_data['Model'].append(model_namer(fitted))
            predict_data['tau_initial'].append(guess[0])
            predict_data['tau_final'].append(fitted.tau_)
            for k, (initial_key, final_key) in enumerate(gain_columns):
                # guess[0] is tau, so gain k sits at guess[k + 1].
                predict_data[initial_key].append(guess[k + 1])
                predict_data[final_key].append(fitted.gains_[k])
            predict_data['r2'].append(r2)
            predict_data['MSE'].append(mse)

    # Fitting table is written as it stands, followed by the prediction table.
    pd.DataFrame(fit_data).to_csv(fit_output_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)