예제 #1
0
def objective_function():
    """Tabulate CRMP prediction error over a grid of fixed parameters.

    A CRMP instance is fitted once on the training half of producer 0's
    data purely to obtain a usable model object.  Its ``tau_`` and
    ``gains_`` are then overwritten with every entry of
    ``param_grid['p0']`` and the model is scored on the test half of each
    producer's data.  One row per (producer, parameter set) is appended to
    the externally defined ``objective_function_data`` and the result is
    written to ``objective_function_file`` as CSV.
    """
    def half_split(rates):
        # Unshuffled 50/50 split of the dataset built for one producer.
        features, target = production_rate_dataset(rates, *injectors)
        return train_test_split(features,
                                target,
                                test_size=0.5,
                                shuffle=False)

    seed_X, _, seed_y, _ = half_split(producers[0])
    model = CRMP().fit(seed_X, seed_y)

    for producer_idx in range(number_of_producers):
        _, held_out_X, _, held_out_y = half_split(producers[producer_idx])
        for guess in param_grid['p0']:
            # Bypass fitting: evaluate the model at exactly these parameters.
            model.tau_ = guess[0]
            model.gains_ = guess[1:]
            r2, mse = fit_statistics(model.predict(held_out_X), held_out_y)
            record = {
                'Producer': producer_idx + 1,
                'tau': guess[0],
                'f1': guess[1],
                'f2': guess[2],
                'r2': r2,
                'MSE': mse,
            }
            for column, value in record.items():
                objective_function_data[column].append(value)

    pd.DataFrame(objective_function_data).to_csv(objective_function_file)
예제 #2
0
def _record_four_gain_run(table, producer_number, model_name, p0, crmp, r2,
                          mse):
    """Append one run's initial guess, fitted parameters and scores to *table*.

    *table* is a dict of lists keyed by column name.  ``p0[0]`` is stored as
    ``tau_initial`` and ``p0[1]``..``p0[4]`` as ``f1_initial``..``f4_initial``;
    the fitted ``crmp.tau_`` and ``crmp.gains_[0..3]`` go to the matching
    ``*_final`` columns.
    """
    table['Producer'].append(producer_number)
    table['Model'].append(model_name)
    table['tau_initial'].append(p0[0])
    table['tau_final'].append(crmp.tau_)
    for k in range(4):
        table['f{}_initial'.format(k + 1)].append(p0[k + 1])
        table['f{}_final'.format(k + 1)].append(crmp.gains_[k])
    table['r2'].append(r2)
    table['MSE'].append(mse)


def convergence_sensitivity_analysis():
    """Fit CRMP from every initial guess and log fit/prediction quality.

    For each producer, the producer and injector series are trimmed to start
    at that producer's entry in ``producer_starting_indicies``, turned into a
    dataset, and split 50/50 without shuffling.  A fresh CRMP is fitted from
    each initial guess in ``p0s``; its statistics on the training half are
    appended to ``fit_data`` and on the test half to ``predict_data``.  Both
    tables are finally written as CSV to ``fit_ouput_file`` and
    ``predict_output_file``.
    """
    for i in range(len(producer_names)):
        starting_index = producer_starting_indicies[i]
        producer = producers[i][starting_index:]
        injectors_tmp = [injector[starting_index:] for injector in injectors]
        X, y = production_rate_dataset(producer, *injectors_tmp)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.5,
                                                            shuffle=False)
        for p0 in p0s:
            # Hand CRMP a copy so the shared initial guess stays untouched.
            crmp = CRMP(p0=deepcopy(p0)).fit(X_train, y_train)
            model_name = model_namer(crmp)

            # Fitting: score on the data the model was trained on.
            r2, mse = fit_statistics(crmp.predict(X_train), y_train)
            _record_four_gain_run(fit_data, i + 1, model_name, p0, crmp, r2,
                                  mse)

            # Prediction: score on the held-out half.
            r2, mse = fit_statistics(crmp.predict(X_test), y_test)
            _record_four_gain_run(predict_data, i + 1, model_name, p0, crmp,
                                  r2, mse)

    # Fitting
    pd.DataFrame(fit_data).to_csv(fit_ouput_file)

    # Prediction
    pd.DataFrame(predict_data).to_csv(predict_output_file)
예제 #3
0
def fit_all_producers():
    """Fit a CRMP model to each producer and print its parameters.

    Every producer's dataset is split 50/50 without shuffling and the model,
    constructed with ``q0`` set to the producer's first value, is fitted on
    the training half.  The producer number, fitted ``tau_`` and ``gains_``
    are printed, followed by a blank line.
    """
    for idx in range(number_of_producers):
        rates = producers[idx]
        features, target = production_rate_dataset(rates, *injectors)
        fit_X, _, fit_y, _ = train_test_split(
            features, target, train_size=0.5, shuffle=False
        )
        model = CRMP(q0=rates[0]).fit(fit_X, fit_y)
        report = (
            'Producer {}'.format(idx + 1),
            'Tau: {}'.format(model.tau_),
            'Gains: {}'.format(model.gains_),
            '',  # trailing blank line between producers
        )
        print('\n'.join(report))
예제 #4
0
def fit_individual_initial_guesses():
    """Fit CRMP to producer index 2 from one fixed initial guess.

    The model is trained on the first half of the unshuffled dataset with
    ``p0=[1e-03, 0.4, 0.6]`` and scored on that same training half.  The
    fitted ``tau_``, ``gains_`` and the training MSE are printed, one per
    line.
    """
    rates = producers[2]
    features, target = production_rate_dataset(rates, *injectors)
    fit_X, _, fit_y, _ = train_test_split(features,
                                          target,
                                          train_size=0.5,
                                          shuffle=False)
    model = CRMP(p0=[1e-03, 0.4, 0.6]).fit(fit_X, fit_y)
    _, mse = fit_statistics(model.predict(fit_X), fit_y)
    for value in (model.tau_, model.gains_, mse):
        print(value)
예제 #5
0
def minimum_train_size():
    """Print the smallest training size whose test error beats a threshold.

    Training sizes 1 through 148 are tried in increasing order on the
    dataset of producer index 3.  For each size the data is split without
    shuffling, a CRMP model is fitted from ``p0=[5, 0.5, 0.5]`` and scored on
    the remaining samples.  As soon as ``log(MSE)`` drops below the
    threshold, the training size and the MSE are printed and the function
    returns.  Nothing is printed if no size qualifies.  Always returns
    ``None``.
    """
    # Upper bound on log(test MSE) for a fit to count as good enough.
    log_mse_threshold = 11.0581424463

    # The dataset does not depend on the training size, so build it once
    # rather than on every iteration.
    X, y = production_rate_dataset(producers[3], *injectors)

    # Plain integers avoid the float round-trip (and truncation) of
    # np.linspace(...).astype(int).
    for data_size in range(1, 149):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=data_size, shuffle=False)
        # A fresh p0 list per fit, in case CRMP modifies it in place.
        crmp = CRMP(p0=[5, 0.5, 0.5]).fit(X_train, y_train)
        _, mse = fit_statistics(crmp.predict(X_test), y_test)
        if np.log(mse) < log_mse_threshold:
            print(data_size)
            print(mse)
            return
예제 #6
0
def plot_production_history_with_fit_and_predict():
    """Overlay stored CRMP fits and predictions on each production history.

    Fitted parameters are read from the CSV at ``predict_file``.  A CRMP
    instance is fitted once on producer index 1 only to obtain a usable model
    object; for every row belonging to a producer its ``tau_`` and ``gains_``
    are overwritten with the stored ``*_final`` values.  Each producer gets
    one figure showing the observed series (black), the training-half curves
    (red, dashed) and the test-half curves (green, dotted), with a vertical
    line at the first prediction time.  Figures are finalised through
    ``plot_helper`` with ``save=True``.
    """
    def half_split(producer_idx):
        # Trim producer and injectors to the producer's starting index, then
        # return the trimmed series with its unshuffled 50/50 split.
        offset = producer_starting_indicies[producer_idx]
        series = producers[producer_idx][offset:]
        trimmed_injectors = [injector[offset:] for injector in injectors]
        features, target = production_rate_dataset(series, *trimmed_injectors)
        return series, train_test_split(features,
                                        target,
                                        test_size=0.5,
                                        shuffle=False)

    results = pd.read_csv(predict_file)
    _, (seed_X, _, seed_y, _) = half_split(1)
    model = CRMP().fit(seed_X, seed_y)

    gain_columns = ('f1_final', 'f2_final', 'f3_final', 'f4_final')

    for i, well_name in enumerate(producer_names):
        runs = producer_rows_from_df(results, i + 1)
        series, (fit_X, predict_X, fit_y, _) = half_split(i)

        n_samples = len(series)
        days = np.linspace(1, n_samples, n_samples)
        n_fit = len(fit_y)
        fit_days = days[:n_fit]
        # Skip one sample after the training span before prediction begins.
        predict_days = days[n_fit + 1:]

        # Empty, fully opaque lines so the legend is readable even though the
        # real curves are drawn almost transparent.
        for colour, label in (('r', 'Fit'), ('g', 'Predict')):
            plt.plot([], [], c=colour, label=label)
        plt.plot(days, series, c='k')

        for _, run in runs.iterrows():
            tau = run['tau_final']
            gains = [run[column] for column in gain_columns]
            model.tau_ = tau
            model.gains_ = gains

            # Fitting
            plt.plot(fit_days,
                     model.predict(fit_X),
                     '--',
                     alpha=0.02,
                     c='r',
                     linewidth=2)

            # Prediction
            plt.plot(predict_days,
                     model.predict(predict_X),
                     ':',
                     alpha=0.02,
                     c='g',
                     linewidth=2)

        plt.vlines(predict_days[0],
                   0,
                   1.1 * max(series),
                   linewidth=2,
                   alpha=0.8)
        plot_helper(FIG_DIR,
                    title=well_name,
                    xlabel='Time [days]',
                    ylabel='Production Rate [bbls/day]',
                    legend=True,
                    save=True)
예제 #7
0
                    batch_size=batch,
                    validation_split=0.1,
                    verbose=0)

# Recursive 30-step LSTM forecast: each step predicts from row j of the scaled
# test inputs and collects the scalar result.
y_hat_lstm = []
for j in range(30):
    y_hat_j = model.predict(X_test_scaled[j:(j + 1)])[0][0]
    # Overwrite the next input row with this prediction so the following step
    # consumes the model's own output.
    # NOTE(review): at j = 29 this writes X_test_scaled[30] -- confirm the
    # array has at least 31 rows, otherwise this raises IndexError.
    X_test_scaled[j + 1] = y_hat_j
    y_hat_lstm.append(y_hat_j)

# Reshape to a single column and undo the scaling before scoring against the
# first 30 test targets.
y_hat_lstm = np.array(y_hat_lstm).reshape(-1, 1)
y_hat_lstm = scaler.inverse_transform(y_hat_lstm)
r2, mse = fit_statistics(y_hat_lstm, y_test[:30])
print(mse)

# CRMP baseline scored on the same 30 test samples.
# NOTE(review): prediction drops the first column of X_test (``[:30, 1:]``)
# while fitting uses X_train as-is -- confirm the column layouts match.
crmp = CRMP().fit(X_train, y_train)
y_hat_crmp = crmp.predict(X_test[:30, 1:])
r2, mse = fit_statistics(y_hat_crmp, y_test[:30])
print(mse)

# 30 x-axis values running from 76 to 105 -- presumably the day numbers of the
# forecast window; TODO confirm.
t = np.linspace(76, 105, 30)
plt.plot(t, y_test[:30], color='k', label='True Value', linewidth=2)
plt.plot(t, y_hat_crmp, alpha=0.5, label='CRMP', linewidth=2)
plt.plot(t, y_hat_lstm, alpha=0.5, label='LSTM Neural Network', linewidth=2)
plt.tight_layout()
plot_helper(
    FIG_DIR,
    title='{}: 30 Days Prediction for CRMP and LSTM Neural Network'.format(
        name),
    xlabel='Days',
    ylabel='Production Rate [bbls/day]',
예제 #8
0
from sklearn.linear_model import (BayesianRidge, HuberRegressor,
                                  LinearRegression)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

from crmp import CRMP, MBBaggingRegressor

from src.config import INPUTS
from src.data.read_crmp import injectors, producers, producer_names
from src.helpers.analysis import fit_statistics
from src.helpers.cross_validation import goodness_score
from src.helpers.features import production_rate_dataset
from src.simulations import injector_names, number_of_producers

# NOTE(review): this assignment is overwritten by the one directly below, so
# the CRMP() estimator is never used -- confirm which list is intended.
estimators = [CRMP()]
# scikit-learn regressors, each constructed with its default settings.
estimators = [
    LinearRegression(),
    BayesianRidge(),
    HuberRegressor(),
    MLPRegressor()
]


def log_transformation(column):
    """Return ``log(1 + column)``, element-wise for array-like input.

    Uses ``np.log1p`` so that values very close to zero keep their precision
    instead of being rounded away when 1 is added before taking the log.
    """
    return np.log1p(column)


# NOTE(review): `std` is not referenced in the visible part of this script --
# presumably consumed further down; confirm.
std = 25
for estimator in estimators:
    # Show which estimator is being processed.
    print(estimator)
예제 #9
0
def _record_two_gain_run(table, producer_number, model_name, p0, crmp, r2,
                         mse):
    """Append one run's initial guess, fitted parameters and scores to *table*.

    *table* is a dict of lists keyed by column name.  ``p0[0]`` is stored as
    ``tau_initial`` and ``p0[1]``, ``p0[2]`` as ``f1_initial``,
    ``f2_initial``; the fitted ``crmp.tau_`` and ``crmp.gains_[0..1]`` go to
    the matching ``*_final`` columns.
    """
    table['Producer'].append(producer_number)
    table['Model'].append(model_name)
    table['tau_initial'].append(p0[0])
    table['tau_final'].append(crmp.tau_)
    for k in range(2):
        table['f{}_initial'.format(k + 1)].append(p0[k + 1])
        table['f{}_final'.format(k + 1)].append(crmp.gains_[k])
    table['r2'].append(r2)
    table['MSE'].append(mse)


def convergence_sensitivity_analysis():
    """Fit CRMP from every initial guess and log fit/prediction quality.

    Each producer's dataset is split 50/50 without shuffling.  A fresh CRMP
    is fitted from every initial guess in ``param_grid['p0']``; its
    statistics on the training half are appended to ``fit_data`` and on the
    test half to ``predict_data``.  A running count of completed fits is
    printed as progress.  Both tables are finally written as CSV to
    ``fit_ouput_file`` and ``predict_output_file``.
    """
    iterations = 0
    for i in range(number_of_producers):
        X, y = production_rate_dataset(producers[i], *injectors)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            train_size=0.5,
                                                            shuffle=False)
        for p0 in param_grid['p0']:
            # Hand CRMP a copy so the shared grid entry stays untouched.
            crmp = CRMP(p0=deepcopy(p0)).fit(X_train, y_train)
            model_name = model_namer(crmp)

            # Fitting: score on the data the model was trained on.
            r2, mse = fit_statistics(crmp.predict(X_train), y_train)
            _record_two_gain_run(fit_data, i + 1, model_name, p0, crmp, r2,
                                 mse)

            # Prediction: score on the held-out half.
            r2, mse = fit_statistics(crmp.predict(X_test), y_test)
            _record_two_gain_run(predict_data, i + 1, model_name, p0, crmp,
                                 r2, mse)

            # Progress indicator: number of fits completed so far.
            iterations += 1
            print(iterations)

    # Fitting
    pd.DataFrame(fit_data).to_csv(fit_ouput_file)

    # Prediction
    pd.DataFrame(predict_data).to_csv(predict_output_file)