Esempio n. 1
0
def plot_production_history_with_fit_and_predict():
    """Plot each producer's history with CRMP fit and prediction overlays.

    For every producer, one faint fit curve (training half) and one faint
    prediction curve (test half) is drawn per row of the prediction CSV,
    using that row's ``tau_final`` and ``f1_final``..``f4_final`` values.
    Each figure is handed to ``plot_helper`` for labelling and saving.
    """
    predictions = pd.read_csv(predict_file)

    # Fit once on the producer at index 1 just to obtain a CRMP instance;
    # its tau_ and gains_ are overwritten for every CSV row further down.
    seed_start = producer_starting_indicies[1]
    seed_history = producers[1][seed_start:]
    seed_injectors = [injector[seed_start:] for injector in injectors]
    X, y = production_rate_dataset(seed_history, *seed_injectors)
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=0.5, shuffle=False)
    crmp = CRMP().fit(X_train, y_train)

    gain_columns = ('f1_final', 'f2_final', 'f3_final', 'f4_final')
    for i, producer_name in enumerate(producer_names):
        producer_df = producer_rows_from_df(predictions, i + 1)
        start = producer_starting_indicies[i]
        history = producers[i][start:]
        trimmed_injectors = [injector[start:] for injector in injectors]
        X, y = production_rate_dataset(history, *trimmed_injectors)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, shuffle=False)

        n_samples = len(history)
        t = np.linspace(1, n_samples, n_samples)
        n_train = len(y_train)
        train_time = t[:n_train]
        # One extra sample is skipped after the training span; presumably
        # the dataset is one sample shorter than the history -- confirm.
        test_time = t[n_train + 1:]

        # Empty proxy lines so the legend shows opaque 'Fit' / 'Predict'
        # entries instead of the nearly transparent realisation curves.
        plt.plot([], [], c='r', label='Fit')
        plt.plot([], [], c='g', label='Predict')
        plt.plot(t, history, c='k')

        for _, row in producer_df.iterrows():
            crmp.tau_ = row['tau_final']
            crmp.gains_ = [row[column] for column in gain_columns]

            # Fitting
            fitted = crmp.predict(X_train)
            plt.plot(train_time, fitted, '--', alpha=0.02, c='r', linewidth=2)

            # Prediction
            predicted = crmp.predict(X_test)
            plt.plot(test_time, predicted, ':', alpha=0.02, c='g', linewidth=2)

        # Vertical marker at the first test-time sample.
        plt.vlines(test_time[0],
                   0,
                   1.1 * max(history),
                   linewidth=2,
                   alpha=0.8)
        plot_helper(FIG_DIR,
                    title=producer_name,
                    xlabel='Time [days]',
                    ylabel='Production Rate [bbls/day]',
                    legend=True,
                    save=True)
Esempio n. 2
0
def total_water_injected_and_predicted_water_cut_dimensionless_time():
    """Plot water-cut fits and predictions against dimensionless time.

    Dimensionless time is computed as ``W_t / V_p``. For each model the
    fitted values from the fitting CSV are followed by a 30-slot window of
    step-size-12 predictions. A vertical line is drawn at ``t_D[120]`` and
    the measured ``f_w`` (from its fourth sample on) is overlaid.
    """
    plt.figure()
    V_p = 3.53E+07  # presumably the pore volume used to normalise W_t -- confirm
    fitting_df = pd.read_csv(koval_fitting_file)
    predictions_df = pd.read_csv(koval_predictions_file)
    predictions_step_size_12 = predictions_df.loc[
        predictions_df['Step size'] == 12]
    models = ['Koval', 'LinearRegression', 'ElasticNet']
    # models = ['Koval', 'LinearRegression', 'BayesianRidge', 'Lasso', 'ElasticNet']
    t_D = W_t / V_p
    for model in models:
        fitting = fitting_df.loc[fitting_df['Model'] == model]
        predictions = predictions_step_size_12.loc[
            predictions_step_size_12['Model'] == model]
        # Slots that receive no prediction row stay at 0, so they index
        # t_D[0] when plotted.
        x = [0] * 30
        y = [0] * 30
        for _, row in predictions.iterrows():
            # t_i == 121 lands in the first prediction slot.
            i = int(row['t_i'] - 121)
            x[i] = int(row['t_i'])
            y[i] = row['Prediction']
        x = fitting['t_i'].tolist() + x
        y = fitting['Fit'].tolist() + y
        plt.plot(t_D[x], y, linestyle='--', linewidth=2, alpha=0.6)
    plt.axvline(x=t_D[120], color='k')
    # NOTE: reuses the index list `x` left over from the last model above.
    plt.plot(t_D[x], f_w[3:])
    # Build the legend as a new list so `models` itself is not mutated.
    legend = models + ['Predictions Start', 'Data']
    plot_helper(FIG_DIR,
                title='Water Cut Fitting and Predictions',
                xlabel='Dimensionless Time',
                ylabel='Estimated Water Cut',
                legend=legend,
                save=True)
Esempio n. 3
0
def total_water_injected_and_water_cut():
    """Plot water cut (``f_w``) against total water injected (``W_t``)."""
    plt.figure()
    plt.plot(W_t, f_w)
    axis_labels = {'xlabel': 'Total Water Injected', 'ylabel': 'Water Cut'}
    plot_helper(FIG_DIR, **axis_labels, save=True)
def objective_function_contour_plot():
    """Draw a filled MSE contour over (f1, tau) for every producer.

    The parameter pair stored in ``true_params`` for the producer is
    overlaid as a red marker, and the figure is passed to ``plot_helper``.
    """
    for producer in range(1, number_of_producers + 1):
        rows = producer_rows_from_df(objective_function_df, producer)
        grid_x, grid_y, grid_z = contour_params(rows,
                                                x_column='f1',
                                                y_column='tau',
                                                z_column='MSE')
        plt.contourf(grid_x, grid_y, grid_z, 15, alpha=1.0)
        plt.colorbar()

        x_true, y_true = true_params[producer]
        true_marker = plt.scatter(x_true,
                                  y_true,
                                  s=100,
                                  c='r',
                                  label='True Value',
                                  alpha=0.4)
        # Only the marker goes in the legend; the contour has a colorbar.
        plt.legend(handles=[true_marker], loc='upper left')
        plt.tight_layout()
        plt.ylim(0, 100)

        plot_helper(
            FIG_DIR,
            title='CRMP: Producer {} Objective Function'.format(producer),
            xlabel=xlabel,
            ylabel=ylabel,
            save=True)
def initial_guesses_and_mse_from_prediction():
    """Contour the MSE over the initial-guess grid for every producer.

    Rows of ``fitting_sensitivity_analysis_df`` are filtered by the
    ``Producer`` column; ``f1_initial`` and ``tau_initial`` form the axes
    and ``MSE`` the contour level. The ``true_params`` pair is overlaid.
    """
    sensitivity = fitting_sensitivity_analysis_df
    for producer in range(1, number_of_producers + 1):
        rows = sensitivity.loc[sensitivity['Producer'] == producer]
        grid_x, grid_y, grid_z = contour_params(rows,
                                                x_column='f1_initial',
                                                y_column='tau_initial',
                                                z_column='MSE')
        plt.contourf(grid_x, grid_y, grid_z, 15, alpha=1.0)
        plt.colorbar()

        x_true, y_true = true_params[producer]
        true_marker = plt.scatter(x_true,
                                  y_true,
                                  s=100,
                                  c='r',
                                  label='Actual',
                                  alpha=0.5)
        plt.legend(handles=[true_marker], loc='upper left')
        plt.tight_layout()
        plt.ylim(0, 100)

        figure_title = (
            'CRMP: Producer {} Initial Guesses with MSEs from Prediction'
            .format(producer))
        plot_helper(FIG_DIR,
                    title=figure_title,
                    xlabel=xlabel,
                    ylabel=ylabel,
                    save=True)
def determine_train_test_split():
    """Show 30-step CrmpBHP predictions for a sweep of training fractions.

    For the producer at index 4, the dataset is split (unshuffled) at each
    of 81 train sizes between 0.1 and 0.9, a ``CrmpBHP`` model is fitted,
    and its prediction for the first 30 test samples is shown next to the
    true values. Figures are displayed interactively, not saved.
    """
    # producer_names = ['PA01', 'PA02', 'PA03', 'PA09', 'PA10', 'PA12']
    horizon = 30
    candidate_fractions = np.linspace(0.1, 0.9, 81)
    for producer_index in (4,):
        # Constructing dataset
        name = producer_names[producer_index]
        print(name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(producer, injectors)

        for train_size in candidate_fractions:
            split = train_test_split(
                X, y, train_size=train_size, shuffle=False)
            X_train, X_test, y_train, y_test = (
                part.to_numpy() for part in split)

            n_train = len(X_train)
            t_test = np.linspace(n_train, n_train + horizon - 1, horizon)

            model = CrmpBHP().fit(X_train, y_train)
            # Start the forecast from the last training target.
            model.q0 = y_train[-1]
            # The first feature column is dropped before predicting.
            y_hat = model.predict(X_test[:horizon, 1:])

            plt.plot(t_test, y_test[:horizon], color='k', label='True Value')
            plt.plot(t_test, y_hat, color='r', label='Prediction')
            plot_helper(FIG_DIR,
                        title='{}: {} Train Size'.format(name, train_size),
                        xlabel='Days',
                        ylabel='Production Rate [bbls/day]',
                        legend=True,
                        save=False)
            plt.show()
Esempio n. 7
0
def plot_on_line_hours_per_day():
    """Save a 5-bin histogram of the ``On-Line`` hours for every producer."""
    for name in producer_names:
        producer = producers_df.loc[producers_df['Name'] == name]
        on_line_hours = producer['On-Line']
        plt.hist(on_line_hours, bins=5)
        # A histogram's x-axis is the binned quantity (hours on-line),
        # not elapsed time, so label it accordingly.
        plot_helper(FIG_DIR,
                    title=name,
                    xlabel='On-Line Time [hours/day]',
                    save=True)
Esempio n. 8
0
def plot_histogram_of_production_rates():
    """Save a 10-bin histogram of non-zero ``Total Vol`` per producer."""
    for name in producer_names:
        rows = producers_df.loc[producers_df['Name'] == name]
        volumes = rows['Total Vol']
        # Zero entries are excluded from the histogram.
        nonzero_volumes = volumes[volumes != 0]
        plt.hist(nonzero_volumes, bins=10)
        plot_helper(FIG_DIR,
                    title=name,
                    xlabel='Production Rate [bbls/day]',
                    save=True)
Esempio n. 9
0
def producers_vs_time():
    """Plot the transposed ``producers`` data against ``time`` on one figure."""
    plt.figure()
    # Transposed so that matplotlib draws one line per column of the result.
    rates_by_time = producers.T
    plt.plot(time, rates_by_time)
    plot_helper(FIG_DIR,
                xlabel='Time',
                ylabel='Production Rate',
                legend=producer_names,
                save=True)
Esempio n. 10
0
def plot_bhp():
    """Save a plot of the ``Av BHP`` column against day number per producer."""
    for name in producer_names:
        rows = producers_df.loc[producers_df['Name'] == name]
        pressure = rows['Av BHP']
        n_days = len(pressure)
        # Day axis 1..n_days with unit spacing.
        days = np.linspace(1, n_days, n_days)
        plt.plot(days, pressure)
        plot_helper(FIG_DIR,
                    title=name,
                    xlabel='Time [days]',
                    ylabel='Bottom Hole Pressure [psi]',
                    save=True)
Esempio n. 11
0
def plot_delta_bhp():
    """Save a plot of the ``delta_p`` column against day number per producer."""
    for name in producer_names:
        producer_data = get_real_producer_data(producers_df, name, bhp=True)
        pressure_change = producer_data['delta_p']
        n_days = len(pressure_change)
        # Day axis 1..n_days with unit spacing.
        days = np.linspace(1, n_days, n_days)
        plt.plot(days, pressure_change)
        plot_helper(FIG_DIR,
                    title=name,
                    xlabel='Time [days]',
                    ylabel='Change in Bottom Hole Pressure [psi]',
                    save=True)
Esempio n. 12
0
def plot_production_rate():
    """Save a plot of the ``total rate`` column against day number per producer."""
    for name in producer_names:
        producer = producers_df.loc[producers_df['Name'] == name]
        production_rate = producer['total rate']
        n_days = len(production_rate)
        # One sample per day: use a unit-spaced axis 1..n_days, matching the
        # other per-producer plots. linspace(0, n, n) would give a step of
        # n / (n - 1) instead of 1.
        t = np.linspace(1, n_days, n_days)
        plt.plot(t, production_rate)
        plot_helper(FIG_DIR,
                    xlabel='Time [days]',
                    ylabel='Production Rate [bbls/day]',
                    title=name,
                    save=True)
Esempio n. 13
0
def plot_production_rate():
    """Overlay the production histories of PA09 and PA12 on one figure.

    Each series is trimmed to start at its entry in
    ``producer_starting_indicies``; the producer's position in
    ``producer_names`` is printed as it is processed.
    """
    selected_names = ['PA09', 'PA12']
    for name in selected_names:
        position = producer_names.index(name)
        print(position)
        series = producers[position]
        first_sample = producer_starting_indicies[position]
        plt.plot(time[first_sample:], series[first_sample:])
    plot_helper(FIG_DIR,
                xlabel='Date',
                ylabel='Production Rate',
                legend=selected_names,
                save=True)
Esempio n. 14
0
def producers_vs_injector():
    """Scatter every producer's series against each injector's series.

    One figure is created per injector, with one scatter set per producer.
    """
    for injector_number, injection in enumerate(injectors, start=1):
        plt.figure()
        for production in producers:
            plt.scatter(injection, production)
        plot_helper(FIG_DIR,
                    title='Injector {}'.format(injector_number),
                    xlabel='Injection Rate',
                    ylabel='Production Rate',
                    legend=producer_names,
                    save=True)
Esempio n. 15
0
def production_rate_vs_different_time_constants():
    """Plot the curves stored in ``tau_at_zero_df`` against its ``time`` column.

    Every column from the third onward is drawn as its own line and
    labelled with a fixed list of tau values.
    """
    elapsed = tau_at_zero_df['time']
    curves = tau_at_zero_df.iloc[:, 2:]
    plt.plot(elapsed, curves, alpha=0.5, linewidth=3)

    tau_labels = [
        'Tau = ' + value
        for value in ('1e-06', '1', '10', '20', '50', '100')
    ]
    plot_helper(
        FIG_DIR,
        title='CRMP: Constant Injection Rate Over Different Time Constants',
        xlabel='Time',
        ylabel='Production Rate',
        legend=tau_labels,
        save=True)
Esempio n. 16
0
def plot_fractional_flow_curve():
    """Save a plot of water production over total production per producer.

    Both series are trimmed to the producer's starting index, and NaN
    ratios are replaced with 0 before plotting.
    """
    for i, producer_name in enumerate(producer_names):
        first_sample = producer_starting_indicies[i]
        total = producers[i][first_sample:]
        water = producers_water_production[i][first_sample:]
        t = time[first_sample:]
        fraction = (water / total).fillna(0)
        plt.plot(t, fraction)
        plot_helper(FIG_DIR,
                    title=producer_name,
                    xlabel='Time [days]',
                    ylabel='Water Fraction of Total Production [unitless]',
                    save=True)
Esempio n. 17
0
def plot_imputed_and_original_production_rate():
    """Plot each producer's series before and after ``impute_training_data``.

    A deep copy of the column is taken first; the same column is read
    again after the call and plotted on top of the copy.
    """
    for name in producer_names:
        producer = get_real_producer_data(producers_df, name)
        before_imputation = deepcopy(producer[name])
        n_days = len(producer)
        placeholder_targets = np.zeros(n_days)
        # The return value is discarded; `producer` is re-read below, so the
        # helper presumably imputes it in place -- confirm.
        impute_training_data(producer, placeholder_targets, name)[0]
        days = np.linspace(1, n_days, n_days)
        plt.plot(days, before_imputation)
        plt.plot(days, producer[name])
        plot_helper(FIG_DIR,
                    title='{}: Imputed Production Data'.format(name),
                    xlabel='Time [days]',
                    ylabel='Producer Rate [bbls/day]',
                    save=True)
Esempio n. 18
0
def production_rate_with_predictions():
    """Plot fits and step-size-12 predictions over each producer's data.

    For every producer and model, the fitted values from the fit CSV are
    followed by a 29-slot window of predictions. A vertical line at x=120
    is drawn and the raw producer series is overlaid.
    """
    fit_df = pd.read_csv(fit_file)
    predict_df = pd.read_csv(predict_file)
    for producer_number, producer in enumerate(producers, start=1):
        plt.figure()
        producer_fits = fit_df.loc[fit_df['Producer'] == producer_number]
        producer_predictions = predict_df.loc[
            predict_df['Producer'] == producer_number]
        step_size_12 = producer_predictions.loc[
            producer_predictions['Step size'] == 12]

        models = ['CRMP', 'LinearRegression', 'BayesianRidge']
        # models = ['ICRMP', 'LinearRegression', 'BayesianRidge']
        for model in models:
            model_fit = producer_fits.loc[producer_fits['Model'] == model]
            model_predictions = step_size_12.loc[
                step_size_12['Model'] == model]

            # Slots with no prediction row keep the placeholder value 0.
            window_x = [0] * 29
            window_y = [0] * 29
            for _, row in model_predictions.iterrows():
                # t_i == 121 lands in the first prediction slot.
                slot = int(row['t_i'] - 121)
                window_x[slot] = int(row['t_i'])
                window_y[slot] = row['Prediction']

            xs = model_fit['t_i'].tolist() + window_x
            ys = model_fit['Fit'].tolist() + window_y
            plt.plot(xs, ys, linestyle='--', linewidth=2, alpha=0.6)

        plt.axvline(x=120, color='k')
        plt.plot(producer)

        legend = models + ['Predictions Start', 'Data']
        plot_helper(FIG_DIR,
                    title='Producer {}'.format(producer_number),
                    xlabel='Time',
                    ylabel='Production Rate Fitting and Predictions',
                    legend=legend,
                    save=True)
Esempio n. 19
0
def plot_injection_rates():
    """Print shut-in statistics and save an injection-rate plot per injector.

    For every injector, this prints the number of samples, the number of
    samples whose ``Water Vol`` is zero, and their ratio, then plots the
    injection rate against day number.
    """
    for name in injector_names:
        injector = injectors_df.loc[injectors_df['Name'] == name]
        injection_rate = injector['Water Vol']
        n_days = len(injection_rate)
        count = (injection_rate == 0).sum()
        print('Length: {}'.format(n_days))
        print('Count: {}'.format(count))
        print('Shut in Fraction: {}'.format(count * 1.0 / n_days))
        # A stray `continue` used to sit here, which made the plotting below
        # unreachable. The axis is unit-spaced (1..n_days) like the other
        # per-day plots.
        t = np.linspace(1, n_days, n_days)
        plt.plot(t, injection_rate)
        plot_helper(FIG_DIR,
                    xlabel='Time [days]',
                    ylabel='Injection Rate [bbls/day]',
                    title=name,
                    save=True)
Esempio n. 20
0
def parameter_convergence():
    """Plot initial-to-final parameter trajectories for every producer.

    Each pair returned by ``initial_and_final_params_from_df`` is drawn as
    a green 'Initial' dot, a red 'Final' cross and a faint connecting line;
    the ``true_params`` pair is drawn as a large blue marker.
    """
    for producer in range(1, len(producers) + 1):
        plt.figure(figsize=[7, 4.8])
        rows = producer_rows_from_df(fitting_sensitivity_analysis_df,
                                     producer)
        x, y = initial_and_final_params_from_df(rows)
        x_true, y_true = true_params[producer]

        for j, x_pair in enumerate(x):
            y_pair = y[j]
            # Index 0 holds the initial value, index 1 the final value.
            initial = plt.scatter(x_pair[0],
                                  y_pair[0],
                                  s=40,
                                  c='g',
                                  marker='o',
                                  label='Initial')
            final = plt.scatter(x_pair[1],
                                y_pair[1],
                                s=40,
                                c='r',
                                marker='x',
                                label='Final')
            plt.plot(x_pair, y_pair, c='k', alpha=0.15)

        actual = plt.scatter(x_true,
                             y_true,
                             s=200,
                             c='b',
                             marker='X',
                             label='True Value')

        # The legend uses the handles from the last trajectory drawn, and is
        # anchored to the right of the axes.
        plt.legend(handles=[actual, initial, final],
                   bbox_to_anchor=(1.04, 1),
                   loc="upper left")
        plt.xlim(0, 1)
        plt.ylim(0, 100)
        plt.tight_layout()

        figure_title = (
            'CRMP: Producer {} Initial Parameter Values with Convergence'
            .format(producer))
        plot_helper(FIG_DIR,
                    title=figure_title,
                    xlabel=xlabel,
                    ylabel=ylabel,
                    save=True)
Esempio n. 21
0
def plot_average_hour_production_rate():
    """Save a plot of ``Total Vol`` divided by ``On-Line`` hours per producer.

    NaN and +inf ratios are replaced with 0. Each series is aligned to the
    tail of a fixed 1..1317 day axis, and the upper y-limit (printed) is
    110% of the series maximum.
    """
    day_axis = np.linspace(1, 1317, 1317)
    for name in producer_names:
        rows = producers_df.loc[producers_df['Name'] == name]
        volume = rows['Total Vol']
        hours = rows['On-Line']
        hourly_rate = (volume / hours).fillna(0).replace(np.inf, 0)
        n_samples = len(hourly_rate)
        # Right-align the series on the fixed axis.
        plt.plot(day_axis[-n_samples:], hourly_rate)
        y_max = 1.1 * max(hourly_rate)
        print(y_max)
        plt.ylim(0, y_max)
        plot_helper(FIG_DIR,
                    title=name,
                    xlabel='Time [days]',
                    ylabel='Hourly Production Rate [bbls/hour]',
                    save=True)
Esempio n. 22
0
def gradient_across_parameter_space_prediction_data():
    """Contour the first-axis gradient of the MSE grid for every producer.

    The grid comes from ``contour_params`` over the ``f1``, ``tau`` and
    ``MSE`` columns of ``objective_function_df``.
    """
    for producer in range(1, number_of_producers + 1):
        rows = producer_rows_from_df(objective_function_df, producer)
        grid_x, grid_y, grid_z = contour_params(rows,
                                                x_column='f1',
                                                y_column='tau',
                                                z_column='MSE')
        # np.gradient returns one array per axis; keep the axis-0 component.
        gradient_axis0 = np.gradient(grid_z)[0]
        plt.contourf(grid_x, grid_y, gradient_axis0, 15, alpha=1.0)
        plt.colorbar()
        plt.tight_layout()
        plt.ylim(0, 100)

        figure_title = (
            'CRMP: Producer {} ln(Gradient) Across Parameter Space for MSEs from Prediction'
            .format(producer))
        plot_helper(FIG_DIR,
                    title=figure_title,
                    xlabel=xlabel,
                    ylabel=ylabel,
                    save=True)
Esempio n. 23
0
def fitted_params_and_mean_squared_error_fitting():
    """Contour the MSE over the initial-guess grid with the true value marked.

    Uses the ``f1_initial``, ``tau_initial`` and ``MSE`` columns of the
    producer's rows in ``fitting_sensitivity_analysis_df``.
    """
    for producer in range(1, len(producers) + 1):
        rows = producer_rows_from_df(fitting_sensitivity_analysis_df,
                                     producer)
        grid_x, grid_y, grid_z = contour_params(rows,
                                                x_column='f1_initial',
                                                y_column='tau_initial',
                                                z_column='MSE')
        plt.contourf(grid_x, grid_y, grid_z)
        plt.colorbar()

        x_true, y_true = true_params[producer]
        true_marker = plt.scatter(x_true, y_true, c='red', label='Actual')
        plt.legend(handles=[true_marker])

        figure_title = (
            'CRMP Producer {}: Fitted Parameter Values with ln(MSE) from Fitting'
            .format(producer))
        plot_helper(FIG_DIR,
                    title=figure_title,
                    xlabel=xlabel,
                    ylabel=ylabel,
                    save=True)
Esempio n. 24
0
def water_cut_vs_time():
    """Plot ``f_w`` against its sample index and save the figure."""
    plt.figure()
    plt.plot(f_w)
    axis_labels = {'xlabel': 'Time', 'ylabel': 'Water Cut'}
    plot_helper(FIG_DIR, **axis_labels, save=True)
Esempio n. 25
0
# Script fragment: compares a 30-step forecast from `model` (labelled as the
# LSTM in the plot below) with a freshly fitted CRMP on the same test window.
# `model`, `scaler`, `X_test_scaled`, `X_train`, `X_test`, `y_train`,
# `y_test` and `name` are defined outside this fragment.
y_hat_lstm = []
for j in range(30):
    # Predict from sample j only, and take the single scalar output.
    y_hat_j = model.predict(X_test_scaled[j:(j + 1)])[0][0]
    # Feed the prediction back in as the next input sample.
    # NOTE(review): this assigns the scalar to the whole of entry j + 1
    # (broadcast over every element if X_test_scaled is a NumPy array) and
    # mutates X_test_scaled in place -- confirm that is intended.
    X_test_scaled[j + 1] = y_hat_j
    y_hat_lstm.append(y_hat_j)

# Column vector so it can be passed through the scaler's inverse transform.
y_hat_lstm = np.array(y_hat_lstm).reshape(-1, 1)
y_hat_lstm = scaler.inverse_transform(y_hat_lstm)
r2, mse = fit_statistics(y_hat_lstm, y_test[:30])
print(mse)

# CRMP baseline: predicts the same 30 samples with the first feature column
# dropped. r2 and mse are overwritten by this second evaluation.
crmp = CRMP().fit(X_train, y_train)
y_hat_crmp = crmp.predict(X_test[:30, 1:])
r2, mse = fit_statistics(y_hat_crmp, y_test[:30])
print(mse)

# Hard-coded x-axis: 30 points covering 76..105.
t = np.linspace(76, 105, 30)
plt.plot(t, y_test[:30], color='k', label='True Value', linewidth=2)
plt.plot(t, y_hat_crmp, alpha=0.5, label='CRMP', linewidth=2)
plt.plot(t, y_hat_lstm, alpha=0.5, label='LSTM Neural Network', linewidth=2)
plt.tight_layout()
plot_helper(
    FIG_DIR,
    title='{}: 30 Days Prediction for CRMP and LSTM Neural Network'.format(
        name),
    xlabel='Days',
    ylabel='Production Rate [bbls/day]',
    legend=True,
    save=True)
Esempio n. 26
0
def best_worse():
    """Plot 30-step bagged forecasts for CRMP-BHP, Huber and linear regression.

    For each selected producer, the true test values are plotted first.
    Each model is then wrapped in an ``MBBaggingRegressor`` and fitted, and
    the average of the per-estimator forecasts is drawn. The figure is
    saved through ``plot_helper``.
    """
    # One train fraction per producer index.
    train_sizes = [0.33, 0.735, 0.49, 0.45, 0.52, 0.66, 0.54]
    n_estimators = 100
    delta_t = 1
    # Each entry is [estimator, log flag]; the flag selects the
    # log-transformed copies of the data for fitting and forecasting.
    models = [
        [CrmpBHP(), False],
        [HuberRegressor(alpha=0.5, epsilon=100, fit_intercept=False), True],
        [LinearRegression(fit_intercept=False, positive=True), False],
    ]
    # Legend labels, parallel to `models`.
    labels = [
        'CRMP-BHP',
        'Huber Regression (Best)',
        'Linear Regression (Worst)',
    ]

    # for i in [0, 1, 2, 3, 4, 6]:
    # Only the producer at index 1 is processed.
    for i in [1]:
        # Constructing dataset
        name = producer_names[i]
        print(name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]

        X, y = construct_real_production_rate_dataset(producer,
                                                      injectors,
                                                      delta_t=delta_t)
        # Unshuffled split: the test set follows the training set in order.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_sizes[i], shuffle=False)

        train_length = len(X_train)
        # NOTE(review): t_fit is computed but not used in this function.
        t_fit = np.linspace(0, train_length - 1, train_length)
        # x-axis for the 30 forecast steps that follow the training span.
        t_test = np.linspace(train_length, (train_length + 29), 30)

        plt.plot(t_test,
                 y_test[:30],
                 color='k',
                 label='True Value',
                 linewidth=2)

        # Log-transformed copies: only the producer's own column in X and
        # the targets are transformed.
        X_train_scaled = X_train.copy(deep=True)
        X_train_scaled[name] = log_transformation(X_train[name])
        X_test_scaled = X_test.copy(deep=True)
        X_test_scaled[name] = log_transformation(X_test[name])
        y_train_scaled = log_transformation(y_train)
        # NOTE(review): y_test_scaled is not used after conversion below.
        y_test_scaled = log_transformation(y_test)

        # Convert everything to NumPy arrays for positional indexing.
        X_train = X_train.to_numpy()
        X_test = X_test.to_numpy()
        y_train = y_train.to_numpy()
        y_test = y_test.to_numpy()

        X_train_scaled = X_train_scaled.to_numpy()
        X_test_scaled = X_test_scaled.to_numpy()
        y_train_scaled = y_train_scaled.to_numpy()
        y_test_scaled = y_test_scaled.to_numpy()

        for j in range(len(models)):
            model = models[j][0]
            log = models[j][1]
            print(labels[j])
            bgr = MBBaggingRegressor(base_estimator=model,
                                     n_estimators=n_estimators,
                                     block_size=7,
                                     bootstrap=True,
                                     n_jobs=-1,
                                     random_state=1)

            if log:
                bgr.fit(X_train_scaled, y_train_scaled)
            else:
                bgr.fit(X_train, y_train)

            # First model (CRMP-BHP): each estimator predicts all 30 steps in
            # one call after q0 is set to the last training target; the first
            # feature column is dropped.
            if j == 0:
                y_hats = []
                for e in bgr.estimators_:
                    e.q0 = y_train[-1]
                    y_hat_i = e.predict(X_test[:30, 1:])
                    y_hats.append(y_hat_i)
                # Transpose so each row holds all estimators' values for one
                # time step.
                y_hats_by_time = np.asarray(y_hats).T
                averages = []
                for y_hats_i in y_hats_by_time:
                    average = np.average(y_hats_i)
                    averages.append(average)
                plt.plot(t_test,
                         averages,
                         label=labels[j],
                         alpha=0.5,
                         linewidth=2)
                continue

            # Remaining models: step-by-step forecast where feature 0 of each
            # test row is replaced by the previous step's prediction.
            y_hats = []
            for e in bgr.estimators_:
                # Seed the recursion with the last training target.
                if log:
                    y_hat_i = y_train_scaled[-1]
                else:
                    y_hat_i = y_train[-1]
                y_hat = []
                for k in range(30):
                    if log:
                        X_test_i = X_test_scaled[k, :]
                    else:
                        X_test_i = X_test[k, :]
                    # NOTE(review): X_test_i is a basic-index slice of a NumPy
                    # array, so this write presumably goes through to
                    # X_test / X_test_scaled -- confirm that is acceptable.
                    X_test_i[0] = y_hat_i
                    X_test_i = X_test_i.reshape(1, -1)
                    y_hat_i = e.predict(X_test_i)
                    if log:
                        # Presumably inverts log_transformation (log(x + 1))
                        # -- confirm against its definition.
                        y_hat.append(np.exp(y_hat_i) - 1)
                    else:
                        y_hat.append(y_hat_i)
                y_hats.append(y_hat)
            # Reshape to one row per time step, one column per estimator.
            y_hats_by_time = np.asarray(y_hats).T.reshape(-1, n_estimators)

            averages = []
            p50s = []
            for y_hats_i in y_hats_by_time:
                average = np.average(y_hats_i)
                p50 = np.percentile(y_hats_i, 50)
                averages.append(average)
                p50s.append(p50)

            # Plotting
            # Negative values are clipped to 0. NOTE(review): p50s is computed
            # but never plotted.
            p50s = np.array(p50s).clip(min=0)
            averages = np.array(averages).clip(min=0)
            plt.plot(t_test, averages, label=labels[j], alpha=0.5, linewidth=2)

        plt.tight_layout()
        plot_helper(
            FIG_DIR,
            title=
            '{}: 30 Days Prediction for CRMP-BHP and the Best and Worst Performing ML Estimators'
            .format(name),
            xlabel='Days',
            ylabel='Production Rate [bbls/day]',
            legend=True,
            save=True)
        # plt.show()
        print()
Esempio n. 27
0
def train_bagging_regressor_with_crmp():
    """Fit a bagged CrmpBHP per producer and plot a 30-step forecast.

    For each selected producer this:

    1. builds the dataset and splits it (unshuffled) at the producer's
       entry in ``train_sizes``;
    2. fits an ``MBBaggingRegressor`` of ``CrmpBHP`` estimators;
    3. computes a one-step-ahead fit on the training data, averaged over
       estimators, and its r-squared;
    4. computes each estimator's 30-step forecast and the per-step P10,
       average and P90;
    5. plots the last 100 training points, the fit, the true test values,
       the forecast statistics and every individual realisation, then
       saves the figure through ``plot_helper``.
    """
    # One train fraction per producer index.
    train_sizes = [0.33, 0.735, 0.49, 0.45, 0.52, 0.66, 0.54]
    n_estimators = 100
    delta_t = 1
    # Producer index 5 is skipped.
    for i in [0, 1, 2, 3, 4, 6]:
        # Constructing dataset
        name = producer_names[i]
        print(name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(producer,
                                                      injectors,
                                                      delta_t=delta_t)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_sizes[i], shuffle=False)
        X_train = X_train.to_numpy()
        X_test = X_test.to_numpy()
        y_train = y_train.to_numpy()
        y_test = y_test.to_numpy()
        train_length = len(X_train)
        t_fit = np.linspace(0, train_length - 1, train_length)
        t_test = np.linspace(train_length, (train_length + 29), 30)

        # Setting up estimator
        bgr = MBBaggingRegressor(base_estimator=CrmpBHP(delta_t=delta_t),
                                 n_estimators=n_estimators,
                                 block_size=7,
                                 bootstrap=True,
                                 n_jobs=-1,
                                 random_state=0)
        bgr.fit(X_train, y_train)

        # One-step-ahead fit: every training row is predicted on its own,
        # with q0 reset to that row's first feature. The index is named `k`
        # so it does not clobber the outer producer index `i`.
        y_fits = []
        for e in bgr.estimators_:
            y_hat_i = []
            for k in range(len(y_train)):
                e.q0 = X_train[k, 0]
                y_hat_i.append(e.predict(np.array([X_train[k, 1:]])))
            y_fits.append(y_hat_i)
        # One row per training step, one column per estimator.
        y_fits_by_time = np.asarray(y_fits).T.reshape(-1, n_estimators)
        y_fits_average = [np.average(row) for row in y_fits_by_time]

        r2, _ = fit_statistics(y_fits_average, y_train)

        # Getting all bootstrapped predictions
        y_hats = []
        for e in bgr.estimators_:
            # Start every forecast from the last training target.
            e.q0 = y_train[-1]
            y_hats.append(e.predict(X_test[:30, 1:]))
        # Transpose so each row holds all estimators' values for one step.
        y_hats_by_time = np.asarray(y_hats).T
        p10s = [np.percentile(row, 10) for row in y_hats_by_time]
        averages = [np.average(row) for row in y_hats_by_time]
        p90s = [np.percentile(row, 90) for row in y_hats_by_time]

        # Height of the annotation and of the train/test divider.
        max_train = np.amax(y_train[-100:])
        max_fit = np.amax(y_fits_average[-100:])
        max_realization = np.amax(y_hats)
        height = max(max_train, max_fit, max_realization)

        # Plotting
        plt.plot(t_fit[-100:], y_train[-100:], color='k')
        plt.plot(t_fit[-100:],
                 y_fits_average[-100:],
                 color='g',
                 label='Fitting')
        plt.plot(t_test, y_test[:30], color='k', label='True Value')
        plt.plot(t_test, averages, color='b', label='Average')
        plt.plot(t_test, p10s, color='r', alpha=0.5, label='P10 & P90')
        plt.plot(t_test, p90s, color='r', alpha=0.5)
        # Individual realisations, drawn nearly transparent.
        for hat in y_hats:
            plt.plot(t_test, hat, color='k', alpha=0.02)
        plt.annotate('r-squared = {:.4f}'.format(r2),
                     xy=(train_length - 60, height))
        # Dashed divider at the last training sample.
        plt.vlines(train_length - 1,
                   0,
                   height,
                   linewidth=2,
                   colors='k',
                   linestyles='dashed',
                   alpha=0.8)
        plot_helper(FIG_DIR,
                    title='{}: 30 Days Prediction'.format(name),
                    xlabel='Days',
                    ylabel='Production Rate [bbls/day]',
                    legend=True,
                    save=True)