Example #1
0
def function1(df, kunag, matnr):
    """Plot seasonal vs. plain ARIMA forecasts for one series and save a PNG.

    The series for (kunag, matnr) is extracted with ``individual_series`` and
    split with ``splitter_2``. If ``ljung_box_test(df, matnr)`` is falsy the
    function returns without plotting. Otherwise two models are fitted,
    ``arima_seasonality_added`` (fed the result of
    ``product_seasonal_comp_7_point``) and plain ``arima``, and their
    predictions are drawn next to the actual quantities.

    param df: full input dataframe handed to the helper functions
    param kunag: customer id, used for series selection and the file name
    param matnr: material id, used for series selection and the file name
    return: None in every case; the only output is the saved figure
    """
    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None
    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")
    # Validation predictions followed by the last 16 rows of each model output
    # (presumably the test horizon - confirm against splitter_2).
    test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
    test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])

    # The headline names the model with the lower test score; ties (and NaN
    # comparisons) fall to 'seasonality', exactly as before.
    winner = 'normal' if score2 < score1 else 'seasonality'
    title = (winner + '\n'
             'seasonality=' + str(order1) +
             ' test_mse_seasonality=' + str(round(score1, 3)) +
             ' val_mse_seasonality=' + str(round(mse_val_1, 3)) +
             ' normal=' + str(order2) +
             ' test_mse_normal=' + str(round(score2, 3)) +
             ' val_mse_normal=' + str(round(mse_val_2, 3)))

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(test_seas["prediction"],
                 marker=".",
                 color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"],
                 marker=".",
                 color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"],
                 marker=".",
                 color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"],
                 marker=".",
                 color='orange',
                 label='actual')
        plt.legend(('test_seasonality', 'test_normal', 'val_seasonality',
                    'val_normal', 'actual'),
                   loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        plt.title(title)
        plt.savefig(
            "/home/aman/PycharmProjects/seasonality_hypothesis/stl_plots_seasonality_108_7_point_thresh_0.01/"
            + str(kunag) + "_" + str(matnr) + ".png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function once per series leaks figures.
        plt.close(fig)
Example #2
0
def function1(df, kunag, matnr):
    """Plot seasonal vs. plain ARIMA forecasts for one series and save a PNG.

    The series for (kunag, matnr) is extracted with ``individual_series`` and
    split with ``splitter_2``. If ``ljung_box_test(df, matnr)`` is falsy the
    function returns without plotting. Otherwise ``arima_seasonality_added``
    (fed the result of ``product_seasonal_comp_5_point``) and plain ``arima``
    are fitted and their predictions are drawn next to the actual quantities.

    param df: full input dataframe handed to the helper functions
    param kunag: customer id, used for series selection and the file name
    param matnr: material id, used for series selection and the file name
    return: None in every case; the only output is the saved figure
    """
    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None
    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # Orders and unrounded scores of both models, written into the plot area.
    summary = ('seasonality = ' + str(order1) + '\n'
               'test_mse_seasonality = ' + str(score1) + '\n'
               'validation_mse_seasonality = ' + str(mse_val_1) + '\n'
               'normal = ' + str(order2) + '\n'
               'test_mse_normal = ' + str(score2) + '\n'
               'validation_mse_normal = ' + str(mse_val_2) + '\n')

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(input_df1["prediction"],
                 marker=".",
                 color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"],
                 marker=".",
                 color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"],
                 marker=".",
                 color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"],
                 marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"],
                 marker=".",
                 color='orange',
                 label='actual')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        # The title names the model with the lower test score; ties (and NaN
        # comparisons) fall to 'seasonality'.
        plt.title('normal' if score2 < score1 else 'seasonality')
        # NOTE(review): the x position is a date-like string; whether it lands
        # on the dt_week axis as intended depends on the index dtype - confirm.
        plt.text("05-04-2018", 0.1, summary)
        plt.savefig(
            "/home/aman/PycharmProjects/seasonality_hypothesis/plots_seasonality_108/"
            + str(kunag) + "_" + str(matnr) + ".png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function once per series leaks figures.
        plt.close(fig)
Example #3
0
 matnr = int(row["matnr"])
 # kunag = 500068486
 # matnr = 134926
 df_series = individual_series(df, kunag, matnr)
 seasonality_product = product_seasonal_comp_7_point(df, matnr)
 result = arima_seasonality_added_rolling(df_series,
                                          seasonality_product)
 result = result.set_index("dt_week")
 new_error = mean_squared_error(result.iloc[-16:]["quantity"],
                                result.iloc[-16:]["prediction"])
 #print(result.iloc[-16:])
 plt.figure(figsize=(16, 8))
 plt.plot(result["quantity"], marker=".")
 plt.plot(result["prediction"], marker=".")
 train, validation, test = splitter_2(df_series)
 score2, output2, output2_val, order2, mse_val_2 = arima(
     train, validation, test)
 input_df2 = output2.set_index("dt_week")
 output2_val = output2_val.set_index("dt_week")
 test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
 old_error = mean_squared_error(input_df2.iloc[-16:]["quantity"],
                                input_df2.iloc[-16:]["prediction"])
 #print(input_df2.iloc[-16:])
 plt.plot(test_norm["prediction"],
          marker=".",
          color='blue',
          label='test_normal')
 plt.plot(output2_val["prediction"], marker=".", label='val_normal')
 plt.title("new_error = " + str(new_error) + "   old_error = " +
           str(old_error))
 plt.savefig(
     "/home/aman/PycharmProjects/seasonality_hypothesis/latest_plots/"
Example #4
0
from selection import remove_negative_rows
import pandas as pd
from preprocess import splitter_2
from hypothesis import arima
from selection import individual_series


def individual_series_2(input_df, kunag=500057582, matnr=103029):
    """
    selects the rows of one (kunag, matnr) pair, indexed by week date

    The input is copied, passed through ``remove_negative_rows``, restricted
    to rows whose ``date`` is at least 20160703 and then filtered on the
    given ``kunag`` and ``matnr``. ``date`` is parsed with the ``%Y%m%d``
    format into a new ``dt_week`` column, which becomes the sorted index.

    param input_df: a pandas dataframe with "date", "kunag", "matnr" columns
    param kunag: customer id to select
    param matnr: material id to select
    return: a pandas dataframe indexed by "dt_week" (input_df is not modified)
    """
    df_copy = remove_negative_rows(input_df.copy())
    df_copy = df_copy[df_copy["date"] >= 20160703]
    selected = (df_copy["kunag"] == kunag) & (df_copy["matnr"] == matnr)
    # Take an explicit copy: assigning a column onto a filtered slice triggers
    # SettingWithCopyWarning and is not guaranteed to stick.
    output_df = df_copy[selected].copy()
    # One vectorized parse of the whole column instead of a per-row apply.
    output_df["dt_week"] = pd.to_datetime(output_df["date"], format="%Y%m%d")
    return output_df.sort_values("dt_week").set_index("dt_week")


if __name__ == "__main__":
    # Manual smoke run: print the selected series, then the second element of
    # the arima() result for the same customer/material pair.
    # NOTE(review): load_data is not among the imports visible at the top of
    # this file - confirm where it is supposed to come from.
    data = load_data()  # load once; individual_series_2 works on its own copy
    print(individual_series_2(data))
    df_series = individual_series(data, 500057582, 103029)
    train, validation, test = splitter_2(df_series)
    print(arima(train, validation, test)[1])
Example #5
0
# Compare plain ARIMA (score1) with seasonality-added ARIMA (score2) for every
# (kunag, matnr) row of `sample`, keeping only series for which
# ljung_box_test reports seasonality.
rows = []  # one [kunag, matnr, score1, score2] entry per evaluated series
count1 = 0  # series where plain ARIMA scored strictly lower
count2 = 0  # series where the seasonal variant scored lower or equal
error = 0  # series that raised somewhere in the pipeline
for index, row in sample.iterrows():
    try:
        print("kunag: ", row["kunag"], " matnr: ", row["matnr"])
        seas_pres = ljung_box_test(df, int(row["matnr"]))
        print("Seasonality :", seas_pres)
        if not seas_pres:
            # Skip before extracting/splitting: nothing below is needed.
            continue
        df_series = individual_series(df, row["kunag"], row["matnr"])
        train, validation, test = splitter_2(df_series)
        seasonality_product = product_seasonal_comp_7_point(
            df, int(row["matnr"]))
        score1 = arima(train, validation, test)[0]
        print("score1=", score1)
        score2 = arima_seasonality_added(train, validation, test,
                                         seasonality_product)[0]
        print("score2=", score2)
        rows.append([row["kunag"], row["matnr"], score1, score2])
        if score1 < score2:
            count1 += 1
        elif score1 >= score2:  # kept explicit: NaN scores count for neither
            count2 += 1
        print("count1 :", count1, "count2 :", count2, "error :", error)
    except Exception as exc:
        # Best effort: one failing series must not abort the whole run, but
        # the failure is now counted and reported instead of vanishing.
        error += 1
        print("failed at sample index", index, ":", repr(exc))
# Build the frame once (DataFrame.append per row is quadratic and was removed
# in pandas 2.0). Naming the columns here also works when no series was
# evaluated. Rows get a running 0..n-1 index instead of every row being 0.
result = pd.DataFrame(rows, columns=["kunag", "matnr", "score1", "score2"])
result.to_csv(
    "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample_results_7_point_seasonality_18th_jan_thresgh_0.01_stl_c005.csv"