Exemple #1
0
def _comparison_title(score1, order1, mse_val_1, score2, order2, mse_val_2):
    """Build the two-line plot title for the seasonal vs. normal comparison.

    The first line names the model with the better test score; the second
    line lists the order and the rounded test/validation scores of both.
    """
    # The normal model is named only on a strictly lower test score; ties
    # (and NaN comparisons) fall through to 'seasonality'.
    winner = 'normal' if score2 < score1 else 'seasonality'
    return (winner + '\n'
            'seasonality=' + str(order1) +
            ' test_mse_seasonality=' + str(round(score1, 3)) +
            ' val_mse_seasonality=' + str(round(mse_val_1, 3)) +
            ' normal=' + str(order2) +
            ' test_mse_normal=' + str(round(score2, 3)) +
            ' val_mse_normal=' + str(round(mse_val_2, 3)))


def function1(df, kunag, matnr):
    """Plot seasonal vs. normal ARIMA predictions for one series and save it.

    The series for ``(kunag, matnr)`` is extracted and split into train,
    validation and test parts. If ``ljung_box_test`` reports no seasonality
    for ``matnr`` the function returns early. Otherwise both
    ``arima_seasonality_added`` and ``arima`` are run, their validation and
    test predictions are plotted against the actual quantities, and the
    figure is written to a PNG named ``<kunag>_<matnr>.png`` in a hard-coded
    output directory.

    Args:
        df: Source data passed through to the project helpers.
        kunag: Customer identifier; used for the series and the file name.
        matnr: Product identifier; used for the series, the seasonality
            test, the seasonal component and the file name.

    Returns:
        None in every case; the saved figure is the only output.
    """
    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)

    seas_pres = ljung_box_test(df, matnr)
    # Elsewhere the result of ljung_box_test is indexed as a sequence with the
    # seasonality flag at position 0. A non-empty tuple is always truthy, so
    # testing the whole result would never take the early return; unwrap it.
    # A plain boolean result is left untouched.
    if isinstance(seas_pres, (tuple, list)):
        seas_pres = seas_pres[0]
    if not seas_pres:
        return None

    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        # Validation predictions followed by the last 16 rows of each output
        # (presumably the test window -- confirm against splitter_2).
        test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
        test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])

        plt.plot(test_seas["prediction"],
                 marker=".",
                 color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"],
                 marker=".",
                 color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"],
                 marker=".",
                 color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"],
                 marker=".",
                 color='orange',
                 label='actual')
        plt.legend(('test_seasonality', 'test_normal', 'val_seasonality',
                    'val_normal', 'actual'),
                   loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        plt.title(_comparison_title(score1, order1, mse_val_1,
                                    score2, order2, mse_val_2))
        plt.savefig(
            "/home/aman/PycharmProjects/seasonality_hypothesis/stl_plots_seasonality_108_7_point_thresh_0.01/"
            + str(kunag) + "_" + str(matnr) + ".png")
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # calling this once per series does not accumulate open figures.
        plt.close(fig)
Exemple #2
0
def dtw_check(df, kunag, matnr, threshold=0.18):
    """Compare a series with its product seasonal component using ``dtw``.

    The series for ``(kunag, matnr)`` is smoothed with ``smoothing_7_new``,
    indexed by ``dt_week`` and standardised (mean subtracted, divided by the
    standard deviation). The seasonal component returned by
    ``product_seasonal_comp_7_point`` is restricted to the same index labels
    and standardised the same way. The two ``quantity`` columns are then
    passed to ``dtw`` with a squared-difference point distance and ``warp=1``.

    Args:
        df: Source data passed through to the project helpers.
        kunag: Customer identifier of the series.
        matnr: Product identifier of the series and seasonal component.
        threshold: Largest distance that still counts as a match.

    Returns:
        Tuple ``(matched, distance)`` where ``matched`` is True when the
        distance returned by ``dtw`` is less than or equal to ``threshold``.
    """

    def squared_difference(a, b):
        return (a - b) ** 2

    def standardise(frame):
        return (frame - frame.mean()) / frame.std()

    smoothed = smoothing_7_new(individual_series(df, kunag, matnr))
    smoothed = smoothed.set_index("dt_week")
    series_standardised = standardise(smoothed)

    # Keep only the seasonal rows whose labels appear in the series index.
    seasonal_component = product_seasonal_comp_7_point(df, matnr)
    seasonal_standardised = standardise(seasonal_component.loc[smoothed.index])

    distance, _cost, _accumulated_cost, _warp_path = dtw(
        series_standardised["quantity"],
        seasonal_standardised["quantity"],
        dist=squared_difference,
        warp=1)
    return bool(distance <= threshold), distance
Exemple #3
0
 # Run both seasonality checks for the current row's customer/product pair.
 kunag = int(row["kunag"])
 matnr = int(row["matnr"])
 try:
     # ljung_box_test result is read positionally: flag at 0, p-value at 1,
     # aggregated product data at 4.
     test1 = ljung_box_test(df, matnr)
     test1_flag = test1[0]
     test1_pvalue = test1[1]
     aggregated_product = test1[4]
     # dtw_check returns (flag, distance).
     test2 = dtw_check(df, kunag, matnr)
     test2_flag = test2[0]
     test2_value = test2[1]
 except Exception:
     # Treat any failure as "both tests failed". The derived names must be
     # reset as well: the code that follows reads test1_flag, which would
     # otherwise be undefined or still hold the value from a previous row.
     test1 = False
     test2 = False
     test1_flag = False
     test2_flag = False
     test1_pvalue = None
     test2_value = None
     aggregated_product = None
 df_series = individual_series(df, kunag, matnr)
 if test1_flag:
     seasonality_component = product_seasonal_comp_7_point(df, matnr)
     result_52_seasonal = moving_average_with_seasonality(df_series, seasonality_component, order=52)
     result_52_nonseasonal = moving_average(df_series, order=52)
     # result = arima_rolling_011(df_series)
     # result_seasonal = arima_seasonality_added_rolling_011(df_series, seasonality_component)
     error_result_nonseasonal = pow(mean_squared_error(df_series["quantity"].iloc[-16:],
                                           result_52_nonseasonal["prediction"].iloc[-16:]), 0.5)
     error_result_seasonal = pow(mean_squared_error(df_series["quantity"].iloc[-16:],
                                                    result_52_seasonal["prediction"].iloc[-16:]), 0.5)
     report = report.append([[kunag, matnr, test1_flag, test1_pvalue, test2_flag, test2_value,
                              error_result_nonseasonal, error_result_seasonal]])
     report.to_csv(file_address, index=False)
     count += 1
     print("count: ", count)
     df_series = df_series.set_index("dt_week")
     plt.figure(figsize=(16, 8))
    "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_12_20_400_sample.csv"
)
report = pd.DataFrame()
count = 0
for index, row in sample.iterrows():
    start = time.time()
    kunag = int(row["kunag"])
    matnr = int(row["matnr"])
    seas_pres = ljung_box_test(df, int(row["matnr"]))
    if not seas_pres[0]:
        count += 1
        print(count)
        continue
    else:
        dtw_flag = dtw_check(df, kunag, matnr)
        seasonality_product = product_seasonal_comp_7_point(df,
                                                            matnr).iloc[-55:-3]
        df_series = individual_series(df, kunag, matnr)
        result_1 = arima_seasonality_added_rolling_011(df_series,
                                                       seasonality_product)
        break
        result_1 = result_1.set_index("dt_week")
        result_2 = arima_rolling_011(df_series)
        result_2 = result_2.set_index("dt_week")
        plt.figure(figsize=(16, 8))
        plt.plot(result_1["prediction"].iloc[-16:],
                 marker=".",
                 markerfacecolor="red",
                 label="prediction_seasonal")
        plt.plot(result_2["prediction"].iloc[-16:],
                 marker=".",
                 markerfacecolor="red",
Exemple #5
0
)
# Compare plain ARIMA (score1) against seasonality-adjusted ARIMA (score2)
# for every customer/product pair in `sample` whose product tests seasonal.
sample["matnr"] = sample["matnr"].map(int)
result = pd.DataFrame()
count1 = 0  # rows where score1 < score2
count2 = 0  # rows where score1 >= score2
error = 0  # rows skipped because processing raised
for index, row in sample.iterrows():
    try:
        print("kunag: ", row["kunag"], " matnr: ", row["matnr"])
        seas_pres = ljung_box_test(df, int(row["matnr"]))
        print("Seasonality :", seas_pres)
        # Elsewhere the result of ljung_box_test is indexed as a sequence
        # with the flag at position 0; a non-empty tuple is always truthy,
        # so unwrap it before testing. A plain boolean is used as is.
        if isinstance(seas_pres, (tuple, list)):
            is_seasonal = seas_pres[0]
        else:
            is_seasonal = seas_pres
        if not is_seasonal:
            continue
        # Extract and split the series only once the product is known to be
        # seasonal, so no work is wasted on skipped rows.
        df_series = individual_series(df, row["kunag"], row["matnr"])
        train, validation, test = splitter_2(df_series)
        seasonality_product = product_seasonal_comp_7_point(
            df, int(row["matnr"]))
        score1 = arima(train, validation, test)[0]
        print("score1=", score1)
        score2 = arima_seasonality_added(train, validation, test,
                                         seasonality_product)[0]
        print("score2=", score2)
        # DataFrame.append was removed in pandas 2.0; concatenating a one-row
        # frame yields the same index and columns.
        result = pd.concat([
            result,
            pd.DataFrame([[row["kunag"], row["matnr"], score1, score2]]),
        ])
        if score1 < score2:
            count1 += 1
        elif score1 >= score2:
            # Kept as an explicit comparison so NaN scores count for neither.
            count2 += 1
        print("count1 :", count1, "count2 :", count2, "error :", error)
    except Exception as exc:
        # Best effort: skip the failing row, but count and report it instead
        # of discarding the failure silently.
        error += 1
        print("error :", error, "-", repr(exc))
if result.empty:
    # No row succeeded: assigning four labels to a zero-column frame would
    # raise, so create the empty frame with the expected columns instead.
    result = pd.DataFrame(columns=["kunag", "matnr", "score1", "score2"])
else:
    result.columns = ["kunag", "matnr", "score1", "score2"]
result.to_csv(