Esempio n. 1
0
def function1(df, kunag, matnr,
              output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                         "stl_plots_seasonality_108_7_point_thresh_0.01/"):
    """Plot seasonal vs. plain ARIMA predictions for one series and save a PNG.

    The series for (``kunag``, ``matnr``) is split into train/validation/test,
    two models are fitted through ``arima_seasonality_added`` and ``arima``,
    and their validation and test predictions are drawn next to the actual
    quantities.  The title names the model with the lower test score and
    lists both models' orders and rounded scores.

    Args:
        df: Input data passed unchanged to the project helpers.
        kunag: Customer identifier; used for series selection and the file name.
        matnr: Material identifier; used for series selection, the seasonality
            helpers and the file name.
        output_dir: Directory the ``<kunag>_<matnr>.png`` file is written to.
            Defaults to the location that was previously hard-coded.

    Returns:
        None.  Returns early, without plotting, when the result of
        ``ljung_box_test`` is falsy.
    """
    import os  # local import so the block does not rely on a file-level one

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    # NOTE(review): other call sites in this file index the result of
    # ljung_box_test (e.g. ``[0]``); if it returns a non-empty tuple this
    # truthiness check never triggers -- confirm against the helper in use.
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None
    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)
    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        # Validation predictions followed by the last 16 rows of each model's
        # output, so every "test" curve is drawn as a continuation of its
        # validation curve.
        test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
        test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])
        plt.plot(test_seas["prediction"], marker=".", color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"], marker=".", color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The legend entries come from the ``label=`` arguments above.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')

        # First title line names the model with the lower test score
        # (ties go to 'seasonality'); the second line is identical either way.
        winner = 'normal' if score2 < score1 else 'seasonality'
        plt.title(''.join([
            winner, '\n',
            'seasonality=', str(order1),
            ' test_mse_seasonality=', str(round(score1, 3)),
            ' val_mse_seasonality=', str(round(mse_val_1, 3)),
            ' normal=', str(order2),
            ' test_mse_normal=', str(round(score2, 3)),
            ' val_mse_normal=', str(round(mse_val_2, 3)),
        ]))
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive, which
        # exhausts memory when this function is called once per series.
        plt.close(fig)
Esempio n. 2
0
def function1(df, kunag, matnr,
              output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                         "plots_seasonality_108/"):
    """Plot seasonal vs. plain ARIMA predictions for one series and save a PNG.

    The series for (``kunag``, ``matnr``) is split into train/validation/test,
    two models are fitted through ``arima_seasonality_added`` and ``arima``,
    and their predictions are drawn next to the actual quantities.  The title
    names the model with the lower test score; a text annotation lists both
    models' orders and scores.

    Args:
        df: Input data passed unchanged to the project helpers.
        kunag: Customer identifier; used for series selection and the file name.
        matnr: Material identifier; used for series selection, the seasonality
            helpers and the file name.
        output_dir: Directory the ``<kunag>_<matnr>.png`` file is written to.
            Defaults to the location that was previously hard-coded.

    Returns:
        None.  Returns early, without plotting, when the result of
        ``ljung_box_test`` is falsy.
    """
    import os  # local import so the block does not rely on a file-level one

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    # NOTE(review): other call sites in this file index the result of
    # ljung_box_test (e.g. ``[0]``); if it returns a non-empty tuple this
    # truthiness check never triggers -- confirm against the helper in use.
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None
    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)
    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        # NOTE(review): labels are attached to every line but no legend is
        # drawn, so they never appear in the saved image -- confirm intent.
        plt.plot(input_df1["prediction"], marker=".", color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"], marker=".", color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"], marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        # Title names the model with the lower test score; ties go to
        # 'seasonality'.
        plt.title('normal' if score2 < score1 else 'seasonality')

        summary_lines = [
            'seasonality = ' + str(order1),
            'test_mse_seasonality = ' + str(score1),
            'validation_mse_seasonality = ' + str(mse_val_1),
            'normal = ' + str(order2),
            'test_mse_normal = ' + str(score2),
            'validation_mse_normal = ' + str(mse_val_2),
        ]
        # NOTE(review): the x position is a date-like string; presumably the
        # "dt_week" index holds values in a matching format -- confirm.
        plt.text("05-04-2018", 0.1, '\n'.join(summary_lines) + '\n')
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive, which
        # exhausts memory when this function is called once per series.
        plt.close(fig)
Esempio n. 3
0
# bucket_1_sample = frequency_cleaveland[(frequency_cleaveland["frequency"] > 26) & (frequency_cleaveland["days"] > 730)].sample(400, random_state=1)
# bucket_1_sample.to_csv(
#     "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv", index=False)
# Materials with frequency >= 26 and a history of more than 92 days but at
# most 365 + 183 days.
in_scope = (
    (frequency_cleaveland["frequency"] >= 26)
    & (frequency_cleaveland["days"] > 92)
    & (frequency_cleaveland["days"] <= 365 + 183)
)
sample = frequency_cleaveland[in_scope]["matnr"].unique()

report = pd.DataFrame()
report_rows = []
count = 0
for matnr in sample:
    # NOTE(review): `count` advances by 2 per material, both here and below,
    # so this guard skips the first 161 materials -- presumably a manual
    # "resume" from an earlier interrupted run; confirm before reuse.
    if count < 321:
        count += 2
        continue

    test1 = ljung_box_test(df, matnr)
    test_flag = test1[0]
    test_p_value = test1[1]
    aggregated_data = test1[4]

    fig = plt.figure(figsize=(16, 8))
    plt.plot(aggregated_data.set_index("dt_week")["quantity"],
             marker=".", markerfacecolor="red", label="aggregated_data")
    plt.xlabel("Date", fontsize=14)
    plt.ylabel("Quantity", fontsize=14)
    plt.legend()
    plt.savefig(folder_address + "/" + str(test_flag) + "_" + str(test_p_value) + str(matnr) + ".png")
    # Close the figure so that a long run does not accumulate open figures.
    plt.close(fig)

    count += 1
    print("count;", count)

    # DataFrame.append was removed in pandas 2.0: accumulate plain rows and
    # rebuild the frame.  The CSV is rewritten on every iteration so partial
    # results survive an interrupted run.
    report_rows.append([matnr, test_flag, test_p_value])
    report = pd.DataFrame(report_rows)
    report.to_csv(file_address, index=False)
    count += 1
from seasonality_detection import ljung_box_test_without_aggregation
from dtw_check import dtw_check
from hypothesis_2 import arima_seasonality_added_rolling
from hypothesis_2 import arima_rolling
from selection import individual_series
from stl_decompose import product_seasonal_comp_7_point
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


# For every material in the bucket-1 sample: run the Ljung-Box helper, save a
# plot of the series it returns at index 3, and collect both p-values into a
# CSV report.
df = load_data()
sample = pd.read_csv("/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv")
result_rows = []
count = 1
for matnr in sample["matnr"].unique():
    seas_pres_1 = ljung_box_test(df, matnr)
    fig = plt.figure(figsize=(16, 8))
    plt.plot(seas_pres_1[3], marker=".", label="aggregated")
    plt.legend()
    plt.title("ljung p_value: " + str(seas_pres_1[1]) + "  dickey p_value: " + str(seas_pres_1[2]))
    plt.savefig("/home/aman/PycharmProjects/seasonality_hypothesis/seas_detection_compare_2018_02_08_ver_1/"+str(matnr)+".png")
    # Close the figure so that a long run does not accumulate open figures.
    plt.close(fig)
    # Only the first element of the Ljung-Box p-value entry is recorded.
    result_rows.append([matnr, seas_pres_1[1][0], seas_pres_1[2]])
    print(count)
    count += 1

# Build the frame once with its column names.  DataFrame.append was removed in
# pandas 2.0, and assigning `.columns` afterwards fails on an empty sample.
result = pd.DataFrame(result_rows, columns=["matnr", "ljung_p__value", "dickey_p__value"])
result.to_csv("/home/aman/PycharmProjects/seasonality_hypothesis/seas_detection_compare_2018_02_08_ver_1/result.csv", index=False)
#         if not seas_pres:
#             continue
#         else:
#             dtw_flag = dtw_check(df, kunag, matnr)
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import time

# Walk the (kunag, matnr) pairs of the sample file and, for the pairs whose
# Ljung-Box flag is truthy, prepare inputs for the rolling ARIMA helpers.
# NOTE(review): the loop body may continue beyond the portion of the file
# visible here; `report` and `start` are assigned but not used in this part.
df = load_data()
sample = pd.read_csv(
    "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_12_20_400_sample.csv"
)
report = pd.DataFrame()
count = 0
for index, row in sample.iterrows():
    start = time.time()
    kunag = int(row["kunag"])
    matnr = int(row["matnr"])
    seas_pres = ljung_box_test(df, int(row["matnr"]))
    # Element 0 of the test result is used as the flag; pairs with a falsy
    # flag are only counted and skipped.
    if not seas_pres[0]:
        count += 1
        print(count)
        continue
    else:
        # dtw_flag is computed but not read in the visible code.
        dtw_flag = dtw_check(df, kunag, matnr)
        # Keep 52 rows of the seasonal component: from the 55th-last row up
        # to, but excluding, the last 3.
        seasonality_product = product_seasonal_comp_7_point(df,
                                                            matnr).iloc[-55:-3]
        df_series = individual_series(df, kunag, matnr)
        result_1 = arima_seasonality_added_rolling_011(df_series,
                                                       seasonality_product)
        # NOTE(review): this `break` leaves the loop after the first pair that
        # passes the test, so the statements below it never run -- presumably
        # a debugging leftover; confirm.
        break
        result_1 = result_1.set_index("dt_week")
        result_2 = arima_rolling_011(df_series)
        result_2 = result_2.set_index("dt_week")