Exemple #1
0
def function1(df, kunag, matnr, output_dir=None):
    """
    Plot seasonality-aware vs. plain ARIMA predictions for one (kunag, matnr)
    series and save the figure as ``<kunag>_<matnr>.png``.

    param df: input dataframe handed to the project helpers
    param kunag: customer id used to select the series and name the file
    param matnr: product id used to select the series and name the file
    param output_dir: directory the PNG is written to; defaults to the
        original hard-coded project folder
    return: None. When ``ljung_box_test(df, matnr)`` is falsy the function
        returns before fitting any model or drawing anything.
    """
    import os  # local import: only needed to build the output path

    if output_dir is None:
        output_dir = (
            "/home/aman/PycharmProjects/seasonality_hypothesis/"
            "stl_plots_seasonality_108_7_point_thresh_0.01/")

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None

    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # Validation predictions followed by the last 16 rows of each model's
    # output, so every curve is drawn as one continuous line.
    test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
    test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])

    # The title names the model with the lower test score, then lists the
    # order and the rounded test/validation MSE of both models.
    winner = 'normal' if score2 < score1 else 'seasonality'
    title = (
        "{}\nseasonality={} test_mse_seasonality={} val_mse_seasonality={}"
        " normal={} test_mse_normal={} val_mse_normal={}"
    ).format(winner, order1, round(score1, 3), round(mse_val_1, 3), order2,
             round(score2, 3), round(mse_val_2, 3))

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(test_seas["prediction"], marker=".", color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"], marker=".", color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # Every line carries its own label, so the legend needs no explicit
        # label list.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        plt.title(title)
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # Release the figure: without this, every call leaves one more open
        # figure behind when the function is run over many series.
        plt.close(fig)
Exemple #2
0
def dtw_check(df, kunag, matnr, threshold=0.18):
    """
    Compare the smoothed (kunag, matnr) series with the product's seasonal
    component using dynamic time warping.

    Both frames are standardised (mean subtracted, divided by the standard
    deviation) and the DTW distance between their "quantity" columns is
    computed with a squared-difference point cost.

    param df: input dataframe handed to the project helpers
    param kunag: customer id of the series
    param matnr: product id of the series and of the seasonal component
    param threshold: largest distance still counted as a match
    return: tuple (matched, distance) where matched is True when
        distance <= threshold
    """

    def squared_difference(a, b):
        # Point-wise cost handed to dtw.
        return (a - b) ** 2

    smoothed = smoothing_7_new(individual_series(df, kunag, matnr))
    smoothed = smoothed.set_index("dt_week")
    smoothed_z = (smoothed - smoothed.mean()) / smoothed.std()

    # Restrict the seasonal component to the rows present in the series.
    seasonal = product_seasonal_comp_7_point(df, matnr).loc[smoothed.index]
    seasonal_z = (seasonal - seasonal.mean()) / seasonal.std()

    distance, _cost, _acc_cost, _path = dtw(
        smoothed_z["quantity"],
        seasonal_z["quantity"],
        dist=squared_difference,
        warp=1)
    return bool(distance <= threshold), distance
Exemple #3
0
def function1(df, kunag, matnr, output_dir=None):
    """
    Plot seasonality-aware vs. plain ARIMA predictions for one (kunag, matnr)
    series and save the figure as ``<kunag>_<matnr>.png``.

    param df: input dataframe handed to the project helpers
    param kunag: customer id used to select the series and name the file
    param matnr: product id used to select the series and name the file
    param output_dir: directory the PNG is written to; defaults to the
        original hard-coded project folder
    return: None. When ``ljung_box_test(df, matnr)`` is falsy the function
        returns before fitting any model or drawing anything.
    """
    import os  # local import: only needed to build the output path

    if output_dir is None:
        output_dir = (
            "/home/aman/PycharmProjects/seasonality_hypothesis/"
            "plots_seasonality_108/")

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None

    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # One "name = value" line per metric, each terminated by a newline.
    summary = "\n".join([
        'seasonality = ' + str(order1),
        'test_mse_seasonality = ' + str(score1),
        'validation_mse_seasonality = ' + str(mse_val_1),
        'normal = ' + str(order2),
        'test_mse_normal = ' + str(score2),
        'validation_mse_normal = ' + str(mse_val_2),
    ]) + "\n"

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(input_df1["prediction"], marker=".", color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"], marker=".", color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"], marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The lines are labelled, but labels only appear once a legend is
        # drawn.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        # The title names the model with the lower test score.
        plt.title('normal' if score2 < score1 else 'seasonality')
        # NOTE(review): the x position is a date string; whether it lands on
        # the intended week depends on how the dt_week axis converts strings
        # - confirm against a rendered plot.
        plt.text("05-04-2018", 0.1, summary)
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # Release the figure: without this, every call leaves one more open
        # figure behind when the function is run over many series.
        plt.close(fig)
Exemple #4
0
# Keep the combos with 20 <= frequency < 26 and 365 < days < 365 + 183.
sample = frequency_cleaveland.loc[
    (frequency_cleaveland["frequency"] >= 20)
    & (frequency_cleaveland["frequency"] < 26)
    & (frequency_cleaveland["days"] > 365)
    & (frequency_cleaveland["days"] < 365 + 183)
]
print("Number of combos: ", len(sample))

# Draw a reproducible sample of 300 rows, write it to disk and continue with
# the version read back from the CSV file.
sample = sample.sample(n=300, random_state=1)
sample.to_csv(folder_address + "/sample.csv")
sample = pd.read_csv(folder_address + "/sample.csv")

# Accumulators filled by the loop below.
report = pd.DataFrame()
count = 0

for index, row in sample.iterrows():
    start = time.time()
    kunag = int(row["kunag"])
    matnr = int(row["matnr"])
    df_series = individual_series(df, kunag, matnr)
    result_06 = moving_average(df_series, order=6)
    result_09 = moving_average(df_series, order=9)
    result_12 = moving_average(df_series, order=12)
    result_52 = moving_average(df_series, order=52)
    result_011 = arima_rolling_011(df_series)
    error_06 = pow(
        mean_squared_error(df_series["quantity"].iloc[-16:],
                           result_06["prediction"].iloc[-16:]), 0.5)
    error_09 = pow(
        mean_squared_error(df_series["quantity"].iloc[-16:],
                           result_09["prediction"].iloc[-16:]), 0.5)
    error_12 = pow(
        mean_squared_error(df_series["quantity"].iloc[-16:],
                           result_12["prediction"].iloc[-16:]), 0.5)
    # error_52 = pow(mean_squared_error(df_series["quantity"].iloc[-16:],
Exemple #5
0
from selection import load_data
from selection import individual_series
import pandas as pd
import matplotlib.pyplot as plt

# Demo script: take one window of the default series, build two copies whose
# last weeks are overwritten with a constant, and plot all three together.
df = load_data()
series = individual_series(df).set_index("dt_week")
series1 = series.loc["2016-07-07":"2016-09-15"]
series2 = series1.copy()
series3 = series1.copy()

# Assign through a single .loc call. The chained form
# frame["quantity"][start:end] = value writes into an intermediate object,
# which raises SettingWithCopyWarning and leaves the frame untouched when
# pandas Copy-on-Write is enabled.
series2.loc["2016-08-25":"2016-09-15", "quantity"] = 4
series3.loc["2016-08-25":"2016-09-15", "quantity"] = 5

plt.figure(figsize=(16, 8))
plt.plot(series1["quantity"], marker="o", label='series1')
plt.plot(series2["quantity"], marker="o", label='series2')
plt.plot(series3["quantity"], marker="o", label='series3')
# The labels above are only displayed once a legend is drawn.
plt.legend()
plt.show()
Exemple #6
0
from selection import load_data
from selection import remove_negative_rows
import pandas as pd
from preprocess import splitter_2
from hypothesis import arima
from selection import individual_series


def individual_series_2(input_df, kunag=500057582, matnr=103029):
    """
    selects the rows of a dataframe that belong to a particular kunag and
    matnr, dated 20160703 or later, after remove_negative_rows was applied
    param input_df: a pandas dataframe with "date", "kunag" and "matnr"
        columns; it is copied first, so the caller's frame is not modified
    param kunag: customer id to select
    param matnr: product id to select
    return: a pandas dataframe sorted by and indexed on "dt_week", the
        "date" column parsed with the format "%Y%m%d"
    """
    df_copy = remove_negative_rows(input_df.copy())
    selected = ((df_copy["date"] >= 20160703)
                & (df_copy["kunag"] == kunag)
                & (df_copy["matnr"] == matnr))
    # Take an explicit copy so the new column is written to an independent
    # frame instead of a filtered selection (avoids SettingWithCopyWarning).
    output_df = df_copy.loc[selected].copy()
    # Parse the whole column in one call rather than once per row.
    output_df["dt_week"] = pd.to_datetime(output_df["date"], format="%Y%m%d")
    return output_df.sort_values("dt_week").set_index("dt_week")


if __name__ == "__main__":
    # Load the dataset once and reuse it for both calls instead of loading
    # it a second time; individual_series_2 works on its own copy of the
    # frame it is given.
    data = load_data()
    print(individual_series_2(data))
    df_series = individual_series(data, 500057582, 103029)
    train, validation, test = splitter_2(df_series)
    # Index 1 of the tuple returned by arima is printed.
    print(arima(train, validation, test)[1])
Exemple #7
0
# Load the full dataset and the pre-generated bucket-1 sample of combos.
df = load_data()
sample = pd.read_csv(
    "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv"
)
# Convert every matnr value with int().
sample = sample.assign(matnr=sample["matnr"].map(int))

# Result table and counters initialised for the loop below.
result = pd.DataFrame()
count1 = count2 = error = 0
for index, row in sample.iterrows():
    try:
        print("kunag: ", row["kunag"], " matnr: ", row["matnr"])
        seas_pres = ljung_box_test(df, int(row["matnr"]))
        print("Seasonality :", seas_pres)
        df_series = individual_series(df, row["kunag"], row["matnr"])
        train, validation, test = splitter_2(df_series)
        if not seas_pres:
            continue
        seasonality_product = product_seasonal_comp_7_point(
            df, int(row["matnr"]))
        score1 = arima(train, validation, test)[0]
        print("score1=", score1)
        score2 = arima_seasonality_added(train, validation, test,
                                         seasonality_product)[0]
        print("score2=", score2)
        result = result.append([[row["kunag"], row["matnr"], score1, score2]])
        if score1 < score2:
            count1 += 1
        elif score1 >= score2:
            count2 += 1