def function1(
        df, kunag, matnr,
        output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                   "stl_plots_seasonality_108_7_point_thresh_0.01/"):
    """
    Plot ARIMA predictions with and without the 7-point product seasonality
    for one (kunag, matnr) combination and save the figure as a PNG.

    Nothing is plotted when ``ljung_box_test(df, matnr)`` is falsy.

    param df: dataframe handed unchanged to the project helpers
    param kunag: customer id; used to select the series and in the file name
    param matnr: material id; used to select the series, for the seasonality
        helpers and in the file name
    param output_dir: directory the "<kunag>_<matnr>.png" file is written to;
        the default is the location that used to be hard-coded
    return: None
    """
    import os

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None

    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # Validation predictions followed by the last 16 rows of each model's
    # output, so the "test" curves join up with the validation curves.
    test_norm = pd.concat([output2_val, input_df2.iloc[-16:]])
    test_seas = pd.concat([output1_val, input_df1.iloc[-16:]])

    # The title's first line names the model with the lower score; the second
    # line lists both models' orders and errors.
    winner = 'normal' if score2 < score1 else 'seasonality'
    summary = ' '.join([
        'seasonality=' + str(order1),
        'test_mse_seasonality=' + str(round(score1, 3)),
        'val_mse_seasonality=' + str(round(mse_val_1, 3)),
        'normal=' + str(order2),
        'test_mse_normal=' + str(round(score2, 3)),
        'val_mse_normal=' + str(round(mse_val_2, 3)),
    ])

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(test_seas["prediction"], marker=".", color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"], marker=".", color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The legend entries come from the label= of each plot call above.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        plt.title(winner + '\n' + summary)
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function once per combo leaks a figure
        # per call.
        plt.close(fig)
def dtw_check(df, kunag, matnr, threshold=0.18):
    """
    Compare the smoothed, z-normalised series of one (kunag, matnr) pair
    with the z-normalised 7-point product seasonality using ``dtw``.

    param df: dataframe handed unchanged to the project helpers
    param kunag: customer id used to select the individual series
    param matnr: material id used to select the series and the seasonality
    param threshold: largest ``dtw`` distance that still counts as a match
    return: tuple (bool, distance); the bool is True when the distance is
        less than or equal to ``threshold``
    """

    def _squared_diff(a, b):
        # Point-wise cost passed to dtw: squared difference.
        return (a - b) ** 2

    smoothed = smoothing_7_new(individual_series(df, kunag, matnr))
    smoothed = smoothed.set_index("dt_week")
    series_norm = (smoothed - smoothed.mean()) / smoothed.std()

    # Only the seasonality rows that share the smoothed series' index labels.
    seasonal = product_seasonal_comp_7_point(df, matnr).loc[smoothed.index]
    seasonal_norm = (seasonal - seasonal.mean()) / seasonal.std()

    # Only the first of dtw's four return values (the distance) is used.
    distance, _cost, _acc_cost, _path = dtw(
        series_norm["quantity"],
        seasonal_norm["quantity"],
        dist=_squared_diff,
        warp=1,
    )
    return (True, distance) if distance <= threshold else (False, distance)
def function1(
        df, kunag, matnr,
        output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                   "plots_seasonality_108/"):
    """
    Plot ARIMA predictions with and without the 5-point product seasonality
    for one (kunag, matnr) combination and save the figure as a PNG.

    Nothing is plotted when ``ljung_box_test(df, matnr)`` is falsy.

    param df: dataframe handed unchanged to the project helpers
    param kunag: customer id; used to select the series and in the file name
    param matnr: material id; used to select the series, for the seasonality
        helpers and in the file name
    param output_dir: directory the "<kunag>_<matnr>.png" file is written to;
        the default is the location that used to be hard-coded
    return: None
    """
    import os

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    seas_pres = ljung_box_test(df, matnr)
    if not seas_pres:
        return None

    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # Text box with both models' orders and errors, one item per line
    # (the trailing newline is kept from the original text).
    details = '\n'.join([
        'seasonality = ' + str(order1),
        'test_mse_seasonality = ' + str(score1),
        'validation_mse_seasonality = ' + str(mse_val_1),
        'normal = ' + str(order2),
        'test_mse_normal = ' + str(score2),
        'validation_mse_normal = ' + str(mse_val_2),
    ]) + '\n'

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(input_df1["prediction"], marker=".", color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"], marker=".", color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"], marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The labels above were never displayed before; show them so the
        # five curves can be told apart.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')
        # The title names the model with the lower score.
        plt.title('normal' if score2 < score1 else 'seasonality')
        plt.text("05-04-2018", 0.1, details)
        plt.savefig(
            os.path.join(output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function once per combo leaks a figure
        # per call.
        plt.close(fig)
# Keep combos whose frequency is in [20, 26) and whose "days" value lies
# strictly between 365 and 365 + 183.
freq_col = frequency_cleaveland["frequency"]
days_col = frequency_cleaveland["days"]
in_bucket = (
    (freq_col >= 20)
    & (freq_col < 26)
    & (days_col < 365 + 183)
    & (days_col > 365)
)
sample = frequency_cleaveland[in_bucket]
print("Number of combos: ", sample.shape[0])

# Draw a reproducible sample of 300 combos, write it to disk and read it back
# so later code works on the CSV form of the sample.
sample_path = folder_address + "/sample.csv"
sample = sample.sample(300, random_state=1)
sample.to_csv(sample_path)
sample = pd.read_csv(sample_path)

report = pd.DataFrame()
count = 0
for index, row in sample.iterrows():
    start = time.time()
    kunag = int(row["kunag"])
    matnr = int(row["matnr"])
    df_series = individual_series(df, kunag, matnr)

    # Moving-average baselines of several orders plus a rolling ARIMA.
    result_06 = moving_average(df_series, order=6)
    result_09 = moving_average(df_series, order=9)
    result_12 = moving_average(df_series, order=12)
    result_52 = moving_average(df_series, order=52)
    result_011 = arima_rolling_011(df_series)

    # Root of the mean squared error over the last 16 rows of each result.
    actual_tail = df_series["quantity"].iloc[-16:]
    error_06 = mean_squared_error(
        actual_tail, result_06["prediction"].iloc[-16:]) ** 0.5
    error_09 = mean_squared_error(
        actual_tail, result_09["prediction"].iloc[-16:]) ** 0.5
    error_12 = mean_squared_error(
        actual_tail, result_12["prediction"].iloc[-16:]) ** 0.5
    # NOTE(review): the source is cut off here, inside a commented-out
    # error_52 computation; the remainder of the loop body is not visible.
from selection import load_data
from selection import individual_series
import pandas as pd
import matplotlib.pyplot as plt

# Plot one window of a weekly series together with two copies whose last
# weeks are overwritten with a constant quantity.
df = load_data()
series = individual_series(df).set_index("dt_week")
series1 = series.loc["2016-07-07":"2016-09-15"]
series2 = series1.copy()
series3 = series1.copy()

# A single .loc assignment replaces the former chained indexing
# (frame["quantity"][start:end] = value), which writes to a temporary object
# and is silently ignored under pandas Copy-on-Write.
series2.loc["2016-08-25":"2016-09-15", "quantity"] = 4
series3.loc["2016-08-25":"2016-09-15", "quantity"] = 5

plt.figure(figsize=(16, 8))
plt.plot(series1["quantity"], marker="o", label='series1')
plt.plot(series2["quantity"], marker="o", label='series2')
plt.plot(series3["quantity"], marker="o", label='series3')
# Show the labels set above so the three curves can be told apart.
plt.legend()
plt.show()
from selection import load_data
from selection import remove_negative_rows
import pandas as pd
from preprocess import splitter_2
from hypothesis import arima
from selection import individual_series


def individual_series_2(input_df, kunag=500057582, matnr=103029):
    """
    Select the rows of one (kunag, matnr) combination, indexed by date.

    The input is copied, passed through ``remove_negative_rows``, restricted
    to rows with ``date`` >= 20160703 and to the requested kunag/matnr, then
    sorted by the parsed date.

    param input_df: a pandas dataframe with "date" (YYYYMMDD), "kunag" and
        "matnr" columns; it is not modified
    param kunag: customer id to select
    param matnr: material id to select
    return: a pandas dataframe indexed by "dt_week" (the parsed "date"
        column), sorted ascending
    """
    df_copy = remove_negative_rows(input_df.copy())
    selected = (
        (df_copy["date"] >= 20160703)
        & (df_copy["kunag"] == kunag)
        & (df_copy["matnr"] == matnr)
    )
    # Explicit copy: adding a column to a filtered slice otherwise triggers
    # SettingWithCopyWarning and may not modify the intended object.
    output_df = df_copy[selected].copy()
    # One vectorized parse instead of a per-row apply; it also yields a
    # datetime column when the selection is empty.
    output_df["dt_week"] = pd.to_datetime(output_df["date"], format="%Y%m%d")
    return output_df.sort_values("dt_week").set_index("dt_week")


if __name__ == "__main__":
    # Load the data once and reuse it for both calls.
    raw_df = load_data()
    print(individual_series_2(raw_df))
    df_series = individual_series(raw_df, 500057582, 103029)
    train, validation, test = splitter_2(df_series)
    print(arima(train, validation, test)[1])
df = load_data() sample = pd.read_csv( "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample.csv" ) sample["matnr"] = sample["matnr"].map(int) result = pd.DataFrame() count1 = 0 count2 = 0 error = 0 for index, row in sample.iterrows(): try: print("kunag: ", row["kunag"], " matnr: ", row["matnr"]) seas_pres = ljung_box_test(df, int(row["matnr"])) print("Seasonality :", seas_pres) df_series = individual_series(df, row["kunag"], row["matnr"]) train, validation, test = splitter_2(df_series) if not seas_pres: continue seasonality_product = product_seasonal_comp_7_point( df, int(row["matnr"])) score1 = arima(train, validation, test)[0] print("score1=", score1) score2 = arima_seasonality_added(train, validation, test, seasonality_product)[0] print("score2=", score2) result = result.append([[row["kunag"], row["matnr"], score1, score2]]) if score1 < score2: count1 += 1 elif score1 >= score2: count2 += 1