def function1(
        df, kunag, matnr,
        output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                   "stl_plots_seasonality_108_7_point_thresh_0.01/"):
    """Plot plain ARIMA against seasonality-adjusted ARIMA for one series.

    The series for (kunag, matnr) is split with ``splitter_2`` and both
    ``arima`` and ``arima_seasonality_added`` (fed the 7-point seasonal
    component of ``matnr``) are run on it. Their validation and test
    predictions are drawn together with the actual quantities and the figure
    is saved as ``<kunag>_<matnr>.png``.

    Args:
        df: Full dataframe handed to ``individual_series``,
            ``ljung_box_test`` and ``product_seasonal_comp_7_point``.
        kunag: Customer identifier of the series.
        matnr: Material identifier of the series.
        output_dir: Directory the PNG is written to. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        None. Nothing is plotted or saved when ``ljung_box_test`` reports no
        seasonality for ``matnr``.
    """
    import os

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None

    seasonality_product = product_seasonal_comp_7_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    # The last 16 rows of each output frame are appended to the validation
    # rows so the plotted test curve continues from the validation curve.
    # NOTE(review): 16 is presumably the test horizon of splitter_2 - confirm.
    test_window = 16
    test_norm = pd.concat([output2_val, input_df2.iloc[-test_window:]])
    test_seas = pd.concat([output1_val, input_df1.iloc[-test_window:]])

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(test_seas["prediction"], marker=".", color='red',
                 label='test_seasonality')
        plt.plot(test_norm["prediction"], marker=".", color='blue',
                 label='test_normal')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='val_seasonality')
        plt.plot(output2_val["prediction"], marker=".", label='val_normal')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # The per-line labels above already carry the legend text.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')

        # First title line names the model with the lower test score
        # (ties go to 'seasonality'); second line lists both models' metrics.
        winner = 'normal' if score2 < score1 else 'seasonality'
        summary = ' '.join([
            'seasonality=' + str(order1),
            'test_mse_seasonality=' + str(round(score1, 3)),
            'val_mse_seasonality=' + str(round(mse_val_1, 3)),
            'normal=' + str(order2),
            'test_mse_normal=' + str(round(score2, 3)),
            'val_mse_normal=' + str(round(mse_val_2, 3)),
        ])
        plt.title(winner + '\n' + summary)

        plt.savefig(os.path.join(
            output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function per series leaks figures.
        plt.close(fig)
def function1(
        df, kunag, matnr,
        output_dir="/home/aman/PycharmProjects/seasonality_hypothesis/"
                   "plots_seasonality_108/"):
    """Plot plain ARIMA against seasonality-adjusted ARIMA for one series.

    The series for (kunag, matnr) is split with ``splitter_2`` and both
    ``arima`` and ``arima_seasonality_added`` (fed the 5-point seasonal
    component of ``matnr``) are run on it. Both models' predictions, their
    validation predictions and the actual quantities are drawn, the metrics
    are written into the plot, and the figure is saved as
    ``<kunag>_<matnr>.png``.

    Args:
        df: Full dataframe handed to ``individual_series``,
            ``ljung_box_test`` and ``product_seasonal_comp_5_point``.
        kunag: Customer identifier of the series.
        matnr: Material identifier of the series.
        output_dir: Directory the PNG is written to. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        None. Nothing is plotted or saved when ``ljung_box_test`` reports no
        seasonality for ``matnr``.
    """
    import os

    df_series = individual_series(df, kunag, matnr)
    train, validation, test = splitter_2(df_series)
    if not ljung_box_test(df, matnr):
        return None

    seasonality_product = product_seasonal_comp_5_point(df, matnr)
    score1, output1, output1_val, order1, mse_val_1 = arima_seasonality_added(
        train, validation, test, seasonality_product)
    score2, output2, output2_val, order2, mse_val_2 = arima(
        train, validation, test)

    input_df1 = output1.set_index("dt_week")
    input_df2 = output2.set_index("dt_week")
    output1_val = output1_val.set_index("dt_week")
    output2_val = output2_val.set_index("dt_week")

    fig = plt.figure(figsize=(16, 8))
    try:
        plt.plot(input_df1["prediction"], marker=".", color='red',
                 label='arima_with_seasonality')
        plt.plot(input_df2["prediction"], marker=".", color='blue',
                 label='arima')
        plt.plot(output1_val["prediction"], marker=".", color='brown',
                 label='arima_seasonality_validation')
        plt.plot(output2_val["prediction"], marker=".",
                 label='arima_validation')
        plt.plot(input_df1["quantity"], marker=".", color='orange',
                 label='actual')
        # Every line carries a label, but they only show up once a legend
        # is actually drawn.
        plt.legend(loc='upper left')
        plt.xlabel('time in weeks')
        plt.ylabel('quantities')

        # The title names the model with the lower test score; ties go to
        # 'seasonality'.
        plt.title('normal' if score2 < score1 else 'seasonality')

        metrics = [
            'seasonality = ' + str(order1),
            'test_mse_seasonality = ' + str(score1),
            'validation_mse_seasonality = ' + str(mse_val_1),
            'normal = ' + str(order2),
            'test_mse_normal = ' + str(score2),
            'validation_mse_normal = ' + str(mse_val_2),
        ]
        # NOTE(review): the anchor is a fixed date string in data
        # coordinates; it relies on the axis date converter accepting
        # strings and on that date lying in the plotted range - confirm.
        plt.text("05-04-2018", 0.1, '\n'.join(metrics) + '\n')

        plt.savefig(os.path.join(
            output_dir, str(kunag) + "_" + str(matnr) + ".png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function per series leaks figures.
        plt.close(fig)
matnr = int(row["matnr"]) # kunag = 500068486 # matnr = 134926 df_series = individual_series(df, kunag, matnr) seasonality_product = product_seasonal_comp_7_point(df, matnr) result = arima_seasonality_added_rolling(df_series, seasonality_product) result = result.set_index("dt_week") new_error = mean_squared_error(result.iloc[-16:]["quantity"], result.iloc[-16:]["prediction"]) #print(result.iloc[-16:]) plt.figure(figsize=(16, 8)) plt.plot(result["quantity"], marker=".") plt.plot(result["prediction"], marker=".") train, validation, test = splitter_2(df_series) score2, output2, output2_val, order2, mse_val_2 = arima( train, validation, test) input_df2 = output2.set_index("dt_week") output2_val = output2_val.set_index("dt_week") test_norm = pd.concat([output2_val, input_df2.iloc[-16:]]) old_error = mean_squared_error(input_df2.iloc[-16:]["quantity"], input_df2.iloc[-16:]["prediction"]) #print(input_df2.iloc[-16:]) plt.plot(test_norm["prediction"], marker=".", color='blue', label='test_normal') plt.plot(output2_val["prediction"], marker=".", label='val_normal') plt.title("new_error = " + str(new_error) + " old_error = " + str(old_error)) plt.savefig( "/home/aman/PycharmProjects/seasonality_hypothesis/latest_plots/"
import pandas as pd

from hypothesis import arima
from preprocess import splitter_2
from selection import individual_series
from selection import remove_negative_rows


def individual_series_2(input_df, kunag=500057582, matnr=103029,
                        start_date=20160703):
    """Select the weekly series of one (kunag, matnr) pair.

    Works on a copy of ``input_df``: rows are first passed through
    ``remove_negative_rows``, then restricted to ``date >= start_date`` and
    to the requested ``kunag`` / ``matnr``. The ``date`` values are parsed as
    ``YYYYMMDD`` into a ``dt_week`` column, which becomes the sorted index.

    Args:
        input_df: pandas dataframe with ``date``, ``kunag`` and ``matnr``
            columns; it is not modified.
        kunag: Customer identifier to select.
        matnr: Material identifier to select.
        start_date: Earliest ``date`` value (same ``YYYYMMDD`` numeric
            encoding as the column) to keep. Defaults to the cutoff that
            used to be hard-coded here.

    Returns:
        A pandas dataframe indexed by ``dt_week`` in ascending order.
    """
    df_copy = remove_negative_rows(input_df.copy())
    selected = (
        (df_copy["date"] >= start_date)
        & (df_copy["kunag"] == kunag)
        & (df_copy["matnr"] == matnr)
    )
    # Take an explicit copy so the column assignment below writes to an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    output_df = df_copy[selected].copy()
    # Parse the whole column in one call rather than row by row.
    output_df["dt_week"] = pd.to_datetime(
        output_df["date"].astype(str), format="%Y%m%d")
    return output_df.sort_values("dt_week").set_index("dt_week")


if __name__ == "__main__":
    # NOTE(review): load_data is not imported in the visible part of this
    # file - confirm where it is expected to come from.
    raw_df = load_data()  # load once and reuse for both demos below
    print(individual_series_2(raw_df))
    df_series = individual_series(raw_df, 500057582, 103029)
    train, validation, test = splitter_2(df_series)
    print(arima(train, validation, test)[1])
result = pd.DataFrame() count1 = 0 count2 = 0 error = 0 for index, row in sample.iterrows(): try: print("kunag: ", row["kunag"], " matnr: ", row["matnr"]) seas_pres = ljung_box_test(df, int(row["matnr"])) print("Seasonality :", seas_pres) df_series = individual_series(df, row["kunag"], row["matnr"]) train, validation, test = splitter_2(df_series) if not seas_pres: continue seasonality_product = product_seasonal_comp_7_point( df, int(row["matnr"])) score1 = arima(train, validation, test)[0] print("score1=", score1) score2 = arima_seasonality_added(train, validation, test, seasonality_product)[0] print("score2=", score2) result = result.append([[row["kunag"], row["matnr"], score1, score2]]) if score1 < score2: count1 += 1 elif score1 >= score2: count2 += 1 print("count1 :", count1, "count2 :", count2, "error :", error) except: pass result.columns = ["kunag", "matnr", "score1", "score2"] result.to_csv( "/home/aman/PycharmProjects/seasonality_hypothesis/data_generated/bucket_1_sample_results_7_point_seasonality_18th_jan_thresgh_0.01_stl_c005.csv"