def model_ARIMA(key, train, test_shape, order, train_flag=0, test=[]):
    predictions = []
    rmse_val = []
    if train_flag:
        test = test[0]
    try:
        train = train.values
    except:
        train = train
    history = [np.asscalar(x) for x in train]
    if train_flag == 1:
        # Rolling validation: three windows, three one-step forecasts each.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asscalar(x) for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                order = (order[0], 1, order[2])
                for j in range(3):
                    model = ARIMA(v_train, order=order)
                    model_fit = model.fit(disp=0)
                    yhat = model_fit.forecast()[0]
                    if yhat < 0:
                        yhat = mu.weighted_moving_average(history, 1, 3)
                    yhat = yhat[0]
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except:
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])
            itr = itr - 1
    else:
        try:
            # TODO: check order
            order = (order[0], 1, order[2])
            for t in range(test_shape):
                model = ARIMA(history, order=order)
                model_fit = model.fit(disp=0)
                yhat = model_fit.forecast()[0]
                if yhat < 0:
                    yhat = mu.weighted_moving_average(history, 1, 3)
                yhat = yhat[0]
                predictions.append(yhat)
                history.append(yhat)
        except:
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))
    predictions = [0 if pd.isnull(i) else int(i) for i in predictions]
    return predictions, rmse_val
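# Minimal, self-contained sketch of the walk-forward loop implemented above,
# written against the current statsmodels API (statsmodels.tsa.arima.model.ARIMA,
# which replaced the legacy statsmodels.tsa.arima_model.ARIMA/ARMA classes and
# their `disp=` fit argument). The function name, series, and order below are
# illustrative only and are not part of this module.
def _walk_forward_arima_sketch(steps=3):
    from statsmodels.tsa.arima.model import ARIMA as ARIMA_v2

    series = [112.0, 118.0, 132.0, 129.0, 121.0, 135.0,
              148.0, 136.0, 119.0, 104.0, 118.0, 115.0]
    history = list(series)
    preds = []
    for _ in range(steps):
        # Refit on the growing history and forecast one step ahead.
        fit = ARIMA_v2(history, order=(1, 1, 1)).fit()
        yhat = float(fit.forecast(steps=1)[0])
        preds.append(yhat)
        history.append(yhat)  # feed the forecast back in, as model_ARIMA does
    return preds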
def model_LinearRegression(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    rmse_val = []
    if train_flag == 1:
        itr = 5
        for i in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3).reset_index(drop=True)
            train = dataset[:-itr]
            diff_values = ut.difference(dataset, order[1])
            scaler = ut.scaler_selection('lr')
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))
            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values
            clf = LinearRegression()
            param = {
                "fit_intercept": [True, False],
                "normalize": [False],
                "copy_X": [True, False]
            }
            grid = GridSearchCV(clf, param, n_jobs=1)
            model = mu.fit_model(data, grid)
            for j in range(tsize):
                X, y = data[:, 0:-1], data[:, -1]
                yhat = mu.forecast_model(model, X)
                # inverted = list()
                # for i in range(len(yhat)):
                #     value = ut.inverse_difference(dataset, yhat[i],
                #                                   len(dataset) - i)
                #     inverted.append(value)
                # inverted = np.array(inverted)
                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
                predictions.append(forecast)
                train = np.append(train, forecast)
                diff_train = ut.difference(train, order[1])
                diff_train = scaler.fit_transform(
                    pd.DataFrame(diff_train).values.reshape(-1, 1))
                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values
            predictions = predictions[1:4]
            rmse_val.append(mu.calculate_rmse('GR_LR', expected, predictions))
            itr = itr - 1
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
def model_DecisionTree(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values
        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data
        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        dtr = DecisionTreeRegressor()
        param_tree = {
            "max_depth": [3, None],
            "min_samples_leaf": sp_randint(1, 11),
            "criterion": ["mse"],
            "splitter": ["best", "random"],
            "max_features": ["auto", "sqrt", None]
        }
        gridDT = RandomizedSearchCV(dtr, param_tree, n_jobs=1, n_iter=100)
        gridDT.fit(X, y)
        # Refit a fresh tree with the best parameters from the search.
        clf = DecisionTreeRegressor(
            criterion=gridDT.best_params_["criterion"],
            splitter=gridDT.best_params_["splitter"],
            max_features=gridDT.best_params_["max_features"],
            max_depth=gridDT.best_params_["max_depth"],
            min_samples_leaf=gridDT.best_params_["min_samples_leaf"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)
        # Undo the differencing before thresholding and appending the forecast.
        inverted = list()
        for k in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[k], len(dataset) - k)
            inverted.append(value)
        inverted = np.array(inverted)
        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
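# The regression models in this module all share the same preprocessing:
# difference the series, then reframe it as a supervised lag matrix. The two
# helpers below are plausible stand-ins for ut.difference and
# ut.timeseries_to_supervised, shown only to make that data flow explicit;
# the project's actual implementations may differ.
def _difference_sketch(series, lag=1):
    # y'_t = y_t - y_{t-lag}
    return [series[i] - series[i - lag] for i in range(lag, len(series))]


def _to_supervised_sketch(values, n_lags=1):
    # Each row holds (y_{t-n_lags}, ..., y_{t-1}, y_t): lagged values as
    # features, the current value as the target in the last column.
    df = pd.DataFrame(values)
    cols = [df.shift(k) for k in range(n_lags, 0, -1)] + [df]
    return pd.concat(cols, axis=1).fillna(0.0)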
def model_ElasticNet(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values
        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data
        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        elas = ElasticNet()
        param = {
            "alpha": list(np.linspace(0.000000001, 100, 100000)),
            "l1_ratio": list(np.linspace(0.000001, 100, 1000)),
            "fit_intercept": [True, False],
            "normalize": [True, False],
            "precompute": [True, False]
        }
        random_elas = RandomizedSearchCV(elas, param, n_jobs=1, n_iter=100)
        random_elas.fit(X, y)
        clf = ElasticNet(
            alpha=random_elas.best_params_["alpha"],
            l1_ratio=random_elas.best_params_["l1_ratio"],
            fit_intercept=random_elas.best_params_["fit_intercept"],
            normalize=random_elas.best_params_["normalize"],
            precompute=random_elas.best_params_["precompute"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)
        inverted = list()
        for k in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[k], len(dataset) - k)
            inverted.append(value)
        inverted = np.array(inverted)
        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
def model_SVR_Poly(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values
        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data
        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        mod = SVR()
        g = list(np.linspace(0.0001, 1, 1000))
        C = list(np.linspace(0.01, 10, 25))
        param = {
            "kernel": ["poly"],
            "degree": range(10, 30, 1),
            "gamma": g,
            "C": C
        }
        random_search = RandomizedSearchCV(mod, param, n_jobs=1, n_iter=100)
        random_search.fit(X, y)
        clf = SVR(kernel=random_search.best_params_["kernel"],
                  degree=random_search.best_params_["degree"],
                  gamma=random_search.best_params_["gamma"],
                  C=random_search.best_params_["C"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)
        inverted = list()
        for k in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[k], len(dataset) - k)
            inverted.append(value)
        inverted = np.array(inverted)
        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
def model_RandomForest(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values
        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data
        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        rfr = RandomForestRegressor()
        param_forest = {
            "n_estimators": range(10, 1000, 100),
            "criterion": ["mse"],
            "bootstrap": [True, False],
            "warm_start": [True, False]
        }
        gridRF = RandomizedSearchCV(rfr, param_forest, n_jobs=1, n_iter=100)
        gridRF.fit(X, y)
        # Refit with the best parameters from the search, mirroring the
        # sibling model functions.
        clf = RandomForestRegressor(
            n_estimators=gridRF.best_params_["n_estimators"],
            criterion=gridRF.best_params_["criterion"],
            bootstrap=gridRF.best_params_["bootstrap"],
            warm_start=gridRF.best_params_["warm_start"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)
        inverted = list()
        for k in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[k], len(dataset) - k)
            inverted.append(value)
        inverted = np.array(inverted)
        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
def model_ARMA(key, train, test_shape, order, train_flag=0, test=[]):
    predictions = []
    rmse_val = []
    if train_flag:
        test = test[0]
    try:
        train = train.values
    except:
        train = train
    history = [np.asscalar(x) for x in train]
    if train_flag == 1:
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asscalar(x) for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                for j in range(3):
                    model = ARMA(v_train, order=order)
                    model_fit = model.fit(disp=0, transparams=False, trend='nc')
                    yhat = model_fit.forecast()[0]
                    pred = yhat
                    if pred < 0:
                        pred = mu.weighted_moving_average(v_train, 1, 3)
                    pred = pred[0]
                    pred_temp.append(pred)
                    v_train.append(pred)
            except:
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))
            itr = itr - 1
    else:
        try:
            for t in range(test_shape):
                model = ARMA(history, order=order)
                model_fit = model.fit(disp=0, transparams=False, trend='nc')
                yhat = model_fit.forecast()[0]
                inverted = list()
                for k in range(len(yhat)):
                    value = ut.inverse_difference(history, yhat[k],
                                                  len(history) - k)
                    inverted.append(value)
                inverted = np.array(inverted)
                pred = inverted[-1]
                if pred < 0:
                    pred = mu.weighted_moving_average(history, 1, 3)
                    pred = pred[0]
                predictions.append(pred)
                # Append the scalar forecast so the next fit sees a flat list.
                history.append(yhat[0])
        except:
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
def model_ES(key, train, test_shape=0, train_flag=0, test=[]):
    predictions = []
    rmse_val = []
    try:
        train = train.values
    except:
        train = train
    history = [np.asscalar(x) for x in train]
    # TRAIN
    if train_flag == 1:
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asscalar(x) for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                for t in range(3):
                    # Fit on the validation window only; fitting on the full
                    # history would leak the held-out points.
                    if key == 'SES':
                        model = SimpleExpSmoothing(v_train)
                    elif key == 'HWES':
                        model = ExponentialSmoothing(v_train)
                    model_fit = model.fit()
                    yhat = model_fit.predict(len(v_train), len(v_train))
                    if yhat < 0:
                        yhat = mu.weighted_moving_average(v_train, 1, 3)
                    yhat = yhat[0]
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except:
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])
            itr = itr - 1
    # FORECAST
    else:
        try:
            for t in range(test_shape):
                if key == 'SES':
                    model = SimpleExpSmoothing(history)
                elif key == 'HWES':
                    model = ExponentialSmoothing(history)
                model_fit = model.fit()
                yhat = model_fit.predict(len(history), len(history))
                if yhat < 0:
                    yhat = mu.weighted_moving_average(history, 1, 3)
                yhat = yhat[0]
                predictions.append(yhat)
                history.append(yhat)
        except:
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
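# One-step exponential-smoothing sketch against the current statsmodels API,
# mirroring a single iteration of the loop in model_ES. Values are
# illustrative; `key` selects the same two model classes used above.
def _es_one_step_sketch(history, key='SES'):
    from statsmodels.tsa.holtwinters import (ExponentialSmoothing,
                                             SimpleExpSmoothing)
    if key == 'SES':
        fit = SimpleExpSmoothing(history).fit()
    else:
        fit = ExponentialSmoothing(history).fit()
    # Predict the first out-of-sample index.
    return float(fit.predict(len(history), len(history))[0])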
def model_ML(dataset=[], tsize=0, test_shape=0, model=np.nan, key='',
             order=(0, 0, 0), train_flag=0):
    predictions = []
    pred_temp = []
    rmse_val = []
    parameter_values = {}
    scale_flag = 0
    if key in ('lr', 'lasso', 'ridge', 'knn', 'svmr'):
        scale_flag = 1
    if train_flag == 1:
        itr = 5
        for i in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3)
            expected = expected.reset_index(drop=True)
            train = dataset[:-itr]
            diff_values = ut.difference(train, order[1])
            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))
            supervised = ut.timeseries_to_supervised(train, order[0])
            data = supervised.values
            RF_model = mu.fit_model(data, model)
            pred_temp = []
            for j in range(test_shape):
                X, y = data[:, 0:-1], data[:, -1]
                yhat = mu.forecast_model(RF_model, X)
                # TODO: Inverse differencing and scaling
                # if scale_flag == 1:
                #     yhat = scaler.inverse_transform(
                #         pd.DataFrame(yhat).values.reshape(-1, 1))
                # if order[1] != 0:
                #     inverted = list()
                #     for i in range(len(yhat)):
                #         value = ut.inverse_difference(dataset, yhat[i],
                #                                       len(dataset) - i)
                #         inverted.append(value)
                #     inverted = np.array(inverted)
                #     forecast = inverted[-1]
                # else:
                #     forecast = yhat[-1]
                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
                pred_temp.append(forecast)
                train = np.append(train, forecast)
                diff_train = ut.difference(train, order[1])
                if scale_flag == 1:
                    scaler = ut.scaler_selection(key)
                    diff_train = scaler.fit_transform(
                        pd.DataFrame(diff_train).values.reshape(-1, 1))
                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values
            pred_temp = pred_temp[1:4]
            mu.plotting(key, pred_temp, expected)
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])
            rmse_val.append(mu.calculate_rmse(key, expected, pred_temp))
            itr = itr - 1
    else:
        dataset_1 = copy.deepcopy(dataset)
        diff_values = ut.difference(dataset_1, order[1])
        if scale_flag == 1:
            scaler = ut.scaler_selection(key)
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))
        supervised = ut.timeseries_to_supervised(diff_values, order[0])
        data = supervised.values
        RF_model = mu.fit_model(data, model)
        try:
            parameter_values = model.best_params_
        except:
            parameter_values = model.get_params()
        test_shape = test_shape + 2
        for i in range(test_shape):
            X, y = data[:, 0:-1], data[:, -1]
            yhat = mu.forecast_model(RF_model, X)
            # if scale_flag == 1:
            #     yhat = scaler.inverse_transform(
            #         pd.DataFrame(yhat).values.reshape(-1, 1))
            # if order[1] != 0:
            #     inverted = list()
            #     for i in range(len(yhat)):
            #         value = ut.inverse_difference(data, yhat[i], len(data) - i)
            #         inverted.append(value)
            #     inverted = np.array(inverted)
            #     forecast = inverted[-1]
            # else:
            #     forecast = yhat[-1]
            forecast = yhat[-1]
            if forecast < 0:
                forecast = mu.weighted_moving_average(data, 1, 3)[0]
            predictions.append(forecast)
            dataset_1 = np.append(dataset_1, forecast)
            diff_values = ut.difference(dataset_1, order[1])
            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))
            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values
        predictions = predictions[2:test_shape]
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val, parameter_values
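# Generic recursive multi-step sketch with a scikit-learn regressor, mirroring
# model_ML's predict -> append -> re-frame loop without the mu/ut helpers.
# Everything here (names, lag count, model choice) is illustrative.
def _recursive_forecast_sketch(series, steps=3, n_lags=3):
    from sklearn.linear_model import LinearRegression

    history = list(series)
    preds = []
    for _ in range(steps):
        # Rebuild the lag matrix from the (growing) history each step.
        X = np.array([history[i:i + n_lags]
                      for i in range(len(history) - n_lags)])
        y = np.array(history[n_lags:])
        model = LinearRegression().fit(X, y)
        yhat = float(model.predict(np.array([history[-n_lags:]]))[0])
        preds.append(yhat)
        history.append(yhat)  # the forecast becomes an input for the next step
    return preds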
def model_SVR_RBF(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    predictions = []
    for i in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values
        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data
        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        mod = SVR()
        # Standard power-of-two search grids for the RBF kernel.
        g = [2.0 ** e for e in range(-15, 4)]
        C = [2.0 ** e for e in range(-5, 16)]
        param = {'gamma': g, 'kernel': ['rbf'], 'C': C}
        grid_search = RandomizedSearchCV(mod, param, n_jobs=1, n_iter=100)
        grid_search.fit(X, y)
        clf = SVR(gamma=grid_search.best_params_["gamma"],
                  kernel=grid_search.best_params_["kernel"],
                  C=grid_search.best_params_["C"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)
        inverted = list()
        for k in range(len(yhat)):
            value = ut.inverse_difference(dataset, yhat[k], len(dataset) - k)
            inverted.append(value)
        inverted = np.array(inverted)
        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
def training(details_data, datasets, forecast_period):
    facc_out = dict()
    rsq = dict()
    price = details_data['price']
    price = [float(i) for i in price]
    price = pd.Series(price).fillna(0).tolist()
    sku_list = details_data['sku']
    market = details_data['market']
    plant = details_data['plant']
    spn = details_data['spn']
    abc_data = details_data['abc_data']

    # Profiling
    data_prof = profiling.profiling_tech(datasets)
    # Clustering based on nature
    data_cluster = Cluster.clustering(data_prof)
    total_price = np.sum(price)
    sku_price = dict()
    for i, sku in enumerate(sku_list):
        sku_price[str(sku)] = price[i]
    # Market-based clustering
    # plant_cluster = Cluster.clustering_plant(sku_list, plant, market)
    # XYZ based on unit cost
    xyz_data = xyz.xyz_class(sku_price, total_price)
    # ABC based on volume
    abc_alter = abc.abc_class(datasets)

    # trained_outputs = []
    forecast_results = []
    num = 0
    for incr, sku in enumerate(datasets):
        num += 1
        # if sku != '1702460700':
        ## if num != 1:
        #     continue
        prof = data_prof.iloc[incr]
        print("------------------------------------------------------------")
        print("Running SKU %d: %s..." % (num, sku))
        print("cluster : ", data_cluster[sku])
        raw_data = copy.deepcopy(datasets[sku].T)
        output = ut.init_output(forecast_period, raw_data, prof)
        output['unit_cost'] = float(price[incr])
        output['market'] = str(market[incr])
        output['plant'] = str(plant[incr])
        if pd.isnull(abc_data[incr]):
            output['Variability_Segment'] = abc_alter[sku]
        else:
            output['Variability_Segment'] = abc_data[incr]
        output['Velocity_segment'] = xyz_data[sku]
        output['spn'] = spn[incr]

        dataset = raw_data.copy()
        dataset = dataset[:-1]
        # dataset = pp.dateformat(dataset)
        # dataset, interval = pp.impute_missing_dates(dataset)
        # print(interval.days)

        # All-zero or all-NaN series get a flat zero forecast.
        if ((dataset['sales'] == 0).all()
                or set(math.isnan(x) for x in dataset['sales']) == {True}):
            # print(dataset['sales'])
            print("All zeros/NaNs")
            forecast = [0] * forecast_period
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output['bias'] = \
                ft.calculate_forecast_accuracy(raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(forecast, forecast))
            forecast_results = ft.output_forecast(sku, dataset, datasets[sku].T,
                                                  output, forecast_results)
            continue

        sku_data = dataset.astype(np.float32)
        sku_data = pp.read_from_first_sales(sku_data['sales'])

        # size ---> outlier bucket size
        # sparse_size ---> number of zeros to categorize as sparse data
        # freq ---> seasonality
        interval = 30
        size = 6
        sparse_size = 10
        freq = 12
        # size, sparse_size, freq = pp.get_bucket_size(interval)

        test_nan = pd.DataFrame(sku_data[-freq:])
        test_nan = test_nan['sales']
        # If the last year is all NaN, impute with zeros and forecast with MA(6).
        if sum(test_nan.isnull()) >= freq:
            print("Last 1 year NaN")
            sku_data = pp.data_imputation_zero(test_nan)
            sku_data = sku_data[:-5]
            expected = [0] * 5
            forecast = mu.moving_average(sku_data, forecast_period, 6)
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output['bias'] = \
                ft.calculate_forecast_accuracy(raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(expected, forecast))
            forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                  output, forecast_results)
            continue

        # If more than 60% of the series is NaN, impute with zeros;
        # otherwise impute with estimated values.
        if sum(pd.isnull(sku_data)) > (0.6 * len(sku_data)):
            print("Nan Greater than 60%")
            sku_data = pp.data_imputation_zero(sku_data)
        else:
            print("Nan less than 60%")
            sku_data = pp.data_imputation(sku_data, freq)
            sku_data = sku_data[0]
        sku_data = pp.read_from_first_sales(sku_data)

        # After reading from the first non-zero sale, if the series is too
        # short for modeling, fall back to weighted MA(3).
        if len(sku_data) < 20:
            try:
                print("Weighted Moving Average")
                forecast = mu.weighted_moving_average(sku_data,
                                                      forecast_period, 3)
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output['bias'] = \
                    ft.calculate_forecast_accuracy(raw_data.iloc[-1],
                                                   forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)
            except:
                print("Less than 3")
                print(sku_data)
                forecast = mu.moving_average(sku_data, forecast_period,
                                             len(sku_data))
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output['bias'] = \
                    ft.calculate_forecast_accuracy(raw_data.iloc[-1],
                                                   forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)
            continue

        data_copy = sku_data.copy()
        data_copy = np.array(data_copy)
        # plt.figure()
        # plt.plot(data_copy)
        index1, index2, sflag1, sflag2 = pp.Sesonal_detection(sku_data)
        sku_data = pp.outlier_treatment_tech(sku_data, interval, size)
        sku_data = np.array(sku_data[0])
        # Restore seasonal spikes that outlier treatment removed.
        if sflag1 == 1:
            sku_data[index1] = data_copy[index1]
        if sflag2 == 1:
            sku_data[index2] = data_copy[index2]
        else:
            sku_data = sku_data
        # plt.plot(sku_data)
        # plt.show()
        # continue
        sku_data = pd.DataFrame(sku_data)

        # Testing stationarity: difference until the Dickey-Fuller test passes.
        d = 0
        df_test_result = tests.dickeyfullertest(
            sku_data.T.squeeze())  # pd.Series(sku_data[0])
        while df_test_result == 0:
            d += 1
            if d == 1:
                new_data = ut.difference(sku_data[0].tolist())
            else:
                new_data = ut.difference(new_data)
            df_test_result = tests.dickeyfullertest(new_data)

        sample = np.array(sku_data)
        repeat = mu.check_repetition(sample, freq, 1, len(sample))

        # Finding p and q values
        try:
            if d == 0:
                p, ps, pl = plots.acf_plot(sku_data, freq)
                q = plots.pacf_plot(sku_data, freq)
                data = sku_data
            else:
                p, ps, pl = plots.acf_plot(new_data, freq)
                q = plots.pacf_plot(new_data, freq)
                data = new_data
            if repeat in ps:
                p = repeat
            elif repeat in pl:
                p = repeat
            else:
                p = pl[0]
            if p > freq:
                p = freq
        except:
            p = 1
            q = 1
            data = sku_data
        data = sku_data
        best_order = (p, d, q)
        print("BEST ORDER :", best_order)

        # TODO: Calculate tsize
        tsize = 5
        # tsize = int(0.2 * len(data))
        # print(test)
        expected = data[-tsize:].reset_index(drop=True)
        expected = [float(i) for i in expected.values]
        # print("Dimension: ", data.shape)
        train_6wa = sku_data[0:-tsize]

        predictions_ML, rmse_ML = train.time_series_using_ml(
            sku_data, tsize, best_order, data_cluster[sku])
        (rmse_ARIMA, rmse_ES, rmse_naive, rmse_ma, predictions_ARIMA,
         predictions_ES, predictions_naive, predictions_ma) = \
            train.time_series_models(freq, sku_data, data, tsize, best_order,
                                     data_cluster[sku])
        print("Modeling done")

        rmse_TS = rmse_ARIMA.copy()
        rmse_TS.update(rmse_ES)
        rmse_TS.update(rmse_naive)
        rmse_TS.update(rmse_ma)
        predictions = predictions_ML
        predictions.update(predictions_ARIMA)
        predictions.update(predictions_ES)
        predictions.update(predictions_naive)
        predictions.update(predictions_ma)

        # Intermittent-demand clusters also get a Croston model.
        if data_cluster[sku] in [1, 4, 7, 10, 13, 16, 19, 22, 25]:
            rmse_Croston, predictions_Croston = mu.Croston_TSB(sku_data, tsize)
            rmse_TS.update(rmse_Croston)
            predictions.update(predictions_Croston)

        rmse_vol_ml = dict()
        for key in rmse_ML:
            std = np.std(rmse_ML[key])
            mean = np.mean(rmse_ML[key])
            rmse_vol_ml[key] = mean
            # if std == 0:
            #     rmse_vol_ml[key] = mean
            # else:
            #     rmse_vol_ml[key] = mean / std
        rmse_vol_ts = dict()
        for key in rmse_TS:
            mean = np.mean(rmse_TS[key])
            std = np.std(rmse_TS[key])
            rmse_vol_ts[key] = mean
            # if std == 0:
            #     rmse_vol_ts[key] = mean
            # else:
            #     rmse_vol_ts[key] = mean / std

        # Top 3 models
        best_models_ml = sorted(rmse_vol_ml, key=rmse_vol_ml.get,
                                reverse=False)[:3]
        best_models_ts = sorted(rmse_vol_ts, key=rmse_vol_ts.get,
                                reverse=False)[:3]

        # forecasts_ml = dict()
        # validation_ml = dict()
        bias_ml = []
        accuracy_ml = []
        for model in best_models_ml:
            # temp = ft.model_predict(model, best_order, data, forecast_period)
            # forecasts_ml[model] = [0 if i < 0 else int(i) for i in temp]
            # validation_ml[model] = predictions[model]
            bias_ml.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ml.append(mu.calculate_facc(expected, predictions[model]))
        bias_ml = [float(format(i, '.3f')) for i in bias_ml]
        accuracy_ml = [float(format(i, '.3f')) for i in accuracy_ml]

        # forecasts_ts = dict()
        # validation_ts = dict()
        bias_ts = []
        accuracy_ts = []
        for model in best_models_ts:
            # temp = ft.model_predict(model, best_order, sku_data,
            #                         forecast_period, repeat)
            # forecasts_ts[model] = [0 if i < 0 else int(i) for i in temp]
            # validation_ts[model] = predictions[model]
            bias_ts.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ts.append(mu.calculate_facc(expected, predictions[model]))
        bias_ts = [float(format(i, '.3f')) for i in bias_ts]
        accuracy_ts = [float(format(i, '.3f')) for i in accuracy_ts]

        # For one ensemble
        error_ml = min(rmse_vol_ml.values())
        error_ts = min(rmse_vol_ts.values())
        best_models = [
            min(rmse_vol_ml, key=rmse_vol_ml.get),
            min(rmse_vol_ts, key=rmse_vol_ts.get)
        ]
        print("BEST MODELS :", best_models)
        print("ERRORS OF BEST MODELS :", error_ml, error_ts)

        forecast_ml, param_val_fore = ft.model_predict(
            best_models[0], best_order, data, forecast_period)
        if best_models[1] == 'Croston':
            rmse_Croston, forecast_ts = mu.Croston_TSB(sku_data,
                                                       forecast_period)
            forecast_ts = forecast_ts['Croston']
        else:
            forecast_ts, param_val = ft.model_predict(
                best_models[1], best_order, sku_data, forecast_period, repeat)
        forecast_ml = [0 if i < 0 else int(i) for i in forecast_ml]
        forecast_ts = [0 if i < 0 else int(i) for i in forecast_ts]

        weight_ts, weight_ml = ut.weight_calculation(data, best_models,
                                                     best_order)
        print("weight ts:", weight_ts)
        print("weight ml:", weight_ml)
        Vm = predictions[best_models[0]]
        Vt = predictions[best_models[1]]
        Ve = ut.method_ensemble(Vm, Vt, weight_ml, weight_ts, tsize)
        error_en = mu.calculate_rmse('Ensemble', expected, Ve)
        bias_en = []
        accuracy_en = []
        bias_en.append((sum(expected) - sum(Ve)) / len(expected))
        accuracy_en.append(mu.calculate_facc(expected, Ve))
        bias_en = [float(format(i, '.3f')) for i in bias_en]
        accuracy_en = [float(format(i, '.3f')) for i in accuracy_en]

        # Ensemble of six-month naive and weighted average
        V6wa, rmse_6wa = ts.model_Naive('naive6wa', train_6wa, tsize,
                                        (0, 0, 0), 0, train_flag=1)
        error_6wa = np.mean(rmse_6wa)
        forecast_6wa, param_val = ft.model_predict('naive6wa', best_order,
                                                   data, forecast_period)
        forecast_en = ut.method_ensemble(forecast_ml, forecast_ts, weight_ml,
                                         weight_ts, forecast_period)

        output['forecast_period'] = forecast_period
        output['interval'] = 'M'
        output['best_models_ml'] = best_models_ml
        output['best_models_ts'] = best_models_ts
        output['bias_ml'] = bias_ml
        output['bias_ts'] = bias_ts
        output['bias_en'] = bias_en
        output['accuracy_ml'] = accuracy_ml
        output['accuracy_ts'] = accuracy_ts
        output['accuracy_en'] = accuracy_en
        output['TS'] = op.best_model_details_ts(best_models[1], bias_ts[0],
                                                accuracy_ts[0], best_order)
        output['ML'] = op.best_model_details_ml(best_models[0], bias_ml[0],
                                                accuracy_ml[0], param_val_fore)
        output['Ensemble'] = {"bias": bias_en[0], "accuracy": accuracy_en[0]}

        error_min_model = min(error_ml, error_ts, error_en)
        print("Errors:")
        print("ML:", error_ml)
        print("TS:", error_ts)
        print("Ensemble:", error_en)
        print("six_naive_WA", error_6wa)
        min_error = min(error_min_model, error_6wa)
        if min_error == error_ml:
            ftt = forecast_ml
        elif min_error == error_ts:
            ftt = forecast_ts
        elif min_error == error_en:
            ftt = forecast_en
        else:
            ftt = []

        # Pick the champion: fall back to the six-month naive ensemble when it
        # wins outright or when the chosen forecast is a flat line.
        if min_error == error_6wa or all(elem == ftt[0] for elem in ftt):
            print("Best forecast from six naive")
            forecast = forecast_6wa
            output['validation'] = ut.assign_dates(V6wa, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, V6wa)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ml:
            print("Best forecast from ML")
            forecast = forecast_ml
            output['validation'] = ut.assign_dates(Vm, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vm)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_en:
            print("Best forecast from Ensemble")
            forecast = forecast_en
            output['validation'] = ut.assign_dates(Ve, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Ve)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ts:
            print("Best forecast from TS")
            forecast = forecast_ts
            output['validation'] = ut.assign_dates(Vt, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vt)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))

        # print("Forecasts:")
        print("ML:", forecast_ml)
        print("TS:", forecast_ts)
        print("Ensemble:", forecast_en)
        print("Best Forecast", forecast)
        output['forecast_values'] = ut.assign_dates(forecast, 'forecast',
                                                    dataset.tail(1))
        output['facc'], output['mape'], output['bias'] = \
            ft.calculate_forecast_accuracy(raw_data.iloc[-1].sales,
                                           forecast[0])
        facc_out[sku] = np.mean(validation_facc)
        output['forecast_ml'] = ut.assign_dates(forecast_ml, 'forecast',
                                                dataset.tail(1))
        output['forecast_ts'] = ut.assign_dates(forecast_ts, 'forecast',
                                                dataset.tail(1))
        output['forecast_en'] = ut.assign_dates(forecast_en, 'forecast',
                                                dataset.tail(1))
        output['model_ml'] = best_models[0]
        output['model_ts'] = best_models[1]
        forecast_results = ft.output_forecast(sku, dataset, sku_data, output,
                                              forecast_results)
        ft.plot_all_forecasts(dataset, sku_data, forecast, forecast_en,
                              forecast_ml, forecast_ts, sku)
    return forecast_results, facc_out
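# Sketch of the convex two-model blend that training() delegates to
# ut.method_ensemble: a weighted average of the best ML and best TS forecasts.
# The real helper's signature may differ (it also takes a length argument);
# this only illustrates the arithmetic.
def _ensemble_sketch(pred_ml, pred_ts, weight_ml, weight_ts):
    return [weight_ml * m + weight_ts * t for m, t in zip(pred_ml, pred_ts)]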