def model_ARIMA(key, train, test_shape, order, train_flag=0, test=None):
    """Produce ARIMA forecasts, as a rolling validation or a plain forecast.

    The differencing term of ``order`` is always overridden with 1, so the
    fitted model is ``ARIMA(order[0], 1, order[2])``.

    Validation mode (``train_flag == 1``) runs three folds that hold out the
    last 5, 4 and 3 observations.  Each fold forecasts three steps one at a
    time (feeding each forecast back into the fold's training data), plots
    and scores them against the first three held-out values, and contributes
    its first forecast to ``predictions`` (the last fold contributes all
    three).

    Forecast mode (any other ``train_flag``) forecasts ``test_shape`` steps
    one at a time, feeding each forecast back into the history.

    In both modes a negative forecast is replaced by
    ``mu.weighted_moving_average(..., 1, 3)[0]`` and any failure while
    fitting falls back to ``mu.moving_average`` for the remaining steps.

    Args:
        key: Label forwarded to ``mu.plotting`` and ``mu.calculate_rmse``.
        train: Observations; either an object exposing ``.values`` (pandas
            Series / DataFrame) or a plain iterable of numbers.
        test_shape: Number of steps to forecast in forecast mode.
        order: ``(p, d, q)`` tuple; only ``p`` and ``q`` are used.
        train_flag: 1 selects validation mode.
        test: Unused; accepted only for backward compatibility.

    Returns:
        ``(predictions, rmse_val)``: ``predictions`` is a list of ints with
        NaN mapped to 0; ``rmse_val`` has one entry per validation fold and
        is empty in forecast mode.
    """
    predictions = []
    rmse_val = []

    # Accept pandas objects as well as plain sequences.
    train = getattr(train, "values", train)
    # np.asscalar() was removed from NumPy; asarray(...).item() is the
    # equivalent and additionally accepts plain Python numbers.
    history = [np.asarray(x).item() for x in train]

    if train_flag == 1:
        itr = 5
        data = pd.DataFrame(history)
        for fold in range(3):
            pred_temp = []
            v_train = list(history[:-itr])
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                order = (order[0], 1, order[2])
                for _ in range(3):
                    model_fit = ARIMA(v_train, order=order).fit(disp=0)
                    # forecast()[0] is a 1-element array; keep a scalar so
                    # the training list stays homogeneous.
                    yhat = float(np.ravel(model_fit.forecast()[0])[0])
                    if yhat < 0:
                        # Use the fold's own training data: the full history
                        # would leak the held-out values into validation.
                        yhat = mu.weighted_moving_average(v_train, 1, 3)[0]
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(
                    mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if fold == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            itr -= 1
    else:
        try:
            # TODO: check order (d is currently forced to 1).
            order = (order[0], 1, order[2])
            for _ in range(test_shape):
                model_fit = ARIMA(history, order=order).fit(disp=0)
                yhat = float(np.ravel(model_fit.forecast()[0])[0])
                if yhat < 0:
                    yhat = mu.weighted_moving_average(history, 1, 3)[0]
                predictions.append(yhat)
                history.append(yhat)
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [0 if pd.isnull(i) else int(i) for i in predictions]
    return predictions, rmse_val
예제 #2
0
def model_LinearRegression(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Run a three-fold rolling validation of a grid-searched LinearRegression.

    Only ``train_flag == 1`` does any work; otherwise two empty lists are
    returned.  The folds hold out the last 5, 4 and 3 observations.  In each
    fold a ``GridSearchCV`` over ``LinearRegression`` is fitted through
    ``mu.fit_model``, ``tsize`` forecasts are produced one at a time, and
    forecasts 1..3 (zero-based) of that fold are scored with
    ``mu.calculate_rmse`` against the first three held-out values.

    Each fold keeps its own forecast buffer.  Previously the buffer was
    shared across folds, so the RMSE of folds 2 and 3 (and the returned
    predictions) were computed from forecasts left over from earlier folds.

    Args:
        dataset: Sequence of observations.
        tsize: Number of forecasts generated per fold.
        order: ``(p, d, q)``; ``order[1]`` is passed to ``ut.difference`` and
            ``order[0]`` to ``ut.timeseries_to_supervised``.
        train_flag: 1 enables the validation.

    Returns:
        ``(predictions, rmse_val)``: the last fold's scored forecasts as
        ints, and one RMSE entry per fold.
    """
    predictions = []
    rmse_val = []

    if train_flag == 1:
        itr = 5
        for _fold in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3).reset_index(drop=True)

            train = dataset[:-itr]
            # NOTE(review): differencing uses the full dataset rather than
            # `train`, so held-out values reach the fitted model - confirm.
            diff_values = ut.difference(dataset, order[1])

            scaler = ut.scaler_selection('lr')
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values

            clf = LinearRegression()
            param = {
                "fit_intercept": [True, False],
                "normalize": [False],
                "copy_X": [True, False]
            }
            grid = GridSearchCV(clf, param, n_jobs=1)
            model = mu.fit_model(data, grid)

            # Forecasts of this fold only.
            fold_predictions = []
            for _step in range(tsize):
                X = data[:, 0:-1]
                yhat = mu.forecast_model(model, X)

                # NOTE(review): the forecast is used as-is; no inverse
                # scaling / inverse differencing is applied here.
                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                fold_predictions.append(forecast)
                train = np.append(train, forecast)
                # NOTE(review): diff_train is computed but the supervised
                # frame below is rebuilt from the raw `train` - confirm
                # which input was intended.
                diff_train = ut.difference(train, order[1])
                diff_train = scaler.fit_transform(
                    pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            predictions = fold_predictions[1:4]
            rmse_val.append(mu.calculate_rmse('GR_LR', expected, predictions))
            itr = itr - 1

    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
예제 #3
0
def model_DecisionTree(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Forecast ``tsize`` steps with a randomly tuned DecisionTreeRegressor.

    Every step differences ``dataset`` with lag 1, converts it to a
    supervised frame with one lag, tunes a tree with ``RandomizedSearchCV``
    (100 draws), refits a tree with the best parameters and predicts the
    training inputs through ``mu.forecast_model``.  The predictions are
    passed through ``ut.inverse_difference`` and the last one becomes the
    forecast; a negative forecast is replaced by
    ``mu.weighted_moving_average(dataset, 1, 3)[0]``.  The forecast is then
    appended to ``dataset`` for the next step.

    Args:
        dataset: Sequence of observations.
        tsize: Number of steps to forecast; with ``train_flag == 1`` also the
            number of trailing supervised rows excluded from fitting.
        order: Unused.
        train_flag: 1 excludes the last ``tsize`` supervised rows.

    Returns:
        List of ``tsize`` forecasts truncated to int.
    """
    forecasts = []
    for _step in range(tsize):
        lagged = ut.timeseries_to_supervised(ut.difference(dataset, 1), 1)
        data = lagged.values

        train = data[0:-tsize] if train_flag == 1 else data
        X = train[:, 0:-1].reshape(-1, 1)
        y = train[:, -1]

        search_space = {
            "max_depth": [3, None],
            "min_samples_leaf": sp_randint(1, 11),
            "criterion": ["mse"],
            "splitter": ["best", "random"],
            "max_features": ["auto", "sqrt", None]
        }
        gridDT = RandomizedSearchCV(
            DecisionTreeRegressor(), search_space, n_jobs=1, n_iter=100)
        gridDT.fit(X, y)

        # Refit a fresh tree with the winning hyper-parameters.
        best = gridDT.best_params_
        clf = DecisionTreeRegressor(
            criterion=best["criterion"],
            splitter=best["splitter"],
            max_features=best["max_features"],
            max_depth=best["max_depth"],
            min_samples_leaf=best["min_samples_leaf"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = np.array([
            ut.inverse_difference(dataset, yhat[idx], len(dataset) - idx)
            for idx in range(len(yhat))
        ])

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        forecasts.append(forecast)
        dataset = np.append(dataset, forecast)
    return [int(value) for value in forecasts]
예제 #4
0
def model_ElasticNet(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Forecast ``tsize`` steps with a randomly tuned ElasticNet regressor.

    Every step differences ``dataset`` with lag 1, converts it to a
    supervised frame with one lag, tunes an ElasticNet with
    ``RandomizedSearchCV`` (100 draws), refits with the best parameters and
    predicts the training inputs through ``mu.forecast_model``.  The
    predictions are passed through ``ut.inverse_difference`` and the last
    one becomes the forecast; a negative forecast is replaced by
    ``mu.weighted_moving_average(dataset, 1, 3)[0]``.  The forecast is then
    appended to ``dataset`` for the next step.

    Args:
        dataset: Sequence of observations.
        tsize: Number of steps to forecast; with ``train_flag == 1`` also the
            number of trailing supervised rows excluded from fitting.
        order: Unused.
        train_flag: 1 excludes the last ``tsize`` supervised rows.

    Returns:
        List of ``tsize`` forecasts truncated to int.
    """
    predictions = []

    # The search space does not depend on the data, so build it once instead
    # of re-creating a 100k-element list on every step.
    param = {
        "alpha": list(np.linspace(0.000000001, 100, 100000)),
        # l1_ratio is a mixing weight and must stay within [0, 1]; the
        # previous upper bound of 100 made almost every draw invalid.
        "l1_ratio": list(np.linspace(0.000001, 1, 1000)),
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "precompute": [True, False]
    }

    for _step in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        random_elas = RandomizedSearchCV(
            ElasticNet(), param, n_jobs=1, n_iter=100)
        random_elas.fit(X, y)

        # Refit a fresh estimator with the winning hyper-parameters.
        best = random_elas.best_params_
        clf = ElasticNet(
            alpha=best["alpha"],
            l1_ratio=best["l1_ratio"],
            fit_intercept=best["fit_intercept"],
            normalize=best["normalize"],
            precompute=best["precompute"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = np.array([
            ut.inverse_difference(dataset, yhat[idx], len(dataset) - idx)
            for idx in range(len(yhat))
        ])

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
예제 #5
0
def model_SVR_Poly(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Forecast ``tsize`` steps with a randomly tuned polynomial-kernel SVR.

    Every step differences ``dataset`` with lag 1, converts it to a
    supervised frame with one lag, tunes an SVR with ``RandomizedSearchCV``
    (100 draws over degree, gamma and C), refits with the best parameters
    and predicts the training inputs through ``mu.forecast_model``.  The
    predictions are passed through ``ut.inverse_difference`` and the last
    one becomes the forecast; a negative forecast is replaced by
    ``mu.weighted_moving_average(dataset, 1, 3)[0]``.  The forecast is then
    appended to ``dataset`` for the next step.

    Args:
        dataset: Sequence of observations.
        tsize: Number of steps to forecast; with ``train_flag == 1`` also the
            number of trailing supervised rows excluded from fitting.
        order: Unused.
        train_flag: 1 excludes the last ``tsize`` supervised rows.

    Returns:
        List of ``tsize`` forecasts truncated to int.
    """
    forecasts = []
    for _step in range(tsize):
        lagged = ut.timeseries_to_supervised(ut.difference(dataset, 1), 1)
        data = lagged.values

        train = data[0:-tsize] if train_flag == 1 else data
        X = train[:, 0:-1].reshape(-1, 1)
        y = train[:, -1]

        search_space = {
            "kernel": ["poly"],
            "degree": range(10, 30, 1),
            "gamma": list(np.linspace(0.0001, 1, 1000)),
            "C": list(np.linspace(0.01, 10, 25))
        }
        random_search = RandomizedSearchCV(
            SVR(), search_space, n_jobs=1, n_iter=100)
        random_search.fit(X, y)

        # Refit a fresh estimator with the winning hyper-parameters.
        best = random_search.best_params_
        clf = SVR(kernel=best["kernel"],
                  degree=best["degree"],
                  gamma=best["gamma"],
                  C=best["C"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = np.array([
            ut.inverse_difference(dataset, yhat[idx], len(dataset) - idx)
            for idx in range(len(yhat))
        ])

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        forecasts.append(forecast)
        dataset = np.append(dataset, forecast)
    return [int(value) for value in forecasts]
예제 #6
0
def model_RandomForest(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Forecast ``tsize`` steps with a randomly tuned RandomForestRegressor.

    Every step differences ``dataset`` with lag 1, converts it to a
    supervised frame with one lag, tunes a forest with
    ``RandomizedSearchCV``, refits a forest with the best parameters and
    predicts the training inputs through ``mu.forecast_model``.  The
    predictions are passed through ``ut.inverse_difference`` and the last
    one becomes the forecast; a negative forecast is replaced by
    ``mu.weighted_moving_average(dataset, 1, 3)[0]``.  The forecast is then
    appended to ``dataset`` for the next step.

    Args:
        dataset: Sequence of observations.
        tsize: Number of steps to forecast; with ``train_flag == 1`` also the
            number of trailing supervised rows excluded from fitting.
        order: Unused.
        train_flag: 1 excludes the last ``tsize`` supervised rows.

    Returns:
        List of ``tsize`` forecasts truncated to int.
    """
    predictions = []

    param_forest = {
        "n_estimators": range(10, 1000, 100),
        "criterion": ["mse"],
        "bootstrap": [True, False],
        "warm_start": [True, False]
    }
    # Every entry is a finite list, so the space holds only 40 combinations;
    # never request more draws than exist.
    grid_size = 1
    for candidates in param_forest.values():
        grid_size *= len(candidates)
    n_iter = min(100, grid_size)

    for _step in range(tsize):
        diff_values = ut.difference(dataset, 1)
        supervised = ut.timeseries_to_supervised(diff_values, 1)
        data = supervised.values

        if train_flag == 1:
            train = data[0:-tsize]
        else:
            train = data

        X, y = train[:, 0:-1].reshape(-1, 1), train[:, -1]
        gridRF = RandomizedSearchCV(
            RandomForestRegressor(), param_forest, n_jobs=1, n_iter=n_iter)
        gridRF.fit(X, y)

        # The forecasting estimator was never created before (``clf`` was an
        # undefined name); refit one from the best parameters, as the
        # sibling model_* functions do.
        best = gridRF.best_params_
        clf = RandomForestRegressor(
            n_estimators=best["n_estimators"],
            criterion=best["criterion"],
            bootstrap=best["bootstrap"],
            warm_start=best["warm_start"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = np.array([
            ut.inverse_difference(dataset, yhat[idx], len(dataset) - idx)
            for idx in range(len(yhat))
        ])

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        predictions.append(forecast)
        dataset = np.append(dataset, forecast)
    predictions = [int(i) for i in predictions]
    return predictions
def model_ARMA(key, train, test_shape, order, train_flag=0, test=None):
    """Produce ARMA forecasts, as a rolling validation or a plain forecast.

    Validation mode (``train_flag == 1``) runs three folds that hold out the
    last 5, 4 and 3 observations.  Each fold forecasts three steps one at a
    time (feeding each forecast back into the fold's training data), plots
    and scores them against the first three held-out values, and contributes
    its first forecast to ``predictions`` (the last fold contributes all
    three).

    Forecast mode (any other ``train_flag``) forecasts ``test_shape`` steps
    one at a time.

    In both modes a negative forecast is replaced by
    ``mu.weighted_moving_average(..., 1, 3)[0]`` and any failure while
    fitting falls back to ``mu.moving_average`` for the remaining steps.

    Args:
        key: Label forwarded to ``mu.plotting`` and ``mu.calculate_rmse``.
        train: Observations; either an object exposing ``.values`` (pandas
            Series / DataFrame) or a plain iterable of numbers.
        test_shape: Number of steps to forecast in forecast mode.
        order: Order tuple passed unchanged to ``ARMA``.
        train_flag: 1 selects validation mode.
        test: Unused; accepted only for backward compatibility.

    Returns:
        ``(predictions, rmse_val)``: ``predictions`` is a list of ints with
        NaN mapped to 0 (as in ``model_ARIMA``); ``rmse_val`` has one entry
        per validation fold and is empty in forecast mode.
    """
    predictions = []
    rmse_val = []

    # Accept pandas objects as well as plain sequences.
    train = getattr(train, "values", train)
    # np.asscalar() was removed from NumPy; asarray(...).item() is the
    # equivalent and additionally accepts plain Python numbers.
    history = [np.asarray(x).item() for x in train]

    if train_flag == 1:
        itr = 5
        data = pd.DataFrame(history)
        for fold in range(3):
            pred_temp = []
            v_train = list(history[:-itr])
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                for _ in range(3):
                    model = ARMA(v_train, order=order)
                    model_fit = model.fit(disp=0, transparams=False,
                                          trend='nc')
                    # forecast()[0] is a 1-element array; keep a scalar so
                    # the training list stays homogeneous.
                    pred = float(np.ravel(model_fit.forecast()[0])[0])
                    if pred < 0:
                        pred = mu.weighted_moving_average(v_train, 1, 3)[0]
                    pred_temp.append(pred)
                    v_train.append(pred)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(
                    mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            if fold == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))
            itr -= 1
    else:
        try:
            for _ in range(test_shape):
                model = ARMA(history, order=order)
                model_fit = model.fit(disp=0, transparams=False, trend='nc')
                yhat = model_fit.forecast()[0]
                raw_forecast = float(np.ravel(yhat)[0])
                # NOTE(review): the validation branch scores the raw
                # forecast, while this branch inverse-differences it even
                # though nothing was differenced here - confirm intent.
                inverted = [
                    ut.inverse_difference(history, yhat[idx],
                                          len(history) - idx)
                    for idx in range(len(yhat))
                ]
                pred = float(np.ravel(inverted[-1])[0])
                if pred < 0:
                    # The helper returns a sequence; storing it whole made
                    # the final int() conversion fail.
                    pred = mu.weighted_moving_average(history, 1, 3)[0]
                predictions.append(pred)
                # Append a scalar: appending the forecast array made the
                # history ragged and broke the next fit.
                history.append(raw_forecast)
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [0 if pd.isnull(i) else int(i) for i in predictions]
    return predictions, rmse_val
def model_ES(key, train, test_shape=0, train_flag=0, test=None):
    """Produce exponential-smoothing forecasts (validation or forecast mode).

    ``key`` selects the model: ``'SES'`` uses ``SimpleExpSmoothing`` and
    ``'HWES'`` uses ``ExponentialSmoothing``.  Any other key cannot build a
    model and therefore ends in the moving-average fallback.

    Validation mode (``train_flag == 1``) runs three folds that hold out the
    last 5, 4 and 3 observations.  Each fold forecasts three steps one at a
    time from the fold's own training data, plots and scores them against
    the first three held-out values, and contributes its first forecast to
    ``predictions`` (the last fold contributes all three).  Previously the
    model was fitted on the full history, so the held-out values leaked into
    the fit and every fold produced the same forecast.

    Forecast mode (any other ``train_flag``) forecasts ``test_shape`` steps
    one at a time, feeding each forecast back into the history.

    In both modes a negative forecast is replaced by
    ``mu.weighted_moving_average(..., 1, 3)[0]`` and any failure while
    fitting falls back to ``mu.moving_average`` for the remaining steps.

    Args:
        key: ``'SES'`` or ``'HWES'``; also forwarded to ``mu.plotting`` and
            ``mu.calculate_rmse``.
        train: Observations; either an object exposing ``.values`` (pandas
            Series / DataFrame) or a plain iterable of numbers.
        test_shape: Number of steps to forecast in forecast mode.
        train_flag: 1 selects validation mode.
        test: Unused; accepted only for backward compatibility.

    Returns:
        ``(predictions, rmse_val)``: ``predictions`` is a list of ints with
        NaN mapped to 0 (as in ``model_ARIMA``); ``rmse_val`` has one entry
        per validation fold and is empty in forecast mode.
    """
    predictions = []
    rmse_val = []

    # Accept pandas objects as well as plain sequences.
    train = getattr(train, "values", train)
    # np.asscalar() was removed from NumPy; asarray(...).item() is the
    # equivalent and additionally accepts plain Python numbers.
    history = [np.asarray(x).item() for x in train]

    if train_flag == 1:
        # Rolling validation.
        itr = 5
        data = pd.DataFrame(history)
        for fold in range(3):
            pred_temp = []
            v_train = list(history[:-itr])
            v_expected = data.tail(itr).head(3).reset_index(drop=True)
            try:
                for _ in range(3):
                    if key == 'SES':
                        model = SimpleExpSmoothing(v_train)
                    elif key == 'HWES':
                        model = ExponentialSmoothing(v_train)
                    else:
                        raise ValueError("unsupported key: %r" % (key,))
                    model_fit = model.fit()
                    # One step past the end of the fold's training data.
                    step = model_fit.predict(len(v_train), len(v_train))
                    yhat = float(np.ravel(step)[0])
                    if yhat < 0:
                        yhat = mu.weighted_moving_average(v_train, 1, 3)[0]
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(
                    mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if fold == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            itr -= 1
    else:
        # Forecast.
        try:
            for _ in range(test_shape):
                if key == 'SES':
                    model = SimpleExpSmoothing(history)
                elif key == 'HWES':
                    model = ExponentialSmoothing(history)
                else:
                    raise ValueError("unsupported key: %r" % (key,))
                model_fit = model.fit()
                step = model_fit.predict(len(history), len(history))
                yhat = float(np.ravel(step)[0])
                if yhat < 0:
                    yhat = mu.weighted_moving_average(history, 1, 3)[0]
                predictions.append(yhat)
                history.append(yhat)
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(
                mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [0 if pd.isnull(i) else int(i) for i in predictions]
    return predictions, rmse_val
예제 #9
0
def model_ML(dataset=[],
             tsize=0,
             test_shape=0,
             model=np.nan,
             key='',
             order=(0, 0, 0),
             train_flag=0):
    """Validate or forecast with a caller-supplied regression model.

    Validation mode (``train_flag == 1``) runs three folds that hold out the
    last 5, 4 and 3 observations of ``dataset``.  Each fold fits ``model``
    through ``mu.fit_model``, generates ``test_shape`` forecasts one at a
    time, keeps forecasts 1..3 (zero-based) of the fold, plots and scores
    them against the first three held-out values, and contributes the first
    kept forecast to ``predictions`` (the last fold contributes all kept
    forecasts).

    Forecast mode (any other ``train_flag``) fits ``model`` once on the
    differenced (and, for some keys, scaled) data, generates
    ``test_shape + 2`` forecasts one at a time and discards the first two.

    Args:
        dataset: Sequence of observations.
        tsize: Not referenced in this function.
        test_shape: Number of forecasts per fold (validation) or number of
            forecasts returned (forecast mode).
        model: Estimator handed to ``mu.fit_model``; in forecast mode its
            ``best_params_`` or ``get_params()`` are also read.
        key: Model label; ``'lr'``, ``'lasso'``, ``'ridge'``, ``'knn'`` and
            ``'svmr'`` enable scaling via ``ut.scaler_selection(key)``.  Also
            forwarded to ``mu.plotting`` and ``mu.calculate_rmse``.
        order: ``(p, d, q)``; ``order[1]`` is passed to ``ut.difference`` and
            ``order[0]`` to ``ut.timeseries_to_supervised``.
        train_flag: 1 selects validation mode.

    Returns:
        ``(predictions, rmse_val, parameter_values)``: forecasts truncated
        to int, one RMSE entry per validation fold (empty in forecast mode),
        and the model parameters (empty dict in validation mode).
    """
    predictions = []
    pred_temp = []
    rmse_val = []
    parameter_values = {}
    # Scaling is only applied for the model keys listed below.
    scale_flag = 0
    if key == 'lr' or key == 'lasso' or key == 'ridge' or key == 'knn' or key == 'svmr':
        scale_flag = 1

    if train_flag == 1:
        # Number of trailing observations held out; shrinks by one per fold.
        itr = 5
        for i in range(3):
            # First three of the held-out observations are the targets.
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3)
            expected = expected.reset_index(drop=True)

            train = dataset[:-itr]

            diff_values = ut.difference(train, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            # NOTE(review): the supervised frame is built from the raw
            # `train`, not from the differenced/scaled `diff_values` computed
            # above (the forecast branch uses `diff_values`) - confirm.
            supervised = ut.timeseries_to_supervised(train, order[0])
            data = supervised.values

            RF_model = mu.fit_model(data, model)

            # Forecasts of this fold only.
            pred_temp = []

            for j in range(test_shape):
                # All columns but the last are inputs; `y` is not used below.
                X, y = data[:, 0:-1], data[:, -1]
                yhat = mu.forecast_model(RF_model, X)

                #TODO: Inverse differencing and scaling

                #                if scale_flag==1:
                #                    yhat=scaler.inverse_transform(pd.DataFrame(yhat).values.reshape(-1,1))
                #                if order[1]!=0:
                #                    inverted = list()
                #                    for i in range(len(yhat)):
                #                        value = ut.inverse_difference(dataset, yhat[i], len(dataset) - i)
                #                        inverted.append(value)
                #                    inverted = np.array(inverted)
                #                    forecast=inverted[-1]
                #                else:
                #                    forecast = yhat[-1]
                # The last model output is taken as the next-step forecast.
                forecast = yhat[-1]
                if forecast < 0:
                    # NOTE(review): the fallback reads the full `dataset`,
                    # which includes the held-out observations - confirm.
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                pred_temp.append(forecast)

                # Feed the forecast back so the next step can build on it.
                train = np.append(train, forecast)

                # NOTE(review): diff_train is computed (and scaled) but not
                # used; the supervised frame below is rebuilt from `train`.
                diff_train = ut.difference(train, order[1])

                if scale_flag == 1:
                    scaler = ut.scaler_selection(key)
                    diff_train = scaler.fit_transform(
                        pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            # Drop the first forecast and keep at most the next three.
            pred_temp = pred_temp[1:4]
            mu.plotting(key, pred_temp, expected)
            # The last fold contributes all kept forecasts, earlier folds
            # only their first one.
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            rmse_val.append(mu.calculate_rmse(key, expected, pred_temp))
            itr = itr - 1

    else:

        # Work on a copy so the caller's dataset is not modified.
        dataset_1 = copy.deepcopy(dataset)
        diff_values = ut.difference(dataset_1, order[1])

        if scale_flag == 1:
            scaler = ut.scaler_selection(key)
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

        supervised = ut.timeseries_to_supervised(diff_values, order[0])
        data = supervised.values

        RF_model = mu.fit_model(data, model)
        # Search objects expose best_params_; anything else falls back to
        # get_params().
        try:
            parameter_values = model.best_params_
        except:
            parameter_values = model.get_params()

        # Two extra forecasts are generated and discarded after the loop.
        test_shape = test_shape + 2
        for i in range(test_shape):

            # All columns but the last are inputs; `y` is not used below.
            X, y = data[:, 0:-1], data[:, -1]

            yhat = mu.forecast_model(RF_model, X)
            #
            #            if scale_flag==1:
            #                yhat=scaler.inverse_transform(pd.DataFrame(yhat).values.reshape(-1,1))
            #            if order[1]!=0:
            #                inverted = list()
            #                for i in range(len(yhat)):
            #                    value = ut.inverse_difference(data, yhat[i], len(data) - i)
            #                    inverted.append(value)
            #                    inverted = np.array(inverted)
            #                forecast=inverted[-1]
            #            else:
            #                forecast = yhat[-1]
            # NOTE(review): the model was fitted on differenced/scaled data
            # and its output is used without the inverse transform that is
            # commented out above - confirm.
            forecast = yhat[-1]
            if forecast < 0:
                # NOTE(review): the fallback reads the supervised array
                # `data`, not the observation series `dataset_1` - confirm.
                forecast = mu.weighted_moving_average(data, 1, 3)[0]

            predictions.append(forecast)
            dataset_1 = np.append(dataset_1, forecast)

            # Rebuild the model inputs from the extended series.
            diff_values = ut.difference(dataset_1, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values
        # Discard the two extra leading forecasts.
        predictions = predictions[2:test_shape]
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val, parameter_values
예제 #10
0
def model_SVR_RBF(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Forecast ``tsize`` steps with a randomly tuned RBF-kernel SVR.

    Every step differences ``dataset`` with lag 1, converts it to a
    supervised frame with one lag, tunes an SVR with ``RandomizedSearchCV``
    (100 draws over power-of-two grids for gamma and C), refits with the
    best parameters and predicts the training inputs through
    ``mu.forecast_model``.  The predictions are passed through
    ``ut.inverse_difference`` and the last one becomes the forecast; a
    negative forecast is replaced by
    ``mu.weighted_moving_average(dataset, 1, 3)[0]``.  The forecast is then
    appended to ``dataset`` for the next step.

    Args:
        dataset: Sequence of observations.
        tsize: Number of steps to forecast; with ``train_flag == 1`` also the
            number of trailing supervised rows excluded from fitting.
        order: Unused.
        train_flag: 1 excludes the last ``tsize`` supervised rows.

    Returns:
        List of ``tsize`` forecasts truncated to int.
    """
    forecasts = []
    for _step in range(tsize):
        lagged = ut.timeseries_to_supervised(ut.difference(dataset, 1), 1)
        data = lagged.values

        train = data[0:-tsize] if train_flag == 1 else data
        X = train[:, 0:-1].reshape(-1, 1)
        y = train[:, -1]

        # Power-of-two grids: gamma in 2**-15 .. 2**3, C in 2**-5 .. 2**15.
        gamma_grid = [2 ** exponent for exponent in range(-15, 4)]
        c_grid = [2 ** exponent for exponent in range(-5, 16)]

        search_space = {'gamma': gamma_grid, 'kernel': ['rbf'], 'C': c_grid}
        grid_search = RandomizedSearchCV(
            SVR(), search_space, n_jobs=1, n_iter=100)
        grid_search.fit(X, y)

        # Refit a fresh estimator with the winning hyper-parameters.
        best = grid_search.best_params_
        clf = SVR(gamma=best["gamma"], kernel=best["kernel"], C=best["C"])
        clf.fit(X, y)
        yhat = mu.forecast_model(clf, X)

        inverted = np.array([
            ut.inverse_difference(dataset, yhat[idx], len(dataset) - idx)
            for idx in range(len(yhat))
        ])

        forecast = inverted[-1]
        if forecast < 0:
            forecast = mu.weighted_moving_average(dataset, 1, 3)[0]
        forecasts.append(forecast)
        dataset = np.append(dataset, forecast)
    return [int(value) for value in forecasts]
예제 #11
0
def training(details_data, datasets, forecast_period):

    facc_out = dict()
    rsq = dict()
    price = details_data['price']
    price = [float(i) for i in price]
    price = pd.Series(price).fillna(0).tolist()

    sku_list = details_data['sku']
    market = details_data['market']
    plant = details_data['plant']
    spn = details_data['spn']
    abc_data = details_data['abc_data']

    #Profiling
    data_prof = profiling.profiling_tech(datasets)

    #Clustering based on nature
    data_cluster = Cluster.clustering(data_prof)

    total_price = np.sum(price)
    sku_price = dict()
    for i, sku in enumerate(sku_list):
        sku_price[str(sku)] = price[i]

    #Market Based Clustering
#    plant_cluster=Cluster.clustering_plant(sku_list,plant,market)

#XYZ based on unit cost
    xyz_data = xyz.xyz_class(sku_price, total_price)
    #ABC based on volume
    abc_alter = abc.abc_class(datasets)

    #    trained_outputs = []
    forecast_results = []
    num = 0
    for incr, sku in enumerate(datasets):
        num += 1
        #        if sku!='1702460700':
        ##        if num!=1:
        #            continue
        prof = data_prof.iloc[incr]
        print("------------------------------------------------------------")
        print("Running SKU %d: %s..." % (num, sku))
        print("cluster :  ", data_cluster[sku])

        raw_data = copy.deepcopy(datasets[sku].T)
        output = ut.init_output(forecast_period, raw_data, prof)
        output['unit_cost'] = float(price[incr])
        output['market'] = str(market[incr])
        output['plant'] = str(plant[incr])
        if pd.isnull(abc_data[incr]) == True:
            output['Variability_Segment'] = abc_alter[sku]
        else:
            output['Variability_Segment'] = abc_data[incr]
        output['Velocity_segment'] = xyz_data[sku]
        output['spn'] = spn[incr]

        dataset = raw_data.copy()

        dataset = dataset[:-1]
        #        dataset = pp.dateformat(dataset)
        #        dataset, interval = pp.impute_missing_dates(dataset)
        #        print(interval.days)

        if ((dataset['sales'] == 0).all() == True
                or (set([math.isnan(x) for x in dataset['sales']]) == {True})):
            #            print(dataset['sales'])
            print("All zeros/NaNs")
            forecast = [0] * forecast_period
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(forecast, forecast))
            forecast_results = ft.output_forecast(sku, dataset,
                                                  datasets[sku].T, output,
                                                  forecast_results)
            continue

        sku_data = dataset.astype(np.float32)
        sku_data = pp.read_from_first_sales(sku_data['sales'])

        #size--->outlier bucket size
        #sparse_size ---> number of zeros to categorize as sparse data
        #freq ---> seasonality
        interval = 30
        size = 6
        sparse_size = 10
        freq = 12
        #        size,sparse_size,freq=pp.get_bucket_size(interval)

        test_nan = pd.DataFrame(sku_data[-freq:])
        test_nan = test_nan['sales']

        #if last 1 year is NaN, impute data with zero and forecast is MA(6)

        if sum(test_nan.isnull()) >= freq:
            print("Last 1 year NaN")
            sku_data = pp.data_imputation_zero(test_nan)
            sku_data = sku_data[:-5]
            expected = [0] * 5
            forecast = mu.moving_average(sku_data, forecast_period, 6)
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(expected, forecast))
            forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                  output, forecast_results)
            continue

#if # NaNs more than 60% impute with 0 else impute with values

        if sum(pd.isnull(sku_data)) > (0.6 * len(sku_data)):
            print("Nan Greater than 60%")
            sku_data = pp.data_imputation_zero(sku_data)

        else:
            print("Nan less than 60%")
            sku_data = pp.data_imputation(sku_data, freq)
            sku_data = sku_data[0]

        sku_data = pp.read_from_first_sales(sku_data)

        #After reading from first non-zero if data is insufficient ---> weighted MA(3)

        if len(sku_data) < 20:
            try:
                print("Weighted Moving Average")
                forecast = mu.weighted_moving_average(sku_data,
                                                      forecast_period, 3)
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)
            except:
                print("Less than 3")
                print(sku_data)
                forecast = mu.moving_average(sku_data, forecast_period,
                                             len(sku_data))
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)

            continue

        data_copy = sku_data.copy()
        data_copy = np.array(data_copy)
        #        plt.figure()
        #        plt.plot(data_copy)

        index1, index2, sflag1, sflag2 = pp.Sesonal_detection(sku_data)
        sku_data = pp.outlier_treatment_tech(sku_data, interval, size)
        sku_data = np.array(sku_data[0])

        if sflag1 == 1:
            sku_data[index1] = data_copy[index1]
        if sflag2 == 1:
            sku_data[index2] = data_copy[index2]
        else:
            sku_data = sku_data
#        plt.plot(sku_data)
#        plt.show()
#        continue
        sku_data = pd.DataFrame(sku_data)

        #Testing Stationarity
        d = 0
        df_test_result = tests.dickeyfullertest(
            sku_data.T.squeeze())  #pd.Series(sku_data[0])

        while df_test_result == 0:
            d += 1
            if d == 1:
                new_data = ut.difference(sku_data[0].tolist())
            else:
                new_data = ut.difference(new_data)
            df_test_result = tests.dickeyfullertest(new_data)

        sample = np.array(sku_data)
        repeat = mu.check_repetition(sample, freq, 1, len(sample))
        #Finding p and q value
        try:
            if d == 0:
                p1, ps, pl = plots.acf_plot(sku_data, freq)
                q = plots.pacf_plot(sku_data, freq)
                data = sku_data
            else:

                p, ps, pl = plots.acf_plot(new_data, freq)
                q = plots.pacf_plot(new_data, freq)
                data = new_data

            if repeat in ps:
                p = repeat
            elif repeat in pl:
                p = repeat
            else:
                p = pl[0]
            if p > freq:
                p = freq
        except:
            p = 1
            q = 1
            data = sku_data

        data = sku_data
        best_order = (p, d, q)
        print("BEST ORDER :", best_order)
        #TODO: Calculate tsize
        tsize = 5
        #        tsize = int(0.2*len(data))
        #        print(test)
        expected = data[-tsize:].reset_index(drop=True)
        expected = [float(i) for i in expected.values]
        #        print("Dimension: ", data.shape)
        train_6wa = sku_data[0:-tsize]
        predictions_ML, rmse_ML = train.time_series_using_ml(
            sku_data, tsize, best_order, data_cluster[sku])
        rmse_ARIMA, rmse_ES, rmse_naive, rmse_ma, predictions_ARIMA, predictions_ES, predictions_naive, predictions_ma = train.time_series_models(
            freq, sku_data, data, tsize, best_order, data_cluster[sku])
        print("Modeling done")

        rmse_TS = rmse_ARIMA.copy()
        rmse_TS.update(rmse_ES)
        rmse_TS.update(rmse_naive)
        rmse_TS.update(rmse_ma)

        predictions = predictions_ML
        predictions.update(predictions_ARIMA)
        predictions.update(predictions_ES)
        predictions.update(predictions_naive)
        predictions.update(predictions_ma)

        if data_cluster[sku] in [1, 4, 7, 10, 13, 16, 19, 22, 25]:
            rmse_Croston, predictions_Croston = mu.Croston_TSB(sku_data, tsize)
            rmse_TS.update(rmse_Croston)
            predictions.update(predictions_Croston)

        rmse_vol_ml = dict()
        for key in rmse_ML:
            std = np.std(rmse_ML[key])
            mean = np.mean(rmse_ML[key])
            rmse_vol_ml[key] = mean
#            if std == 0:
#                rmse_vol_ml[key]= mean
#            else:
#                rmse_vol_ml[key] = mean/std

        rmse_vol_ts = dict()
        for key in rmse_TS:
            mean = np.mean(rmse_TS[key])
            std = np.std(rmse_TS[key])
            rmse_vol_ts[key] = mean
#            if std == 0:
#                rmse_vol_ts[key] = mean
#            else:
#                rmse_vol_ts[key]= mean/std

        # Top 3 ML models and top 3 TS models, ranked by lowest mean RMSE
        best_models_ml = sorted(rmse_vol_ml,
                                key=rmse_vol_ml.get,
                                reverse=False)[:3]
        best_models_ts = sorted(rmse_vol_ts,
                                key=rmse_vol_ts.get,
                                reverse=False)[:3]

        #        forecasts_ml = dict()
        #        validation_ml = dict()
        bias_ml = []
        accuracy_ml = []
        for model in best_models_ml:
            #            temp = ft.model_predict(model, best_order,data, forecast_period)
            #            forecasts_ml[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ml[model] = predictions[model]
            bias_ml.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ml.append(mu.calculate_facc(expected, predictions[model]))
        bias_ml = [float(format(i, '.3f')) for i in bias_ml]
        accuracy_ml = [float(format(i, '.3f')) for i in accuracy_ml]

        #        forecasts_ts = dict()
        #        validation_ts = dict()
        bias_ts = []
        accuracy_ts = []
        for model in best_models_ts:
            #            temp = ft.model_predict(model, best_order, sku_data, forecast_period,repeat)
            #            forecasts_ts[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ts[model] = predictions[model]
            bias_ts.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ts.append(mu.calculate_facc(expected, predictions[model]))
        bias_ts = [float(format(i, '.3f')) for i in bias_ts]
        accuracy_ts = [float(format(i, '.3f')) for i in accuracy_ts]

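        # Single best ML model and single best TS model (lowest mean RMSE each);
        # these two are the ones combined into the ensemble below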
        error_ml = min(rmse_vol_ml.values())
        error_ts = min(rmse_vol_ts.values())

        best_models = [
            min(rmse_vol_ml, key=lambda x: rmse_vol_ml.get(x)),
            min(rmse_vol_ts, key=lambda x: rmse_vol_ts.get(x))
        ]
        print("BEST MODELS :", best_models)
        print("ERRORS OF BEST MODELS :", error_ml, error_ts)
        forecast_ml, param_val_fore = ft.model_predict(best_models[0],
                                                       best_order, data,
                                                       forecast_period)

        if best_models[1] == 'Croston':
            rmse_Croston, forecast_ts = mu.Croston_TSB(sku_data,
                                                       forecast_period)
            forecast_ts = forecast_ts['Croston']
        else:
            forecast_ts, param_val = ft.model_predict(best_models[1],
                                                      best_order, sku_data,
                                                      forecast_period, repeat)

        forecast_ml = [0 if i < 0 else int(i) for i in forecast_ml]
        forecast_ts = [0 if i < 0 else int(i) for i in forecast_ts]

        weight_ts, weight_ml = ut.weight_calculation(data, best_models,
                                                     best_order)
        print("weight ts:", weight_ts)
        print("weight ml:", weight_ml)

        Vm = predictions[best_models[0]]
        Vt = predictions[best_models[1]]

        Ve = ut.method_ensemble(Vm, Vt, weight_ml, weight_ts, tsize)
        error_en = mu.calculate_rmse('Ensemble', expected, Ve)

        bias_en = []
        accuracy_en = []

        bias_en.append((sum(expected) - sum(Ve)) / len(expected))
        accuracy_en.append(mu.calculate_facc(expected, Ve))
        bias_en = [float(format(i, '.3f')) for i in bias_en]
        accuracy_en = [float(format(i, '.3f')) for i in accuracy_en]
        #Ensemble of six month naive and weighted average
        V6wa, rmse_6wa = ts.model_Naive('naive6wa',
                                        train_6wa,
                                        tsize, (0, 0, 0),
                                        0,
                                        train_flag=1)
        error_6wa = np.mean(rmse_6wa)
        forecast_6wa, param_val = ft.model_predict('naive6wa', best_order,
                                                   data, forecast_period)

        forecast_en = ut.method_ensemble(forecast_ml, forecast_ts, weight_ml,
                                         weight_ts, forecast_period)

        output['forecast_period'] = forecast_period
        output['interval'] = 'M'
        output['best_models_ml'] = best_models_ml
        output['best_models_ts'] = best_models_ts
        output['bias_ml'] = bias_ml
        output['bias_ts'] = bias_ts
        output['bias_en'] = bias_en
        output['accuracy_ml'] = accuracy_ml
        output['accuracy_ts'] = accuracy_ts
        output['accuracy_en'] = accuracy_en
        output['TS'] = op.best_model_details_ts(best_models[1], bias_ts[0],
                                                accuracy_ts[0], best_order)
        output['ML'] = op.best_model_details_ml(best_models[0], bias_ml[0],
                                                accuracy_ml[0], param_val_fore)
        output['Ensemble'] = {"bias": bias_en[0], "accuracy": accuracy_en[0]}

        error_min_model = min(error_ml, error_ts, error_en)

        print("Errors:", )
        print("ML:", error_ml)
        print("TS:", error_ts)
        print("Ensemble:", error_en)
        print("six_naive_WA", error_6wa)

        min_error = min(error_min_model, error_6wa)

        if min_error == error_ml:
            ftt = forecast_ml
        elif min_error == error_ts:
            ftt = forecast_ts
        elif min_error == error_en:
            ftt = forecast_en
        else:
            ftt = []

        if min_error == error_6wa or all(elem == ftt[0]
                                         for elem in ftt) == True:
            print("Best forecast from six naive")
            forecast = forecast_6wa
            output['validation'] = ut.assign_dates(V6wa, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, V6wa)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ml:
            print("Best forecast from ML")
            forecast = forecast_ml
            output['validation'] = ut.assign_dates(Vm, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vm)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_en:
            print("Best forecast from Ensemble")
            forecast = forecast_en
            output['validation'] = ut.assign_dates(Ve, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Ve)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ts:
            print("Best forecast from TS")
            forecast = forecast_ts
            output['validation'] = ut.assign_dates(Vt, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vt)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
#
        print("Forecasts:")
        print("ML:", forecast_ml)
        print("TS:", forecast_ts)
        print("Ensemble:", forecast_en)
        print("Best Forecast", forecast)

        output['forecast_values'] = ut.assign_dates(forecast, 'forecast',
                                                    dataset.tail(1))
        output['facc'], output['mape'], output[
            'bias'] = ft.calculate_forecast_accuracy(raw_data.iloc[-1].sales,
                                                     forecast[0])
        facc_out[sku] = np.mean(validation_facc)

        output['forecast_ml'] = ut.assign_dates(forecast_ml, 'forecast',
                                                dataset.tail(1))
        output['forecast_ts'] = ut.assign_dates(forecast_ts, 'forecast',
                                                dataset.tail(1))
        output['forecast_en'] = ut.assign_dates(forecast_en, 'forecast',
                                                dataset.tail(1))
        output['model_ml'] = best_models[0]
        output['model_ts'] = best_models[1]

        forecast_results = ft.output_forecast(sku, dataset, sku_data, output,
                                              forecast_results)

        ft.plot_all_forecasts(dataset, sku_data, forecast, forecast_en,
                              forecast_ml, forecast_ts, sku)

    return forecast_results, facc_out