def model_ARIMA(key,train, test_shape, order, train_flag = 0, test = []):
    """Produce integer forecasts from a repeatedly refitted ARIMA model.

    The differencing term of ``order`` is always overridden with 1, so the
    fitted model is ``ARIMA(order[0], 1, order[2])``.

    Args:
        key: Label forwarded to ``mu.plotting`` and ``mu.calculate_rmse``.
        train: Observations; either an object exposing ``.values`` or an
            iterable of scalars / one-element arrays.
        test_shape: Number of steps to forecast when ``train_flag`` is not 1.
        order: Indexable ``(p, d, q)``; only positions 0 and 2 are read.
        train_flag: ``1`` selects validation mode, anything else forecast mode.
        test: Unused. Kept only so existing callers keep working.

    Returns:
        ``(predictions, rmse_val)``. ``predictions`` is a list of ints (NaN
        becomes 0). In validation mode it holds the first forecast of the
        first two folds followed by all three forecasts of the last fold, and
        ``rmse_val`` holds one ``mu.calculate_rmse`` result per fold. In
        forecast mode ``rmse_val`` is empty.
    """
    predictions = []
    rmse_val = []

    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # replacement and, via ``np.asarray``, also accepts plain Python numbers.
    series = getattr(train, 'values', train)
    history = [np.asarray(x).item() for x in series]

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asarray(x).item() for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop = True)
            try:
                order = (order[0], 1, order[2])
                for j in range(3):
                    model = ARIMA(v_train, order = order)
                    model_fit = model.fit(disp=0)
                    # forecast()[0] is indexed like an array in the forecast
                    # branch; unwrap it here too so only scalars are stored.
                    yhat = np.asarray(model_fit.forecast()[0]).item()
                    if yhat < 0:
                        # Fall back on the fold's own training data, not on
                        # ``history``, which still contains the held-out points.
                        yhat = mu.weighted_moving_average(v_train, 1, 3)[0]
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            itr = itr - 1
    else:
        try:
            # TODO: check order
            order = (order[0], 1, order[2])
            for t in range(test_shape):
                model = ARIMA(history, order = order)
                model_fit = model.fit(disp=0)
                yhat = np.asarray(model_fit.forecast()[0]).item()
                if yhat < 0:
                    yhat = mu.weighted_moving_average(history, 1, 3)[0]
                predictions.append(yhat)
                history.append(yhat)
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [0 if pd.isnull(i) else int(i) for i in predictions]
    return predictions,rmse_val
def model_MA(key,train,test_shape,train_flag = 0):
    """Forecast with a 3-point simple or weighted moving average.

    Args:
        key: ``'sma'`` for the plain mean of the last three values, ``'wma'``
            for the last three values weighted 0.25 / 0.35 / 0.4 (oldest to
            newest). Any other key produces no forecasts.
        train: Observations; either an object exposing ``.values`` or an
            iterable of scalars / one-element arrays.
        test_shape: Number of steps to forecast when ``train_flag`` is not 1.
        train_flag: ``1`` selects validation mode, anything else forecast mode.

    Returns:
        ``(forecast, rmse_val)``. ``forecast`` is a list of ints. In
        validation mode it holds the first forecast of the first two folds
        followed by all three forecasts of the last fold, and ``rmse_val``
        holds one ``mu.calculate_rmse`` result per fold. In forecast mode
        ``rmse_val`` is empty.
    """
    forecast = []
    rmse_val = []
    alpha = [0.25, 0.35, 0.4]

    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # replacement and, via ``np.asarray``, also accepts plain Python numbers.
    series = getattr(train, 'values', train)
    history = [np.asarray(x).item() for x in series]

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = data[:-itr].values
            v_expected = data.tail(itr).head(3)
            for j in range(3):
                if key == 'sma':
                    pred1 = np.mean(v_train[-3:])
                elif key == 'wma':
                    window = [np.asarray(x).item() for x in v_train[-3:]]
                    pred1 = np.dot(window, alpha)
                else:
                    continue
                pred_temp.append(pred1)
                # Feed the forecast back in so the next step builds on it.
                v_train = np.append(v_train, pred1)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if i == 2:
                forecast.extend(pred_temp)
            else:
                forecast.append(pred_temp[0])

            itr = itr - 1
    else:
        for num in range(test_shape):
            if key == 'sma':
                pred1 = pd.DataFrame(history).tail(3).mean()[0]
            elif key == 'wma':
                pred1 = np.dot(pd.DataFrame(history).tail(3)[0], alpha)
            else:
                continue
            forecast.append(pred1)
            history.append(pred1)

    forecast = [int(i) for i in forecast]
    return forecast,rmse_val
Ejemplo n.º 3
0
def model_LinearRegression(dataset=[], tsize=0, order=(0, 0, 0), train_flag=0):
    """Validate a grid-searched linear regression on three hold-out folds.

    Only ``train_flag == 1`` does any work; otherwise two empty lists are
    returned.

    Args:
        dataset: Sliceable series of observations.
        tsize: Number of recursive forecast steps generated per fold.
        order: Indexable ``(p, d, q)``; ``order[0]`` is passed to
            ``ut.timeseries_to_supervised`` and ``order[1]`` to
            ``ut.difference``.
        train_flag: ``1`` runs the validation folds.

    Returns:
        ``(predictions, rmse_val)``: the last fold's scored forecasts cast to
        int, and one ``mu.calculate_rmse`` result per fold.
    """
    predictions = []
    rmse_val = []

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        for i in range(3):
            # Start each fold with an empty buffer. Without this reset the
            # ``[1:4]`` slice below mixed in forecasts from earlier folds, so
            # folds 2 and 3 were scored against stale values.
            predictions = []

            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3).reset_index(drop=True)

            train = dataset[:-itr]
            # NOTE(review): differencing uses the full ``dataset`` rather than
            # ``train`` -- confirm whether the hold-out should be excluded.
            diff_values = ut.difference(dataset, order[1])

            scaler = ut.scaler_selection('lr')
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values

            clf = LinearRegression()
            # NOTE(review): the "normalize" option may not exist in newer
            # scikit-learn releases -- verify against the pinned version.
            param = {
                "fit_intercept": [True, False],
                "normalize": [False],
                "copy_X": [True, False]
            }
            grid = GridSearchCV(clf, param, n_jobs=1)
            model = mu.fit_model(data, grid)

            for j in range(tsize):
                # Every column but the last is a feature column.
                X = data[:, 0:-1]
                yhat = mu.forecast_model(model, X)

                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                predictions.append(forecast)
                train = np.append(train, forecast)
                # NOTE(review): the scaled difference is computed but the next
                # feature matrix is built from the raw ``train`` values.
                diff_train = ut.difference(train, order[1])
                diff_train = scaler.fit_transform(
                    pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            # Score steps 2-4 of this fold against the expected values.
            predictions = predictions[1:4]
            rmse_val.append(mu.calculate_rmse('GR_LR', expected, predictions))
            itr = itr - 1

    predictions = [int(i) for i in predictions]
    return predictions, rmse_val
Ejemplo n.º 4
0
def weight_calculation(data,best_models,best_order):
    """Average the ensemble weights of two models over three hold-out folds.

    ``best_models[0]`` is treated as the ML model and ``best_models[1]`` as
    the time-series model. Each fold drops the last 5, 4 and then 3 rows of
    ``data``, asks ``ft.model_predict`` for 3 steps from each model, scores
    both with ``mu.calculate_rmse`` against the first 3 held-out rows, and
    feeds the two errors to ``calculate_weight``.

    Returns:
        ``(weight_ts, weight_ml)``: the mean weight of each model across the
        three folds.
    """
    total_ts = 0
    total_ml = 0
    for fold, holdout in enumerate((5, 4, 3)):
        print("Running models for ensemble ...",fold)
        history = data[:-holdout]
        actual = data.tail(holdout).head(3)
        forecast_ml, _ = ft.model_predict(best_models[0], best_order, history, 3)
        forecast_ts, _ = ft.model_predict(best_models[1], best_order, history, 3)
        actual = actual.reset_index(drop=True)
        # Only the time-series forecast is wrapped in a DataFrame for scoring.
        ts_frame = pd.DataFrame(forecast_ts)
        rmse_ts = mu.calculate_rmse(best_models[1], actual, ts_frame)
        rmse_ml = mu.calculate_rmse(best_models[0], actual, forecast_ml)
        total_ts += calculate_weight(rmse_ts, rmse_ml)
        total_ml += calculate_weight(rmse_ml, rmse_ts)
    return total_ts / 3, total_ml / 3
def model_ARMA(key,train, test_shape, order, train_flag = 0, test = []):
    """Produce integer forecasts from a repeatedly refitted ARMA model.

    The model is fitted with ``disp=0, transparams=False, trend='nc'``.

    Args:
        key: Label forwarded to ``mu.plotting`` and ``mu.calculate_rmse``.
        train: Observations; either an object exposing ``.values`` or an
            iterable of scalars / one-element arrays.
        test_shape: Number of steps to forecast when ``train_flag`` is not 1.
        order: Model order passed straight to ``ARMA``.
        train_flag: ``1`` selects validation mode, anything else forecast mode.
        test: Unused. Kept only so existing callers keep working.

    Returns:
        ``(predictions, rmse_val)``. ``predictions`` is a list of ints. In
        validation mode it holds the first forecast of the first two folds
        followed by all three forecasts of the last fold, and ``rmse_val``
        holds one ``mu.calculate_rmse`` result per fold. In forecast mode
        ``rmse_val`` is empty.
    """
    predictions = []
    rmse_val = []

    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # replacement and, via ``np.asarray``, also accepts plain Python numbers.
    series = getattr(train, 'values', train)
    history = [np.asarray(x).item() for x in series]

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asarray(x).item() for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop = True)
            try:
                for j in range(3):
                    model = ARMA(v_train, order = order)
                    model_fit = model.fit(disp=0, transparams=False, trend='nc')
                    # forecast()[0] is indexed like an array in the forecast
                    # branch; unwrap it so only scalars reach the training
                    # list and the RMSE helper.
                    pred = np.asarray(model_fit.forecast()[0]).item()
                    if pred < 0:
                        pred = mu.weighted_moving_average(v_train, 1, 3)[0]
                    pred_temp.append(pred)
                    v_train.append(pred)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))
            itr = itr - 1
    else:
        try:
            for t in range(test_shape):
                model = ARMA(history, order = order)
                model_fit = model.fit(disp=0, transparams=False, trend='nc')
                yhat = model_fit.forecast()[0]
                # NOTE(review): the raw forecast is passed through
                # ut.inverse_difference although ARMA itself does not
                # difference -- confirm callers pass a differenced series.
                inverted = np.array([
                    ut.inverse_difference(history, yhat[i], len(history) - i)
                    for i in range(len(yhat))
                ])
                pred = inverted[-1]
                if pred < 0:
                    # Index the helper's result as the validation branch does;
                    # a bare list here would break the int() cast below.
                    pred = mu.weighted_moving_average(history, 1, 3)[0]
                predictions.append(pred)
                # Append a scalar, not the whole forecast array, so the next
                # refit receives a flat list.
                history.append(yhat[-1])
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [int(i) for i in predictions]
    return predictions,rmse_val
def model_Naive(key,train,test_shape,order,rept,train_flag = 0):
    """Forecast by repeating (or blending) earlier observations.

    Supported keys:
        ``'naive'``: value ``order[0]`` steps back.
        ``'naive_rept'`` / ``'naive2'``: value ``rept`` steps back. The
            original used a different spelling in each mode; both are now
            accepted in both modes.
        ``'naive3'``, ``'naive6'``, ``'naive12'``: value 3 / 6 / 12 steps back.
        ``'naive12wa'``: ``0.55 * value[-12] + 0.45 * value[-24]``; a missing
            value counts as 0.
        ``'naive6wa'``: ``0.6 * value[-6] + 0.4 * wma``, where ``wma`` weights
            the last three values 0.25 / 0.35 / 0.4; a missing ``value[-6]``
            counts as 0.
    Any other key produces no forecasts.

    Args:
        key: One of the keys above.
        train: Observations; either an object exposing ``.values`` or an
            iterable of scalars / one-element arrays.
        test_shape: Number of steps to forecast when ``train_flag`` is not 1.
        order: Indexable; only ``order[0]`` is read.
        rept: Look-back distance for ``'naive_rept'`` / ``'naive2'``.
        train_flag: ``1`` selects validation mode, anything else forecast mode.

    Returns:
        ``(forecast, rmse_val)``. ``forecast`` is a list of ints. In
        validation mode it holds the first forecast of the first two folds
        followed by all three forecasts of the last fold, and ``rmse_val``
        holds one ``mu.calculate_rmse`` result per fold. In forecast mode
        ``rmse_val`` is empty, and single-lag keys stop early (returning fewer
        than ``test_shape`` values) when the series is shorter than the lag.
    """
    forecast = []
    rmse_val = []
    p = order[0]

    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # replacement and, via ``np.asarray``, also accepts plain Python numbers.
    series = getattr(train, 'values', train)
    history = [np.asarray(x).item() for x in series]

    # Look-back distance of every variant that simply repeats one past value.
    single_lags = {
        'naive': p,
        'naive_rept': rept,
        'naive2': rept,
        'naive3': 3,
        'naive6': 6,
        'naive12': 12,
    }
    known_keys = set(single_lags) | {'naive12wa', 'naive6wa'}
    alpha = [0.25, 0.35, 0.4]

    def lag_or_none(values, lag):
        # Value ``lag`` steps back, or None when the series is too short.
        try:
            return values[-lag]
        except (IndexError, TypeError):
            return None

    def zero_if_missing(value):
        return 0 if value is None else value

    def predict_next(values):
        # Next forecast for ``key``; None when a single-lag value is missing.
        if key in single_lags:
            return lag_or_none(values, single_lags[key])
        if key == 'naive12wa':
            yt = zero_if_missing(lag_or_none(values, 12))
            yt_1 = zero_if_missing(lag_or_none(values, 24))
            return (0.55 * yt) + (0.45 * yt_1)
        # 'naive6wa': blend the value six steps back with a weighted average.
        naive_six = zero_if_missing(lag_or_none(values, 6))
        weighted_avg = np.dot(values[-3:], alpha)
        return (0.6 * naive_six) + (0.4 * weighted_avg)

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            # A flat list keeps every stored value a scalar; the original
            # mixed one-element arrays and scalars here.
            v_train = history[:-itr]
            v_expected = data.tail(itr).head(3)

            if key in known_keys:
                for j in range(3):
                    try:
                        t = predict_next(v_train)
                    except (ValueError, TypeError):
                        # Fewer than three points for the weighted average.
                        t = None
                    # Validation substitutes 0 for anything it cannot compute.
                    t = zero_if_missing(t)
                    pred_temp.append(t)
                    v_train.append(t)

            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if i == 2:
                forecast.extend(pred_temp)
            else:
                forecast.append(pred_temp[0])

            itr = itr - 1
    elif key in known_keys:
        for num in range(test_shape):
            t = predict_next(history)
            if t is None:
                # Series shorter than the lag: nothing is appended, so every
                # later step would fail the same way.
                break
            forecast.append(t)
            history.append(t)

    forecast = [int(i) for i in forecast]
    return forecast,rmse_val
def model_ES(key, train, test_shape = 0, train_flag = 0, test = []):
    """Produce integer forecasts from repeatedly refitted exponential smoothing.

    Args:
        key: ``'SES'`` uses ``SimpleExpSmoothing``, ``'HWES'`` uses
            ``ExponentialSmoothing``. Any other key falls through to the
            moving-average fallback.
        train: Observations; either an object exposing ``.values`` or an
            iterable of scalars / one-element arrays.
        test_shape: Number of steps to forecast when ``train_flag`` is not 1.
        train_flag: ``1`` selects validation mode, anything else forecast mode.
        test: Unused. Kept only so existing callers keep working.

    Returns:
        ``(predictions, rmse_val)``. ``predictions`` is a list of ints. In
        validation mode it holds the first forecast of the first two folds
        followed by all three forecasts of the last fold, and ``rmse_val``
        holds one ``mu.calculate_rmse`` result per fold. In forecast mode
        ``rmse_val`` is empty.
    """
    predictions = []
    rmse_val = []

    # ``np.asscalar`` was removed from NumPy; ``ndarray.item()`` is the
    # replacement and, via ``np.asarray``, also accepts plain Python numbers.
    series = getattr(train, 'values', train)
    history = [np.asarray(x).item() for x in series]

    def one_step_ahead(values):
        # Fit the selected smoother on ``values`` and forecast the next point.
        if key == 'SES':
            model = SimpleExpSmoothing(values)
        elif key == 'HWES':
            model = ExponentialSmoothing(values)
        else:
            # Caught by the callers below, which then use the moving average.
            raise ValueError("unsupported exponential smoothing key: %r" % (key,))
        model_fit = model.fit()
        yhat = model_fit.predict(len(values), len(values))
        if yhat < 0:
            yhat = mu.weighted_moving_average(values, 1, 3)
        return yhat[0]

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        data = pd.DataFrame(history)
        for i in range(3):
            pred_temp = []
            v_train = [np.asarray(x).item() for x in data[:-itr].values]
            v_expected = data.tail(itr).head(3).reset_index(drop = True)
            try:
                for t in range(3):
                    # Fit on the fold's training data. The original fitted on
                    # the full ``history``, which includes the held-out points
                    # and never changes between steps.
                    yhat = one_step_ahead(v_train)
                    pred_temp.append(yhat)
                    v_train.append(yhat)
            except Exception:
                # Best effort: fill the remaining steps with a moving average.
                pred_temp.extend(mu.moving_average(v_train, 3 - len(pred_temp), 3))
            mu.plotting(key, pred_temp, v_expected)
            rmse_val.append(mu.calculate_rmse(key, v_expected, pred_temp))

            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            itr = itr - 1
    else:
        try:
            for t in range(test_shape):
                yhat = one_step_ahead(history)
                predictions.append(yhat)
                history.append(yhat)
        except Exception:
            # Best effort: fill the remaining steps with a moving average.
            predictions.extend(mu.moving_average(history, test_shape - len(predictions), 3))

    predictions = [int(i) for i in predictions]
    return predictions,rmse_val
Ejemplo n.º 8
0
def model_ML(dataset=[],
             tsize=0,
             test_shape=0,
             model=np.nan,
             key='',
             order=(0, 0, 0),
             train_flag=0):
    """Validate or forecast with a generic regression model on lagged features.

    Args:
        dataset: Sliceable series of observations.
        tsize: Unused in this function.
        test_shape: Number of recursive steps per validation fold, or the
            number of forecasts wanted in forecast mode.
        model: Estimator (or search object) handed to ``mu.fit_model``.
        key: Model label. ``'lr'``, ``'lasso'``, ``'ridge'``, ``'knn'`` and
            ``'svmr'`` additionally trigger scaling via
            ``ut.scaler_selection``.
        order: Indexable ``(p, d, q)``; ``order[0]`` is passed to
            ``ut.timeseries_to_supervised`` and ``order[1]`` to
            ``ut.difference``.
        train_flag: ``1`` selects validation mode, anything else forecast mode.

    Returns:
        ``(predictions, rmse_val, parameter_values)``. ``predictions`` is a
        list of ints. In validation mode it holds the first scored forecast
        of the first two folds followed by all scored forecasts of the last
        fold, ``rmse_val`` holds one ``mu.calculate_rmse`` result per fold,
        and ``parameter_values`` stays empty. In forecast mode ``rmse_val``
        is empty and ``parameter_values`` is ``model.best_params_`` when
        available, else ``model.get_params()``.
    """
    predictions = []
    pred_temp = []
    rmse_val = []
    parameter_values = {}
    scale_flag = 1 if key in ('lr', 'lasso', 'ridge', 'knn', 'svmr') else 0

    if train_flag == 1:
        # Three folds holding out the last 5, 4 and 3 observations.
        itr = 5
        for i in range(3):
            expected = pd.DataFrame(dataset)
            expected = expected.tail(itr).head(3)
            expected = expected.reset_index(drop=True)

            train = dataset[:-itr]

            # NOTE(review): the differenced (and optionally scaled) values
            # are computed but the supervised frame is built from the raw
            # ``train`` -- see the TODO below.
            diff_values = ut.difference(train, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(train, order[0])
            data = supervised.values

            RF_model = mu.fit_model(data, model)

            pred_temp = []

            for j in range(test_shape):
                # Every column but the last is a feature column.
                X = data[:, 0:-1]
                yhat = mu.forecast_model(RF_model, X)

                # TODO: inverse differencing and inverse scaling of ``yhat``
                # are not applied yet.
                forecast = yhat[-1]
                if forecast < 0:
                    forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

                pred_temp.append(forecast)

                train = np.append(train, forecast)

                diff_train = ut.difference(train, order[1])

                if scale_flag == 1:
                    scaler = ut.scaler_selection(key)
                    diff_train = scaler.fit_transform(
                        pd.DataFrame(diff_train).values.reshape(-1, 1))

                supervised = ut.timeseries_to_supervised(train, order[0])
                data = supervised.values

            # Only steps 2-4 of each fold are plotted and scored.
            pred_temp = pred_temp[1:4]
            mu.plotting(key, pred_temp, expected)
            if i == 2:
                predictions.extend(pred_temp)
            else:
                predictions.append(pred_temp[0])

            rmse_val.append(mu.calculate_rmse(key, expected, pred_temp))
            itr = itr - 1

    else:

        dataset_1 = copy.deepcopy(dataset)
        diff_values = ut.difference(dataset_1, order[1])

        if scale_flag == 1:
            scaler = ut.scaler_selection(key)
            diff_values = scaler.fit_transform(
                pd.DataFrame(diff_values).values.reshape(-1, 1))

        supervised = ut.timeseries_to_supervised(diff_values, order[0])
        data = supervised.values

        RF_model = mu.fit_model(data, model)
        try:
            parameter_values = model.best_params_
        except AttributeError:
            # Plain estimators have no ``best_params_``.
            parameter_values = model.get_params()

        # Two extra steps are generated and then discarded by the slice below.
        test_shape = test_shape + 2
        for i in range(test_shape):

            X = data[:, 0:-1]

            yhat = mu.forecast_model(RF_model, X)

            # TODO: inverse differencing and inverse scaling of ``yhat`` are
            # not applied yet.
            forecast = yhat[-1]
            if forecast < 0:
                # Fall back on the raw series, as the validation branch does;
                # the original passed the supervised feature matrix ``data``.
                forecast = mu.weighted_moving_average(dataset, 1, 3)[0]

            predictions.append(forecast)
            dataset_1 = np.append(dataset_1, forecast)

            diff_values = ut.difference(dataset_1, order[1])

            if scale_flag == 1:
                scaler = ut.scaler_selection(key)
                diff_values = scaler.fit_transform(
                    pd.DataFrame(diff_values).values.reshape(-1, 1))

            supervised = ut.timeseries_to_supervised(diff_values, order[0])
            data = supervised.values
        predictions = predictions[2:test_shape]
    predictions = [int(i) for i in predictions]
    return predictions, rmse_val, parameter_values
Ejemplo n.º 9
0
def training(details_data, datasets, forecast_period):

    facc_out = dict()
    rsq = dict()
    price = details_data['price']
    price = [float(i) for i in price]
    price = pd.Series(price).fillna(0).tolist()

    sku_list = details_data['sku']
    market = details_data['market']
    plant = details_data['plant']
    spn = details_data['spn']
    abc_data = details_data['abc_data']

    #Profiling
    data_prof = profiling.profiling_tech(datasets)

    #Clustering based on nature
    data_cluster = Cluster.clustering(data_prof)

    total_price = np.sum(price)
    sku_price = dict()
    for i, sku in enumerate(sku_list):
        sku_price[str(sku)] = price[i]

    #Market Based Clustering
#    plant_cluster=Cluster.clustering_plant(sku_list,plant,market)

#XYZ based on unit cost
    xyz_data = xyz.xyz_class(sku_price, total_price)
    #ABC based on volume
    abc_alter = abc.abc_class(datasets)

    #    trained_outputs = []
    forecast_results = []
    num = 0
    for incr, sku in enumerate(datasets):
        num += 1
        #        if sku!='1702460700':
        ##        if num!=1:
        #            continue
        prof = data_prof.iloc[incr]
        print("------------------------------------------------------------")
        print("Running SKU %d: %s..." % (num, sku))
        print("cluster :  ", data_cluster[sku])

        raw_data = copy.deepcopy(datasets[sku].T)
        output = ut.init_output(forecast_period, raw_data, prof)
        output['unit_cost'] = float(price[incr])
        output['market'] = str(market[incr])
        output['plant'] = str(plant[incr])
        if pd.isnull(abc_data[incr]) == True:
            output['Variability_Segment'] = abc_alter[sku]
        else:
            output['Variability_Segment'] = abc_data[incr]
        output['Velocity_segment'] = xyz_data[sku]
        output['spn'] = spn[incr]

        dataset = raw_data.copy()

        dataset = dataset[:-1]
        #        dataset = pp.dateformat(dataset)
        #        dataset, interval = pp.impute_missing_dates(dataset)
        #        print(interval.days)

        if ((dataset['sales'] == 0).all() == True
                or (set([math.isnan(x) for x in dataset['sales']]) == {True})):
            #            print(dataset['sales'])
            print("All zeros/NaNs")
            forecast = [0] * forecast_period
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(forecast, forecast))
            forecast_results = ft.output_forecast(sku, dataset,
                                                  datasets[sku].T, output,
                                                  forecast_results)
            continue

        sku_data = dataset.astype(np.float32)
        sku_data = pp.read_from_first_sales(sku_data['sales'])

        #size--->outlier bucket size
        #sparse_size ---> number of zeros to categorize as sparse data
        #freq ---> seasonality
        interval = 30
        size = 6
        sparse_size = 10
        freq = 12
        #        size,sparse_size,freq=pp.get_bucket_size(interval)

        test_nan = pd.DataFrame(sku_data[-freq:])
        test_nan = test_nan['sales']

        #if last 1 year is NaN, impute data with zero and forecast is MA(6)

        if sum(test_nan.isnull()) >= freq:
            print("Last 1 year NaN")
            sku_data = pp.data_imputation_zero(test_nan)
            sku_data = sku_data[:-5]
            expected = [0] * 5
            forecast = mu.moving_average(sku_data, forecast_period, 6)
            output['forecast_values'] = ut.assign_dates(
                forecast, 'forecast', dataset.tail(1))
            output['facc'], output['mape'], output[
                'bias'] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
            facc_out[sku] = np.mean(
                ft.calculate_validation_facc(expected, forecast))
            forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                  output, forecast_results)
            continue

#if # NaNs more than 60% impute with 0 else impute with values

        if sum(pd.isnull(sku_data)) > (0.6 * len(sku_data)):
            print("Nan Greater than 60%")
            sku_data = pp.data_imputation_zero(sku_data)

        else:
            print("Nan less than 60%")
            sku_data = pp.data_imputation(sku_data, freq)
            sku_data = sku_data[0]

        sku_data = pp.read_from_first_sales(sku_data)

        #After reading from first non-zero if data is insufficient ---> weighted MA(3)

        if len(sku_data) < 20:
            try:
                print("Weighted Moving Average")
                forecast = mu.weighted_moving_average(sku_data,
                                                      forecast_period, 3)
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)
            except:
                print("Less than 3")
                print(sku_data)
                forecast = mu.moving_average(sku_data, forecast_period,
                                             len(sku_data))
                output['forecast_values'] = ut.assign_dates(
                    forecast, 'forecast', dataset.tail(1))
                output['facc'], output['mape'], output[
                    'bias'] = ft.calculate_forecast_accuracy(
                        raw_data.iloc[-1], forecast[0])
                facc_out[sku] = ft.calculate_forecast_accuracy(
                    raw_data.iloc[-1], forecast[0])
                forecast_results = ft.output_forecast(sku, dataset, sku_data,
                                                      output, forecast_results)

            continue

        data_copy = sku_data.copy()
        data_copy = np.array(data_copy)
        #        plt.figure()
        #        plt.plot(data_copy)

        index1, index2, sflag1, sflag2 = pp.Sesonal_detection(sku_data)
        sku_data = pp.outlier_treatment_tech(sku_data, interval, size)
        sku_data = np.array(sku_data[0])

        if sflag1 == 1:
            sku_data[index1] = data_copy[index1]
        if sflag2 == 1:
            sku_data[index2] = data_copy[index2]
        else:
            sku_data = sku_data
#        plt.plot(sku_data)
#        plt.show()
#        continue
        sku_data = pd.DataFrame(sku_data)

        #Testing Stationarity
        d = 0
        df_test_result = tests.dickeyfullertest(
            sku_data.T.squeeze())  #pd.Series(sku_data[0])

        while df_test_result == 0:
            d += 1
            if d == 1:
                new_data = ut.difference(sku_data[0].tolist())
            else:
                new_data = ut.difference(new_data)
            df_test_result = tests.dickeyfullertest(new_data)

        sample = np.array(sku_data)
        repeat = mu.check_repetition(sample, freq, 1, len(sample))
        #Finding p and q value
        try:
            if d == 0:
                p1, ps, pl = plots.acf_plot(sku_data, freq)
                q = plots.pacf_plot(sku_data, freq)
                data = sku_data
            else:

                p, ps, pl = plots.acf_plot(new_data, freq)
                q = plots.pacf_plot(new_data, freq)
                data = new_data

            if repeat in ps:
                p = repeat
            elif repeat in pl:
                p = repeat
            else:
                p = pl[0]
            if p > freq:
                p = freq
        except:
            p = 1
            q = 1
            data = sku_data

        data = sku_data
        best_order = (p, d, q)
        print("BEST ORDER :", best_order)
        #TODO: Calculate tsize
        tsize = 5
        #        tsize = int(0.2*len(data))
        #        print(test)
        expected = data[-tsize:].reset_index(drop=True)
        expected = [float(i) for i in expected.values]
        #        print("Dimension: ", data.shape)
        train_6wa = sku_data[0:-tsize]
        predictions_ML, rmse_ML = train.time_series_using_ml(
            sku_data, tsize, best_order, data_cluster[sku])
        rmse_ARIMA, rmse_ES, rmse_naive, rmse_ma, predictions_ARIMA, predictions_ES, predictions_naive, predictions_ma = train.time_series_models(
            freq, sku_data, data, tsize, best_order, data_cluster[sku])
        print("Modeling done")

        rmse_TS = rmse_ARIMA.copy()
        rmse_TS.update(rmse_ES)
        rmse_TS.update(rmse_naive)
        rmse_TS.update(rmse_ma)

        predictions = predictions_ML
        predictions.update(predictions_ARIMA)
        predictions.update(predictions_ES)
        predictions.update(predictions_naive)
        predictions.update(predictions_ma)

        if data_cluster[sku] in [1, 4, 7, 10, 13, 16, 19, 22, 25]:
            rmse_Croston, predictions_Croston = mu.Croston_TSB(sku_data, tsize)
            rmse_TS.update(rmse_Croston)
            predictions.update(predictions_Croston)

        rmse_vol_ml = dict()
        for key in rmse_ML:
            std = np.std(rmse_ML[key])
            mean = np.mean(rmse_ML[key])
            rmse_vol_ml[key] = mean
#            if std == 0:
#                rmse_vol_ml[key]= mean
#            else:
#                rmse_vol_ml[key] = mean/std

        rmse_vol_ts = dict()
        for key in rmse_TS:
            mean = np.mean(rmse_TS[key])
            std = np.std(rmse_TS[key])
            rmse_vol_ts[key] = mean
#            if std == 0:
#                rmse_vol_ts[key] = mean
#            else:
#                rmse_vol_ts[key]= mean/std

        # Top 3 models of each family (ML and TS), ranked by ascending mean RMSE
        best_models_ml = sorted(rmse_vol_ml,
                                key=rmse_vol_ml.get,
                                reverse=False)[:3]
        best_models_ts = sorted(rmse_vol_ts,
                                key=rmse_vol_ts.get,
                                reverse=False)[:3]

        #        forecasts_ml = dict()
        #        validation_ml = dict()
        bias_ml = []
        accuracy_ml = []
        for model in best_models_ml:
            #            temp = ft.model_predict(model, best_order,data, forecast_period)
            #            forecasts_ml[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ml[model] = predictions[model]
            bias_ml.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ml.append(mu.calculate_facc(expected, predictions[model]))
        bias_ml = [float(format(i, '.3f')) for i in bias_ml]
        accuracy_ml = [float(format(i, '.3f')) for i in accuracy_ml]

        #        forecasts_ts = dict()
        #        validation_ts = dict()
        bias_ts = []
        accuracy_ts = []
        for model in best_models_ts:
            #            temp = ft.model_predict(model, best_order, sku_data, forecast_period,repeat)
            #            forecasts_ts[model] = [0 if i < 0 else int(i) for i in temp]
            #            validation_ts[model] = predictions[model]
            bias_ts.append(
                (sum(expected) - sum(predictions[model])) / len(expected))
            accuracy_ts.append(mu.calculate_facc(expected, predictions[model]))
        bias_ts = [float(format(i, '.3f')) for i in bias_ts]
        accuracy_ts = [float(format(i, '.3f')) for i in accuracy_ts]

        # For the single ensemble: take the lowest-error ML model and the lowest-error TS model
        error_ml = min(rmse_vol_ml.values())
        error_ts = min(rmse_vol_ts.values())

        best_models = [
            min(rmse_vol_ml, key=lambda x: rmse_vol_ml.get(x)),
            min(rmse_vol_ts, key=lambda x: rmse_vol_ts.get(x))
        ]
        print("BEST MODELS :", best_models)
        print("ERRORS OF BEST MODELS :", error_ml, error_ts)
        forecast_ml, param_val_fore = ft.model_predict(best_models[0],
                                                       best_order, data,
                                                       forecast_period)

        if best_models[1] == 'Croston':
            rmse_Croston, forecast_ts = mu.Croston_TSB(sku_data,
                                                       forecast_period)
            forecast_ts = forecast_ts['Croston']
        else:
            forecast_ts, param_val = ft.model_predict(best_models[1],
                                                      best_order, sku_data,
                                                      forecast_period, repeat)

        forecast_ml = [0 if i < 0 else int(i) for i in forecast_ml]
        forecast_ts = [0 if i < 0 else int(i) for i in forecast_ts]

        weight_ts, weight_ml = ut.weight_calculation(data, best_models,
                                                     best_order)
        print("weight ts:", weight_ts)
        print("weight ml:", weight_ml)

        Vm = predictions[best_models[0]]
        Vt = predictions[best_models[1]]

        Ve = ut.method_ensemble(Vm, Vt, weight_ml, weight_ts, tsize)
        error_en = mu.calculate_rmse('Ensemble', expected, Ve)

        bias_en = []
        accuracy_en = []

        bias_en.append((sum(expected) - sum(Ve)) / len(expected))
        accuracy_en.append(mu.calculate_facc(expected, Ve))
        bias_en = [float(format(i, '.3f')) for i in bias_en]
        accuracy_en = [float(format(i, '.3f')) for i in accuracy_en]
        # Baseline 'naive6wa' model (six-month naive / weighted average), run on the series without its last tsize points
        V6wa, rmse_6wa = ts.model_Naive('naive6wa',
                                        train_6wa,
                                        tsize, (0, 0, 0),
                                        0,
                                        train_flag=1)
        error_6wa = np.mean(rmse_6wa)
        forecast_6wa, param_val = ft.model_predict('naive6wa', best_order,
                                                   data, forecast_period)

        forecast_en = ut.method_ensemble(forecast_ml, forecast_ts, weight_ml,
                                         weight_ts, forecast_period)

        output['forecast_period'] = forecast_period
        output['interval'] = 'M'
        output['best_models_ml'] = best_models_ml
        output['best_models_ts'] = best_models_ts
        output['bias_ml'] = bias_ml
        output['bias_ts'] = bias_ts
        output['bias_en'] = bias_en
        output['accuracy_ml'] = accuracy_ml
        output['accuracy_ts'] = accuracy_ts
        output['accuracy_en'] = accuracy_en
        output['TS'] = op.best_model_details_ts(best_models[1], bias_ts[0],
                                                accuracy_ts[0], best_order)
        output['ML'] = op.best_model_details_ml(best_models[0], bias_ml[0],
                                                accuracy_ml[0], param_val_fore)
        output['Ensemble'] = {"bias": bias_en[0], "accuracy": accuracy_en[0]}

        error_min_model = min(error_ml, error_ts, error_en)

        print("Errors:", )
        print("ML:", error_ml)
        print("TS:", error_ts)
        print("Ensemble:", error_en)
        print("six_naive_WA", error_6wa)

        min_error = min(error_min_model, error_6wa)

        if min_error == error_ml:
            ftt = forecast_ml
        elif min_error == error_ts:
            ftt = forecast_ts
        elif min_error == error_en:
            ftt = forecast_en
        else:
            ftt = []

        if min_error == error_6wa or all(elem == ftt[0]
                                         for elem in ftt) == True:
            print("Best forecast from six naive")
            forecast = forecast_6wa
            output['validation'] = ut.assign_dates(V6wa, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, V6wa)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ml:
            print("Best forecast from ML")
            forecast = forecast_ml
            output['validation'] = ut.assign_dates(Vm, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vm)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_en:
            print("Best forecast from Ensemble")
            forecast = forecast_en
            output['validation'] = ut.assign_dates(Ve, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Ve)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
        elif min_error == error_ts:
            print("Best forecast from TS")
            forecast = forecast_ts
            output['validation'] = ut.assign_dates(Vt, 'validation',
                                                   dataset.tail(5))
            validation_facc = ft.calculate_validation_facc(expected, Vt)
            output['validation_facc'] = ut.assign_dates(
                validation_facc, 'val_facc', dataset.tail(5))
#
        print("Forecasts:")
        print("ML:", forecast_ml)
        print("TS:", forecast_ts)
        print("Ensemble:", forecast_en)
        print("Best Forecast", forecast)

        output['forecast_values'] = ut.assign_dates(forecast, 'forecast',
                                                    dataset.tail(1))
        output['facc'], output['mape'], output[
            'bias'] = ft.calculate_forecast_accuracy(raw_data.iloc[-1].sales,
                                                     forecast[0])
        facc_out[sku] = np.mean(validation_facc)

        output['forecast_ml'] = ut.assign_dates(forecast_ml, 'forecast',
                                                dataset.tail(1))
        output['forecast_ts'] = ut.assign_dates(forecast_ts, 'forecast',
                                                dataset.tail(1))
        output['forecast_en'] = ut.assign_dates(forecast_en, 'forecast',
                                                dataset.tail(1))
        output['model_ml'] = best_models[0]
        output['model_ts'] = best_models[1]

        forecast_results = ft.output_forecast(sku, dataset, sku_data, output,
                                              forecast_results)

        ft.plot_all_forecasts(dataset, sku_data, forecast, forecast_en,
                              forecast_ml, forecast_ts, sku)

    return forecast_results, facc_out