Example #1
0
def ar_coefficient(x, param):
    """
    Return one coefficient of an AR(k) model fitted to the time series ``x``.

    The fitted process is

    .. math::

        X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t}

    ``param["k"]`` is the maximum lag of the process and ``param["coeff"]`` is the
    index :math:`i` of the coefficient :math:`\\varphi_{i}` to return.

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray
    :param param: a single configuration ``{"coeff": i, "k": k}`` with ``i``, ``k`` int
    :type param: dict
    :return: the requested coefficient; ``NaN`` if ``coeff > k`` or the fit fails;
        ``0`` if the fitted parameter vector is shorter than ``coeff + 1``
    :return type: float
    """
    k = param["k"]
    p = param["coeff"]

    # A coefficient beyond the model order does not exist, so there is nothing
    # to fit: the answer is NaN regardless of the data.
    if p > k:
        return np.nan

    calculated_AR = AR(list(x))

    try:
        # NOTE(review): ``solver="mle"`` is passed through unchanged from the
        # original code; the wording "maximum likelihood" suggests ``method``
        # may have been intended -- confirm against the AR.fit signature.
        ar_params = calculated_AR.fit(maxlag=k, solver="mle").params
    except (np.linalg.LinAlgError, ValueError):
        # The fit failed, so every coefficient of this model is undefined.
        return np.nan

    try:
        return ar_params[p]
    except IndexError:
        # Fewer parameters were estimated than requested.
        return 0
Example #2
0
def spectrum0_ar(x):
    """
    Estimate the spectral density at frequency zero of ``x`` from an AR fit.

    A straight line (intercept + slope over 1..len(x)) is first fitted by least
    squares. If the residuals of that line have zero standard deviation, both
    results are 0. Otherwise an AR model with constant is fitted with the lag
    order chosen by AIC.

    :param x: one-dimensional series
    :return: tuple ``(spec, order)`` -- the estimate and the selected AR order
    """
    time_index = np.arange(1, len(x) + 1)
    # Columns are time_index**0 (all ones) and time_index**1.
    design = time_index[:, np.newaxis]**[0, 1]
    beta, _res, _rank, _sv = lstsq(design, x)
    detrended = x - np.matmul(design, beta)

    if detrended.std() == 0:
        return 0, 0

    fitted = AR(x).fit(ic='aic', trend='c')
    order = fitted.k_ar
    # params[0] is skipped: only the lag coefficients enter the sum.
    lag_coef_sum = np.sum(fitted.params[1:])
    spec = np.var(fitted.resid) / (1 - lag_coef_sum)**2
    return spec, order
Example #3
0
            def _feature_2_3(ts=_ts):
                """
                The relationship between the order of the AR model and its goodness of fit.

                For each of the models AR(1), AR(2), ..., AR(6) the mean residual is
                computed; these six values are then regressed linearly on the orders
                [1, 2, ..., 6]. The result is the slope of that regression.
                """
                orders = np.arange(1, 7)
                ar_results = np.empty(len(orders))
                for idx, order in enumerate(orders):
                    # Store each model's mean residual in its own slot; the lag
                    # order is the 1-based order, not the 0-based loop index.
                    ar_results[idx] = AR(ts).fit(maxlag=int(order)).resid.mean()

                # linregress returns more than two values; the slope is the first.
                slope = stats.linregress(orders, ar_results)[0]

                return slope
Example #4
0
    def test_mle(self):
        # Regression check for predict without a constant term, #3945
        series = self.res1.model.endog
        last_ten = slice(-10, None)

        ar_fit = AR(series).fit(maxlag=9, method='mle', trend='nc', disp=0)
        # NOTE(review): both arguments below are the same array, so this
        # assertion can never fail -- confirm what it was meant to compare.
        assert_allclose(ar_fit.fittedvalues[last_ten],
                        ar_fit.fittedvalues[last_ten],
                        rtol=0.015)

        # The same model expressed as ARMA(9, 0) must agree with the AR fit.
        arma_fit = ARMA(series, (9, 0)).fit(method='mle', trend='nc', disp=0)
        assert_allclose(ar_fit.params, arma_fit.params, atol=5e-6)
        assert_allclose(ar_fit.fittedvalues[last_ten],
                        arma_fit.fittedvalues[last_ten],
                        rtol=1e-4)
Example #5
0
 def predict(self, data, start_idx, end_idx):
     """
     Fit a model of order ``self.opt_p`` on ``data`` and predict a range.

     A VAR model is used when ``data`` has more than one column, an AR model
     otherwise. ``self.model`` is overwritten as a side effect: with the
     unfitted VAR model in the multi-column case, with the fitted AR results
     in the single-column case.

     :param data: frame of observations (``data.columns`` is read)
     :param start_idx: first position to predict
     :param end_idx: last position to predict
     :return: pandas.DataFrame of predictions using the column names of ``data``
     """
     is_multivariate = len(data.columns) > 1
     if is_multivariate:
         self.model = VAR(data)
         fitted = self.model.fit(self.opt_p)
         # VAR.predict is called on the model and needs the fitted parameters.
         y_pred = self.model.predict(fitted.params,
                                     start=start_idx,
                                     end=end_idx,
                                     lags=self.opt_p)
     else:
         self.model = AR(data)
         self.model = self.model.fit(self.opt_p)
         y_pred = self.model.predict(start=start_idx, end=end_idx)
     return pd.DataFrame(data=y_pred, columns=data.columns.values)
    def process_mag_signal(self, data):
        """
        Build the feature vector for a magnitude signal.

        The signal is split into windows, the generic 1-D features are computed
        with ``self.process_1d_signal``, a signal-magnitude-area style value is
        inserted at position 5, and the AR parameters of every window are
        appended at the end.

        :param data: raw signal, passed to ``self.split_data_to_windows``
        :return: feature array including the inserted and appended values
        """
        data = self.split_data_to_windows(data)
        rez = self.process_1d_signal(data)
        x_sma = np.sum(data) / len(data) * self._freq
        # np.insert returns a new array; the result must be kept, otherwise
        # the value is silently dropped.
        # NOTE(review): without ``axis`` np.insert works on the flattened
        # array -- confirm ``rez`` is one-dimensional.
        rez = np.insert(rez, 5, x_sma)

        # AR parameters per window: fit(3) yields the 3 lag coefficients plus
        # the constant, i.e. the "AR 4 coef" of the original comment.
        x_ar = [AR(data[i]).fit(3).params for i in range(data.shape[0])]

        # np.append also returns a new array rather than modifying in place.
        rez = np.append(rez, x_ar)
        return rez
Example #7
0
def predict_finals_week(data):
    """
    Forecast eight days (2017-06-12 .. 2017-06-19) for every column of ``data``.

    For each column an AR model is fitted and rolled forward eight steps, each
    forecast being fed back in as history for the next step. Columns for which
    fitting or forecasting fails are skipped.

    :param data: pandas.DataFrame with one series per column
    :return: pandas.DataFrame with a ``date`` column followed by one forecast
        column per successfully modelled input column
    """
    forecast_dates = [
        "2017-06-12", "2017-06-13", "2017-06-14", "2017-06-15",
        "2017-06-16", "2017-06-17", "2017-06-18", "2017-06-19"
    ]
    dfs = [pd.DataFrame({"date": forecast_dates})]

    for ndx in data.columns:
        X = data[ndx].values

        try:
            # train autoregression
            model_fit = AR(X).fit()
            window = model_fit.k_ar
            coef = model_fit.params

            # seed the history with the last `window` observations
            history = list(X[len(X) - window:])
            predictions = []

            for _ in range(len(forecast_dates)):
                lag = history[len(history) - window:]
                # coef[0] is the constant; coef[d + 1] multiplies the value
                # observed d + 1 steps back.
                yhat = coef[0]
                for d in range(window):
                    yhat += coef[d + 1] * lag[window - d - 1]

                predictions.append(yhat)
                history.append(yhat)

            dfs.append(pd.DataFrame({ndx: predictions}))
        except Exception:
            # Deliberate best effort: a column that cannot be modelled is left
            # out. ``Exception`` (not a bare except) keeps KeyboardInterrupt
            # and SystemExit propagating.
            continue

    return pd.concat(dfs, axis=1)
Example #8
0
def choose(drop_type, startdate, enddate, pvalue, dvalue, qvalue, points):
    """
    Build the figure dictionary for the selected model type.

    Reads the module-level ``df`` and uses its ``TotalVolume`` column.

    :param drop_type: model selector; ``'AR'`` and ``'ARIMA'`` fit a model,
        ``'SARIMAX'`` and any other value produce constant placeholder traces
    :param startdate: start of the selected (training) slice of ``df``
    :param enddate: end of the selected slice; test data starts here
    :param pvalue: ARIMA order p (ARIMA only)
    :param dvalue: ARIMA order d (ARIMA only)
    :param qvalue: ARIMA order q (ARIMA only)
    :param points: number of hourly steps, counted from the last selected
        observation, up to which predictions are made
    :return: dict with keys ``data`` (three traces), ``type``, ``name`` and
        ``layout``
    """
    slicedf = df.loc[startdate:enddate]
    test_slice = df.loc[enddate:]
    # Timestamp of the last prediction step.
    endtime = pd.date_range(slicedf.index[-1], periods=points, freq='H')[-1]
    x = [0, 1, 2, 3, 4]
    trace1 = None
    trace2 = None
    trace3 = None
    if drop_type == 'AR':
        m = AR(slicedf['TotalVolume'])
        mnolag = m.fit(method='mle', ic='aic')
        preds = mnolag.predict(start=slicedf.index[-11], end=endtime, dynamic=False).rename('AR PREDICTIONS')
        trace1 = makeTrace(preds.index, preds.values, 'Predictions')
        trace2 = makeTrace(test_slice.index, test_slice.TotalVolume, 'Testing Data')
        # Skip the first 1% of the selected rows when drawing them.
        half = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[half:], slicedf['TotalVolume'].values[half:], 'Selected Data')
    elif drop_type == 'ARIMA':
        m = ARIMA(slicedf['TotalVolume'], order=(pvalue, dvalue, qvalue))
        mfit = m.fit(method='mle')
        preds = mfit.predict(start=test_slice.index[0], end=endtime, dynamic=False)
        trace1 = makeTrace(preds.index, preds.values, 'Predictions')
        trace2 = makeTrace(test_slice.index, test_slice.TotalVolume, 'Testing Data')
        trace3 = makeTrace(slicedf.index[points // 2:], slicedf['TotalVolume'].values[points // 2:], 'Selected Data')
    elif drop_type == 'SARIMAX':
        # Placeholder traces; no model is fitted for this type.
        # NOTE(review): makeTrace is called with four arguments here but with
        # three above -- confirm its signature.
        trace1 = makeTrace(x, [5, 5, 5, 5, 5], '5', 'Predictions')
        trace2 = makeTrace(x, [6, 6, 6, 6, 6], '6', 'Testing Data')
        half = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[half:], slicedf['TotalVolume'].values[half:], 'Selected Data')
    else:
        # Unknown type: placeholder traces as well.
        trace1 = makeTrace(x, [7, 7, 7, 7, 7], '7', 'Predictions')
        trace2 = makeTrace(x, [8, 8, 8, 8, 8], '8', 'Testing Data')
        half = slicedf.shape[0] // 100
        trace3 = makeTrace(slicedf.index[half:], slicedf['TotalVolume'].values[half:], 'Selected Data')
    return {
            'data' : [trace1, trace2, trace3],
            'type' : 'scatter',
            'name' : drop_type,
            'layout' : go.Layout(title=drop_type, barmode='stack')
            }
Example #9
0
    def do_forecast_ar_model(self, today, train, test):
        """
        Fit an AR model on ``train`` and score its forecast against ``test``.

        Missing values in ``train`` are replaced by 0 before fitting. The
        forecast covers ``len(test)`` steps directly after the training data.

        :return: tuple ``(forecast, mse, mae, mase)`` where the three error
            measures come from ``self.utils_cl``
        """
        fitted = AR(train.fillna(0)).fit()
        logging.info("Fitted AR...")

        first_step = len(train)
        last_step = len(train) + len(test) - 1
        forecast = fitted.predict(start=first_step, end=last_step)
        logging.info("Predicted AR")

        mse = self.utils_cl.compute_mse(test, forecast)
        mae = self.utils_cl.compute_mae(test, forecast)
        mase = self.utils_cl.compute_mase(today, test, forecast)

        logging.info("Exit do_forecast_ar_model")
        return forecast, mse, mae, mase
Example #10
0
def EffectiveSize(df):
    """
    Estimate the effective sample size of every column of ``df``.

    For each column an AR model (lag order chosen by AIC, at most 100) is
    fitted and the spectral density at zero is estimated as
    ``sigma2 / (1 - sum(lag coefficients))**2``. The effective size is the
    column variance divided by that estimate, times the number of rows.

    The input frame is not modified.

    :param df: pandas.DataFrame with one series per column
    :return: list with one effective sample size per column
    """
    nn, mm = df.shape
    ESS = []
    for jj in range(mm):
        # The series handed to AR is named "0", as the original did by
        # renaming all columns, but on a renamed copy so the caller's column
        # labels stay untouched.
        xx = df.iloc[:, jj].rename("0")
        xx_res = AR(xx).fit(maxlag=100, ic='aic')
        # The first parameter is the constant term (AR.fit's default trend);
        # only the lag coefficients belong in the sum.
        lag_coef_sum = sum(list(xx_res.params)[1:])
        v0 = xx_res.sigma2 / (1.0 - lag_coef_sum)**2
        ESS.append(xx.std()**2 / v0 * nn)
    return ESS
Example #11
0
def calc_prediction(train, test):
    """
    Fit an AR model on ``train``, forecast ``len(test)`` steps and report them.

    Prints the selected lag, the coefficients, every predicted/expected pair
    and the test MSE.

    :param train: training observations
    :param test: held-out observations directly following ``train``
    :return: tuple ``(first prediction, first expected value)``
    """
    # train autoregression
    model_fit = AR(train).fit()
    print('Lag: %s' % model_fit.k_ar)
    print('Coefficients: %s' % model_fit.params)
    # make predictions
    predictions = model_fit.predict(start=len(train),
                                    end=len(train) + len(test) - 1,
                                    dynamic=False)
    # Work positionally: for pandas inputs ``obj[i]`` is a label lookup and
    # fails when the index does not start at 0.
    predicted = list(predictions)
    expected = list(test)
    for yhat, actual in zip(predicted, expected):
        print('predicted=%f, expected=%f' % (yhat, actual))
    error = mean_squared_error(test, predictions)
    print('Test MSE: %.3f' % error)
    return predicted[0], expected[0]
Example #12
0
 def ar(self, thpt_list):
     """
     Predict the latest throughput value with an AR(1) model.

     The model is fitted on the list shifted by one (a leading 0 followed by
     all values but the last) and asked for the next value. The prediction is
     returned only if it lies within 10% of the actual last value.

     :param thpt_list: list of throughput values
     :return: ``0.`` for fewer than 4 values, for a last value of 0, or when
         the prediction is off by 10% or more; the result of
         ``self.find_average_thpt`` for more than 15 values; otherwise the
         AR prediction
     """
     if len(thpt_list) < 4:
         return 0.
     if len(thpt_list) > 15:
         return self.find_average_thpt(thpt_list)

     last_pt = thpt_list[-1]
     # A zero last value can never pass the relative-error check below, and
     # dividing by it would raise; decide before doing any work.
     if last_pt == 0.:
         return 0.

     tmp = [0] + thpt_list[:-1]
     model = AR(tmp)
     start_params = [0, 0, 1]
     model_fit = model.fit(maxlag=1, start_params=start_params, disp=-1)
     predicted_last = model_fit.predict(len(tmp), len(tmp))[0]

     diff = abs(predicted_last - last_pt) / last_pt
     if diff < 0.1:
         return predicted_last
     return 0.
Example #13
0
 def generate_AR_para(self, rawwave, filtered=False, wavt=False, AR_order=10):
     """
     Return the parameters of an AR model fitted to a (possibly filtered) wave.

     :param rawwave: input signal
     :param filtered: when equal to True the signal is filtered before fitting
     :param wavt: only read when filtering; when equal to False
         ``self.selective_freq_range`` (high_freq=30, low_freq=1.5) is used,
         otherwise ``self.wavelet_transform``
     :param AR_order: maximum lag passed to the AR fit
     :return: the fitted parameter vector
     """
     if filtered == True:
         if wavt == False:
             # band selection; only the first of three results is needed
             model_input, _, _ = self.selective_freq_range(rawwave, high_freq=30, low_freq=1.5)
         else:
             # wavelet path; only the first of two results is needed
             model_input, _ = self.wavelet_transform(rawwave)
     else:
         model_input = rawwave

     return AR(model_input).fit(maxlag=AR_order).params
Example #14
0
def AR_Forecast() -> None:
    """
    Fit an AR model to ``./uspopulation.csv`` and plot forecast vs. test data.

    The first 84 rows of the ``PopEst`` column are used for training; the
    remaining rows are the test period that is forecast and plotted.
    """
    df = pd.read_csv("./uspopulation.csv", parse_dates=True, index_col='DATE')
    # asfreq returns a new frame; keep it so the month-start frequency is
    # actually attached to the index used by the model.
    df = df.asfreq('MS')
    train = df.iloc[:84]
    test = df.iloc[84:]
    model = AR(train['PopEst'])
    # ic is a very important parameter for fitting the AR model:
    # it is the criterion used to choose the lag order.
    ARFit = model.fit(ic='t-stat')
    print(ARFit.params)
    # Forecast exactly the test period.
    start = len(train)
    end = len(train) + len(test) - 1
    predictions = ARFit.predict(start=start, end=end)
    test.plot(legend=True, label='Test')
    predictions.plot(legend=True, label='Predictions', figsize=(12, 8))
    def test_mle(self):
        # Regression check for predict without a constant term, #3945
        series = self.res1.model.endog
        last_ten = slice(-10, None)

        # Fitting the AR model is expected to emit a FutureWarning.
        with pytest.warns(FutureWarning):
            ar_fit = AR(series).fit(maxlag=9, method="mle", trend="nc", disp=0)
        # NOTE(review): both arguments below are the same array, so this
        # assertion can never fail -- confirm what it was meant to compare.
        assert_allclose(ar_fit.fittedvalues[last_ten],
                        ar_fit.fittedvalues[last_ten],
                        rtol=0.015)

        # Same model as ARIMA(9, 0, 0) without trend; its last parameter is
        # left out of the comparison.
        arima_fit = ARIMA(series, order=(9, 0, 0), trend="n").fit()
        assert_allclose(ar_fit.params, arima_fit.params[:-1], rtol=1e-2)
        assert_allclose(ar_fit.fittedvalues[last_ten],
                        arima_fit.fittedvalues[last_ten],
                        rtol=1e-4)
def OU_fitting(series):
    """
    Fit Ornstein-Uhlenbeck parameters to ``series`` through an AR(1) model.

    :param series: pandas.Series indexed by date
    :return: dict with the fitted AR results (``ar_model``) and the derived
        values ``lam``, ``mu`` and ``sigma``
    """
    fitted = AR(endog=series).fit(maxlag=1)
    # Exactly two parameters are expected: the constant, then the lag-1 coefficient.
    intercept, slope = fitted.params.tolist()
    noise_std = np.std(fitted.resid)

    log_slope = np.log(slope)
    return {
        'ar_model': fitted,
        'lam': -log_slope,
        'mu': intercept / (1 - slope),
        'sigma': noise_std * np.sqrt(-2 * log_slope / (1 - slope * slope)),
    }
Example #17
0
def autoRegression():
    """
    Forecast the per-movie gross for "Dec. 28" from the ``daily`` collection.

    Every matching record contributes gross / movies tracked; the last
    collected value is dropped, an AR model is fitted on the rest and its
    one-step-ahead forecast is printed.
    """
    daily_records = db['daily'].find({"Date": "Dec. 28"})
    per_movie_gross = []
    for entry in daily_records:
        _year = entry['Year']  # unused; the lookup is kept as in the original
        tracked = entry['MoviesTracked']
        gross_text = entry['Gross($)'].replace(",", "")
        per_movie_gross.append(int(gross_text) / int(tracked))
    # discard the most recently collected value
    del per_movie_gross[-1]
    print(per_movie_gross)

    # fit the model and forecast the step right after the data
    fitted = AR(per_movie_gross).fit()
    next_step = len(per_movie_gross)
    res = fitted.predict(next_step, next_step)
    print(res)
Example #18
0
 def transform(self, X):
     """
     Compute AR standard errors for every signal in ``X``.

     Each innermost signal is subsampled with step ``self.subsample`` and an
     AR model of order ``self.order`` is fitted; the ``bse`` vector of the fit
     is kept. If it does not have ``self.order + 1`` entries it is replaced
     by NaNs of that length.

     :param X: iterable of iterables of signals
     :return: numpy array built from the nested lists of ``bse`` vectors
     """
     expected_len = self.order + 1 if len(X) else None

     def _bse_of(signal):
         fitted = AR(signal[::self.subsample]).fit(self.order)
         errors = fitted.bse
         if len(errors) != expected_len:
             errors = np.array([np.nan] * expected_len)
         return errors

     return np.array([[_bse_of(signal) for signal in group] for group in X])
def fitOU(residual, training_size):
    """
    Fit Ornstein-Uhlenbeck parameters to the cumulative sum of ``residual``.

    An AR(1) model is fitted to the cumulated series. The fit is accepted
    only when the lag coefficient lies strictly between 0 and
    ``exp(-2 / training_size)``.

    :param residual: series of residuals
    :param training_size: sets the upper bound on the lag coefficient
    :return: ``(kappa, m, sigma, sigmaeq)`` on success, ``(-1.0, 0, 0, 0)``
        otherwise
    """
    dt = 1
    fitted = AR(np.cumsum(residual)).fit(maxlag=1, disp=-1)
    intercept = fitted.params[0]
    slope = fitted.params[1]
    noise_var = fitted.sigma2

    upper_bound = np.exp(-2.0/training_size)
    if not (slope > 0.0 and slope < upper_bound):
        return -1.0,0,0,0

    kappa = -np.log(slope) / dt
    m = intercept / (1.0 - np.exp(-kappa * dt))
    one_minus_decay_sq = 1.0 - np.exp(-2.0 * kappa * dt)
    sigma = np.sqrt(noise_var * 2.0 * kappa / one_minus_decay_sq)
    sigmaeq = np.sqrt(noise_var / one_minus_decay_sq)
    return kappa, m, sigma, sigmaeq
Example #20
0
def ar(data, gap=0, predtill=1):
    """
    Forecast the last ``predtill`` time steps of every series with an AR model.

    For each index ``i`` of the last axis, ``data[0, :, i]`` without its final
    ``gap + 1`` steps is used for fitting, and the ``gap + 1`` held-out steps
    are forecast. The final ``predtill`` forecasts are compared with the
    final ``predtill`` steps of ``data``.

    NOTE(review): only the first entry of the leading axis is used for
    fitting, while the comparison uses all of ``data`` -- confirm the leading
    axis has length 1.

    :param data: array indexed as ``[batch, time, series]``
    :param gap: number of time steps between the training end and the last step
    :param predtill: number of trailing steps to evaluate; at most ``gap + 1``
    :return: tuple ``(mae, mape, pred)``
    """
    assert predtill - 1 <= gap
    n_steps = data.shape[1]
    true = data[:, -predtill:, :]
    pred = []
    for i in range(data.shape[2]):
        fitted = AR(data[0, :-gap - 1, i]).fit()
        forecast = fitted.predict(start=n_steps - gap - 1, end=n_steps - 1)
        # The forecast has gap + 1 entries ending at the last time step; the
        # last `predtill` of them line up with `true`.
        pred.append(forecast[gap + 1 - predtill:gap + 1])
    pred = np.expand_dims(np.array(pred).T, axis=0)
    abs_err = np.abs(pred - true)
    mae = np.mean(abs_err)
    mape = np.mean(abs_err / true) * 100
    return mae, mape, pred
Example #21
0
def ARmodel(shareFeature_data):
    """
    Fit an AR model and predict from the 70% mark up to one step past the data.

    NOTE(review): the model is fitted on the complete input, not only on the
    first 70% -- confirm this is intended.

    :param shareFeature_data: series with a ``size`` attribute
    :return: predictions from position ``int(size * 70 / 100 + 1)`` through
        position ``size``
    """
    n_obs = shareFeature_data.size
    # first position after the 70% training share
    first_predicted = int(n_obs * 70 / 100 + 1)

    fitted = AR(shareFeature_data).fit()
    return fitted.predict(start=first_predicted, end=n_obs)
Example #22
0
def ar_coefficient(x, c, param):
    """
    Fit AR(k) models to ``x`` and return the requested coefficients.

    The fitted process is

    .. math::

        X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t}

    For every distinct maxlag "k" in ``param`` one AR process is fitted. Then the
    coefficients :math:`\\varphi_{i}` whose index :math:`i` is listed under "coeff"
    for that "k" are returned. A coefficient with ``i > k``, or any coefficient of
    a model whose fit failed, is ``NaN``; an index missing from the fitted
    parameter vector yields ``0``.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param c: the time series name
    :type c: str
    :param param: contains dictionaries {"coeff": x, "k": y} with x,y int
    :type param: list
    :return x: the different feature values, indexed by
        ``"<c>__ar_coefficient__k_<k>__coeff_<coeff>"``
    :return type: pandas.Series
    """
    # Nothing requested: return an empty result instead of failing on the
    # missing "k" column below.
    if len(param) == 0:
        return pd.Series(dtype=float)

    df_cfg = pd.DataFrame(param)
    df_cfg["k"] = df_cfg["k"].apply(int)

    values = list(x)
    parts = []

    for k in df_cfg["k"].unique():
        coeff = df_cfg[df_cfg["k"] == k]["coeff"]
        names = ["{}__ar_coefficient__k_{}__coeff_{}".format(c, k, p) for p in coeff]

        try:
            mod = AR(values).fit(maxlag=k, solver="mle").params
        except (LinAlgError, ValueError):
            parts.append(pd.Series(np.nan, index=names, dtype=float))
            continue

        picked = []
        for p in coeff:
            if p <= k:
                try:
                    picked.append(mod[p])
                except IndexError:
                    picked.append(0)
            else:
                picked.append(np.nan)
        parts.append(pd.Series(picked, index=names, dtype=float))

    # Series.append no longer exists in current pandas; concatenate once.
    return pd.concat(parts)
Example #23
0
def predict_AR(array, p=1):
    """Method one: one-step-ahead forecast with an AR(p) model.

    Input:
        array: series of volatility values (its ``size`` attribute is read)
        p: int, lag of AR model
    Output:
        vols_pred: predicted volatility for the step right after ``array``
    """
    next_step = array.size
    fitted = AR(array).fit(maxlag=p, disp=0)
    vols_pred = fitted.predict(start=next_step, end=next_step, dynamic=True)
    return vols_pred
Example #24
0
    def update(self):
        """
        Refresh ``self.model`` and ``self.prediction`` from recent arrivals.

        An AR model is fitted on the arrivals between ``self.index - self.window``
        (not below 0) and ``self.index``. If its one-step forecast falls outside
        the min/max of the last ``self.windowArrival`` predicted arrivals, the
        forecast is replaced by that of simple exponential smoothing.
        """
        window_start = max(0, self.index - self.window)
        history = self.arrivals[window_start:self.index]
        next_step = len(history)

        # primary forecast: autoregression
        ar_fit = AR(history).fit()
        self.model = ar_fit
        self.prediction = ar_fit.predict(next_step, next_step)[0]

        lower = min(self.predictedArrivals[-self.windowArrival:])
        upper = max(self.predictedArrivals[-self.windowArrival:])

        out_of_range = self.prediction < lower or self.prediction > upper
        if out_of_range:
            # fallback forecast: simple exponential smoothing
            ses_fit = SimpleExpSmoothing(history).fit()
            self.prediction = ses_fit.predict(next_step, next_step)[0]
Example #25
0
def predict_AR(df, window=ROLLING_WINDOW, p=1):
    """Method one: forecast with an AR(p) model over a given rolling window.

    Input:
        df: DataFrame, raw volatility data (column ``VOL_NAME`` is used)
        window: int, rolling window length
        p: int, lag of AR model
    Output:
        vols_pred: time series, predicted volatility
    """

    def _one_step_forecast(values):
        # forecast the step right after the window
        next_step = values.size
        return AR(values).fit(maxlag=p, disp=0).predict(start=next_step,
                                                        end=next_step)

    vols_pred = df[VOL_NAME].rolling(window).apply(_one_step_forecast)
    vols_pred.name = '_'.join(['AR', repr(window), repr(p)])
    print(vols_pred.name + " prediction finished.")
    return vols_pred
Example #26
0
def autocorr():
    """
    Explore the autocorrelation of ES settle prices and test an AR forecast.

    Downloads 2017 data through ``Quandl``, draws lag, autocorrelation and ACF
    plots, fits an AR model on everything but the last 7 observations, and
    compares a 7-step forecast with those held-out observations (printed
    pairs, test MSE, and a plot).
    """
    import pandas.plotting as ptp
    from statsmodels.graphics.tsaplots import plot_acf
    from statsmodels.tsa.ar_model import AR

    qdl = Quandl()
    start, end = "2017-01-01", "2018-01-01"
    es = qdl.get_data("ES", start=start, end=end)
    print(es.head())

    xs = es['Settle']
    print(type(xs.index))

    ptp.lag_plot(xs)

    ptp.autocorrelation_plot(xs)

    plot_acf(xs, lags=7)

    train, test = xs[1:len(xs) - 7], xs[len(xs) - 7:]

    # `train` carries its own index; passing the full-length `xs.index` as
    # dates would not match the training data.
    model = AR(train)
    ar_fit = model.fit()

    print('Lag: %s' % ar_fit.k_ar)
    print('Coefficients: %s' % ar_fit.params)

    # Forecast the held-out period by position: start/end are observation
    # numbers, not price values.
    ar_predicts = ar_fit.predict(start=len(train),
                                 end=len(train) + len(test) - 1,
                                 dynamic=False)

    # Compare positionally; the two series do not share index labels.
    predicted = list(ar_predicts)
    expected = list(test)

    for yhat, actual in zip(predicted, expected):
        print('predicted: %f vs. expected: %f' % (yhat, actual))

    print(len(expected), len(predicted))

    error = mean_squared_error(expected, predicted)
    print('Test MSE: %.3f' % error)

    plt.plot(expected)
    plt.plot(predicted, color='red')
    plt.show()
Example #27
0
def ar2(week):
    """
    Forecast the per-movie weekly gross of 2018 for the given week.

    Records of the ``weekly`` collection for year "2018" are read in the order
    returned until one with a week number of ``week`` or more is met; each
    record before that contributes overall gross / total movies. An AR model
    is fitted on these values and its one-step-ahead forecast is printed.

    :param week: week number at which reading stops (exclusive)
    """
    weekly_records = db['weekly'].find({"Year": "2018"})
    per_movie_gross = []
    for entry in weekly_records:
        if int(entry['Week#']) >= week:
            break
        gross_text = entry['OverallGross($)'].replace(",", "")
        movie_count = entry['TotalMovies']
        per_movie_gross.append(int(gross_text) / int(movie_count))
    print(per_movie_gross)

    # fit the model and forecast the step right after the data
    fitted = AR(per_movie_gross).fit()
    next_step = len(per_movie_gross)
    res = fitted.predict(next_step, next_step)
    print(res)
Example #28
0
def AutoRegression(train, test):
    """
    Fit an AR model on ``train``, forecast the ``test`` period and plot both.

    Prints the selected lag, the coefficients and the RMSE; saves the figure
    to ``AutoRegressionGraph.jpg`` and shows it.
    """
    fitted = AR(train).fit()
    print('Lag:', fitted.k_ar)
    print('Coefficients: %s' % fitted.params)

    # forecast exactly the steps covered by `test`
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step, dynamic=False)

    error = RMSE(test, forecast)

    plt.plot(test, 'lightblue')
    plt.ylabel("InBandwidth")
    plt.plot(forecast, 'r')
    plt.legend(["Original", "Predicted"])
    plt.savefig("AutoRegressionGraph.jpg")
    plt.show()
    print("RMSE : ", error)
def ar_model(time_series_raw,
             time_lag=10,
             max_lag=1,
             y_label='',
             name=None,
             title="AR Model"):
    """
    Rolling one-step-ahead AR forecast with plot and mean squared error.

    For every window of ``time_lag`` consecutive rows an AR model with
    ``max_lag`` lags is fitted on the ``Value`` column and the value of the
    row right after the window is predicted.

    :param time_series_raw: DataFrame with ``Value`` and ``Date`` columns
    :param time_lag: number of observations in each training window
    :param max_lag: maximum lag of the AR model
    :param y_label: label of the y axis
    :param name: path of the image file to write; nothing is saved when None
    :param title: title of the plot
    """
    values = time_series_raw['Value']
    dates = time_series_raw['Date']
    train_length = len(values) - time_lag

    forecasts = []
    for train_index in range(0, train_length):
        window_end = train_index + time_lag
        train = values.iloc[train_index:window_end]
        start_date_train = dates.iloc[train_index]
        end_date_train = dates.iloc[window_end - 1]
        predict_test = dates.iloc[window_end]
        model = AR(train,
                   dates=pd.date_range(start=start_date_train,
                                       end=end_date_train,
                                       freq='M'))
        model_fit = model.fit(maxlag=max_lag)
        predictions = model_fit.predict(start=predict_test,
                                        end=predict_test,
                                        dynamic=True)
        # Take the single forecast by position, not by index label.
        forecasts.append(
            pd.DataFrame(predictions.iloc[0],
                         columns=['Value'],
                         index=pd.DatetimeIndex(data=predictions.index.date)))

    # DataFrame.append no longer exists in current pandas; concatenate once.
    if forecasts:
        y_hat = pd.concat(forecasts)
    else:
        y_hat = pd.DataFrame([], columns=['Value'])

    # Drop the first time_lag rows: they have no forecast.
    time_series_raw = time_series_raw[time_lag:]
    # MSE: mean (not sum) of the squared differences that could be aligned.
    diff_score = time_series_raw['Value'].subtract(y_hat['Value'], axis=0)
    diff_score = diff_score.dropna()**2
    mse = diff_score.mean()
    print("MSE: {}".format(mse))
    plt.plot(time_series_raw.index,
             time_series_raw['Value'],
             label='Real Values')
    plt.plot(y_hat.index, y_hat['Value'], label='Predicted Values')
    plt.legend(loc='upper left')
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel(y_label)
    # savefig cannot write to None; only save when a target was given.
    if name is not None:
        plt.savefig(name)
    plt.close()
Example #30
0
def autoregression_analysis(country, data, output):
    """
    Country based GDP auto-regression analysis

    Fits an AR model to the ``gdp`` column, stores the in-sample predictions
    next to the actual values and writes both a csv file and a plot.

    Parameters
    ----------
    country: str
        the name of a country
    data: str
        path to the csv file containing the GDP data.
    output: str
        The path to the output directory

    Returns
    -------
    tuple, The path of csv result file, and the path of png plot file.
    """
    # Read csv
    df = pd.read_csv(data, index_col="year")
    df = df.dropna()

    # Train model
    train = df["gdp"].values
    model = AR(train)
    model_fit = model.fit()

    # Validate model: in-sample predictions exist from position `lag` on,
    # because the first `lag` observations have no complete set of lags.
    lag = model_fit.k_ar
    pred = model_fit.predict(start=lag, end=len(train) - 1, dynamic=False)

    # Save result: pad the first `lag` rows so every prediction sits on the
    # row of the year it predicts.
    df["pred_gdp"] = [np.nan] * lag + list(pred)
    result_file = os.path.join(output, "result.csv")
    df.to_csv(result_file)

    # Save plot
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    df.plot()
    plt.grid(axis="y", linestyle="--")
    plt.title(country + "(current $)")
    plot_file = os.path.join(output, "result.png")
    plt.savefig(plot_file)

    return result_file, plot_file