Ejemplo n.º 1
0
 def forecast_ts_arima(self, returns, model, order, days=21):
     """Forecast ``days`` steps ahead with a fitted model and plot the result.

     The forecast is requested twice (alpha=0.05 and alpha=0.01) so that both
     the 95% and the 99% confidence bands are available, merged into one
     dataframe, logged, and plotted next to the in-sample prediction over the
     last 500 observations of ``returns``.

     :param returns: indexed series the model was fitted on; its last index
         value anchors the forecast dates
     :param model: fitted model exposing ``forecast(steps, alpha)`` (returning
         a 3-tuple) and ``predict(start, end)``
     :param order: 3-element model order, used in the title and file name
     :param days: number of forecast steps
     """
     # create an n-day forecast of returns with 95%, 99% CI
     # (the second element of each result is not needed here)
     f, _, ci95 = model.forecast(steps=days, alpha=0.05)  # 95% CI
     _, _, ci99 = model.forecast(steps=days, alpha=0.01)  # 99% CI
     # generate a daily date index of n entries anchored at the last date of
     # returns; the index must come from `returns` (the series passed in),
     # there is no `data` name in this scope
     idx = pd.date_range(returns.index[-1], periods=days, freq='D')
     # reconstruct the forecast into dataframe
     fc_95 = pd.DataFrame(
             np.column_stack([f, ci95]),
             index=idx,
             columns=['forecast', 'lower_ci_95', 'upper_ci_95']
             )
     fc_99 = pd.DataFrame(
             np.column_stack([ci99]),
             index=idx,
             columns=['lower_ci_99', 'upper_ci_99']
             )
     # both frames share the same index, so this yields the union of columns
     fc_all = fc_95.combine_first(fc_99)
     logn(fc_all.head())
     # get the returns for last sample days, say 500 days
     sample_days = 500
     ts = returns.iloc[-sample_days:].copy()
     # get the sample prediction over last sample days
     pred = model.predict(ts.index[0], ts.index[-1])
     # construct the title and file name
     title = '{} Day Returns Forecast\nARIMA{}'.format(days, order)
     filename = 'ts_forecast_arima{}{}{}.png'.format(
             order[0], order[1], order[2]
             )
     # do the plotting
     self.g.fcplot(ts, pred, fc_all, title, filename)
Ejemplo n.º 2
0
 def __construct_web_url(self, scrip_code, start_date, end_date):
     """Build the Yahoo Finance chart URL for one scrip and date range.

     :param scrip_code: ticker symbol; URL-quoted into the path
     :param start_date: range start, converted with
         ``DateTime.seconds_from_date``
     :param end_date: range end, converted the same way
     :return: the complete URL string for the GET request
     """
     # query-string fields in the order they must appear in the URL
     query_fields = [
         ('formatted', True),
         ('crumb', '4D5ubVRDG3o'),
         ('lang', 'en-IN'),
         ('region', 'IN'),
         ('period1', DateTime.seconds_from_date(start_date)),
         ('period2', DateTime.seconds_from_date(end_date)),
         ('interval', '1d'),
         ('events', 'div%7Csplit'),
         ('corsDomain', 'in.finance.yahoo.com'),
     ]
     query = '?' + '&'.join(
         '{}={}'.format(key, value) for key, value in query_fields
     )
     # scheme + host + path (with the quoted scrip code) + query string
     base = 'https://' + 'query2.finance.yahoo.com'
     resource = '/v8/finance/chart/{}'.format(quote(scrip_code))
     web_url = base + resource + query
     logn('=' * 40)
     logn(web_url)
     logn('-' * 20)
     summary = 'Looking for ({}) from {} till {}...[Done]'.format(
         scrip_code, start_date, end_date)
     logn(summary)
     return web_url
Ejemplo n.º 3
0
 def analyse_arma_p_q(self, p=1, q=1):
     """Simulate an ARMA(p, q) series, plot it, and compare fitted vs true values.

     The sample size is ``self.__n_samples`` with a tenth of it used as
     burn-in. A fit that raises ``ValueError`` is reported in the log instead
     of being dropped silently; the true parameters are logged either way.

     :param p: AR order of the simulated process
     :param q: MA order of the simulated process
     """
     n = self.__n_samples
     burns = n // 10
     a, b, rts = self.get_sample_data(
             m=SerialCorrelation.ModelType.arma, p=p, q=q, n=n, b=burns
             )
     self.g.tsplot(rts,
                   lags=self.__n_lags,
                   saveas='arma{}{}.png'.format(p, q)
                   )
     try:
         ar_p, ar_o, ma_p, ma_o = self.fit_arma_model_and_estimate_order(
             rts, maxlag=10, order=(p, q),
             method='mle', trend='nc', burnin=burns
             )
     except ValueError as err:
         # keep the analysis going, but make the failed fit visible
         logn('ARMA({}, {}) fit failed: {}'.format(p, q, err))
     else:
         logn('alpha estimate: {} | best ar lag order = {}'
           .format(ar_p, ar_o))
         logn('beta estimate: {} | best ma lag order = {}'
           .format(ma_p, ma_o))

     logn('true alphas = {} | true ar order = {}'
          .format(a, p))
     logn('true betas = {} | true ma order = {}'
          .format(b, q))
Ejemplo n.º 4
0
 def fit_arma_model_and_estimate_order(
         self, data, order=(0, 1), maxlag=None,
         method='mle', trend='nc', burnin=0
         ):
     """Fit ``smt.ARMA`` of the given order to ``data`` and log its summary.

     :param data: series to fit
     :param order: ``(p, q)`` order handed to ``smt.ARMA``
     :param maxlag: forwarded to ``fit``; falls back to ``self.__n_lags``
         when ``None``
     :param method: forwarded to ``fit``
     :param trend: forwarded to ``fit``
     :param burnin: forwarded to ``fit``
     :return: tuple ``(arparams, k_ar, maparams, k_ma)`` read from the fit
     """
     lag_limit = self.__n_lags if maxlag is None else maxlag
     log('Fitting & estimating the ARMA model to the given data...')
     fit_options = {
         'maxlag': lag_limit,
         'method': method,
         'trend': trend,
         'burnin': burnin,
     }
     fitted = smt.ARMA(data, order=order).fit(**fit_options)
     logn('[Done]')
     logn(fitted.summary())
     ar_part = (fitted.arparams, fitted.k_ar)
     ma_part = (fitted.maparams, fitted.k_ma)
     return ar_part + ma_part
Ejemplo n.º 5
0
 def __fetch_and_parse_json(self, web_url: str):
     """Download the chart JSON at ``web_url`` and return it as a dataframe.

     Rows where any price/volume field is ``None`` are skipped. Remaining
     zeros are turned into NaN and every NaN is then filled with the mean of
     its column. When ``self.__save_mode`` is truthy the frame is also
     written to ``self.__csv_path`` before the Date column is parsed.

     :param web_url: URL to query with a GET request
     :return: dataframe indexed by the parsed ``Date`` column with columns
         Open, High, Low, Close, Adjusted Close, Volume
     """
     log('Fetching financial data from Yahoo Finance...')
     # query the web server at URL and decode the JSON response
     web_response = requests.get(web_url).text
     json_obj = json.loads(web_response)
     # get hold of respective fields
     result = json_obj['chart']['result'][0]
     timestamp = result['timestamp']
     indicators = result['indicators']
     quote_fields = indicators['quote'][0]
     opened = quote_fields['open']
     high = quote_fields['high']
     low = quote_fields['low']
     closed = quote_fields['close']
     volume = quote_fields['volume']
     adjclosed = indicators['adjclose'][0]['adjclose']
     logn('[Done]')
     log('Parsing the json response...')
     headers = [
         'Date', 'Open', 'High', 'Low', 'Close', 'Adjusted Close', 'Volume'
     ]
     # walk the parallel lists in lockstep (same order as `headers`) and
     # keep only the rows where every field is present
     fin_data = []
     rows = zip(timestamp, opened, high, low, closed, adjclosed, volume)
     for seconds, *values in rows:
         if any(value is None for value in values):
             continue
         fin_data.append(
             [DateTime.date_string_from_seconds(seconds)] + values
         )
     df = pd.DataFrame(fin_data, columns=headers)
     # turn zeros into NaN so they are filled with the column mean below
     df = df[headers].replace({'0': np.nan, 0: np.nan})
     # replace NaN in each column (Date excluded) with that column's mean;
     # assign the result back instead of a chained in-place fillna, which
     # does not update the frame under pandas copy-on-write
     for header in headers[1:]:
         df[header] = df[header].fillna(df[header].mean())
     logn('[Done]')
     if self.__save_mode:
         log('Exporting the data to CSV file...')
         # save as a CSV file
         csv_path = self.__csv_path
         df.to_csv(csv_path, index=False, encoding='utf-8')
         logn('[Done]')
         logn('Saved file: {}'.format(csv_path))
     # parse Date column as python datetime
     df.Date = df.Date.apply(DateTime.dateparser_short)
     # re-index the dataframe on converted Date column
     df.set_index('Date', drop=True, inplace=True)
     return df
Ejemplo n.º 6
0
 def analyse_ma_q(self, q=1):
     """Simulate an MA(q) series, plot it, and compare fitted vs true betas.

     A fit that raises ``ValueError`` is reported in the log instead of
     being dropped silently; the true parameters are logged either way.

     :param q: MA order of the simulated process
     """
     a, b, rts = self.get_sample_data(
             m=SerialCorrelation.ModelType.ma, q=q
             )
     self.g.tsplot(rts,
                   lags=self.__n_lags,
                   saveas='ma{}.png'.format(q)
                   )
     try:
         # only the MA part of the result is of interest here
         _, _, params, order = self.fit_arma_model_and_estimate_order(
             rts, maxlag=10, order=(0, q), method='mle', trend='nc'
             )
     except ValueError as err:
         # keep the analysis going, but make the failed fit visible
         logn('MA({}) fit failed: {}'.format(q, err))
     else:
         logn('beta estimate: {} | best lag order = {}'
           .format(params, order))
     true_order = q
     logn('true betas = {} | true order = {}'
           .format(b, true_order))
Ejemplo n.º 7
0
 def analyse_ar_1_with_root(self, a=1.0):
     """Simulate x[t] = a * x[t-1] + w[t], plot it, and fit an AR model to it.

     The estimated first coefficient and selected order are logged next to
     the true values (``a`` and 1).

     :param a: AR(1) coefficient; must be non-zero and strictly below 1.0.
         NOTE(review): the default of 1.0 does not satisfy that check, so
         callers have to pass ``a`` explicitly - confirm the intended default.
     :raises AssertionError: when ``a`` is 0.0 or not below 1.0
     """
     assert a != 0.0, 'AR root can not be zero'
     assert a < 1.0, 'AR root must be less than one'
     np.random.seed(self.seed)
     # keep the noise and the simulated series as two separate arrays
     w = np.random.normal(size=self.__n_samples)
     x = w.copy()
     for t in range(1, self.__n_samples):
         x[t] = a * x[t-1] + w[t]
     self.g.tsplot(
             x,
             lags=self.__n_lags,
             saveas='ar1_{:04.2f}.png'.format(a)
             )
     # our simulated AR model has order = 1 with alpha = a
     # if we fit an AR(p) model to the above simulated data and ask it to
     # select the order, the selected values of p and a should match with
     # the actual ones
     params, order = self.fit_ar_model_and_estimate_order(x)
     true_order = 1
     logn('alpha estimate: {:3.2f} | best lag order = {}'
           .format(params[0], order))
     logn('true alpha = {:3.2f} | true order = {}'
           .format(a, true_order))
Ejemplo n.º 8
0
 def fit_ar_model_and_estimate_order(
         self, data, maxlag=None, method='mle', ic='bic', trend='nc'
         ):
     """Fit ``smt.AR`` to ``data`` and select its lag order.

     The fit and the order selection each run on their own ``smt.AR``
     instance, with the same keyword arguments.

     :param data: series to fit
     :param maxlag: upper lag bound; falls back to ``self.__n_lags`` when
         ``None``
     :param method: forwarded to ``fit`` and ``select_order``
     :param ic: information criterion, forwarded to both calls
     :param trend: forwarded to both calls
     :return: tuple ``(fitted params, selected order)``
     """
     shared_kwargs = {
         'maxlag': self.__n_lags if maxlag is None else maxlag,
         'method': method,
         'ic': ic,
         'trend': trend,
     }

     log('Fitting the AR model to the given data...')
     fitted = smt.AR(data).fit(**shared_kwargs)
     logn('[Done]')

     log('Estimating the order of the AR model...')
     selected_order = smt.AR(data).select_order(**shared_kwargs)
     logn('[Done]')

     return fitted.params, selected_order
Ejemplo n.º 9
0
 def analyse_ts_log_returns_as_ar_process(self, data):
     """Plot the ``Close`` column of the logged data and fit an AR model by BIC.

     :param data: input passed to ``self.__logged_data``; the result must
         expose a ``Close`` column
     """
     logged = self.__logged_data(data)
     self.g.tsplot(
             logged.Close,
             lags=self.__n_lags,
             saveas='ts_log_returns.png'
             )
     logn('BIC', '='*20, sep='\n')
     params, order = self.fit_ar_model_and_estimate_order(
             logged.Close, maxlag=10, method='mle', ic='bic', trend='nc'
             )
     # compare by value: `is` tests object identity, which is not a reliable
     # way to check an integer result
     if order == 1:
         # single coefficient, so it can be formatted as a float
         logn('alpha estimate: {:.5f} | best lag order = {}'
           .format(params[0], order))
     else:
         logn('alpha estimate: {} | best lag order = {}'
               .format(params, order))
Ejemplo n.º 10
0
 def analyse_ts_arima(self, data):
     """Grid-search ARIMA(p, d, q) on ``data.LSPY`` by AIC, then forecast.

     p and q range over 0..4 and d over 0..1. Orders whose fit raises are
     skipped. The model with the lowest AIC is summarised, its residuals are
     plotted, and it is handed to ``forecast_ts_arima``. If no order can be
     fitted, that is logged and the method returns without plotting.

     :param data: object exposing the series as ``data.LSPY``
     """
     ts = data.LSPY
     best_ic = np.inf
     best_order = None
     best_mdl = None
     pq_rng = range(5)    # p, q in [0..4]; larger orders are rarely useful
     d_rng = range(2)     # [0,1]
     for i in pq_rng:
         for d in d_rng:
             for j in pq_rng:
                 try:
                     tmp_mdl = smt.ARIMA(ts, order=(i, d, j)).fit(
                             method='mle', trend='nc'
                             )
                     tmp_ic = tmp_mdl.aic    # using aic here
                 except Exception:
                     # this order could not be fitted; try the next one
                     # (Exception, not a bare except, so Ctrl-C still works)
                     continue
                 logn('ic={}, order=({}, {}, {})'.format(tmp_ic, i, d, j))
                 if tmp_ic < best_ic:
                     best_ic = tmp_ic
                     best_order = (i, d, j)
                     best_mdl = tmp_mdl
     if best_mdl is None:
         # every candidate failed; there is nothing to summarise or plot
         logn('no ARIMA model could be fitted for any candidate order')
         return
     logn(best_mdl.summary())
     logn('using AIC', '='*20, sep='\n')
     logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
     logn('estimated alphas = {}'.format(best_mdl.arparams))
     logn('estimated betas = {}'.format(best_mdl.maparams))
     self.g.tsplot(best_mdl.resid,
                   lags=self.__n_lags,
                   saveas='ts_arima{}{}{}_residuals.png'.format(*best_order)
                   )
     # forecasting on the basis of best fit arima model
     self.forecast_ts_arima(ts, best_mdl, best_order)  # ts should have index
Ejemplo n.º 11
0
 def analyse_ts_arma(self, data):
     """Grid-search ARMA(p, q) on the logged ``Close`` series by BIC.

     p and q range over 0..4. Orders whose fit raises are skipped. The model
     with the lowest BIC is summarised and its residuals are plotted. If no
     order can be fitted, that is logged and the method returns without
     plotting.

     :param data: input passed to ``self.__logged_data``; the result must
         expose a ``Close`` column
     """
     ts = self.__logged_data(data).Close
     best_ic = np.inf
     best_order = None
     best_mdl = None
     rng = range(5)      # p, q in [0..4]; larger orders are rarely useful
     for i in rng:
         for j in rng:
             try:
                 tmp_mdl = smt.ARMA(ts, order=(i, j)).fit(
                         method='mle', trend='nc'
                         )
                 tmp_ic = tmp_mdl.bic    # using bic here
             except Exception:
                 # this order could not be fitted; try the next one
                 # (Exception, not a bare except, so Ctrl-C still works)
                 continue
             logn('ic={}, order=({}, {})'.format(tmp_ic, i, j))
             if tmp_ic < best_ic:
                 best_ic = tmp_ic
                 best_order = (i, j)
                 best_mdl = tmp_mdl
     if best_mdl is None:
         # every candidate failed; there is nothing to summarise or plot
         logn('no ARMA model could be fitted for any candidate order')
         return
     logn(best_mdl.summary())
     logn('using BIC', '='*20, sep='\n')
     logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
     logn('estimated alphas = {}'.format(best_mdl.arparams))
     logn('estimated betas = {}'.format(best_mdl.maparams))
     self.g.tsplot(best_mdl.resid,
                   lags=self.__n_lags,
                   saveas='ts_arma{}{}_residuals.png'.format(*best_order)
                   )
Ejemplo n.º 12
0
 def analyse_arma_p_q_best_ic(self, p=1, q=1):
     """Simulate ARMA(p, q) data and recover the order by minimum BIC.

     5000 samples are drawn with a burn-in of 2000. Candidate orders with
     p and q in 0..4 are fitted; those whose fit raises are skipped. The true
     and estimated parameters are logged and the residuals of the best model
     are plotted. If no order can be fitted, that is logged and the method
     returns without plotting the residuals.

     :param p: AR order of the simulated process
     :param q: MA order of the simulated process
     """
     n = 5000
     burns = 2000
     a, b, rts = self.get_sample_data(
             m=SerialCorrelation.ModelType.arma, p=p, q=q, n=n, b=burns
             )
     self.g.tsplot(rts,
                   lags=self.__n_lags,
                   saveas='arma{}{}.png'.format(p, q)
                   )
     # pick best order by minimum ic - aic or bic
     # smallest ic value wins
     best_ic = np.inf
     best_order = None
     best_mdl = None
     rng = range(5)
     for i in rng:
         for j in rng:
             try:
                 tmp_mdl = smt.ARMA(rts, order=(i, j)).fit(
                         method='mle', trend='nc'
                         )
                 tmp_ic = tmp_mdl.bic    # using bic here
             except Exception:
                 # this order could not be fitted; try the next one
                 # (Exception, not a bare except, so Ctrl-C still works)
                 continue
             if tmp_ic < best_ic:
                 best_ic = tmp_ic
                 best_order = (i, j)
                 best_mdl = tmp_mdl
     if best_mdl is None:
         # every candidate failed; there is nothing to summarise or plot
         logn('no ARMA model could be fitted for any candidate order')
         return
     logn(best_mdl.summary())
     logn('using BIC', '='*20, sep='\n')
     logn('true order: ({}, {})'.format(p, q))
     logn('true alphas = {}'.format(a))
     logn('true betas = {}'.format(b))
     logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
     logn('estimated alphas = {}'.format(best_mdl.arparams))
     logn('estimated betas = {}'.format(best_mdl.maparams))
     # analysing the model residuals with the estimated information
     # the residuals should be a white noise process with no serial
     # correlation for any lag, if this is the case then we can say
     # that the best model has been fit to explain the data
     self.g.tsplot(best_mdl.resid,
                   lags=self.__n_lags,
                   saveas='arma{}{}_residuals.png'.format(*best_order)
                   )
Ejemplo n.º 13
0
 def analyse_ar_p(self, p=1):
     """Simulate an AR(p) series, plot it, and estimate it under AIC then BIC.

     For each criterion the estimated parameters and selected lag order are
     logged, followed by the true alphas and the true order ``p``.

     :param p: AR order of the simulated process
     """
     true_alphas, _, series = self.get_sample_data(
         m=SerialCorrelation.ModelType.ar, p=p
     )
     plot_name = 'ar{}.png'.format(p)
     self.g.tsplot(series, lags=self.__n_lags, saveas=plot_name)

     # (banner, criterion) pairs, processed in this order
     criteria = (('AIC', 'aic'), ('BIC', 'bic'))
     for position, (banner, criterion) in enumerate(criteria):
         if position:
             # blank log line between the two sections
             logn()
         logn(banner, '='*20, sep='\n')
         est_params, est_order = self.fit_ar_model_and_estimate_order(
             series, maxlag=10, method='mle', ic=criterion, trend='nc'
         )
         estimate_line = 'alpha estimate: {} | best lag order = {}'.format(
             est_params, est_order)
         logn(estimate_line)
         truth_line = 'true alphas = {} | true order = {}'.format(
             true_alphas, p)
         logn(truth_line)