def forecast_ts_arima(self, returns, model, order, days=21):
    """Create an n-day forecast of returns with 95% and 99% CIs and plot it.

    Parameters:
        returns: observed returns series (assumed DatetimeIndex — used to
            anchor the forecast dates and slice the in-sample window).
        model: fitted statsmodels ARIMA results object (provides
            forecast()/predict()).
        order: (p, d, q) tuple, used only for the plot title and file name.
        days: forecast horizon in days (default 21).
    """
    # create an n-day forecast of returns with 95%, 99% CI
    f, err95, ci95 = model.forecast(steps=days, alpha=0.05)  # 95% CI
    _, err99, ci99 = model.forecast(steps=days, alpha=0.01)  # 99% CI
    # generate date index for next n-days excluding last day of returns
    # FIX: the original referenced an undefined name `data`; also
    # date_range started ON the last observed day, so the first forecast
    # value was stamped onto an already-observed date — start one day after
    idx = pd.date_range(returns.index[-1], periods=days + 1, freq='D')[1:]
    # reconstruct the forecast into dataframes
    fc_95 = pd.DataFrame(
        np.column_stack([f, ci95]), index=idx,
        columns=['forecast', 'lower_ci_95', 'upper_ci_95']
    )
    fc_99 = pd.DataFrame(
        np.column_stack([ci99]), index=idx,
        columns=['lower_ci_99', 'upper_ci_99']
    )
    # merge the two CI frames on the shared date index
    fc_all = fc_95.combine_first(fc_99)
    logn(fc_all.head())
    # get the returns for last sample days, say 500 days
    sample_days = 500
    ts = returns.iloc[-sample_days:].copy()
    # get the in-sample prediction over the last sample days
    pred = model.predict(ts.index[0], ts.index[-1])
    # construct the title and file name
    title = '{} Day Returns Forecast\nARIMA{}'.format(days, order)
    filename = 'ts_forecast_arima{}{}{}.png'.format(
        order[0], order[1], order[2]
    )
    # do the plotting
    self.g.fcplot(ts, pred, fc_all, title, filename)
def __construct_web_url(self, scrip_code, start_date, end_date):
    """Build the Yahoo Finance v8 chart-API URL for a scrip and date range.

    Parameters:
        scrip_code: ticker symbol; URL-quoted into the request path.
        start_date, end_date: converted to epoch seconds via
            DateTime.seconds_from_date for period1/period2.

    Returns:
        The fully-populated GET URL as a string.
    """
    # components of the URL
    protocol = 'https://'
    host_name = 'query2.finance.yahoo.com'
    path_fmt = '/v8/finance/chart/{}'
    # FIX: the params template contained a mojibake '®ion=' where
    # '&region=' was intended ('&reg' had been HTML-entity-decoded),
    # which produced a malformed query string
    params_fmt = '?formatted={}&crumb={}&lang={}&region={}&period1={}&' + \
        'period2={}&interval={}&events={}&corsDomain={}'
    # constant initializations needed in the URL
    formatted = True
    crumb = '4D5ubVRDG3o'
    lang = 'en-IN'
    region = 'IN'
    period1 = DateTime.seconds_from_date(start_date)
    period2 = DateTime.seconds_from_date(end_date)
    interval = '1d'
    events = 'div%7Csplit'
    corsDomain = 'in.finance.yahoo.com'
    # populate the path and other parameters using the computed values
    filled_path = path_fmt.format(quote(scrip_code))
    filled_params = params_fmt.format(formatted, crumb, lang, region,
                                      period1, period2, interval, events,
                                      corsDomain)
    # constructing the complete url for GET request
    web_url = protocol + host_name + filled_path + filled_params
    logn('=' * 40)
    logn(web_url)
    logn('-' * 20)
    logn('Looking for ({}) from {} till {}...[Done]'.format(
        scrip_code, start_date, end_date))
    return web_url
def analyse_arma_p_q(self, p=1, q=1):
    """Simulate an ARMA(p, q) series, plot it, and compare the fitted
    parameters/orders against the true ones used for simulation."""
    total = self.__n_samples
    warmup = total // 10  # discard the first 10% of samples as burn-in
    true_alphas, true_betas, series = self.get_sample_data(
        m=SerialCorrelation.ModelType.arma, p=p, q=q, n=total, b=warmup
    )
    self.g.tsplot(
        series, lags=self.__n_lags, saveas='arma{}{}.png'.format(p, q)
    )
    try:
        estimates = self.fit_arma_model_and_estimate_order(
            series, maxlag=10, order=(p, q), method='mle',
            trend='nc', burnin=warmup
        )
        alpha_est, ar_order, beta_est, ma_order = estimates
        logn('alpha estimate: {} | best ar lag order = {}'
             .format(alpha_est, ar_order))
        logn('beta estimate: {} | best ma lag order = {}'
             .format(beta_est, ma_order))
    except ValueError:
        # the MLE fit may fail to converge on some simulated samples
        pass
    logn('true alphas = {} | true ar order = {}'.format(true_alphas, p))
    logn('true betas = {} | true ma order = {}'.format(true_betas, q))
def fit_arma_model_and_estimate_order(
        self, data, order=(0, 1), maxlag=None,
        method='mle', trend='nc', burnin=0
):
    """Fit an ARMA model to `data` and report its parameters and lag orders.

    Falls back to the instance-wide lag count when `maxlag` is None.

    Returns:
        A 4-tuple (ar params, ar order, ma params, ma order).
    """
    lag_limit = self.__n_lags if maxlag is None else maxlag
    log('Fitting & estimating the ARMA model to the given data...')
    fitted = smt.ARMA(data, order=order).fit(
        maxlag=lag_limit, method=method, trend=trend, burnin=burnin
    )
    logn('[Done]')
    logn(fitted.summary())
    return fitted.arparams, fitted.k_ar, fitted.maparams, fitted.k_ma
def __fetch_and_parse_json(self, web_url: str):
    """Fetch OHLCV data from Yahoo Finance and return a Date-indexed DataFrame.

    Rows where any price/volume field is None are skipped; zeros are then
    treated as missing and replaced with the column mean. When
    self.__save_mode is set, the frame is also exported to self.__csv_path.

    Parameters:
        web_url: fully-built chart-API URL (see __construct_web_url).

    Returns:
        pd.DataFrame indexed by parsed Date with columns Open, High, Low,
        Close, Adjusted Close, Volume.
    """
    log('Fetching financial data from Yahoo Finance...')
    # query the web server at URL and return the JSON response
    web_request = requests.get(web_url)
    web_response = web_request.text
    # get hold of respective fields
    json_obj = json.loads(web_response)
    timestamp = json_obj['chart']['result'][0]['timestamp']
    indicators = json_obj['chart']['result'][0]['indicators']
    opened = indicators['quote'][0]['open']
    high = indicators['quote'][0]['high']
    low = indicators['quote'][0]['low']
    closed = indicators['quote'][0]['close']
    volume = indicators['quote'][0]['volume']
    adjclosed = indicators['adjclose'][0]['adjclose']
    logn('[Done]')
    log('Parsing the json response...')
    fin_data = []
    headers = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Adjusted Close', 'Volume'
    ]
    # extract information of each field and keep in a list of lists
    for index in range(len(timestamp)):
        fields = (opened[index], high[index], low[index],
                  closed[index], volume[index], adjclosed[index])
        # skip incomplete rows — any missing field invalidates the bar
        if any(field is None for field in fields):
            continue
        fin_data.append([
            DateTime.date_string_from_seconds(timestamp[index]),
            opened[index],
            high[index],
            low[index],
            closed[index],
            adjclosed[index],
            volume[index],
        ])
    df = pd.DataFrame(fin_data, columns=headers)
    # replacing zeros with NaN so that they can be handled later
    df = df[headers].replace({'0': np.nan, 0: np.nan})
    # ignore date column for mean calculations
    headers = headers[1:]
    # replace NaN in each column with respective column mean
    # FIX: assign the result back instead of inplace-fillna on a column
    # selection, which is deprecated chained-assignment in modern pandas
    # and may silently not write through to `df`
    for header in headers:
        df[header] = df[header].fillna(df[header].mean())
    logn('[Done]')
    if self.__save_mode:
        log('Exporting the data to CSV file...')
        # save as a CSV file
        csv_path = self.__csv_path
        df.to_csv(csv_path, index=False, encoding='utf-8')
        logn('[Done]')
        logn('Saved file: {}'.format(csv_path))
    # parse Date column as python datetime
    df.Date = df.Date.apply(DateTime.dateparser_short)
    # re-index the dataframe on converted Date column
    df.set_index('Date', drop=True, inplace=True)
    return df
def analyse_ma_q(self, q=1):
    """Simulate an MA(q) series, plot it, and compare the fitted betas and
    lag order against the true simulation values."""
    _, true_betas, series = self.get_sample_data(
        m=SerialCorrelation.ModelType.ma, q=q
    )
    self.g.tsplot(
        series, lags=self.__n_lags, saveas='ma{}.png'.format(q)
    )
    try:
        _, _, beta_est, est_order = self.fit_arma_model_and_estimate_order(
            series, maxlag=10, order=(0, q), method='mle', trend='nc'
        )
        logn('beta estimate: {} | best lag order = {}'
             .format(beta_est, est_order))
    except ValueError:
        # the MLE fit may fail to converge on some simulated samples
        pass
    logn('true betas = {} | true order = {}'.format(true_betas, q))
def analyse_ar_1_with_root(self, a=1.0):
    """Simulate an AR(1) process x[t] = a * x[t-1] + w[t] and recover `a`
    by fitting an AR(p) model with automatic order selection.

    Parameters:
        a: true AR(1) coefficient; must be non-zero and less than one.
    """
    assert a != 0.0, 'AR root can not be zero'
    assert a < 1.0, 'AR root can not be greater than one'
    np.random.seed(self.seed)
    # FIX: keep the noise and the simulated series in separate buffers;
    # the original aliased them (x = w = np.random.normal(...)), which
    # only produced correct results because w[t] happened to be read
    # before x[t] was overwritten at the same index — fragile to reorder
    w = np.random.normal(size=self.__n_samples)
    x = w.copy()
    for t in range(1, self.__n_samples):
        x[t] = a * x[t - 1] + w[t]
    self.g.tsplot(
        x, lags=self.__n_lags, saveas='ar1_{:04.2f}.png'.format(a)
    )
    # our simulated AR model has order 1 with alpha = a; if we fit an
    # AR(p) model to the simulated data and ask it to select the order,
    # the selected p and alpha should match the actual ones
    params, order = self.fit_ar_model_and_estimate_order(x)
    true_order = 1
    logn('alpha estimate: {:3.2f} | best lag order = {}'
         .format(params[0], order))
    logn('true alpha = {:3.2f} | true order = {}'
         .format(a, true_order))
def fit_ar_model_and_estimate_order(
        self, data, maxlag=None, method='mle', ic='bic', trend='nc'
):
    """Fit an AR model to `data` and estimate its lag order.

    Falls back to the instance-wide lag count when `maxlag` is None.

    Returns:
        A 2-tuple (fitted parameters, estimated order).
    """
    lag_limit = self.__n_lags if maxlag is None else maxlag
    log('Fitting the AR model to the given data...')
    fitted = smt.AR(data).fit(
        maxlag=lag_limit, method=method, ic=ic, trend=trend
    )
    logn('[Done]')
    log('Estimating the order of the AR model...')
    best_order = smt.AR(data).select_order(
        maxlag=lag_limit, method=method, ic=ic, trend=trend
    )
    logn('[Done]')
    return fitted.params, best_order
def analyse_ts_log_returns_as_ar_process(self, data):
    """Model the log returns of a price series as an AR process, selecting
    the lag order by BIC, and report the estimated alphas.

    Parameters:
        data: price data accepted by self.__logged_data (must expose a
            Close column after the log transform).
    """
    logged = self.__logged_data(data)
    self.g.tsplot(
        logged.Close, lags=self.__n_lags, saveas='ts_log_returns.png'
    )
    logn('BIC', '=' * 20, sep='\n')
    params, order = self.fit_ar_model_and_estimate_order(
        logged.Close, maxlag=10, method='mle', ic='bic', trend='nc'
    )
    # FIX: compare the integer order with ==, not `is` — identity on
    # small ints only works due to CPython's int caching
    if order == 1:
        logn('alpha estimate: {:.5f} | best lag order = {}'
             .format(params[0], order))
    else:
        logn('alpha estimate: {} | best lag order = {}'
             .format(params, order))
def analyse_ts_arima(self, data):
    """Grid-search ARIMA(p, d, q) over the LSPY series by minimum AIC,
    plot the best model's residuals, then forecast with it.

    Searches p, q in [0, 5) and d in [0, 2); fit failures for individual
    orders are skipped so the search can continue.
    """
    ts = data.LSPY
    best_ic = np.inf
    best_order = None
    best_mdl = None
    pq_rng = range(5)  # orders greater than 5 are not practically useful
    d_rng = range(2)  # [0,1]
    for i in pq_rng:
        for d in d_rng:
            for j in pq_rng:
                try:
                    tmp_mdl = smt.ARIMA(ts, order=(i, d, j)).fit(
                        method='mle', trend='nc'
                    )
                    tmp_ic = tmp_mdl.aic  # using aic here
                    logn('ic={}, order=({}, {}, {})'.format(tmp_ic, i, d, j))
                    if tmp_ic < best_ic:
                        best_ic = tmp_ic
                        best_order = (i, d, j)
                        best_mdl = tmp_mdl
                # FIX: catch Exception instead of a bare except, which
                # also swallowed KeyboardInterrupt/SystemExit
                except Exception:
                    continue
    logn(best_mdl.summary())
    logn('using AIC', '=' * 20, sep='\n')
    logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
    logn('estimated alphas = {}'.format(best_mdl.arparams))
    logn('estimated betas = {}'.format(best_mdl.maparams))
    # residual diagnostics for the winning model
    self.g.tsplot(
        best_mdl.resid, lags=self.__n_lags,
        saveas='ts_arima{}{}{}_residuals.png'.format(
            best_order[0], best_order[1], best_order[2]
        )
    )
    # forecasting on the basis of best fit arima model
    self.forecast_ts_arima(ts, best_mdl, best_order)  # ts should have index
def analyse_ts_arma(self, data):
    """Grid-search ARMA(p, q) over the log-transformed Close series by
    minimum BIC and plot the best model's residuals.

    Searches p, q in [0, 5); fit failures for individual orders are
    skipped so the search can continue.
    """
    ts = self.__logged_data(data).Close
    best_ic = np.inf
    best_order = None
    best_mdl = None
    rng = range(5)  # orders greater than 5 are not practically useful
    for i in rng:
        for j in rng:
            try:
                tmp_mdl = smt.ARMA(ts, order=(i, j)).fit(
                    method='mle', trend='nc'
                )
                tmp_ic = tmp_mdl.bic  # using bic here
                logn('ic={}, order=({}, {})'.format(tmp_ic, i, j))
                if tmp_ic < best_ic:
                    best_ic = tmp_ic
                    best_order = (i, j)
                    best_mdl = tmp_mdl
            # FIX: catch Exception instead of a bare except, which also
            # swallowed KeyboardInterrupt/SystemExit
            except Exception:
                continue
    logn(best_mdl.summary())
    logn('using BIC', '=' * 20, sep='\n')
    logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
    logn('estimated alphas = {}'.format(best_mdl.arparams))
    logn('estimated betas = {}'.format(best_mdl.maparams))
    # residual diagnostics for the winning model
    self.g.tsplot(
        best_mdl.resid, lags=self.__n_lags,
        saveas='ts_arma{}{}_residuals.png'.format(
            best_order[0], best_order[1]
        )
    )
def analyse_arma_p_q_best_ic(self, p=1, q=1, n=5000, burns=2000):
    """Simulate ARMA(p, q), grid-search the best (p, q) by minimum BIC,
    and check the winning model's residuals for whiteness.

    Parameters:
        p, q: true AR/MA orders of the simulated series.
        n: number of samples to simulate (generalized from the previously
            hard-coded 5000; default preserves old behaviour).
        burns: burn-in samples to discard (generalized from the previously
            hard-coded 2000; default preserves old behaviour).
    """
    a, b, rts = self.get_sample_data(
        m=SerialCorrelation.ModelType.arma, p=p, q=q, n=n, b=burns
    )
    self.g.tsplot(
        rts, lags=self.__n_lags, saveas='arma{}{}.png'.format(p, q)
    )
    # pick best order by minimum ic - aic or bic; smallest ic value wins
    best_ic = np.inf
    best_order = None
    best_mdl = None
    rng = range(5)
    for i in rng:
        for j in rng:
            try:
                tmp_mdl = smt.ARMA(rts, order=(i, j)).fit(
                    method='mle', trend='nc'
                )
                tmp_ic = tmp_mdl.bic  # using bic here
                if tmp_ic < best_ic:
                    best_ic = tmp_ic
                    best_order = (i, j)
                    best_mdl = tmp_mdl
            # FIX: catch Exception instead of a bare except, which also
            # swallowed KeyboardInterrupt/SystemExit
            except Exception:
                continue
    logn(best_mdl.summary())
    logn('using BIC', '=' * 20, sep='\n')
    logn('true order: ({}, {})'.format(p, q))
    logn('true alphas = {}'.format(a))
    logn('true betas = {}'.format(b))
    logn('ic: {:6.5f} | estimated order: {}'.format(best_ic, best_order))
    logn('estimated alphas = {}'.format(best_mdl.arparams))
    logn('estimated betas = {}'.format(best_mdl.maparams))
    # analysing the model residuals with the estimated information:
    # the residuals should be a white noise process with no serial
    # correlation for any lag; if this is the case then we can say
    # that the best model has been fit to explain the data
    self.g.tsplot(
        best_mdl.resid, lags=self.__n_lags,
        saveas='arma{}{}_residuals.png'.format(
            best_order[0], best_order[1]
        )
    )
def analyse_ar_p(self, p=1):
    """Simulate an AR(p) series and estimate its order and parameters
    under both the AIC and BIC selection criteria, reporting each."""
    true_alphas, _, series = self.get_sample_data(
        m=SerialCorrelation.ModelType.ar, p=p
    )
    self.g.tsplot(
        series, lags=self.__n_lags, saveas='ar{}.png'.format(p)
    )
    # repeat the fit once per information criterion
    for position, criterion in enumerate(('aic', 'bic')):
        if position:
            logn()  # blank separator between the two reports
        logn(criterion.upper(), '=' * 20, sep='\n')
        params, order = self.fit_ar_model_and_estimate_order(
            series, maxlag=10, method='mle', ic=criterion, trend='nc'
        )
        logn('alpha estimate: {} | best lag order = {}'
             .format(params, order))
        logn('true alphas = {} | true order = {}'
             .format(true_alphas, p))