def start():
    """Run the Data Analyzer for ``word_to_analyze``.

    Loads tweets from the per-word SQLite database, optionally prints
    positive/negative word counts and the row count, then builds the
    combined sentiment (Xt) / stock-return (Rt) series and runs the
    analyses selected by the module-level flags ``count_words``,
    ``count_rows``, ``test_var``, ``test_granger`` and ``plot_figure``.

    NOTE(review): relies on module-level names defined elsewhere in the
    file (``word_to_analyze``, ``maxlag``, ``Tweet``, ``Base``,
    ``helpers``, ``pd``, ``plt``, ``sm``, ``VAR``, ``create_engine``,
    ``sessionmaker``) — none are visible in this chunk.
    """
    engine = create_engine('sqlite:///' + word_to_analyze + '.sqlite')
    session = sessionmaker()
    session.configure(bind=engine)
    Base.metadata.create_all(engine)
    s = session()

    if count_words:
        # Sum the per-tweet positive/negative word counts over all rows.
        positive_counter = 0
        negative_counter = 0
        for positive_words, negative_words in s.query(
                Tweet.positive_words, Tweet.negative_words):
            positive_counter += positive_words
            negative_counter += negative_words
        print(word_to_analyze + " had " + str(positive_counter) +
              " positive words and " + str(negative_counter) +
              " negative words.")

    if count_rows:
        print("Number of tweets used from " + word_to_analyze + ": ")
        print(helpers.countRows(s, Tweet))

    # Normalized series: Xt from the DB, Rt from the stock CSV.
    norm_Xt_dict = helpers.getXFromData(s, Tweet, True)
    norm_Rt_dict = helpers.getRFromCSV(
        '2017/10/01', '2017/12/31',
        'data/stock/' + word_to_analyze + '-stock-data' + '.csv', True)
    combined_2d_results_log = helpers.combineRtandXt(norm_Xt_dict,
                                                     norm_Rt_dict)

    # VAR
    if test_var:
        pd_data = pd.DataFrame(combined_2d_results_log,
                               columns=['Rt', 'Xt'])
        var_result = VAR(pd_data).fit(maxlag)
        print(var_result.summary())
        var_result.test_causality('Rt', 'Xt')
        # Descriptive statistics can be plotted with
        # var_result.plot_sample_acorr() if needed.

    # GRANGER CAUSALITY ANALYSIS
    if test_granger:
        result = sm.tsa.stattools.grangercausalitytests(
            combined_2d_results_log, maxlag, addconst=True, verbose=True)

    # PLOT DATA
    if plot_figure:
        # Unnormalized series for the figure.
        Xt_dict = helpers.getXFromData(s, Tweet)
        Rt_dict = helpers.getRFromCSV(
            '2017/10/01', '2017/12/31',
            'data/stock/' + word_to_analyze + '-stock-data' + '.csv')

        Xt_df = pd.DataFrame(list(Xt_dict.items()), columns=['Date', 'Xt'])
        Xt_df['Date'] = pd.to_datetime(Xt_df['Date'])
        Rt_df = pd.DataFrame(list(Rt_dict.items()), columns=['Date', 'Rt'])
        Rt_df['Date'] = pd.to_datetime(Rt_df['Date'])

        Xt_df = Xt_df.sort_values('Date', ascending=True)
        plt.plot(Xt_df['Date'], Xt_df['Xt'],
                 label='Twitter sentiment', color='black')
        plt.xticks(rotation='horizontal')

        Rt_df = Rt_df.sort_values('Date', ascending=True)
        plt.plot(Rt_df['Date'], Rt_df['Rt'],
                 label='Stock return', dashes=[6, 2], color='black')
        # BUG FIX: the original passed the DataFrames themselves as legend
        # handles (plt.legend([Xt_df, Rt_df], [...])), which matplotlib
        # cannot use as artists.  Both plot() calls already carry label=,
        # so a bare legend() picks up the correct entries.
        plt.legend()
        plt.xticks(rotation='horizontal')

        # BUG FIX: the original used `word_to_analyze is 'ibm'`, which
        # tests object identity against a string literal and is not a
        # reliable equality check — use == instead.
        if word_to_analyze == 'ibm':
            plt.suptitle(word_to_analyze.upper(), fontsize=20)
        else:
            plt.suptitle(word_to_analyze.title(), fontsize=20)
        plt.show()
# Lag-order selection: fit a VAR at each candidate lag and print the
# information criteria so the best order can be chosen by inspection.
# NOTE(review): `train`, `test`, `main_db` and `VAR` are defined elsewhere
# in the file — not visible in this chunk.
print('\n')
model = VAR(train)
for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]:
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')

# Automatic order selection up to 12 lags.
x = model.select_order(maxlags=12)
x.summary()

# Fit at the chosen order; note this REBINDS `model` from the VAR
# specification object to its fitted-results object.
model = model.fit(10)
model.summary()

# Persist the fitted model and reload it.
# NOTE(review): the file handles passed to pickle are never closed —
# consider `with open(...)` blocks.  pickle.load on untrusted files is
# unsafe; here the file was just written by this script.
import pickle
pickle.dump(model, open('model.pkl','wb'))
model_fitted = pickle.load(open('model.pkl', 'rb'))

# Last 10000 observations as forecast seed.
forecast_input = train.values[-10000:]
forecast_input  # no-op outside a notebook — presumably left over from one

temp_train = train.copy(deep=True)
temp_db = main_db.copy(deep=True)

# NOTE(review): this chunk is TRUNCATED here — the dict literal below is
# never closed and the loop body continues outside the visible source.
for idx, row in test.iterrows():
    x = {
        'RESP': row['RESP'],
        'HR': row['HR'],
# Prepare the quarterly macro dataset and estimate a VAR on it.
# NOTE(review): `mdata`, `quarterly`, `ADF`, `VAR`, `np` and `pd` are
# defined elsewhere in the file — not visible in this chunk.
mdata.index = pd.DatetimeIndex(quarterly)
mdata['r'] = mdata['r10'] - mdata['Tbill']          # term spread: 10y minus T-bill
mdata['IndProd'] = np.log(mdata['IndProd']).diff()  # log-difference (growth rate)
mdata['Unemp'] = mdata['Unemp'].diff()              # first difference
mdata = mdata.drop(['r10', 'Tbill'], axis=1).dropna()

# ADF Test — unit-root check on each transformed series
print(ADF(mdata['r']).summary())
print(ADF(mdata['IndProd']).summary())
print(ADF(mdata['Unemp']).summary())

# VAR fit (no constant term)
# NOTE(review): trend='nc' is the legacy spelling; newer statsmodels uses
# trend='n' — confirm against the installed version.
results = VAR(mdata).fit(ic='bic', verbose=True, trend='nc')
results.plot()
print(results.summary())

# Selected lag order
print('Selected Order:', results.k_ar)

# AIC & BIC of different lags — printed as LaTeX table rows ("a & b \\")
for p in range(8):
    res = VAR(mdata).fit(p, trend='nc')
    print(res.k_ar, '&', round(res.aic, 6), '&', round(res.bic, 6), '\\\\')

# Stability — True when all companion-matrix eigenvalues lie inside the
# unit circle (verbose=True also prints them)
print(results.is_stable(True))

# Residual normality
print(results.test_normality().summary())

# Granger causality
return (res) #def VAR(X): # model = VAR(X) # results = model.fit(4) # results = results.params # return results #this finds the optimal lag for the VAR series X = data() X = X[['CS_BHY_3MO', '3MO_TY']] N = 10 BIC = np.zeros((N, 1)) for i in range(N): model = VAR(X) model = model.fit(i + 1) BIC[i] = model.bic results = model.summary() BIC_min = np.min(BIC) model_min = np.argmin(BIC) print('Relative Likelihoods') print(np.exp((BIC_min - BIC) / 2)) print('Number of parameters in minimum BIC model %s' % (model_min + 1)) print(results)
model11.summary()

######## Step 12 #########
# Spectral check for seasonality via the periodogram.
# NOTE(review): `bitcoin`, `plt`, `pd` and `model11` are defined elsewhere
# in the file — not visible in this chunk.
from scipy import signal as sg
f, Pxx_den = sg.periodogram(bitcoin['bprice'], 10e3)  # seasonality should be seen
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.semilogy(f, Pxx_den)

# Differenced variable — should look like "skyscrapers", i.e. no seasonality
f, Pxx_den = sg.periodogram(bitcoin['dbprice'], 10e3)
# Still there is no seasonality confirm with professor
plt.semilogy(f, Pxx_den)

######## Step 13 #########
# Fit a VAR on bitcoin price plus the macro covariates.
from statsmodels.tsa.api import VAR
bitcoin.index = bitcoin['Date']
# BUG FIX: the original passed `axis` positionally (pd.concat((...), 1)),
# which was deprecated and removed in pandas 2.0 — pass it by keyword.
xdata = pd.concat((bitcoin['bprice'], bitcoin['sp'], bitcoin['euro'],
                   bitcoin['gold'], bitcoin['oil']), axis=1)
model13 = VAR(xdata).fit(maxlags=3)
model13.summary()

######## Step 14 #########
# Forecasting using VAR model: forecast() seeds the recursion with the
# last k_ar rows of the supplied history.
model13.forecast(xdata.values, steps=30)
model13.plot()