def start():
    """Run the Data Analyzer pipeline for ``word_to_analyze``.

    Opens the per-word SQLite database of collected tweets and, depending
    on the module-level configuration flags, performs:

    - ``count_words``:  sum and print positive/negative word counts.
    - ``count_rows``:   print the number of tweet rows used.
    - ``test_var``:     fit a VAR model on (Rt, Xt) and print its summary.
    - ``test_granger``: run Granger-causality tests up to ``maxlag``.
    - ``plot_figure``:  plot Twitter sentiment vs. stock return over time.

    Relies on module-level globals: ``word_to_analyze``, ``count_words``,
    ``count_rows``, ``test_var``, ``test_granger``, ``plot_figure``,
    ``maxlag``, plus the imported ``helpers``, ``Tweet``, ``Base``, etc.
    """
    # One SQLite file per analyzed word, e.g. 'ibm.sqlite'.
    engine = create_engine('sqlite:///' + word_to_analyze + '.sqlite')
    session = sessionmaker()
    session.configure(bind=engine)
    Base.metadata.create_all(engine)
    s = session()

    if count_words:
        positive_counter = 0
        negative_counter = 0
        for positive_words, negative_words in s.query(
                Tweet.positive_words, Tweet.negative_words):
            positive_counter += positive_words
            negative_counter += negative_words
        print(word_to_analyze + " had " + str(positive_counter) +
              " positive words and " + str(negative_counter) +
              " negative words.")

    if count_rows:
        print("Number of tweets used from " + word_to_analyze + ": ")
        print(helpers.countRows(s, Tweet))

    # Normalized sentiment series (Xt) and stock-return series (Rt),
    # combined into one 2-D structure for the statistical tests below.
    norm_Xt_dict = helpers.getXFromData(s, Tweet, True)
    norm_Rt_dict = helpers.getRFromCSV(
        '2017/10/01', '2017/12/31',
        'data/stock/' + word_to_analyze + '-stock-data' + '.csv', True)
    combined_2d_results_log = helpers.combineRtandXt(norm_Xt_dict,
                                                     norm_Rt_dict)

    # VAR
    if test_var:
        pd_data = pd.DataFrame(combined_2d_results_log,
                               columns=['Rt', 'Xt'])
        var_result = VAR(pd_data).fit(maxlag)
        print(var_result.summary())
        var_result.test_causality('Rt', 'Xt')
        # EXAMPLE OF HOW DESCRIPTIVE STATISTICS COULD BE PLOTTED:
        # fig = plt.subplots()
        # fig = var_result.plot_sample_acorr()
        # ax.set_ylabel("Y lable")
        # ax.set_xlabel("X lable")
        # ax.set_title("Title")
        # plt.show()

    # GRANGER CAUSALITY ANALYSIS
    if test_granger:
        result = sm.tsa.stattools.grangercausalitytests(
            combined_2d_results_log, maxlag, addconst=True, verbose=True)

    # PLOT DATA
    if plot_figure:
        # Un-normalized series for the visualisation.
        Xt_dict = helpers.getXFromData(s, Tweet)
        Rt_dict = helpers.getRFromCSV(
            '2017/10/01', '2017/12/31',
            'data/stock/' + word_to_analyze + '-stock-data' + '.csv')
        Xt_df = pd.DataFrame(list(Xt_dict.items()), columns=['Date', 'Xt'])
        Xt_df['Date'] = pd.to_datetime(Xt_df['Date'])
        Rt_df = pd.DataFrame(list(Rt_dict.items()), columns=['Date', 'Rt'])
        Rt_df['Date'] = pd.to_datetime(Rt_df['Date'])

        Xt_df = Xt_df.sort_values('Date', ascending=True)
        plt.plot(Xt_df['Date'], Xt_df['Xt'],
                 label='Twitter sentiment', color='black')
        plt.xticks(rotation='horizontal')
        Rt_df = Rt_df.sort_values('Date', ascending=True)
        plt.plot(Rt_df['Date'], Rt_df['Rt'],
                 label='Stock return', dashes=[6, 2], color='black')
        # BUG FIX: the original passed DataFrames as the legend's handle
        # list; matplotlib expects Artist handles there. The `label=`
        # kwargs on plt.plot already name both lines, so plain legend()
        # picks them up correctly.
        plt.legend()
        plt.xticks(rotation='horizontal')
        # BUG FIX: `is 'ibm'` compared object identity, not equality —
        # it only worked by accident of CPython string interning.
        if word_to_analyze == 'ibm':
            plt.suptitle(word_to_analyze.upper(), fontsize=20)
        else:
            plt.suptitle(word_to_analyze.title(), fontsize=20)
        plt.show()
# VAR fit (no constant term)
results = VAR(mdata).fit(ic='bic', verbose=True, trend='nc')
results.plot()
print(results.summary())

# Selected lag order
print('Selected Order:', results.k_ar)

# AIC & BIC of different lags (printed as LaTeX table rows)
for p in range(8):
    res = VAR(mdata).fit(p, trend='nc')
    print(res.k_ar, '&', round(res.aic, 6), '&', round(res.bic, 6), '\\\\')

# Stability
print(results.is_stable(True))

# Residual normality
print(results.test_normality().summary())

# Granger causality: for each factor, test whether it is caused by
# the other two factors.
names = ['r', 'IndProd', 'Unemp']
for n in names:
    print('Granger for', n)
    # BUG FIX: the original used `set(n)`, which splits the string into
    # characters — e.g. set('IndProd') contains 'r', so the "complement"
    # wrongly dropped 'r' and kept 'IndProd' itself. `{n}` removes only
    # the current name.
    print(results.test_causality(n, list(set(names) - {n})).summary())

# Impulse Response Analysis
irf = results.irf(10)
irf.plot(orth=False)
plt.show()