# Example #1
def start():
    """Run the analysis pipeline for the module-level ``word_to_analyze``.

    Opens the per-word SQLite database, then — depending on the
    module-level flags ``count_words``, ``count_rows``, ``test_var``,
    ``test_granger`` and ``plot_figure`` — prints sentiment word counts,
    prints the number of stored tweets, fits a VAR model, runs a Granger
    causality test, and/or plots sentiment vs. stock return over time.
    """
    engine = create_engine('sqlite:///' + word_to_analyze + '.sqlite')
    session = sessionmaker()
    session.configure(bind=engine)
    Base.metadata.create_all(engine)

    s = session()

    if count_words:
        # Sum the per-tweet positive/negative word counts over all tweets.
        positive_counter = 0
        negative_counter = 0

        for positive_words, negative_words in s.query(Tweet.positive_words,
                                                      Tweet.negative_words):
            positive_counter += positive_words
            negative_counter += negative_words

        print(word_to_analyze + " had " + str(positive_counter) +
              " positive words and " + str(negative_counter) +
              " negative words.")

    if count_rows:
        print("Number of tweets used from " + word_to_analyze + ": ")
        print(helpers.countRows(s, Tweet))

    # Normalized sentiment (Xt) and stock-return (Rt) series, combined
    # into one 2-D structure for the statistical tests below.
    norm_Xt_dict = helpers.getXFromData(s, Tweet, True)
    norm_Rt_dict = helpers.getRFromCSV(
        '2017/10/01', '2017/12/31',
        'data/stock/' + word_to_analyze + '-stock-data' + '.csv', True)

    combined_2d_results_log = helpers.combineRtandXt(norm_Xt_dict,
                                                     norm_Rt_dict)

    # VAR
    if test_var:
        pd_data = pd.DataFrame(combined_2d_results_log, columns=['Rt', 'Xt'])
        var_result = VAR(pd_data).fit(maxlag)

        print(var_result.summary())
        var_result.test_causality('Rt', 'Xt')

    # GRANGER CAUSALITY ANALYSIS
    if test_granger:
        # Called for its side effect: verbose=True prints the test results.
        sm.tsa.stattools.grangercausalitytests(
            combined_2d_results_log, maxlag, addconst=True, verbose=True)

    # PLOT DATA
    if plot_figure:
        # Un-normalized series for plotting.
        Xt_dict = helpers.getXFromData(s, Tweet)
        Rt_dict = helpers.getRFromCSV(
            '2017/10/01', '2017/12/31',
            'data/stock/' + word_to_analyze + '-stock-data' + '.csv')

        Xt_df = pd.DataFrame(list(Xt_dict.items()), columns=['Date', 'Xt'])
        Xt_df['Date'] = pd.to_datetime(Xt_df['Date'])

        Rt_df = pd.DataFrame(list(Rt_dict.items()), columns=['Date', 'Rt'])
        Rt_df['Date'] = pd.to_datetime(Rt_df['Date'])

        Xt_df = Xt_df.sort_values('Date', ascending=True)
        plt.plot(Xt_df['Date'],
                 Xt_df['Xt'],
                 label='Twitter sentiment',
                 color='black')

        Rt_df = Rt_df.sort_values('Date', ascending=True)
        plt.plot(Rt_df['Date'],
                 Rt_df['Rt'],
                 label='Stock return',
                 dashes=[6, 2],
                 color='black')

        # BUGFIX: legend handles/labels come from the plot() calls above;
        # the original passed DataFrames as legend handles, which is wrong.
        plt.legend()

        plt.xticks(rotation='horizontal')

        # BUGFIX: '==' instead of 'is' — identity comparison with a string
        # literal is implementation-dependent and unreliable.
        if word_to_analyze == 'ibm':
            plt.suptitle(word_to_analyze.upper(), fontsize=20)
        else:
            plt.suptitle(word_to_analyze.title(), fontsize=20)
        plt.show()
# Example #2
# VAR fit (no constant term)
results = VAR(mdata).fit(ic='bic', verbose=True, trend='nc')
results.plot()
print(results.summary())
# Lag order selected by BIC
print('Selected Order:', results.k_ar)

# AIC & BIC of different lags, emitted as LaTeX table rows
for p in range(8):
    res = VAR(mdata).fit(p, trend='nc')
    print(res.k_ar, '&', round(res.aic, 6), '&', round(res.bic, 6), '\\\\')

# Stability: all eigenvalues of the companion matrix inside the unit circle
print(results.is_stable(True))

# Residual normality (Jarque-Bera style test on VAR residuals)
print(results.test_normality().summary())

# Granger causality: for each variable, test whether the OTHER variables
# jointly Granger-cause it.
names = ['r', 'IndProd', 'Unemp']
for n in names:
    print('Granger for', n)
    # BUGFIX: use {n} (a one-element set), not set(n). set(n) is the set of
    # CHARACTERS of the string — e.g. set('IndProd') contains 'r', so the
    # variable 'r' was wrongly dropped from the causing set.
    print(results.test_causality(n, list(set(names) - {n})).summary())

# Impulse Response Analysis over 10 periods (non-orthogonalized)
irf = results.irf(10)
irf.plot(orth=False)

plt.show()