예제 #1
0
def start():
    """Run the full analysis pipeline for ``word_to_analyze``.

    Loads tweets from the per-word SQLite database, optionally prints
    positive/negative word counts and row counts, combines the normalized
    tweet-sentiment series (Xt) with the stock-return series (Rt), and —
    depending on the module-level flags ``test_var``, ``test_granger`` and
    ``plot_figure`` — fits a VAR model, runs Granger-causality tests,
    and/or plots the two series.
    """
    # One SQLite file per analyzed word, e.g. 'ibm.sqlite'.
    engine = create_engine('sqlite:///' + word_to_analyze + '.sqlite')
    session = sessionmaker()
    session.configure(bind=engine)
    Base.metadata.create_all(engine)

    s = session()

    if count_words:
        positive_counter = 0
        negative_counter = 0

        # Sum the per-tweet positive/negative word counts over all tweets.
        for positive_words, negative_words in s.query(Tweet.positive_words,
                                                      Tweet.negative_words):

            positive_counter += positive_words
            negative_counter += negative_words

        print(word_to_analyze + " had " + str(positive_counter) +
              " positive words and " + str(negative_counter) +
              " negative words.")

    if count_rows:
        print("Number of tweets used from " + word_to_analyze + ": ")
        print(helpers.countRows(s, Tweet))

    # Normalized sentiment (Xt) and stock-return (Rt) series, keyed by date.
    norm_Xt_dict = helpers.getXFromData(s, Tweet, True)
    norm_Rt_dict = helpers.getRFromCSV(
        '2017/10/01', '2017/12/31',
        'data/stock/' + word_to_analyze + '-stock-data' + '.csv', True)

    combined_2d_results_log = helpers.combineRtandXt(norm_Xt_dict,
                                                     norm_Rt_dict)

    # VAR
    if test_var:
        pd_data = pd.DataFrame(combined_2d_results_log, columns=['Rt', 'Xt'])
        var_result = VAR(pd_data).fit(maxlag)

        print(var_result.summary())
        var_result.test_causality('Rt', 'Xt')

        # EXAMPLE OF HOW DESCRIPTIVE STATISTICS COULD BE PLOTTED:
        # fig = plt.subplots()
        # fig = var_result.plot_sample_acorr()
        # ax.set_ylabel("Y label")
        # ax.set_xlabel("X label")
        # ax.set_title("Title")
        # plt.show()

    # GRANGER CAUSALITY ANALYSIS
    if test_granger:
        result = sm.tsa.stattools.grangercausalitytests(
            combined_2d_results_log, maxlag, addconst=True, verbose=True)

    # PLOT DATA
    if plot_figure:
        # Un-normalized series for plotting.
        Xt_dict = helpers.getXFromData(s, Tweet)
        Rt_dict = helpers.getRFromCSV(
            '2017/10/01', '2017/12/31',
            'data/stock/' + word_to_analyze + '-stock-data' + '.csv')

        Xt_df = pd.DataFrame(list(Xt_dict.items()), columns=['Date', 'Xt'])
        Xt_df['Date'] = pd.to_datetime(Xt_df['Date'])

        Rt_df = pd.DataFrame(list(Rt_dict.items()), columns=['Date', 'Rt'])
        Rt_df['Date'] = pd.to_datetime(Rt_df['Date'])

        Xt_df = Xt_df.sort_values('Date', ascending=True)
        plt.plot(Xt_df['Date'],
                 Xt_df['Xt'],
                 label='Twitter sentiment',
                 color='black')
        plt.xticks(rotation='horizontal')

        Rt_df = Rt_df.sort_values('Date', ascending=True)
        plt.plot(Rt_df['Date'],
                 Rt_df['Rt'],
                 label='Stock return',
                 dashes=[6, 2],
                 color='black')
        # BUG FIX: the original passed DataFrames as legend *handles*
        # (plt.legend([Xt_df, Rt_df], [...])); the labels are already set
        # via the label= kwargs above, so a bare legend() is correct.
        plt.legend()

        plt.xticks(rotation='horizontal')

        # BUG FIX: the original compared with `is 'ibm'` (object identity,
        # not string equality) — use == for value comparison.
        if word_to_analyze == 'ibm':
            plt.suptitle(word_to_analyze.upper(), fontsize=20)
        else:
            plt.suptitle(word_to_analyze.title(), fontsize=20)
        plt.show()
예제 #2
0
    print('\n')

model = VAR(train)
# Print the information criteria (AIC/BIC/FPE/HQIC) for lag orders 1..14
# so a lag order can be chosen by inspection.
for i in range(1, 15):
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')

# Built-in order selection (up to 12 lags) for comparison.
x = model.select_order(maxlags=12)
x.summary()

# Fit the final model with lag order 10 (rebinds `model` to the results).
model = model.fit(10)
model.summary()

import pickle
# BUG FIX: the original called open() inline in pickle.dump/pickle.load and
# never closed the file handles; context managers close them deterministically.
with open('model.pkl', 'wb') as fh:
    pickle.dump(model, fh)

with open('model.pkl', 'rb') as fh:
    model_fitted = pickle.load(fh)

# Use the last 10000 training observations as the forecast seed.
forecast_input = train.values[-10000:]
forecast_input

# Work on copies so the originals are untouched below.
temp_train = train.copy(deep=True)
temp_db = main_db.copy(deep=True)
for idx, row in test.iterrows():
    x = {
        'RESP': row['RESP'],
        'HR': row['HR'],
예제 #3
0
mdata.index = pd.DatetimeIndex(quarterly)

# Build the analysis variables: term spread, log-differenced industrial
# production, differenced unemployment; then drop the raw rate columns
# and the NaN rows the differencing introduces.
mdata['r'] = mdata['r10'] - mdata['Tbill']
mdata['IndProd'] = np.log(mdata['IndProd']).diff()
mdata['Unemp'] = mdata['Unemp'].diff()
mdata = mdata.drop(['r10', 'Tbill'], axis=1).dropna()

# ADF Test
for column in ('r', 'IndProd', 'Unemp'):
    print(ADF(mdata[column]).summary())

# VAR fit (no constant term)
results = VAR(mdata).fit(ic='bic', verbose=True, trend='nc')
results.plot()
print(results.summary())
# Selected lag order
print('Selected Order:', results.k_ar)

# AIC & BIC of different lags, printed as LaTeX table rows
for p in range(8):
    res = VAR(mdata).fit(p, trend='nc')
    print(f"{res.k_ar} & {round(res.aic, 6)} & {round(res.bic, 6)} \\\\")

# Stability
print(results.is_stable(True))

# Residual normality
print(results.test_normality().summary())

# Granger causality
예제 #4
0
    return (res)


#def VAR(X):

#    model = VAR(X)
#    results = model.fit(4)
#    results = results.params
#    return results

# This finds the optimal lag for the VAR series.

X = data()
X = X[['CS_BHY_3MO', '3MO_TY']]
N = 10
BIC = np.zeros((N, 1))

# PERF FIX: constructing VAR(X) is loop-invariant — build the model object
# once and refit it with a different lag order on each iteration.
var_model = VAR(X)
for i in range(N):
    model = var_model.fit(i + 1)   # lag order i + 1
    BIC[i] = model.bic
    results = model.summary()      # NOTE: only the last (lag N) summary survives

BIC_min = np.min(BIC)
model_min = np.argmin(BIC)

# exp((BIC_min - BIC) / 2) is the relative likelihood of each lag order
# compared with the minimum-BIC one.
print('Relative Likelihoods')
print(np.exp((BIC_min - BIC) / 2))
print('Number of parameters in minimum BIC model %s' % (model_min + 1))
print(results)
예제 #5
0
model11.summary()

######## Step 12 #########

# Periodogram of the raw bitcoin price — a dominant peak would indicate
# seasonality in the series.
from scipy import signal as sg
f, Pxx_den = sg.periodogram(bitcoin['bprice'],
                            10e3)  # seasonality should be seen
plt.xlabel('frequency [Hz]')
plt.ylabel('PSD [V**2/Hz]')
plt.semilogy(f, Pxx_den)
# Differenced variable: should look like skyscrapers, i.e. no seasonality.
f, Pxx_den = sg.periodogram(
    bitcoin['dbprice'], 10e3)
# Still there is no seasonality; confirm with professor.
plt.semilogy(f, Pxx_den)

######## Step 13 #########

from statsmodels.tsa.api import VAR
bitcoin.index = bitcoin['Date']
# BUG FIX: pd.concat's positional axis argument (the bare `1`) was
# deprecated and removed in pandas 2.0 — pass it as an explicit keyword.
xdata = pd.concat((bitcoin['bprice'], bitcoin['sp'], bitcoin['euro'],
                   bitcoin['gold'], bitcoin['oil']), axis=1)
model13 = VAR(xdata).fit(maxlags=3)
model13.summary()

######## Step 14 #########

# Forecasting using the VAR model: 30 steps ahead from the observed data.
model13.forecast(xdata.values, steps=30)
model13.plot()