if __name__ == '__main__':
    # --- Script parameters -------------------------------------------------
    # Shift amounts handed to m.shift_polls / m.shift_tweets below.
    # NOTE(review): units are not visible here (presumably days) - confirm.
    tPolls = 2
    tTwitter = 14
    # Offset, in days, from the first kalmanData index entry to startDate.
    startTrain = 52
    # Forwarded unchanged to m.shift_polls(addFake=...).
    addFake = False

    m.setFonts('timeseries')

    # --- Load in data and normalise ----------------------------------------
    twitterColumns = [0, 2]
    # avdate, Remain (norm), Leave (norm)
    pollColumns = [1, 3, 4, 5, 6, 7, 8, 9]

    lh, rh, p = m.getPanda(twitterColumns, pollColumns)
    h_agg, p_agg, p_var = m.aggregate(
        lh,
        rh,
        p,
        splitPolls=False,
        interpolate=True,
    )

    # Keep unshifted copies before the aggregates are rebound to their
    # shifted versions.
    p_orig = p_agg.copy()
    h_orig = h_agg.copy()

    p_agg = m.shift_polls(p_agg, tPolls, addFake=addFake)
    h_agg = m.shift_tweets(h_agg, tTwitter)

    kalmanData = m.getKalmanData(p_agg, h_agg)

    # Date window: starts startTrain days after the first index entry and
    # ends on 23 June 2016.
    startDate = kalmanData.index[0] + dt.timedelta(days=startTrain)
    endDate = dt.datetime(year=2016, month=6, day=23)

    # --- Find KF variables: 1) R and 2) P0 ---------------------------------
    # Find R: mean of the 'Remain' entry of p_var.
    preds = []
    R_r = p_var['Remain'].mean()
if __name__ == '__main__':
    # --- Get data ----------------------------------------------------------
    m.setFonts('timeseries')

    # Date window: 1 March 2016 to 1 June 2016.
    startDate = dt.datetime(2016, 3, 1)
    endDate = dt.datetime(2016, 6, 1)

    # Forwarded unchanged to both m.aggregate calls below.
    interpolate = False
    m.longPrint()

    # --- Load in data and normalise ----------------------------------------
    twitterColumns = [0, 2]
    # avdate, Remain (norm), Leave (norm)
    pollColumns = [1, 3, 4, 5, 6, 7, 8, 9]

    lh, rh, p = m.getPanda(twitterColumns, pollColumns)

    # Aggregate twice: once with polls combined, once with splitPolls=True.
    # In the split call the first return value is discarded and the other two
    # are bound to p_onl / p_tel.
    # NOTE(review): names suggest online vs. telephone polls - confirm
    # against m.aggregate.
    h_agg, p_agg, p_var = m.aggregate(
        lh,
        rh,
        p,
        splitPolls=False,
        interpolate=interpolate,
    )
    _, p_onl, p_tel = m.aggregate(
        lh,
        rh,
        p,
        splitPolls=True,
        interpolate=interpolate,
    )

    # One Kalman input set per poll series, all sharing the same h_agg.
    kalmanData = m.getKalmanData(p_agg, h_agg)
    kalmanData_o = m.getKalmanData(p_onl, h_agg)
    kalmanData_t = m.getKalmanData(p_tel, h_agg)

    # 1. Moving Average
    # Centred rolling mean over a window of 3 with both endpoints closed.
    # df_orig is an alias of kalmanData, not a copy.
    df_orig = kalmanData
    df_ma = df_orig.rolling(window=3, center=True, closed='both').mean()