# NOTE(review): this span is the interior of a function whose `def` line is
# not visible here; `m`, `dt`, `tPolls` and `tTwitter` are defined outside it.
# Number of days added to the first kalmanData index entry to get startDate.
startTrain = 52
    # Forwarded unchanged to m.shift_polls below.
    addFake = False
    m.setFonts('timeseries')
    ### Load in data and normalise
    # Column selections handed to m.getPanda (first argument: Twitter columns,
    # second argument: poll columns).
    twitterColumns = [0, 2]
    pollColumns = [1, 3, 4, 5, 6, 7, 8,
                   9]  # avdate, Remain (norm), Leave (norm)
    lh, rh, p = m.getPanda(twitterColumns, pollColumns)
    # Aggregate with splitPolls=False and interpolate=True. The third return
    # value is indexed by 'Remain' further down to estimate R_r.
    h_agg, p_agg, p_var = m.aggregate(lh,
                                      rh,
                                      p,
                                      splitPolls=False,
                                      interpolate=True)
    # Copies taken before the shift calls rebind p_agg / h_agg; neither copy
    # is used in the lines visible here.
    p_orig = p_agg.copy()
    h_orig = h_agg.copy()
    # tPolls / tTwitter are presumably time offsets -- TODO confirm against
    # m.shift_polls / m.shift_tweets.
    p_agg = m.shift_polls(p_agg, tPolls, addFake=addFake)
    h_agg = m.shift_tweets(h_agg, tTwitter)

    kalmanData = m.getKalmanData(p_agg, h_agg)
    # startDate is startTrain days after the first index entry (assumes the
    # index holds datetime-like values, since a timedelta is added to it).
    startDate = kalmanData.index[0] + dt.timedelta(days=startTrain)
    # Fixed end of the window: 23 June 2016. Not used in the visible lines.
    endDate = dt.datetime(day=23, month=6, year=2016)

    ### FIND KF VARIABLES: 1) R and 2) P0
    # find R
    # NOTE(review): preds is initialised but not used in the visible lines.
    preds = []
    # R_r: mean of the 'Remain' column of p_var.
    R_r = p_var['Remain'].mean()
    # r: the 'remain_perc' column of kalmanData as a float NumPy array.
    r = kalmanData['remain_perc'].to_numpy(dtype=float)
    # P0_r: one tenth of the variance of r.
    P0_r = r.var() / 10
    H = 1
    # Scalar gain computed from P0_r, H and R_r.
    K_r = P0_r * H / (H * P0_r * H + R_r)
    # NOTE(review): the gain computed on the previous line is discarded and
    # replaced by the constant 0.95 -- confirm this override is intentional.
    K_r = 0.95
Example #2
0
    # NOTE(review): this span is the interior of a function whose `def` line
    # is not visible here; `m`, `dt`, `pd`, `np`, `tPolls`, `tTwitter` and
    # `interpolate` are defined outside it.
    # Column selections handed to m.getPanda (first argument: Twitter columns,
    # second argument: poll columns).
    twitterColumns = [0, 2]
    pollColumns = [1, 3, 4, 5, 6, 7, 8,
                   9]  # avdate, Remain (norm), Leave (norm)
    lh, rh, p = m.getPanda(twitterColumns, pollColumns)
    # First aggregation: splitPolls=False. p_var is not used in the visible
    # lines.
    h_agg, p_agg, p_var = m.aggregate(lh,
                                      rh,
                                      p,
                                      splitPolls=False,
                                      interpolate=interpolate)
    # Second aggregation on the same inputs with splitPolls=True; the first
    # return value is discarded. Presumably p_onl / p_tel are the online and
    # telephone poll subsets -- TODO confirm against m.aggregate.
    _, p_onl, p_tel = m.aggregate(lh,
                                  rh,
                                  p,
                                  splitPolls=True,
                                  interpolate=interpolate)

    # All three poll frames go through m.shift_polls with the same tPolls
    # argument and addFake=False; the tweets go through m.shift_tweets.
    p_agg = m.shift_polls(p_agg, tPolls, addFake=False)
    p_onl = m.shift_polls(p_onl, tPolls, addFake=False)
    p_tel = m.shift_polls(p_tel, tPolls, addFake=False)
    h_agg = m.shift_tweets(h_agg, tTwitter)

    ### CORRELATION AND FITTING ###
    # Only the unsplit data (h_agg / p_agg) is correlated in the visible lines;
    # p_onl and p_tel are not used here.
    # Label bounds for the .loc slices below: 1 March 2016 to 23 June 2016.
    startDate = dt.datetime(year=2016, month=3, day=1)
    endDate = dt.datetime(year=2016, month=6, day=23)

    h = h_agg['remain_perc'].loc[startDate:endDate]
    # NOTE: this rebinds `p`, which previously held the third value returned
    # by m.getPanda.
    p = p_agg['Remain'].loc[startDate:endDate]
    # Join the two series on their index; no `how` is passed, so pandas'
    # default join type applies.
    X = pd.merge(h, p, left_index=True, right_index=True)
    # Print the interpolate flag followed by np.corrcoef of the two columns.
    print("Interpolated = " + str(interpolate) + "; all data: ",
          np.corrcoef(X['remain_perc'], X['Remain']))