def demo3():
    """Fit a RandomForestClassifier on frac_diff_FFD features of AAL ticks.

    Events are produced by cusum_filter with a threshold of twice the
    get_daily_vol output, and the labels are the 't1_type' column of the
    get_3barriers result. The fitted model is then asked to predict the
    same rows it was trained on, and both the labelled frame and the
    predictions are printed.
    """
    prices = get_tick('AAL')
    ffd_frame = frac_diff_FFD(prices.to_frame(), 0.5)
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, 2 * daily_vol)
    event_t1 = get_t1(prices, event_times, num_days=5)

    barrier_kwargs = dict(
        ptsl=2,
        trgt=daily_vol,
        min_ret=0,
        num_threads=12,
        t1=event_t1,
        side=None,
    )
    labelled = get_3barriers(prices, event_times, **barrier_kwargs).dropna()
    print(labelled)

    # Restrict to events that still have a complete feature row.
    usable = ffd_frame.loc[labelled.index].dropna()
    x_train = usable.values
    # Labels are looked up on the surviving feature index so rows line up.
    y_train = labelled['t1_type'].loc[usable.index].values

    model = RandomForestClassifier()
    model.fit(x_train, y_train)
    # In-sample prediction only, so the output may simply reflect overfitting.
    print(model.predict(x_train))
def demo3():
    """Fit a classifier on (side, price) features and pass its scores to get_signal.

    The side comes from macd_side, events from get_3barriers with
    ptsl=[1, 2], and the target is the 'bin' column of get_bins. The
    classifier is fitted and scored on the same rows (in-sample).
    """
    close = get_tick('AAL')
    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=7)
    side = macd_side(close)
    events = get_3barriers(close,
                           t_events=sampled_idx,
                           trgt=vol,
                           ptsl=[1, 2],
                           t1=t1,
                           side=side)
    events = events.dropna()
    bins = get_bins(events, close)

    clf = RandomForestClassifier()
    # Two feature columns: the side taken and the price at the event time.
    features = np.hstack([
        events['side'].values[:, np.newaxis],
        close.loc[events.index].values[:, np.newaxis]
    ])
    labels = bins['bin'].values  # supervised answer
    clf.fit(features, labels)
    # Column 1 of predict_proba is the probability of the second entry in
    # clf.classes_; slicing replaces the former per-row Python loop, which
    # also reused the feature matrix's name as its loop variable.
    predicted_probs = clf.predict_proba(features)[:, 1]

    get_signal(events.drop(columns=['side']), 0.2, predicted_probs,
               events['side'], 2, 12)
def demo_44():
    """Print the head of get_time_decay applied to get_sample_tw output for AAL.

    Steps: build events with get_3barriers, compute num_co_events through
    mp_pandas_obj, de-duplicate and reindex it onto the price index,
    compute get_sample_tw through mp_pandas_obj, then call get_time_decay
    with last_w=.1 and is_exp=True.
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    event_t1 = get_t1(prices, event_times, num_days=1)
    events = get_3barriers(prices,
                           t_events=event_times,
                           trgt=daily_vol,
                           ptsl=1,
                           t1=event_t1)
    print(events.head())

    workers = 24
    molecule = ('molecule', events.index)

    co_events = mp_pandas_obj(get_num_co_events,
                              molecule,
                              workers,
                              close_idx=prices.index,
                              t1=events['t1'])
    # Where an index label repeats, only its last occurrence is kept.
    is_repeat = co_events.index.duplicated(keep='last')
    co_events = co_events.loc[~is_repeat]
    # Align to every price timestamp; timestamps with no entry become 0.
    co_events = co_events.reindex(prices.index).fillna(0)

    sample_tw = mp_pandas_obj(get_sample_tw,
                              molecule,
                              workers,
                              t1=events['t1'],
                              num_co_events=co_events)
    decayed = get_time_decay(sample_tw, last_w=.1, is_exp=True)
    print(decayed.head())
# Example no. 4
# 0
def _report_cv_scores(clf, features, label, n_runs=10, **cv_kwargs):
    """Call cv_score n_runs times; print mean and variance of the per-run means.

    Extra keyword arguments are forwarded to cv_score unchanged.
    """
    run_means = [
        np.mean(cv_score(clf, features, label, **cv_kwargs))
        for _ in range(n_runs)
    ]
    print(np.mean(run_means), np.var(run_means))


def demo2():
    """Compare cv_score results with/without embargo and sample weights.

    Uses the data returned by get_google_all, labels from the 't1_type'
    column of get_3barriers, and a single RandomForestClassifier instance
    for all runs. Purging is disabled in every configuration. Four
    (mean, variance) pairs are printed, in this order:

    1. embargo 0.01, no sample weights
    2. no embargo, no sample weights
    3. embargo 0.01, with sample weights
    4. no embargo, with sample weights
    """
    df = get_google_all()
    df.index = pd.DatetimeIndex(df['Date'].values)
    close = df["Close"]

    vol = get_daily_vol(close)
    sampled_idx = cusum_filter(close, vol)
    t1 = get_t1(close, sampled_idx, num_days=1)
    events = get_3barriers(close, t_events=sampled_idx, trgt=vol, ptsl=1,
                           t1=t1, side=None)

    # Features are every remaining column of df at the event timestamps.
    features = df.drop(columns=["Date"]).dropna().loc[events.index]
    label = events['t1_type'].loc[features.index]

    clf = RandomForestClassifier()
    t1_ = t1.loc[features.index]

    # No purge, with embargo
    _report_cv_scores(clf, features, label,
                      pct_embargo=0.01, t1=t1_, purging=False)

    # No purge, without embargo
    _report_cv_scores(clf, features, label,
                      pct_embargo=0., t1=t1_, purging=False)

    n_co_events = get_num_co_events(close.index, t1, events.index)
    sample_weight = get_sample_tw(t1, n_co_events, events.index)

    # No purge, with embargo, sample weights supplied
    _report_cv_scores(clf, features, label, sample_weight=sample_weight,
                      pct_embargo=0.01, t1=t1_, purging=False)

    # No purge, without embargo, sample weights supplied
    _report_cv_scores(clf, features, label, sample_weight=sample_weight,
                      pct_embargo=0., t1=t1_, purging=False)
def demo_42():
    """Print get_avg_uniq and seq_bootstrap results for AAL events.

    Events are built with get_3barriers (ptsl=1, num_days=5); their 't1'
    column and the price index feed get_ind_matrix, whose result is passed
    to both get_avg_uniq and seq_bootstrap.
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    event_t1 = get_t1(prices, event_times, num_days=5)
    events = get_3barriers(prices,
                           t_events=event_times,
                           trgt=daily_vol,
                           ptsl=1,
                           t1=event_t1)
    print(events.head())

    indicator = get_ind_matrix(prices.index, events['t1'])
    uniqueness = get_avg_uniq(indicator)
    print(uniqueness.head())
    print(seq_bootstrap(indicator))
def demo():
    """Plot num_co_events for AAL against volatility and against returns.

    Two figures are written under PNG_PATH:

    * "num_co_events.png": line plot of num_co_events (red, left axis)
      with the get_daily_vol output (blue, right axis).
    * "num_co_events_scatter.png": scatter of num_co_events (red, left
      axis) with close.pct_change() (blue, right axis).
    """
    prices = get_tick('AAL')
    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    event_t1 = get_t1(prices, event_times, num_days=5)
    events = get_3barriers(prices,
                           t_events=event_times,
                           trgt=daily_vol,
                           ptsl=1,
                           t1=event_t1)
    print(events.head())

    workers = 1
    co_events = mp_pandas_obj(get_num_co_events,
                              ('molecule', events.index),
                              workers,
                              close_idx=prices.index,
                              t1=events['t1'])

    # Figure 1: line plots on twin y-axes sharing one x-axis.
    figure, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time (s)')
    left_ax.set_ylabel('num_co_events', color='red')
    left_ax.plot(co_events, color='red')
    left_ax.tick_params(axis='y', labelcolor='red')
    right_ax = left_ax.twinx()
    # The x-label was already set on the left axis.
    right_ax.set_ylabel('volatility', color='blue')
    right_ax.plot(daily_vol, color='blue')
    right_ax.tick_params(axis='y', labelcolor='blue')
    # Without tight_layout the right-hand y-label gets slightly clipped.
    figure.tight_layout()
    plt.savefig(PNG_PATH + "num_co_events.png")
    plt.close()

    # Figure 2: scatter plots on twin y-axes.
    returns = prices.pct_change().dropna()
    figure, left_ax = plt.subplots(figsize=(16, 8))
    left_ax.set_xlabel('time')
    left_ax.set_ylabel('num_co_events', color='red')
    left_ax.scatter(co_events.index, co_events.values, color='red')
    right_ax = left_ax.twinx()
    right_ax.set_ylabel('return', color='blue')
    right_ax.scatter(returns.index, returns.values, color='blue')
    plt.savefig(PNG_PATH + "num_co_events_scatter.png")
    plt.close()
# Example no. 7
# 0
def demo():
    """Compare unshuffled and shuffled 10-fold CV scores on get_google_all data.

    Prints the head of get_embargo_times and of the get_3barriers events,
    then, for shuffle=False followed by shuffle=True, runs
    cross_val_score ten times with a fresh RandomForestClassifier and
    KFold each time and prints the mean and variance of all scores.
    """
    frame = get_google_all()
    frame.index = pd.DatetimeIndex(frame['Date'].values)
    prices = frame["Close"]
    embargo = get_embargo_times(prices.index, pct_embargo=0.01)
    print(embargo.head())

    daily_vol = get_daily_vol(prices)
    event_times = cusum_filter(prices, daily_vol)
    event_t1 = get_t1(prices, event_times, num_days=1)
    events = get_3barriers(prices, t_events=event_times, trgt=daily_vol,
                           ptsl=1, t1=event_t1, side=None)
    print(events.head())

    features = frame.drop(columns=["Date"]).dropna().loc[events.index]
    label = events['t1_type'].loc[features.index]

    # First pass keeps the rows in order; second pass shuffles them before
    # they are split into folds.
    # Shuffling introduces data leakage because neighbouring samples are
    # similar: after a uniform shuffle the training folds carry more
    # information that overlaps the test fold.
    for shuffle in (False, True):
        all_scores = []
        for _ in range(10):
            model = RandomForestClassifier()
            splitter = KFold(n_splits=10, shuffle=shuffle)
            all_scores.append(
                cross_val_score(model, features, label, cv=splitter))
        print(np.mean(all_scores), np.var(all_scores))