コード例 #1
0
ファイル: maps.py プロジェクト: rhouck/re
def load_pred_for_map():

    px = ql.load_quandl_data(TARGET_INDICATOR, TARGET_SERIES).stack()

    pred = pd.read_csv('data/processed/pred.csv', converters={'code': str})
    pred.Date = pd.to_datetime(pred.Date)
    pred = pred.set_index(['Date', 'code'])['pred']

    df = ut.stack_and_align([px, pred], cols=['px', 'pred'])
    df = df.dropna()
    df.index.levels[0].name = 'date'
    df.index.levels[1].name = 'code'

    geos = pd.read_csv('data/geo/{0}.csv'.format(TARGET_INDICATOR), converters={'code': str})
    df = (df.reset_index()
          .merge(geos[['code', 'lon', 'lat']], on='code')
          .set_index(['date', 'code']))

    if TARGET_INDICATOR == 'counties':
        quandl_codes = ql.load_counties()
    elif TARGET_INDICATOR == 'cities':
        quandl_codes = ql.load_cities()
    elif TARGET_INDICATOR == 'hoods':
        quandl_codes = ql.load_hoods()

    df = (df.reset_index()
          .merge(quandl_codes, on='code')
          .set_index(['date', 'code']))
    
    return df
コード例 #2
0
ファイル: composite.py プロジェクト: rhouck/re
def model_empirics(clf, df, pred):
    
    sns.jointplot(pred, df['tar'], kind='reg')

    score = r2_score(df['tar'], pred)
    corr = ut.get_xs_corr(pred, df['tar'])
    print('r2: {0:03f}\txs corr: {1:03f}'.format(score, corr))

    ret = ql.load_returns().stack().ix[df.index].unstack()
    ret = ut.xs_winsorize(ret).stack()    
    df_res = ut.stack_and_align([df['tar'], pred, ret], cols=('tar', 'pred', 'ret'))
    df_res['err'] = df_res['tar'] - df_res['pred']
    df_res['err2'] = df_res['err'].map(lambda x: x**2)
    avg_tar = df_res['tar'].unstack().mean(axis=1)
    df_res['avg_tar'] = pd.DataFrame({c: avg_tar for c in df_res.index.levels[1]}).stack()

    fig, axes = plt.subplots(ncols=2, nrows=6, figsize=(FIG_WIDTH*2, FIG_HEIGHT*6))
    (ut.gen_quintile_flat(df_res, 'tar', 'pred', agg='mean', ts=False)
        .plot(kind='bar', ax=axes[0,0], title='tar vs pred (xs)'))
    (ut.gen_quintile_flat(df_res, 'tar', 'pred', agg='mean', ts=True)
        .plot(kind='bar', ax=axes[1,0], title='tar vs pred (ts)'))
    (ut.gen_quintile_flat(df_res, 'avg_tar', 'pred', agg='mean', ts=True)
        .plot(kind='bar', ax=axes[2,0], title='xs avg tar vs pred (ts)'))
    (ut.gen_quintile_flat(df_res, 'tar', 'err2', agg='sum', ts=False)
        .plot(kind='bar', ax=axes[0,1], title='tar vs err2 (xs)'))
    (ut.gen_quintile_flat(df_res, 'tar', 'err2', agg='sum', ts=True)
        .plot(kind='bar', ax=axes[1,1], title='tar vs err2 (ts)'))
    (ut.gen_quintile_flat(df_res, 'avg_tar', 'err2', agg='sum', ts=True)
        .plot(kind='bar', ax=axes[2,1], title='xs avg tar vs err2 (ts)'))
    (ut.gen_quintile_ts(df_res, 'pred', 'pred', agg='mean')
        .plot(ax=axes[3,0], title='avg pred over time'))
    (ut.gen_quintile_ts(df_res, 'tar', 'tar', agg='mean')
        .plot(ax=axes[3,1], title='avg tar over time'))
    
    q = ut.gen_quintile_ts(df_res, 'pred', 'ret', agg='mean')
    q['mkt'] = ql.load_returns('states').ix[:,0]
    print('\n')
    print('sharpe ratios:')
    print(ut.get_sharpe_ratio(q))

    ut.avg_rank_accuracy(df_res).plot(ax=axes[4,0], title='avg pred rank accuracy')
    quint_cum_perf = ut.get_cum_perforance(q)
    quint_cum_perf.plot(ax=axes[4,1], title='continuously invested performance')
    
    xs_corr = df_res['tar'].unstack().corrwith(df_res['pred'].unstack(), axis=1)
    xs_corr.plot(ax=axes[5,0], ylim=(-1,1), title='xs tar-pred corr over time')

    return df_res, quint_cum_perf, xs_corr
コード例 #3
0
ファイル: composite.py プロジェクト: rhouck/re
def explore_series(px, px_ca, px_us, tar):
    fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(FIG_WIDTH*2, FIG_HEIGHT*3))
    px_us.plot(ax=axes[0,0], title='ca and us sig', legend=True)
    px_ca.plot(ax=axes[0,0], legend=True)
    px.plot(ax=axes[0,1], legend=False, alpha=.3)

    (ut.lead_lag_corr(px, tar, rng=range(-52,52,4))
     .plot(kind='bar', title='lead lag corr', ax=axes[1,0]))#.axvline(0, linestyle='--', color='r'))

    df = ut.stack_and_align([px, tar], cols=('sig','tar')).dropna()
    sns.distplot(df['sig'], ax=axes[2,0]).set_title('sig dist')
    sns.regplot(df['sig'], df['tar'], ax=axes[2,1]).set_title('sig vs tar')

    clf = lm.LinearRegression()
    clf.fit(df[['sig']], df['tar'])
    score = clf.score(df[['sig']], df['tar'])
    corr = ut.get_xs_corr(df['sig'], df['tar'])

    print('int: {0:03f}\tcoef: {1:03f}\tr2 score: {2:03f}\txs corr: {3:03f}'.format(clf.intercept_, clf.coef_[0], score, corr))