Example #1
def run(date):
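    """
    Report prelocate fill rates for one date: pull requested/received share
    counts, price them with Barra USD prices, and print request, fill, and
    shortfall notionals in USD millions plus the fill percentage.
    (`dbo`, `lb`, and `pandas` are assumed to be imported at module level.)
    """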

    sql = '''
    select * from production_holdings.prelocate_request where datadate='%s'
    and shares_received is not null
    ''' % date.strftime('%Y%m%d')

    data = dbo.query(sql, df=True)
    if data is None:
        return
    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    rsk['usdp'] = rsk['USD_CAPT'] * rsk['LOC_PRIC'] / rsk['LOC_CAPT']
    data = pandas.merge(data,
                        rsk[['usdp']],
                        left_on='barrid',
                        right_index=True,
                        how='left')
    data['request_notional'] = data['usdp'] * data['shares_requested'].astype(
        float)
    data['get_notional'] = data['usdp'] * data['shares_received'].astype(float)
    data['delta_notional'] = data['request_notional'] - data['get_notional']

    a = data[['request_notional', 'get_notional',
              'delta_notional']].sum() / 1e6
    a = pandas.DataFrame(a)
    a = a.reset_index()

    a['datadate'] = date.strftime('%Y%m%d')
    a = a.pivot(index='datadate', columns='index', values=0)
    a['fill_pct'] = a['get_notional'] / a['request_notional']
    print a.to_string()
Example #2
def run(date):
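    """
    Write each Barra ASE1JPN style-factor exposure out as a daily alpha
    file via nu.write_alpha_files (the truncated names are Barra factor
    codes).
    """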

    alps = [
        'MOMENTUM', 'LIQUIDIT', 'VALUE', 'GROWTH', 'SIZE', 'VOLATILI',
        'LEVERAGE'
    ]
    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)

    for a in alps:
        nu.write_alpha_files(rsk[a], a.lower(), date)
Example #3
def run(date):
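    """
    Assemble the research dataset for one date: forward residual returns
    (5/10/20 day), past-momentum returns, volume features, and raw signals,
    inner-joined on BARRID, restricted to the npxchnpak production universe,
    and written to GCE under users/dsargent/<VERSION>/.
    (RET_WINDOW, PMOM_BREAK, VOL_WINDOW, STVOL_WINDOW, and VERSION are
    module-level constants.)
    """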

    ### load forward returns and recent momentums
    retdata = rets.daily_resrets(date + pandas.datetools.BDay(30),
                                 lookback=60 + RET_WINDOW)
    fret = gen_fwd_returns(retdata.copy(), date, [5, 10, 20])
    exrets = gen_pmom_returns(retdata.copy(), date, pmom=PMOM_BREAK)

    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    rsk = rsk[['COUNTRY', 'USD_CAPT']]

    ### load returns and compute cumulative residuals (note: `ret` is unused below)
    ret = rets.daily_resrets(date, lookback=RET_WINDOW)

    ### load volume and signal data
    voldata = vol.load_volume(date, window=VOL_WINDOW)
    voldata = gen_vol(voldata, date, STVOL_WINDOW)
    signals = load_signals(date)

    data = pandas.merge(exrets,
                        voldata,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        signals,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        fret,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        rsk,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data['datadate'] = date

    univ = lb.load_production_universe('npxchnpak', date)
    data = data[data.index.isin(univ.index)]
    print len(data)  # rows remaining after the universe filter
    gdata.write_gce(data,
                    'users',
                    'dsargent/{version}/{dt}.pd'.format(
                        version=VERSION, dt=date.strftime('%Y%m%d')),
                    enable_compression=True)

    return
Example #4
def run1(date):
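    """
    Split the Korean names in npxchnpak into bottom/top halves by local
    market cap and write them as kor_small / kor_big universe files
    (pipe-separated, no header).
    """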

    RSKDATA = lb.loadrsk2("ASE1JPN", "S", date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', date)

    data = pandas.merge(RSKDATA[['COUNTRY', 'LOC_CAPT']],
                        npxchnpak,
                        left_index=True,
                        right_index=True)

    data = data[data['COUNTRY'] == 'KOR']
    data['VALID_UNTIL'] = data['VALID_UNTIL'].map(
        lambda x: x.strftime('%Y%m%d'))
    data['rank'] = data['LOC_CAPT'].rank()
    data = data.sort('rank')

    nbot = len(data) // 2  # floor division
    ntop = len(data) - nbot

    bot = data.head(nbot)
    top = data.tail(ntop)

    print bot.head()
    print top.head()

    bot = bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    top = top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]

    bot.to_csv("/research/data/universe/kor_small/kor_small" +
               date.strftime('%Y%m%d') + ".univ",
               header=False,
               sep='|')
    top.to_csv("/research/data/universe/kor_big/kor_big" +
               date.strftime('%Y%m%d') + ".univ",
               header=False,
               sep='|')
Example #5
def run(as_of_date):
    """ load Barra risk data """

    RSKDATA = lb.loadrsk2("ASE1JPN", "S", as_of_date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', as_of_date)
    #topbot = pandas.read_csv("/research/data/prealpha/topbot_npxchnpak/topbot_npxchnpak_"+as_of_date.strftime("%Y%m%d")+".alp",header=False, \
    #         names=['BARRID','TOPBOT'])

    # old research version of the files

    nextmonth = as_of_date + datetime.timedelta(1)  # actually the next calendar day
    print nextmonth, as_of_date

    # prefer the production ic_scaling prealpha published under the next
    # day's directory; fall back to the research icscale file if missing
    try:
        filename = ("/production/%s/%s/%s/prealpha/ic_scaling_npxchnpak_%s.alp"
                    % (nextmonth.strftime('%Y'), nextmonth.strftime('%m'),
                       nextmonth.strftime('%Y%m%d'),
                       as_of_date.strftime('%Y%m%d')))
        topbot = pandas.read_csv(filename,
                                 index_col=0).rename(columns={'ic1': 'BIG'})
    except Exception:
        print 'rolling back!'
        topbot = pandas.read_csv(
            "/research/data/prealpha/icscale_npxchnpak/icscale_npxchnpak_" +
            as_of_date.strftime("%Y%m%d") + ".alp",
            header=None,
            names=['BARRID', 'BIG'])

    topbot = topbot.reset_index()
    univdata = npxchnpak.join(RSKDATA[['COUNTRY', 'USD_CAPT']], how='left')
    univdata = univdata[univdata['COUNTRY'] == 'JPN']
    univdata = topbot.join(univdata, on='BARRID', how='right')
    univdata.index = univdata.pop('BARRID')
    univdata['VALID_UNTIL'] = univdata['VALID_UNTIL'].map(
        lambda x: x.strftime('%Y%m%d'))

    #univdata_top = univdata[univdata['BIG']=='JPN_BIG']
    univdata_top = univdata[univdata['BIG'] < 1]
    univdata_top = univdata_top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]

    univdata_bot = univdata[(univdata['BIG'] == 'JPN') |
                            (univdata['BIG'] == 1)]
    #    univdata_bot = univdata[univdata['BIG']==1]
    univdata_bot['rnk'] = univdata_bot['USD_CAPT'].rank()
    univdata_bot = univdata_bot.sort('rnk')

    print univdata_bot.head().to_string()
    univdata_bot = univdata_bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    univdata_bot.to_csv('/research/data/universe/jpnx400/jpnx400' +
                        as_of_date.strftime('%Y%m%d') + '.univ',
                        header=False,
                        sep='|')
    univdata_bot.tail(600).to_csv(
        '/research/data/universe/jpnx400_t600/jpnx400_t600' +
        as_of_date.strftime('%Y%m%d') + '.univ',
        header=False,
        sep='|')

    nbot = len(univdata_bot) // 2  # floor division
    ntop = len(univdata_bot) - nbot
    print univdata_bot.head().to_string()
    print univdata_bot.tail().to_string()

    univdata_bot.head(nbot).to_csv(
        "/research/data/universe/jpnx400_small/jpnx400_small" +
        as_of_date.strftime('%Y%m%d') + ".univ",
        header=False,
        sep='|')
    univdata_bot.tail(ntop).to_csv(
        "/research/data/universe/jpnx400_big/jpnx400_big" +
        as_of_date.strftime('%Y%m%d') + ".univ",
        header=False,
        sep='|')

    #UNIVDATA.to_csv("/research/data/universe/barraestu/barraestu.univ"+yymm,header=False)
    #    univdata_top.to_csv("/research/data/universe/jpn400/jpn400"+as_of_date.strftime('%Y%m%d')+".univ",header=False,sep='|')

    return True
Example #6
def run(date):
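    """
    Build the SOM input dataset for one date: load the research bucket
    alphas, attach forward monthly residual returns and country labels,
    tag on the current EPFR predicted IR, and write the result to
    som/som.input_data/<yyyymmdd>.csv.
    """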
    print date
    """
    '''
    compute cumulative FlowPct over the past month
    '''
    epfr_sql = '''
        select FlowPct from nipun_prod.epfr_daily_asiaxjpn_flow
        where ReportDate between '{startdate:%Y%m%d}' and '{enddate:%Y%m%d}'
    '''.format(
        startdate=date-pandas.datetools.MonthEnd()-pandas.datetools.BDay(),
        enddate=date-pandas.datetools.BDay()
    )

    query = dbo.query(epfr_sql, df=True)
    if query is None:
        epfr_asia = None
        print 'epfr is None in month of {:%Y%m%d}'.format(date)
    else:
        epfr_asia = query['flowpct'].sum()
    """

    # use the current predicted IR from nipun_prod.flow_predicted_sigma
    epfr_sql = '''
            select predicted_IR from nipun_prod.flow_predicted_sigma
            where datadate = '{usedate:%Y%m%d}'
        '''.format(usedate=date-pandas.datetools.BDay(2))
    query = dbo.query(epfr_sql)
    if query is None:
        epfr_asia = None
        print 'epfr is None in month of {:%Y%m%d}'.format(date)
    else:
        epfr_asia = query[0][0]

    data = pandas.DataFrame()
    rsk = lb.loadrsk2('ase1jpn', 'S', date)
    country_list = ['HKG', 'TWN', 'KOR', 'JPN', 'AUS', 'SGP', 'OTHER']
    alps = ['sentiment', 'analyst', 'quality', 'iu', 'fmom', 'value']
    for alp in alps:

        try:
            _tmp = lb.loadalp('/research/alphagen/alpha_v5/b_%s__w/b_%s__w_%s.alp' % (alp, alp, date.strftime('%Y%m%d')))
            _tmp.columns = [alp]
        except Exception:
            print 'alpha {} not loaded on {:%Y%m%d}'.format(alp, date)
            # no research alphas after 20160831
            continue

        if len(data) == 0:
            data = _tmp
        else:
            data = pandas.merge(data, _tmp, left_index=True,
                                right_index=True, how='outer')

    forward_rets = rets.monthly_resrets(date+pandas.datetools.MonthEnd(), lookback=0)
    forward_rets = forward_rets[['resid_c', 'BARRID']]
    forward_rets.set_index('BARRID', inplace=True)
    data = pandas.merge(data, forward_rets, left_index=True, right_index=True, how='inner')
    data = data.fillna(0.0)
    data['COUNTRY'] = rsk['COUNTRY']
    # map CHX to HKG
    ix = data['COUNTRY'] == 'CHX'
    data['COUNTRY'][ix] = 'HKG'

    # anything outside country_list becomes OTHER
    ix = data['COUNTRY'].isin(country_list)
    data['COUNTRY'][~ix] = 'OTHER'
    data['date'] = date.strftime('%Y%m%d')
    data['epfr'] = epfr_asia
    data.to_csv('som/som.input_data/%s.csv' % date.strftime('%Y%m%d'), index=True, header=True)
Example #7
def backfill_1b(date,
                bkt,
                bkt_df,
                univ='npxchnpak',
                alpha='alpha_v5',
                freq='daily',
                model='ase1jpn',
                ctry_df=None,
                univ_df=None,
                dir=None):
    """
    backfill one bucket
    """
    import nipun.cpa.load_barra as lb
    #import pandas.util.py3compat as py3compat

    print "backfill bucket %s for %s" % (bkt, date)

    date_str = date.strftime('%Y%m%d')
    keep_alphas = bkt_df.abs().sum()
    keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
    bkt_df = bkt_df[keep_alphas]

    ALPHA_DIR = '%(dir)s/%(freq)s/current/' % {'dir': dir, 'freq': freq}

    big_df = pandas.DataFrame()
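    # assemble a BARRID x alpha frame from each alpha's daily .alp file;
    # note this loop shadows the `alpha` parameter, which is otherwise unused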
    for alpha in keep_alphas:
        raw_alpha_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, alpha)
        fn = raw_alpha_dir + '%s_%s.alp' % (alpha, date_str)
        if os.path.exists(fn):
            tmp_df = pandas.read_csv(fn,
                                     header=None,
                                     names=['barrid', alpha],
                                     index_col=0)
            big_df = big_df.join(tmp_df, how='outer')
        else:
            big_df[alpha] = None

    # v1 definition: equal-weight average over all alphas in the bucket
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_all')

    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)

    output_df = big_df.fillna(0).mean(axis=1)
    output_df.to_csv('%(dir)s/b_%(bkt)s_eq_all_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    })

    # v2 definition: equal-weight within each country's configured alpha set;
    # join country labels onto big_df first
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_c')

    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)

    if ctry_df is None:
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
    if univ_df is None:
        univ_df = lb.load_production_universe(univ, date)

    big_df = big_df[big_df.index.isin(univ_df.index)]
    big_df = big_df.join(ctry_df, how='left')

    output_df2 = pandas.DataFrame()
    for k, v in big_df.groupby('COUNTRY'):
        if k in bkt_df.index:
            keep_alphas = bkt_df.ix[k]
            keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
            output_df2 = pandas.concat([
                output_df2,
                pandas.DataFrame(v[keep_alphas].fillna(0).mean(axis=1))
            ])

    output_df2.to_csv('%(dir)s/b_%(bkt)s_eq_c_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    },
                      header=None)
Example #8
def backfill_b_alphas(weights_date=None,
                      buckets=[
                          'analyst', 'fmom', 'industry', 'iu', 'quality',
                          'sentiment', 'special', 'value'
                      ],
                      univ='npxchnpak',
                      alpha='alpha_v5',
                      startdate=datetime.datetime(2005, 1, 1),
                      enddate=datetime.datetime.today() - pandas.datetools.day,
                      model='ase1jpn',
                      ncpus=8,
                      freq='daily'):
    """
    this function is to calculate bucket alphas based on the latest backfilled
    raw alphas
    """
    print "start to backfill bucket alphas eq_all and eq_c"
    if weights_date is None:
        ctry_wt_df = dbo.query("select * from alphagen..country_wt__%s" %
                               alpha,
                               df=True)
        alpha_wt_df = dbo.query("select * from alphagen..alpha_wt__%s" % alpha,
                                df=True)
    else:
        ctry_wt_df = dbo.query(
            "select * from production_reporting..country_wt_backup "
            "where cast(datadate AS date)='%(date)s' "
            "and alphagen_vers='%(alpha)s'" % {
                'alpha': alpha,
                'date': weights_date
            },
            df=True)
        alpha_wt_df = dbo.query(
            "select * from production_reporting..alpha_wt_backup "
            "where cast(datadate AS date)='%(date)s' "
            "and alphagen_vers='%(alpha)s'" % {
                'alpha': alpha,
                'date': weights_date
            },
            df=True)

    ctry_wt_df = ctry_wt_df[['alpha_name', 'country', 'weight']]
    alpha_wt_df = alpha_wt_df[['bucket_name', 'alpha_name']]
    bucket_alpha_df = pandas.merge(alpha_wt_df,
                                   ctry_wt_df,
                                   on=['alpha_name'],
                                   how='left')

    job_server = pp.Server(ncpus)
    jobs = []
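    # one job per (date, bucket); ctry_df and univ_df are loaded once per
    # date and shared across that date's bucket jobs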
    for date in pandas.DateRange(startdate,
                                 enddate,
                                 offset=pandas.datetools.day):
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
        univ_df = lb.load_production_universe(univ, date)
        for bkt in buckets:
            bkt_df = bucket_alpha_df[bucket_alpha_df['bucket_name'] == bkt]
            bkt_df = bkt_df.pivot(index='country',
                                  columns='alpha_name',
                                  values='weight')
            #backfill_1b(date, bkt, bkt_df, ctry_df=ctry_df, univ_df=univ_df, dir=DIR)
            jobs.append(
                job_server.submit(backfill_1b,
                                  (date, bkt, bkt_df, univ, alpha, freq, model,
                                   ctry_df, univ_df, DIR), (),
                                  ('pandas', 'datetime', 'os')))

    for job in jobs:
        job()  # block until each job completes

    job_server.print_stats()
    job_server.destroy()