Example #1
def run1(date):
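    """Build the npx_big universe: non-Japan npxchnpak names whose 20-day ADV is
    above their country median, plus the Japan names outside jpn400, written as a
    pipe-delimited .univ file.
    """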

    jpntop = lb.load_production_universe('jpn400', date)
    univ = lb.load_production_universe('npxchnpak', date)
    univ['cnt'] = map(lambda x: x.upper()[:3], univ.index)

    jpn = univ[univ['cnt'] == 'JPN']
    ix = jpn.index.isin(jpntop.index)
    jpn = jpn.ix[~ix]
    jpn = jpn.reset_index().rename(columns={'BARRID': 'barrid'})

    univ = univ[univ['cnt'] != 'JPN']

    ax = loadaxioma(date)
    data = pandas.merge(ax, univ, left_on='barrid', right_index=True)
    med = data.groupby('cnt')['adv_20d'].median()
    data = pandas.merge(data,
                        pandas.DataFrame(med, columns=['med_adv']),
                        left_on='cnt',
                        right_index=True)

    print med
    data = data[data['adv_20d'] > data['med_adv']]
    data = data.append(jpn.reset_index(), ignore_index=True)

    data = data[['barrid', 'SEDOL', 'NATURAL', 'VALID_UNTIL']]
    data['VALID_UNTIL'] = data['VALID_UNTIL'].apply(
        lambda x: x.strftime('%Y%m%d'))
    data['NATURAL'] = data['NATURAL'].astype(int)

    data.to_csv('/research/data/universe/npx_big/npx_big%s.univ' %
                date.strftime('%Y%m%d'),
                sep='|',
                header=False,
                index=False)
Example #2
def saveCSV(date):
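    """Pull the TopDTC short-color records for Asia/Japan on the given date, keep
    the best (lowest) rank per barrid, restrict to the production universe, and
    write the result to a CSV under OPT_DIR.
    """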
    sql = '''SELECT isnull(tm.barrid, sm.barrid) as barrid, sm.sedol, sm.name, sm.localid, 
  sm.listed_country, s4.datadate, s4.category, s4.rank, s4.region, s4.country, s4.sector
  FROM [wzhu].[dbo].[shortcolor4] s4 join [nipun_prod].[dbo].[security_master] sm
  on s4.datadate between sm.datadate and isnull(sm.stopdate, '2050-01-01')
  and s4.sedol = sm.sedol
  and s4.section = 'TopDTC' and s4.region in ('AS', 'JP')
  and s4.datadate = '{:%Y%m%d}'
  left join nipun_prod..thai_barrid_map tm on left(sm.barrid, 6)=tm.root_id
    '''.format(date)
    dbo = db.db(connect='qai')
    data = dbo.query(sql, df=True)

    data = data.sort(['barrid', 'rank'])
    data = data.drop_duplicates(['barrid'], take_last=False)
    data = data.sort(['category', 'region', 'rank'])

    univ = lb.load_production_universe(UNIVERSE, date).index
    data = data[map(lambda x: x in univ, data['barrid'])]

    fout = "{}/shortcolorDTC.{:%Y%m%d}.csv".format(OPT_DIR, date)
    data.to_csv(fout,
                index=False,
                columns=[
                    'barrid', 'sedol', 'name', 'localid', 'listed_country',
                    'datadate', 'category', 'region', 'rank', 'sector'
                ])
Example #3
def run(date):
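    """Assemble the data panel for the date: merge momentum returns, volume
    measures, signals, forward returns and Barra risk data, restrict it to the
    npxchnpak universe, and write it to GCE storage.
    """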

    ### load forward returns and recent momentums
    retdata = rets.daily_resrets(date + pandas.datetools.BDay(30),
                                 lookback=60 + RET_WINDOW)
    fret = gen_fwd_returns(retdata.copy(), date, [5, 10, 20])
    exrets = gen_pmom_returns(retdata.copy(), date, pmom=PMOM_BREAK)

    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    rsk = rsk[['COUNTRY', 'USD_CAPT']]

    ### load returns for computing cumulative residual returns
    ret = rets.daily_resrets(date, lookback=RET_WINDOW)

    ### load volume and signal data
    voldata = vol.load_volume(date, window=VOL_WINDOW)
    voldata = gen_vol(voldata, date, STVOL_WINDOW)
    signals = load_signals(date)

    data = pandas.merge(exrets,
                        voldata,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        signals,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        fret,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data = pandas.merge(data,
                        rsk,
                        left_index=True,
                        right_index=True,
                        how='inner')
    data['datadate'] = date

    univ = lb.load_production_universe('npxchnpak', date)
    data = data[data.index.isin(univ.index)]
    print len(data)
    gdata.write_gce(data,
                    'users',
                    'dsargent/{version}/{dt}.pd'.format(
                        version=VERSION, dt=date.strftime('%Y%m%d')),
                    enable_compression=True)

    return
Example #4
def run1(date):
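    """Split the Korean names in npxchnpak into bottom/top halves by local market
    cap and write them out as the kor_small and kor_big universe files.
    """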

    RSKDATA = lb.loadrsk2("ASE1JPN", "S", date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', date)

    data = pandas.merge(RSKDATA[['COUNTRY', 'LOC_CAPT']],
                        npxchnpak,
                        left_index=True,
                        right_index=True)

    data = data[data['COUNTRY'] == 'KOR']
    data['VALID_UNTIL'] = data['VALID_UNTIL'].map(
        lambda x: x.strftime('%Y%m%d'))
    data['rank'] = data['LOC_CAPT'].rank()
    data = data.sort('rank')

    nbot = len(data) / 2
    ntop = len(data) - nbot

    bot = data.head(nbot)
    top = data.tail(ntop)

    print bot.head()
    print top.head()

    bot = bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    top = top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]

    bot.to_csv("/research/data/universe/kor_small/kor_small" +
               date.strftime('%Y%m%d') + ".univ",
               header=False,
               sep='|')
    top.to_csv("/research/data/universe/kor_big/kor_big" +
               date.strftime('%Y%m%d') + ".univ",
               header=False,
               sep='|')
Example #5
def pair(as_of_date):

    univ = lb.load_production_universe('npxchnpak', as_of_date)
    barrids = univ.index.tolist()

    sql = ''' 
    select sm.barrid parent_barrid,  sm.name parent_name, oh.* from dsargent..owner_holdings_all oh
	join qai..ownsecmap os on oh.ownercode=os.ownercode
	join nipun_prod..security_master sm on os.securitycode=sm.own_id
	and oh.qtrdate between sm.datadate and isnull(sm.stopdate, '20500101')
	where qtrdate <= '%(max_date)s' and qtrdate >= '%(min_date)s' and sharesheld>=0 and sharesout>=0	
	and sharesheld/sharesout > %(min_threshold)s
    '''

    repl = {
            'min_threshold': min_threshold, \
            'max_date': (as_of_date - pandas.datetools.MonthEnd()*3 +datetime.timedelta(1)).strftime('%Y%m%d'), \
            'min_date': (as_of_date - pandas.datetools.MonthEnd()*9 +datetime.timedelta(1)).strftime('%Y%m%d')
            }

    ##REPORT DATE MAY BE BACKFILLED, SO IT IS NOT USED IN THE BACKTEST
    ##TO BE CONSERVATIVE, TAKE A 3-MONTH LAG FOR DATADATE

    data = dbo.query(sql % repl, df=True)
    print as_of_date
    print(as_of_date - pandas.datetools.MonthEnd() * 3 +
          datetime.timedelta(1)).strftime('%Y%m%d')
    print(as_of_date - pandas.datetools.MonthEnd() * 9 +
          datetime.timedelta(1)).strftime('%Y%m%d')

    ##PARENT COMPANY NEEDS TO BE IN THE UNIVERSE; HOLDINGS CAN BE OUTSIDE THE UNIVERSE
    data = data[data['parent_barrid'].isin(barrids)]

    ##When there are multiple reports on the same qtrdate for the same barrid, keep the most recent one
    data = data.sort(
        ['parent_barrid', 'barrid', 'qtrdate', 'ownercode', 'reportdate'])
    data = data.drop_duplicates(
        ["parent_barrid", "barrid", "qtrdate", "ownercode"], take_last=True)

    ##When there is more than one qtrdate, keep the most recent one
    data = data.sort(['parent_barrid', 'barrid', 'ownercode', 'qtrdate'])
    data = data.drop_duplicates(["parent_barrid", "barrid", "ownercode"],
                                take_last=True)

    cols = ['valueheld', 'valuechg', 'shareschg', 'sharesheld', 'sharesout']
    data[cols] = data[cols].astype(float)

    ##sum up multiple holdings of the same company held under different ownercodes of the same parent_barrid
    cols = ['parent_barrid', 'barrid', 'sharesheld', 'valueheld']

    sum = data[cols].groupby(['parent_barrid', 'barrid']).sum()

    sum = sum.rename(columns={
        'sharesheld': 'sharesheld_sum',
        'valueheld': 'valueheld_sum'
    })
    data = pandas.merge(data,
                        sum,
                        how='inner',
                        left_on=['parent_barrid', 'barrid'],
                        right_index=True)

    #data['shares_pct_of_holding'] = data['sharesheld'] /  data['sharesout']

    ##MERGE IN BOOK EQUITY FROM WS
    be = n.getWSCalc(as_of_date,
                     items=['common_equity_ex_mi_lag0'],
                     production=PRODUCTION)
    ttm_current = be.pick_ttm('current')
    be = be.join(ttm_current)

    idx = be['common_equity_ex_mi_lag0'] <= 0
    be['common_equity_ex_mi_lag0'][idx] = np.nan

    print as_of_date
    print "book equity distribution before currency converting"
    print be['common_equity_ex_mi_lag0'].describe()
    print be.head().to_string()

    be = be[['common_equity_ex_mi_lag0']]
    data = pandas.merge(data,
                        be,
                        left_on=['parent_barrid'],
                        right_index=True,
                        how='left')

    rskdata = n.loadrsk2('ASE1JPN', 'S', as_of_date, daily=True)

    usdcapt = rskdata[['USD_CAPT', 'INDNAME', 'LOC_CAPT']]
    usdcapt = usdcapt.rename(columns={'USD_CAPT': 'USD_CAPT_parent'})
    data = pandas.merge(data,
                        usdcapt,
                        left_on=['parent_barrid'],
                        right_index=True,
                        how='left')

    data['USD_BE'] = data['common_equity_ex_mi_lag0'] * data[
        'USD_CAPT_parent'] / data['LOC_CAPT'] * 1e6

    usdcapt = rskdata[['USD_CAPT']]
    usdcapt = usdcapt.rename(columns={'USD_CAPT': 'USD_CAPT_holding'})
    data = pandas.merge(data,
                        usdcapt,
                        left_on=['barrid'],
                        right_index=True,
                        how='left')

    data['mcap_pct_of_parent'] = data['valueheld_sum'] / data['USD_CAPT_parent']
    data['mcap_pct_of_holding'] = data['valueheld_sum'] / data['USD_CAPT_holding']
    data['be_pct_of_parent'] = data['valueheld_sum'] / data['USD_BE']

    cols = [
        'USD_CAPT_parent', 'USD_CAPT_holding', 'mcap_pct_of_parent',
        'mcap_pct_of_holding', 'USD_BE', 'be_pct_of_parent'
    ]
    data[cols] = data[cols].astype(float)
    print "print distribution of BE_pct_of_parent"
    print data['be_pct_of_parent'].describe().to_string()

    vars = ['mcap_pct_of_holding', 'mcap_pct_of_parent', 'be_pct_of_parent']
    for col in vars:
        data[col][data[col] > 1.0] = 1.0

    data = data.sort(['parent_barrid', 'barrid'])
    data = data.drop_duplicates(["parent_barrid", "barrid"], take_last=True)

    ##get monthly returns
    ##compute PMOM12, PMOM6, PMOM3 for every barrid
    retdata = n.monthly_returns(as_of_date, lookback=400, model='GEM2')

    retdata.columns = map(lambda x: x.strip(), retdata.columns.tolist())
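    ## DATADATE is assumed here to be a YYYYMM integer; the block below converts
    ## it to a month-end timestamp and shifts RET to gross returns (1 + r) so the
    ## momentum windows can be compounded with a product.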

    retdata['date'] = map(
        lambda x: datetime.datetime(int(str(x)[:4]), int(str(x)[4:]), 1) +
        pandas.datetools.MonthEnd(), retdata['DATADATE'])
    retdata['RET'] += 1

    ##GET PMOM3
    # if as_of_date=20111231, min_date=20111031
    min_date = as_of_date - pandas.datetools.MonthEnd(2)
    print "print pmom3 dates"
    print as_of_date
    print min_date

    retdata3 = retdata[retdata['date'] >=
                       min_date]  #(this way we filter out the old months)

    pmom3 = retdata3.groupby('BARRID').apply(
        lambda x: pandas.np.product(x['RET']) - 1)
    pmom3 = pandas.DataFrame(pmom3)
    pmom3.columns = ['pmom3']

    res = pandas.merge(data, pmom3, left_on='barrid', right_index=True)

    ##GET PMOM6
    min_date = as_of_date - pandas.datetools.MonthEnd(5)
    print "print pmom6 dates"
    print as_of_date
    print min_date
    retdata6 = retdata[retdata['date'] >=
                       min_date]  #(this way we filter out the old months)

    pmom6 = retdata6.groupby('BARRID').apply(
        lambda x: pandas.np.product(x['RET']) - 1)
    pmom6 = pandas.DataFrame(pmom6)
    pmom6.columns = ['pmom6']

    res = pandas.merge(res, pmom6, left_on='barrid', right_index=True)

    ##GET PMOM12
    min_date = as_of_date - pandas.datetools.MonthEnd(11)
    print "print pmom12 dates"
    print as_of_date
    print min_date
    retdata12 = retdata[retdata['date'] >=
                        min_date]  #(this way we filter out the old months)

    pmom12 = retdata12.groupby('BARRID').apply(
        lambda x: pandas.np.product(x['RET']) - 1)
    pmom12 = pandas.DataFrame(pmom12)
    pmom12.columns = ['pmom12']

    res = pandas.merge(res, pmom12, left_on='barrid', right_index=True)

    ##merge in past month return
    retdata = n.monthly_returns(as_of_date, lookback=0, model='GEM2')
    res = pandas.merge(res, retdata, left_on='barrid', right_on='BARRID')

    ##winsorize returns
    idx = res['pmom3'] >= 1
    res['pmom3'][idx] = 1
    idx = (res['pmom3'].notnull()) & (res['pmom3'] < -1)
    res['pmom3'][idx] = -1

    idx = res['pmom6'] >= 1
    res['pmom6'][idx] = 1
    idx = (res['pmom6'].notnull()) & (res['pmom6'] < -1)
    res['pmom6'][idx] = -1

    idx = res['pmom12'] >= 1
    res['pmom12'][idx] = 1
    idx = (res['pmom12'].notnull()) & (res['pmom12'] < -1)
    res['pmom12'][idx] = -1

    idx = res['RET'] >= 1
    res['RET'][idx] = 1
    idx = (res['RET'].notnull()) & (res['RET'] < -1)
    res['RET'][idx] = -1

    ##compute sig = equal-weighted mean of all impulse returns
    grouped = res[['parent_barrid', 'pmom3', 'pmom6', 'pmom12',
                   'RET']].groupby('parent_barrid')

    holding_pmom3 = pandas.DataFrame(grouped['pmom3'].mean())
    holding_pmom3 = holding_pmom3.rename(
        columns={'pmom3': 'crossholding_pmom3_v1beallhldtype'})
    write_alpha_files(holding_pmom3,
                      'crossholding_pmom3_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    holding_pmom6 = pandas.DataFrame(grouped['pmom6'].mean())
    holding_pmom6 = holding_pmom6.rename(
        columns={'pmom6': 'crossholding_pmom6_v1beallhldtype'})
    write_alpha_files(holding_pmom6,
                      'crossholding_pmom6_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    holding_pmom12 = pandas.DataFrame(grouped['pmom12'].mean())
    holding_pmom12 = holding_pmom12.rename(
        columns={'pmom12': 'crossholding_pmom12_v1beallhldtype'})
    write_alpha_files(holding_pmom12,
                      'crossholding_pmom12_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    holding_ret = pandas.DataFrame(grouped['RET'].mean())
    holding_ret = holding_ret.rename(
        columns={'RET': 'crossholding_ret_v1beallhldtype'})
    write_alpha_files(holding_ret,
                      'crossholding_ret_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    ##COMPUTE HOLDING PORTFOLIO RETURNS, weighted by VALUEHELD

    pf = res[[
        'parent_barrid', 'valueheld_sum', 'pmom3', 'pmom6', 'pmom12', 'RET'
    ]]
    ## compute weighted sum
    ## the following function sum_weighted_scores returns several columns per group:
    ## barrid, N, value_weighted, relative_across (value_weighted/N), relative_within (value_weighted / wts)
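    ## A minimal, hypothetical sketch of such a helper (the production
    ## nu.sum_weighted_scores may differ):
    ##   def sum_weighted_scores(groups, wts, scores):
    ##       df = pandas.DataFrame({'grp': groups, 'wts': wts, 'score': scores}).dropna()
    ##       out = pandas.DataFrame()
    ##       out['N'] = df.groupby('grp')['score'].count()
    ##       out['value_weighted'] = (df['wts'] * df['score']).groupby(df['grp']).sum()
    ##       out['relative_across'] = out['value_weighted'] / out['N']
    ##       out['relative_within'] = out['value_weighted'] / df.groupby('grp')['wts'].sum()
    ##       return out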
    pfret_pmom3 = nu.sum_weighted_scores(pf['parent_barrid'],
                                         pf['valueheld_sum'], pf['pmom3'])
    pfret_pmom3['crossholding_pfret_pmom3_v1beallhldtype'] = pfret_pmom3[
        'relative_within']

    write_alpha_files(pfret_pmom3['crossholding_pfret_pmom3_v1beallhldtype'],
                      'crossholding_pfret_pmom3_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    pfret_pmom6 = nu.sum_weighted_scores(pf['parent_barrid'],
                                         pf['valueheld_sum'], pf['pmom6'])
    pfret_pmom6['crossholding_pfret_pmom6_v1beallhldtype'] = pfret_pmom6[
        'relative_within']

    write_alpha_files(pfret_pmom6['crossholding_pfret_pmom6_v1beallhldtype'],
                      'crossholding_pfret_pmom6_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    pfret_pmom12 = nu.sum_weighted_scores(pf['parent_barrid'],
                                          pf['valueheld_sum'], pf['pmom12'])
    pfret_pmom12['crossholding_pfret_pmom12_v1beallhldtype'] = pfret_pmom12[
        'relative_within']

    write_alpha_files(pfret_pmom12['crossholding_pfret_pmom12_v1beallhldtype'],
                      'crossholding_pfret_pmom12_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    pfret_ret = nu.sum_weighted_scores(pf['parent_barrid'],
                                       pf['valueheld_sum'], pf['RET'])
    pfret_ret['crossholding_pfret_ret_v1beallhldtype'] = pfret_ret[
        'relative_within']

    write_alpha_files(pfret_ret['crossholding_pfret_ret_v1beallhldtype'],
                      'crossholding_pfret_ret_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    ##compute weighted average returns as impulse
    res = res.sort(['parent_barrid', 'barrid'])

    res['wgtpmom3'] = np.nan
    res['wgtpmom6'] = np.nan
    res['wgtpmom12'] = np.nan
    res['wgtret'] = np.nan

    for c in set(res['parent_barrid']):
        idx = res['parent_barrid'] == c
        res['wgtpmom3'][idx] = res['be_pct_of_parent'][idx] * res['pmom3'][idx]
        res['wgtpmom6'][idx] = res['be_pct_of_parent'][idx] * res['pmom6'][idx]
        res['wgtpmom12'][idx] = res['be_pct_of_parent'][idx] * res['pmom12'][idx]
        res['wgtret'][idx] = res['be_pct_of_parent'][idx] * res['RET'][idx]

    ##compute sig = weighted SUM of all impulse returns
    grouped = res[[
        'parent_barrid', 'wgtpmom3', 'wgtpmom6', 'wgtpmom12', 'wgtret'
    ]].groupby('parent_barrid')
    holding_pmom3 = pandas.DataFrame(grouped['wgtpmom3'].sum())
    holding_pmom3 = holding_pmom3.rename(
        columns={'wgtpmom3': 'crossholding_wgtpmom3_v1beallhldtype'})
    write_alpha_files(holding_pmom3,
                      'crossholding_wgtpmom3_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    crossholding_wgtpmom3_v1beallhldtype_in, crossholding_wgtpmom3_v1beallhldtype_it = ta.timing_alphas(
        holding_pmom3,
        'crossholding_wgtpmom3_v1beallhldtype',
        as_of_date,
        UNIVERSE,
        'INDUSTRY',
        production=PRODUCTION)
    #write_alpha_files(crossholding_wgtpmom3_v1beallhldtype_in, 'crossholding_wgtpmom3_v1beallhldtype_in', as_of_date, production=PRODUCTION)

    holding_pmom6 = pandas.DataFrame(grouped['wgtpmom6'].sum())
    holding_pmom6 = holding_pmom6.rename(
        columns={'wgtpmom6': 'crossholding_wgtpmom6_v1beallhldtype'})
    write_alpha_files(holding_pmom6,
                      'crossholding_wgtpmom6_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    crossholding_wgtpmom6_v1beallhldtype_in, crossholding_wgtpmom6_v1beallhldtype_it = ta.timing_alphas(
        holding_pmom6,
        'crossholding_wgtpmom6_v1beallhldtype',
        as_of_date,
        UNIVERSE,
        'INDUSTRY',
        production=PRODUCTION)
    #write_alpha_files(crossholding_wgtpmom6_v1beallhldtype_in, 'crossholding_wgtpmom6_v1beallhldtype_in', as_of_date, production=PRODUCTION)

    holding_pmom12 = pandas.DataFrame(grouped['wgtpmom12'].sum())
    holding_pmom12 = holding_pmom12.rename(
        columns={'wgtpmom12': 'crossholding_wgtpmom12_v1beallhldtype'})
    write_alpha_files(holding_pmom12,
                      'crossholding_wgtpmom12_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    crossholding_wgtpmom12_v1beallhldtype_in, crossholding_wgtpmom12_v1beallhldtype_it = ta.timing_alphas(
        holding_pmom12,
        'crossholding_wgtpmom12_v1beallhldtype',
        as_of_date,
        UNIVERSE,
        'INDUSTRY',
        production=PRODUCTION)
    #write_alpha_files(crossholding_wgtpmom12_v1beallhldtype_in, 'crossholding_wgtpmom12_v1beallhldtype_in', as_of_date, production=PRODUCTION)

    holding_ret = pandas.DataFrame(grouped['wgtret'].sum())
    holding_ret = holding_ret.rename(
        columns={'wgtret': 'crossholding_wgtret_v1beallhldtype'})
    write_alpha_files(holding_ret,
                      'crossholding_wgtret_v1beallhldtype',
                      as_of_date,
                      production=PRODUCTION)

    crossholding_wgtret_v1beallhldtype_in, crossholding_wgtret_v1beallhldtype_it = ta.timing_alphas(
        holding_ret,
        'crossholding_wgtret_v1beallhldtype',
        as_of_date,
        UNIVERSE,
        'INDUSTRY',
        production=PRODUCTION)
    #write_alpha_files(crossholding_wgtret_v1beallhldtype_in, 'crossholding_wgtret_v1beallhldtype_in', as_of_date, production=PRODUCTION)

    return True
Example #6
def run(as_of_date):
    """ load Barra risk data """

    RSKDATA = lb.loadrsk2("ASE1JPN", "S", as_of_date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', as_of_date)
    #topbot = pandas.read_csv("/research/data/prealpha/topbot_npxchnpak/topbot_npxchnpak_"+as_of_date.strftime("%Y%m%d")+".alp",header=False, \
    #         names=['BARRID','TOPBOT'])

    # old research version of the files

    nextmonth = as_of_date + datetime.timedelta(1)
    print nextmonth, as_of_date

    try:
        filename = "/production/%s/%s/%s/prealpha/ic_scaling_npxchnpak_%s.alp" % (nextmonth.strftime('%Y'), nextmonth.strftime('%m'), nextmonth.strftime('%Y%m%d'), \
                                as_of_date.strftime('%Y%m%d'))
        topbot = (pandas.read_csv(filename,
                                  index_col=0)).rename(columns={'ic1': 'BIG'})
    except:
        print 'rolling back!'
        topbot = pandas.read_csv("/research/data/prealpha/icscale_npxchnpak/icscale_npxchnpak_"+as_of_date.strftime("%Y%m%d")+".alp",header=False, \
             names=['BARRID','BIG'])


#        topbot = pandas.read_csv(topbot,header=True, names=['BARRID','BIG'])

    topbot = topbot.reset_index()
    univdata = npxchnpak.join(RSKDATA[['COUNTRY', 'USD_CAPT']], how='left')
    univdata = univdata[univdata['COUNTRY'] == 'JPN']
    univdata = topbot.join(univdata, on='BARRID', how='right')
    univdata.index = univdata.pop('BARRID')
    univdata['VALID_UNTIL'] = univdata['VALID_UNTIL'].map(
        lambda x: x.strftime('%Y%m%d'))

    #univdata_top = univdata[univdata['BIG']=='JPN_BIG']
    univdata_top = univdata[univdata['BIG'] < 1]
    univdata_top = univdata_top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]

    univdata_bot = univdata[(univdata['BIG'] == 'JPN') |
                            (univdata['BIG'] == 1)]
    #    univdata_bot = univdata[univdata['BIG']==1]
    univdata_bot['rnk'] = univdata_bot['USD_CAPT'].rank()
    univdata_bot = univdata_bot.sort('rnk')

    print univdata_bot.head().to_string()
    univdata_bot = univdata_bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    univdata_bot.to_csv('/research/data/universe/jpnx400/jpnx400' +
                        as_of_date.strftime('%Y%m%d') + '.univ',
                        header=False,
                        sep='|')
    univdata_bot.tail(600).to_csv(
        '/research/data/universe/jpnx400_t600/jpnx400_t600' +
        as_of_date.strftime('%Y%m%d') + '.univ',
        header=False,
        sep='|')

    nbot = len(univdata_bot) / 2
    ntop = len(univdata_bot) - nbot
    print univdata_bot.head().to_string()
    print univdata_bot.tail().to_string()

    univdata_bot.head(nbot).to_csv(
        "/research/data/universe/jpnx400_small/jpnx400_small" +
        as_of_date.strftime('%Y%m%d') + ".univ",
        header=False,
        sep='|')
    univdata_bot.tail(ntop).to_csv(
        "/research/data/universe/jpnx400_big/jpnx400_big" +
        as_of_date.strftime('%Y%m%d') + ".univ",
        header=False,
        sep='|')

    #UNIVDATA.to_csv("/research/data/universe/barraestu/barraestu.univ"+yymm,header=False)
    #    univdata_top.to_csv("/research/data/universe/jpn400/jpn400"+as_of_date.strftime('%Y%m%d')+".univ",header=False,sep='|')

    return True
Example #7
def backfill_1b(date,
                bkt,
                bkt_df,
                univ='npxchnpak',
                alpha='alpha_v5',
                freq='daily',
                model='ase1jpn',
                ctry_df=None,
                univ_df=None,
                dir=None):
    """
    backfill one bucket
    """
    import nipun.cpa.load_barra as lb
    #import pandas.util.py3compat as py3compat

    print "backfill bucket %s for %s" % (bkt, date)

    date_str = date.strftime('%Y%m%d')
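    # keep only the alphas that carry a nonzero weight in at least one country of this bucket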
    keep_alphas = bkt_df.abs().sum()
    keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
    bkt_df = bkt_df[keep_alphas]

    ALPHA_DIR = '%(dir)s/%(freq)s/current/' % {'dir': dir, 'freq': freq}

    big_df = pandas.DataFrame()
    for alpha in keep_alphas:
        raw_alpha_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, alpha)
        fn = raw_alpha_dir + '%s_%s.alp' % (alpha, date_str)
        if os.path.exists(fn):
            tmp_df = pandas.read_csv(fn,
                                     header=None,
                                     names=['barrid', alpha],
                                     index_col=0)
            big_df = big_df.join(tmp_df, how='outer')
        else:
            big_df[alpha] = None

    #the v1 definition of the bucket alpha: equal-weighted mean over all alphas in the bucket
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_all')

    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)

    output_df = big_df.fillna(0).mean(axis=1)
    output_df.to_csv('%(dir)s/b_%(bkt)s_eq_all_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    })

    #the v2 definition: per-country equal-weighted mean over the alphas with positive weight
    #add country into big_df
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_c')

    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)

    if ctry_df is None:
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
    if univ_df is None:
        univ_df = lb.load_production_universe(univ, date)

    big_df = big_df[big_df.index.isin(univ_df.index)]
    big_df = big_df.join(ctry_df, how='left')

    output_df2 = pandas.DataFrame()
    for k, v in big_df.groupby('COUNTRY'):
        if k in bkt_df.index:
            keep_alphas = bkt_df.ix[k]
            keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
            output_df2 = pandas.concat([
                output_df2,
                pandas.DataFrame(v[keep_alphas].fillna(0).mean(axis=1))
            ])

    output_df2.to_csv('%(dir)s/b_%(bkt)s_eq_c_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    },
                      header=None)
Example #8
def backfill_b_alphas(weights_date=None,
                      buckets=[
                          'analyst', 'fmom', 'industry', 'iu', 'quality',
                          'sentiment', 'special', 'value'
                      ],
                      univ='npxchnpak',
                      alpha='alpha_v5',
                      startdate=datetime.datetime(2005, 1, 1),
                      enddate=datetime.datetime.today() - pandas.datetools.day,
                      model='ase1jpn',
                      ncpus=8,
                      freq='daily'):
    """
    Calculate bucket alphas based on the latest backfilled raw alphas.
    """
    print "start to backfill bucket alphas eq_all and eq_c"
    if weights_date is None:
        ctry_wt_df = dbo.query("select * from alphagen..country_wt__%s" %
                               alpha,
                               df=True)
        alpha_wt_df = dbo.query("select * from alphagen..alpha_wt__%s" % alpha,
                                df=True)
    else:
        ctry_wt_df = dbo.query(
            "select * from production_reporting..country_wt_backup \
        where cast(datadate AS date)='%(date)s' and alphagen_vers='%(alpha)s'"
            % {
                'alpha': alpha,
                'date': weights_date
            },
            df=True)
        alpha_wt_df = dbo.query(
            "select * from production_reporting..alpha_wt_backup \
        where cast(datadate AS date)='%(date)s' and alphagen_vers='%(alpha)s'"
            % {
                'alpha': alpha,
                'date': weights_date
            },
            df=True)

    ctry_wt_df = ctry_wt_df[['alpha_name', 'country', 'weight']]
    alpha_wt_df = alpha_wt_df[['bucket_name', 'alpha_name']]
    bucket_alpha_df = pandas.merge(alpha_wt_df,
                                   ctry_wt_df,
                                   on=['alpha_name'],
                                   how='left')

    job_server = pp.Server(ncpus)
    jobs = []
    for date in pandas.DateRange(startdate,
                                 enddate,
                                 offset=pandas.datetools.day):
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
        univ_df = lb.load_production_universe(univ, date)
        for bkt in buckets:
            bkt_df = bucket_alpha_df[bucket_alpha_df[
                'bucket_name'] == bkt]  #.drop(labels=['bucket_name'], axis=1)
            bkt_df = bkt_df.pivot(index='country',
                                  columns='alpha_name',
                                  values='weight')
            #backfill_1b(date, bkt, bkt_df, ctry_df=ctry_df, univ_df=univ_df, dir=DIR)
            jobs.append(
                job_server.submit(backfill_1b,
                                  (date, bkt, bkt_df, univ, alpha, freq, model,
                                   ctry_df, univ_df, DIR), (),
                                  ('pandas', 'datetime', 'os')))

    for job in jobs:
        job()

    job_server.print_stats()
    job_server.destroy()