def run(date):
    """Summarize prelocate requests vs. shares received in USD notional terms."""
    sql = '''
        select *
        from production_holdings.prelocate_request
        where datadate = '%s'
          and shares_received is not null
    ''' % date.strftime('%Y%m%d')
    data = dbo.query(sql, df=True)
    if data is None:
        return

    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    rsk['usdp'] = rsk['USD_CAPT'] * rsk['LOC_PRIC'] / rsk['LOC_CAPT']

    data = pandas.merge(data, rsk[['usdp']], left_on='barrid',
                        right_index=True, how='left')
    data['request_notional'] = data['usdp'] * data['shares_requested'].astype(float)
    data['get_notional'] = data['usdp'] * data['shares_received'].astype(float)
    data['delta_notional'] = data['request_notional'] - data['get_notional']

    # aggregate to a single row per date, in USD millions
    a = data[['request_notional', 'get_notional', 'delta_notional']].sum() / 1e6
    a = pandas.DataFrame(a)
    a = a.reset_index()
    a['datadate'] = date.strftime('%Y%m%d')
    a = a.pivot(index='datadate', columns='index', values=0)
    a['fill_pct'] = a['get_notional'] / a['request_notional']
    print a.to_string()
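# Illustrative only: the report above is presumably invoked from a daily driver
# with a business date. This sketch assumes `run` is importable as defined above
# and uses the prior business day; the wrapper name `report_latest_fill` is
# hypothetical, not part of the production code.
def report_latest_fill():
    import datetime
    import pandas
    asof = datetime.datetime.today() - pandas.datetools.BDay()
    run(asof)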
def run(date):
    """Write selected Barra style-factor exposures out as alpha files."""
    alps = [
        'MOMENTUM', 'LIQUIDIT', 'VALUE', 'GROWTH', 'SIZE', 'VOLATILI',
        'LEVERAGE'
    ]
    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    for a in alps:
        nu.write_alpha_files(rsk[a], a.lower(), date)
def run(date):
    ### load forward returns and recent momentums
    retdata = rets.daily_resrets(date + pandas.datetools.BDay(30),
                                 lookback=60 + RET_WINDOW)
    fret = gen_fwd_returns(retdata.copy(), date, [5, 10, 20])
    exrets = gen_pmom_returns(retdata.copy(), date, pmom=PMOM_BREAK)

    rsk = lb.loadrsk2('ase1jpn', 'S', date, daily=True)
    rsk = rsk[['COUNTRY', 'USD_CAPT']]

    ### load returns and compute cumulative residual returns
    ret = rets.daily_resrets(date, lookback=RET_WINDOW)

    ### load volume and signal data
    voldata = vol.load_volume(date, window=VOL_WINDOW)
    voldata = gen_vol(voldata, date, STVOL_WINDOW)
    signals = load_signals(date)

    data = pandas.merge(exrets, voldata, left_index=True, right_index=True, how='inner')
    data = pandas.merge(data, signals, left_index=True, right_index=True, how='inner')
    data = pandas.merge(data, fret, left_index=True, right_index=True, how='inner')
    data = pandas.merge(data, rsk, left_index=True, right_index=True, how='inner')
    data['datadate'] = date

    univ = lb.load_production_universe('npxchnpak', date)
    data = data[data.index.isin(univ.index)]
    print len(data)

    gdata.write_gce(data, 'users',
                    'dsargent/{version}/{dt}.pd'.format(
                        version=VERSION, dt=date.strftime('%Y%m%d')),
                    enable_compression=True)
    return
def run1(date):
    """Split the Korean names in npxchnpak into small-cap and big-cap halves."""
    RSKDATA = lb.loadrsk2("ASE1JPN", "S", date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', date)

    data = pandas.merge(RSKDATA[['COUNTRY', 'LOC_CAPT']], npxchnpak,
                        left_index=True, right_index=True)
    data = data[data['COUNTRY'] == 'KOR']
    data['VALID_UNTIL'] = data['VALID_UNTIL'].map(lambda x: x.strftime('%Y%m%d'))

    # rank by local market cap and split the universe in half
    data['rank'] = data['LOC_CAPT'].rank()
    data = data.sort('rank')
    nbot = len(data) / 2
    ntop = len(data) - nbot
    bot = data.head(nbot)
    top = data.tail(ntop)
    print bot.head()
    print top.head()

    bot = bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    top = top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    bot.to_csv("/research/data/universe/kor_small/kor_small" +
               date.strftime('%Y%m%d') + ".univ", header=False, sep='|')
    top.to_csv("/research/data/universe/kor_big/kor_big" +
               date.strftime('%Y%m%d') + ".univ", header=False, sep='|')
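# Illustrative only: the .univ files written above are pipe-separated with no
# header row, with the barrid index as the first column followed by SEDOL,
# NATURAL, and VALID_UNTIL. A sketch of reading one back; the helper name
# `read_univ_file` is hypothetical.
def read_univ_file(path):
    import pandas
    return pandas.read_csv(path, sep='|', header=None,
                           names=['BARRID', 'SEDOL', 'NATURAL', 'VALID_UNTIL'],
                           index_col=0)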
def run(as_of_date):
    """Build the jpnx400 universe files (full, tail-600, and small/big halves)
    for Japan from the IC-scaling pre-alpha file."""
    # load Barra risk data and the production universe
    RSKDATA = lb.loadrsk2("ASE1JPN", "S", as_of_date, daily=True)
    npxchnpak = lb.load_production_universe('npxchnpak', as_of_date)
    # old research version of the files:
    #topbot = pandas.read_csv("/research/data/prealpha/topbot_npxchnpak/topbot_npxchnpak_"
    #                         + as_of_date.strftime("%Y%m%d") + ".alp",
    #                         header=None, names=['BARRID', 'TOPBOT'])

    nextmonth = as_of_date + datetime.timedelta(1)
    print nextmonth, as_of_date
    try:
        filename = "/production/%s/%s/%s/prealpha/ic_scaling_npxchnpak_%s.alp" % (
            nextmonth.strftime('%Y'), nextmonth.strftime('%m'),
            nextmonth.strftime('%Y%m%d'), as_of_date.strftime('%Y%m%d'))
        topbot = pandas.read_csv(filename, index_col=0).rename(columns={'ic1': 'BIG'})
    except Exception:
        print 'rolling back!'
        topbot = pandas.read_csv(
            "/research/data/prealpha/icscale_npxchnpak/icscale_npxchnpak_" +
            as_of_date.strftime("%Y%m%d") + ".alp",
            header=None, names=['BARRID', 'BIG'])
    # topbot = pandas.read_csv(topbot, header=True, names=['BARRID', 'BIG'])
    topbot = topbot.reset_index()

    univdata = npxchnpak.join(RSKDATA[['COUNTRY', 'USD_CAPT']], how='left')
    univdata = univdata[univdata['COUNTRY'] == 'JPN']
    univdata = topbot.join(univdata, on='BARRID', how='right')
    univdata.index = univdata.pop('BARRID')
    univdata['VALID_UNTIL'] = univdata['VALID_UNTIL'].map(lambda x: x.strftime('%Y%m%d'))

    #univdata_top = univdata[univdata['BIG'] == 'JPN_BIG']
    univdata_top = univdata[univdata['BIG'] < 1]
    univdata_top = univdata_top[['SEDOL', 'NATURAL', 'VALID_UNTIL']]

    univdata_bot = univdata[(univdata['BIG'] == 'JPN') | (univdata['BIG'] == 1)]
    # univdata_bot = univdata[univdata['BIG'] == 1]
    univdata_bot['rnk'] = univdata_bot['USD_CAPT'].rank()
    univdata_bot = univdata_bot.sort('rnk')
    print univdata_bot.head().to_string()

    univdata_bot = univdata_bot[['SEDOL', 'NATURAL', 'VALID_UNTIL']]
    univdata_bot.to_csv('/research/data/universe/jpnx400/jpnx400' +
                        as_of_date.strftime('%Y%m%d') + '.univ',
                        header=False, sep='|')
    univdata_bot.tail(600).to_csv(
        '/research/data/universe/jpnx400_t600/jpnx400_t600' +
        as_of_date.strftime('%Y%m%d') + '.univ', header=False, sep='|')

    # split the bottom universe into small/big halves by USD market cap
    nbot = len(univdata_bot) / 2
    ntop = len(univdata_bot) - nbot
    print univdata_bot.head().to_string()
    print univdata_bot.tail().to_string()
    univdata_bot.head(nbot).to_csv(
        "/research/data/universe/jpnx400_small/jpnx400_small" +
        as_of_date.strftime('%Y%m%d') + ".univ", header=False, sep='|')
    univdata_bot.tail(ntop).to_csv(
        "/research/data/universe/jpnx400_big/jpnx400_big" +
        as_of_date.strftime('%Y%m%d') + ".univ", header=False, sep='|')
    #UNIVDATA.to_csv("/research/data/universe/barraestu/barraestu.univ" + yymm, header=False)
    #univdata_top.to_csv("/research/data/universe/jpn400/jpn400" +
    #                    as_of_date.strftime('%Y%m%d') + ".univ", header=False, sep='|')
    return True
def run(date):
    print date
    """
    ''' compute cumulative FlowPct over the past month '''
    epfr_sql = '''
        select FlowPct
        from nipun_prod.epfr_daily_asiaxjpn_flow
        where ReportDate between '{startdate:%Y%m%d}' and '{enddate:%Y%m%d}'
    '''.format(
        startdate=date - pandas.datetools.MonthEnd() - pandas.datetools.BDay(),
        enddate=date - pandas.datetools.BDay())
    query = dbo.query(epfr_sql, df=True)
    if query is None:
        epfr_asia = None
        print 'epfr is None in month of {:%Y%m%d}'.format(date)
    else:
        epfr_asia = query['flowpct'].sum()
    """

    ''' Use current predicted IR '''
    epfr_sql = '''
        select predicted_IR
        from nipun_prod.flow_predicted_sigma
        where datadate = '{usedate:%Y%m%d}'
    '''.format(usedate=date - pandas.datetools.BDay(2))
    query = dbo.query(epfr_sql)
    if query is None:
        epfr_asia = None
        print 'epfr is None in month of {:%Y%m%d}'.format(date)
    else:
        epfr_asia = query[0][0]

    data = pandas.DataFrame()
    rsk = lb.loadrsk2('ase1jpn', 'S', date)
    country_list = ['HKG', 'TWN', 'KOR', 'JPN', 'AUS', 'SGP', 'OTHER']
    alps = ['sentiment', 'analyst', 'quality', 'iu', 'fmom', 'value']
    for alp in alps:
        try:
            _tmp = lb.loadalp('/research/alphagen/alpha_v5/b_%s__w/b_%s__w_%s.alp'
                              % (alp, alp, date.strftime('%Y%m%d')))
            _tmp.columns = [alp]
        except Exception:
            print 'alpha {} not loaded on {:%Y%m%d}'.format(alp, date)
            # no research alphas after 20160831
            continue
        if len(data) == 0:
            data = _tmp
        else:
            data = pandas.merge(data, _tmp, left_index=True, right_index=True,
                                how='outer')

    forward_rets = rets.monthly_resrets(date + pandas.datetools.MonthEnd(), lookback=0)
    forward_rets = forward_rets[['resid_c', 'BARRID']]
    forward_rets.set_index('BARRID', inplace=True)

    data = pandas.merge(data, forward_rets, left_index=True, right_index=True,
                        how='inner')
    data = data.fillna(0.0)
    data['COUNTRY'] = rsk['COUNTRY']
    ix = data['COUNTRY'] == 'CHX'
    data['COUNTRY'][ix] = 'HKG'
    ix = data['COUNTRY'].isin(country_list)
    data['COUNTRY'][-ix] = 'OTHER'
    data['date'] = date.strftime('%Y%m%d')
    data['epfr'] = epfr_asia
    data.to_csv('som/som.input_data/%s.csv' % date.strftime('%Y%m%d'),
                index=True, header=True)
def backfill_1b(date, bkt, bkt_df, univ='npxchnpak', alpha='alpha_v5',
                freq='daily', model='ase1jpn', ctry_df=None, univ_df=None,
                dir=None):
    """ backfill one bucket """
    import nipun.cpa.load_barra as lb
    #import pandas.util.py3compat as py3compat

    print "backfill bucket %s for %s" % (bkt, date)
    date_str = date.strftime('%Y%m%d')

    # keep only the alphas that carry weight somewhere in this bucket
    keep_alphas = bkt_df.abs().sum()
    keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
    bkt_df = bkt_df[keep_alphas]

    # assemble the raw alpha files for the bucket into one frame
    ALPHA_DIR = '%(dir)s/%(freq)s/current/' % {'dir': dir, 'freq': freq}
    big_df = pandas.DataFrame()
    for alpha in keep_alphas:
        raw_alpha_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, alpha)
        fn = raw_alpha_dir + '%s_%s.alp' % (alpha, date_str)
        if os.path.exists(fn):
            tmp_df = pandas.read_csv(fn, header=None, names=['barrid', alpha],
                                     index_col=0)
            big_df = big_df.join(tmp_df, how='outer')
        else:
            big_df[alpha] = None

    # the v1 def. for bucket alphas: equal-weight all alphas in the bucket
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_all')
    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)
    output_df = big_df.fillna(0).mean(axis=1)
    output_df.to_csv('%(dir)s/b_%(bkt)s_eq_all_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    })

    # the v2 def.: per-country equal weight; add country into big_df
    b_out_dir = "%s/%s/%s/" % (ALPHA_DIR, univ, 'b_' + bkt + '_eq_c')
    if not os.path.exists(b_out_dir):
        os.makedirs(b_out_dir)
    if ctry_df is None:
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
    if univ_df is None:
        univ_df = lb.load_production_universe(univ, date)

    big_df = big_df[big_df.index.isin(univ_df.index)]
    big_df = big_df.join(ctry_df, how='left')
    output_df2 = pandas.DataFrame()
    for k, v in big_df.groupby('COUNTRY'):
        if k in bkt_df.index:
            # within each country, average only the alphas with positive weight there
            keep_alphas = bkt_df.ix[k]
            keep_alphas = keep_alphas[keep_alphas > 0].index.tolist()
            output_df2 = pandas.concat([
                output_df2,
                pandas.DataFrame(v[keep_alphas].fillna(0).mean(axis=1))
            ])
    output_df2.to_csv('%(dir)s/b_%(bkt)s_eq_c_%(date)s.alp' % {
        'dir': b_out_dir,
        'bkt': bkt,
        'date': date_str
    }, header=None)
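# Illustrative only: backfill_1b expects bkt_df to be a country-by-alpha weight
# matrix, the same shape produced by the pivot in backfill_b_alphas below
# (index=country, columns=alpha_name, values=weight). A tiny hand-built example
# with made-up alpha names and weights; the helper name `_example_bkt_df` is
# hypothetical.
def _example_bkt_df():
    import pandas
    return pandas.DataFrame({
        'alpha_a': {'JPN': 0.6, 'KOR': 0.0},
        'alpha_b': {'JPN': 0.4, 'KOR': 1.0},
    })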
def backfill_b_alphas(weights_date=None,
                      buckets=['analyst', 'fmom', 'industry', 'iu', 'quality',
                               'sentiment', 'special', 'value'],
                      univ='npxchnpak', alpha='alpha_v5',
                      startdate=datetime.datetime(2005, 1, 1),
                      enddate=datetime.datetime.today() - pandas.datetools.day,
                      model='ase1jpn', ncpus=8, freq='daily'):
    """Calculate bucket alphas (eq_all and eq_c) from the latest backfilled raw alphas."""
    print "start to backfill bucket alphas eq_all and eq_c"

    # load the country and alpha weights, either current or as of a backup date
    if weights_date is None:
        ctry_wt_df = dbo.query("select * from alphagen..country_wt__%s" % alpha,
                               df=True)
        alpha_wt_df = dbo.query("select * from alphagen..alpha_wt__%s" % alpha,
                                df=True)
    else:
        ctry_wt_df = dbo.query(
            "select * from production_reporting..country_wt_backup "
            "where cast(datadate AS date)='%(date)s' and alphagen_vers='%(alpha)s'"
            % {'alpha': alpha, 'date': weights_date}, df=True)
        alpha_wt_df = dbo.query(
            "select * from production_reporting..alpha_wt_backup "
            "where cast(datadate AS date)='%(date)s' and alphagen_vers='%(alpha)s'"
            % {'alpha': alpha, 'date': weights_date}, df=True)

    ctry_wt_df = ctry_wt_df[['alpha_name', 'country', 'weight']]
    alpha_wt_df = alpha_wt_df[['bucket_name', 'alpha_name']]
    bucket_alpha_df = pandas.merge(alpha_wt_df, ctry_wt_df, on=['alpha_name'],
                                   how='left')

    # fan the per-date, per-bucket work out to a parallel-python server
    job_server = pp.Server(ncpus)
    jobs = []
    for date in pandas.DateRange(startdate, enddate, offset=pandas.datetools.day):
        ctry_df = pandas.DataFrame(
            lb.loadrsk2(model, 'S', date, daily=True)['COUNTRY'])
        univ_df = lb.load_production_universe(univ, date)
        for bkt in buckets:
            bkt_df = bucket_alpha_df[bucket_alpha_df['bucket_name'] == bkt]  #.drop(labels=['bucket_name'], axis=1)
            bkt_df = bkt_df.pivot(index='country', columns='alpha_name',
                                  values='weight')
            #backfill_1b(date, bkt, bkt_df, ctry_df=ctry_df, univ_df=univ_df, dir=DIR)
            jobs.append(
                job_server.submit(backfill_1b,
                                  (date, bkt, bkt_df, univ, alpha, freq, model,
                                   ctry_df, univ_df, DIR),
                                  (), ('pandas', 'datetime', 'os')))
    for job in jobs:
        job()
    job_server.print_stats()
    job_server.destroy()
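# Illustrative only: a typical invocation of the backfill above for a short
# window, assuming DIR, dbo, lb, and pp are configured as in the production
# environment; the dates and cpu count here are made up.
if __name__ == '__main__':
    backfill_b_alphas(startdate=datetime.datetime(2016, 1, 4),
                      enddate=datetime.datetime(2016, 1, 8),
                      ncpus=4)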