def gold_view_update(ctx, startdate, enddate, viewid):
    # function: update gold view results
    mv = cal_gold_view()
    today = datetime.now().strftime('%Y-%m-%d')

    df = {}
    df['globalid'] = np.repeat(viewid, len(mv))
    df['bl_date'] = mv.index
    df['bl_view'] = mv.view_01.values
    df['bl_index_id'] = np.repeat('120000014', len(mv))
    df['created_at'] = np.repeat(today, len(mv))
    df['updated_at'] = np.repeat(today, len(mv))
    df_new = pd.DataFrame(df).set_index(['globalid', 'bl_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('ra_bl_view', metadata, autoload=True)
    columns = [
        t.c.globalid,
        t.c.bl_date,
        t.c.bl_view,
        t.c.bl_index_id,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns, (t.c.globalid == viewid))
    df_old = pd.read_sql(s, db, index_col=['globalid', 'bl_date'],
                         parse_dates=['bl_date'])
    df_new = df_new[df_old.columns]  # keep columns in sync with df_old
    database.batch(db, t, df_new, df_old, timestamp=False)
    print('########### id=BL.000009 ####### saved to table: asset/ra_bl_view ####')
    print('########### gold view date:', mv.index[-1].strftime('%Y-%m-%d'),
          '##### value:', mv.view_01.values[-1])

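# Editorial note: every updater in this module follows the same pattern -- build
# df_new in memory, read the matching rows back from the table as df_old, then let
# database.batch reconcile the two. database.batch is an internal helper not shown
# here; below is a minimal sketch of the index-based diff it presumably performs
# (the name and the three-way split are assumptions, not the project's code).
# It also explains why df_old is run through the same formatting maps as df_new
# elsewhere in this module: value comparison must use identical representations.
def _batch_diff_sketch(df_new, df_old):
    # rows only in df_new -> INSERT; only in df_old -> DELETE;
    # rows in both whose values differ -> UPDATE (assumes identical columns)
    to_insert = df_new.loc[df_new.index.difference(df_old.index)]
    to_delete = df_old.loc[df_old.index.difference(df_new.index)]
    common = df_new.index.intersection(df_old.index)
    changed = common[(df_new.loc[common] != df_old.loc[common]).any(axis=1).values]
    return to_insert, df_new.loc[changed], to_delete
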
def saveStockFlr(stockFlr, date, sdate=None, edate=None):
    df = pd.DataFrame(columns=['br_date', 'br_stock_id', 'br_flr'])
    df['br_stock_id'] = stockFlr.index
    df['br_flr'] = stockFlr.values
    df['br_date'] = date
    df.set_index(['br_date', 'br_stock_id'], inplace=True)

    db = create_engine(uris['multi_factor'])
    meta = MetaData(bind=db)
    t = Table('barra_resid_fluctuation_ratio', meta, autoload=True)
    columns = [
        t.c.br_date,
        t.c.br_stock_id,
        t.c.br_flr,
    ]
    sql = select(columns)
    if sdate is not None:
        sql = sql.where(t.c.br_date >= sdate)
    if edate is not None:
        sql = sql.where(t.c.br_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['br_date'] = dfBase['br_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['br_date', 'br_stock_id'], inplace=True)
    database.batch(db, t, df, dfBase, timestamp=False)

def corr_update(corr, codes):
    ra_index = base_ra_index.find(corr['ra_index_id'])
    if ra_index is None:
        click.echo(click.style(
            "unknown index [%s] for calc corr!" % (corr['ra_index_id']), fg="yellow"))
        return False

    yesterday = datetime.now() - timedelta(days=1)
    enddate = yesterday.strftime("%Y-%m-%d")

    #
    # load index data
    #
    index_code = ra_index['ra_code']
    if corr['ra_date_type'] == 1:
        df_nav_index = DBData.db_index_value_daily('2015-10-08', enddate, codes=[index_code])
    else:
        df_nav_index = DBData.db_index_value('2015-10-08', enddate, codes=[index_code])
    df_inc_index = df_nav_index.pct_change().fillna(0.0)

    #
    # load fund list
    #
    df_fund = base_ra_fund.load(codes=codes)

    data = []
    with click.progressbar(
            length=len(df_fund.index),
            label=('update corr for corr %d' % (corr['globalid'])).ljust(30)) as bar:
        for _, fund in df_fund.iterrows():
            bar.update(1)
            tmp = corr_update_fund(corr, fund, df_inc_index)
            if tmp is not None:
                data.append([
                    corr['globalid'],
                    fund['globalid'],
                    fund['ra_code'],
                    "%.4f" % (tmp),
                ])

    df_new = pd.DataFrame(data, columns=['ra_corr_id', 'ra_fund_id', 'ra_fund_code', 'ra_corr'])
    df_new = df_new.set_index(['ra_corr_id', 'ra_fund_id'])

    db = database.connection('base')
    # load old data
    t2 = Table('ra_corr_fund', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_corr_id,
        t2.c.ra_fund_id,
        t2.c.ra_fund_code,
        t2.c.ra_corr,
    ]
    stmt_select = select(columns2, (t2.c.ra_corr_id == corr['globalid']))
    if codes is not None:
        stmt_select = stmt_select.where(t2.c.ra_fund_code.in_(codes))
    df_old = pd.read_sql(stmt_select, db, index_col=['ra_corr_id', 'ra_fund_id'])
    if not df_old.empty:
        df_old['ra_corr'] = df_old['ra_corr'].map("{:.4f}".format)

    # update the database
    database.batch(db, t2, df_new, df_old, timestamp=True)

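# corr_update_fund is defined elsewhere in the project; judging from its use above
# it returns one correlation value for a fund against the index return series.
# A hypothetical minimal version (name, arguments, and windowing are assumptions):
def _corr_update_fund_sketch(fund_nav, df_inc_index):
    # Pearson correlation of the fund's daily returns with the index returns,
    # computed on overlapping dates only
    fund_inc = fund_nav.pct_change().fillna(0.0)
    aligned = pd.concat([fund_inc, df_inc_index.iloc[:, 0]], axis=1, join='inner')
    if len(aligned) < 2:
        return None
    return aligned.iloc[:, 0].corr(aligned.iloc[:, 1])
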
def nav_update_cppi_benchmark(db, asset):
    df = load_index_for_asset(db['asset'], asset['globalid'])
    df = df.reset_index()
    globalid = df.loc[0, 'ra_fund_code']
    df = asset_ra_portfolio_nav.load_series(globalid, xtype=8)
    df = df.to_frame()
    df = df.reset_index()
    df['ra_asset_id'] = asset.globalid
    df['ra_inc'] = df['ra_nav'].pct_change().fillna(0.0)
    df.columns = ['ra_date', 'ra_nav', 'ra_asset_id', 'ra_inc']
    df = df.set_index(['ra_asset_id', 'ra_date'])
    df_new = df

    # load old data
    t2 = Table('ra_composite_asset_nav', MetaData(bind=db['asset']), autoload=True)
    columns2 = [
        t2.c.ra_asset_id,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(columns2, (t2.c.ra_asset_id == asset['globalid']))
    df_old = pd.read_sql(stmt_select, db['asset'],
                         index_col=['ra_asset_id', 'ra_date'], parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # update the database
    database.batch(db['asset'], t2, df_new, df_old, timestamp=False)

def macro_view_update(ctx, startdate, enddate, viewid, idx):
    backtest_interval = pd.date_range(startdate, enddate)
    rev = re_view(backtest_interval)
    irv = ir_view(backtest_interval)
    epsv = eps_view(backtest_interval, idx)

    mv = pd.concat([rev, irv, epsv], axis=1)
    mv['mv'] = mv['rev'] + mv['irv'] + mv['epsv']

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.mv.values
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))

    union_mv = {}
    union_mv['globalid'] = mv_view_id
    union_mv['bl_date'] = mv_date
    union_mv['bl_view'] = np.sign(mv_inc)
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['globalid', 'bl_date', 'bl_view', 'created_at', 'updated_at'])

    for index_id in ['120000001', '120000002']:
        df_new = union_mv_df.copy()  # copy so the loop does not mutate the shared frame
        df_new['bl_index_id'] = index_id
        df_new = df_new.set_index(['globalid', 'bl_date', 'bl_index_id'])

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(
            t.c.bl_index_id == index_id)
        df_old = pd.read_sql(s, db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)
        print(df_new.tail())

def save(gid, df):
    # function: update data
    db = database.connection('wind')
    t = Table('mc_gold_indicator', MetaData(bind=db), autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]
    s = select(columns).where(t.c.globalid == gid)
    df_old = pd.read_sql(s, db, index_col=['globalid', 'mc_gold_date'],
                         parse_dates=['mc_gold_date'])
    database.batch(db, t, df, df_old, timestamp=False)

def fund_update_factor_pool(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # calculate the latest allocation at every adjust point
        #
        ffr = asset_stock_factor.load_stock_factor_return()
        ffr = ffr.unstack().T
        ffr.index = ffr.index.levels[1]

        ffe = asset_fund_factor.load_fund_factor_exposure()
        ffe = ffe.reset_index()

        data = []
        pre_codes = None
        with click.progressbar(length=len(adjust_points),
                               label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                codes, codes_pool = pool_by_single_factor(pool, day, lookback, limit, ffr, ffe)
                if pre_codes is None:
                    pre_codes = codes
                else:
                    # keep survivors from last period, then top up with new picks
                    pre_codes_origin = pre_codes.copy()
                    pre_codes = np.intersect1d(pre_codes, codes_pool)
                    new_codes = np.setdiff1d(codes, pre_codes)
                    fund_lack_num = limit - len(pre_codes)
                    codes = np.append(pre_codes, new_codes[:fund_lack_num])
                    pre_codes = codes
                    print('turnover rate:',
                          1 - len(np.intersect1d(codes, pre_codes_origin)) / float(limit))
                if codes is None or len(codes) == 0:
                    continue
                print(day, len(codes), codes)
                ra_fund = base_ra_fund.load(codes=codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])

        fund_df = pd.DataFrame(data, columns=['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
        s = select(columns)
        db = database.connection('asset')
        ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)
        s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col=df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)

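# The turnover control above keeps last period's funds that are still in the
# candidate pool and then tops the list back up to `limit` with this period's new
# picks. A self-contained toy illustration of that selection step (data made up):
def _topup_example(limit=3):
    pre = np.array(['A', 'B', 'C'])         # funds held last period
    pool = np.array(['A', 'C', 'D', 'E'])   # funds still eligible this period
    target = np.array(['C', 'D', 'E'])      # this period's ideal picks
    kept = np.intersect1d(pre, pool)        # ['A', 'C'] survive
    fresh = np.setdiff1d(target, kept)      # ['D', 'E'] are the new entrants
    return np.append(kept, fresh[:limit - len(kept)])  # ['A', 'C', 'D']
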
def nav_update_category(db, pool, category):
    # load fund list
    df = load_fund_category(pool['id'], category)

    # build equal-split positions
    df['ra_ratio'] = 1.0
    df.set_index('ra_fund_code', append=True, inplace=True)
    df['ra_ratio'] = df['ra_ratio'].groupby(level=0, group_keys=False).apply(lambda x: x / len(x))
    df_position = df.unstack().fillna(0.0)
    df_position.columns = df_position.columns.droplevel(0)

    # load fund returns
    min_date = df_position.index.min()
    max_date = datetime.now() - timedelta(days=1)  # yesterday

    df_nav = DBData.db_fund_value_daily(min_date, max_date, codes=df_position.columns)
    if '000000' in df_position.columns:
        df_nav['000000'] = 1
    df_inc = df_nav.pct_change().fillna(0.0)

    # calculate composite asset nav
    df_nav_portfolio = DFUtil.portfolio_nav(df_inc, df_position, result_col='portfolio')
    # df_nav_portfolio.to_csv(datapath('category_nav_' + category + '.csv'))

    df_result = df_nav_portfolio[['portfolio']].rename(columns={'portfolio': 'ra_nav'}).copy()
    df_result.index.name = 'ra_date'
    df_result['ra_inc'] = df_result['ra_nav'].pct_change().fillna(0.0)
    df_result['ra_pool'] = pool['id']
    df_result['ra_category'] = category
    df_result['ra_type'] = pool['ra_type']
    df_result = df_result.reset_index().set_index(['ra_pool', 'ra_category', 'ra_type', 'ra_date'])
    df_new = df_result.apply(format_nav_and_inc)

    # load old data
    t2 = Table('ra_pool_nav', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_type,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(
        columns2,
        (t2.c.ra_pool == pool['id']) & (t2.c.ra_category == category)
        & (t2.c.ra_type == pool['ra_type']))
    df_old = pd.read_sql(stmt_select, db,
                         index_col=['ra_pool', 'ra_category', 'ra_type', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # update the database
    database.batch(db, t2, df_new, df_old, timestamp=False)

def nav_update_index(db, asset):
    # load index list for the composite asset
    df = load_index_for_asset(db['asset'], asset['globalid'])

    # build equal-split positions
    df_position = df.unstack().fillna(0.0)
    df_position.columns = df_position.columns.droplevel(0)

    # load index returns
    min_date = df_position.index.min()
    max_date = datetime.now() - timedelta(days=1)  # yesterday

    df_nav = DBData.db_index_value_daily(min_date, max_date, codes=df_position.columns)
    df_inc = df_nav.pct_change().fillna(0.0)

    dates = pd.date_range(df_position.index[0], datetime.now() - timedelta(1))
    df_position = df_position.reindex(dates).fillna(method='pad')

    # calculate composite asset nav
    df_nav_portfolio = DFUtil.portfolio_nav(df_inc, df_position, result_col='portfolio')

    df_result = df_nav_portfolio[['portfolio']].rename(columns={'portfolio': 'ra_nav'}).copy()
    df_result.index.name = 'ra_date'
    df_result['ra_inc'] = df_result['ra_nav'].pct_change().fillna(0.0)
    df_result['ra_asset_id'] = asset['globalid']
    df_result = df_result.reset_index().set_index(['ra_asset_id', 'ra_date'])
    df_new = df_result.apply(format_nav_and_inc)

    # load old data
    t2 = Table('ra_composite_asset_nav', MetaData(bind=db['asset']), autoload=True)
    columns2 = [
        t2.c.ra_asset_id,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(columns2, (t2.c.ra_asset_id == asset['globalid']))
    df_old = pd.read_sql(stmt_select, db['asset'],
                         index_col=['ra_asset_id', 'ra_date'], parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # update the database
    database.batch(db['asset'], t2, df_new, df_old, timestamp=False)

def import_pool_sample(ctx, optpath):
    df = pd.read_csv(optpath.strip(), index_col=['ra_pool_id'])

    db = database.connection('asset')
    t = Table('ra_pool_sample', MetaData(bind=db), autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]
    for pool_id in set(df.index):
        s = select(columns, (t.c.ra_pool_id == pool_id))
        df_old = pd.read_sql(s, db, index_col=['ra_pool_id'])
        database.batch(db, t, df, df_old, timestamp=True)

def wavelet(ctx, startdate, enddate, viewid, idx, wavenum, max_wave_num, wave_name):
    if idx is None:
        #idx = ['120000001', '120000002', '120000013', '120000014', '120000015', '120000080', 'ERI000001', 'ERI000002']
        idx = ['120000001']

    for _id in idx:
        trade_dates = ATradeDate.trade_date()
        nav = Asset(_id).nav(reindex=trade_dates).fillna(method='pad').dropna()
        dates = nav.index[1000:]
        views = []
        for d in dates:
            _tmp_nav = nav[nav.index <= d]
            wave_nav = wavefilter(_tmp_nav, wavenum, wname=wave_name, maxlevel=max_wave_num)
            inc = wave_nav.pct_change().fillna(0.0).iloc[-120:]
            wave_diff_rolling = wave_nav.diff().rolling(5).mean()
            views.append(wave_diff_rolling.iloc[-1])
            print(_id, d, inc.mean(), inc.std())

        view_df = pd.DataFrame(views, index=dates, columns=['bl_view'])
        view_df[view_df > 0] = 1.0
        view_df[view_df < 0] = 0
        view_df.index.name = 'bl_date'
        view_df['globalid'] = viewid
        view_df['bl_index_id'] = _id
        view_df['created_at'] = datetime.now()
        view_df['updated_at'] = datetime.now()
        df_new = view_df.reset_index().set_index(['globalid', 'bl_date', 'bl_index_id'])

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(t.c.bl_index_id == _id)
        df_old = pd.read_sql(s, db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)

def corr_update_category(pool, category, lookback):
    # load fund list
    df = load_fund_category(pool['id'], category)

    data = {}
    for k0, v0 in df.groupby(level=0):
        index = DBData.trade_date_lookback_index(end_date=k0, lookback=lookback)
        start_date = index.min().strftime("%Y-%m-%d")
        end_date = k0

        df_nav = DBData.db_fund_value_daily(start_date, end_date, codes=v0['ra_fund_code'])
        df_inc = df_nav.pct_change().fillna(0.0)
        df_corr = df_inc.corr()
        df_corr.fillna(0.0, inplace=True)
        if df_corr.empty:
            data[k0] = 0.0
        else:
            data[k0] = np.mean(df_corr.values)

    df_new = pd.DataFrame({'ra_corr': data})
    df_new.index.name = 'ra_date'
    df_new['ra_pool'] = pool['id']
    df_new['ra_category'] = category
    df_new = df_new.reset_index().set_index(['ra_pool', 'ra_category', 'ra_date'])
    df_new = df_new.applymap("{:.4f}".format)

    db = database.connection('asset')
    # load old data
    t2 = Table('ra_pool_criteria', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_date,
        t2.c.ra_corr,
    ]
    stmt_select = select(columns2,
                         (t2.c.ra_pool == pool['id']) & (t2.c.ra_category == category))
    df_old = pd.read_sql(stmt_select, db,
                         index_col=['ra_pool', 'ra_category', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.applymap("{:.4f}".format)

    # update the database
    database.batch(db, t2, df_new, df_old, timestamp=False)

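# Note that np.mean(df_corr.values) above averages the full correlation matrix, so
# the diagonal of ones is included and each off-diagonal pair is counted twice. If
# the intended criterion is the average pairwise correlation, a strictly
# upper-triangular mean is the usual alternative (sketch, assuming df_corr is square):
def _mean_pairwise_corr(df_corr):
    a = df_corr.values
    n = a.shape[0]
    if n < 2:
        return 0.0
    iu = np.triu_indices(n, k=1)  # each pair counted once, diagonal excluded
    return float(a[iu].mean())
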
def sp_view_update(ctx, startdate, enddate, viewid):
    mv = cal_sp_view()
    mv = mv.resample('d').last().fillna(method='pad')
    mv = mv.rolling(30).mean().dropna()
    print(mv.tail())

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv['view'].values
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))

    union_mv = {}
    union_mv['globalid'] = mv_view_id
    union_mv['bl_date'] = mv_date
    union_mv['bl_view'] = np.sign(mv_inc)
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['globalid', 'bl_date', 'bl_view', 'created_at', 'updated_at'])

    for index_id in ['120000013', 'ERI000001']:
        df_new = union_mv_df.copy()  # copy so the loop does not mutate the shared frame
        df_new['bl_index_id'] = index_id
        df_new = df_new.set_index(['globalid', 'bl_date', 'bl_index_id'])

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(
            t.c.bl_index_id == index_id)
        df_old = pd.read_sql(s, db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)
        print(df_new.tail())

def saveFactorCovariance(matAll, names, dates):
    dates.sort()
    factorNum = np.shape(matAll)[1]
    dfList = []
    k = 0
    for date in dates:
        mat = matAll[k, :, :].reshape(factorNum, factorNum)
        dfTmp = pd.DataFrame(columns=['bf_date', 'bf_factor1', 'bf_factor2', 'bf_cov'])
        dfFactor1 = list()
        dfFactor2 = list()
        covij = list()
        i = 0
        for name1 in names:
            j = i
            for name2 in names[i:]:
                dfFactor1.append(name1)
                dfFactor2.append(name2)
                covij.append(mat[i, j])
                j += 1
            i += 1
        dfTmp['bf_factor1'] = dfFactor1
        dfTmp['bf_factor2'] = dfFactor2
        dfTmp['bf_cov'] = covij
        dfTmp['bf_date'] = date
        dfList.append(dfTmp)
        k += 1

    df = pd.concat(dfList, axis=0)
    df['bf_date'] = df['bf_date'].map(lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_covariance', meta, autoload=True)
    columns = [
        t.c.bf_date,
        t.c.bf_factor1,
        t.c.bf_factor2,
        t.c.bf_cov,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bf_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bf_date'] = dfBase['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)
    database.batch(db, t, df, dfBase, timestamp=False)

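# The nested name1/name2 loops above enumerate the upper triangle (including the
# diagonal) of one covariance matrix. np.triu_indices expresses the same flattening
# without explicit loops; a sketch equivalent for a single date, assuming `mat` is
# square and ordered like `names`:
def _flatten_cov_sketch(mat, names, date):
    iu = np.triu_indices(len(names))  # k=0: diagonal included, matching the loop
    names = np.asarray(names)
    return pd.DataFrame({
        'bf_date': pd.Timestamp(date).strftime('%Y-%m-%d'),
        'bf_factor1': names[iu[0]],
        'bf_factor2': names[iu[1]],
        'bf_cov': np.asarray(mat)[iu],
    })
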
def saveStockCovariance(dfAll, dates):
    dates.sort()
    dfList = []
    k = 0
    for date in dates:
        df = pd.DataFrame(columns=['bc_date', 'bc_stock1', 'bc_stock2', 'bc_cov'])
        names = list(dfAll[k].columns)
        dfStock1 = list()
        dfStock2 = list()
        covij = list()
        i = 0
        for name1 in names:
            j = i
            for name2 in names[i:]:
                dfStock1.append(name1)
                dfStock2.append(name2)
                covij.append(dfAll[k].iloc[i, j])
                j += 1
            i += 1
        df['bc_stock1'] = dfStock1
        df['bc_stock2'] = dfStock2
        df['bc_cov'] = covij
        df['bc_date'] = date
        df['bc_date'] = df['bc_date'].map(lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
        df.set_index(['bc_date', 'bc_stock1', 'bc_stock2'], inplace=True)
        dfList.append(df)
        k += 1

    dfNew = pd.concat(dfList, axis=0)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_covariance', meta, autoload=True)
    columns = [
        t.c.bc_date,
        t.c.bc_stock1,
        t.c.bc_stock2,
        t.c.bc_cov,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bc_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bc_date'] = dfBase['bc_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bc_date', 'bc_stock1', 'bc_stock2'], inplace=True)
    database.batch(db, t, dfNew, dfBase, timestamp=False)

def fund_update_corr_jensen(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # calculate the latest allocation at every adjust point
        #
        db = database.connection('asset')
        ra_pool_sample_t = Table('ra_pool_sample', MetaData(bind=db), autoload=True)
        ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)

        data = []
        with click.progressbar(length=len(adjust_points),
                               label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                codes = pool_by_corr_jensen(pool, day, lookback, limit)
                print(day, codes)
                if codes is None or len(codes) == 0:
                    continue
                ra_fund = base_ra_fund.load(codes=codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])

        fund_df = pd.DataFrame(data, columns=['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
        s = select(columns)
        s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col=df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)

def fund_update_monetary_pool(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # calculate the latest allocation at every adjust point
        #
        # these inputs do not depend on the adjust point, so build them once
        mnf = MonetaryFundFilter()
        mnf.handle()
        asset_ids = MonetaryAllocate.all_monetary_fund_globalid()
        trade_dates = ATradeDate.week_trade_date()
        dict_nav = {}
        for asset_id in asset_ids:
            dict_nav[asset_id] = Asset.load_nav_series(asset_id, reindex=trade_dates)
        df_nav = pd.DataFrame(dict_nav).fillna(method='pad')
        df_inc = df_nav.pct_change().fillna(0.0)

        data = []
        with click.progressbar(length=len(adjust_points),
                               label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                codes = pool_by_scale_return(pool, day, lookback, limit, mnf, df_inc)
                if codes is None or len(codes) == 0:
                    continue
                print(day, len(codes), codes)
                ra_fund = base_ra_fund.load(globalids=codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])

        fund_df = pd.DataFrame(data, columns=['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
        s = select(columns)
        db = database.connection('asset')
        ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)
        s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col=df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)

def fc_update_nav(ctx, optid):
    lookback_days = 365
    blacklist = [24, 32, 40]
    factor_ids = ['1200000%02d' % i for i in range(1, 40) if i not in blacklist]

    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')
    date = trade_dates[-1]
    start_date = (date - timedelta(days=lookback_days)).strftime('%Y-%m-%d')
    end_date = date.strftime('%Y-%m-%d')

    corr0 = load_ind(factor_ids, start_date, end_date)
    res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)
    asset_cluster = res[1]
    asset_cluster = dict(zip(sorted(asset_cluster), sorted(asset_cluster.values())))

    factor_name = base_ra_index.load()
    for k, v in asset_cluster.items():
        v = np.array(v).astype('int')
        print(factor_name.loc[v])

    assets = {}
    for factor_id in factor_ids:
        assets[factor_id] = Asset.load_nav_series(factor_id)
    df_assets = pd.DataFrame(assets)

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('ra_composite_asset_nav', metadata, autoload=True)

    for layer in asset_cluster.keys():
        layer_id = 'FC.000001.%d' % (layer + 1)
        layer_assets = asset_cluster[layer]
        layer_nav = df_assets.loc[:, layer_assets]
        layer_ret = layer_nav.pct_change().dropna()
        layer_ret = layer_ret.mean(1)
        layer_ret = layer_ret.reset_index()
        layer_ret.columns = ['ra_date', 'ra_inc']
        layer_ret['ra_nav'] = (1 + layer_ret['ra_inc']).cumprod()
        layer_ret['ra_asset_id'] = layer_id
        df_new = layer_ret.set_index(['ra_asset_id', 'ra_date'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        database.batch(db, t, df_new, df_old, timestamp=False)

def saveStockFlr(df, dates):
    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_resid_fluctuation_ratio', meta, autoload=True)
    columns = [
        t.c.br_date,
        t.c.br_stock_id,
        t.c.br_flr,
    ]
    sql = select(columns)
    sql = sql.where(t.c.br_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['br_date'] = dfBase['br_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['br_date', 'br_stock_id'], inplace=True)
    database.batch(db, t, df, dfBase, timestamp=False)

def turnover_update_category(pool, category):
    # load fund list
    df = load_fund_category(pool['id'], category)

    # build 0/1 membership matrix: which funds are held at each rebalance date
    df['mask'] = 1
    df.set_index('ra_fund_code', append=True, inplace=True)
    df = df.unstack(fill_value=0)
    df_prev = df.shift(1).fillna(0).astype(int)
    df_prev.iloc[0] = df.iloc[0]
    df_and = np.bitwise_and(df, df_prev)
    df_new = (1 - df_and.sum(axis=1) / df_prev.sum(axis=1)).to_frame('ra_turnover')
    df_new['ra_pool'] = pool['id']
    df_new['ra_category'] = category
    df_new = df_new.reset_index().set_index(['ra_pool', 'ra_category', 'ra_date'])
    df_new = df_new.applymap("{:.4f}".format)

    db = database.connection('asset')
    # load old data
    t2 = Table('ra_pool_criteria', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_date,
        t2.c.ra_turnover,
    ]
    stmt_select = select(columns2,
                         (t2.c.ra_pool == pool['id']) & (t2.c.ra_category == category))
    df_old = pd.read_sql(stmt_select, db,
                         index_col=['ra_pool', 'ra_category', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.applymap("{:.4f}".format)

    # update the database
    database.batch(db, t2, df_new, df_old, timestamp=False)

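# Worked example of the turnover formula above: if last period's holdings are
# {A, B, C} and this period's are {B, C, D}, the 0/1 rows overlap in 2 funds, so
# turnover = 1 - 2/3 ≈ 0.3333. A tiny self-contained check (made-up data):
def _turnover_example():
    prev = pd.Series({'A': 1, 'B': 1, 'C': 1, 'D': 0})
    curr = pd.Series({'A': 0, 'B': 1, 'C': 1, 'D': 1})
    kept = np.bitwise_and(curr, prev).sum()  # 2 funds carried over
    return 1 - kept / prev.sum()             # 1 - 2/3
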
def saveFactorCovariance(mat, names, date, sdate=None, edate=None):
    df = pd.DataFrame(columns=['bf_date', 'bf_factor1', 'bf_factor2', 'bf_cov'])
    dfFactor1 = list()
    dfFactor2 = list()
    covij = list()
    i = 0
    for name1 in names:
        j = i
        for name2 in names[i:]:
            dfFactor1.append(name1)
            dfFactor2.append(name2)
            covij.append(mat[i, j])
            j += 1
        i += 1
    df['bf_factor1'] = dfFactor1
    df['bf_factor2'] = dfFactor2
    df['bf_cov'] = covij
    df['bf_date'] = date
    df['bf_date'] = df['bf_date'].map(lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_covariance', meta, autoload=True)
    columns = [
        t.c.bf_date,
        t.c.bf_factor1,
        t.c.bf_factor2,
        t.c.bf_cov,
    ]
    sql = select(columns)
    if sdate is not None:
        sql = sql.where(t.c.bf_date >= sdate)
    if edate is not None:
        sql = sql.where(t.c.bf_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['bf_date'] = dfBase['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)
    database.batch(db, t, df, dfBase, timestamp=False)

def gold_view_update2(ctx, startdate, enddate, viewid):
    mv = cal_gold_view2()
    mv = mv.resample('d').last().fillna(method='pad')
    mv['view'] = mv['view'].shift(15)

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv['view'].values
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))

    union_mv = {}
    union_mv['mc_view_id'] = mv_view_id
    union_mv['mc_date'] = mv_date
    union_mv['mc_inc'] = mv_inc
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    df_new = union_mv_df.set_index(['mc_view_id', 'mc_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('mc_view_strength', metadata, autoload=True)
    columns = [
        t.c.mc_view_id,
        t.c.mc_date,
        t.c.mc_inc,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns, (t.c.mc_view_id == viewid))
    df_old = pd.read_sql(s, db, index_col=['mc_view_id', 'mc_date'],
                         parse_dates=['mc_date'])
    database.batch(db, t, df_new, df_old, timestamp=False)

def bond_view_update(ctx, startdate, enddate, viewid):
    backtest_interval = pd.date_range(startdate, enddate)
    mv = ir_view(backtest_interval)
    mv = mv.rolling(60).apply(lambda x: sum(x) / 10 - 1).fillna(0.0)

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.irv.values
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))

    union_mv = {}
    union_mv['mc_view_id'] = mv_view_id
    union_mv['mc_date'] = mv_date
    union_mv['mc_inc'] = mv_inc
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    df_new = union_mv_df.set_index(['mc_view_id', 'mc_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('mc_view_strength', metadata, autoload=True)
    columns = [
        t.c.mc_view_id,
        t.c.mc_date,
        t.c.mc_inc,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns, (t.c.mc_view_id == viewid))
    df_old = pd.read_sql(s, db, index_col=['mc_view_id', 'mc_date'],
                         parse_dates=['mc_date'])
    database.batch(db, t, df_new, df_old, timestamp=False)

def filter_fund_by_list_to_pool_sample(ctx, optfundtype, optinpath, optpoolid):
    df = pd.read_csv(optinpath.strip())
    codes_set = set()
    for code in df['codes'].values:
        codes_set.add('%06d' % int(code))

    fund_type = int(optfundtype.strip())

    db = database.connection('base')
    t = Table('ra_fund', MetaData(bind=db), autoload=True)
    columns = [
        t.c.ra_code,
    ]
    s = select(columns)
    s = s.where(t.c.ra_type == fund_type)
    df = pd.read_sql(s, db)

    final_codes = []
    for code in df['ra_code'].values:
        code = '%06d' % int(code)
        if code in codes_set:
            final_codes.append(code)
    print(final_codes)

    df = pd.DataFrame(final_codes, columns=['ra_fund_code'])
    df['ra_pool_id'] = optpoolid.strip()
    df = df.set_index(['ra_pool_id', 'ra_fund_code'])

    db = database.connection('asset')
    t = Table('ra_pool_sample', MetaData(bind=db), autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]
    for pool_id in set(df.index.get_level_values(0)):
        s = select(columns, (t.c.ra_pool_id == pool_id))
        df_old = pd.read_sql(s, db, index_col=['ra_pool_id', 'ra_fund_code'])
        print(df)
        database.batch(db, t, df, df_old, timestamp=True)

def saveStockVariance(dfAll, dates):
    dates.sort()
    n = len(dfAll)
    dfList = []
    for i in range(n):
        dfTmp = pd.DataFrame(columns=['bs_date', 'bs_stock_id', 'bs_var'])
        stocks = list()
        stockVars = list()
        k = 0
        for column in dfAll[i].columns:
            stockVars.append(dfAll[i].iloc[k, k])
            stocks.append(column)
            k += 1
        dfTmp['bs_stock_id'] = stocks
        dfTmp['bs_var'] = stockVars
        dfTmp['bs_date'] = dates[i]
        dfList.append(dfTmp)

    df = pd.concat(dfList, axis=0)
    df['bs_date'] = df['bs_date'].map(lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bs_date', 'bs_stock_id'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_stock_variance', meta, autoload=True)
    columns = [
        t.c.bs_date,
        t.c.bs_stock_id,
        t.c.bs_var,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bs_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bs_date'] = dfBase['bs_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bs_date', 'bs_stock_id'], inplace=True)
    database.batch(db, t, df, dfBase, timestamp=False)

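# The inner column loop above only pulls the diagonal (per-stock variances) out of
# each covariance frame; np.diag does the same in one call. Sketch for one date:
def _stock_var_sketch(df_cov, date):
    return pd.DataFrame({
        'bs_date': pd.Timestamp(date).strftime('%Y-%m-%d'),
        'bs_stock_id': df_cov.columns,
        'bs_var': np.diag(df_cov.values),
    })
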
def imp_portf(df):
    db = database.connection('asset')
    metadata = MetaData(bind=db)
    portfolio_t = Table('ra_portfolio', metadata, autoload=True)
    portfolio_alloc_t = Table('ra_portfolio_alloc', metadata, autoload=True)
    portfolio_asset_t = Table('ra_portfolio_asset', metadata, autoload=True)
    portfolio_argv_t = Table('ra_portfolio_argv', metadata, autoload=True)

    df = df.copy()
    portfolio_id = df['ra_portfolio_id'].unique().item()
    portfolio_name = df['ra_portfolio_name'].unique().item()
    highlow_id = df['mz_highlow_id'].unique().item()

    portfolio_df = pd.DataFrame([[portfolio_id, portfolio_name, highlow_id]],
                                columns=['globalid', 'ra_name', 'ra_ratio_id'])
    portfolio_df['ra_type'] = 9
    portfolio_df = portfolio_df.set_index(['globalid'])
    portfolio_df['ra_algo'] = df['ra_portfolio_algo'].unique().item()
    portfolio_df['ra_persistent'] = 0

    df_new = portfolio_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(portfolio_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_t, df_new, df_old)

    portfolio_alloc_data = []
    portfolio_asset_data = []
    portfolio_argv_data = []

    portfolio_alloc_columns = [
        'globalid', 'ra_name', 'ra_portfolio_id', 'ra_ratio_id', 'ra_risk', 'ra_type'
    ]
    portfolio_asset_columns = [
        'ra_portfolio_id', 'ra_asset_id', 'ra_asset_name', 'ra_asset_type', 'ra_pool_id'
    ]
    portfolio_argv_columns = ['ra_portfolio_id', 'ra_key', 'ra_value']

    portfolio_alloc_index = ['globalid']
    portfolio_asset_index = ['ra_portfolio_id', 'ra_asset_id']
    portfolio_argv_index = ['ra_portfolio_id', 'ra_key']

    for k, v in df.groupby(['risk']):
        portfolio_id = v['ra_portfolio_id'].unique().item()
        risk = v['risk'].unique().item()
        portfolio_id_num = portfolio_id.strip().split('.')[1]
        portfolio_risk_id = portfolio_id.replace(
            portfolio_id_num,
            str(int(portfolio_id_num) + int(risk * 10) % 10))
        highlow_id_num = highlow_id.strip().split('.')[1]
        highlow_risk_id = highlow_id.replace(
            highlow_id_num,
            str(int(highlow_id_num) + int(risk * 10) % 10))

        portfolio_alloc_data.append([
            portfolio_risk_id, portfolio_name, portfolio_id, highlow_risk_id, risk, 9
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            pool_id = record['pool_id']
            asset_name = find_asset_name(asset_id)
            portfolio_asset_data.append(
                [portfolio_risk_id, asset_id, asset_name, 0, pool_id])

        for col in v.columns:
            key = col.strip()
            if key.startswith('portfolio'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                portfolio_argv_data.append([portfolio_risk_id, key, value])

    portfolio_alloc_df = pd.DataFrame(portfolio_alloc_data, columns=portfolio_alloc_columns)
    portfolio_alloc_df = portfolio_alloc_df.set_index(portfolio_alloc_index)
    portfolio_asset_df = pd.DataFrame(portfolio_asset_data, columns=portfolio_asset_columns)
    portfolio_asset_df = portfolio_asset_df.set_index(portfolio_asset_index)
    portfolio_argv_df = pd.DataFrame(portfolio_argv_data, columns=portfolio_argv_columns)
    portfolio_argv_df = portfolio_argv_df.set_index(portfolio_argv_index)

    df_new = portfolio_alloc_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(portfolio_alloc_t.c.ra_portfolio_id.in_(df_new['ra_portfolio_id'].ravel()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, portfolio_alloc_t, df_new, df_old)

    df_new = portfolio_asset_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(portfolio_asset_t.c.ra_portfolio_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_asset_t, df_new, df_old)

    df_new = portfolio_argv_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(portfolio_argv_t.c.ra_portfolio_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_argv_t, df_new, df_old)

def imp_markowitz(df):
    db = database.connection('asset')
    metadata = MetaData(bind=db)
    markowitz_t = Table('mz_markowitz', metadata, autoload=True)
    markowitz_alloc_t = Table('mz_markowitz_alloc', metadata, autoload=True)
    markowitz_asset_t = Table('mz_markowitz_asset', metadata, autoload=True)
    markowitz_argv_t = Table('mz_markowitz_argv', metadata, autoload=True)

    df = df.copy()
    markowitz_id = df['mz_markowitz_id'].unique().item()
    markowitz_name = df['mz_markowitz_name'].unique().item()
    markowitz_algo = 0

    markowitz_df = pd.DataFrame([[markowitz_id, markowitz_name, markowitz_algo]],
                                columns=['globalid', 'mz_name', 'mz_algo'])
    markowitz_df['mz_type'] = 9
    markowitz_df = markowitz_df.set_index(['globalid'])

    df_new = markowitz_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(markowitz_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, markowitz_t, df_new, df_old)

    markowitz_alloc_data = []
    markowitz_asset_data = []
    markowitz_argv_data = []

    markowitz_alloc_columns = [
        'globalid', 'mz_markowitz_id', 'mz_name', 'mz_type', 'mz_algo', 'mz_risk'
    ]
    markowitz_asset_columns = [
        'mz_markowitz_id', 'mz_asset_id', 'mz_asset_name',
        'mz_markowitz_asset_id', 'mz_markowitz_asset_name', 'mz_asset_type',
        'mz_upper_limit', 'mz_lower_limit', 'mz_sum1_limit', 'mz_sum2_limit'
    ]
    markowitz_argv_columns = ['mz_markowitz_id', 'mz_key', 'mz_value']

    markowitz_alloc_index = ['globalid']
    markowitz_asset_index = ['mz_markowitz_id', 'mz_markowitz_asset_id']
    markowitz_argv_index = ['mz_markowitz_id', 'mz_key']

    for k, v in df.groupby(['risk']):
        markowitz_id = v['mz_markowitz_id'].unique().item()
        risk = v['risk'].unique().item()
        markowitz_id_num = markowitz_id.strip().split('.')[1]
        markowitz_risk_id = markowitz_id.replace(
            markowitz_id_num,
            str(int(markowitz_id_num) + int(risk * 10) % 10))
        markowitz_algo = v['allocate_algo'].unique().item()

        markowitz_alloc_data.append([
            markowitz_risk_id, markowitz_id, markowitz_name, 9, markowitz_algo, risk
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            asset_name = find_asset_name(asset_id)
            sum1 = record['sum1']
            sum2 = record['sum2']
            lower = record['lower']
            upper = record['upper']
            markowitz_asset_data.append([
                markowitz_risk_id, asset_id, asset_name, asset_id, asset_name,
                0, upper, lower, sum1, sum2
            ])

        for col in v.columns:
            key = col.strip()
            if key.startswith('allocate'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                markowitz_argv_data.append([markowitz_risk_id, key, value])

    markowitz_alloc_df = pd.DataFrame(markowitz_alloc_data, columns=markowitz_alloc_columns)
    markowitz_alloc_df = markowitz_alloc_df.set_index(markowitz_alloc_index)
    markowitz_asset_df = pd.DataFrame(markowitz_asset_data, columns=markowitz_asset_columns)
    markowitz_asset_df = markowitz_asset_df.set_index(markowitz_asset_index)
    markowitz_argv_df = pd.DataFrame(markowitz_argv_data, columns=markowitz_argv_columns)
    markowitz_argv_df = markowitz_argv_df.set_index(markowitz_argv_index)

    df_new = markowitz_alloc_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(markowitz_alloc_t.c.mz_markowitz_id.in_(df_new['mz_markowitz_id'].ravel()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, markowitz_alloc_t, df_new, df_old)

    df_new = markowitz_asset_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(markowitz_asset_t.c.mz_markowitz_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, markowitz_asset_t, df_new, df_old)

    df_new = markowitz_argv_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(markowitz_argv_t.c.mz_markowitz_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, markowitz_argv_t, df_new, df_old)

def imp_highlow(df):
    db = database.connection('asset')
    metadata = MetaData(bind=db)
    highlow_t = Table('mz_highlow', metadata, autoload=True)
    highlow_alloc_t = Table('mz_highlow_alloc', metadata, autoload=True)
    highlow_asset_t = Table('mz_highlow_asset', metadata, autoload=True)
    highlow_argv_t = Table('mz_highlow_argv', metadata, autoload=True)

    df = df.copy()
    highlow_id = df['mz_highlow_id'].unique().item()
    highlow_name = df['mz_highlow_name'].unique().item()
    markowitz_id = df['mz_markowitz_id'].unique().item()

    highlow_df = pd.DataFrame([[highlow_id, highlow_name, markowitz_id]],
                              columns=['globalid', 'mz_name', 'mz_markowitz_id'])
    highlow_df['mz_type'] = 9
    highlow_df = highlow_df.set_index(['globalid'])
    highlow_df['mz_algo'] = df['mz_highlow_algo'].unique().item()
    highlow_df['mz_persistent'] = 0

    df_new = highlow_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(highlow_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_t, df_new, df_old)

    highlow_alloc_data = []
    highlow_asset_data = []
    highlow_argv_data = []

    highlow_alloc_columns = [
        'globalid', 'mz_name', 'mz_type', 'mz_highlow_id', 'mz_risk', 'mz_algo',
        'mz_markowitz_id'
    ]
    highlow_asset_columns = [
        'mz_highlow_id', 'mz_asset_id', 'mz_asset_name', 'mz_asset_type',
        'mz_origin_id', 'mz_riskmgr_id', 'mz_pool_id'
    ]
    highlow_argv_columns = ['mz_highlow_id', 'mz_key', 'mz_value']

    highlow_alloc_index = ['globalid']
    highlow_asset_index = ['mz_highlow_id', 'mz_asset_id']
    highlow_argv_index = ['mz_highlow_id', 'mz_key']

    for k, v in df.groupby(['risk']):
        highlow_id = v['mz_highlow_id'].unique().item()
        risk = v['risk'].unique().item()
        highlow_id_num = highlow_id.strip().split('.')[1]
        highlow_risk_id = highlow_id.replace(
            highlow_id_num,
            str(int(highlow_id_num) + int(risk * 10) % 10))
        highlow_name = v['mz_highlow_name'].unique().item()
        markowitz_id_num = markowitz_id.strip().split('.')[1]
        markowitz_risk_id = markowitz_id.replace(
            markowitz_id_num,
            str(int(markowitz_id_num) + int(risk * 10) % 10))
        highlow_algo = v['mz_highlow_algo'].unique().item()

        highlow_alloc_data.append([
            highlow_risk_id, highlow_name, 9, highlow_id, risk, highlow_algo,
            markowitz_risk_id
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            pool_id = record['pool_id']
            riskmgr_id = record['riskmgr_id']
            asset_name = find_asset_name(asset_id)
            highlow_asset_data.append([
                highlow_risk_id, asset_id, asset_name, 0, markowitz_id,
                riskmgr_id, pool_id
            ])

        for col in v.columns:
            key = col.strip()
            if key.startswith('highlow'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                highlow_argv_data.append([highlow_risk_id, key, value])

    highlow_alloc_df = pd.DataFrame(highlow_alloc_data, columns=highlow_alloc_columns)
    highlow_alloc_df = highlow_alloc_df.set_index(highlow_alloc_index)
    highlow_asset_df = pd.DataFrame(highlow_asset_data, columns=highlow_asset_columns)
    highlow_asset_df = highlow_asset_df.set_index(highlow_asset_index)
    highlow_argv_df = pd.DataFrame(highlow_argv_data, columns=highlow_argv_columns)
    highlow_argv_df = highlow_argv_df.set_index(highlow_argv_index)

    df_new = highlow_alloc_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(highlow_alloc_t.c.mz_highlow_id.in_(df_new['mz_highlow_id'].ravel()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, highlow_alloc_t, df_new, df_old)

    df_new = highlow_asset_df.fillna('')
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(highlow_asset_t.c.mz_highlow_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_asset_t, df_new, df_old)

    df_new = highlow_argv_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    s = s.where(highlow_argv_t.c.mz_highlow_id.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_argv_t, df_new, df_old)

def fund_update(pool, adjust_points, optlimit, opteliminateratio, optcalc):
    ''' re calc fund for single fund pool '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # calculate the latest allocation at every adjust point
        #
        data_fund = {}
        with click.progressbar(length=len(adjust_points),
                               label=('calc pool %s' % (pool.id)).ljust(30)) as bar:
            for day in adjust_points:
                bar.update(1)
                if pool['ra_fund_type'] == 1:
                    df_indicator, df_label = LabelAsset.label_asset_stock_per_day(
                        day, lookback, limit)
                else:
                    df_indicator, df_label = LabelAsset.label_asset_bond_per_day(
                        day, lookback, limit)

                fund_dates = np.array(list(data_fund.keys()))
                fund_date = fund_dates[fund_dates < day]
                fund_date = list(fund_date)
                fund_date.sort()
                if len(fund_date) == 0:
                    data_fund[day] = fund_lowliest_elimination(
                        None, df_indicator, df_label, opteliminateratio, optlimit)
                else:
                    data_fund[day] = fund_lowliest_elimination(
                        data_fund[fund_date[-1]], df_indicator, df_label,
                        opteliminateratio, optlimit)

        df_fund = pd.concat(data_fund, names=['ra_date', 'ra_category', 'ra_fund_code'])

        df_new = df_fund.rename(
            index=DFUtil.categories_types(True),
            columns={
                'date': 'ra_date',
                'category': 'ra_category',
                'code': 'ra_fund_code',
                'sharpe': 'ra_sharpe',
                'jensen': 'ra_jensen',
                'sortino': 'ra_sortino',
                'ppw': 'ra_ppw'
            })
        df_new.drop('stability', axis=1, inplace=True)
        df_new = df_new.applymap(lambda x: round(x, 4) if type(x) == float else x)

        codes = df_new.index.get_level_values(2)
        xtab = fund_code_to_globalid(codes)
        df_new['ra_fund_id'] = xtab[df_new.index.get_level_values('ra_fund_code')].values
        df_new['ra_pool'] = pool.id
        df_new['ra_fund_type'] = 1
        df_new['ra_fund_level'] = 1

        df_new.reset_index(inplace=True)
        df_new = df_new.reindex(columns=[
            'ra_pool', 'ra_category', 'ra_date', 'ra_fund_id', 'ra_fund_code',
            'ra_fund_type', 'ra_fund_level', 'ra_sharpe', 'ra_jensen',
            'ra_sortino', 'ra_ppw'
        ])
        df_new.sort_values(by=['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'],
                           inplace=True)
        #df_new.to_csv(datapath('pool_%s.csv' % (pool['id'])), index=False)
    else:
        df_new = pd.read_csv(datapath('pool_%s.csv' % (pool['id'])),
                             parse_dates=['ra_date'], dtype={'ra_fund_code': str})

    df_new.set_index(['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'], inplace=True)
    df_new = df_new.applymap(lambda x: '%.4f' % (x) if type(x) == float else x)

    db = database.connection('asset')
    ra_pool_fund = Table('ra_pool_fund', MetaData(bind=db), autoload=True)

    # load old data
    columns2 = [
        ra_pool_fund.c.ra_pool,
        ra_pool_fund.c.ra_category,
        ra_pool_fund.c.ra_date,
        ra_pool_fund.c.ra_fund_id,
        ra_pool_fund.c.ra_fund_code,
        ra_pool_fund.c.ra_fund_type,
        ra_pool_fund.c.ra_fund_level,
        ra_pool_fund.c.ra_sharpe,
        ra_pool_fund.c.ra_jensen,
        ra_pool_fund.c.ra_sortino,
        ra_pool_fund.c.ra_ppw,
    ]
    stmt_select = select(columns2, ra_pool_fund.c.ra_pool == pool.id)
    df_old = pd.read_sql(stmt_select, db,
                         index_col=['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'])
    if not df_old.empty:
        df_old = df_old.applymap(lambda x: '%.4f' % (x) if type(x) == float else x)

    database.batch(db, ra_pool_fund, df_new, df_old)

def handle(sdate, edate):
    sdate = pd.Timestamp(sdate).strftime('%Y-%m-%d')
    edate = pd.Timestamp(edate).strftime('%Y-%m-%d')
    # styleFactors.sort()
    industryFactors.sort()

    # load daily returns of every stock
    db = create_engine(uris['wind'])
    meta = MetaData(bind=db)
    t = Table('ashareeodprices', meta, autoload=True)
    columns = [t.c.S_INFO_WINDCODE, t.c.TRADE_DT, t.c.S_DQ_ADJCLOSE]
    sql = select(columns)
    # (would exclude suspended / under-review stocks)
    #sql = sql.where(t.c.S_DQ_TRADESTATUS != '停牌').where(t.c.S_DQ_TRADESTATUS != '待核查')
    sql = sql.where(t.c.TRADE_DT <= pd.Timestamp(edate).strftime('%Y%m%d'))
    sql = sql.where(t.c.TRADE_DT >= pd.Timestamp(
        datetime.strptime(sdate, '%Y-%m-%d') - timedelta(days=100)).strftime('%Y%m%d'))
    dfAdjClose = pd.read_sql(sql, db)
    dfAdjClose.TRADE_DT = dfAdjClose.TRADE_DT.apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    if len(dfAdjClose[dfAdjClose.TRADE_DT >= sdate]) == 0:
        print('failed! there is no stock data between sdate and edate!')
        exit()

    # load factor exposures of every stock
    db = create_engine(uris['factor'])
    sdateExposure = pd.Timestamp(
        datetime.strptime(sdate, '%Y-%m-%d') - timedelta(days=100)).strftime('%Y-%m-%d')
    sql = "select * from `factor_exposure_barra_20200220` where trade_date >= '" \
        + sdateExposure + "' and trade_date <= '" + edate + "'"
    dfExposure = pd.read_sql(sql, db)
    dfExposure.trade_date = dfExposure.trade_date.apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    if len(dfExposure) <= 1:
        print('data is weird! please change sdate and edate and check exposure table!')
        exit()

    sdateExposureListRaw = list(set(dfExposure.trade_date))
    helperList = [x for x in sdateExposureListRaw if x < sdate]
    if len(helperList) == 0:
        sdateExposureListRaw.sort()
        sdate = sdateExposureListRaw[1]
        helperList = sdateExposureListRaw[0:1]
    sdateNew = max(helperList)
    sdateExposureList = [x for x in sdateExposureListRaw if x >= sdateNew]
    sdateExposureList.sort()
    sdateExposureListNew = [x for x in sdateExposureListRaw if x >= sdate]
    if len(dfAdjClose[dfAdjClose.TRADE_DT > max(sdateExposureList)]) != 0:
        maxRealSdate = min(
            dfAdjClose[dfAdjClose.TRADE_DT > max(sdateExposureList)]['TRADE_DT'])
        sdateExposureListNew.append(maxRealSdate)
        sdateExposureListNew.sort()
        dfExposure = dfExposure[dfExposure.trade_date >= sdateNew].sort_values(
            by='trade_date', ascending=True)
        dfExposure.trade_date = dfExposure.trade_date.apply(
            lambda x: sdateExposureListNew[sdateExposureList.index(x)])
    else:
        sdateExposureListNew.sort()
        dfExposure = dfExposure[
            (dfExposure.trade_date >= sdateNew)
            & (dfExposure.trade_date < max(sdateExposureList))].sort_values(
                by='trade_date', ascending=True)
        dfExposure.trade_date = dfExposure.trade_date.apply(
            lambda x: sdateExposureListNew[sdateExposureList.index(x)])

    # stocks must be present in both the exposure table and the wind table
    stocks = set(dfExposure['stock_id']).intersection(set(dfAdjClose['S_INFO_WINDCODE']))
    dfExposure = dfExposure[dfExposure['stock_id'].isin(stocks)]
    dfExposureG = dfExposure.groupby('trade_date')
    dfAdjClose = dfAdjClose[dfAdjClose['S_INFO_WINDCODE'].isin(stocks)]
    dfAdjCloseG = dfAdjClose.groupby('S_INFO_WINDCODE')

    dfList = []
    for stock in stocks:
        dfTmp = dfAdjCloseG.get_group(stock).copy()
        dfTmp.sort_values(by='TRADE_DT', ascending=True, inplace=True)
        dfTmp.reset_index(inplace=True, drop=True)
        pct_change = dfTmp['S_DQ_ADJCLOSE'].pct_change()
        dfTmp.insert(0, 'pct_change', pct_change)
        dfTmp = dfTmp.fillna(0)
        dfList.append(dfTmp)
    dfAdjClose = pd.concat(dfList, axis=0, sort=True)
    dfAdjClose.drop_duplicates(['TRADE_DT', 'S_INFO_WINDCODE'], inplace=True)
    dfAdjCloseG = dfAdjClose.groupby('TRADE_DT')

    # main part
    # cross-sectional regression: r_n = f_c + sum_s(X_ns * f_s) + sum_i(X_ni * f_i) + u_n,
    # subject to sum_i(w_i * f_i) = 0; u_n is the residual
    dfPList = []
    dfRList = []
    for date, Exposure in dfExposureG:
        date = pd.Timestamp(date).strftime('%Y-%m-%d')
        exposure = Exposure.copy()
        dfAdjClose = dfAdjCloseG.get_group(date).copy()
        exposure = exposure.dropna(subset=['industry']).fillna(0)
        windCodes = set(exposure['stock_id']).intersection(
            set(dfAdjClose['S_INFO_WINDCODE']))
        dfAdjClose = dfAdjClose[dfAdjClose['S_INFO_WINDCODE'].isin(windCodes)]
        dfAdjClose = dfAdjClose.fillna(0)
        exposure = exposure[exposure['stock_id'].isin(windCodes)]
        exposure.sort_values(by='stock_id', inplace=True)

        r = np.matrix(dfAdjClose.sort_values('S_INFO_WINDCODE')['pct_change']).T
        # exposures of the country factor
        Xc = np.ones((len(exposure), 1))
        # exposures of the style factors
        Xs = np.matrix(exposure[styleFactors])
        # exposures of the industry factors
        Xi = np.matrix(pd.get_dummies(exposure['industry']).sort_index(axis=1))
        X = np.hstack((Xc, Xs, Xi))
        w = ((Xi.T) * (np.matrix(exposure['weight']).T)) / (exposure['weight'].sum())
        w = np.array(w).reshape(len(w), )

        # use a generalized linear model with the industry-neutrality constraint
        model = sm.GLM(r, X, var_weights=np.sqrt(exposure['weight'].values))
        Q = np.hstack([[0], np.zeros(len(styleFactors)), w])
        result = model.fit_constrained((Q, 0.0))
        params = result.params
        resid = result.resid_response

        # industries change over time: new ones are added and old ones deleted;
        # we only care about industries in industryList
        industryList = list(set(exposure['industry']))
        industryList.sort()
        factors = ['country'] + styleFactors + industryFactors
        dfP = pd.DataFrame(columns=['trade_date'] + factors)
        dfP.set_index('trade_date', inplace=True)
        for i in range(1 + len(styleFactors)):
            dfP.loc[date, factors[i]] = params[i]
        k = 1 + len(styleFactors)
        for ind in industryList:
            dfP.loc[date, 'industry_' + ind] = params[k]
            k += 1
        dfP = dfP.fillna(0)
        dfPList.append(dfP)

        dfR = pd.DataFrame(columns=['trade_date', 'stock_id', 'resid'])
        dfR['stock_id'] = exposure['stock_id']
        dfR['resid'] = resid
        dfR['trade_date'] = date
        dfRList.append(dfR)

    dfParams = pd.concat(dfPList, axis=0)
    dfResid = pd.concat(dfRList, axis=0)
    dfParams.sort_index(axis=1, inplace=True)

    # connect to database and update factor returns
    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_return', meta, autoload=True)
    sql = "select trade_date, " + ','.join(dfParams.columns.values) \
        + " from `barra_factor_return` where trade_date >= '" + sdate \
        + "' and trade_date <= '" + edate + "'"
    dfBase = pd.read_sql(sql, db)
    dfBase.sort_index(axis=1, inplace=True)
    dfBase['trade_date'] = dfBase['trade_date'].apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index('trade_date', inplace=True)
    database.batch(db, t, dfParams, dfBase, timestamp=False)
    print('factor return updated!')

    dfResid.set_index(['trade_date', 'stock_id'], inplace=True)

    # connect to database and update regression residuals
    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_regression_resid', meta, autoload=True)
    columns = [
        t.c.trade_date,
        t.c.stock_id,
        t.c.resid,
    ]
    sql = select(columns)
    sql = sql.where(t.c.trade_date >= sdate)
    sql = sql.where(t.c.trade_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['trade_date'] = dfBase['trade_date'].apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['trade_date', 'stock_id'], inplace=True)
    database.batch(db, t, dfResid, dfBase, timestamp=False)
    print('regression resid updated!')

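# The regression in handle() is the standard Barra-style cross-section:
# r_n = f_c + sum_s(X_ns * f_s) + sum_i(X_ni * f_i) + u_n, with the cap-weighted
# industry factor returns constrained to sum to zero so that the country factor is
# identified. A minimal self-contained illustration of that constrained fit on
# synthetic data (dimensions, weights, and the random model are made up; it uses
# the module's np / sm imports, and GLM.fit_constrained requires statsmodels >= 0.9):
def _constrained_regression_demo():
    rng = np.random.default_rng(0)
    n, n_style, n_ind = 200, 2, 3
    Xc = np.ones((n, 1))                          # country exposure
    Xs = rng.standard_normal((n, n_style))        # style exposures
    ind = rng.integers(0, n_ind, n)
    Xi = np.eye(n_ind)[ind]                       # industry dummies
    X = np.hstack([Xc, Xs, Xi])
    r = X @ rng.standard_normal(X.shape[1]) + 0.1 * rng.standard_normal(n)
    w = np.bincount(ind, minlength=n_ind) / n     # industry weights, sum to 1
    Q = np.hstack([[0.0], np.zeros(n_style), w])  # constraint: w . f_industry = 0
    result = sm.GLM(r, X).fit_constrained((Q, 0.0))
    return result.params, result.resid_response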