def calcfactorRetCov(panel_factorretcov, date, allfactor):
    """Slice one date out of the rolling-covariance panel and melt it long.

    Returns a DataFrame with columns factorid1, variable (the second factor
    id), value and date; both factor-id columns are converted from string
    gids to binary gids via gftIO.strSet2Np.
    """
    snapshot = panel_factorretcov.T[date]
    snapshot['factorid1'] = snapshot.index
    long_form = pd.melt(snapshot, id_vars=['factorid1'], value_vars=allfactor)
    long_form['date'] = date
    # convert both factor-id columns from string gids to binary gids
    for gid_col in ('factorid1', 'variable'):
        long_form[gid_col] = list(gftIO.strSet2Np(np.array(long_form[gid_col])))
    return long_form
def get_factor_exposure(risk_model, factor_list, date, symbols):
    '''
    Return factor exposure matrix(big X).

    Parameters
    ----------
    risk_model: dictionary
        Including specific risk, different factor exposure dataframe for
        all symbols.
    factor_list: list
        Factor exposure list.

    Returns
    -------
    factor_exposure: DataFrame
        Big X on target date for input symbols, NaNs filled with 0 and
        columns converted to binary gids.
    '''
    big_x = pd.DataFrame(index=symbols)
    for factor_name in factor_list:
        try:
            big_x[factor_name] = risk_model[factor_name].asMatrix().loc[
                date, symbols]
        except KeyError:
            raise KeyError('invalid input date: %s' % date)
    big_x.columns = gftIO.strSet2Np(big_x.columns.values)
    return big_x.fillna(0)
def CalPortExpo(context, fexpo, wgt):
    """Build the portfolio factor-exposure matrix over all weight dates.

    fexpo holds one exposure GftTable per factor plus an 'osets' table that
    lists the factor sets (first entry: industry factors, second: style
    factors).  Per-factor frames are produced by fexpoprocess and joined
    column-wise; the columns are converted to binary gids.
    """
    weight_matrix = wgt.asMatrix()
    weight_dates = sorted(weight_matrix.index)
    if 'idname' in weight_matrix.columns:
        weight_matrix = weight_matrix.drop(['idname'], axis=1)
    asset_names = list(weight_matrix.columns)
    oset_gids = list(
        map(gftIO.gidInt2Str, list(fexpo['osets'].asColumnTab()['O0'])))
    style_names = sorted(
        map(gftIO.gidInt2Str, list(fexpo[oset_gids[1]].asColumnTab()['O0'])))
    industry_names = sorted(
        map(gftIO.gidInt2Str, list(fexpo[oset_gids[0]].asColumnTab()['O0'])))
    all_names = industry_names + style_names
    # one cleaned exposure matrix per factor, insertion-ordered
    expo_by_factor = {
        name: fexpo[name].asMatrix().dropna(how='all')
        for name in all_names
    }
    frames = [
        fexpoprocess(name, expo, weight_dates, asset_names, weight_matrix)
        for name, expo in expo_by_factor.items()
    ]
    result = pd.concat(frames, axis=1)
    result.columns = list(gftIO.strSet2Np(np.array(result.columns)))
    return result
def factor_return(self):
    """
    Get factor return from all the factors.

    Builds a DataFrame indexed like the first factor's '.ret' series with
    one column per factor in self.ls_factors_ret; the columns are then
    converted to binary gids.
    """
    factors_ret = pd.DataFrame(
        index=self.risk_model[self.ls_factors_ret[0] + '.ret'].index,
        columns=self.ls_factors_ret)
    for factor in self.ls_factors_ret:
        # NOTE(review): the index above reads key `factor + '.ret'`, but the
        # data here is read from the bare key `factor` (the exposure entry).
        # This looks like it should be self.risk_model[factor + '.ret'] --
        # confirm against the merged model's key layout before changing.
        factors_ret[factor] = self.risk_model[factor]
    factors_ret.columns = gftIO.strSet2Np(factors_ret.columns.values)
    return factors_ret
def Fexpomerge(self):
    """
    Stack every factor's exposure row on the selected date into one matrix.

    Each factor contributes one row (relabelled from the date to the factor
    name); the stacked frame is NaN-filled with 0, its index converted to
    binary gids, and returned transposed (symbols x factors).
    """
    dt_latest = self.selectedate
    # selectData() does not depend on the factor name, so fetch it once
    # instead of once per factor inside the comprehension.
    selected = self.selectData()
    ls_raw_df_fexpo = [
        selected[factorname].reindex(index=[dt_latest]).rename(
            index={dt_latest: factorname})
        for factorname in self.getallFactor()
    ]
    df_fexpo_onedate = pd.concat(ls_raw_df_fexpo, axis=0).fillna(0)
    df_fexpo_onedate.index = list(
        gftIO.strSet2Np(np.array(df_fexpo_onedate.index)))
    return df_fexpo_onedate.T
def covariance_matrix(self, date, factors):
    """
    Look up the factor-return covariance matrix for one date.

    Keyword Arguments:
    date    -- the date to slice from the 'ret_cov' long-format table
    factors -- iterable of factor gids (converted to binary form)

    Returns a square factor x factor DataFrame; factor pairs missing from
    the stored table come back as NaN.
    """
    factors = gftIO.strSet2Np(np.array(factors))
    cov_matrix = self.risk_model['ret_cov'].set_index('date')
    cov_matrix = cov_matrix.loc[date]
    cov_matrix = cov_matrix.pivot(
        index='factorid1', columns='factorid2', values='value')
    # fix: reindex() no longer accepts two positional label lists in modern
    # pandas -- pass index/columns explicitly as keywords.
    cov_matrix = cov_matrix.reindex(
        index=factors, columns=factors, fill_value=np.nan)
    return cov_matrix
def get_input_factor(self, oset_idx):
    """
    Get factor exposure matrices from the risk model for a list of oset gids.

    Keyword Arguments:
    oset_idx: list of oset gid

    Returns None for an empty list, otherwise a pd.Panel keyed (after the
    transpose) by date -- presumably (date, factor, symbol); confirm axis
    order against callers.  NOTE(review): pd.Panel was removed in pandas
    >= 1.0, so this requires an old pandas or a migration.
    """
    if len(oset_idx) < 1:
        return None
    # removed: a `date_index` local was computed here and never used
    # map string gids to their binary form for the panel item labels
    ls_factor_b_char = gftIO.strSet2Np(np.array(oset_idx))
    factor_data = pd.Panel({
        ls_factor_b_char[key]: self.model.get(factor).asMatrix()
        for key, factor in enumerate(oset_idx)
    })
    return factor_data.transpose(1, 0, 2)
def get_output(self, post_fix, oset_idx=None):
    """
    Get target data from the model.

    Keyword Arguments:
    post_fix -- key (or key suffix), e.g. 'specificRisk', 'ret_cov', '*.ret'
    oset_idx -- optional list of oset gids; when given, one column per gid
                (key = gid + post_fix) is collected and the columns are
                converted to binary gids.
    """
    # simple lookup when no gid list is supplied
    if oset_idx is None:
        return self.model.get(post_fix, None)
    reference_index = self.model[oset_idx[0] + post_fix].index
    collected = pd.DataFrame(index=reference_index, columns=oset_idx)
    for gid in oset_idx:
        collected[gid] = self.model[gid + post_fix]
    collected.columns = gftIO.strSet2Np(collected.columns.values)
    return collected
def calcfactorRetCov(df_allfactorret, date, corrwgts, varwgts, corrhalflife,
                     varhalflife):
    """
    Estimate the factor-return covariance as of `date`, in long format.

    Correlation uses the last `corrhalflife` rows up to `date`, scaled by
    the exponential weights `corrwgts`; standard deviation uses the last
    `varhalflife` rows scaled by `varwgts`.  Covariance = D * corr * D.
    Factor ids are converted to binary gids and the auxiliary
    'countryfactor' rows/columns are dropped from the result.
    """
    ##calculate corr on the weighted trailing window
    df_factorretcorr = df_allfactorret[df_allfactorret.index <=
                                       date][-corrhalflife:]
    df_retcorr = df_factorretcorr.apply(
        lambda x: np.array(x) * np.array(corrwgts)).corr()
    ##calculate standard deviation on its own weighted trailing window
    df_factorretstd = df_allfactorret[df_allfactorret.index <=
                                      date][-varhalflife:]
    df_retstd = df_factorretstd.apply(
        lambda x: np.array(x) * np.array(varwgts)).std()
    ##calculate covariance: scale the correlation matrix by std on both axes
    df_retcov = df_retcorr.apply(
        lambda x: np.array(x) * np.array(df_retstd)).T.apply(
            lambda x: np.array(x) * np.array(df_retstd))
    df_retcov['factorid1'] = df_retcov.index
    df_l_factorretcov = pd.melt(df_retcov, id_vars=['factorid1'])
    df_l_factorretcov['date'] = date
    # build the string-gid -> binary-gid map (the second 'countryfactor'
    # filter below is redundant with the one already applied inside set())
    ssb_map = pd.DataFrame(
        data=list(
            set(df_l_factorretcov['factorid1'][df_l_factorretcov['factorid1']
                                               != 'countryfactor'])),
        columns=['oriname'])
    ssb_map = ssb_map[ssb_map['oriname'] != 'countryfactor']
    ssb_map['sname'] = list(gftIO.strSet2Np(np.array(ssb_map['oriname'])))
    dict_ssb_map = {
        key: list(ssb_map['sname'][ssb_map['oriname'] == key])[0]
        for key in ssb_map['oriname']
    }
    # keep countryfactor mapped to itself so the .apply calls cannot KeyError
    dict_ssb_map['countryfactor'] = 'countryfactor'
    df_l_factorretcov['factorid1'] = df_l_factorretcov['factorid1'].apply(
        lambda x: dict_ssb_map[x])
    df_l_factorretcov['variable'] = df_l_factorretcov['variable'].apply(
        lambda x: dict_ssb_map[x])
    # NOTE(review): chained boolean indexing df[m1][m2] uses a mask built
    # from the unfiltered frame and relies on pandas index alignment; newer
    # pandas may warn or raise here -- combining the masks with & is safer.
    df_l_factorretcov = df_l_factorretcov[
        df_l_factorretcov['factorid1'] !=
        'countryfactor'][df_l_factorretcov['variable'] != 'countryfactor']
    return df_l_factorretcov
def getCashGid():
    """Return the gid of the cash asset in binary form."""
    cash_gid = '0AC062D610A1481FA5561EC286146BCC'
    return gftIO.strSet2Np(np.array([cash_gid]))
df_multiplier = gftIO.zload(os.path.join(path, 'df_multiplier.pkl')) if isinstance(df_commission, gftIO.GftTable): df_commission = df_commission.asColumnTab().copy() if isinstance(df_position, gftIO.GftTable): df_position = df_position.asMatrix().copy() if isinstance(df_price, gftIO.GftTable): df_price = df_price.asColumnTab().copy() if isinstance(df_multiplier, gftIO.GftTable): df_multiplier = df_multiplier.asColumnTab().copy() if isinstance(data, gftIO.GftTable): data = data.asColumnTab().copy() if isinstance(target, list): target = gftIO.strSet2Np(np.array(target)) name = { 'INNERCODE': 'contract_code', 'OPTIONCODE': 'contract_name', 'SETTLEMENTDATE': 'settlement_date', 'ENDDATE': 'date', 'CLOSEPRICE': 'close_price' } data.rename(columns=lambda x: name[x], inplace=True) target = data['contract_name'].unique() roll_position = pd.DataFrame() for contract in target: contract_data = data[data['contract_name'] == contract] # contract_data.set_index('date', inplace=True)
def risk_model(df_ret, dict_risk_expo, capital, corr_half_life, var_half_life):
    """
    Regress stock returns on prior factor exposures to build a risk model.

    Pseudo code:
    1. process input data: parse, drop and fill.
    2. get intersection of all factor names, all symbol names, all dates.
    3. solve heteroskedasticity by weighting observations with the square
       root of market capitalization (handbook p5, p15):
       new return = sqrt(cap) * stock return, plus a constraint row.
       Then calculate factor returns, their covariance, and the residual
       (specific) variances of the regression.

    Keyword Arguments:
    df_ret         -- GftTable, stock daily return.
    dict_risk_expo -- dict, factor exposure tables keyed by factor gid,
                      plus an 'osets' entry listing the factor sets.
    capital        -- GftTable, stock market capital, used as weight.
    corr_half_life -- int, half life of the correlation estimate.
    var_half_life  -- int, half life of the variance estimate.

    Return: dict with one '<gid>.ret' DataFrame per factor (27 industrial
    + 8 style factors) plus 'ret_cov' (factor return covariance, long
    format) and 'specificRisk' (regression residuals).
    """
    # get all factor names (first oset = industry factors, second = style)
    ylog.debug('parse data')
    ls_fexponame = list(
        map(gftIO.gidInt2Str,
            list(dict_risk_expo['osets'].asColumnTab()['O0'])))
    ind_factor_name = sorted(
        list(
            map(gftIO.gidInt2Str,
                list(dict_risk_expo[ls_fexponame[0]].asColumnTab()['O0']))))
    sty_factor_name = sorted(
        list(
            map(gftIO.gidInt2Str,
                list(dict_risk_expo[ls_fexponame[1]].asColumnTab()['O0']))))
    allfactor = ind_factor_name + sty_factor_name

    ##stock return preprocess: symbols x dates, drop all-NaN date columns
    df_w_ret = df_ret.asMatrix().T.dropna(how='all', axis=1)

    ##get factor exposure date list (all snapshots)
    dict_risk_expo_new = {
        factorname: dict_risk_expo[factorname].asMatrix().dropna(how='all')
        for factorname in allfactor
    }
    ls_ls_fexpodate = [
        dict_risk_expo_new[factorname].index.tolist()
        for factorname in dict_risk_expo_new.keys()
    ]
    ls_alldates_fexpo = reduce(np.intersect1d, ls_ls_fexpodate)

    ## get factor exposure symbol list
    ls_ls_fexposymbol = [
        dict_risk_expo_new[factorname].columns.tolist()
        for factorname in dict_risk_expo_new.keys()
    ]
    ls_allsymbols_fexpo = reduce(np.intersect1d, ls_ls_fexposymbol)

    ##capital preprocess: symbols x dates.
    # BUG FIX: this used to read `weight = capital.asMatrix().T`, leaving
    # every later use of `capital` (.columns/.index/[[date]]) pointing at
    # the raw GftTable; the parallel riskModel() reassigns the converted
    # matrix before use, so do the same here.
    capital = capital.asMatrix().T

    ##map every return date to the nearest strictly-earlier exposure date
    fexpodate = pd.DataFrame(ls_alldates_fexpo, columns=['date_fexpo'])
    retdate = pd.DataFrame(df_w_ret.columns, columns=['date_ret'])
    retdate.sort_values("date_ret", ascending=True, inplace=True)
    fexpodate.sort_values("date_fexpo", ascending=True, inplace=True)
    df_date_map = pd.merge_asof(retdate,
                                fexpodate,
                                left_on="date_ret",
                                right_on="date_fexpo",
                                allow_exact_matches=False)
    df_date_map.dropna(how='any', inplace=True)
    df_date_map = df_date_map.drop_duplicates(
        subset='date_fexpo').reset_index()
    # exposure date -> following return date
    dict_date_map = {
        df_date_map.date_fexpo[i]: df_date_map.date_ret[i]
        for i in range(len(df_date_map))
    }
    ls_alldates = sorted(
        list(
            set(capital.columns).intersection(set(
                df_w_ret.columns)).intersection(set(dict_date_map.values()))))
    ls_alldates_ondaybefore = sorted(list(dict_date_map.keys()))

    ##per-date symbol universe: intersection of return, exposure and capital
    ls_allsymbols = {
        date: list(
            set(df_w_ret[[dict_date_map[date]]].dropna().index).intersection(
                set(ls_allsymbols_fexpo)).intersection(set(capital.index)))
        for date in ls_alldates_ondaybefore
    }

    ## align the stock return and factor exposure; sqrt(cap) weights fix
    ## heteroskedasticity
    dict_df_capital_raw = {
        date: capital[[date]].reindex(index=ls_allsymbols[date]).fillna(0)
        for date in ls_alldates_ondaybefore
    }
    dict_df_capital = {
        date: np.sqrt(dict_df_capital_raw[date])
        for date in ls_alldates_ondaybefore
    }
    dict_df_ret = {
        dict_date_map[date]: pd.concat(
            [(df_w_ret[[dict_date_map[date]]].reindex(
                index=ls_allsymbols[date])) *
             (dict_df_capital[date].rename(
                 columns={date: dict_date_map[date]})),
             pd.DataFrame(data=np.zeros(1),
                          index=['constrain'],
                          columns=[dict_date_map[date]])],
            axis=0)
        for date in ls_alldates_ondaybefore
    }
    dict_df_fexpo_raw = {
        date: fexpomerge(dict_risk_expo_new, date, allfactor, ls_allsymbols)
        for date in ls_alldates_ondaybefore
    }
    dict_df_fexpo = {
        date: dict_df_fexpo_raw[date].assign(countryfactor=1).multiply(
            dict_df_capital[date].squeeze(), axis='index')
        for date in ls_alldates_ondaybefore
    }
    ##calculate constraints
    dict_df_fexpo_con = {
        date: expoconstrain(dict_df_fexpo_raw, date, ind_factor_name,
                            allfactor, dict_df_capital_raw, sty_factor_name,
                            dict_df_fexpo)
        for date in ls_alldates_ondaybefore
    }

    ########## step3: calculate factor return ##########
    # one cross-sectional regression per exposure date
    ls_df_fitresult = {
        dict_date_map[date]: Regression(date, dict_df_ret, dict_df_fexpo_con,
                                        dict_df_capital, dict_df_fexpo,
                                        dict_date_map)
        for date in ls_alldates_ondaybefore
    }
    ls_df_facreturn = list(
        ls_df_fitresult[date]['params'].rename(columns={'params': date})
        for date in ls_alldates)
    df_model_params = reduce(
        lambda df_para1, df_para2: pd.concat([df_para1, df_para2], axis=1),
        ls_df_facreturn)

    ########## step4: calculate factor return covariance ##########
    df_allfactorret = df_model_params.T
    df_allfactorret = df_allfactorret.sort_index()
    corrhalflife = int(corr_half_life)
    varhalflife = int(var_half_life)
    halflife = max(corrhalflife, varhalflife)
    # guard clause (was an if/else wrapping everything below)
    if len(ls_alldates) < halflife:
        raise Exception("More data needed")
    ls_alldatesnew = ls_alldates[halflife - 1:len(ls_alldates)]
    # exponential decay weights, oldest observation first
    corrwgts = list(
        map(lambda x: mt.sqrt(0.5**(x / int(corrhalflife))),
            list(range(int(corrhalflife) - 1, -1, -1))))
    varwgts = list(
        map(lambda x: mt.sqrt(0.5**(x / int(varhalflife))),
            list(range(int(varhalflife) - 1, -1, -1))))
    ls_factorretcov = list(
        calcfactorRetCov(df_allfactorret, date, corrwgts, varwgts,
                         corrhalflife, varhalflife)
        for date in ls_alldatesnew)
    df_l_factorretcov = pd.concat(
        ls_factorretcov, axis=0).rename(columns={'variable': 'factorid2'})

    ########## step5: residual (specific) variances of the regression ##########
    ls_specificrisk = list(
        ls_df_fitresult[date]['resid'].rename(columns={'resid': date})
        for date in ls_alldates)
    df_w_specificrisk = pd.concat(ls_specificrisk, axis=1).T
    df_w_specificrisk = df_w_specificrisk.sort_index()
    specificwgts = list(
        map(lambda x: mt.sqrt(0.5**(x / int(halflife))),
            list(range(int(halflife) - 1, -1, -1))))
    ls_factorretspe = list(
        calcfactorRetSpe(df_w_specificrisk, date, specificwgts, halflife)
        for date in ls_alldatesnew)
    df_specificrisk_var = pd.concat(ls_factorretspe, axis=0)

    ########## step6: generate final return value ##########
    # drop the auxiliary country factor; rename columns to binary gids
    df_allfactorret = df_allfactorret.drop('countryfactor', axis=1)
    dict_factorret = {
        key + '.ret': df_allfactorret[[key]].rename(
            columns={
                key: list(
                    gftIO.strSet2Np(
                        np.array(list(df_allfactorret[[key]].columns))))[0]
            })
        for key in df_allfactorret.columns
    }
    dictMerged = dict(
        dict_factorret, **{
            'ret_cov': df_l_factorretcov,
            'specificRisk': df_specificrisk_var
        })
    return dictMerged
def RiskModelStyleOnly(df_ret, dict_risk_expo, period):
    '''
    Build a style-factor-only risk model.

    Keyword Arguments:
    df_ret         -- GftTable, stock daily return.
    dict_risk_expo -- dict, factor exposure tables keyed by factor gid,
                      plus an 'osets' entry listing the factor sets.
    period         -- dict with key 'CovWindow': rolling window length.

    Returns a dict with one '<gid>.ret' DataFrame per factor, the input
    exposures, 'ret_cov' (rolling factor-return covariance, long format)
    and 'specificRisk' (rolling residual variance, long format).
    '''
    period = int(period['CovWindow'])
    # collect every factor gid from every oset (no industry/style split here)
    ls_fexponame = list(
        map(gftIO.gidInt2Str,
            list(dict_risk_expo['osets'].asColumnTab()['O0'])))
    allfactor = []
    for i in ls_fexponame:
        allfactor.extend(
            list(
                map(gftIO.gidInt2Str,
                    list(dict_risk_expo[i].asColumnTab()['O0']))))
    ##stock return preprocess: symbols x dates, drop all-NaN date columns
    df_w_ret = df_ret.asMatrix().T.dropna(how='all', axis=1)
    ##factor exposure preprocess
    dict_risk_expo_new = {
        factorname: dict_risk_expo[factorname].asMatrix()
        for factorname in allfactor
    }
    # dates and symbols common to every factor's exposure table
    ls_ls_fexpodate = list([
        dict_risk_expo_new[factorname].index.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_alldates_fexpo = reduce(np.intersect1d, ls_ls_fexpodate)
    ls_ls_fexposymbol = list([
        dict_risk_expo_new[factorname].columns.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_allsymbols_fexpo = reduce(np.intersect1d, ls_ls_fexposymbol)
    ##get fexpo date, find the nearest (strictly earlier) business day
    fexpodate = pd.DataFrame(ls_alldates_fexpo, columns=['date_fexpo'])
    retdate = pd.DataFrame(df_w_ret.columns, columns=['date_ret'])
    retdate.sort_values("date_ret", ascending=True, inplace=True)
    fexpodate.sort_values("date_fexpo", ascending=True, inplace=True)
    df_date_map = pd.merge_asof(retdate,
                                fexpodate,
                                left_on="date_ret",
                                right_on="date_fexpo",
                                allow_exact_matches=False)
    df_date_map.dropna(how='any', inplace=True)
    df_date_map = df_date_map.drop_duplicates(
        subset='date_fexpo').reset_index()
    # exposure date -> following return date
    dict_date_map = {
        df_date_map.date_fexpo[i]: df_date_map.date_ret[i]
        for i in range(len(df_date_map))
    }
    ##get the date intersection of stock return and factor exposure
    ls_alldates = set(df_w_ret.columns).intersection(
        set(dict_date_map.values()))
    ls_alldates_ondaybefore = sorted(list(dict_date_map.keys()))
    # per-date symbol universe
    ls_allsymbols = {
        date: list(
            set(df_w_ret[[dict_date_map[date]]].dropna().index).intersection(
                set(ls_allsymbols_fexpo)))
        for date in ls_alldates_ondaybefore
    }
    #align the stock return and factor exposure
    dict_df_ret = {
        dict_date_map[date]:
        df_w_ret[[dict_date_map[date]]].reindex(index=ls_allsymbols[date])
        for date in ls_alldates_ondaybefore
    }
    dict_df_fexpo = {
        date: fexpomerge(dict_risk_expo_new, date, allfactor, ls_allsymbols)
        for date in ls_alldates_ondaybefore
    }
    ########################step3:calculate factor return########################
    # one cross-sectional regression per exposure date
    ls_df_fitresult = {
        dict_date_map[date]: Regression(date, dict_df_ret, dict_df_fexpo,
                                        dict_date_map)
        for date in ls_alldates_ondaybefore
    }
    ls_df_facreturn = list(
        ls_df_fitresult[date]['params'].rename(columns={'params': date})
        for date in ls_alldates)
    df_model_params = reduce(
        lambda df_para1, df_para2: pd.concat([df_para1, df_para2], axis=1),
        ls_df_facreturn)
    ########################step4:calculate factor return covariance########################
    df_allfactorret = df_model_params.T
    df_allfactorret = df_allfactorret.sort_index()
    # NOTE(review): pd.rolling_cov / pd.rolling_var were removed in pandas
    # >= 0.23; migrating needs df.rolling(window=period).cov()/.var(), but
    # the modern return layout differs from the Panel this code (and the
    # downstream calcfactorRetCov) expects -- confirm the pandas version
    # pinned for this module before upgrading.
    panel_factorretcov = pd.rolling_cov(df_allfactorret, window=period)
    ls_factorretcov = list(
        calcfactorRetCov(panel_factorretcov, date, allfactor)
        for date in list(df_allfactorret.index))
    df_l_factorretcov = pd.concat(
        ls_factorretcov, axis=0).rename(columns={'variable': 'factorid2'})
    ########################step5:calculate the residual(specific) variances of regression########################
    ##part1:merge factorreturn,factor exposure and stock return
    ls_specificrisk = list(
        ls_df_fitresult[date]['resid'].rename(columns={'resid': date})
        for date in ls_alldates)
    df_w_specificrisk = pd.concat(ls_specificrisk, axis=1).T
    df_w_specificrisk = df_w_specificrisk.sort_index()
    df_specificrisk_var = pd.rolling_var(df_w_specificrisk, window=period)
    # melt to long format: date, symbol, specificrisk
    df_specificrisk_var['idname'] = df_specificrisk_var.index
    df_specificrisk_var = pd.melt(df_specificrisk_var, id_vars=['idname'])
    df_specificrisk_var = df_specificrisk_var.rename(columns={
        'idname': 'date',
        'variable': 'symbol',
        'value': 'specificrisk'
    })
    ########################step6:generate final return value########################
    # per-factor return columns renamed to binary gids
    dict_factorret = {
        key + '.ret': df_allfactorret[[key]].rename(
            columns={
                key: list(
                    gftIO.strSet2Np(
                        np.array(list(df_allfactorret[[key]].columns))))[0]
            })
        for key in df_allfactorret.columns
    }
    dictMerged = dict(
        dict_factorret, **dict_risk_expo, **{
            'ret_cov': df_l_factorretcov,
            'specificRisk': df_specificrisk_var
        })
    #gftIO.zdump(dictMerged,'riskmodel.pkl')
    return dictMerged
'D84A4B250D077E0EFC4F7FFF393FCB44', 'EB34F4D5C92E85C2307DB2C8015C94F1', 'EFE87574B86C774ADFD08F421AF5D11B', '441E8F64A7582F553BCBE42A216285F1', '873CF94D09229206D675ACC32328DC24', 'ABEC912F31E326F4C1FC507AF787C8FA', 'B1B02CFAB81248BAA87754E760769BD2', 'B9CCDA635F039E84D489F964DB08BC5C', 'BE3E35A7C0CB49EB9E1CB41D566563E7', 'E8D70EA915C420F9B9005BB21540788C', 'EB8553C313C38BC180E1972D798622BA' ] ls_style_factor = [ '873CF94D09229206D675ACC32328DC24', '441E8F64A7582F553BCBE42A216285F1', 'B9CCDA635F039E84D489F964DB08BC5C', 'B1B02CFAB81248BAA87754E760769BD2', 'EB8553C313C38BC180E1972D798622BA', 'BE3E35A7C0CB49EB9E1CB41D566563E7', 'ABEC912F31E326F4C1FC507AF787C8FA', 'E8D70EA915C420F9B9005BB21540788C' ] all_factors_gid = gftIO.strSet2Np(np.array(all_factors)) class RiskAnlysis(object): """ risk data preparation and getting attribute. """ def __init__(self, risk_model_merge): self.risk_model = risk_model_merge self.ls_factors = [ x for x in risk_model_merge.keys() if re.match("[A-Z0-9]{32}$", x) ] self.ls_factors_ret = [ x[:-4] for x in risk_model_merge.keys() if re.search(".ret$", x) ] def get_factor_exposure(self, factor_list, date, symbols):
def create_continuous_contract(start_date, end_date, contract_data, target):
    '''
    parse contract data to get continuous price for each group.

    Parameters
    ----------
    start_date: datetime
        unused in the body; kept for interface compatibility.
    end_date: datetime
        unused in the body; kept for interface compatibility.
    contract_data: OOTTV
        contract name, contract code, date, settlement date, close price
    target: list or NULL
        targets to parse, NULL will parse all contracts.

    Returns
    -------
    continuous_price: DataFrame
        one backward-ratio-adjusted price column per contract group.
    '''
    if isinstance(contract_data, gftIO.GftTable):
        data = contract_data.asColumnTab().copy()
    else:
        # robustness fix: a plain-DataFrame input previously crashed with
        # NameError because `data` was never bound.
        data = contract_data.copy()
    if isinstance(target, list):
        target = gftIO.strSet2Np(np.array(target))
    name = {
        'INNERCODE': 'contract_code',
        'OPTIONCODE': 'contract_name',
        'SETTLEMENTDATE': 'settlement_date',
        'ENDDATE': 'date',
        'CLOSEPRICE': 'close_price'
    }
    data.rename(columns=lambda x: name[x], inplace=True)
    data.dropna(subset=['settlement_date'], inplace=True)
    continuous_price = pd.DataFrame()
    if target is None:
        target = data['contract_name'].unique()
    for num_contract, contract in enumerate(target):
        ylog.info(num_contract)
        ylog.info(contract)
        target_data = data[data['contract_name'] == contract]
        # expiry (settlement) date per contract code, in settlement order
        target_expiry_dates = target_data[['contract_code', 'settlement_date']].\
            drop_duplicates().sort_values('settlement_date')
        target_expiry_dates.set_index('contract_code', inplace=True)
        target_expiry_dates = target_expiry_dates[target_expiry_dates.columns[
            0]]
        target_data = target_data.loc[:, ['date', 'contract_code',
                                          'close_price']]
        # date x contract_code close-price matrix (renamed from the original
        # `contract_data`, which shadowed the function parameter)
        price_matrix = target_data.pivot(
            index='date', columns='contract_code', values='close_price')
        contract_dates = price_matrix.index
        # (removed an unused `continuous_contract_price` Series built here)
        prev_date = contract_dates[0]
        # Loop through each contract and create the specific weightings for
        # each contract depending upon the rollover date and price adjusted
        # method.  Here for backtesting, we use last trading day rollover
        # and backward ratio price adjustment.
        target_data_with_datetimeindex = target_data.set_index('date')
        price_adjust_ratio = pd.Series(
            np.ones(len(target_expiry_dates)),
            index=target_expiry_dates.values,
            name='ratio')
        adjusted_price = pd.Series(index=contract_dates, name=contract)
        target_data_with_datetimeindex['close_price'].replace(
            to_replace=0, method='bfill', inplace=True)
        target_data_with_datetimeindex['close_price'].replace(
            to_replace=0, method='pad', inplace=True)
        target_data_with_datetimeindex = target_data_with_datetimeindex[
            ~target_data_with_datetimeindex.index.duplicated()]
        # pass 1: price ratio across each rollover date.
        # (Series.iteritems was removed in pandas 2.0 -> use .items)
        for i, (item, ex_date) in enumerate(target_expiry_dates.items()):
            # guard clause: skip the final contract and expiries beyond the
            # available price history (was an if/else-continue).
            if i >= len(target_expiry_dates) - 1 \
                    or ex_date >= target_data_with_datetimeindex.index[-1]:
                continue
            idx_ex_date = target_data_with_datetimeindex.index.searchsorted(
                ex_date)
            pre_ex_date = contract_dates[idx_ex_date - 1]
            # ex_date has no price data, move ex_date to next trading date.
            if ex_date not in target_data_with_datetimeindex.index and \
                    idx_ex_date + 1 < len(target_data_with_datetimeindex.index):
                ex_date = contract_dates[idx_ex_date + 1]
            price_adjust_ratio.loc[ex_date] = target_data_with_datetimeindex['close_price'].loc[ex_date] / \
                target_data_with_datetimeindex['close_price'].loc[pre_ex_date]
        # pass 2: scale each contract's price segment by the cumulative
        # product of the remaining rollover ratios (backward adjustment).
        # (.ix was removed in modern pandas; these are label-based datetime
        # slices, so .loc is the drop-in replacement.)
        for i, (item, ex_date) in enumerate(target_expiry_dates.items()):
            idx_ex_date = price_matrix.index.searchsorted(ex_date)
            pre_ex_date = contract_dates[idx_ex_date - 1]
            adjusted_price.loc[prev_date:pre_ex_date] = \
                target_data_with_datetimeindex['close_price'].loc[prev_date:pre_ex_date] * \
                price_adjust_ratio.loc[ex_date:].cumprod().iloc[-1]
            prev_date = ex_date
        continuous_price = pd.concat([continuous_price, adjusted_price],
                                     axis=1)
    return continuous_price
def calcRiskAttribution(dict_riskmodel, df_portwgt, df_benchwgt, dt_startdate,
                        dt_enddate):
    """
    Decompose portfolio, benchmark and active risk per report date.

    Keyword Arguments:
    dict_riskmodel -- dict, merged risk model (factor exposures plus
                      'ret_cov' and 'specificRisk').
    df_portwgt     -- GftTable, portfolio weights.
    df_benchwgt    -- GftTable, benchmark weights.
    dt_startdate   -- timestamp, first report date.
    dt_enddate     -- timestamp, last report date.

    Returns a dict keyed by report item (gsConst.Const.*); each value is a
    DataFrame of that item concatenated over all report dates.
    """
    ##step2 part1: portfolio (Wp), benchmark (Wb) and active (Wa) weights
    df_wp = df_portwgt.asColumnTab().rename(columns={'value': 'Wp'})
    df_wb = df_benchwgt.asColumnTab().rename(columns={'value': 'Wb'})
    df_wa = pd.merge(df_wp, df_wb, on=['idname', 'variable'], how='outer')
    df_wa.fillna(0, inplace=True)
    df_wa.set_index('variable', inplace=True)
    df_wa['Wa'] = df_wa['Wp'] - df_wa['Wb']
    ##part2: union of the portfolio and benchmark stock pools
    ls_aname = list(
        set(df_portwgt.asMatrix().columns).union(
            set(df_benchwgt.asMatrix().columns)))
    ls_aname.remove('idname')
    ##get factor names; with < 2 factor sets the model has style factors only
    rmodelobj = Riskmodel(dict_riskmodel, dt_startdate, ls_aname)
    if rmodelobj.factorcnt() < 2:
        ls_stylefactor = list(
            gftIO.strSet2Np(np.array(rmodelobj.getfactorlist(0))))
        ls_industryfactor = []
    else:
        ls_industryfactor = list(
            gftIO.strSet2Np(np.array(rmodelobj.getfactorlist(0))))
        ls_stylefactor = list(
            gftIO.strSet2Np(np.array(rmodelobj.getfactorlist(1))))
    ls_allfactor = list(gftIO.strSet2Np(np.array(rmodelobj.getallFactor())))
    ##part2: factor loading preprocessing
    dict_risk_expo_new = {
        factorname: dict_riskmodel[factorname].asMatrix().dropna(how='all')
        for factorname in list(np.array(rmodelobj.getallFactor()))
    }
    ls_ls_fexpodate = list([
        dict_risk_expo_new[factorname].index.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_alldates_fexpo = reduce(np.intersect1d, ls_ls_fexpodate)
    ##part3: factor covariance matrix, restricted to known factors
    df_covm = dict_riskmodel['ret_cov']
    df_covm = df_covm.reset_index().reindex(
        columns=['factorid1', 'factorid2', 'value', 'date'])
    # NOTE(review): chained boolean indexing with masks built from the
    # unfiltered frame; relies on pandas index alignment -- combining the
    # masks with & would be safer on newer pandas.
    df_covm = df_covm[df_covm['factorid1'].isin(ls_allfactor)][
        df_covm['factorid2'].isin(ls_allfactor)]
    ##part4: specRisk preprocessing
    df_specrisk_raw = dict_riskmodel['specificRisk']
    ## report dates: portfolio weight dates inside [dt_startdate, dt_enddate]
    ls_port_wgt = df_portwgt.asMatrix().index
    sourceDates = sorted(
        list(ls_port_wgt[(ls_port_wgt <= dt_enddate)
                         & (ls_port_wgt >= dt_startdate)]))
    ###align daterange: map each report date to the latest covariance /
    ###exposure date at or before it (ffill over the sorted target dates)
    ##covm specrisk align
    targetDates = sorted(list(np.unique(df_covm['date'])))
    df_date_map_covm = pd.DataFrame({'targetDate': targetDates},
                                    index=targetDates)
    df_date_map_covm = df_date_map_covm.reindex(
        sourceDates, method='ffill').dropna(how='any')
    dict_date_map_covm = {
        df_date_map_covm.index[i]: df_date_map_covm.targetDate[i]
        for i in range(len(df_date_map_covm))
    }
    ##factor align
    targetDates = sorted(list(ls_alldates_fexpo))
    df_date_map_expo = pd.DataFrame({'targetDate': targetDates},
                                    index=targetDates)
    df_date_map_expo = df_date_map_expo.reindex(
        sourceDates, method='ffill').dropna(how='any')
    dict_date_map_expo = {
        df_date_map_expo.index[i]: df_date_map_expo.targetDate[i]
        for i in range(len(df_date_map_expo))
    }
    ls_date_range = list(
        set(dict_date_map_covm.keys()).intersection(
            set(dict_date_map_expo.keys())))
    if len(ls_date_range) == 0:
        raise Exception(
            "date length is null,risk model and risk decom didn't match")
    ##covm: one pivoted factor x factor matrix per report date
    dict_df_covm = dict([(dt_selecteddate, df_covm[
        df_covm['date'] == dict_date_map_covm[dt_selecteddate]].pivot_table(
            values='value', index='factorid1', columns='factorid2'))
                         for dt_selecteddate in ls_date_range])
    ##specrisk: wide date x symbol matrix.
    # NOTE(review): positional pivot() arguments were removed in pandas 2.0;
    # the keyword form is pivot(index=..., columns=..., values=...).
    df_specrisk_raw = df_specrisk_raw.reset_index().pivot(
        'date', 'symbol', 'specificrisk')
    df_specrisk_raw1 = df_specrisk_raw.reindex(columns=ls_aname).T
    # fill missing specific risk with the per-date cross-sectional mean
    df_specrisk = df_specrisk_raw1.fillna(
        df_specrisk_raw1.mean(axis=0, skipna=True))
    # diagonal specific-risk matrix per report date
    dict_df_specrisk = {
        date: pd.DataFrame(np.diag(
            df_specrisk[dict_date_map_covm[date]].tolist()),
                           index=ls_aname,
                           columns=ls_aname)
        for date in ls_date_range
    }
    ##weight per report date, aligned to the stock pool
    dict_df_wa = {
        date: df_wa[df_wa.idname == date].reindex(ls_aname).fillna(0)
        for date in ls_date_range
    }
    ##factor exposure snapshot per report date
    dict_rmodelobj = dict([(dt_selecteddate,
                            Riskmodel(dict_riskmodel,
                                      dict_date_map_expo[dt_selecteddate],
                                      ls_aname))
                           for dt_selecteddate in ls_date_range])
    dict_df_fexpo = dict([(dt_selecteddate,
                           dict_rmodelobj[dt_selecteddate].Fexpomerge())
                          for dt_selecteddate in dict_rmodelobj.keys()])
    dict_final_return = {}
    ##create date-strdate dictionary
    for date in ls_date_range:
        ##part4: slice this date's specific risk, weights, covm and exposure
        df_specriska_singledate = dict_df_specrisk[date].fillna(0)
        df_wa_singledate = dict_df_wa[date]
        df_covm_singledate = dict_df_covm[date]
        dict_df_fexpo_singledate = dict_df_fexpo[date]
        ##part5: portfolio / benchmark / active risk reports
        p_riskreport = Riskreport(dict_df_fexpo_singledate,
                                  df_wa_singledate[['Wp']],
                                  df_covm_singledate,
                                  df_specriska_singledate, ls_industryfactor,
                                  ls_stylefactor)
        b_riskreport = Riskreport(dict_df_fexpo_singledate,
                                  df_wa_singledate[['Wb']],
                                  df_covm_singledate,
                                  df_specriska_singledate, ls_industryfactor,
                                  ls_stylefactor)
        a_riskreport = Riskreport(dict_df_fexpo_singledate,
                                  df_wa_singledate[['Wa']],
                                  df_covm_singledate,
                                  df_specriska_singledate, ls_industryfactor,
                                  ls_stylefactor)
        ##portfolio total risk
        f_varp = p_riskreport.calcTotalRisk()
        f_vara = a_riskreport.calcTotalRisk()
        ##interaction risk: the part of VarP not in benchmark or active risk
        f_varinter = f_varp - b_riskreport.calcTotalRisk() - f_vara
        if f_varinter < 0:
            f_sigmainter = np.nan
        else:
            f_sigmainter = mt.sqrt(f_varinter)
        # NOTE(review): bare except -- presumably meant to absorb division
        # by zero when f_varp == 0, but it hides any other error as well.
        try:
            f_perceninter = f_varinter / f_varp
        except:
            f_perceninter = np.nan
        df_inter = pd.DataFrame(
            data=([[f_varinter, f_sigmainter, f_perceninter]]),
            columns=['VarInter', 'sigmaInter', 'percenInter'])
        # per-factor contribution: x_i' * Cov(row i) * x for each factor i
        xpa = a_riskreport.calcX()
        xpa[np.isnan(xpa)] = 0
        df_allcovm = a_riskreport.df_covm
        # NOTE(review): .ix was removed in modern pandas; .iloc[0, 0] is the
        # positional equivalent here.
        dict_factorcontrib = [
            pd.DataFrame(data=float((((xpa.T[[i]]).dot(
                df_allcovm[df_allcovm.index == i])).dot(xpa)).ix[0, 0]),
                         index=[i],
                         columns=['value']) for i in ls_allfactor
        ]
        df_l_factorcontrib = pd.concat(dict_factorcontrib, axis=0)
        if len(ls_industryfactor) > 0:
            # industry FMCAR totals
            df_TotIndustryFMCAR = pd.DataFrame(
                data=df_l_factorcontrib.reindex(index=ls_industryfactor).sum(),
                columns=['FMCAR'])
            df_TotIndustryFMCAR['percenFMCAR'] = df_TotIndustryFMCAR[
                'FMCAR'] / a_riskreport.calcTotalRisk()
            if df_TotIndustryFMCAR['FMCAR'][0] >= 0:
                df_TotIndustryFMCAR['sigmaFMCAR'] = np.sqrt(
                    df_TotIndustryFMCAR['FMCAR'][0])
            else:
                df_TotIndustryFMCAR['sigmaFMCAR'] = np.nan
            # style FMCAR totals
            df_TotStyleFMCAR = pd.DataFrame(
                data=df_l_factorcontrib.reindex(index=ls_stylefactor).sum(),
                columns=['FMCAR'])
            df_TotStyleFMCAR['percenFMCAR'] = df_TotStyleFMCAR[
                'FMCAR'] / a_riskreport.calcTotalRisk()
            if df_TotStyleFMCAR['FMCAR'][0] >= 0:
                df_TotStyleFMCAR['sigmaFMCAR'] = np.sqrt(
                    df_TotStyleFMCAR['FMCAR'][0])
            else:
                df_TotStyleFMCAR['sigmaFMCAR'] = np.nan
            dict_final_return[date] = {
                gsConst.Const.PortRisk:
                p_riskreport.calcTotalRiskall(f_varp,
                                              ['VarP', 'sigmaP', 'percentP']),
                gsConst.Const.BenchmarkRisk:
                b_riskreport.calcTotalRiskall(f_varp,
                                              ['VarB', 'sigmaB', 'percentB']),
                gsConst.Const.PortActiveRisk:
                a_riskreport.calcTotalRiskall(f_varp,
                                              ['VarA', 'sigmaA', 'percentA']),
                gsConst.Const.InteractionRisk:
                df_inter,
                gsConst.Const.FactorRisk:
                a_riskreport.calcPortCommonRiskall(
                    f_vara, ['VarFactor', 'sigmaFactor', 'percenFactor']),
                gsConst.Const.SpecificRisk:
                a_riskreport.calcPortSpecRiskall(
                    f_vara, ['VarSS', 'sigmaSS', 'percenSS']),
                gsConst.Const.IndustryFMCAR:
                a_riskreport.calcIndustryFMCAR().reset_index(),
                gsConst.Const.StyleFMCAR:
                a_riskreport.calcstyleFMCAR().reset_index(),
                gsConst.Const.IndStyleFMCAR:
                a_riskreport.calcFMCR().reset_index(),
                gsConst.Const.PortExpo:
                p_riskreport.calcX().reset_index(),
                gsConst.Const.BenchmarkExpo:
                b_riskreport.calcX().reset_index(),
                gsConst.Const.PortExpoInd:
                p_riskreport.calcX().reindex(
                    index=ls_industryfactor).reset_index(),
                gsConst.Const.PortExpoSty:
                p_riskreport.calcX().reindex(
                    index=ls_stylefactor).reset_index(),
                gsConst.Const.BenchmarkExpoInd:
                b_riskreport.calcX().reindex(
                    index=ls_industryfactor).reset_index(),
                gsConst.Const.BenchmarkExpoSty:
                b_riskreport.calcX().reindex(
                    index=ls_stylefactor).reset_index(),
                gsConst.Const.TotIndustryFMCAR:
                df_TotIndustryFMCAR,
                gsConst.Const.TotStyleFMCAR:
                df_TotStyleFMCAR
            }
        else:
            # style-only model: no industry FMCAR entries in the report
            df_TotStyleFMCAR = pd.DataFrame(
                data=df_l_factorcontrib.reindex(index=ls_stylefactor).sum(),
                columns=['FMCAR'])
            df_TotStyleFMCAR['percenFMCAR'] = df_TotStyleFMCAR[
                'FMCAR'] / a_riskreport.calcTotalRisk()
            if df_TotStyleFMCAR['FMCAR'][0] >= 0:
                df_TotStyleFMCAR['sigmaFMCAR'] = np.sqrt(
                    df_TotStyleFMCAR['FMCAR'][0])
            else:
                df_TotStyleFMCAR['sigmaFMCAR'] = np.nan
            dict_final_return[date] = {
                gsConst.Const.PortRisk:
                p_riskreport.calcTotalRiskall(f_varp,
                                              ['VarP', 'sigmaP', 'percentP']),
                gsConst.Const.BenchmarkRisk:
                b_riskreport.calcTotalRiskall(f_varp,
                                              ['VarB', 'sigmaB', 'percentB']),
                gsConst.Const.PortActiveRisk:
                a_riskreport.calcTotalRiskall(f_varp,
                                              ['VarA', 'sigmaA', 'percentA']),
                gsConst.Const.InteractionRisk:
                df_inter,
                gsConst.Const.FactorRisk:
                a_riskreport.calcPortCommonRiskall(
                    f_vara, ['VarFactor', 'sigmaFactor', 'percenFactor']),
                gsConst.Const.SpecificRisk:
                a_riskreport.calcPortSpecRiskall(
                    f_vara, ['VarSS', 'sigmaSS', 'percenSS']),
                gsConst.Const.StyleFMCAR:
                a_riskreport.calcstyleFMCAR().reset_index(),
                gsConst.Const.IndStyleFMCAR:
                a_riskreport.calcFMCR().reset_index(),
                gsConst.Const.PortExpo:
                p_riskreport.calcX().reset_index(),
                gsConst.Const.BenchmarkExpo:
                b_riskreport.calcX().reset_index(),
                gsConst.Const.TotStyleFMCAR:
                df_TotStyleFMCAR
            }
    # pivot the per-date dicts into one DataFrame per report item
    dict_final_return_new = {}
    for i in list(dict_final_return[ls_date_range[0]].keys()):
        ls_final_return = [
            dict_final_return[date][i].assign(date=date)
            for date in ls_date_range
        ]
        dict_final_return_new[i] = pd.concat(ls_final_return, axis=0)
    return dict_final_return_new
def riskModel(df_ret, dict_risk_expo, weight, corrhalflife, varhalflife):
    '''Build a multi-factor risk model via weighted cross-sectional regressions.

    Pipeline: (1) collect industry + style factor names from the 'osets'
    entry of ``dict_risk_expo``; (2) align stock returns, factor exposures
    and weights on common dates/symbols (exposures are lagged: each return
    date is mapped to the nearest strictly-earlier exposure date via
    ``merge_asof``); (3) run one weighted regression per date to get factor
    returns and residuals; (4) compute exponentially-weighted factor-return
    covariances and specific (residual) variances; (5) assemble the output
    dictionary.

    Parameters
    ----------
    df_ret : object (x0)
        Stock returns; ``asMatrix()`` yields a dates-by-symbols frame
        (transposed here to symbols-by-dates).  Project gftIO wrapper type.
    dict_risk_expo : dict (x1)
        Factor exposure wrappers keyed by factor id; the special key
        'osets' holds the two factor-set ids (index 0 assumed industry,
        index 1 style — TODO confirm against the data source).
    weight : object (x2)
        Per-symbol regression weights; ``asMatrix()`` yields
        dates-by-symbols (transposed here).
    corrhalflife, varhalflife : int-like (x3, x4)
        EWMA half-lives (in periods) for the correlation and variance
        weights respectively.

    Returns
    -------
    dict
        '<factorid>.ret' per-factor return series, the original entries of
        ``dict_risk_expo``, plus 'ret_cov' (long-format factor covariance)
        and 'specificRisk' (specific variances).

    Raises
    ------
    Exception
        If fewer aligned dates exist than max(corrhalflife, varhalflife).
    '''
    # Resolve the two factor-set ids, then expand each set into sorted
    # human-readable factor id strings.
    ls_fexponame = list(
        map(gftIO.gidInt2Str,
            list(dict_risk_expo['osets'].asColumnTab()['O0'])))
    ind_factor_name = sorted(
        list(
            map(gftIO.gidInt2Str,
                list(dict_risk_expo[ls_fexponame[0]].asColumnTab()['O0']))))
    sty_factor_name = sorted(
        list(
            map(gftIO.gidInt2Str,
                list(dict_risk_expo[ls_fexponame[1]].asColumnTab()['O0']))))
    allfactor = ind_factor_name + sty_factor_name

    ## stock return preprocess: symbols-by-dates, drop all-NaN date columns
    df_w_ret = df_ret.asMatrix().T.dropna(how='all', axis=1)

    ## factor exposure preprocess: one dates-by-symbols frame per factor
    dict_risk_expo_new = {
        factorname: dict_risk_expo[factorname].asMatrix().dropna(how='all')
        for factorname in allfactor
    }
    # Dates / symbols present in EVERY factor's exposure frame.
    ls_ls_fexpodate = list([
        dict_risk_expo_new[factorname].index.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_alldates_fexpo = reduce(np.intersect1d, ls_ls_fexpodate)
    ls_ls_fexposymbol = list([
        dict_risk_expo_new[factorname].columns.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_allsymbols_fexpo = reduce(np.intersect1d, ls_ls_fexposymbol)

    ## weight preprocess: symbols-by-dates
    weight = weight.asMatrix().T

    ## get the date/symbol intersection of (stock return, factor exposure,
    ## weight).  Map each return date to the nearest STRICTLY earlier
    ## exposure date (allow_exact_matches=False), i.e. exposures are lagged
    ## by at least one business day relative to the returns they explain.
    fexpodate = pd.DataFrame(ls_alldates_fexpo, columns=['date_fexpo'])
    retdate = pd.DataFrame(df_w_ret.columns, columns=['date_ret'])
    # merge_asof requires both sides sorted on the merge keys.
    retdate.sort_values("date_ret", ascending=True, inplace=True)
    fexpodate.sort_values("date_fexpo", ascending=True, inplace=True)
    df_date_map = pd.merge_asof(retdate,
                                fexpodate,
                                left_on="date_ret",
                                right_on="date_fexpo",
                                allow_exact_matches=False)
    df_date_map.dropna(how='any', inplace=True)
    # Keep only the FIRST return date matched to each exposure date so the
    # mapping below is one-to-one.
    df_date_map = df_date_map.drop_duplicates(
        subset='date_fexpo').reset_index()
    # exposure date -> return date
    dict_date_map = {
        df_date_map.date_fexpo[i]: df_date_map.date_ret[i]
        for i in range(len(df_date_map))
    }
    # Return dates usable for the regressions (present in weights, returns
    # and the lagged-exposure mapping).
    ls_alldates = sorted(
        list(
            set(weight.columns).intersection(set(
                df_w_ret.columns)).intersection(set(dict_date_map.values()))))
    # Corresponding exposure ("day before") dates.
    ls_alldates_ondaybefore = sorted(list(dict_date_map.keys()))
    # Per exposure date: symbols with a non-NaN return that also appear in
    # every exposure frame and in the weight matrix.
    ls_allsymbols = {
        date: list(
            set(df_w_ret[[dict_date_map[date]]].dropna().index).intersection(
                set(ls_allsymbols_fexpo)).intersection(set(weight.index)))
        for date in ls_alldates_ondaybefore
    }

    ## align the stock return and factor exposure
    dict_df_weight_raw = {
        date: weight[[date]].reindex(index=ls_allsymbols[date]).fillna(0)
        for date in ls_alldates_ondaybefore
    }
    # sqrt(weight): multiplying BOTH returns and exposures by sqrt(w) makes
    # the plain least-squares fit equivalent to a w-weighted regression.
    dict_df_weight = {
        date: np.sqrt(dict_df_weight_raw[date])
        for date in ls_alldates_ondaybefore
    }
    # Weighted returns per regression date, with an extra zero row
    # 'constrain' matching the constraint row added to the exposures.
    dict_df_ret = {
        dict_date_map[date]: pd.concat([
            (df_w_ret[[dict_date_map[date]
                       ]].reindex(index=ls_allsymbols[date])) *
            (dict_df_weight[date].rename(columns={date: dict_date_map[date]})),
            pd.DataFrame(data=np.zeros(1),
                         index=['constrain'],
                         columns=[dict_date_map[date]])
        ],
                                      axis=0)
        for date in ls_alldates_ondaybefore
    }
    # Raw (unweighted) big-X exposure matrix per date, built by a
    # file-local helper.
    dict_df_fexpo_raw = {
        date: fexpomerge(dict_risk_expo_new, date, allfactor, ls_allsymbols)
        for date in ls_alldates_ondaybefore
    }
    # Weighted exposures, with a unit 'countryfactor' (market intercept)
    # column appended before scaling each row by sqrt(weight).
    dict_df_fexpo = {
        date: dict_df_fexpo_raw[date].assign(countryfactor=1).multiply(
            dict_df_weight[date].squeeze(), axis='index')
        for date in ls_alldates_ondaybefore
    }

    ## calculate constraints (e.g. industry factor returns summing to zero
    ## — exact form is in the file-local helper `expoconstrain`)
    dict_df_fexpo_con = {
        date: expoconstrain(dict_df_fexpo_raw, date, ind_factor_name,
                            allfactor, dict_df_weight_raw, sty_factor_name,
                            dict_df_fexpo)
        for date in ls_alldates_ondaybefore
    }

    ######################## step3: calculate factor return ########################
    # One regression per date; each result exposes 'params' (factor
    # returns) and 'resid' (specific returns), keyed by RETURN date.
    ls_df_fitresult = {
        dict_date_map[date]: Regression(date, dict_df_ret, dict_df_fexpo_con,
                                        dict_df_weight, dict_df_fexpo,
                                        dict_date_map)
        for date in ls_alldates_ondaybefore
    }
    ls_df_facreturn = list(
        ls_df_fitresult[date]['params'].rename(columns={'params': date})
        for date in ls_alldates)
    df_model_params = reduce(
        lambda df_para1, df_para2: pd.concat([df_para1, df_para2], axis=1),
        ls_df_facreturn)

    ######################## step4: calculate factor return covariance ########################
    df_allfactorret = df_model_params.T
    df_allfactorret = df_allfactorret.sort_index()
    corrhalflife = int(corrhalflife)
    varhalflife = int(varhalflife)
    halflife = max(corrhalflife, varhalflife)
    if len(ls_alldates) < halflife:
        raise Exception("More data needed")
    else:
        # Covariances need a full half-life window, so skip the first
        # halflife-1 dates.
        ls_alldatesnew = ls_alldates[halflife - 1:len(ls_alldates)]
        # Exponential decay weights, oldest observation first; sqrt so that
        # weighting the observations squares back to the intended decay.
        corrwgts = list(
            map(lambda x: mt.sqrt(0.5**(x / int(corrhalflife))),
                list(range(int(corrhalflife) - 1, -1, -1))))
        varwgts = list(
            map(lambda x: mt.sqrt(0.5**(x / int(varhalflife))),
                list(range(int(varhalflife) - 1, -1, -1))))
        # NOTE(review): the calcfactorRetCov defined earlier in this file
        # takes (panel, date, allfactor) — 3 args — but is called here with
        # 6.  Presumably a different calcfactorRetCov definition is in
        # effect at runtime; confirm which binding this module ends up with.
        ls_factorretcov = list(
            calcfactorRetCov(df_allfactorret, date, corrwgts, varwgts,
                             corrhalflife, varhalflife)
            for date in ls_alldatesnew)
        df_l_factorretcov = pd.concat(
            ls_factorretcov, axis=0).rename(columns={'variable': 'factorid2'})

        ######################## step5: calculate the residual(specific) variances of regression ########################
        ## part1: merge factor return, factor exposure and stock return
        ls_specificrisk = list(
            ls_df_fitresult[date]['resid'].rename(columns={'resid': date})
            for date in ls_alldates)
        df_w_specificrisk = pd.concat(ls_specificrisk, axis=1).T
        df_w_specificrisk = df_w_specificrisk.sort_index()
        specificwgts = list(
            map(lambda x: mt.sqrt(0.5**(x / int(halflife))),
                list(range(int(halflife) - 1, -1, -1))))
        ls_factorretspe = list(
            calcfactorRetSpe(df_w_specificrisk, date, specificwgts, halflife)
            for date in ls_alldatesnew)
        df_specificrisk_var = pd.concat(ls_factorretspe, axis=0)

        ######################## step6: generate final return value ########################
        # Drop the market-intercept column; it is not a reported factor.
        df_allfactorret = df_allfactorret.drop('countryfactor', axis=1)
        # One single-column frame per factor, keyed '<factorid>.ret', with
        # the column renamed to its binary gid form via strSet2Np.
        dict_factorret = {
            key + '.ret': df_allfactorret[[key]].rename(
                columns={
                    key: list(
                        gftIO.strSet2Np(
                            np.array(list(df_allfactorret[[key]].columns))))[0]
                })
            for key in df_allfactorret.columns
        }
        dictMerged = dict(
            dict_factorret, **dict_risk_expo, **{
                'ret_cov': df_l_factorretcov,
                'specificRisk': df_specificrisk_var
            })
        return dictMerged