def InFlowFreeMv(beg_date, end_date):

    """
    Factor description: net money inflow over the past 10 days / free-float market value.
    Inflow is the trade amount and volume when the intraday price ticks up;
    outflow is the trade amount and volume when the intraday price ticks down.
    """

    # param
    #################################################################################
    LongTerm = 10
    factor_name = "InFlowFreeMv"
    ipo_num = 90

    # read data
    #################################################################################
    inflow = Stock().get_factor_h5("Mf_Inflow", None, "primary_mfc").T
    free_mv = Stock().get_factor_h5("FreeMarketValue", None, "alpha_dfc").T

    # code set & date set
    #################################################################################
    [inflow, free_mv] = Stock().make_same_index_columns([inflow, free_mv])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(inflow.index))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        inflow_pre = inflow.ix[data_beg_date:current_date, :]
        free_mv_pre = free_mv.ix[data_beg_date:current_date, :]

        if len(inflow_pre) >= int(0.8 * LongTerm):

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            inflow_pre_sum = inflow_pre.sum()
            free_mv_pre_sum = free_mv_pre.sum()
            date_data = pd.concat([inflow_pre_sum, free_mv_pre_sum], axis=1)
            date_data.columns = ['inflow', 'free_mv']
            date_data = date_data[date_data['free_mv'] != 0.0]
            date_data['ratio'] = date_data['inflow'] / date_data['free_mv'] * 100000000
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            date_data = pd.DataFrame([], columns=['ratio'], index=free_mv.columns)

        if i == 0:
            res = pd.DataFrame(date_data['ratio'].values, columns=[current_date], index=date_data.index)
        else:
            res_add = pd.DataFrame(date_data['ratio'].values, columns=[current_date], index=date_data.index)
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
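
# Illustrative sketch only (not part of the production pipeline): the same 10-day
# rolling ratio of net inflow to free-float market value that InFlowFreeMv computes,
# shown on random toy data with made-up stock codes.
def _demo_inflow_free_mv_ratio():
    import numpy as np
    import pandas as pd

    dates = pd.date_range("2018-01-01", periods=15, freq="B")
    codes = ["000001.SZ", "600000.SH"]
    inflow = pd.DataFrame(np.random.randn(15, 2) * 1e7, index=dates, columns=codes)
    free_mv = pd.DataFrame(np.random.rand(15, 2) * 1e10 + 1e9, index=dates, columns=codes)

    # 10-day rolling sums, ratio scaled by 1e8 as in the factor definition above
    ratio = inflow.rolling(10).sum() / free_mv.rolling(10).sum() * 100000000
    print(ratio.tail())
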
def holding_data_yangchao(today, project_path, out_path):

    # input parameters
    ##################################################################################
    person_name = 'yangchao'
    before_trade_data = Date().get_trade_date_offset(today, -1)
    today = Date().change_to_str(today)

    # fund list
    ##################################################################################
    fund = pd.read_excel(project_path + 'Manage_Fund_Name.xlsx', encoding='gbk')
    fund_val = fund.ix[:, person_name]
    fund_val = fund_val.dropna()
    fund_list = list(fund_val.values)

    # securities held by each fund
    ##################################################################################
    fund_asset = MfcData().get_group_security(before_trade_data)
    fund_asset = fund_asset[['日期', '组合名称', '基金名称', '证券代码', '证券名称', '持仓',
                             '市值比净值(%)', '盈亏率(%)', '证券类别', '当日买金额',
                             '当日卖金额', '资产单元名称', '持仓多空标志']]

    for i_fund in range(len(fund_list)):

        fund_name = fund_list[i_fund]
        fund_asset_fund = fund_asset[fund_asset['基金名称'] == fund_name]
        out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
        if not os.path.exists(out_sub_path):
            os.mkdir(out_sub_path)
        out_file = os.path.join(out_sub_path, fund_name + '.csv')
        fund_asset_fund.to_csv(out_file, index=None)

    # assets of the absolute-return groups
    ##################################################################################
    group_name = 'yangchao_group'
    fund_val = fund.ix[:, group_name]
    fund_val = fund_val.dropna()
    fund_list = list(fund_val.values)

    fund_asset = MfcData().get_group_security(before_trade_data)
    fund_asset = fund_asset[['日期', '组合名称', '基金名称', '证券代码', '证券名称', '持仓',
                             '市值比净值(%)', '盈亏率(%)', '证券类别', '当日买金额',
                             '当日卖金额', '资产单元名称', '持仓多空标志']]

    for i_fund in range(len(fund_list)):

        fund_name = fund_list[i_fund]
        one_fund = fund_asset[fund_asset['组合名称'] == fund_name]
        out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
        if not os.path.exists(out_sub_path):
            os.mkdir(out_sub_path)
        if fund_name == '绝对收益期货组合':
            fund_name = "绝对收益股指期货组合"
        out_file = os.path.join(out_sub_path, fund_name + '.csv')
        one_fund.to_csv(out_file)

    # stock pools
    ##################################################################################
    pool_path = Parameter().get_load_out_file("Mfc_Data")
    pool_list = ["公司超五库.xls", "公司股票库.xls", "公司关联库.xls", "公司禁止库.xls",
                 "公司限制库.xls", "绝对收益禁止库.xls", "绝对收益投资库.xls", "量化限制库.xls"]
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")

    for i_file in range(len(pool_list)):

        file = pool_list[i_file]
        src_file = os.path.join(pool_path, 'raw_file', today, file)
        out_file = os.path.join(out_sub_path, file)
        try:
            shutil.copyfile(src_file, out_file)
        except:
            pd.DataFrame().to_excel(out_file)

    # stock pools (English file names)
    ##################################################################################
    pool_path = Parameter().get_load_out_file("Mfc_Data")
    pool_list = {"公司禁止库.xls": "Company Forbidden Pool.csv",
                 "公司关联库.xls": "Company Related Pool.csv",
                 "公司限制库.xls": "Company Limited Pool.csv",
                 "公司股票库.xls": "Company Investment Pool.csv",
                 "绝对收益禁止库.xls": "ABS Fund Forbidden Pool.csv",
                 "绝对收益投资库.xls": "ABS Fund Investment Pool.csv",
                 "量化限制库.xls": "Quantitative Limited Pool.csv"}
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")

    for scr_file, out_file in pool_list.items():

        src_file = os.path.join(pool_path, 'raw_file', before_trade_data, scr_file)
        out_file = os.path.join(out_sub_path, out_file)
        data = pd.read_excel(src_file, index_col=[0])
        data.index = data['证券代码'].map(stock_code_add_postfix)
        data.index = data.index.map(lambda x: x[0:6] + '-CN')
        data['Status'] = 1.0
        data.to_csv(out_file, header=None, columns=['Status'])

    # Company Investment Pool.csv: union of 公司股票库 and 公司超五库
    ##################################################################################
    stock_pool_file = os.path.join(pool_path, 'raw_file', before_trade_data, "公司股票库.xls")
    stock_pool = pd.read_excel(stock_pool_file, index_col=[0])
    stock_pool.index = stock_pool['证券代码'].map(stock_code_add_postfix)
    stock_pool.index = stock_pool.index.map(lambda x: x[0:6] + '-CN')
    stock_pool['Status'] = 1.0

    stock_5_pool_file = os.path.join(pool_path, 'raw_file', before_trade_data, "公司超五库.xls")
    stock_5_pool = pd.read_excel(stock_5_pool_file, index_col=[0])
    stock_5_pool.index = stock_5_pool['证券代码'].map(stock_code_add_postfix)
    stock_5_pool.index = stock_5_pool.index.map(lambda x: x[0:6] + '-CN')
    stock_5_pool['Status'] = 1.0

    out_file = os.path.join(out_sub_path, "Company Investment Pool.csv")
    res = pd.concat([stock_5_pool['Status'], stock_pool['Status']], axis=0)
    res.to_csv(out_file, header=None)

    # Recent IPO Stock.csv
    ##################################################################################
    ipo_date_pd = Stock().get_ipo_date()
    beg_date = (datetime.strptime(today, '%Y%m%d') - timedelta(days=365)).strftime("%Y%m%d")
    ipo_date_pd = ipo_date_pd[ipo_date_pd['IPO_DATE'] > beg_date]
    ipo_date_pd.loc[:, 'IPO_DATE'] = 1.0
    ipo_date_pd.index = ipo_date_pd.index.map(lambda x: x[0:6] + '-CN')

    filename = 'Recent IPO Stock.csv'
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    print('loading ', filename, ' ......')
    ipo_date_pd.to_csv(os.path.join(out_sub_path, filename), header=None, columns=['IPO_DATE'])

    # Suspended List.csv
    ##################################################################################
    status_data = Stock().get_trade_status_date(today)
    ipo_date_pd = Stock().get_ipo_date()
    data = pd.concat([status_data, ipo_date_pd], axis=1)
    data = data.dropna()
    data = data[data['DELIST_DATE'] >= today]
    data['Trade_Status'] = 1.0
    data.index = data.index.map(lambda x: x[0:6] + '-CN')

    filename = 'Suspended List.csv'
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    print('loading ', filename, ' ......')
    data.to_csv(os.path.join(out_sub_path, filename), header=None, columns=['Trade_Status'])

    # Benchmark.csv (5.5 cash weight)
    ##################################################################################
    benchmark_dict = {"000905.SH": "CSI500 Benchmark.csv",
                      "000300.SH": "CSI300 Benchmark.csv",
                      "000016.SH": "CSI50 Benchmark.csv"}

    for index_code, out_file in benchmark_dict.items():

        data = Index().get_weight(index_code, before_trade_data)
        data.index = data.index.map(lambda x: x[0:6] + '-CN')
        data['WEIGHT'] *= 94.5
        result = pd.DataFrame([5.5], index=["CSH_CNY"], columns=['WEIGHT'])
        result = pd.concat([result, data], axis=0)
        out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
        result.to_csv(os.path.join(out_sub_path, out_file), header=None, columns=['WEIGHT'])

    # holdings exported under English file names
    ##################################################################################
    en_holding_dict = {"泰达宏利量化增强": "Quantitative Enhencement portfolio.csv",
                       "泰达宏利业绩驱动量化": "Quantitative Earning Drive.csv",
                       "泰达新思路": "New Thinking Portfolio.csv",
                       "泰达宏利集利债券": "High Dividend Bond Equity.csv",
                       "泰达宏利沪深300": "CSI300 Portfolio.csv",
                       "泰达中证500指数分级": "CSI500 Portfolio.csv"}
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    fund_sec = MfcData().get_fund_security(before_trade_data)

    for name, out_file in en_holding_dict.items():

        fund_sec_one = fund_sec[fund_sec['基金名称'] == name]
        fund_sec_one = fund_sec_one[fund_sec_one['证券类别'] == '股票']
        fund_sec_one = fund_sec_one[['证券代码', '持仓']]
        fund_sec_one.index = fund_sec_one['证券代码'].map(stock_code_add_postfix)
        fund_sec_one.index = fund_sec_one.index.map(lambda x: x[0:6] + '-CN')
        print(fund_sec_one)

        if out_file != "High Dividend Bond Equity.csv":
            asset = MfcData().get_fund_asset(before_trade_data)
            asset.index = asset['基金名称']
            asset = asset[~asset.index.duplicated()]
            fund_sec_one = fund_sec_one[~fund_sec_one.index.duplicated()]
            fund_sec_one.ix['CSH_CNY', "持仓"] = asset.ix[name, "当前现金余额"]

        out_file = os.path.join(out_sub_path, out_file)
        fund_sec_one['持仓'] = fund_sec_one['持仓'].round(0)
        fund_sec_one.to_csv(out_file, header=None, columns=['持仓'])

    # absolute-return holdings exported under English file names
    ##################################################################################
    en_holding_dict = {"绝对收益50对冲股票组合": "Absolute Return Strategy CSI50 Portfolio.csv",
                       "绝对收益300对冲股票组合": "Absolute Return Strategy CSI300 Portfolio.csv",
                       "绝对收益500对冲股票组合": "Absolute Return Strategy CSI500 Portfolio.csv"}
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    fund_sec = MfcData().get_group_security(before_trade_data)

    for name, out_file in en_holding_dict.items():

        fund_sec_one = fund_sec[fund_sec['组合名称'] == name]
        fund_sec_one = fund_sec_one[fund_sec_one['证券类别'] == '股票']
        fund_sec_one = fund_sec_one[['证券代码', '持仓']]
        fund_sec_one.index = fund_sec_one['证券代码'].map(stock_code_add_postfix)
        fund_sec_one.index = fund_sec_one.index.map(lambda x: x[0:6] + '-CN')
        out_file = os.path.join(out_sub_path, out_file)
        fund_sec_one.to_csv(out_file, header=None, columns=['持仓'])

    # China Market Index.csv
    ##################################################################################
    data = Index().get_weight("China_Index_Benchmark", before_trade_data)
    out_file = "China Market Index.csv"
    data.index = data.index.map(lambda x: x[0:6] + '-CN')
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    data.to_csv(os.path.join(out_sub_path, out_file), header=None, columns=['WEIGHT'])

    # monitor files: fund securities
    ##################################################################################
    en_holding_dict = {"泰达中证500指数分级": "CSI500 Monitor.csv"}
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    fund_sec = MfcData().get_fund_security(before_trade_data)

    for name, out_file in en_holding_dict.items():

        fund_sec_one = fund_sec[fund_sec['基金名称'] == name]
        fund_sec_one = fund_sec_one[fund_sec_one['证券类别'] == '股票']
        fund_sec_one = fund_sec_one[['证券代码', '持仓']]
        fund_sec_one.columns = ['STOCK_CODE', 'HOLDING']
        fund_sec_one.index = fund_sec_one['STOCK_CODE'].map(stock_code_add_postfix)
        out_file = os.path.join(out_sub_path, out_file)
        fund_sec_one.to_csv(out_file, columns=['HOLDING'])

    # monitor files: absolute-return group securities
    ##################################################################################
    out_sub_path = os.path.join(out_path, person_name, today, "holding_data")
    en_holding_dict = {"绝对收益50对冲股票组合": "Absolute Trading Monitor CSI50.csv",
                       "绝对收益300对冲股票组合": "Absolute Trading Monitor CSI300.csv",
                       "绝对收益500对冲股票组合": "Absolute Trading Monitor CSI500.csv",
                       "绝对收益期货组合": "Absolute Monitor Option.csv"}
    fund_sec = MfcData().get_group_security(before_trade_data)

    for name, out_file in en_holding_dict.items():

        fund_sec_one = fund_sec[fund_sec['组合名称'] == name]
        # fund_sec_one = fund_sec_one[fund_sec_one['资产类别'] == '股票资产']
        fund_sec_one = fund_sec_one[['证券代码', '持仓']]
        fund_sec_one.columns = ['STOCK_CODE', 'HOLDING']
        if name != "绝对收益期货组合":
            fund_sec_one.index = fund_sec_one['STOCK_CODE'].map(stock_code_add_postfix)
        else:
            fund_sec_one.index = fund_sec_one['STOCK_CODE']
        out_file = os.path.join(out_sub_path, out_file)
        fund_sec_one.to_csv(out_file, columns=['HOLDING'])
def AdvanceReceiptsEquity(beg_date, end_date):

    """
    Factor description: advance receipts / shareholders' equity, same reporting period.
    If either value is negative, the result is negative.
    """

    # param
    #################################################################################
    factor_name = 'AdvanceReceiptsEquity'
    ipo_num = 90

    # read data
    #################################################################################
    advance = Stock().get_factor_h5("AdvanceReceipts", None, "primary_mfc")
    equity = Stock().get_factor_h5("TotalShareHoldeRequity", None, "primary_mfc")
    advance = StockFactorOperate().change_quarter_to_daily_with_report_date(advance, beg_date, end_date)
    equity = StockFactorOperate().change_quarter_to_daily_with_report_date(equity, beg_date, end_date)

    # data processing
    #################################################################################
    [advance, equity] = Stock().make_same_index_columns([advance, equity])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    for i in range(0, len(date_series)):

        current_date = date_series[i]

        if current_date in advance.columns:

            advance_date = advance[current_date]
            equity_date = equity[current_date]
            print('Calculating factor %s at date %s' % (factor_name, current_date))

            data_date = pd.concat([advance_date, equity_date], axis=1)
            data_date.columns = ['advance_date', 'equity_date']
            data_date = data_date.dropna()
            data_date = data_date[data_date['equity_date'] != 0.0]
            data_date['ratio'] = data_date['advance_date'] / data_date['equity_date']

            # if either value is negative, make the ratio negative
            minus_index = (data_date['advance_date'] < 0.0) | (data_date['equity_date'] < 0.0)
            data_date.loc[minus_index, 'ratio'] = -data_date.loc[minus_index, 'ratio'].abs()
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            data_date = pd.DataFrame([], columns=["ratio"], index=advance.index)

        if i == 0:
            res = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
        else:
            res_add = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
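
# Illustrative sketch only: the sign convention used by AdvanceReceiptsEquity (and the
# other ratio factors below) -- the ratio is forced negative whenever either input of
# the same reporting period is negative. Toy numbers, made up for this example.
def _demo_sign_convention():
    import pandas as pd

    df = pd.DataFrame({"advance": [10.0, -5.0, 8.0], "equity": [100.0, 50.0, -40.0]})
    df["ratio"] = df["advance"] / df["equity"]
    minus = (df["advance"] < 0.0) | (df["equity"] < 0.0)
    df.loc[minus, "ratio"] = -df.loc[minus, "ratio"].abs()
    print(df)
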
def cal_factor_exposure(self, beg_date, end_date):

    """ Calculate the factor exposure """

    # param
    long_term = 120
    effective_term = 96

    # read data
    price = Stock().read_factor_h5("PriceCloseAdjust")
    trade_amount = Stock().read_factor_h5("TradeAmount")

    # data processing
    [trade_amount, price] = Stock().make_same_index_columns([trade_amount, price])

    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(price.columns))
    date_series.sort()

    res = pd.DataFrame()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(long_term - 1))
        price_before = price.loc[:, data_beg_date:current_date]
        price_before = price_before.T.dropna(how='all').T
        pct_current = price.loc[:, current_date]
        trade_amount_before = trade_amount.loc[:, data_beg_date:current_date]
        trade_amount_before = trade_amount_before.T.dropna(how='all').T

        if len(price_before) > effective_term:

            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            price_sub_abs = price_before.sub(pct_current, axis='index').abs()
            w1 = np.log(1 / price_sub_abs.mul(1 / pct_current, axis='index'))

            # exclude days where the price equals the current price: the distance weight is 0 there
            w1[np.isinf(w1)] = 0.0

            n = len(price_before.columns)
            l = len(price_before)
            weight = np.array(range(1, 1 + n)) / np.array(range(1, 1 + n)).sum()
            w2 = pd.DataFrame(np.tile(weight, (l, 1)), index=price_before.index, columns=price_before.columns)

            total_power = trade_amount_before.mul(w1).mul(w2)
            sign = np.sign(price_before.sub(pct_current, axis='index'))
            resistance_power = sign.mul(total_power)
            ratio = resistance_power.sum(axis=1) / total_power.sum(axis=1)
            ratio = pd.DataFrame(ratio.values, index=ratio.index, columns=[current_date])
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            ratio = pd.DataFrame([], columns=[current_date], index=trade_amount_before.index)

        res = pd.concat([res, ratio], axis=1)

    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def SPTTMDaily(beg_date, end_date):

    """
    Factor description: total operating income (TTM) / total market value.
    The TTM series is not aligned to a common reporting period; the latest
    available (most recently disclosed) report is used.
    If either value is negative, the result is negative.
    """

    # param
    #################################################################################
    factor_name = "SPTTMDaily"
    ipo_num = 90

    # read data
    #################################################################################
    income = Stock().get_factor_h5("OperatingIncome", None, "primary_mfc")
    income = StockFactorOperate().change_single_quarter_to_ttm_quarter(income)
    report_data = Stock().get_factor_h5("OperatingIncomeDaily", "ReportDate", 'primary_mfc')
    income = StockFactorOperate().change_quarter_to_daily_with_disclosure_date(income, report_data, beg_date, end_date)
    mv = Stock().get_factor_h5("TotalMarketValue", None, "alpha_dfc")

    # data processing
    #################################################################################
    [income, mv] = Stock().make_same_index_columns([income, mv])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    for i in range(0, len(date_series)):

        current_date = date_series[i]

        if current_date in income.columns:

            income_date = income[current_date]
            mv_date = mv[current_date]
            print('Calculating factor %s at date %s' % (factor_name, current_date))

            data_date = pd.concat([income_date, mv_date], axis=1)
            data_date.columns = ['income', 'mv']
            data_date = data_date.dropna()
            data_date = data_date[data_date['mv'] != 0.0]
            data_date['ratio'] = data_date['income'] / data_date['mv']

            # if either value is negative, make the ratio negative
            minus_index = (data_date['income'] < 0.0) | (data_date['mv'] < 0.0)
            data_date.loc[minus_index, 'ratio'] = -data_date.loc[minus_index, 'ratio'].abs()
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            data_date = pd.DataFrame([], columns=["ratio"], index=income.index)

        if i == 0:
            res = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
        else:
            res_add = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def VolumeLnMean120d(beg_date, end_date):

    """
    Factor description: weighted average of -1 * log(trade amount) over the past 120 days.
    The weights vary linearly with time (more recent days receive larger weights in the code).
    """

    # param
    #################################################################################
    LongTerm = 120
    HalfTerm = int(LongTerm / 2)
    factor_name = 'VolumeLnMean120d'
    ipo_num = 90

    # read data
    #################################################################################
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # code set & date set
    #################################################################################
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(trade_amount.index) & set(date_series))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        amount_before = trade_amount.ix[data_beg_date:current_date, :]

        if len(amount_before) == LongTerm:

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            zero_number = amount_before.applymap(lambda x: 1.0 if x == 0.0 else 0.0).sum()
            code_filter_list = (zero_number[zero_number < HalfTerm]).index
            amount_before = trade_amount.ix[data_beg_date:current_date, code_filter_list]
            amount_before_log = amount_before.applymap(lambda x: np.nan if x == 0 else -np.log(x))

            weight = np.array(list(range(1, LongTerm + 1)))
            weight_amount_log_val = np.dot(amount_before_log.T.values, weight)
            weight_amount_log = pd.DataFrame(weight_amount_log_val,
                                             index=amount_before_log.columns,
                                             columns=[current_date])
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            weight_amount_log = pd.DataFrame([], columns=[current_date], index=trade_amount.columns)

        if i == 0:
            res = weight_amount_log
        else:
            res_add = weight_amount_log
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
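
# Illustrative sketch only: the weighting scheme used by VolumeLnMean120d above.
# -log(trade amount) over the window is weighted by 1..N, so more recent days count
# more; note the result is a weighted sum, not a normalized mean. Toy numbers.
def _demo_volume_ln_weighting():
    import numpy as np

    amount = np.array([1.2e8, 9.0e7, 1.5e8, 2.0e8, 1.1e8])  # oldest -> newest
    weight = np.arange(1, len(amount) + 1)
    print(np.dot(-np.log(amount), weight))
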
def rank_fund2(self, fund_pct, bench_pct, fund_code, rank_pool,
               beg_date, end_date, new_fund_date=None, excess=False):

    """
    Compute the rank of a fund within a fund pool. Three ranking modes:
    1. take the result directly from the Wind API
    2. user-specified fund pool, absolute return from local fund data
    3. user-specified fund pool, excess return from local fund data
    """

    if new_fund_date is None:
        new_fund_date = beg_date

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    new_fund_date = Date().change_to_str(new_fund_date)
    print(" 正在计算基金排名 %s 在基金池 %s 从 %s 到 %s " % (fund_code, rank_pool, beg_date, end_date))

    # dispatch by ranking mode
    if rank_pool == 'wind':

        # Wind level-3 fund classification
        date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date, end_date)
        data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
        val = str(data.Data[0][0])
        data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)

        try:
            pct = np.round(data.Data[0][0] / 100.0, 3)
        except Exception as e:
            print(e)
            print("wind返回基金排名百分比非数字")
            pct = "None"
        return val, pct

    else:

        # get the fund pool
        pool = FundPool().get_fund_pool_all(date="20181231", name=rank_pool)
        bool_series = (pool['if_connect'] == '非联接基金') & (pool['if_hk'] == '非港股基金')
        bool_series &= (pool['if_a'] == 'A类基金')
        bool_series &= (pool['if_etf'] == '非ETF基金')
        pool = pool[bool_series]

        if not excess:

            # rank by total return over the period
            # fund_pct = Fund().get_fund_factor("Repair_Nav_Pct")
            fund_pct = fund_pct.loc[beg_date:end_date, pool.index]
            fund_pct = fund_pct.dropna(how='all')
            data = (fund_pct / 100.0 + 1.0).cumprod() - 1.0
            data = pd.DataFrame(data.iloc[-1, :])
            data.columns = ['Pct']
            data = data[~data.index.duplicated()]
            data = data.dropna()
            data = pd.concat([data, pool], axis=1)
            data = data[data["setupdate"] <= new_fund_date]
            data = data.dropna(subset=['Pct'])
            data = data.sort_values(by='Pct', ascending=False)
            data['range'] = range(len(data))
            data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(len(data)))
            data['rank_pct'] = data['range'].map((lambda x: (x + 1) / len(data)))

            try:
                val = data.loc[fund_code, "rank"]
                pct = data.loc[fund_code, "rank_pct"]
                pct = np.round(pct, 3)
                file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
                file = os.path.join(self.data_path, file)
                data.to_csv(file)
            except Exception as e:
                print(e)
                val = "None"
                pct = "None"
            return val, pct

        else:

            # rank by excess return over the period
            # fund_pct = Fund().get_fund_factor("Repair_Nav_Pct")
            # bench_pct = Fund().get_fund_factor("Fund_Bench_Pct") * 100
            excess_pct = fund_pct.sub(bench_pct)
            excess_pct = excess_pct.loc[beg_date:end_date, pool.index]
            excess_pct = excess_pct.dropna(how='all')
            data = (excess_pct / 100.0 + 1.0).cumprod() - 1.0
            data = pd.DataFrame(data.iloc[-1, :])
            data.columns = ['Pct']
            data = data[~data.index.duplicated()]
            data = data.dropna()
            data = pd.concat([data, pool], axis=1)
            data = data[data["setupdate"] <= new_fund_date]
            data = data.dropna(subset=['Pct'])
            data = data.sort_values(by='Pct', ascending=False)
            data['range'] = range(len(data))
            data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(len(data)))
            data['rank_pct'] = data['range'].map((lambda x: (x + 1) / len(data)))

            try:
                val = data.loc[fund_code, "rank"]
                pct = data.loc[fund_code, "rank_pct"]
                pct = np.round(pct, 3)
                file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
                file = os.path.join(self.data_path, file)
                data.to_csv(file)
            except Exception as e:
                print(e)
                val = "None"
                pct = "None"
            return val, pct
def ROICTTM(beg_date, end_date):

    """
    Factor description: (operating income TTM - operating cost TTM) / total invested capital.
    The TTM values are aligned to the same reporting period.
    """

    # param
    #################################################################################
    factor_name = "ROICTTM"
    ipo_num = 90

    # read data
    #################################################################################
    cost = Stock().get_factor_h5("OperatingCost", None, "primary_mfc")
    income = Stock().get_factor_h5("OperatingIncome", None, "primary_mfc")
    investcapital = Stock().get_factor_h5("Investcapital", None, "primary_mfc")

    cost = StockFactorOperate().change_single_quarter_to_ttm_quarter(cost)
    income = StockFactorOperate().change_single_quarter_to_ttm_quarter(income)
    investcapital = StockFactorOperate().change_single_quarter_to_ttm_quarter(investcapital)
    investcapital /= 4.0

    cost = StockFactorOperate().change_quarter_to_daily_with_report_date(cost, beg_date, end_date)
    income = StockFactorOperate().change_quarter_to_daily_with_report_date(income, beg_date, end_date)
    investcapital = StockFactorOperate().change_quarter_to_daily_with_report_date(investcapital, beg_date, end_date)

    # data processing
    #################################################################################
    [cost, income, investcapital] = Stock().make_same_index_columns([cost, income, investcapital])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)

    for i in range(0, len(date_series)):

        current_date = date_series[i]

        if current_date in cost.columns:

            cost_date = cost[current_date]
            income_date = income[current_date]
            investcapital_date = investcapital[current_date]
            print('Calculating factor %s at date %s' % (factor_name, current_date))

            data_date = pd.concat([cost_date, income_date, investcapital_date], axis=1)
            data_date.columns = ['cost', 'income', 'investcapital']

            """ 这里本来应该对行业做一些调整
            filename = in_path[0:len(in_path)-13] + "DataSet\\industry_citic.txt"
            industry = pd.read_table(filename, index_col=[0], encoding='gbk', header=None)
            cost_industry = pd.concat([operating_cost_ttm, industry], axis=1)
            cost_industry.columns = ['value', 'industry']
            filter1 = cost_industry['industry'].map(lambda x: x in ['银行', '非银行金融'])
            filter2 = cost_industry['industry'].map(lambda x: x is np.nan)
            filter_total = filter1 & filter2
            cost_industry.ix[filter_total, 'value'] = 0.0
            operating_cost_ttm = pd.DataFrame(cost_industry['value'].values,
                                              index=cost_industry.index, columns=[curent_date])
            """

            data_date['diff'] = data_date['income'] - data_date['cost']
            data_date = data_date.dropna()
            data_date = data_date[data_date['investcapital'] != 0.0]
            data_date['ratio'] = data_date['diff'] / data_date['investcapital']

            # if either value is negative, make the ratio negative
            minus_index = (data_date['diff'] < 0.0) | (data_date['investcapital'] < 0.0)
            data_date.loc[minus_index, 'ratio'] = -data_date.loc[minus_index, 'ratio'].abs()
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            data_date = pd.DataFrame([], columns=["ratio"], index=cost.index)

        if i == 0:
            res = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
        else:
            res_add = pd.DataFrame(data_date['ratio'].values, columns=[current_date], index=data_date.index)
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def regress_fund(self, fund_code, beg_date, end_date):

    """
    Regress fund NAV returns on the returns of last quarter's disclosed heavy holdings
    and a bond-fund index.
    """

    period = "W"
    date_series = Date().get_trade_date_series(beg_date, end_date, period)
    fund_return = self.fund_pct[fund_code]
    fund_return = fund_return.dropna()
    date_series = list(set(date_series) & set(fund_return.index))
    date_series.sort()

    # quarterly-report holdings
    quarter_weight = Fund().get_fund_holding_quarter(fund_code)
    r2_series = pd.DataFrame([], index=date_series, columns=['r2'])

    for i_date in range(0, len(date_series)):

        # window selection:
        # shortly after the last quarterly report the regression window is short,
        # longer after the report the window is longer
        ed_date = date_series[i_date]
        ed_date = Date().get_trade_date_offset(ed_date, -0)
        quarter_date = Date().get_last_fund_quarter_date(ed_date)
        bg_date = Date().get_trade_date_offset(ed_date, -(self.regression_len - 1))
        bg_date = max(bg_date, quarter_date)
        bg_date = Date().get_trade_date_offset(bg_date, -0)
        date_diff = Date().get_trade_date_diff(bg_date, ed_date)

        # previous-period holdings
        try:
            stock_weight = pd.DataFrame(quarter_weight[quarter_date])
            stock_weight = stock_weight.dropna()
            stock_weight.columns = ['Weight']

            # return data
            data = pd.concat([fund_return, self.stock_pct, self.bold_pct], axis=1)
            data['885062.WI'] = data['885062.WI'].fillna(0.0)
            regress_date_series = Date().get_trade_date_series(bg_date, ed_date)
            data = data.loc[regress_date_series, :]
            data = data.T.dropna(thresh=self.regression_min_len).T
            data = data.fillna(data.mean(axis=1))

            # stock pool
            stock_pool = list(stock_weight.index)
            stock_pool = list(set(stock_pool) & set(data.columns[1:]))
            stock_pool.sort()
            stock_pool.append("885062.WI")

            stock_ratio = self.get_fund_stock_ratio(fund_code, quarter_date)
            stock_weight['Weight'] /= stock_weight['Weight'].sum()
            stock_weight['Weight'] *= stock_ratio
            stock_weight.loc["885062.WI", "Weight"] = 100 - stock_ratio
            stock_weight /= 100.0
            stock_weight = stock_weight.loc[stock_pool, :]
            stock_weight['Weight'] /= stock_weight['Weight'].sum()

            print("## Cal Regress %s %s %s %s %s ##" % (fund_code, quarter_date, bg_date, ed_date, len(data)))

            if (len(data) > self.regression_min_len) and (len(stock_pool) > 4):

                # fit the fund returns with the holding stocks by minimizing tracking error,
                # subject to: weights sum to 1, no shorting,
                # and bounded turnover against the previous quarterly weights
                y = data[fund_code].values / 100.0
                x = data[stock_pool].values / 100.0
                n = len(y)
                k = x.shape[1]
                weight_old = stock_weight.T.values[0]
                turnover = date_diff * 0.8 / 100
                print("TurnOver %s " % turnover)

                # optimization
                ##############################################################################
                w = cvx.Variable(k)
                sigma = y - x * w
                prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)),
                                   [cvx.sum(w) == 1.0,
                                    w >= 0,
                                    cvx.sum(cvx.abs(w - weight_old)) <= turnover])
                prob.solve()
                print('Solver Status : ', prob.status)
                params_add = pd.DataFrame(w.value, columns=[ed_date], index=stock_pool)

                # regression R2
                ##############################################################################
                tss = np.sum((y - np.mean(y)) ** 2) / n
                y_res = y - np.dot(x, w.value)
                rss = np.sum(y_res ** 2) / (n - k - 1)
                r2 = 1 - rss / tss
                params_add.loc["R2", ed_date] = r2
                print(params_add.T)
            else:
                params_add = pd.DataFrame([], columns=[ed_date], index=stock_pool)
        except Exception as e:
            params_add = pd.DataFrame([], columns=[ed_date])

        if i_date == 0:
            params_new = params_add
        else:
            params_new = pd.concat([params_new, params_add], axis=1)

    # merge with existing data
    ####################################################################
    params_new = params_new.T
    out_file = os.path.join(self.data_path_exposure, fund_code + '.csv')

    if os.path.exists(out_file):
        params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        params_old.index = params_old.index.map(str)
        params = FactorOperate().pandas_add_row(params_old, params_new)
    else:
        params = params_new
    params.to_csv(out_file)
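
# Illustrative sketch only (random data): the constrained tracking-error fit used in
# regress_fund above -- minimize the sum of squared residuals subject to full
# investment, no shorting and a turnover bound versus the previous weights. All
# numbers and names here are made up for the example.
def _demo_tracking_error_fit():
    import cvxpy as cvx
    import numpy as np

    np.random.seed(0)
    n_obs, k = 60, 5
    x = 0.01 * np.random.randn(n_obs, k)                      # asset returns
    y = x @ np.array([0.3, 0.3, 0.2, 0.1, 0.1]) + 0.001 * np.random.randn(n_obs)
    w_old = np.ones(k) / k                                    # previous-period weights

    w = cvx.Variable(k)
    prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(y - x @ w)),
                       [cvx.sum(w) == 1.0,
                        w >= 0,
                        cvx.sum(cvx.abs(w - w_old)) <= 0.5])  # turnover bound
    prob.solve()
    print(prob.status, np.round(w.value, 3))
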
def cal_fund_holder_exposure(self, fund, beg_date, end_date):

    # computed once every half year
    type_list = ['STYLE', 'COUNTRY', 'INDUSTRY']
    date_series = Date().get_normal_date_series(beg_date, end_date, period='S')

    for i_date in range(len(date_series)):

        date = date_series[i_date]
        report_date = Date().get_normal_date_month_end_day(date)
        trade_date = Date().get_trade_date_month_end_day(date)

        barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
        barra_exposure = Barra().get_factor_exposure_date(trade_date, type_list)
        fund_holding = FundHolder().get_fund_holding_report_date_fund(fund, report_date)
        print("########## Calculate Holder Exposure %s %s ##########" % (fund, report_date))

        if (barra_exposure is None) or (fund_holding is None):
            exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])
        else:
            fund_holding = fund_holding['Weight']
            data = pd.concat([fund_holding, barra_exposure], axis=1)
            data = data.dropna()

            if (len(data) == 0) or (data is None):
                exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])
            else:
                exposure_add = pd.DataFrame([], columns=barra_name, index=[report_date])
                for i_factor in range(len(barra_name)):
                    factor_name = barra_name[i_factor]
                    data_weight = data[['Weight', factor_name]]
                    data_weight['StockExposure'] = data['Weight'] * data[factor_name]
                    exposure_add.ix[report_date, factor_name] = data_weight['StockExposure'].sum() / 100.0

        if i_date == 0:
            exposure_new = exposure_add
        else:
            exposure_new = pd.concat([exposure_new, exposure_add], axis=0)

    # merge with existing data
    ####################################################################
    out_path = Parameter().get_read_file(self.holder_exposure_name)
    out_file = os.path.join(out_path, 'Fund_Holder_Exposure_' + fund + '.csv')

    if os.path.exists(out_file):
        exposure_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        exposure_old.index = exposure_old.index.map(str)
        params = pandas_add_row(exposure_old, exposure_new)
    else:
        params = exposure_new
    params.to_csv(out_file)
def weight_allstock_holding_date(report_date):

    report_date = Date().change_to_str(report_date)
    data = Fund().get_fund_holding_stock_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]

    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    fund_code = list(set(pool))
    fund_code.sort()

    weight = Fund().get_fund_factor("Total_Asset", date_list=[report_date]).T
    weight = weight.dropna()

    for i_fund in range(len(fund_code)):

        fund = fund_code[i_fund]
        data_fund = data[data['FundCode'] == fund]
        data_fund = data_fund.dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)

        try:
            asset = weight.ix[fund, report_date]
            asset /= 100000000
        except Exception as e:
            asset = 1.0

        if i_fund == 0:
            data_fund_all = data_fund.copy()
            data_fund_all["Asset_Weight"] = data_fund_all['Weight'] * asset
            all_weight = data_fund_all['Weight'].sum()
            if all_weight < 60:
                data_fund_all = pd.DataFrame([], columns=data_fund.columns)
        else:
            data_fund_all_add = data_fund.copy()
            data_fund_all_add["Asset_Weight"] = data_fund_all_add['Weight'] * asset
            all_weight = data_fund_all_add['Weight'].sum()
            if all_weight < 60:
                data_fund_all_add = pd.DataFrame([], columns=data_fund.columns)
            data_fund_all = pd.concat([data_fund_all, data_fund_all_add], axis=0)

    stock_code = list(set(data_fund_all['StockCode'].values))
    stock_code.sort()
    weight_sum = data_fund_all['Asset_Weight'].sum()
    weight_code = pd.DataFrame([], index=stock_code, columns=['Asset_Weight'])

    for i_stock in range(len(stock_code)):
        stock = stock_code[i_stock]
        data_stock = data_fund_all[data_fund_all['StockCode'] == stock]
        stock_weight_sum = data_stock['Asset_Weight'].sum()
        weight_code.ix[stock, 'Asset_Weight'] = stock_weight_sum / weight_sum

    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')

    out_path = os.path.join(Fund().data_path_holder, "fund_holding_benchmark")
    out_path = os.path.join(out_path, "weight_halfyear_all")
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path, "weight_halfyear_all_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
def cal_fund_regression_exposure(self, fund, beg_date, end_date, period="M"): # 参数 #################################################################### up_style_exposure = 1.5 up_position_exposure = 0.95 low_position_exposure = 0.75 position_sub = 0.10 beg_date = Date().change_to_str(beg_date) end_date = Date().change_to_str(end_date) # 取得数据 #################################################################### type_list = ['STYLE', 'COUNTRY'] barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values) barra_return = Barra().get_factor_return(None, None, type_list) date_series = Date().get_trade_date_series(beg_date, end_date, period=period) fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None, [fund]) data = pd.concat([fund_return, barra_return], axis=1) data = data.dropna() print(" Fund Code Total Len %s " % len(data)) factor_number = len(barra_name) # 循环回归计算每天的暴露 #################################################################### for i_date in range(0, len(date_series)): period_end_date = date_series[i_date] period_beg_date = Date().get_trade_date_offset( period_end_date, -self.regression_period) period_date_series = Date().get_trade_date_series( period_beg_date, period_end_date) data_periods = data.ix[period_date_series, :] data_periods = data_periods.dropna() quarter_date = Date().get_last_fund_quarter_date(period_end_date) stock_ratio = (FundFactor().get_fund_factor( "Stock_Ratio", [quarter_date], [fund]) / 100).values[0][0] print( "########## Calculate Regression Exposure %s %s %s %s %s %s ##########" % (fund, period_beg_date, period_end_date, quarter_date, len(data_periods), stock_ratio)) if len(data_periods) > self.regression_period_min: y = data_periods.ix[:, 0].values x = data_periods.ix[:, 1:].values x_add = sm.add_constant(x) low_position_exposure = max(stock_ratio - position_sub, low_position_exposure) print(low_position_exposure) P = 2 * np.dot(x_add.T, x_add) Q = -2 * np.dot(x_add.T, y) G_up = np.diag(np.ones(factor_number + 1)) G_low = -np.diag(np.ones(factor_number + 1)) G = np.row_stack((G_up, G_low)) h_up = np.row_stack((np.ones( (factor_number, 1)) * up_style_exposure, np.array([up_position_exposure]))) h_low = np.row_stack((np.ones( (factor_number, 1)) * up_style_exposure, np.array([-low_position_exposure]))) h = np.row_stack((h_up, h_low)) P = matrix(P) Q = matrix(Q) G = matrix(G) h = matrix(h) try: result = sol.qp(P, Q, G, h) params_add = pd.DataFrame(np.array(result['x'][1:]), columns=[period_end_date], index=barra_name).T print(params_add) except: params_add = pd.DataFrame([], columns=[period_end_date], index=barra_name).T print(params_add) else: params_add = pd.DataFrame([], columns=[period_end_date], index=barra_name).T print(params_add) if i_date == 0: params_new = params_add else: params_new = pd.concat([params_new, params_add], axis=0) # 合并新数据 #################################################################### out_path = Parameter().get_read_file(self.regression_exposure_name) out_file = os.path.join(out_path, 'Fund_Regression_Exposure_' + fund + '.csv') if os.path.exists(out_file): params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk') params_old.index = params_old.index.map(str) params = pandas_add_row(params_old, params_new) else: params = params_new print(params) params.to_csv(out_file)
def cal_factor_liquidity(beg_date, end_date):

    """
    Factor description: liquidity factor LIQUIDITY.
    LIQUIDITY_STOM: log of the turnover summed over the last 21 trading days.
    LIQUIDITY_STOA: log of the turnover summed over the last 252 trading days (divided by 12).
    LIQUIDITY = 0.35 * LIQUIDITY_STOM + 0.35 * LIQUIDITY_STOQ + 0.3 * LIQUIDITY_STOA
    LIQUIDITY is then regressed against the SIZE factor and the residual is used.
    """

    # params
    ##################################################################################
    factor_name = "NORMAL_CNE5_LIQUIDITY"
    M = 21
    Q = 63   # 3 months of 21 trading days
    A = 252
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # read data
    ##################################################################################
    turnover_daily = Stock().get_factor_h5("TurnOver_Daily", None, 'primary_mfc').T
    turnover_month = turnover_daily.rolling(window=M).sum().applymap(np.log)
    turnover_quarter = (turnover_daily.rolling(window=Q).sum() / 3.0).applymap(np.log)
    turnover_yearly = (turnover_daily.rolling(window=A).sum() / 12.0).applymap(np.log)

    turnover_quarter = turnover_quarter.dropna(how='all').T
    turnover_yearly = turnover_yearly.dropna(how='all').T
    Stock().write_factor_h5(turnover_quarter, "RAW_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "RAW_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    turnover_quarter = FactorPreProcess().remove_extreme_value_mad(turnover_quarter)
    turnover_quarter = FactorPreProcess().standardization_free_mv(turnover_quarter)
    turnover_yearly = FactorPreProcess().remove_extreme_value_mad(turnover_yearly)
    turnover_yearly = FactorPreProcess().standardization_free_mv(turnover_yearly)
    Stock().write_factor_h5(turnover_quarter, "NORMAL_CNE5_LIQUIDITY_STOQ", 'barra_risk_dfc')
    Stock().write_factor_h5(turnover_yearly, "NORMAL_CNE5_LIQUIDITY_STOA", 'barra_risk_dfc')

    turnover = 0.35 * turnover_month + 0.35 * turnover_quarter + 0.3 * turnover_yearly
    turnover = turnover.T.dropna(how='all').T

    size_data = Stock().get_factor_h5("NORMAL_CNE5_SIZE", None, 'barra_risk_dfc')
    [size_data, turnover] = FactorPreProcess().make_same_index_columns([size_data, turnover])
    turnover_res = pd.DataFrame([], index=turnover.index, columns=turnover.columns)

    for i_index in range(len(turnover.columns)):

        date = turnover.columns[i_index]
        print('Calculating Barra Risk factor %s at date %s' % (factor_name, date))
        regression_data = pd.concat([size_data[date], turnover[date]], axis=1)
        regression_data.columns = ['x', 'y']
        regression_data = regression_data.dropna()

        y = regression_data['y'].values
        x = regression_data['x'].values
        x_add = sm.add_constant(x)
        model = sm.OLS(y, x_add).fit()
        regression_data['res'] = regression_data['y'] - model.fittedvalues
        turnover_res[date] = regression_data['res']

    turnover_res = FactorPreProcess().remove_extreme_value_mad(turnover_res)
    turnover_res = FactorPreProcess().standardization_free_mv(turnover_res)
    Stock().write_factor_h5(turnover_res, factor_name, 'barra_risk_dfc')
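
# Illustrative sketch only (random daily turnover): the three turnover horizons that
# are combined into LIQUIDITY above, assuming windows of 21, 63 and 252 trading days
# for month, quarter and year as in the docstring.
def _demo_liquidity_horizons():
    import numpy as np
    import pandas as pd

    turnover = pd.Series(np.random.rand(300) * 0.02 + 0.001,
                         index=pd.date_range("2018-01-01", periods=300, freq="B"))
    stom = np.log(turnover.rolling(21).sum())
    stoq = np.log(turnover.rolling(63).sum() / 3.0)
    stoa = np.log(turnover.rolling(252).sum() / 12.0)
    liquidity = 0.35 * stom + 0.35 * stoq + 0.30 * stoa
    print(liquidity.dropna().tail())
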
def weight_top10stock_holding_date(report_date):

    report_date = Date().change_to_str(report_date)
    data = Fund().get_fund_holding_report_date(report_date)
    data = data[['FundCode', 'Weight', 'StockCode']]

    pool = Fund().get_fund_pool_code(report_date, "基金持仓基准基金池")
    fund_code = list(set(pool))
    fund_code.sort()

    weight = Fund().get_wind_fund_asset(report_date)

    for i_fund in range(len(fund_code)):

        fund = fund_code[i_fund]
        data_fund = data[data['FundCode'] == fund]
        data_fund = data_fund.dropna(subset=['Weight'])
        data_fund = data_fund.sort_values(by=['Weight'], ascending=False)

        try:
            asset = weight.ix[fund, report_date]
            asset /= 100000000
        except:
            asset = 1.0

        if i_fund == 0:
            data_fund_top10 = data_fund.iloc[:10, :]
            data_fund_top10["Asset_Weight"] = data_fund_top10['Weight'] * asset
            top10_weight = data_fund_top10['Weight'].sum()
            if top10_weight < 30:
                data_fund_top10 = pd.DataFrame([], columns=data_fund.columns)
        else:
            data_fund_top10_add = data_fund.iloc[:10, :]
            data_fund_top10_add["Asset_Weight"] = data_fund_top10_add['Weight'] * asset
            top10_weight = data_fund_top10_add['Weight'].sum()
            if top10_weight < 30:
                data_fund_top10_add = pd.DataFrame([], columns=data_fund.columns)
            data_fund_top10 = pd.concat([data_fund_top10, data_fund_top10_add], axis=0)

    stock_code = list(set(data_fund_top10['StockCode'].values))
    stock_code.sort()
    weight_sum = data_fund_top10['Asset_Weight'].sum()
    weight_code = pd.DataFrame([], index=stock_code, columns=['Asset_Weight'])

    for i_stock in range(len(stock_code)):
        stock = stock_code[i_stock]
        data_stock = data_fund_top10[data_fund_top10['StockCode'] == stock]
        stock_weight_sum = data_stock['Asset_Weight'].sum()
        weight_code.ix[stock, 'Asset_Weight'] = stock_weight_sum / weight_sum

    weight_code.index = weight_code.index.map(lambda x: x[0:6] + '-CN')

    out_path = Parameter().get_read_file("Fund_Stock_Holding_BenchMark")
    out_path = os.path.join(out_path, "weight_quarter_top10")
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path, "weight_quarter_top10_" + report_date + '.csv')
    print(out_file)
    weight_code.to_csv(out_file, header=None)
def IlliquidityBias(beg_date, end_date):

    """
    Factor description: abs(daily return) / trade amount,
    mean over the last 10 days divided by mean over the last 40 days.
    """

    # param
    #################################################################################
    LongTerm = 40
    ShortTerm = 10
    HalfTerm = LongTerm / 2
    factor_name = "IlliquidityBias"
    ipo_num = 90

    # read data
    #################################################################################
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc").T
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # data processing
    #################################################################################
    [pct, trade_amount] = Stock().make_same_index_columns([pct, trade_amount])
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(pct.index))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        trade_amount_before = trade_amount.ix[data_beg_date:current_date, :]

        if len(trade_amount_before) > HalfTerm:

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            zero_number = trade_amount_before.applymap(lambda x: 1.0 if x == 0.0 else 0.0).sum()
            code_filter_list = (zero_number[zero_number < ShortTerm]).index
            amount_before = trade_amount.ix[data_beg_date:current_date, code_filter_list]
            pct_before = pct.ix[data_beg_date:current_date, code_filter_list]

            illiq = pct_before.abs().div(amount_before, axis='index') * 100000000
            illiq[illiq > 100.0] = np.nan
            illiq_bias = illiq.ix[-1 - ShortTerm:, :].mean() / illiq.mean()
            price_mean = pd.DataFrame(illiq_bias.values, columns=[current_date], index=illiq_bias.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            price_mean = pd.DataFrame([], columns=[current_date], index=trade_amount_before.columns)

        if i == 0:
            res = price_mean
        else:
            res = pd.concat([res, price_mean], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
        '*****@*****.**', '*****@*****.**', '*****@*****.**'
    ]
    acc_mail_name = []
    subject_header = "指数基金风格暴露周报%s_自动发送" % last_date

    email = EmailSender()
    exposure_file = os.path.join(path, xlsx_name)
    email.attach_file(exposure_file)
    email.attach_picture_inside_body("最近交易日风格暴露" + last_date,
                                     os.path.join(path, pic_name + '.png'))
    email.attach_picture_inside_body("最近半年报风格暴露" + report_date_halfyear,
                                     os.path.join(path, pic_name_halfyear + '.png'))
    email.send_mail_mfcteda(sender_mail_name, receivers_mail_name,
                            acc_mail_name, subject_header)


################################################################################
if __name__ == '__main__':

    ################################################################################
    path = 'E:\\Data\\fund_data\\fund_index_exposure_weekly\\'
    # Date().load_trade_date_series_all()
    today = datetime.today().strftime("%Y%m%d")
    last_date = Date().get_trade_date_offset(today, -1)
    report_date_halfyear = Date().get_last_fund_halfyear_date(today)
    print(" 最近半年报是 %s 最近一个交易日为 %s " % (report_date_halfyear, last_date))
    mail_exposure(path, last_date, report_date_halfyear)
def Resistance(beg_date, end_date):

    """
    Factor description: upward price resistance (Guotai Junan).
    Resistance ratio: resistance_ratio = resistance_num / power_num, i.e. net resistance
    divided by total force; it lies between -1 and 1, where 1 means all resistance is
    upward and -1 means all resistance is downward.
    Net resistance: resistance_num = sum(sign(pi - p) * Vi * w1i * w2i),
    i.e. upward resistance minus downward resistance.
    Total force: power_num = sum(Vi * w1i * w2i),
    i.e. upward resistance plus downward resistance.
    Here w1i = ln(p / abs(pi - p)) is the price-distance weight (the farther the price,
    the smaller the effect), w2i = ln(1 + i) / ln(1 + N) is the time weight (the more
    recent, the larger the effect), pi is the price i days ago, Vi is the trade amount
    i days ago, and N is the window length.
    """

    # param
    #################################################################################
    LongTerm = 120
    MinimumSize = 96
    factor_name = "Resistance"
    ipo_num = 90

    # read data
    #################################################################################
    price = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc")

    # data processing
    #################################################################################
    [trade_amount, price] = Stock().make_same_index_columns([trade_amount, price])

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(price.columns))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        price_before = price.ix[:, data_beg_date:current_date]
        price_before = price_before.T.dropna(how='all').T
        pct_current = price.ix[:, current_date]
        trade_amount_before = trade_amount.ix[:, data_beg_date:current_date]
        trade_amount_before = trade_amount_before.T.dropna(how='all').T

        if len(price_before) > MinimumSize:

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            price_sub_abs = price_before.sub(pct_current, axis='index').abs()
            W1 = np.log(1 / price_sub_abs.mul(1 / pct_current, axis='index'))

            # exclude days where the price equals the current price: the distance weight is 0 there
            W1[np.isinf(W1)] = 0.0

            N = len(price_before.columns)
            L = len(price_before)
            Weight = np.array(range(1, 1 + N)) / np.array(range(1, 1 + N)).sum()
            W2 = pd.DataFrame(np.tile(Weight, (L, 1)), index=price_before.index, columns=price_before.columns)

            TotalPower = trade_amount_before.mul(W1).mul(W2)
            Sign = np.sign(price_before.sub(pct_current, axis='index'))
            ResistancePower = Sign.mul(TotalPower)
            ratio = ResistancePower.sum(axis=1) / TotalPower.sum(axis=1)
            ratio = pd.DataFrame(ratio.values, index=ratio.index, columns=[current_date])
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            ratio = pd.DataFrame([], columns=[current_date], index=trade_amount_before.index)

        if i == 0:
            res = ratio
        else:
            res = pd.concat([res, ratio], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
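
# Illustrative sketch only: the single-stock resistance ratio defined in the Resistance
# docstring, on made-up prices and trade amounts. Note that the code above uses a
# linear time weight rather than the ln(1+i)/ln(1+N) form quoted in the docstring;
# the linear version is shown here.
def _demo_resistance_ratio():
    import numpy as np

    prices = np.array([10.0, 10.5, 9.8, 10.2, 11.0])      # p_i, oldest -> newest
    amounts = np.array([1e6, 2e6, 1.5e6, 1e6, 3e6])       # V_i, trade amounts
    p = 10.4                                              # current price
    n = len(prices)

    w1 = np.log(1.0 / (np.abs(prices - p) / p))           # price-distance weight
    w1[np.isinf(w1)] = 0.0                                # p_i == p contributes nothing
    w2 = np.arange(1, n + 1) / np.arange(1, n + 1).sum()  # linear time weight

    power = amounts * w1 * w2
    ratio = (np.sign(prices - p) * power).sum() / power.sum()
    print(ratio)  # in [-1, 1]; +1 means all resistance is upward
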
def VolumeUpRatio(beg_date, end_date):

    """
    Factor description: with the current close as the lower bound and current close * 1.1
    as the upper bound, the share of the past 120 days' trade amount that occurred on days
    whose close lies between the two bounds, multiplied by -1 at the end.
    Note: NaN trade amounts are filled with 0, and stocks with more than 60 zero-amount
    days in the past 120 days are dropped.
    """

    # param
    #################################################################################
    LongTerm = 120
    HalfTerm = int(LongTerm / 2)
    PctRange = 0.1
    factor_name = "VolumeUpRatio"
    ipo_num = 90

    # read data
    #################################################################################
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc").T
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc").T

    # data processing
    #################################################################################
    [close, trade_amount] = Stock().make_same_index_columns([close, trade_amount])
    trade_amount = trade_amount.fillna(0.0)

    # calculate data daily
    #################################################################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(trade_amount.index) & set(date_series))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        amount_before = trade_amount.ix[data_beg_date:current_date, :]

        if len(amount_before) >= int(0.8 * LongTerm):

            print('Calculating factor %s at date %s' % (factor_name, current_date))
            zero_number = amount_before.applymap(lambda x: 1.0 if x == 0.0 else 0.0).sum()
            code_filter_list = (zero_number[zero_number < HalfTerm]).index

            close_low_limit = close.ix[current_date, code_filter_list]
            close_up_limit = close.ix[current_date, code_filter_list] * (1 + PctRange)
            close_before = close.ix[data_beg_date:current_date, code_filter_list]
            price_center = (close_before > close_low_limit) & (close_before < close_up_limit)

            trade_amount_filter_sum = amount_before[price_center].sum()
            trade_amount_sum = amount_before.sum()
            trade_amount_sum = pd.concat([trade_amount_filter_sum, trade_amount_sum], axis=1)
            trade_amount_sum.columns = ['filter_sum', 'sum']
            trade_amount_sum = trade_amount_sum[trade_amount_sum['sum'] != 0.0]
            trade_amount_sum['ratio'] = - trade_amount_sum['filter_sum'] / trade_amount_sum['sum']
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            trade_amount_sum = pd.DataFrame([], columns=['ratio'], index=amount_before.columns)

        if i == 0:
            res = pd.DataFrame(trade_amount_sum['ratio'].values, columns=[current_date],
                               index=trade_amount_sum.index)
        else:
            res_add = pd.DataFrame(trade_amount_sum['ratio'].values, columns=[current_date],
                                   index=trade_amount_sum.index)
            res = pd.concat([res, res_add], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    #############################################################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def risk_factor_performance(self, factor_name, stock_pool_name="AllChinaStockFilter",
                            beg_date=None, end_date=None, period='M'):
    """
    Compute performance statistics for a single risk factor: factor return and its
    volatility, exposure autocorrelation, the share of |t| values above 2, etc.,
    in order to identify risk factors with real pricing power.
    """
    exposure = self.get_risk_factor_exposure(factor_name)
    price = Stock().read_factor_h5("Price_Unadjust")
    num = Date().get_period_number_for_year(period)

    if beg_date is None:
        beg_date = exposure.columns[0]
    if end_date is None:
        end_date = exposure.columns[-1]

    date_series = Date().get_trade_date_series(beg_date, end_date, period)
    date_series = list(set(date_series) & set(exposure.columns) & set(price.columns))
    date_series.sort()
    factor_return = pd.DataFrame([], index=date_series, columns=['因子收益率'])

    for i_date in range(0, len(date_series) - 1):

        cur_date = date_series[i_date]
        buy_date = cur_date
        sell_date = date_series[i_date + 1]
        stock_list = Stock().get_invest_stock_pool(stock_pool_name, cur_date)
        stock_pct = price[sell_date] / price[buy_date] - 1.0

        # exposure autocorrelation between adjacent periods
        exposure_date = exposure[cur_date]
        exposure_next = exposure[sell_date]
        data = pd.concat([exposure_date, exposure_next], axis=1)
        data = data.dropna()
        stock_list_finally = list(set(stock_list) & set(data.index))
        stock_list_finally.sort()
        data = data.loc[stock_list_finally, :]
        auto_corr = data.corr().iloc[0, 1]

        # cross-sectional regression of next-period returns on current exposure
        data = pd.concat([exposure_date, stock_pct], axis=1)
        stock_list_finally = list(set(stock_list) & set(data.index))
        stock_list_finally.sort()
        data = data.loc[stock_list_finally, :]
        data.columns = ['x', 'y']
        data = data.dropna()

        if len(data) > 0:
            print("Risk Factor %s %s %s" % (factor_name, stock_pool_name, cur_date))
            y = data['y'].values
            x = data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            factor_return_date = model.params[1]
            rank_corr = data.corr(method="spearman").iloc[0, 1]
            t_value = model.tvalues[1]
            r2 = model.rsquared_adj
            factor_return.loc[cur_date, '因子收益率'] = factor_return_date
            factor_return.loc[cur_date, 'IC'] = rank_corr
            factor_return.loc[cur_date, 'T值'] = t_value
            factor_return.loc[cur_date, '自相关系数'] = auto_corr
            factor_return.loc[cur_date, 'R2'] = r2
        else:
            print("Risk Factor is Null %s %s %s" % (factor_name, stock_pool_name, cur_date))

    factor_return = factor_return.dropna(subset=['因子收益率', 'T值'])
    factor_return['因子累计收益率'] = factor_return['因子收益率'].cumsum()
    factor_return_mean = factor_return['因子收益率'].mean() * num
    factor_return_std = factor_return['因子收益率'].std() * np.sqrt(num)
    rank_ic_mean = factor_return['IC'].mean()
    rank_ic_ir = rank_ic_mean / factor_return['IC'].std() * np.sqrt(num)

    if len(factor_return) > 0:

        abs_t_2_ratio = len(factor_return[factor_return['T值'].abs() > 2]) / len(factor_return)
        data_beg_date = factor_return.index[0]
        data_end_date = factor_return.index[-1]
        abs_t_mean = factor_return['T值'].abs().mean()
        auto_corr_mean = factor_return['自相关系数'].mean()
        r2_mean = factor_return['R2'].mean()

        summary = pd.DataFrame([], columns=['因子表现'])
        summary.loc['因子年化收益率', "因子表现"] = factor_return_mean
        summary.loc['因子年化波动率', "因子表现"] = factor_return_std
        summary.loc['IC均值', "因子表现"] = rank_ic_mean
        summary.loc['ICIR', "因子表现"] = rank_ic_ir
        summary.loc['平均R2', "因子表现"] = r2_mean
        summary.loc['T值绝对值大于2的比率', "因子表现"] = abs_t_2_ratio
        summary.loc['T值绝对值平均数', "因子表现"] = abs_t_mean
        summary.loc['自相关系数平均', "因子表现"] = auto_corr_mean
        summary.loc['期数', "因子表现"] = str(len(factor_return))
        summary.loc['开始日期', "因子表现"] = data_beg_date
        summary.loc['结束日期', "因子表现"] = data_end_date

        path = os.path.join(self.factor_performance_path, stock_pool_name)
        if not os.path.exists(path):
            os.makedirs(path)
        file = os.path.join(path, 'Summary_%s.xlsx' % factor_name)
        excel = WriteExcel(file)

        num_format_pd = pd.DataFrame([], columns=summary.columns, index=['format'])
        num_format_pd.loc['format', :] = '0.00%'
        worksheet = excel.add_worksheet(factor_name)
        excel.write_pandas(summary, worksheet, begin_row_number=0, begin_col_number=1,
                           num_format_pd=num_format_pd, color="red", fillna=True)

        num_format_pd = pd.DataFrame([], columns=factor_return.columns, index=['format'])
        num_format_pd.loc['format', :] = '0.00%'
        excel.write_pandas(factor_return, worksheet, begin_row_number=0, begin_col_number=4,
                           num_format_pd=num_format_pd, color="red", fillna=True)
        excel.close()
    else:
        print("Risk Factor %s is Null in %s" % (factor_name, stock_pool_name))
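# Hedged illustration of one cross-section of the regression above, using synthetic
# exposures and forward returns instead of the Stock()/exposure data handles: the OLS
# slope is the period factor return, its t-statistic feeds the |t| > 2 ratio, and the
# Spearman correlation plays the role of the rank IC.
import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(1)
exposure = pd.Series(rng.normal(size=500))                     # standardised factor exposure
fwd_return = 0.002 * exposure + rng.normal(scale=0.05, size=500)

model = sm.OLS(fwd_return, sm.add_constant(exposure)).fit()
factor_return = model.params.iloc[1]                           # period factor return
t_value = model.tvalues.iloc[1]
rank_ic = exposure.corr(fwd_return, method="spearman")
print(round(factor_return, 4), round(t_value, 2), round(rank_ic, 3))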
def TestFundAlphaFactor(name, periods="Q"):

    # params (the shift_name convention means the factor is lagged by one period)
    ########################################
    # name = "FundHolderQuarter_AlphaReturnMean_480"
    # periods = "Q"
    path = r'E:\3_Data\4_fund_data\2_fund_factor'
    group_number = 10

    # fund NAV per period
    ########################################
    fund_nav = Fund().get_fund_factor("Repair_Nav", None, None).T

    # fund alpha-factor exposures
    ########################################
    file = os.path.join(path, "exposure", name + '.csv')
    values = pd.read_csv(file, index_col=[0], encoding='gbk')
    values.columns = values.columns.map(str)
    values.columns = values.columns.map(lambda x: Date().get_trade_date_offset(x, 0))
    values = FactorPreProcess().remove_extreme_value_mad(values)
    values = FactorPreProcess().standardization(values)
    label = ["Group_" + str(x) for x in range(1, 1 + group_number)]
    result = pd.DataFrame([], index=values.columns, columns=label)

    # backtest dates
    ########################################
    backtest_date_series = Date().get_trade_date_series("20040101", datetime.today(), periods)
    values_date_series = list(map(str, list(values.columns)))
    nav_date_series = list(map(str, list(fund_nav.columns)))
    date_series = list(set(backtest_date_series) & set(values_date_series) & set(nav_date_series))
    date_series.sort()

    # cross-sectional regression and grouping for every period
    ########################################
    for i in range(len(date_series) - 1):

        # dates
        date = date_series[i]
        next_date = date_series[i + 1]
        val = pd.DataFrame(values[date])
        val_next = pd.DataFrame(values[next_date])
        pct = pd.DataFrame(fund_nav[next_date] / fund_nav[date] - 1.0)

        # merge factor values and next-period returns
        data = pd.concat([val, pct], axis=1)
        data = data.loc[~data.index.duplicated(), :]
        data.columns = ['val', 'pct']
        data = data.dropna()
        data['rank_val'] = data['val'].rank()
        data['rank_pct'] = data['pct'].rank()

        if len(data) > 10:
            try:
                print(name, date)

                # regression: factor return and IC
                x = data['val']
                y = data['pct']
                x_add = sm.add_constant(x)
                model = sm.OLS(y, x_add).fit()
                beta = model.params[1]
                result.loc[date, 'AlphaFactor'] = beta
                result.loc[date, "IC"] = data.corr().iloc[0, 1]
                result.loc[date, "RankIC"] = data["rank_val"].corr(data["rank_pct"])

                # sort by factor value and compute the average return of each group
                data = data.sort_values(by=['val'], ascending=False)
                data['group'] = pd.qcut(data['val'], group_number, labels=label)
                gb = data.groupby(by=["group"]).mean()["pct"]
                result.loc[date, label] = gb
                mean = gb.mean()
                result.loc[date, "G1ExcessReturn"] = gb.loc["Group_1"] - mean
                result.loc[date, "G10ExcessReturn"] = gb.loc["Group_10"] - mean

                # factor autocorrelation between adjacent periods
                corr_data = pd.concat([val, val_next], axis=1)
                result.loc[date, "AutoCorr"] = corr_data.corr().iloc[0, 1]
            except Exception as e:
                print(e)
        else:
            pass

    result = result.dropna()

    # cumulative returns
    ########################################
    for i_col in range(len(result.columns)):
        col = result.columns[i_col]
        result['Cum' + col] = result[col].cumsum()

    # save data
    ########################################
    file = name + "BackTestReturn_" + str(periods) + ".csv"
    file = os.path.join(path, "alpha_factor_test_result", file)
    result.to_csv(file)
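# Small sketch of the decile step used above, on synthetic fund data rather than the
# Repair_Nav / exposure files: funds are cut into 10 equal-count buckets by factor value
# with pd.qcut, and the average next-period return of each bucket plus the top bucket's
# excess over the cross-sectional mean is reported. Bucket count and labels mirror the
# function above; the data are invented.
import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
labels = ["Group_%d" % i for i in range(1, 11)]
df = pd.DataFrame({"val": rng.normal(size=300),                 # factor value per fund
                   "pct": rng.normal(0.01, 0.05, size=300)})    # next-period return
df["group"] = pd.qcut(df["val"], 10, labels=labels)
group_mean = df.groupby("group", observed=False)["pct"].mean()
print(group_mean)
print(group_mean["Group_10"] - group_mean.mean())               # top-decile excess return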
def rank_fund(self, fund_code, rank_pool, beg_date, end_date, new_fund_date=None, excess=False):
    """
    Rank a fund inside a fund pool. Three ranking modes:
    1. take the peer ranking straight from the Wind API;
    2. for a user-specified pool, rank by absolute return fetched from Wind;
    3. for a user-specified pool, rank by excess return over benchmark fetched from Wind.
    """
    if new_fund_date is None:
        new_fund_date = beg_date

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)
    new_fund_date = Date().change_to_str(new_fund_date)
    print(" Ranking fund %s in pool %s from %s to %s " % (fund_code, rank_pool, beg_date, end_date))

    if rank_pool == 'wind':

        # Wind third-level fund classification
        date_str = "startDate=%s;endDate=%s;fundType=3" % (beg_date, end_date)
        data = w.wss(fund_code, "peer_fund_return_rank_per", date_str)
        val = str(data.Data[0][0])
        data = w.wss(fund_code, "peer_fund_return_rank_prop_per", date_str)
        try:
            pct = np.round(data.Data[0][0] / 100.0, 3)
        except Exception as e:
            print(e)
            print("Wind returned a non-numeric ranking percentile")
            pct = "None"
        return val, pct

    else:

        # build the fund pool
        pool = FundPool().get_fund_pool_all(date="20181231", name=rank_pool)
        bool_series = (pool['if_connect'] == '非联接基金') & (pool['if_hk'] == '非港股基金')
        bool_series &= (pool['if_a'] == 'A类基金')
        bool_series &= (pool['if_etf'] == '非ETF基金')
        pool = pool[bool_series]
        fund_code_str = ','.join(pool.index.values)

        if not excess:

            # rank by total return over the period
            data = w.wss(fund_code_str, "NAV_adj_return",
                         "startDate=%s;endDate=%s" % (beg_date, end_date))
            data = pd.DataFrame(data.Data, columns=data.Codes, index=['NAV_adj_return']).T
            data = data[~data.index.duplicated()]
            data = pd.concat([data, pool], axis=1)
            data = data[data["setupdate"] <= new_fund_date]
            data = data.dropna(subset=['NAV_adj_return'])
            data = data.sort_values(by='NAV_adj_return', ascending=False)
            data['range'] = range(len(data))
            data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(len(data)))
            data['rank_pct'] = data['range'].map(lambda x: (x + 1) / len(data))

            try:
                val = data.loc[fund_code, "rank"]
                pct = data.loc[fund_code, "rank_pct"]
                pct = np.round(pct, 3)
                file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
                file = os.path.join(self.data_path, file)
                data.to_csv(file)
            except Exception as e:
                print(e)
                val = "None"
                pct = "None"
            return val, pct

        else:

            # rank by excess return over benchmark
            date_str = "startDate=%s;endDate=%s" % (beg_date, end_date)
            data = w.wss(fund_code_str, "NAV_over_bench_return_per", date_str)
            data = pd.DataFrame(data.Data, columns=data.Codes, index=['NAV_over_bench_return_per']).T
            data = pd.concat([data, pool], axis=1)
            data = data[data["setupdate"] <= new_fund_date]
            data = data.dropna(subset=['NAV_over_bench_return_per'])
            data = data.sort_values(by='NAV_over_bench_return_per', ascending=False)
            data['range'] = range(len(data))
            data["rank"] = data['range'].map(lambda x: str(x + 1) + "/" + str(len(data)))
            data['rank_pct'] = data['range'].map(lambda x: (x + 1) / len(data))

            try:
                val = data.loc[fund_code, "rank"]
                pct = data.loc[fund_code, "rank_pct"]
                pct = np.round(pct, 3)
                file = "%s_%s_%s_%s.csv" % (fund_code, rank_pool, beg_date, end_date)
                file = os.path.join(self.data_path, file)
                data.to_csv(file)
            except Exception as e:
                print(e)
                val = "None"
                pct = "None"
            return val, pct
def EMA5(beg_date, end_date):
    """
    Factor: weighted average of the last 5 days' prices, with more recent days
    weighted more heavily. The weights form an arithmetic sequence, so despite the
    name this is a linearly weighted moving average rather than a true exponential
    moving average (whose weights form a geometric sequence).
    """
    # param
    ########################################
    LongTerm = 5
    factor_name = "EMA5"
    ipo_num = 90

    # read data
    ########################################
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")

    # data processing
    ########################################
    pass

    # calculate data daily
    ########################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(close.columns))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        price_before = close.loc[:, data_beg_date:current_date]
        price_stock = price_before.T.dropna(how='all')

        if len(price_stock) == LongTerm:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            # arithmetic weights 1..LongTerm, normalised so the latest day is heaviest
            weight = np.array(list(range(1, LongTerm + 1)))
            weight = weight * 2 / (1 + LongTerm)
            weight = weight / weight.sum()
            price_mean = pd.DataFrame(np.dot(weight, price_stock.values),
                                      columns=[current_date], index=price_stock.columns)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            price_mean = pd.DataFrame([], columns=[current_date], index=price_stock.columns)

        if i == 0:
            res = price_mean
        else:
            res = pd.concat([res, price_mean], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    ########################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
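# Worked example of the weighting scheme above for LongTerm = 5, with made-up prices:
# the raw weights are the arithmetic sequence 1..5, and after normalisation the most
# recent day carries 5/15 of the weight. The 2/(1+LongTerm) scaling in the function is
# cancelled by the subsequent normalisation, which is why it is omitted here.
import numpy as np

long_term = 5
weight = np.arange(1, long_term + 1, dtype=float)
weight /= weight.sum()                      # [1/15, 2/15, 3/15, 4/15, 5/15]
prices = np.array([10.0, 10.2, 10.1, 10.4, 10.5])   # oldest to newest
print(float(np.dot(weight, prices)))        # linearly weighted price, newest day heaviest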
def cal_fund_regression_exposure_style(self, fund, beg_date, end_date, period="D"):

    # params
    ########################################
    up_style_exposure = 1.25
    up_position_exposure = 0.95
    low_position_exposure = 0.75
    position_sub = 0.08

    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # data: Barra factor returns and fund returns
    ########################################
    type_list = ['STYLE', 'COUNTRY']
    barra_name = list(Barra().get_factor_name(type_list)['NAME_EN'].values)
    barra_return = Barra().get_factor_return(None, None, type_list)
    date_series = Date().get_trade_date_series(beg_date, end_date, period=period)

    if fund[len(fund) - 2:] == 'OF':
        fund_return = FundFactor().get_fund_factor("Repair_Nav_Pct", None, [fund])
    else:
        fund_return = Index().get_index_factor(fund, attr=["PCT"]) * 100
        fund_return.columns = [fund]

    data = pd.concat([fund_return, barra_return], axis=1)
    data = data.dropna()
    print(" Fund Code Total Len %s " % len(data))
    factor_number = len(barra_name)

    stock_ratio = FundFactor().get_fund_factor("Stock_Ratio", None, [fund]) / 100
    date_series = list(set(date_series) & set(data.index))
    date_series.sort()

    # rolling regression: the exposure on each day is estimated from the trailing window
    ########################################
    for i_date in range(0, len(date_series)):

        # regression window
        period_end_date = date_series[i_date]
        period_beg_date = Date().get_trade_date_offset(period_end_date, -self.regression_period)
        data_end_date = Date().get_trade_date_offset(period_end_date, -0)
        period_date_series = Date().get_trade_date_series(period_beg_date, data_end_date)
        data_periods = data.reindex(period_date_series)
        data_periods = data_periods.dropna()

        # fund stock position from the most recent quarterly report
        ########################################
        quarter_date = Date().get_last_fund_quarter_date(period_end_date)
        stock_ratio_fund = stock_ratio.loc[quarter_date, fund]
        print("########## Calculate Regression Exposure %s %s %s %s %s %s ##########" %
              (fund, period_beg_date, period_end_date, quarter_date, len(data_periods), stock_ratio_fund))

        if len(data_periods) > self.regression_period_min:

            y = data_periods.iloc[:, 0].values
            x = data_periods.iloc[:, 1:].values
            x_add = sm.add_constant(x)

            low_position_exposure = max(stock_ratio_fund - position_sub, low_position_exposure)
            if np.isnan(low_position_exposure):
                low_position_exposure = 0.75

            # quadratic program: minimise ||y - X b||^2 subject to box constraints
            P = 2 * np.dot(x_add.T, x_add)
            Q = -2 * np.dot(x_add.T, y)
            G_up = np.diag(np.ones(factor_number + 1))
            G_low = -np.diag(np.ones(factor_number + 1))
            G = np.vstack((G_up, G_low))
            h_up = np.vstack((np.ones((factor_number, 1)) * up_style_exposure,
                              np.array([up_position_exposure])))
            h_low = np.vstack((np.ones((factor_number, 1)) * up_style_exposure,
                               np.array([-low_position_exposure])))
            h = np.vstack((h_up, h_low))
            P = matrix(P)
            Q = matrix(Q)
            G = matrix(G)
            h = matrix(h)

            try:
                result = sol.qp(P, Q, G, h)
                params_add = pd.DataFrame(np.array(result['x'][1:]),
                                          columns=[period_end_date], index=barra_name).T
                print(params_add)
            except Exception as e:
                params_add = pd.DataFrame([], columns=[period_end_date], index=barra_name).T
                print(params_add)
        else:
            params_add = pd.DataFrame([], columns=[period_end_date], index=barra_name).T
            print(params_add)

        if i_date == 0:
            params_new = params_add
        else:
            params_new = pd.concat([params_new, params_add], axis=0)

    # merge the new exposures with what is already on disk
    ########################################
    out_path = os.path.join(self.data_path_exposure, 'fund_regression_exposure_style')
    out_file = os.path.join(out_path, 'Fund_Regression_Exposure_Style_' + fund + '.csv')

    if os.path.exists(out_file):
        params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        params_old.index = params_old.index.map(str)
        params = FactorOperate().pandas_add_row(params_old, params_new)
    else:
        params = params_new

    print(params)
    params.to_csv(out_file)
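# Sketch of the constrained least-squares step above written as a cvxopt QP on synthetic
# data: minimising ||y - X b||^2 is rewritten as (1/2) b'Pb + q'b with P = 2 X'X and
# q = -2 X'y, and box limits on the exposures become G b <= h. The bounds used here
# (±1.25 for the style coefficients, [0.75, 0.95] for the last coefficient) simply mirror
# the defaults above; the data, sizes and bound layout are illustrative assumptions.
import numpy as np
from cvxopt import matrix, solvers

rng = np.random.default_rng(3)
n_obs, n_fac = 60, 4
X = rng.normal(size=(n_obs, n_fac))
y = X @ np.array([0.3, -0.2, 0.1, 0.85]) + rng.normal(scale=0.1, size=n_obs)

P = matrix(2.0 * X.T @ X)
q = matrix(-2.0 * X.T @ y)
G = matrix(np.vstack([np.eye(n_fac), -np.eye(n_fac)]))
h = matrix(np.hstack([np.full(n_fac - 1, 1.25), [0.95],      # upper bounds:  b <= h_up
                      np.full(n_fac - 1, 1.25), [-0.75]]))   # lower bounds: -b <= h_low
solvers.options['show_progress'] = False
sol_qp = solvers.qp(P, q, G, h)
print(np.array(sol_qp['x']).ravel())                          # constrained exposures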
def cal_style_position(self, beg_date, end_date, code):
    """
    Estimate the style weights and the stock/bond position of a fund or index with
    constrained least squares: weights are non-negative, fully invested, and the
    stock ratio and daily turnover are bounded.
    """
    x_pct = self.data_return[self.index_code_list]
    x_pct = x_pct.dropna(how='all')
    y_pct = pd.DataFrame(self.data_return[code])
    y_pct = y_pct.dropna()

    all_date_series = Date().get_trade_date_series(beg_date, end_date, period="D")
    y_series = Date().get_trade_date_series(y_pct.index[0], y_pct.index[-1])
    date_series = list(set(y_series) & set(all_date_series))
    date_series.sort()
    error = False

    for i_date in range(len(date_series)):

        ed_date = date_series[i_date]
        bg_date = Date().get_trade_date_offset(ed_date, -self.regress_length)
        last_date = Date().get_trade_date_offset(ed_date, -1)

        x_pct_period = x_pct.loc[bg_date:ed_date, :]
        x_pct_period = x_pct_period.T.dropna().T
        x_columns = x_pct_period.columns
        data = pd.concat([y_pct, x_pct_period], axis=1)
        data = data.dropna()

        # on the first day, or after a failed regression, relax the turnover
        # constraint and assume an equal-weighted previous portfolio
        if i_date != 0:
            turnover_daily = self.turnover_daily
            old_weight = old_weight.loc[x_columns, :]
            old_weight = old_weight.fillna(0.0)
        else:
            n = len(x_columns)
            old_weight = pd.DataFrame(n * [1.0 / n], index=x_columns, columns=[last_date])
            turnover_daily = 2.0
        if error:
            n = len(x_columns)
            old_weight = pd.DataFrame(n * [1.0 / n], index=x_columns, columns=[last_date])
            turnover_daily = 2.0

        print("## Cal Regress %s %s %s %s TurnOver %s##" %
              (code, bg_date, ed_date, data.shape, turnover_daily))

        if len(data) >= self.regress_length_min:

            y = data[code].values
            x = data.iloc[:, 1:].values
            k = x.shape[1]
            old = old_weight.T.values[0]

            try:
                w = cvx.Variable(k)
                sigma = y - x @ w
                prob = cvx.Problem(cvx.Minimize(cvx.sum_squares(sigma)),
                                   [cvx.sum(w) == 1.0,
                                    cvx.sum(w[1:]) >= self.stock_ratio_low,
                                    cvx.sum(w[1:]) <= self.stock_ratio_up,
                                    cvx.sum(cvx.abs(w - old)) <= turnover_daily,
                                    w >= 0])
                prob.solve()
                print('Solver Status : ', prob.status)

                params_add = pd.DataFrame(w.value, columns=[ed_date], index=x_columns)
                stock_sum = params_add.loc[self.index_code_list[1:], ed_date].sum()
                concat_data = pd.concat([params_add, old_weight], axis=1)
                concat_data = concat_data.dropna()
                turnover_real = (concat_data[last_date] - concat_data[ed_date]).abs().sum()
                params_add.loc['StockRatio', ed_date] = stock_sum
                params_add.loc['BondRatio', ed_date] = params_add.loc[self.index_code_list[0], ed_date]
                params_add.loc['TurnOverDaily', ed_date] = turnover_real
                print(params_add.T)
                old_weight = params_add
                error = False
            except Exception as e:
                print(ed_date, code, "regression failed", e)
                # keep an empty column so the failed date is not silently duplicated
                params_add = pd.DataFrame([], columns=[ed_date], index=x_columns)
                error = True
        else:
            print(ed_date, code, "not enough data in the regression window")
            params_add = pd.DataFrame([], columns=[ed_date], index=x_columns)
            error = True

        if i_date == 0:
            params_new = params_add
        else:
            params_new = pd.concat([params_new, params_add], axis=1)

    # merge the new weights with what is already on disk
    ########################################
    params_new = params_new.T
    out_file = os.path.join(self.data_path, 'RestraintOLSStylePosition_%s.csv' % code)

    if os.path.exists(out_file):
        params_old = pd.read_csv(out_file, index_col=[0], encoding='gbk')
        params_old.index = params_old.index.map(str)
        params = FactorOperate().pandas_add_row(params_old, params_new)
    else:
        params = params_new

    params.to_csv(out_file)
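# Minimal cvxpy version of the constrained style regression above, on synthetic data:
# fund returns are regressed on a handful of index returns with the weights forced to
# be non-negative, to sum to one, and to move no more than a turnover budget away from
# the previous weights. Index count, turnover budget and the noise level are
# illustrative assumptions, not values taken from the class above.
import numpy as np
import cvxpy as cp

rng = np.random.default_rng(4)
n_obs, k = 90, 5
X = rng.normal(scale=0.01, size=(n_obs, k))            # index daily returns
true_w = np.array([0.2, 0.3, 0.2, 0.2, 0.1])
y = X @ true_w + rng.normal(scale=0.001, size=n_obs)   # fund daily returns
w_old = np.full(k, 1.0 / k)                            # yesterday's estimated weights

w = cp.Variable(k)
problem = cp.Problem(
    cp.Minimize(cp.sum_squares(y - X @ w)),
    [cp.sum(w) == 1.0,
     w >= 0,
     cp.sum(cp.abs(w - w_old)) <= 0.5])                # daily turnover budget
problem.solve()
print(problem.status, np.round(w.value, 3))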
def BetaInverse(beg_date, end_date):
    """
    Factor: beta estimated with the roles of the stock and the market swapped, i.e.
    BetaInverse = Corr * BenchMark_Std / Stock_Std (the usual beta is
    Corr * Stock_Std / BenchMark_Std). The market return is proxied by the
    cross-sectional average of stock returns.
    """
    # param
    ########################################
    LongTerm = 120
    MinimumSize = 40
    factor_name = "BetaInverse"
    ipo_num = 90

    # read data
    ########################################
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc")

    # data processing
    ########################################
    pass

    # calculate data daily
    ########################################
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(pct.columns) & set(date_series))
    date_series.sort()

    for i in range(0, len(date_series)):

        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        pct_before = pct.loc[:, data_beg_date:current_date]
        pct_stock = pct_before.T.dropna(how='all')
        pct_average = pct_stock.mean(axis=1)

        if len(pct_stock) > MinimumSize:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            corr_date = pct_stock.corrwith(pct_average)
            std_stock = pct_stock.std()
            std_market = pct_average.std()
            beta = corr_date * std_market / std_stock
            effective_number = pct_stock.count()
            beta[effective_number <= MinimumSize] = np.nan
            # the factor value saved for this date is the inverse beta itself
            beta_date = pd.DataFrame(beta.values, columns=[current_date], index=beta.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            beta_date = pd.DataFrame([], columns=[current_date], index=pct.index)

        if i == 0:
            res = beta_date
        else:
            res = pd.concat([res, beta_date], axis=1)

    res = res.T.dropna(how='all').T

    # save data
    ########################################
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
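# Numerical check of the "inverse" beta definition above with synthetic returns: the
# ordinary regression beta is corr * stock_std / market_std, while the swapped version
# computed by this factor is corr * market_std / stock_std. All numbers are invented.
import numpy as np

rng = np.random.default_rng(5)
market = rng.normal(0, 0.01, 250)
stock = 1.2 * market + rng.normal(0, 0.02, 250)

corr = np.corrcoef(stock, market)[0, 1]
beta = corr * stock.std() / market.std()          # usual regression beta (about 1.2)
beta_inverse = corr * market.std() / stock.std()  # the quantity this factor stores
print(round(beta, 3), round(beta_inverse, 3))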
    buf_size = 1024
    fp = open(local_file, 'rb')
    self.ftp.storbinary('STOR ' + file_name, fp, buf_size)
    fp.close()

def upload_folder(self, ftp_folder):
    """ Upload a folder: change into it on the server, creating it first if needed """
    print('Begin Uploading %s ......' % ftp_folder)
    try:
        self.ftp.cwd(ftp_folder)
    except Exception as e:
        self.ftp.mkd(ftp_folder)
        self.ftp.cwd(ftp_folder)


if __name__ == '__main__':

    from quant.mfc.mfc_data import MfcData
    from datetime import datetime

    date = Date().change_to_str(datetime.today())
    ftp_path = os.path.join(MfcData().ftp_path, date)
    local_path = os.path.join(MfcData().data_path, date)
    if not os.path.exists(local_path):
        os.mkdir(local_path)

    ftp = MyFtp()
    ftp.connect()
    5、中证500基金优选500AlphaIR
    """

    # update fund alpha
    # self = NiceStockFund()
    # self.update_data()

    # 1. 主动股票型基金优选750AlphaIR
    fund_pool_name = "基金持仓基准基金池"
    benchmark_name = "基金持仓基准基金池"
    alpha_len = 750
    alpha_column = "RegressAlphaIR"
    port_name = "主动股票型基金优选750AlphaIR"
    beg_date = "20181201"
    end_date = "20190404"
    end_last_date = Date().get_trade_date_offset(end_date, -1)
    bench_code = "885000.WI"

    style_deviate = 0.20
    position_deviate = 0.02
    fund_up_ratio = 0.15
    turnover = 0.25

    self = NiceStockFund(fund_pool_name, benchmark_name, alpha_len, alpha_column,
                         port_name, style_deviate, position_deviate, fund_up_ratio, turnover)
    # self.cal_fund_factor_alldate(beg_date, end_date)
    # self.opt_alldate(beg_date, end_date, turnover_control=True)
    # self.upload_all_wind_port()
    # self.backtest(bench_code)
    # self.cal_fund_factor_date(end_date)
    # self.opt_date(end_date, end_last_date, turnover_control=True)
def get_all_data(self, beg_date, end_date):
    """ Load holder-share data (only annual and semi-annual reports are used here;
    be careful to distinguish the Q2/Q4 reports from the half-year/annual ones) """

    share_fund = Stock().read_factor_h5("HolderTotalByFund")                        # mutual funds
    share_inst = Stock().read_factor_h5("HolderTotalByInst")                        # institutions
    share_general_corp = Stock().read_factor_h5("HolderTotalByGeneralCorp")         # general corporations
    share_hf = Stock().read_factor_h5("HolderTotalByHF")                            # private funds
    share_qfii = Stock().read_factor_h5("HolderTotalByQFII")                        # QFII
    share_social_security = Stock().read_factor_h5("HolderTotalBySocialSecurity")   # social security
    share_insurance = Stock().read_factor_h5("HolderTotalByInsurance")              # insurance

    halfyear_date = Date().get_last_fund_halfyear_date(end_date)
    date_series = Date().get_normal_date_series(Date().get_trade_date_offset(beg_date, -200),
                                                halfyear_date, "S")
    print(date_series)

    share_fund = share_fund[date_series]
    share_inst = share_inst[date_series]
    share_general_corp = share_general_corp[date_series]
    share_hf = share_hf[date_series]
    share_qfii = share_qfii[date_series]
    share_social_security = share_social_security[date_series]
    share_insurance = share_insurance[date_series]

    share_hk = Stock().read_factor_h5("HK2CHoldShare") / 1000000                    # Stock Connect (northbound)
    share_hk = share_hk.T.fillna(method="pad", limit=3).T
    share_all = Stock().read_factor_h5("Share_TotalA") / 100000000                  # total A shares
    price_unadjust = Stock().read_factor_h5("PriceCloseUnadjust")                   # unadjusted close
    share_free = Stock().read_factor_h5("Free_FloatShare") / 100000000              # free-float shares
    print(share_all.columns)

    result = pd.DataFrame([])
    date_series_data = Date().get_normal_date_series(beg_date, end_date, "M")

    for i_date in range(len(date_series_data)):

        date = date_series_data[i_date]
        price_date = price_unadjust.columns[price_unadjust.columns <= date][-1]
        share_date = share_fund.columns[share_fund.columns <= date][-1]
        print(date, price_date, share_date)

        try:
            share_hk[price_date]
        except Exception as e:
            share_hk.loc[:, price_date] = np.nan

        data = pd.concat([share_fund[share_date], share_inst[share_date],
                          share_general_corp[share_date], share_hf[share_date],
                          share_qfii[share_date], share_social_security[share_date],
                          share_insurance[share_date], share_hk[price_date],
                          share_all[price_date], share_free[price_date],
                          price_unadjust[price_date]], axis=1)
        data.columns = ['公募基金', '机构', '一般法人', '私募', 'QFII', '社保', '保险',
                        '陆股通', '全A', "流通", '价格']
        col = ['公募基金', '机构', '一般法人', '私募', 'QFII', '社保', '保险', '陆股通', '全A', "流通"]
        data_mv = data[col].mul(data['价格'], axis='index')
        data_mv_sum = pd.DataFrame(data_mv.sum())
        data_mv_sum.columns = [date]
        data_mv_sum = data_mv_sum.T
        result = pd.concat([result, data_mv_sum], axis=0)

    result["总和"] = result[['公募基金', 'QFII', '社保', '保险', '陆股通']].sum(axis=1)
    result["总和(剔除保险)"] = result[['公募基金', 'QFII', '社保', '陆股通']].sum(axis=1)
    ratio = result.div(result['全A'], axis='index')
    ratio_free = result.div(result['流通'], axis='index')
    return result, ratio, ratio_free
def generate_excel(self, end_date):
    """ Write the Stock Connect (northbound) flow summary to Excel """

    beg_date = Date().get_trade_date_offset(end_date, -60)
    beg_1m_date = Date().get_trade_date_offset(end_date, -20)
    beg_1m_date = "20190201"   # hard-coded start of the "last month" window

    # weekly net-inflow time series over the recent period
    from quant.stock.index import Index
    index_data = Index().get_index_factor(index_code="000300.SH")
    date_series = Date().get_trade_date_series(beg_date, end_date, "W")
    result = pd.DataFrame([])
    ed_date = end_date

    for i in range(len(date_series) - 1):
        bg_date = Date().get_trade_date_offset(ed_date, -5)
        print("Hk Inflow Period %s %s" % (bg_date, ed_date))
        result_add = self.hk_inflow_period(bg_date, ed_date)
        result_add.loc[ed_date, '沪深300'] = index_data.loc[ed_date, "CLOSE"]
        result = pd.concat([result, result_add], axis=0)
        ed_date = bg_date

    result = result.sort_index()

    # stocks with the largest / smallest average holding value over the last month
    stock = self.hk_inflow_period_stock(beg_1m_date, end_date)

    # industries ranked by average holding value over the last month
    industry = self.hk_inflow_period_industry(beg_1m_date, end_date)

    # output location
    sub_path = os.path.join(self.data_path, end_date)
    if not os.path.exists(sub_path):
        os.makedirs(sub_path)

    # write everything to the workbook
    filename = os.path.join(sub_path, '融资融券情况春节后.xlsx')
    print(filename)
    excel = WriteExcel(filename)
    sheet_name = "融资融券情况"
    worksheet = excel.add_worksheet(sheet_name)

    num_format_pd = pd.DataFrame([], columns=result.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', "净流入占比"] = '0.00%'
    excel.write_pandas(result, worksheet, begin_row_number=0, begin_col_number=1,
                       num_format_pd=num_format_pd, color="orange", fillna=True)
    excel.chart_columns_plot(worksheet, sheet_name=sheet_name, series_name=["净流入金额", '沪深300'],
                             chart_name="最近3个月每周融资余额净买入金额(亿元)", insert_pos="I15",
                             cat_beg="B2", cat_end="B13",
                             val_beg_list=["F2", "H2"], val_end_list=["F13", "H13"])

    num_format_pd = pd.DataFrame([], columns=stock.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', "净流入占比"] = '0.00%'
    excel.write_pandas(stock, worksheet, begin_row_number=0, begin_col_number=8,
                       num_format_pd=num_format_pd, color="orange", fillna=True)

    num_format_pd = pd.DataFrame([], columns=industry.columns, index=['format'])
    num_format_pd.loc['format', :] = '0.00'
    num_format_pd.loc['format', "净流入占比"] = '0.00%'
    excel.write_pandas(industry, worksheet, begin_row_number=0, begin_col_number=15,
                       num_format_pd=num_format_pd, color="orange", fillna=True)
    excel.chart_columns_plot(worksheet, sheet_name=sheet_name, series_name=["净流入占比", '净流入金额'],
                             chart_name="行业最近1月融资净买入金额", insert_pos="I32",
                             cat_beg="P2", cat_end="P30",
                             val_beg_list=["U2", "S2"], val_end_list=["U30", "S30"])
    excel.close()
def cal_factor_barra_hsigma(self, beg_date, end_date):
    """
    Standard deviation of the residual returns from the stock-vs-market regression
    (those residual returns are produced while computing the Beta factor), then
    orthogonalised by regressing on the Beta and Size exposures and keeping the residual.
    """
    term = 252
    half_life = 62
    min_periods = 20

    res_pct = self.get_risk_factor_exposure("cne5_raw_beta_res_pct").T
    size_data = self.get_risk_factor_exposure("cne5_normal_size")
    beta_data = self.get_risk_factor_exposure("cne5_normal_beta")

    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(res_pct.index) & set(date_series) &
                       set(size_data.columns) & set(beta_data.columns))
    date_series.sort()
    res = pd.DataFrame([])

    for i in range(0, len(date_series)):

        date = date_series[i]
        print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))
        data_beg_date = Date().get_trade_date_offset(date, -(term - 1))
        data_pre = res_pct.loc[data_beg_date:date, :]
        data_pre = data_pre.dropna(how='all')

        # exponentially weighted residual volatility, annualised
        data_std = data_pre.ewm(halflife=half_life, min_periods=min_periods).std().loc[date, :]
        data_date = pd.DataFrame(data_std) * np.sqrt(250)
        data_date.columns = [date]

        # orthogonalise against the Size and Beta exposures
        regression_data = pd.concat([size_data[date], beta_data[date], data_date], axis=1)
        regression_data.columns = ['size', 'beta', 'y']
        regression_data = regression_data.dropna()

        if len(regression_data) > 0:
            y = regression_data['y'].values
            x = regression_data[['size', 'beta']].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            res_data_date = pd.DataFrame(regression_data['res'])
            res_data_date.columns = [date]
            res = pd.concat([res, res_data_date], axis=1)

    res = res.T.dropna(how='all').T

    if len(res) != 0:
        self.save_risk_factor_exposure(res, self.raw_factor_name_hsigma)
        res = Stock().remove_extreme_value_mad(res)
        res = Stock().standardization(res)
        self.save_risk_factor_exposure(res, self.factor_name_hsigma)
    else:
        print("The result of risk factor %s from %s to %s is empty" %
              (self.factor_name, beg_date, end_date))
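# Hedged sketch of the HSIGMA construction above on synthetic inputs: take the
# exponentially weighted (half-life) standard deviation of beta-regression residual
# returns, annualise it, then keep the residual of a cross-sectional OLS of that
# volatility on the Size and Beta exposures. Half-life, window and the sqrt(250)
# annualisation mirror the constants above; the data themselves are invented.
import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(6)
n_days, n_stocks = 252, 200
res_pct = pd.DataFrame(rng.normal(0, 1.0, (n_days, n_stocks)))           # residual returns (%)
raw_hsigma = res_pct.ewm(halflife=62, min_periods=20).std().iloc[-1] * np.sqrt(250)

size = pd.Series(rng.normal(size=n_stocks))
beta = pd.Series(rng.normal(size=n_stocks))
exog = sm.add_constant(pd.DataFrame({"size": size, "beta": beta}))
model = sm.OLS(raw_hsigma, exog).fit()
hsigma = raw_hsigma - model.fittedvalues                                  # orthogonalised exposure
print(hsigma.describe())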