def get_wind_file(self):
    """Write weekly wind-format weight csv files from the fund-index regression exposure."""
    exposure = FundRegressionExposure(self.port_name).get_fund_regression_exposure(self.fund_index_code)
    exposure = exposure.dropna(how='all').T
    week_dates = Date().get_trade_date_series(exposure.columns[0], exposure.columns[-1], "W")
    week_dates = sorted(set(week_dates) & set(exposure.columns))
    out_dir = os.path.join(self.wind_port_path, self.port_name)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    for date in week_dates:
        print("Generate File %s" % date)
        port = pd.DataFrame(exposure[date])
        next_date = Date().get_trade_date_offset(date, 1)
        port.columns = ['Weight']
        port.index.name = 'Code'
        # fixed columns required by the wind portfolio file format
        port["CreditTrading"] = "No"
        port["Date"] = next_date
        port["Price"] = 0.0
        port["Direction"] = "Long"
        out_file = os.path.join(out_dir, '%s_%s.csv' % (self.port_name, next_date))
        port.to_csv(out_file)
def cal_factor_barra_cumulative_range(self, beg_date, end_date): """ 过去1-12月最大累计收益 和最小累计收益的差 """ # param t = 12 month_days = 21 pct_chg = Stock().read_factor_h5("Pct_chg").applymap( lambda x: np.log(x / 100 + 1)).T date_series = Date().get_trade_date_series(beg_date, end_date) date_series = list(set(date_series) & set(pct_chg.columns)) date_series.sort() pct_chg_panel = pd.Panel() for i in range(t): length = month_days * (i + 1) pct_chg_sum = pct_chg.rolling(length).sum() pct_chg_sum = pct_chg_sum.dropna(how='all') pct_chg_panel = pd.concat([pct_chg_panel, pct_chg_sum], axis=0) pct_max = pct_chg_panel.max(axis=0) pct_max = pct_max.applymap(lambda x: np.log(x + 1)).T pct_min = pct_chg_panel.min(axis=0) pct_min = pct_min.applymap(lambda x: np.log(x + 1)).T res = pct_max.sub(pct_min) self.save_risk_factor_exposure(res, self.raw_factor_name_range) res = Stock().remove_extreme_value_mad(res) res = Stock().standardization(res) self.save_risk_factor_exposure(res, self.factor_name_range)
def generate_patch_file(self, factor_name, beg_date, end_date):
    """Export a factor exposure matrix as per-date txt files in mail patch format."""
    exposure = self.get_risk_factor_exposure(factor_name)
    exposure = exposure.loc[:, beg_date:end_date]
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(trade_dates) & set(exposure.columns))
    out_dir = os.path.join(self.exposure_txt_path, factor_name)
    # always start from a clean output directory
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    for date in trade_dates:
        one_day = pd.DataFrame(exposure[date])
        one_day.columns = [factor_name]
        one_day = one_day.dropna().round(6)
        if len(one_day) > 0:
            print("Patch Txt File %s %s" % (factor_name, date))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_file = os.path.join(out_dir, "%s_%s.txt" % (factor_name, date))
            PandasToTxt().to_txt(one_day, out_file)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute the expected-net-profit-growth (LTG) factor exposure.

    NOTE(review): the loop below builds `result` (short-window mean minus
    long-window mean of analyst growth expectations, scaled by volatility)
    but `result` is never saved — only the raw `ltg` matrix is persisted.
    This looks like a bug (likely `result` was meant to be saved); confirm
    before changing, since stored factor data depends on current behavior.
    """
    # params: long/short averaging windows in trading days
    long_term = 35
    short_term = 5
    # read data; after .T rows are dates, columns are stock codes
    ltg = Stock().read_factor_h5("ExpectedNetProfitYoY").T
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(ltg.index))
    date_series.sort()
    result = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        long_beg_date = Date().get_trade_date_offset(current_date, -(long_term - 1))
        short_beg_date = Date().get_trade_date_offset(current_date, -(short_term - 1))
        # short-window mean minus long-window mean, normalized by 1 + std
        long_mean = ltg.loc[long_beg_date:current_date, :].mean()
        short_mean = ltg.loc[short_beg_date:current_date, :].mean()
        bias = short_mean - long_mean
        std = ltg.loc[long_beg_date:short_beg_date, :].std()
        res_add = pd.DataFrame(bias / (1 + std))
        res_add.columns = [current_date]
        result = pd.concat([result, res_add], axis=1)
    # save data — saves the RAW ltg matrix, not `result` (see NOTE above)
    ltg = ltg.T.dropna(how='all').T
    self.save_alpha_factor_exposure(ltg, self.raw_factor_name)
def TurnOverBias6m3m(beg_date, end_date):
    """Turnover bias: 120-day average turnover minus 60-day average turnover.

    NOTE(review): the original (Chinese) docstring admitted the factor name is
    wrong (it says 6m/3m and mentioned "160 - 60" while the code uses 120/60);
    the name is kept for backward compatibility with stored factor data.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced with ``.loc``
    (label-based date slices), and an empty date range no longer raises
    NameError on an unbound ``res``.
    """
    # params
    LongTerm = 120
    ShortTerm = 60
    factor_name = "TurnOverBias6m3m"
    ipo_num = 90
    # read data; after .T rows are dates, columns are stock codes
    turn_over = Stock().get_factor_h5("TurnOver_Daily", None, "primary_mfc").T
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(turn_over.index) & set(date_series))
    date_series.sort()
    res_list = []
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date_long = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        data_beg_date_short = Date().get_trade_date_offset(current_date, -(ShortTerm - 1))
        # .loc replaces the removed .ix accessor (label-based row slice on dates)
        turn_over_long = turn_over.loc[data_beg_date_long:current_date, :]
        turn_over_long = turn_over_long.T.dropna(how='all').T
        turn_over_short = turn_over.loc[data_beg_date_short:current_date, :]
        turn_over_short = turn_over_short.T.dropna(how='all').T
        if len(turn_over_long) >= int(0.8 * LongTerm):
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            turn_over_diff = turn_over_long.mean() - turn_over_short.mean()
            res_add = pd.DataFrame(turn_over_diff.values, columns=[current_date],
                                   index=turn_over_diff.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            res_add = pd.DataFrame([], columns=[current_date], index=turn_over.columns)
        res_list.append(res_add)
    # robust to an empty date range (previously NameError on unbound res)
    res = pd.concat(res_list, axis=1) if res_list else pd.DataFrame()
    res = res.T.dropna(how='all').T
    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def cal_factor_exposure(self, beg_date=None, end_date=None):
    """Non-linear size exposure.

    Cross-sectionally regresses size**3 on size each day, keeps the residual,
    then winsorizes (MAD) and standardizes the result.

    Fix: days whose cross-section is empty after dropna() are now skipped —
    previously sm.OLS would raise on a zero-row design matrix. This matches
    the guarded pattern used by the residual-volatility factor in this file.
    """
    # read data
    size_data = self.get_risk_factor_exposure("cne5_normal_size")
    # NOTE: despite the original local name ("square"), the CUBE of size is used
    size_cubed = size_data ** 3
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(size_data.columns))
    date_series.sort()
    res_data = pd.DataFrame([])
    # calculate everyday
    for i_date in range(len(date_series)):
        date = date_series[i_date]
        print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))
        regression_data = pd.concat([size_data[date], size_cubed[date]], axis=1)
        regression_data.columns = ['x', 'y']
        regression_data = regression_data.dropna()
        if len(regression_data) > 0:
            y = regression_data['y'].values
            x = regression_data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            # residual of the daily cross-sectional regression
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            res_data_date = pd.DataFrame(regression_data['res'])
            res_data_date.columns = [date]
            res_data = pd.concat([res_data, res_data_date], axis=1)
    res_data = res_data.T.dropna(how='all').T
    res_data = FactorPreProcess().remove_extreme_value_mad(res_data)
    res_data = FactorPreProcess().standardization(res_data)
    self.save_risk_factor_exposure(res_data, self.factor_name)
def change_quarter_to_daily_with_disclosure_date(data, report_data, beg_date=None, end_date=None):
    """Expand quarterly data to daily frequency using report disclosure dates.

    Parameters
    ----------
    data : DataFrame, stocks x report dates — quarterly values.
    report_data : DataFrame, stocks x trade dates — for each trade date, the
        report date (numeric) whose figures were the latest disclosed.
    beg_date, end_date : optional range bounds; default to the data's first
        column and today.

    Returns a stocks x trade dates DataFrame.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced with ``.loc``,
    the unused exception binding is dropped, and an empty date range now
    returns an empty DataFrame instead of raising NameError.
    """
    data = data.dropna(how='all')
    report_data = report_data.dropna(how='all')
    if beg_date is None:
        beg_date = data.columns[0]
    if end_date is None:
        end_date = datetime.today()
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(report_data.columns) & set(date_series))
    date_series.sort()
    result = pd.DataFrame()  # fix: previously unbound when date_series was empty
    for i_date in range(len(date_series)):
        date_daily = date_series[i_date]
        report_data_val = report_data[date_daily].dropna()
        # distinct report dates in force on this trade date
        report_date_list = list(set(list(report_data_val.values)))
        print("Calculate Daily Data at %s with %s " % (date_daily, report_date_list))
        res = pd.DataFrame()
        for i_set in range(len(report_date_list)):
            report_date_number = report_date_list[i_set]
            report_date = str(int(report_date_number))
            stock_index = list((report_data_val[report_data_val == report_date_number]).index.values)
            stock_index = list(set(stock_index) & set(data.index))
            stock_index.sort()
            try:
                # .loc replaces the removed .ix accessor (label lookup)
                data_ttm = data.loc[stock_index, report_date]
                data_ttm = pd.DataFrame(data_ttm.values, columns=[date_daily], index=data_ttm.index)
            except Exception:
                # report date column missing in `data` — contribute nothing
                data_ttm = pd.DataFrame([], columns=[date_daily])
            res = pd.concat([res, data_ttm], axis=0)
        # keep the first value for stocks mapped by more than one report date
        res = res.loc[~res.index.duplicated(keep='first'), :]
        index_sort = list(set(res.index))
        index_sort.sort()
        res = res.loc[index_sort, :]
        result = pd.concat([result, res], axis=1)
    return result
def lasso_fund_pool():
    """Build a fund x quarter matrix of top-10 holding concentration.

    NOTE(review): this function cannot run as written —
      * `code_list` is referenced before assignment (NameError on the
        `list(code_list['wind_code'].values)` line); presumably a fund-pool
        DataFrame with a 'wind_code' column was meant to be loaded first.
      * `position_all` is computed but never used.
      * `path` in the final `to_csv` call is undefined in this scope.
      * `.ix` has been removed from pandas; note label-based `.ix[0:10]` on the
        reset integer index was inclusive (11 rows), not 10.
    Confirm the intended data sources before repairing.
    """
    fund_holder = Fund().get_fund_holding_all()
    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=["20180331"], fund_pool=None)
    code_list = list(code_list['wind_code'].values)
    date_list = Date().get_normal_date_series(beg_date="20041231", end_date=datetime.today(), period="Q")
    code_list.sort()
    date_list.sort()
    result = pd.DataFrame([], index=code_list, columns=date_list)
    for i_date in range(len(date_list)):
        for i_fund in range(len(code_list)):
            fund_code = code_list[i_fund]
            date = date_list[i_date]
            # holdings of one fund at one report date, largest weights first
            holder = fund_holder[fund_holder.FundCode == fund_code]
            holder = holder[holder.Date == date]
            holder = holder.sort_values(by=['Weight'], ascending=False)
            holder = holder.reset_index(drop=True)
            if len(holder) >= 10:
                holder = holder.ix[0:10, :]
            result.ix[fund_code, date] = holder.Weight.sum()
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, holder.Weight.sum()))
    result.to_csv(path + '')
def cal_factor_exposure(self, beg_date, end_date):
    """Earnings yield (EP): inverse of trailing-twelve-month PE, day by day."""
    pe_ttm = Stock().read_factor_h5("PE_ttm")
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(pe_ttm.columns) & set(trade_dates))
    res = pd.DataFrame()
    for current_date in trade_dates:
        print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
        pe = pe_ttm[current_date]
        pe = pe[pe != 0.0]  # guard against division by zero
        ep = 1.0 / pe
        ep = pd.DataFrame(ep.values, columns=[current_date], index=ep.index)
        res = pd.concat([res, ep], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Amihud-style illiquidity: mean of |daily return| / trade amount over a 20-day window."""
    # params
    window = 20
    min_obs = int(window / 2)
    cap = 80  # ratios above this are treated as outliers (set to NaN)
    # read data; after .T rows are dates, columns are stock codes
    pct = Stock().read_factor_h5("Pct_chg").T
    amount = Stock().read_factor_h5("TradeAmount").T / 100000000
    [pct, amount] = Stock().make_same_index_columns([pct, amount])
    amount = amount.fillna(0.0)
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(trade_dates) & set(pct.index))
    res = pd.DataFrame()
    for current_date in trade_dates:
        window_beg = Date().get_trade_date_offset(current_date, -(window - 1))
        amount_window = amount.loc[window_beg:current_date, :]
        if len(amount_window) > min_obs:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            # drop stocks suspended (zero amount) on too many days of the window
            zero_days = amount_window.applymap(lambda x: 1.0 if x == 0.0 else 0.0).sum()
            keep = (zero_days[zero_days < min_obs]).index
            amt = amount.loc[window_beg:current_date, keep]
            ret = pct.loc[window_beg:current_date, keep]
            ratio = ret.abs().div(amt)
            ratio[ratio > cap] = np.nan
            day_res = pd.DataFrame(ratio.mean())
            day_res.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            day_res = pd.DataFrame([], columns=[current_date], index=amount_window.columns)
        res = pd.concat([res, day_res], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def VolumeMean20d(beg_date, end_date):
    """Average daily trade amount over the past 20 trading days.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced with ``.loc``
    (label-based column slice), and an empty date range no longer raises
    NameError on an unbound ``res``.
    """
    # params
    LongTerm = 20
    factor_name = "VolumeMean20d"
    ipo_num = 90
    # read data (stocks x dates)
    trade_amount = Stock().get_factor_h5("TradeAmount", None, "primary_mfc")
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(trade_amount.columns) & set(date_series))
    date_series.sort()
    res = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        # .loc replaces the removed .ix accessor (label-based column slice)
        trade_amount_before = trade_amount.loc[:, data_beg_date:current_date]
        if current_date in trade_amount.columns:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            avg_trade_amount = trade_amount_before.mean(axis=1)
            avg_trade_amount = pd.DataFrame(avg_trade_amount.values, columns=[current_date],
                                            index=avg_trade_amount.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            avg_trade_amount = pd.DataFrame([], columns=[current_date], index=trade_amount.index)
        res = pd.concat([res, avg_trade_amount], axis=1)
    res = res.T.dropna(how='all').T
    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def THSBias(beg_date, end_date):
    """Negative of (last-10-day mean minus previous-30-day mean) of 同花顺 click counts.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced — ``.loc`` for the
    label-based date slice, ``.iloc`` for the positional row slices (the old
    ``.ix`` fell back to positional on the string date index), and an empty
    date range no longer raises NameError on an unbound ``res``.
    """
    # params
    LongTerm = 40
    HalfTerm = int(LongTerm / 2)
    factor_name = "THSBias"
    ipo_num = 90
    # read data (stocks x dates)
    click_num = Stock().get_factor_h5("click_num", None, "primary_mfc")
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(click_num.columns))
    date_series.sort()
    res = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        data_period = click_num.loc[:, data_beg_date:current_date]
        data_period = data_period.T.dropna(how='all')
        if len(data_period) > HalfTerm:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            # first 30 rows = older days; last 10 rows = most recent days
            data_date_pre30 = data_period.iloc[0:30, :].mean()
            data_date_next10 = data_period.iloc[-10:, :].mean()
            data_date = -(data_date_next10 - data_date_pre30)
            effective_number = data_period.count()
            data_date[effective_number <= HalfTerm] = np.nan
            data_date = pd.DataFrame(data_date.values, columns=[current_date], index=data_date.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            data_date = pd.DataFrame([], columns=[current_date], index=click_num.index)
        res = pd.concat([res, data_date], axis=1)
    res = res.T.dropna(how='all').T
    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def Skewness(beg_date, end_date):
    """Negative skewness of daily returns over the past 150 trading days.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced with ``.loc``
    (label-based column slice), and an empty date range no longer raises
    NameError on an unbound ``res``.
    """
    # params
    LongTerm = 150
    MinimumSize = 120
    factor_name = "Skewness"
    ipo_num = 90
    # read data (stocks x dates)
    pct = Stock().get_factor_h5("Pct_chg", None, "primary_mfc")
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(pct.columns) & set(date_series))
    date_series.sort()
    res = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(LongTerm - 1))
        # .loc replaces the removed .ix accessor (label-based column slice)
        pct_before = pct.loc[:, data_beg_date:current_date]
        pct_stock = pct_before.T.dropna(how='all')
        if len(pct_stock) > MinimumSize:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            skew_date = -pct_stock.skew()
            effective_number = pct_stock.count()
            # require enough valid observations per stock
            skew_date[effective_number <= MinimumSize] = np.nan
            skew_date = pd.DataFrame(skew_date.values, columns=[current_date], index=skew_date.index)
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
            skew_date = pd.DataFrame([], columns=[current_date], index=pct.index)
        res = pd.concat([res, skew_date], axis=1)
    res = res.T.dropna(how='all').T
    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def cal_factor_exposure(self, beg_date, end_date):
    """Aggregate residual-volatility factor (Barra CNE5-style).

    Combines 0.74*DASTD + 0.16*CMRA + 0.10*HSIGMA (missing components are
    treated as 0 via fill_value), regresses the sum on size and beta each day,
    and saves the MAD-winsorized, standardized residual.
    """
    # build the three sub-factors first (each persists its own exposure)
    self.cal_factor_barra_std(beg_date, end_date)
    self.cal_factor_barra_cumulative_range(beg_date, end_date)
    self.cal_factor_barra_hsigma(beg_date, end_date)
    # Barra weights for the three components
    dastd = 0.74 * self.get_risk_factor_exposure("cne5_normal_res_vol_std")
    cr = 0.16 * self.get_risk_factor_exposure("cne5_normal_res_vol_cumulative_range")
    hsigma = 0.10 * self.get_risk_factor_exposure("cne5_normal_res_vol_hsigma")
    size_data = self.get_risk_factor_exposure("cne5_normal_size")
    beta_data = self.get_risk_factor_exposure("cne5_normal_beta")
    # fill_value=0.0 means a missing component contributes zero rather than NaN
    residual_volatility = dastd.add(cr, fill_value=0.0)
    residual_volatility = residual_volatility.add(hsigma, fill_value=0.0)
    residual_volatility = residual_volatility.T.dropna(how='all').T
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(size_data.columns) & set(residual_volatility.columns) & set(beta_data.columns) & set(date_series))
    date_series.sort()
    residual_volatility_res = pd.DataFrame([])
    for i_date in range(len(date_series)):
        date = date_series[i_date]
        print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))
        # daily cross-sectional regression of res-vol on size and beta
        regression_data = pd.concat([size_data[date], beta_data[date], residual_volatility[date]], axis=1)
        regression_data.columns = ['size', 'beta', 'residual_volatility']
        regression_data = regression_data.dropna()
        if len(regression_data) > 0:
            y = regression_data['residual_volatility'].values
            x = regression_data[['size', 'beta']].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            # keep only the part orthogonal to size and beta
            regression_data['res'] = regression_data['residual_volatility'] - model.fittedvalues
            res_date = pd.DataFrame(regression_data['res'])
            res_date.columns = [date]
            residual_volatility_res = pd.concat([residual_volatility_res, res_date], axis=1)
    # save data
    res = Stock().remove_extreme_value_mad(residual_volatility_res)
    res = Stock().standardization(res)
    self.save_risk_factor_exposure(res, self.factor_name)
def wind_file(self):
    """Build monthly wind-format portfolio files from raw alpha without industry/style regression.

    Stocks in the bottom 40% by free-float market value are dropped; when more
    than 150 names remain, only the top decile by alpha is kept. Weights are
    proportional to sqrt(free-float market value).
    """
    month_dates = Date().get_trade_date_series(self.alpha_data.columns[0],
                                               self.alpha_data.columns[-1], "M")
    month_dates = sorted(set(month_dates) & set(self.free_mv.columns) & set(self.alpha_data.columns))
    for date in month_dates:
        print(date)
        alpha = pd.DataFrame(self.alpha_data[date])
        alpha.columns = ['Alpha']
        mv = pd.DataFrame(self.free_mv[date])
        mv.columns = ['FreeMV']
        mv['FreeMV'] = mv['FreeMV'].map(np.sqrt)
        data = pd.concat([alpha, mv], axis=1).dropna()
        # drop small free-float market-value stocks (keep the top 60%)
        data = data.sort_values(by=['FreeMV'], ascending=False)
        data = data.iloc[0:int(len(data) * 0.60), :]
        data = data.sort_values(by=['Alpha'], ascending=False)
        out_dir = os.path.join(self.wind_port_path, self.port_name)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        if len(data) > 150:
            top_decile = int(len(data) / 10)
            data = data.iloc[0:top_decile, :]
        print("Generate File %s" % date, len(data))
        next_date = Date().get_trade_date_offset(date, 1)
        data['Weight'] = data['FreeMV'] / data['FreeMV'].sum()
        data.index.name = 'Code'
        data["CreditTrading"] = "No"
        data["Date"] = next_date
        data["Price"] = 0.0
        data["Direction"] = "Long"
        out_file = os.path.join(out_dir, '%s_%s.csv' % (self.port_name, next_date))
        data.to_csv(out_file)
def ReturnBetweendayLn(beg_date, end_date):
    """Overnight log return: ln(today's open / yesterday's close) * 100.

    Fix: the local holding the open-price matrix no longer shadows the
    ``open`` builtin. Behavior is otherwise unchanged.
    """
    # params
    factor_name = 'ReturnBetweendayLn'
    ipo_num = 90
    # read data
    close = Stock().get_factor_h5("PriceCloseAdjust", None, "alpha_dfc")
    open_price = Stock().get_factor_h5("PriceOpenAdjust", None, "alpha_dfc")
    # data preprocessing: align rows/columns
    [close, open_price] = Stock().make_same_index_columns([close, open_price])
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(close.columns))
    date_series.sort()
    res = pd.DataFrame([], columns=date_series, index=close.index)
    for i in range(1, len(date_series)):
        current_date = date_series[i]
        # NOTE(review): offset -1 may fall outside close.columns (KeyError) when
        # the previous trade date is missing from the data — confirm upstream data
        before_date = Date().get_trade_date_offset(current_date, -1)
        if current_date in close.columns:
            print('Calculating factor %s at date %s' % (factor_name, current_date))
            close_yes = close[before_date]
            open_today = open_price[current_date]
            data_date = (open_today / close_yes).map(np.log) * 100
            res[current_date] = data_date
        else:
            print('Calculating factor %s at date %s is null' % (factor_name, current_date))
    res = res.T.dropna(how='all').T
    # save data
    Stock().write_factor_h5(res, factor_name, "alpha_dfc")
    return res
def cal_factor_exposure(self, beg_date, end_date):
    """Money-flow factor: 10-day net inflow over 10-day free-float market value, in percent."""
    # params
    window = 10
    min_obs = int(0.8 * window)
    # read data
    inflow = Stock().read_factor_h5("Mf_Inflow")
    price = Stock().read_factor_h5("Price_Unadjust")
    shares = Stock().read_factor_h5("Free_FloatShare")
    # free-float market value = unadjusted price * free-float shares
    [price, shares] = Stock().make_same_index_columns([price, shares])
    free_mv = price.mul(shares)
    [inflow, free_mv] = Stock().make_same_index_columns([inflow, free_mv])
    # after .T rows are dates, columns are stock codes
    inflow = inflow.T
    free_mv = free_mv.T
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(trade_dates) & set(inflow.index))
    res = pd.DataFrame()
    for current_date in trade_dates:
        window_beg = Date().get_trade_date_offset(current_date, -(window - 1))
        inflow_win = inflow.loc[window_beg:current_date, :]
        mv_win = free_mv.loc[window_beg:current_date, :]
        if len(inflow_win) >= min_obs:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            pair = pd.concat([inflow_win.sum(), mv_win.sum()], axis=1)
            pair.columns = ['inflow', 'free_mv']
            pair = pair[pair['free_mv'] != 0.0]  # avoid division by zero
            pair['ratio'] = pair['inflow'] / pair['free_mv']
            day_res = pd.DataFrame(pair['ratio']) * 100
            day_res.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            day_res = pd.DataFrame([], columns=[current_date], index=free_mv.columns)
        res = pd.concat([res, day_res], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Negative coefficient of variation (std/mean) of trade amount over a 40-day window."""
    # params
    long_window = 40
    zero_limit = int(long_window * 0.5)   # max zero-amount days per stock
    min_obs = int(long_window * 0.8)      # min window length to compute
    # read data; after .T rows are dates, columns are stock codes
    amount = Stock().read_factor_h5("TradeAmount").T
    amount = amount.dropna(how='all')
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(amount.index) & set(trade_dates))
    res = pd.DataFrame([])
    for current_date in trade_dates:
        window_beg = Date().get_trade_date_offset(current_date, -(long_window - 1))
        window_amt = amount.loc[window_beg:current_date, :].fillna(0.0)
        if len(window_amt) >= min_obs:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            # drop stocks suspended (zero amount) on too many days
            zero_days = window_amt.applymap(lambda x: 1.0 if x == 0.0 else 0.0).sum()
            keep = (zero_days[zero_days < zero_limit]).index
            kept_amt = amount.loc[window_beg:current_date, keep]
            day_res = pd.DataFrame(-kept_amt.std() / kept_amt.mean())
            day_res.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            day_res = pd.DataFrame([], columns=[current_date], index=amount.columns)
        res = pd.concat([res, day_res], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_return_all(self, beg_date, end_date, risk_model_name, period='D'):
    """Compute per-date factor returns, factor exposures and stock residual returns.

    For each trade date: runs the cross-sectional regression via
    `cal_return_date`, writes the previous date's exposure to csv, and
    accumulates factor returns and residual returns. The accumulated series
    are then merged into (appended to) the existing csv files on disk.
    """
    self.set_model_name(risk_model_name)
    self.get_data_all(risk_model_name)
    date_series = Date().get_trade_date_series(beg_date, end_date, period)
    # only dates where every required dataset is available
    date_series = list(set(date_series) & set(self.pct_chg.columns) & set(self.free_mv_data.columns) & set(self.trade_status.columns) & set(self.industry.columns))
    date_series.sort()
    factor_return = pd.DataFrame()
    res_return = pd.DataFrame()
    # factor exposure files (one csv per previous trade date)
    for i_date in range(len(date_series)):
        date = date_series[i_date]
        before_date = Date().get_trade_date_offset(date, -1)
        factor_return_date, exposure_before_date, res_return_date = self.cal_return_date(date)
        # exposure is as of the day BEFORE the return date
        exposure_file = os.path.join(self.exposure_path, "exposure_%s.csv" % before_date)
        exposure_before_date.to_csv(exposure_file)
        factor_return = pd.concat([factor_return, factor_return_date], axis=1)
        res_return = pd.concat([res_return, res_return_date], axis=1)
    # factor return file (append to existing history if present)
    factor_return = factor_return.T
    factor_return_file = os.path.join(self.factor_return_path, "factor_return.csv")
    if os.path.exists(factor_return_file):
        old_data = self.get_factor_return()
        factor_return = Stock().pandas_add_row(old_data, factor_return)
    factor_return.to_csv(factor_return_file)
    # cumulative factor return is recomputed from the merged series
    factor_return_cum = factor_return.cumsum()
    factor_return_file = os.path.join(self.factor_return_path, "factor_return_cum.csv")
    factor_return_cum.to_csv(factor_return_file)
    # stock residual return file (append to existing history if present)
    res_return = res_return.T
    res_return_file = os.path.join(self.res_return_path, "stock_residual_return.csv")
    if os.path.exists(res_return_file):
        old_data = self.get_stock_residual_return()
        res_return = Stock().pandas_add_row(old_data, res_return)
    res_return.to_csv(res_return_file)
def cal_factor_exposure(self, beg_date, end_date):
    """Liquidity factor (Barra CNE5-style).

    LIQUIDITY = 0.35 * STOM + 0.35 * STOQ + 0.30 * STOA, then regressed on the
    SIZE factor each day, keeping the residual (winsorized and standardized).

    Fix: days whose cross-section is empty after dropna() are now skipped —
    previously sm.OLS would raise on a zero-row design matrix. This matches
    the guarded pattern used by the residual-volatility factor in this file.
    """
    # build the three turnover sub-factors first
    self.cal_factor_liquidity_month()
    self.cal_factor_liquidity_quarter()
    self.cal_factor_liquidity_yearly()
    # weighted combination (missing components contribute 0 via fill_value)
    turnover_month = 0.35 * self.get_risk_factor_exposure(self.factor_name_month)
    turnover_quarter = 0.35 * self.get_risk_factor_exposure(self.factor_name_quarter)
    turnover_yearly = 0.30 * self.get_risk_factor_exposure(self.factor_name_yearly)
    liquidity = turnover_month.add(turnover_quarter, fill_value=0.0)
    liquidity = liquidity.add(turnover_yearly, fill_value=0.0)
    liquidity = liquidity.T.dropna(how='all').T
    # get residual of regression on size
    size_data = self.get_risk_factor_exposure("cne5_normal_size")
    [size_data, liquidity] = FactorPreProcess().make_same_index_columns([size_data, liquidity])
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(liquidity.columns))
    date_series.sort()
    turnover_res = pd.DataFrame([])
    for i_date in range(len(date_series)):
        date = date_series[i_date]
        print('Calculating Barra Risk factor %s at date %s' % (self.factor_name, date))
        regression_data = pd.concat([size_data[date], liquidity[date]], axis=1)
        regression_data.columns = ['x', 'y']
        regression_data = regression_data.dropna()
        if len(regression_data) > 0:
            y = regression_data['y'].values
            x = regression_data['x'].values
            x_add = sm.add_constant(x)
            model = sm.OLS(y, x_add).fit()
            # keep only the part of liquidity orthogonal to size
            regression_data['res'] = regression_data['y'] - model.fittedvalues
            res_date = pd.DataFrame(regression_data['res'])
            res_date.columns = [date]
            turnover_res = pd.concat([turnover_res, res_date], axis=1)
    turnover_res = FactorPreProcess().remove_extreme_value_mad(turnover_res)
    turnover_res = FactorPreProcess().standardization(turnover_res)
    self.save_risk_factor_exposure(turnover_res, self.factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Negative win-magnitude ratio over a 28-day window.

    Per stock: mean positive daily return divided by (mean positive + mean
    |negative|), negated. Stocks with too few valid observations get NaN.

    Fix: the removed ``DataFrame.ix`` accessor is replaced with ``.loc``
    (label-based column slice); behavior is otherwise unchanged.
    """
    # param
    term = 28
    effective_term = int(term * 0.8)
    # data
    pct = Stock().read_factor_h5("Pct_chg")
    # calculate data daily
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(pct.columns))
    date_series.sort()
    res = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(term - 1))
        # .loc replaces the removed .ix accessor (label-based column slice)
        data_period = pct.loc[:, data_beg_date:current_date]
        data_period = data_period.T.dropna(how='all')
        if len(data_period) > effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            data_positive = data_period[data_period > 0.0].mean()
            data_negative = -data_period[data_period <= 0.0].mean()
            data_sum = data_positive + data_negative
            # avoid division by zero for stocks with flat returns
            code_list = data_sum[data_sum != 0.0].index
            data_date = data_positive[code_list] / data_sum[code_list]
            effective_number = data_period.count()
            data_date[effective_number <= effective_term] = np.nan
            data_date = -pd.DataFrame(data_date.values, columns=[current_date], index=data_date.index)
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            data_date = pd.DataFrame([], columns=[current_date], index=pct.index)
        res = pd.concat([res, data_date], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Turnover bias: 60-day mean turnover minus 20-day mean turnover."""
    # params
    long_window = 60
    short_window = 20
    min_obs = int(0.8 * long_window)
    # read data; after .T rows are dates, columns are stock codes
    turnover = Stock().read_factor_h5("TurnOver_Daily").T
    trade_dates = Date().get_trade_date_series(beg_date, end_date)
    trade_dates = sorted(set(turnover.index) & set(trade_dates))
    res = pd.DataFrame()
    for current_date in trade_dates:
        long_beg = Date().get_trade_date_offset(current_date, -(long_window - 1))
        short_beg = Date().get_trade_date_offset(current_date, -(short_window - 1))
        long_win = turnover.loc[long_beg:current_date, :].T.dropna(how='all').T
        short_win = turnover.loc[short_beg:current_date, :].T.dropna(how='all').T
        if len(long_win) >= min_obs:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            diff = pd.DataFrame(long_win.mean() - short_win.mean())
            diff.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            diff = pd.DataFrame([], columns=[current_date], index=turnover.columns)
        res = pd.concat([res, diff], axis=1)
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def stock_ratio_10(beg_date, end_date):
    """Top-10 holding concentration per fund per quarter.

    For every stock-heavy fund (stock weight > 65 at the latest report date),
    sums the weights of its ten largest holdings at each quarterly report
    date, then appends the new columns to the existing factor csv file.

    Fixes: the removed ``DataFrame.ix`` accessor is replaced — on the reset
    integer index, label-based ``.ix[0:10]`` was INCLUSIVE and actually kept
    11 rows; ``.iloc[0:10]`` now takes exactly the top 10 as the printed
    message advertises. ``.ix`` cell assignment becomes ``.loc``.
    """
    factor_name = "Stock_Ratio_10"
    fund_holder = Fund().get_fund_holding_all()
    quarter_date = Date().get_last_fund_quarter_date(end_date)
    position_all = Fund().get_fund_factor("Stock_Ratio", date_list=[quarter_date], fund_pool=None).T
    position_all.columns = ['Stock_Weight']
    # keep only funds predominantly invested in stocks
    position_all = position_all[position_all['Stock_Weight'] > 65]
    code_list = list(position_all.index)
    date_list = Date().get_normal_date_series(beg_date=beg_date, end_date=end_date, period="Q")
    code_list.sort()
    date_list.sort()
    new_data = pd.DataFrame([], index=code_list, columns=date_list)
    for i_date in range(len(date_list)):
        for i_fund in range(len(code_list)):
            fund_code = code_list[i_fund]
            date = date_list[i_date]
            # holdings of one fund at one report date, largest weights first
            holder = fund_holder[fund_holder.FundCode == fund_code]
            holder = holder[holder.Date == date]
            holder = holder.sort_values(by=['Weight'], ascending=False)
            holder = holder.reset_index(drop=True)
            if len(holder) >= 10:
                holder = holder.iloc[0:10, :]
            new_data.loc[fund_code, date] = holder.Weight.sum()
            print("计算 %s 在 %s 的前10大重仓股票为 %s" % (fund_code, date, holder.Weight.sum()))
    # merge with existing factor file if present
    out_file = Parameter().get_read_file(factor_name)
    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        data.index = data.index.map(str)
        data = pandas_add_row(data, new_data)
    else:
        print(" File No Exist ", factor_name)
        data = new_data
    data.to_csv(out_file)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure from click counts: mean clicks over the first
    30 days of a 40-day window minus the mean over the last 10 days, with the
    sign flipped, per stock and trade date.

    The result is saved via save_alpha_factor_exposure under raw_factor_name.
    """
    # window parameters
    term = 40
    before_term = 30
    next_term = 10
    effective_term = int(term / 2)

    # click counts, stocks on the index, dates on the columns
    click_num = Stock().read_factor_h5("click_num")

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(click_num.columns))
    date_series.sort()

    res = pd.DataFrame()
    for current_date in date_series:
        window_beg = Date().get_trade_date_offset(current_date, -(term - 1))
        # dates on the rows; drop dates with no data at all
        window = click_num.loc[:, window_beg:current_date].T.dropna(how='all')

        if len(window) > effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            mean_pre = window.iloc[0:before_term, :].mean()
            mean_next = window.iloc[-next_term:, :].mean()
            factor = -(mean_next - mean_pre)
            # invalidate stocks with too few valid observations
            factor[window.count() <= effective_term] = np.nan
            column = pd.DataFrame(factor)
            column.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            column = pd.DataFrame([], columns=[current_date], index=click_num.index)
        res = pd.concat([res, column], axis=1)

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure: negative annualized volatility of the FF3
    regression R² over a trailing 60-day window, per stock and trade date.

    The result is saved via save_alpha_factor_exposure under raw_factor_name.
    """
    # window length and minimum number of valid observations
    term = 60
    effective_term = int(term * 0.6)

    # daily FF3 regression R², stocks on the index, dates on the columns
    ff3_r2 = FamaFrench().get_data("model_ff3", "FF3_R2")

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(ff3_r2.columns) & set(date_series))
    date_series.sort()

    res = pd.DataFrame()
    for current_date in date_series:
        window_beg = Date().get_trade_date_offset(current_date, -(term - 1))
        # dates on the rows; drop dates with no data at all
        window = ff3_r2.loc[:, window_beg:current_date].T.dropna(how='all')

        if len(window) > effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            factor = -window.std() * np.sqrt(250) / 100.0
            # invalidate stocks with too few valid observations
            factor[window.count() <= effective_term] = np.nan
            column = pd.DataFrame(factor)
            column.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            column = pd.DataFrame([], columns=[current_date], index=ff3_r2.index)
        res = pd.concat([res, column], axis=1)

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure: negative ratio of short-window (20d) to
    long-window (60d) volatility of FF3 residual returns, per stock and date.

    The result is saved via save_alpha_factor_exposure under raw_factor_name.
    """
    # window parameters
    long_term = 60
    short_term = 20
    effective_term = int(long_term * 0.8)

    # daily FF3 residual returns, converted from percent to decimals
    ff3_residual = FamaFrench().get_data("model_ff3", "FF3_ResidualReturn") / 100.0

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(ff3_residual.columns) & set(date_series))
    date_series.sort()

    res = pd.DataFrame()
    for current_date in date_series:
        long_beg = Date().get_trade_date_offset(current_date, -(long_term - 1))
        short_beg = Date().get_trade_date_offset(current_date, -(short_term - 1))

        # trailing windows with dates on the rows; drop all-NaN dates
        window_long = ff3_residual.loc[:, long_beg:current_date].T.dropna(how='all')
        window_short = ff3_residual.loc[:, short_beg:current_date].T.dropna(how='all')

        if len(window_long) > effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            factor = -window_short.std() / window_long.std()
            # invalidate stocks with too few valid observations
            factor[window_long.count() <= effective_term] = np.nan
            column = pd.DataFrame(factor)
            column.columns = [current_date]
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            column = pd.DataFrame([], columns=[current_date], index=ff3_residual.index)
        res = pd.concat([res, column], axis=1)

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure: time-weighted sum of negative log trade
    amounts over a trailing 120-day window (later days weighted linearly
    more), per stock and trade date.

    The result is saved via save_alpha_factor_exposure under raw_factor_name.
    """
    # window length; short_term is the max tolerated number of zero-amount days
    long_term = 120
    short_term = int(long_term * 0.5)

    # daily trade amount in units of 100 million, dates on the index
    trade_amount = Stock().read_factor_h5("TradeAmount").T / 100000000
    trade_amount = trade_amount.dropna(how='all')

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(trade_amount.index) & set(date_series))
    date_series.sort()

    res = pd.DataFrame([])
    for current_date in date_series:
        window_beg = Date().get_trade_date_offset(current_date, -(long_term - 1))
        window_filled = trade_amount.loc[window_beg:current_date, :].fillna(0.0)

        if len(window_filled) == long_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            # keep stocks with zero amount (suspension) on fewer than 60 of 120 days
            zero_days = (window_filled == 0.0).sum()
            keep_codes = zero_days[zero_days < short_term].index
            window = trade_amount.loc[window_beg:current_date, keep_codes]
            # -log(amount); zero amounts become NaN
            amount_log = -np.log(window.where(window != 0.0))
            weight = np.arange(1, long_term + 1)
            column = pd.DataFrame(np.dot(amount_log.T.values, weight),
                                  index=amount_log.columns, columns=[current_date])
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            column = pd.DataFrame([], columns=[current_date], index=trade_amount.columns)
        res = pd.concat([res, column], axis=1)

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure: negative skewness of daily percent returns
    over a trailing 150-day window, per stock and trade date.

    Stocks with too few valid observations in the window are set to NaN.
    The result is saved via save_alpha_factor_exposure under raw_factor_name.

    :param beg_date: first trade date to compute
    :param end_date: last trade date to compute
    """
    # window length and minimum number of valid observations
    term = 150
    effective_term = 120

    # daily percent changes, stocks on the index, dates on the columns
    pct = Stock().read_factor_h5("Pct_chg")

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(pct.columns) & set(date_series))
    date_series.sort()

    res = pd.DataFrame()
    for i in range(0, len(date_series)):
        current_date = date_series[i]
        data_beg_date = Date().get_trade_date_offset(current_date, -(term - 1))
        # .loc replaces the deprecated .ix indexer (removed in pandas >= 1.0)
        pct_before = pct.loc[:, data_beg_date:current_date]
        pct_stock = pct_before.T.dropna(how='all')
        if len(pct_stock) > effective_term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            skew_date = -pct_stock.skew()
            # invalidate stocks with too few valid observations
            effective_number = pct_stock.count()
            skew_date[effective_number <= effective_term] = np.nan
            skew_date = pd.DataFrame(skew_date.values, columns=[current_date], index=skew_date.index)
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))
            skew_date = pd.DataFrame([], columns=[current_date], index=pct.index)
        res = pd.concat([res, skew_date], axis=1)

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_factor_exposure(self, beg_date, end_date):
    """Compute factor exposure: negated 60-day cumulative return
    (short-term reversal), per stock and trade date.

    The result is saved via save_alpha_factor_exposure under raw_factor_name.
    """
    # window length and minimum number of valid observations
    term = 60
    effective_term = 30

    # daily percent changes with trade dates on the index
    pct = Stock().read_factor_h5("Pct_chg").T

    # restrict to trade dates that actually appear in the data
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(pct.index))
    date_series.sort()

    res = pd.DataFrame([])
    for current_date in date_series:
        window_beg = Date().get_trade_date_offset(current_date, -(term - 1))
        window = pct.loc[window_beg:current_date, :].dropna(how='all')
        window /= 100.0

        if len(window) == term:
            print('Calculating factor %s at date %s' % (self.raw_factor_name, current_date))
            # compound returns over the window, read off at the final date
            cum_return = ((window + 1.0).cumprod() - 1.0).loc[current_date, :]
            # invalidate stocks with too few valid observations
            valid = window.count() >= effective_term
            cum_return[~valid] = np.nan
            column = -pd.DataFrame(cum_return)
            column.columns = [current_date]
            res = pd.concat([res, column], axis=1)
        else:
            print('Calculating factor %s at date %s is null' % (self.raw_factor_name, current_date))

    # drop dates where every stock is NaN
    res = res.T.dropna(how='all').T
    self.save_alpha_factor_exposure(res, self.raw_factor_name)
def cal_weight_at_all_daily(self):
    """Roll portfolio holding weights forward to every trade date.

    Starting from the weights on each rebalance date (self.port_hold, filled
    by get_weight_at_all_change_date), weights on non-rebalance dates are
    drifted by each asset's daily return and renormalized to sum to 1.
    Results accumulate in self.port_hold_daily and are written to
    <save_path>/<port_name>/<port_name>_PortHoldDaily.csv.

    NOTE(review): relies on date_series being processed in ascending order so
    that the previous day's column already exists in self.port_hold_daily.
    """
    self.get_weight_at_all_change_date()
    beg_date = self.port_hold.columns[0]
    end_date = datetime.today().strftime("%Y%m%d")
    # trade dates for which asset returns are available
    date_series = Date().get_trade_date_series(beg_date, end_date)
    date_series = list(set(date_series) & set(self.asset_return.columns))
    date_series.sort()
    date_change_date_list = list(self.port_hold.columns)
    for i_date in range(len(date_series)):
        date = date_series[i_date]
        date_before = Date().get_trade_date_offset(date, -1)
        if date in date_change_date_list:
            # rebalance date: take the target weights directly
            self.port_hold_daily[date] = self.port_hold[date]
            print(" Calculating Weight of Portfolio %s At Date %s " % (self.port_name, date))
        else:
            # non-rebalance date: drift yesterday's weights by today's returns
            print(" Calculating Weight of Portfolio %s At Date %s " % (self.port_name, date))
            weight_before = self.port_hold_daily[date_before]
            pct_date = self.asset_return[date]
            concat_data = pd.concat([weight_before, pct_date], axis=1)
            concat_data.columns = ["WeightBefore", "PctCur"]
            if "Cash" in concat_data.index:
                # cash earns no return in this model
                concat_data.loc['Cash', "PctCur"] = 0.0
            # only assets actually held yesterday carry forward
            concat_data = concat_data.dropna(subset=["WeightBefore"])
            # missing returns are proxied by the cross-sectional median
            average_pct = concat_data["PctCur"].median()
            concat_data["PctCur"] = concat_data["PctCur"].fillna(
                average_pct)
            # PctCur is in percent, hence the /100 when compounding
            concat_data["Weight"] = concat_data["WeightBefore"] * (
                1.0 + concat_data["PctCur"] / 100.0)
            # renormalize so the weights sum to 1
            concat_data["Weight"] = concat_data["Weight"] / concat_data[
                "Weight"].sum()
            self.port_hold_daily[date] = concat_data["Weight"]
    sub_path = os.path.join(self.save_path, self.port_name)
    self.port_hold_daily.to_csv(
        os.path.join(sub_path, self.port_name + '_PortHoldDaily.csv'))