def factor_calculate(**kwargs):
    """Recompute earning factors for one trade date.

    Expects in ``kwargs``:
        date_index: trade date, used as the cache partition key.
        session:    session id prefix for the cache keys.

    Pulls three cached JSON frames (tp / 5y-TTM / TTM earning data),
    restores their ``symbol`` index and hands them to ``calculate``.
    """
    # BUG FIX: the log line said "constrain_kwargs" although this is the
    # earning entry point.
    print("earning_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    earning = FactorEarning('factor_earning')
    content1 = cache_data.get_cache(session + str(date_index) + "1", date_index)
    content2 = cache_data.get_cache(session + str(date_index) + "2", date_index)
    content3 = cache_data.get_cache(session + str(date_index) + "3", date_index)
    tp_earning = json_normalize(json.loads(str(content1, encoding='utf8')))
    ttm_earning_5y = json_normalize(json.loads(str(content2, encoding='utf8')))
    ttm_earning = json_normalize(json.loads(str(content3, encoding='utf8')))
    # cache_data.get_cache drops the index name, so restore it explicitly.
    tp_earning.set_index('symbol', inplace=True)
    ttm_earning.set_index('symbol', inplace=True)
    ttm_earning_5y.set_index('symbol', inplace=True)
    total_earning_data = {
        'tp_earning': tp_earning,
        'ttm_earning_5y': ttm_earning_5y,
        'ttm_earning': ttm_earning,
    }
    calculate(date_index, total_earning_data, earning)
def factor_calculate(**kwargs):
    """Recompute per-share indicator factors for one trade date."""
    print("per_share_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    per_share = PerShareIndicators('factor_per_share')
    raw = cache_data.get_cache(session + str(date_index), date_index)
    total_per_share_data = json_normalize(json.loads(str(raw, encoding='utf8')))
    print("len_total_per_share_data {}".format(len(total_per_share_data)))
    calculate(date_index, total_per_share_data, per_share)
def factor_calculate(**kwargs):
    """Recompute historical-value factors for one trade date."""
    print("history_value_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    historical_value = HistoricalValue('factor_historical_value')
    raw = cache_data.get_cache(session + str(date_index), date_index)
    total_history_data = json_normalize(json.loads(str(raw, encoding='utf8')))
    print("len_history_value_data {}".format(len(total_history_data)))
    calculate(date_index, total_history_data, historical_value)
def factor_calculate(**kwargs):
    """Recompute cash-flow factors for one trade date (single cached frame)."""
    print("cash_flow_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    cash_flow = FactorCashFlow('factor_cash_flow')
    # NOTE(review): sibling entry points key the cache on
    # session + str(date_index); this one uses the bare session — confirm
    # against the producer side.
    raw = cache_data.get_cache(session, date_index)
    total_cash_flow_data = json_normalize(json.loads(str(raw, encoding='utf8')))
    print("len_total_cash_flow_data {}".format(len(total_cash_flow_data)))
    calculate(date_index, total_cash_flow_data, cash_flow)
def factor_calculate(**kwargs):
    """Recompute growth factors for one trade date."""
    print("growth_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    growth = Growth('factor_growth')
    raw = cache_data.get_cache(session + str(date_index), date_index)
    total_growth_data = json_normalize(json.loads(str(raw, encoding='utf8')))
    print("len_total_growth_data {}".format(len(total_growth_data)))
    calculate(date_index, total_growth_data, growth)
def factor_analysis(**kwargs):
    """Run the full single-factor analysis pipeline and persist the results.

    Expects in ``kwargs``: ``factor_name``, ``risk_styles``,
    ``benchmark_code``, ``session`` (and possibly ``factors_sets``, which is
    dropped before being forwarded to ``basic_info``).

    Returns a fixed completion string (runtime strings preserved as-is).
    """
    print(kwargs)
    factor_name = kwargs['factor_name']
    neutralized_styles = kwargs["risk_styles"] + industry_styles
    benchmark_code = kwargs['benchmark_code']
    session = kwargs['session']
    content = cache_data.get_cache(session, factor_name)
    total_data = json_normalize(json.loads(content))
    print(factor_name, neutralized_styles)
    # trade_date arrives as epoch milliseconds; convert to a date.
    total_data['trade_date'] = total_data['trade_date'].apply(
        lambda x: datetime.datetime.fromtimestamp(x / 1000).date())
    # Destination database for the analysis results.
    # SECURITY: credentials are hard-coded in the DSN; move them to
    # configuration / environment variables.
    destination = sa.create_engine(
        "mysql+mysqlconnector://quant:[email protected]:3306/quant")
    destsession = sessionmaker(bind=destination,
                               autocommit=False,
                               autoflush=True)
    # Millisecond timestamp + microsecond as a (best-effort) unique task id.
    task_id = str(
        int(time.time() * 1000000 + datetime.datetime.now().microsecond))
    # Factor preprocessing.
    total_data = factor_process(total_data, factor_name, neutralized_styles)
    # Quintile cumulative returns.
    cum_df = cum_quintile(total_data)
    # Persist quintile and excess returns.
    excess_quintile(destsession, session, factor_name, task_id, cum_df)
    # Yearly returns.
    yearly_quintile(destsession, session, factor_name, task_id, cum_df)
    # IC series.
    ic_series = ic_serialize(destsession, session, factor_name, task_id,
                             total_data)
    # Industry IR.
    industry_ir(destsession, session, factor_name, task_id, total_data)
    # IC decay.
    ic_decay(destsession, session, factor_name, task_id, total_data)
    # T-value series.
    fac_rets_series = t_serialize(destsession, session, factor_name, task_id,
                                  neutralized_styles, cum_df, total_data)
    # Basic info. BUG FIX: the factor payload now comes from the cache, so
    # 'factors_sets' may be absent from kwargs; ``del`` raised KeyError in
    # that case, ``pop`` with a default does not. (``basic_dict`` aliases
    # ``kwargs``, so ``kwargs`` is mutated too — as before.)
    basic_dict = kwargs
    basic_dict.pop('factors_sets', None)
    basic_info(destsession, session, factor_name, task_id, fac_rets_series,
               ic_series, cum_df, total_data, kwargs)
    print('update_destdb % s is end' % (factor_name))
    return "任务结果"
def factor_calculate(**kwargs):
    """Recompute constrain factors for one trade date."""
    print("constrain_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    constrain = FactorConstrain('factor_constrain')
    raw1 = cache_data.get_cache(session + str(date_index) + '1', date_index)
    raw2 = cache_data.get_cache(session + str(date_index) + '2', date_index)
    balance_sets = json_normalize(json.loads(str(raw1, encoding='utf8')))
    ttm_factors_sets = json_normalize(json.loads(str(raw2, encoding='utf8')))
    # get_cache drops the index name; restore the symbol index explicitly.
    balance_sets.set_index('symbol', inplace=True)
    ttm_factors_sets.set_index('symbol', inplace=True)
    print("len_constrain_data {}".format(len(balance_sets)))
    print("len_ttm_constrain_data {}".format(len(ttm_factors_sets)))
    total_constrain_data_dic = {
        'balance_sets': balance_sets,
        'ttm_factors_sets': ttm_factors_sets,
    }
    calculate(date_index, total_constrain_data_dic, constrain)
def factor_calculate(**kwargs):
    """Recompute cash-flow factors (tp + TTM inputs) for one trade date."""
    print("cash_flow_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: this name must match the table name created on the client side,
    # otherwise storage fails.
    cash_flow = FactorCashFlow('factor_cash_flow')
    raw1 = cache_data.get_cache(session + str(date_index) + "1", date_index)
    raw2 = cache_data.get_cache(session + str(date_index) + "2", date_index)
    tp_cash_flow = json_normalize(json.loads(str(raw1, encoding='utf8')))
    ttm_factor_sets = json_normalize(json.loads(str(raw2, encoding='utf8')))
    # get_cache drops the index name; restore the symbol index explicitly.
    tp_cash_flow.set_index('symbol', inplace=True)
    ttm_factor_sets.set_index('symbol', inplace=True)
    print("len_tp_cash_flow_data {}".format(len(tp_cash_flow)))
    print("len_ttm_cash_flow_data {}".format(len(ttm_factor_sets)))
    total_cash_flow_data = {
        'tp_cash_flow': tp_cash_flow,
        'ttm_factor_sets': ttm_factor_sets,
    }
    calculate(date_index, total_cash_flow_data, cash_flow)
def calculate(**kwargs):
    """Compute scale-value factors for one trade date and store them.

    Expects in ``kwargs``: ``factor_name``, ``session``, ``trade_date``.
    """
    fb = FactorBase('factor_scale_value')
    print(kwargs)
    factor_name = kwargs['factor_name']
    session = kwargs['session']
    trade_date = kwargs['trade_date']
    content = cache_data.get_cache(session, factor_name)
    total_data = json_normalize(json.loads(content))
    print(len(total_data))
    # Each helper derives its factor column(s) from the frame.
    factor_scale_value = lcap(total_data, total_data)
    factor_scale_value = lflo(factor_scale_value, factor_scale_value)
    factor_scale_value = nlsize(factor_scale_value, factor_scale_value)
    factor_scale_value = lst(factor_scale_value, factor_scale_value)
    factor_scale_value = ltlqa(factor_scale_value, factor_scale_value)
    # BUG FIX: 'log_total_last_qua_assets' and 'log_sales_ttm' were mapped
    # to each other's target names ('LogSalesTTM' / 'LogTotalLastQuaAssets'
    # swapped); each snake_case column now maps to its own CamelCase name.
    factor_scale_value.rename(columns={
        'market_cap': 'MktValue',
        'circulating_market_cap': 'CirMktValue',
        'total_operating_revenue': 'SalesTTM',
        'total_assets': 'TotalAssets',
        'log_of_mkt_value': 'LogofMktValue',
        'log_of_neg_mkt_value': 'LogofNegMktValue',
        'nl_size': 'NLSIZE',
        'log_sales_ttm': 'LogSalesTTM',
        'log_total_last_qua_assets': 'LogTotalLastQuaAssets'
    }, inplace=True)
    factor_scale_value = factor_scale_value[[
        'symbol', 'MktValue', 'CirMktValue', 'SalesTTM', 'TotalAssets',
        'LogofMktValue', 'LogofNegMktValue', 'NLSIZE', 'LogSalesTTM',
        'LogTotalLastQuaAssets'
    ]]
    # Unique row id = symbol + trade date.
    factor_scale_value['id'] = factor_scale_value['symbol'] + str(trade_date)
    factor_scale_value['trade_date'] = str(trade_date)
    fb._storage_data(factor_scale_value, trade_date)
def distributed_factor(session, trade_date, packet_sets, name, factor_name=None):
    """Fetch cached factor data and dispatch it to the calc engine.

    BUG FIX: ``factor_name`` was referenced but never defined, so this
    function raised NameError on every call. It is now an explicit keyword
    parameter (signature stays backward-compatible); when omitted it falls
    back to ``name``, which appears to carry the factor name here —
    TODO confirm the intended cache key against the caller.
    """
    if factor_name is None:
        factor_name = name
    calc_engine = CalcEngine(name, packet_sets)
    content = cache_data.get_cache(session, factor_name)
    total_data = json_normalize(json.loads(content))
    calc_engine.distributed_factor(total_data)
def calculate(**kwargs):
    """Compute volatility-value factors for one trade date and store them.

    Expects in ``kwargs``: ``factor_name``, ``session``, ``trade_date``.
    The cached payload is a JSON object with 'total_data' (per-symbol daily
    prices) and 'index_daily_price_sets' (benchmark index prices), each
    itself JSON-encoded.
    """
    fb = FactorBase('factor_volatility_value')
    print(kwargs)
    factor_name = kwargs['factor_name']
    session = kwargs['session']
    trade_date = kwargs['trade_date']
    golbal_obj['trade_date'] = trade_date
    payload = json.loads(cache_data.get_cache(session, factor_name))
    total_data = json_normalize(json.loads(payload['total_data']))
    index_prices = json_normalize(
        json.loads(payload['index_daily_price_sets']))
    index_prices.set_index("symbol", inplace=True)
    # symbol_calcu reads the index prices through this shared object.
    golbal_obj['tp_index'] = index_prices
    print(len(total_data))
    print(len(golbal_obj['tp_index']))
    total_data.sort_values(by=['symbol', 'trade_date'],
                           ascending=True,
                           inplace=True)
    rows = []
    for code in sorted(set(total_data['symbol'])):
        history = total_data[total_data['symbol'] == code]
        # Require a non-zero latest close and more than 120 observations.
        if history.iloc[-1]['close'] != 0 and len(history) > 120:
            rows.append(symbol_calcu(history))
    factor_volatility_value = pd.DataFrame(rows)
    # Map snake_case factor names to their CamelCase storage columns.
    factor_volatility_value.rename(columns={
        'variance_20d': 'Variance20D',
        'variance_60d': 'Variance60D',
        'variance_120d': 'Variance120D',
        'kurtosis_20d': 'Kurtosis20D',
        'kurtosis_60d': 'Kurtosis60D',
        'kurtosis_120d': 'Kurtosis120D',
        'alpha_20d': 'Alpha20D',
        'alpha_60d': 'Alpha60D',
        'alpha_120d': 'Alpha120D',
        'beta_20d': 'Beta20D',
        'beta_60d': 'Beta60D',
        'beta_120d': 'Beta120D',
        'sharp_20d': 'Sharp20D',
        'sharp_60d': 'Sharp60D',
        'sharp_120d': 'Sharp120D',
        'tr_20d': 'TR20D',
        'tr_60d': 'TR60D',
        'tr_120d': 'TR120D',
        'ir_20d': 'IR20D',
        'ir_60d': 'IR60D',
        'ir_120d': 'IR120D',
        'gain_variance_20d': 'GainVariance20D',
        'gain_variance_60d': 'GainVariance60D',
        'gain_variance_120d': 'GainVariance120D',
        'loss_variance_20d': 'LossVariance20D',
        'loss_variance_60d': 'LossVariance60D',
        'loss_variance_120d': 'LossVariance120D',
        'gain_loss_variance_ratio_20d': 'GainLossVarianceRatio20D',
        'gain_loss_variance_ratio_60d': 'GainLossVarianceRatio60D',
        'gain_loss_variance_ratio_120d': 'GainLossVarianceRatio120D',
        'dastd_252d': 'DailyReturnSTD252D',
        'ddnsr_12m': 'DDNSR12M',
        'ddncr_12m': 'DDNCR12M',
        'dvrat': 'DVRAT'
    }, inplace=True)
    factor_volatility_value = factor_volatility_value[[
        'symbol', 'Variance20D', 'Variance60D', 'Variance120D',
        'Kurtosis20D', 'Kurtosis60D', 'Kurtosis120D', 'Alpha20D',
        'Alpha60D', 'Alpha120D', 'Beta20D', 'Beta60D', 'Beta120D',
        'Sharp20D', 'Sharp60D', 'Sharp120D', 'TR20D', 'TR60D', 'TR120D',
        'IR20D', 'IR60D', 'IR120D', 'GainVariance20D', 'GainVariance60D',
        'GainVariance120D', 'LossVariance20D', 'LossVariance60D',
        'LossVariance120D', 'GainLossVarianceRatio20D',
        'GainLossVarianceRatio60D', 'GainLossVarianceRatio120D',
        'DailyReturnSTD252D', 'DDNSR12M', 'DDNCR12M', 'DVRAT'
    ]]
    # Unique row id = symbol + trade date.
    factor_volatility_value['id'] = (
        factor_volatility_value['symbol'] + str(trade_date))
    factor_volatility_value['trade_date'] = str(trade_date)
    fb._storage_data(factor_volatility_value, trade_date)