def import_wind_index_daily_first(table_name, wind_codes): # wind_codes 可以是字符串,也可以是字符串的集合, table_name是数据库的表名 engine = get_db_engine() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') info = rest.wss(wind_codes, "basedate,sec_name") for code in info.index: begin_date = info.loc[code, 'BASEDATE'].strftime('%Y-%m-%d') index_name = info.loc[code, 'SEC_NAME'] temp = rest.wsd(code, "open,high,low,close,volume,amt", begin_date, yestday) temp.reset_index(inplace=True) temp.rename(columns={'index': 'trade_date'}, inplace=True) temp.trade_date = pd.to_datetime(temp.trade_date) temp.trade_date = temp.trade_date.map(lambda x: x.date()) temp['wind_code'] = code temp['index_name'] = index_name temp.set_index(['wind_code', 'trade_date'], inplace=True) temp.to_sql(table_name, engine, if_exists='append', index_label=['wind_code', 'trade_date'], dtype={ 'wind_code': String(20), 'trade_date': Date, }) print('Success import %s - %s' % (code, index_name))
def import_wind_index_daily_first(wind_codes): """ 首次导入某指数使用 :param wind_codes: 可以是字符串,也可以是字符串的list :return: """ engine = get_db_engine() yestday = date.today() - timedelta(days=1) info = rest.wss(wind_codes, "basedate,sec_name") for code in info.index: begin_date = str_2_date(info.loc[code, 'BASEDATE']) index_name = info.loc[code, 'SEC_NAME'] index_df = rest.wsd(code, "open,high,low,close,volume,amt,turn,free_turn", begin_date, yestday) index_df.reset_index(inplace=True) index_df.rename(columns={'index': 'trade_date'}, inplace=True) index_df.trade_date = pd.to_datetime(index_df.trade_date) index_df.trade_date = index_df.trade_date.map(lambda x: x.date()) index_df['wind_code'] = code index_df['index_name'] = index_name index_df.set_index(['wind_code', 'trade_date'], inplace=True) table_name = 'wind_index_daily' index_df.to_sql(table_name, engine, if_exists='append', index_label=['wind_code', 'trade_date'], dtype={ 'wind_code': String(20), 'trade_date': Date, }) logger.info('Success import %s - %s with %d data' % (code, index_name, index_df.shape[0]))
def get_fund_nav(fund_code_list, from_date, to_date): """ 获取基金净值增长率,所有基金代码横向数据对其,补插值方案为”上一条数据净值” :param fund_code_list: 基金代码列表 :param from_date: 起始日期 :param to_date: 截止日期 :return: """ fund_code_count = len(fund_code_list) from_date = from_date if type(from_date) is str else datetime.strptime( from_date, STR_FORMAT_DATE) to_date = to_date if type(to_date) is str else datetime.strptime( to_date, STR_FORMAT_DATE) param_str = ", ".join(['%s' for n in range(fund_code_count)]) sql_str = 'select wind_code, nav_date_friday, nav_acc from fund_nav_friday where wind_code in (%s)' % param_str engine = get_db_engine() df = pd.read_sql(sql_str, engine, params=fund_code_list, parse_dates=['nav_date_friday']) if df.shape[0] == 0: logger.info('No data found') return None fund_df_all = df.pivot(index='nav_date_friday', columns='wind_code', values='nav_acc') \ .ffill().bfill() fund_df = fund_df_all.iloc[(from_date <= fund_df_all.index) & (fund_df_all.index <= to_date)] pct_df = (fund_df.pct_change().fillna(0) + 1).cumprod() return pct_df
def import_trade_date(): """ 增量导入交易日数据导数据库表 wind_trade_date,默认导入未来300天的交易日数据 :return: """ w = WindRest(WIND_REST_URL) engine = get_db_engine() trade_date_start = None with get_db_session(engine) as session: try: trade_date_max = session.execute( 'select max(trade_date) from wind_trade_date').first()[0] if trade_date_max is not None: trade_date_start = ( trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE) except Exception as exp: logging.exception("交易日获取异常") if trade_date_start is None: trade_date_start = '1990-01-01' end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE) date_df = w.tdays(trade_date_start, end_date_str) if date_df is not None: print(date_df.shape) date_df = date_df.set_index('date').rename( columns={'date': 'trade_date'}) date_df.to_sql('wind_trade_date', engine, if_exists='append') logging.info('%d trade date has been imported', date_df.shape[0])
def get_fund_nav_by_wind_code(wind_code, limit=5): """ 输入 wind_code 基金代码,返回基金代码以及同期沪深300指数的对比走势 :param wind_code: 基金代码 :param limit: 限制返回数量 :return: 拟合后的对比走势,最新净值日的基金增长率、指数增长率、最新净值日期 """ engine = get_db_engine() if limit == 0: sql_str = r"select nav_date, nav, nav_acc from fund_nav where wind_code =%s order by nav_date desc " fund_df = pd.read_sql(sql_str, engine, params=[wind_code], parse_dates=['nav_date']) else: sql_str = r"select nav_date, nav, nav_acc from fund_nav where wind_code =%s order by nav_date desc limit %s" fund_df = pd.read_sql(sql_str, engine, params=[wind_code, limit + 1], parse_dates=['nav_date']) # if fund_df.shape[0] == 0: logger.info('No data found') return None fund_df.sort_values('nav_date', inplace=True) fund_df.set_index('nav_date', inplace=True) fund_df['pct'] = fund_df['nav_acc'].pct_change().fillna(0) if fund_df.shape[0] > limit: fund_df = fund_df.iloc[-limit:] return fund_df
def get_fof_fund_date_range(wind_code): """获取FOF基金下面每一只子基金的日期范围""" fund_date_df = None sql_str = """select wind_code_s, date_adj, invest_scale from fof_fund_pct f1 where f1.wind_code_p = %s""" engine = get_db_engine() data_df = pd.read_sql(sql_str, engine, params=[wind_code]) if data_df.shape[0] == 0: return None fund_date_df = data_df.pivot(columns='wind_code_s', index='date_adj', values='invest_scale') date_list = list(fund_date_df.index) date_list.sort() fund_date_range_dic = {} for wind_code_s in fund_date_df.columns: fund_date_s = fund_date_df[wind_code_s] date_range_list = [] date_range = [] for fund_date in date_list: invest_scale = fund_date_s[fund_date] if not np.isnan(invest_scale): date_range.append(fund_date) if np.isnan(invest_scale) and len(date_range) > 0: date_range_list.append((min(date_range), fund_date)) date_range = [] else: if len(date_range) > 0: date_range_list.append((min(date_range), datetime.max.date())) date_range = [] fund_date_range_dic[wind_code_s] = date_range_list return fund_date_range_dic
def get_fund_nav_df(self, fund_code_list, start_date, end_date, weight_list): start_date_str = date_2_str(start_date) end_date_str = date_2_str(end_date) fund_nav_df = pd.DataFrame() sql_str = '''select nav_date_friday, nav_acc from fund_nav fv, ( select wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_friday, max(nav_date) as nav_date_max from fund_nav where wind_code=%s and nav_date between %s and %s group by nav_date_friday) as nav_t where fv.wind_code = %s and fv.wind_code = nav_t.wind_code and fv.nav_date = nav_t.nav_date_max group by nav_date_friday order by nav_date_friday''' engine = get_db_engine() ok_list = [] for fund_code in fund_code_list: fund_nv_tmp = pd.read_sql( sql_str, engine, params=[fund_code, start_date_str, end_date_str, fund_code]) if fund_nv_tmp.shape[0] <= 1: logger.info('%s with %d nav data will be ignored', fund_code, fund_nv_tmp.shape[0]) ok_list.append(False) continue if len(fund_nav_df) == 0: fund_nav_df = fund_nv_tmp logger.info('%s with %d nav data', fund_code, fund_nav_df.shape[0]) else: fund_nav_df = fund_nav_df.merge(fund_nv_tmp, on='nav_date_friday') logger.info('%s with %d nav data have %d data merged', fund_code, fund_nv_tmp.shape[0], fund_nav_df.shape[0]) ok_list.append(True) fund_nav_df.dropna(how='any', inplace=True) if fund_nav_df.shape[0] <= 1: logger.error('no data found from fund_list: %s', fund_code_list) return None, None, None fund_nav_df.set_index('nav_date_friday', inplace=True) fund_nav_pct_df = fund_nav_df.pct_change().fillna(0) # 权重归一化处理 weight_arr = np.array(weight_list)[ok_list] weight_arr = weight_arr / sum(weight_arr) # 筛选有效 wind_code_list wind_code_new_list = np.array(fund_code_list)[ok_list] return wind_code_new_list, weight_arr, fund_nav_pct_df
def calc_index_by_wind_code_dic(wind_code_dic, date_from_str, date_to_str): """ 根据子基金投资比例获得历史加权净值走势 :param wind_code_dic: :param date_from_str: :param date_to_str: :return: """ wind_code_list = list(wind_code_dic.keys()) wind_code_count = len(wind_code_list) if wind_code_count == 0: return None engine = get_db_engine() # sql_str = 'select * from fund_nav where wind_code in (%s)' % ("'" + "', '".join(wind_code_list) + "'") sql_str = """select fn.wind_code, nav_acc, nav_date_week from fund_nav fn, (select wind_code, max(nav_date) nav_date , adddate(nav_date, 4 - weekday(nav_date)) as nav_date_week from fund_nav where wind_code in (%s) group by wind_code, adddate(nav_date, 4 - weekday(nav_date)) ) as fnw where fn.wind_code = fnw.wind_code and fn.nav_date = fnw.nav_date""" sql_str = sql_str % (", ".join([r'%s' for n in range(wind_code_count)])) data_df = pd.read_sql(sql_str, engine, params=wind_code_list) data_len = data_df.shape[0] logger.debug('data_df.shape[0]: %d', data_len) if data_len == 0: return None date_from = str_2_date(date_from_str) date_to = str_2_date(date_to_str) fund_nav_df = data_df.pivot(columns='wind_code', index='nav_date_week', values='nav_acc') logger.debug('fund_nav_df.shape: %s', fund_nav_df.shape) # 对各个基金的数据进行内插值 fh_utils.DataFrame.interpolate_inner(fund_nav_df, inplace=True) # 过滤有效日期列表 date_available = [ d for d in fund_nav_df.index if date_from <= d <= date_to ] fund_nav_df_filter = fund_nav_df.ix[date_available, :] # 归一化处理 fund_nav_pct_df = fund_nav_df_filter.pct_change().fillna(0) index_pct_s = None wind_code_list = list(fund_nav_pct_df.columns) tot_weight = sum( [wind_code_dic[wind_code] for wind_code in wind_code_list]) for wind_code in wind_code_list: pct_s = fund_nav_pct_df[wind_code] * wind_code_dic[ wind_code] / tot_weight if index_pct_s is None: index_pct_s = pct_s else: index_pct_s += pct_s index_nav_s = (index_pct_s + 1).cumprod() return index_nav_s
def update_strategy_index(date_from_str, date_to_str): """ strategy_index_info 中所有 strategy_name 更新指数净值到数据库 strategy_index_val 中 :param date_from_str: 起始日期 %Y-%m-%d :param date_to_str: 截止日期 %Y-%m-%d :return: """ engine = get_db_engine() with get_db_session(engine) as session: stg_table = session.execute( 'select strategy_name from strategy_index_info group by strategy_name' ) strategy_name_list = [stg_info[0] for stg_info in stg_table.fetchall()] strategy_name_count = len(strategy_name_list) if strategy_name_count == 0: logger.info('strategy_index_info table is empty') return # strategy_name_list = ['long_only', 'cta', 'arbitrage', 'alpha', 'macro'] date_from = datetime.strptime(date_from_str, '%Y-%m-%d').date() date_to = datetime.strptime(date_to_str, '%Y-%m-%d').date() index_df_list = [] for strategy_name in strategy_name_list: # strategy_name = 'long_only' # index_df = calc_strategy_index(strategy_name, date_from, date_to, calc_sample_name='main') stg_index_s = get_strategy_index_by_name(strategy_name, date_from, date_to, statistic=False) if stg_index_s is not None: logger.info('生成%s策略指数【%s ~ %s】', strategy_name, stg_index_s.index[0], stg_index_s.index[-1]) # index_df.to_csv('%s_sample_%s_%s.csv' % (strategy_name, date_from, date_to)) index_df = pd.DataFrame({'value': stg_index_s}) index_df.index.rename('nav_date', inplace=True) index_df.reset_index(inplace=True) # index_df.rename(columns={'nav_date_week': 'nav_date', strategy_name: 'value'}, inplace=True) index_df['index_name'] = strategy_name index_df_list.append(index_df) else: logger.info('No Data for shown on %s', strategy_name) index_df_all = pd.concat(index_df_list) index_df_all.set_index(['index_name', 'nav_date'], inplace=True) # 重置内容 table_name = 'strategy_index_val' with get_db_session(engine) as session: # session.execute("delete from %s where nav_date between '%s' and '%s'" % (table_name, date_from_str, date_to_str)) session.execute("truncate table %s" % table_name) index_df_all.to_sql(table_name, engine, if_exists='append', dtype={ 'index_name': String(20), 'nav_date': Date, 'value': FLOAT, })
def do_copula_4_scheme(scheme_id): """ 根据 scheme_id 计算相关组合的 copula 压力测试 :param scheme_id: :return: """ # sql_str = """select id, ffp.wind_code_p, ffp.wind_code_s, wind_code, date_adj, invest_scale # from fof_fund_pct ffp, # ( # select wind_code_p, max(date_adj) date_latest from fof_fund_pct group by wind_code_p # ) ff_date_latest, # fund_essential_info ffm # where ffp.wind_code_p = ff_date_latest.wind_code_p # and ffp.wind_code_s = ffm.wind_code_s # and ffp.date_adj = ff_date_latest.date_latest""" sql_str = "SELECT wind_code, invest_scale FROM scheme_fund_pct where scheme_id=%(scheme_id)s" engine = get_db_engine() fund_pct_df = pd.read_sql(sql_str, engine, params={'scheme_id': str(scheme_id)}) date_to = date.today() date_to_str = date_to.strftime(STR_FORMAT_DATE) date_from = date_to - timedelta(days=365) date_from_str = date_from.strftime(STR_FORMAT_DATE) simulate_count = STRESS_TESTING_SIMULATE_COUNT_COPULA r = get_redis() wind_code_list = list( fund_pct_df['wind_code']) # ['XT1410445.XT', 'J11039.OF'] wind_code_count = len(wind_code_list) logger.info('do_copula for %d on wind_code_p with %s', scheme_id, wind_code_list) if wind_code_count <= 0: logger.warning('scheme %s has no sub fund list', scheme_id) return st = StressTest('Clayton') weighted_list = np.ones(wind_code_count) # max_dd_list = st.get_max_drawdown(['XT1410445.XT', 'J11039.OF'], '2016-01-01', '2016-12-31', np.ones(2), 10) max_dd_list = st.get_max_drawdown(wind_code_list, date_from_str, date_to_str, weighted_list, simulate_count) if max_dd_list is None or len(max_dd_list) == 0: logger.error('scheme %s has no copula test data. sub fund list: %s', scheme_id, wind_code_list) return # max_dd_list_str = json.dumps(max_dd_list) y, x, patches = plt.hist(max_dd_list, 20) y = list(map(int, y)) x = list(map(float, ["%.3f" % i for i in x])) key = 'scheme_%s_%s' % (scheme_id, 'copula') val_str = json.dumps({"x": x, "y": y}) logger.debug('%s has been completer\n%s', key, val_str) r.set(key, val_str)
def get_fund_nav_weekly_by_strategy(strategy_type_en, date_from, date_to, show_fund_name=False, do_filter_wind_code=False): """ 输入策略代码,起止日期,返回该策略所有基金周净值 :param strategy_type_en: :param date_from: :param date_to: :param show_fund_name: :return: """ global STRATEGY_TYPE_EN_CN_DIC sql_str = """select fv.wind_code, nav_date_week, fv.nav_acc from ( select wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_week, max(nav_date) as nav_date_max from fund_nav where wind_code in (select wind_code from fund_info where strategy_type = '%s') group by wind_code, nav_date_week having nav_date_week between '%s' and '%s' ) as ffv, fund_nav fv where ffv.nav_date_week between '%s' and '%s' and fv.wind_code = ffv.wind_code and fv.nav_date = ffv.nav_date_max group by wind_code, nav_date_week order by nav_date_week desc""" strategy_name_cn = STRATEGY_TYPE_EN_CN_DIC[strategy_type_en] query_str = sql_str % (strategy_name_cn, date_from, date_to, date_from, date_to) # logger.debug('策略子基金净值查询sql:\n%s', query_str) engine = get_db_engine() data_df = pd.read_sql_query(query_str, engine) fund_nav_df = data_df.pivot(index='nav_date_week', columns='wind_code', values='nav_acc') # 筛选子基金列表 if do_filter_wind_code: fund_nav_df = filter_wind_code(fund_nav_df, strategy_type_en) if show_fund_name: # 获取样本子基金名称 sql_str = "select wind_code, sec_name from fund_info where wind_code in (%s)" wind_code_list_str = ', '.join( ["'" + wind_code + "'" for wind_code in list(fund_nav_df.columns)]) query_str = sql_str % wind_code_list_str with get_db_session(engine) as session: table = session.execute(query_str) fund_code_name_dic = dict(table.fetchall()) # logger.info(df_fund_nav) fund_nav_df.rename(columns=fund_code_name_dic, inplace=True) return fund_nav_df
def save_fund_info(fund_info_df, get_df=False, mode='replace_insert'): table_name = 'wind_fund_info' engine = get_db_engine() fund_info_df.rename(columns={ 'SEC_NAME': 'sec_name', 'FUND_SETUPDATE': 'fund_setupdate', 'FUND_MATURITYDATE': 'fund_maturitydate', 'FUND_MGRCOMP': 'fund_mgrcomp', 'FUND_EXISTINGYEAR': 'fund_existingyear', 'FUND_PTMYEAR': 'fund_ptmyear', 'FUND_TYPE': 'fund_type', 'FUND_FUNDMANAGER': 'fund_fundmanager' }, inplace=True) fund_info_df.index.names = ['wind_code'] fund_info_df.drop_duplicates(inplace=True) if mode == 'append': fund_info_df.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(200), 'sec_name': String(200), 'strategy_type': String(200), 'fund_setupdate': Date, 'fund_maturitydate': Date }) elif mode == 'replace_insert': fund_info_df.reset_index(inplace=True) data_list = list(fund_info_df.T.to_dict().values()) sql_str = """REPLACE INTO wind_fund_info (wind_code,sec_name,strategy_type,fund_setupdate,fund_maturitydate,fund_mgrcomp,fund_existingyear,fund_ptmyear,fund_type,fund_fundmanager) VALUES (:wind_code,:sec_name,:strategy_type,:fund_setupdate,:fund_maturitydate,:fund_mgrcomp,:fund_existingyear,:fund_ptmyear,:fund_type,:fund_fundmanager); """ with get_db_session() as session: session.execute(sql_str, data_list) else: raise ValueError('mode="%s" is not available' % mode) logging.info('%d funds inserted' % fund_info_df.shape[0]) # wind_fund_info 表中增量数据插入到 fund_info sql_str = """insert into fund_info(wind_code, sec_name, strategy_type, fund_setupdate, fund_maturitydate, fund_mgrcomp, fund_existingyear, fund_ptmyear, fund_type, fund_fundmanager) select wfi.wind_code, wfi.sec_name, wfi.strategy_type, wfi.fund_setupdate, wfi.fund_maturitydate, wfi.fund_mgrcomp, wfi.fund_existingyear, wfi.fund_ptmyear, wfi.fund_type, wfi.fund_fundmanager from wind_fund_info wfi left outer join fund_info fi on wfi.wind_code=fi.wind_code where fi.wind_code is null""" with get_db_session(engine) as session: table = session.execute(sql_str) logging.info( 'new data was inserted into fund_info from wind_fund_info table') return fund_info_df if get_df else None
def get_fund_nv(self, fund_list, start_date, end_date): fund_nv = pd.DataFrame() for fund_code in fund_list: sql_fund = self.sql_getfund % (fund_code, start_date, end_date) engine = get_db_engine() fund_nv_tmp = pd.read_sql(sql_fund, engine) if len(fund_nv) == 0: fund_nv = fund_nv_tmp else: fund_nv = fund_nv.merge(fund_nv_tmp, on='nav_date2') fund_nv.dropna(how='any', inplace=True) fund_nv.set_index('nav_date2', inplace=True) return fund_nv
def cal_perform_attrib(wind_code, date_from, date_to): fac_profit, fac_name, fac_data, origin_data = cal_factor_profit( date_from, date_to) # 获取指数数据,中证500 engine = get_db_engine() # sql_origin = "select * from index_tradeinfo where index_tradeinfo.Index_Code = '000905.SH' and " + \ # "(index_tradeinfo.Trade_Date = '%s' or index_tradeinfo.Trade_Date = '%s' )" # sql_close = sql_origin % (date_from, date_to) sql_close = """select * from wind_index_daily where wind_code = '000905.SH' and (Trade_Date = %s or Trade_Date = %s )""" index_close = pd.read_sql(sql_close, engine, params=[date_from, date_to]) index_change = index_close['CLOSE'][index_close.trade_date == str_2_date(date_to)].values / \ index_close['CLOSE'][index_close.trade_date == str_2_date(date_from)].values - 1 if len(index_change) == 0: return None origin_data['Profit'] = origin_data['Profit'] - index_change # 对冲后增长率 # 获取Fund的涨跌幅 sql_fund_origin = "select * FROM fund_nav where wind_code = '%s' and nav_date between '%s' and '%s'" sql_fundnv = sql_fund_origin % (wind_code, date_from, date_to) fund_nav_df = pd.read_sql(sql_fundnv, engine) # sql_fund_origin = "select * FROM fundnav where wind_code = %s and (fundnav.trade_date = %s or fundnav.trade_date = %s)" # fund_close = pd.read_sql(sql_fund_origin, engine, [fundId, startdate, enddate]) date_from = fund_nav_df['nav_date'].max() date_to = fund_nav_df['nav_date'].min() fund_profit = fund_nav_df['nav'][ fund_nav_df['nav_date'] == date_to].values / fund_nav_df['nav'][ fund_nav_df['nav_date'] == date_from].values - 1 # fund_close_df['nav_date'] = fund_close_df['nav_date'].apply(lambda x: x.strftime('%Y-%m-%d')) # 存储格式 std_stock = origin_data['Profit'].std() # 寻找最接近的涨幅 fund_profit_low = fund_profit[0] - .01 * std_stock fund_profit_high = fund_profit[0] + .01 * std_stock target_data = fac_data[(origin_data['Profit'] > fund_profit_low) & (origin_data['Profit'] < fund_profit_high)] # return origin_data, fund_profit_low, fund_profit_high fund_exposure = target_data.mean() fund_exposure = fund_exposure[fac_name] # fund_fac_profit = fund_exposure[fac_name] * np.array(fac_profit) index_names = fund_exposure.index.values fund_exposure_pd = pd.DataFrame(fund_exposure) indictor_names = [fac_name_map[i] for i in index_names] fund_exposure_pd[u'因子名称'] = indictor_names fund_exposure_pd.rename(columns={0: u'因子暴露'}, inplace=True) # 去除Const index_ig = fund_exposure_pd[fund_exposure_pd.index == 'Const'].index fund_exposure_pd.drop(index_ig, axis=0, inplace=True) fund_exposure_pd.set_index(u'因子名称', inplace=True) return fund_exposure_pd
def get_stg_indexes(): """获取全部策略指数走势""" sql_str = """select iv.index_name, nav_date_friday, value from strategy_index_val iv, ( select index_name, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_friday, max(nav_date) nav_date_max from strategy_index_val group by index_name, nav_date_friday ) ivf where iv.index_name=ivf.index_name and iv.nav_date = ivf.nav_date_max""" engine = get_db_engine() all_index_df = pd.read_sql(sql_str, engine) stg_index_df = all_index_df.pivot(index='nav_date_friday', columns='index_name', values='value') stg_index_df.ffill(inplace=True) return stg_index_df
def save_factor_explore(date_start, date_end): logging.info('save_factor_explore', date_start, date_end) rest = WindRest(WIND_REST_URL) date_start = rest.tdaysoffset(0, date_start) # 获取起始交易日 date_end = rest.tdaysoffset(0, date_end) # 获取结束交易日 # 提取数据 # 全部数据按照20天计算 lagnum = 20 date_start_getdata = rest.tdaysoffset(-1 * (lagnum - 1), date_start) # 首先,提取股票代码 options_set = "date = %s; windcode = 881001.WI" % date_end stock_info_df = rest.wset("sectorconstituent", options_set) stock_code_s = stock_info_df['wind_code'] stock_name_s = stock_info_df['sec_name'] stock_count = len(stock_name_s) # 获取各个因子 options_stock = "unit=1;currencyType=;PriceAdj=B" data_shortnames = "close,pe_ttm,mkt_cap_float,turn" logging.info('date between %s and %s', date_start_getdata, date_end) stock_num = 0 logging.info('Need To update %d Stocks' % len(stock_code_s)) for stock_code, stock_name in zip(stock_code_s, stock_name_s): stock_factor_df = rest.wsd(stock_code, data_shortnames, date_start_getdata, date_end, options_stock) stock_factor_df['fac_Inverse_Moment'] = stock_factor_df[ 'CLOSE'] / stock_factor_df['CLOSE'].shift(19) # 19日涨跌判断 stock_factor_df['fac_Mv'] = np.log10(stock_factor_df.MKT_CAP_FLOAT) stock_factor_df['fac_Pe'] = stock_factor_df['PE_TTM'] stock_factor_df['fac_Turn'] = stock_factor_df['TURN'].rolling( lagnum).mean() stock_factor_df['fac_Vol'] = ( stock_factor_df['CLOSE'] / stock_factor_df['CLOSE'].shift(1)).rolling(lagnum).std() stock_factor_df['Stock_Name'] = stock_name stock_factor_df['Stock_Code'] = stock_code stock_factor_df.dropna(how='any', inplace=True) factor_name_saved = [ 'Stock_Name', 'Stock_Code', 'fac_Inverse_Moment', 'fac_Mv', 'fac_Pe', 'fac_Turn', 'fac_Vol' ] engine = get_db_engine() stock_factor_df[factor_name_saved].to_sql('Stock_FacExposure', engine, if_exists='append', index_label=['Trade_Date']) stock_num += 1 logging.info('Successful Input %s [%d / %d] stock' % (stock_name, stock_num, stock_count))
def import_wind_stock_info(refresh=False): # 获取全市场股票代码及名称 if refresh: date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date() else: date_fetch = date.today() date_end = date.today() stock_code_set = set() while date_fetch < date_end: stock_code_set_sub = get_stock_code_set(date_fetch) if stock_code_set_sub is not None: stock_code_set |= stock_code_set_sub date_fetch += timedelta(days=365) stock_code_set_sub = get_stock_code_set(date_fetch) if stock_code_set_sub is not None: stock_code_set |= stock_code_set_sub # 获取股票对应上市日期,及摘牌日期 # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng") stock_code_list = list(stock_code_set) stock_code_count = len(stock_code_list) seg_count = 1000 loop_count = math.ceil(float(stock_code_count) / seg_count) stock_info_df_list = [] for n in range(loop_count): num_start = n * seg_count num_end = (n + 1) * seg_count num_end = num_end if num_end <= stock_code_count else stock_code_count stock_code_list_sub = stock_code_list[num_start:num_end] # 尝试将 stock_code_list_sub 直接传递给wss,是否可行 stock_info_df = w.wss(stock_code_list_sub, "sec_name,trade_code,ipo_date,delist_date,mkt,exch_city,exch_eng,prename") stock_info_df_list.append(stock_info_df) stock_info_all_df = pd.concat(stock_info_df_list) stock_info_all_df.index.rename('WIND_CODE', inplace=True) logging.info('%s stock data will be import', stock_info_all_df.shape[0]) engine = get_db_engine() stock_info_all_df.reset_index(inplace=True) data_list = list(stock_info_all_df.T.to_dict().values()) sql_str = "REPLACE INTO wind_stock_info (wind_code, trade_code, sec_name, ipo_date, delist_date, mkt, exch_city, exch_eng, prename) values (:WIND_CODE, :TRADE_CODE, :SEC_NAME, :IPO_DATE, :DELIST_DATE, :MKT, :EXCH_CITY, :EXCH_ENG, :PRENAME)" # sql_str = "insert INTO wind_stock_info (wind_code, trade_code, sec_name, ipo_date, delist_date, mkt, exch_city, exch_eng, prename) values (:WIND_CODE, :TRADE_CODE, :SEC_NAME, :IPO_DATE, :DELIST_DATE, :MKT, :EXCH_CITY, :EXCH_ENG, :PRENAME)" with get_db_session(engine) as session: session.execute(sql_str, data_list) stock_count = session.execute('select count(*) from wind_stock_info').first()[0] logging.info("%d stocks have been in wind_stock_info", stock_count)
def get_fund_nav_between(wind_code, from_date, to_date, index_code='000300.SH'): """ 输入 wind_code 基金代码,返回基金代码以及同期沪深300指数的对比走势 :param wind_code: 基金代码 :param from_date: 起始日期 :param to_date: 截止日期 :return: 拟合后的对比走势,最新净值日的基金增长率、指数增长率、最新净值日期 """ engine = get_db_engine() sql_str = r"select nav_date, nav_acc from fund_nav where wind_code =%s order by nav_date" fund_df = pd.read_sql(sql_str, engine, params=[wind_code], parse_dates=['nav_date']) # if fund_df.shape[0] == 0: logger.info('No data found') return None fund_df.set_index('nav_date', inplace=True) fund_df.rename(columns={'nav_acc': wind_code}, inplace=True) sql_str = "select trade_date, close from wind_index_daily where wind_code=%s order by trade_date" hs300_df = pd.read_sql(sql_str, engine, params=[index_code], parse_dates=['trade_date']) # # hs300_df.set_index('trade_date', inplace=True) hs300_df.rename(columns={'close': index_code}, inplace=True) # df = pd.concat([fund_df, hs300_df], axis=1, join='inner') fund_tmp_df = fund_df.reset_index() fund_tmp_df = fund_tmp_df.rename(columns={'nav_date': 'trade_date'}) df = pd.merge(fund_tmp_df, hs300_df, on=['trade_date']) df.set_index('trade_date', inplace=True) fund_df = df.iloc[(from_date <= df.index) & (df.index <= to_date)].copy() if fund_df.shape[0] == 0: logger.info('No data between %s - %s', from_date, to_date) return None fund_df[index_code] = (fund_df[index_code].pct_change().fillna(0) + 1).cumprod() # * fund_df[wind_code][0] # pct_df = (fund_df.pct_change().fillna(0) + 1).cumprod() date_latest = fund_df.index[-1].date() # fund_rr = pct_df.iloc[-1, 0] # index_rr = pct_df.iloc[-1, 1] # return fund_df, fund_rr, index_rr, date_latest return {"fund_df": fund_df, "date_latest": date_latest}
def calc_portfolio_optim_fund(): sql = "select nav_date, nav_acc from fund_nav where wind_code = 'fh_0052'" engine = get_db_engine() wind_code = 'fh_0052' fund_db_name = 'fund_sec_pct' query_df = pd.read_sql(sql, engine) query_df = query_df.T.to_dict() query_obj = [{ "nav_date": v['nav_date'].strftime("%Y-%m-%d"), "nav_acc": v['nav_acc'] } for _, v in query_df.items()] query_json = json.dumps({"result": query_obj}) robjects.r.source(os.path.join(basedir, "Factor_Analysis_python.R")) mongo_ip = '10.0.5.107' ret_data = robjects.r['BigBro'](query_json, wind_code, fund_db_name, mongo_ip) print(ret_data)
def export_nav_excel(wind_code): """ 导出指定基金的净值数据到excel文件 :param wind_code: :return: """ engine = get_db_engine() sql_str = 'SELECT nav_date, nav, nav_acc FROM fund_nav where wind_code=%s' df = pd.read_sql(sql_str, engine, params=[wind_code]) df.rename(columns={ 'nav_date': '日期', 'nav': '净值', 'nav_acc': '累计净值' }, inplace=True) df.set_index('日期', inplace=True) file_path = get_cache_file_path('%s_nav.xls' % wind_code) df.to_excel(file_path, sheet_name=wind_code)
def update_wind_fund_info(get_df=False, mode='replace_insert'): # 更新 基金信息 table_name = 'wind_fund_info' rest = WindRest(WIND_REST_URL) # 初始化服务器接口,用于下载万得数据 engine = get_db_engine() # 初始化数据库,并获取旧表信息 old = pd.read_sql_query('select wind_code from %s' % table_name, engine) old_set = set(old['wind_code']) # 从万得获取最新基金列表 types = { u'股票多头策略': 1000023122000000, u'股票多空策略': 1000023123000000, u'其他股票策略': 1000023124000000, u'阿尔法策略': 1000023125000000, u'其他市场中性策略': 1000023126000000, u'事件驱动策略': 1000023113000000, u'债券策略': 1000023114000000, u'套利策略': 1000023115000000, u'宏观策略': 1000023116000000, u'管理期货策略': 1000023117000000, u'组合基金策略': 1000023118000000, u'货币市场策略': 1000023119000000, u'多策略': 100002312000000, u'其他策略': 1000023121000000 } df = pd.DataFrame() yestday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d') for i in types.keys(): temp = rest.wset("sectorconstituent", "date=%s;sectorid=%s" % (yestday, str(types[i]))) temp['strategy_type'] = i logging.info('%s sectorconstituent %s df.shape:%s', yestday, i, temp.shape) df = pd.concat([df, temp], axis=0) fund_types_df = df[['wind_code', 'strategy_type']] # , 'sec_name' 后续wss接口可以获得 new_set = set(fund_types_df['wind_code']) target_set = new_set.difference(old_set) # in new_set but not old_set fund_types_df.set_index('wind_code', inplace=True) # # 获取新成立基金各项基本面信息 fund_info_df = get_fund_info_df_by_wind(list(target_set)) fund_info_df = fund_types_df.join(fund_info_df, how='right') return save_fund_info(fund_info_df, get_df, mode)
def do_fund_multi_factor_by_scheme(scheme_id): """ 根据 scheme 进行所有子基金的压力测试 :param scheme_id: :return: """ sql_str = "SELECT wind_code, invest_scale FROM scheme_fund_pct where scheme_id=%(scheme_id)s" engine = get_db_engine() fund_pct_df = pd.read_sql(sql_str, engine, params={'scheme_id': str(scheme_id)}) wind_code_list = list( fund_pct_df['wind_code']) # ['XT1410445.XT', 'J11039.OF'] wind_code_count = len(wind_code_list) logger.info('multi factor for %d on wind_code_p with %s', scheme_id, wind_code_list) if wind_code_count <= 0: logger.warning('scheme %s has no sub fund list', scheme_id) return do_fund_multi_factor_by_wind_code_list(wind_code_list)
def calc_fof_index(wind_code_p): """ 计算FOF产品净值走势 根据持仓资金走势按比例计算权重组合收益指数 :param wind_code_p: :return: """ engine = get_db_engine() # sql_str = 'select wind_code, date_adj, invest_scale from fof_fund_pct where wind_code_p = %s' sql_str = """select ffm.wind_code, date_adj, invest_scale from fof_fund_pct ffp, fund_essential_info ffm where ffp.wind_code_p = %s and ffp.wind_code_s = ffm.wind_code_s""" data_df = pd.read_sql(sql_str, engine, params=[wind_code_p], parse_dates=['date_adj']) fund_inv_df = data_df.pivot(columns='date_adj', index='wind_code', values='invest_scale') fund_inv_df.columns = [dt.date() for dt in fund_inv_df.columns] nav_index_pct_all_s = None for dt in fund_inv_df.columns: fund_s = fund_inv_df[dt] tot_inv = fund_s.sum() wind_code_dic = { wind_code: float(invest_scale) / tot_inv for wind_code, invest_scale in fund_s.items() if not np.isnan(invest_scale) } nav_index_pct_s = calc_wind_code_weighted_index(wind_code_dic, dt) if nav_index_pct_all_s is None: nav_index_pct_all_s = nav_index_pct_s else: nav_index_pct_all_s = nav_index_pct_all_s[ nav_index_pct_all_s.index < dt].append( nav_index_pct_s[nav_index_pct_s.index >= dt]) nav_index_pct_all_s.drop_duplicates(inplace=True) return nav_index_pct_all_s
def clean_fund_nav(date_str): """ wind数据库中存在部分数据净值记录前后不一致的问题 比如:某日记录净值 104,次一后期净值变为 1.04 导致净值收益率走势出现偏差 此脚本主要目的在于对这种偏差进行修正 :param date_str: :return: """ sql_str = """select fn_before.wind_code, fn_before.nav_date nav_date_before, fn_after.nav_date nav_date_after, fn_before.nav_acc nav_acc_before, fn_after.nav_acc nav_acc_after, fn_after.nav_acc / fn_before.nav_acc nav_acc_pct from fund_nav fn_before, fund_nav fn_after, ( select wind_code, max(if(nav_date<%s, nav_date, null)) nav_date_before, min(if(nav_date>=%s, nav_date, null)) nav_date_after from fund_nav group by wind_code having nav_date_before is not null and nav_date_after is not null ) fn_date where fn_before.nav_date = fn_date.nav_date_before and fn_before.wind_code = fn_date.wind_code and fn_after.nav_date = fn_date.nav_date_after and fn_after.wind_code = fn_date.wind_code and fn_after.nav_acc / fn_before.nav_acc < 0.5 """ engine = get_db_engine() data_df = pd.read_sql(sql_str, engine, params=[date_str, date_str]) data_count = data_df.shape[0] if data_count == 0: logger.info('no data for clean on %s', date_str) return logger.info('\n%s', data_df) data_list = data_df.to_dict(orient='records') with get_db_session(engine) as session: for content in data_list: wind_code = content['wind_code'] nav_date_before = content['nav_date_before'] logger.info('update wind_code=%s nav_date<=%s', wind_code, nav_date_before) sql_str = "update fund_nav set nav = nav/100, nav_acc = nav_acc/100 where wind_code = :wind_code and nav_date <= :nav_date" session.execute(sql_str, params={ 'wind_code': wind_code, 'nav_date': nav_date_before })
def update_fundnav_by_csv2(file_path, mode='delete_insert'): df = pd.read_csv(file_path) df.set_index('nav_date_week', inplace=True) df_fund = df.unstack().reset_index().dropna() col_name_list = list(df_fund.columns) df_fund.rename(columns={ col_name_list[0]: 'wind_code', col_name_list[1]: 'nav_date', col_name_list[2]: 'nav' }, inplace=True) df_fund[['trade_date', 'nav_acc']] = df_fund[['nav_date', 'nav']] if mode == 'delete_insert': df_fund.set_index(['wind_code', 'trade_date'], inplace=True) # df_fund['nav_acc'] = df_fund['nav'] table_name = 'fund_nav_tmp' sql_str = 'delete from fund_nav_tmp where wind_code in (%s)' % ( "'" + "', '".join(df.columns) + "'") engine = get_db_engine() with get_db_session(engine) as session: session.execute(sql_str) df_fund.to_sql(table_name, engine, if_exists='append', dtype={ 'wind_code': String(20), 'trade_date': Date, 'nav_date': Date, 'nav': FLOAT, 'nav_acc': FLOAT, }) elif mode == 'replace_insert': data_list = list(df_fund.T.to_dict().values()) sql_str = "REPLACE INTO fund_nav_tmp (wind_code, trade_date, nav, nav_acc, nav_date) values (:wind_code, :trade_date, :nav, :nav_acc, :nav_date)" with get_db_session() as session: session.execute(sql_str, data_list) pass else: raise ValueError('mode="%s" is not available' % mode)
def import_wind_index_daily(): """导入指数数据""" engine = get_db_engine() yesterday = date.today() - timedelta(days=1) query = pd.read_sql_query( 'select wind_code,index_name, max(trade_date) as latest_date from wind_index_daily group by wind_code', engine) query.set_index('wind_code', inplace=True) with get_db_session(engine) as session: # 获取市场有效交易日数据 sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'" table = session.execute(sql_str) trade_date_sorted_list = [t[0] for t in table.fetchall()] trade_date_sorted_list.sort() date_to = get_last(trade_date_sorted_list, lambda x: x <= yesterday) logger.info('%d indexes will been import', query.shape[0]) for code in query.index: date_from = (query.loc[code, 'latest_date'] + timedelta(days=1)) date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from) if date_from is None or date_to is None or date_from > date_to: continue index_name = query.loc[code, 'index_name'] temp = rest.wsd(code, "open,high,low,close,volume,amt,turn,free_turn", date_from, date_to) temp.reset_index(inplace=True) temp.rename(columns={'index': 'trade_date'}, inplace=True) temp.trade_date = pd.to_datetime(temp.trade_date) temp.trade_date = temp.trade_date.map(lambda x: x.date()) temp['wind_code'] = code temp['index_name'] = index_name temp.set_index(['wind_code', 'trade_date'], inplace=True) temp.to_sql('wind_index_daily', engine, if_exists='append', index_label=['wind_code', 'trade_date'], dtype={ 'wind_code': String(20), 'trade_date': Date, }) logger.info('Success update %s - %s' % (code, index_name))
def get_stg_index(index_name, wind_code): """ 获取策略指数及对比指数的的对比走势 :param index_name: 策略指数名称 :param wind_code: 对比的指数wind_code :return: """ sql_str = """select nav_date, value, close from (SELECT * FROM strategy_index_val where index_name=%s) stg left outer join (select * from wind_index_daily where wind_code = %s) idx on stg.nav_date = idx.trade_date order by nav_date""" engine = get_db_engine() alpha_index_df = pd.read_sql(sql_str, engine, params=[index_name, wind_code]) alpha_index_df[ wind_code] = alpha_index_df['CLOSE'] / alpha_index_df['CLOSE'][0] alpha_index_df.ffill(inplace=True) alpha_index_df.rename(columns={'value': index_name}, inplace=True) return alpha_index_df[['nav_date', index_name, wind_code]]
def cal_factor_profit(startdate, enddate): # 获取股票暴露信息以及起止日期收益率信息 sql_origin = '''select stock_facexposure.*, stock_facexposure.Trade_Date, sti1.Trade_Date as date_start,sti1.close AS close_start, sti2.Trade_Date as date_end, sti2.close as close_end from stock_facexposure, wind_stock_daily as sti2 , wind_stock_daily as sti1 where stock_facexposure.Trade_Date= '%s' and sti1.wind_code = stock_facexposure.stock_code and sti2.wind_code = stock_facexposure.stock_code and sti1.Trade_Date = '%s' and sti2.Trade_Date = '%s' ''' sql_pd = sql_origin % (startdate, startdate, enddate) engine = get_db_engine() origin_data_df = pd.read_sql(sql_pd, engine) # 剔除价格没变的股票 origin_data_sameclose = origin_data_df[origin_data_df['close_end'] == origin_data_df['close_start']].index origin_data_df.drop(origin_data_sameclose, axis=0, inplace=True) # 获取 因子数据(因子变量存储以fac开头) data_columns = origin_data_df.columns.values fac_names = [i for i in data_columns if i[:3] == 'fac'] fac_data = origin_data_df[fac_names].copy() # 对因子进行标准化归一化去极值处理 fac_data = (fac_data - fac_data.mean()) / fac_data.std() fac_data[fac_data > 3] = 3 fac_data[fac_data < -3] = -3 # 增加常数项 fac_data['Const'] = 1 # 计算因子收益率 x_traindata = np.array(fac_data) y_traindata = np.array(origin_data_df['close_end'] / origin_data_df['close_start'] - 1) origin_data_df['Profit'] = y_traindata facprofit = np.linalg.lstsq(x_traindata, y_traindata)[0] fac_names = fac_data.columns return facprofit, fac_names, fac_data, origin_data_df
def get_fof_nav_rr_between(wind_code, from_date, to_date): """ 获取母基金及全部子基金以及指数对比走势 时间序列以母基金为准 指数做归一处理,母基金及子基金显示原始净值 :param wind_code: :param from_date: :param to_date: :return: """ from_date = str_2_date(from_date) to_date = str_2_date(to_date) engine = get_db_engine() # 获取母基金净值 fund_p_sql_str = 'select nav_date, nav_acc from fund_nav where wind_code = %s ' fund_p_data_df = pd.read_sql(fund_p_sql_str, engine, params=[wind_code]) fund_p_nav_df = fund_p_data_df.rename(columns={ 'nav_acc': wind_code }).set_index('nav_date') # # 获取子基金净值 sql_str = """select fn.wind_code, nav_date_friday as nav_date, nav_acc from fund_nav fn, ( select fund_nav.wind_code, adddate(nav_date, 4 - weekday(nav_date)) as nav_date_friday, max(nav_date) as nav_date_max from fund_nav, (select wind_code_s from fof_fund_pct f1, (select wind_code_p, max(date_adj) date_max from fof_fund_pct group by wind_code_p) f2 where f1.wind_code_p = %s and f2.date_max = f1.date_adj and f1.wind_code_p = f2.wind_code_p) wind_t where fund_nav.wind_code = wind_t.wind_code_s group by wind_code, nav_date_friday ) fnf where fn.wind_code = fnf.wind_code and fn.nav_date = fnf.nav_date_max order by nav_date_friday""" fund_data_df = pd.read_sql(sql_str, engine, params=[wind_code]) if fund_data_df.shape[0] > 0: logger.info('get fund nav data %s', fund_data_df.shape) fund_nav_df = fund_data_df.pivot(index='nav_date', columns='wind_code', values='nav_acc') # fund_nav_pct_df = fund_nav_df.pct_change() # fund_nav_rr_df = (fund_nav_pct_df + 1).cumprod() fof_nav_df = fund_p_nav_df.merge(fund_nav_df, how='left', left_index=True, right_index=True) logger.info('merge fund nav data %s', fof_nav_df.shape) else: fof_nav_df = fund_p_nav_df logger.info('get fof nav data %s', fof_nav_df.shape) # logger.info(fof_nav_df.shape) # 获取指数数据 index_code = '000300.SH' sql_str = "select trade_date, close from wind_index_daily where wind_code=%s order by trade_date" hs300_df = pd.read_sql(sql_str, engine, params=[index_code]) # , parse_dates=['trade_date'] # hs300_df.set_index('trade_date', inplace=True) hs300_df.rename(columns={'close': index_code}, inplace=True) hs300_df.set_index('trade_date', inplace=True) # df = pd.concat([fund_df, hs300_df], axis=1, join='inner') # fund_tmp_df = fof_nav_df.reset_index() # fund_tmp_df = fund_tmp_df.rename(columns={'nav_date': 'trade_date'}) # df = pd.merge(fof_nav_df, hs300_df, how='left') # df.set_index('trade_date', inplace=True) # 向左合并对齐 df = pd.merge(fof_nav_df, hs300_df, how='left', left_index=True, right_index=True) # df.ffill(inplace=True) # 取日期区间数据 fund_rr_df = df.iloc[(from_date <= df.index) & (df.index <= to_date)].copy() if fund_rr_df.shape[0] == 0: return {"fund_df": None, "rr_latest_s": None, "date_latest": None} fund_rr_dic = {} fund_rr_latest_dic = {} wind_code_list = list(fund_rr_df.columns) for code in wind_code_list: if code == wind_code: fund_nav_s = fund_rr_df[code] fund_rr_latest_dic[code] = fund_nav_s.pct_change().iloc[-1] else: fund_nav_s = fund_rr_df.pop(code) # fund_pct_s = fund_rr_df[code] if fund_nav_s.shape[0] == 0: continue is_not_nan = fund_nav_s.index[~np.isnan(fund_nav_s)] date_range = (min(is_not_nan), max(is_not_nan)) fund_nav_s = fund_nav_s[date_range[0]:date_range[1]].interpolate() fund_pct_s = fund_nav_s.pct_change().fillna(0) fund_rr_s = (fund_pct_s + 1).cumprod() fund_rr_df = fund_rr_df.merge(pd.DataFrame(fund_rr_s), how='left', left_index=True, right_index=True) fund_rr_latest_dic[code] = fund_pct_s.iloc[-1] rr_latest_s = pd.Series(fund_rr_latest_dic) if fund_rr_df.shape[0] == 0: date_latest = None else: date_latest = fund_rr_df.index[-1] return { "fund_df": fund_rr_df, "rr_latest_s": rr_latest_s, "date_latest": date_latest }
def update_fof_stg_pct(wind_code_p): """ 根据子基金投资额及子基金策略比例调整fof基金总体策略比例 :param wind_code_p: fof基金代码 :return: """ # 获取子基金投资额 # sql_str = "select wind_code_s, date_adj, invest_scale from fof_fund_pct where wind_code_p = %s" sql_str = """select ffm.wind_code, date_adj, sum(invest_scale) invest_scale from fof_fund_pct ffp, fund_essential_info ffm where ffp.wind_code_p = %s and ffp.wind_code_s = ffm.wind_code_s GROUP BY ffm.wind_code, date_adj""" engine = get_db_engine() data_df = pd.read_sql(sql_str, engine, params=[wind_code_p]) if data_df.shape[0] == 0: logger.warning('%s 没有找到子基金策略信息') return fof_fund_df = data_df.pivot(columns='wind_code', index='date_adj', values='invest_scale') fof_fund_pct_df = fof_fund_df.fillna(0) / fof_fund_df.sum( axis=1).values.repeat(fof_fund_df.shape[1]).reshape(fof_fund_df.shape) # 获取子基金策略比例 sql_str = """select DISTINCT ffm.wind_code, stg_code, trade_date, stg_pct from fund_stg_pct sp, fund_essential_info ffm, (select wind_code_s from fof_fund_pct where wind_code_p = %s group by wind_code_s) fp where sp.wind_code = ffm.wind_code and ffm.wind_code_s = fp.wind_code_s;""" data_df = pd.read_sql(sql_str, engine, params=[wind_code_p]) fund_stg_df = data_df.set_index(['wind_code', 'stg_code', 'trade_date']).unstack().T.copy() # fund_stg_df.set_index(fund_stg_df.index.levels[1], inplace=True) # fund_stg_df.index = [dt.date() for dt in fund_stg_df.index] fund_stg_df.index = [dt.date() for dt in fund_stg_df.index.levels[1]] add_df = pd.DataFrame( columns=fund_stg_df.columns, index=list(set(fof_fund_pct_df.index) - set(fund_stg_df.index))) fund_stg_df = fund_stg_df.append(add_df).sort_index() for code in fund_stg_df.columns.levels[0]: is_fill_rows = fund_stg_df[code].sum(axis=1) != 0 for stg_code in fund_stg_df[code].columns: is_fill_rows_col = np.isnan(fund_stg_df[code][stg_code]) fill_df = fund_stg_df.loc[is_fill_rows & is_fill_rows_col, code] fill_df[stg_code] = 0 fund_stg_df.loc[is_fill_rows & is_fill_rows_col, code] = fill_df.values fund_stg_df.ffill(inplace=True) fund_stg_df.fillna(0, inplace=True) # 按日循环、调整各基金、各策略比例 adj_date_last = None fund_stg_df_index = fund_stg_df.index wind_code_list = list(fund_stg_df.columns.levels[0]) fof_fund_df_index = fof_fund_pct_df.index for trade_date in fund_stg_df_index: if trade_date in fof_fund_df_index: adj_date_last = trade_date if adj_date_last is None: fund_stg_df.loc[trade_date, :] = 0 continue for code in wind_code_list: fund_stg_df.loc[trade_date, code] = ( fund_stg_df.loc[trade_date, code] * fof_fund_pct_df[code][adj_date_last]).values fund_stg_pct_df = fund_stg_df.sum(level=1, axis=1) fund_stg_pct_df = fund_stg_pct_df.drop( fund_stg_pct_df.index[fund_stg_pct_df.sum(axis=1) == 0]) fund_stg_pct_df = fund_stg_pct_df.unstack().reset_index() fund_stg_pct_df['wind_code'] = wind_code_p fund_stg_pct_df.rename(columns={ 'level_1': 'trade_date', 0: 'stg_pct' }, inplace=True) # fund_stg_pct_df.set_index('wind_code', inplace=True) # 清除原有记录 logger.info('调整基金策略比例信息:') for trade_date, df in fund_stg_pct_df.groupby('trade_date'): logger.info('交易日:%s 策略比例如下:\n%s', trade_date, df) with get_db_session(engine) as session: session.execute( 'delete from fund_stg_pct where wind_code = :wind_code', params={'wind_code': wind_code_p}) # 插入最新fof基金策略比例 fund_stg_pct_df.to_sql('fund_stg_pct', engine, if_exists='append', index=False)