def calculate(idx):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear
    result_r = {}
    components_num = {}
    for year in range(first_year, process_date.year + 1):
        if year == process_date.year:
            month = process_date.month
        else:
            month = 12
        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m
        conn = engine_rd.connect()
        date_s = dt.date(year, month, 1) - dt.timedelta(1)

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        t_std = tu.timeseries_std(dt.datetime(year, month, 10), month, 12, 1, use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [su.merge_result(x1, x2, x3) for x1, x2, x3 in zip(matchs1, matchs2, matchs3)]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        # rows are month-ends (newest first), columns are funds;
        # .values replaces the long-removed DataFrame.as_matrix()
        nvs = pd.DataFrame(nav_matchs).T.astype(float).values
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan  # discard implausible month-over-month returns
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)  # equal-weighted mean return across funds
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    adjust_periods = 1
    date_tmp = date_s - relativedelta.relativedelta(months=adjust_periods + 1)
    date_tmp = dt.date(date_tmp.year, date_tmp.month,
                       cld.monthrange(date_tmp.year, date_tmp.month)[1])
    sql_base = "SELECT index_value FROM fund_month_index_static WHERE index_id = '{idx_id}' \
AND statistic_date = '{sd}'".format(idx_id=PEIndex.id, sd=date_tmp)
    # .at replaces the deprecated DataFrame.get_value()
    base = pd.read_sql(sql_base, engine_rd).at[0, "index_value"]
    # chain-link the recent returns onto the stored base level
    result = (np.array(values_r)[-(adjust_periods + 1) - 1:] + 1).cumprod() * base
    result = result.tolist()
    values_num = values_num[-(adjust_periods + 1) - 1:]

    # dt.datetime(year, month + 1, 10) raises ValueError in December; roll the year instead
    y_next, m_next = (year + 1, 1) if month == 12 else (year, month + 1)
    tag = tu.timeseries_std(
        dt.datetime(y_next, m_next, 10),
        tu.periods_in_interval(dt.datetime(y_next, m_next, 10), dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]  # shift back 10 days to land on month-end
    tag = tag[-(adjust_periods + 1) - 1:]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name",
        "index_method", "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
        PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"],
        1, 0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    # pd.concat replaces the removed DataFrame.append()
    dfs = pd.concat([dfs, op[:-1]], ignore_index=True)
    return dfs
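# --- Illustrative sketch (not part of the pipeline) ---
# A minimal, self-contained demo of the index math in calculate() above:
# rows of `nvs` are month-ends (newest first), columns are funds; period
# returns are nvs[t] / nvs[t+1] - 1, the index return is the equal-weighted
# nanmean across funds, and the level is chain-linked onto a stored base.
# All figures below are made up for illustration.
def _demo_monthly_chain_linking():
    import numpy as np
    nvs = np.array([
        [1.10, 2.05, np.nan],  # newest month-end; the third fund has a missing nav
        [1.05, 2.00, 1.50],
        [1.00, 1.90, 1.45],    # oldest month-end
    ])
    rs = nvs[:-1] / nvs[1:] - 1            # per-fund monthly returns, newest first
    r = np.nanmean(rs, axis=1)             # equal-weighted index return per month
    r[np.isnan(r)] = 0                     # months with no valid sample contribute 0
    base = 1000.0                          # hypothetical level read from fund_month_index_static
    return (r[::-1] + 1).cumprod() * base  # chain-link oldest -> newest: ~[1045.70, 1083.67]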
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_date = PEIndex.firstmonday
    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_date.year, _process_date.year + 1):
        if year == _process_date.year:
            month = _process_date.month
            day = tu.date_of_weekday(_process_date, 0, (0, 0)).day
            if day > _process_date.day:  # the latest Monday falls in the previous month; step the month back
                month -= 1
        else:
            month = 12
            day = 31
        date_s = dt.date(year, month, day)

        sql_i = sf.SQL_PEIndex(idx, year).yeardata_w["nv"]
        sql_mindate = sf.SQL_PEIndex(idx, year).yeardata_w["t_min"]
        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        d.index = range(len(d))
        t_min = pd.read_sql(sql_mindate, conn)["statistic_date_earliest"].tolist()
        t_min = [time.mktime(x.timetuple()) for x in t_min]
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()
        last_monday = date_s - dt.timedelta(cld.weekday(date_s.year, date_s.month, date_s.day))

        t_std = tu.timeseries_std(last_monday, "a", 52, extend=1)
        # if year == first_date.year: t_std = t_std[:-1]

        su.tic("Slicing")
        t_std_long = tu.timeseries_std(
            last_monday, tu.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
        t_std_long_p1m = [(x + relativedelta(months=1)).timestamp() for x in tu.tr(t_std_long)]
        real_p1m = su.compare(t_min, t_std_long)  # each fund's earliest real date vs. the long standard series
        p1m_std = su.compare(t_std_long_p1m, t_std)  # one-month-shifted standard series vs. the standard series
        data_used = [p1m_std[x - 1] for x in real_p1m]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs = [tu.outer_match4index_w(x, t_std, False) for x in ts]

        su.tic("Getting Result...")
        # t_matchs = [x[0] for x in matchs]
        # t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        # .values replaces the long-removed DataFrame.as_matrix()
        nvs = pd.DataFrame(nav_matchs).T.astype(float).values
        print(nvs.shape)
        for i in range(len(ids)):
            # mask weeks before the fund has a full month of history
            nvs[data_used[i] + 1:, i] = np.nan
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 10] = np.nan  # discard implausible week-over-week returns
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_date.year, date_s.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    result = (np.array(values_r) + 1).cumprod() * 1000  # chain-link returns; the index starts at 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)
    tag = tu.timeseries_std(dt.datetime(year, month, day),
                            tu.periods_in_interval(dt.datetime(year, month, day),
                                                   dt.datetime(first_date.year, 1, 1), 12),
                            52, extend=5)[::-1]
    tag = [x for x in tag if x >= first_date.timestamp()]
    tag = [dt.date.fromtimestamp(x) for x in tag]

    # local debug
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name",
        "index_method", "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
        PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"],
        1, 0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    # pd.concat replaces the removed DataFrame.append()
    dfs = pd.concat([dfs, op], ignore_index=True)

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            # print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1],
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_w_samples".format(sd=last_monday.strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)
    return dfs
def calculate():
    conn = engine_rd.connect()
    year, month = yesterday.year, yesterday.month
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))
    year, month = time_to_fill.year, time_to_fill.month

    bms_used = ["hs300", "csi500", "sse50", "ssia", "cbi", "y1_treasury_rate", "nfi"]
    sql_bm = sf.SQL.market_index(date=time_to_fill.today, benchmarks=bms_used, whole=True)

    # Get benchmark prices
    bm = pd.read_sql(sql_bm, conn)
    # bm.loc[bm["statistic_date"] == dt.date(1995, 8, 16), "y1_treasury_rate"] = 2.35
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2weekly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    prices_bm = [bm.dropna(subset=[bm_name])[bm_name].tolist() for bm_name in bms_used]
    ts_bm = [bm.dropna(subset=[bm_name])["statistic_date"].tolist() for bm_name in bms_used]
    prices = prices_bm.copy()
    ts = ts_bm.copy()

    # find the PE indexes whose history starts early enough to be calculated
    t_mins_pe_all = sf.PEIndex().firstmonday
    pesid_used = []
    for k in t_mins_pe_all:
        if t_mins_pe_all[k].year < year:
            pesid_used.append(k)
        elif t_mins_pe_all[k].year == year and t_mins_pe_all[k].month <= month:
            pesid_used.append(k)

    prices_pe = []
    ts_pe = []
    pes_used = []
    for idx in pesid_used:
        PE = sf.PEIndex(idx)
        pes_used.append(PE.id)
        sql_pe = sf.SQL.pe_index(time_to_fill.today, index_id=PE.id, freq="w")
        pe = pd.read_sql(sql_pe, conn)
        pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)
        prices_pe.append(pe["index_value"].tolist())
        ts_pe.append(pe["statistic_date"].tolist())
    conn.close()

    prices.extend(prices_pe)
    ts.extend(ts_pe)
    t_mins_tstp = [min(x) for x in ts]
    t_mins = tu.tr(t_mins_tstp)

    intervals = table.intervals
    intervals1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    intervals3 = [0, 1, 2, 3, 4, 5, 6, 10, 11]
    index_used = bms_used.copy()
    index_used.extend(pes_used)
    index_name = {
        "FI01": "私募全市场指数", "FI02": "阳光私募指数", "FI03": "私募FOF指数",
        "FI04": "股票多头策略私募指数", "FI05": "股票多空策略私募指数", "FI06": "市场中性策略私募指数",
        "FI07": "债券基金私募指数", "FI08": "管理期货策略私募指数", "FI09": "宏观策略私募指数",
        "FI10": "事件驱动策略私募指数", "FI11": "相对价值策略私募指数", "FI12": "多策略私募指数",
        "FI13": "组合投资策略私募指数",
        "hs300": "沪深300指数", "csi500": "中证500指数", "sse50": "上证50指数",
        "ssia": "上证A股指数", "cbi": "中债指数", "nfi": "南华商品指数",
        "y1_treasury_rate": "y1_treasury_rate"
    }

    result = []
    for mday in range(1, yesterday.day + 1):
        print("Day {0}: {1}".format(mday, dt.datetime.now()))
        date_s = sf.Time(dt.datetime(year, month, mday))

        # Generate statistic_date
        t_stds = [tu.timeseries_std(date_s.today, interval, 52, extend=1)
                  for interval in intervals]  # standard series per interval
        t_std_lens = [len(x) - 1 for x in t_stds]  # number of nav samples in each standard series
        t_std_week = tu.timeseries_std(date_s.today, "w", 52, 1)  # standard series for the current week
        ts_std_total = [tu.timeseries_std(date_s.today, tu.periods_in_interval(date_s.today, t_min, 12), extend=4)
                        for t_min in t_mins]  # standard series since inception
        ts_std_total = [t_std_total[:len([x for x in t_std_total if x >= t_min]) + 1]
                        for t_std_total, t_min in zip(ts_std_total, t_mins_tstp)]  # trimmed to each index's first date

        matchs = [tu.outer_match4indicator_w(t, t_std_all, False)
                  for t, t_std_all in zip(ts, ts_std_total)]
        idx_matchs = [x[1] for x in matchs]
        prices_total = [[price[ix] if ix is not None else None for ix in idx.values()]
                        for price, idx in zip(prices, idx_matchs)]

        # returns of each index over the different horizons
        rs_total = [fi.gen_return_series(price_total) for price_total in prices_total]

        # risk-free rate: the `y1_treasury_rate` series in prices_total is already a return, not a price
        r_f_total = prices_total[5][1:]
        r_f_total = pd.DataFrame(r_f_total).fillna(method="backfill")[0].tolist()
        r_f_all = [r_f_total[:length - 1] for length in t_std_lens]
        r_f_all.append(r_f_total)

        for i in range(len(index_used)):
            if index_name[index_used[i]] == "y1_treasury_rate":
                continue
            price_all = []
            r_all = []
            for j in range(7):
                if dt.date.fromtimestamp((t_mins[i] + relativedelta(months=intervals[j])).timestamp()) <= date_s.today:
                    price_all.append(prices_total[i][:t_std_lens[j]])
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    price_all.append([])
                    r_all.append([])
            for j in range(7, 11):
                price_all.append(prices_total[i][:t_std_lens[j]])
                if rs_total[i] is not None:
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    r_all.append([])
            price_all.append(prices_total[i])
            r_all.append(rs_total[i])

            price_all1 = [price_all[k] for k in intervals1]
            price_all3 = [price_all[k] for k in intervals3]
            r_all1 = [r_all[k] for k in intervals1]
            r_all3 = [r_all[k] for k in intervals3]
            r_f_all1 = [r_f_all[k] for k in intervals1][:-1]
            r_f_all3 = [r_f_all[k] for k in intervals3][:-1]
            r_f_all1.append(r_f_all[-1][:len(r_all[-1])])
            r_f_all3.append(r_f_all[-1][:len(r_all[-1])])

            ir = [fi.accumulative_return(price) for price in price_all1]
            ir_a = [fi.return_a(r) for r in r_all1]
            stdev_a = [fi.standard_deviation_a(r) for r in r_all3]
            dd_a = [fi.downside_deviation_a(r, r_f) for r, r_f in zip(r_all3, r_f_all3)]
            mdd = [fi.max_drawdown(price)[0] for price in price_all3]
            sharpe_a = [fi.sharpe_a(r, r_f) for r, r_f in zip(r_all3, r_f_all3)]
            calmar_a = [fi.calmar_a(price, r_f) for price, r_f in zip(price_all3, r_f_all3)]
            sortino_a = [fi.sortino_a(r, r_f) for r, r_f in zip(r_all3, r_f_all3)]
            p_earning_weeks = [fi.periods_positive_return(r) for r in r_all3]
            n_earning_weeks = [fi.periods_npositive_return(r) for r in r_all3]
            con_rise_weeks = [fi.periods_continuous_rise(r)[0] for r in r_all3]
            con_fall_weeks = [fi.periods_continuous_fall(r)[0] for r in r_all3]

            tmp = [ir, ir_a, stdev_a, dd_a, mdd, sharpe_a, calmar_a, sortino_a,
                   p_earning_weeks, n_earning_weeks, con_rise_weeks, con_fall_weeks]
            result_i = [index_used[i], index_name[index_used[i]], date_s.today]
            for x in tmp:
                result_i.extend(x)
            result.append(result_i)

    df = pd.DataFrame(result)
    df[list(range(3, 117))] = df[list(range(3, 117))].astype(np.float64)
    df[list(range(3, 117))] = df[list(range(3, 117))].apply(lambda x: round(x, 6))
    df.columns = columns
    df.index_id = df.index_id.apply(lambda x: x.upper())
    return df
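# --- Illustrative sketch (not part of the pipeline) ---
# Plausible re-implementations of two of the fi.* indicators used above, for
# readers without the fi module; the authoritative definitions are whatever
# fi.accumulative_return and fi.max_drawdown actually implement.
def _demo_indicators():
    import numpy as np
    price = [1.00, 1.05, 1.02, 1.10, 1.04]       # made-up navs, oldest -> newest
    acc_return = price[-1] / price[0] - 1        # accumulative return: 0.04
    p = np.array(price)
    drawdown = 1 - p / np.maximum.accumulate(p)  # drop from the running peak
    mdd = drawdown.max()                         # max drawdown: ~0.0545
    return acc_return, mdd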
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear
    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_year, process_date.year + 1):
        if year == process_date.year:
            month = process_date.month
        else:
            month = 12
        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m
        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()
        t_std = tu.timeseries_std(dt.datetime(year, month, 10), month, 12, 1, use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [su.merge_result(x1, x2, x3) for x1, x2, x3 in zip(matchs1, matchs2, matchs3)]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        # .values replaces the long-removed DataFrame.as_matrix()
        nvs = pd.DataFrame(nav_matchs).T.astype(float).values
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan  # discard implausible month-over-month returns
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    result = (np.array(values_r) + 1).cumprod() * 1000  # chain-link returns; the index starts at 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    # dt.datetime(year, month + 1, 10) raises ValueError in December; roll the year instead
    y_next, m_next = (year + 1, 1) if month == 12 else (year, month + 1)
    tag = tu.timeseries_std(
        dt.datetime(y_next, m_next, 10),
        tu.periods_in_interval(dt.datetime(y_next, m_next, 10), dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]  # shift back 10 days to land on month-end

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name",
        "index_method", "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
        PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"],
        1, 0, "私募云通"
    ]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    # pd.concat replaces the removed DataFrame.append()
    dfs = pd.concat([dfs, op[:-1]], ignore_index=True)

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1],
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_m_samples".format(sd=tag[-2].strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)
    return dfs
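# --- Usage sketch (illustrative) ---
# Assumes process_date, engine_rd and the sf/tu/su/io helpers are initialized
# at module level; "FI04" is one of the PE index ids listed in index_name, and
# the export path is hypothetical.
#
#   dfs = calculate("FI04")                      # monthly index values only
#   dfs = calculate("FI04", export_path="/tmp")  # also writes the
#   # "<YYYYMMDD>_<index_id>_m_samples" workbook of per-fund nav samples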
def cal_std(fund_ids=None, whole=False):
    """Standardize nav series onto weekly and monthly date grids, fund by fund.

    Args:
        fund_ids: str, pre-quoted comma-separated ids spliced into a SQL ``IN (...)`` clause
        whole: bool, if True process the whole fund_nv_data_standard table
            instead of only rows updated since yesterday

    Returns:
        dict with keys "w" (weekly) and "m" (monthly) mapping to the matched DataFrames
    """
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
    else:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
WHERE update_time >= {ut}".format(ut=yesterday)
    if fund_ids:
        if 'WHERE' in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
        else:
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)

    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)
    criterias = [(df_nav["nav"] >= 0.2), (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >= dt.date(1970, 1, 2)),
                 (df_nav["statistic_date"] <= dt.date.today())]

    su.tic("Preprocessing......")
    criteria = get_creterias(criterias)
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"], ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()
    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]

    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])

    # weekly standard series since inception
    t_std_alls_w = [tu.timeseries_std(friday, tu.periods_in_interval(friday, t_min, 12), extend=4)
                    for t_min in t_mins]
    t_std_alls_w = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
                    for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)]
    t_std_alls_w = [t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
                    for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)]
    # monthly standard series since inception
    t_std_alls_m = [tu.timeseries_std(date, tu.periods_in_interval(date, t_min, 12),
                                      periods_y=12, use_lastday=True, extend=6)
                    for date, t_min in zip(t_maxs, t_mins)]
    t_std_alls_m = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
                    for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)]

    su.tic("Matching......")
    matchs_w = [tu.outer_match4indicator_w(t_real, t_std)
                for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)]
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                    for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                     for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                       for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[t_real[ix] if ix is not None else None for ix in idx.values()]
                  for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_w]

    matchs_m = [tu.outer_match4indicator_m(t_real, t_std)
                for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)]
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                    for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                     for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                       for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[t_real[ix] if ix is not None else None for ix in idx.values()]
                  for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_m]

    result_w = {
        "fund_id": ids, "nav": nav_matchs_w, "added_nav": anav_matchs_w, "swanav": swanav_matchs_w,
        "statistic_date": t_matchs_w, "statistic_date_std": t_matchs_std_w
    }
    result_m = {
        "fund_id": ids, "nav": nav_matchs_m, "added_nav": anav_matchs_m, "swanav": swanav_matchs_m,
        "statistic_date": t_matchs_m, "statistic_date_std": t_matchs_std_m
    }

    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))
    return result
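# --- Usage sketch (illustrative) ---
# `fund_ids` is spliced verbatim into a SQL IN (...) clause, so ids must
# arrive pre-quoted; the ids below are hypothetical.
#
#   res = cal_std(fund_ids="'JR000001','JR000002'")   # only recently updated rows
#   weekly, monthly = res["w"], res["m"]
#   res_all = cal_std(whole=True)                     # rematch the entire table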