print("length of Data: {0}".format(len(d))) conn.close() # t_stds = [tu.timeseries_std(date_s.today, interval, periods_y=12, use_lastday=True, extend=1) for interval in intervals] # 标准序列 ###w->m t_std_y5 = t_stds[6] t_stds_len = [len(x) - 1 for x in t_stds] # 标准序列净值样本个数 t_std_alls = [tu.timeseries_std(date_s.today, tu.periods_in_interval(date_s.today, t_min, 12), periods_y=12, use_lastday=True, extend=6) for t_min in t_mins] # 标准序列_成立以来 t_std_alls = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1] for t_std_all, t_min in zip(t_std_alls, t_mins_tstp)] # 基金标准序列_成立以来 matchs_all = [tu.outer_match4indicator_m(t_real, t_std_all, False) for t_real, t_std_all in zip(t_reals, t_std_alls)] # 匹配 ###w->m idx_matchs_all = [x[1] for x in matchs_all] t_matchs_all = [[t_real[ix] if ix is not None else None for ix in idx.values()] for t_real, idx in zip(t_reals, idx_matchs_all)] nav_matchs_all = [[nav[ix] if ix is not None else None for ix in idx.values()] for nav, idx in zip(navs, idx_matchs_all)] navs_all_len = dict(zip(range(len(ids_used)), [len(x) for x in nav_matchs_all])) # 实际序列净值样本个数 # 基准指数的标准序列_成立以来 matchs_bm = [tu.outer_match4indicator_m(t_bm, t_std_all, False) for t_std_all in t_std_alls] idx_matchs_bm = [x[1] for x in matchs_bm] price_bm0_all = [[prices_bm[0][ix] if ix is not None else None for ix in idx.values()] for idx in idx_matchs_bm] price_bm1_all = [[prices_bm[1][ix] if ix is not None else None for ix in idx.values()] for idx in idx_matchs_bm]
def calculate():
    """Compute advisor-level indicators (odds, persistence, timing and
    security-selection competency) against several benchmarks for every
    investment advisor whose funds had NAV updates in the processing month.

    Reads fund/org mappings and NAVs through the module-level ``engine_read``,
    aligns monthly NAV series to standard date grids, aggregates each
    advisor's funds into one pseudo price series, and scores it against the
    benchmarks in ``benchmark`` / ``benchmark_all``.

    Returns:
        pd.DataFrame: one row per (advisor, benchmark id), columns taken from
        the module-level ``columns``.
    """
    df_whole = pd.DataFrame()
    conn = engine_read.connect()
    year = process_date.year
    month = process_date.month
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))

    # Benchmark prices and the PE index, monthly frequency.
    sql_bm = sf.SQL.market_index(time_to_fill.today)
    sql_pe = sf.SQL.pe_index(time_to_fill.today, freq="m")
    bm = pd.read_sql(sql_bm, engine_read)
    # Backfill the 1y treasury rate, convert it to a monthly rate, and turn
    # dates into timestamps for grid matching.
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].bfill()
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2monthly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    pe = pd.read_sql(sql_pe, engine_read)
    pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)
    conn.close()

    # BUGFIX: "nfi" was the only benchmark kept as a pandas Series; every
    # consumer indexes prices_bm[4] positionally like the other four lists,
    # so materialize it as a list as well.
    prices_bm = [
        bm["hs300"].tolist(), bm["csi500"].tolist(), bm["sse50"].tolist(),
        bm["cbi"].tolist(), bm["nfi"].tolist()
    ]
    price_pe = pe["index_value"].tolist()
    r_tbond = bm["y1_treasury_rate"].tolist()
    t_bm = bm["statistic_date"].tolist()
    t_pe = pe["statistic_date"].tolist()

    intervals = table.intervals
    intervals5 = [1, 2, 3, 4, 5, 6, 10, 11]  # slots used by odds/persistence
    intervals6 = [2, 3, 4, 5, 6, 10, 11]     # slots used by timing/selection
    result = []

    conn = engine_read.connect()
    # Statistic date: last day of the month before process_date.
    date_s = sf.Time(process_date - dt.timedelta(process_date.day))
    sql_fids_updated = sf.SQL.ids_updated_sd(date_s.today, "om")
    # Funds whose NAV was updated in the current month.
    ids_updated = tuple(
        x[0] for x in conn.execute(sql_fids_updated).fetchall())
    # Advisors (org_type_code = 1) managing those funds, founded >= 3 months
    # before the statistic date.
    sql_o_updated = "SELECT DISTINCT fom.org_id FROM fund_org_mapping fom \
    JOIN org_info oi ON fom.org_id = oi.org_id \
    WHERE org_type_code = 1 AND oi.found_date <= '{0}' AND fund_id IN {1}".format(
        date_s.today - relativedelta(months=3), ids_updated)
    o_updated = tuple(x[0] for x in conn.execute(sql_o_updated).fetchall())
    # All funds managed by the advisors that need recalculation.
    sql_fom = "SELECT fom.org_id, fom.fund_id, oi.found_date, oi.org_name FROM fund_org_mapping fom \
    JOIN org_info oi ON fom.org_id = oi.org_id \
    JOIN fund_info fi ON fom.fund_id = fi.fund_id \
    WHERE fom.org_id IN {0} AND fom.org_type_code = 1 AND oi.found_date <= '{1}' AND fi.foundation_date <= '{2}'".format(
        o_updated, date_s.today - relativedelta(months=3),
        date_s.today - relativedelta(months=1))
    fom = pd.read_sql(sql_fom, conn)

    fid_used = tuple(fom["fund_id"])
    sql_fnd = sf.SQL.nav(fid_used)
    fnd = pd.read_sql(sql_fnd, conn)
    fnd = fnd.dropna()
    fnd.index = range(len(fnd))

    data = fom.merge(fnd, how="inner", on="fund_id")
    data = data.sort_values(by=["org_id", "fund_id", "statistic_date"],
                            ascending=[True, True, False])
    # Earliest NAV date per advisor (inception proxy), as datetime and tstamp.
    t_mins = data.groupby(["org_id"])["statistic_date"].min().tolist()
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    data["statistic_date"] = data["statistic_date"].apply(
        lambda x: time.mktime(x.timetuple()))
    data.index = range(len(data))
    ids_o = data["org_id"].drop_duplicates().tolist()
    names_o = data.drop_duplicates(subset=["org_id"])["org_name"].tolist()
    # Slice `data` into one frame per advisor.
    # NOTE(review): assumes su.idx4slice returns group-start offsets plus a
    # trailing sentinel (so the else-branch is defensive dead code); with
    # starts only, the last advisor would be dropped — confirm.
    idx4slice_o = su.idx4slice(data, "org_id")
    dfs = [
        data[idx4slice_o[i]:idx4slice_o[i + 1]]
        if i != len(idx4slice_o) - 1 else data[idx4slice_o[i]:]
        for i in range(len(idx4slice_o) - 1)
    ]

    # Preprocess: standard monthly date grid per interval.
    t_stds = [
        tu.timeseries_std(date_s.today, interval, periods_y=12,
                          use_lastday=True, extend=1)
        for interval in intervals
    ]
    t_std_y5 = t_stds[6]
    t_stds_len = [len(x) - 1 for x in t_stds]  # sample count per interval
    # Since-inception standard grid per advisor, truncated at inception.
    t_std_alls = [
        tu.timeseries_std(date_s.today,
                          tu.periods_in_interval(date_s.today, t_min, 12),
                          periods_y=12, use_lastday=True, extend=6)
        for t_min in t_mins
    ]
    t_std_alls = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls, t_mins_tstp)
    ]

    # Benchmark prices aligned to the since-inception grids.
    matchs_bm = [
        tu.outer_match4indicator_m(t_bm, t_std_all, False)
        for t_std_all in t_std_alls
    ]
    idx_matchs_bm = [x[1] for x in matchs_bm]
    price_bm0_all = [[prices_bm[0][ix] if ix is not None else None
                      for ix in idx.values()] for idx in idx_matchs_bm]
    price_bm1_all = [[prices_bm[1][ix] if ix is not None else None
                      for ix in idx.values()] for idx in idx_matchs_bm]
    price_bm2_all = [[prices_bm[2][ix] if ix is not None else None
                      for ix in idx.values()] for idx in idx_matchs_bm]
    price_bm3_all = [[prices_bm[3][ix] if ix is not None else None
                      for ix in idx.values()] for idx in idx_matchs_bm]
    price_bm4_all = [[prices_bm[4][ix] if ix is not None else None
                      for ix in idx.values()] for idx in idx_matchs_bm]
    matchs_pe = [
        tu.outer_match4indicator_m(t_pe, t_std_all, False)
        for t_std_all in t_std_alls
    ]
    idx_matchs_pe = [x[1] for x in matchs_pe]
    price_pe_all = [[price_pe[ix] if ix is not None else None
                     for ix in idx.values()] for idx in idx_matchs_pe]

    # Benchmark return series since inception.
    r_bm0_all = [fi.gen_return_series(x) for x in price_bm0_all]
    r_bm1_all = [fi.gen_return_series(x) for x in price_bm1_all]
    r_bm2_all = [fi.gen_return_series(x) for x in price_bm2_all]
    r_bm3_all = [fi.gen_return_series(x) for x in price_bm3_all]
    r_bm4_all = [fi.gen_return_series(x) for x in price_bm4_all]
    r_pe_all = [fi.gen_return_series(x) for x in price_pe_all]

    # Risk-free rate aligned per advisor; a backfilled copy of the longest
    # match plugs the grid points with no treasury observation.
    match_lens = [len(idx) for idx in idx_matchs_bm]
    longest = match_lens.index(max(match_lens))
    r_f_raw = [r_tbond[ix] if ix is not None else None
               for ix in idx_matchs_bm[longest].values()]
    r_f_filled = pd.DataFrame(r_f_raw)[0].bfill().tolist()
    # NOTE(review): `r_f_filled[k]` assumes match dicts are keyed 0..n-1.
    r_f_all = [[r_tbond[idx[k]] if idx[k] is not None else r_f_filled[k]
                for k in idx.keys()] for idx in idx_matchs_bm]
    r_f_all = [x[1:] for x in r_f_all]

    # Benchmark prices/returns on the fixed 5-year grid.
    matchs_bm = tu.outer_match4indicator_m(t_bm, t_std_y5, False)
    matchs_pe = tu.outer_match4indicator_m(t_pe, t_std_y5, False)
    idx_matchs_bm = matchs_bm[1]
    idx_matchs_pe = matchs_pe[1]
    price_bm0_y5 = [prices_bm[0][ix] if ix is not None else None
                    for ix in idx_matchs_bm.values()]
    price_bm1_y5 = [prices_bm[1][ix] if ix is not None else None
                    for ix in idx_matchs_bm.values()]
    price_bm2_y5 = [prices_bm[2][ix] if ix is not None else None
                    for ix in idx_matchs_bm.values()]
    price_bm3_y5 = [prices_bm[3][ix] if ix is not None else None
                    for ix in idx_matchs_bm.values()]
    price_bm4_y5 = [prices_bm[4][ix] if ix is not None else None
                    for ix in idx_matchs_bm.values()]
    price_pe_y5 = [price_pe[ix] if ix is not None else None
                   for ix in idx_matchs_pe.values()]
    r_bm0_y5 = fi.gen_return_series(price_bm0_y5)
    r_bm1_y5 = fi.gen_return_series(price_bm1_y5)
    r_bm2_y5 = fi.gen_return_series(price_bm2_y5)
    r_bm3_y5 = fi.gen_return_series(price_bm3_y5)
    r_bm4_y5 = fi.gen_return_series(price_bm4_y5)
    r_pe_y5 = fi.gen_return_series(price_pe_y5)
    r_f_y5 = [r_tbond[ix] if ix is not None else None
              for ix in idx_matchs_bm.values()]
    r_f_y5 = r_f_y5[1:]

    # Truncate the 5y return series to each interval's sample count.
    rs_bm0 = [r_bm0_y5[:length - 1] for length in t_stds_len]
    rs_bm1 = [r_bm1_y5[:length - 1] for length in t_stds_len]
    rs_bm2 = [r_bm2_y5[:length - 1] for length in t_stds_len]
    rs_bm3 = [r_bm3_y5[:length - 1] for length in t_stds_len]
    rs_bm4 = [r_bm4_y5[:length - 1] for length in t_stds_len]
    rs_pe = [r_pe_y5[:length - 1] for length in t_stds_len]
    rs_f = [r_f_y5[:length - 1] for length in t_stds_len]

    # Benchmark id -> per-interval series / since-inception series.
    benchmark = {1: rs_bm0, 2: rs_bm1, 3: rs_bm2, 4: rs_pe,
                 6: rs_bm3, 7: rs_bm4}
    benchmark_all = {1: r_bm0_all, 2: r_bm1_all, 3: r_bm2_all, 4: r_pe_all,
                     6: r_bm3_all, 7: r_bm4_all}

    for i in range(len(ids_o)):
        df = dfs[i]
        df.index = range(len(df))
        idx4slice = su.idx4slice(df, "fund_id")
        navs = su.slice(df, idx4slice, "nav")
        t_reals = su.slice(df, idx4slice, "statistic_date")
        matchs_all = [
            tu.outer_match4indicator_m(t_real, t_std_alls[i], drop_none=False)
            for t_real in t_reals
        ]
        idx_matchs_all = [x[1] for x in matchs_all]
        nav_matchs_all = [[nav[ix] if ix is not None else np.nan
                           for ix in idx.values()]
                          for nav, idx in zip(navs, idx_matchs_all)]
        # Rows = grid dates (newest first), columns = this advisor's funds.
        nv_matrix = np.array(nav_matchs_all).T
        # Advisor pseudo price: cumulate the mean monthly return across funds.
        r_total = np.nanmean((nv_matrix[:-1] / nv_matrix[1:] - 1), axis=1)
        price_total = np.nancumprod(1 + r_total[::-1])[::-1].tolist()
        price_total.append(1)  # base-period pseudo price defined as 1
        r_total = fi.gen_return_series(price_total)

        prices = []
        for j in range(7):
            # Short intervals only count if the advisor spans the interval.
            if t_mins[i] + relativedelta(months=intervals[j]) <= date_s.today:
                length = min(len(price_total), t_stds_len[j])
                prices.append(price_total[:length])
            else:
                prices.append(None)
        for j in range(7, 11):
            length = min(len(price_total), t_stds_len[j])
            prices.append(price_total[:length])
        prices.append(price_total)  # slot 11: since inception

        navs2 = [prices[j] for j in intervals5]
        navs3 = [prices[j] for j in intervals6]
        rs2 = [fi.gen_return_series(x) for x in navs2]
        rs3 = [fi.gen_return_series(x) for x in navs3]
        rs_f_ = rs_f.copy()
        rs_f_.append(r_f_all[i])
        rs_f3_ = [rs_f_[j] for j in intervals6]

        for k in benchmark.keys():
            rs_bm_ = benchmark[k].copy()  # series of the chosen benchmark
            rs_bm_.append(benchmark_all[k][i])
            rs_bm2 = [rs_bm_[j] for j in intervals5]
            rs_bm3 = [rs_bm_[j] for j in intervals6]
            s_time = [fi.competency_timing(r, r_bm, r_f)
                      for r, r_bm, r_f in zip(rs3, rs_bm3, rs_f3_)]
            s_security = [fi.competency_stock(r, r_bm, r_f)
                          for r, r_bm, r_f in zip(rs3, rs_bm3, rs_f3_)]
            persistence = [fi.persistence_er(r, r_bm)
                           for r, r_bm in zip(rs2, rs_bm2)]
            odds = [fi.odds(r, r_bm) for r, r_bm in zip(rs2, rs_bm2)]
            metrics = [odds, persistence, s_time, s_security]
            result_i = [ids_o[i], names_o[i], k, 1, 1, nv_matrix.shape[1],
                        60001, "全产品", 6000101, "全产品", date_s.today]
            for x in metrics:
                result_i.extend(x)
            result.append(result_i)

    conn.close()  # was leaked before: second connection never closed

    df = pd.DataFrame(result)
    df[list(range(11, 41))] = df[list(range(11, 41))].astype(np.float64)
    df[list(range(11, 41))] = df[list(range(11, 41))].apply(
        lambda x: round(x, 6))
    df.columns = columns
    # DataFrame.append is deprecated (removed in pandas 2.0); concat instead.
    df_whole = pd.concat([df_whole, df])
    return df_whole
def match_by_std(obj, **kwargs):
    """Match each object's time series to standard time series and apply the
    same alignment to its other attributes.

    Args:
        obj: dict like ``{id: {key1: Iterable, key2: Iterable, ...}}``.

    Keyword Args:
        key_tm (str): key of the time series.
        key_used (Iterable[str]): keys to align to the standard series.
        date_s: statistic date (start) of the standard series; date,
            datetime or float timestamp.
        date_e: earliest date (end) of the standard series; a single
            date/datetime/float, or a dict ``{id: date}`` covering the
            ids of ``obj``.
        intervals (Iterable): intervals to match, chosen from
            {1, 3, 6, 12, 24, 36, 60, "w", "m", "q", "a", "whole"}.
        freq (str): frequency of the standard series, one of {"w", "m"}.
        extend (int | dict): extra sample count, either global or per
            interval as ``{interval: extra_num}``. Default None.
        shift (dict): ``{id: shift_num}`` for ids whose match must be
            shifted.
        apply (dict): ``{id: func}`` applied to the matched "whole" series.

    Returns:
        dict like ``{id: {key_used: {interval: Iterable}}}``.
    """
    kw_used = ("key_tm", "key_used", "date_s", "date_e", "intervals", "freq",
               "shift", "extend", "apply")
    key_tm, key_used, date_s, date_e, intervals, freq, shift, extend, \
        apply_funcs = meta.get_kwargs_used(kw_used, **kwargs)

    # Normalize `date_e` to {id: timestamp}.
    if isinstance(date_e, (dt.date, dt.datetime, float)):
        date_e = dict.fromkeys(obj.keys(), tu.universal_time(date_e)[1])
    elif date_e is None:
        date_e = {
            iid: tu.universal_time(min(attributes[key_tm]))[1]
            for iid, attributes in obj.items()
        }

    tm_series_std_alls = gen_stdseries_longest(
        iids=obj, key_tm=key_tm, date_s=date_s, date_e=date_e, freq=freq,
        weekday=kwargs.get("weekday", False))
    tm_series_std = gen_stdseries(
        date_s=date_s, freq=freq, intervals=intervals, extend=extend,
        weekday=kwargs.get("weekday", False))
    sample_nums = {
        interval: len(serie) for interval, serie in tm_series_std.items()
    }

    # Whole-history match for every id; a dedicated weekly match only exists
    # for weekly/daily frequency.
    if freq == "w" or freq == "d":
        matchs_whole = {
            iid: tu.outer_match4indicator_w(
                attributes[key_tm], tm_series_std_alls[iid], False)[1]
            for iid, attributes in obj.items()
        }
        matchs_w = {
            iid: tu.outer_match4indicator_w(
                attributes[key_tm], tm_series_std["w"])[1]
            for iid, attributes in obj.items()
        }
    elif freq == "m":
        matchs_whole = {
            iid: tu.outer_match4indicator_m(
                attributes[key_tm], tm_series_std_alls[iid], False)[1]
            for iid, attributes in obj.items()
        }

    result = {}
    if shift is None:
        shift = {}
    date_s_dt = dt.date.fromtimestamp(tu.universal_time(date_s)[1])
    intervals_regular = [
        interval for interval in intervals if interval not in ("w", "whole")
    ]

    # Match for each object.
    for iid, attributes in obj.items():
        shift_iid = shift.get(iid, 0)
        result[iid] = {}
        date_e_iid = dt.date.fromtimestamp(date_e[iid])
        for key in key_used:
            freq_of_key = {}
            freq_of_key["whole"] = [
                attributes[key][idx] if idx is not None else None
                for idx in matchs_whole[iid].values()
            ]
            if apply_funcs is not None and iid in apply_funcs:
                freq_of_key["whole"] = apply_funcs[iid](freq_of_key["whole"])
            if shift_iid > 0:
                freq_of_key["whole"] = freq_of_key["whole"][shift_iid:]

            # Decide, from the object's lifetime, which intervals it covers.
            interval_used = _check_intervals(date_s_dt, date_e_iid, intervals)
            length_max = len(freq_of_key["whole"])
            for interval in intervals_regular:
                if interval_used[interval]:
                    sp_num = sample_nums[interval] - shift_iid - 1
                    freq_of_key[interval] = freq_of_key["whole"][:sp_num]
                    # Pad with None when history is shorter than the grid.
                    if sp_num > length_max:
                        freq_of_key[interval].extend(
                            [None] * (sp_num - length_max))
                else:
                    freq_of_key[interval] = None

            # BUGFIX: `matchs_w` is only defined for weekly/daily frequency;
            # the previously unguarded assignment raised NameError whenever
            # freq == "m". Restore the (commented-out) guard.
            if freq == "w" or freq == "d":
                freq_of_key["w"] = [
                    attributes[key][idx] if idx is not None else None
                    for idx in matchs_w[iid].values()
                ]
            result[iid][key] = freq_of_key
    return result
def calculate():
    """Compute per-index performance indicators (returns, risk, ratios,
    streaks) for market benchmarks and PE indexes as of the month end.

    Reads benchmark and PE index levels through the module-level
    ``engine_rd``, aligns them to standard monthly date grids, and returns
    one row per index with columns taken from the module-level ``columns``.
    """
    conn = engine_rd.connect()
    year, month = yesterday.year, yesterday.month
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))
    year, month = time_to_fill.year, time_to_fill.month
    bms_used = [
        "hs300", "csi500", "sse50", "ssia", "cbi", "y1_treasury_rate", "nfi"
    ]
    # Benchmark prices for the whole history up to the fill date.
    sql_bm = sf.SQL.market_index(date=time_to_fill.today,
                                 benchmarks=bms_used,
                                 whole=True)
    bm = pd.read_sql(sql_bm, conn)
    # Patch a known missing treasury observation, then backfill and convert
    # the annual rate to a monthly rate; dates become timestamps for matching.
    bm.loc[bm["statistic_date"] == dt.date(1995, 8, 16),
           "y1_treasury_rate"] = 2.35
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2monthly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    # Per-benchmark price and date lists (NaN rows dropped per column).
    prices_bm = [
        bm.dropna(subset=[bm_name])[bm_name].tolist() for bm_name in bms_used
    ]
    ts_bm = [
        bm.dropna(subset=[bm_name])["statistic_date"].tolist()
        for bm_name in bms_used
    ]
    prices = prices_bm.copy()
    ts = ts_bm.copy()

    # Find the PE indexes that have enough history to be computed.
    t_mins_pe_all = sf.PEIndex().firstyear
    t_mins_pe_all = {
        k: dt.datetime(x - 1, 12, 31) for (k, x) in t_mins_pe_all.items()
    }
    pesid_used = []
    for k in t_mins_pe_all:
        if t_mins_pe_all[k].year < year:
            pesid_used.append(k)
        elif t_mins_pe_all[k].year == year:
            if t_mins_pe_all[k].month < month:
                pesid_used.append(k)
            else:
                continue
        else:
            continue
    prices_pe = []
    ts_pe = []
    pes_used = []
    for idx in pesid_used:
        PE = sf.PEIndex(idx)
        pes_used.append(PE.id)
        sql_pe = sf.SQL.pe_index(time_to_fill.today, index_id=PE.id, freq="m")
        pe = pd.read_sql(sql_pe, conn)
        # 864000 s = 10 days; shifts each PE statistic_date back 10 days.
        # NOTE(review): presumably aligns PE publication dates onto the
        # month-end grid — confirm the intent.
        pe["statistic_date"] = pe["statistic_date"].apply(
            lambda x: su.date2tstp(x) - 864000)
        prices_pe.append(pe["index_value"].tolist())
        ts_pe.append(pe["statistic_date"].tolist())
    conn.close()
    prices.extend(prices_pe)
    ts.extend(ts_pe)
    # Earliest observation per index (timestamp and converted form).
    t_mins_tstp = [min(x) for x in ts]
    t_mins = tu.tr(t_mins_tstp)
    intervals = table.intervals
    intervals1 = [1, 2, 3, 4, 5, 6, 9, 10, 11]  # slots for return indicators
    intervals3 = [1, 2, 3, 4, 5, 6, 10, 11]     # slots for risk indicators
    index_used = bms_used.copy()
    index_used.extend(pes_used)
    index_name = {
        "FI01": "私募全市场指数", "FI02": "阳光私募指数", "FI03": "私募FOF指数",
        "FI04": "股票多头策略私募指数", "FI05": "股票多空策略私募指数",
        "FI06": "市场中性策略私募指数", "FI07": "债券基金私募指数",
        "FI08": "管理期货策略私募指数", "FI09": "宏观策略私募指数",
        "FI10": "事件驱动策略私募指数", "FI11": "相对价值策略私募指数",
        "FI12": "多策略私募指数", "FI13": "组合投资策略私募指数",
        "hs300": "沪深300指数", "csi500": "中证500指数", "sse50": "上证50指数",
        "ssia": "上证A股指数", "cbi": "中债指数", "nfi": "南华商品指数",
        "y1_treasury_rate": "y1_treasury_rate"
    }
    result = []
    # Only day 7 is produced here; the range is kept for easy widening.
    for mday in range(7, 8):
        print("Day {0}: {1}".format(mday, dt.datetime.now()))
        # Statistic date: end of the previous month relative to (year, month, mday).
        date_s = sf.Time(dt.datetime(year, month, mday) - dt.timedelta(mday))
        # Standard monthly date grid per interval.
        t_stds = [
            tu.timeseries_std(date_s.today,
                              interval,
                              periods_y=12,
                              use_lastday=True,
                              extend=1) for interval in intervals
        ]
        t_std_lens = [len(x) - 1 for x in t_stds]  # sample count per interval
        t_std_y5 = t_stds[6]
        # Since-inception standard grid per index, truncated at inception.
        ts_std_total = [
            tu.timeseries_std(date_s.today,
                              tu.periods_in_interval(date_s.today, t_min, 12),
                              periods_y=12,
                              use_lastday=True,
                              extend=6) for t_min in t_mins
        ]
        ts_std_total = [
            t_std_total[:len([x for x in t_std_total if x >= t_min]) + 1]
            for t_std_total, t_min in zip(ts_std_total, t_mins_tstp)
        ]
        # Align each index's real dates onto its since-inception grid.
        matchs = [
            tu.outer_match4indicator_m(t, t_std_all, False)
            for t, t_std_all in zip(ts, ts_std_total)
        ]
        idx_matchs = [x[1] for x in matchs]
        prices_total = [[
            price[ix] if ix is not None else None for ix in idx.values()
        ] for price, idx in zip(prices, idx_matchs)]
        # Return series per index since inception.
        rs_total = [
            fi.gen_return_series(price_total) for price_total in prices_total
        ]
        # Risk-free treasury return series.
        r_f_total = prices_total[5][
            1:]  # the list `y1_treasury_rate` in prices_total is not price, but return
        r_f_total = pd.DataFrame(r_f_total).fillna(
            method="backfill")[0].tolist()
        r_f_all = [r_f_total[:length - 1] for length in t_std_lens]
        r_f_all.append(r_f_total)
        for i in range(len(index_used)):
            # The treasury rate is an input, not a scored index.
            if index_name[index_used[i]] == "y1_treasury_rate":
                continue
            price_all = []
            r_all = []
            for j in range(7):
                # Short intervals only count if the index spans the interval.
                if dt.date.fromtimestamp(
                        (t_mins[i] + relativedelta(months=intervals[j])
                         ).timestamp()) <= date_s.today:
                    price_all.append(prices_total[i][:t_std_lens[j]])
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    price_all.append([])
                    r_all.append([])
            for j in range(7, 11):
                price_all.append(prices_total[i][:t_std_lens[j]])
                if rs_total[i] is not None:
                    r_all.append(rs_total[i][:t_std_lens[j] - 1])
                else:
                    r_all.append([])
            price_all.append(prices_total[i])  # slot 11: since inception
            r_all.append(rs_total[i])
            price_all1 = [price_all[i] for i in intervals1]
            price_all3 = [price_all[i] for i in intervals3]
            r_all1 = [r_all[i] for i in intervals1]
            r_all3 = [r_all[i] for i in intervals3]
            # Risk-free slices; last slot trimmed to the since-inception length.
            r_f_all1 = [r_f_all[i] for i in intervals1][:-1]
            r_f_all3 = [r_f_all[i] for i in intervals3][:-1]
            r_f_all1.append(r_f_all[-1][:len(r_all[-1])])
            r_f_all3.append(r_f_all[-1][:len(r_all[-1])])
            # Indicator batteries (annualization factor 12 = monthly data).
            ir = [fi.accumulative_return(price) for price in price_all1]
            ir_a = [fi.return_a(r, 12) for r in r_all1]
            stdev_a = [fi.standard_deviation_a(r, 12) for r in r_all3]
            dd_a = [
                fi.downside_deviation_a(r, r_f, 12)
                for r, r_f in zip(r_all3, r_f_all3)
            ]
            mdd = [fi.max_drawdown(price)[0] for price in price_all3]
            sharpe_a = [
                fi.sharpe_a(r, r_f, 12) for r, r_f in zip(r_all3, r_f_all3)
            ]
            calmar_a = [
                fi.calmar_a(price, r_f, 12)
                for price, r_f in zip(price_all3, r_f_all3)
            ]
            sortino_a = [
                fi.sortino_a(r, r_f, 12) for r, r_f in zip(r_all3, r_f_all3)
            ]
            p_earning_months = [fi.periods_positive_return(r) for r in r_all3]
            n_earning_months = [fi.periods_npositive_return(r) for r in r_all3]
            con_rise_months = [
                fi.periods_continuous_rise(r)[0] for r in r_all3
            ]
            con_fall_months = [
                fi.periods_continuous_fall(r)[0] for r in r_all3
            ]
            tmp = [
                ir, ir_a, stdev_a, dd_a, mdd, sharpe_a, calmar_a, sortino_a,
                p_earning_months, n_earning_months, con_rise_months,
                con_fall_months
            ]
            result_i = [index_used[i], index_name[index_used[i]], date_s.today]
            for x in tmp:
                result_i.extend(x)
            result.append(result_i)
    df = pd.DataFrame(result)
    # Columns 3..100 are the numeric indicators; round to 6 decimals.
    df[list(range(3, 101))] = df[list(range(3, 101))].astype(np.float64)
    df[list(range(3, 101))] = df[list(range(3, 101))].apply(
        lambda x: round(x, 6))
    df.columns = columns
    df.index_id = df.index_id.apply(lambda x: x.upper())
    return df
try: t_std_total = tu.timeseries_std(date_s, tu.periods_in_interval( date_s, tmins[id_], 12), periods_y=12, extend=1, use_last_day=True) t_std_totals.append(t_std_total[:-1]) except Exception as e: print(e) print(id_) t_std_totals_len = [len(x) - 1 for x in t_std_totals] matchs = [ tu.outer_match4indicator_m(t_real, t_std_total, False) for t_real, t_std_total in zip(ts, t_std_totals) ] idx_matchs = [list(match[1].values()) for match in matchs] t_matchs = [match[0] for match in matchs] r_con = {} for since in [2014, 2015]: fund_discontinuous = [] tolerance = 1 shift = 0 #unit: month for i in range(len(idx_matchs)): if idx_matchs[i][shift:month + 12 * (year - since)].count( None) > tolerance or len( idx_matchs[i]) < month - shift + (year - since - 1) * 12: fund_discontinuous.append(i)
def cal_std(fund_ids=None, whole=False):
    """Align fund NAV series onto standard weekly and monthly date grids.

    (Original note: computes one fund at a time.)

    Args:
        fund_ids: str, comma-separated fund ids to restrict the query to;
            None means no restriction.
        whole: bool, when True read the whole NAV table, otherwise only
            rows updated since the module-level ``yesterday``.

    Returns:
        dict with keys "w" and "m", each a DataFrame of grid-aligned
        nav/added_nav/swanav/statistic_date series per fund.
    """
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
    else:
        # NOTE(review): `{ut}` is interpolated unquoted — assumes `yesterday`
        # is already a SQL-safe literal (e.g. a numeric timestamp); a raw
        # date would need quoting — confirm.
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
        WHERE update_time >= {ut}".format(ut=yesterday)
    if fund_ids:
        # NOTE(review): direct string interpolation — SQL injection risk if
        # `fund_ids` can come from untrusted input.
        if 'WHERE' in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
        else:
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)
    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)
    # Sanity filters: implausibly small NAVs and out-of-range dates.
    criterias = [(df_nav["nav"] >= 0.2), (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >= dt.date(1970, 1, 2)),
                 (df_nav["statistic_date"] <= dt.date.today())]
    su.tic("Preprocessing......")
    criteria = get_creterias(criterias)
    # Newest-first within each fund; downstream slicing relies on this order.
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"],
                                              ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()
    # First/last observation per fund, as dates and as timestamps.
    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]
    # Per-fund column slices.
    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])
    # Weekly since-inception grid per fund, anchored on the module-level
    # `friday`, clipped to [first, last] observation.
    t_std_alls_w = [
        tu.timeseries_std(friday, tu.periods_in_interval(friday, t_min, 12),
                          extend=4) for t_min in t_mins
    ]
    t_std_alls_w = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)
    ]
    t_std_alls_w = [
        t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
        for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)
    ]
    # Monthly since-inception grid per fund, anchored on each fund's own
    # latest observation, clipped at inception.
    t_std_alls_m = [
        tu.timeseries_std(date,
                          tu.periods_in_interval(date, t_min, 12),
                          periods_y=12,
                          use_lastday=True,
                          extend=6) for date, t_min in zip(t_maxs, t_mins)
    ]
    t_std_alls_m = [
        t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
        for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)
    ]
    su.tic("Matching......")
    # Weekly alignment: None where a grid week has no observation.
    matchs_w = [
        tu.outer_match4indicator_w(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)
    ]
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [
        tu.tr(x[:-1], "date") if x is not None else None
        for x in t_std_alls_w
    ]
    # Monthly alignment, same structure as the weekly one.
    matchs_m = [
        tu.outer_match4indicator_m(t_real, t_std)
        for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)
    ]
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[
        nav[ix] if ix is not None else None for ix in idx.values()
    ] for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[
        t_real[ix] if ix is not None else None for ix in idx.values()
    ] for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [
        tu.tr(x[:-1], "date") if x is not None else None
        for x in t_std_alls_m
    ]
    result_w = {
        "fund_id": ids,
        "nav": nav_matchs_w,
        "added_nav": anav_matchs_w,
        "swanav": swanav_matchs_w,
        "statistic_date": t_matchs_w,
        "statistic_date_std": t_matchs_std_w
    }
    result_m = {
        "fund_id": ids,
        "nav": nav_matchs_m,
        "added_nav": anav_matchs_m,
        "swanav": swanav_matchs_m,
        "statistic_date": t_matchs_m,
        "statistic_date_std": t_matchs_std_m
    }
    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))
    return result