def get_equal_weight_individual(signal=pd.DataFrame(), start_date='2017-01-01', end_date='2020-08-31'):
    # restrict the signal to the backtest window
    signal = signal[(signal.index >= start_date) & (signal.index <= end_date)]
    # long the top decile by cross-sectional rank, equal-weighted each day
    weight = (uc.cs_rank(signal) >= 0.9).astype(int)
    weight = weight.div(weight.sum(axis=1), axis=0)
    weight = weight.where(weight > 0)
    weight = weight.dropna(axis=1, how='all')
    return weight
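# A minimal sketch of what get_equal_weight_individual computes, using df.rank(axis=1, pct=True)
# as a stand-in for uc.cs_rank (assumed to be a row-wise percentile rank); the toy signal is made up.
import numpy as np
import pandas as pd

dates = pd.date_range('2017-01-03', periods=5, freq='B')
codes = ['s%02d' % i for i in range(20)]
toy_signal = pd.DataFrame(np.random.randn(len(dates), len(codes)), index=dates, columns=codes)

rank = toy_signal.rank(axis=1, pct=True)          # stand-in for uc.cs_rank
w = (rank >= 0.9).astype(int)                     # keep roughly the top 10% of names each day
w = w.div(w.sum(axis=1), axis=0)                  # equal weight within the selection
w = w.where(w > 0).dropna(axis=1, how='all')
print(w.sum(axis=1))                              # each row of weights sums to 1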
def add_fac(base_comb, base_fac, wait_delete):
    """Try adding each remaining factor to the current best combination (equal-weight of ranks)."""
    fac_comb = {}
    for fac_add in wait_delete:
        temp = dict(base_fac)  # copy the current combination
        temp[fac_add] = uc.cs_rank(fac_data[fac_add])
        comb = pd.concat(temp.values())
        com_name = '(' + base_comb + ',' + fac_add + ')'
        fac_comb[com_name] = comb.groupby(comb.index).mean()
        fac_comb[com_name].index = pd.to_datetime(fac_comb[com_name].index)
    return fac_comb
Example #3
def get_equal_weight_individual(
    signal=pd.DataFrame(),
    start_date='2017-01-01',
    end_date='2020-08-31',
    out_path='E:/FT_Users/LihaiYang/Files/factor_comb_data/all_cluster_comb/1_eq.csv'
):
    signal = signal[(signal.index >= start_date) & (signal.index <= end_date)]
    weight = (uc.cs_rank(signal) >= 0.9).astype(int)  # remember to modify
    weight = weight.div(weight.sum(axis=1), axis=0)
    weight = weight.where(weight > 0)
    weight = weight.dropna(axis=1, how='all')
    weight.to_csv(out_path)
# --- fragment: the snippet is truncated here; the lines below are the tail of test_fac,
# which ranks the candidate combinations by Sharpe ratio and returns the best one ---
    perf_summary = perf_summary.sort_values(by='sharp_ratio',
                                            axis=0,
                                            ascending=False)
    new_com = perf_summary.index[0]
    new_sharp = perf_summary.loc[new_com, 'sharp_ratio']
    print("Best combination after adding one factor: ", new_com,
          "its Sharpe ratio: ", new_sharp)
    return new_sharp, new_com


fac_info = pd.read_excel(data_pat + '/fac_addfunda/all_addfunda.xlsx',
                         sheet_name='各类聚合因子的表现',  # sheet: performance of each cluster-combined factor
                         index_col=0)
# Initialization: start the forward selection from the first factor in the table
wait_del = fac_info.index.to_list()
base_com = wait_del[0]
base_fa = {}
base_fa[base_com] = uc.cs_rank(fac_data[base_com])
base_sharpe = fac_info.loc[base_com, 'sharp_ratio']
wait_del.remove(base_com)
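
# The source only shows the truncated tail of test_fac (above). A minimal sketch of the whole
# routine under stated assumptions: `backtest_sharpe` is a hypothetical helper standing in for
# the project's own backtest, and candidates are evaluated with the top-decile weights above.
def test_fac(fac_comb):
    perf = {}
    for name, fac in fac_comb.items():
        weight = get_equal_weight_individual(fac)   # long-only, equal-weight top decile
        perf[name] = backtest_sharpe(weight)        # hypothetical evaluation helper
    perf_summary = pd.DataFrame({'sharp_ratio': pd.Series(perf)})
    perf_summary = perf_summary.sort_values(by='sharp_ratio', axis=0, ascending=False)
    new_com = perf_summary.index[0]
    new_sharp = perf_summary.loc[new_com, 'sharp_ratio']
    return new_sharp, new_com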

while len(wait_del) > 0:
    print("Current best factor combination: ", base_com, "current best Sharpe ratio: ", base_sharpe)
    # Starting from the current best combination, try adding one more factor at a time
    fac_new = add_fac(base_com, base_fa, wait_del)
    # Backtest the candidate combinations
    new_sharp, new_com = test_fac(fac_new)
    if new_sharp > base_sharpe:
        base_com = new_com
        base_sharpe = new_sharp
        rem = base_com.split(',')[-1][:-1]  # the factor just added, to be removed from the waiting list
        base_fa[rem] = uc.cs_rank(fac_data[rem])
        wait_del.remove(rem)  # missing in the original; without it the loop never terminates
        print("Removed from the waiting list: ", rem)
    else:
        break  # no candidate improves the Sharpe ratio; stop the forward selection
Example #5
for tag in list(fac_meaning[cluster_h].unique()):
    temp = fac_meaning[fac_meaning[cluster_h] == tag].index.tolist()
    temp_name = [i[15:-3] for i in temp]
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # reorder rows to match the columns, making the matrix symmetric
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    fac_comb[tag] = uc.cs_rank(fac_data[cluster_sharp[tag].idxmax()])
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlation between the newly combined factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)
"""
# Summarize the performance results of the combined factors
type = 'best1_1'  # remember to modify
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/mf/' + str(type) + '/eq_tvwap'
results_perf = {}
results_hperf = {}
results_to = {}
f.close()
"""

# Factor aggregation method (6): within each category, equal-weight the factors whose Sharpe ratios rank in the top 15%
fac_comb = {}
for type, v in all_fac.items():
    for tag, fac_names in v.items():
        fac_names = [
            fa for fa in fac_names
            if fa not in ['factor_20216_vp', 'factor_90007_daily_vp']
        ]  # these two factors appear to be slightly problematic
        fac_names = [
            fa for fa in fac_names if sharpe_rank[type].loc[fa] >= 0.85
        ]  # keep the factors whose Sharpe-ratio rank is in the top 15%
        print(type, tag, len(fac_names))
        if len(fac_names) > 0:
            temp = {}
            for fac_name in fac_names:
                temp[fac_name] = uc.cs_rank(all_data[fac_name])
            print('concat')
            comb = pd.concat(temp.values())
            print('mean')
            fac_comb['15%_eq_1_' + tag + '_' + type] = comb.groupby(
                comb.index).mean()
            fac_comb['15%_eq_1_' + tag + '_' + type].index = pd.to_datetime(
                fac_comb['15%_eq_1_' + tag + '_' + type].index)

f = open(data_pat + '/fac_expand/15%_eq/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()
"""
# Aggregation method (11): among the seven combined factors with the highest Sharpe ratios before expansion, enumerate all 2**n combinations and aggregate with Sharpe-ratio weights
fac_choose = [
    '50%_eq_1_高频资金流分布_hfmf', 'sharpe_weight_反转因子相关_vp',
    'sharpe_weight_1_日间资金流波动_mf', '50%_eq_1_收益率和波动率的相关性_vp',
    '15%_eq_1_日内成交额分布的稳定性_hfvp', '15%_eq_1_日间成交量(额)的波动率_vp',
    'sharpe_weight_1_收盘行为异常_hfvp'
]
comb = []
for i in range(len(fac_choose)):
    comb.extend(list(combinations(fac_choose, i + 1)))
fac_comb = {}
for com in comb:
    temp = {}
    for ele in com:
        temp[ele] = uc.cs_rank(fac_data[ele]) * fac_meaning.loc[ele,
                                                                'sharp_ratio']
    comb = pd.concat(temp.values())
    fac_comb['iter7same_' + str(com) + '_sharpe_weight'] = comb.groupby(
        comb.index).mean()
    fac_comb['iter7same_' + str(com) +
             '_sharpe_weight'].index = pd.to_datetime(
                 fac_comb['iter7same_' + str(com) + '_sharpe_weight'].index)
f = open(data_pat + '/iter7same_sharpe_weight/fac.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
"""
# Summarize the performance results of the combined factors
type = 'iter7same_eq'  # remember to modify
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/all_cluster/fac_expand/all_cluster/' + str(type) + '/eq_tvwap'
results_perf = {}
Example #8
    # --- fragment: the snippet resumes mid-loop (the per-cluster summary loop shown earlier) ---
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # reorder rows to match the columns, making the matrix symmetric
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

# Factor aggregation
fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    temp = {}
    for i in cluster_sharp[tag].index.tolist():
        temp[i] = uc.cs_rank(fac_data[i])
    comb = pd.concat(temp.values())
    fac_comb[tag] = comb.groupby(comb.index).mean()
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlation between the newly combined factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)

# Summarize the performance results of the combined factors
type = '50%_eq_1'  # remember to modify
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/mf/' + str(
    type) + '/eq_tvwap'
Example #9
pickle.dump(fac_comb, f, -1)
f.close()
"""
"""
fac_meaning = fac_meaning[fac_meaning['tag1'] == 'earning']
fac_comb = {}
temp = {}
for tag in fac_meaning.index:
    temp[tag[:-3]] = uc.cs_rank(fac_earning[tag[:-3]])
comb = pd.concat(temp.values())
fac_comb['50%_eq_fundamental_earning'] = comb.groupby(comb.index).mean()
fac_comb['50%_eq_fundamental_earning'].index = pd.to_datetime(fac_comb['50%_eq_fundamental_earning'].index)
f = open(data_pat + '/50%_eq/fac_earning.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
"""
# """
fac_meaning = fac_meaning[fac_meaning['tag1'] == 'valuation']
fac_comb = {}
temp = {}
for tag in fac_meaning.index:
    temp[tag[:-3]] = uc.cs_rank(fac_valuation[tag[:-3]])
comb = pd.concat(temp.values())
fac_comb['50%_eq_fundamental_valuation'] = comb.groupby(comb.index).mean()
fac_comb['50%_eq_fundamental_valuation'].index = pd.to_datetime(
    fac_comb['50%_eq_fundamental_valuation'].index)
f = open(data_pat + '/50%_eq/fac_valuation.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
# """
    # --- fragment: the snippet resumes mid-loop (the per-cluster summary loop shown earlier) ---
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # reorder rows to match the columns, making the matrix symmetric
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

# Factor aggregation
fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    temp = {}
    for i in cluster_sharp[tag].index.tolist():
        temp[i] = uc.cs_rank(fac_data[i]) * cluster_sharp[tag][i]
    comb = pd.concat(temp.values())
    fac_comb[tag] = comb.groupby(comb.index).mean()
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlation between the newly combined factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)

# Summarize the performance results of the combined factors
type = 'sharpe_weight_1'  # remember to modify
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/hfmf/' + str(type) + '/eq_tvwap'
results_perf = {}
pickle.dump(fac_comb, f, -1)
f.close()
"""
# """
# Aggregation method (8): enumerate all 2**n combinations and aggregate with equal weights
fac_meaning = fac_meaning.sort_values(by='sharp_ratio', axis=0, ascending=False)
fac_choose = fac_meaning.index
comb = []
for i in range(len(fac_choose)):
    comb.extend(list(combinations(fac_choose, i+1)))
fac_comb = {}
for com in comb:
    temp = {}
    comb_name = '('
    for ele in com:
        temp[ele] = uc.cs_rank(fac_data[ele])
        comb_name = comb_name + ele.split('_')[-2] + ','
    comb_data = pd.concat(temp.values())  # avoid rebinding `comb`, which is still being iterated
    comb_name = comb_name + ')'
    print(comb_name)
    fac_comb['iter_' + comb_name + '_eq'] = comb_data.groupby(comb_data.index).mean()
    fac_comb['iter_' + comb_name + '_eq'].index = pd.to_datetime(fac_comb['iter_' + comb_name + '_eq'].index)
# Split into chunks
new_name = list(fac_comb.keys())
factor_1 = {k: fac_comb[k] for k in new_name[0:400]}  # remember to modify
f = open(data_pat + '/fac_select/iter_eq/fac_1.pkl', 'wb')  # remember to modify
pickle.dump(factor_1, f, -1)
f.close()
# """
"""
Example #12
from ft_platform.utils import utils_calculation as uc
import pandas as pd
import pickle

data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group/linear_model'  # remember to modify

fac_model = {}

ols_pred = pd.read_pickle(data_pat + '/ols/fac_pool.pkl')
fac_model['ols'] = uc.cs_rank(ols_pred['pool_480'])

ridge_pred = pd.read_pickle(data_pat + '/ridge/fac_0.2.pkl')
fac_model['ridge'] = uc.cs_rank(ridge_pred['pool_480_0.2'])

lasso_pred = pd.read_pickle(data_pat + '/lasso/fac_4e-05.pkl')
fac_model['lasso'] = uc.cs_rank(lasso_pred['pool_480_4e-05'])

elnet_pred = pd.read_pickle(data_pat + '/elnet/fac_0.1_0.0004.pkl')
fac_model['elnet'] = uc.cs_rank(elnet_pred['pool_480_0.1_0.0004'])

logit_pred = pd.read_pickle(data_pat + '/logit/fac_none.pkl')
fac_model['logit'] = uc.cs_rank(logit_pred['pool_480_none'])

nbayes_pred = pd.read_pickle(data_pat + '/bayes/fac.pkl')
fac_model['nbayes'] = uc.cs_rank(nbayes_pred['pool_480'])

pls_pred = pd.read_pickle(data_pat + '/pls/fac_6.pkl')
fac_model['pls'] = uc.cs_rank(pls_pred['pool_480_6'])

rf_pred = pd.read_pickle(data_pat + '/random_forest/fac_300_10_0.6.pkl')
fac_model['rf'] = uc.cs_rank(rf_pred['pool_480_300_10_0.6'])
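
# The snippet ends after loading the per-model predictions. A short sketch (illustrative, not
# from the source) combining them with the same rank-and-average pattern used elsewhere in this
# file; the output key and file name below are made up.
comb = pd.concat(fac_model.values())
fac_model_comb = {'model_eq': comb.groupby(comb.index).mean()}
fac_model_comb['model_eq'].index = pd.to_datetime(fac_model_comb['model_eq'].index)
f = open(data_pat + '/model_eq_fac.pkl', 'wb')  # hypothetical output path
pickle.dump(fac_model_comb, f, -1)
f.close()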
Example #13
f = open(data_pat + '/fac_addfunda/best_sharpe_weight/fac.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
"""
# Aggregation method (3): take the ten combined factors with the highest Sharpe ratios, enumerate all 2**n combinations, and aggregate with equal weights
fac_meaning = fac_meaning.sort_values(by='sharp_ratio', axis=0, ascending=False)
fac_choose = fac_meaning.index[0:10]
comb = []
for i in range(len(fac_choose)):
    comb.extend(list(combinations(fac_choose, i+1)))
fac_comb = {}
for com in comb:
    temp = {}
    comb_name = '('
    for ele in com:
        temp[ele] = uc.cs_rank(fac_all[ele])
        if ele.split('_')[-2] == 'fundamental':
            comb_name = comb_name + ele.split('_')[-1] + ','
        else:
            comb_name = comb_name + ele.split('_')[-2] + ','
    comb_data = pd.concat(temp.values())  # avoid rebinding `comb`, which is still being iterated
    comb_name = comb_name + ')'
    print(comb_name)
    fac_comb['iter_' + comb_name + '_eq'] = comb_data.groupby(comb_data.index).mean()
    fac_comb['iter_' + comb_name + '_eq'].index = pd.to_datetime(fac_comb['iter_' + comb_name + '_eq'].index)
# Split into chunks
new_name = list(fac_comb.keys())
factor_1 = {k: fac_comb[k] for k in new_name[0:200]}  # remember to modify
f = open(data_pat + '/fac_addfunda/iter10_eq/fac_1.pkl', 'wb')  # remember to modify
pickle.dump(factor_1, f, -1)
f.close()


# --- fragment: the snippet jumps here into the body of cal_factor_corr(fac_dict, pat_str),
# the helper that computes Spearman rank correlations between combined factors ---
        os.makedirs(pat_str)
    total_data = pd.concat(fac_dict.values(), keys=fac_dict.keys())
    total_data = total_data.reset_index().set_index('level_1')
    corank_total = total_data.groupby(total_data.index).apply(lambda g: g.set_index('level_0').T.corr('spearman'))
    co_rank = corank_total.groupby(corank_total.index.get_level_values(1)).mean()
    co_rank = co_rank.reindex(co_rank.columns)  # reorder rows to match the columns, making the matrix symmetric
    co_rank.to_csv(pat_str + "/mf_hfmf_cluster/mf_hfmf_rank_corr.csv", index=True, encoding='utf_8_sig')  # remember to modify
    return co_rank

# Correlation between the newly combined factors
co_rank = cal_factor_corr(fac_comb, data_pat)
print(co_rank)

# Aggregation method (1): equal-weight average within each category
new_fac = {}
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']), uc.cs_rank(fac_comb['日间资金流波动'])])
new_fac['资金流的稳定性'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['反转因子改进_日频资金流']), uc.cs_rank(fac_comb['反转因子改进_高频资金流'])])
new_fac['反转因子改进_资金流'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']), uc.cs_rank(fac_comb['日间资金流波动']), uc.cs_rank(fac_comb['主力流入流出占比'])])
new_fac['资金流的稳定性+主力流入流出占比'] = comb.groupby(comb.index).mean()
f = open(data_pat + '/mf_hfmf_cluster/eq/fac.pkl', 'wb')
pickle.dump(new_fac, f, -1)
f.close()

# Aggregation method (2): Sharpe-weighted average within each category
new_fac = {}
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']) * 0.648901798, uc.cs_rank(fac_comb['日间资金流波动']) * 0.509416429])
new_fac['资金流的稳定性'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['反转因子改进_日频资金流']) * 0.06240904, uc.cs_rank(fac_comb['反转因子改进_高频资金流']) * 0.110874718])
new_fac['反转因子改进_资金流'] = comb.groupby(comb.index).mean()
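
# The numeric multipliers above are presumably the Sharpe ratios of the corresponding cluster
# factors. A sketch that looks the weights up instead of hard-coding them; `cluster_sharpe`
# (a Series of Sharpe ratios indexed by cluster name) is an assumed input, not from the source.
def sharpe_weight_comb(names, fac_dict, sharpe_series):
    ranked = [uc.cs_rank(fac_dict[n]) * sharpe_series[n] for n in names]
    stacked = pd.concat(ranked)
    return stacked.groupby(stacked.index).mean()

# e.g. new_fac['资金流的稳定性'] = sharpe_weight_comb(['高频资金流分布', '日间资金流波动'], fac_comb, cluster_sharpe)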
Example #15
hfmf_value = {}
path = 'E:/Share/FengWang/Alpha/mine/hfmf_factor/oos/clean'
for j in os.listdir(path):
    temp = h5py.File(path + '/' + j, 'r')
    hfmf_value[j[:-3]] = pd.DataFrame(temp['data'][:].astype(float),
                                      columns=temp['code'][:].astype(str),
                                      index=temp['trade_date'][:].astype(str))
all_fac = dict(factor_value_adj, **hfmf_value)

# Out-of-sample combination of factors within each category
print('cluster_comb')
fac_cluster = {}
for fac_gr, fac_names in factor_name.items():
    comb_temp = {}
    for fac_name in fac_names:
        comb_temp[fac_name] = uc.cs_rank(all_fac[fac_name])
    comb = pd.concat(comb_temp.values())
    fac_cluster[fac_gr] = comb.groupby(comb.index).mean()
    fac_cluster[fac_gr].index = pd.to_datetime(fac_cluster[fac_gr].index)

f = open(
    'E:/FT_Users/LihaiYang/Files/factor_comb_data/all_cluster_comb_oos/simple_avg/9.pkl',
    'wb')  # remember to change this path
pickle.dump(fac_cluster, f, -1)
f.close()

# Out-of-sample combination of factors across categories
print('all_cluster')
cluster_num = 1  # remember to modify
all_cluster = {}
all_cluster_name = pd.read_pickle(
# --- the snippet is truncated here (the path argument is missing) and resumes inside a different aggregation block ---
fac_comb['all_eq_fundamental_growth'].index = pd.to_datetime(fac_comb['all_eq_fundamental_growth'].index)
f = open(data_pat + '/all_eq/fac_growth.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
"""
"""
cal_factor_corr(fac_earning, data_pat + '/earning')
fac_comb = {}
temp = {}
for tag in fac_earning.keys():
    temp[tag] = uc.cs_rank(fac_earning[tag])
comb = pd.concat(temp.values())
fac_comb['all_eq_fundamental_earning'] = comb.groupby(comb.index).mean()
fac_comb['all_eq_fundamental_earning'].index = pd.to_datetime(fac_comb['all_eq_fundamental_earning'].index)
f = open(data_pat + '/all_eq/fac_earning.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
"""

cal_factor_corr(fac_valuation, data_pat + '/valuation')
fac_comb = {}
temp = {}
for tag in fac_valuation.keys():
    temp[tag] = uc.cs_rank(fac_valuation[tag])
comb = pd.concat(temp.values())
fac_comb['all_eq_fundamental_valuation'] = comb.groupby(comb.index).mean()
fac_comb['all_eq_fundamental_valuation'].index = pd.to_datetime(fac_comb['all_eq_fundamental_valuation'].index)
f = open(data_pat + '/all_eq/fac_valuation.pkl', 'wb')  # remember to modify
pickle.dump(fac_comb, f, -1)
f.close()
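
# A small read-back check after any of the pickle.dump calls above (pandas/pickle only):
# reload the file just written and confirm the combined factor has a clean DatetimeIndex.
check = pd.read_pickle(data_pat + '/all_eq/fac_valuation.pkl')
fac = check['all_eq_fundamental_valuation']
print(fac.shape)
print(isinstance(fac.index, pd.DatetimeIndex), fac.index.is_monotonic_increasing)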