# Sanity check: how many factor summaries were mined.
print(len(mine_summary))

# Factor names, in the same order as mine_summary.
fac_name = [summary['factor_name'] for summary in mine_summary]
print(len(fac_name))

# Mean IC of each factor, read from the first entry of its 'perf' mapping.
ic = [
    list(summary['perf'].values())[0]['ic-mean']
    for summary in mine_summary
]
print(len(ic))

# Sign of each factor's mean IC (via uc.sign), aligned with `ic`.
ic_sign = [uc.sign(ic_mean) for ic_mean in ic]
print(len(ic_sign))
# Fetch factor values for the names in fac_name between begin and end.
factor_value = fetch.fetch_factor(
    begin,
    end,
    fields=fac_name,
    standard='clean1_alla',
    codes=None,
    df=False,
)

# Some factors come back shorter than expected (the original author noted
# "fewer than 1009" entries and found it odd), so keep only the factors whose
# length equals the number of trading days between begin and end.
# The trading-day list does not depend on the factor, so it is queried once
# here instead of once per factor inside the comprehension.
expected_trade_day_count = len(
    query_data.get_trade_days('d', from_trade_day=begin, to_trade_day=end)
)
factor_value = {
    name: values
    for name, values in factor_value.items()
    if len(values) == expected_trade_day_count
}
# Adjust factor sign: for every factor that survived the length filter,
# multiply its values by uc.sign of the mean IC taken from the first entry of
# its 'perf' mapping.
factor_value_adj = {}
for summa in mine_summary:
    # Test membership on the dict itself (a hash lookup) rather than building
    # and scanning a fresh list of keys on every iteration.
    if summa['factor_name'] not in factor_value:
        continue
    factor_value_adj[summa['factor_name']] = (
        factor_value[summa['factor_name']]
        * uc.sign(list(summa['perf'].values())[0]['ic-mean'])
    )
# NOTE: `end` is rebound here; the fetch calls below use this end date.
end = '2021-03-31'

# Data directory -- remember to update this path.
data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group/num_restrict'

# Load the finally selected factors and their weights, dropping every entry
# whose weight is 0.
with open(f"{data_pat}/fac_chosen.json") as f:
    fac_choose = json.load(f)
fac_choose = [pair for pair in fac_choose.items() if pair[1] != 0]
print(fac_choose)
# The same (factor, weight) pairs as a factor -> weight mapping.
factor_list = dict(fac_choose)

# Fetch the data of the selected factors between begin and end.
print('fetch')
fac_data = fetch.fetch_factor(
    begin,
    end,
    fields=list(factor_list),
    standard='clean1_alla',
    codes=None,
    df=False,
)

# Top-2000 stock universe, based on 'stock_tcap'.
cap_data = fetch_data.fetch(begin, end, ['stock_tcap'])
# Rank along axis=1 in descending order.
cap_rank = cap_data['stock_tcap'].rank(axis=1, ascending=False)

# Marker that is True where rank <= 2000 and NaN everywhere else.
# (Original author's open question: 2015-08-06 has only 1999 entries?)
top2000 = cap_rank <= 2000
top2000 = top2000.where(top2000)

# Use the top-2000 marker to turn factor values outside the universe into
# missing values.
fac_data = {name: values * top2000 for name, values in fac_data.items()}
# Multiply each selected factor by its weight.
# NOTE(review): the resulting keys are (factor, weight) tuples rather than bare
# factor names -- confirm that the code consuming fac_data expects this.
fac_data = {
    (name, weight): weight * fac_data[name]
    for name, weight in factor_list.items()
}

# Build the final factor