コード例 #1
0
    def easy_compose_optimization(
        self,
        stock_pool,
        bench,
    ):
        """Compute portfolio weights via linear programming.

        stock_pool : DataFrame of estimated stock returns (index = stock codes).
        bench      : benchmark identifier (currently unused in the body).
        Returns the stock-weight result of ``linear_programming``.
        """
        # Risk factors used as constraints; only 'size' is active for now
        # (self.factor_dict['risk_factor'] is the intended source).
        risk_factors = ['size']  # self.factor_dict['risk_factor']
        universe = list(stock_pool.index)

        # Factor matrix per risk factor, keyed by factor name.
        risk_fac_data = {}
        for fac_name in risk_factors:
            risk_fac_data[fac_name] = get_factor([fac_name], universe)[fac_name]

        # Re-shape matrix storage into per-date cross-sections:
        # {date: DataFrame with rows = codes, columns = factors}.
        limit_fac_data = concat_factors_panel(risk_factors,
                                              risk_fac_data,
                                              universe,
                                              ind=True,
                                              mktcap=False)

        return linear_programming({
            'limit_fac_data': limit_fac_data,
            'index_wt': self.index_wei,
            'est_stock_rets': stock_pool,
        })
コード例 #2
0
def layer_division_bt(factors, path_d, layer_num):
    """Run a layer-division (quantile) backtest for each factor and plot results.

    :param factors:   iterable of factor names to backtest
    :param path_d:    dict of paths; uses 'sf_test_save_path' (output root)
                      and 'factor_matrix_path' (factor CSV matrices)
    :param layer_num: number of quantile layers
    """
    sf_test_save_path = path_d['sf_test_save_path']
    factor_matrix_path = path_d['factor_matrix_path']

    start_date = '2009-02-27'
    end_date = '2019-07-31'
    if_concise = True  # whether to run the simplified monthly backtest

    save_path_tmp = os.path.join(sf_test_save_path, '分层回测')
    # Create the output directory for the layer-backtest charts.
    # BUG FIX: exists() + mkdir() is race-prone and fails if the parent
    # directory is missing; makedirs(exist_ok=True) handles both.
    os.makedirs(save_path_tmp, exist_ok=True)

    # Next-month percentage change, shared by every factor's backtest.
    pct_chg_nm = get_factor(['PCT_CHG_NM'],
                            basic_path=factor_matrix_path)['PCT_CHG_NM']

    # Backtest each selected factor in turn.
    for fname in factors:
        # '/' is not a valid filename character; matrices are saved with '_div_'.
        openname = fname.replace('/', '_div_')
        facdat = pd.read_csv(os.path.join(factor_matrix_path,
                                          openname + '.csv'),
                             encoding='gbk',
                             engine='python',
                             index_col=[0])
        facdat.columns = pd.to_datetime(facdat.columns)

        s = SingleFactorLayerDivisionBacktest(factor_name=fname,
                                              factor_data=facdat,
                                              num_layers=layer_num,
                                              if_concise=if_concise,
                                              start_date=start_date,
                                              end_date=end_date,
                                              pct_chg_nm=pct_chg_nm)

        records = s.run_layer_division_bt(equal_weight=True)

        if not records.empty:
            plot_layerdivision(path_d, records, fname, if_concise)  # layer NAV chart
            bar_plot_yearly(path_d, records, fname, if_concise)  # yearly per-layer return bars
            plot_group_diff_plot(path_d, records, fname,
                                 if_concise)  # group1 - group5 NAV chart

    print(f"分层回测结束!结果见目录:{sf_test_save_path}")

    print('*' * 80)
コード例 #3
0
    def compute_factor_return(self):
        """Check factor completeness, then estimate historical factor returns.

        Reads ``self.factor_dict`` (composite name -> member factors) and
        ``self.path_dict``; stores the per-date factor cross-sections on
        ``self.factors_panel``.

        :return: ``(factor_ret_history, sig)`` from ``history_factor_return``
        """
        factors_dict = self.factor_dict
        path_dict = self.path_dict

        # ----------------------------------
        # Completeness check: every member factor must appear in the most
        # recent cross-section panel file.
        f_list = [v for values in factors_dict.values() for v in values]
        f_list = [col.replace('_div_', '/') for col in f_list]

        tmp = os.listdir(path_dict['factor_panel_path'])
        df_tmp = pd.read_csv(os.path.join(path_dict['factor_panel_path'],
                                          tmp[-1]),
                             engine='python',
                             encoding='gbk',
                             index_col=[0])

        cols = list(df_tmp.columns)

        if not set(f_list).issubset(cols):
            print('factor 不够, 缺失的因子为:')
            print(set(f_list) - set(cols))
        else:
            print('通过因子完备性测试')

        # ----------------------------------
        # NOTE: factor synthesis (my_factor_concat over factors_dict) is
        # intentionally disabled here; matrices are copied from the old run.

        params = {
            'factors': list(factors_dict.keys()),
            'window': 6,
            'half_life': None,
        }

        copy_matrix(path_dict['old_matrix_path'],
                    os.path.join(path_dict['save_path'], '新合成因子', '因子矩阵'))

        # Point matrix_path at the freshly copied composite-factor matrices.
        path_dict.update({
            'matrix_path':
            os.path.join(path_dict['save_path'], '新合成因子', '因子矩阵')
        })
        matrix_path = path_dict['matrix_path']

        factors = params['factors']
        window = params['window']
        half_life = params['half_life']

        # Next-month return series is needed alongside the alpha factors.
        factors.extend(['Pct_chg_nm'])
        # Load every factor matrix ONCE and key the dict by factor name.
        # (Previously get_factor(factors, ...) was called once per factor,
        # re-reading all matrices len(factors) times.)
        loaded = get_factor(factors, basic_path=matrix_path)
        factors_dict = {fac: loaded[fac] for fac in factors}
        # Arrange the alpha factors into per-date cross-sections.
        factors_panel = concat_factors_panel(factors=None,
                                             factors_dict=factors_dict,
                                             codes=None,
                                             ind=False,
                                             mktcap=False,
                                             perchg_nm=False)

        # Drop leading cross-sections where any factor is still missing.
        to_del = []
        for key, values in factors_panel.items():
            for f in factors:
                if f not in values.columns:
                    to_del.append(key)
                    break

        for d in to_del:
            factors_panel.pop(d)

        self.factors_panel = factors_panel

        factor_ret_history, sig = history_factor_return(
            factors, factors_panel, window, half_life)
        # Keep the trailing NaN row: the prediction step shifts it later.
        return factor_ret_history, sig
コード例 #4
0
def get_equal_wei_with_index_ret(est_stock_rets,
                                 bestN_stock=None,
                                 est_indus_rets=None,
                                 bestN_indux=None):
    """Build equal-weight stock portfolios from return forecasts.

    :param est_stock_rets: predicted stock returns (codes x dates)
    :param bestN_stock:    with industry forecasts: total number of stocks to
                           hold; without: FRACTION of stocks to select per date
    :param est_indus_rets: optional predicted industry returns (industries x
                           dates); enables the industry-allocation mode
    :param bestN_indux:    number of industries to hold (industry mode only)
    :return: DataFrame of stock weights (codes x dates), columns sum to 1
    """
    if isinstance(est_indus_rets, pd.DataFrame):
        # Equal-weight the industries predicted to perform best.
        wt_indus = pd.DataFrame(np.zeros(np.shape(est_indus_rets)),
                                index=est_indus_rets.index,
                                columns=est_indus_rets.columns)
        # Date by date, flag the top-N industries.
        # (pandas >= 2.0 compatibility: iteritems() was removed.)
        for col, col_rets in est_indus_rets.items():
            ranked = col_rets.sort_values(ascending=False)
            select_id = ranked.index[0:bestN_indux]
            wt_indus.loc[select_id, col] = 1
        wt_indus.columns = pd.to_datetime(wt_indus.columns)

        # Second-level industry membership for each stock.
        si = get_factor(
            ['second_industry'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['second_industry']
        est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
        si.columns = pd.to_datetime(si.columns)

        # Align dates across industry forecasts, stock forecasts and membership.
        dl = sorted(set(est_indus_rets.columns) & set(est_stock_rets.columns)
                    & set(si.columns))

        # Per-industry quota of stocks.
        bestN_stock_each_indus = int(bestN_stock / bestN_indux)

        wt_stock = pd.DataFrame()
        for date in dl:
            stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1)
            stock_bd = stock_bd.dropna(how='any')
            stock_bd.columns = ['ext_rets', 'second_indus']

            indus_wei = wt_indus[date]
            grouped = stock_bd.groupby('second_indus')
            wei_tmp = pd.DataFrame()

            for name, group in grouped:
                # Industry not selected: zero weight for all its stocks.
                if indus_wei[name] == 0:
                    wei_ttmp = pd.DataFrame(np.zeros([group.shape[0], 1]),
                                            index=group.index,
                                            columns=[date])
                elif indus_wei[name] > 0:
                    if group.shape[0] <= bestN_stock_each_indus:
                        # Too few candidates: spread weight over all of them.
                        wei_ttmp = (1 / (bestN_stock_each_indus * group.shape[0])) * \
                                   pd.DataFrame(np.ones([group.shape[0], 1]), index=group.index, columns=[date])
                    else:
                        ranked = group['ext_rets'].sort_values(ascending=False)
                        # BUG FIX: take the computed per-industry quota, not a
                        # hard-coded top 5.
                        select_id = ranked.index[0:bestN_stock_each_indus]
                        wei_ttmp = pd.DataFrame(np.zeros([group.shape[0], 1]),
                                                index=group.index,
                                                columns=[date])
                        wei_ttmp.loc[select_id, date] = 1 / bestN_stock

                wei_tmp = pd.concat([wei_tmp, wei_ttmp], axis=0)

            # Renormalize so each date's weights sum to 1.
            wei_tmp = wei_tmp / np.sum(wei_tmp)
            wt_stock = pd.concat([wt_stock, wei_tmp], axis=1)
            wt_stock = wt_stock.fillna(0)
    else:
        # No industry forecasts: select the top fraction of stocks per date.
        wt_stock = pd.DataFrame()
        for col, col_rets in est_stock_rets.items():
            wei_tmp = pd.DataFrame(index=est_stock_rets.index)
            ranked = col_rets.sort_values(ascending=False)
            not_nan_stock = ranked.index[~pd.isna(ranked)]
            # Keep only stocks with a forecast.
            ranked = ranked[not_nan_stock]
            # Number of stocks to pick (bestN_stock is a fraction here).
            s_num = int(len(ranked) * bestN_stock)
            # BUG FIX: index the SORTED series; the original indexed the
            # unsorted one, selecting arbitrary stocks instead of the top.
            select_id = ranked.index[0:s_num]
            wei_tmp.loc[select_id, col] = 1
            wei_tmp = wei_tmp.fillna(0)
            wei_tmp = wei_tmp / np.sum(wei_tmp)
            wt_stock = pd.concat([wt_stock, wei_tmp], axis=1)

    return wt_stock
コード例 #5
0
def optimization(est_stock_rets, index_wei, est_indus_rets=None):
    """Optimize per-date stock weights under industry-exposure constraints.

    :param est_stock_rets: predicted stock returns (codes x dates)
    :param index_wei:      industry weights of the benchmark index
    :param est_indus_rets: predicted industry returns; when given, industries
                           are over-/under-weighted relative to the index,
                           otherwise exposure is industry-neutral
    :return:               DataFrame of stock weights per date
    """
    if isinstance(est_indus_rets, pd.DataFrame):
        # Second-level industry membership for each stock.
        si = get_factor(
            ['second_industry'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['second_industry']

        est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
        si.columns = pd.to_datetime(si.columns)

        dl = sorted(set(est_indus_rets.columns) & set(est_stock_rets.columns)
                    & set(index_wei.columns))
    else:
        # First-level industry membership for each stock.
        si = get_factor(
            ['industry_zx'],
            basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
        )['industry_zx']
        # BUG FIX: est_indus_rets is None in this branch, so the original
        # `est_indus_rets.columns = ...` line raised AttributeError.
        si.columns = pd.to_datetime(si.columns)

        dl = sorted(set(est_stock_rets.columns) & set(index_wei.columns))

    total_wei = pd.DataFrame()
    for date in dl:
        if isinstance(est_indus_rets, pd.DataFrame):
            # Over-/under-weight mode: tilt index exposure by forecasts.
            indus_bd = pd.concat([est_indus_rets[date], index_wei[date]],
                                 axis=1)
            indus_bd = indus_bd.dropna(how='any')
            indus_bd.columns = ['est_rets', 'index_wet']
            up_quantile = 25      # expand industries in the top quartile
            expand_beta = 2
            dow_quantile = 70     # shrink industries below the 70th percentile
            shrink_beta = 0.5
            indus_exposure = adjust_weight(indus_bd, up_quantile, expand_beta,
                                           dow_quantile, shrink_beta)
        else:
            # Industry-neutral mode: exposure equals the index's.
            indus_exposure = index_wei[date]

        # Cross-section with one-hot industry dummy columns.
        stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1)
        stock_bd = stock_bd.dropna(how='any')
        stock_bd.columns = ['ext_rets', 'second_indus']
        stock_bd = pd.get_dummies(stock_bd)
        stock_bd.columns = ['ext_rets'] + [
            col.split('_')[-1] for col in stock_bd.columns if 'second' in col
        ]

        c = stock_bd['ext_rets'].values.reshape(1, len(stock_bd))
        x0 = np.ones(len(stock_bd)) / len(stock_bd)  # initial guess: equal weight

        # Each weight bounded to [0, 1].
        bnds = tuple((0, 1) for _ in range(len(stock_bd)))

        stock_num = 100

        # Equality constraints: industry exposures plus weights summing to 1.
        A_eq1 = stock_bd[list(stock_bd.columns[1:])].values.T
        n_stocks = A_eq1.shape[1]
        A_eq = np.r_[A_eq1, np.ones([1, n_stocks])]
        # Align the target exposure vector with the dummy columns.
        indus_exposure = indus_exposure.reindex(list(stock_bd.columns[1:]))
        indus_exposure = indus_exposure.fillna(0)
        b_eq = indus_exposure.values.reshape(-1, 1)
        b_eq = np.r_[b_eq, np.array(1).reshape(1, 1)]
        args = [stock_num, A_eq, b_eq]
        cons = get_con(args)

        res = minimize(target_fun, x0, args=c, bounds=bnds, constraints=cons)
        wei = res.x
        wei = wei / np.sum(wei)
        cur_wt = pd.DataFrame(wei, index=stock_bd.index, columns=[date])

        # BUG FIX: accumulate INSIDE the loop; the original concatenated
        # after it, keeping only the last date's weights.
        total_wei = pd.concat([total_wei, cur_wt], axis=1)

    return total_wei
コード例 #6
0
def optimization_equalwei(est_stock_rets, est_indus_rets, bestN):
    # Equal-weight the bestN industries with the highest forecast returns,
    # then optimize stock weights to match that industry exposure.
    # NOTE(review): the visible span ends without a `return` or any
    # accumulation of `cur_wt` — the function presumably continues past
    # this chunk; confirm against the full file.

    # Equal-weight allocation to the industries with the best forecasts.
    wt_indus = pd.DataFrame(np.zeros(np.shape(est_indus_rets)),
                            index=est_indus_rets.index,
                            columns=est_indus_rets.columns)
    # Traverse column by column (one column per date).
    # NOTE(review): Series/DataFrame.iteritems() was removed in pandas 2.0;
    # this should become .items().
    for col, itmes in est_indus_rets.iteritems():
        itmes = itmes.sort_values(ascending=False)
        select_id = itmes.index[0:bestN]
        wt_indus.loc[select_id, col] = 1 / bestN
    wt_indus.columns = pd.to_datetime(wt_indus.columns)

    # Merge second-level industry membership with the stock forecasts.
    si = get_factor(
        ['second_industry'],
        basic_path=r'D:\pythoncode\IndexEnhancement\多因子选股\正交后因子\因子矩阵'
    )['second_industry']
    est_indus_rets.columns = pd.to_datetime(est_indus_rets.columns)
    si.columns = pd.to_datetime(si.columns)

    # Align dates across industry forecasts, stock forecasts and membership.
    dl = list(
        set(est_indus_rets.columns) & set(est_stock_rets.columns)
        & set(si.columns))
    dl = sorted(dl)

    stock_num = 100

    for date in dl:
        # NOTE(review): debug leftover — this pins every iteration to the
        # first date, so the loop never advances; it should be removed.
        date = dl[0]
        stock_bd = pd.concat([est_stock_rets[date], si[date]], axis=1)
        stock_bd = stock_bd.dropna(how='any')
        stock_bd.columns = ['ext_rets', 'second_indus']
        # One-hot encode the industry column.
        stock_bd = pd.get_dummies(stock_bd)
        stock_bd.columns = ['ext_rets'] + [
            col.split('_')[-1] for col in stock_bd.columns if 'second' in col
        ]

        c = stock_bd['ext_rets'].values.reshape(len(stock_bd), 1)
        # NOTE(review): zeros/len is an all-zero start point; the sibling
        # `optimization` uses ones/len (equal weights) — likely intended here.
        x0 = np.zeros(len(stock_bd)) / len(stock_bd)  # initial guess

        # c.shape
        # Each weight bounded to [0, 1].
        bnds = []
        for j in range(0, len(stock_bd)):
            bnds.append((0.0, 1.0))
        bnds = tuple(bnds)

        # Industry-dummy rows form the equality-constraint matrix.
        A_eq = stock_bd[list(stock_bd.columns[1:])].values.T
        #A_eq.shape
        # Align the target exposure vector with the dummy columns.
        indus_exposure = wt_indus[date]
        indus_exposure = indus_exposure.reindex(list(stock_bd.columns[1:]))
        indus_exposure = indus_exposure.fillna(0)
        b_eq = indus_exposure.values.reshape(-1, 1)
        # b_eq.shape
        args = [stock_num, A_eq, b_eq]
        cons = get_con(args)

        res = minimize(target_fun,
                       x0,
                       args=c,
                       bounds=bnds,
                       constraints=cons,
                       options={'disp': True})
        wei = res.x
        print(res.success)

        # NOTE(review): the next three attribute accesses are no-op
        # diagnostics left over from debugging.
        res.status
        res.message
        res.nit

        # Renormalize the solution so the weights sum to 1.
        wei = wei / np.sum(wei)
        cur_wt = pd.DataFrame(wei, index=stock_bd.index, columns=[date])