Ejemplo n.º 1
0
    def _prepare_benchmark_style(self, styles, data=None, **kwargs):
        """
        Compute the benchmark's weighted exposure to each style factor.

        Parameters
        ----------
        styles : list
            Style factor names, used as the columns to load or select.
        data : DataFrame, optional
            Pre-loaded factor values carrying a 'date' index level. When
            omitted, values are loaded from ``self._rskds`` for the benchmark
            members on ``self._date``.
        **kwargs
            std_qt : bool, default False
                Only consulted when ``data`` is None; when true, every loaded
                column is passed through ``StandardByQT``.

        Returns
        -------
        Series
            One value per style factor: the weight-multiplied column sums,
            divided by the total benchmark weight when that total is positive.
        """
        bench_weight = self._bchmrk_weight
        member_ids = bench_weight.index.tolist()

        if data is not None:
            exposures = data.reindex(columns=styles)
            exposures = exposures.reset_index(level='date', drop=True)
        else:
            loaded = self._rskds.load_style_factor(
                styles, ids=member_ids, dates=[self._date])
            exposures = loaded.reset_index(level='date', drop=True)
            if kwargs.get('std_qt', False):
                exposures = exposures.apply(
                    lambda col: StandardByQT(col.to_frame(), col.name))

        # A non-positive total weight leaves the weighted sums unscaled.
        if bench_weight.sum() > 0:
            weighted_sum = exposures.mul(bench_weight, axis='index').sum()
            return weighted_sum / bench_weight.sum()
        return exposures.mul(bench_weight, axis='index').sum()
Ejemplo n.º 2
0
    def _get_style_factors(self, styles, std_qt=False):
        """Load style factor data for ``self._date``.

        Parameters
        ----------
        styles : list
            Style factor names forwarded to ``self._rskds.load_style_factor``.
        std_qt : bool, default False
            When true, every column of the loaded data is passed through
            ``StandardByQT`` before being returned.

        Returns
        -------
        pd.DataFrame
            The loaded factor values (per the original note, indexed by
            [date, IDs] with one column per style factor).
        """
        factors = self._rskds.load_style_factor(styles, dates=[self._date])
        if not std_qt:
            return factors
        return factors.apply(lambda col: StandardByQT(col.to_frame(), col.name))
Ejemplo n.º 3
0
 def _prepare_benchmark_style_std(self, styles, data=None, **kwargs):
     """
     Weighted dispersion of each style factor across the benchmark.

     Benchmark weights are normalised to sum to one. Missing factor values
     are replaced by the column's plain (unweighted) mean, squared deviations
     from that plain mean are weighted and summed, and the square root of
     the result is returned.

     Parameters
     ----------
     styles : list
         Style factor names, used as the columns to load or select.
     data : DataFrame, optional
         Pre-loaded factor values carrying a 'date' index level. When
         omitted, values are loaded from ``self._rskds`` for the benchmark
         members on ``self._date``.
     **kwargs
         std_qt : bool, default False
             Only consulted when ``data`` is None; when true, every loaded
             column is passed through ``StandardByQT``.

     Returns
     -------
     Series
         One dispersion value per style factor.
     """
     norm_weight = self._bchmrk_weight / self._bchmrk_weight.sum()
     member_ids = norm_weight.index.tolist()

     if data is not None:
         exposures = data.reindex(columns=styles)
         exposures = exposures.reset_index(level='date', drop=True)
     else:
         loaded = self._rskds.load_style_factor(
             styles, ids=member_ids, dates=[self._date])
         exposures = loaded.reset_index(level='date', drop=True)
         if kwargs.get('std_qt', False):
             exposures = exposures.apply(
                 lambda col: StandardByQT(col.to_frame(), col.name))

     # Gaps are filled with the column mean so they add no deviation.
     filled = exposures.fillna(exposures.mean())
     squared_dev = (filled - filled.mean()).pow(2)
     weighted_var = squared_dev.mul(norm_weight, axis='index').sum()
     return np.sqrt(weighted_var)
Ejemplo n.º 4
0
    def _prepare_portfolio_style(self, styles, data=None, **kwargs):
        """
        Style factor values for the securities in the portfolio signal.

        Parameters
        ----------
        styles : list
            Style factor names, used as the columns to load or select.
        data : DataFrame, optional
            Pre-loaded factor values carrying a 'date' index level. When
            omitted, values are loaded from ``self._rskds`` for the IDs in
            ``self._signal`` on ``self._date``.
        **kwargs
            std_qt : bool, default False
                Only consulted when ``data`` is None; when true, every loaded
                column is passed through ``StandardByQT``.

        Returns
        -------
        DataFrame
            Rows follow ``self._signal.index`` (IDs absent from the factor
            data become NaN rows); columns are the style factors.
        """
        if data is not None:
            exposures = data.reindex(columns=styles)
            exposures = exposures.reset_index(level='date', drop=True)
            return exposures.reindex(self._signal.index)

        holding_ids = self._signal.index.tolist()
        loaded = self._rskds.load_style_factor(
            styles, ids=holding_ids, dates=[self._date])
        exposures = loaded.reset_index(level='date', drop=True)
        if kwargs.get('std_qt', False):
            exposures = exposures.apply(
                lambda col: StandardByQT(col.to_frame(), col.name))
        return exposures.reindex(self._signal.index)
Ejemplo n.º 5
0
def _load_data(factor_list, idx, freq):
    """Load standardized factor values together with next-period returns.

    Parameters
    ----------
    factor_list : iterable
        Each entry supplies, at positions 0 and 1, the first two positional
        arguments of ``data_source.load_factor`` (presumably factor name and
        storage directory); position 0 is also handed to ``StandardByQT``.
    idx : pd.MultiIndex
        Level 0 holds the dates and level 1 the security IDs to load.
    freq : str
        Frequency passed to the trade calendar when finding the date one
        period after the last date in ``idx``.

    Returns
    -------
    pd.DataFrame
        Inner join (on the row index) of the returns and the standardized
        factors. Returns are shifted back one period within each ID, so a row
        carries the following period's return, and are multiplied by 100.
    """
    standardized = [
        StandardByQT(data_source.load_factor(spec[0], spec[1], idx=idx), spec[0])
        for spec in factor_list
    ]
    factor_frame = pd.concat(standardized, axis=1)

    sample_dates = np.unique(idx.get_level_values(0).to_pydatetime()).tolist()
    sample_ids = idx.get_level_values(1).unique().tolist()

    # One extra date past the sample is needed so the last sample date still
    # has a following-period return after the shift.
    following = data_source.trade_calendar.tradeDayOffset(
        sample_dates[-1], 1, freq=freq)
    return_dates = sample_dates + [DateStr2Datetime(following)]

    period_returns = data_source.get_periods_return(sample_ids, dates=return_dates)
    forward_returns = period_returns.groupby('IDs').shift(-1).dropna() * 100
    return pd.concat([forward_returns, factor_frame], axis=1, join='inner')
Ejemplo n.º 6
0
    def _add_userlimit(self, user_conf, **kwargs):
        """Add user-defined factor exposure constraints to the optimizer.

        Parameters
        ----------
        user_conf : dict
            Custom constraint configuration:

            factor_data : pd.DataFrame or pd.Series, optional
                Custom risk factor values, one column per factor, indexed by
                [date, IDs]. When missing, None or empty, the data is loaded
                with ``data_source.load_factor`` from ``factor_name`` and
                ``factor_dir`` for ``self._date``.

            factor_name : str
                Factor to load; required when ``factor_data`` is not given.

            factor_dir : str
                Location to load from; required when ``factor_data`` is not
                given.

            limit : float, list of floats, or dict
                Bound for each factor. A scalar applies to every factor. A
                list must have one entry per factor. A dict is keyed by
                factor column name; a list value ``[low, high]`` adds a 'G'
                constraint at ``low`` and an 'L' constraint at ``high``, a
                scalar value adds one constraint using ``sense``.

            standard : bool, default False
                Whether the factor is passed through ``StandardByQT`` before
                being added to the optimizer.

        **kwargs
            active : bool, default False
                When true the bound is relative to the benchmark: the
                benchmark exposure from ``_prepare_benchmark_userexpo`` is
                added to it.

            sense : str or list of str, default 'E'
                Constraint type for scalar bounds: 'E' equal, 'G' greater
                than, 'L' lower than. A list must have one entry per
                scalar-bounded factor.

            limit_type : str, default 'value'
                With ``active`` true and ``limit_type == 'std'``, the bound
                is first multiplied by ``_prepare_benchmark_userexpo_std``.

        Raises
        ------
        ValueError
            If a limit value is invalid (including ``low > high``), if the
            length of ``limit`` or ``sense`` does not match the factors, or
            if the factor data contains missing values.
        """
        factor_data = user_conf.get('factor_data')
        # An explicit ``None`` has to take the loading path as well; asking
        # it for ``.empty`` would raise AttributeError.
        if factor_data is None or factor_data.empty:
            factor_name = user_conf.get('factor_name')
            factor_dir = user_conf.get('factor_dir')
            factor_data = data_source.load_factor(factor_name, factor_dir, dates=[self._date])
            if isinstance(factor_data, pd.Series):
                factor_data = factor_data.to_frame(factor_name)
            # Always keep a list of names: iterating a bare string below
            # would yield its characters instead of the factor name.
            if isinstance(factor_name, (list, tuple)):
                factor_names = list(factor_name)
            else:
                factor_names = [factor_name]
        else:
            if isinstance(factor_data, pd.Series):
                factor_data = factor_data.to_frame()
            factor_names = list(factor_data.columns)

        limit = user_conf.get('limit')
        is_standard = user_conf.get('standard', False)
        is_active = kwargs.get('active', False)
        sense = kwargs.get('sense', 'E')
        limit_type = kwargs.get('limit_type', 'value')

        limit_min = {}
        limit_max = {}
        limit_sense = {}
        if isinstance(limit, dict):
            for key, value in limit.items():
                if isinstance(value, list):
                    # Explicit raise instead of ``assert`` so the check
                    # survives ``python -O``.
                    if not value[0] <= value[1]:
                        raise ValueError("自定义因子敞口限定值不合法!")
                    limit_min[key] = value[0]
                    limit_max[key] = value[1]
                elif isinstance(value, (int, float)):
                    limit_sense[key] = float(value)
                else:
                    raise ValueError("自定义因子敞口限定值不合法!")
        elif isinstance(limit, (int, float)):
            limit_sense = {x: limit for x in factor_names}
        elif isinstance(limit, list):
            if len(limit) != len(factor_names):
                raise ValueError("limit dimension dose not match factor dimension")
            limit_sense = dict(zip(factor_names, limit))

        # Kept under its own name so the fixed 'G'/'L' senses of the range
        # constraints cannot overwrite the user's senses for scalar bounds.
        if isinstance(sense, str):
            scalar_senses = [sense] * len(limit_sense)
        elif len(sense) != len(limit_sense):
            raise ValueError("sense dimension dose not match factor dimension")
        else:
            scalar_senses = list(sense)

        def add_constraint(factor, bound, sense_code):
            # Build one linear constraint on the exposure to ``factor``.
            if is_standard:
                exposure = StandardByQT(factor_data, factor).loc[self._date].reindex(self._allids, fill_value=0.0)
            else:
                exposure = factor_data.loc[self._date, factor].reindex(self._allids, fill_value=0.0)
            if is_active:
                if limit_type == 'std':
                    bound *= self._prepare_benchmark_userexpo_std(exposure)
                bound += self._prepare_benchmark_userexpo(exposure)
            portfolio_factor = exposure.loc[self._allids]
            # ``fill_value`` only covers IDs absent from the data; NaN
            # values already present in the data are rejected here.
            if np.any(np.isnan(portfolio_factor.values)):
                raise ValueError("自定义因子因子数据存在缺失值!")
            names = [get_available_names('user_%s' % factor, self.names_used)]
            lin_expr = [[portfolio_factor.index.tolist(), portfolio_factor.values.tolist()]]
            self._c.linear_constraints.add(
                lin_expr=lin_expr, senses=[sense_code], rhs=[bound], names=names)
            self.names_used += names

        for factor, bound in limit_min.items():
            add_constraint(factor, bound, 'G')
        for factor, bound in limit_max.items():
            add_constraint(factor, bound, 'L')
        for factor, sense_code in zip(limit_sense, scalar_senses):
            add_constraint(factor, limit_sense[factor], sense_code)