Example #1
0
    def __init__(
            self,
            factor: Union[pd.Series, pd.DataFrame],
            prices: Union[pd.Series, pd.DataFrame, Callable],
            groupby: Union[pd.Series, pd.DataFrame, Callable],
            stock_start_date: Union[pd.Series, pd.DataFrame, Callable],
            weights: Union[float, pd.Series, pd.DataFrame, Callable] = 1.0,
            frequence: str = "DAY",
            quantiles: Union[int, Tuple[float], List[float]] = 5,
            bins: Union[int, Tuple[float], List[float]] = None,
            periods: Union[int, Tuple[int], List[int]] = (1, 5, 10),
            binning_by_group: bool = False,
            max_loss: float = 0.25,
            zero_aware: bool = False,
    ):
        """
        Store the analysis inputs and build the cleaned factor data.

        ``factor`` is passed through ``preprocess.QA_fmt_factor`` and
        ``frequence`` through ``utils.get_frequence``; a bare ``int`` given
        for ``periods`` is wrapped into a one-element tuple. All remaining
        arguments are stored unchanged on the instance.

        Note:
            - only one of ``quantiles`` and ``bins`` may be non-empty
        """
        # Data inputs; only the factor itself is normalised here.
        self.factor = preprocess.QA_fmt_factor(factor)
        self.prices, self.groupby = prices, groupby
        self.stock_start_date, self.weights = stock_start_date, weights
        self.frequence = utils.get_frequence(frequence)

        # Binning / forward-return settings.
        self.quantiles, self.bins = quantiles, bins
        # A single period is accepted as a plain int; keep it iterable.
        self.periods = (periods,) if isinstance(periods, int) else periods
        self.binning_by_group = binning_by_group
        self.max_loss = max_loss
        self.zero_aware = zero_aware

        # Process the factor and attach its forward returns.
        self.__gen_clean_factor_and_forward_returns()
Example #2
0
    def __init__(
            self,
            jq_username: str = None,
            jq_password: str = None,
            price_type: str = "close",  # price type
            fq: str = "qfq",  # price adjustment mode
            factor_time_range: list = None,
            industry_cls: str = "sw_l1",  # industry classification
            industry_data: Union[dict, pd.Series, pd.DataFrame] = None,  # industry data
            weight_cls: str = "avg",  # weighting scheme
            weight_data: Union[dict, pd.Series, pd.DataFrame] = None,  # weight data
            frequence: str = "DAY",  # frequency
            detailed: bool = False,  # query industry per date or not
    ):
        """
        Initialise the instance from the given settings.

        Notes
        ---
        To use custom industry / weight information, set ``industry_cls`` /
        ``weight_cls`` to None and then pass ``industry_data`` /
        ``weight_data``.

        Parameters
        ---
        :param jq_username: JoinQuant account name
        :param jq_password: JoinQuant password
        :param price_type: price field used when computing factor returns,
            one of ['open', 'high', 'low', 'close', 'avg']; defaults to 'close'
        :param fq: adjustment mode of the price data
        :param factor_time_range: time range covered by the factor; when it is
            None, ``start_time`` and ``end_time`` are left as None
        :param industry_cls: industry classification, currently supported
            - "sw_l1": Shenwan level 1
            - "sw_l2": Shenwan level 2
            - "sw_l3": Shenwan level 3
            - "jq_l1": JoinQuant level 1
            - "jq_l2": JoinQuant level 2
            - "zjw": CSRC
        :param industry_data: industry data [optional]; only one of this and
            ``industry_cls`` may be non-None
        :param weight_cls: weighting scheme, currently supported
            - "avg": equal weight
            - "mktcap": total market cap weighted
            - "cmktcap": circulating market cap weighted
            - "ln_mktcap": log market cap weighted
            - "ln_cmktcap": log circulating market cap weighted
            - "sqrt_mktcap": square root of market cap weighted
            - "sqrt_cmktcap": square root of circulating market cap weighted
        :param weight_data: weight data [optional]; only one of this and
            ``weight_cls`` may be non-None
        :param frequence: frequency
        :param detailed: whether to use detailed per-date industry
            information; by default the industry data of end_date is used
        """
        jqdatasdk.auth(jq_username, jq_password)

        if price_type is None:
            price_type = "close"
            warnings.warn("没有指定使用的价格类型,采用收盘价进行计算", UserWarning)
        assert price_type.lower() in PRICE_TYPE
        self.price_type = price_type

        if fq is not None:
            assert fq.lower() in FQ_TYPE
        else:
            warnings.warn("使用不复权的价格数据进行计算,结果可能受到分红配股等影响而不准确")
        self.fq = fq

        # factor_time_range defaults to None, and min()/max() on None raise
        # TypeError; leave the range unset in that case instead of crashing.
        if factor_time_range is None:
            self.start_time = None
            self.end_time = None
        else:
            self.start_time = min(factor_time_range)
            self.end_time = max(factor_time_range)

        if industry_cls is not None:
            assert industry_cls.lower() in INDUSTRY_CLS
        self.industry_cls = industry_cls
        # An explicit classification takes precedence: industry_data is
        # ignored whenever industry_cls is given.
        self.industry_data = industry_data if industry_cls is None else None
        if (self.industry_cls is None) and (self.industry_data is None):
            warnings.warn("没有指定行业分类方式,也没有输入行业数据", UserWarning)

        if weight_cls is not None:
            assert weight_cls.lower() in WEIGHT_CLS
        self.weight_cls = weight_cls
        # Same precedence rule as for the industry settings.
        self.weight_data = weight_data if weight_cls is None else None
        if (self.weight_cls is None) and (self.weight_data is None):
            # Neither a scheme nor data was supplied: fall back to equal weight.
            warnings.warn("没有指定加权方式,也没有输入加权数据,默认采用等权重方式", UserWarning)
            self.weight_cls = "avg"
            self.weight_data = None

        self.frequence = utils.get_frequence(frequence)
        self.detailed = detailed
Example #3
0
    def get_prices(
        self,
        code_list: Union[str, Tuple[str], List[str]] = None,
        start_time: Union[str, datetime.datetime] = None,
        end_time: Union[str, datetime.datetime] = None,
        fq: str = None,
        frequence: str = None,
        price_type: str = None,
    ):
        """
        Price data access interface.

        Any argument left empty falls back to the corresponding attribute on
        the instance (``frequence``, ``start_time``, ``end_time``, ``fq``,
        ``price_type``).

        Parameters
        ---
        :param code_list: stock codes
        :param start_time: start time
        :param end_time: end time
        :param fq: price adjustment mode; "pre"/"qfq"/"前复权" applies
            ``to_qfq()``, "post"/"hfq"/"后复权" applies ``to_hfq()``, any
            other value leaves the data unadjusted
        :param frequence: time frequency
        :param price_type: price column to return; "avg" returns
            ``amount / volume / 100.0`` instead of a stored column
        :return: the selected price series, unstacked
        :raises ValueError: if no start or end time can be determined
        """
        # 1. Stock pool
        if isinstance(code_list, tuple):
            code_list = list(code_list)

        # 2. Time frequency
        if not frequence:
            frequence = self.frequence
        frequence = utils.get_frequence(frequence)

        if not start_time:
            start_time = self.start_time

        if not end_time:
            end_time = self.end_time

        if (not start_time) or (not end_time):
            raise ValueError("价格获取接口需要指定起始时间与结束时间")

        # Keep only the "YYYY-MM-DD HH:MM:SS" part of the timestamp string.
        start_time = str(pd.Timestamp(start_time))[:19]
        end_time = str(pd.Timestamp(end_time))[:19]

        data = QA_fetch_stock_day_adv(code=code_list,
                                      start=start_time,
                                      end=end_time)
        index_data = QA_fetch_index_day_adv(code="000001",
                                            start=start_time,
                                            end=end_time)
        # 3. Price adjustment
        if not fq:
            fq = self.fq

        fq_key = fq.lower() if fq else None
        if fq_key in ("pre", "qfq", "前复权"):
            data = data.to_qfq()
        elif fq_key in ("post", "hfq", "后复权"):
            data = data.to_hfq()
        # "none" / "bfq" / "不复权", an empty value, or any unrecognised value:
        # the data is used unadjusted.

        # 4. Resampling
        # Because of suspensions, delistings and the like, resampling can
        # yield dates that differ from the ones we need, so the index series
        # is used as the reference to realign the resampled data.
        # Gaps caused by suspensions are filled with the previous value.
        if frequence == "1d":
            data = data.data.unstack().ffill().stack()
        else:
            index_data = index_data.resample(frequence).unstack()
            data = data.resample(frequence).unstack().ffill()
            data = data.reindex(index_data.index).stack()
            if frequence == '1q':
                data.index = data.index.map(lambda x:
                                            (utils.QA_fmt_quarter(x[0]), x[1]))

        # 5. Price type
        if not price_type:
            price_type = self.price_type

        price_key = price_type.lower()
        # Compare by value: the previous identity test (`is "avg"`) was not a
        # reliable string comparison, so this branch was effectively dead.
        if price_key == "avg":
            # NOTE(review): the 100.0 divisor presumably converts volume from
            # lots to shares - confirm against the data source.
            avg = data["amount"] / data["volume"] / 100.0
            return avg.unstack()
        return data[price_key].unstack()
Example #4
0
    def __init__(
            self,
            factor: Union[pd.Series, pd.DataFrame],
            prices: Union[pd.Series, pd.DataFrame, Callable],
            groupby: Union[pd.Series, pd.DataFrame, Callable],
            stock_start_date: Union[pd.Series, pd.DataFrame, Callable],
            weights: Union[float, pd.Series, pd.DataFrame, Callable] = 1.0,
            frequence: str = "DAY",
            quantiles: Union[int, Tuple[float], List[float]] = 5,
            bins: Union[int, Tuple[float], List[float]] = None,
            periods: Union[int, Tuple[int], List[int]] = (1, 5, 10),
            binning_by_group: bool = False,
            max_loss: float = 0.25,
            zero_aware: bool = False,
    ):
        """
        Store the analysis inputs and build the cleaned factor data.

        Parameters
        ---
        :param factor: the single-factor data, already processed by the caller
        :param prices: price data
        :param groupby: industry data
        :param stock_start_date: listing dates
        :param weights: factor weighting data
        :param frequence: factor frequency, e.g. `1q` for quarterly data,
            `1d` for daily data
        :param quantiles: quantile setting; either an int that splits the
            factor into equal parts, or unequal boundaries between 0 and 1
        :param bins: bin setting; only one of this and `quantiles` may be
            non-empty
        :param periods: periods used to compute the factor's forward returns;
            a bare int is wrapped into a one-element tuple
        :param binning_by_group: whether to bin separately within each industry
        :param max_loss: largest tolerated factor loss during single-factor
            processing (share of dropped NaN values)
        :param zero_aware: whether to bin positive and negative factor values
            separately

        Note:
            - only one of quantiles and bins may be non-empty
        """
        # Data inputs; the factor is normalised by the preprocess helper.
        self.factor = preprocess.QA_fmt_factor(factor)
        self.prices, self.groupby = prices, groupby
        self.stock_start_date, self.weights = stock_start_date, weights
        self.frequence = utils.get_frequence(frequence)

        # Binning / forward-return settings.
        self.quantiles, self.bins = quantiles, bins
        # A single period is accepted as a plain int; keep it iterable.
        self.periods = (periods,) if isinstance(periods, int) else periods
        self.binning_by_group = binning_by_group
        self.max_loss = max_loss
        self.zero_aware = zero_aware

        # Process the factor and attach its forward returns.
        self.__gen_clean_factor_and_forward_returns()