def __init__(
    self,
    factor: Union[pd.Series, pd.DataFrame],
    prices: Union[pd.Series, pd.DataFrame, Callable],
    groupby: Union[pd.Series, pd.DataFrame, Callable],
    stock_start_date: Union[pd.Series, pd.DataFrame, Callable],
    weights: Union[float, pd.Series, pd.DataFrame, Callable] = 1.0,
    frequence: str = "DAY",
    quantiles: Union[int, Tuple[float], List[float]] = 5,
    bins: Union[int, Tuple[float], List[float]] = None,
    periods: Union[int, Tuple[int], List[int]] = (1, 5, 10),
    binning_by_group: bool = False,
    max_loss: float = 0.25,
    zero_aware: bool = False,
):
    """
    Store the analysis inputs and build the cleaned factor data.

    Note:
        - Only one of ``quantiles`` and ``bins`` may be non-empty.
    """
    # The factor is passed through ``preprocess.QA_fmt_factor``; the other
    # inputs are stored exactly as received.
    self.factor = preprocess.QA_fmt_factor(factor)
    self.prices, self.groupby = prices, groupby
    self.stock_start_date, self.weights = stock_start_date, weights
    self.frequence = utils.get_frequence(frequence)
    self.quantiles, self.bins = quantiles, bins

    # A bare int is accepted as shorthand for a one-element tuple of periods.
    self.periods = (periods,) if isinstance(periods, int) else periods

    self.binning_by_group = binning_by_group
    self.max_loss = max_loss
    self.zero_aware = zero_aware

    # Process the factor and attach forward returns.
    self.__gen_clean_factor_and_forward_returns()
def __init__(
    self,
    jq_username: str = None,
    jq_password: str = None,
    price_type: str = "close",  # price field
    fq: str = "qfq",  # price adjustment mode
    factor_time_range: list = None,
    industry_cls: str = "sw_l1",  # industry classification
    industry_data: Union[dict, pd.Series, pd.DataFrame] = None,  # industry info
    weight_cls: str = "avg",  # weighting scheme
    weight_data: Union[dict, pd.Series, pd.DataFrame] = None,  # weight info
    frequence: str = "DAY",  # frequency
    detailed: bool = False,  # whether to query industries per date
):
    """
    Initialise the data interface from the given parameters.

    Notes
    ---
    To use custom industry or weight information, set ``industry_cls`` /
    ``weight_cls`` to None and pass ``industry_data`` / ``weight_data``.
    When a ``*_cls`` value is given, the matching ``*_data`` argument is
    ignored.
    ---
    :param jq_username: JoinQuant account name
    :param jq_password: JoinQuant password
    :param price_type: price field used when computing factor returns,
        one of ['open', 'high', 'low', 'close', 'avg']; defaults to 'close'
    :param fq: adjustment mode of the price data; None means unadjusted
    :param factor_time_range: time range covered by the factor. Its minimum
        and maximum become ``start_time`` / ``end_time``. When None, both
        are stored as None. An empty sequence still raises ``ValueError``.
    :param industry_cls: industry classification, currently one of
        - "sw_l1": Shenwan level 1
        - "sw_l2": Shenwan level 2
        - "sw_l3": Shenwan level 3
        - "jq_l1": JoinQuant level 1
        - "jq_l2": JoinQuant level 2
        - "zjw": CSRC
    :param industry_data: industry data [optional]; only one of this and
        ``industry_cls`` may be non-None
    :param weight_cls: weighting scheme, currently one of
        - "avg": equal weight
        - "mktcap": total market cap
        - "cmktcap": circulating market cap
        - "ln_mktcap": log of total market cap
        - "ln_cmktcap": log of circulating market cap
        - "sqrt_mktcap": square root of total market cap
        - "sqrt_cmktcap": square root of circulating market cap
    :param weight_data: weight data [optional]; only one of this and
        ``weight_cls`` may be non-None
    :param frequence: frequency
    :param detailed: whether to use detailed per-date industry information;
        by default the industry data as of end_date is used
    :raises AssertionError: if ``price_type``, ``fq``, ``industry_cls`` or
        ``weight_cls`` is not in the corresponding supported set
    """
    jqdatasdk.auth(jq_username, jq_password)

    # Price field
    if price_type is None:
        price_type = "close"
        warnings.warn("没有指定使用的价格类型,采用收盘价进行计算", UserWarning)
    assert price_type.lower() in PRICE_TYPE
    self.price_type = price_type

    # Adjustment mode
    if fq is not None:
        assert fq.lower() in FQ_TYPE
    else:
        warnings.warn("使用不复权的价格数据进行计算,结果可能受到分红配股等影响而不准确")
    self.fq = fq

    # Time range: the parameter defaults to None, so min()/max() must not be
    # called unconditionally (they raise TypeError on None).
    if factor_time_range is None:
        self.start_time = None
        self.end_time = None
    else:
        self.start_time = min(factor_time_range)
        self.end_time = max(factor_time_range)

    # Industry: an explicit classification overrides user-supplied data.
    if industry_cls is not None:
        assert industry_cls.lower() in INDUSTRY_CLS
        industry_data = None
    self.industry_cls = industry_cls
    self.industry_data = industry_data
    if (self.industry_cls is None) and (self.industry_data is None):
        warnings.warn("没有指定行业分类方式,也没有输入行业数据", UserWarning)

    # Weights: an explicit scheme overrides user-supplied data.
    if weight_cls is not None:
        assert weight_cls.lower() in WEIGHT_CLS
        weight_data = None
    self.weight_cls = weight_cls
    self.weight_data = weight_data
    if (self.weight_cls is None) and (self.weight_data is None):
        # Neither given: fall back to equal weighting.
        warnings.warn("没有指定加权方式,也没有输入加权数据,默认采用等权重方式", UserWarning)
        self.weight_cls = "avg"
        self.weight_data = None

    self.frequence = utils.get_frequence(frequence)
    self.detailed = detailed
def get_prices(
    self,
    code_list: Union[str, Tuple[str], List[str]] = None,
    start_time: Union[str, datetime.datetime] = None,
    end_time: Union[str, datetime.datetime] = None,
    fq: str = None,
    frequence: str = None,
    price_type: str = None,
):
    """
    Fetch price data for the given codes and time window.

    Arguments left empty fall back to the instance attributes
    (``self.frequence``, ``self.start_time``, ``self.end_time``,
    ``self.fq``, ``self.price_type``).

    Parameters
    ---
    :param code_list: stock code(s); a tuple is converted to a list
    :param start_time: start of the window
    :param end_time: end of the window
    :param fq: adjustment mode. "pre"/"qfq"/"前复权" applies forward
        adjustment, "post"/"hfq"/"后复权" applies backward adjustment.
        Any other value leaves the data unadjusted.
    :param frequence: time frequency, normalised with ``utils.get_frequence``
    :param price_type: price column to return; "avg" returns
        ``amount / volume / 100.0`` instead of a stored column
    :return: the selected price series after ``unstack()``
        (presumably dates as rows and codes as columns — confirm)
    :raises ValueError: if no start or end time is available
    """
    # 1. Stock pool
    if isinstance(code_list, tuple):
        code_list = list(code_list)

    # 2. Frequency and time window
    if not frequence:
        frequence = self.frequence
    frequence = utils.get_frequence(frequence)
    if not start_time:
        start_time = self.start_time
    if not end_time:
        end_time = self.end_time
    if (not start_time) or (not end_time):
        raise ValueError("价格获取接口需要指定起始时间与结束时间")
    # Keep only the first 19 characters: "YYYY-MM-DD HH:MM:SS".
    start_time = str(pd.Timestamp(start_time))[:19]
    end_time = str(pd.Timestamp(end_time))[:19]
    data = QA_fetch_stock_day_adv(code=code_list, start=start_time, end=end_time)
    index_data = QA_fetch_index_day_adv(code="000001", start=start_time, end=end_time)

    # 3. Price adjustment. Unrecognised or empty modes leave data untouched.
    if not fq:
        fq = self.fq
    if fq:
        fq_mode = fq.lower()
        if fq_mode in ["pre", "qfq", "前复权"]:
            data = data.to_qfq()
        elif fq_mode in ["post", "hfq", "后复权"]:
            data = data.to_hfq()

    # 4. Resampling
    # Suspensions and delistings can make resampled dates differ from the
    # dates we need, so the index series is used as the benchmark calendar
    # and the resampled data is realigned to it. Gaps caused by suspensions
    # are forward-filled.
    if frequence == "1d":
        data = data.data.unstack().ffill().stack()
    else:
        index_data = index_data.resample(frequence).unstack()
        data = data.resample(frequence).unstack().ffill()
        data = data.reindex(index_data.index).stack()
        if frequence == '1q':
            data.index = data.index.map(lambda x: (utils.QA_fmt_quarter(x[0]), x[1]))

    # 5. Price type
    if not price_type:
        price_type = self.price_type
    price_key = price_type.lower()
    # Must be an equality test: ``is "avg"`` compares object identity and
    # left this branch effectively unreachable.
    if price_key == "avg":
        avg = data["amount"] / data["volume"] / 100.0
        return avg.unstack()
    return data[price_key].unstack()
def __init__(
    self,
    factor: Union[pd.Series, pd.DataFrame],
    prices: Union[pd.Series, pd.DataFrame, Callable],
    groupby: Union[pd.Series, pd.DataFrame, Callable],
    stock_start_date: Union[pd.Series, pd.DataFrame, Callable],
    weights: Union[float, pd.Series, pd.DataFrame, Callable] = 1.0,
    frequence: str = "DAY",
    quantiles: Union[int, Tuple[float], List[float]] = 5,
    bins: Union[int, Tuple[float], List[float]] = None,
    periods: Union[int, Tuple[int], List[int]] = (1, 5, 10),
    binning_by_group: bool = False,
    max_loss: float = 0.25,
    zero_aware: bool = False,
):
    """
    Store the analysis inputs and build the cleaned factor data.

    Parameters
    ---
    :param factor: the already-processed single-factor data
    :param prices: price data
    :param groupby: industry data
    :param stock_start_date: listing dates
    :param weights: factor weighting data
    :param frequence: factor frequency, e.g. `1q` for quarterly bars,
        `1d` for daily bars
    :param quantiles: quantile binning; either an int (split the factor
        into equal parts) or a sequence of unequal edges between 0 and 1
    :param bins: bin specification; only one of this and `quantiles`
        may be non-empty
    :param periods: numbers of periods used to compute forward returns;
        a single int is wrapped into a one-element tuple
    :param binning_by_group: whether to bin separately within each industry
    :param max_loss: maximum tolerated factor loss (share of dropped NaN
        values) during single-factor processing
    :param zero_aware: whether to bin positive and negative factor values
        separately

    Note:
        - Only one of ``quantiles`` and ``bins`` may be non-empty.
    """
    # The factor is passed through ``preprocess.QA_fmt_factor``; the other
    # inputs are stored exactly as received.
    self.factor = preprocess.QA_fmt_factor(factor)
    self.prices, self.groupby = prices, groupby
    self.stock_start_date, self.weights = stock_start_date, weights
    self.frequence = utils.get_frequence(frequence)
    self.quantiles, self.bins = quantiles, bins

    # A bare int is accepted as shorthand for a one-element tuple of periods.
    self.periods = (periods,) if isinstance(periods, int) else periods

    self.binning_by_group = binning_by_group
    self.max_loss = max_loss
    self.zero_aware = zero_aware

    # Process the factor and attach forward returns.
    self.__gen_clean_factor_and_forward_returns()