def cond_percentile(self, df, cond):
    """Compute normalized row-wise ranks of ``df`` using ``cond`` as the rank mask.

    Missing entries in ``cond`` are filled with 0.0 before the cast to bool,
    so they become ``False``.

    :param df: values to rank.
    :param cond: condition aligned with ``df``; passed to ``rank_with_mask``
        as ``mask`` (presumably True marks the cells that take part in the
        ranking -- confirm against ``rank_with_mask``).
    :return: result of ``rank_with_mask(..., axis=1, normalize=True)``.
    """
    selector = cond.fillna(0.0).astype(bool)
    aligned, selector = self._align_bivariate(df, selector)
    members_only = self._mask_non_index_member(aligned)
    return rank_with_mask(members_only, mask=selector, axis=1, normalize=True)
def percentile(self, df, mask=None):
    """Return a DataFrame of normalized row-wise ranks (0.0 to 1.0).

    :param df: values to rank.
    :param mask: optional mask handed to ``self._mask_df`` before ranking.
    :return: result of ``rank_with_mask(..., axis=1, normalize=True)``.
    """
    aligned = self._align_univariate(df)
    members_only = self._mask_non_index_member(aligned)
    masked = self._mask_df(members_only, mask)
    return rank_with_mask(masked, axis=1, normalize=True)
def group_percentile(self, x, group):
    """Compute normalized row-wise ranks of ``x`` separately for each group.

    For every distinct non-null label found in ``group``, ``rank_with_mask``
    is called with ``mask=(group == label)`` and ``normalize=True``; the
    per-group results are merged by filling the gaps of the accumulated
    result with each subsequent group's ranks.

    :param x: values to rank.
    :param group: group labels; must expose ``.values`` and support ``==``
        (presumably a DataFrame shaped like ``x`` -- confirm against callers).
    :return: merged per-group ranks. When ``group`` contains no non-null
        label, an all-NaN DataFrame with the index and columns of ``x`` is
        returned instead of ``None``.
    """
    x = self._align_univariate(x)
    x = self._mask_non_index_member(x)

    labels = np.unique(pd.Series(group.values.flatten()).dropna())

    res = None
    for label in labels:
        in_group = (group == label)
        ranked = rank_with_mask(x, mask=in_group, axis=1, normalize=True)
        res = ranked if res is None else res.fillna(ranked)

    if res is None:
        # No group label at all: nothing can be ranked, so every cell is
        # missing. Returning a frame keeps the return type stable for callers.
        res = pd.DataFrame(np.nan, index=x.index, columns=x.columns)
    return res
def group_rank(self, df, group, mask=None):
    """Compute un-normalized row-wise ranks of ``df`` separately for each group.

    For every distinct non-null label found in ``group``, ``rank_with_mask``
    is called with ``mask=(group == label)`` and ``normalize=False``; the
    per-group results are merged by filling the gaps of the accumulated
    result with each subsequent group's ranks.

    :param df: values to rank.
    :param group: group labels; must expose ``.values`` and support ``==``
        (presumably a DataFrame shaped like ``df`` -- confirm against callers).
    :param mask: optional mask handed to ``self._mask_df`` before ranking.
    :return: merged per-group ranks. When ``group`` contains no non-null
        label, an all-NaN DataFrame with the index and columns of ``df`` is
        returned instead of ``None``.
    """
    df = self._align_univariate(df)
    df = self._mask_non_index_member(df)
    df = self._mask_df(df, mask)

    labels = np.unique(pd.Series(group.values.flatten()).dropna())

    res = None
    for label in labels:
        # Separate name on purpose: the ``mask`` parameter is consumed by
        # ``_mask_df`` above and is unrelated to this per-group selector.
        in_group = (group == label)
        ranked = rank_with_mask(df, mask=in_group, axis=1, normalize=False)
        res = ranked if res is None else res.fillna(ranked)

    if res is None:
        # No group label at all: nothing can be ranked, so every cell is
        # missing. Returning a frame keeps the return type stable for callers.
        res = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
    return res
def group_percentile(self, df, group, mask=None):
    """Compute normalized row-wise ranks of ``df`` separately for each group.

    For every distinct non-null label found in ``group``, ``rank_with_mask``
    is called with ``mask=(group == label)`` and ``normalize=True``; the
    per-group results are merged by filling the gaps of the accumulated
    result with each subsequent group's ranks.

    :param df: values to rank.
    :param group: group labels; must expose ``.values`` and support ``==``
        (presumably a DataFrame shaped like ``df`` -- confirm against callers).
    :param mask: optional mask handed to ``self._mask_df`` before ranking.
    :return: merged per-group ranks. When ``group`` contains no non-null
        label, an all-NaN DataFrame with the index and columns of ``df`` is
        returned instead of ``None``.
    """
    df = self._align_univariate(df)
    df = self._mask_non_index_member(df)
    df = self._mask_df(df, mask)

    labels = np.unique(pd.Series(group.values.flatten()).dropna())

    res = None
    for label in labels:
        # Separate name on purpose: the ``mask`` parameter is consumed by
        # ``_mask_df`` above and is unrelated to this per-group selector.
        in_group = (group == label)
        ranked = rank_with_mask(df, mask=in_group, axis=1, normalize=True)
        res = ranked if res is None else res.fillna(ranked)

    if res is None:
        # No group label at all: nothing can be ranked, so every cell is
        # missing. Returning a frame keeps the return type stable for callers.
        res = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
    return res
def rank_standardize(factor_df, index_member=None):
    """
    Rebuild a factor from its cross-sectional rank scores, scaled to 0-1.

    Ranking is ascending by default: the larger the factor value, the larger
    (better) the rank score.

    :param index_member: forwarded to ``_mask_non_index_member`` together
        with the factor values.
    :param factor_df: factor values (pandas.DataFrame); the index is datetime
        and the columns are stock codes.
    For example:
                        AAPL	        BA	        CMG	        DAL	      LULU
        date
        2016-06-24	0.165260	0.002198	0.085632	-0.078074	0.173832
        2016-06-27	0.165537	0.003583	0.063299	-0.048674	0.180890
        2016-06-28	0.135215	0.010403	0.059038	-0.034879	0.111691
        2016-06-29	0.068774	0.019848	0.058476	-0.049971	0.042805
        2016-06-30	0.039431	0.012271	0.037432	-0.027272	0.010902
    :return: the rank-rebuilt factor values, ranging from 0 to 1.
    """
    cleaned = jutil.fillinf(factor_df)
    members_only = _mask_non_index_member(cleaned, index_member)
    return jutil.rank_with_mask(members_only, axis=1, normalize=True)
def test_rank_percentile():
    """Check ``jutil.rank_with_mask`` in raw and normalized modes on random data."""
    df = pd.DataFrame(np.random.rand(500, 3000))

    res1 = jutil.rank_with_mask(df, axis=1, mask=None, normalize=False)
    res2 = jutil.rank_with_mask(df, axis=1, mask=None, normalize=True)

    # Both modes rank the same input, so their outputs must line up.
    assert res1.shape == res2.shape

    # Normalized ranks are documented to lie in [0.0, 1.0].
    normalized = np.asarray(res2, dtype=float)
    assert np.nanmin(normalized) >= 0.0
    assert np.nanmax(normalized) <= 1.0
def rank(self, df):
    """Return un-normalized row-wise ranks of ``df``.

    The ranking is requested with ``normalize=False``, so the result is not
    rescaled to the 0.0-1.0 range.

    :param df: values to rank.
    :return: result of ``rank_with_mask(..., axis=1, normalize=False)``.
    """
    aligned = self._align_univariate(df)
    members_only = self._mask_non_index_member(aligned)
    return rank_with_mask(members_only, axis=1, normalize=False)