Esempio n. 1
0
def test_pdutil():
    """Smoke-test ``jutil.fillinf``, ``jutil.to_quantile`` and ``jutil.group_df_to_dict``."""
    frame = pd.DataFrame(np.random.rand(4, 20))

    # Punch three NaN holes into the random data.
    for row, col in ((1, 2), (3, 4), (1, 4)):
        frame.iloc[row, col] = np.nan
    assert frame.isnull().sum().sum() == 3

    # Infinite values are not counted by isnull() ...
    frame.iloc[2, 11] = np.inf
    frame.iloc[2, 12] = -np.inf
    assert frame.isnull().sum().sum() == 3

    # ... but after fillinf the two infinite cells are reported as null too.
    cleaned = jutil.fillinf(frame)
    assert cleaned.isnull().sum().sum() == 5

    # No assertion on the quantile output: this only checks the call completes.
    jutil.to_quantile(frame, 5, axis=1)

    labelled = frame.copy()
    labelled['group'] = ['a', 'a', 'b', 'a']

    by_group = jutil.group_df_to_dict(labelled, by='group')
    assert set(by_group.keys()) == {'a', 'b'}
Esempio n. 2
0
    def get_signal_data(self, signal):
        """
        Assemble the stacked (long-format) signal table.

        ``signal`` is checked via ``self._judge``, cleaned with
        ``jutil.fillinf``, lagged, bucketed into quantiles and joined with
        the return frames held in ``self.signal_ret``. Cells flagged by
        ``self.mask`` or lacking a (lagged) signal value are dropped.

        Parameters
        ----------
        signal : pd.DataFrame
            Wide signal frame. It is stacked into ['trade_date', 'symbol']
            rows, so presumably index is date and columns are symbols.

        Returns
        -------
        res : pd.DataFrame
            Index is pd.MultiIndex ['trade_date', 'symbol']. Columns are
            'signal', one column per non-None entry of ``self.signal_ret``,
            'group' (only when ``self.group`` is set) and 'quantile'.
        """
        self._judge(signal)  # check that signal is consistent with the other key inputs
        self._cal_ret()  # compute the signal returns

        # Replace infinities, then lag one row to avoid forward-looking bias.
        lagged = jutil.fillinf(signal).shift(1)

        # forward or not
        if not self.forward:
            lagged = lagged.shift(self.period)

        # Mask handling: exclude masked cells and cells without a signal.
        excluded = np.logical_or(self.mask, lagged.isnull())

        # calculate quantile (excluded cells become NaN before bucketing)
        visible = lagged[~excluded]
        if self.n_quantiles != 1:
            quantiles = jutil.to_quantile(visible,
                                          n_quantiles=self.n_quantiles)
        else:
            # A single bucket: every cell gets quantile 1.
            quantiles = visible.copy()
            quantiles.loc[:, :] = 1.0

        # ----------------------------------------------------------------------
        # stack
        def to_long(frame):
            # NaN cells are kept while stacking (dropna=False).
            stacked = pd.DataFrame(frame.stack(dropna=False))
            stacked.index.names = ['trade_date', 'symbol']
            return stacked.sort_index(axis=0, level=['trade_date', 'symbol'])

        # ----------------------------------------------------------------------
        # concat signal value
        res = to_long(lagged)  # signal
        res.columns = ['signal']

        for ret_type, ret_frame in self.signal_ret.items():
            if ret_frame is None:
                continue
            res[ret_type] = to_long(ret_frame).fillna(0)  # returns

        if self.group is not None:
            res["group"] = to_long(self.group)

        res['quantile'] = to_long(quantiles)  # quantile
        keep_rows = ~(to_long(excluded).iloc[:, 0])
        res = res.loc[keep_rows, :]

        if len(res) == 0:
            print("No signal available.")
        else:
            nan_count = res.isnull().sum(axis=0).sum()
            effective_pct = len(res) * 100. / lagged.size
            print("Nan Data Count (should be zero) : {:d};  "
                  "Percentage of effective data: {:.0f}%".format(nan_count,
                                                                 effective_pct))
        return res.astype({'signal': float, 'return': float, 'quantile': int})
Esempio n. 3
0
    X3[signal_name] = factor_dict[signal_name].shift(2).stack()
X1_ = X1.join(X2,rsuffix='_2')
X1_ = X1_.join(X3,rsuffix='_3') 
'''
# Only the base feature frame X1 is used; the statements joining X2/X3 just
# above are enclosed in a triple-quoted string and therefore do not run.
X1_ = X1

# In[ ]:

# (trade_date, symbol) index values on either side of the 20160101 label.
# NOTE(review): pandas label slices include both end points, so a 20160101
# row (if one exists) would land in both the train and the test indexer.
train_indexer = dv.get_ts('close_adj').loc[:20160101].stack().index.values
test_indexer = dv.get_ts('close_adj').loc[20160101:].stack().index.values
X = X1_
# Target: percentage change of 'close_adj' over `period` rows, shifted back by
# `period` rows so each row carries the change that follows it; stacked and
# aligned to the rows of X.
Y = dv.get_ts('close_adj').pct_change(period).shift(-period).stack().reindex(
    index=X.index)
import jaqs.util as jutil

# Bucket the same forward change into 7 quantile labels.
Y_q = jutil.to_quantile(
    dv.get_ts('close_adj').pct_change(period).shift(-period), n_quantiles=7)
Y_q_clip = Y_q.stack().reindex(index=X.index)
# Keep only the rows labelled 1.0 or 7.0 (the two extreme buckets).
Y_q_clip = Y_q_clip[np.logical_or(Y_q_clip == 1.0, Y_q_clip == 7.0)]
Y_clip = Y.reindex(index=Y_q_clip.index)
# Binary class label: 1 for bucket 7.0, 0 for bucket 1.0.
# presumably 7.0 is the highest-change bucket; confirm against jutil.to_quantile.
Y_clip_class = pd.Series(np.where(Y_q_clip == 7.0, 1, 0), index=Y_q_clip.index)
X_clip = X.reindex(index=Y_q_clip.index)
from sklearn.linear_model import LogisticRegression


def split(X, max_train_size=5, period=1):
    n = len(X)
    lis = []
    for i in range(1, n):
        pred_index = [n - i]
        if (n - i - max_train_size - period) >= 0:
            train_index = [
Esempio n. 4
0
    def process_signal_before_analysis(self,
                                       signal,
                                       price=None,
                                       ret=None,
                                       benchmark_price=None,
                                       period=5,
                                       n_quantiles=5,
                                       mask=None,
                                       forward=False):
        """
        Prepare for signal analysis.

        Builds a stacked table of lagged signal, return and signal quantile
        and stores it in ``self.signal_data``. ``self.n_quantiles`` and
        ``self.period`` are saved, and ``self.benchmark_ret`` is set when a
        benchmark price is given.

        Parameters
        ----------
        signal : pd.DataFrame
            Index is date, columns are stocks.
        price : pd.DataFrame
            Index is date, columns are stocks.
        ret : pd.DataFrame
            Index is date, columns are stocks.
        benchmark_price : pd.DataFrame or pd.Series or None
            Price of benchmark. Only allowed together with ``price``.
        mask : pd.DataFrame
            Data cells that should NOT be used.
            NOTE(review): the mask is validated and converted, but the final
            mask is rebuilt from missing signal / return cells only, so this
            argument currently has no effect on the result.
        n_quantiles : int
        period : int
            periods to compute forward returns on.
        forward : bool
            If True, returns are shifted back by ``period`` rows so each row
            pairs the signal with the return that follows it. If False, the
            signal is shifted forward by ``period`` rows instead.

        Returns
        -------
        None
            The result is stored in ``self.signal_data``: a pd.DataFrame whose
            index is pd.MultiIndex ['trade_date', 'symbol'] and whose
            columns are ['signal', 'return', 'quantile'].

        Raises
        ------
        ValueError
            If neither or both of price / ret are given, if benchmark_price
            is combined with ret, or if n_quantiles is not a positive int.
        AssertionError
            If the index / columns of signal differ from those of
            price / ret or mask.

        Notes
        -----
        Deal with suspensions:
            If the period of calculating return is d (from T to T+d), then
            we do not use signal values of those suspended on T,
            we do not calculate return for those suspended on T+d.
        """
        # ----------------------------------------------------------------------
        # parameter validation
        if price is None and ret is None:
            raise ValueError("One of price / ret must be provided.")
        if price is not None and ret is not None:
            raise ValueError("Only one of price / ret should be provided.")
        if ret is not None and benchmark_price is not None:
            raise ValueError(
                "You choose 'return' mode but benchmark_price is given.")
        if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
            raise ValueError(
                "n_quantiles must be a positive integer. Input is: {}".format(
                    n_quantiles))

        # ensure inputs are aligned
        data = price if price is not None else ret
        assert np.all(signal.index == data.index)
        assert np.all(signal.columns == data.columns)
        if mask is not None:
            assert np.all(signal.index == mask.index)
            assert np.all(signal.columns == mask.columns)
            mask = jutil.fillinf(mask)
            # dtype of mask could be float. So we need to convert.
            # Missing cells are filled BEFORE the integer cast: casting a
            # frame that still holds NaN to int fails, which made the
            # original trailing fillna(0) unreachable.
            mask = mask.fillna(0).astype(int).astype(bool)
        else:
            mask = pd.DataFrame(index=signal.index,
                                columns=signal.columns,
                                data=False)
        signal = jutil.fillinf(signal)
        # Clean the frame that is actually consumed below. Previously the
        # cleaned copy was bound to `data` and never used again, so infinite
        # prices / returns leaked into the result.
        if price is not None:
            price = jutil.fillinf(price)
        else:
            ret = jutil.fillinf(ret)

        # ----------------------------------------------------------------------
        # save data
        self.n_quantiles = n_quantiles
        self.period = period

        # ----------------------------------------------------------------------
        # Get dependent variables
        if price is not None:
            df_ret = pfm.price2ret(price, period=self.period, axis=0)
            if benchmark_price is not None:
                benchmark_price = benchmark_price.loc[signal.index]
                bench_ret = pfm.price2ret(benchmark_price, self.period, axis=0)
                self.benchmark_ret = bench_ret
                # return relative to the benchmark, row by row
                residual_ret = df_ret.sub(bench_ret.values.flatten(), axis=0)
            else:
                residual_ret = df_ret
        else:
            residual_ret = ret

        # Get independent variable
        signal = signal.shift(1)  # avoid forward-looking bias

        # forward or not
        if forward:
            # point-in-time signal and forward return
            residual_ret = residual_ret.shift(-self.period)
        else:
            # past signal and point-in-time return
            signal = signal.shift(self.period)

        # ----------------------------------------------------------------------
        # get masks
        mask_price_return = residual_ret.isnull()
        mask_signal = signal.isnull()

        # NOTE(review): this rebinding discards the caller-supplied mask
        # prepared above; only missing signal / return cells are excluded.
        # Confirm whether the user mask should be OR-ed in here.
        mask = np.logical_or(mask_signal, mask_price_return)

        # ----------------------------------------------------------------------
        # calculate quantile
        signal_masked = signal[~mask]  # excluded cells become NaN
        if n_quantiles == 1:
            df_quantile = signal_masked.copy()
            df_quantile.loc[:, :] = 1.0
        else:
            df_quantile = jutil.to_quantile(signal_masked,
                                            n_quantiles=n_quantiles)

        # ----------------------------------------------------------------------
        # stack
        def stack_td_symbol(df):
            df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
            df.index.names = ['trade_date', 'symbol']
            df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
            return df

        mask = stack_td_symbol(mask)
        df_quantile = stack_td_symbol(df_quantile)
        residual_ret = stack_td_symbol(residual_ret)

        # ----------------------------------------------------------------------
        # concat signal value
        res = stack_td_symbol(signal)
        res.columns = ['signal']
        res['return'] = residual_ret
        res['quantile'] = df_quantile
        res = res.loc[~(mask.iloc[:, 0]), :]

        print("Nan Data Count (should be zero) : {:d};  " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                             len(res) * 100. / signal.size))
        res = res.astype({'signal': float, 'return': float, 'quantile': int})
        self.signal_data = res
Esempio n. 5
0
    def create_single_signal_report(self,
                                    signal,
                                    price,
                                    periods,
                                    n_quantiles,
                                    mask=None,
                                    buy_condition=None):
        """
        Plot an analysis report for a signal on a single instrument.

        Computes returns for every entry of ``periods``, buckets the signal
        into quantiles, derives per-quantile mean / std and IC, optionally
        runs a simple condition-based backtest, and hands the resulting
        figure to ``self.show_fig``.

        Parameters
        ----------
        signal : pd.Series
            index is integer date, values are signals.
            A pd.DataFrame is reduced to its first column.
        price : pd.Series
            index is integer date, values are prices.
            A pd.DataFrame is reduced to its first column.
        periods : list of int
            Periods passed to ``pfm.price2ret``; each one becomes a return
            column labelled by the period.
        n_quantiles : int
            Number of quantile buckets; 1 puts every row in bucket 1.
        mask : pd.Series or None, optional
            index is integer date, values are bool.
            NOTE(review): not used anywhere in this method.
        buy_condition : dict , optional
            {'cond_name1': {'column': str, 'hold': int, 'filter': func},
             'cond_name2': {'column': str, 'hold': int, 'filter': func},
            }
            'column' names the result column the 'filter' function is applied
            to element-wise; 'hold' is the label of the return column to
            accumulate (presumably one of ``periods``).

        Returns
        -------
        None
            The figure is passed to ``self.show_fig`` as 'single_inst.pdf'.

        """
        if isinstance(signal, pd.DataFrame):
            signal = signal.iloc[:, 0]
        if isinstance(price, pd.DataFrame):
            price = price.iloc[:, 0]

        # calc return
        ret_l = {
            period: pfm.price2ret(price, period=period, axis=0)
            for period in periods
        }
        df_ret = pd.concat(ret_l, axis=1)

        # ----------------------------------------------------------------------
        # calculate quantile
        # NOTE(review): the quantile is computed from the un-shifted signal
        # while the 'signal' column below is lagged by one row; confirm this
        # offset is intended.
        if n_quantiles == 1:
            df_quantile = signal.copy()
            df_quantile.loc[:] = 1.0
        else:
            df_quantile = jutil.to_quantile(signal,
                                            n_quantiles=n_quantiles,
                                            axis=0)

        # ----------------------------------------------------------------------
        # concat signal value
        res = pd.DataFrame(signal.shift(1))
        res.columns = ['signal']
        res['quantile'] = df_quantile
        res = pd.concat([res, df_ret], axis=1)
        res = res.dropna()

        print("Nan Data Count (should be zero) : {:d};  " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                             len(res) * 100. / signal.size))

        # calc quantile stats: mean / std of every return column per quantile
        gp = res.groupby(by='quantile')
        dic_stats = OrderedDict()
        for q, df in gp:
            df_stat = pd.DataFrame(index=['mean', 'std'],
                                   columns=df_ret.columns,
                                   data=np.nan)
            df_stat.loc['mean', :] = df.loc[:, df_ret.columns].mean(axis=0)
            df_stat.loc['std', :] = df.loc[:, df_ret.columns].std(axis=0)
            dic_stats[q] = df_stat

        # calculate IC
        ics = calc_various_ic(res, ret_cols=df_ret.columns)

        # backtest
        if buy_condition is not None:

            def sim_backtest(df, dic_of_cond):
                """Cumulate the 'hold' return on rows where each condition fires."""
                dic_cum_ret = dict()
                for key, dic in dic_of_cond.items():
                    col_name = dic['column']
                    func = dic['filter']
                    n_hold = dic['hold']
                    # 1 where the filter accepts the value, 0 otherwise
                    hit = df[col_name].apply(func).astype(int)
                    dic_cum_ret[key] = (df[n_hold] * hit).cumsum()
                df_cumret = pd.concat(dic_cum_ret, axis=1)
                return df_cumret

            df_backtest = sim_backtest(res, buy_condition)

        # plot
        gf = plotting.GridFigure(rows=3, cols=1, height_ratio=1.2)
        gf.fig.suptitle("Event Return Analysis (annualized)")

        plotting.plot_ic_decay(ics, ax=gf.next_row())

        plotting.plot_quantile_return_mean_std(dic_stats, ax=gf.next_row())

        if buy_condition is not None:
            plotting.plot_batch_backtest(df_backtest, ax=gf.next_row())

        self.show_fig(gf.fig, 'single_inst.pdf')
Esempio n. 6
0
    def process_signal_before_analysis(self,
                                       signal,
                                       price=None,
                                       daily_ret=None,
                                       benchmark_price=None,
                                       daily_benchmark_ret=None,
                                       high=None,
                                       low=None,
                                       group=None,
                                       period=5,
                                       n_quantiles=5,
                                       mask=None,
                                       can_enter=None,
                                       can_exit=None,
                                       forward=True,
                                       commission=0.0008):
        """
        Prepare for signal analysis.

        Builds a stacked table of lagged signal, holding-period returns and
        signal quantile and stores it in ``self.signal_data``. Also sets
        ``self.n_quantiles``, ``self.period``, ``self.benchmark_ret`` and
        ``self.ret``. Inputs whose index / columns differ from ``signal``
        are reindexed like ``signal`` with a warning.

        Parameters
        ----------
        signal : pd.DataFrame
            Index is date, columns are stocks.
        price : pd.DataFrame
            Index is date, columns are stocks.
        high : pd.DataFrame
            Index is date, columns are stocks. Only used in price mode;
            otherwise ``price`` is used in its place.
        low : pd.DataFrame
            Index is date, columns are stocks. Only used in price mode;
            otherwise ``price`` is used in its place.
        daily_ret : pd.DataFrame
            Index is date, columns are stocks.
        daily_benchmark_ret : pd.DataFrame or pd.Series or None
            Daily ret of benchmark.
        group : pd.DataFrame
            Index is date, columns are stocks.
        benchmark_price : pd.DataFrame or pd.Series or None
            Price of benchmark.
        mask : pd.DataFrame
            Data cells that should NOT be used.
        can_enter: pd.DataFrame
            Date the security can be traded and BUY.
        can_exit:pd.DataFrame
            Date the security can be traded and SELL.
        n_quantiles : int
        period : int
            periods to compute forward returns on.
        forward :bool
            Return cal method. True by default. If True, the returns are
            shifted back by ``period`` rows; if False, signal, can_enter and
            mask are shifted forward by ``period`` rows instead.
        commission: float
            commission ratio per trade; subtracted from the return, the
            upside return and the downside return.

        Returns
        -------
        None
            The result is stored in ``self.signal_data``: a pd.DataFrame whose
            index is pd.MultiIndex ['trade_date', 'symbol'] and whose
            columns are ['signal', 'return', 'upside_ret', 'downside_ret',
            'quantile'], plus 'group' when ``group`` is given.

        Raises
        ------
        ValueError
            If neither or both of price / daily_ret are given, if both
            benchmark_price and daily_benchmark_ret are given, or if
            n_quantiles is not a positive int.

        Notes
        -----
        Deal with suspensions:
            If the period of calculating return is d (from T to T+d), then
            we do not use signal values of those suspended on T,
            we do not calculate return for those suspended on T+d.
        """
        # ----------------------------------------------------------------------
        # parameter validation
        if price is None and daily_ret is None:
            raise ValueError("One of price / daily_ret must be provided.")
        if price is not None and daily_ret is not None:
            raise ValueError(
                "Only one of price / daily_ret should be provided.")
        if benchmark_price is not None and daily_benchmark_ret is not None:
            raise ValueError(
                "Only one of benchmark_price / daily_benchmark_ret should be provided."
            )
        if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
            raise ValueError(
                "n_quantiles must be a positive integer. Input is: {}".format(
                    n_quantiles))

        if daily_ret is not None:
            warnings.warn(
                "Warning: 检查到使用daily_ret模式。未避免未来函数,请注意确保daily_ret格式为对应日期能实现的日收益."
            )

        def align_to_signal(frame, name):
            """Return ``frame`` reindexed like ``signal``, warning on mismatch."""
            # Explicit comparison instead of assert + bare except: asserts
            # are stripped under ``python -O``, which silently skipped the
            # realignment, and the bare except hid unrelated errors.
            if not (signal.index.equals(frame.index)
                    and signal.columns.equals(frame.columns)):
                warnings.warn(
                    "Warning: signal与{}的index/columns不一致,请检查输入参数!".format(
                        name))
                frame = frame.reindex_like(signal)
            return frame

        def as_bool_frame(frame):
            """Replace inf, treat missing cells as 0 and convert to bool."""
            # dtype could be float, so it is converted via int. Missing cells
            # (e.g. introduced by the reindex above) are filled BEFORE the
            # integer cast, because casting NaN to int fails.
            return jutil.fillinf(frame).fillna(0).astype(int).astype(bool)

        # ensure inputs are aligned
        if mask is not None:
            mask = as_bool_frame(align_to_signal(mask, "mask"))
        else:
            mask = pd.DataFrame(index=signal.index,
                                columns=signal.columns,
                                data=False)
        if can_enter is not None:
            can_enter = as_bool_frame(align_to_signal(can_enter, "can_enter"))
        else:
            can_enter = pd.DataFrame(index=signal.index,
                                     columns=signal.columns,
                                     data=True)
        if can_exit is not None:
            can_exit = as_bool_frame(align_to_signal(can_exit, "can_exit"))
        else:
            can_exit = pd.DataFrame(index=signal.index,
                                    columns=signal.columns,
                                    data=True)
        if group is not None:
            group = align_to_signal(group, "group").astype(str)

        # ----------------------------------------------------------------------
        # save data
        self.n_quantiles = n_quantiles
        self.period = period

        # ----------------------------------------------------------------------
        # Get dependent variables

        # benchmark return over the holding period
        self.benchmark_ret = None
        if benchmark_price is not None:
            benchmark_price = benchmark_price.reindex(index=signal.index)
            self.benchmark_ret = pfm.price2ret(benchmark_price,
                                               self.period,
                                               axis=0,
                                               compound=True)
        elif daily_benchmark_ret is not None:
            daily_benchmark_ret = daily_benchmark_ret.reindex(
                index=signal.index)
            self.benchmark_ret = pfm.daily_ret_to_ret(daily_benchmark_ret,
                                                      self.period)

        # holding-period return of each security
        is_real_price = False
        if daily_ret is not None:
            daily_ret = align_to_signal(daily_ret, "daily_ret")
            daily_ret = jutil.fillinf(daily_ret).fillna(0)
            # build a price-like series from the daily returns
            price = pfm.daily_ret_to_cum(daily_ret)
        else:
            # a real price was supplied
            is_real_price = True
            price = jutil.fillinf(align_to_signal(price, "price"))

        # A position can only be entered where a price exists. The former
        # `price != np.NaN` was True for every cell (NaN never compares
        # equal), and the `np.NaN` alias no longer exists in NumPy 2.
        can_enter = np.logical_and(price.notnull(), can_enter)
        df_ret = pfm.price2ret(price,
                               period=self.period,
                               axis=0,
                               compound=True)
        # Where exit is not possible, use the next price at which it is:
        # blank those cells and back-fill from later rows.
        price_can_exit = price.copy()
        price_can_exit[~can_exit] = np.nan
        price_can_exit = price_can_exit.bfill()
        ret_can_exit = pfm.price2ret(price_can_exit,
                                     period=self.period,
                                     axis=0,
                                     compound=True)
        df_ret[~can_exit] = ret_can_exit[~can_exit]

        if self.benchmark_ret is not None:
            # holding-period return relative to the benchmark
            residual_ret = df_ret.sub(self.benchmark_ret.values.flatten(),
                                      axis=0)
        else:
            residual_ret = df_ret
        residual_ret = jutil.fillinf(residual_ret)
        residual_ret -= commission

        # potential upside and potential downside
        if high is not None and is_real_price:
            high = jutil.fillinf(align_to_signal(high, "high"))
        else:
            high = price
        upside_ret = compute_upside_returns(price,
                                            high,
                                            can_exit,
                                            self.period,
                                            compound=True)
        upside_ret = jutil.fillinf(upside_ret)
        upside_ret -= commission

        if low is not None and is_real_price:
            low = jutil.fillinf(align_to_signal(low, "low"))
        else:
            low = price
        downside_ret = compute_downside_returns(price,
                                                low,
                                                can_exit,
                                                self.period,
                                                compound=True)
        downside_ret = jutil.fillinf(downside_ret)
        downside_ret -= commission

        # ----------------------------------------------------------------------
        # Get independent variable
        signal = jutil.fillinf(signal)
        signal = signal.shift(1)  # avoid forward-looking bias
        # forward or not
        if forward:
            # point-in-time signal and forward return
            residual_ret = residual_ret.shift(-self.period)
            upside_ret = upside_ret.shift(-self.period)
            downside_ret = downside_ret.shift(-self.period)
        else:
            # past signal and point-in-time return
            signal = signal.shift(self.period)
            can_enter = can_enter.shift(self.period)
            mask = mask.shift(self.period)

        self.ret = dict()
        self.ret["return"] = residual_ret
        self.ret["upside_ret"] = upside_ret
        self.ret["downside_ret"] = downside_ret

        # ----------------------------------------------------------------------
        # get masks
        mask_signal = signal.isnull()

        # Exclude a cell when it is masked (rows emptied by the shift count
        # as masked), has no signal, cannot be entered, or has no return.
        mask = np.logical_or(
            mask.fillna(True),
            np.logical_or(mask_signal, ~(can_enter.fillna(False))))
        mask = np.logical_or(mask, self.ret["return"].isnull())

        # ----------------------------------------------------------------------
        # calculate quantile
        signal_masked = signal.copy()
        signal_masked = signal_masked[~mask]
        if n_quantiles == 1:
            df_quantile = signal_masked.copy()
            df_quantile.loc[:, :] = 1.0
        else:
            if group is None:
                df_quantile = jutil.to_quantile(signal_masked,
                                                n_quantiles=n_quantiles)
            else:
                # quantiles are computed within each group
                from jaqs_fxdayu.data.py_expression_eval import Parser
                ps = Parser()
                ps.index_member = None
                df_quantile = ps.group_quantile(df=signal_masked,
                                                group=group,
                                                n_quantiles=n_quantiles)

        # ----------------------------------------------------------------------
        # stack
        def stack_td_symbol(df):
            df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
            df.index.names = ['trade_date', 'symbol']
            df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
            return df

        # ----------------------------------------------------------------------
        # concat signal value
        res = stack_td_symbol(signal)
        res.columns = ['signal']
        for ret_type in self.ret.keys():
            res[ret_type] = stack_td_symbol(self.ret[ret_type]).fillna(0)
        res['quantile'] = stack_td_symbol(df_quantile)
        if group is not None:
            res["group"] = stack_td_symbol(group)
        mask = stack_td_symbol(mask)
        res = res.loc[~(mask.iloc[:, 0]), :]

        if len(res) > 0:
            print("Nan Data Count (should be zero) : {:d};  " \
                  "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                                 len(res) * 100. / signal.size))
        else:
            print("No signal available.")
        res = res.astype({'signal': float, 'return': float, 'quantile': int})
        self.signal_data = res
Esempio n. 7
0
    def process_signal(self,
                       enter_signal,
                       exit_signal=None,
                       sig_type="long",
                       price=None,
                       daily_ret=None,
                       max_holding_period=None,
                       stoploss=None,
                       stopprofit=None,
                       mask=None,
                       can_enter=None,
                       can_exit=None,
                       group=None,
                       n_quantiles=1,
                       commission=0.0008):
        """
        Prepare for signal analysis.

        Parameters
        ----------
        enter_signal : pd.DataFrame
            Index is date, columns are stocks.value can only be -2/0/2
        exit_signal : pd.DataFrame/list of pd.DataFrame
            Index is date, columns are stocks.value can only be -1/0/1
        sig_type: str
            "long"/"short", which type of signal to process
        price : pd.DataFrame
            Index is date, columns are stocks.
        daily_ret : pd.DataFrame
            Index is date, columns are stocks.
        mask : pd.DataFrame
            Data cells that should NOT be used.
        can_enter: pd.DataFrame
            Date the security can open.
        can_exit:pd.DataFrame
            Date the security can close.
        max_holding_period : int
            Limit the max holding period
        stoploss:float
            stoploss ratio per trade
        stopprofit:float
            stopprofit ratio per trade
        n_quantiles: int
        group : pd.DataFrame
            Index is date, columns are stocks.
        commission: float
            commission ratio per trade.
        Returns
        -------
        res : pd.DataFrame
            Signal processed
        """
        # ensure inputs are aligned
        # parameter validation
        if sig_type not in ["long", "short"]:
            raise ValueError("信号类型(sig_type)只能为long/short.")

        if price is None and daily_ret is None:
            raise ValueError("One of price / daily_ret must be provided.")
        if price is not None and daily_ret is not None:
            raise ValueError(
                "Only one of price / daily_ret should be provided.")
        if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
            raise ValueError(
                "n_quantiles must be a positive integer. Input is: {}".format(
                    n_quantiles))

        enter_signal = jutil.fillinf(enter_signal)
        if n_quantiles == 1:  # 事件类进场信号
            # 确保enter_signal里的信号只能为-2(开空),0(不做操作),2(开多)
            enter_signal = enter_signal.fillna(0)
            if not enter_signal.isin([-2, 0, 2]).all().all():
                raise ValueError("检测到n_quantiles为1,该模式下测试的enter_signal为事件类因子."
                                 "请确保enter_signal里的信号只能为-2(开空),0(不做操作),2(开多))."
                                 "如需测试普通因子,请指定n_quantiles为大于1的整数.")
            # 确保至少有一种出场信号
            if (exit_signal is None) and (max_holding_period is None) and \
                    (stoploss is None) and (stopprofit is None):
                raise ValueError(
                    "确保至少有一种出场信号(exit_signal/max_holding_period/stoploss/stopprofit)"
                )
        else:  # 普通进场信号
            if max_holding_period is None:
                raise ValueError("检测到n_quantiles不为1,该模式下测试的enter_signal为普通因子."
                                 "该模式下,max_holding_period参数不能为空.")
            self.period = max_holding_period

        if exit_signal is not None:
            # 确保exit_signal里的信号只能为-1(平空),0(不做操作),1(平多)
            if not isinstance(exit_signal, list):
                exit_signal = [exit_signal]
            for i in range(len(exit_signal)):
                exit_signal[i] = exit_signal[i].reindex_like(enter_signal)
                exit_signal[i] = jutil.fillinf(exit_signal[i]).fillna(0)
                if not exit_signal[i].isin([-1, 0, 1]).all().all():
                    raise ValueError(
                        "请确保所有exit_signal里的信号只能为-1(平空),0(不做操作),1(平多)")
        else:
            exit_signal = []

        if group is not None:
            group = group.reindex_like(enter_signal)

        sig_filter = {
            "mask": mask,
            "can_enter": can_enter,
            "can_exit": can_exit,
        }

        for _filter in sig_filter.keys():
            if sig_filter[_filter] is not None:
                sig_filter[_filter] = sig_filter[_filter].reindex_like(
                    enter_signal)
                sig_filter[_filter] = jutil.fillinf(
                    sig_filter[_filter]).astype(int).fillna(0)
            else:
                sig_filter[_filter] = pd.DataFrame(
                    index=enter_signal.index,
                    columns=enter_signal.columns,
                    data=0 if _filter == "mask" else 1)

        # process
        #=============================================================
        # 信号在当天的收盘时候统计,具体执行则在下一天的交易日的开盘--设置price=open,
        # 或下一天交易日的收盘--设置price=close,或别的价格--如设置price=vwap
        # 防止未来函数
        enter_signal = enter_signal.shift(1)
        for i in range(len(exit_signal)):
            exit_signal[i] = exit_signal[i].shift(1)

        # 处理价格数据
        if daily_ret is not None:
            daily_ret = daily_ret.reindex_like(enter_signal)
            daily_ret = jutil.fillinf(daily_ret).fillna(0)
            price = pfm.daily_ret_to_cum(daily_ret)  # 取净值
        else:
            # 有price
            price = price.reindex_like(enter_signal)
            price = jutil.fillinf(price)  # 取价格

        self.price = price

        #=====================
        # 调整出场点
        pos = []
        # 定时出场位置
        if max_holding_period is not None:
            pos.append(
                get_period_exit_pos(enter_signal, period=max_holding_period))

        # 止损出场位置
        if stoploss is not None:
            pos.append(
                get_stop_pos(price,
                             stoploss,
                             sig_type=sig_type,
                             stop_type="stop_loss"))

        # 止盈出场位置
        if stopprofit is not None:
            pos.append(
                get_stop_pos(price,
                             stopprofit,
                             sig_type=sig_type,
                             stop_type="stop_profit"))

        # 自定义出场信号位置
        for es in exit_signal:
            pos.append(get_exit_pos(es, exit_type="close_%s" % (sig_type, )))

        # 综合了各种出场条件,选择最先触发的出场条件出场
        exit_pos = reduce(get_first_pos, pos).replace(LONGINT, np.nan)
        # 每天允许出场的最近的出场点
        exit_permited_pos = get_exit_pos(sig_filter["can_exit"], value=[1])
        self.final_exit_pos[sig_type] = get_exit_value(exit_permited_pos,
                                                       exit_pos)
        # =====================
        # 计算信号收益
        price_exit = get_exit_value(price, self.final_exit_pos[sig_type])
        ret_exit = jutil.fillinf((price_exit - price) / price)
        if sig_type == "short":
            ret_exit = -1 * ret_exit
        self.ret[sig_type] = ret_exit - commission

        # =====================
        # 计算signal_data
        # ----------------------------------------------------------------------
        # mask signal
        if n_quantiles == 1:  # 事件因子
            if sig_type == "long":
                value = 2
            else:
                value = -2
            mask_signal = enter_signal != value
        else:  # 普通因子
            mask_signal = enter_signal.isnull()

        mask_signal = np.logical_or(
            mask_signal,
            np.logical_or(sig_filter["mask"], sig_filter["can_enter"] != 1))
        mask_signal = np.logical_or(mask_signal, self.ret[sig_type].isnull())

        # ban掉出场信号在进场那天的
        # get sig pos
        sig_pos = get_sig_pos(self.final_exit_pos[sig_type])
        mask_signal = np.logical_or(mask_signal,
                                    sig_pos == self.final_exit_pos[sig_type])

        # calculate quantile
        if n_quantiles == 1:
            df_quantile = pd.DataFrame(1,
                                       index=enter_signal.index,
                                       columns=enter_signal.columns)
        else:
            signal_masked = enter_signal.copy()
            signal_masked = signal_masked[~mask_signal]
            if group is None:
                df_quantile = jutil.to_quantile(signal_masked,
                                                n_quantiles=n_quantiles)
            else:
                from jaqs_fxdayu.data.py_expression_eval import Parser
                ps = Parser()
                ps.index_member = None
                df_quantile = ps.group_quantile(df=signal_masked,
                                                group=group,
                                                n_quantiles=n_quantiles)

        # ----------------------------------------------------------------------
        # concat signal value
        res = stack_td_symbol(enter_signal)
        res.columns = ['signal']
        res["return"] = stack_td_symbol(self.ret[sig_type])
        res["exit_time"] = stack_td_symbol(self.final_exit_pos[sig_type])
        res['quantile'] = stack_td_symbol(df_quantile)
        if group is not None:
            res["group"] = stack_td_symbol(group)
        res["sig_type"] = sig_type
        mask_signal = stack_td_symbol(mask_signal)
        res = res.loc[~(mask_signal.iloc[:, 0]), :]

        if len(res) > 0:
            print("Nan Data Count (should be zero) : {:d};  " \
                  "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                                 len(res) * 100. / enter_signal.size))
            res = res.astype({
                'signal': float,
                'return': float,
                'quantile': int
            })
            self.signal_data[sig_type] = res
        else:
            print("sig_type %s:No signal available." % (sig_type, ))
Esempio n. 8
0
    def create_single_signal_report(self, signal, price, periods, n_quantiles, mask=None, buy_condition=None):
        """
        Analyze a signal on a single instrument and render the report figure.

        Returns of `price` are computed for every entry of `periods`, the signal
        is bucketed into quantiles, per-quantile mean/std of the returns and the
        ICs are computed, and the plots are handed to `self.show_fig` under the
        name 'single_inst.pdf'.

        Parameters
        ----------
        signal : pd.Series
            Index is integer date, values are signals.
            If a DataFrame is given, only its first column is used.
        price : pd.Series
            Index is integer date, values are prices.
            If a DataFrame is given, only its first column is used.
        periods : list of int
            Periods passed to `pfm.price2ret`; each one becomes a return column
            of the combined table.
        n_quantiles : int
            Number of quantile buckets. With 1, every row is put in quantile 1.0.
        mask : pd.Series or None, optional
            Index is integer date, values are bool.
            NOTE(review): this argument is currently not read anywhere in the
            method body -- confirm whether it is supposed to filter `signal`.
        buy_condition : dict , optional
            {'cond_name1': {'column': str, 'hold': int, 'filter': func},
             'cond_name2': {'column': str, 'hold': int, 'filter': func},
            }
            'column' names the column of the combined table ('signal', 'quantile'
            or a return column) the filter is applied to, 'filter' is applied
            element-wise and its result is cast to int, and 'hold' selects the
            return column (so it is expected to be one of `periods`).

        Returns
        -------
        None
            Nothing is returned; the outputs are the printed summary and the figure.

        """
        if isinstance(signal, pd.DataFrame):
            signal = signal.iloc[:, 0]
        if isinstance(price, pd.DataFrame):
            price = price.iloc[:, 0]

        # calc return: one column per requested period, keyed by the period itself
        ret_l = {period: pfm.price2ret(price, period=period, axis=0) for period in periods}
        df_ret = pd.concat(ret_l, axis=1)
        ret_cols = df_ret.columns

        # ----------------------------------------------------------------------
        # calculate quantile
        # NOTE(review): quantiles come from the un-shifted signal while the
        # 'signal' column below is shifted by one row -- confirm this is intended.
        if n_quantiles == 1:
            df_quantile = signal.copy()
            df_quantile.loc[:] = 1.0
        else:
            df_quantile = jutil.to_quantile(signal, n_quantiles=n_quantiles, axis=0)

        # ----------------------------------------------------------------------
        # concat signal value
        res = pd.DataFrame(signal.shift(1))
        res.columns = ['signal']
        res['quantile'] = df_quantile
        res = pd.concat([res, df_ret], axis=1)
        res = res.dropna()

        print("Nan Data Count (should be zero) : {:d};  " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                             len(res) * 100. / signal.size))

        # calc quantile stats: mean and std of every return column per quantile
        dic_stats = OrderedDict()
        for q, df in res.groupby(by='quantile'):
            df_q_ret = df.loc[:, ret_cols]
            df_stat = pd.DataFrame(index=['mean', 'std'], columns=ret_cols, data=np.nan)
            df_stat.loc['mean', :] = df_q_ret.mean(axis=0)
            df_stat.loc['std', :] = df_q_ret.std(axis=0)
            dic_stats[q] = df_stat

        # calculate IC
        ics = calc_various_ic(res, ret_cols=ret_cols)

        # backtest
        if buy_condition is not None:
            def sim_backtest(df, dic_of_cond):
                # Cumulative sum of the 'hold' return on rows selected by each condition.
                dic_cum_ret = dict()
                for key, dic in dic_of_cond.items():
                    col_name = dic['column']
                    func = dic['filter']
                    n_hold = dic['hold']
                    selected = df[col_name].apply(func).astype(int)
                    dic_cum_ret[key] = (df[n_hold] * selected).cumsum()
                df_cumret = pd.concat(dic_cum_ret, axis=1)
                return df_cumret
            df_backtest = sim_backtest(res, buy_condition)

        # plot
        gf = plotting.GridFigure(rows=3, cols=1, height_ratio=1.2)
        gf.fig.suptitle("Event Return Analysis (annualized)")

        plotting.plot_ic_decay(ics, ax=gf.next_row())

        plotting.plot_quantile_return_mean_std(dic_stats, ax=gf.next_row())

        if buy_condition is not None:
            plotting.plot_batch_backtest(df_backtest, ax=gf.next_row())

        self.show_fig(gf.fig, 'single_inst.pdf')
Esempio n. 9
0
    def process_signal_before_analysis(self,
                                       signal, price=None, ret=None, benchmark_price=None,
                                       period=5, n_quantiles=5,
                                       mask=None,
                                       forward=False):
        """
        Prepare for signal analysis.

        Exactly one of `price` / `ret` must be given. The signal is lagged by one
        row, paired with the (optionally benchmark-adjusted) return, bucketed
        into quantiles per row, and stacked into a long table that is stored on
        `self.signal_data`.

        Parameters
        ----------
        signal : pd.DataFrame
            Index is date, columns are stocks.
        price : pd.DataFrame
            Index is date, columns are stocks.
        ret : pd.DataFrame
            Index is date, columns are stocks.
        benchmark_price : pd.DataFrame or pd.Series or None
            Price of benchmark. Only allowed together with `price`.
        mask : pd.DataFrame
            Data cells that should NOT be used.
        n_quantiles : int
        period : int
            periods to compute forward returns on.
        forward : bool
            If True, the return is shifted back by `period` rows so each row holds
            the signal and the return `period` rows ahead. If False, the signal is
            shifted forward by a further `period` rows instead.

        Returns
        -------
        None
            The result is stored in `self.signal_data`: a pd.DataFrame whose index
            is pd.MultiIndex ['trade_date', 'symbol'] and whose columns are
            ['signal', 'return', 'quantile']. `self.n_quantiles`, `self.period` and
            (when a benchmark is given) `self.benchmark_ret` are set as well.

        Raises
        ------
        ValueError
            If the price / ret / benchmark_price combination is invalid or
            `n_quantiles` is not a positive integer.

        Notes
        -----
        Deal with suspensions:
            If the period of calculating return is d (from T to T+d), then
            we do not use signal values of those suspended on T,
            we do not calculate return for those suspended on T+d.

        """
        # ----------------------------------------------------------------------
        # parameter validation
        if price is None and ret is None:
            raise ValueError("One of price / ret must be provided.")
        if price is not None and ret is not None:
            raise ValueError("Only one of price / ret should be provided.")
        if ret is not None and benchmark_price is not None:
            raise ValueError("You choose 'return' mode but benchmark_price is given.")
        # Check the type before comparing so a non-numeric input reaches the ValueError below.
        if not (isinstance(n_quantiles, int) and n_quantiles > 0):
            raise ValueError("n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))

        # ensure inputs are aligned
        data = price if price is not None else ret
        assert np.all(signal.index == data.index)
        assert np.all(signal.columns == data.columns)
        if mask is not None:
            assert np.all(signal.index == mask.index)
            assert np.all(signal.columns == mask.columns)
            # dtype of mask could be float, so we need to convert. Missing values
            # (including the NaN that fillinf puts in place of +/-inf) must be
            # filled BEFORE the integer cast, which cannot represent NaN.
            mask = jutil.fillinf(mask).fillna(0).astype(int).astype(bool)
        else:
            mask = pd.DataFrame(index=signal.index, columns=signal.columns, data=False)
        signal = jutil.fillinf(signal)
        data = jutil.fillinf(data)  # cleaned price / ret; used for all computations below

        # ----------------------------------------------------------------------
        # save data
        self.n_quantiles = n_quantiles
        self.period = period

        # ----------------------------------------------------------------------
        # Get dependent variables
        if price is not None:
            df_ret = pfm.price2ret(data, period=self.period, axis=0)
            if benchmark_price is not None:
                benchmark_price = benchmark_price.loc[signal.index]
                bench_ret = pfm.price2ret(benchmark_price, self.period, axis=0)
                self.benchmark_ret = bench_ret
                # subtract the benchmark return row-wise from every stock
                residual_ret = df_ret.sub(bench_ret.values.flatten(), axis=0)
            else:
                residual_ret = df_ret
        else:
            residual_ret = data

        # Get independent variable
        signal = signal.shift(1)  # avoid forward-looking bias

        # forward or not
        if forward:
            # point-in-time signal and forward return
            residual_ret = residual_ret.shift(-self.period)
        else:
            # past signal and point-in-time return
            signal = signal.shift(self.period)

        # ----------------------------------------------------------------------
        # get masks: a cell is dropped when the caller masked it, or when either
        # its signal or its return is missing
        mask_price_return = residual_ret.isnull()
        mask_signal = signal.isnull()

        mask = np.logical_or(mask, np.logical_or(mask_signal, mask_price_return))

        # ----------------------------------------------------------------------
        # calculate quantile
        signal_masked = signal.copy()
        signal_masked = signal_masked[~mask]
        if n_quantiles == 1:
            df_quantile = signal_masked.copy()
            df_quantile.loc[:, :] = 1.0
        else:
            df_quantile = jutil.to_quantile(signal_masked, n_quantiles=n_quantiles)

        # ----------------------------------------------------------------------
        # stack
        def stack_td_symbol(df):
            # wide (date x symbol) -> long single-column frame, keeping NaN cells
            df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
            df.index.names = ['trade_date', 'symbol']
            df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
            return df

        mask = stack_td_symbol(mask)
        df_quantile = stack_td_symbol(df_quantile)
        residual_ret = stack_td_symbol(residual_ret)

        # ----------------------------------------------------------------------
        # concat signal value
        res = stack_td_symbol(signal)
        res.columns = ['signal']
        res['return'] = residual_ret
        res['quantile'] = df_quantile
        res = res.loc[~(mask.iloc[:, 0]), :]

        print("Nan Data Count (should be zero) : {:d};  " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                             len(res) * 100. / signal.size))
        res = res.astype({'signal': float, 'return': float, 'quantile': int})
        self.signal_data = res