Esempio n. 1
0
    def generate_pairs(self, clustering_results: Dict[int,
                                                      Tuple[Tuple[Tickers]]],
                       hurst_exp_threshold: float, current_window: Window):
        """Search each cluster for cointegrated pairs and return them ranked.

        Every 2-combination of the members of each cluster in
        ``clustering_results`` is analysed: ``Features.CLOSE`` data for both
        members is read from ``current_window``, regressed with
        ``__logged_lin_reg``, and the residuals are fed to the ``__adf``,
        ``__hl``, ``__hurst_exponent_test`` and ``__ou_params`` helpers.
        A pair accepted by ``__acceptance_rule`` is wrapped in a
        ``CointegratedPair``. The search stops as soon as
        ``self.target_number_of_coint_pairs`` pairs have been accepted.

        Args:
            clustering_results: mapping whose values are the clusters to
                search; each value is iterated and its members are combined
                two at a time.
            hurst_exp_threshold: threshold forwarded to the acceptance rule
                and to the scoring helper.
            current_window: window supplying the data (``get_data``) and the
                ``window_length`` used to derive the ``at_least`` argument
                of the acceptance rule.

        Returns:
            The accepted ``CointegratedPair`` objects sorted by
            ``cointegration_rank`` in descending order, whether or not the
            target number of pairs was reached. The same list is stored in
            ``self.previous_cointegrated_pairs``.
        """
        current_cointegrated_pairs = []
        target_reached = False

        for cluster in clustering_results.values():
            for pair in itertools.combinations(cluster, 2):
                t1 = current_window.get_data(tickers=[pair[0]],
                                             features=[Features.CLOSE])
                t2 = current_window.get_data(tickers=[pair[1]],
                                             features=[Features.CLOSE])
                try:
                    # sometimes there are no price data, in which case, skip
                    residuals, beta, reg_output = self.__logged_lin_reg(t1, t2)
                except ValueError:
                    continue

                adf_test_statistic, adf_critical_values = self.__adf(
                    residuals.flatten())
                hl_test = self.__hl(residuals)
                he_test = self.__hurst_exponent_test(residuals, current_window)
                (ou_mean, ou_std, ou_diffusion_v, recent_dev,
                 recent_dev_scaled) = self.__ou_params(residuals)
                ols_stdzed_residuals = (residuals - ou_mean) / ou_std

                is_cointegrated = self.__acceptance_rule(
                    adf_test_statistic,
                    adf_critical_values,
                    self.adf_confidence_level,
                    hl_test,
                    self.max_mean_rev_time,
                    he_test,
                    hurst_exp_threshold,
                    ols_stdzed_residuals,
                    at_least=int(current_window.window_length.days / 6))
                if not is_cointegrated:
                    continue

                # Index the scalar directly: float() on a one-element Series
                # (the former ``iloc[-1, :]``) is deprecated in pandas.
                t1_most_recent = float(t1.iloc[-1, 0])
                t2_most_recent = float(t2.iloc[-1, 0])
                hedge_ratio = beta * t1_most_recent / t2_most_recent
                # NOTE(review): undefined when hedge_ratio == 1 -- confirm
                # whether upstream guarantees this cannot happen.
                scaled_beta = hedge_ratio / (hedge_ratio - 1)
                recent_dev_scaled_hist = [recent_dev_scaled]
                cointegration_rank = self.__score_coint(
                    adf_test_statistic, self.adf_confidence_level,
                    adf_critical_values, he_test, hurst_exp_threshold, 10)
                position = Position(pair[0], pair[1])
                current_cointegrated_pairs.append(
                    CointegratedPair(pair, reg_output, scaled_beta,
                                     hl_test, ou_mean, ou_std,
                                     ou_diffusion_v, recent_dev,
                                     recent_dev_scaled,
                                     recent_dev_scaled_hist,
                                     cointegration_rank,
                                     ols_stdzed_residuals, position))

                if (len(current_cointegrated_pairs) ==
                        self.target_number_of_coint_pairs):
                    target_reached = True
                    break
            if target_reached:
                break

        # Rank on every exit path so callers always receive the best pair
        # first, not only when the target count happened to be reached.
        current_cointegrated_pairs.sort(
            key=lambda coint_pair: coint_pair.cointegration_rank,
            reverse=True)
        self.previous_cointegrated_pairs = current_cointegrated_pairs
        return current_cointegrated_pairs
Esempio n. 2
0
    def generate_pairs(self, clustering_results: Dict[int,
                                                      Tuple[Tuple[Tickers]]],
                       hurst_exp_threshold: float, current_window: Window):
        """Test the candidate pairs of all clusters and return the accepted ones.

        The pairs held in the values of ``clustering_results`` are flattened
        into one list, ordered by ``pair[0].value`` and analysed in turn:
        ``Features.CLOSE`` data for both members is read from
        ``current_window`` (``Universes.SNP``), regressed with
        ``__logged_lin_reg``, and the residuals are fed to the ``__adf``,
        ``__hl`` and ``__hurst_exponent_test`` helpers. A pair accepted by
        ``__acceptance_rule`` is wrapped in a ``CointegratedPair``. The
        search stops once 300 pairs have been accepted.

        Args:
            clustering_results: mapping whose values are iterables of
                candidate pairs; each pair is indexed with ``[0]`` and
                ``[1]``.
            hurst_exp_threshold: threshold forwarded to the acceptance rule.
            current_window: window supplying the data via ``get_data``.

        Returns:
            The accepted ``CointegratedPair`` objects sorted by
            ``cointegration_rank`` in descending order, whether or not the
            cap was reached. The same list is stored in
            ``self.previous_cointegrated_pairs``.
        """
        # Cap on accepted pairs; loop-invariant, so defined once up front.
        # logic to be fixed and made more efficient by: 1) having proper
        # clustering algorithm; 2) not running clustering and cointegration
        # everyday 3) taking best 10 pairs according to some score
        target_coint = 300

        current_cointegrated_pairs = []
        n_cointegrated = 0
        candidate_pairs = [
            pair for cluster in clustering_results.values() for pair in cluster
        ]
        candidate_pairs.sort(key=lambda x: x[0].value)

        for pair in candidate_pairs:
            t1 = current_window.get_data(universe=Universes.SNP,
                                         tickers=[pair[0]],
                                         features=[Features.CLOSE])
            t2 = current_window.get_data(universe=Universes.SNP,
                                         tickers=[pair[1]],
                                         features=[Features.CLOSE])

            try:
                # sometimes there are no price data
                residuals, beta, reg_output = self.__logged_lin_reg(t1, t2)
            except Exception:
                # Best-effort skip of pairs that cannot be regressed, without
                # swallowing KeyboardInterrupt/SystemExit as a bare except did.
                continue

            # residuals can come back as an (n, 1) array rather than (n,),
            # so it is flattened before the ADF helper.
            adf_test_statistic, adf_critical_values = self.__adf(
                residuals.flatten())
            hl_test = self.__hl(residuals)
            he_test = self.__hurst_exponent_test(residuals, current_window)

            is_cointegrated = self.__acceptance_rule(
                adf_test_statistic, adf_critical_values,
                self.adf_confidence_level, hl_test, self.max_mean_rev_time,
                he_test, hurst_exp_threshold)
            if not is_cointegrated:
                continue

            n_cointegrated += 1
            print(n_cointegrated, " / ", target_coint, "cointegrated")
            r_x = self.__log_returner(t1)
            # Scale by 250 (mean) and sqrt(250) (std) -- presumably trading
            # days per year, going by the ``_ann`` names; confirm.
            mu_x_ann = float(250 * np.mean(r_x))
            sigma_x_ann = float(250**0.5 * np.std(r_x))
            (ou_mean, ou_std, ou_diffusion_v, recent_dev,
             recent_dev_scaled) = self.__ou_params(residuals)
            # NOTE(review): undefined when beta == 1 -- confirm whether the
            # regression can produce that value.
            scaled_beta = beta / (beta - 1)
            recent_dev_scaled_hist = [recent_dev_scaled]
            cointegration_rank = self.__score_coint(
                adf_test_statistic, self.adf_confidence_level,
                adf_critical_values)
            current_cointegrated_pairs.append(
                CointegratedPair(pair, mu_x_ann, sigma_x_ann, reg_output,
                                 scaled_beta, hl_test, ou_mean, ou_std,
                                 ou_diffusion_v, recent_dev,
                                 recent_dev_scaled, recent_dev_scaled_hist,
                                 cointegration_rank))

            if n_cointegrated == target_coint:
                break

        # Rank on every exit path so the ordering does not depend on whether
        # the cap was reached.
        current_cointegrated_pairs.sort(
            key=lambda coint_pair: coint_pair.cointegration_rank,
            reverse=True)
        self.previous_cointegrated_pairs = current_cointegrated_pairs
        return current_cointegrated_pairs