def generate_pairs(self, clustering_results: Dict[int, Tuple[Tuple[Tickers]]],
                   hurst_exp_threshold: float, current_window: Window):
    """Run cointegration analysis on all pair combinations within each cluster.

    For every 2-combination of tickers inside each cluster, fits a logged
    linear regression, runs ADF / half-life / Hurst-exponent tests and, for
    pairs accepted by the acceptance rule, builds a ``CointegratedPair``.
    Stops early once ``self.target_number_of_coint_pairs`` pairs are found,
    returning them sorted by cointegration rank (best first).

    :param clustering_results: mapping of cluster id -> tickers in that cluster
    :param hurst_exp_threshold: acceptance threshold for the Hurst exponent test
    :param current_window: window providing price data for the tickers
    :return: list of CointegratedPair (also stored on
        ``self.previous_cointegrated_pairs``)
    """
    current_cointegrated_pairs = []
    n_cointegrated = 0
    tickers_per_cluster = list(clustering_results.values())
    for cluster in tickers_per_cluster:
        for pair in itertools.combinations(list(cluster), 2):
            t1 = current_window.get_data(tickers=[pair[0]],
                                         features=[Features.CLOSE])
            t2 = current_window.get_data(tickers=[pair[1]],
                                         features=[Features.CLOSE])
            try:
                # Sometimes there is no price data, in which case skip.
                residuals, beta, reg_output = self.__logged_lin_reg(t1, t2)
            except ValueError:
                continue
            # __adf expects a 1-D array; residuals comes back as (n, 1).
            adf_test_statistic, adf_critical_values = self.__adf(
                residuals.flatten())
            hl_test = self.__hl(residuals)
            he_test = self.__hurst_exponent_test(residuals, current_window)
            (ou_mean, ou_std, ou_diffusion_v,
             recent_dev, recent_dev_scaled) = self.__ou_params(residuals)
            ols_stdzed_residuals = (residuals - ou_mean) / ou_std
            is_cointegrated = self.__acceptance_rule(
                adf_test_statistic, adf_critical_values,
                self.adf_confidence_level, hl_test, self.max_mean_rev_time,
                he_test, hurst_exp_threshold, ols_stdzed_residuals,
                at_least=int(current_window.window_length.days / 6))
            if is_cointegrated:
                n_cointegrated += 1
                # Most recent close prices as scalars. Use .iloc[-1, 0]
                # rather than float(Series): calling float() on a length-1
                # Series is deprecated/removed in modern pandas.
                t1_most_recent = float(t1.iloc[-1, 0])
                t2_most_recent = float(t2.iloc[-1, 0])
                hedge_ratio = beta * t1_most_recent / t2_most_recent
                scaled_beta = hedge_ratio / (hedge_ratio - 1)
                recent_dev_scaled_hist = [recent_dev_scaled]
                cointegration_rank = self.__score_coint(
                    adf_test_statistic, self.adf_confidence_level,
                    adf_critical_values, he_test, hurst_exp_threshold, 10)
                position = Position(pair[0], pair[1])
                current_cointegrated_pairs.append(
                    CointegratedPair(pair, reg_output, scaled_beta, hl_test,
                                     ou_mean, ou_std, ou_diffusion_v,
                                     recent_dev, recent_dev_scaled,
                                     recent_dev_scaled_hist,
                                     cointegration_rank,
                                     ols_stdzed_residuals, position))
                if n_cointegrated == self.target_number_of_coint_pairs:
                    # Early exit: rank the accepted pairs best-first.
                    current_cointegrated_pairs = sorted(
                        current_cointegrated_pairs,
                        key=lambda coint_pair: coint_pair.cointegration_rank,
                        reverse=True)
                    self.previous_cointegrated_pairs = current_cointegrated_pairs
                    return current_cointegrated_pairs
    self.previous_cointegrated_pairs = current_cointegrated_pairs
    return current_cointegrated_pairs
def generate_pairs(self, clustering_results: Dict[int, Tuple[Tuple[Tickers]]],
                   hurst_exp_threshold: float, current_window: Window):
    """Run cointegration analysis on all pairs from the clustering results.

    Flattens the clustered pairs, sorts them by the first ticker's value,
    and for each pair fits a logged linear regression and runs ADF /
    half-life / Hurst-exponent tests. Accepted pairs are wrapped in
    ``CointegratedPair`` (including annualised log-return stats of the first
    leg). Stops early once ``target_coint`` pairs are found, returning them
    sorted by cointegration rank (best first).

    NOTE(review): this is a second definition of ``generate_pairs`` in the
    same class — it shadows the earlier one; confirm which version is
    intended to be live.

    :param clustering_results: mapping of cluster id -> pairs in that cluster
    :param hurst_exp_threshold: acceptance threshold for the Hurst exponent test
    :param current_window: window providing SNP price data for the tickers
    :return: list of CointegratedPair (also stored on
        ``self.previous_cointegrated_pairs``)
    """
    # Loop-invariant target, hoisted out of the loop (was reassigned each
    # iteration). Logic to be fixed and made more efficient by: 1) having a
    # proper clustering algorithm; 2) not running clustering and
    # cointegration every day; 3) taking best 10 pairs by some score.
    target_coint = 300
    current_cointegrated_pairs = []
    n_cointegrated = 0
    clusters = list(clustering_results.values())
    flattened = [pair for cluster in clusters for pair in cluster]
    sorted_cluster_results = sorted(flattened, key=lambda x: x[0].value)
    for pair in sorted_cluster_results:
        t1 = current_window.get_data(universe=Universes.SNP,
                                     tickers=[pair[0]],
                                     features=[Features.CLOSE])
        t2 = current_window.get_data(universe=Universes.SNP,
                                     tickers=[pair[1]],
                                     features=[Features.CLOSE])
        try:
            # Sometimes there is no price data, in which case skip.
            residuals, beta, reg_output = self.__logged_lin_reg(t1, t2)
        except Exception:
            # Was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt; Exception keeps the best-effort skip
            # without trapping interpreter-exit signals.
            continue
        # __adf expects a 1-D array; residuals comes back as (n, 1).
        adf_test_statistic, adf_critical_values = self.__adf(
            residuals.flatten())
        hl_test = self.__hl(residuals)
        he_test = self.__hurst_exponent_test(residuals, current_window)
        is_cointegrated = self.__acceptance_rule(
            adf_test_statistic, adf_critical_values,
            self.adf_confidence_level, hl_test, self.max_mean_rev_time,
            he_test, hurst_exp_threshold)
        if is_cointegrated:
            n_cointegrated += 1
            print(n_cointegrated, " / ", target_coint, "cointegrated")
            # Annualised mean and volatility of the first leg's log returns
            # (250 trading days per year).
            r_x = self.__log_returner(t1)
            mu_x_ann = float(250 * np.mean(r_x))
            sigma_x_ann = float(250**0.5 * np.std(r_x))
            (ou_mean, ou_std, ou_diffusion_v,
             recent_dev, recent_dev_scaled) = self.__ou_params(residuals)
            scaled_beta = beta / (beta - 1)
            recent_dev_scaled_hist = [recent_dev_scaled]
            cointegration_rank = self.__score_coint(
                adf_test_statistic, self.adf_confidence_level,
                adf_critical_values)
            current_cointegrated_pairs.append(
                CointegratedPair(pair, mu_x_ann, sigma_x_ann, reg_output,
                                 scaled_beta, hl_test, ou_mean, ou_std,
                                 ou_diffusion_v, recent_dev,
                                 recent_dev_scaled, recent_dev_scaled_hist,
                                 cointegration_rank))
            if n_cointegrated == target_coint:
                # Early exit: rank the accepted pairs best-first.
                current_cointegrated_pairs = sorted(
                    current_cointegrated_pairs,
                    key=lambda coint_pair: coint_pair.cointegration_rank,
                    reverse=True)
                self.previous_cointegrated_pairs = current_cointegrated_pairs
                return current_cointegrated_pairs
    self.previous_cointegrated_pairs = current_cointegrated_pairs
    return current_cointegrated_pairs