def _mi_pq(self, p, q):
    """
    Estimate tmi between two time series.

    :param p: time series with index p
    :param q: time series with index q
    :return: p, q and the estimated value of tmi(p,q)
    """
    name_p = self.names[p]
    name_q = self.names[q]

    if self.adaptive_window:
        # Re-window each raw series using the pairwise window matrix.
        win_p = self.window_matrix[name_q].loc[name_p]
        win_q = self.window_matrix[name_p].loc[name_q]
        x = window_representation(self.series[name_p], windows_size=win_p)
        y = window_representation(self.series[name_q], windows_size=win_q)
        print("Nodes and windows:")
        print(name_p, win_p)
        print(name_q, win_q)
    else:
        # Pre-computed window representations built in __init__.
        x = self.data_dict[name_p]
        y = self.data_dict[name_q]

    rates = (self.sampling_rate[name_p], self.sampling_rate[name_q])
    mi_pval, _ = tmi(x, y, sampling_rate_tuple=rates,
                     gamma=self.gamma_matrix[name_q].loc[name_p],
                     p_value=self.p_value)
    return p, q, mi_pval
def noise_based(data_pair, lag_max, cond_ind_test="ParCorr", verbose=True):
    """
    Infer the causal direction between the two columns of ``data_pair`` using
    an additive-noise approach: each direction is scored by regressing one
    side on the lagged window representation of the other and testing how
    independent the regression residuals are from the regressor input; the
    direction with the more independent residuals wins.

    :param data_pair: DataFrame with exactly two columns (one per series)
    :param lag_max: window size used for the lagged representation
    :param cond_ind_test: independence test name, "ParCorr" or "CMI"
    :param verbose: unused; kept for interface compatibility
    :return: 2x2 numpy adjacency array encoding the inferred orientation
    :raises ValueError: if ``cond_ind_test`` is not a known test name
    """
    if cond_ind_test == "ParCorr":
        indep = TestParCorr()
    elif cond_ind_test == "CMI":
        indep = TestMI()
    else:
        # Fix: the original fell through with `indep` unbound, causing a
        # confusing NameError later; fail fast with a clear message instead.
        raise ValueError("Unknown cond_ind_test: " + str(cond_ind_test))

    # Fix: original used `data_pair.coluumn` (typo) which raises
    # AttributeError; the pandas attribute is `columns`.
    X1 = window_representation(data_pair[data_pair.columns[0]], windows_size=lag_max)
    X2 = window_representation(data_pair[data_pair.columns[1]], windows_size=lag_max)
    # NOTE(review): both regression targets are the full two-column frame --
    # presumably each should be a single opposite column; confirm intended.
    Y1 = data_pair
    Y2 = data_pair

    # Direction 1 -> 2: regress Y2 on X1 and test residual independence.
    gpr12 = GaussianProcessRegressor().fit(X1, Y2)
    errors12 = gpr12.predict(X1) - Y2
    errors12 = errors12.flatten()
    c12 = indep.fit(X1, errors12)

    # Direction 2 -> 1: regress Y1 on X2 and test residual independence.
    gpr21 = GaussianProcessRegressor().fit(X2, Y1)
    errors21 = gpr21.predict(X2) - Y1
    errors21 = errors21.flatten()
    c21 = indep.fit(X2, errors21)

    # Higher score = residuals more independent = more plausible direction.
    if c12 > c21:
        return np.array([[1, 2], [1, 1]])
    elif c12 < c21:
        return np.array([[1, 1], [2, 1]])
    else:
        # Tie: leave both directions marked (undecided).
        return np.array([[1, 2], [2, 1]])
def find_sep_set(self):
    """
    Find the most contributing separation set (if it exists) between each
    pair of time series and delete the corresponding links.

    For each candidate set size, candidate (p, q, r_set) triples are ranked
    by ``rank_cmi_sep_set_parallel``. A link (p, q) is removed when the CTMI
    of (p, q) conditioned on r_set crosses the decision threshold:
    ``mi < sig_lev < cmi`` in p-value mode, ``cmi < alpha`` otherwise.
    """
    if self.verbose:
        print("######################################")
        # Fix: banner typo ("Skeletion Speperation").
        print("Skeleton Separation")
        print("######################################")
        print("max set size = " + str(self.graph.d - 1))

    for set_size in range(1, self.graph.d - 1):
        ranks = self.rank_cmi_sep_set_parallel(set_size)
        if self.verbose:
            print("Ranking:")
            # Fix: all four labels were copy-pasted as "p: ".
            print("p: " + str(ranks.elem_p))
            print("q: " + str(ranks.elem_q))
            print("r: " + str(ranks.elem_r))
            print("val: " + str(ranks.val))

        for p, q, r_set, cmi in zip(ranks.elem_p, ranks.elem_q, ranks.elem_r, ranks.val):
            # Only test pairs that are still linked and whose conditioning
            # nodes are still neighbors of p or q in the current graph.
            test = (self.graph.edges[p, q] != 0)
            for r in r_set:
                if not test:
                    break
                test = test and ((self.graph.edges[q, r] != 0) or (self.graph.edges[p, r] != 0))
                # test = test and ((self.graph.sep[p, r, q] == 0) and (self.graph.sep[q, r, p] == 0))
            if test:
                mi = self.mi_array[p, q]

                if self.p_value != self.rank_using_p_value:
                    # The ranking used a different criterion than the
                    # decision rule, so re-estimate CTMI in p_value mode.
                    if self.adaptive_window:
                        x = window_representation(
                            self.series[self.names[p]],
                            windows_size=self.window_matrix[self.names[q]].loc[self.names[p]])
                        y = window_representation(
                            self.series[self.names[q]],
                            windows_size=self.window_matrix[self.names[p]].loc[self.names[q]])
                    else:
                        x = self.data_dict[self.names[p]]
                        y = self.data_dict[self.names[q]]

                    z = dict()
                    for r in r_set:
                        if self.adaptive_window:
                            # select and drop NA
                            z[self.names[r]] = self.series[self.names[r]].dropna()
                        else:
                            z[self.names[r]] = self.data_dict[self.names[r]]

                    if self.graphical_optimization:
                        # Fix: the result was assigned to cmi_pval/cmi_val and
                        # never to `cmi`, so the stale ranked value was used
                        # for the decision below; assign `cmi` as in the
                        # non-graphical branch.
                        cmi, _ = ctmi(
                            x, y, z, self.names[p], self.names[q],
                            self.sampling_rate, gamma_matrix=self.gamma_matrix,
                            graph=self.graph.edges, p_value=self.p_value,
                            instantaneous_dict=self.instantaneous_dict)
                    else:
                        cmi, _ = ctmi(
                            x, y, z, self.names[p], self.names[q],
                            self.sampling_rate, gamma_matrix=self.gamma_matrix,
                            p_value=self.p_value,
                            instantaneous_dict=self.instantaneous_dict)

                if self.verbose:
                    print("p=" + str(p) + "; q=" + str(q) + "; r=" + str(r_set) +
                          "; I(p,q|r)=" + "{: 0.5f}".format(cmi) +
                          "; I(p,q)=" + "{: 0.5f}".format(mi), end=" ")

                # Decision rule: p-value mode removes when cmi exceeds the
                # significance level while the unconditional mi is below it;
                # statistic mode removes when cmi drops below alpha.
                if self.p_value:
                    test = mi < self.sig_lev < cmi
                else:
                    test = cmi < self.alpha

                if test:
                    self.cmi_array[p, q] = cmi
                    self.cmi_array[q, p] = cmi
                    if self.verbose:
                        print("=> remove link between " + str(p) + " and " + str(q))
                    self.graph.edges[p, q] = 0
                    self.graph.edges[q, p] = 0
                    for r in r_set:
                        # Record the separating node and its alignment lags.
                        self.graph.add_sep(q, p, r)
                        self.biggamma[p, q, r] = self.gamma_matrix[self.names[p]].loc[self.names[r]]
                        self.biggamma[q, p, r] = self.gamma_matrix[self.names[q]].loc[self.names[r]]
                else:
                    if self.verbose:
                        print()
def _cmi_sep_set_pq(self, p, q, set_size):
    """
    Estimate ctmi between two time series conditioned on each set of
    neighbors with cardinality equal to set_size.

    :param p: time series with index p
    :param q: time series with index q
    :param set_size: cardinality of the set of neighbors
    :return: p, q, list of estimated ctmi(p,q,r_set) values, and the list of
        all r_sets; implicitly returns None when no candidate set remains
    """
    v_list = []
    # Candidate conditioning nodes: every other node r with an oriented
    # edge r -> q (edge value 2) in the current graph.
    candidates = [r for r in range(self.graph.d)
                  if (r != p) and (r != q) and (self.graph.edges[r, q] == 2)]
    r_list = [list(rs) for rs in itertools.combinations(candidates, set_size)]
    # Fix: removed leftover unconditional debug prints (print(rs) / print(all(...)))
    # and replaced the copy-then-remove loop with a filter.
    # NOTE(review): candidates come from range(self.graph.d), so this filter
    # only has an effect when graph.d > self.d -- confirm intended.
    r_list = [rs for rs in r_list if not all(elem >= self.d for elem in rs)]

    if self.adaptive_window:
        # Re-window each raw series using the pairwise window matrix.
        x = window_representation(
            self.series[self.names[p]],
            windows_size=int(self.window_matrix[self.names[q]].loc[self.names[p]]))
        y = window_representation(
            self.series[self.names[q]],
            windows_size=int(self.window_matrix[self.names[p]].loc[self.names[q]]))
    else:
        x = self.data_dict[self.names[p]]
        y = self.data_dict[self.names[q]]

    for rs in r_list:
        z = dict()
        for r in rs:
            if self.adaptive_window:
                # select and drop NA
                z[self.names[r]] = self.series[self.names[r]].dropna()
            else:
                z[self.names[r]] = self.data_dict[self.names[r]]

        if self.graphical_optimization:
            cmi_pval, cmi_val = ctmi(
                x, y, z, self.names[p], self.names[q], self.sampling_rate,
                gamma_matrix=self.gamma_matrix, graph=self.graph.edges,
                p_value=self.rank_using_p_value,
                instantaneous_dict=self.instantaneous_dict)
        else:
            cmi_pval, cmi_val = ctmi(
                x, y, z, self.names[p], self.names[q], self.sampling_rate,
                gamma_matrix=self.gamma_matrix,
                p_value=self.rank_using_p_value,
                instantaneous_dict=self.instantaneous_dict)

        # Rank either by p-value or by the raw statistic, matching the
        # rank_using_p_value setting chosen in __init__.
        if self.rank_using_p_value:
            v_list.append(cmi_pval)
        else:
            v_list.append(cmi_val)

    if v_list:
        return p, q, v_list, r_list
def __init__(self, series, sig_lev=0.05, lag_max=5, p_value=True, rank_using_p_value=False, verbose=True,
             num_processor=-1, graphical_optimization=False, pairwise=True):
    """
    Causal inference (Wrapper) using TMI and CTMI (contain functions for skeleton construction)
    :param series: d-time series (with possibility of different sampling rate)
    :param sig_lev: significance level. By default 0.05
    :param p_value: Use p_value for decision making. By default True
    :param verbose: Print results. By default: True
    :param num_processor: number of processors for parallelization. By default -1 (all)
    """
    # Initial graph over the d series; edges are later pruned/oriented.
    self.graph = Graph(series.shape[1])
    # NOTE(review): training_epoch / noise / learning_rate are unused in this
    # constructor (leftovers from the commented-out tskiko_mv path below).
    training_epoch = 1000
    noise = True  # d*(order-1)*2
    learning_rate = 0.01
    if pairwise:
        # Pairwise pre-orientation: run TiMINo on every pair of columns and
        # mark a directed edge (value 2) where the pairwise result says so.
        for i in range(series.shape[1]):
            for j in range(i + 1, series.shape[1]):
                data_pair = series[[series.columns[i], series.columns[j]]]
                # res_order_pair = tskiko_mv(data_pair, lag_max, learning_rate, training_epoch, noise, sig_lev, "ParCorr", verbose)
                res_order_pair = run_timino_pw_R([[data_pair, "data"], [0.00, "alpha"], [5, "nlags"]])
                res_order_pair = pd.DataFrame(res_order_pair, columns=data_pair.columns,
                                              index=data_pair.columns)
                if res_order_pair[series.columns[j]].loc[series.columns[i]] == 2:
                    self.graph.edges[i, j] = 2
                if res_order_pair[series.columns[i]].loc[series.columns[j]] == 2:
                    self.graph.edges[j, i] = 2
    else:
        # Single multivariate TiMINo run over all columns at once.
        self.graph.edges = run_timino_pw_R([[series, "data"], [0.00, "alpha"], [5, "nlags"]])
    # Self-loops: every series depends on its own past.
    for i in range(series.shape[1]):
        self.graph.edges[i, i] = 1
    # order_kiko = tskiko_mv(series, lag_max, learning_rate, training_epoch, noise, sig_lev, "ParCorr", verbose)
    # print(order_kiko)
    # self.graph.edges = order_kiko.values
    if verbose:
        print("Order")
        print(self.graph.edges)

    # Adaptive windowing is hardcoded on; the non-adaptive branches below are
    # kept for the alternative configuration.
    self.adaptive_window = True
    self.series = series
    self.n = series.shape[0]   # number of time points
    self.d = series.shape[1]   # number of series
    self.names = self.series.columns
    self.num_processor = num_processor
    self.p_value = p_value
    self.graphical_optimization = graphical_optimization
    # rank_using_p_value may only be True when p_value is True; otherwise
    # fall back to False with a warning.
    if self.p_value == rank_using_p_value:
        self.rank_using_p_value = rank_using_p_value
    elif not rank_using_p_value:
        self.rank_using_p_value = rank_using_p_value
    else:
        print("Warning: rank_using_p_value can be True iff p_value is True. Using rank_using_p_value=False")
        self.rank_using_p_value = False
    self.verbose = verbose

    self.data_dict = dict()            # per-series window representations (non-adaptive mode)
    self.instantaneous_dict = dict()   # per-series flag consumed by ctmi

    self.lags = []
    self.sampling_rate = dict()
    # Estimate each series' sampling rate from its index.
    for col in range(series.shape[1]):
        _, s_r = get_sampling_rate(self.series[self.names[col]])
        self.sampling_rate[self.names[col]] = s_r

    self.sig_lev = sig_lev
    self.alpha = get_alpha(series)

    for col in range(series.shape[1]):
        # self.lags.append(window_size(series[series.columns[col]], alpha=self.alpha, lag_max=lag_max))
        if not self.adaptive_window:
            # Fixed window size 1 per series; adaptive mode instead derives
            # windows from the window matrix computed below.
            self.lags.append(1)
            self.data_dict[self.names[col]] = window_representation(self.series[self.names[col]],
                                                                    windows_size=self.lags[col])
        self.instantaneous_dict[self.names[col]] = True

    if self.adaptive_window:
        # Joint estimation of alignment lags (gamma) and window sizes per pair.
        self.gamma_matrix, self.window_matrix = gamma_matrix_window_matrix(self.series, series.columns,
                                                                           self.sampling_rate, self.graph.edges)
    else:
        self.gamma_matrix = align_matrix(self.data_dict, series.columns, self.sampling_rate)

    self.cap_gamma_df = pd.DataFrame(columns=["p", "q", "r", "Grp", "Grq"])

    # Pairwise (conditional) mutual information caches, initialized to 1.
    self.mi_array = np.ones([self.graph.d, self.graph.d])
    self.cmi_array = np.ones([self.graph.d, self.graph.d])
    # Alignment lags recorded per (p, q, r) when a separation set is found.
    self.biggamma = np.zeros([self.d, self.d, self.d])

    if self.verbose:
        print("n: " + str(self.n))
        print("d: " + str(self.d))
        print("names: " + str(self.names))
        print("sampling_rate: " + str(self.sampling_rate))
        print("significance level:" + str(self.sig_lev))
        print("alpha:" + str(self.alpha))
        print("window size:" + str(self.lags))
        print("gamma matrix:" + str(self.gamma_matrix))
        if self.adaptive_window:
            print("window matrix" + str(self.window_matrix))
        print("instantaneous dict :" + str(self.instantaneous_dict))