コード例 #1
0
    def _mi_pq(self, p, q):
        """
        Estimate the TMI between the p-th and q-th time series.

        With adaptive windows, both series are converted to a windowed
        representation whose sizes are read from self.window_matrix, and the
        chosen windows are printed. Otherwise the precomputed entries of
        self.data_dict are used.

        :param p: index of the first time series in self.names
        :param q: index of the second time series in self.names
        :return: p, q and the first element of the pair returned by tmi
            (unpacked here as mi_pval)
        """
        name_p = self.names[p]
        name_q = self.names[q]

        if not self.adaptive_window:
            x = self.data_dict[name_p]
            y = self.data_dict[name_q]
        else:
            # The window for a series is looked up in the column of the
            # *other* series, at the row of the series itself.
            window_p = self.window_matrix[name_q].loc[name_p]
            window_q = self.window_matrix[name_p].loc[name_q]
            x = window_representation(self.series[name_p],
                                      windows_size=window_p)
            y = window_representation(self.series[name_q],
                                      windows_size=window_q)
            print("Nodes and windows:")
            print(name_p, window_p)
            print(name_q, window_q)

        rates = (self.sampling_rate[name_p], self.sampling_rate[name_q])
        lag = self.gamma_matrix[name_q].loc[name_p]
        mi_pval, _ = tmi(x,
                         y,
                         sampling_rate_tuple=rates,
                         gamma=lag,
                         p_value=self.p_value)
        return p, q, mi_pval
コード例 #2
0
def noise_based(data_pair, lag_max, cond_ind_test="ParCorr", verbose=True):
    """
    Compare two Gaussian-process regressions between a pair of time series
    and return a 2x2 matrix encoding which direction scored higher.

    Each of the first two columns of data_pair is turned into a windowed
    representation of size lag_max. A GaussianProcessRegressor is fitted from
    each windowed series, and the selected test object is fitted between the
    windowed input and the flattened regression residuals.

    :param data_pair: table exposing ``columns`` and column indexing
        (presumably a two-column pandas DataFrame; verify against callers)
    :param lag_max: window size passed to window_representation
    :param cond_ind_test: "ParCorr" (uses TestParCorr) or "CMI" (uses TestMI)
    :param verbose: currently unused; kept for interface compatibility
    :return: np.array([[1, 2], [1, 1]]) when c12 > c21,
        np.array([[1, 1], [2, 1]]) when c12 < c21,
        np.array([[1, 2], [2, 1]]) otherwise
    :raises ValueError: if cond_ind_test is not a supported test name
    """
    # Validate first so an unsupported name fails with a clear message
    # instead of leaving `indep` unbound further down.
    if cond_ind_test == "ParCorr":
        indep = TestParCorr()
    elif cond_ind_test == "CMI":
        indep = TestMI()
    else:
        raise ValueError(
            "cond_ind_test must be 'ParCorr' or 'CMI', got " +
            repr(cond_ind_test))

    first_col = data_pair.columns[0]
    second_col = data_pair.columns[1]
    X1 = window_representation(data_pair[first_col], windows_size=lag_max)
    X2 = window_representation(data_pair[second_col], windows_size=lag_max)
    # NOTE(review): both regression targets are bound to the whole input
    # rather than to a single series, and nothing here aligns them with the
    # rows of the windowed inputs - confirm the intended targets.
    Y1 = data_pair
    Y2 = data_pair

    # Direction 1 -> 2: regress on the windows of the first series.
    gpr12 = GaussianProcessRegressor().fit(X1, Y2)
    errors12 = (gpr12.predict(X1) - Y2).flatten()
    c12 = indep.fit(X1, errors12)

    # Direction 2 -> 1: regress on the windows of the second series.
    gpr21 = GaussianProcessRegressor().fit(X2, Y1)
    errors21 = (gpr21.predict(X2) - Y1).flatten()
    c21 = indep.fit(X2, errors21)

    if c12 > c21:
        return np.array([[1, 2], [1, 1]])
    if c12 < c21:
        return np.array([[1, 1], [2, 1]])
    return np.array([[1, 2], [2, 1]])
コード例 #3
0
    def find_sep_set(self):
        """
        Find the most contributing separation set (if it exists) between each
        pair of time series and remove the corresponding links.

        For every conditioning-set size, the ranked candidates
        (p, q, r_set, cmi) returned by self.rank_cmi_sep_set_parallel are
        visited in order. A candidate is only considered while p and q are
        still linked and every r in r_set is still linked to p or to q. When
        self.p_value differs from self.rank_using_p_value the conditional
        measure is recomputed with ctmi using self.p_value. If the decision
        rule holds, the link p - q is removed from self.graph.edges, the
        value is stored symmetrically in self.cmi_array, and for every r in
        r_set the separator is registered through self.graph.add_sep and the
        matching lags are copied from self.gamma_matrix into self.biggamma.

        :return: None; self.graph, self.cmi_array and self.biggamma are
            updated in place
        """
        if self.verbose:
            print("######################################")
            print("Skeletion Speperation")
            print("######################################")

        print("max set size = " + str(self.graph.d - 1))
        # range() excludes its upper bound, so the largest size actually
        # tried is self.graph.d - 2.
        for set_size in range(1, self.graph.d - 1):
            ranks = self.rank_cmi_sep_set_parallel(set_size)
            if self.verbose:
                print("Ranking:")
                print("p: " + str(ranks.elem_p))
                print("q: " + str(ranks.elem_q))
                print("r: " + str(ranks.elem_r))
                print("val: " + str(ranks.val))
            for p, q, r_set, cmi in zip(ranks.elem_p, ranks.elem_q,
                                        ranks.elem_r, ranks.val):
                edges = self.graph.edges
                # Earlier removals in this pass may have invalidated the
                # candidate, so re-check the links before testing it.
                still_linked = (edges[p, q] != 0) and all(
                    (edges[q, r] != 0) or (edges[p, r] != 0) for r in r_set)
                if not still_linked:
                    continue

                mi = self.mi_array[p, q]

                if self.p_value != self.rank_using_p_value:
                    # The ranking value is not the quantity used for the
                    # decision, so recompute it with the decision setting.
                    name_p = self.names[p]
                    name_q = self.names[q]
                    if self.adaptive_window:
                        x = window_representation(
                            self.series[name_p],
                            windows_size=self.window_matrix[name_q].loc[
                                name_p])
                        y = window_representation(
                            self.series[name_q],
                            windows_size=self.window_matrix[name_p].loc[
                                name_q])
                    else:
                        x = self.data_dict[name_p]
                        y = self.data_dict[name_q]

                    z = dict()
                    for r in r_set:
                        name_r = self.names[r]
                        if self.adaptive_window:
                            # select and drop NA
                            z[name_r] = self.series[name_r].dropna()
                        else:
                            z[name_r] = self.data_dict[name_r]

                    ctmi_kwargs = {
                        "gamma_matrix": self.gamma_matrix,
                        "p_value": self.p_value,
                        "instantaneous_dict": self.instantaneous_dict,
                    }
                    if self.graphical_optimization:
                        ctmi_kwargs["graph"] = self.graph.edges
                    # Both variants must overwrite `cmi`; previously the
                    # graphical variant stored its result in unused names,
                    # so the stale ranking value was used for the decision.
                    cmi, _ = ctmi(x, y, z, name_p, name_q,
                                  self.sampling_rate, **ctmi_kwargs)

                if self.verbose:
                    print("p=" + str(p) + "; q=" + str(q) + "; r=" +
                          str(r_set) + "; I(p,q|r)=" +
                          "{: 0.5f}".format(cmi) + "; I(p,q)=" +
                          "{: 0.5f}".format(mi),
                          end=" ")

                if self.p_value:
                    separated = mi < self.sig_lev < cmi
                else:
                    separated = cmi < self.alpha

                if separated:
                    self.cmi_array[p, q] = cmi
                    self.cmi_array[q, p] = cmi
                    if self.verbose:
                        print("=> remove link between " + str(p) +
                              " and " + str(q))
                    self.graph.edges[p, q] = 0
                    self.graph.edges[q, p] = 0

                    for r in r_set:
                        self.graph.add_sep(q, p, r)
                        self.biggamma[p, q, r] = self.gamma_matrix[
                            self.names[p]].loc[self.names[r]]
                        self.biggamma[q, p, r] = self.gamma_matrix[
                            self.names[q]].loc[self.names[r]]
                elif self.verbose:
                    # Terminate the line opened by the end=" " print above.
                    print()
コード例 #4
0
    def _cmi_sep_set_pq(self, p, q, set_size):
        """
        estimate ctmi between two time series conditioned on each set of neighbors with cardinality equal to set_size
        :param p: time series with index p
        :param q: time series with index q
        :param set_size: cardinality of the set of neighbors
        :return: p, q, list if estimated value of ctmi(p,q,r_set), and list of all r_sets
        """
        v_list = []
        # r_list = [r for r in range(self.graph.d) if (r != p) and (r != q) and ((
        #         (self.graph.edges[p, r] != 0) and (self.gamma_matrix[self.names[p]].loc[self.names[r]] >= 0)) or (
        #         (self.graph.edges[q, r] != 0) and (self.gamma_matrix[self.names[q]].loc[self.names[r]] >= 0)))]
        r_list = [
            r for r in range(self.graph.d)
            if (r != p) and (r != q) and (self.graph.edges[r, q] == 2)
        ]

        r_list = [list(r) for r in itertools.combinations(r_list, set_size)]

        r_list_temp = r_list.copy()
        # if set_size == 1:
        for rs in r_list_temp:
            print(rs)
            print(all(elem >= self.d for elem in rs))
            if all(elem >= self.d for elem in rs):
                r_list.remove(rs)
        del r_list_temp

        if self.adaptive_window:
            x = window_representation(
                self.series[self.names[p]],
                windows_size=int(
                    self.window_matrix[self.names[q]].loc[self.names[p]]))
            y = window_representation(
                self.series[self.names[q]],
                windows_size=int(
                    self.window_matrix[self.names[p]].loc[self.names[q]]))
        else:
            x = self.data_dict[self.names[p]]
            y = self.data_dict[self.names[q]]

        for rs in r_list:
            z = dict()
            for r in rs:
                if self.adaptive_window:
                    # select and drop NA
                    z[self.names[r]] = self.series[self.names[r]].dropna()
                else:
                    z[self.names[r]] = self.data_dict[self.names[r]]
            if self.graphical_optimization:
                # cmi_pval, cmi_val = gctmi(x, y, z, self.names[p], self.names[q], self.sampling_rate,
                #                           gamma_matrix=self.gamma_matrix, p_value=self.rank_using_p_value,
                #                           graph=self.graph.edges)
                cmi_pval, cmi_val = ctmi(
                    x,
                    y,
                    z,
                    self.names[p],
                    self.names[q],
                    self.sampling_rate,
                    gamma_matrix=self.gamma_matrix,
                    graph=self.graph.edges,
                    p_value=self.rank_using_p_value,
                    instantaneous_dict=self.instantaneous_dict)
            else:
                cmi_pval, cmi_val = ctmi(
                    x,
                    y,
                    z,
                    self.names[p],
                    self.names[q],
                    self.sampling_rate,
                    gamma_matrix=self.gamma_matrix,
                    p_value=self.rank_using_p_value,
                    instantaneous_dict=self.instantaneous_dict)

            if self.rank_using_p_value:
                v_list.append(cmi_pval)
            else:
                v_list.append(cmi_val)
        if v_list:
            return p, q, v_list, r_list
コード例 #5
0
    def __init__(self,
                 series,
                 sig_lev=0.05,
                 lag_max=5,
                 p_value=True,
                 rank_using_p_value=False,
                 verbose=True,
                 num_processor=-1,
                 graphical_optimization=False,
                 pairwise=True):
        """
        Causal inference (Wrapper) using TMI and CTMI (contain functions for skeleton construction)

        The initial graph is filled from run_timino_pw_R, either pair by pair
        or on the whole data set, and the sampling rates, alpha, gamma matrix
        and window matrix are then derived from the series.

        :param series: d-time series (with possibility of different sampling rate)
        :param sig_lev: significance level. By default 0.05
        :param lag_max: value forwarded to run_timino_pw_R as "nlags".
            By default 5
        :param p_value: Use p_value for decision making. By default True
        :param rank_using_p_value: rank candidates using p-values. It is
            reset to False, with a printed warning, when it is true while
            differing from p_value. By default False
        :param verbose: Print results. By default: True
        :param num_processor: number of processors for parallelization. By default -1 (all)
        :param graphical_optimization: stored on the instance; the methods
            that call ctmi pass the graph to it when this is true.
            By default False
        :param pairwise: if True, run_timino_pw_R is called on every pair of
            columns; otherwise once on the full series. By default True
        """
        columns = series.columns
        n_series = series.shape[1]
        self.graph = Graph(n_series)

        if pairwise:
            for i in range(n_series):
                for j in range(i + 1, n_series):
                    col_i = columns[i]
                    col_j = columns[j]
                    data_pair = series[[col_i, col_j]]
                    # Honor lag_max instead of a fixed number of lags; the
                    # default (5) matches the previously hard-coded value.
                    res_order_pair = run_timino_pw_R([[data_pair, "data"],
                                                      [0.00, "alpha"],
                                                      [lag_max, "nlags"]])
                    res_order_pair = pd.DataFrame(res_order_pair,
                                                  columns=data_pair.columns,
                                                  index=data_pair.columns)
                    # Copy every entry equal to 2 of the pairwise result into
                    # the matching cell of the global edge matrix.
                    if res_order_pair[col_j].loc[col_i] == 2:
                        self.graph.edges[i, j] = 2
                    if res_order_pair[col_i].loc[col_j] == 2:
                        self.graph.edges[j, i] = 2
        else:
            self.graph.edges = run_timino_pw_R([[series, "data"],
                                                [0.00, "alpha"],
                                                [lag_max, "nlags"]])
            for i in range(n_series):
                self.graph.edges[i, i] = 1

        if verbose:
            print("Order")
            print(self.graph.edges)

        # Adaptive windows are always enabled here; the non-adaptive branches
        # below are kept for that alternative configuration.
        self.adaptive_window = True
        self.series = series
        self.n = series.shape[0]
        self.d = series.shape[1]
        self.names = self.series.columns
        self.num_processor = num_processor
        self.p_value = p_value
        self.graphical_optimization = graphical_optimization
        # rank_using_p_value may only be true when it agrees with p_value.
        if rank_using_p_value and self.p_value != rank_using_p_value:
            print(
                "Warning: rank_using_p_value can be True iff p_value is True. Using rank_using_p_value=False"
            )
            self.rank_using_p_value = False
        else:
            self.rank_using_p_value = rank_using_p_value
        self.verbose = verbose

        self.data_dict = dict()
        self.instantaneous_dict = dict()

        self.lags = []
        self.sampling_rate = dict()
        for name in self.names:
            _, s_r = get_sampling_rate(self.series[name])
            self.sampling_rate[name] = s_r

        self.sig_lev = sig_lev
        self.alpha = get_alpha(series)

        for col, name in enumerate(self.names):
            if not self.adaptive_window:
                # Fixed window of size 1 for every series.
                self.lags.append(1)
                self.data_dict[name] = window_representation(
                    self.series[name], windows_size=self.lags[col])
            self.instantaneous_dict[name] = True

        if self.adaptive_window:
            self.gamma_matrix, self.window_matrix = gamma_matrix_window_matrix(
                self.series, series.columns, self.sampling_rate,
                self.graph.edges)
        else:
            self.gamma_matrix = align_matrix(self.data_dict, series.columns,
                                             self.sampling_rate)

        self.cap_gamma_df = pd.DataFrame(columns=["p", "q", "r", "Grp", "Grq"])

        self.mi_array = np.ones([self.graph.d, self.graph.d])
        self.cmi_array = np.ones([self.graph.d, self.graph.d])

        self.biggamma = np.zeros([self.d, self.d, self.d])

        if self.verbose:
            print("n: " + str(self.n))
            print("d: " + str(self.d))
            print("names: " + str(self.names))
            print("sampling_rate: " + str(self.sampling_rate))
            print("significance level:" + str(self.sig_lev))
            print("alpha:" + str(self.alpha))
            print("window size:" + str(self.lags))
            print("gamma matrix:" + str(self.gamma_matrix))
            if self.adaptive_window:
                print("window matrix" + str(self.window_matrix))
            print("instantaneous dict :" + str(self.instantaneous_dict))