예제 #1
0
def indrect_graph_matching(costs: Dict,
                           probs: Dict,
                           p_t: np.ndarray,
                           idx2nodes: Dict,
                           ot_hyperpara: Dict,
                           weights: Dict = None) -> Tuple[List, List, List]:
    """
    Match two or more graphs indirectly through their Gromov-Wasserstein barycenter.

    The barycenter solver is run once on all graphs; the transports it returns
    are then passed to ``node_set_assignment`` to build the matched node sets.

    Args:
        costs: a dictionary of graphs {key: graph idx,
                                       value: (n_s, n_s) adjacency matrix of source graph}
        probs: a dictionary of graphs {key: graph idx,
                                       value: (n_s, 1) the distribution of source nodes}
        p_t: (n_t, 1) the distribution of target nodes
        idx2nodes: a dictionary of graphs {key: graph idx,
                                           value: a dictionary {key: idx of row in cost,
                                                                value: name of node}}
        ot_hyperpara: a dictionary of hyperparameters
        weights: a dictionary of graph {key: graph idx,
                                        value: the weight of the graph}

    Returns:
        set_idx: a list of node index paired set
        set_name: a list of node name paired set
        set_confidence: a list of confidence set of node pairs
    """
    barycenter_result = Gwl.gromov_wasserstein_barycenter(
        costs, probs, p_t, ot_hyperpara, weights)
    # Only the transports (second value) are needed here; the barycenter cost
    # and the third returned value are not used.
    _, transports, _ = barycenter_result

    idx_sets, name_sets, confidence_sets = node_set_assignment(
        transports, probs, idx2nodes)
    return idx_sets, name_sets, confidence_sets
예제 #2
0
def direct_graph_matching(
        cost_s: csr_matrix, cost_t: csr_matrix, p_s: np.ndarray,
        p_t: np.ndarray, idx2node_s: Dict, idx2node_t: Dict,
        ot_hyperpara: Dict) -> Tuple[List, List, List, np.ndarray]:
    """
    Match two graphs directly through their Gromov-Wasserstein discrepancy.

    Args:
        cost_s: a (n_s, n_s) adjacency matrix of source graph
        cost_t: a (n_t, n_t) adjacency matrix of target graph
        p_s: a (n_s, 1) vector representing the distribution of source nodes
        p_t: a (n_t, 1) vector representing the distribution of target nodes
        idx2node_s: a dictionary {key: idx of cost_s's row, value: the name of source node}
        idx2node_t: a dictionary {key: idx of cost_t's row, value: the name of target node}
        ot_hyperpara: a dictionary of hyperparameters

    Returns:
        pairs_idx: a list of node index pairs
        pairs_name: a list of node name pairs
        pairs_confidence: a list of confidence of node pairs
        trans: the transport returned by the discrepancy solver
    """
    solver_result = Gwl.gromov_wasserstein_discrepancy(
        cost_s, cost_t, p_s, p_t, ot_hyperpara)
    # The solver hands back its own source distribution; that one (not the
    # argument) is what the pair assignment uses. The discrepancy value itself
    # is not needed.
    transport, _, source_dist = solver_result

    assignment = node_pair_assignment(
        transport, source_dist, p_t, idx2node_s, idx2node_t)
    idx_pairs, name_pairs, confidence_pairs = assignment
    return idx_pairs, name_pairs, confidence_pairs, transport
예제 #3
0
def graph_partition(
        cost_s: csr_matrix,
        p_s: np.ndarray,
        p_t: np.ndarray,
        idx2node: Dict,
        ot_hyperpara: Dict,
        trans0: np.ndarray = None) -> Tuple[Dict, Dict, Dict, np.ndarray]:
    """
    Partition a single graph by computing the Gromov-Wasserstein discrepancy
    between it and a reference graph built from the target distribution.

    The reference cost is a sparse diagonal matrix whose diagonal is the first
    column of ``p_t``.

    Args:
        cost_s: (n_s, n_s) adjacency matrix of source graph
        p_s: (n_s, 1) the distribution of source nodes
        p_t: (n_t, 1) the distribution of target nodes
        idx2node: a dictionary {key = idx of row in cost, value = name of node}
        ot_hyperpara: a dictionary of hyperparameters
        trans0: forwarded unchanged as the last argument of the discrepancy
            solver (presumably an initial transport -- confirm against the solver)

    Returns:
        sub_costs: a dictionary {key: cluster idx,
                                 value: sub cost matrices}
        sub_probs: a dictionary {key: cluster idx,
                                 value: sub distribution of nodes}
        sub_idx2nodes: a dictionary {key: cluster idx,
                                     value: a dictionary mapping indices to nodes' names}
        trans: (n_s, n_t) the optimal transport
    """
    target_diagonal = np.diag(p_t[:, 0])
    reference_cost = csr_matrix(target_diagonal)

    solver_result = Gwl.gromov_wasserstein_discrepancy(
        cost_s, reference_cost, p_s, p_t, ot_hyperpara, trans0)
    # The source distribution returned by the solver replaces the argument for
    # the cluster assignment; the discrepancy value is not used.
    transport, _, source_dist = solver_result

    clusters = node_cluster_assignment(
        cost_s, transport, source_dist, p_t, idx2node)
    cluster_costs, cluster_probs, cluster_idx2nodes = clusters
    return cluster_costs, cluster_probs, cluster_idx2nodes, transport
예제 #4
0
def multi_graph_partition(costs: Dict, probs: Dict, p_t: np.ndarray,
                          idx2nodes: Dict, ot_hyperpara: Dict,
                          weights: Dict = None,
                          predefine_barycenter: bool = False) -> \
        Tuple[List[Dict], List[Dict], List[Dict], Dict, np.ndarray]:
    """
    Achieve multi-graph partition via calculating Gromov-Wasserstein barycenter
    between the target graphs and a proposed one.

    Args:
        costs: a dictionary of graphs {key: graph idx,
                                       value: (n_s, n_s) adjacency matrix of source graph}
        probs: a dictionary of graphs {key: graph idx,
                                       value: (n_s, 1) the distribution of source nodes}
        p_t: (n_t, 1) the distribution of target nodes
        idx2nodes: a dictionary of graphs {key: graph idx,
                                           value: a dictionary {key: idx of row in cost,
                                                                value: name of node}}
        ot_hyperpara: a dictionary of hyperparameters
        weights: a dictionary of graph {key: graph idx,
                                        value: the weight of the graph}.
            Only forwarded to the barycenter solver; ignored when
            ``predefine_barycenter`` is truthy.
        predefine_barycenter: falsy: learn the barycenter; truthy: use the
            predefined barycenter ``diag(p_t[:, 0])`` and partition every graph
            independently with ``graph_partition``.

    Returns:
        sub_costs_cluster: a list with one entry per cluster (``p_t.shape[0]``
            entries); each entry is a dictionary {key: graph idx,
                                                  value: sub cost matrix}
        sub_probs_cluster: same layout, values are the sub distributions of nodes
        sub_idx2nodes_cluster: same layout, values are dictionaries mapping
            indices to nodes' names
        trans: a dictionary {key: graph idx,
                             value: an optimal transport between the graph and the barycenter}
        cost_t: the reference graph corresponding to partition result (a sparse
            diagonal matrix in the predefined case, otherwise whatever the
            barycenter solver returns)

        A graph only appears in a cluster's dictionaries when that cluster idx
        is a key of the graph's own partition result.
    """
    # Per-graph partition results: {graph idx: {cluster idx: ...}}.
    sub_costs_all = {}
    sub_probs_all = {}
    sub_idx2nodes_all = {}

    # Truthiness test rather than ``is True`` so that flags such as
    # ``numpy.bool_(True)`` or ``1`` select the predefined barycenter as well.
    if predefine_barycenter:
        cost_t = csr_matrix(np.diag(p_t[:, 0]))
        trans = {}
        for n in costs:
            (sub_costs_all[n], sub_probs_all[n], sub_idx2nodes_all[n],
             trans[n]) = graph_partition(costs[n], probs[n], p_t,
                                         idx2nodes[n], ot_hyperpara)
    else:
        cost_t, trans, _ = Gwl.gromov_wasserstein_barycenter(
            costs, probs, p_t, ot_hyperpara, weights)
        for n in costs:
            (sub_costs_all[n], sub_probs_all[n],
             sub_idx2nodes_all[n]) = node_cluster_assignment(
                costs[n], trans[n], probs[n], p_t, idx2nodes[n])

    # Regroup from "per graph, per cluster" to "per cluster, per graph".
    sub_costs_cluster = []
    sub_probs_cluster = []
    sub_idx2nodes_cluster = []
    for i in range(p_t.shape[0]):
        # Graphs that actually received nodes for cluster i.
        members = [n for n in costs if i in sub_costs_all[n]]
        sub_costs_cluster.append({n: sub_costs_all[n][i] for n in members})
        sub_probs_cluster.append({n: sub_probs_all[n][i] for n in members})
        sub_idx2nodes_cluster.append(
            {n: sub_idx2nodes_all[n][i] for n in members})

    return sub_costs_cluster, sub_probs_cluster, sub_idx2nodes_cluster, trans, cost_t
예제 #5
0
def recursive_graph_partition(
    cost_s: csr_matrix,
    p_s: np.ndarray,
    idx2node: Dict,
    ot_hyperpara: Dict,
    max_node_num: int = 200
) -> Tuple[List[np.ndarray], List[np.ndarray], List[Dict]]:
    """
    Repeatedly partition a graph until every sub-graph has at most
    ``max_node_num`` nodes.

    Each pending (sub-)graph is partitioned by computing the Gromov-Wasserstein
    discrepancy between it and a reference graph ``diag(p_t[:, 0])``, where
    ``p_t`` comes from ``estimate_target_distribution(..., dim_t=2)``. Resulting
    sub-graphs that are still larger than ``max_node_num`` are queued for the
    next level; the others are collected as final results.

    Args:
        cost_s: (n_s, n_s) adjacency matrix of source graph
        p_s: (n_s, 1) the distribution of source nodes
        idx2node: a dictionary {key = idx of row in cost, value = name of node}
        ot_hyperpara: a dictionary of hyperparameters. It is not modified: the
            function works on a shallow copy whose 'outer_iteration' entry is
            set to the node count of the sub-graph being partitioned.
        max_node_num: the maximum number of nodes in a sub-graph

    Returns:
        costs_final: a list of sub cost matrices, one per final sub-graph
        probs_final: a list of sub distributions of nodes, aligned with costs_final
        idx2nodes_final: a list of dictionaries mapping indices to nodes' names,
            aligned with costs_final
    """
    # Shallow copy so the per-leaf 'outer_iteration' override below does not
    # leak into the dictionary owned by the caller.
    hyperpara = dict(ot_hyperpara)

    pending_costs = [cost_s]
    pending_probs = [p_s]
    pending_idx2nodes = [idx2node]
    costs_final = []
    probs_final = []
    idx2nodes_final = []

    # NOTE(review): a sub-graph larger than max_node_num is always re-queued,
    # even if the partition kept all nodes in one cluster. Confirm the solver
    # cannot return such a degenerate split, otherwise this loop may not end.
    while pending_costs:
        next_costs = []
        next_probs = []
        next_idx2nodes = []
        for cost, prob, names in zip(pending_costs, pending_probs,
                                     pending_idx2nodes):
            p_t = estimate_target_distribution({0: prob}, dim_t=2)
            cost_t = csr_matrix(np.diag(p_t[:, 0]))
            # One outer iteration per node of the sub-graph being split.
            hyperpara['outer_iteration'] = prob.shape[0]
            trans, _, _ = Gwl.gromov_wasserstein_discrepancy(
                cost, cost_t, prob, p_t, hyperpara)
            # The cluster assignment uses the sub-graph's own distribution,
            # not the one returned by the solver.
            sub_costs, sub_probs, sub_idx2nodes = node_cluster_assignment(
                cost, trans, prob, p_t, names)

            for key, sub_idx2node in sub_idx2nodes.items():
                if len(sub_idx2node) > max_node_num:
                    # Still too large: partition again at the next level.
                    next_costs.append(sub_costs[key])
                    next_probs.append(sub_probs[key])
                    next_idx2nodes.append(sub_idx2node)
                else:
                    costs_final.append(sub_costs[key])
                    probs_final.append(sub_probs[key])
                    idx2nodes_final.append(sub_idx2node)

        pending_costs = next_costs
        pending_probs = next_probs
        pending_idx2nodes = next_idx2nodes
    return costs_final, probs_final, idx2nodes_final
        ot_dict = {'loss_type': 'L2',  # the key hyperparameters of GW distance
                   'ot_method': 'proximal',
                   'beta': 0.01,
                   'outer_iteration': 3000,  # outer, inner iteration, error bound of optimal transport
                   'iter_bound': 1e-30,
                   'inner_iteration': 1,
                   'sk_bound': 1e-30,
                   'max_iter': 1,  # iteration and error bound for calcuating barycenter
                   'cost_bound': 1e-16,
                   'update_p': False,  # optional updates of source distribution
                   'lr': 0.1,
                   'node_prior': None,
                   'alpha': 0,
                   'test_mode': True}

        cost_st = GWL.node_cost_st(cost_s, cost_t, p_s, p_t,
                                   loss_type=ot_dict['loss_type'], prior=ot_dict['node_prior'])
        cost = GWL.node_cost(cost_s, cost_t, maps / num_nodes, cost_st, ot_dict['loss_type'])
        d_gw0 = (cost * maps / num_nodes).sum()

        t0 = time.time()
        ot_dict['beta'] = 10
        ot_dict['outer_iteration'] = 1
        ot_dict['inner_iteration'] = MM
        ot_dict['ot_method'] = 'b-admm'
        trans1, d_gw1, _ = GWL.gromov_wasserstein_discrepancy(cost_s, cost_t, p_s, p_t, ot_dict)
        t1 = time.time()
        ot_dict['beta'] = 1e-2
        ot_dict['outer_iteration'] = MM
        ot_dict['inner_iteration'] = 10
        ot_dict['ot_method'] = 'proximal'
        trans2, d_gw2, _ = GWL.gromov_wasserstein_discrepancy(cost_s, cost_t, p_s, p_t, ot_dict)