Code example #1
File: downstream.py  Project: houchengbin/SG-EDNE
def gen_test_node_wrt_changes(graph_t0, graph_t1):
    ''' generate the testing nodes that we are interested in
        here we take the affected nodes present in both graphs
    '''
    from utils import unique_nodes_from_edge_set
    G0 = graph_t0.copy()
    G1 = graph_t1.copy()  # use copy to avoid problems caused by G1.remove_node(node)
    edge_add = edge_s1_minus_s0(s1=set(G1.edges()), s0=set(G0.edges()))
    edge_del = edge_s1_minus_s0(s1=set(G0.edges()), s0=set(G1.edges()))
    node_affected_by_edge_add = unique_nodes_from_edge_set(edge_add)  # unique
    node_affected_by_edge_del = unique_nodes_from_edge_set(edge_del)  # unique
    node_affected = list(
        set(node_affected_by_edge_add + node_affected_by_edge_del))  # unique
    node_add = [
        node for node in node_affected_by_edge_add if node not in G0.nodes()
    ]  # nodes that do not exist in G0
    node_del = [
        node for node in node_affected_by_edge_del if node not in G1.nodes()
    ]  # nodes that do not exist in G1

    not_interested_node = node_add + node_del
    test_nodes = [
        node for node in node_affected if node not in not_interested_node
    ]  # remove unseen nodes
    return test_nodes
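
As a quick sanity check, here is a hypothetical usage sketch (the toy graphs and the expected output are assumptions, not from the repository); it assumes the snippet above and the project's utils helpers are importable:

import networkx as nx

# two toy snapshots: edge (3, 4) is removed and edge (3, 5) is added between t0 and t1
G_t0 = nx.Graph([(1, 2), (2, 3), (3, 4)])
G_t1 = nx.Graph([(1, 2), (2, 3), (3, 5)])

test_nodes = gen_test_node_wrt_changes(G_t0, G_t1)
print(test_nodes)  # affected nodes present in both snapshots, e.g. [3]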
Code example #2
File: downstream.py  Project: houchengbin/SG-EDNE
def align_nodes(graph_t0, graph_t1):
    ''' remove newly added nodes from graph_t1, and add the newly removed nodes back to graph_t1
    '''
    from utils import unique_nodes_from_edge_set
    G0 = graph_t0.copy()
    G1 = graph_t1.copy()
    edge_add = edge_s1_minus_s0(s1=set(G1.edges()), s0=set(
        G0.edges()))  # one may directly use edge streams if available
    edge_del = edge_s1_minus_s0(s1=set(G0.edges()), s0=set(G1.edges()))
    node_affected_by_edge_add = unique_nodes_from_edge_set(edge_add)
    node_affected_by_edge_del = unique_nodes_from_edge_set(edge_del)
    node_affected = list(
        set(node_affected_by_edge_add + node_affected_by_edge_del)
    )  # nodes being directly affected between G0 and G1
    node_add = [
        node for node in node_affected_by_edge_add if node not in G0.nodes()
    ]  # nodes that do not exist in G0
    node_del = [
        node for node in node_affected_by_edge_del if node not in G1.nodes()
    ]  # nodes that do not exist in G1
    # to align G1 with G0
    G1.remove_nodes_from(node_add)
    G1.add_nodes_from(node_del)
    return G1
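
A minimal sketch of the intended effect (the toy graphs below are assumptions): after alignment, G1 shares the node set of G0, so node-wise comparisons across snapshots become possible.

G_t0 = nx.Graph([(1, 2), (2, 3), (3, 4)])
G_t1 = nx.Graph([(1, 2), (2, 3), (3, 5)])  # node 4 disappeared, node 5 appeared

G1_aligned = align_nodes(G_t0, G_t1)
print(sorted(G1_aligned.nodes()))  # [1, 2, 3, 4], i.e. the node set of G_t0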
Code example #3
File: sg_edne_fixrw.py  Project: houchengbin/SG-EDNE
    def sampling(self,
                 graph_t0,
                 graph_t1,
                 sampling_strategy,
                 emb_dict_t0=None,
                 w2v_models=None):
        ''' sampling strategies
        1: naively repeat all affected nodes
        2: sample with replacement with equal probability
        3: sample with replacement with equal probability, but only on a fraction of the pool, e.g. 0.8
        '''
        t1 = time.time()
        G0 = graph_t0.copy()
        G1 = graph_t1.copy()
        edge_add = edge_s1_minus_s0(s1=set(G1.edges()), s0=set(
            G0.edges()))  # one may directly use edge streams if available
        edge_del = edge_s1_minus_s0(s1=set(G0.edges()), s0=set(G1.edges()))
        node_affected_by_edge_add = unique_nodes_from_edge_set(edge_add)
        node_affected_by_edge_del = unique_nodes_from_edge_set(edge_del)
        node_affected = list(
            set(node_affected_by_edge_add + node_affected_by_edge_del)
        )  # nodes being directly affected between G0 and G1
        node_add = [
            node for node in node_affected_by_edge_add
            if node not in G0.nodes()
        ]  # nodes that do not exist in G0
        node_del = [
            node for node in node_affected_by_edge_del
            if node not in G1.nodes()
        ]  # nodes that do not exist in G1
        node_sample_pool = list(
            set(node_affected) -
            set(node_del))  # nodes directly affected and present in G1
        print(
            f'# nodes affected in the current graph: {len(node_sample_pool)}')
        node_samples = []  # list of lists

        # naively repeat all affected nodes
        if sampling_strategy == 1:
            print('S1: naively repeat all affected nodes')
            for i in range(self.num_base_models):
                node_samples.append(node_sample_pool)
        # sample with replacement with equal probability
        elif sampling_strategy == 2:
            print('S2: sample with replacement with equal probability')
            for i in range(self.num_base_models):
                node_samples.append(
                    list(
                        np.random.choice(node_sample_pool,
                                         size=len(node_sample_pool),
                                         replace=True)))
        # sample with replacement with equal probability; partially, e.g. 80%
        elif sampling_strategy == 3:
            print(
                'S3: sample with replacement with equal probability; partially, e.g. 80%'
            )
            frac = 0.80
            for i in range(self.num_base_models):
                node_samples.append(
                    list(
                        np.random.choice(node_sample_pool,
                                         size=int(frac *
                                                  len(node_sample_pool)),
                                         replace=True)))
        else:
            exit('Exit, sampling strategy not found ...')
        t2 = time.time()
        print(f'sampling time cost: {(t2-t1):.2f}')
        return node_samples, node_add, node_del
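
For intuition, the three strategies differ only in how the pool of affected nodes is resampled for each base model. Below is a standalone sketch of that resampling step (the pool and model count are made-up values, not part of the class):

import numpy as np

node_sample_pool = ['a', 'b', 'c', 'd', 'e']  # affected nodes present in G1 (made up)
num_base_models = 3

# S1: every base model reuses the full pool
s1 = [node_sample_pool for _ in range(num_base_models)]
# S2: bootstrap with replacement, same size as the pool
s2 = [list(np.random.choice(node_sample_pool, size=len(node_sample_pool), replace=True))
      for _ in range(num_base_models)]
# S3: bootstrap with replacement over only 80% of the pool size
frac = 0.80
s3 = [list(np.random.choice(node_sample_pool, size=int(frac * len(node_sample_pool)), replace=True))
      for _ in range(num_base_models)]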
Code example #4
File: downstream.py  Project: houchengbin/SG-EDNE
def gen_test_edge_wrt_changes(graph_t0, graph_t1, seed=None):
    ''' input: two networkx graphs
        generate **changed** testing edges for link prediction task
        currently, we only consider pos_neg_ratio = 1.0
        return: pos_edges_with_label [(node1, node2, 1), (), ...]
                neg_edges_with_label [(node3, node4, 0), (), ...]
    '''
    G0 = graph_t0.copy()
    G1 = graph_t1.copy()  # use copy to avoid problems caused by G1.remove_node(node)
    edge_add = edge_s1_minus_s0(s1=set(G1.edges()), s0=set(G0.edges()))
    edge_del = edge_s1_minus_s0(s1=set(G0.edges()), s0=set(G1.edges()))

    unseen_nodes = set(G1.nodes()) - set(G0.nodes())
    for node in unseen_nodes:  # to avoid unseen nodes while testing
        G1.remove_node(node)

    edge_add_unseen_node = []  # to avoid unseen nodes while testing
    #print('len(edge_add)', len(edge_add))
    for node in unseen_nodes:
        for edge in edge_add:
            if node in edge:
                edge_add_unseen_node.append(edge)
    edge_add = edge_add - set(edge_add_unseen_node)
    #print('len(edge_add)', len(edge_add))

    neg_edges_with_label = [list(item + (0, )) for item in edge_del]
    pos_edges_with_label = [list(item + (1, )) for item in edge_add]

    random.seed(seed)
    np.random.seed(seed)  # also seed numpy; np.random.choice is used below
    all_nodes = list(G0.nodes())

    if len(edge_add) > len(edge_del):
        num = len(edge_add) - len(edge_del)
        start_nodes = np.random.choice(all_nodes, num, replace=True)
        i = 0
        for start_node in start_nodes:
            try:
                non_nbrs = list(nx.non_neighbors(G0, start_node))
                non_nbr = random.sample(non_nbrs, 1).pop()
                non_edge = (start_node, non_nbr)
                if non_edge not in edge_del:
                    neg_edges_with_label.append(list(non_edge + (0, )))
                    i += 1
                if i >= num:
                    break
            except ValueError:  # random.sample fails when non_nbrs is empty
                print('Found a fully connected node: ', start_node,
                      'Ignore it...')
    elif len(edge_add) < len(edge_del):
        num = len(edge_del) - len(edge_add)
        i = 0
        for edge in nx.edges(G1):
            if edge not in edge_add:
                pos_edges_with_label.append(list(edge + (1, )))
                i += 1
            if i >= num:
                break
    else:  # len(edge_add) == len(edge_del)
        pass
    print('---- len(pos_edges_with_label), len(neg_edges_with_label)',
          len(pos_edges_with_label), len(neg_edges_with_label))
    return pos_edges_with_label, neg_edges_with_label
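
A hypothetical call (toy graphs assumed, with the snippet's module-level imports available) showing the labeled edge lists returned for link prediction:

G_t0 = nx.Graph([(1, 2), (2, 3), (3, 4), (1, 4)])
G_t1 = nx.Graph([(1, 2), (2, 3), (3, 4), (2, 4)])  # (1, 4) removed, (2, 4) added

pos_edges, neg_edges = gen_test_edge_wrt_changes(G_t0, G_t1, seed=42)
print(pos_edges)  # newly added edges labeled 1, e.g. [[2, 4, 1]]
print(neg_edges)  # removed edges labeled 0, e.g. [[1, 4, 0]]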
Code example #5
File: DANRL.py  Project: HansongN/DANRL
def node_selecting_scheme(graph_t0,
                          graph_t1,
                          reservoir_dict,
                          limit=0.1,
                          local_global=0.5):
    ''' select nodes to be updated
         G0: previous graph @ t-1
         G1: current graph @ t
         reservoir_dict: dict that is continuously maintained in memory
         limit: the percentage of nodes of the network to be updated (excluding new nodes)
         local_global: balance between nodes taken from recent changes (local) and random nodes (global topology)
    '''
    G0 = graph_t0.copy()
    G1 = graph_t1.copy()
    # newly added edges
    edge_add = edge_s1_minus_s0(s1=set(G1.edges()), s0=set(
        G0.edges()))  # one may directly use streaming added edges if possible
    # deleted edges
    edge_del = edge_s1_minus_s0(s1=set(G0.edges()), s0=set(
        G1.edges()))  # one may directly use streaming deleted edges if possible
    # edges whose weight has changed
    edge_wei, common_edge = egde_weight_changed(G1=G1, G0=G0)

    node_affected_by_edge_add = unique_nodes_from_edge_set(
        edge_add)  # unique nodes in the added edges
    node_affected_by_edge_del = unique_nodes_from_edge_set(
        edge_del)  # unique nodes in the deleted edges
    node_affected_by_edge_wei = unique_nodes_from_edge_set(
        edge_wei)  # unique nodes in the weight-changed edges
    node_affected = list(
        set(node_affected_by_edge_add + node_affected_by_edge_del +
            node_affected_by_edge_wei))  # all affected nodes (unique)
    node_add = [
        node for node in node_affected_by_edge_add if node not in G0.nodes()
    ]  # newly added nodes
    node_del = [
        node for node in node_affected_by_edge_del if node not in G1.nodes()
    ]  # deleted nodes
    # remove vanished nodes from the reservoir
    if len(node_del) != 0:  # there are deleted nodes
        reservoir_key_list = list(reservoir_dict.keys())  # keys in the reservoir
        for node in node_del:
            if node in reservoir_key_list:
                del reservoir_dict[
                    node]  # if a node is deleted, also delete it from the reservoir
    # affected nodes that exist in both G0 and G1
    exist_node_affected = list(
        set(node_affected) - set(node_add) -
        set(node_del))  # affected nodes present in both G0 and G1

    # L2 norm of each affected node's attribute change between the two snapshots
    attri_change = {}
    for node in exist_node_affected:
        attri_change[node] = np.linalg.norm(
            np.array(G0.nodes[node]["attribute"]) -
            np.array(G1.nodes[node]["attribute"]),
            ord=2)

    num_limit = int(G1.number_of_nodes() * limit)  # number of nodes to update
    local_limit = int(local_global * num_limit)  # number of locally selected (most affected) nodes
    global_limit = num_limit - local_limit  # number of globally selected (random) nodes

    node_update_list = []  # all the nodes to be updated
    # select the most affected nodes
    most_affected_nodes, reservoir_dict = select_most_affected_nodes(
        G0, G1, attri_change, local_limit, reservoir_dict, exist_node_affected)
    # if fewer nodes changed than local_limit, sample some random nodes as compensation
    lack = local_limit - len(most_affected_nodes)
    # tabu nodes: union of newly added nodes and the most affected nodes
    tabu_nodes = set(node_add + most_affected_nodes)
    # all remaining nodes outside the tabu set
    other_nodes = list(set(G1.nodes()) - tabu_nodes)
    # randomly pick nodes from other_nodes
    random_nodes = list(
        np.random.choice(other_nodes,
                         min(global_limit + lack, len(other_nodes)),
                         replace=False))
    # list of nodes whose embeddings will be updated
    node_update_list = node_add + most_affected_nodes + random_nodes

    reservoir_key_list = list(reservoir_dict.keys())
    node_update_set = set(
        node_update_list)  # remove repeated nodes due to resampling
    # once a node has been selected for update, remove it from the reservoir
    # so that its changes start accumulating again from scratch
    for node in node_update_set:
        if node in reservoir_key_list:
            del reservoir_dict[node]  # if updated, delete it from the reservoir

    print(
        f'num_limit {num_limit}, local_limit {local_limit}, global_limit {global_limit}, # nodes updated {len(node_update_list)}'
    )
    print(f'# nodes added {len(node_add)}, # nodes deleted {len(node_del)}')
    print(
        f'# nodes affected {len(node_affected)}, # nodes most affected {len(most_affected_nodes)}, # of random nodes {len(random_nodes)}'
    )
    print(
        f'num of nodes in reservoir with accumulated changes but not updated {len(list(reservoir_dict))}'
    )
    return node_update_list, reservoir_dict, node_del, node_add
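
To make the budget split concrete, the numbers below illustrate how limit and local_global carve up the update quota (the node count is a made-up value; helpers such as select_most_affected_nodes come from the DANRL codebase and are not needed here):

num_nodes = 1000          # G1.number_of_nodes(), assumed
limit = 0.1               # update 10% of the existing nodes
local_global = 0.5        # half local (most affected), half global (random)

num_limit = int(num_nodes * limit)           # 100 nodes updated in total
local_limit = int(local_global * num_limit)  # 50 most-affected nodes
global_limit = num_limit - local_limit       # 50 random nodes
print(num_limit, local_limit, global_limit)  # 100 50 50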