Example #1
def get_data_v3(cuda=True):

    # The edge index comes from pytorch-geometric, which avoids the unnecessary shuffling done in Kipf's original code
    edge_index = pk.load(open("graph.pkl", "rb"))
    row, col = edge_index
    edges = [(int(u), int(v)) for u, v in zip(row.tolist(), col.tolist())]
    g = nx.Graph()
    g.add_edges_from(edges)
    print("Graph Read ")

    nnodes = nx.number_of_nodes(g)
    nodes = nx.nodes(g)
    #print(nodes)
    cr = nx.core_number(g)
    # Group nodes by core number: core value -> [nodes]
    cr_dict = defaultdict(list)
    for node, core in cr.items():
        cr_dict[core].append(node)
    print("core numbers of original graph", len(cr_dict))

    print("number of nodes--", nnodes)
    cut = int(0.1 * nnodes)
    print("cut value--", cut)
    #print("number of nodes,edges ",g.number_of_nodes(),g.number_of_edges())
    # Dense adjacency indexed by node id (rows sorted by id so they line up
    # with the feature/label arrays), plus self-loops, as a sparse COO matrix.
    # np.float was removed in NumPy 1.24; plain float works everywhere.
    adj = nx.to_numpy_array(g, nodelist=sorted(g.nodes()), dtype=float)
    adj = adj + np.eye(adj.shape[0])
    adj = sp.sparse.coo_matrix(adj)
    print("Adjacency Made")

    adj = torch.FloatTensor(adj.todense())
    features = pk.load(open("feature.pkl", "rb"))
    features = normalize_features(features.numpy())
    features = torch.FloatTensor(features)
    print("Features Normalized ")

    labels = pk.load(open("label.pkl", "rb"))
    lb = labels.numpy()
    ground_dict = Counter(lb)
    classes = len(ground_dict)

    #community detection --Infomap
    info = infomap.Infomap("--two-level --silent -s 8")
    for e in list(g.edges()):
        info.addLink(*e)
    info.run()
    c = info.getModules()  #node:community
    z = defaultdict(list)
    for u in c:
        z[c[u]].append(u)  #community:[nodes]
    #print("number of communities detected")
    #print (len(z))
    com_size = {k: len(v) for k, v in z.items()}
    #print(com_size)

    #community detection-- Louvain
    partition = community.best_partition(g)  #node:community
    com = defaultdict(list)
    for p in partition:
        com[partition[p]].append(p)
    print("number of communities detected")
    print(len(com))

    # Collect community-pair edges; a_wt stores each pair with the smaller
    # community id first so that Counter can aggregate edge weights.
    a = set()
    a_wt = []
    for te in edges:
        u = te[0]
        v = te[1]
        com_u = partition[u]
        com_v = partition[v]
        t = (com_u, com_v)
        a.add(t)
        if com_u > com_v:
            m = (com_v, com_u)
            a_wt.append(m)
        else:
            a_wt.append(t)

    edge_wt = Counter(a_wt)
    #print(edge_wt)

    meta_wt_edge = {}
    #print(len(a))
    meta_nodes = list(com.keys())
    #print (len(meta_nodes))
    b = set(permutations(meta_nodes, 2))
    meta_edge = a.intersection(b)

    for k, v in edge_wt.items():
        if k in meta_edge:
            meta_wt_edge[k] = v

    #print("meta edges")
    #print(meta_wt_edge)

    meta_net = nx.Graph()
    meta_net.add_nodes_from(meta_nodes)
    meta_net.add_edges_from(meta_edge)
    print("meta graph formed")

    m_nodes = nx.number_of_nodes(meta_net)
    print("number of meta nodes", m_nodes)

    m_edges = meta_net.number_of_edges()
    print("number of meta edges", m_edges)

    train_ids = []

    edge_set = set(edges)
    # For each community, use the member with the highest clustering
    # coefficient in the community's internal subgraph as a training seed.
    for m in meta_nodes:
        coms = com[m]
        perm = set(permutations(coms, 2))
        in_edges = edge_set.intersection(perm)
        #print(in_edges)
        in_net = nx.Graph()
        in_net.add_edges_from(in_edges)
        #print(in_net.edges())
        in_clus = nx.clustering(in_net)
        #print("clustering",in_clus)
        h = max(in_clus.items(), key=operator.itemgetter(1))[0]
        train_ids.append(h)

    #meta_edgelist = list(meta_net.edges())
    '''cores = dict(nx.core_number(meta_net))

    mst = nx.minimum_spanning_tree(meta_net, algorithm='prim')
    #print("tree edges",mst.edges())
    mst_edgelist = list(sorted(mst.edges()))
    mst_nodes =  list(mst.nodes())
    mst_adj = {}
    for s in mst_nodes:
        mst_l = []
        for e in mst_edgelist:
            if s == e[0] :
                mst_l.append(e[1])
        mst_adj[s] = mst_l

    #print(mst_adj)
    #print(mst_edgelist)
    core_vals = set(v for v in cores.values())
    core_dict = {}
    for d in core_vals:
        tmp = []
        for k,v in cores.items():
            if v == d:
                tmp.append(k)
        core_dict[d] = tmp'''

    #print(core_dict)
    #print ("number of cores in meta network:", len(core_dict))
    '''core_class = {}
    for k,v in core_dict.items():
        cls = []
        for m in v:
            nd = z[m]
            for x in nd:
                cl = lb[x]
                cls.append(cl)
        core_lb = Counter(cls)
        mm =  max(v for k,v in core_lb.items())
        for k1,v1 in core_lb.items():
            if v1 == mm:
                core_class[k]=k1
    print("class information per core--")
    print(core_class)   #The class information/core is printed

    com_class = {}
    for mn in meta_nodes:
        cls = []
        nd = z[mn]
        for x in nd:
            cl = lb[x]
            cls.append(cl)
        com_lb = Counter(cls)
        mm = max(v for k,v in com_lb.items())
        for k1,v1 in com_lb.items():
            if v1 == mm :
                com_class[mn] = k1

    print("class information per community--")
    #print(com_class) #The class information/community is printed

    com_cls = []
    for k,v in com_class.items():
        com_cls.append(v)
    print(Counter(com_cls))

    sorted_core = dict(OrderedDict(sorted(core_dict.items(),reverse=True)))

    reverse_core = dict(OrderedDict(sorted(sorted_core.items())))'''

    '''t_n = []
    for v in sorted_core[25]:
        for t in z[v]:
            t_n.append(t)
    t_lb = []
    for t in t_n:
        t_lb.append(lb[t])''' #for checking the class labels distribution in each core

    #build 2nd order network--
    '''meta_info = infomap.Infomap("--two-level --silent -s 8")
    for e in list(meta_net.edges()):
        meta_info.addLink(*e)
    meta_info.run()
    cc = meta_info.getModules() #node:community
    zz = defaultdict(list)
    for u in cc:
        zz[cc[u]].append(u) #community:[nodes]
    print("number of meta communities detected")
    print (len(zz))

    meta_coms = {}
    for k,v in zz.items():
        cls = []
        for b in v:
            lbl = com_class[b]
            cls.append(lbl)
        metacom_lb = Counter(cls)
        meta_coms[k] = metacom_lb

    print("class information of meta communities of 2nd order network--")
    print(meta_coms)

    meta_cr = dict(nx.core_number(meta_net))
    meta_cr_vals = set(v for v in meta_cr.values())
    meta_cr_dict = {}
    for d in meta_cr_vals:
        tmp = []
        for k,v in meta_cr.items():
            if v == d:
                tmp.append(k)
        meta_cr_dict[d] = tmp
    print("cores in 2nd order network--")
    print(meta_cr_dict)

    #Selection of training nodes
    core_window =3

    t_cores = []
    cnt = 0
    for cr,coms in sorted_core.items():
        t_cores.append(cr)
        cnt += 1
        if cnt == core_window:
            break
    print("t_cores--",t_cores)
    #print("t_coms--",len(t_coms))
   
    #build adjacency matrix of edges--
    t_coms = core_dict[7]
    p = len(t_coms)
    rows,cols = (p,p)
    adje = [[0]*cols for _ in range(rows)]  # avoid aliased rows
    for me in meta_edgelist:
        u = me[0]
        v = me[1]
        if u in t_coms:
            if v in t_coms:
                #h += 1
                ui = t_coms.index(u)
                vi = t_coms.index(v)
                adje[ui][vi] += 1
    #print(adje)'''
    '''for me in meta_edge:
        u = me[0]
        if u == 5:
            print(me)'''
    '''t_arr = []
    for i in range(core_window):
        t_arr.append(0)

    
    tr_dict = {}
    for cls in range(classes):
        tr_nodes = []
        fl  = 0
        ar = 0
        cnt_cls = int(0.1*(ground_dict[cls]))
        print("cls and count--",cls,cnt_cls)
        while(True):
            for cr in t_cores:
                coms = core_dict[cr]
                j = t_arr[ar]
                cm = coms[j]
                j = (j+1)%len(coms)
                t_arr[ar] = j
                ar += 1
                #cm = int(np.random.choice(coms,1))
                nn = z[cm]
                n = int(np.random.choice(nodes,1))
                l = lb[n]
                if l == cls and n not in tr_nodes:
                    tr_nodes.append(n)
                    if len(tr_nodes) == cnt_cls:
                        fl = 1
                        break
                if ar == core_window:
                    ar = 0
            if fl == 1:
                tr_dict[cls] = tr_nodes
                break



    t_lbls = []
    for k,v in tr_dict.items():
        for t in v:
            lbl = lb[t]
            t_lbls.append(lbl)
            
    print("class level distribution--training labels",Counter(t_lbls))

    train_ids = []
    val_ids = []
    test_ids = []
    test_mask_ids = []
    for k,v in tr_dict.items():
        for t in v:
            train_ids.append(t)
    #for n in nodes2:
        #train_ids.append(n)
    f = 0
    while True:
        if len(train_ids)<cut:
            r = int(np.random.choice(nodes,1,replace = False))
            if r not in train_ids:
                train_ids.append(r)
                if len(train_ids)==cut:
                    f = 1
        if f == 1:
            break
    #print("train ids--",len(train_ids))'''
    #sorted_core = dict(OrderedDict(sorted(core_dict.items(),reverse=True)))
    #print(sorted_core)

    #c_meta_nodes = sorted_core[7]
    #y = int(np.random.choice(c_meta_nodes,1))

    #train_ids = []
    #train_coms = bfs(mst_adj,y)
    #print(train_coms)
    '''f = 0
    while True:
        for tc in train_coms:
            yy = z[tc]
            x = int(np.random.choice(yy,1))
            train_ids.append(x)
            if len(train_ids) == cut :
                f = 1
                break
        if f == 1:
            break
        else:
            continue'''

    #print(train_ids)

    #train-test nodes choice
    '''for m in meta_nodes:
        f_nodes = z[m]
        x = int(np.random.choice(f_nodes,1,replace=False))
        train_ids.append(x)'''

    val_ids = []
    test_ids = []
    rm_ids = []

    for n in nodes:
        if n not in train_ids:
            #if n not in nodes2:
            rm_ids.append(n)
    #print ("test ids--",len(test_ids))

    #val_ids.extend(rm_ids[0:int(0.1*len(nodes))])
    val_ids = np.random.choice(rm_ids, len(train_ids), replace=False)

    r_ids = []
    for n in rm_ids:
        if n not in val_ids:
            r_ids.append(n)
    #val_ids= np.random.choice(test_ids,int(0.1*len(nodes)),replace= False)
    test_ids = np.random.choice(r_ids, 1084, replace=False)  # fixed-size test split
    #val_ids = np.random.choice(test_ids,int(0.1*len(nodes)),replace= False)
    #test_mask_ids = np.random.choice(test_ids,1084,replace = False)

    with open("test_labels_infomap.txt", 'wb') as fp:
        pk.dump(test_ids, fp)

    with open("training_labels_infomap.txt", "wb") as fp:
        pk.dump(train_ids, fp)

    idx_train = np.array(train_ids)
    idx_val = np.array(val_ids)
    idx_test = np.array(test_ids)
    print("Train Validation Test ", len(idx_train), len(idx_val),
          len(idx_test))

    if cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        #idx_train = idx_train.cuda()
        #idx_val = idx_val.cuda()
        #idx_test = idx_test.cuda()
    #return g,adj,features,labels,idx_train,idx_val,idx_test
    return idx_train, idx_test, idx_val  # note the order: train, test, val
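A minimal driver for the loader above (a sketch, assuming graph.pkl, feature.pkl and label.pkl are present in the working directory, as the function expects):

# Hypothetical usage: fetch the split indices for a CPU-only run.
idx_train, idx_test, idx_val = get_data_v3(cuda=False)
print("train/test/val sizes:", len(idx_train), len(idx_test), len(idx_val))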
Example #2
def infomap(
    g,
    seed=None,
    options="--inner-parallelization --silent",
    markov_time=1.0,
    number_of_modules=None,
    return_tree=False,
    directed=False,
):
    """
    Infomap is based on ideas of information theory.
    The algorithm uses the probability flow of random walks on a network
    as a proxy for information flows in the real system and it decomposes
    the network into modules by compressing a description of the probability flow.

    :param g: a networkx/igraph object
    :param seed: the seed for the random number generator (default: None)
    :param options: custom command line options
                    (default: "--inner-parallelization --silent")
    :param markov_time: tweak the transition likelihood of the random
                        walker (default: 1.0)
    :param number_of_modules: preferred number of modules (default: None)
    :param return_tree: whether to return the cluster tree generated by
                        the algorithm (default: False)
    :param directed: whether to treat a directed graph as directed
                     (default: False)
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.infomap(G)

    :References:

    Rosvall M, Bergstrom CT (2008) `Maps of random walks on complex networks
    reveal community structure. <https://www.pnas.org/content/105/4/1118/>`_
    Proc Natl Acad Sci USA 105(4):1118–1123

    .. note:: Reference implementation: https://pypi.org/project/infomap/
    """

    if imp is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: "
            "install infomap to use the selected feature.")

    g = convert_graph_formats(g, nx.Graph)

    g1 = nx.convert_node_labels_to_integers(g, label_attribute="name")
    name_map = nx.get_node_attributes(g1, "name")
    coms_to_node = defaultdict(list)

    options_compiled = options + f" --markov-time {markov_time}"
    if number_of_modules:
        options_compiled += f" --preferred-number-of-modules {number_of_modules}"
    if seed is not None:
        options_compiled += f" --seed {seed}"
    if directed:
        options_compiled += " -d"

    im = imp.Infomap(options_compiled)
    for u, v, data in g1.edges(data=True):
        im.add_link(u, v, weight=data.get("weight", 1))
    im.run()

    # Only depth 1 is ever used here: the loop breaks after its first
    # iteration, so coms_to_node holds the top-level (coarsest) modules.
    for depth in range(1, im.maxTreeDepth()):
        coms_to_node = defaultdict(list)
        for node in im.iterTree():
            # https://mapequation.github.io/infomap/
            # moduleIndexLevel: the depth from the root at which the
            # iterator's moduleIndex() advances. 1 reports the coarsest
            # (top) modules, 2 the second level, and -1 (the default)
            # the finest, bottom-level modules.
            if node.isLeaf():
                nid = node.physicalId
                module = node.path[:depth]
                nm = name_map[nid]
                coms_to_node[module].append(nm)
        break

    coms_infomap = [list(c) for c in coms_to_node.values()]

    clustering = NodeClustering(
        coms_infomap,
        g,
        "Infomap",
        method_parameters={
            "options": options,
            "seed": seed
        },
    )

    if not return_tree:
        return clustering
    else:  # create a cluster tree
        D = nx.DiGraph()

        D.add_nodes_from(g.nodes(data=True))

        for node in im.iterTree(maxClusterLevel=-1):
            node_path_str = [str(c) for c in node.path]
            if node.isRoot():
                D.add_node("root")
            else:
                if node.isLeaf():
                    node_key = g1.nodes[node.physicalId]["name"]
                else:
                    node_key = "tree_" + "_".join(node_path_str)
                    D.add_node(node_key)

                if len(node.path) == 1:
                    parent_key = "root"
                else:
                    parent_key = "tree_" + "_".join(node_path_str[:-1])

                assert D.has_node(parent_key)
                D.add_edge(parent_key, node_key)

        _sum_attrs_in_tree(D)

        return clustering, D
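A short usage sketch for the wrapper above; the graph and call mirror the docstring example, with return_tree=True as the only addition (assumes the infomap package and cdlib's NodeClustering are importable):

# Hypothetical usage: also retrieve the hierarchical cluster tree.
import networkx as nx
G = nx.karate_club_graph()
coms, tree = infomap(G, return_tree=True)
print(len(coms.communities), "communities,", tree.number_of_nodes(), "tree nodes")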
Example #3
import infomap

eta = 0.3
im = infomap.Infomap(f"--two-level --meta-data-rate {eta}")

# Add weight as an optional third argument
im.add_link(0, 1)
im.add_link(0, 2)
im.add_link(0, 3)
im.add_link(1, 0)
im.add_link(1, 2)
im.add_link(2, 1)
im.add_link(2, 0)
im.add_link(3, 0)
im.add_link(3, 4)
im.add_link(3, 5)
im.add_link(4, 3)
im.add_link(4, 5)
im.add_link(5, 4)
im.add_link(5, 3)

im.set_meta_data(0, 1)
im.set_meta_data(1, 1)
im.set_meta_data(2, 2)
im.set_meta_data(3, 2)
im.set_meta_data(4, 3)
im.set_meta_data(5, 3)

im.run()

print(f"\nFound {im.num_top_modules} modules with codelength: {im.codelength}")
Example #4
import infomap

im = infomap.Infomap("--two-level --verbose")

stateNetwork = """
*Vertices 4
1 "PRE"
2 "SCIENCE"
3 "PRL"
4 "BIO"
# *ngrams
# 1 2 3
# 1 2 2 3
# 4 2 4
*States
1 2 "1 2"
2 3 "2 3"
3 2 "1 2 2"
4 2 "4 2"
5 4 "2 4"
*Links
1 2
3 2
4 5
"""

im.set_name(1, "PRE")
im.set_name(2, "SCIENCE")
im.set_name(3, "PRL")
im.set_name(4, "BIO")
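Note that stateNetwork is defined but never parsed in this snippet. A plausible completion (a sketch, assuming the string follows Infomap's standard state-network input format) writes it to a file and reads it back:

# Sketch: persist the state network, then run Infomap on it.
with open("states.net", "w") as f:
    f.write(stateNetwork)
im.read_file("states.net")
im.run()
print(f"Found {im.num_top_modules} modules with codelength: {im.codelength}")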
Example #5
import infomap
import pathlib

name = "Email-Enron"
filename = f"../dataset/{name}.txt"

im = infomap.Infomap()

# You can read a network with the method read_file,
# which by default will accumulate to existing network data
accumulate = False
im.read_file(filename, accumulate)

im.run("-N5")

print(
    f"Found {im.max_depth} levels with {im.num_leaf_modules} leaf modules in {im.num_top_modules} top modules and codelength: {im.codelength}"
)
print(f"All codelengths: {im.codelengths}")

# print("Tree:\n# path node_id module_id flow")
# for node in im.nodes:
#     print(f"{node.path} {node.node_id} {node.module_id} {node.flow}")

for module_level in range(1, im.max_depth):
    print(
        f"Modules at level {module_level}: {im.get_modules(module_level).values()}"
    )

# print("\nModules at all levels:")
# for node_id, modules in im.get_multilevel_modules().items():
#     print(node_id, modules)
Example #6
def make_communities(g, method):
    '''
    Function to run community detection

    Inputs:
        g : igraph object
            igraph Graph representing raw music data
        method : string
            String identifying the clustering method to use.
            Options are (case-sensitive):
                1) infomap
                2) LPM
                3) louvain
                4) HLC
    Returns:
        assignment : dict
            Mapping from vertex name to community membership
            (for HLC, a list of communities is returned instead)
    '''
    print("*******Inside main comm function *******")

    if method == "infomap":
        edge_tuples = [edge.tuple for edge in g.es]
        im = infomap.Infomap()
        im.add_links(edge_tuples)
        im.run("-d -N 10")
        modules = im.get_multilevel_modules()

        # igraph non-hierarchical version
        #infomap_partition = g.community_infomap(edge_weights='weight')

        infomap_partition_assignment = {
            g.vs[i]['name']: modules[i]
            for i in range(g.vcount())
        }

        return infomap_partition_assignment

    elif method == "LPM":
        lpm_partition = g.community_label_propagation(weights='weight')
        lpm_partition_assignment = {
            g.vs[i]['name']: [lpm_partition.membership[i]]
            for i in range(g.vcount())
        }

        return lpm_partition_assignment

    elif method == 'louvain':
        louvain_partition = g.community_multilevel(
            weights=[e['weight'] for e in g.es], return_levels=True)
        louvain_partition_assignment = {
            g.vs[i]['name']:
            [level.membership[i] for level in louvain_partition]
            for i in range(len(g.vs))
        }

        return louvain_partition_assignment

    elif method == 'HLC':
        coms = algorithms.hierarchical_link_community(g)

        return coms.communities
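A hedged driver for the dispatcher above, assuming python-igraph and a graph carrying the 'name' vertex attribute and 'weight' edge attribute that the function relies on:

# Hypothetical usage on a toy graph.
import igraph as ig

g = ig.Graph.Famous("Zachary")
g.vs["name"] = [str(v.index) for v in g.vs]
g.es["weight"] = [1.0] * g.ecount()
assignment = make_communities(g, "louvain")
print(list(assignment.items())[:5])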
"""
This is a test file, that you can use to validate 
"""

#%% validate that pathpy was installed correctly
import pathpy as pp
paths = pp.Paths()
paths.add_path('a,b,c')
print(paths)

#%% validate that the kernel was started in the correct root directory
t = pp.TemporalNetwork.read_file('data/temporal_clusters.tedges')
print(t)

#%% validate that infomap is installed correctly
import infomap
print("Infomap version:", infomap.Infomap().version)
print("Make sure it is at least 1.0.0-beta.14")

#%% check that relative read and write works
from pathlib import Path
Path('output').mkdir(exist_ok=True)
im = infomap.Infomap("")
im.network().readInputData("data/ninetriangles.net")
im.run()
im.writeClu("output/ninetriangles.clu")
print(im.maxTreeDepth())  # Should print 3
Example #8
    def __find_communities(self):
        if not self.datasources.files.exists('bipartite_community_detection',
                                             'find_communities', 'graph',
                                             'gexf'):
            graph = self.datasources.files.read('bipartite_graph',
                                                'get_user_hashtag_graph',
                                                'graph', 'gexf')
            graph = nx.convert_node_labels_to_integers(graph,
                                                       label_attribute='name')

            im = infomap.Infomap('--two-level --silent')

            is_multiplex = True

            # add edges and weights to network
            if is_multiplex:
                node_layer_dict = nx.get_node_attributes(graph, 'bipartite')
                for e in graph.edges(data=True):
                    # from (layer, node) to (layer, node) weight
                    im.addMultilayerLink(node_layer_dict[e[0]], e[0],
                                         node_layer_dict[e[1]], e[1],
                                         e[2]['weight'])
            else:
                for e in graph.edges(data=True):
                    im.addLink(e[0], e[1], e[2]['weight'])

            im.run()

            c = pd.DataFrame([{
                'node': n.physicalId,
                'community': n.moduleIndex()
            } for n in im.iterLeafNodes()]).set_index('node')

            # remove nodes with degree less than 30
            low_degree_nodes = [n for n, deg in graph.degree() if deg < 30]
            c = c.loc[~c.index.isin(low_degree_nodes)]

            # remove communities with only users
            c['is_hashtag'] = pd.Series(
                nx.get_node_attributes(graph, 'bipartite')).astype('bool')
            c = c.groupby('community').filter(lambda x: x['is_hashtag'].any())

            # rename communities
            communities_dict = {
                x: i
                for i, x in enumerate(c['community'].unique())
            }
            c.community = c.community.map(communities_dict.get)

            # remove nodes from graph (lone nodes, nodes with less than 30 degree and communities with no hashtag)
            graph.remove_nodes_from(set(graph.nodes) - set(c.index.tolist()))

            # add community attribute to nodes
            nx.set_node_attributes(graph,
                                   name='community',
                                   values=c.to_dict('dict')['community'])

            if is_multiplex:
                self.datasources.files.write(graph,
                                             'bipartite_community_detection',
                                             'find_communities',
                                             'multiplex_graph', 'gexf')
            else:
                self.datasources.files.write(graph,
                                             'bipartite_community_detection',
                                             'find_communities', 'graph',
                                             'gexf')
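A standalone sketch of the multilayer pattern used above, written against the newer snake_case Python API in which a multilayer node is a (layer_id, node_id) tuple (the camelCase addMultilayerLink above is the older binding; treat the ids here as hypothetical):

# Minimal two-layer toy network.
import infomap

im = infomap.Infomap("--two-level --silent")
im.add_multilayer_link((0, 1), (0, 2), 1.0)  # link inside layer 0
im.add_multilayer_link((1, 2), (1, 3), 1.0)  # link inside layer 1
im.add_multilayer_link((0, 2), (1, 2), 1.0)  # couple node 2 across layers
im.run()
for node in im.nodes:
    print(node.node_id, node.module_id)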
Example #9

#%% In [1]
import infomap
print(infomap.Infomap().version)

#%% In [2]
!infomap data/ninetriangles.net output/ -N5

#%% In [3]
from pathlib import Path
print(Path('data/ninetriangles.net').read_text())

#%% In [4]
print(Path('output/ninetriangles.tree').read_text())

#%% In [5]
infomapFileIO = infomap.Infomap("-N5")

# Read from file
infomapFileIO.network().readInputData("data/ninetriangles.net")

infomapFileIO.run()

print("Clustered in {} levels with codelength {}".format(infomapFileIO.maxTreeDepth(), infomapFileIO.codelength()))

print("Writing result to file...")
infomapFileIO.writeClu("output/ninetriangles.clu")
infomapFileIO.writeFlowTree("output/ninetriangles.ftree")
print("Done!")

print("\n.ftree file:")
print(Path('output/ninetriangles.ftree').read_text())