import operator
import pickle as pk
from collections import Counter, defaultdict
from itertools import permutations

import community  # python-louvain
import infomap
import networkx as nx
import numpy as np
import torch


def get_data_v3(cuda=True):
    # The data is obtained from pytorch-geometric to eliminate the
    # unnecessary shuffling done in Kipf's code.
    edge_index = pk.load(open("graph.pkl", "rb"))
    row, col = edge_index
    edges = [(int(u), int(v)) for u, v in zip(row.tolist(), col.tolist())]
    g = nx.Graph()
    g.add_edges_from(edges)
    print("Graph Read")
    nnodes = nx.number_of_nodes(g)
    nodes = list(g.nodes())
    #print(nodes)

    # group nodes of the original graph by core number
    cr = dict(nx.core_number(g))
    cr_vals = set(v for v in cr.values())
    cr_dict = {}
    for d in cr_vals:
        tmp = []
        for k, v in cr.items():
            if v == d:
                tmp.append(k)
        cr_dict[d] = tmp
    print("core numbers of original graph", len(cr_vals))
    print("number of nodes--", nnodes)
    cut = int(0.1 * nnodes)
    print("cut value--", cut)
    #print("number of nodes,edges ", g.number_of_nodes(), g.number_of_edges())

    # Dense adjacency with self-loops; rows/columns follow g.nodes() order.
    # (np.float was removed in NumPy 1.24; the builtin float is equivalent.)
    adj = nx.to_numpy_array(g, dtype=float)
    adj = adj + np.eye(adj.shape[0])
    print("Adjacency Made")
    adj = torch.FloatTensor(adj)

    features = pk.load(open("feature.pkl", "rb"))
    # normalize_features is not defined in this snippet; a sketch follows
    # this function.
    features = normalize_features(features.numpy())
    features = torch.FloatTensor(features)
    print("Features Normalized")

    labels = pk.load(open("label.pkl", "rb"))
    lb = labels.numpy()
    ground_dict = Counter(lb)
    classes = len(ground_dict)

    # community detection -- Infomap
    info = infomap.Infomap("--two-level --silent -s 8")
    for e in list(g.edges()):
        info.addLink(*e)
    info.run()
    c = info.getModules()  # node: community
    z = defaultdict(list)
    for u in c:
        z[c[u]].append(u)  # community: [nodes]
    #print("number of communities detected")
    #print(len(z))
    com_size = {}
    for k, v in z.items():
        com_size[k] = len(v)
    #print(com_size)

    # community detection -- Louvain
    partition = community.best_partition(g)  # node: community
    com = defaultdict(list)
    for p in partition:
        com[partition[p]].append(p)
    print("number of communities detected")
    print(len(com))

    # collect inter-community edges and their weights (edge multiplicities)
    a = set()
    a_wt = []
    for te in edges:
        u = te[0]
        v = te[1]
        com_u = partition[u]
        com_v = partition[v]
        t = (com_u, com_v)
        a.add(t)
        if com_u > com_v:
            m = (com_v, com_u)
            a_wt.append(m)
        else:
            a_wt.append(t)
    edge_wt = Counter(a_wt)
    #print(edge_wt)
    meta_wt_edge = {}
    #print(len(a))
    meta_nodes = list(com.keys())
    #print(len(meta_nodes))
    per = list(permutations(meta_nodes, 2))
    b = set()
    for cc in per:
        b.add(cc)
    meta_edge = a.intersection(b)
    for k, v in edge_wt.items():
        if k in meta_edge:
            meta_wt_edge[k] = v
    #print("meta edges")
    #print(meta_wt_edge)

    # build the meta graph: one node per Louvain community
    meta_net = nx.Graph()
    meta_net.add_nodes_from(meta_nodes)
    meta_net.add_edges_from(meta_edge)
    print("meta graph formed")
    m_nodes = nx.number_of_nodes(meta_net)
    print("number of meta nodes", m_nodes)
    m_edges = meta_net.number_of_edges()
    print("number of meta edges", m_edges)

    # per community, pick the node with the highest clustering coefficient
    # of the community-internal subgraph as a training node
    train_ids = []
    edge_set = set(edges)
    for m in meta_nodes:
        coms = com[m]
        perm = set(permutations(coms, 2))
        in_edges = edge_set.intersection(perm)
        #print(in_edges)
        in_net = nx.Graph()
        in_net.add_edges_from(in_edges)
        #print(in_net.edges())
        in_clus = nx.clustering(in_net)
        #print("clustering", in_clus)
        h = max(in_clus.items(), key=operator.itemgetter(1))[0]
        train_ids.append(h)
    #meta_edgelist = list(meta_net.edges())

    '''cores = dict(nx.core_number(meta_net))
    mst = nx.minimum_spanning_tree(meta_net, algorithm='prim')
    #print("tree edges", mst.edges())
    mst_edgelist = list(sorted(mst.edges()))
    mst_nodes = list(mst.nodes())
    mst_adj = {}
    for s in mst_nodes:
        mst_l = []
        for e in mst_edgelist:
            if s == e[0]:
                mst_l.append(e[1])
        mst_adj[s] = mst_l
    #print(mst_adj)
    #print(mst_edgelist)
    core_vals = set(v for v in cores.values())
    core_dict = {}
    for d in core_vals:
        tmp = []
        for k, v in cores.items():
            if v == d:
                tmp.append(k)
        core_dict[d] = tmp'''
    #print(core_dict)
    #print("number of cores in meta network:", len(core_dict))
    '''core_class = {}
    for k, v in core_dict.items():
        cls = []
        for m in v:
            nd = z[m]
            for x in nd:
                cl = lb[x]
                cls.append(cl)
        core_lb = Counter(cls)
        mm = max(v for k, v in core_lb.items())
        for k1, v1 in core_lb.items():
            if v1 == mm:
                core_class[k] = k1
    print("class information per core--")
    print(core_class)
    #The class information/core is printed
    com_class = {}
    for mn in meta_nodes:
        cls = []
        nd = z[mn]
        for x in nd:
            cl = lb[x]
            cls.append(cl)
        com_lb = Counter(cls)
        mm = max(v for k, v in com_lb.items())
        for k1, v1 in com_lb.items():
            if v1 == mm:
                com_class[mn] = k1
    print("class information per community--")
    #print(com_class)
    #The class information/community is printed
    com_cls = []
    for k, v in com_class.items():
        com_cls.append(v)
    print(Counter(com_cls))
    sorted_core = dict(OrderedDict(sorted(core_dict.items(), reverse=True)))
    reverse_core = dict(OrderedDict(sorted(sorted_core.items())))'''
    '''t_n = []
    for v in sorted_core[25]:
        for t in z[v]:
            t_n.append(t)
    t_lb = []
    for t in t_n:
        t_lb.append(lb[t])'''
    #for checking the class labels distribution in each core
    #build 2nd order network--
    '''meta_info = infomap.Infomap("--two-level --silent -s 8")
    for e in list(meta_net.edges()):
        meta_info.addLink(*e)
    meta_info.run()
    cc = meta_info.getModules()  #node:community
    zz = defaultdict(list)
    for u in cc:
        zz[cc[u]].append(u)  #community:[nodes]
    print("number of meta communities detected")
    print(len(zz))
    meta_coms = {}
    for k, v in zz.items():
        cls = []
        for b in v:
            lbl = com_class[b]
            cls.append(lbl)
        metacom_lb = Counter(cls)
        meta_coms[k] = metacom_lb
    print("class information of meta communities of 2nd order network--")
    print(meta_coms)
    meta_cr = dict(nx.core_number(meta_net))
    meta_cr_vals = set(v for v in meta_cr.values())
    meta_cr_dict = {}
    for d in meta_cr_vals:
        tmp = []
        for k, v in meta_cr.items():
            if v == d:
                tmp.append(k)
        meta_cr_dict[d] = tmp
    print("cores in 2nd order network--")
    print(meta_cr_dict)
    #Selection of training nodes
    core_window = 3
    t_cores = []
    cnt = 0
    for cr, coms in sorted_core.items():
        t_cores.append(cr)
        cnt += 1
        if cnt == core_window:
            break
    print("t_cores--", t_cores)
    #print("t_coms--", len(t_coms))
    #build adjacency matrix of edges--
    t_coms = core_dict[7]
    p = len(t_coms)
    rows, cols = (p, p)
    adje = [[0] * cols] * rows
    for me in meta_edgelist:
        u = me[0]
        v = me[1]
        if u in t_coms:
            if v in t_coms:
                #h += 1
                ui = t_coms.index(u)
                vi = t_coms.index(v)
                adje[ui][vi] += 1
    #print(adje)'''
    '''for me in meta_edge:
        u = me[0]
        if u == 5:
            print(me)'''
    '''t_arr = []
    for i in range(core_window):
        t_arr.append(0)
    tr_dict = {}
    for cls in range(classes):
        tr_nodes = []
        fl = 0
        ar = 0
        cnt_cls = int(0.1 * (ground_dict[cls]))
        print("cls and count--", cls, cnt_cls)
        while True:
            for cr in t_cores:
                coms = core_dict[cr]
                j = t_arr[ar]
                cm = coms[j]
                j = (j + 1) % len(coms)
                t_arr[ar] = j
                ar += 1
                #cm = int(np.random.choice(coms, 1))
                nn = z[cm]
                n = int(np.random.choice(nodes, 1))
                l = lb[n]
                if l == cls and n not in tr_nodes:
                    tr_nodes.append(n)
                if len(tr_nodes) == cnt_cls:
                    fl = 1
                    break
                if ar == core_window:
                    ar = 0
            if fl == 1:
                tr_dict[cls] = tr_nodes
                break
    t_lbls = []
    for k, v in tr_dict.items():
        for t in v:
            lbl = lb[t]
            t_lbls.append(lbl)
    print("class level distribution--training labels", Counter(t_lbls))
    train_ids = []
    val_ids = []
    test_ids = []
    test_mask_ids = []
    for k, v in tr_dict.items():
        for t in v:
            train_ids.append(t)
    #for n in nodes2:
    #    train_ids.append(n)
    f = 0
    while True:
        if len(train_ids) < cut:
            r = int(np.random.choice(nodes, 1, replace=False))
            if r not in train_ids:
                train_ids.append(r)
        if len(train_ids) == cut:
            f = 1
        if f == 1:
            break
    #print("train ids--", len(train_ids))'''
    #sorted_core = dict(OrderedDict(sorted(core_dict.items(), reverse=True)))
    #print(sorted_core)
    #c_meta_nodes = sorted_core[7]
    #y = int(np.random.choice(c_meta_nodes, 1))
    #train_ids = []
    #train_coms = bfs(mst_adj, y)
    #print(train_coms)
    '''f = 0
    while True:
        for tc in train_coms:
            yy = z[tc]
            x = int(np.random.choice(yy, 1))
            train_ids.append(x)
            if len(train_ids) == cut:
                f = 1
                break
        if f == 1:
            break
        else:
            continue'''
    #print(train_ids)
    #train-test nodes choice
    '''for m in meta_nodes:
        f_nodes = z[m]
        x = int(np.random.choice(f_nodes, 1, replace=False))
        train_ids.append(x)'''

    # split the remaining nodes into validation and test sets
    val_ids = []
    test_ids = []
    rm_ids = []
    for n in nodes:
        if n not in train_ids:
            #if n not in nodes2:
            rm_ids.append(n)
    #print("test ids--", len(test_ids))
    #val_ids.extend(rm_ids[0:int(0.1 * len(nodes))])
    val_ids = np.random.choice(rm_ids, len(train_ids), replace=False)
    r_ids = []
    for n in rm_ids:
        if n not in val_ids:
            r_ids.append(n)
    #val_ids = np.random.choice(test_ids, int(0.1 * len(nodes)), replace=False)
    test_ids = np.random.choice(r_ids, 1084, replace=False)
    #test_mask_ids = np.random.choice(test_ids, 1084, replace=False)

    with open("test_labels_infomap.txt", "wb") as fp:
        pk.dump(test_ids, fp)
    with open("training_labels_infomap.txt", "wb") as fp:
        pk.dump(train_ids, fp)

    idx_train = np.array(train_ids)
    idx_val = np.array(val_ids)
    idx_test = np.array(test_ids)
    print("Train Validation Test ", len(idx_train), len(idx_val), len(idx_test))

    if cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        #idx_train = idx_train.cuda()
        #idx_val = idx_val.cuda()
        #idx_test = idx_test.cuda()

    #return g, adj, features, labels, idx_train, idx_val, idx_test
    return idx_train, idx_test, idx_val
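# NOTE: normalize_features is called above but not defined in this file.
# The sketch below is an assumption modeled on the row normalization used
# in Kipf & Welling's GCN reference code, not necessarily the author's
# exact implementation.
def normalize_features(mx):
    """Row-normalize a dense feature matrix so each row sums to 1;
    all-zero rows are left unchanged to avoid division by zero."""
    rowsum = mx.sum(axis=1)
    r_inv = np.zeros_like(rowsum, dtype=float)
    nonzero = rowsum != 0
    r_inv[nonzero] = 1.0 / rowsum[nonzero]
    return mx * r_inv[:, None]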
def infomap(
    g,
    seed=None,
    options="--inner-parallelization --silent",
    markov_time=1.0,
    number_of_modules=None,
    return_tree=False,
    directed=False,
):
    """
    Infomap is based on ideas of information theory. The algorithm uses the
    probability flow of random walks on a network as a proxy for information
    flows in the real system, and it decomposes the network into modules by
    compressing a description of the probability flow.

    :param g: a networkx/igraph object
    :param seed: the seed for the random number generator (default: None)
    :param options: custom command line options (default: "--inner-parallelization --silent")
    :param markov_time: tweak the transition likelihood of the random walker (default: 1.0)
    :param number_of_modules: preferred number of modules (default: None)
    :param return_tree: whether to return the cluster tree generated by the algorithm (default: False)
    :param directed: whether to treat a directed graph as directed (default: False)

    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.infomap(G)

    :References:

    Rosvall M, Bergstrom CT (2008) `Maps of random walks on complex networks
    reveal community structure. <https://www.pnas.org/content/105/4/1118/>`_
    Proc Natl Acad Sci USA 105(4):1118–1123

    .. note:: Reference implementation: https://pypi.org/project/infomap/
    """
    if imp is None:
        raise ModuleNotFoundError(
            "Optional dependency not satisfied: "
            "install infomap to use the selected feature.")

    g = convert_graph_formats(g, nx.Graph)
    g1 = nx.convert_node_labels_to_integers(g, label_attribute="name")
    name_map = nx.get_node_attributes(g1, "name")
    coms_to_node = defaultdict(list)

    options_compiled = options + f" --markov-time {markov_time}"
    if number_of_modules:
        options_compiled += f" --preferred-number-of-modules {number_of_modules}"
    if seed is not None:
        options_compiled += f" --seed {seed}"
    if directed:
        options_compiled += " -d"

    im = imp.Infomap(options_compiled)
    for u, v, data in g1.edges(data=True):
        # Default to weight 1.0 so unweighted graphs (e.g. the karate club
        # example above) do not raise a KeyError.
        im.add_link(u, v, weight=data.get("weight", 1.0))
    im.run()

    for depth in range(1, im.maxTreeDepth()):
        coms_to_node = defaultdict(list)
        for node in im.iterTree():
            # https://mapequation.github.io/infomap/
            # Guess: maxClusterLevel == moduleIndexLevel
            # moduleIndexLevel : int
            #     The depth from the root on which to advance the moduleIndex
            #     accessed from the iterator for a tree with multiple levels.
            #     Set to 1 to have moduleIndex() return the coarsest level
            #     (top modules), set to 2 for second-level modules, and -1
            #     (default) for the finest level of modules (bottom level).
            if node.isLeaf():
                nid = node.physicalId
                module = node.path[:depth]
                nm = name_map[nid]
                coms_to_node[module].append(nm)
        break  # only the coarsest level (depth 1) is used for the clustering

    coms_infomap = [list(c) for c in coms_to_node.values()]
    clustering = NodeClustering(
        coms_infomap,
        g,
        "Infomap",
        method_parameters={"options": options, "seed": seed},
    )

    if not return_tree:
        return clustering

    # create a cluster tree
    D = nx.DiGraph()
    D.add_nodes_from(g.nodes(data=True))
    for node in im.iterTree(maxClusterLevel=-1):
        node_path_str = [str(c) for c in node.path]
        if node.isRoot():
            D.add_node("root")
        else:
            if node.isLeaf():
                node_key = g1.nodes[node.physicalId]["name"]
            else:
                node_key = "tree_" + "_".join(node_path_str)
            D.add_node(node_key)
            if len(node.path) == 1:
                parent_key = "root"
            else:
                parent_key = "tree_" + "_".join(node_path_str[:-1])
            assert D.has_node(parent_key)
            D.add_edge(parent_key, node_key)
    _sum_attrs_in_tree(D)
    return clustering, D
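# Usage sketch for the wrapper above (a hypothetical driver, not part of
# cdlib): a larger markov_time coarsens the partition, and
# number_of_modules nudges Infomap toward a preferred module count.
import networkx as nx

G = nx.karate_club_graph()
coarse = infomap(G, seed=42, markov_time=2.0)
nudged = infomap(G, seed=42, number_of_modules=3)
print(len(coarse.communities), len(nudged.communities))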
import infomap

eta = 0.3
im = infomap.Infomap(f"--two-level --meta-data-rate {eta}")

# Add weight as an optional third argument
im.add_link(0, 1)
im.add_link(0, 2)
im.add_link(0, 3)
im.add_link(1, 0)
im.add_link(1, 2)
im.add_link(2, 1)
im.add_link(2, 0)
im.add_link(3, 0)
im.add_link(3, 4)
im.add_link(3, 5)
im.add_link(4, 3)
im.add_link(4, 5)
im.add_link(5, 4)
im.add_link(5, 3)

im.set_meta_data(0, 1)
im.set_meta_data(1, 1)
im.set_meta_data(2, 2)
im.set_meta_data(3, 2)
im.set_meta_data(4, 3)
im.set_meta_data(5, 3)

im.run()

print(f"\nFound {im.num_top_modules} modules with codelength: {im.codelength}")
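# Follow-up: inspect which module each node landed in; get_modules()
# returns a {node_id: module_id} dict in the same Python API used above.
for node_id, module_id in sorted(im.get_modules().items()):
    print(f"node {node_id} -> module {module_id}")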
import infomap

im = infomap.Infomap("--two-level --verbose")

stateNetwork = """
*Vertices 4
1 "PRE"
2 "SCIENCE"
3 "PRL"
4 "BIO"
# *ngrams
# 1 2 3
# 1 2 2 3
# 4 2 4
*States
1 2 "1 2"
2 3 "2 3"
3 2 "1 2 2"
4 2 "4 2"
5 4 "2 4"
*Links
1 2
3 2
4 5
"""

im.set_name(1, "PRE")
im.set_name(2, "SCIENCE")
im.set_name(3, "PRL")
im.set_name(4, "BIO")

# The snippet originally stopped after naming the physical nodes. An
# assumption about the intent: mirror the *States and *Links sections
# above with the state-network API, then run.
im.add_state_node(1, 2)  # state 1 lives on physical node 2
im.add_state_node(2, 3)
im.add_state_node(3, 2)
im.add_state_node(4, 2)
im.add_state_node(5, 4)

im.add_link(1, 2)
im.add_link(3, 2)
im.add_link(4, 5)

im.run()
print(f"Found {im.num_top_modules} modules with codelength: {im.codelength}")
import infomap

name = "Email-Enron"
filename = f"../dataset/{name}.txt"

im = infomap.Infomap()

# You can read a network with the method read_file,
# which by default will accumulate to existing network data
accumulate = False
im.read_file(filename, accumulate)

im.run("-N5")

print(
    f"Found {im.max_depth} levels with {im.num_leaf_modules} leaf modules in "
    f"{im.num_top_modules} top modules and codelength: {im.codelength}"
)
print(f"All codelengths: {im.codelengths}")

# print("Tree:\n# path node_id module_id flow")
# for node in im.nodes:
#     print(f"{node.path} {node.node_id} {node.module_id} {node.flow}")

for module_level in range(1, im.max_depth):
    print(
        f"Modules at level {module_level}: {im.get_modules(module_level).values()}"
    )

# print("\nModules at all levels:")
# for node_id, modules in im.get_multilevel_modules().items():
#     print(f"{node_id}: {modules}")
def make_communities(g, method):
    '''
    Function to run community detection.

    Inputs:
        g : igraph object
            igraph network object representing the raw music data
        method : string
            String identifying the clustering method to be used.
            Options are (case-sensitive):
                1) infomap
                2) LPM
                3) louvain
                4) HLC

    Returns:
        assignment : dict or list
            Mapping from vertex name to community membership list
            (infomap/LPM/louvain), or a list of communities (HLC)
    '''
    print("*******Inside main comm function *******")
    if method == "infomap":
        edge_tuples = [edge.tuple for edge in g.es]
        im = infomap.Infomap()
        im.add_links(edge_tuples)
        im.run("-d -N 10")
        modules = im.get_multilevel_modules()
        # igraph non-hierarchical version:
        # infomap_partition = g.community_infomap(edge_weights='weight')
        infomap_partition_assignment = {
            g.vs[i]['name']: modules[i] for i in range(g.vcount())
        }
        return infomap_partition_assignment
    elif method == "LPM":
        lpm_partition = g.community_label_propagation(weights='weight')
        lpm_partition_assignment = {
            g.vs[i]['name']: [lpm_partition.membership[i]]
            for i in range(g.vcount())
        }
        return lpm_partition_assignment
    elif method == 'louvain':
        louvain_partition = g.community_multilevel(
            weights=[e['weight'] for e in g.es], return_levels=True)
        louvain_partition_assignment = {
            g.vs[i]['name']: [level.membership[i] for level in louvain_partition]
            for i in range(len(g.vs))
        }
        return louvain_partition_assignment
    elif method == 'HLC':
        coms = algorithms.hierarchical_link_community(g)
        return coms.communities
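# Hypothetical driver for make_communities (not in the original file):
# builds a small weighted igraph graph carrying the vertex 'name' and
# edge 'weight' attributes that the branches above rely on.
import igraph as ig

g = ig.Graph.Famous("Zachary")
g.vs["name"] = [f"v{i}" for i in range(g.vcount())]
g.es["weight"] = [1.0] * g.ecount()
assignment = make_communities(g, "louvain")
print(list(assignment.items())[:3])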
""" This is a test file, that you can use to validate """ #%% validate that pathpy was installed correct import pathpy as pp paths = pp.Paths() paths.add_path('a,b,c') print(paths) #%% validate that kernel was started in correct root directory t = pp.TemporalNetwork.read_file('data/temporal_clusters.tedges') print(t) #%% validate that infomap is installed correctly import infomap print("Infomap version:", infomap.Infomap().version) print("Make sure it is at least 1.0.0-beta.14") #%% check that relative read and write works from pathlib import Path Path('output').mkdir(exist_ok=True) im = infomap.Infomap("") im.network().readInputData("data/ninetriangles.net") im.run() im.writeClu("output/ninetriangles.clu") print(im.maxTreeDepth()) # Should print 3
def __find_communities(self):
    if not self.datasources.files.exists(
            'bipartite_community_detection', 'find_communities', 'graph', 'gexf'):
        graph = self.datasources.files.read(
            'bipartite_graph', 'get_user_hashtag_graph', 'graph', 'gexf')
        graph = nx.convert_node_labels_to_integers(graph, label_attribute='name')

        im = infomap.Infomap('--two-level --silent')
        is_multiplex = True

        # add edges and weights to the network
        if is_multiplex:
            node_layer_dict = nx.get_node_attributes(graph, 'bipartite')
            for e in graph.edges(data=True):
                # from (layer, node) to (layer, node), weight
                im.addMultilayerLink(node_layer_dict[e[0]], e[0],
                                     node_layer_dict[e[1]], e[1],
                                     e[2]['weight'])
        else:
            for e in graph.edges(data=True):
                im.addLink(e[0], e[1], e[2]['weight'])
        im.run()

        c = pd.DataFrame([{
            'node': n.physicalId,
            'community': n.moduleIndex()
        } for n in im.iterLeafNodes()]).set_index('node')

        # remove nodes with degree less than 30
        low_degree_nodes = [n for n, deg in graph.degree() if deg < 30]
        c = c.loc[~c.index.isin(low_degree_nodes)]

        # remove communities that contain only users (no hashtags)
        c['is_hashtag'] = pd.Series(
            nx.get_node_attributes(graph, 'bipartite')).astype('bool')
        c = c.groupby('community').filter(lambda x: x['is_hashtag'].any())

        # rename communities with consecutive integer ids
        communities_dict = {x: i for i, x in enumerate(c['community'].unique())}
        c.community = c.community.map(communities_dict.get)

        # remove nodes from graph (lone nodes, nodes with degree below 30,
        # and communities without hashtags)
        graph.remove_nodes_from(set(graph.nodes) - set(c.index.tolist()))

        # add community attribute to nodes
        nx.set_node_attributes(graph, name='community',
                               values=c.to_dict('dict')['community'])

        if is_multiplex:
            self.datasources.files.write(
                graph, 'bipartite_community_detection', 'find_communities',
                'multiplex_graph', 'gexf')
        else:
            self.datasources.files.write(
                graph, 'bipartite_community_detection', 'find_communities',
                'graph', 'gexf')
#%% In [1]
import infomap

print(infomap.Infomap().version)

#%% In [2]
!infomap data/ninetriangles.net output/ -N5

#%% In [3]
from pathlib import Path

print(Path('data/ninetriangles.net').read_text())

#%% In [4]
print(Path('output/ninetriangles.tree').read_text())

#%% In [5]
infomapFileIO = infomap.Infomap("-N5")

# Read from file
infomapFileIO.network().readInputData("data/ninetriangles.net")
infomapFileIO.run()
print("Clustered in {} levels with codelength {}".format(
    infomapFileIO.maxTreeDepth(), infomapFileIO.codelength()))

print("Writing result to file...")
infomapFileIO.writeClu("output/ninetriangles.clu")
infomapFileIO.writeFlowTree("output/ninetriangles.ftree")
print("Done!")

print("\n.ftree file:")
print(Path('output/ninetriangles.ftree').read_text())
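#%% In [6]
# Follow-up (a hypothetical extra cell): the .clu file written above maps
# node ids to module ids and can be inspected directly.
print(Path('output/ninetriangles.clu').read_text())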