import networkx as nx
from infomap import Infomap


def get_infomap_communities(graph: nx.Graph, reddit_edge_weight=None):
    im = Infomap("--flow-model undirected -N 10 --prefer-modular-solution")

    ## im only works with numerical ids, so we need to save a mapping
    ids_to_names = {}
    names_to_ids = {}
    for index, node in enumerate(graph.nodes):
        ids_to_names[index] = node
        names_to_ids[node] = index
        im.add_node(index, name=node)

    # iterate over edges and add them to the im tree, optionally adding the weight
    for e1, e2, data in graph.edges(data=True):
        e1_id = names_to_ids[e1]
        e2_id = names_to_ids[e2]
        weight = data[reddit_edge_weight] if reddit_edge_weight else None
        # compare against None so that zero-weight edges keep their weight
        link = (e1_id, e2_id, weight) if weight is not None else (e1_id, e2_id)
        im.add_link(*link)

    im.run()
    for node in im.tree:
        if node.is_leaf:
            graph.nodes[ids_to_names[node.node_id]]["infomap_community"] = node.module_id
    return graph
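
# A minimal usage sketch for get_infomap_communities() (not from the original
# source): it assumes an undirected networkx graph whose edges carry a
# "weight" attribute, here faked on the karate club graph.
import networkx as nx

g = nx.karate_club_graph()
nx.set_edge_attributes(g, 1.0, "weight")
g = get_infomap_communities(g, reddit_edge_weight="weight")
print(nx.get_node_attributes(g, "infomap_community"))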
def infomap(self, inter_edge, threshold, update_method=None, **kwargs):
    '''
    Infomap helper function.
    '''
    im = Infomap("--two-level --directed --silent")

    ######### Make Network
    ## add intra-layer edges from the thresholded adjacency matrix of each layer
    thresholded_adjacency = []
    for l in range(self.length):
        thresholded_adjacency.append(self.threshold(self.list_adjacency[l], thresh=threshold))
        # iterate over rows (n1) and columns (n2) of layer l's adjacency matrix
        for n1, e in enumerate(thresholded_adjacency[l]):
            for n2, w in enumerate(e):
                s = MultilayerNode(layer_id=l, node_id=n1)
                t = MultilayerNode(layer_id=l, node_id=n2)
                im.add_multilayer_link(s, t, w)
                im.add_multilayer_link(t, s, w)

    ## add inter-layer edges
    if update_method == 'local' or update_method == 'global':
        updated_interlayer = self.update_interlayer(kwargs['spikes'], 0, inter_edge, 0.1, update_method)
        for l in range(self.length - 1):
            for k in range(self.size):  # number of nodes (60 in this multilayer network)
                s = MultilayerNode(layer_id=l, node_id=k)
                t = MultilayerNode(layer_id=l + 1, node_id=k)
                im.add_multilayer_link(s, t, updated_interlayer[l][k])
                im.add_multilayer_link(t, s, updated_interlayer[l][k])
    elif update_method == 'neighborhood':
        updated_interlayer_indices, updated_interlayer_weights = self.get_normalized_outlinks(thresholded_adjacency, inter_edge)
        for l in range(self.length - 1):
            for k in range(self.size):
                w, nbr = self.neighborhood_flow(l, k, updated_interlayer_indices, updated_interlayer_weights, threshold)
                for n in nbr:
                    s = MultilayerNode(layer_id=l, node_id=k)
                    t = MultilayerNode(layer_id=l + 1, node_id=n)
                    im.add_multilayer_link(s, t, w)
                    im.add_multilayer_link(t, s, w)
    elif update_method is None:
        for l in range(self.length - 1):
            for k in range(self.size):  # number of nodes (60 in this multilayer network)
                s = MultilayerNode(layer_id=l, node_id=k)
                t = MultilayerNode(layer_id=l + 1, node_id=k)
                im.add_multilayer_link(s, t, inter_edge)
                im.add_multilayer_link(t, s, inter_edge)

    im.run()
    return im
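
# A minimal, self-contained sketch of the multilayer API used above (hedged:
# the layer ids, node ids, and weights here are made up). Infomap also accepts
# plain (layer_id, node_id) tuples in place of MultilayerNode objects.
from infomap import Infomap

im = Infomap("--two-level --directed --silent")
im.add_multilayer_link((0, 1), (0, 2), 1.0)  # intra-layer link in layer 0
im.add_multilayer_link((1, 1), (1, 3), 1.0)  # intra-layer link in layer 1
im.add_multilayer_link((0, 1), (1, 1), 0.5)  # inter-layer link for node 1
im.run()
print(f"Found {im.num_top_modules} top modules")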
import networkx as nx
import numpy as np
import community  # python-louvain
from networkx.algorithms.community import asyn_fluidc, greedy_modularity_communities
from sklearn import metrics
from infomap import Infomap


def get_benchmark_amis(G, gt):
    # Louvain
    louv = community.best_partition(G)
    louvc = []
    for idx, val in louv.items():
        louvc.append(val)
    louv_ami = metrics.adjusted_mutual_info_score(gt, louvc)

    # Fluid communities
    fluid = asyn_fluidc(G, 2)
    list_nodes = [set(c) for c in fluid]
    est_idx = np.zeros((nx.number_of_nodes(G),))
    for i in range(len(list_nodes)):
        for idx in list_nodes[i]:
            est_idx[idx] = i
    fluid_ami = metrics.adjusted_mutual_info_score(gt, est_idx)

    # FastGreedy
    list_nodes = list(greedy_modularity_communities(G))
    est_idx = np.zeros((nx.number_of_nodes(G),))
    for i in range(len(list_nodes)):
        for idx in list_nodes[i]:
            est_idx[idx] = i
    fg_ami = metrics.adjusted_mutual_info_score(gt, est_idx)

    # Infomap (add each edge in both directions)
    im = Infomap()
    for node in G.nodes:
        im.add_node(node)
    for edge in G.edges:
        im.add_link(edge[0], edge[1])
        im.add_link(edge[1], edge[0])
    # Run the Infomap search algorithm to find optimal modules
    im.run()
    # print(f"Found {im.num_top_modules} modules with Infomap")
    est_idx = np.zeros((nx.number_of_nodes(G),))
    for node in im.tree:
        if node.is_leaf:
            est_idx[node.node_id] = node.module_id
    im_ami = metrics.adjusted_mutual_info_score(gt, est_idx)

    benchmark = {'Louvain': louv_ami,
                 'Fluid': fluid_ami,
                 'FastGreedy': fg_ami,
                 'Infomap': im_ami}
    return benchmark
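
# A usage sketch (not from the original source), assuming the imports above;
# the ground truth is derived from the karate club faction labels.
G = nx.karate_club_graph()
gt = [0 if G.nodes[n]['club'] == 'Mr. Hi' else 1 for n in G.nodes]
print(get_benchmark_amis(G, gt))  # e.g. {'Louvain': ..., 'Fluid': ..., ...}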
import networkx as nx


def run_infomap_alt(g):
    from infomap import Infomap
    n2num = {u: num for num, u in enumerate(g)}
    num2u = sorted(n2num, key=lambda x: n2num[x])
    g_num = nx.Graph()
    for n, n1 in g.edges():
        g_num.add_edge(n2num[n], n2num[n1])
    im = Infomap("--undirected")
    for n, n1 in g_num.edges():
        im.addLink(n, n1)
    im.run()
    part = {num2u[i]: m for i, m in im.getModules().items()}
    return part
from bidict import bidict
from infomap import Infomap


def infomap_communities(graph):
    node2i = bidict({n: i for i, n in enumerate(graph.nodes)})
    if len(node2i) == 0:
        return {}
    infomapWrapper = Infomap()
    for (n1, n2) in graph.edges():
        infomapWrapper.addLink(node2i[n1], node2i[n2])
    infomapWrapper.run()
    # map module ids back to the original node labels
    to_return = {}
    for n, c in infomapWrapper.getModules().items():
        to_return[node2i.inv[n]] = c
    return to_return
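
# Short usage sketch (assumed setup: `pip install bidict infomap`); any
# networkx graph works since nodes are reindexed through the bidict.
import networkx as nx

print(infomap_communities(nx.karate_club_graph()))  # {node: module_id, ...}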
import networkx as nx
from infomap import Infomap


def eval_map_equation(g, partitionobj):
    """Return the map equation score (codelength) for a given partition."""
    g1 = nx.convert_node_labels_to_integers(g, label_attribute="name")
    partition = partitionobj.communities
    part = dict()
    for i in range(len(partition)):
        for ind in partition[i]:
            part[ind] = i
    im = Infomap("--silent --no-infomap")  # --no-infomap: don't change the partition
    for e in g1.edges():
        im.addLink(e[0], e[1])
    im.initial_partition = part
    im.run()
    return im.codelength
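
# Usage sketch (not from the original source): `partitionobj` only needs a
# `.communities` list of node lists, as provided e.g. by cdlib's
# NodeClustering; a SimpleNamespace stand-in is used here.
from types import SimpleNamespace

g = nx.karate_club_graph()
part = SimpleNamespace(communities=[list(range(17)), list(range(17, 34))])
print(eval_map_equation(g, part))  # codelength in bits for this partition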
import numpy as np
import pandas as pd
from infomap import Infomap


def info_map(mob_date, od_df=od_df, include_internal=False, silent=True):
    # od_df is an external helper that reduces the mobility table to
    # 'from', 'to' and 'weight' columns
    date = mob_date['date'].unique()[0]
    mob_date = od_df(mob_date).reset_index(drop=True)

    if not include_internal:
        mob_date = mob_date.loc[mob_date['from'] != mob_date['to'], :]
        mob_date = mob_date.reset_index(drop=True)

    # quadkeys exceed C integer max values - map nodes to small int ids
    unique_qks = np.unique(mob_date['from'].astype('int').tolist() +
                           mob_date['to'].astype('int').tolist())
    qk_ref = {}
    for i, qk in enumerate(unique_qks):
        qk_ref[qk] = i
    qk_ref_inv = {v: k for k, v in qk_ref.items()}

    if silent:
        im_str = "--two-level --directed --seed 1000 --silent"
    else:
        im_str = "--two-level --directed --seed 1000"

    im = Infomap(im_str)
    for i in range(0, len(mob_date['to'])):
        row = mob_date.loc[i, :]
        im.addLink(qk_ref[int(row['from'])], qk_ref[int(row['to'])], row['weight'])
    im.run()

    clusters = []
    for node in im.tree:
        if node.is_leaf:
            clusters.append({
                'date': date,
                'quadkey': qk_ref_inv[node.node_id],
                'cluster': node.module_id,
                'flow': node.flow
            })
    return pd.DataFrame(clusters)
import networkx as nx
from infomap import Infomap

# read_mentions, DATA_PATH and x_mentions are external to this snippet


def findCommunitiesInfomap(G, v_mentions=False):
    im = Infomap("--two-level --flow-model directed")
    if v_mentions:
        read_mentions(DATA_PATH, G)  # populates the global x_mentions

    # map users to consecutive integer node ids
    user_node = dict()
    node_user = []
    l = 0
    for i, n in enumerate(G.nodes):
        l = i
        user_node[n] = l
        node_user.append(n)
    last_l = l

    if not v_mentions:
        for e in G.edges:
            im.addLink(user_node[e[0]], user_node[e[1]])
    else:
        for k, v in x_mentions.items():
            for m in v:
                if m not in user_node:  # `not user_node.get(m)` would misfire on id 0
                    l += 1  # assign a fresh id to each previously unseen user
                    user_node[m] = l
                    node_user.append(m)
                im.addLink(user_node[k], user_node[m])

    im.run()
    print("Found %d top modules with codelength: %f" % (im.num_top_modules, im.codelength))

    communities = {}
    for node_id, module_id in im.modules:
        if node_id <= last_l:
            communities[node_user[node_id]] = module_id
    nx.set_node_attributes(G, communities, 'community')
    return im.num_top_modules
from infomap import Infomap


class MyInfomap:

    def __init__(self):
        self.handler = Infomap("--two-level")

    def add_network_edge(self, first_id, second_id, weight=1.00):
        self.handler.addLink(first_id, second_id, weight)

    def detect_communities(self):
        self.handler.run()
        communities = {}
        for node in self.handler.iterTree():
            if node.isLeaf():
                if node.moduleIndex() in communities:
                    communities[node.moduleIndex()].append(node.physicalId)
                else:
                    communities[node.moduleIndex()] = [node.physicalId]
        return communities
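
# Usage sketch for MyInfomap (hedged: iterTree/isLeaf/moduleIndex belong to
# the legacy infomap 0.x bindings, so this assumes that version is installed).
mi = MyInfomap()
mi.add_network_edge(0, 1)
mi.add_network_edge(1, 2)
mi.add_network_edge(2, 0)
print(mi.detect_communities())  # e.g. {0: [0, 1, 2]}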
import igraph
from infomap import Infomap


def INFOMAP(g, weights=None):
    # default every edge weight to 1 (one weight per edge, so ecount, not vcount)
    if 'weight' not in g.es.attribute_names():
        g.es['weight'] = [1.] * g.ecount()
    D = dict(zip(g.get_edgelist(), g.es['weight'])) if not weights else dict(
        zip(g.get_edgelist(), weights))
    F = Infomap("--two-level")
    for a, b in D:
        F.addLink(a, b, D[(a, b)])
    F.run()
    T = F.tree
    M = {node.physIndex: node.moduleIndex() for node in T.leafIter()}
    # assign each isolated vertex to a singleton module of its own
    L = max(M.values())
    isolated = set(range(g.vcount())).difference(M.keys())
    i = 1
    for n in isolated:
        M[n] = L + i
        i += 1
    g.vs['c'] = [M[i] for i in range(g.vcount())]
    return igraph.VertexClustering.FromAttribute(g, 'c')
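
# Hypothetical usage sketch, assuming python-igraph plus the legacy infomap
# bindings (tree/leafIter/physIndex) that INFOMAP() is written against.
g = igraph.Graph.Famous('Zachary')
clustering = INFOMAP(g)  # igraph.VertexClustering
print(clustering.membership)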
import numpy as np
import pandas as pd
from infomap import Infomap


def communities_im(mob, silent=True):
    mob = od_df(mob).reset_index(drop=True)

    # quadkeys exceed C integer max values - map nodes to small int ids
    unique_qks = np.unique(mob['from'].astype('int').tolist() +
                           mob['to'].astype('int').tolist())
    qk_ref = dict(zip(unique_qks, range(0, len(unique_qks))))
    qk_ref_i = {v: k for k, v in qk_ref.items()}

    im_str = "--two-level --directed --seed 1000"
    if silent:
        im_str = im_str + " --silent"

    im = Infomap(im_str)
    for i in range(0, len(mob['to'])):
        row = mob.loc[i, :]
        im.addLink(qk_ref[int(row['from'])], qk_ref[int(row['to'])], row['weight'])
    im.run()

    clusters = []
    for node in im.tree:
        if node.is_leaf:
            clusters.append({
                'quadkey': qk_ref_i[node.node_id],
                'cluster': node.module_id,
                'flow': node.flow
            })
    return pd.DataFrame(clusters)
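
# Hypothetical usage sketch for communities_im(): `od_df` is an external
# helper (not shown in this snippet) that reduces a raw mobility table to
# 'from', 'to' and 'weight' columns, so the call is left commented out.
mob = pd.DataFrame({
    'from': ['02310', '02310', '02311'],
    'to': ['02311', '02312', '02312'],
    'weight': [10.0, 5.0, 3.0],
})
# clusters = communities_im(mob)  # one row per quadkey: cluster id and flow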
def _apply_infomap(self):
    """Partition network with the infomap algorithm.

    Annotates nodes with 'community' and stores the number of communities found."""
    infomapWrapper = Infomap("--two-level --directed")
    print("Building Infomap network from a NetworkX graph...")
    for e in self.graph.edges():
        infomapWrapper.addLink(*e)
    print("Find communities with Infomap...")
    infomapWrapper.run()
    print("Found %d top modules with codelength: %f" %
          (infomapWrapper.numTopModules(), infomapWrapper.codelength()))
    communities = {}
    for node in infomapWrapper.iterTree():
        if node.isLeaf():
            communities[node.physicalId] = node.moduleIndex()
    nx.set_node_attributes(self.graph, name='community', values=communities)
    self.graph = nx.relabel.relabel_nodes(self.graph, self.catalog, copy=True)
    self.num_modules = infomapWrapper.numTopModules()
    self.community_labels = set(
        nx.get_node_attributes(self.graph, "community").values())
runtimes['louvain-noisy'] = runtime

###########################################################
###########################################################
# Method: Infomap
###########################################################

# Raw
time_s = time.time()
im = Infomap()
for node in G.nodes:
    im.add_node(node)
for edge in G.edges:
    im.add_link(edge[0], edge[1])
    im.add_link(edge[1], edge[0])
# Run the Infomap search algorithm to find optimal modules
im.run()
# print(f"Found {im.num_top_modules} modules with Infomap")
est_idx = np.zeros((num_nodes, ))
for node in im.tree:
    if node.is_leaf:
        est_idx[node.node_id] = node.module_id
runtime = time.time() - time_s

mutual_info = metrics.adjusted_mutual_info_score(database['labels'], est_idx)
scores['infomap-raw'] = mutual_info
runtimes['infomap-raw'] = runtime

# Noisy
print('---Running Infomap with noisy data---\n')
time_s = time.time()
im = Infomap()
import networkx as nx
import numpy as np
from sklearn.model_selection import ParameterGrid

from infomap import Infomap

im = Infomap(two_level=True, silent=True, num_trials=10)
im.add_networkx_graph(nx.karate_club_graph())

grid = ParameterGrid({"markov_time": np.linspace(0.8, 2, 5)})

for params in grid:
    im.run(**params)
    print(
        f"markov_time={params['markov_time']:0.1f}: number of modules: {im.num_top_modules}"
    )
import pathlib
from infomap import Infomap

im = Infomap(silent=True)

name = "ninetriangles"
filename = f"../networks/{name}.net"

# You can read a network with the method read_file,
# which by default will accumulate to existing network data
im.read_file(filename, accumulate=False)

im.run(num_trials=5)

print(
    f"Found {im.max_depth} levels with {im.num_leaf_modules} leaf modules in {im.num_top_modules} top modules and codelength: {im.codelength:.8f} bits"
)
print(f"All codelengths: {im.codelengths}")

print("Tree:\n# path node_id module_id flow")
for node in im.nodes:
    print(f"{node.path} {node.node_id} {node.module_id} {node.flow:.8f}")

for module_level in range(1, im.max_depth):
    print(
        f"Modules at level {module_level}: {tuple(im.get_modules(module_level).values())}"
    )

print("\nModules at all levels:")
for node_id, modules in im.get_multilevel_modules().items():
    print(f"{node_id}: {modules}")
def infomap_communities(node_idx_neighbors, node_idx_distances, counts,
                        weight_exponent, distance_metric, verbose):
    """Two-level partition of single-layer network with Infomap.

    Parameters
    ----------
    node_idx_neighbors : array of arrays
        Example: `array([array([0]), array([1]), array([2]), ..., array([9997]), array([9998]), array([9999])], dtype=object)`.
    node_idx_distances : array of arrays or None
        Distances matching `node_idx_neighbors`; if None, links are weighted by counts only.
    counts : array
        Multiplicity of each node.
    weight_exponent : float
        Exponent of the inverse-distance link weighting.
    distance_metric : str
        If "haversine", distances are scaled to metres.
    verbose : bool
        Show progress bars.

    Returns
    -------
    partition : dict
        Node-community hash map.
    singleton_nodes : list
        Nodes without neighbors, excluded from the partition.
    """
    # Tracking
    if verbose:
        progress = tqdm
    else:
        progress = pass_func

    # Initiate two-level Infomap
    network = Infomap("--two-level")

    # Add nodes (and reindex nodes because Infomap wants ranked indices)
    if verbose:
        print("    ... adding nodes:")
    name_map, name_map_inverse = {}, {}
    singleton_nodes = []
    infomap_idx = 0
    for n, neighbors in progress(enumerate(node_idx_neighbors),
                                 total=len(node_idx_neighbors)):
        if len(neighbors) > 1:
            network.addNode(infomap_idx)
            name_map_inverse[infomap_idx] = n
            name_map[n] = infomap_idx
            infomap_idx += 1
        else:
            singleton_nodes.append(n)

    # Raise exception if network is too sparse.
    if len(name_map) == 0:
        raise Exception(
            "No edges added because `r2` < the smallest distance between any two points."
        )

    # Add links
    if verbose:
        n_edges = 0
        print("    ... adding edges")
    if node_idx_distances is None:
        for node, neighbors in progress(enumerate(node_idx_neighbors),
                                        total=len(node_idx_neighbors)):
            for neighbor in neighbors[neighbors > node]:
                network.addLink(name_map[node], name_map[neighbor],
                                max(counts[node], counts[neighbor]))
                if verbose:
                    n_edges += 1
    else:
        for node, (neighbors, distances) in progress(
                enumerate(zip(node_idx_neighbors, node_idx_distances)),
                total=len(node_idx_neighbors)):
            for neighbor, distance in zip(neighbors[neighbors > node],
                                          distances[neighbors > node]):
                if distance_metric == "haversine":
                    distance *= 6371000  # scale to metres on the Earth's surface
                network.addLink(
                    name_map[node], name_map[neighbor],
                    max(counts[node], counts[neighbor]) * distance**(-weight_exponent))
                if verbose:
                    n_edges += 1

    # Run infomap
    network.run()

    # Convert to node-community dict format
    partition = dict([(name_map_inverse[infomap_idx], module)
                      for infomap_idx, module in network.modules])

    return partition, singleton_nodes
# Only two modules, splitting the chain in the middle
partition2 = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
    4: 2,
    5: 2,
    6: 2,
    7: 2,
}

# Set initial partition on the Infomap instance to keep it during multiple runs
im.initial_partition = partition1

im.run(no_infomap=True)
print(
    f"Partition one with {im.num_top_modules} modules -> codelength: {im.codelength:.8f} bits"
)

# Set initial partition as run parameter to only use it for this run
# (will be restored to partition1 after)
im.run(initial_partition=partition2, no_infomap=True)
print(
    f"Partition two with {im.num_top_modules} modules -> codelength: {im.codelength:.8f} bits"
)

# Output:
# Partition one with 3 modules -> codelength: 2.5555555555555554
from infomap import Infomap

im = Infomap(two_level=True, silent=True)

# Add weight as an optional third argument
im.add_link(1, 2)
im.add_link(1, 3)
im.add_link(2, 3)
im.add_link(3, 4)
im.add_link(4, 5)
im.add_link(4, 6)
im.add_link(5, 6)

im.run()

print(
    f"Found {im.num_top_modules} modules with codelength {im.codelength:.8f} bits"
)

modules = im.get_modules()

print("Modify the network and test partition...")

# Do some modification to the network
im.add_link(1, 5)
# Note that removing links will not remove nodes if they become unconnected
im.remove_link(5, 6)

# Run again with the optimal partition from the original network as initial solution.
# Set no_infomap to skip optimization and just calculate the codelength
im.run(initial_partition=modules, no_infomap=True)
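
# A plausible continuation (not part of the original snippet): report how well
# the old partition describes the modified network.
print(
    f"Codelength of the old partition on the modified network: {im.codelength:.8f} bits"
)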