def SimulateSbm(sbm_data, num_vertices, num_edges, pi, prop_mat, out_degs=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that documentation for
    more information on the model and parameters.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data. Its
        `graph_memberships` and `graph` attributes are assigned here.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (up to floating-point rounding).
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.

    Returns:
      (none)

    Raises:
      ValueError: if the entries of pi do not sum to 1.0, or prop_mat is not
        k x k with k = len(pi).
    """
    # Compare with a tolerance: exact equality rejects valid inputs such as
    # [0.7, 0.2, 0.1], whose floating-point sum is 0.9999999999999999.
    if not np.isclose(np.sum(pi), 1.0):
        raise ValueError("entries of pi must sum to 1.0")
    if prop_mat.shape[0] != len(pi) or prop_mat.shape[1] != len(pi):
        raise ValueError("prop_mat must be k x k where k = len(pi)")
    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    edge_counts = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)
    # Reduce the sampled multigraph to a simple graph, then compact the edge
    # index range left sparse by the removals.
    stats.remove_self_loops(sbm_data.graph)
    stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
def command_draw(self):
    """
    Draws the current resulting paths as a directed graph.
    Usage: draw
    """
    # An empty result list has no first path to colour, so it is reported the
    # same way as "no search performed yet" instead of failing further down.
    if self.last_res is None or len(self.last_res) == 0:
        return "No saved paths. Do a search before you try to draw them."

    # One [source title, target title] pair per consecutive step of each path.
    edge_list = [
        [title.replace('_', ' ')
         for title in self.db.get_titles_of_ids((node, path[pos + 1]))]
        for path in self.last_res
        for pos, node in enumerate(path[:-1])
    ]

    graph = gt.Graph()
    # hashed=True maps each distinct title to a vertex and returns the
    # vertex -> title property map used for the labels below.
    strings = graph.add_edge_list(edge_list, string_vals=True, hashed=True)
    stats.remove_parallel_edges(graph)

    fill_color = graph.new_vertex_property('vector<float>',
                                           val=[0, 0, 0.640625, 0.9])
    # Vertex 0 and vertex len(first path) - 1 get their own colours.
    # NOTE(review): this presumably marks the start and end of the first
    # path, relying on vertices being numbered in order of first appearance.
    fill_color[graph.vertex(0)] = [0, 0.640625, 0, 0.9]
    fill_color[graph.vertex(len(self.last_res[0]) - 1)] = [0.640625, 0, 0, 0.9]

    draw.interactive_window(
        graph,
        vertex_fill_color=fill_color,
        vertex_text=strings,
        vertex_text_position=graph.new_vertex_property('float', val=0),
        vertex_anchor=graph.new_vertex_property('int', val=0),
        geometry=(1600, 1200),
        vertex_font_size=graph.new_vertex_property('float', val=20))
def load_GT_graph(graphExample, gcc=False, removeSL=False, removePE=False):
    '''
    Load a graph and return it as an undirected graph_tool graph.

    Input:
        - graphExample, graph example from Graph-tool collections (e.g.,
          'cond-mat-2003', 'adjnoun' 'karate' 'netscience') or a graphfile
          in .gml format
        - gcc = True if only the giant connected component should be returned
        - removeSL = True if any self-loops must be removed
        - removePE = True if any parallel-edge must be removed
    Output:
        the corresponding graph_tool graph object
    '''
    if graphExample.endswith(".gml"):
        g = load_graph(graphExample)
    else:
        g = collection.data[graphExample]

    # is_directed is a method: referencing it without calling it is always
    # truthy, so the original test never actually inspected the graph.
    if g.is_directed():
        g.set_directed(False)

    if removePE:
        gtStats.remove_parallel_edges(g)
    if removeSL:
        gtStats.remove_self_loops(g)

    if gcc:
        # Keep only the largest connected component: filter on its label map,
        # then make the filtering permanent.
        largest_component = topology.label_largest_component(g)
        g.set_vertex_filter(largest_component)
        g.purge_vertices()
    return g
def generuj(n, rand_edges, num_colors):
    """Generate a random undirected graph with a per-vertex colour count.

    The graph is built with ``random_graph`` from a degree sampler that draws
    a pair ``(randint(0, k), randint(1, k))`` per vertex, where ``k`` is
    ``rand_edges`` raised to at least 3. It is then switched to undirected
    and stripped of parallel edges. Every vertex receives an integer drawn
    with ``randint(1, num_colors + 1)`` in the ``'liczba_kolorow'`` vertex
    property.

    Args:
        n: number of vertices.
        rand_edges: upper bound handed to ``randint`` for the degree sampler;
            values below 3 are replaced by 3.
        num_colors: the per-vertex value is drawn with
            ``randint(1, num_colors + 1)``.

    Returns:
        The generated graph.
    """
    degree_bound = max(rand_edges, 3)

    def _sample_degrees():
        return randint(0, degree_bound), randint(1, degree_bound)

    graph = random_graph(n, _sample_degrees, parallel_edges=False)
    graph.set_directed(False)
    remove_parallel_edges(graph)

    colour_counts = graph.new_vertex_property('int')
    graph.vertex_properties['liczba_kolorow'] = colour_counts
    for vertex in graph.vertices():
        colour_counts[vertex] = randint(1, num_colors + 1)
    return graph
def get(self, args):
    """Return the JSON graph of the vertices found by a depth-limited DFS.

    Args:
        args: mapping with keys ``"root"`` (index of the start vertex) and
            ``"limit"`` (limit handed to the DFS); both are converted with
            ``int``.

    Returns:
        Whatever ``create_json_graph`` produces for the filtered view of the
        module-level ``graph``, after ``self.set_properties`` was applied.
    """
    from depth_first_searcher import dfs_search_with_limit

    root_index = int(args["root"])
    depth_limit = int(args["limit"])
    start_vertex = graph.vertex(root_index)
    reached = dfs_search_with_limit(graph, start_vertex, depth_limit)

    # Boolean mask selecting exactly the vertices returned by the search.
    keep_mask = graph.new_vertex_property('bool')
    for vertex in reached:
        keep_mask[vertex] = True
    view = GraphView(graph, keep_mask)

    from graph_tool.stats import remove_parallel_edges

    # NOTE(review): the view is built on the module-level graph; confirm
    # whether removing edges here is meant to affect that shared graph too.
    remove_parallel_edges(view)
    view = self.set_properties(view)

    from graph_json_builder import create_json_graph

    return create_json_graph(view)
def load_graph(infile):
    """Load an undirected weighted graph from a space-delimited edge list.

    Every row of *infile* holds ``source target weight``. Vertex ids in the
    file are 1-based and are converted to 0-based vertex indices. The weight
    is stored in the ``"weights"`` edge property. Parallel edges are removed
    before the graph is returned.

    Args:
        infile: file name or file object accepted by ``np.loadtxt``.

    Returns:
        The populated graph; an empty graph (with the ``"weights"`` property
        registered) when the file contains no rows.

    Raises:
        ValueError: if a vertex id smaller than 1 is encountered.
    """
    # ndmin=2 keeps a one-row file as a 1 x 3 matrix; without it loadtxt
    # returns a 1-D array and the column slicing below fails.
    # NOTE(review): the uint32 dtype also applies to the weight column, so
    # fractional weights are not representable — confirm weights are integral.
    inmatrix = np.loadtxt(infile, dtype=np.dtype('uint32'), delimiter=" ",
                          ndmin=2)

    g = Graph(directed=False)
    edge_weights = g.new_edge_property("double")
    g.edge_properties["weights"] = edge_weights
    if inmatrix.size == 0:
        return g

    numv = int(np.amax(inmatrix[:, 0:2]))
    # add_vertex(n) returns a single vertex rather than an iterator when
    # n == 1, so vertices are addressed by index instead of via list(...).
    g.add_vertex(numv)

    for row in inmatrix:
        # Convert to Python ints first so that "id - 1" cannot wrap around in
        # unsigned arithmetic, then shift from 1-based to 0-based indices.
        source = int(row[0]) - 1
        target = int(row[1]) - 1
        if source < 0 or target < 0:
            raise ValueError("vertex ids in the edge list must be >= 1")
        edge = g.add_edge(source, target)
        edge_weights[edge] = row[2]

    remove_parallel_edges(g)
    return g
def make_simple_graph(g, undirected=True, gcc=True):
    '''
    Returns input graph -g- in a version without parallel edges or self-loops.
    If undirected = True, returned graph is also undirected.
    If gcc = True, returned graph is giant connected component of g.

    The graph is modified in place and the same object is returned.
    '''
    # is_directed is a method: referencing it without calling it is always
    # truthy, so the original test never actually inspected the graph.
    if undirected and g.is_directed():
        g.set_directed(False)

    gtStats.remove_self_loops(g)
    gtStats.remove_parallel_edges(g)

    if gcc:
        # Keep only the largest connected component.
        component_mask = topology.label_largest_component(g)
        # A single parenthesised argument prints identically under Python 2
        # (print statement) and Python 3 (print function).
        print("Nodes in largest connected component: " +
              str(np.sum(component_mask.a)))
        g.set_vertex_filter(component_mask)
        g.purge_vertices()
    return g
def loadGraphWithAnnotations(graphFile):
    '''
    Read a graph provided by the RoleSim authors (scientific collaborations
    annotated with each author's g-index and h-index).

    File layout as parsed here:
        - first line: the node count is its second whitespace-separated token
        - one tab-separated line per node: id, name, g-index, h-index
        - a line "*Edges"
        - one line per edge: source and target vertex indices

    Returns the largest connected component as an undirected graph without
    parallel edges or self-loops, carrying the vertex properties "names",
    "h-Index" and "g-Index".
    '''
    g = Graph(directed=False)
    with open(graphFile, "r") as inF:
        num_nodes = int(inF.readline().split()[1])
        g.add_vertex(num_nodes)
        g_names = g.new_vertex_property("string")
        g_H_ind = g.new_vertex_property("int")
        g_G_ind = g.new_vertex_property("int")

        # Read the per-node meta-data up to the "*Edges" marker.
        for i, line in enumerate(inF):
            # str.rstrip replaces the Python-2-only string.rstrip function.
            stripped = line.rstrip()
            if stripped == "*Edges":
                break
            contents = stripped.split("\t")
            gID, name = contents[0], contents[1]
            gIndex, hIndex = int(contents[2]), int(contents[3])
            # Node lines are expected to be numbered consecutively from 0.
            assert (gID == str(i))
            g_names[g.vertex(i)] = name
            # NOTE(review): the original stored gIndex in the h-index map and
            # hIndex in the g-index map. This assumes the columns really are
            # (g-index, h-index) as the local names state — confirm against
            # the data file.
            g_G_ind.a[i] = gIndex
            g_H_ind.a[i] = hIndex

        # The remaining lines are edges.
        for line in inF:
            tokens = line.split()
            if not tokens:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fromE, toE = int(tokens[0]), int(tokens[1])
            g.add_edge(fromE, toE)

    g.vp["names"] = g_names
    g.vp["h-Index"] = g_H_ind
    g.vp["g-Index"] = g_G_ind

    gtStats.remove_parallel_edges(g)
    gtStats.remove_self_loops(g)

    # Keep only the largest connected component.
    largest_component = topology.label_largest_component(g)
    g.set_vertex_filter(largest_component)
    g.purge_vertices()
    return g
def f_centralization(D, stats, options=None):
    """Store the degree centralization of *D* in ``stats``.

    The value is ``sum(max_degree - degree_v) / ((n - 1) * (n - 2))`` over the
    total degrees of a copy of *D* with parallel edges removed, where ``n`` is
    the number of vertices. *D* itself is not modified.

    Args:
        D: graph to analyse.
        stats: dict that receives the key ``'centralization_degree'``.
        options: dict whose ``'features'`` entry lists the requested
            features. Nothing is computed unless it contains
            ``'centralization'``. ``None`` or a missing ``'features'`` entry
            is treated as "no features requested".

    Returns:
        None.
    """
    requested = (options or {}).get('features', ())
    if 'centralization' not in requested:
        return

    simple_graph = D.copy()
    remove_parallel_edges(simple_graph)
    degree_list = simple_graph.degree_property_map('total').a
    num_vertices = degree_list.size

    if num_vertices < 3:
        # The normalisation (n - 1) * (n - 2) is zero for n in {1, 2} and the
        # maximum is undefined for n == 0; report NaN instead of raising.
        stats['centralization_degree'] = float('nan')
    else:
        max_degree = degree_list.max()
        stats['centralization_degree'] = float(
            (max_degree - degree_list).sum()) / (
                (num_vertices - 1) * (num_vertices - 2))

    log.debug('done centrality measures')
def __init__(self, nodes=0, copy_graph=None, weighted=True, directed=True,
             **kwargs):
    '''
    Create an empty graph or a copy of an existing one.

    @todo: document that
    see :class:`gt.Graph`'s constructor

    Parameters
    ----------
    nodes : int, optional (default: 0)
        Number of vertices added to a freshly created graph; ignored when
        `copy_graph` is given.
    copy_graph : object, optional (default: None)
        Graph to copy. Its `graph` attribute, `edge_nb()` method and
        `_edges_deleted` flag are read here.
    weighted : bool, optional (default: True)
        Not used in this constructor body.
    directed : bool, optional (default: True)
        Directedness of the resulting graph. A copied graph is converted when
        its directedness differs.
    **kwargs
        Not used in this constructor body.
    '''
    self._nattr = _GtNProperty(self)
    self._eattr = _GtEProperty(self)
    self._edges_deleted = False

    g = copy_graph.graph if copy_graph is not None else None

    if g is not None:
        from graph_tool import Graph as GtGraph
        from graph_tool.stats import remove_parallel_edges

        num_edges = copy_graph.edge_nb()

        if copy_graph._edges_deleted:
            # set edge filter for non-deleted edges
            eprop = g.new_edge_property("bool",
                                        vals=np.ones(num_edges, dtype=bool))
            g.set_edge_filter(eprop)
            g = GtGraph(g, directed=g.is_directed(), prune=True)

        if not directed and g.is_directed():
            g = g.copy()
            g.set_directed(False)
            # NOTE(review): this can reduce the edge count below num_edges,
            # which is still used for the edge ids below — confirm.
            remove_parallel_edges(g)
        elif directed and not g.is_directed():
            g = g.copy()
            g.set_directed(True)

        self._from_library_graph(g, copy=True)

        # make edge id property map
        if "eid" in g.edge_properties:
            g.edge_properties["eid"].a = list(range(num_edges))
        else:
            # The original sized this map with self._max_eid, which this
            # constructor has not assigned yet; use the same 0..num_edges-1
            # ids as the branch above.
            eids = self._graph.new_edge_property(
                "int", vals=list(range(num_edges)))
            g.edge_properties["eid"] = eids

        self._max_eid = num_edges
    else:
        self._graph = nngt._config["graph"](directed=directed)

        if nodes:
            self._graph.add_vertex(nodes)

        # make edge id property map
        self._max_eid = 0
        eids = self._graph.new_edge_property("int")
        self._graph.edge_properties["eid"] = eids
def motif_operation(mid, cnt_mid):
    '''
    Compute the exposure nodes of every retweet in one cascade.

    Reads the module-level ``dataDf`` (rows filtered by ``mid``) and
    ``motif_patterns_dict`` (pattern graphs keyed 'M4', 'M7', ...).

    Steps:
    1. For each pair of consecutive intervals [I-1, I] of the cascade, form
       the cascade + historical network N_I.
    2. Compute the network motifs of size 3 exhibited in N_I.
    3. For each node u activated in interval I, check whether u takes part in
       an instance of one of the 6 selected motif patterns such that u
       retweeted last among the three nodes, one of which is its source.
    4. Add the third node of each selected instance (neither the parent nor
       u) to the exposure set of u for the cascade.
    5. Apply the AND-gating threshold constraint to the exposure sets.

    Args:
        mid: cascade identifier matched against the 'mid' column of dataDf.
        cnt_mid: counter that is only printed for progress reporting.

    Returns:
        The 'cascade' rows of the selected cascade with an added
        'exposureNodes' column holding one list of exposure nodes per row.
    '''
    # 1.
    cascade_set = dataDf[dataDf['mid'] == mid]
    print("Cascade : ", cnt_mid, " and reshare size: ", len(cascade_set))
    # NOTE(review): the column key here is 'interval_2:' (with a trailing
    # colon) while the filters below use 'interval_1' — confirm it is intended.
    numIntervals = np.max(np.array(list(cascade_set['interval_2:'])))
    last_time = 0
    inf_nodes = {}  # stores the influential nodes apart from parent for each node retweet
    inf_edges = {}  # stores the influential edges between nodes apart from parent for each node retweet
    # One list of motif pattern graphs per interval, indexed by int_idx - 1.
    # NOTE(review): only 500 slots are allocated; more intervals would raise
    # IndexError below.
    motif_graph_patterns = [[] for _ in range(500)]
    # NOTE(review): range(1, numIntervals) stops before numIntervals itself —
    # confirm the last interval is meant to be skipped.
    for int_idx in range(1, numIntervals):
        # print("Interval: ", int_idx)
        '''
        Operation 1.
        '''
        # Rows of the previous and the current interval form the window.
        cascade_intervalDf_prev = cascade_set[cascade_set['interval_1'] == int_idx - 1]
        cascade_intervalDf_curr = cascade_set[cascade_set['interval_1'] == int_idx]
        cascade_intervalDf = pd.concat(
            [cascade_intervalDf_prev, cascade_intervalDf_curr])
        cascade_Df = cascade_intervalDf[cascade_intervalDf['edge_type'] == 'cascade']
        historical_Df = cascade_intervalDf[cascade_intervalDf['edge_type'] == 'historical']

        # Create the vertex time dictionary (user id -> retweet time) from the
        # non-historical rows. A source without a recorded time gets the
        # retweet time of the previously processed row (0 initially).
        vertex_rtTime_dict = {}
        for i, r in cascade_intervalDf.iterrows():
            if r['edge_type'] == 'historical':
                continue
            src = r['source']
            tgt = r['target']
            vertex_rtTime_dict[tgt] = r['retweet_time']
            if src not in vertex_rtTime_dict:
                vertex_rtTime_dict[src] = last_time
            last_time = r['retweet_time']

        # Store the cascade edges
        # NOTE(review): edges_cascade / edges_historical are filled but not
        # read anywhere in this function.
        edges_cascade = []
        edges_historical = []
        for i, r in cascade_intervalDf.iterrows():
            src = r['source']
            tgt = r['target']
            if r['edge_type'] == 'cascade':
                edges_cascade.append((src, tgt))
            else:
                edges_historical.append((src, tgt))

        '''
        Operation 2.
        '''
        cascade_graph = gt.Graph(directed=True)
        node_cascades_diff_map = {}
        cnt_nodes = 0
        cascade_vertices = cascade_graph.new_vertex_property("string")
        cascade_edge_prop = cascade_graph.new_edge_property("int")
        # NOTE(review): cascade_map_write_file is filled but not read in this
        # function.
        cascade_map_write_file = {}

        # Add the cascade edges
        # 0 - Cascade edges
        # 1 - Diffusion edges
        for i, r in cascade_Df.iterrows():
            src = r['source']
            tgt = r['target']
            if src not in node_cascades_diff_map:
                node_cascades_diff_map[src] = cnt_nodes  # map from user ID to graphnode ID
                v1 = cascade_graph.add_vertex()
                cascade_vertices[v1] = src  # map from graphnode ID to user ID
                cascade_map_write_file[cnt_nodes] = src
                cnt_nodes += 1
            else:
                v1 = cascade_graph.vertex(node_cascades_diff_map[src])

            if tgt not in node_cascades_diff_map:
                node_cascades_diff_map[tgt] = cnt_nodes  # map from user ID to graphnode ID
                v2 = cascade_graph.add_vertex()
                cascade_vertices[v2] = tgt  # map from graphnode ID to user ID
                cascade_map_write_file[cnt_nodes] = tgt
                cnt_nodes += 1
            else:
                v2 = cascade_graph.vertex(node_cascades_diff_map[tgt])

            # Skip a cascade edge that is already present.
            if cascade_graph.edge(v1, v2):
                continue
            else:
                e = cascade_graph.add_edge(v1, v2)
                cascade_edge_prop[e] = 0

        gts.remove_parallel_edges(cascade_graph)

        # Add the historical diffusion edges (even if there already exists a cascade edge, but only once)
        # NOTE(review): the lookups below raise KeyError for a historical
        # endpoint that did not appear in any cascade row of this window.
        edges_seen = []
        for i, r in historical_Df.iterrows():
            src = r['source']
            tgt = r['target']
            v1 = node_cascades_diff_map[src]
            v2 = node_cascades_diff_map[tgt]
            if (v1, v2) in edges_seen:
                continue
            edges_seen.append((v1, v2))
            e = cascade_graph.add_edge(v1, v2)
            cascade_edge_prop[e] = 1

        gts.remove_self_loops(cascade_graph)
        # gts.remove_parallel_edges(cascade_graph)

        ''''
        Operation 3.
        '''
        # FINDING THE MOTIFS IN THE CASCADE GRAPH + DIFFUSION NETWORK - SIZE 3
        # NOTE(review): motifs_count is not used below.
        motifs_graph, motifs_count, vertex_maps = \
            gt.clustering.motifs(cascade_graph, 3, return_maps=True)

        # Store the motif patterns interval wise for retrieval later
        for idx_pat in range(len(motifs_graph)):
            motif_graph_patterns[int_idx - 1].append(motifs_graph[idx_pat])

        '''' Operation 4. '''
        # Find the influential nodes for each node in the retweet cascade for the current interval only - NOT the previous interval
        for i, r in cascade_intervalDf_curr.iterrows():
            src = r['source']
            tgt = r['target']
            # A target that already has an exposure set is not processed again.
            if tgt in inf_nodes:
                continue

            # extract the graph node IDs
            # NOTE(review): these lookups run before the 'historical' check
            # below, so a historical row with an endpoint missing from the
            # maps raises KeyError here.
            src_vert = node_cascades_diff_map[src]
            tgt_vert = node_cascades_diff_map[tgt]

            # extract the vertex timestamps
            src_rtTime = vertex_rtTime_dict[src]
            tgt_rtTime = vertex_rtTime_dict[tgt]

            # only consider the cascade retweets for (src, tgt) pair
            edge_type = r['edge_type']
            if edge_type == 'historical':
                continue

            # find the motifs of particular patterns attached to that pair of src and tgt
            # Patterns used below - [M4, M7, M16, M23, M25, M31]
            # Patterns handcoded - can be automated into lists or arrays for more efficiency
            graph_pat_act_M4 = motif_patterns_dict['M4']
            graph_pat_act_M7 = motif_patterns_dict['M7']
            graph_pat_act_M16 = motif_patterns_dict['M16']
            graph_pat_act_M23 = motif_patterns_dict['M23']
            graph_pat_act_M25 = motif_patterns_dict['M25']
            graph_pat_act_M31 = motif_patterns_dict['M31']

            # Extract the motif instances belonging to this pattern
            for idx_map in range(len(motifs_graph)):
                graph_pat_curr = motifs_graph[idx_map]

                # check if the instance belongs to any of these patterns;
                # skip motif graphs isomorphic to none of the six.
                if (not gtt.isomorphism(graph_pat_act_M4, graph_pat_curr) ) and (not gtt.isomorphism(graph_pat_act_M7, graph_pat_curr) ) \
                        and (not gtt.isomorphism(graph_pat_act_M16, graph_pat_curr) ) and (not gtt.isomorphism(graph_pat_act_M23, graph_pat_curr) ) \
                        and (not gtt.isomorphism(graph_pat_act_M25, graph_pat_curr)) and (not gtt.isomorphism(graph_pat_act_M31, graph_pat_curr) ):
                    continue

                # for M in motif_patterns_dict:
                #     if gtt.isomorphism(motif_patterns_dict[M], graph_pat_curr):
                #         print(M, motifs_count[idx_map])
                # return

                # 1st constraint: Traverse through all the motif instances of this pattern that only contain the (src, tgt) cascade edge
                vMaps = vertex_maps[idx_map]
                # print(len(vMaps))
                cnt_maps = 0  # NOTE(review): never updated or read
                for vertices in vMaps:
                    # Cond. 1: the source and target should be in the motif instance
                    vertex_list = list(vertices.a)
                    if src_vert not in vertex_list or tgt_vert not in vertex_list:
                        continue

                    # Find the non-source and non-target vertex
                    # NOTE(review): cascade_vertices is a "string" property
                    # map, so third_vertex is a str; the lookup in
                    # vertex_rtTime_dict below presumably needs the same key
                    # type as r['source'] / r['target'] — verify.
                    for v in vertex_list:
                        if v != src_vert and v != tgt_vert:
                            third_vertex = cascade_vertices[v]  # this is the potential non-parent exposure node to target node
                            break

                    # Cond. 2: the target vertex should have retweeted last among all the motif vertices
                    third_rtTime = vertex_rtTime_dict[third_vertex]
                    max_time = max([tgt_rtTime, src_rtTime, third_rtTime])
                    if max_time != tgt_rtTime:
                        continue

                    # For different motif patterns, need to check different types of motif edges - this is difficult !!
                    # print(tgt, third_vertex)
                    if tgt not in inf_nodes:
                        inf_nodes[tgt] = []
                        inf_edges[tgt] = []
                    inf_nodes[tgt].append(third_vertex)
                    # De-duplicate the exposure nodes collected so far.
                    inf_nodes[tgt] = list(set(inf_nodes[tgt]))

                    # if (third_vertex, tgt) in edges_cas_curr:
                    #     inf_edges[tgt].append((third_vertex, tgt, 'cascade'))
                    # elif (third_vertex, tgt) in edges_hist_curr:
                    #     inf_edges[tgt].append((third_vertex, tgt, 'historical'))

    '''
    Operation 5.
    '''
    # NOTE(review): this step cannot run as written whenever inf_nodes is
    # non-empty:
    #   * sorted(...) returns a list of (key, value) tuples, so the
    #     .values() calls raise AttributeError;
    #   * the while loop never recomputes `product`, so it would not
    #     terminate once entered;
    #   * NumPy has no top-level `find` function.
    # It also reads vertex_rtTime_dict, which is rebuilt for every interval.
    # The intended gating rule needs to be confirmed before this is repaired.
    thresh = 5 * 0.00001
    for node in inf_nodes:
        time_dict = {}
        exposure_nodes = inf_nodes[node]
        for e in exposure_nodes:
            time_dict[e] = vertex_rtTime_dict[e]
        sorted_dict = sorted(time_dict.items())
        product = np.prod(np.array(list(sorted_dict.values())))
        val = list(sorted_dict.values())
        while product < thresh:
            val = val[1:]
        inf_nodes[node] = np.find(sorted_dict, val)

    # Create a dataframe with the rows as original
    cascade_Df = cascade_set[cascade_set['edge_type'] == 'cascade']
    inf_nodes_df = []
    count_row = 0  # NOTE(review): incremented but never read
    for idx, row in cascade_Df.iterrows():
        tgt = row['target']
        if tgt not in inf_nodes:
            inf_nodes_df.append([])
        else:
            inf_nodes_df.append(inf_nodes[tgt])
        count_row += 1

    # NOTE(review): cascade_Df is a filtered selection of cascade_set; pandas
    # may emit SettingWithCopyWarning for this column assignment.
    cascade_Df['exposureNodes'] = inf_nodes_df
    return cascade_Df