def split_gt():
    g = GTGraph()
    g.add_edge_list(adjacency)
    component_labels = label_components(g, directed=False)[0].a
    components = group(component_labels)
    result = mesh.submesh(components, only_watertight=only_watertight)
    return result

def test_fill_missing_time():
    """simple chain graph test"""
    g = Graph(directed=False)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (2, 3)])

    t = GraphView(g, directed=True)
    efilt = t.new_edge_property('bool')
    efilt.a = True
    efilt[t.edge(2, 3)] = False
    t.set_edge_filter(efilt)

    vfilt = t.new_vertex_property('bool')
    vfilt.a = True
    vfilt[3] = False
    t.set_vertex_filter(vfilt)

    root = 0
    obs_nodes = {0, 2}
    infection_times = [0, 1.5, 3, -1]

    pt = fill_missing_time(g, t, root, obs_nodes, infection_times, debug=False)

    for i in range(4):
        assert pt[i] == infection_times[i]

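# A minimal sketch (our addition, not part of the test above) showing the
# effect of the vertex filter used in test_fill_missing_time: filtered
# vertices are masked from the view, not removed from the base graph.
def demo_vertex_filter():
    g = Graph(directed=False)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (2, 3)])
    t = GraphView(g, directed=True)
    vfilt = t.new_vertex_property('bool')
    vfilt.a = True
    vfilt[3] = False
    t.set_vertex_filter(vfilt)
    assert t.num_vertices() == 3  # vertex 3 is hidden in the view
    assert g.num_vertices() == 4  # but still present in the base graph
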
def components_graphtool():
    g = GTGraph()
    # make sure all the nodes are in the graph
    if min_len <= 1:
        g.add_vertex(node_count)
    g.add_edge_list(edges)
    component_labels = label_components(g, directed=False)[0].a
    components = grouping.group(component_labels, min_len=min_len)
    return components

def split_gt():
    g = GTGraph()
    if not only_watertight:
        # same as above, for single triangles with no adjacency
        g.add_vertex(len(mesh.faces))
    g.add_edge_list(adjacency)
    component_labels = label_components(g, directed=False)[0].a
    components = group(component_labels)
    result = mesh.submesh(components, only_watertight=only_watertight)
    return result

def shortest_path_cover_logn_apx(g: gt.Graph, weight: gt.EdgePropertyMap):
    started_with_directed = g.is_directed()
    if not g.is_directed():
        reversed_edges = np.fliplr(g.get_edges())
        g.set_directed(True)
        g.add_edge_list(reversed_edges)
        weight.a[-reversed_edges.shape[0]:] = weight.a[:reversed_edges.shape[0]]

    if weight.value_type() not in ["bool", "int", "int16_t", "int32_t", "int64_t"]:
        # min = np.min(weight.a)
        # min_second = np.min(weight.a[weight.a > min])
        eps = 1  # min_second - min
        scaled_weight = (np.ceil(weight.a / eps) *
                         (g.num_vertices() + 1)).astype(np.int64)  # ints >= 1
    else:
        scaled_weight = weight.a * (g.num_vertices() + 1)

    summed_edge_weight = np.sum(scaled_weight)
    adjusted_weight = g.new_edge_property("long", vals=scaled_weight - 1)

    paths = []
    covered_vertices = set()

    while len(covered_vertices) != g.num_vertices():
        curr_paths = shortest_path_visiting_most_nodes(g, adjusted_weight,
                                                       covered_vertices,
                                                       summed_edge_weight)
        for path in curr_paths:
            paths.append(path)

            # if len(path) <= 2: switch to fast mode and just add single
            # edges/vertices until done.
            path_vertices = set(path)
            for v in path_vertices.difference(covered_vertices):
                for w in g.get_in_neighbors(v):
                    adjusted_weight[g.edge(w, v)] += 1  # .a[list()] -= 1
                    # hard consistency check on the weight encoding
                    if adjusted_weight[g.edge(w, v)] % (g.num_vertices() + 1) != 0:
                        exit(5)

            new_covered = path_vertices.difference(covered_vertices)
            covered_vertices = covered_vertices.union(path_vertices)
            print(len(new_covered), len(path), len(covered_vertices), path)

    if not started_with_directed:
        g.set_directed(False)
        for e in reversed_edges:
            g.remove_edge(g.edge(e[0], e[1]))

    return paths

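# A hedged reading of the weight trick above (our interpretation, not the
# author's comment): scaled weights are multiples of (num_vertices + 1) and
# adjusted_weight subtracts 1 per edge, so a path's cost splits into the true
# scaled length minus the number of "rewarded" edges into uncovered vertices.
# Incrementing the in-edges of a newly covered vertex removes its reward,
# which is what the `% (g.num_vertices() + 1) != 0` check verifies. A tiny
# worked example with n = 4 vertices and unit weights:
n = 4
scaled = 1 * (n + 1)        # 5
adjusted = scaled - 1       # 4, i.e. congruent to -1 mod (n + 1)
path_cost = 3 * adjusted    # a 3-edge path of uncovered vertices costs 12
k = (-path_cost) % (n + 1)  # 3 rewarded edges recovered from the residue
true_len = (path_cost + k) // (n + 1)
assert (k, true_len) == (3, 3)
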
def tree1():
    g = Graph(directed=True)
    g.add_vertex(5)  # one remaining singleton
    g.add_edge_list([(0, 1), (1, 2), (1, 3)])

    # to test 4 is not included
    vfilt = g.new_vertex_property('bool')
    vfilt.set_value(True)
    vfilt[4] = False
    g.set_vertex_filter(vfilt)
    return g

def graph(self):
    try:
        from graph_tool import Graph
    except ImportError:
        return None

    g = Graph()
    g.add_vertex(len(self.nodes))
    g.add_edge_list([(e.src_id, e.dest_id) for e in self.edges])
    return g

def co_graph_directed():
    '''co_graph_directed'''
    g = Graph(directed=True)
    # three vertices are referenced by the edge list and the property arrays
    g.add_vertex(3)
    edges = [(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1)]
    g.add_edge_list(edges)
    o = g.new_vertex_property('int')
    o.a = np.array([3, 4, 2])
    co = g.new_edge_property('int')
    co.a = np.array([2, 2, 1, 1, 2, 2])
    return g, o, co

def erdos_renyi_graph(n, e, directed=False, gcc=True):
    g = Graph(directed=directed)
    g.add_vertex(n)
    rint = np.random.randint
    edge_list = [[x, y] for x, y in zip(rint(0, n, size=e), rint(0, n, size=e))]
    g.add_edge_list(edge_list)
    random_rewire(g, model="erdos")
    g = make_simple_graph(g, undirected=1 - directed, gcc=gcc)
    return g

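# A self-contained sketch of the same sampling idea, without the external
# make_simple_graph post-processing used above (the function name here is
# our own):
import numpy as np
from graph_tool import Graph
from graph_tool.generation import random_rewire

def er_multigraph(n, e, directed=False):
    g = Graph(directed=directed)
    g.add_vertex(n)
    rint = np.random.randint
    g.add_edge_list(zip(rint(0, n, size=e), rint(0, n, size=e)))
    random_rewire(g, model="erdos")  # rewire into an Erdos-Renyi sample
    return g

# g = er_multigraph(1000, 5000)
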
def gen_er(dicProperties):
    np.random.seed()
    # initialize graph
    graphER = Graph()
    nNodes = 0
    nEdges = 0
    rDens = 0.0
    if "Nodes" in dicProperties.keys():
        nNodes = dicProperties["Nodes"]
        graphER.add_vertex(nNodes)
        if "Edges" in dicProperties.keys():
            nEdges = dicProperties["Edges"]
            rDens = nEdges / float(nNodes**2)
            dicProperties["Density"] = rDens
        else:
            rDens = dicProperties["Density"]
            nEdges = int(np.floor(rDens * nNodes**2))
            dicProperties["Edges"] = nEdges
    else:
        nEdges = dicProperties["Edges"]
        rDens = dicProperties["Density"]
        nNodes = int(np.floor(np.sqrt(nEdges / rDens)))
        graphER.add_vertex(nNodes)
        dicProperties["Nodes"] = nNodes
    # generate edges
    numTest, numCurrentEdges = 0, 0
    while numCurrentEdges != nEdges and numTest < n_MAXTESTS:
        lstEdges = np.random.randint(0, nNodes, (nEdges - numCurrentEdges, 2))
        graphER.add_edge_list(lstEdges)
        # remove loops and duplicate edges
        remove_self_loops(graphER)
        remove_parallel_edges(graphER)
        numCurrentEdges = graphER.num_edges()
        numTest += 1
    graphER.reindex_edges()
    nEdges = graphER.num_edges()
    rDens = nEdges / float(nNodes**2)
    # generate types
    rInhibFrac = dicProperties["InhibFrac"]
    lstTypesGen = np.random.uniform(0, 1, nEdges)
    lstTypeLimit = np.full(nEdges, rInhibFrac)
    lstIsExcitatory = np.greater(lstTypesGen, lstTypeLimit)
    nExc = np.count_nonzero(lstIsExcitatory)
    # excitatory (True) or inhibitory (False)
    epropType = graphER.new_edge_property(
        "int", np.multiply(2, lstIsExcitatory) - np.repeat(1, nEdges))
    graphER.edge_properties["type"] = epropType
    # and weights
    if dicProperties["Weighted"]:
        # generate the weights
        lstWeights = dicGenWeights[dicProperties["Distribution"]](
            graphER, dicProperties, nEdges, nExc)
        # create the property that stores the weights
        epropW = graphER.new_edge_property("double", lstWeights)
        graphER.edge_properties["weight"] = epropW
    return graphER

def facets_gt(mesh):
    '''
    Return lists of facets of a mesh.

    Facets are defined as groups of faces which are both adjacent and
    parallel. The facets returned reference indices into mesh.faces.
    '''
    face_idx = mesh.face_adjacency()
    normal_pairs = mesh.face_normals[face_idx]
    parallel = np.abs(np.sum(normal_pairs[:, 0, :] * normal_pairs[:, 1, :],
                             axis=1) - 1) < TOL_PLANAR
    graph_parallel = GTGraph()
    graph_parallel.add_edge_list(face_idx[parallel])
    connected = label_components(graph_parallel, directed=False)[0].a
    facets_idx = group(connected, min_length=2)
    return facets_idx

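# Hypothetical usage of facets_gt, assuming a trimesh-style mesh object
# exposing face_adjacency() and face_normals as the function expects:
# facets = facets_gt(mesh)
# for facet in facets:
#     print('coplanar facet spanning %d faces' % len(facet))
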
class MinGraphBuilder:
    def __init__(self):
        self.graph = Graph(directed=False)
        self.codes = []
        self.labels = []
        self.sources = []

    def add_nodes(self, df, ns):
        n = len(df)
        _log.info('adding %d nodes to graph', n)
        start = self.graph.num_vertices()
        vs = self.graph.add_vertex(n)
        end = self.graph.num_vertices()
        assert end - start == n
        nodes = pd.Series(np.arange(start, end, dtype='i4'), index=df['id'])
        self.codes.append(df['id'].values + ns.offset)
        self.labels.append(df['id'].values)
        self.sources.append(np.full(n, ns.code, dtype='i2'))
        return nodes

    def add_edges(self, f, src, dst):
        _log.info('adding %d edges to graph', len(f))
        edges = np.zeros((len(f), 2), dtype='i4')
        edges[:, 0] = src.loc[f.iloc[:, 0]]
        edges[:, 1] = dst.loc[f.iloc[:, 1]]
        self.graph.add_edge_list(edges)

    def finish(self):
        _log.info('setting code attributes')
        code_a = self.graph.new_vp('int64_t')
        code_a.a[:] = np.concatenate(self.codes)
        self.graph.vp['code'] = code_a

        _log.info('setting label attributes')
        label_a = self.graph.new_vp('int64_t')
        label_a.a[:] = np.concatenate(self.labels)
        self.graph.vp['label'] = label_a

        _log.info('setting source attributes')
        source_a = self.graph.new_vp('int16_t')
        source_a.a[:] = np.concatenate(self.sources)
        self.graph.vp['source'] = source_a

        return self.graph

def g():
    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 0), (1, 3), (3, 1),
                     (0, 2), (2, 0), (2, 3), (3, 2)])

    weights = g.new_edge_property('float')
    weights[g.edge(0, 1)] = 0.9
    weights[g.edge(1, 0)] = 0.7
    weights[g.edge(1, 3)] = 0.8
    weights[g.edge(3, 1)] = 0.2
    weights[g.edge(2, 3)] = 0.4
    weights[g.edge(3, 2)] = 0.3
    weights[g.edge(0, 2)] = 0.1
    weights[g.edge(2, 0)] = 0.4
    g.edge_properties['weights'] = weights
    return g

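# Reading the stored property back out of the fixture above (treating the
# pytest-style fixture as a plain constructor for illustration):
graph = g()
w = graph.edge_properties['weights']
assert abs(w[graph.edge(0, 1)] - 0.9) < 1e-6
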
def split_gt(mesh, check_watertight=True, only_count=False):
    g = GTGraph()
    g.add_edge_list(mesh.face_adjacency())
    component_labels = label_components(g, directed=False)[0].a
    if check_watertight:
        degree = g.degree_property_map('total').a
    meshes = deque()
    components = group(component_labels)
    if only_count:
        return len(components)

    for i, current in enumerate(components):
        fill_holes = False
        if check_watertight:
            degree_3 = degree[current] == 3
            degree_2 = degree[current] == 2
            if not degree_3.all():
                if np.logical_or(degree_3, degree_2).all():
                    fill_holes = True
                else:
                    continue

        # these faces have the original vertex indices
        faces_original = mesh.faces[current]
        face_normals = mesh.face_normals[current]
        # we find the unique vertex indices, so we can reindex from zero
        unique_vert = np.unique(faces_original)
        vertices = mesh.vertices[unique_vert]
        replacement = np.zeros(unique_vert.max() + 1, dtype=np.int64)
        replacement[unique_vert] = np.arange(len(unique_vert))
        faces = replacement[faces_original]
        new_mesh = mesh.__class__(faces=faces,
                                  face_normals=face_normals,
                                  vertices=vertices)
        new_meta = deepcopy(mesh.metadata)
        if 'name' in new_meta:
            new_meta['name'] = new_meta['name'] + '_' + str(i)
        new_mesh.metadata.update(new_meta)
        if fill_holes:
            try:
                new_mesh.fill_holes(raise_watertight=True)
            except MeshError:
                continue
        meshes.append(new_mesh)
    return list(meshes)

def components_graphtool(): """ Find connected components using graphtool """ g = GTGraph() # make sure all the nodes are in the graph g.add_vertex(node_count) # add the edge list g.add_edge_list(edges) labels = np.array(label_components(g, directed=False)[0].a, dtype=np.int64)[:node_count] # we have to remove results that contain nodes outside # of the specified node set and reindex contained = np.zeros(node_count, dtype=np.bool) contained[nodes] = True index = np.arange(node_count, dtype=np.int64)[contained] components = grouping.group(labels[contained], min_len=min_len) components = np.array([index[c] for c in components]) return components
def build_model(vkapi: vk.API, user_for_analyse: dict):
    friend_list, friend_links = __prepare_data(vkapi, user_for_analyse)
    graph = Graph(directed=False)
    vmap = graph.add_edge_list(friend_links.values, hashed=True)
    state = minimize_blockmodel_dl(graph)
    layout = sfdp_layout(graph, groups=state.b)
    state.draw(pos=layout, vertex_text=vmap, vertex_font_size=3,
               vertex_size=3, vertex_color=[128, 128, 128, 1],
               output_size=(2000, 2000), output="graph.svg")
    with open("graph.svg", 'r') as source:
        graph_image = source.read()
    os.remove("graph.svg")
    return graph_image

class GraphDataset:
    """
    Class for managing datasets with graph data
    """

    def __init__(self, name, edges, object_ids, weights, hidden_graph=None):
        """
        Params:
            name (str): unique string to name this dataset (for pickling and
                unpickling)
            edges (numpy.ndarray): numpy array of shape [num_edges, 2]
                containing the indices of nodes in all edges
            object_ids (List[str]): string object ids for all nodes
            weights (numpy.ndarray): numpy array of shape [num_edges]
                containing edge weights
            hidden_graph (GraphDataset): graph data that should be excluded
                but not considered as negative edges (i.e. train edges should
                not be in the eval dataset, but they shouldn't be counted as
                negatives either)
        """
        self.name = name
        self.edges = edges
        self.object_ids = np.asarray(object_ids)
        self.weights = weights
        self.hidden_graph = hidden_graph

        self.graph = Graph(directed=False)
        self.graph.add_vertex(len(object_ids))
        edge_weights = [[edge[0], edge[1], weight]
                        for edge, weight in zip(self.edges, self.weights)]
        self.weight_property = self.graph.new_edge_property("float")
        eprops = [self.weight_property]
        self.graph.add_edge_list(edge_weights, eprops=eprops)
        self.manifold_nns = None

    def gen_neighbor_data(self, verbose=True) -> Dict:
        """
        Generates the graph data needed to run the cython iterator.
        Returns a dict with the neighbor data which will have values

        - 'non_empty_vertices' the indices of vertices which have edges
          emanating from them
        - 'all_graph_neighbors' a list of lists of ints such that the list of
          edges emanating from the vertex with index non_empty_vertices[i] is
          stored in all_graph_neighbors[i]
        - 'all_graph_weights' a list of lists of ints such that
          all_graph_weights[i][j] represents the weight of the connection in
          all_graph_neighbors[i][j]
        - 'N' number of nodes in the graph

        Parameters:
            verbose (bool): should graph loading be printed out
        """
        all_graph_neighbors = []
        all_graph_weights = []
        non_empty_vertices = []
        empty_vertices = []
        if verbose:
            iterator = tqdm(range(self.n_nodes()),
                            desc="Generating Neighbor Data",
                            dynamic_ncols=True)
        else:
            iterator = range(self.n_nodes())

        for i in iterator:
            in_edges = self.graph.get_in_edges(i, [self.weight_property])
            out_edges = self.graph.get_out_edges(i, [self.weight_property])
            if in_edges.size + out_edges.size > 0:
                non_empty_vertices.append(i)
                if in_edges.size == 0:
                    all_graph_neighbors.append(out_edges[:, 1].astype(np.int64))
                    all_graph_weights.append(out_edges[:, 2].astype(np.float32))
                elif out_edges.size == 0:
                    all_graph_neighbors.append(in_edges[:, 1].astype(np.int64))
                    all_graph_weights.append(in_edges[:, 2].astype(np.float32))
                else:
                    all_graph_neighbors.append(
                        np.concatenate([in_edges[:, 0],
                                        out_edges[:, 1]]).astype(np.int64))
                    all_graph_weights.append(
                        np.concatenate([in_edges[:, 2],
                                        out_edges[:, 2]]).astype(np.float32))
            else:
                empty_vertices.append(i)

        # graph_neighbors = np.concatenate(all_graph_neighbors)
        # graph_neighbor_weights = np.concatenate(all_graph_weights)
        non_empty_vertices = np.array(non_empty_vertices, dtype=np.int64)
        empty_vertices = np.array(empty_vertices, dtype=np.int64)

        return {
            "all_graph_neighbors": all_graph_neighbors,
            "all_graph_weights": all_graph_weights,
            "non_empty_vertices": non_empty_vertices,
            "empty_vertices": empty_vertices,
            "N": self.n_nodes()
        }

    def add_manifold_nns(self, graph_embedder: GraphEmbedder):
        manifold = graph_embedder.get_manifold()
        data_points = graph_embedder.retrieve_nodes(self.n_nodes())
        self.manifold_nns = ManifoldNNS(data_points, manifold)

    def n_nodes(self) -> int:
        """
        Returns the number of nodes in the graph
        """
        return len(self.object_ids)

    def collapse_nodes(self, node_ids):
        all_new_edges = []
        for node_id in tqdm(node_ids, desc="Collapsing Nodes",
                            dynamic_ncols=True):
            in_edges = self.graph.get_in_edges(node_id, [self.weight_property])
            out_edges = self.graph.get_out_edges(node_id, [self.weight_property])
            neighbors = np.concatenate([out_edges[:, 1:3],
                                        in_edges[:, 0:3:2]])
            if neighbors.shape[0] > 1:
                neighbor_combos = \
                    neighbors[comb_index(neighbors.shape[0], 2)]
                neighbor_combos = \
                    neighbor_combos.reshape(neighbor_combos.shape[0], 4)
                new_edges = np.zeros((neighbor_combos.shape[0], 3))
                new_edges[:, :2] += neighbor_combos[:, 0:3:2]
                new_edges[:, 2] += (neighbor_combos[:, 1] +
                                    neighbor_combos[:, 3]) / 4
                all_new_edges.append(new_edges)

        self.graph.add_edge_list(np.concatenate(all_new_edges),
                                 eprops=[self.weight_property])
        self.object_ids = np.delete(self.object_ids, np.array(node_ids))
        self.graph.remove_vertex(node_ids)

        edges_weights = self.graph.get_edges(eprops=[self.weight_property])
        edges = edges_weights[:, 0:2]
        weights = edges_weights[:, 2]
        self.edges = edges
        self.weights = weights

    def get_neighbor_iterator(
            self,
            graph_sampling_config: GraphSamplingConfig,
            data_fraction: float = 1,
    ) -> Iterator[GraphDataBatch]:
        """
        Gets an efficient iterator of edge batches
        """
        neighbor_data = load_or_gen(f"GraphDataset.{self.name}",
                                    self.gen_neighbor_data)
        if self.hidden_graph is None:
            # GraphDataBatchIterator is defined in cython with these arguments.
            # noinspection PyArgumentList
            iterator = GraphDataBatchIterator(neighbor_data,
                                              graph_sampling_config)
            iterator.data_fraction = data_fraction
        else:
            hidden_neighbor_data = load_or_gen(
                f"GraphDataset.{self.hidden_graph.name}",
                self.hidden_graph.gen_neighbor_data)
            # GraphDataBatchIterator is defined in cython with these arguments.
            # noinspection PyArgumentList
            iterator = GraphDataBatchIterator(neighbor_data,
                                              graph_sampling_config,
                                              hidden_neighbor_data)
            iterator.data_fraction = data_fraction

        if self.manifold_nns is not None:
            sampling_config = get_config().sampling
            _, nns = \
                self.manifold_nns.knn_query_all(sampling_config.manifold_nn_k)
            all_manifold_neighbors = [
                nns[i][1:].astype(np.int64) for i in range(self.n_nodes())
            ]
            iterator.refresh_manifold_nn(all_manifold_neighbors)

        return iterator

    @classmethod
    def make_train_eval_split(cls, name, edges, object_ids, weights):
        """
        Returns a tuple of a train eval split of the graph as defined in the
        data config.
        """
        data_config = get_config().data
        np.random.seed(data_config.split_seed)

        if data_config.split_by_edges:
            # TODO Doesn't save to file in this mode
            shuffle_order = np.arange(edges.shape[0])
            np.random.shuffle(shuffle_order)
            num_eval = floor(edges.shape[0] * data_config.split_size)
            eval_indices = shuffle_order[:num_eval]
            train_indices = shuffle_order[num_eval:]
            train_edges = edges[train_indices]
            train_weights = weights[train_indices]
            eval_edges = edges[eval_indices]
            eval_weights = weights[eval_indices]
        else:
            shuffle_order = np.arange(len(object_ids))
            np.random.shuffle(shuffle_order)
            num_eval = floor(len(object_ids) * data_config.split_size)
            eval_indices = shuffle_order[:num_eval]
            test_set = data_config.generate_test_set
            if test_set:
                test_indices = shuffle_order[num_eval:2 * num_eval]
            train_indices = shuffle_order[2 * num_eval:] if test_set else \
                shuffle_order[num_eval:]

            train_edges = []
            eval_edges = []
            train_weights = []
            eval_weights = []
            if test_set:
                test_edges = []
                test_weights = []
            for edge, weight in zip(edges, weights):
                if test_set and (edge[0] in test_indices
                                 or edge[1] in test_indices):
                    test_edges.append(edge)
                    test_weights.append(weight)
                elif edge[0] in eval_indices or edge[1] in eval_indices:
                    eval_edges.append(edge)
                    eval_weights.append(weight)
                else:
                    train_edges.append(edge)
                    train_weights.append(weight)

            if test_set:
                save_graph_data(test_edges, test_weights, object_ids,
                                data_config.test_path)
            save_graph_data(train_edges, train_weights, object_ids,
                            data_config.train_path)
            save_graph_data(eval_edges, eval_weights, object_ids,
                            data_config.eval_path)
            train_edges = np.array(train_edges)
            eval_edges = np.array(eval_edges)
            train_weights = np.array(train_weights)
            eval_weights = np.array(eval_weights)

        train_data = GraphDataset(f"{name}_train", train_edges, object_ids,
                                  train_weights)
        eval_data = GraphDataset(f"{name}_eval", eval_edges, object_ids,
                                 eval_weights, hidden_graph=train_data)
        return train_data, eval_data

def facets_gt():
    graph_parallel = GTGraph()
    graph_parallel.add_edge_list(face_idx[parallel])
    connected = label_components(graph_parallel, directed=False)[0].a
    facets_idx = group(connected, min_len=2)
    return facets_idx

def g():
    g = Graph(directed=False)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (0, 3)])
    return g

# In[26]:

all_nodes = set(df['u'].values) | set(df['v'].values)

# In[27]:

g.add_vertex(len(all_nodes))

# In[28]:

edges = list(zip(df['u'].values, df['v'].values))

# In[29]:

g.add_edge_list(edges)

# In[30]:

# add reciprocal edges so both directions are present
edges_iter = list(g.edges())
for e in tqdm(edges_iter):
    u, v = int(e.source()), int(e.target())
    if g.edge(v, u) is None:
        g.add_edge(v, u)

# In[31]:

weight = g.new_edge_property('float')
weight.set_value(EPS)

def _load_graph(input_filename: str, part_num: Optional[int] = None,
                graph: Optional[Graph] = None) -> Graph:
    """Load the graph from a TSV file with standard format.

    Parameters
    ----------
    input_filename : str
        input file name, not including the .tsv extension
    part_num : int, optional
        specify which stage of the streaming graph to load
    graph : Graph, optional
        existing graph to add to. This is used when loading the streaming
        graphs one stage at a time. Note that the truth partition is loaded
        all together at once.

    Returns
    -------
    graph : Graph
        the Graph object loaded or updated from file

    Notes
    -----
    The standard TSV file has the form for each row: "from to [weight]"
    (tab delimited). Nodes are 1-indexed in the file and are mapped to the
    0-indexed range 0 to N-1 internally.
    """
    # read the entire graph CSV into rows of edges
    if part_num is None:
        edge_rows = pd.read_csv('{}.tsv'.format(input_filename),
                                delimiter='\t', header=None).values
    else:
        edge_rows = pd.read_csv('{}_{}.tsv'.format(input_filename, part_num),
                                delimiter='\t', header=None).values

    if graph is None:  # no previously loaded streaming pieces
        N = edge_rows[:, 0:2].max()  # number of nodes
        out_neighbors = [[] for i in range(N)]  # type: List[np.ndarray[int]]
        in_neighbors = [[] for i in range(N)]  # type: List[np.ndarray[int]]
    else:  # add to previously loaded streaming pieces
        N = max(edge_rows[:, 0:2].max(), len(graph.out_neighbors))  # number of nodes
        out_neighbors = [list(graph.out_neighbors[i])
                         for i in range(len(graph.out_neighbors))]
        out_neighbors.extend([[] for i in range(N - len(out_neighbors))])
        in_neighbors = [list(graph.in_neighbors[i])
                        for i in range(len(graph.in_neighbors))]
        in_neighbors.extend([[] for i in range(N - len(in_neighbors))])
    weights_included = edge_rows.shape[1] == 3

    # load edges to list of lists of out and in neighbors
    for i in range(edge_rows.shape[0]):
        if weights_included:
            edge_weight = edge_rows[i, 2]
        else:
            edge_weight = 1
        # -1 on the node index since Python is 0-indexed and the standard
        # graph TSV is 1-indexed
        out_neighbors[edge_rows[i, 0] - 1].append([edge_rows[i, 1] - 1, edge_weight])
        in_neighbors[edge_rows[i, 1] - 1].append([edge_rows[i, 0] - 1, edge_weight])

    # convert each neighbor list to neighbor numpy arrays for faster access
    for i in range(N):
        if len(out_neighbors[i]) > 0:
            out_neighbors[i] = np.array(out_neighbors[i], dtype=np.int32)
        else:
            out_neighbors[i] = np.array(out_neighbors[i],
                                        dtype=np.int32).reshape((0, 2))
    for i in range(N):
        if len(in_neighbors[i]) > 0:
            in_neighbors[i] = np.array(in_neighbors[i], dtype=np.int32)
        else:
            in_neighbors[i] = np.array(in_neighbors[i],
                                       dtype=np.int32).reshape((0, 2))

    # E = sum(len(v) for v in out_neighbors)  # number of edges

    input_graph = Graph()
    input_graph.add_vertex(N)
    input_graph.add_edge_list([(i, j)
                               for i in range(len(out_neighbors))
                               if len(out_neighbors[i]) > 0
                               for j in out_neighbors[i][:, 0]])
    return input_graph

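# Hypothetical input for _load_graph: a 1-indexed, tab-delimited file
# 'toy.tsv' (the file name is our own) with optional weights, e.g.
#
#   1   2   1.0
#   2   3   2.0
#
# g = _load_graph('toy')               # loads toy.tsv
# g = _load_graph('toy', part_num=1)   # loads toy_1.tsv
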
def is_watertight_gt(mesh):
    g = GTGraph()
    g.add_edge_list(mesh.face_adjacency())
    degree = g.degree_property_map('total').a
    watertight = np.equal(degree, 3).all()
    return watertight

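# A quick sanity check of the degree-3 criterion above on a tetrahedron:
# its four faces each border exactly three others, so the face-adjacency
# graph is K4 (adjacency pairs written out by hand instead of a mesh):
from graph_tool import Graph
import numpy as np

tetra = Graph(directed=False)
tetra.add_edge_list([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)])
assert np.equal(tetra.degree_property_map('total').a, 3).all()
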
def gen_fs(dicProperties):
    np.random.seed()
    graphFS = Graph()
    # define the fraction of arcs to use for reciprocity
    f = dicProperties["Reciprocity"]
    rFracRecip = f / (2.0 - f)
    # define all the basic quantities
    rInDeg = dicProperties["InDeg"]
    rOutDeg = dicProperties["OutDeg"]
    nNodes = 0
    nEdges = 0
    rDens = 0.0
    if "Nodes" in dicProperties.keys():
        nNodes = dicProperties["Nodes"]
        graphFS.add_vertex(nNodes)
        if "Edges" in dicProperties.keys():
            nEdges = dicProperties["Edges"]
            rDens = nEdges / float(nNodes**2)
            dicProperties["Density"] = rDens
        else:
            rDens = dicProperties["Density"]
            nEdges = int(np.floor(rDens * nNodes**2))
            dicProperties["Edges"] = nEdges
    else:
        nEdges = dicProperties["Edges"]
        rDens = dicProperties["Density"]
        nNodes = int(np.floor(np.sqrt(nEdges / rDens)))
        graphFS.add_vertex(nNodes)
        dicProperties["Nodes"] = nNodes
    # define the number of arcs to create
    nArcs = int(np.floor(rDens * nNodes**2) / (1 + rFracRecip))
    # define the parameters of the associated probability functions F(x) = A x^{-tau}
    Ai = nArcs * (rInDeg - 1) / (nNodes)
    Ao = nArcs * (rOutDeg - 1) / (nNodes)
    # define the means of the Pareto II (Lomax) distributions
    rMi = 1 / (rInDeg - 2.)
    rMo = 1 / (rOutDeg - 2.)
    # define the three lists containing the outgoing/incoming/bidirectional
    # degrees associated with the nodes i in range(nNodes)
    lstInDeg = np.random.pareto(rInDeg, nNodes) + 1
    lstOutDeg = np.random.pareto(rOutDeg, nNodes) + 1
    lstInDeg = np.floor(np.multiply(Ai / np.mean(lstInDeg), lstInDeg)).astype(int)
    lstOutDeg = np.floor(np.multiply(Ao / np.mean(lstOutDeg), lstOutDeg)).astype(int)
    # generate the stubs that will be needed and count them
    nInStubs = int(np.sum(lstInDeg))
    nOutStubs = int(np.sum(lstOutDeg))
    lstInStubs = np.zeros(np.sum(lstInDeg))
    lstOutStubs = np.zeros(np.sum(lstOutDeg))
    nStartIn = 0
    nStartOut = 0
    for vert in range(nNodes):
        nInDegVert = lstInDeg[vert]
        nOutDegVert = lstOutDeg[vert]
        for j in range(np.max([nInDegVert, nOutDegVert])):
            if j < nInDegVert:
                lstInStubs[nStartIn + j] += vert
            if j < nOutDegVert:
                lstOutStubs[nStartOut + j] += vert
        nStartOut += nOutDegVert
        nStartIn += nInDegVert
    # check that we have roughly the desired number of edges
    while nInStubs * (1 + rFracRecip) / float(nArcs) < 0.95:
        vert = np.random.randint(0, nNodes)
        nAddInStubs = int(np.floor(Ai / rMi * (np.random.pareto(rInDeg) + 1)))
        lstInStubs = np.append(lstInStubs, np.repeat(vert, nAddInStubs)).astype(int)
        nInStubs += nAddInStubs
    while nOutStubs * (1 + rFracRecip) / float(nArcs) < 0.95:
        nAddOutStubs = int(np.floor(Ao / rMo * (np.random.pareto(rOutDeg) + 1)))
        lstOutStubs = np.append(lstOutStubs, np.repeat(vert, nAddOutStubs)).astype(int)
        nOutStubs += nAddOutStubs
    # make sure we have the same number of in and out stubs
    # (1.13 is an experimental correction)
    nMaxStubs = int(1.13 * (2.0 * nArcs) / (2 * (1 + rFracRecip)))
    if nInStubs > nMaxStubs and nOutStubs > nMaxStubs:
        np.random.shuffle(lstInStubs)
        np.random.shuffle(lstOutStubs)
        lstOutStubs.resize(nMaxStubs)
        lstInStubs.resize(nMaxStubs)
        nOutStubs = nInStubs = nMaxStubs
    elif nInStubs < nOutStubs:
        np.random.shuffle(lstOutStubs)
        lstOutStubs.resize(nInStubs)
        nOutStubs = nInStubs
    else:
        np.random.shuffle(lstInStubs)
        lstInStubs.resize(nOutStubs)
        nInStubs = nOutStubs
    # create the graph, the nodes and the stubs
    nRecip = int(np.floor(nInStubs * rFracRecip))
    nEdges = nInStubs + nRecip + 1
    # the reciprocal stubs
    np.random.shuffle(lstInStubs)
    np.random.shuffle(lstOutStubs)
    lstInRecip = lstInStubs[0:nRecip]
    lstOutRecip = lstOutStubs[0:nRecip]
    lstEdges = np.array([np.concatenate((lstOutStubs, lstInRecip)),
                         np.concatenate((lstInStubs, lstOutRecip))]).astype(int)
    # add edges
    graphFS.add_edge_list(np.transpose(lstEdges))
    remove_self_loops(graphFS)
    remove_parallel_edges(graphFS)
    lstIsolatedVert = find_vertex(graphFS, graphFS.degree_property_map("total"), 0)
    graphFS.remove_vertex(lstIsolatedVert)
    graphFS.reindex_edges()
    nNodes = graphFS.num_vertices()
    nEdges = graphFS.num_edges()
    rDens = nEdges / float(nNodes**2)
    # generate types
    rInhibFrac = dicProperties["InhibFrac"]
    lstTypesGen = np.random.uniform(0, 1, nEdges)
    lstTypeLimit = np.full(nEdges, rInhibFrac)
    lstIsExcitatory = np.greater(lstTypesGen, lstTypeLimit)
    nExc = np.count_nonzero(lstIsExcitatory)
    # excitatory (True) or inhibitory (False)
    epropType = graphFS.new_edge_property(
        "int", np.multiply(2, lstIsExcitatory) - np.repeat(1, nEdges))
    graphFS.edge_properties["type"] = epropType
    # and weights
    if dicProperties["Weighted"]:
        # generate the weights
        lstWeights = dicGenWeights[dicProperties["Distribution"]](
            graphFS, dicProperties, nEdges, nExc)
        # create the property to store the weights
        epropW = graphFS.new_edge_property("double", lstWeights)
        graphFS.edge_properties["weight"] = epropW
    return graphFS

class graphtool():

    def get_edges(self):
        self.edges = []
        for dev in Device.objects:
            for port in dev['ports']:
                if not port['acc']:
                    self.edges.append([int(port['dev']), int(dev['devid'])])
        for edge in self.edges:
            if edge[::-1] in self.edges:
                self.edges.remove(edge)

    def create_graph(self):
        self.get_edges()
        self.g = Graph(directed=False)
        self.g.add_edge_list(self.edges)

    def load_graph(self):
        self.g = pickle.loads(System.objects.first().graph.read())

    def shortestpath(self, source, dest):
        if source == dest:
            return ('source and destination IPs must be different')
        # ip to id
        source = Device.objects(uri=source)
        dest = Device.objects(uri=dest)
        if len(source) > 0 and len(dest) > 0:
            source = self.g.vertex(source[0].devid)
            dest = self.g.vertex(dest[0].devid)
            result = graph_tool.topology.shortest_path(self.g, source, dest)
            path = [self.g.vertex_index[x] for x in result[0]]

            filteredge = self.g.new_edge_property('bool')
            filteredge[result[1][0]] = True
            self.g.set_edge_filter(filteredge, inverted=True)
            result = graph_tool.topology.shortest_path(self.g, source, dest)
            second_path = [self.g.vertex_index[x] for x in result[0]]
            self.g.clear_filters()

            another_paths = []
            all_shortest = graph_tool.topology.all_shortest_paths(
                self.g, source, dest)
            for i in all_shortest:
                another_paths.append([self.g.vertex_index[j] for j in i])

            self.all_paths = [path] + [second_path] + another_paths
            self.all_paths = [tuple(t) for t in self.all_paths]
            self.all_paths = [t for t in self.all_paths if len(t) > 0]
            self.all_paths = list(set(self.all_paths))
            self.all_paths = [list(t) for t in self.all_paths]

            dev_from_stp = []
            count = 0
            for path in self.all_paths:
                for dev in path:
                    dev = Device.objects(devid=dev).first().uri
                    if Stpdomins.objects(devices__=dev):
                        count += 1
                        [
                            dev_from_stp.append(x)
                            for x in Stpdomins.objects(
                                devices__=dev).first().devices
                            if x not in dev_from_stp
                        ]
            if len(dev_from_stp) > 0 and count > 1:
                print('stp domains')
                filtevertex = self.g.new_vertex_property('bool')
                for x in dev_from_stp:
                    filtevertex[self.g.vertex(
                        Device.objects(uri=x).first().devid)] = True
                self.g.set_vertex_filter(filtevertex)
                source = self.g.vertex(
                    Device.objects(uri=dev_from_stp[0]).first().devid)
                dest = self.g.vertex(
                    Device.objects(uri=dev_from_stp[-1]).first().devid)
                result = graph_tool.topology.all_paths(self.g, source, dest)
                for x in result:
                    self.all_paths.append([int(self.g.vertex(i)) for i in x])
                self.g.clear_filters()

            self.all_paths.sort()
            self.all_paths = list(
                self.all_paths
                for self.all_paths, _ in itertools.groupby(self.all_paths))
            self.all_paths = [path for path in self.all_paths if len(path) > 0]
            return self.all_paths

    def fancy_shortest(self):
        self.fancy_paths = []
        for path in self.all_paths:
            fancy = []
            for i in path:
                d = Device.objects(devid=i).first()
                if d.devtype not in passive:
                    fancy.append([d.uri, d.addr, dev_type_dict[d.devtype]])
            self.fancy_paths.append(fancy)
        return self.fancy_paths

    def paths_ports(self):
        output = []
        for path in self.all_paths:
            for i, j in zip(path, path[1:]):
                dev = Device.objects(devid=i).first()
                if dev.devtype in supported:
                    ports = [x['num'] for x in dev.ports if x['dev'] == j]
                    if len(ports) == 0:
                        ports = 0
                    else:
                        ports = ports[0]
                    output.append([dev.uri, dev.devtype, ports])
                dev = Device.objects(devid=j).first()
                if dev.devtype in supported:
                    ports = [x['num'] for x in dev.ports if x['dev'] == i]
                    if len(ports) == 0:
                        ports = 0
                    else:
                        ports = ports[0]
                    output.append([dev.uri, dev.devtype, ports])

        g_fancy_output = dict()
        g_output = dict()
        for key, group in groupby(output, lambda x: x[0]):
            ports = []
            for i in group:
                ports.append(i[2])
            if key in g_output:
                # print(g_output[key]['ports'], ports)
                g_output[key]['ports'] = g_output[key]['ports'] + ports
            else:
                g_output[key] = {'type': i[1], 'ports': ports}
        for key in g_output:
            g_output[key]['ports'] = list(set(g_output[key]['ports']))
        g_fancy_output = copy.deepcopy(g_output)
        for i in g_fancy_output:
            g_fancy_output[i]['type'] = dev_type_dict[g_fancy_output[i]['type']]
        return g_fancy_output, g_output

def load_graph_from_kgtk(
        kr: KgtkReader,
        directed: bool = False,
        eprop_types: typing.Optional[typing.List[str]] = None,
        hashed: bool = True,
        hash_type: str = "string",  # for future support
        ecols: typing.Optional[typing.Tuple[int, int]] = None,
        out: typing.TextIO = sys.stderr,
        verbose: bool = False,
):
    """Load a graph from a `KgtkReader` file containing a list of edges and
    edge properties. This code is based on load_graph_from_csv(...) in
    `graph-tool/src/graph_tool/__init__.py`, downloaded from git.skewed.de
    on 27-Jul-2020.

    Parameters
    ----------
    kr : ``KgtkReader``

    directed : ``bool`` (optional, default: ``False``)
        Whether or not the graph is directed.

    eprop_types : list of ``str`` (optional, default: ``None``)
        List of edge property types to be read from remaining columns (if
        this is ``None``, all properties will be of type ``string``).

    hashed : ``bool`` (optional, default: ``True``)
        If ``True`` the vertex values in the edge list are not assumed to
        correspond to vertex indices directly. In this case they will be
        mapped to vertex indices according to the order in which they are
        encountered, and a vertex property map with the vertex values is
        returned.

    hash_type : ``str`` (optional, default: ``string``)
        If ``hashed == True``, this will determine the type of the vertex
        values. It can be any property map value type (see
        :class:`PropertyMap`). Note: As of 29-Jul-2020, this parameter to
        graph.add_edge_list(...) is supported in the git version of
        graph-tool, but is not mentioned in the graph-tool 2.33
        documentation.

    ecols : pair of ``int`` (optional, default: ``(0,1)``)
        Line columns used as source and target for the edges.

    Returns
    -------
    g : :class:`~graph_tool.Graph`
        The loaded graph. It will contain additional columns in the file as
        internal edge property maps. If ``hashed == True``, it will also
        contain an internal vertex property map with the vertex names.
    """
    # r may be wrapped for column reordering and/or non-hashed use.
    r = kr

    if ecols is None:
        ecols = (kr.node1_column_idx, kr.node2_column_idx)

    if ecols != (0, 1):
        def reorder(rows):
            for row in rows:
                row = list(row)
                s = row[ecols[0]]
                t = row[ecols[1]]
                del row[min(ecols)]
                del row[max(ecols) - 1]
                yield [s, t] + row
        r = reorder(r)

    if not hashed:
        def conv(rows):
            for row in rows:
                row = list(row)
                row[0] = int(row[0])
                row[1] = int(row[1])
                yield row
        r = conv(r)

    g = Graph(directed=directed)

    if eprop_types is None:
        if verbose:
            print("eprop_types is None", file=out, flush=True)
        eprops = [g.new_ep("string") for x in kr.column_names[2:]]
    else:
        if verbose:
            print("eprop_types: [%s]" % (", ".join([repr(x) for x in eprop_types])),
                  file=out, flush=True)
        if len(eprop_types) != kr.column_count - 2:
            raise ValueError("There are %d eprop columns and %d eprop types."
                             % (kr.column_count - 2, len(eprop_types)))
        eprops = [g.new_ep(t) for t in eprop_types]

    # 29-Jul-2020: This is supported in the git.skewed.de repository, and
    # presumably will be supported in a future release. Unfortunately,
    # graph-tool doesn't appear to include a version indicator or API
    # version indicator easily accessible from Python.
    #
    # name = g.add_edge_list(itertools.chain([line], r), hashed=hashed,
    #                        hash_type=hash_type, eprops=eprops)
    if verbose:
        print("Adding edges from the input file.", file=out, flush=True)
    name = g.add_edge_list(r, hashed=hashed, eprops=eprops)
    if verbose:
        print("Done adding edges from the input file.", file=out, flush=True)
    g.vp.name = name

    eprop_names: typing.List[str] = list(kr.column_names)
    del eprop_names[min(ecols)]
    del eprop_names[max(ecols) - 1]
    if verbose:
        print("eprop_names: [%s]" % (", ".join([repr(x) for x in eprop_names])),
              file=out, flush=True)

    for i, p in enumerate(eprops):
        ename = eprop_names[i]
        g.ep[ename] = p
        if verbose:
            print("prop %d name=%s" % (i, repr(ename)), file=out, flush=True)

    return g

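# Hypothetical call of the loader above, assuming a KGTK edge file opened
# with the KgtkReader API that the function expects:
# kr = KgtkReader.open(Path("edges.tsv"))
# g = load_graph_from_kgtk(kr, directed=True, verbose=True)
# print(g.num_vertices(), g.num_edges())
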
class FullGraphBuilder:
    def __init__(self):
        self.graph = Graph(directed=False)
        self.codes = []
        self.sources = []
        self.labels = []
        self.attrs = set()

    def add_nodes(self, df, ns):
        n = len(df)
        _log.info('adding %d nodes to graph', n)
        start = self.graph.num_vertices()
        vs = self.graph.add_vertex(n)
        end = self.graph.num_vertices()
        assert end - start == n
        nodes = pd.Series(np.arange(start, end, dtype='i4'), index=df['id'])
        self.codes.append(df['id'].values + ns.offset)
        self.sources.append(np.full(n, ns.code, dtype='i2'))
        if 'label' in df.columns:
            self.labels += list(df['label'].values)
        else:
            self.labels += list(df['id'].astype('str').values)
        for c in df.columns:
            if c in ['id', 'label']:
                continue
            if c not in self.attrs:
                vp = self.graph.new_vp('string')
                self.graph.vp[c] = vp
                self.attrs.add(c)
            else:
                vp = self.graph.vp[c]
            for v, val in zip(vs, df[c].values):
                vp[v] = val
        return nodes

    def add_edges(self, f, src, dst):
        _log.info('adding %d edges to graph', len(f))
        edges = np.zeros((len(f), 2), dtype='i4')
        edges[:, 0] = src.loc[f.iloc[:, 0]]
        edges[:, 1] = dst.loc[f.iloc[:, 1]]
        self.graph.add_edge_list(edges)

    def finish(self):
        _log.info('setting code attributes')
        code_a = self.graph.new_vp('int64_t')
        code_a.a[:] = np.concatenate(self.codes)
        self.graph.vp['code'] = code_a

        _log.info('setting source attributes')
        source_a = self.graph.new_vp('string')
        for v, s in zip(self.graph.vertices(), np.concatenate(self.sources)):
            source_a[v] = src_label_rev[s]
        self.graph.vp['source'] = source_a

        _log.info('setting label attributes')
        label_a = self.graph.new_vp('string')
        for v, l in zip(self.graph.vertices(), self.labels):
            label_a[v] = l
        self.graph.vp['label'] = label_a

        return self.graph

class ob_viz(QWidget):
    def __init__(self, bg_color):
        QWidget.__init__(self)
        self.background_color = bg_color
        self.c = 0
        # K = 0.5
        # how many iterations the realignment is supposed to take
        self.step = 15
        self.rwr_c = 0
        # dumper([qt_coords])
        dumper(['obv viz init'])
        # self.show()
        # with open("/tmp/eaf3.csv", "a") as fo:
        #     wr = csv.writer(fo)
        #     wr.writerow([self.c, "runs4"])
        # dumper([self.c, "runs4"])
        # self.node_names [g_id[i] for i in g.vertices()]

    def init2(self, emacs_var_dict):
        self.emacs_var_dict = emacs_var_dict
        self.link_str = self.emacs_var_dict['links']
        self.g = Graph()
        self.label_ep = self.g.new_edge_property("string")
        self.links = self.link_str.split(";")

        link_tpls = [i.split(" -- ") for i in self.links]
        dumper([str(i) for i in link_tpls])

        self.g_id = self.g.add_edge_list(link_tpls, hashed=True,
                                         string_vals=True,
                                         eprops=[self.label_ep])

        self.adj = np.array([(int(i.source()), int(i.target()))
                             for i in self.g.edges()])
        self.node_names = [self.g_id[i] for i in self.g.vertices()]
        self.vd = {}
        for i in self.g.vertices():
            self.vd[self.g_id[i]] = int(i)

        # self.pos_vp = sfdp_layout(self.g, K=0.5)
        self.pos_vp = fruchterman_reingold_layout(self.g)
        self.base_pos_ar = self.pos_vp.get_2d_array((0, 1)).T
        self.qt_coords = self.nolz_pos_ar(self.base_pos_ar)

        dumper([str(self.qt_coords)])
        # dumper([link_str])

    def update_graph(self, emacs_var_dict):
        """set new links and nodes"""
        new_link_str = emacs_var_dict['links']
        new_links = new_link_str.split(";")
        new_link_tpls = [i.split(" -- ") for i in new_links]

        links_to_add = list(set(new_links) - set(self.links))
        links_to_del = list(set(self.links) - set(new_links))
        # setting new stuff
        self.links = new_links

        new_nodes = []
        for tpl in new_link_tpls:
            new_nodes.append(tpl[0])
            new_nodes.append(tpl[1])
        new_nodes_unique = list(set(new_nodes))

        nodes_to_del = list(set(self.node_names) - set(new_nodes_unique))
        nodes_to_add = list(set(new_nodes_unique) - set(self.node_names))

        dumper([
            "nodes_to_add: ", nodes_to_add, "nodes_to_del: ", nodes_to_del,
            "links_to_add: ", links_to_add, "links_to_del: ", links_to_del
        ])

        # first add nodes + index them, but not there yet (first links)
        for n in nodes_to_add:
            dumper(['adding node'])
            v = self.g.add_vertex()
            # how to set new nodes' positions to their parents'?
            # separate loop afterwards
            self.vd[n] = int(v)
            self.g_id[v] = n

        del_node_ids = [self.vd[i] for i in nodes_to_del]
        self.g.remove_vertex(del_node_ids)

        # have to reindex after deletion
        self.vd = {}
        for i in self.g.vertices():
            self.vd[self.g_id[i]] = int(i)

        dumper(['node deleted'])
        # nodes_to_del_id =
        # dumper(['old nodes deleted, add new links'])

        for l in links_to_add:
            tpl = l.split(" -- ")
            n0, n1 = tpl[0], tpl[1]
            self.g.add_edge(self.vd[n0], self.vd[n1])

        # dumper(['new links added, delete old links'])

        for l in links_to_del:
            tpl = l.split(" -- ")
            n0 = tpl[0]
            n1 = tpl[1]
            dumper([list(self.vd.keys())])
            # only remove edge when neither of nodes removed
            if n0 in self.vd.keys() and n1 in self.vd.keys():
                self.g.remove_edge(self.g.edge(self.vd[n0], self.vd[n1]))

        # dumper(['graph modifications done'])

        # set positions of new nodes to parent nodes
        for n in nodes_to_add:
            v = self.g.vertex(self.vd[n])
            v_prnt = list(v.all_neighbors())[0]
            self.pos_vp[v] = self.pos_vp[v_prnt]

        # dumper(['node positions adjusted'])

        self.adj = np.array([(int(i.source()), int(i.target()))
                             for i in self.g.edges()])
        self.node_names = [self.g_id[i] for i in self.g.vertices()]

        # dumper(['storage objects updated'])
        # dumper(["nbr_edges new: ", str(len([i for i in self.g.edges()]))])
        # dumper(['nodes_to_add'] + nodes_to_add)
        # seems to work
        dumper(['to here'])
        self.recalculate_layout()
        dumper(['to here2'])

    def recalculate_layout(self):
        """calculate new change_array, set rwr_c counter"""
        dumper(['recalculating starting'])
        self.base_pos_ar = self.pos_vp.get_2d_array((0, 1)).T

        # set_dict = {'p': 2, 'max_level': 20, 'adaptive_cooling': False,
        #             'gamma': 1, 'theta': 1, 'cooling_step': 0.3, 'C': 0.6,
        #             'mu_p': 1.2}
        # self.goal_vp = sfdp_layout(self.g, K=0.5, pos=self.pos_vp, **set_dict)
        self.goal_vp = fruchterman_reingold_layout(self.g, pos=self.pos_vp)

        goal_ar = self.goal_vp.get_2d_array([0, 1]).T
        self.chng_ar = (goal_ar - self.base_pos_ar) / self.step

        self.rwr_c = self.step
        dumper(["base_pos_ar: ", self.base_pos_ar])
        dumper(["goal_ar: ", goal_ar])
        dumper(["chng_ar: ", self.chng_ar])
        dumper(['recalculating done'])

    def redraw_layout(self):
        """actually do the drawing, run multiple (step (rwr_c)) times"""
        self.cur_pos_ar = np.round(
            self.base_pos_ar + self.chng_ar * (self.step - self.rwr_c), 3)
        self.qt_coords = self.nolz_pos_ar(self.cur_pos_ar)

        self.rwr_c -= 1
        self.update()
        # dumper(['redrawing'])

    # def draw_arrow(qp, p1x, p1y, p2x, p2y):
    def draw_arrow(self, qp, p1x, p1y, p2x, p2y, node_width):
        """draw arrow from p1 to rad units before p2"""
        # get arrow angle, counterclockwise from center -> east line
        # dumper(['painting time'])
        angle = degrees(atan2((p1y - p2y), (p1x - p2x)))

        # calculate attach point
        arw_goal_x = p2x + node_width * cos(radians(angle))
        arw_goal_y = p2y + node_width * sin(radians(angle))

        # calculate start point: idk how trig works but does
        start_px = p1x - node_width * cos(radians(angle))
        start_py = p1y - node_width * sin(radians(angle))

        # arrow stuff: +/- 30 deg
        ar1 = angle + 25
        ar2 = angle - 25
        arw_len = 10

        # need to focus on vector from p2 to p1
        ar1_x = arw_goal_x + arw_len * cos(radians(ar1))
        ar1_y = arw_goal_y + arw_len * sin(radians(ar1))
        ar2_x = arw_goal_x + arw_len * cos(radians(ar2))
        ar2_y = arw_goal_y + arw_len * sin(radians(ar2))

        # qp.drawLine(p1x, p1y, p2x, p2y)
        # qp.drawLine(p1x, p1y, arw_goal_x, arw_goal_y)
        qp.drawLine(start_px, start_py, arw_goal_x, arw_goal_y)
        qp.drawLine(ar1_x, ar1_y, arw_goal_x, arw_goal_y)
        qp.drawLine(ar2_x, ar2_y, arw_goal_x, arw_goal_y)

    def paintEvent(self, event):
        # dumper(['start painting'])
        node_width = 10
        qp = QPainter(self)

        edges = [(self.qt_coords[i[0]], self.qt_coords[i[1]])
                 for i in self.adj]
        # dumper([str(i) for i in edges])

        qp.setPen(QPen(Qt.green, 2, Qt.SolidLine))
        # [qp.drawLine(e[0][0], e[0][1], e[1][0], e[1][1]) for e in edges]
        [
            self.draw_arrow(qp, e[0][0], e[0][1], e[1][0], e[1][1],
                            (node_width / 2) + 5) for e in edges
        ]

        qp.setPen(QColor(168, 34, 3))
        # qp.setPen(Qt.green)
        qp.setFont(QFont('Decorative', 10))
        [
            qp.drawText(t[0][0] + node_width, t[0][1], t[1])
            for t in zip(self.qt_coords, self.node_names)
        ]
        # dumper(['done painting'])

        qp.setPen(QPen(Qt.black, 3, Qt.SolidLine))
        # qp.setBrush(QBrush(Qt.green, Qt.SolidPattern))
        dumper(['painting nodes'])
        for i in zip(self.qt_coords, self.node_names):
            if self.emacs_var_dict['cur_node'] == i[1]:
                qp.setPen(QPen(Qt.black, 4, Qt.SolidLine))
                qp.drawEllipse(i[0][0] - (node_width / 2),
                               i[0][1] - (node_width / 2),
                               node_width, node_width)
                qp.setPen(QPen(Qt.black, 3, Qt.SolidLine))
            else:
                qp.drawEllipse(i[0][0] - (node_width / 2),
                               i[0][1] - (node_width / 2),
                               node_width, node_width)

        # qp.drawEllipse(self.c, self.c, 7, 7)
        # qp.end()

    def nolz_pos_ar(self, pos_ar_org):
        """normalize pos ar to window limits"""
        # pos_ar_org = goal_ar
        size = self.size()
        limits = [[20, size.width() - 50], [20, size.height() - 20]]

        x_max = max(pos_ar_org[:, 0])
        x_min = min(pos_ar_org[:, 0])
        y_max = max(pos_ar_org[:, 1])
        y_min = min(pos_ar_org[:, 1])

        # need linear mapping function again
        pos_ar2 = pos_ar_org
        pos_ar2[:, 0] = (((pos_ar2[:, 0] - x_min) / (x_max - x_min)) *
                         (limits[0][1] - limits[0][0])) + limits[0][0]
        pos_ar2[:, 1] = (((pos_ar2[:, 1] - y_min) / (y_max - y_min)) *
                         (limits[1][1] - limits[1][0])) + limits[1][0]
        return pos_ar2

n_entries = sum(map(len, q2us.values()))
data = np.ones(n_entries)
row_idx = []
col_idx = []
for q, us in q2us.items():
    row_idx += [q2id_map[q]] * len(us)
    col_idx += [u2id_map[u] for u in us]

assert len(data) == len(row_idx) == len(col_idx)

m = sp.csr_matrix((data, (row_idx, col_idx)),
                  shape=(len(q2id_map), len(u2id_map)))
qm = m * m.T  # question adjacency matrix via unipartite projection

g = Graph()
edges = zip(*qm.nonzero())
g.add_edge_list(edges)

vfilt = label_largest_component(g)
f = np.sum(vfilt.a) / len(vfilt.a)
print('fraction of nodes in largest cc: {}'.format(f))

prop_question_id = g.new_vertex_property('int')
prop_question_id.a = np.array(list(id2q_map.values()))

# focus on largest CC
g.set_vertex_filter(vfilt)

# re-index the graph
# SO question: https://stackoverflow.com/questions/46264296/graph-tool-re-index-vertex-ids-to-be-consecutive-integers
n2i = {n: i for i, n in enumerate(g.vertices())}

def cumulative_cooccurrence_graph(steps, sequences, directed=False):
    '''cumulative_cooccurrence_graph
    Creates a cumulative cooccurrence graph.

    Parameters
    ----------
    steps : :obj:`iter` of :obj:`int` or :obj:`str`
        A series that contains sequential labels for the nested groups.
    sequences : :obj:`iter` of :obj:`iter` of :obj:`int`
        Nested iterable of integers representing vertices in the graph.
        Number of nested iterables should be equal to `len(steps)`.
    directed : :obj:`bool`
        Currently has no effect. In future this will determine whether to
        build a bi-directional cooccurrence graph.

    Returns
    -------
    g : :obj:`graph_tool.Graph`
        A graph. Vertices are elements. Edges link terms that have
        cooccurred at least once in the series.
    o_props : :obj:`dict`
        Property maps with vertex occurrence values at each step.
    o_cumsum_props : :obj:`dict`
        Property maps with cumulative vertex occurrence values at each step.
    co_props : :obj:`dict`
        Property maps with edge cooccurrence values at each step.
    co_cumsum_props : :obj:`dict`
        Property maps with cumulative edge cooccurrence values at each step.
    '''
    g = Graph(directed=directed)

    o_total = Counter(chain(*chain(*sequences)))
    n_vertices = len(o_total)
    g.add_vertex(n_vertices)
    o_max = dict_to_vertex_prop(g, o_total, 'int')

    co_total = cooccurrence_counts(chain(*sequences))
    edge_list = ((c[0], c[1], count) for c, count in co_total.items())
    co_max = g.new_edge_property('int')
    g.add_edge_list(edge_list, eprops=[co_max])

    edges = g.get_edges()
    edge_indices = dict(zip([(e[0], e[1]) for e in edges], edges[:, 2]))

    o_props = {}
    co_props = {}
    o_cumsum_props = {}
    co_cumsum_props = {}

    for i, (step, seq) in enumerate(zip(steps[:-1], sequences[:-1])):
        logging.info(f'Calculating cooccurrences at step {step}')
        o_step = Counter(chain(*seq))
        o_props[step] = dict_to_vertex_prop(g, o_step, 'int')

        combos = (combinations(sorted(ids), 2) for ids in seq)
        co_step = Counter(chain(*combos))
        co_props[step] = dict_to_edge_prop(g, co_step, 'int', edge_indices)

        o_cumsum = g.new_vertex_property('int')
        co_cumsum = g.new_edge_property('int')
        if i == 0:
            o_cumsum.a = o_cumsum.a + o_props[step].a
            co_cumsum.a = co_cumsum.a + co_props[step].a
        else:
            o_cumsum.a = o_cumsum_props[steps[i - 1]].a + o_props[step].a
            co_cumsum.a = co_cumsum_props[steps[i - 1]].a + co_props[step].a
        o_cumsum_props[step] = o_cumsum
        co_cumsum_props[step] = co_cumsum

    # fill in the last step without needing to count occurrences
    # or cooccurrences
    step_max = steps[-1]
    o = g.new_vertex_property('int')
    co = g.new_edge_property('int')
    o.a = o_max.a - o_cumsum.a
    co.a = co_max.a - co_cumsum.a
    o_props[step_max] = o
    co_props[step_max] = co
    o_cumsum_props[step_max] = o_max
    co_cumsum_props[step_max] = co_max

    steps_prop = g.new_graph_property('vector<int>')
    steps_prop.set_value(steps)
    g.gp['steps'] = steps_prop

    return g, o_props, o_cumsum_props, co_props, co_cumsum_props

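# A small worked call of the builder above (assumes the helper functions
# dict_to_vertex_prop, cooccurrence_counts and dict_to_edge_prop referenced
# in the body are in scope; the data is made up):
# steps = [0, 1]
# sequences = [
#     [[0, 1], [1, 2]],  # step 0: two sequences
#     [[0, 2]],          # step 1: one sequence
# ]
# g, o, o_cum, co, co_cum = cumulative_cooccurrence_graph(steps, sequences)
# assert g.num_vertices() == 3  # elements 0, 1 and 2
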
def line():
    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (2, 3)])
    return g