def block_annotation(graph, state):
    """Tag every vertex with its block and its connected component inside it.

    The tag of a vertex is the block label (or, when ``args.hierarchical``
    is set, the chain of non-redundant hierarchical block labels from the
    coarsest to the finest level) followed by the label of the connected
    component the vertex belongs to within its lowest-level block, all
    joined with ``'_'``.

    Returns:
        A tuple ``(b, bcc, bcc4)`` where ``b`` is the lowest-level block
        property map, ``bcc`` is a string vertex property holding the tag,
        and ``bcc4`` holds the same tag only when the component's entry in
        the ``label_components`` histogram is at least 4, ``'-'`` otherwise.
    """
    bcc = graph.new_vertex_property('string')
    bcc4 = graph.new_vertex_property('string')

    def tag_block(view, prefix):
        # Label the components of the block-induced view and write the tags.
        comp, hist = gt.label_components(view)
        for v in view.vertices():
            tag = '_'.join(prefix + [str(comp[v])])
            bcc[v] = tag
            bcc4[v] = tag if hist[comp[int(v)]] >= 4 else '-'

    if not args.hierarchical:
        b = state.get_blocks()
        for i in np.unique(b.a):
            tag_block(gt.GraphView(graph, vfilt=(b.a == i)), [str(i)])
        return (b, bcc, bcc4)

    levels = state.get_levels()
    top = len(levels) - 1

    def is_redundant(l):
        # A level is redundant when projecting it onto the vertices gives
        # the same partition as the level right above it.
        lower = state.project_partition(l, 0).a
        upper = state.project_partition(l + 1, 0).a
        return gt.partition_overlap(lower, upper, norm=True) == 1

    # Entry k of `redundant` describes level `top - k`; the top level is
    # always kept.
    redundant = [False]
    redundant.extend(is_redundant(l) for l in reversed(range(top)))
    nr_levels = [top - k for k, flag in enumerate(redundant) if not flag]

    b = levels[0].get_blocks()
    for i in np.unique(b.a):
        view = gt.GraphView(graph, vfilt=(b.a == i))
        # Climb the hierarchy from one representative vertex of the block,
        # recording the block label at every non-redundant level.
        prefix = []
        r = view.get_vertices()[0]
        for depth, level in enumerate(levels):
            r = level.get_blocks()[r]
            if depth in nr_levels:
                prefix.append(str(r))
        prefix.reverse()
        tag_block(view, prefix)
    return (b, bcc, bcc4)
def Initialization(graph, bqueue):
    '''
    Build the initial partition for the fibration algorithm from the
    strongly connected components (SCCs) of `graph`.

    Every vertex is assigned to its SCC with graph_tool's
    `label_components`; each SCC is then classified (`check_input` /
    `classify_strong`) and placed according to its `type`:

      * 0 - receives external input: nodes go to the first block.
      * 1 - receives no external input: nodes get a block of their own.
      * 2 - no external input, isolated autoregulated node: the node goes
            to the first block and additionally gets a pivot block.

    `graph.vp.fiber_index` is filled with the partition index of every
    node. Copies of all partition blocks, followed by copies of the pivot
    blocks, are appended to `bqueue`.

    Returns the list of partition blocks.
    '''
    label_scc, hist = gt.label_components(graph, directed=True)

    # Collect, for each SCC label, the indices of the nodes belonging to it.
    fibers_listing = defaultdict(list)
    for v in graph.get_vertices():
        fibers_listing[label_scc[v]].append(int(v))

    # scc[j] holds the information about the nodes inside the j-th SCC.
    scc = []
    for scc_j in range(hist.shape[0]):
        component = StrongComponent()
        for n in fibers_listing[scc_j]:
            component.insert_node(n)
        scc.append(component)

    partition = [FiberBlock()]
    autopivot = []
    # Determine whether each SCC receives input from other components.
    for strong in scc:
        strong.check_input(graph)
        strong.classify_strong(graph)
        if strong.type == 0:
            # Receives external input.
            for node in strong.get_nodes():
                partition[0].insert_node(node)
        elif strong.type == 1:
            # Does not receive any external input.
            partition.append(FiberBlock())
            for node in strong.get_nodes():
                partition[-1].insert_node(node)
        elif strong.type == 2:
            # No external input, but an isolated autoregulated node.
            node = strong.get_nodes()[0]
            autopivot.append(FiberBlock())
            autopivot[-1].insert_node(node)
            partition[0].insert_node(node)

    fiber_index = graph.vp.fiber_index
    for index, init_class in enumerate(partition):
        bqueue.append(copy_class(init_class))
        for v in init_class.get_nodes():
            fiber_index[v] = index

    for isolated in autopivot:
        bqueue.append(copy_class(isolated))

    return partition
def get_blocksCC(graph, blocks):
    """Return a string vertex property ``"<block>_<component>"``.

    For every distinct value in ``blocks.a`` the vertices carrying that value
    are isolated in a filtered view, the connected components of the view are
    labelled, and each vertex receives the block value and its component
    label joined by an underscore.
    """
    v_Bcc = graph.new_vertex_property("string")
    for block_id in np.unique(blocks.a):
        view = gt.GraphView(graph, vfilt=(blocks.a == block_id))
        comp, _ = gt.label_components(view)
        prefix = str(block_id) + '_'
        for v in view.vertices():
            v_Bcc[v] = prefix + str(comp[v])
    return v_Bcc
def LabelComponents(g, filename):
    """Label the components of `g` (ignoring edge direction) and save it.

    The per-vertex component labels are stored as the vertex property
    ``"conn_components"`` and the component histogram as the graph property
    ``"component_dist"``; the graph is then written to `filename` so the
    labels do not have to be computed again.
    """
    components, dist = gt.label_components(g, directed=False)

    # First register the graph property, then assign its value.
    # NOTE(review): presumably the second assignment stores `dist` in the
    # property created by the first -- confirm against graph-tool's
    # graph-property dictionary semantics.
    g.gp["component_dist"] = g.new_graph_property("vector<int16_t>")
    g.gp["component_dist"] = dist
    g.vp["conn_components"] = components

    # Save to file so we don't have to compute them again.
    print("Saving result in file...")
    g.save(filename)
def get_nodes_of_component(self, node):
    """Return the indices of all vertices in the same component as `node`.

    `node` may be a vertex index or a ``gt.Vertex`` (converted with ``int``).
    The result is a list of integer indices in increasing order and includes
    `node` itself.
    """
    node_index = int(node) if isinstance(node, gt.Vertex) else node
    comp, _ = gt.label_components(self._g, attractors=False)
    labels = comp.a
    group = labels[node_index]
    return [i for i in range(len(labels)) if labels[i] == group]
def Initialization(graph):
    '''
    Build the initial list of fiber blocks from the strongly connected
    components (SCCs) of `graph`.

    Every vertex is assigned to its SCC with graph_tool's
    `label_components`; each SCC is classified (`check_input` /
    `classify_strong`) and placed according to its `type`:

      * 0 - all nodes go to the first block.
      * 1 - the nodes get a new block of their own.
      * 2 - the first node of the SCC goes to the first block.

    Returns the list of fiber blocks.
    '''
    label_scc, hist = gt.label_components(graph, directed=True)

    # Collect, for each SCC label, the indices of the nodes belonging to it.
    fibers_listing = defaultdict(list)
    for v in graph.get_vertices():
        fibers_listing[label_scc[v]].append(int(v))

    # scc[j] holds the information about the nodes inside the j-th SCC.
    scc = []
    for scc_j in range(hist.shape[0]):
        component = StrongComponent()
        for n in fibers_listing[scc_j]:
            component.insert_node(n)
        scc.append(component)

    # Determine whether each SCC receives input from other components.
    fibers = [FiberBlock()]
    for strong in scc:
        strong.check_input(graph)
        strong.classify_strong(graph)
        if strong.type == 0:
            for node in strong.get_nodes():
                fibers[0].insert_node(node)
        elif strong.type == 1:
            fibers.append(FiberBlock())
            for node in strong.get_nodes():
                fibers[-1].insert_node(node)
        elif strong.type == 2:
            fibers[0].insert_node(strong.get_nodes()[0])

    return fibers
def load(self, file_name):
    """Restore the graph and its lookup tables from a pickled model file.

    The file is looked up as ``DATA_PATH/models/<file_name>`` and must hold,
    in this order: the graph, the lemma-to-vertex map, the synset-to-vertex
    map and the index of the root vertex. After loading, the graph is made
    undirected, the largest component label is printed and the root depths
    are recomputed.

    Raises:
        ValueError: if the file does not exist.
    """
    file_path = os.path.join(DATA_PATH, "models", file_name)
    if not os.path.isfile(file_path):
        raise ValueError("File {} does not exists.".format(file_path))

    # NOTE(review): pickle executes arbitrary code while loading -- only
    # model files from a trusted source should ever be passed here.
    with open(file_path, 'rb') as file:
        self.g = pickle.load(file)
        self.lemma_to_vertex_id = pickle.load(file)
        self.synset_to_vertex_id = pickle.load(file)
        v_root_index = pickle.load(file)

    self.v_root = self.g.vertex(v_root_index)
    self.g.set_directed(False)

    component_labels = graph_tool.label_components(self.g)[0]
    print(max(component_labels.a))
    self._count_root_depth()
def is_arborescence(tree):
    """Return True when `tree` is a rooted tree with edges pointing away
    from the root.

    Three conditions are checked in turn: every vertex carries component
    label 0 when edge direction is ignored, no vertex has in-degree above 1,
    and exactly ``num_vertices - 1`` vertices have in-degree 1. Finally it is
    asserted that ``get_roots`` reports a single root.
    """
    labels, _ = label_components(GraphView(tree, directed=False))
    if not np.all(np.array(labels.a) == 0):
        return False

    in_degs = np.array([v.in_degree() for v in tree.vertices()])
    n_with_parent = tree.num_vertices() - 1
    if in_degs.max() > 1 or np.sum(in_degs == 1) != n_with_parent:
        return False

    roots = get_roots(tree)
    assert len(roots) == 1, '>1 roots'
    return True
def get_nodes_for_component_id(self, c_id):
    """Return the vertices whose component label equals `c_id`.

    When `c_id` is not smaller than the number of components a message is
    printed and an empty list is returned. The scan over the vertices stops
    as soon as as many vertices as the component's histogram entry have been
    collected.
    """
    comp, hist = gt.label_components(self._g, attractors=False)
    n_components = len(hist)
    if c_id >= n_components:
        print(
            "Received invalid id ({}). PRM has only {} components.".format(
                c_id, n_components))
        return []

    # TODO: can be implemented more efficiently with numpy.
    members = []
    for v in self.vertices():
        if comp.a[int(v)] != c_id:
            continue
        members.append(v)
        if len(members) >= hist[c_id]:
            break
    return members
def graph_mis(G):
    """Return the vertices of `G` lying on a path between cyclic components.

    The graph is condensed by its component labels. A condensed vertex is a
    "cyclic" component when it groups more than one vertex of `G` or when
    its single vertex has a self-loop. The result lists the vertices of `G`
    belonging to every condensed vertex returned by ``descendents`` both on
    the condensation graph and on its reversal. Timings of the individual
    stages and the component membership lists are printed along the way.

    NOTE(review): the code treats the result of ``gt.label_components`` as a
    single property map and unpacks three values from
    ``gt.condensation_graph`` -- presumably written against an old
    graph-tool release; confirm against the installed version.
    """
    t = time.time()
    comp_prop = gt.label_components(G)
    C, vertex_count, _edge_count = gt.condensation_graph(G, comp_prop)
    print("1 {}".format(time.time() - t))

    t = time.time()
    # nodes[c] lists the vertices of G that were merged into condensed
    # vertex c.
    nodes = [[] for _ in range(C.num_vertices())]
    for v in G.vertices():
        nodes[comp_prop[v]].append(int(v))
    print(nodes)
    print("3 {}".format(time.time() - t))

    t = time.time()
    comps = []
    for v in C.vertices():
        if vertex_count.a[int(v)] > 1:
            comps.append(v)
        else:
            w = nodes[int(v)][0]  # single vertex: cyclic only with a self-loop
            if G.edge(w, w):
                comps.append(v)
    print("4 {}".format(time.time() - t))

    t = time.time()
    upstream = descendents(C, comps)
    C.set_reversed(True)
    downstream = descendents(C, comps)
    cnodes = upstream & downstream
    mis = list(itertools.chain.from_iterable(nodes[c] for c in cnodes))
    print("5 {}".format(time.time() - t))
    return mis
def _components(self, k, k_cores, network):
    """Return the mapped connected components of the `k`-core of `network`.

    Works on a copy of `network`: vertices whose `k_cores` value is below
    `k` are filtered out and purged, the remaining graph is labelled
    (ignoring edge direction), and the labels are passed through
    ``_group_labels`` and ``_map_vertexs``.
    """
    network_cpy = network.copy()
    network_cpy.vp['GRASP'] = network_cpy.new_vertex_property('bool')
    keep = network_cpy.vp['GRASP']
    for v in network_cpy.vertices():
        keep[v] = bool(k_cores[v] >= k)

    network_cpy.set_vertex_filter(keep)
    network_cpy.purge_vertices()

    labels, _ = gt.label_components(network_cpy, directed=False)
    grouped = self._group_labels(labels, network_cpy)
    return self._map_vertexs(network_cpy, grouped)
def get_component_masks(self, min_vertices=0):
    """Return one boolean vertex mask per connected component.

    Components are computed ignoring edge direction. A component is skipped
    when its histogram entry is smaller than `min_vertices`.

    Args:
        min_vertices: minimum histogram entry for a component to be kept.

    Returns:
        ``(masks, hist)``: the list of boolean vertex property maps (in
        increasing label order) and the histogram returned by
        ``label_components``.
    """
    component_vp, hist = gt.label_components(self.g,
                                             directed=False,
                                             attractors=False)
    labels = component_vp.a
    masks = []
    # Iterate over every label. The previous bound, max(max_label, 1),
    # silently dropped the last component whenever there were two or more,
    # and max() failed on a graph without vertices.
    for label, size in enumerate(hist):
        if size < min_vertices:
            continue
        cc_vp = self.g.new_vertex_property("bool")
        cc_vp.a = labels == label
        masks.append(cc_vp)
    return masks, hist
def graph_stats(graph):
    """Return ``[average clustering value, number of components]``.

    For every vertex with at least two out-neighbours, the number of
    neighbours shared with each of its out-neighbours is summed and divided
    by ``k * (k - 1)`` (``k`` being its out-neighbour count). These values
    are summed over the vertices and divided by the total vertex count.
    """
    neighbors = {
        int(vertex): {int(n) for n in vertex.out_neighbours()}
        for vertex in graph.vertices()
    }

    total = 0
    for vertex in graph.vertices():
        adjacent = neighbors[int(vertex)]
        degree = len(adjacent)
        if degree < 2:
            continue
        shared = sum(len(neighbors[int(n)] & adjacent) for n in adjacent)
        total += shared / (degree * (degree - 1))

    _, histogram = gt.label_components(graph)
    return [
        total / graph.num_vertices(),
        len(histogram),
    ]
def prune_feats(cls, X, feat_defs, lambda_value=0.9, measure='cosine_similarity'):
    """Drop last-layer features that are too similar to earlier features.

    The columns of `X` are compared with the similarity function named by
    `measure`. Each last-layer column (the trailing ``len(feat_defs[-1])``
    columns) is linked to every earlier column whose similarity exceeds
    `lambda_value`. Within each connected component of the resulting graph
    only the first last-layer column is kept as representative.

    `feat_defs[-1]` is modified in place.

    Returns:
        ``(X, feat_defs)`` with the pruned columns and definitions removed.
    """
    n, d = X.shape
    last_defs = feat_defs[-1]
    n_last_feat_defs = len(last_defs)
    first_last_col = d - n_last_feat_defs

    ug = gt.Graph(directed=False)
    for _ in range(d):
        ug.add_vertex()
    ug.edge_properties['weight'] = ug.new_edge_property("double")
    weight = ug.edge_properties['weight']

    # NOTE(review): `measure` is evaluated as Python source -- it must never
    # come from untrusted input.
    sim_mat = eval(measure + '(X.transpose())')
    for i in range(first_last_col, d):
        for j in range(first_last_col):
            if sim_mat[i, j] > lambda_value:
                weight[ug.add_edge(i, j)] = sim_mat[i, j]

    comp_labels, _ = gt.label_components(ug)
    repr_feat_defs = []
    remove_X_cols = []
    for comp_label in np.unique(comp_labels.a):
        members = np.where(comp_labels.a == comp_label)[0]
        # Only the last layer's columns are candidates.
        members = members[members >= first_last_col]
        if len(members) == 0:
            continue
        # The first one is the representative; the others are removed.
        repr_feat_defs.append(last_defs[members[0] - first_last_col])
        remove_X_cols.extend(members[1:])

    # repr_feat_defs may be ordered differently from the original list, so
    # removal is decided by membership; delete from the back so the remaining
    # indices stay valid.
    remove_feat_idices = [
        i for i, feat in enumerate(last_defs) if feat not in repr_feat_defs
    ]
    for index in reversed(remove_feat_idices):
        del last_defs[index]

    X = np.delete(X, remove_X_cols, axis=1)
    return X, feat_defs
def _refinement(graph, threshold):
    """Split `graph` after removing vertices with outlying betweenness.

    Each vertex is scored by the absolute deviation of its betweenness from
    the median, divided by the median of those deviations (all scores are
    zero when that median is zero). Vertices scoring below `threshold` are
    kept; every connected component of the remainder with a histogram entry
    above 1 is returned as a new pruned, undirected graph.
    """
    betweenness = gt.betweenness(graph)[0].get_array()
    deviation = np.abs(betweenness - np.median(betweenness))
    mdev = np.median(deviation)
    if mdev:
        score = deviation / mdev
    else:
        score = np.zeros_like(deviation)

    kept = gt.GraphView(graph, vfilt=score < threshold)
    comp, hist = gt.label_components(kept)
    return [
        gt.Graph(gt.GraphView(kept, vfilt=(comp.a == label)),
                 prune=True,
                 directed=False)
        for label, size in enumerate(hist)
        if size > 1
    ]
def is_convex(): print("citeseer") print("weighted") np.random.seed(0) attributes_df = pd.read_csv('res/citeseer/citeseer.content', sep="\t", header=None, dtype=np.str) features = attributes_df.iloc[:, 1:-1].to_numpy(dtype=np.int) labels, _ = pd.factorize(attributes_df.iloc[:, -1]) new_ids, old_ids = pd.factorize(attributes_df.iloc[:, 0]) edges_df = pd.read_csv('res/citeseer/citeseer.cites', sep="\t", header=None, dtype=np.str) edges_df = edges_df[edges_df.iloc[:, 0].apply(lambda x: x in old_ids)] edges_df = edges_df[edges_df.iloc[:, 1].apply(lambda x: x in old_ids)] renamed = edges_df.replace(old_ids, new_ids) edges = renamed.to_numpy(dtype=np.int) edges = np.fliplr(edges) g = gt.Graph(directed=True) g.add_edge_list(edges) weight = np.sum(np.abs(features[edges[:, 0]] - features[edges[:, 1]]), axis=1) weight_prop = g.new_edge_property("int", val=1) #weight = g.new_edge_property("double", vals=weight) comps, hist = gt.label_components(g) print(hist) dist_map = gt.shortest_distance(g, weights=weight_prop) #, weights=weight) simple = simplicial_vertices.simplicial_vertices(g) print("n=", g.num_vertices(), "s=", len(simple)) spc = shortest_path_cover_logn_apx(g, weight_prop) pickle.dump(spc, open("res/citeseer/spc_directed_unweighted.p", "wb")) '''intersection_0 = []
def label_core_components(g, core=1):
    """Label the connected components of the `core`-core of `g`.

    Vertices whose k-core value is at least `core` are kept; the components
    of the induced subgraph are labelled.

    The subgraph is taken through a ``GraphView``, so `g` itself is never
    filtered: a vertex filter already installed on `g` is left in place and
    `g` cannot be left in a filtered state if labelling fails.

    Args:
        g: graph with a vertex property ``ids``.
        core: minimum k-core value for a vertex to be kept.

    Returns:
        ``(nodecolor_comp, n_components)``: a ``defaultdict`` mapping the
        ``ids`` value of every kept vertex to its component label (``-1``
        for unknown ids), and the number of components found.
    """
    kc = gt.kcore_decomposition(g)
    nodeId = g.vp.ids

    v_core = g.new_vertex_property('bool')
    for v in g.get_vertices():
        v_core[v] = bool(kc[v] >= core)

    core_view = gt.GraphView(g, vfilt=v_core)
    labels, val = gt.label_components(core_view)

    nodecolor_comp = defaultdict(lambda: -1)
    for v in core_view.get_vertices():
        nodecolor_comp[nodeId[v]] = labels[v]

    return nodecolor_comp, val.shape[0]
def write_classes(filename, graph, state):
    """Write one tab-separated line per vertex with its class annotations.

    Columns: ``Name``, ``RealClass``, ``CComp`` (read from the vertex
    properties of the same name), ``BlockCC`` (block label and the label of
    the connected component inside that block, joined by ``'_'``) and
    ``Block`` (the block label from ``state.get_blocks()``).
    """
    b = state.get_blocks()

    # Block label + connected component inside the block, per vertex index.
    bcc = {}
    for i in np.unique(b.a):
        u = gt.GraphView(graph, vfilt=(b.a == i))
        comp, _ = gt.label_components(u)
        for v in u.vertices():
            bcc[int(v)] = str(i) + '_' + str(comp[v])

    # The context manager closes the file even if a write fails.
    with open(filename, "w") as f:
        f.write("Name\tRealClass\tCComp\tBlockCC\tBlock\n")
        for v in graph.vertices():
            fields = (
                str(graph.vp.Name[v]),
                str(graph.vp.RealClass[v]),
                str(graph.vp.CComp[v]),
                # bcc is keyed by integer index, so look it up the same way.
                str(bcc[int(v)]),
                str(b[v]),
            )
            f.write("\t".join(fields) + "\n")
def is_arborescence(tree):
    """Return True when `tree` is a rooted tree with edges pointing away
    from the root, printing the reason whenever a check fails.

    Three conditions are checked in turn: every vertex carries component
    label 0 when edge direction is ignored, no vertex has in-degree above 1,
    and exactly ``num_vertices - 1`` vertices have in-degree 1. Finally it is
    asserted that ``get_roots`` reports a single root.
    """
    labels, _ = label_components(GraphView(tree, directed=False))
    label_array = np.array(labels.a)
    if not np.all(label_array == 0):
        print('not connected')
        print(label_array)
        return False

    in_degs = np.array([v.in_degree() for v in tree.vertices()])
    if in_degs.max() > 1:
        print('in_degree.max() > 1')
        return False

    n_with_parent = np.sum(in_degs == 1)
    if n_with_parent != (tree.num_vertices() - 1):
        print('should be: only root has no parent')
        return False

    roots = get_roots(tree)
    assert len(roots) == 1, '>1 roots'
    return True
def write_classes(filename, graph, state):
    """Write one tab-separated line per vertex with its PTU annotations.

    Columns: ``AccessionVersion``, ``PTU`` (the ``PtuManual`` vertex
    property), ``CComp``, ``BlockCC`` (block label and the label of the
    connected component inside that block, joined by ``'_'``) and ``Block``
    (the block label from ``state.get_blocks()``).
    """
    b = state.get_blocks()

    # Block label + connected component inside the block, per vertex index.
    bcc = {}
    for i in np.unique(b.a):
        u = gt.GraphView(graph, vfilt=(b.a == i))
        comp, _ = gt.label_components(u)
        for v in u.vertices():
            bcc[int(v)] = str(i) + '_' + str(comp[v])

    # The context manager closes the file even if a write fails.
    with open(filename, "w") as f:
        f.write("AccessionVersion\tPTU\tCComp\tBlockCC\tBlock\n")
        for v in graph.vertices():
            fields = (
                graph.vp.AccessionVersion[v],
                graph.vp.PtuManual[v],
                str(graph.vp.CComp[v]),
                # bcc is keyed by integer index, so look it up the same way.
                bcc[int(v)],
                str(b[v]),
            )
            f.write("\t".join(fields) + "\n")
def ptu_annotation(graph, state):
    """Tag every vertex with its hierarchical blocks and connected component.

    Hierarchy levels are scanned from the top down. A level is recorded while
    the "keep" flag is set; the flag is cleared after a recorded level whose
    projected partition has an adjusted mutual information of at least 1
    with the level below, and set again once that score drops below 1.
    Level 0 is recorded if the flag is still set at the end.

    The tag of a vertex is the chain of its block labels at the recorded
    levels (coarsest first) followed by the label of its connected component
    inside its lowest-level block, joined with ``'_'``.

    Returns:
        ``(bcc, bcc4)``: dictionaries keyed by vertex index. ``bcc`` holds
        the tag; ``bcc4`` holds the tag only when the component's histogram
        entry is at least 4, ``'-'`` otherwise.
    """
    levels = state.get_levels()

    levels_nr = []
    keep = True
    for l in range(len(levels) - 1, 0, -1):
        upper = state.project_partition(l, 0).a
        lower = state.project_partition(l - 1, 0).a
        if keep:
            levels_nr.append(l)
            keep = not adjusted_mutual_info_score(upper, lower) >= 1
        else:
            keep = adjusted_mutual_info_score(upper, lower) < 1
    if keep:
        levels_nr.append(0)

    b = levels[0].get_blocks()
    bcc = {}
    bcc4 = {}
    for i in np.unique(b.a):
        view = gt.GraphView(graph, vfilt=(b.a == i))

        # Climb the hierarchy from one representative vertex of the block,
        # keeping the block label at every recorded level.
        prefix = []
        r = view.get_vertices()[0]
        for depth, level in enumerate(levels):
            r = level.get_blocks()[r]
            if depth in levels_nr:
                prefix.append(str(r))
        prefix.reverse()

        comp, hist = gt.label_components(view)
        for v in view.vertices():
            idx = int(v)
            tag = '_'.join(prefix + [str(comp[v])])
            bcc[idx] = tag
            bcc4[idx] = tag if hist[comp[idx]] >= 4 else '-'
    return (bcc, bcc4)
def clustering():
    """Group the session's indexed entries into graphs of similar spectra.

    Two entries are linked when their precursor masses differ by at most the
    configured tolerance and the ranked dot product of their spectra exceeds
    the configured threshold. The links and dot products are pretty-printed,
    an undirected graph is built from them (vertex property ``iid`` holding
    the vertex index, edge property ``dps`` holding the dot product), and one
    pruned graph per connected component is returned.

    NOTE(review): vertices are created only by ``add_edge_list``, so entries
    with an index above the largest linked index do not appear in any
    returned graph -- confirm this is intended.
    """
    precursor_tolerance = session.config.cg_precursor_tolerance.value
    dot_product_threshold = session.config.cg_dot_product_threshold.value
    index_length = len(session.internal_index)

    dps = []
    edg = []
    for i, ei in enumerate(session.internal_index):
        print(i, end='\r')
        later_entries = session.internal_index[i + 1:index_length]
        for j, ej in enumerate(later_entries, start=i + 1):
            mass_gap = abs(ei.precursor_mass - ej.precursor_mass)
            if mass_gap > precursor_tolerance:
                continue
            dp = ei.get_spectrum().verificative_ranked_dp(ej.get_spectrum())
            if dp > dot_product_threshold:
                edg.append((i, j))
                dps.append(dp)
    print()

    import pprint
    pprint.pprint(edg)
    pprint.pprint(dps)

    import graph_tool.all as gt
    import numpy as np

    g = gt.Graph(directed=False)
    g.add_edge_list(edg)

    g.vp['iid'] = g.new_vp('int')
    for i in range(g.num_vertices()):
        g.vp['iid'][i] = i

    g.ep['dps'] = g.new_ep('float')
    g.ep['dps'].a = np.array(dps, dtype=np.float32)

    comp, hist = gt.label_components(g)
    return [
        gt.Graph(gt.GraphView(g, vfilt=(comp.a == label)),
                 directed=False,
                 prune=True)
        for label in range(len(hist))
    ]
def networkSummary(G):
    r"""Provides summary values about the network

    Args:
        G (graph)
            The network of strains from :func:`~constructNetwork`

    Returns:
        components (int)
            The number of connected components (and clusters)
        density (float)
            The proportion of possible edges used
        transitivity (float)
            Network transitivity (triads/triangles)
        score (float)
            A score of network fit, given by
            :math:`\mathrm{transitivity} * (1-\mathrm{density})`

    Raises:
        ZeroDivisionError: if the network has fewer than two vertices, as
            the density is then undefined.
    """
    # The docstring is a raw string so that the LaTeX backslashes are not
    # parsed as (invalid) escape sequences.
    _, component_frequencies = gt.label_components(G)
    components = len(component_frequencies)

    # Ask the graph for its sizes instead of materialising the full vertex
    # and edge lists just to count them.
    n_vertices = G.num_vertices()
    n_edges = G.num_edges()
    density = n_edges / (0.5 * n_vertices * (n_vertices - 1))

    transitivity = gt.global_clustering(G)[0]
    score = transitivity * (1 - density)
    return (components, density, transitivity, score)
def is_convex(directed):
    """Run the shortest-path-cover label query experiment on the Cora graph.

    Loads the edge list and node labels from ``res/cora``, builds the graph
    (directed or not, according to `directed`), prints the component
    histogram and the number of simplicial vertices, loads the precomputed
    shortest path cover from ``res/cora/spc_<directed>.p`` and prints the
    results of ``spc_querying_naive`` together with the number of predictions
    matching the true labels.

    Everything that followed the first unconditional ``return`` in the
    previous version was unreachable and has been removed, together with the
    all-pairs distance computation that only that code consumed.

    Returns:
        None.
    """
    print("cora")
    np.random.seed(0)

    # `np.int` was an alias of the builtin and no longer exists in current
    # NumPy releases; the builtin `int` is the equivalent dtype.
    edges = np.genfromtxt('res/cora/cora.edges', dtype=int,
                          delimiter=',')[:, :2] - 1
    labels = np.genfromtxt('res/cora/cora.node_labels', dtype=int,
                           delimiter=',')[:, 1]

    g = gt.Graph(directed=directed)
    g.add_edge_list(edges)

    _, hist = gt.label_components(g)
    print(hist)

    simple = simplicial_vertices.simplicial_vertices(g)
    print("n=", g.num_vertices(), "s=", len(simple))

    # NOTE(review): pickle executes arbitrary code while loading -- only
    # trusted cover files should be placed at this path.
    with open("res/cora/spc_" + str(directed) + ".p", "rb") as fh:
        spc = pickle.load(fh)

    a, b = spc_querying_naive(g, spc, labels)
    print(a)
    print(b, np.sum(b))
    print(np.sum(a == labels))
# Additional plasmid characteristics g.vp.Size[v_qry] = seq_len g.vp.MOB[v_qry] = run_mobscan(fname_faa, fname_fna + '_mobscan') genome_type = 'unordered_replicon' if multifasta else 'ordered_replicon' g.vp.MPF[v_qry] = run_conjscan(fname_faa, fname_fna + '_conjscan', genome_type, args.topology) g.vp.PFinder[v_qry] = run_pfinder(fname_fna, fname_fna + '_pfinder') with open(fname_fna + '.qry_info.tsv', 'w') as fh: fh.write("#Total bp\tMOB\tMPF\tReplicon\n") fh.write("{}\t{}\t{}\t{}\n".format(g.vp.Size[v_qry], g.vp.MOB[v_qry], g.vp.MPF[v_qry], g.vp.PFinder[v_qry])) # Check if query belongs to a graph component of less than 4 members comp, hist = gt.label_components(g) g.vertex_properties['CComp'] = comp if hist[comp[v_qry]] <= 4: i_filter = (comp.a == comp[v_qry]) u = gt.GraphView(g, vfilt=i_filter) cl_size = u.num_vertices() if cl_size < 4: ptu_pred = '-' print('PTU could not be assigned') print('Query is part of a graph component of size {}'.format(cl_size)) print( 'However, at least four members are required for PTU assignation') print('This plasmid could form part of a new, still unnamed, PTU') else: ptu_pred = 'PTU-?' print('New putative PTU')
g = load_graph(sys.argv[1]) data = [] data.append(('nodes', g.num_vertices())) data.append(('edges', g.num_edges())) data.append(('is_directed?', g.is_directed())) deg = g.degree_property_map('total') num = int(np.sum(deg.a == 0)) data.append(('isolated nodes', '{} ({:.2f}%)'.format(num, num / g.num_vertices() * 100))) labels, hist = label_components(g, directed=False) data.append(('number of connected components', len(hist))) _, hist = label_components(g, directed=False) hist.sort() if len(hist) > 1: size1, size2 = hist[-1], hist[-2] else: size1, size2 = hist[-1], 0 data.append(('size of 1st/2nd component', '{} ({:.2f}%), {}/({:.2f}%)'.format( size1, 100 * size1 / g.num_vertices(), size2, 100 * size2 / g.num_vertices()))) data.append(('min/max/avg degree', '{}/{}/{:.2f}'.format(int(deg.a.min()),
def bcc_tree(G, vlist, elist):
    """Get biconnected component tree view of a subgraph defined by the
    input vertex and edge lists.

    Note that the vertex and edge filters describing the subgraph are
    installed on `G` and left in place when the function returns.

    Args:
        G (graph_tool.Graph): The graph instance.
        vlist (list): List of vertex indices to induce upon. ``None`` or an
            empty list selects every vertex.
        elist (list): List of edge indices to induce upon. ``None`` or an
            empty list selects every edge.

    Returns:
        An object containing information regarding the BCC tree created,
        including Vis.js formatted network data. The degree and size
        distributions are empty tuples when there is nothing to count.
    """
    # get proper indices
    vp = G.new_vp('bool', vals=False)
    ep = G.new_ep('bool', vals=False)
    # `x is []` can never be true and an all-ones array used as an index
    # selects element 1 only, so test for emptiness explicitly and default
    # to the full index range.
    if vlist is None or len(vlist) == 0:
        vlist = np.arange(len(vp.a))
    if elist is None or len(elist) == 0:
        elist = np.arange(len(ep.a))
    vp.a[vlist] = True
    ep.a[elist] = True
    G.set_vertex_filter(vp)
    G.set_edge_filter(ep)

    # label biconnected components
    bcc, art, _ = gt.label_biconnected_components(G)

    # create metagraph
    Gp = gt.Graph(directed=False)
    Gp.vp['count'] = Gp.new_vp('int', vals=-1)
    Gp.vp['is_articulation'] = Gp.new_vp('bool', vals=False)
    Gp.vp['id'] = Gp.new_vp('string', vals='')
    Gp.ep['count'] = Gp.new_ep('int', vals=0)

    # add bcc metanodes
    # each bcc metanode will be indexed in Gp in increasing order of bcc id,
    # a scheme which is leveraged later below.
    B = sorted(Counter(bcc.a[elist]).items())
    for _, count in B:
        v = Gp.add_vertex()
        Gp.vp['count'][v] = count

    # add articulation points and metagraph edges
    elist_set = set(int(idx) for idx in elist)
    ap_list = [G.vertex(int(v)) for v in np.where(art.a == 1)[0]]
    for ap in ap_list:
        v = Gp.add_vertex()
        Gp.vp['count'][v] = 1
        Gp.vp['is_articulation'][v] = True
        # assign original vertex_index as art point's id
        Gp.vp['id'][v] = str(G.vertex_index[ap])
        for e in ap.out_edges():
            # NOTE: Following if should never evaluate true...
            if G.edge_index[e] not in elist_set:
                continue
            # add metagraph edge if not already present
            meta_e = Gp.edge(v, Gp.vertex(bcc[e]))
            if not meta_e:
                meta_e = Gp.add_edge(v, Gp.vertex(bcc[e]))
            Gp.ep['count'][meta_e] += 1

    # assert bcc tree is, in fact, a tree
    comp, _ = gt.label_components(G)
    num_components = len(np.unique(comp.a))
    assert Gp.num_edges() == Gp.num_vertices() - num_components

    # articulation point degree distribution; unpacking zip() of an empty
    # sequence fails, so a graph without articulation points (single BCC)
    # is handled explicitly.
    ap_degrees = [v.out_degree() for v in ap_list]
    if ap_degrees:
        ap_deg_bins, ap_deg_counts = \
            zip(*sorted(Counter(ap_degrees).items()))
    else:
        ap_deg_bins, ap_deg_counts = (), ()

    # bcc size distribution (no. of edges); zip() objects cannot be indexed
    # on Python 3, so take the counts out of B directly.
    bcc_sizes = Counter(count for _, count in B)
    if bcc_sizes:
        bcc_size_bins, bcc_size_counts = zip(*sorted(bcc_sizes.items()))
    else:
        bcc_size_bins, bcc_size_counts = (), ()

    vis_data = to_vis_json_bcc_tree(Gp)

    return {
        'vis_data': vis_data,
        'ap_deg_bins': ap_deg_bins,
        'ap_deg_counts': ap_deg_counts,
        'bcc_size_bins': bcc_size_bins,
        'bcc_size_counts': bcc_size_counts,
    }
word_dict[w1] = pairs_graph.vertex_index[v1] ver_names[v1] = w1 else: v1 = pairs_graph.vertex(word_dict[w1]) if w2 not in word_dict: v2 = pairs_graph.add_vertex() word_dict[w2] = pairs_graph.vertex_index[v2] ver_names[v2] = w2 else: v2 = pairs_graph.vertex(word_dict[w2]) e = pairs_graph.add_edge(v1, v2) edge_weights[e] = cur_weight components_label = label_components(pairs_graph) largest_label = label_largest_component(pairs_graph) #print(components_label[0].a) print(largest_label.a) degr = pairs_graph.new_vertex_property("int") for v in pairs_graph.vertices(): degr[v] = v.out_degree() print("hihi") posPlot = sfdp_layout(pairs_graph, gamma = 5, max_level = 50, vweight = degr, C = 0.5, K = 5, p = 7, theta = 0.1) print("hi") graph_draw(pairs_graph, pos=posPlot, vertex_text=ver_names, edge_text=edge_weights)
def is_convex(weighted):
    """Run the shortest-path-cover label query experiment on benchmark set 1.

    Builds a graph over the samples of ``res/benchmark/SSL,set=1,X.tab`` by
    linking every pair whose Euclidean distance is positive and not above the
    0.004 quantile of all pairwise distances. The shortest path cover is
    computed when `weighted` is true and loaded from its pickle file
    otherwise. The results of ``spc_querying_naive`` are printed, followed by
    the number of paths for which ``are_convex`` reports no error and the
    total error over the remaining paths.

    Everything that followed the unconditional ``return`` in the previous
    version was unreachable and has been removed, together with the component
    labelling and all-pairs distance computation that only that code
    consumed.

    Returns:
        None.
    """
    dataset_id = 1
    quantile = 0.004

    print("digit1")
    np.random.seed(0)
    X = np.genfromtxt('res/benchmark/SSL,set=' + str(dataset_id) + ',X.tab')
    y = np.genfromtxt('res/benchmark/SSL,set=' + str(dataset_id) + ',y.tab')

    n = X.shape[0]
    dists = scipy.spatial.distance.cdist(X, X)
    y = y[:n]
    W = dists[:n, :n]
    np.fill_diagonal(W, 0)
    # Distances above the quantile are marked as "no edge".
    W[W > np.quantile(W, quantile)] = np.inf

    is_edge = (W < np.inf) & (W > 0)
    weights = W[is_edge].flatten()
    edges = np.array(np.where(is_edge)).T

    np.random.seed(0)

    # construct actual graph
    g = gt.Graph()
    g.add_vertex(n)
    g.add_edge_list(edges)
    if weighted:
        weight_prop = g.new_edge_property("double", vals=weights)
    else:
        weight_prop = g.new_edge_property("double", val=1)

    if not weighted:
        cover_path = ("res/benchmark/spc_" + str(dataset_id) + "_q_" +
                      str(quantile) + "_weighted_" + str(weighted) + ".p")
        # NOTE(review): pickle executes arbitrary code while loading -- only
        # trusted cover files should be placed at this path.
        with open(cover_path, "rb") as fh:
            spc = pickle.load(fh)
    else:
        spc = shortest_path_cover_logn_apx(g, weight_prop)

    labels = y
    a, b = spc_querying_naive(g, spc, labels)
    print(a)
    print(b, np.sum(b))
    print(np.sum(a == labels))

    print("len(spc)", len(spc))
    num_of_convex_paths = 0
    total_error = 0
    for p in spc:
        error = are_convex(labels[p])
        if error == 0:
            num_of_convex_paths += 1
        else:
            total_error += error

    print("#convex paths", num_of_convex_paths)
    print("total error on paths", total_error)
def topology(self):
    """Write component statistics of the arc graph and draw it.

    Works on a copy of ``self.arcgraph``. Components are computed ignoring
    edge direction; a component is a "start component" when it contains a
    vertex whose ``compound_ids`` property equals one of
    ``self.start_compounds``. Compounds of every other component are
    "satellites".

    Files written below ``self.statistics_path``: the global clustering
    coefficient, the component membership of every compound, the component
    histogram, the targets inside / outside the satellites and the labels of
    the start components. A LaTeX table row is appended to
    ``component_table.txt`` under ``const.MECAT_BASE``, and the graph is
    drawn with start-component vertices in red and all others in blue.
    """
    g = self.arcgraph.copy()
    # directed=False because True would look for strongly connected
    # components.
    components, chist = gt.label_components(g, directed=False)
    self.__plot_component_hist(chist, 'componenthist')

    start_components = set()
    for c in self.start_compounds:
        for v in gt.find_vertex(g, g.vp.compound_ids, c):
            start_components.add(components[v])

    number_compounds_in_start_components = 0
    for c in start_components:
        number_compounds_in_start_components += chist[c]

    clustering_coefficient = gt.global_clustering(g)
    with open(join(self.statistics_path, "clustering_coefficient.txt"),
              'w') as f:
        f.write(
            str(clustering_coefficient[0]) + '\t' +
            str(clustering_coefficient[1]) + '\n')

    satellites = set()
    with open(join(self.statistics_path, "compounds_components.txt"), 'w') as f, \
            open(join(self.statistics_path, "component_hist.txt"), 'w') as f2:
        for componentid, elem in enumerate(chist):
            u = gt.GraphView(g, vfilt=components.a == componentid)
            u = gt.Graph(u, prune=True)
            # Component ids are written 1-based.
            f2.write(str(componentid + 1) + '\t' + str(elem) + '\n')
            for v in u.vertices():
                f.write(
                    str(componentid + 1) + '\t' + u.vp.compound_ids[v] +
                    '\t' + u.vp.name[v] + '\n')
                if componentid not in start_components:
                    satellites.add(u.vp.compound_ids[v])

    targets_in_main_component = self.targets - satellites
    targets_in_satellites = self.targets & satellites

    with open(join(self.statistics_path, "targets_in_main_component.txt"),
              'w') as f:
        for c in targets_in_main_component:
            compound = self.builder.compounds[c]
            f.write(c + '\t' + compound.names[0] + '\n')

    with open(join(self.statistics_path, "targets_in_satellites.txt"),
              'w') as f:
        for c in targets_in_satellites:
            compound = self.builder.compounds[c]
            f.write(c + '\t' + compound.names[0] + '\n')

    with open(
            join(self.statistics_path,
                 "components_with_start_metabolites.txt"), 'w') as f:
        for cid in start_components:
            f.write(str(cid) + '\n')

    p = number_compounds_in_start_components / g.num_vertices() * 100
    with open(join(const.MECAT_BASE, "component_table.txt"), 'a') as f:
        # '\\%' spells the LaTeX percent sign without relying on the invalid
        # escape sequence '\%'; the written text is unchanged.
        f.write(self.name + ' & ' + str(len(chist)) + ' & ' +
                str(np.amax(chist)) + ' & ' + str(len(start_components)) +
                ' & ' + str(int(number_compounds_in_start_components)) +
                ' & ' + str(int(round(p, 0))) + '\\%' + '\\\\ \n')

    g.vertex_properties["start_components"] = g.new_vertex_property(
        "string", val='white')
    for v in g.vertices():
        if components[v] in start_components:
            g.vp.start_components[v] = 'red'
        else:
            g.vp.start_components[v] = 'blue'

    gt.graph_draw(g,
                  vertex_fill_color=g.vp.start_components,
                  output=join('/mnt', 'g', 'LisaDaten', 'Paper2', 'figures',
                              'arcgraph' + self.name + '.pdf'))
def useGraphTool(pd, space):
    """Print statistics about the planner data and draw it to ``graph.png``.

    The GraphML representation of `pd` is written to ``graph.xml`` and loaded
    with graph-tool. Vertex/edge counts, average degree, average edge weight
    and component counts are printed. Start vertices are drawn in cyan, goal
    vertices in green, all others in yellow; when both a start and a goal
    vertex exist, the edges along the path found by A* are drawn in red.

    Args:
        pd: planner data providing ``printGraphML``, ``isStartVertex`` and
            ``isGoalVertex``.
        space: unused.
    """
    # Extract the graphml representation of the planner data
    graphml = pd.printGraphML()
    with open("graph.xml", 'w') as f:
        f.write(graphml)

    # Load the graphml data using graph-tool
    graph = gt.load_graph("graph.xml")
    edgeweights = graph.edge_properties["weight"]

    # Write some interesting statistics
    avgdeg, stddevdeg = gt.vertex_average(graph, "total")
    avgwt, stddevwt = gt.edge_average(graph, edgeweights)

    print("---- PLANNER DATA STATISTICS ----")
    print(str(graph.num_vertices()) + " vertices and " +
          str(graph.num_edges()) + " edges")
    print("Average vertex degree (in+out) = " + str(avgdeg) +
          "  St. Dev = " + str(stddevdeg))
    print("Average edge weight = " + str(avgwt) + "  St. Dev = " +
          str(stddevwt))

    _, hist = gt.label_components(graph)
    print("Strongly connected components: " + str(len(hist)))

    # Make the graph undirected (for weak components, and a simpler drawing)
    graph.set_directed(False)
    _, hist = gt.label_components(graph)
    print("Weakly connected components: " + str(len(hist)))

    # Plotting the graph
    gt.remove_parallel_edges(graph)  # Removing any superfluous edges

    edgeweights = graph.edge_properties["weight"]
    colorprops = graph.new_vertex_property("string")
    vertexsize = graph.new_vertex_property("double")

    start = -1
    goal = -1

    for v in range(graph.num_vertices()):
        # Color and size vertices by type: start, goal, other
        if pd.isStartVertex(v):
            start = v
            colorprops[graph.vertex(v)] = "cyan"
            vertexsize[graph.vertex(v)] = 10
        elif pd.isGoalVertex(v):
            goal = v
            colorprops[graph.vertex(v)] = "green"
            vertexsize[graph.vertex(v)] = 10
        else:
            colorprops[graph.vertex(v)] = "yellow"
            vertexsize[graph.vertex(v)] = 5

    # default edge color is black with size 0.5:
    edgecolor = graph.new_edge_property("string")
    edgesize = graph.new_edge_property("double")
    for e in graph.edges():
        edgecolor[e] = "black"
        edgesize[e] = 0.5

    # using A* to find shortest path in planner data
    if start != -1 and goal != -1:
        _, pred = gt.astar_search(graph, graph.vertex(start), edgeweights)

        # Color edges along shortest path red with size 2.0
        v = graph.vertex(goal)
        while v != graph.vertex(start):
            p = graph.vertex(pred[v])
            if p == v:
                # A vertex that is its own predecessor was never reached by
                # the search; stop instead of looping forever.
                break
            for e in p.out_edges():
                if e.target() == v:
                    edgecolor[e] = "red"
                    edgesize[e] = 2.0
            v = p

    # Writing graph to file
    gt.graph_draw(graph, vertex_size=vertexsize,
                  vertex_fill_color=colorprops,
                  edge_pen_width=edgesize, edge_color=edgecolor,
                  output="graph.png")
    print("")
    print('Graph written to graph.png')
def useGraphTool(pd):
    """Print statistics about planner data, draw it and save an annotated copy.

    The planner data is exported as GraphML to ``graph.graphml``, reloaded
    with graph-tool and summarised on stdout (vertex/edge counts, average
    degree and edge weight, component counts before and after making the
    graph undirected).  Vertex colour/size maps (start: cyan, goal: green,
    other: yellow) and edge colour/size maps are built, with the edges on the
    predecessor chain returned by ``gt.astar_search`` between start and goal
    marked red.  The graph is drawn to ``graph.pdf`` using the first two
    state components of every vertex as its position, and the graph together
    with all computed property maps is saved to ``mgraph.graphml``.

    Args:
        pd: Planner data object; must provide ``printGraphML()``,
            ``isStartVertex(i)``, ``isGoalVertex(i)`` and ``getVertex(i)``
            (whose result exposes ``getState()`` returning something
            indexable at 0 and 1).

    Side effects:
        Writes ``graph.graphml``, ``graph.pdf`` and ``mgraph.graphml`` in the
        current directory and prints to stdout.
    """
    # Extract the graphml representation of the planner data.  The context
    # manager closes the file even if the write fails.
    graphml = pd.printGraphML()
    with open("graph.graphml", 'w') as f:
        f.write(graphml)

    # Load the graphml data using graph-tool
    graph = gt.load_graph("graph.graphml", fmt="xml")
    edgeweights = graph.edge_properties["weight"]

    # Write some interesting statistics
    avgdeg, stddevdeg = gt.vertex_average(graph, "total")
    avgwt, stddevwt = gt.edge_average(graph, edgeweights)

    print("---- PLANNER DATA STATISTICS ----")
    print(
        str(graph.num_vertices()) + " vertices and " + str(graph.num_edges()) + " edges")
    print("Average vertex degree (in+out) = " + str(avgdeg) + "  St. Dev = " + str(stddevdeg))
    print("Average edge weight = " + str(avgwt) + "  St. Dev = " + str(stddevwt))

    _, hist = gt.label_components(graph)
    print("Strongly connected components: " + str(len(hist)))

    # Make the graph undirected (for weak components, and a simpler drawing)
    graph.set_directed(False)
    _, hist = gt.label_components(graph)
    print("Weakly connected components: " + str(len(hist)))

    # Plotting the graph
    gt.remove_parallel_edges(graph)  # Removing any superfluous edges

    edgeweights = graph.edge_properties["weight"]
    colorprops = graph.new_vertex_property("string")
    vertexsize = graph.new_vertex_property("double")

    # Index of the last start / goal vertex seen; -1 means "none found".
    start = -1
    goal = -1

    for v in range(graph.num_vertices()):
        vertex = graph.vertex(v)
        # Color and size vertices by type: start, goal, other
        if pd.isStartVertex(v):
            start = v
            colorprops[vertex] = "cyan"
            vertexsize[vertex] = 10
        elif pd.isGoalVertex(v):
            goal = v
            colorprops[vertex] = "green"
            vertexsize[vertex] = 10
        else:
            colorprops[vertex] = "yellow"
            vertexsize[vertex] = 5

    # default edge color is black with size 0.5:
    edgecolor = graph.new_edge_property("string")
    edgesize = graph.new_edge_property("double")
    for e in graph.edges():
        edgecolor[e] = "black"
        edgesize[e] = 0.5

    # using A* to find shortest path in planner data
    if start != -1 and goal != -1:
        _, pred = gt.astar_search(graph, graph.vertex(start), edgeweights)

        # Color edges along shortest path red with size 2.0, walking the
        # predecessor map backwards from the goal.
        source = graph.vertex(start)
        v = graph.vertex(goal)
        while v != source:
            p = graph.vertex(pred[v])
            if p == v:
                # The walk cannot make progress from here (e.g. the search
                # never reached the goal); stop instead of looping forever.
                break
            for e in p.out_edges():
                if e.target() == v:
                    edgecolor[e] = "red"
                    edgesize[e] = 2.0
            v = p

    # Vertex positions: first two components of each vertex's state.
    pos = graph.new_vertex_property("vector<double>")
    for v in range(graph.num_vertices()):
        st = pd.getVertex(v).getState()
        pos[graph.vertex(v)] = [st[0], st[1]]

    # Writing graph to file:
    # pos indicates the desired vertex positions.  Only the positions are
    # passed to the drawing; the size/colour maps built above are not used
    # here but are attached to the saved graph below.
    gt.graph_draw(graph, pos=pos, output="graph.pdf")
    print('\nGraph written to graph.pdf')

    # Attach the computed maps as internal properties so graph.save() writes
    # them out together with the graph.
    graph.vertex_properties["pos"] = pos
    graph.vertex_properties["vsize"] = vertexsize
    graph.vertex_properties["vcolor"] = colorprops
    graph.edge_properties["esize"] = edgesize
    graph.edge_properties["ecolor"] = edgecolor

    graph.save("mgraph.graphml")
    print('\nGraph saved to mgraph.graphml')
# NOTE(review): this fragment starts inside a function body whose `def` line
# is not part of the visible source.  The script below calls
# `read_graph(filename)` and unpacks three values, so this is presumably the
# tail of `read_graph` -- confirm.  `fh`, `scores`, `node_dict`, `graph` and
# `vprop` are set up in the part that is not visible.
    for line in fh:
        # Each line is expected to hold exactly three comma-separated fields:
        # source name, score, target name.  Any other field count makes the
        # tuple unpacking raise ValueError.
        line = line.strip()
        s, score, t = line.split(",")
        # Recorded before the emptiness checks below, so a line with an empty
        # source or target name still ends up in `scores`.
        # NOTE(review): assumes `scores` creates the inner mapping on first
        # access (e.g. a nested defaultdict) -- confirm.
        scores[s][t] = float(score)

        # Add vertices
        # A vertex is created only the first time a non-empty name is seen;
        # `node_dict` maps the name to its vertex and `vprop` maps back.
        if s and s not in node_dict:
            source = graph.add_vertex()
            node_dict[s] = source
            vprop[source] = s
        if t and t not in node_dict:
            target = graph.add_vertex()
            node_dict[t] = target
            vprop[target] = t

        # Add edge
        # Only when both endpoint names are non-empty.
        if s and t:
            graph.add_edge(node_dict[s], node_dict[t])

    return graph, vprop, scores


# Script part: read the graph, label its components, then print one CSV row
# per edge: source name, score, target name, component label of the source,
# component label of the target.
filename = check_arguments()
graph, vprop, scores = read_graph(filename)
# `histogram` is not used below.
components, histogram = gt.label_components(graph)

for edge in graph.edges():
    v1, v2 = edge.source(), edge.target()
    print("{},{},{},{},{}".format(vprop[v1], scores[vprop[v1]][vprop[v2]], vprop[v2], components[v1], components[v2]))
def _assign_tramas(g, h, id_trama, keep_vertex, first_id):
    """Give every connected component of a filtered copy of ``h`` a trama id.

    A copy of ``h`` is restricted to the vertices accepted by ``keep_vertex``
    and treated as undirected.  Each connected component of that view is one
    trama: for every vertex in it, the pair stored in the ``pares`` vertex
    property is looked up as an edge of ``g`` and that edge is tagged with
    the component's id in ``id_trama``.

    Args:
        g: Graph whose edges are being labelled.
        h: Graph carrying the ``new_peso`` and ``pares`` vertex properties.
        id_trama: Edge property map of ``g`` that receives the ids.
        keep_vertex: Callable used as the vertex filter for the view of ``h``.
        first_id: Id given to the first component found.

    Returns:
        The next unused trama id.
    """
    hv = gt.GraphView(h.copy(), vfilt=keep_vertex, directed=False)
    hv.purge_vertices()
    hv.purge_edges()
    labels = gt.label_components(hv)[0]

    next_id = first_id
    for label in np.unique(labels.a):
        trama = gt.GraphView(hv.copy(), vfilt=lambda v: labels[v] == label,
                             directed=False)
        for v in trama.vertices():
            pair = trama.vp.pares[v]
            id_trama[g.edge(int(pair[0]), int(pair[1]))] = next_id
        next_id += 1
    return next_id


def get_tramas(g, h):
    """Split the edges of ``g`` into "tramas" and draw the result twice.

    Tramas are the connected components of ``h`` restricted first to the
    vertices with ``new_peso >= 0`` and then to those with ``new_peso < 0``;
    ids are assigned consecutively starting at 0.  Every vertex of ``g`` then
    collects one colour per trama it touches.  The graph is drawn once with
    pie-shaped vertices (equal slices, one per trama) and once with vertices
    filled according to the last trama id assigned to them.

    Args:
        g: Graph to annotate and draw; must have the ``etiqueta_nodo`` vertex
            property and the ``etiqueta_arco`` edge property.
        h: Graph with ``new_peso`` and ``pares`` vertex properties, where
            ``pares[v]`` holds the two endpoint indices of an edge of ``g``.

    Returns:
        None.  The property maps built here are local; the only visible
        effects are the two ``graph_draw`` calls and the printed separators.
    """
    id_trama = g.new_edge_property("int")
    id_trama_vert = g.new_vertex_property("int")
    id_trama_pie_color = g.new_vertex_property("object")
    id_trama_pie_prop = g.new_vertex_property("vector<float>", [])
    colors = [
        [0.3, 0.2, 0.4, 1.0],
        [0.5, 0.9, 0.4, 1.0],
        [0.2, 0.2, 0.7, 1.0],
        [0.9, 0.2, 0.5, 1.0],
        [0.3, 0.6, 0.4, 1.0],
        [0.3, 0.4, 0.9, 1.0],
        [0.0, 0.8, 0.4, 1.0],
        [0.9, 0.5, 0.7, 1.0],
        [0.6, 0.2, 0.1, 1.0],
        [0.1, 0.7, 0.9, 1.0],
        [0.3, 0.2, 0.8, 1.0],
        [0.3, 0.8, 0.6, 1.0],
    ]

    # First take the connected components whose values are non-negative.
    # Each component will be "a trama".
    next_id = _assign_tramas(g, h, id_trama,
                             lambda v: h.vp.new_peso[v] >= 0, 0)

    # Next take the negative connected components -> more tramas.
    _assign_tramas(g, h, id_trama, lambda v: h.vp.new_peso[v] < 0, next_id)

    # Now go from edges to nodes: filter the original graph by edges.
    # The nodes that remain belong to that trama.
    for t in np.unique(id_trama.a):
        gv = gt.GraphView(g.copy(), efilt=lambda e: id_trama[e] == t,
                          directed=False)
        gv.purge_edges()
        gv.purge_vertices()
        gv = gt.GraphView(gv.copy(), vfilt=lambda v: v.out_degree() > 0,
                          directed=False)
        color = colors[t % len(colors)]  # the palette is reused cyclically
        for v in gv.vertices():
            vertex = g.vertex(v)
            if id_trama_pie_color[vertex] is None:
                id_trama_pie_color[vertex] = []
            id_trama_pie_color[vertex].append(color)
            # A vertex shared by several tramas keeps the last id seen.
            id_trama_vert[vertex] = t
        # NOTE(review): the original indentation is lost; the separator is
        # assumed to be printed once per trama -- confirm.
        print("----")

    # Every trama a vertex belongs to gets an equal share of its pie.
    for v in g.vertices():
        pie_colors = id_trama_pie_color[v]
        if pie_colors is None:
            # Vertex touched by no trama: store an empty colour list so the
            # property holds a list for every vertex, and leave its fractions
            # empty instead of failing on len(None).
            id_trama_pie_color[v] = []
            continue
        share = 1.0 / len(pie_colors)
        id_trama_pie_prop[v] = [share] * len(pie_colors)

    gt.graph_draw(g,
                  vertex_text=g.vp.etiqueta_nodo,
                  vertex_shape="pie",
                  vertex_pie_fractions=id_trama_pie_prop,
                  vertex_pie_colors=id_trama_pie_color,
                  vertex_size=8,
                  edge_text=g.ep.etiqueta_arco)
    gt.graph_draw(g,
                  vertex_text=g.vp.etiqueta_nodo,
                  vertex_fill_color=id_trama_vert,
                  vertex_size=8,
                  edge_text=g.ep.etiqueta_arco)
def gComponents(G):
    """Run ``gr.label_components`` on ``G`` and time the call.

    Args:
        G: Graph passed unchanged to ``gr.label_components``.

    Returns:
        A ``(tables, seconds)`` tuple: whatever ``gr.label_components``
        returned, and the time the call took as a float number of seconds.
    """
    # time.clock() was removed in Python 3.8.  Use perf_counter where it
    # exists and fall back to clock only on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock
    t0 = timer()
    tables = gr.label_components(G)
    t1 = timer()
    return (tables, t1 - t0)