def load_train_test_graphs(dataset, recache_input):
    """Return the ``(train_graph, test_graph)`` pair for ``dataset``.

    When ``recache_input`` is truthy, the sparse matrix stored at
    ``data/<dataset>.npz`` is loaded, split with weights 0.9 / 0.1, converted
    into two ``nx.DiGraph`` objects (matrix values stored under the ``'sign'``
    edge attribute) and both graphs are pickled under ``data/<dataset>/``.
    Otherwise the two pickled graphs are simply read back.
    """
    raw_mat_path = 'data/{}.npz'.format(dataset)
    cache_paths = (
        'data/{}/train_graph.pkl'.format(dataset),
        'data/{}/test_graph.pkl'.format(dataset),
    )

    # Fast path: reuse the graphs cached by an earlier run.
    if not recache_input:
        print('loading train and test graphs...')
        train_g, test_g = [nx.read_gpickle(path) for path in cache_paths]
        return train_g, test_g

    print('loading sparse matrix from {}'.format(raw_mat_path))
    matrix = load_sparse_csr(raw_mat_path)

    print('splitting train and test...')
    train_m, test_m = split_train_test(matrix, weights=[0.9, 0.1])

    print('converting to nx.DiGraph')
    graphs = [
        nx.from_scipy_sparse_matrix(part,
                                    create_using=nx.DiGraph(),
                                    edge_attribute='sign')
        for part in (train_m, test_m)
    ]

    print('saving train and test graphs...')
    for graph, path in zip(graphs, cache_paths):
        nx.write_gpickle(graph, path)

    train_g, test_g = graphs
    return train_g, test_g
def test_from_scipy_sparse_matrix_parallel_edges(self):
    """Check how integer matrix entries are turned into edges.

    For a simple graph every entry becomes the weight of a single edge,
    regardless of ``parallel_edges``.  For a multigraph the entry is the
    number of parallel edges when ``parallel_edges=True`` and a single
    weighted edge otherwise.
    """
    A = sparse.csr_matrix([[1, 1], [1, 2]])

    # Simple digraph: the flag must not change the result.
    simple_expected = nx.DiGraph()
    simple_expected.add_weighted_edges_from(
        [(u, v, 1) for (u, v) in [(0, 0), (0, 1), (1, 0)]])
    simple_expected.add_edge(1, 1, weight=2)
    for flag in (True, False):
        built = nx.from_scipy_sparse_matrix(A, parallel_edges=flag,
                                            create_using=nx.DiGraph())
        assert_graphs_equal(built, simple_expected)

    # Multigraph with parallel edges: entry 2 yields two unit-weight loops.
    edges = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 1)]
    parallel_expected = nx.MultiDiGraph()
    parallel_expected.add_weighted_edges_from([(u, v, 1) for (u, v) in edges])
    built = nx.from_scipy_sparse_matrix(A, parallel_edges=True,
                                        create_using=nx.MultiDiGraph())
    assert_graphs_equal(built, parallel_expected)

    # Multigraph without parallel edges: one edge per entry, and the sole
    # self-loop (edge key 0) on vertex 1 carries weight 2.
    single_expected = nx.MultiDiGraph()
    single_expected.add_edges_from(set(edges), weight=1)
    single_expected[1][1][0]['weight'] = 2
    built = nx.from_scipy_sparse_matrix(A, parallel_edges=False,
                                        create_using=nx.MultiDiGraph())
    assert_graphs_equal(built, single_expected)
def submatrix_pull_via_networkx(matrix, node_array, directed=True):
    """Return the adjacency matrix of the subgraph induced by ``node_array``.

    ``matrix`` is converted to an ``nx.DiGraph`` (or ``nx.Graph`` when
    ``directed`` is falsy), the subgraph on ``list(node_array)`` is taken and
    converted back to a CSR matrix with ``float64`` entries.
    """
    container = nx.DiGraph() if directed else nx.Graph()
    full_graph = nx.from_scipy_sparse_matrix(matrix, create_using=container)
    induced = full_graph.subgraph(list(node_array))
    return nx.to_scipy_sparse_matrix(induced, dtype=np.float64, format="csr")
def configuration_model(self, return_copy=False):
    """Replace every adjacency matrix of the sequence by an edge-randomized one.

    Each matrix is converted to a networkx graph (``nx.DiGraph`` when
    ``self.is_directed`` is truthy, ``nx.Graph`` otherwise), passed through
    ``gwh.randomize_network`` and stored back as an integer sparse matrix.
    Nothing is written to disk.

    Parameters
    ----------
    return_copy : bool
        If true, the randomization is applied to ``self[:]`` and that object
        is returned.  Otherwise ``self`` is modified and ``None`` is returned.
    """
    graph_container = nx.DiGraph() if self.is_directed else nx.Graph()
    target = self[:] if return_copy else self

    for i in range(len(self)):
        # Produces the same text as the legacy statement
        # ``print "configuration model: ", i`` on both Python 2 and Python 3.
        print("configuration model:  {}".format(i))
        graphlet = nx.from_scipy_sparse_matrix(target[i],
                                               create_using=graph_container)
        graphlet = gwh.randomize_network(graphlet)
        target[i] = nx.to_scipy_sparse_matrix(graphlet, dtype="int")

    if return_copy:
        return target
    return None
def learnStructure(dataP, dataS, Pp, Ps, TAN=True):
    """Build a directed structure from pairwise correlations of two data sets.

    For every pair ``i < j`` the weight
    ``0.5 * (Pp * log(1 - r_P**2) + Ps * log(1 - r_S**2))`` is stored in the
    upper triangle of a square matrix, where ``r_P`` / ``r_S`` are the Pearson
    correlations of rows ``i`` and ``j`` of ``dataP`` / ``dataS``.  A term is
    skipped when the correlation is exactly +1 or -1, because ``log(0)`` is
    undefined.

    With ``TAN`` truthy the minimum spanning tree of that matrix is handed to
    ``getDirectedTree``; otherwise the edges of the full graph, sorted by
    ascending weight, are handed to ``getDirectedGraph``.

    Returns whatever those helpers return (seeded with an empty
    ``nx.DiGraph``).
    """
    n_vars = len(dataP)
    tempMatrix = [[0 for _ in range(n_vars)] for _ in range(n_vars)]

    for i in range(n_vars):
        for j in range(i + 1, n_vars):
            temp = 0.0
            # Each correlation is computed once and reused for the guard and
            # the logarithm.  abs() also guards perfect anti-correlation,
            # which would otherwise evaluate log(0) and raise ValueError.
            corr_p = np.corrcoef(dataP[i], dataP[j])[0][1]
            if abs(corr_p) != 1.0:
                temp += Pp * math.log(1 - corr_p ** 2)
            corr_s = np.corrcoef(dataS[i], dataS[j])[0][1]
            if abs(corr_s) != 1.0:
                temp += Ps * math.log(1 - corr_s ** 2)
            temp *= 0.5
            # Only the upper triangle is filled.
            tempMatrix[i][j] = temp

    MaxG = nx.DiGraph()
    if TAN:
        G = nx.from_scipy_sparse_matrix(
            minimum_spanning_tree(csr_matrix(tempMatrix)))
        MaxG = getDirectedTree(G.adj, {}, MaxG, 0)
    else:
        G = nx.Graph(np.asmatrix(tempMatrix))
        adjList = sorted(
            [(u, v, d['weight']) for (u, v, d) in G.edges(data=True)],
            key=lambda edge: edge[2])
        MaxG = getDirectedGraph(adjList, MaxG, 2)
    return MaxG
def identity_conversion(self, G, A, create_using):
    """Assert that every supported way of building a graph from ``A`` gives ``G``.

    Covers ``nx.from_scipy_sparse_matrix``, ``nx.to_networkx_graph`` and the
    graph-class constructor applied to ``A`` itself and to its CSR, COO, CSC,
    dense-matrix and ndarray conversions.
    """
    graph_cls = create_using.__class__

    self.assert_equal(
        G, nx.from_scipy_sparse_matrix(A, create_using=create_using))
    self.assert_equal(
        G, nx.to_networkx_graph(A, create_using=create_using))
    self.assert_equal(G, graph_cls(A))

    # Same constructor check for every alternative matrix representation.
    for converter in ("tocsr", "tocoo", "tocsc", "todense", "toarray"):
        converted = getattr(A, converter)()
        self.assert_equal(G, graph_cls(converted))
def community(document):
    """Score the sentences of ``document`` community by community.

    A TF-IDF sentence-similarity graph is built, then its edges are added to
    an initially edgeless graph in order of decreasing weight until the number
    of connected components equals ``k`` (10) or the edges run out.  Each
    component is ranked with ``textrank`` and the results are written into a
    per-sentence list.

    Returns a list of ``(sentence, score_entry)`` tuples in document order;
    sentences that received no entry keep the placeholder ``0``.
    """
    sentences = sent_tokenize(document)
    bow_matrix = CountVectorizer(stop_words = 'english').fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)

    # sorted() accepts both the list of networkx 1.x and the edge view of
    # 2.x; indexing the tuple replaces the Python 2-only ``lambda (a, b, d)``.
    edge_wts = sorted(nx_graph.edges(data=True),
                      key=lambda edge: edge[2]['weight'],
                      reverse=True)

    k = 10  # number of sentence communities wanted in the summary
    G = nx.Graph()
    G.add_nodes_from(nx_graph.nodes())

    sub_graphs = []
    for u, v, d in edge_wts:
        # Keyword expansion works with both the 1.x and 2.x add_edge API.
        G.add_edge(u, v, **d)
        # list() because newer networkx releases return a generator here.
        sub_graphs = list(nx.connected_component_subgraphs(G))
        if len(sub_graphs) == k:
            break

    inSummary = [0 for _ in range(len(sentences))]
    for sub_graph in sub_graphs:
        arr = list(sub_graph.nodes())
        sen = [sentences[j] for j in arr]
        # NOTE(review): this pairs scores[j] with arr[j]; if ``textrank``
        # returns its results sorted by score the pairing is wrong - confirm
        # against the textrank implementation actually in scope.
        scores = textrank(sen)
        for j, node in enumerate(arr):
            inSummary[node] = scores[j]

    return list(zip(sentences, inSummary))
def compute_clusters_statistic(test_statistic, proximity_matrix, verbose=False):
    """Compute cluster statistics from per-unit statistics and a proximity matrix.

    Clusters are the connected components of the graph described by
    ``proximity_matrix`` (dense or scipy-sparse).  The statistic of a cluster
    is the sum of ``test_statistic`` over its units.

    Parameters
    ----------
    test_statistic : numpy.ndarray
        One value per unit; indexed with the unit ids of each cluster.
    proximity_matrix : array-like or scipy.sparse matrix
        Adjacency among units.
    verbose : bool
        Print the number of clusters and their sizes.

    Returns
    -------
    clusters : numpy.ndarray
        Clusters (arrays of unit ids) sorted by decreasing cluster statistic.
    cluster_statistic : numpy.ndarray
        The matching statistics, in the same order.
    """
    # Build a graph from the proximity matrix.
    if issparse(proximity_matrix):
        graph = from_scipy_sparse_matrix(proximity_matrix)
    else:
        graph = from_numpy_matrix(proximity_matrix)

    # Materialize the components as lists: connected_components may return a
    # lazy generator of sets, which supports neither len() nor array indexing.
    clusters = [list(component) for component in connected_components(graph)]
    if verbose:
        print("Nr. of clusters: %s. Clusters sizes: %s" %
              (len(clusters), np.array([len(cl) for cl in clusters])))

    # Compute the cluster statistic.
    cluster_statistic = np.zeros(len(clusters))
    for i, cluster in enumerate(clusters):
        cluster_statistic[i] = test_statistic[cluster].sum()

    # Final cleanup: order everything by decreasing statistic.  The builtins
    # int/object are what the removed np.int/np.object aliases pointed to.
    idx = np.argsort(cluster_statistic)[::-1]
    clusters = np.array([np.array(cl, dtype=int) for cl in clusters],
                        dtype=object)[idx]
    # When all clusters have the same length NumPy builds a 2-D object array
    # instead of an array of int arrays; cast it back to int in that case.
    # The length check keeps an empty graph from raising IndexError.
    if len(clusters) > 0 and clusters[0].dtype == object:
        clusters = clusters.astype(int)

    cluster_statistic = cluster_statistic[idx]
    return clusters, cluster_statistic
def textrank(document):
    """Print the highest-ranked sentence of ``document``.

    The document is split with ``PunktSentenceTokenizer``, the sentences are
    turned into TF-IDF vectors, and PageRank is run on the graph whose edge
    weights are the products of those vectors (sentence similarity).

    Returns ``None``; the best sentence is written to stdout.
    """
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfTransformer
    import networkx as nx

    sentences = PunktSentenceTokenizer().tokenize(document)

    # Bag of words, then TF-IDF normalization.
    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized_matrix = TfidfTransformer().fit_transform(bow_matrix)

    # Rows and columns both correspond to sentences; each entry is the
    # similarity of the two sentences.  The matrix stays sparse: the former
    # ``similarity_graph.toarray()`` built a dense copy that was discarded.
    similarity_graph = normalized_matrix * normalized_matrix.T

    # PageRank gives a mapping of sentence index -> score.
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)

    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                    reverse=True)
    # Single-argument call form prints identically on Python 2 and 3.
    print(ranked[0][1])
def textrank(sentences):
    """Rank ``sentences`` with PageRank over their TF-IDF similarity graph.

    Returns a list of ``(score, index, sentence)`` tuples sorted in descending
    order.
    """
    counts = CountVectorizer().fit_transform(sentences)
    tfidf = TfidfTransformer().fit_transform(counts)
    graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(graph)

    ranking = [(scores[position], position, sentence)
               for position, sentence in enumerate(sentences)]
    ranking.sort(reverse=True)
    return ranking
def plot2d(self, title=None, domain=[-1, 1], codomain=[-1, 1], predict=True):
    """Plot the fitted samples and, optionally, the prediction surface.

    Nodes of the graph built from ``self.A_`` are drawn at the coordinates in
    ``self.X_`` and colored by ``self.y_``.  With ``predict`` truthy the model
    is evaluated on a 100 x 100 grid spanning ``domain`` x ``codomain`` and
    drawn as filled contours; graph edges are drawn too when
    ``self.params["gamma_i"]`` is non-zero.

    Returns the ``(figure, axes)`` pair.
    """
    fig, axes = plt.subplots()
    grid_x1 = np.linspace(*domain, 100)
    grid_x2 = np.linspace(*codomain, 100)

    n_samples, _ = self.X_.shape
    graph = nx.from_scipy_sparse_matrix(self.A_)
    positions = dict((i, self.X_[i]) for i in range(n_samples))
    node_cmap = ListedColormap(["#AAAAAA", "#FF0000", "#0000FF"])

    if title is not None:
        axes.set_title(title)
    axes.set_xlabel("$x_1$")
    axes.set_ylabel("$x_2$")
    axes.set_xlim(domain)
    axes.set_ylim(codomain)

    nx.draw_networkx_nodes(graph, positions, ax=axes, node_size=25,
                           node_color=self.y_, cmap=node_cmap)

    if not predict:
        return (fig, axes)

    mesh_x1, mesh_x2 = np.meshgrid(grid_x1, grid_x2)
    grid_points = np.c_[mesh_x1.ravel(), mesh_x2.ravel()]
    surface = self.predict(grid_points).reshape(100, 100)
    contour_levels = np.array([-1, 0, 1])
    surface_cmap = plt.cm.RdYlBu

    if self.params["gamma_i"] != 0.0:
        nx.draw_networkx_edges(graph, positions, ax=axes, edge_color="#AAAAAA")

    axes.contourf(mesh_x1, mesh_x2, surface, contour_levels,
                  cmap=surface_cmap, alpha=0.25)
    return (fig, axes)
def format_out_relations(relations, out_):
    """Return ``relations`` in the format selected by ``out_``.

    Parameters
    ----------
    relations: scipy.sparse matrix
        the relations expressed in a sparse way.
    out_: str, one of ['sparse', 'network', 'sp_relations', 'list']
        the desired output format.

    Returns
    -------
    relations_o: decided format
        * 'sparse': ``relations`` itself, untouched.
        * 'network': the result of ``nx.from_scipy_sparse_matrix``.
        * 'sp_relations': ``RegionDistances(relations)``.
        * 'list': one list per row holding the column indices of the
          non-zero entries of that row.

    Raises
    ------
    ValueError
        if ``out_`` is not one of the supported formats.
    """
    if out_ == 'sparse':
        return relations
    if out_ == 'network':
        return nx.from_scipy_sparse_matrix(relations)
    if out_ == 'sp_relations':
        return RegionDistances(relations)
    if out_ == 'list':
        # nonzero() on a 1 x n row returns (row_idx, col_idx); the row indices
        # are all zero, so the related elements are the column indices.
        return [list(relations.getrow(i).nonzero()[1])
                for i in range(relations.shape[0])]
    raise ValueError(
        "Unknown output format %r; expected 'sparse', 'network', "
        "'sp_relations' or 'list'." % (out_,))
def classify_samples(data, labels, unmarked_idxs, sample_size, n_runs,
                     n_clusters):
    """Run one bagging round of transductive classification.

    A random subset of ``sample_size`` entries of ``unmarked_idxs`` is kept;
    the remaining unmarked nodes are removed from the graph built from
    ``data`` and from ``labels``.  A ``TransductiveClassifier`` is fitted on
    the reduced problem.

    Returns a dict mapping each kept unmarked index to the corresponding
    entry of ``clf.transduction_``.
    """
    # list() is required on Python 3, where range objects cannot be shuffled.
    all_idxs = list(range(len(unmarked_idxs)))
    random.shuffle(all_idxs)
    keep_raw_idxs = sorted(all_idxs[:sample_size])
    delete_raw_idxs = sorted(all_idxs[sample_size:])
    keep_idxs = unmarked_idxs[keep_raw_idxs]
    delete_idxs = unmarked_idxs[delete_raw_idxs]

    # Drop the discarded nodes from both the graph and the label array.
    bagging_graph = nx.from_scipy_sparse_matrix(data)
    bagging_graph.remove_nodes_from(delete_idxs)
    bagging_adj_matrix = nx.to_scipy_sparse_matrix(bagging_graph)
    bagging_labels = np.delete(labels, delete_idxs, 0)
    # Rows whose first label column is -1 are the ones left to classify.
    bagging_unmarked_idxs = np.where(bagging_labels[:, 0] == -1)[0]

    clf = TransductiveClassifier(n_runs, n_clusters)
    clf.fit(bagging_adj_matrix, bagging_labels)

    assert len(keep_idxs) == len(bagging_unmarked_idxs)
    unmarked_point_probs = {}
    for i, idx in enumerate(keep_idxs):
        unmarked_point_probs[idx] = clf.transduction_[bagging_unmarked_idxs[i]]
    return unmarked_point_probs
def plot_subgraph_links(sparse_m, query, degree=0, layout="std", graph=None):
    """Draw the subgraph around the nodes selected by ``query``.

    Parameters
    ----------
    sparse_m : scipy.sparse matrix
        Adjacency matrix; converted to a graph when ``graph`` is ``None``.
    query : array-like of bool
        Mask of the selected nodes (the indices where it is true are used).
    degree : int
        0 draws only the selected nodes (red), 1 adds the nodes returned by
        ``compute_sub_adj`` for them (blue), 2 adds one more
        ``compute_sub_adj`` expansion (yellow).
    layout : str
        Key into the ``renderer`` mapping of drawing functions.
    graph : networkx graph, optional
        Pre-built graph to reuse instead of converting ``sparse_m``.

    Raises
    ------
    ValueError
        if ``degree`` is not 0, 1 or 2.
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if degree not in (0, 1, 2):
        raise ValueError("degree must be 0, 1 or 2, got %r" % (degree,))

    cond = np.where(query)[0]
    if graph is None:
        graph = nx.from_scipy_sparse_matrix(sparse_m)

    if degree == 0:
        sub1 = cond
        node_color = "r"
    else:
        # Set membership keeps the color lookups O(1) per node.
        selected = set(cond)
        first_ring = selected | set(compute_sub_adj(sparse_m, cond))
        if degree == 1:
            sub1 = list(first_ring)
            node_color = ["r" if n in selected else "b" for n in sub1]
        else:
            sub1 = list(first_ring |
                        set(compute_sub_adj(sparse_m, list(first_ring))))
            node_color = [
                "r" if n in selected else "b" if n in first_ring else "y"
                for n in sub1
            ]

    renderer[layout](
        graph.subgraph(sub1),
        nodelist=list(sub1),
        node_color=node_color,
        alpha=0.5,
        labels={n: str(n) for n in sub1})
def draw_adjacency_graph(A, node_color=None, size=10, layout='graphviz',
                         prog='neato', node_size=80):
    """Draw the graph described by the sparse adjacency matrix ``A``.

    Parameters
    ----------
    A : scipy.sparse matrix
        Adjacency matrix.
    node_color : color, sequence or array, optional
        Passed to ``nx.draw_networkx_nodes``; ``'gray'`` is used when it is
        ``None`` or empty.
    size : number
        Side of the square figure.
    layout : str
        ``'graphviz'`` uses ``nx.graphviz_layout`` with ``prog``; anything
        else uses ``nx.spring_layout``.
    prog : str
        Graphviz program name.
    node_size : number
        Marker size of the nodes.
    """
    graph = nx.from_scipy_sparse_matrix(A)

    plt.figure(figsize=(size, size))
    plt.grid(False)
    plt.axis('off')

    if layout == 'graphviz':
        pos = nx.graphviz_layout(graph, prog=prog)
    else:
        pos = nx.spring_layout(graph)

    # len() gives a well-defined emptiness test for lists, strings and NumPy
    # arrays alike (``not array`` is ambiguous); values without a length,
    # including None, fall back to plain truthiness.
    try:
        use_default_color = len(node_color) == 0
    except TypeError:
        use_default_color = not node_color
    if use_default_color:
        node_color = 'gray'

    nx.draw_networkx_nodes(graph, pos,
                           node_color=node_color,
                           alpha=0.6,
                           node_size=node_size,
                           cmap=plt.get_cmap('autumn'))
    nx.draw_networkx_edges(graph, pos, alpha=0.5)
    plt.show()
def cover(socp_data, N):
    """Partition the stacked SOCP data into ``N`` parts with graclus.

    The Laplacian from ``form_laplacian(socp_data)`` is written to
    ``graclus.edgelist`` (1-based adjacency lists), the graclus executable at
    ``settings.paths['graclus']`` is run on it, and the partition file
    ``graclus.edgelist.part.<N>`` is read back.

    Returns the partition labels with the first ``n`` entries dropped, where
    ``n`` is the number of rows of ``socp_data['c']``.

    Raises ``Exception`` when no graclus path is configured.
    """
    import subprocess

    if not settings.paths['graclus']:
        raise Exception(
            "Please provide a path to graclus: settings.paths['graclus'] = PATH.")

    n = socp_data['c'].shape[0]

    # Form the Laplacian and use graclus to partition it.
    L = form_laplacian(socp_data)
    graph = nx.from_scipy_sparse_matrix(L)
    adjacency = nx.convert.to_dict_of_lists(graph)

    edgepath = "graclus.edgelist"
    with open(edgepath, "w") as f:
        f.write("%d %d\n" % (graph.number_of_nodes(), graph.number_of_edges()))
        # items() works on Python 2 and 3; node ids are shifted to 1-based.
        for node, neighbours in adjacency.items():
            f.write("%d %s\n" %
                    (node + 1, ' '.join(str(nb + 1) for nb in neighbours)))

    outpath = "graclus.edgelist.part.%d" % N
    proc = subprocess.Popen([settings.paths['graclus'], edgepath, str(N)])
    proc.wait()

    # Context manager so the partition file is closed deterministically.
    with open(outpath, "r") as f:
        part_vert = [int(line.strip()) for line in f]

    return part_vert[n:]
def draw_adjacency_graph(adjacency_matrix,
                         node_color=None,
                         size=10,
                         layout='graphviz',
                         prog='neato',
                         node_size=80,
                         colormap='autumn'):
    """Draw the graph described by a sparse adjacency matrix.

    Parameters
    ----------
    adjacency_matrix : scipy.sparse matrix
        Adjacency matrix of the graph.
    node_color : color, sequence or array, optional
        Passed to ``nx.draw_networkx_nodes``; ``'gray'`` is used when it is
        ``None`` or empty.
    size : number
        Side of the square figure.
    layout : str
        ``'graphviz'`` uses ``nx.graphviz_layout`` with ``prog``; anything
        else uses ``nx.spring_layout``.
    prog : str
        Graphviz program name.
    node_size : number
        Marker size of the nodes.
    colormap : str
        Name given to ``plt.get_cmap``.
    """
    graph = nx.from_scipy_sparse_matrix(adjacency_matrix)

    plt.figure(figsize=(size, size))
    plt.grid(False)
    plt.axis('off')

    if layout == 'graphviz':
        pos = nx.graphviz_layout(graph, prog=prog)
    else:
        pos = nx.spring_layout(graph)

    # The default is None, which has no len(); test it before measuring.
    if node_color is None or len(node_color) == 0:
        node_color = 'gray'

    nx.draw_networkx_nodes(graph, pos,
                           node_color=node_color,
                           alpha=0.6,
                           node_size=node_size,
                           cmap=plt.get_cmap(colormap))
    nx.draw_networkx_edges(graph, pos, alpha=0.5)
    plt.show()
def find_min_spanning_tree(A):
    """Compute a minimum spanning tree, caching the converted graph as JSON.

    Input:
        A : adjacency matrix in scipy.sparse format.  It is only used when
            ``../Data/dcg_graph.json`` does not exist yet; otherwise the graph
            stored in that file is used.
    Output:
        T : minimum spanning tree.
        run_time : seconds spent in this call, including loading/conversion.
    """
    started = time.time()
    cache_file = '../Data/dcg_graph.json'

    if not os.path.exists(cache_file):
        # First run: convert the matrix and store the graph for next time.
        G = from_scipy_sparse_matrix(A)
        payload = json_graph.node_link_data(G)
        with open(cache_file, 'w') as handle:
            json.dump(payload, handle)
    else:
        # The graph was pre-processed before; reload it instead of converting.
        with open(cache_file) as handle:
            G = json_graph.node_link_graph(json.load(handle))

    T = minimum_spanning_tree(G)
    return T, time.time() - started
def get_key_sentences(self, n=5):
    '''
    Uses a simple implementation of TextRank to extract the top N sentences
    from ``self.doc``.

    Returns a list of at most ``n`` ``(score, sentence)`` tuples, best first.

    Sources:
    - Original paper: http://acl.ldc.upenn.edu/acl2004/emnlp/pdf/Mihalcea.pdf
    - Super useful blog post: http://joshbohde.com/blog/document-summarization
    - Wikipedia: http://en.wikipedia.org/wiki/Automatic_summarization#Unsupervised_keyphrase_extraction:_TextRank
    '''
    # Tokenize the document into sentences.
    sentences = PunktSentenceTokenizer().tokenize(self.doc)

    # Word counts, then TF-IDF vectors.
    word_counts = CountVectorizer(min_df=0).fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(word_counts)

    # Normalized matrix times its transpose is a sentence-level similarity
    # matrix; PageRank on that graph scores every sentence.
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)

    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                    reverse=True)
    # Slice, not index: ``[n]`` returned only the (n+1)-th entry and raised
    # IndexError for documents with n sentences or fewer.
    return ranked[:n]
def test_graph_degree():
    "Graph: Graph Degree"
    # Compare graph_degree against the degrees networkx reports for the graph
    # built from the same 25 x 25 random matrix.
    n_nodes = 25
    A = rand_dm(n_nodes, 0.5)
    deg = graph_degree(A.data)

    nx_degrees = nx.from_scipy_sparse_matrix(A.data).degree()
    nx_deg = array([nx_degrees[node] for node in range(n_nodes)])

    # NOTE(review): ``.all()`` is only true when *every* difference is
    # non-zero, so this passes as soon as a single degree matches; ``.any()``
    # may have been intended - confirm before tightening.
    difference = deg - nx_deg
    assert_equal(difference.all(), 0)
def __test_save_and_load_graph_npz(self, x):
    '''Test save and load a Networkx DiGraph in npz format with np-array wrapping.

    The adjacency matrix of ``x`` is wrapped in a one-element array, written
    with ``np.savez`` to a temporary file, read back and converted to a
    ``nx.DiGraph`` whose nodes and edges must match those of ``x``.
    '''
    # The context manager closes (and thereby deletes) the temporary file.
    with tempfile.TemporaryFile() as out_file:
        np.savez(out_file, x=np.array([nx.to_scipy_sparse_matrix(x)]))
        out_file.seek(0)  # Only needed here to simulate closing & reopening file
        # The wrapped sparse matrix is stored as a pickled object array, which
        # np.load refuses to read unless allow_pickle is enabled.
        x2 = np.load(out_file, allow_pickle=True)
        # create_using is passed by keyword: positionally the graph would be
        # bound to ``parallel_edges`` on networkx releases that have it.
        y = nx.from_scipy_sparse_matrix(x2['x'][0], create_using=nx.DiGraph())

    assert_equal(x.nodes(), y.nodes(),
                 'Saving and loading did not restore the original object')
    assert_equal(x.edges(), y.edges(),
                 'Saving and loading did not restore the original object')
def text_rank4(content):
    """Print a three-sentence TextRank summary of ``content``.

    Sentences come from ``cut_sentence``; they are vectorized with TF-IDF
    (tokenized by ``Tokenize``), ranked with PageRank on the similarity graph,
    and the three best are printed after the ``text_rank4`` tag, joined with
    the ideographic full stop.  Returns ``None``.
    """
    sents = list(cut_sentence(content))
    vect = TfidfVectorizer(min_df=1, tokenizer=Tokenize)
    tfidf = vect.fit_transform(sents)
    nx_graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(nx_graph)

    res = sorted(((scores[i], i) for i in range(len(sents))), reverse=True)
    # The three best entries are re-sorted ascending (by score, then index)
    # before being joined.
    top_n_summary = [sents[i] for _, i in sorted(res[:3])]
    summary = u'。 '.join(top_n_summary).replace('\r', '').replace('\n', '') + u'。'

    # One pre-joined argument prints the same text on Python 2 and Python 3
    # as the former statement ``print 'text_rank4', summary``.
    print(u'text_rank4 ' + summary)
def to_networkx(self, directed=None):
    '''Build a networkx graph from ``self.matrix()``.

    ``directed`` selects ``nx.DiGraph`` or ``nx.Graph``; when it is ``None``
    the value of ``self.is_directed()`` is used.  Requires the networkx
    library.
    '''
    import networkx as nx

    if directed is None:
        directed = self.is_directed()
    graph_cls = nx.DiGraph if directed else nx.Graph

    adj = self.matrix()
    # Pick the converter that matches the matrix representation.
    if ss.issparse(adj):
        build = nx.from_scipy_sparse_matrix
    else:
        build = nx.from_numpy_matrix
    return build(adj, create_using=graph_cls())
def make_json_graph(msm, request):
    """Return node-link JSON data for the thresholded transition graph of ``msm``.

    Request arguments read:
      * ``cutoff`` - transition probabilities below this float are dropped;
      * ``resize`` - key into the ``resize`` mapping whose function computes
        the per-node ``size`` attribute.

    Nodes left without any edge are removed before serialization.
    """
    cutoff = float(request.get_argument('cutoff'))
    resize_key = str(request.get_argument('resize'))

    # Work on a copy so the model's transition matrix is left untouched.
    t = sparse.csr_matrix(msm.transmat_.copy())
    t.data[t.data < cutoff] = 0.0
    t.eliminate_zeros()

    G = nx.from_scipy_sparse_matrix(t, create_using=nx.DiGraph())
    metric = resize[resize_key](G, msm, t)
    # Keywords keep the call valid whichever positional order (name/values or
    # values/name) the installed networkx release uses.
    nx.set_node_attributes(G, name='size', values=metric)
    # Materialize first: isolates() may be lazy, and removing nodes while
    # iterating the graph is an error.
    G.remove_nodes_from(list(nx.isolates(G)))
    return json_graph.node_link_data(G)
def test_symmetric(self):
    """Check that a symmetric matrix adds each undirected edge only once.

    Converting ``[[0, 1], [1, 0]]`` into an ``nx.MultiGraph`` with
    :func:`networkx.from_scipy_sparse_matrix` must give a single edge
    ``0 - 1`` of weight 1.
    """
    symmetric = sparse.csr_matrix([[0, 1], [1, 0]])

    reference = nx.MultiGraph()
    reference.add_edge(0, 1, weight=1)

    built = nx.from_scipy_sparse_matrix(symmetric,
                                        create_using=nx.MultiGraph())
    assert_graphs_equal(built, reference)
def __init__(self, adj_matrix=None, file_name=None):
    """Initialize the station graph from a file or from an adjacency matrix.

    Without ``adj_matrix`` the instance is populated from ``file_name`` via
    ``__readInput`` and ``initializeGraph``.  With ``adj_matrix`` the
    adjacency list, the networkx graph and the station count are derived from
    the matrix directly and ``self.testing`` is set.
    """
    # Identity test: ``== None`` compares elementwise on NumPy arrays and has
    # no well-defined truth value.
    if adj_matrix is None:
        (self.adj_list, self.adj_matrix, self.station_lookup,
         self.index_lookup, self.num_stations) = self.__readInput(file_name)
        self.initializeGraph()
    else:
        self.adj_matrix = adj_matrix
        self.adj_list = self.adj_matrix_to_list(self.adj_matrix)
        self.graph_obj = nx.from_scipy_sparse_matrix(csr_matrix(adj_matrix))
        self.num_stations = len(adj_matrix)
        self.testing = True
def cover(socp_data, N):
    """Partition the stacked SOCP data into ``N`` parts with pymetis.

    The Laplacian from ``form_laplacian(socp_data)`` is converted to a
    networkx graph and handed to ``pm.part_graph``.  Returns the partition
    labels with the first ``n`` entries dropped, ``n`` being the number of
    rows of ``socp_data['c']``.
    """
    n_skip = socp_data['c'].shape[0]

    laplacian = form_laplacian(socp_data)
    laplacian_graph = nx.from_scipy_sparse_matrix(laplacian)

    _cuts, membership = pm.part_graph(N, laplacian_graph)
    return membership[n_skip:]
def remove_small_components(full_adj_matrix, labels, min_nodes):
    """Drop connected components with fewer than ``min_nodes`` nodes.

    Returns ``(adjacency, kept_labels)``: the CSC adjacency matrix of the
    remaining subgraph and the entries of ``labels`` for its nodes, both in
    the same node order.
    """
    g = nx.from_scipy_sparse_matrix(full_adj_matrix)

    kept_nodes = []
    for component in nx.connected_components(g):
        if len(component) >= min_nodes:
            kept_nodes.extend(component)

    subgraph = g.subgraph(kept_nodes)
    # A plain list is a valid NumPy index on every networkx version (a node
    # view is not), and passing it as nodelist pins the matrix rows to the
    # same order as the returned labels.
    node_order = list(subgraph.nodes())
    adjacency = nx.to_scipy_sparse_matrix(subgraph, nodelist=node_order,
                                          format="csc")
    return (adjacency, labels[node_order])
def textrank(self, document):
    """Rank the sentences of ``document`` with TextRank.

    Returns a list of ``(score, sentence)`` tuples sorted in descending
    order.
    """
    sentences = PunktSentenceTokenizer().tokenize(document)

    counts = CountVectorizer().fit_transform(sentences)
    tfidf = TfidfTransformer().fit_transform(counts)
    graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(graph)

    ranking = [(scores[position], sentence)
               for position, sentence in enumerate(sentences)]
    ranking.sort(reverse=True)
    return ranking
def rank_sentences(sentences):
    """Rank ``sentences`` by PageRank over their TF-IDF similarity graph.

    Each sentence is preprocessed with ``preprocess`` before vectorization.
    Returns a list of ``(score, index, sentence)`` tuples ordered by
    decreasing score; equal scores keep ascending index order.
    """
    cleaned = [preprocess(sentence) for sentence in sentences]
    tfidf = TfidfVectorizer().fit_transform(cleaned)
    graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(graph)

    # enumerate() already yields ascending indices, and the stable sort below
    # preserves that order among ties.
    ranking = [(scores[position], position, sentence)
               for position, sentence in enumerate(sentences)]
    ranking.sort(key=lambda entry: entry[0], reverse=True)
    return ranking
def refresh_seeds(self, n_trees=1) -> np.ndarray:
    """Add the skeleton tips of the current canvas to ``self.coords``.

    The seed canvas logits are converted to probabilities, thresholded at
    ``self.skeletonization_threshold`` and skeletonized.  For the ``n_trees``
    largest connected components of the skeleton graph, the degree-1 nodes
    ("tips") become new seed rows ``[0, <node coordinates>, self.idx]``.  The
    rows are merged with the existing ``self.coords``, de-duplicated and
    sorted by column 3.

    The result is stored on ``self.coords``; nothing is returned.
    NOTE(review): the ``np.ndarray`` return annotation does not match that -
    confirm whether callers expect the updated coordinates back.
    """
    logging.info("TipTracerSeedPolicy skeletonizing and extracting seeds")

    # Logits -> probabilities -> binary mask -> skeleton.
    probabilities = np.nan_to_num(expit(np.squeeze(self.canvas.seed)))
    mask = (probabilities >= self.skeletonization_threshold).astype(np.uint8)
    skeleton = morphology.skeletonize(mask)
    self._check_save_skeleton(skeleton)

    csgraph, node_coords, _ = skeleton_to_csgraph(skeleton)
    graph = nx.from_scipy_sparse_matrix(csgraph)

    # Connected components from large to small; leaf nodes are the tips.
    components = sorted(nx.connected_components(graph), key=len, reverse=True)
    tip_coords = []
    for component_nodes in components[:n_trees]:
        leaf_node_ids = [
            node_id for node_id, node_degree in graph.degree(component_nodes)
            if node_degree == 1
        ]
        tip_coords.extend(node_coords[leaf_node_ids, :].astype(int).tolist())

    # Keep the array 2-D even when no tip was found; an empty list would be
    # 1-D and make the hstack below fail on mismatched dimensions.
    tip_coords = np.asarray(tip_coords, dtype=int).reshape(
        -1, node_coords.shape[1])
    n_new = len(tip_coords)

    # Prepend a zero column and append the policy index column.
    new_seeds = np.hstack((np.zeros((n_new, 1), dtype=int),
                           tip_coords,
                           np.full((n_new, 1), self.idx, dtype=int)))

    # Unique union of existing coords and new seeds, so locations that were
    # already seeded are not seeded again.
    coord_update = np.unique(np.vstack((self.coords, new_seeds)), axis=0)
    self.coords = coord_update[np.argsort(coord_update[:, 3])]
def gerarGrafoNx(nome):
    """Build a NetworkX graph from a Matrix Market file.

    Parameters
    ----------
    nome: str
        Path of the file without its extension; ``nome + ".mtx"`` is read.

    Returns
    -------
    grafo
        NetworkX graph built from the matrix.  When the matrix is not
        symmetric its transpose is added to it before the conversion.
    simetrica
        Result of ``ReducaoLarguraBanda.ehSimetrica`` for the matrix as read.
    """
    matriz = mmread(nome + ".mtx")
    simetrica = ReducaoLarguraBanda.ehSimetrica(matriz)

    # Symmetrize so the undirected graph reflects every stored entry.
    if not simetrica:
        matriz += matriz.transpose()

    return nx.from_scipy_sparse_matrix(matriz), simetrica
def init_setup():
    """Load the dataset, train the victim GCN and report its test metrics.

    Uses the module-level ``args`` (``dataset``, ``ctx``).  After
    ``injecting_nodes`` has processed the dataset, the adjacency graph is
    stored on ``StaticGraph.graph``, the data is preprocessed, a ``GCN`` is
    fitted and evaluated on the test split.

    Returns ``(features, labels, idx_train, idx_val, idx_test, victim_model,
    dict_of_lists, adj)`` where ``dict_of_lists`` is the adjacency-list form
    of the graph and ``features`` / ``labels`` / ``adj`` are the preprocessed
    versions.
    """
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
    injecting_nodes(data)

    adj = data.adj
    features = data.features
    labels = data.labels

    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)

    idx_train = data.idx_train
    idx_val = data.idx_val
    idx_test = data.idx_test

    if args.ctx == 'gpu':
        device = torch.device('cuda')
    else:
        device = 'cpu'

    # gray box setting
    adj, features, labels = preprocess(adj, features, labels,
                                       preprocess_adj=False,
                                       sparse=True,
                                       device=device)

    # Setup victim model
    n_classes = labels.max().item() + 1
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=n_classes,
                       nhid=16,
                       dropout=0.5,
                       weight_decay=5e-4,
                       device=device)
    victim_model = victim_model.to(device)
    victim_model.fit(features, adj, labels, idx_train, idx_val)
    victim_model.norm_tool = GraphNormTool(normalize=True, gm='gcn',
                                           device=device)

    output = victim_model.predict(features, adj)
    test_output = output[idx_test]
    test_labels = labels[idx_test]
    loss_test = F.nll_loss(test_output, test_labels)
    acc_test = accuracy(test_output, test_labels)
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return (features, labels, idx_train, idx_val, idx_test, victim_model,
            dict_of_lists, adj)
def textrank_text_summarizer(documents, num_sentences=2,
                             feature_type='frequency'):
    """Print and return the top TextRank sentences.

    A TF-IDF feature matrix is built from ``norm_sentences`` and PageRank is
    run on the resulting similarity graph.  The ``num_sentences`` best
    sentences (fewer when not enough are available) are taken from
    ``sentences`` and emitted in their original order.

    NOTE(review): ``norm_sentences`` and ``sentences`` are not defined in this
    function and are presumably module-level names; ``documents`` and
    ``feature_type`` are accepted but not used - confirm against callers.

    Returns the selected sentences concatenated, each preceded by a space.
    """
    vec, dt_matrix = build_feature_matrix(norm_sentences, feature_type='tfidf')
    similarity_matrix = dt_matrix * dt_matrix.T

    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)

    ranked_sentences = sorted(
        ((score, index) for index, score in scores.items()), reverse=True)

    # Slicing caps the selection at the number of ranked sentences instead of
    # raising IndexError when num_sentences is larger.
    top_sentence_indices = sorted(
        index for _, index in ranked_sentences[:max(num_sentences, 0)])

    for index in top_sentence_indices:
        print(sentences[index])
    return ''.join(' ' + sentences[index] for index in top_sentence_indices)
def graph_from_scipy(x: ScipyGraph, **props) -> NetworkXGraph:
    """Translate a ``ScipyGraph`` into a ``NetworkXGraph``.

    The sparse matrix ``x.value`` becomes a directed or undirected networkx
    graph depending on the computed ``is_directed`` property.  Edge weights
    are removed for edge type ``"set"`` and cast with the caster registered
    for ``edge_dtype`` otherwise.  Nodes are relabelled in place when
    ``x.node_list`` is not ``0..n-1``, and node values, when present, are
    stored under the ``"weight"`` node attribute.
    """
    from ..python.types import dtype_casting

    aprops = ScipyGraph.Type.compute_abstract_properties(
        x,
        {"is_directed", "edge_type", "edge_dtype", "node_type", "node_dtype"})

    graph_cls = nx.DiGraph if aprops["is_directed"] else nx.Graph
    nx_graph = nx.from_scipy_sparse_matrix(
        x.value,
        create_using=graph_cls,
        edge_attribute="weight",
    )

    if aprops["edge_type"] == "set":
        # Unweighted edges: drop the weight attribute the converter added.
        for edge in nx_graph.edges(data=True):
            del edge[2]["weight"]
    else:
        cast_edge = dtype_casting[aprops["edge_dtype"]]
        for edge in nx_graph.edges(data=True):
            edge_attrs = edge[2]
            edge_attrs["weight"] = cast_edge(edge_attrs["weight"])

    positions = np.arange(len(x.node_list))
    if not (x.node_list == positions).all():
        # Matrix positions differ from node ids: relabel without copying.
        nx.relabel_nodes(nx_graph, dict(enumerate(x.node_list)), False)

    if x.node_vals is not None:
        cast_node = dtype_casting[aprops["node_dtype"]]
        node_weights = {}
        for node_id, value in zip(x.node_list, x.node_vals):
            node_weights[node_id] = cast_node(value)
        nx.set_node_attributes(nx_graph, node_weights, name="weight")

    return NetworkXGraph(nx_graph, aprops=aprops)
def subgraph_extraction_labeling(ind, A, h=1, max_nodes_per_hop=None,
                                 node_information=None):
    """Extract and label the h-hop enclosing subgraph around link ``ind``.

    Parameters
    ----------
    ind : pair of node indices
        The target link; its two nodes become nodes 0 and 1 of the subgraph.
    A : scipy.sparse matrix
        Adjacency matrix of the full graph.
    h : int
        Number of hops to expand.
    max_nodes_per_hop : int, optional
        When given, each hop's fringe is randomly down-sampled to this size.
    node_information : array-like, optional
        Per-node features; rows of the selected nodes are returned.

    Returns
    -------
    g : networkx graph of the subgraph with the target link removed.
    labels : list produced by ``node_label`` for the subgraph.
    features : selected rows of ``node_information`` or ``None``.
    """
    nodes = {ind[0], ind[1]}
    visited = {ind[0], ind[1]}
    fringe = {ind[0], ind[1]}

    for _ in range(h):
        fringe = neighbors(fringe, A)
        fringe = fringe - visited
        visited = visited.union(fringe)
        if max_nodes_per_hop is not None and max_nodes_per_hop < len(fringe):
            # random.sample needs a sequence; sampling directly from a set
            # raises TypeError on Python 3.11+.
            fringe = random.sample(list(fringe), max_nodes_per_hop)
        if len(fringe) == 0:
            break
        nodes = nodes.union(fringe)

    # Move the target nodes to the top.
    nodes.remove(ind[0])
    nodes.remove(ind[1])
    nodes = [ind[0], ind[1]] + list(nodes)
    subgraph = A[nodes, :][:, nodes]

    # Apply node labeling.
    labels = node_label(subgraph)

    # Get node features.
    features = None
    if node_information is not None:
        features = node_information[nodes]

    # Construct the nx graph and remove the link between the target nodes.
    g = nx.from_scipy_sparse_matrix(subgraph)
    if g.has_edge(0, 1):
        g.remove_edge(0, 1)

    return g, labels.tolist(), features
def load_data_GraphSaint(self):
    """Load a GraphSAINT-format dataset into this object.

    The raw data from ``self.load_m()`` is processed with
    ``self.process_graph_data``.  The full adjacency matrix (cast to int32)
    is converted, via networkx, into an undirected, unweighted
    ``lol.LolGraph`` stored on ``self._g``.  Also sets ``self._num_classes``,
    ``self._labels``, ``self._X`` (float tensor) and ``self.in_features``.
    """
    processed = self.process_graph_data(*self.load_m())
    adj_full, adj_train, feat_full, class_arr, role = processed
    adj_full = adj_full.astype(np.int32)

    self._num_classes = class_arr.shape[1]

    print("create graph")
    started = time.time()
    edge_list = list(nx.from_scipy_sparse_matrix(adj_full).edges)
    # Convert the graph to the LOL format.
    lol_graph = lol.LolGraph(directed=False, weighted=False)
    lol_graph.convert(edge_list)
    self._g = lol_graph
    print("took", time.time() - started)

    self._labels = torch.tensor(class_arr)
    self._X = torch.tensor(feat_full).to(dtype=torch.float)
    self.in_features = feat_full.shape[1]
def order_points(points):
    """Order ``points`` so that they form a continuous line.

    A 2-nearest-neighbour graph is built over the points; a depth-first
    preorder is computed from every node, and the ordering with the smallest
    sum of squared distances between consecutive points is returned.

    Based on
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line

    Returns the list of point indices in the chosen order.
    """
    # n_neighbors is passed by keyword: current scikit-learn releases reject
    # positional constructor arguments.
    clf = NearestNeighbors(n_neighbors=2).fit(points)
    G = clf.kneighbors_graph()  # sparse neighbour matrix
    T = nx.from_scipy_sparse_matrix(G)

    # One candidate ordering per starting node.
    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(len(points))]

    mindist = np.inf
    minidx = 0
    for i, path in enumerate(paths):
        ordered = points[path]
        # Cost of this order: squared distance between points (k) and (k+1),
        # summed over the whole path.
        cost = (((ordered[:-1] - ordered[1:]) ** 2).sum(1)).sum()
        if cost < mindist:
            mindist = cost
            minidx = i

    return paths[minidx]
def summarize(text):
    """Return an extractive TextRank summary of ``text``.

    Sentences are ranked by PageRank over their TF-IDF similarity graph; the
    best 40% (rounded down) are joined with single spaces, highest ranked
    first.
    """
    print("Summary...")
    sentences_token = sent_tokenize(text)

    # Feature extraction: bag of words, then L2-normalized TF-IDF.
    bow_vectorizer = CountVectorizer(min_df=1, decode_error='replace')
    sent_bow = bow_vectorizer.fit_transform(sentences_token)
    tfidf_transformer = TfidfTransformer(norm='l2', smooth_idf=True,
                                         use_idf=True)
    sent_tfidf = tfidf_transformer.fit_transform(sent_bow)

    nx_graph = nx.from_scipy_sparse_matrix(sent_tfidf * sent_tfidf.T)
    scores = nx.pagerank(nx_graph)

    ranked = [(scores[position], sentence)
              for position, sentence in enumerate(sentences_token)]
    ranked.sort(reverse=True)

    number_of_sents = int(0.4 * len(ranked))
    selected = ranked[:number_of_sents]
    return ' '.join(sentence for _, sentence in selected)
def make_ConformationalNetwork(self):
    """Build the product network of center positions and rotations.

    The centers network links every pair of entries of ``self.ijk_centers``
    within Chebyshev distance 1.  The rotations network has one node per
    rotation and links each pixel to the neighbours that
    ``hp.get_all_neighbours`` reports for it.  The Cartesian product of both
    networks is returned.
    """
    neigh = NearestNeighbors(radius=1, metric='chebyshev')
    neigh.fit(self.ijk_centers)
    net_centers = nx.from_scipy_sparse_matrix(neigh.radius_neighbors_graph())
    del neigh

    net_rotations = nx.Graph()
    net_rotations.add_nodes_from(range(self.num_rotations))
    for pixel in range(self.num_rotations):
        neighs = hp.get_all_neighbours(self.nside, pixel, nest=False)
        # NOTE(review): entries equal to -1 are remapped to pixel 0, which
        # adds an edge to pixel 0 for each of them - confirm this is intended
        # rather than dropping those entries.
        neighs[neighs == -1] = 0
        sources = np.full(neighs.shape[0], pixel)
        net_rotations.add_edges_from(zip(sources, neighs))
        del neighs

    net = nx.cartesian_product(net_centers, net_rotations)
    # Release the two factor graphs before returning the (larger) product.
    del net_rotations, net_centers
    return net
def get_highest_pagerank_scores(fileid, n=5):
    """Print the ``n`` highest-ranked lines of a text file.

    Only the first 10000 characters of the UTF-8 file ``fileid`` are used;
    they are split into newline-terminated chunks, which are ranked by
    PageRank over their TF-IDF similarity graph.  Each selected chunk is
    printed together with its score.  Returns ``None``.
    """
    with open(fileid, encoding="utf-8") as handle:
        text = handle.read()

    sentences = re.findall(r'.*?\n', text[0:10000], flags=re.DOTALL)

    counts = CountVectorizer().fit_transform(sentences)
    tfidf = TfidfTransformer().fit_transform(counts)
    nx_graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(nx_graph)

    by_score = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    for index, score in by_score[:n]:
        print(score, sentences[index])
def calculate_comment_tree_hirsch(comment_tree):
    """Return the Hirsch-style index of a comment tree.

    The result is the largest depth ``d`` (distance from node 0) such that at
    least ``d`` nodes sit at depth ``d``.  An empty tree gives ``0.0``.
    """
    tree = nx.from_scipy_sparse_matrix(comment_tree, create_using=nx.Graph())
    if len(tree) == 0:
        return 0.0

    node_to_depth = nx.shortest_path_length(tree, 0)
    nodes_per_depth = collections.Counter(node_to_depth.values())

    # Start at the deepest level and walk up until enough nodes are found;
    # depth 0 always qualifies, so the loop terminates.
    hirsch = max(node_to_depth.values())
    while nodes_per_depth[hirsch] < hirsch:
        hirsch -= 1
    return hirsch
def generate_summary(self, sents):
    """Select sentences by TextRank and group them by source document.

    ``sents`` are vectorized with bigram counts and TF-IDF, then scored with
    PageRank on the similarity graph.  30% of the sentences (at least 3) are
    selected and each one is filed, stripped, under the index of every entry
    of ``self.documents`` that contains it.

    NOTE(review): the ranking is sorted in ascending order, so the selection
    keeps the *lowest*-scored sentences - confirm this is intended.

    Returns ``(summaries, removed_sentences)``; ``removed_sentences`` is
    always an empty list.
    """
    bigram_counts = CountVectorizer(ngram_range=(2, 2)).fit_transform(sents)
    tfidf = TfidfTransformer().fit_transform(bigram_counts)
    nx_graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(nx_graph)

    ranked = sorted((scores[position], sentence)
                    for position, sentence in enumerate(sents))
    number_of_nodes = max(int(0.3 * len(ranked)), 3)
    selected = ranked[:number_of_nodes]

    summaries = {}
    removed_sentences = []
    for _, sentence in selected:
        for index, document in enumerate(self.documents):
            if sentence in document:
                summaries.setdefault(index, []).append(sentence.strip())

    return summaries, removed_sentences
def draw_clustered_mlp(weights_path, clustering_result, n_clusters=4,
                       is_first_square=True, ax=None):
    """Draw a network with its nodes colored by spectral-clustering label.

    ``clustering_result`` is a ``(labels, metrics)`` pair.  When the weights
    path contains ``cnn`` only the convolutional layers are drawn (the input
    layer and fully connected layers are omitted) and ``is_first_square`` is
    forced to ``False``.  A new axes is created when ``ax`` is ``None``.

    Returns ``(ax, labels, metrics)``.
    """
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    path_text = str(weights_path).lower()
    if 'cnn' in path_text:
        # If cnn, omit input layer and fc layers.
        is_first_square = False
        if 'vgg' in path_text:
            cnn_params = CNN_VGG_MODEL_PARAMS
        else:
            cnn_params = CNN_MODEL_PARAMS
        n_conv_layers = len(cnn_params['conv'])
        weights = weights[1:n_conv_layers]
        layer_widths = layer_widths[1:n_conv_layers + 1]

    labels, metrics = clustering_result

    G = nx.from_scipy_sparse_matrix(weights_to_graph(weights))
    pos = set_nodes_positions(G.nodes, layer_widths, labels, is_first_square)

    color_mapper = get_color_mapper(n_clusters)
    color_map = [color_mapper[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    # Warnings raised while drawing are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=10,
                ax=ax)

    draw_metrics(metrics, ax)
    return ax, labels, metrics
def load_graph(path, name, is_weighted):
    """Load a dataset as a networkx graph with a "weight" on each edge.

    The generic loader assumes a text file with columns
    ``target source`` (e.g. ``1 2``) or ``target source weight``
    (e.g. ``1 2 0.34``).  For any other format, write a dedicated loader.

    :param path: Directory that holds the dataset file.
    :param name: Name of the dataset.  "Yelp" is read from ``yelp_data.p``,
        "Youtube" and "Flickr" from ``<name>.mat`` (key ``"network"``), and
        anything else from the edge list ``<name>.txt``.
    :param is_weighted: True if the edge list carries a weight column.
    :return: A networkx graph.  Edge lists are read into ``nx.DiGraph``.
    """
    if name == "Yelp":
        # NOTE(review): pickle.load executes arbitrary code from the file --
        # only use with trusted data.
        with open(os.path.join(path, "yelp_data.p"), 'rb') as f:
            G = pickle.load(f)
        return add_weights(G)

    if name in ("Youtube", "Flickr"):
        mat_contents = scipy.io.loadmat(os.path.join(path, "{}.mat".format(name)))
        adjacency = scipy.sparse.csr_matrix(mat_contents["network"])
        # No need to add weights here; the conversion already sets them.
        # NOTE(review): no create_using is passed, so this branch does not
        # request a DiGraph like the edge-list branches do -- confirm.
        return nx.from_scipy_sparse_matrix(adjacency)

    edge_file = os.path.join(path, name + ".txt")
    if is_weighted:
        G = nx.read_weighted_edgelist(edge_file,
                                      create_using=nx.DiGraph(),
                                      delimiter=",")
        if G.number_of_nodes() == 0:
            # Nothing parsed with a comma delimiter; retry with the default.
            G = nx.read_weighted_edgelist(edge_file,
                                          create_using=nx.DiGraph())
        return G

    G = nx.read_edgelist(edge_file, create_using=nx.DiGraph(), delimiter=",")
    if G.number_of_nodes() == 0:
        # Nothing parsed with a comma delimiter; retry with the default.
        G = nx.read_edgelist(edge_file, create_using=nx.DiGraph())
    # Unweighted input: put weights equal to 1.
    return add_weights(G)
def summarize(document):
    """Summarise ``document`` by clustering its sentence-similarity graph.

    Sentences are vectorised as TF-IDF (English stop words removed) and the
    product of that matrix with its transpose is turned into a graph.  Edges
    are then added to an initially edgeless copy in order of decreasing
    weight until the copy has exactly ``k`` (10) connected components, or the
    edges run out.  ``CommunitySummarizer.textrank`` is called on the
    sentences of each component, and a sentence is returned when the value
    it receives is ``>= 1``.

    Args:
        document: Text passed to ``sent_tokenize``.

    Returns:
        The list of selected sentences, in document order.
    """
    sentences = sent_tokenize(document)
    bow_matrix = CountVectorizer(stop_words='english').fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)

    # sorted() accepts both a list and an edge view, and the single-argument
    # key avoids tuple-parameter unpacking, which Python 3 rejects.
    edge_wts = sorted(nx_graph.edges(data=True),
                      key=lambda edge: edge[2]['weight'],
                      reverse=True)

    k = 10  # number of components (sentence groups) to stop at
    G = nx.Graph()
    G.add_nodes_from(nx_graph.nodes())

    sub_graphs = []
    for u, v, d in edge_wts:
        # Attributes go in as keywords; a positional dict is not accepted by
        # every networkx version.
        G.add_edge(u, v, **d)
        # Materialise the result so len() and indexing work even when the
        # library returns a generator.
        sub_graphs = list(nx.connected_component_subgraphs(G))
        if len(sub_graphs) == k:
            break

    in_summary = [0] * len(sentences)
    for component in sub_graphs:
        members = list(component.nodes())
        scores = CommunitySummarizer.textrank([sentences[j] for j in members])
        for position, node in enumerate(members):
            in_summary[node] = scores[position]

    return [
        sentence for sentence, score in zip(sentences, in_summary)
        if score >= 1
    ]
def get_n2v_embedding(graph, binary):
    """Run an external node2vec binary on ``graph`` and load its embedding.

    The sparse matrix is converted to a weighted networkx graph, self-loops
    are dropped, and the remaining edges are written as ``u v weight`` lines
    to ``tmp/tmpgraph.edges``.  ``binary`` is then invoked to produce
    ``tmp/tmpgraph.emb``, which is read with ``np.loadtxt`` (first row
    skipped).  The ``tmp`` directory is removed afterwards.

    Example invocation of the binary:
    ./node2vec -i:graph/karate.edgelist -o:emb/karate.emb -l:3 -d:24 -p:0.3 -dr -v

    Args:
        graph: Scipy sparse matrix accepted by ``nx.from_scipy_sparse_matrix``.
        binary: Path of the executable to call.

    Returns:
        The array loaded from the embedding file.
    """
    G = nx.from_scipy_sparse_matrix(graph, edge_attribute='weight')

    # Collect the self-loops before removing them: deleting edges while
    # iterating G.edges() mutates the structure being iterated.
    self_loops = [(u, v) for u, v in G.edges() if u == v]
    G.remove_edges_from(self_loops)

    if not os.path.exists("tmp"):
        os.makedirs("tmp")
    tmp_graph = "tmp/tmpgraph.edges"
    out_graph = "tmp/tmpgraph.emb"

    number_of_nodes = len(G.nodes())
    number_of_edges = len(G.edges())
    print("Graph has {} edges and {} nodes.".format(number_of_edges,
                                                    number_of_nodes))

    # The context manager closes the file even if a write fails.
    with open(tmp_graph, "w+") as f:
        for u, v, data in G.edges(data=True):
            f.write(str(u) + " " + str(v) + " " + str(data['weight']) + "\n")

    print("Starting graphlet counts..")
    call([
        binary, "-i:" + tmp_graph, "-o:" + out_graph, "-l:3", "-d:128",
        "-p:0.3", "-dr", "-v"
    ])
    matf = np.loadtxt(out_graph, delimiter=" ", skiprows=1)
    call(["rm", "-rf", "tmp"])
    print("Finished n2v:", matf.shape)
    return matf
def draw_clustered_net_imagenet(clustering_results, n_clusters=10):
    """Draw a clustered network on a new 20x30 figure.

    Args:
        clustering_results: Mapping that provides ``'network'`` (used as the
            figure title), ``'conv_connections'``, ``'labels'`` and the
            metric entries ``'ncut'``, ``'ave_in_out'``, ``'n_samples'``,
            ``'mean'``, ``'stdev'``, ``'z_score'`` and ``'percentile'``.
        n_clusters: Passed to ``get_color_mapper``.
    """
    fig, ax = plt.subplots(figsize=(20, 30))
    fig.suptitle(clustering_results['network'])

    conv_connections = clustering_results['conv_connections']
    # Widths come from every connection group but the first, followed by
    # the sizes reported for the dense layers.
    layer_widths = [
        group[0]['weights'].shape[0] for group in conv_connections[1:]
    ]
    layer_widths += list(get_dense_sizes(conv_connections).values())

    labels = clustering_results['labels']
    G = nx.from_scipy_sparse_matrix(
        connections_to_graph_imagenet(conv_connections))
    pos = set_nodes_positions(G.nodes,
                              layer_widths,
                              labels,
                              is_first_square=False,
                              dx=2,
                              dy=2,
                              jitter=0)

    palette = get_color_mapper(n_clusters)
    node_colors = [palette[label] for label in labels]

    # Warnings raised while drawing are deliberately silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=node_colors, width=0, node_size=4,
                ax=ax)

    metric_names = ('ncut', 'ave_in_out', 'n_samples', 'mean', 'stdev',
                    'z_score', 'percentile')
    metrics = {name: clustering_results[name] for name in metric_names}
    draw_metrics(metrics, ax)
def text_rank(sentence_list, alpha=0.85):
    """Scale each sentence's TF-IDF vector by its PageRank score.

    Args:
        sentence_list: Iterable of token sequences; each is joined with
            spaces to form one document.
        alpha: Passed to ``nx.pagerank``.

    Returns:
        ``np.array`` whose i-th row is the dense TF-IDF vector of sentence i
        multiplied by the i-th value of the PageRank result.
    """
    corpus = [' '.join(sentence) for sentence in sentence_list]

    # Count how many times each term occurs.
    term_counts = CountVectorizer().fit_transform(corpus)
    # Turn the term-frequency matrix into TF-IDF values.
    tf_idf = TfidfTransformer().fit_transform(term_counts)

    graph = nx.from_scipy_sparse_matrix(tf_idf * tf_idf.T)
    scores = nx.pagerank(graph, alpha=alpha)

    dense = tf_idf.toarray()
    weighted_rows = [
        dense[row] * score for row, score in enumerate(scores.values())
    ]
    return np.array(weighted_rows)
def to_networkx(G, directed=True):
    """Convert a Scipy sparse matrix to a networkx graph.

    Parameters
    ----------
    G : Scipy sparse matrix
        The matrix to convert.
    directed : bool or None, optional
        Whether to build a directed graph, by default True.  When None,
        ``is_directed(G)`` decides.

    Returns
    -------
    networkx graph
        An ``nx.DiGraph`` if ``directed`` is truthy, otherwise an
        ``nx.Graph``.
    """
    if directed is None:
        directed = is_directed(G)
    graph_type = nx.DiGraph if directed else nx.Graph
    return nx.from_scipy_sparse_matrix(G, create_using=graph_type)
def _score_generator(self, sentences, sentence_vectors):
    """Score sentences with PageRank over their cosine-similarity graph.

    A ``len(sentences)`` x ``len(sentences)`` sparse matrix is filled with
    the cosine similarity of every ordered pair of distinct sentences that
    both have a vector.  The matrix is converted to a graph whose nodes are
    the sentences and whose edge weights are the similarities, and
    ``nx.pagerank`` (at most 200 iterations) ranks the nodes.

    Args:
        sentences: Sequence of sentences; only its length is used.
        sentence_vectors: Indexable collection of arrays, one per sentence.
            Sentences beyond ``len(sentence_vectors)`` get no edges.

    Returns:
        The result of ``nx.pagerank``, or ``[]`` if building the graph or
        ranking it raises (the error is logged).
    """
    sentence_count = len(sentences)
    similarity_matrix = dok_matrix((sentence_count, sentence_count),
                                   dtype=np.float32)

    # Only indices that have a vector may be used on either side of a pair;
    # guarding just the first index would raise IndexError on the second.
    usable = min(sentence_count, len(sentence_vectors))
    # Reshape once per sentence; -1 accepts any embedding width.
    rows = [sentence_vectors[i].reshape(1, -1) for i in range(usable)]

    for i in range(usable):
        for j in range(usable):
            if i != j:
                similarity_matrix[i, j] = cosine_similarity(rows[i],
                                                            rows[j])[0, 0]

    try:
        nx_graph = nx.from_scipy_sparse_matrix(similarity_matrix)
        scores = nx.pagerank(nx_graph, max_iter=200)
    except Exception as e:
        log.getLogger().error(str(e))
        return []
    return scores
def get_text_summarization_text_rank(text, num_sentences=3, feature_type='tfidf'):
    """Return the top PageRank-scored sentences of ``text`` in document order.

    Args:
        text: Document passed to ``normalize_document``.
        num_sentences: Maximum number of sentences to return.  If the
            document has fewer sentences, all of them are returned.
        feature_type: Passed to ``build_feature_matrix``.

    Returns:
        List of the selected normalized sentences, ordered by their position
        in the document.
    """
    # Parse and normalize the document.
    normalized_sentences = normalize_document(text,
                                              lemmatize=False,
                                              expand_cont=False,
                                              remove_special_char=False,
                                              remove_stop_words=False,
                                              lower_case=False)
    # Construct the weighted document-term matrix.
    vec, dt_matrix = build_feature_matrix(normalized_sentences,
                                          feature_type=feature_type)
    # Document similarity matrix and the graph built from it.
    similarity_matrix = dt_matrix * dt_matrix.T
    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    # PageRank score for every sentence.
    scores = networkx.pagerank(similarity_graph)
    # Rank sentences by score, best first.
    ranked_sentences = sorted(
        ((score, index) for index, score in scores.items()), reverse=True)

    # Slicing (rather than indexing 0..num_sentences-1) cannot run past the
    # end of the ranking; max() keeps a negative request an empty selection.
    top_ranked = ranked_sentences[:max(num_sentences, 0)]
    top_sentence_indices = sorted(index for _, index in top_ranked)

    # Assemble the summary in document order.
    return [normalized_sentences[index] for index in top_sentence_indices]
def textRank(document):
    """Build a summary string from the highest PageRank-scored sentences.

    The document is split with ``PunktSentenceTokenizer``, sentences are
    vectorised as TF-IDF, and the product of that matrix with its transpose
    is scored with ``nx.pagerank``.  The top 25% of the sentences (never
    fewer than three slots) are joined with single spaces, best first.

    Args:
        document: Text to summarise.

    Returns:
        The summary as one string.
    """
    sentences = PunktSentenceTokenizer().tokenize(document)
    counts = CountVectorizer().fit_transform(sentences)
    tfidf = TfidfTransformer().fit_transform(counts)

    graph = nx.from_scipy_sparse_matrix(tfidf * tfidf.T)
    scores = nx.pagerank(graph)

    ranked = sorted(((scores[pos], sent) for pos, sent in enumerate(sentences)),
                    reverse=True)
    keep = max(int(0.25 * len(ranked)), 3)
    return ' '.join(sentence for _, sentence in ranked[:keep])
def __init__(self, samples: pd.DataFrame, samples_features: pd.DataFrame):
    """Build the label co-occurrence graph for ``samples``.

    The sorted unique values of column ``self.df_col`` are numbered to form
    ``self.names`` (value -> index) and ``self.rnames`` (index -> value).
    After ``self.process_samples()`` runs, the pair counters fill the
    off-diagonal entries of ``self.adj_matrix`` symmetrically and the single
    counters fill its diagonal.  The matrix is then converted to a networkx
    graph and wrapped in a DGL graph.

    Args:
        samples: Data frame that contains the ``self.df_col`` column.
        samples_features: Data frame whose index-sorted values become
            ``self.graph_features``.
    """
    self.samples = samples
    unique_sorted = np.sort(self.samples[self.df_col].unique())
    ordered_features = samples_features.sort_index()

    self.names = {}
    for position, name in enumerate(unique_sorted):
        self.names[name] = position
    self.rnames = {position: name for name, position in self.names.items()}

    self.empirical_single_marginals = Counter()
    self.empirical_pair_marginals = Counter()
    size = len(self.names)
    self.adj_matrix = dok_matrix((size, size))

    # presumably fills the two counters above -- verify in process_samples
    self.process_samples()

    for (first, second), count in self.empirical_pair_marginals.items():
        self.adj_matrix[first, second] = count
        self.adj_matrix[second, first] = count
    for label, count in self.empirical_single_marginals.items():
        self.adj_matrix[label, label] = count

    self.nx_graph: nx.Graph = nx.from_scipy_sparse_matrix(self.adj_matrix)
    # NOTE(review): the return value is discarded and relabel_nodes copies
    # by default, so self.nx_graph appears to keep its integer labels --
    # confirm whether that is intended.
    nx.relabel_nodes(self.nx_graph, self.rnames)
    self.dgl_graph = dgl.DGLGraph(self.nx_graph)
    self.graph_features = ordered_features.values
def save_gephi_graph(output_dir, A, y, k, multi_label=False):
    """Write the graph of sparse matrix ``A`` to a GEXF file with node labels.

    Args:
        output_dir: String prefix of the output path; the file name
            ``graph_knn_<k>.gexf`` is appended to it directly.
        A: Scipy sparse matrix converted with ``nx.from_scipy_sparse_matrix``.
        y: Per-node labels.  With ``multi_label`` each entry is an iterable
            of strings joined by spaces; otherwise each entry is passed
            through ``str``.
        k: Value embedded in the output file name.
        multi_label: Selects how the entries of ``y`` are turned into text.
    """
    import networkx as nx

    if multi_label:
        label_strings = [" ".join(row) for row in y]
    else:
        label_strings = [str(item) for item in y]
    # Node i receives the i-th label.
    labels = dict(enumerate(label_strings))
    print(labels)

    G = nx.from_scipy_sparse_matrix(A)
    nx.set_node_attributes(G, labels, 'labels')

    print("Writing gephi")
    nx.write_gexf(G, output_dir + 'graph_knn_' + str(k) + '.gexf')
def preprocess(self, adj, features, graph=None):
    """Partition the graph into clusters and store the batches as tensors.

    Args:
        adj: Adjacency matrix passed to ``partition_graph`` and, when
            ``graph`` is not given, to ``nx.from_scipy_sparse_matrix``.
        features: Node features; normalized first when
            ``self.normalize_features`` is set.
        graph: Optional prebuilt graph.  A directed graph is built from
            ``adj`` when it is ``None``.

    The results are stored on ``self`` as ``batch_adj``, ``batch_features``,
    ``batch_labels``, ``cluster_member`` and ``mapper``.
    """
    if self.normalize_features:
        features = self._normalize_features(features)

    if graph is None:
        graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph)

    partitioned = partition_graph(adj,
                                  features,
                                  self.labels,
                                  graph,
                                  n_cluster=self.n_cluster)
    batch_adj, batch_features, batch_labels, cluster_member, mapper = partitioned
    self.batch_adj = batch_adj
    self.batch_features = batch_features
    self.batch_labels = batch_labels
    self.cluster_member = cluster_member
    self.mapper = mapper

    if self.normalize_rate is not None:
        self.batch_adj = self._normalize_adj(self.batch_adj,
                                             self.normalize_rate)

    # The conversion runs inside the self.device context.
    with self.device:
        tensors = self._to_tensor([self.batch_adj, self.batch_features])
        self.batch_adj, self.batch_features = tensors
def load_data(dataset_str):
    """Load one of the supported datasets.

    Args:
        dataset_str: ``'blog'``, ``'flickr'``, ``'cora'``, ``'citeseer'``,
            ``'wiki'``, or any string containing ``'dblp'``.

    Returns:
        The tuple ``(G, adj, features)``.  ``features`` is ``None`` for the
        ``'wiki'`` and dblp datasets.

    Raises:
        AssertionError: If ``dataset_str`` names no supported dataset.
    """
    if dataset_str == 'blog':
        G, adj, features = graph_reader(
            './data/BlogCatalog-dataset/data/edges.csv')
    elif dataset_str == 'flickr':
        G, adj, features = graph_reader('./data/Flickr-dataset/data/edges.csv')
    elif dataset_str in ['cora', 'citeseer']:
        G, adj, features = load_cc(dataset_str)
    elif dataset_str == 'wiki':
        import scipy.io as sio
        A = sio.loadmat('./data/POS.mat')['network']
        G = nx.from_scipy_sparse_matrix(A)
        adj = nx.adjacency_matrix(G)
        features = None
    elif 'dblp' in dataset_str:
        # The edge labels returned by the reader are not used here.
        G, adj, _ = read_dblp_small('./data/dblp-small/net_co_author.txt')
        features = None
    else:
        # Raised explicitly: a bare ``assert False`` is stripped under
        # ``python -O`` and would fall through to an unbound-variable error.
        raise AssertionError('unknown dataset: {!r}'.format(dataset_str))
    return G, adj, features
def summarize(self, text, num=320):
    """Rank the sentences of ``text`` with PageRank over TF-IDF similarity.

    Args:
        text: Either a string, which is split with ``cut_sentence``, or a
            list of sentences used as-is.
        num: Maximum number of sentences to return.

    Returns:
        List of ``(score, sentence)`` tuples, highest score first, holding
        at most ``min(len(sentences), num)`` entries.

    Raises:
        RuntimeError: If ``text`` is neither a ``str`` nor a ``list``.
    """
    # Split into sentences.  isinstance also accepts str/list subclasses,
    # which an exact type() comparison would reject.
    if isinstance(text, str):
        sentences = cut_sentence(text)
    elif isinstance(text, list):
        sentences = text
    else:
        raise RuntimeError("text type must be list or str")

    # TF-IDF similarity.
    matrix = tdidf_sim(sentences)
    matrix_norm = TfidfTransformer().fit_transform(matrix)
    # Build the similarity graph.
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # Score the sentences with PageRank.
    sens_scores = nx.pagerank(tfidf_sim)
    # Sort by score, best first.
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # Keep the top k, capped so it never exceeds the sentence count.
    topk = min(len(sentences), num)
    # Return each score together with its original sentence.
    return [(score, sentences[index]) for index, score in sen_rank[:topk]]
def __init__(self,
             metric_space,
             cover_list,
             clusterer,
             prune=True,
             backend='networkx'):
    """Build the node/row model, its derived matrices and the graph.

    Args:
        metric_space: Array-like whose first dimension gives ``self.N``;
            passed to ``build_topological_model``.
        cover_list: Passed to ``build_cover``.
        clusterer: Passed to ``build_topological_model``.
        prune: When true, the unpruned matrices are kept under ``raw_*``
            attributes and the model is restricted to the nodes returned by
            ``self._prune``, with ``partition_node_map`` re-indexed from 0.
        backend: Not used by this constructor.
    """
    self.partition_node_map = {}
    self.N = metric_space.shape[0]
    self.cover = self.build_cover(cover_list)
    self.node_row_matrix = self.build_topological_model(
        metric_space, self.cover, clusterer)
    self.adjacency_matrix = self.node_row_matrix.dot(self.node_row_matrix.T)
    self.cooccurence_matrix = self.node_row_matrix.T.dot(
        self.node_row_matrix)

    if prune:
        pruned_node_set = self._prune(self.adjacency_matrix)

        # Keep the unpruned versions available.
        self.raw_node_row_matrix = self.node_row_matrix
        self.raw_adjacency_matrix = self.adjacency_matrix
        self.raw_cooccurence_matrix = self.cooccurence_matrix

        # Surviving nodes are renumbered consecutively, in key order.
        survivors = [
            node for node in sorted(self.partition_node_map.keys())
            if node in pruned_node_set
        ]
        self.partition_node_map = {
            new_index: self.partition_node_map[node]
            for new_index, node in enumerate(survivors)
        }

        # Recompute both products from the pruned rows.
        self.node_row_matrix = self.node_row_matrix[pruned_node_set, :]
        self.adjacency_matrix = self.node_row_matrix.dot(
            self.node_row_matrix.T)
        self.cooccurence_matrix = self.node_row_matrix.T.dot(
            self.node_row_matrix)

    self.graph = nx.from_scipy_sparse_matrix(self.adjacency_matrix)
def compute(tribes, adj_matrix, conv, precision):
    """Compute a transition-matrix spectrum for every tribe.

    For each tribe the adjacency sub-matrix selected by
    ``conv.indices(tribe)`` is turned into a directed graph and its largest
    strongly connected component is taken.  If that component has more than
    two nodes, its adjacency matrix is row-scaled by the inverse
    out-degrees and the eigenvalues of the result are collected.

    Args:
        tribes: Iterable of tribes.
        adj_matrix: Matrix indexed with ``np.ix_`` to extract each tribe.
        conv: Object whose ``indices(tribe)`` yields the tribe's indices.
        precision: Number of decimals the eigenvalues are rounded to.

    Returns:
        A list with one entry per tribe: the unique rounded non-zero
        eigenvalues, or ``[]`` when the largest strong component has at
        most two nodes.
    """
    import networkx as nx

    spectra = []
    progress = progressbar.ProgressBar()
    for tribe in progress(tribes):
        ids = conv.indices(tribe)
        tribe_graph = nx.from_scipy_sparse_matrix(
            adj_matrix[np.ix_(ids, ids)], create_using=nx.DiGraph)

        # Largest strongly connected component of the tribe.
        largest = max(nx.strongly_connected_components(tribe_graph), key=len)
        if len(largest) <= 2:
            # Too small to be worth a spectrum.
            spectra.append([])
            continue

        # Adjacency matrix of the tribe's strong component.
        strong_adj = nx.to_numpy_array(tribe_graph.subgraph(largest),
                                       dtype='int8')
        # Diagonal matrix of inverse out-degrees within the component.
        out_degrees = np.sum(strong_adj, axis=1).astype(float)
        inverse_out_degrees = np.diagflat(np.power(out_degrees, -1))
        # The transition probability matrix.
        transition = inverse_out_degrees @ strong_adj

        eigenvalues = scipy.linalg.eig(transition)[0]
        # Keep the non-zero eigenvalues, rounded and de-duplicated.
        nonzero = eigenvalues[np.nonzero(eigenvalues)]
        spectra.append(np.unique(np.round(nonzero, precision)))
    return spectra