def test_from_biadjacency_weight(self):
    """Biadjacency conversion keeps connectivity, and weights on request."""
    biadj = sparse.csc_matrix([[1, 2], [0, 3]])
    # Without edge_attribute only the edge set is preserved.
    graph = bipartite.from_biadjacency_matrix(biadj)
    assert_edges_equal(graph.edges(), [(0, 2), (0, 3), (1, 3)])
    # With edge_attribute='weight' the matrix entries become edge data.
    graph = bipartite.from_biadjacency_matrix(biadj, edge_attribute='weight')
    expected = [
        (0, 2, {'weight': 1}),
        (0, 3, {'weight': 2}),
        (1, 3, {'weight': 3}),
    ]
    assert_edges_equal(graph.edges(data=True), expected)
def load(path):
    """Load a sparse biadjacency matrix from *path* and return the bipartite graph.

    Prints edge count and connectivity as diagnostics before returning.
    """
    matrix = sparse.load_npz(path)
    bipartite = nxb.from_biadjacency_matrix(matrix)
    print(f'{bipartite.number_of_edges():,} edges in the bipartite graph')
    print(f'connected: {nx.is_connected(bipartite)}')
    # nx.write_graphml(bipartite, 's2_2_bipartite_graph/paper_author.graphml')
    return bipartite
def Draw_Bipartite_Graph(user, seq_item, attn_value, name, save=True):
    """Render a user/item bipartite graph with attention-weighted edges.

    user       -- sequence of user identifiers (left partition).
    seq_item   -- sequence of item identifiers (right partition).
    attn_value -- one weight per (user, item) pair; used both as the
                  biadjacency entries and as the edge colours.
    name       -- figure name, also embedded in the saved file name.
    save       -- NOTE(review): never read; the figure is always saved —
                  confirm whether this flag should gate plt.savefig.
    """
    if True:  # kept from original; this guard has no effect
        fig = plt.figure(name)
        # Map raw identifiers to dense integer indices. Users and items use
        # independent counters but share one dict, so a user id equal to an
        # item id would overwrite — presumably the two id spaces are
        # disjoint; TODO confirm with callers.
        entity2ids, index_user, index_item = dict(), 0, 0
        for i in range(len(user)):
            if user[i] not in entity2ids:
                entity2ids[user[i]] = index_user
                index_user += 1
        for i in range(len(seq_item)):
            if seq_item[i] not in entity2ids:
                entity2ids[seq_item[i]] = index_item
                index_item += 1
        # Row/column index of every interaction in the biadjacency matrix.
        row = [entity2ids[user[i]] for i in range(len(user))]
        col = [entity2ids[seq_item[i]] for i in range(len(seq_item))]
        # Node labels: users first (0..), then items offset past the users.
        X_name = [i for i in range(len(set(row)))]
        Y_name = [i + len(set(row)) for i in range(len(set(col)))]
        # Round-trip through a dense array to merge duplicate (row, col)
        # entries before building the graph.
        a_matrix = coo_matrix((attn_value, (row, col))).toarray()
        a_matrix = coo_matrix(a_matrix)
        G = bipartite.from_biadjacency_matrix(a_matrix,
                                              create_using=None,
                                              edge_attribute='weight')
        # Two-column layout: users at x=0, items at x=0.5, spread vertically.
        pos = dict()
        Y_len = int((len(Y_name) - 1) * 10)
        X_unit_len = int(Y_len / (len(X_name) + 1))
        pos.update(
            (n, (0, (i + 1) * X_unit_len)) for i, n in enumerate(X_name))
        pos.update((n, (0.5, i * 10)) for i, n in enumerate(Y_name))
        num_edges = G.number_of_edges()
        num_nodes = G.number_of_nodes()
        # Colour user nodes red, item nodes blue (users occupy the low ids).
        color_map = []
        for node in G:
            if node < len(set(user)):
                color_map.append('xkcd:red')
            else:
                color_map.append('xkcd:blue')
        nx.draw(
            G,
            pos=pos,
            #with_labels=True,
            edge_color=attn_value,
            edge_cmap=plt.get_cmap('rainbow'),
            node_color=color_map,
            cmap=plt.get_cmap('Reds'))
        # Hard-coded output directory; the figure name distinguishes runs.
        plt.savefig('/home/hsucheng/DRS/code/RS_2/graph/draw_test-' +
                    str(name) + '.png')
        plt.close(name)
def birkhoff_von_neumann(Y, tol=0.0001):
    """Greedy Birkhoff-von-Neumann decomposition of a square matrix.

    Repeatedly extracts a permutation matrix from the support of Y (via a
    maximum bipartite matching) together with the largest coefficient that
    keeps the residual non-negative. Returns (lambdas, perms) where perms[k]
    holds the column index matched to each row.
    """
    if Y.shape[0] != Y.shape[1]:
        raise ValueError('Y.shape[0] != Y.shape[1]')
    if np.any(Y < -tol):
        raise ValueError('np.any(Y < -tol)')
    # Clamp tiny entries to exactly zero so they never enter the support.
    Y = np.where(Y < tol, 0, Y)
    m = Y.shape[0]
    coefficients = []
    permutations = []
    support = Y > tol
    while np.any(support):
        # Bipartite graph on the support: rows are nodes 0..m-1,
        # columns are nodes m..2m-1.
        graph = bp.from_biadjacency_matrix(sparse.csr_matrix(support.astype(int)))
        raw_matching = bp.maximum_matching(graph)
        # Keep only the row->column direction, shifted back to 0..m-1,
        # sorted by row so the columns read off as a permutation.
        pairs = sorted((left, right - m)
                       for left, right in raw_matching.items() if left < m)
        if len(pairs) < m:
            # this can happen due to numerical stability issues TODO add test
            break
        rows, cols = zip(*pairs)
        perm = np.array(cols)
        assert perm.shape == (m, )
        # Largest coefficient that keeps the residual non-negative.
        coeff = np.min(Y[rows, cols])
        P = np.zeros((m, m), dtype=float)
        P[rows, cols] = 1.
        coefficients.append(coeff)
        permutations.append(perm)
        Y -= coeff * P
        support = Y > tol
    return np.array(coefficients), np.array(permutations)
def matching(idxA, idxB):
    """Relabel clustering *idxB* so its labels best agree with *idxA*.

    Builds an overlap matrix between the unique labels of the two
    clusterings and solves a minimum-weight full matching (negated
    overlaps, i.e. maximum agreement). Returns idxB rewritten in idxA's
    label space.

    NOTE(review): assumes len(unique(idxA)) >= len(unique(idxB)) so every
    B-label gets matched — confirm with callers; otherwise match[m + i]
    raises KeyError for unmatched columns (as the original also would).
    """
    labelsA = np.unique(idxA)
    m = len(labelsA)
    labelsB = np.unique(idxB)
    n = len(labelsB)
    # W[i, j] = -(overlap count) - 1. The extra -1 keeps every entry
    # nonzero: coo_matrix drops exact zeros, which would remove
    # zero-overlap edges and could make a full matching infeasible. A
    # constant shift applied to every edge does not change which full
    # matching is optimal.
    W = np.zeros((m, n))
    for j in range(n):
        for i in range(m):
            W[i, j] = -np.sum(idxA[idxB == labelsB[j]] == labelsA[i]) - 1
    G = bipartite.from_biadjacency_matrix(scipy.sparse.coo_matrix(W))
    top_nodes = {v for v, d in G.nodes(data=True) if d["bipartite"] == 0}
    match = bipartite.minimum_weight_full_matching(G, top_nodes)
    # from_biadjacency_matrix numbers rows 0..m-1 and columns m..m+n-1,
    # so column i is graph node m + i. (The original indexed match[i + n],
    # which is wrong whenever m != n.)
    new_labels = {labelsB[i]: labelsA[match[m + i]] for i in range(n)}
    return np.array([new_labels[b] for b in idxB])
def bipartite_graph_from_matrix(matrix, labels1, labels2, threshold=0, match=False):
    """Build a labelled bipartite graph from a (rows x cols) weight matrix.

    matrix    -- 2-D array-like of edge weights; entries below *threshold*
                 are dropped.
    labels1   -- labels for the row partition (nodes 0..len(labels1)-1).
    labels2   -- labels for the column partition (following nodes).
    threshold -- weights strictly below this become 0 (no edge).
    match     -- if True, permute rows via sort_matrix first and reorder
                 labels1 accordingly.
    Returns a networkx graph with a 'label' attribute on every node.
    """
    if match:
        matrix, permutation = sort_matrix(matrix)
        labels1 = [labels1[int(permutation[i])] for i in range(len(labels1))]
    label_dict = {i: l for i, l in enumerate(labels1 + labels2)}
    print("Topic labels:", label_dict)
    # Always copy: the original only converted non-ndarray inputs and then
    # thresholded in place, silently mutating a caller-supplied ndarray.
    matrix = np.array(matrix)
    matrix[matrix < threshold] = 0
    sp_matrix = sparse.coo_matrix(matrix)
    g = bipartite.from_biadjacency_matrix(sp_matrix)
    nx.set_node_attributes(g, label_dict, name='label')
    return g
else: numero += i f.close() #matrix.pop() matrix.pop() numpy_matrix = np.matrix(matrix) return numpy_matrix matrix = leer_matrix("Code_CPP/matrix.csv") adjacency = scipy.sparse.csc_matrix(matrix) G = bi.from_biadjacency_matrix(adjacency) infected = leer_matrix("Code_CPP/Datos/infectados.csv") color = bi.color(G) ''' pos=nx.spring_layout(G) w = csv.writer(open("output.csv", "w")) for key,val in pos.items(): w.writerow([key,val[0],val[1]]) ''' pos = {} for i in range(10): pos[i] = np.array([0, 1 - (0.2 * i)]) for i in range(9): pos[10 + i] = np.array([0.2, 0.9 - (0.2 * i)])
#https://stackoverflow.com/questions/35392342/how-to-change-colours-of-nodes-and-edges-of-bipartite-graph-in-networkx import numpy as np import networkx as nx import matplotlib.pyplot as plt from networkx.algorithms import bipartite import scipy.sparse as sparse a_matrix = sparse.rand(5, 10, format='coo', density=0.8) G = bipartite.from_biadjacency_matrix(a_matrix, create_using=None, edge_attribute='weight') X, Y = bipartite.sets(G) pos = dict() pos.update((n, (-1, i * 11)) for i, n in enumerate(X)) pos.update((n, (0.5, i * 5)) for i, n in enumerate(Y)) num_edges = G.number_of_edges() num_nodes = G.number_of_nodes() #nx.draw(G, pos=pos, with_labels=True,edge_color=np.random.random(num_edges), edge_cmap=plt.get_cmap('Blues'), node_color=np.random.random(num_nodes), cmap=plt.get_cmap('Reds')) nx.draw(G, pos=pos, with_labels=True, edge_color=np.random.random(num_edges), edge_cmap=plt.get_cmap('Reds'), node_color=range(num_nodes), node_size=1400, cmap=plt.cm.Reds) #C = nx.connected_component_subgraphs(G) #for g in C:
def test_from_biadjacency_multigraph(self):
    """Integer entries become parallel edges when targeting a MultiGraph."""
    biadj = sparse.csc_matrix([[1, 2], [0, 3]])
    multi = bipartite.from_biadjacency_matrix(biadj, create_using=nx.MultiGraph())
    # Entry (i, j) == k yields k parallel edges between row i and column j.
    expected = [(0, 2), (0, 3), (0, 3), (1, 3), (1, 3), (1, 3)]
    assert_edges_equal(multi.edges(), expected)
def test_from_biadjacency_roundtrip(self):
    """biadjacency_matrix followed by from_biadjacency_matrix round-trips."""
    original = nx.path_graph(5)
    biadj = bipartite.biadjacency_matrix(original, [0, 2, 4])
    rebuilt = bipartite.from_biadjacency_matrix(biadj)
    assert_true(nx.is_isomorphic(original, rebuilt))
def aco_preprocessing(path_expr,
                      path_ppi,
                      col,
                      val1,
                      val2,
                      patients_info,
                      log2,
                      gene_list=None,
                      size=None,
                      HI_calc="exact",
                      sample=None):
    """Load expression + PPI data and build the structures for the ACO run.

    path_expr     -- TSV of gene expression, indexed by column 'Unnamed: 0'
                     (presumably patient ids — TODO confirm file format).
    path_ppi      -- TSV edge list (two columns, no header) of the PPI net.
    col           -- column splitting patients into two classes.
    val1, val2    -- the two class values of *col*.
    patients_info -- non-gene columns to exclude from the expression part.
    log2          -- if truthy, log2-transform the expression values.
    gene_list     -- preselected genes (mutually exclusive with *size*).
    size          -- number of genes to keep by highest standard deviation.
    HI_calc       -- "exact" (via joined adjacency) or "corr" (HI_big).
    sample        -- optional fraction of patients to subsample.

    Returns (B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids,
    labels_B, rev_labels_B), or () if the gene-selection arguments are
    inconsistent.
    """
    #gene_list - preselected genes
    #th if genes are not preselected specify threshold for standard deviation selection
    # HI calculation is either exact or corelation based (for big datasets)
    expr = pd.read_csv(path_expr, sep="\t")
    expr = expr.set_index("Unnamed: 0")
    # Patients of each class, ordered group1 then group2.
    group1_true = list(expr[expr[col] == val1].index)
    group2_true = list(expr[expr[col] == val2].index)
    patients_new = group1_true + group2_true
    if sample != None:
        # Subsample a fraction of patients without replacement, then
        # recompute the class membership on the subsample.
        idx = list(expr.index)
        new_idx = np.random.choice(idx, int(sample * len(idx)), False)
        expr = expr.loc[new_idx]
        group1_true = list(expr[expr[col] == val1].index)
        group2_true = list(expr[expr[col] == val2].index)
        patients_new = group1_true + group2_true
    expr = expr.loc[patients_new]
    net = pd.read_csv(path_ppi, sep="\t", header=None)
    nodes_ppi = set(net[0]).union(set(net[1]))
    # Keep only genes present in both the expression table and the PPI net.
    # Gene columns are string-typed; PPI nodes are ints, hence the casts.
    genes_ge = list(set(expr.columns) - set(patients_info))
    new_genes = [int(x) for x in genes_ge]
    intersec_genes = set.intersection(set(new_genes), set(nodes_ppi))
    genes_for_expr = [str(x) for x in list(intersec_genes)]
    expr = expr[genes_for_expr]  #20188 genes
    if log2:
        expr = np.log2(expr)
    # Per-gene z-scores over the selected patients.
    z_scores = stats.zscore(expr)
    z_scores = pd.DataFrame(z_scores, columns=expr.columns, index=expr.index)
    if gene_list != None and size == None:  # gene list is given
        new_genes = [str(gene) for gene in gene_list]
    elif gene_list == None and size != None:  #std selection
        # Keep the *size* genes with the highest standard deviation.
        std_genes = expr[genes_for_expr].std()
        std_genes, genes_for_expr = zip(
            *sorted(zip(std_genes, genes_for_expr)))
        genes_for_expr = genes_for_expr[len(std_genes) - size:]
        new_genes = list(genes_for_expr)
    elif gene_list == None and size == None:  #all genes
        new_genes = genes_for_expr
    else:
        print(
            "please specify gene selection method: predifined list, standart deviation filtering or none of them"
        )
        return ()
    expr = expr[new_genes]
    z_scores = z_scores[new_genes].values
    # Node numbering for the bipartite graph: genes first, then patients.
    labels_B = dict()
    rev_labels_B = dict()
    node = 0
    #nodes = set(deg_nodes + genes_aco)
    for g in new_genes:
        labels_B[node] = g
        rev_labels_B[g] = node
        node = node + 1
    for p in patients_new:
        labels_B[node] = p
        rev_labels_B[p] = node
        node = node + 1
    #scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    #sim = scaler.fit_transform(expr)
    # Genes become rows (n), patients become columns (m).
    data_aco = pd.DataFrame(z_scores, columns=new_genes, index=patients_new)
    data_aco = data_aco.T
    n, m = data_aco.shape
    GE = pd.DataFrame(data_aco.values,
                      index=np.arange(n),
                      columns=np.arange(n, n + m))
    # Bipartite gene-patient graph: edge where the z-score exceeds t = 2.
    t = 2
    b = np.matrix(data_aco > t)
    b_sp = csr_matrix(b)
    B = bipartite.from_biadjacency_matrix(b_sp)
    # Gene-gene graph restricted to the selected genes, using the same
    # node ids as the bipartite graph.
    G = nx.Graph()
    G.add_nodes_from(np.arange(n))
    for row in net.itertuples():
        node1 = str(row[1])
        node2 = str(row[2])
        if node1 in set(new_genes) and node2 in set(new_genes):
            G.add_edge(rev_labels_B[node1], rev_labels_B[node2])
    A_new = nx.adj_matrix(G).todense()
    A_j = joined_net(B, G)
    # NOTE(review): H is only assigned for HI_calc in {"exact", "corr"};
    # any other value raises NameError at the return — confirm intended.
    if HI_calc == "exact":
        H = hi(A_j, n, m)
    if HI_calc == "corr":
        H = HI_big(data_aco, gtg_weight=1, gtp_weight=1, ptp_weight=1)
    group1_true_ids = [rev_labels_B[x] for x in group1_true]
    group2_true_ids = [rev_labels_B[x] for x in group2_true]
    return B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids, labels_B, rev_labels_B
def sim_data(genes1, genes2, background, patients1, patients2, dens):
    """Simulate expression data with two planted gene/patient groups.

    genes1, genes2 -- sizes of the two signal gene groups.
    background     -- number of background genes.
    patients1/2    -- sizes of the two patient groups.
    dens           -- controls how many cross-group edges are added to the
                     gene network.

    Returns (B, GE, G, H, d, n, m): bipartite graph, expression frame,
    gene network, HI matrix, final network density, gene and patient counts.
    NOTE(review): b stays all-zero, so B is built from an empty biadjacency
    matrix and has no edges — confirm this is intended.
    """
    n = genes1 + genes2 + background
    m = patients1 + patients2
    genes = np.arange(n)
    # Group labels: 1 and 2 are signal gene groups, 3 is background.
    groups_genes = list(np.ones(genes1)) + list(np.ones(genes2) * 2) + list(
        np.ones(background) * 3)
    groups_p = [1 if node < patients1 else 2 for node in range(m)]
    to_sparce = 0.3  #to sparcify bipartite
    to_mix = 0.99  # to mix edges berween groups
    b = np.zeros((n, m))
    # Baseline expression: standard normal noise for every (gene, patient).
    ge = np.random.normal(0, 1, n * m).reshape(n, m)
    # Plant signal: matching (gene group, patient group) pairs shift up to
    # mean +1, mismatched pairs shift down to mean -1.
    for patient in range(m):
        for gene in range(n):
            p_gr = groups_p[patient]
            g_gr = groups_genes[gene]
            if p_gr == 1 and g_gr == 1:  #all up
                ge[gene, patient] = np.random.normal(1, 0.35, 1)
            elif p_gr == 2 and g_gr == 2:
                ge[gene, patient] = np.random.normal(1, 0.35, 1)  #also up
            elif p_gr == 1 and g_gr == 2:
                ge[gene, patient] = np.random.normal(-1, 0.35, 1)  #down
            elif p_gr == 2 and g_gr == 1:
                ge[gene, patient] = np.random.normal(-1, 0.35, 1)  #down
    # Corrupt ~10% of the signal entries back to pure noise.
    for patient in range(m):
        for gene in range(genes1 + genes2):
            prob = np.random.uniform(0, 1)
            if prob > 0.9:
                ge[gene, patient] = np.random.normal(0, 1, 1)
    # Give a small fraction of background genes a weak group-dependent
    # signal, in either direction.
    for gene in range(genes1 + genes2, n):
        prob = np.random.uniform(0, 1)
        if prob < 0.05:
            for patient in range(m):
                if groups_p[patient] == 1:  #all up
                    ge[gene, patient] = np.random.normal(0.3, 0.35, 1)
                else:
                    ge[gene, patient] = np.random.normal(-0.3, 0.35, 1)
        if prob > 0.05 and prob < 0.1:
            for patient in range(m):
                if groups_p[patient] == 1:  #all up
                    ge[gene, patient] = np.random.normal(-0.3, 0.35, 1)
                else:
                    ge[gene, patient] = np.random.normal(0.3, 0.35, 1)
    # Gene network: one scale-free component per gene group, then joined.
    g1 = nx.barabasi_albert_graph(genes1, 1)
    g2 = nx.barabasi_albert_graph(genes2, 1)
    g3 = nx.barabasi_albert_graph(background, 1)
    G = nx.disjoint_union(g1, g2)
    G = nx.disjoint_union(G, g3)
    # Wire the signal groups to the background with dens * n random edges.
    for _ in range(int(dens * n)):
        node1 = np.random.randint(0, genes1)
        node2 = np.random.randint(genes1, genes1 + genes2)
        node3_1 = np.random.randint(genes1 + genes2, n)
        node3_2 = np.random.randint(genes1 + genes2, n)
        G.add_edges_from([(node1, node3_1), (node2, node3_2)])
    d = nx.density(G)
    count = 0
    # Thin out background-background edges until density <= 0.002
    # (at most 10 attempts).
    while d > 0.002 and count < 10:
        node3_1 = np.random.randint(genes1 + genes2, n)
        node3_2 = np.random.randint(genes1 + genes2, n)
        count = count + 1
        if G.has_edge(node3_1, node3_2):
            G.remove_edge(node3_1, node3_2)
        d = nx.density(G)
    #A_g = nx.adj_matrix(G).todense() *1
    b_sp = csr_matrix(b)  #sparse matrix for making bipartite graph
    B = bipartite.from_biadjacency_matrix(b_sp)
    # Patients are numbered n..n+m-1 so ids do not clash with gene ids.
    GE = pd.DataFrame(ge, index=np.arange(n), columns=np.arange(n, n + m))
    H = HI_big(GE, 1, 1, 1)
    return (B, GE, G, H, d, n, m)
def aco_preprocessing_strings(expr_str,
                              ppi_str,
                              col,
                              log2,
                              gene_list=None,
                              size=None,
                              sample=None):
    """In-memory variant of aco_preprocessing: takes TSV strings, not paths.

    expr_str  -- gene-expression table as a TSV string, indexed by column
                 'Unnamed: 0'.
    ppi_str   -- PPI edge list as a TSV string (two columns, no header).
    col       -- NOTE(review): overwritten below from the table's columns
                 ('cancer_type' or 'prognosis'); the argument is never
                 used — confirm intended.
    log2      -- if truthy, log2-transform the expression values.
    gene_list -- preselected genes (mutually exclusive with *size*).
    size      -- number of genes to keep by highest standard deviation.
    sample    -- optional fraction of patients to subsample.

    Returns (B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids,
    labels_B, rev_labels_B, val1, val2, group1_true, group2_true), or ()
    if the gene-selection arguments are inconsistent.
    """
    # path_expr - path for gene expression
    # path_ppi - path for ppi
    # col - split variable name (ONLY TWO CLASSES)
    # log2 - log2 transform
    #gene_list - preselected genes (if any)
    #size - if genes are not preselected specify size of the gene set for standard deviation selection
    # sample = None - all patients, otherwise specify fraction of patients taken
    EXPRDATA = StringIO(expr_str)
    expr = pd.read_csv(EXPRDATA, sep="\t")
    expr = expr.set_index("Unnamed: 0")
    #TODO: check if column 'prognosis' or 'cancer type' exists, set column based on this info
    if ('cancer_type' in list(expr)):
        col = 'cancer_type'
    else:
        col = 'prognosis'
    # The two class values are taken directly from the split column.
    val1, val2 = list(set(expr[col]))
    group1_true = list(expr[expr[col] == val1].index)
    group2_true = list(expr[expr[col] == val2].index)
    patients_new = group1_true + group2_true
    if sample != None:
        # Subsample a fraction of patients without replacement, then
        # recompute the class membership on the subsample.
        idx = list(expr.index)
        new_idx = np.random.choice(idx, int(sample * len(idx)), False)
        expr = expr.loc[new_idx]
        group1_true = list(expr[expr[col] == val1].index)
        group2_true = list(expr[expr[col] == val2].index)
        patients_new = group1_true + group2_true
    expr = expr.loc[patients_new]
    PPIDATA = StringIO(ppi_str)
    net = pd.read_csv(PPIDATA, sep="\t", header=None)
    nodes_ppi = set(net[0]).union(set(net[1]))
    # Keep only genes present in both the expression table and the PPI net.
    # Gene columns are string-typed; PPI nodes are ints, hence the casts.
    genes_ge = list(set(expr.columns) - set([col]))
    new_genes = [int(x) for x in genes_ge]
    intersec_genes = set.intersection(set(new_genes), set(nodes_ppi))
    genes_for_expr = [str(x) for x in list(intersec_genes)]
    expr = expr[genes_for_expr]  #20188 genes
    if log2:
        expr = np.log2(expr)
    # Per-gene z-scores over the selected patients.
    z_scores = stats.zscore(expr)
    z_scores = pd.DataFrame(z_scores, columns=expr.columns, index=expr.index)
    if gene_list != None and size == None:  # gene list is given
        new_genes = [str(gene) for gene in gene_list]
    elif gene_list == None and size != None:  #std selection
        # Keep the *size* genes with the highest standard deviation.
        std_genes = expr[genes_for_expr].std()
        std_genes, genes_for_expr = zip(
            *sorted(zip(std_genes, genes_for_expr)))
        genes_for_expr = genes_for_expr[len(std_genes) - size:]
        new_genes = list(genes_for_expr)
    elif gene_list == None and size == None:  #all genes
        new_genes = genes_for_expr
    else:
        print(
            "please specify gene selection method: predifined list, standart deviation filtering or none of them"
        )
        return ()
    expr = expr[new_genes]
    z_scores = z_scores[new_genes].values
    # Node numbering for the bipartite graph: genes first, then patients.
    labels_B = dict()
    rev_labels_B = dict()
    node = 0
    #nodes = set(deg_nodes + genes_aco)
    for g in new_genes:
        labels_B[node] = g
        rev_labels_B[g] = node
        node = node + 1
    for p in patients_new:
        labels_B[node] = p
        rev_labels_B[p] = node
        node = node + 1
    #scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    #sim = scaler.fit_transform(expr)
    # Genes become rows (n), patients become columns (m).
    data_aco = pd.DataFrame(z_scores, columns=new_genes, index=patients_new)
    data_aco = data_aco.T
    n, m = data_aco.shape
    GE = pd.DataFrame(data_aco.values,
                      index=np.arange(n),
                      columns=np.arange(n, n + m))
    # Bipartite gene-patient graph: edge where the z-score exceeds t = 2.
    t = 2
    b = np.matrix(data_aco > t)
    b_sp = csr_matrix(b)
    B = bipartite.from_biadjacency_matrix(b_sp)
    # Gene-gene graph restricted to the selected genes, using the same
    # node ids as the bipartite graph.
    G = nx.Graph()
    G.add_nodes_from(np.arange(n))
    for row in net.itertuples():
        node1 = str(row[1])
        node2 = str(row[2])
        if node1 in set(new_genes) and node2 in set(new_genes):
            G.add_edge(rev_labels_B[node1], rev_labels_B[node2])
    A_new = nx.adj_matrix(G).todense()
    H = HI_big(data_aco, gtg_weight=1, gtp_weight=1, ptp_weight=1)
    group1_true_ids = [rev_labels_B[x] for x in group1_true]
    group2_true_ids = [rev_labels_B[x] for x in group2_true]
    #print(group1_true + "babaaba")
    return B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids, labels_B, rev_labels_B, val1, val2, group1_true, group2_true
if tempCol[j] > r: closeSegments = 2 * closeSegments elif tempCol[j] < (-r): closeSegments = 2 * closeSegments + 1 else: closeSegments = np.concatenate( (2 * closeSegments, 2 * closeSegments + 1)) #closeTargets = np.reshape(targetPoints[closeSegments], (closeSegments.shape[0] * k, d)) for j in range(closeSegments.shape[0]): norms = np.linalg.norm(targetPoints[closeSegments[j]] - latentPoints[i], axis=-1) smallNorms = norms < r data = norms[smallNorms] rows = np.repeat(i, data.shape[0]) cols = np.arange(k)[smallNorms] + (closeSegments[j] * k) B = B + coo_matrix( (data, (rows, cols)), shape=(n, (2**d) * k), dtype=np.float16) #closeTargets = targetPoints[closeSegments] #norms = np.linalg.norm(closeTargets - latentPoints[i], axis=-1) #for j in range(containments.shape[0]): # closeLatentPoints[containments[j]].extend([i]) #create the biadjacency sparse matrix of the desired bipartate graph #for i in range(n): # segmentPointIndices = np.asarray(closeLatentPoints[i], dtype=int) # segmentPoints = latentPoints[segmentPointIndices] G = bipartite.from_biadjacency_matrix(B)
pass else: adjmatrix[i][x-1]=1 counter+=1 counter import networkx as nx # define the graph from networkx.algorithms import bipartite G=nx.Graph() import numpy, scipy.sparse # define the adj. matrix as scipy matrix for input graph A = numpy.array(adjmatrix) Asp = scipy.sparse.csr_matrix(A) from networkx.algorithms.bipartite import from_biadjacency_matrix G = from_biadjacency_matrix(Asp, create_using=None, edge_attribute=None)#scipy sparse matrix X, Y = bipartite.sets(G) XX = list(X) YY = list(Y) import matplotlib.pyplot as plt import numpy as np nx.draw(G) plt.savefig("simple_path.png") # save as png plt.show() # display # Define I_tem dictionary tem = {} for i in range(1, aMAXX+1): for j in range(1, bMAXX+1):
def test_from_biadjacency_multigraph(self):
    """Entry k in the matrix yields k parallel edges in a MultiGraph."""
    matrix = sparse.csc_matrix([[1, 2], [0, 3]])
    result = bipartite.from_biadjacency_matrix(matrix,
                                               create_using=nx.MultiGraph())
    assert_edges_equal(result.edges(),
                       [(0, 2), (0, 3), (0, 3), (1, 3), (1, 3), (1, 3)])
def test_from_biadjacency_roundtrip(self):
    """Converting to a biadjacency matrix and back preserves the graph."""
    path = nx.path_graph(5)
    # Nodes 0, 2, 4 form one partition of the path graph.
    matrix = bipartite.biadjacency_matrix(path, [0, 2, 4])
    restored = bipartite.from_biadjacency_matrix(matrix)
    assert_true(nx.is_isomorphic(path, restored))