Code Example #1
File: test_matrix.py Project: 4c656554/networkx
 def test_from_biadjacency_weight(self):
     M = sparse.csc_matrix([[1,2],[0,3]])
     B = bipartite.from_biadjacency_matrix(M)
     assert_edges_equal(B.edges(),[(0,2),(0,3),(1,3)])
     B = bipartite.from_biadjacency_matrix(M, edge_attribute='weight')
     e = [(0,2,{'weight':1}),(0,3,{'weight':2}),(1,3,{'weight':3})]
     assert_edges_equal(B.edges(data=True),e)
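This snippet is a test method from test_matrix.py, so its imports and the assert_edges_equal helper live elsewhere in that file. A minimal standalone sketch of the same behaviour, assuming only networkx and scipy are installed, could be:

import networkx as nx
import scipy.sparse as sparse
from networkx.algorithms import bipartite

# rows of the biadjacency matrix become nodes 0..r-1, columns become nodes r..r+c-1
M = sparse.csc_matrix([[1, 2], [0, 3]])
B = bipartite.from_biadjacency_matrix(M, edge_attribute='weight')
print(sorted(B.edges(data=True)))
# expected: [(0, 2, {'weight': 1}), (0, 3, {'weight': 2}), (1, 3, {'weight': 3})]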
Code Example #3
def load(path):

    biadjacency = sparse.load_npz(path)
    bipartite = nxb.from_biadjacency_matrix(biadjacency)

    print(f'{bipartite.number_of_edges():,} edges in the bipartite graph')
    print(f'connected: {nx.is_connected(bipartite)}')

    # nx.write_graphml(bipartite, 's2_2_bipartite_graph/paper_author.graphml')

    return bipartite
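load() assumes the aliases import networkx as nx, import scipy.sparse as sparse, and from networkx.algorithms import bipartite as nxb, plus a .npz file written by scipy.sparse.save_npz. A minimal round-trip sketch with an illustrative file name and a toy paper-author matrix:

import networkx as nx
import scipy.sparse as sparse
from networkx.algorithms import bipartite as nxb

# toy 2x3 paper-author biadjacency matrix standing in for the real data
biadjacency = sparse.csr_matrix([[1, 0, 1], [0, 1, 1]])
sparse.save_npz('paper_author_biadjacency.npz', biadjacency)  # hypothetical path
B = load('paper_author_biadjacency.npz')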
Code Example #4
File: draw.py Project: retagnn/RetaGNN
def Draw_Bipartite_Graph(user, seq_item, attn_value, name, save=True):

    if True:
        fig = plt.figure(name)
        entity2ids, index_user, index_item = dict(), 0, 0
        for i in range(len(user)):
            if user[i] not in entity2ids:
                entity2ids[user[i]] = index_user
                index_user += 1
        for i in range(len(seq_item)):
            if seq_item[i] not in entity2ids:
                entity2ids[seq_item[i]] = index_item
                index_item += 1

        row = [entity2ids[user[i]] for i in range(len(user))]
        col = [entity2ids[seq_item[i]] for i in range(len(seq_item))]
        X_name = [i for i in range(len(set(row)))]
        Y_name = [i + len(set(row)) for i in range(len(set(col)))]
        a_matrix = coo_matrix((attn_value, (row, col))).toarray()
        a_matrix = coo_matrix(a_matrix)
        G = bipartite.from_biadjacency_matrix(a_matrix,
                                              create_using=None,
                                              edge_attribute='weight')
        pos = dict()
        Y_len = int((len(Y_name) - 1) * 10)
        X_unit_len = int(Y_len / (len(X_name) + 1))
        pos.update(
            (n, (0, (i + 1) * X_unit_len)) for i, n in enumerate(X_name))
        pos.update((n, (0.5, i * 10)) for i, n in enumerate(Y_name))

        num_edges = G.number_of_edges()
        num_nodes = G.number_of_nodes()
        color_map = []
        for node in G:
            if node < len(set(user)):
                color_map.append('xkcd:red')
            else:
                color_map.append('xkcd:blue')

        nx.draw(
            G,
            pos=pos,  #with_labels=True,
            edge_color=attn_value,
            edge_cmap=plt.get_cmap('rainbow'),
            node_color=color_map,
            cmap=plt.get_cmap('Reds'))
        plt.savefig('/home/hsucheng/DRS/code/RS_2/graph/draw_test-' +
                    str(name) + '.png')
        plt.close(name)
Code Example #5
import numpy as np
import scipy.sparse as sparse
from operator import itemgetter
from networkx.algorithms import bipartite as bp


# Birkhoff-von Neumann decomposition: express a doubly stochastic matrix Y as a
# convex combination of permutation matrices, returned as coefficient and
# permutation-vector arrays.
def birkhoff_von_neumann(Y, tol=0.0001):
    if Y.shape[0] != Y.shape[1]:
        raise ValueError('Y.shape[0] != Y.shape[1]')
    if np.any(Y < -tol):
        raise ValueError('np.any(Y < -tol)')

    Y = np.where(Y < tol, 0, Y)

    m = Y.shape[0]

    lambdas = []
    perms = []

    residuals = Y > tol
    while np.any(residuals):

        adj = residuals.astype(int)
        adj = sparse.csr_matrix(adj)

        G = bp.from_biadjacency_matrix(adj)

        M = bp.maximum_matching(G, top_nodes=range(m))  # pass the row nodes; a disconnected residual graph otherwise raises AmbiguousSolution
        M_ = [(kk, v - m) for kk, v in M.items() if kk < m]

        if len(M_) < m:  # this can happen due to numerical stability issues TODO add test
            break

        M_ = sorted(M_, key=itemgetter(0))  # with the tuples sorted by row, the columns give the permutation

        rows, columns = zip(*M_)
        perm = np.array(columns)

        assert perm.shape == (m, )

        lambda_ = np.min(Y[rows, columns])

        P = np.zeros((m, m), dtype=float)
        P[rows, columns] = 1.

        lambdas.append(lambda_)
        perms.append(perm)
        Y -= lambda_ * P

        residuals = Y > tol
    return np.array(lambdas), np.array(perms)
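A quick sanity check, reusing the imports above, is to rebuild Y from the returned coefficients and permutation vectors and compare it with the input; the 3x3 matrix here is doubly stochastic by construction:

Y = np.array([[0.50, 0.50, 0.00],
              [0.25, 0.25, 0.50],
              [0.25, 0.25, 0.50]])
lambdas, perms = birkhoff_von_neumann(Y)
reconstructed = np.zeros_like(Y)
for lam, perm in zip(lambdas, perms):
    P = np.zeros_like(Y)
    P[np.arange(len(perm)), perm] = 1.0  # permutation matrix for this term
    reconstructed += lam * P
print(np.allclose(reconstructed, Y, atol=1e-3))  # should print True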
Code Example #6
import numpy as np
import scipy.sparse
from networkx.algorithms import bipartite


# Relabel clustering idxB so that its labels agree with those of idxA, using a
# minimum-weight full matching on the (negated) label-overlap counts.
def matching(idxA, idxB):
    labelsA = np.unique(idxA)
    m = len(labelsA)
    labelsB = np.unique(idxB)
    n = len(labelsB)
    W = np.zeros((m, n))
    for j in range(n):
        for i in range(m):
            W[i, j] = -sum(idxA[idxB == labelsB[j]] == labelsA[i])
    G = bipartite.from_biadjacency_matrix(scipy.sparse.coo_matrix(W))
    top_nodes = {n for n, d in G.nodes(data=True) if d["bipartite"] == 0}
    match = bipartite.minimum_weight_full_matching(G, top_nodes)
    new_labels = dict()
    for i in range(n):
        new_labels[labelsB[i]] = labelsA[match[i + m]]  # column nodes are offset by m (the number of rows), not n
    matched = np.array([new_labels[idxB[i]] for i in range(len(idxB))])
    return matched
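A small sketch of what matching() does, reusing the imports above: it relabels clustering idxB so that its labels agree with those of idxA.

idxA = np.array([0, 0, 1, 1, 2, 2])  # reference clustering
idxB = np.array([2, 2, 0, 0, 1, 1])  # same partition, permuted label names
print(matching(idxA, idxB))          # should print [0 0 1 1 2 2]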
Code Example #7
import numpy as np
import networkx as nx
import scipy.sparse as sparse
from networkx.algorithms import bipartite


# Build a labelled bipartite graph from an overlap matrix; sort_matrix is an
# external helper that is only needed for the match=True branch.
def bipartite_graph_from_matrix(matrix,
                                labels1,
                                labels2,
                                threshold=0,
                                match=False):
    if match:
        matrix, permutation = sort_matrix(matrix)
        labels1 = [labels1[int(permutation[i])] for i in range(len(labels1))]

    label_dict = {i: l for i, l in enumerate(labels1 + labels2)}
    print("Topic labels:", label_dict)

    if not isinstance(matrix, np.ndarray):
        matrix = np.array(matrix)
    matrix[matrix < threshold] = 0
    sp_matrix = sparse.coo_matrix(matrix)
    g = bipartite.from_biadjacency_matrix(sp_matrix)
    nx.set_node_attributes(g, label_dict, name='label')
    return g
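With match=False the function needs no sort_matrix helper, so a small illustrative call (the topic names are made up) looks like this; entries below the threshold, here the 0.1, are dropped before the bipartite graph is built:

overlap = np.array([[0.9, 0.1],
                    [0.2, 0.8]])
g = bipartite_graph_from_matrix(overlap, ['topicA1', 'topicA2'],
                                ['topicB1', 'topicB2'], threshold=0.15)
print(g.edges(data=True))  # three weighted edges; the node attribute 'label' holds the topic names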
Code Example #8
File: plot.py Project: INSCOLOMBlA/Simulaciones_UNAL
        else:
            numero += i

    f.close()
    #matrix.pop()
    matrix.pop()

    numpy_matrix = np.matrix(matrix)

    return numpy_matrix


matrix = leer_matrix("Code_CPP/matrix.csv")
adjacency = scipy.sparse.csc_matrix(matrix)

G = bi.from_biadjacency_matrix(adjacency)

infected = leer_matrix("Code_CPP/Datos/infectados.csv")

color = bi.color(G)
'''
pos=nx.spring_layout(G)
w = csv.writer(open("output.csv", "w"))
for key,val in pos.items():
    w.writerow([key,val[0],val[1]])
'''
pos = {}
for i in range(10):
    pos[i] = np.array([0, 1 - (0.2 * i)])
for i in range(9):
    pos[10 + i] = np.array([0.2, 0.9 - (0.2 * i)])
Code Example #9
File: networkRobhoot3.py Project: RobhooX/Robhoot
#https://stackoverflow.com/questions/35392342/how-to-change-colours-of-nodes-and-edges-of-bipartite-graph-in-networkx

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite
import scipy.sparse as sparse

a_matrix = sparse.rand(5, 10, format='coo', density=0.8)

G = bipartite.from_biadjacency_matrix(a_matrix,
                                      create_using=None,
                                      edge_attribute='weight')
X, Y = bipartite.sets(G)
pos = dict()
pos.update((n, (-1, i * 11)) for i, n in enumerate(X))
pos.update((n, (0.5, i * 5)) for i, n in enumerate(Y))
num_edges = G.number_of_edges()
num_nodes = G.number_of_nodes()
#nx.draw(G, pos=pos, with_labels=True,edge_color=np.random.random(num_edges), edge_cmap=plt.get_cmap('Blues'), node_color=np.random.random(num_nodes), cmap=plt.get_cmap('Reds'))
nx.draw(G,
        pos=pos,
        with_labels=True,
        edge_color=np.random.random(num_edges),
        edge_cmap=plt.get_cmap('Reds'),
        node_color=range(num_nodes),
        node_size=1400,
        cmap=plt.cm.Reds)

#C = nx.connected_component_subgraphs(G)
#for g in C:
Code Example #10
File: test_matrix.py Project: 4c656554/networkx
 def test_from_biadjacency_multigraph(self):
     M = sparse.csc_matrix([[1, 2], [0, 3]])
     B = bipartite.from_biadjacency_matrix(M, create_using=nx.MultiGraph())
     assert_edges_equal(B.edges(), [(0, 2), (0, 3), (0, 3), (1, 3), (1, 3),
                                    (1, 3)])
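With a MultiGraph the integer entries are expanded into that many parallel edges, which is why the entry 3 at position (1, 1) of M shows up as (1, 3) three times above (compare Code Example #1, where the same matrix gives three weighted edges in a plain Graph). A standalone check of the edge count:

import networkx as nx
import scipy.sparse as sparse
from networkx.algorithms import bipartite

M = sparse.csc_matrix([[1, 2], [0, 3]])
B = bipartite.from_biadjacency_matrix(M, create_using=nx.MultiGraph())
assert B.number_of_edges() == int(M.sum())  # 1 + 2 + 0 + 3 == 6 parallel edges in total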
Code Example #11
File: test_matrix.py Project: 4c656554/networkx
 def test_from_biadjacency_roundtrip(self):
     B1 = nx.path_graph(5)
     M = bipartite.biadjacency_matrix(B1, [0, 2, 4])
     B2 = bipartite.from_biadjacency_matrix(M)
     assert_true(nx.is_isomorphic(B1, B2))
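The round trip only holds up to isomorphism, because from_biadjacency_matrix renumbers the row nodes [0, 2, 4] as 0..2 and the remaining column nodes as 3..4. A standalone version that makes the relabelling visible:

import networkx as nx
from networkx.algorithms import bipartite

B1 = nx.path_graph(5)
M = bipartite.biadjacency_matrix(B1, [0, 2, 4])  # rows are the listed nodes, columns the remaining nodes 1 and 3
B2 = bipartite.from_biadjacency_matrix(M)
print(sorted(B2.edges()))  # should print [(0, 3), (1, 3), (1, 4), (2, 4)]: the same path, relabelled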
Code Example #12
def aco_preprocessing(path_expr,
                      path_ppi,
                      col,
                      val1,
                      val2,
                      patients_info,
                      log2,
                      gene_list=None,
                      size=None,
                      HI_calc="exact",
                      sample=None):
    #gene_list - preselected genes
    #th if genes are not preselected specify threshold for standard deviation selection
    # HI calculation is either exact or corelation based (for big datasets)
    expr = pd.read_csv(path_expr, sep="\t")
    expr = expr.set_index("Unnamed: 0")

    group1_true = list(expr[expr[col] == val1].index)
    group2_true = list(expr[expr[col] == val2].index)
    patients_new = group1_true + group2_true
    if sample != None:
        idx = list(expr.index)
        new_idx = np.random.choice(idx, int(sample * len(idx)), False)
        expr = expr.loc[new_idx]
        group1_true = list(expr[expr[col] == val1].index)
        group2_true = list(expr[expr[col] == val2].index)
        patients_new = group1_true + group2_true

    expr = expr.loc[patients_new]
    net = pd.read_csv(path_ppi, sep="\t", header=None)
    nodes_ppi = set(net[0]).union(set(net[1]))
    genes_ge = list(set(expr.columns) - set(patients_info))
    new_genes = [int(x) for x in genes_ge]
    intersec_genes = set.intersection(set(new_genes), set(nodes_ppi))
    genes_for_expr = [str(x) for x in list(intersec_genes)]
    expr = expr[genes_for_expr]
    #20188 genes
    if log2:
        expr = np.log2(expr)
    z_scores = stats.zscore(expr)
    z_scores = pd.DataFrame(z_scores, columns=expr.columns, index=expr.index)
    if gene_list != None and size == None:  # gene list is given
        new_genes = [str(gene) for gene in gene_list]

    elif gene_list == None and size != None:  #std selection
        std_genes = expr[genes_for_expr].std()
        std_genes, genes_for_expr = zip(
            *sorted(zip(std_genes, genes_for_expr)))
        genes_for_expr = genes_for_expr[len(std_genes) - size:]
        new_genes = list(genes_for_expr)
    elif gene_list == None and size == None:  #all genes
        new_genes = genes_for_expr
    else:
        print(
            "please specify gene selection method: predefined list, standard deviation filtering or none of them"
        )
        return ()

    expr = expr[new_genes]
    z_scores = z_scores[new_genes].values

    labels_B = dict()
    rev_labels_B = dict()
    node = 0
    #nodes = set(deg_nodes + genes_aco)
    for g in new_genes:
        labels_B[node] = g
        rev_labels_B[g] = node
        node = node + 1
    for p in patients_new:
        labels_B[node] = p
        rev_labels_B[p] = node
        node = node + 1

    #scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    #sim = scaler.fit_transform(expr)
    data_aco = pd.DataFrame(z_scores, columns=new_genes, index=patients_new)
    data_aco = data_aco.T
    n, m = data_aco.shape

    GE = pd.DataFrame(data_aco.values,
                      index=np.arange(n),
                      columns=np.arange(n, n + m))
    t = 2
    b = np.matrix(data_aco > t)
    b_sp = csr_matrix(b)
    B = bipartite.from_biadjacency_matrix(b_sp)

    G = nx.Graph()
    G.add_nodes_from(np.arange(n))
    for row in net.itertuples():
        node1 = str(row[1])
        node2 = str(row[2])
        if node1 in set(new_genes) and node2 in set(new_genes):
            G.add_edge(rev_labels_B[node1], rev_labels_B[node2])
    A_new = nx.adj_matrix(G).todense()
    A_j = joined_net(B, G)
    if HI_calc == "exact":
        H = hi(A_j, n, m)
    if HI_calc == "corr":
        H = HI_big(data_aco, gtg_weight=1, gtp_weight=1, ptp_weight=1)

    group1_true_ids = [rev_labels_B[x] for x in group1_true]
    group2_true_ids = [rev_labels_B[x] for x in group2_true]

    return B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids, labels_B, rev_labels_B
Code Example #13
def sim_data(genes1, genes2, background, patients1, patients2, dens):
    n = genes1 + genes2 + background
    m = patients1 + patients2

    genes = np.arange(n)
    groups_genes = list(np.ones(genes1)) + list(np.ones(genes2) * 2) + list(
        np.ones(background) * 3)
    groups_p = [1 if node < patients1 else 2 for node in range(m)]

    to_sparce = 0.3  # to sparsify the bipartite graph
    to_mix = 0.99  # to mix edges between groups
    b = np.zeros((n, m))
    ge = np.random.normal(0, 1, n * m).reshape(n, m)

    for patient in range(m):
        for gene in range(n):
            p_gr = groups_p[patient]
            g_gr = groups_genes[gene]
            if p_gr == 1 and g_gr == 1:  #all up
                ge[gene, patient] = np.random.normal(1, 0.35, 1)
            elif p_gr == 2 and g_gr == 2:
                ge[gene, patient] = np.random.normal(1, 0.35, 1)  #also up
            elif p_gr == 1 and g_gr == 2:
                ge[gene, patient] = np.random.normal(-1, 0.35, 1)  #down
            elif p_gr == 2 and g_gr == 1:
                ge[gene, patient] = np.random.normal(-1, 0.35, 1)  #down
    for patient in range(m):
        for gene in range(genes1 + genes2):
            prob = np.random.uniform(0, 1)
            if prob > 0.9:
                ge[gene, patient] = np.random.normal(0, 1, 1)

    for gene in range(genes1 + genes2, n):
        prob = np.random.uniform(0, 1)
        if prob < 0.05:

            for patient in range(m):
                if groups_p[patient] == 1:  #all up
                    ge[gene, patient] = np.random.normal(0.3, 0.35, 1)
                else:
                    ge[gene, patient] = np.random.normal(-0.3, 0.35, 1)
        if prob > 0.05 and prob < 0.1:
            for patient in range(m):
                if groups_p[patient] == 1:  #all up
                    ge[gene, patient] = np.random.normal(-0.3, 0.35, 1)
                else:
                    ge[gene, patient] = np.random.normal(0.3, 0.35, 1)

    g1 = nx.barabasi_albert_graph(genes1, 1)
    g2 = nx.barabasi_albert_graph(genes2, 1)
    g3 = nx.barabasi_albert_graph(background, 1)
    G = nx.disjoint_union(g1, g2)
    G = nx.disjoint_union(G, g3)
    for _ in range(int(dens * n)):
        node1 = np.random.randint(0, genes1)
        node2 = np.random.randint(genes1, genes1 + genes2)
        node3_1 = np.random.randint(genes1 + genes2, n)
        node3_2 = np.random.randint(genes1 + genes2, n)
        G.add_edges_from([(node1, node3_1), (node2, node3_2)])

    d = nx.density(G)
    count = 0
    while d > 0.002 and count < 10:

        node3_1 = np.random.randint(genes1 + genes2, n)
        node3_2 = np.random.randint(genes1 + genes2, n)
        count = count + 1
        if G.has_edge(node3_1, node3_2):
            G.remove_edge(node3_1, node3_2)
            d = nx.density(G)

    #A_g = nx.adj_matrix(G).todense() *1
    b_sp = csr_matrix(b)  #sparse matrix for making bipartite graph
    B = bipartite.from_biadjacency_matrix(b_sp)

    GE = pd.DataFrame(ge, index=np.arange(n), columns=np.arange(n, n + m))
    H = HI_big(GE, 1, 1, 1)
    return (B, GE, G, H, d, n, m)
Code Example #14
def aco_preprocessing_strings(expr_str,
                              ppi_str,
                              col,
                              log2,
                              gene_list=None,
                              size=None,
                              sample=None):
    # path_expr - path for gene expression
    # path_ppi - path for ppi
    # col - split variable name (ONLY TWO CLASSES)
    # log2 - log2 transform
    #gene_list - preselected genes (if any)
    #size -  if genes are not preselected specify size of the gene set  for standard deviation selection
    # sample = None - all patients, otherwise specify fraction of patients taken
    EXPRDATA = StringIO(expr_str)
    expr = pd.read_csv(EXPRDATA, sep="\t")
    expr = expr.set_index("Unnamed: 0")
    #TODO: check if column 'prognosis' or 'cancer type' exists, set column based on this info
    if ('cancer_type' in list(expr)):
        col = 'cancer_type'
    else:
        col = 'prognosis'
    val1, val2 = list(set(expr[col]))
    group1_true = list(expr[expr[col] == val1].index)
    group2_true = list(expr[expr[col] == val2].index)
    patients_new = group1_true + group2_true
    if sample != None:
        idx = list(expr.index)
        new_idx = np.random.choice(idx, int(sample * len(idx)), False)
        expr = expr.loc[new_idx]
        group1_true = list(expr[expr[col] == val1].index)
        group2_true = list(expr[expr[col] == val2].index)
        patients_new = group1_true + group2_true

    expr = expr.loc[patients_new]
    PPIDATA = StringIO(ppi_str)
    net = pd.read_csv(PPIDATA, sep="\t", header=None)
    nodes_ppi = set(net[0]).union(set(net[1]))
    genes_ge = list(set(expr.columns) - set([col]))
    new_genes = [int(x) for x in genes_ge]
    intersec_genes = set.intersection(set(new_genes), set(nodes_ppi))
    genes_for_expr = [str(x) for x in list(intersec_genes)]
    expr = expr[genes_for_expr]
    #20188 genes
    if log2:
        expr = np.log2(expr)
    z_scores = stats.zscore(expr)
    z_scores = pd.DataFrame(z_scores, columns=expr.columns, index=expr.index)
    if gene_list != None and size == None:  # gene list is given
        new_genes = [str(gene) for gene in gene_list]

    elif gene_list == None and size != None:  #std selection
        std_genes = expr[genes_for_expr].std()
        std_genes, genes_for_expr = zip(
            *sorted(zip(std_genes, genes_for_expr)))
        genes_for_expr = genes_for_expr[len(std_genes) - size:]
        new_genes = list(genes_for_expr)
    elif gene_list == None and size == None:  #all genes
        new_genes = genes_for_expr
    else:
        print(
            "please specify gene selection method: predefined list, standard deviation filtering or none of them"
        )
        return ()

    expr = expr[new_genes]
    z_scores = z_scores[new_genes].values

    labels_B = dict()
    rev_labels_B = dict()
    node = 0
    #nodes = set(deg_nodes + genes_aco)
    for g in new_genes:
        labels_B[node] = g
        rev_labels_B[g] = node
        node = node + 1
    for p in patients_new:
        labels_B[node] = p
        rev_labels_B[p] = node
        node = node + 1

    #scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    #sim = scaler.fit_transform(expr)
    data_aco = pd.DataFrame(z_scores, columns=new_genes, index=patients_new)
    data_aco = data_aco.T
    n, m = data_aco.shape

    GE = pd.DataFrame(data_aco.values,
                      index=np.arange(n),
                      columns=np.arange(n, n + m))
    t = 2
    b = np.matrix(data_aco > t)
    b_sp = csr_matrix(b)
    B = bipartite.from_biadjacency_matrix(b_sp)

    G = nx.Graph()
    G.add_nodes_from(np.arange(n))
    for row in net.itertuples():
        node1 = str(row[1])
        node2 = str(row[2])
        if node1 in set(new_genes) and node2 in set(new_genes):
            G.add_edge(rev_labels_B[node1], rev_labels_B[node2])
    A_new = nx.adj_matrix(G).todense()

    H = HI_big(data_aco, gtg_weight=1, gtp_weight=1, ptp_weight=1)

    group1_true_ids = [rev_labels_B[x] for x in group1_true]
    group2_true_ids = [rev_labels_B[x] for x in group2_true]
    #print(group1_true + "babaaba")
    return B, G, H, n, m, GE, A_new, group1_true_ids, group2_true_ids, labels_B, rev_labels_B, val1, val2, group1_true, group2_true
Code Example #15
        if tempCol[j] > r:
            closeSegments = 2 * closeSegments
        elif tempCol[j] < (-r):
            closeSegments = 2 * closeSegments + 1
        else:
            closeSegments = np.concatenate(
                (2 * closeSegments, 2 * closeSegments + 1))
    #closeTargets = np.reshape(targetPoints[closeSegments], (closeSegments.shape[0] * k, d))
    for j in range(closeSegments.shape[0]):
        norms = np.linalg.norm(targetPoints[closeSegments[j]] -
                               latentPoints[i],
                               axis=-1)
        smallNorms = norms < r
        data = norms[smallNorms]
        rows = np.repeat(i, data.shape[0])
        cols = np.arange(k)[smallNorms] + (closeSegments[j] * k)
        B = B + coo_matrix(
            (data, (rows, cols)), shape=(n, (2**d) * k), dtype=np.float16)
    #closeTargets = targetPoints[closeSegments]
    #norms = np.linalg.norm(closeTargets - latentPoints[i], axis=-1)

    #for j in range(containments.shape[0]):
    #    closeLatentPoints[containments[j]].extend([i])

#create the biadjacency sparse matrix of the desired bipartite graph
#for i in range(n):
#    segmentPointIndices = np.asarray(closeLatentPoints[i], dtype=int)
#    segmentPoints = latentPoints[segmentPointIndices]

G = bipartite.from_biadjacency_matrix(B)
Code Example #16
            pass
        else:
            adjmatrix[i][x-1]=1
            counter+=1

counter
import networkx as nx # define the graph
from networkx.algorithms import bipartite
G=nx.Graph()

import numpy, scipy.sparse # define the adj. matrix as scipy matrix for input graph
A = numpy.array(adjmatrix)
Asp = scipy.sparse.csr_matrix(A)

from networkx.algorithms.bipartite import from_biadjacency_matrix
G = from_biadjacency_matrix(Asp, create_using=None, edge_attribute=None)#scipy sparse matrix
X, Y = bipartite.sets(G)
XX = list(X)
YY = list(Y)

import matplotlib.pyplot as plt
import numpy as np

nx.draw(G)
plt.savefig("simple_path.png") # save as png
plt.show() # display

# Define I_tem dictionary
tem = {}
for i in range(1, aMAXX+1): 
    for j in range(1, bMAXX+1):