Esempio n. 1
0
def similarity_matrix(infmat, index2gene, gene2heat, directed=True):
    """Create and return a similarity matrix and index to gene mapping for the given influence
    matrix and heat. Only genes with heat that are in the network will be included in the returned
    similarity matrix and index to gene mapping.

    The backend is selected by the module-level ``choice_creation_similarity_matrix`` setting:
    1 uses ``fortran_routines``, 2 uses ``c_routines``, any other value uses pure NumPy.

    Arguments:
    infmat -- 2D array-like representing the full influence matrix (converted to float64)
    index2gene -- dict mapping an index in the matrix to the name of the gene represented at that
                  index in the influence matrix; the smallest key is treated as row/column 0
    gene2heat -- dict mapping a gene name to the heat score for that gene
    directed -- if True, sim[i][j] = inf(i,j)*heat[j], so in general sim[i][j] != sim[j][i]
                if False, sim[i][j] = min(inf(i,j), inf(j,i))*max(heat(i), heat(j))
                (formulas describe the NumPy path; the compiled routines are assumed to compute
                the same values -- TODO confirm against their sources)

    Returns:
    (sim, index2gene) -- the similarity matrix and a new dict mapping each 0-based row/column
                         index of sim to its gene name (genes are in sorted order)

    Raises:
    ValueError -- if index2gene is empty

    """
    start_index = min(index2gene)
    gene2index = {gene: index for index, gene in index2gene.items()}

    # Identify genes in the given list that are also in the network (sorted for a stable order)
    genelist = sorted(set(gene2heat).intersection(gene2index))
    sim_index2gene = dict(enumerate(genelist))
    # A single pre-formatted argument prints the same text on Python 2 and Python 3.
    print("\t- Genes in similarity matrix: %d" % len(genelist))

    h = np.array([gene2heat[g] for g in genelist], dtype=np.float64)
    # Convert once for every backend; no copy is made if infmat is already a float64 ndarray.
    # Without this the undirected NumPy path would truncate results for integer input.
    infmat = np.asarray(infmat, dtype=np.float64)
    # 0-based positions of the selected genes inside infmat
    positions = [gene2index[g] - start_index for g in genelist]
    m = np.shape(infmat)[0]
    n = np.shape(h)[0]

    if choice_creation_similarity_matrix == 1:
        # Fortran is 1-indexed
        indices = np.array([p + 1 for p in positions], dtype=int)
        if directed:
            sim = fortran_routines.compute_sim(infmat, h, indices, m, n)
        else:
            sim = fortran_routines.compute_sim_classic(infmat, h, indices, m, n)

    elif choice_creation_similarity_matrix == 2:
        indices = np.array(positions, dtype=int)
        if directed:
            sim = c_routines.compute_sim(infmat, h, indices, m, n)
        else:
            sim = c_routines.compute_sim_classic(infmat, h, indices, m, n)

    else:
        M = infmat[np.ix_(positions, positions)]
        if directed:
            # h broadcasts along the last axis: column j is scaled by h[j]
            sim = M * h
        else:
            M = np.minimum(M, M.transpose())  # Ensure that the influence matrix is symmetric
            # maximum.outer(h, h)[i][j] == max(h[i], h[j]); replaces the O(n^2) Python loop
            sim = np.maximum.outer(h, h) * M

    return sim, sim_index2gene
def similarity_matrix(infmat, index2gene, gene2heat, directed=True, verbose=0):
    """Create and return a similarity matrix and index to gene mapping for the given influence
    matrix and heat. Only genes with heat that are in the network will be included in the returned
    similarity matrix and index to gene mapping.

    When the module-level ``fast_similarity_matrix`` flag is true the work is delegated to
    ``c_routines``; otherwise it is done in pure NumPy.

    Arguments:
    infmat -- 2D array-like representing the full influence matrix (converted to float64)
    index2gene -- dict mapping an index in the matrix to the name of the gene represented at that
                  index in the influence matrix; the smallest key is treated as row/column 0
    gene2heat -- dict mapping a gene name to the heat score for that gene
    directed -- if True, sim[i][j] = inf(i,j)*heat[j], so in general sim[i][j] != sim[j][i]
                if False, sim[i][j] = min(inf(i,j), inf(j,i))*max(heat(i), heat(j))
                (formulas describe the NumPy path; the compiled routines are assumed to compute
                the same values -- TODO confirm against their sources)
    verbose -- the number of selected genes is printed only when verbose > 4

    Returns:
    (sim, index2gene) -- the similarity matrix and a new dict mapping each 0-based row/column
                         index of sim to its gene name (genes are in sorted order)

    Raises:
    ValueError -- if index2gene is empty

    """
    start_index = min(index2gene)
    gene2index = {gene: index for index, gene in index2gene.items()}

    # Identify genes in the given list that are also in the network (sorted for a stable order)
    genelist = sorted(set(gene2heat).intersection(gene2index))
    sim_index2gene = dict(enumerate(genelist))
    if verbose > 4:
        # A single pre-formatted argument prints the same text on Python 2 and Python 3.
        print("\t- Genes in similarity matrix: %d" % len(genelist))

    infmat = np.asarray(infmat, dtype=np.float64)
    h = np.array([gene2heat[g] for g in genelist], dtype=np.float64)
    # 0-based positions of the selected genes inside infmat
    indices = np.array([gene2index[g] - start_index for g in genelist],
                       dtype=int)
    m = np.shape(infmat)[0]
    n = np.shape(h)[0]

    if fast_similarity_matrix:
        if directed:
            sim = c_routines.compute_sim(infmat, h, indices, m, n)
        else:
            sim = c_routines.compute_sim_classic(infmat, h, indices, m, n)
    else:
        M = infmat[np.ix_(indices, indices)]
        if directed:
            # h broadcasts along the last axis: column j is scaled by h[j]
            sim = M * h
        else:
            M = np.minimum(
                M,
                M.transpose())  # Ensure that the influence matrix is symmetric
            # maximum.outer(h, h)[i][j] == max(h[i], h[j]); replaces the O(n^2) Python loop
            sim = np.maximum.outer(h, h) * M

    return sim, sim_index2gene