def similarity_matrix(infmat, index2gene, gene2heat, directed=True):
    """Create and return a similarity matrix and index to gene mapping for the given influence
    matrix and heat. Only genes with heat that are in the network will be included in the
    returned similarity matrix and index to gene mapping.

    The backend is selected by the module-level ``choice_creation_similarity_matrix``:
    1 uses ``fortran_routines``, 2 uses ``c_routines``, any other value uses pure NumPy.
    The formulas below describe the NumPy path; the compiled routines are presumably
    equivalent, but that cannot be confirmed from this function alone.

    Arguments:
    infmat -- 2D ndarray representing the full influence matrix
    index2gene -- dict mapping an index in the matrix to the name of the gene represented at
                  that index in the influence matrix; the smallest key is treated as row 0,
                  so both 0-based and 1-based mappings are accepted
    gene2heat -- dict mapping a gene name to the heat score for that gene
    directed -- if True, sim[i][j] = inf(i,j)*heat[j] (column j is scaled by the heat of
                gene j) and in general sim[i][j] != sim[j][i]
                if False, sim[i][j] = min(inf(i,j), inf(j,i))*max(heat(i), heat(j))

    Returns:
    A tuple (sim, index2gene) where sim is the similarity matrix restricted to the retained
    genes and index2gene maps each 0-based row/column index of sim to its gene name (genes
    are ordered alphabetically).

    Raises:
    ValueError -- if index2gene is empty (there is no smallest index to anchor on)
    """
    start_index = min(index2gene)
    gene2index = {gene: index for index, gene in index2gene.items()}

    # Identify genes in the given list that are also in the network
    genelist = sorted(set(gene2heat).intersection(gene2index))
    index2gene = dict(enumerate(genelist))
    # A single pre-formatted argument prints the same text under Python 2 and Python 3.
    print("\t- Genes in similarity matrix: %d" % len(genelist))

    h = np.array([gene2heat[g] for g in genelist], dtype=np.float64)

    if choice_creation_similarity_matrix in (1, 2):
        if choice_creation_similarity_matrix == 1:
            routines, offset = fortran_routines, 1  # Fortran is 1-indexed
        else:
            routines, offset = c_routines, 0

        # The compiled routines are handed double precision data; asarray only copies
        # when a conversion is actually required.
        infmat = np.asarray(infmat, dtype=np.float64)
        # dtype=int is what the removed ``np.int`` alias resolved to.
        indices = np.array(
            [gene2index[g] - start_index + offset for g in genelist], dtype=int)

        compute = routines.compute_sim if directed else routines.compute_sim_classic
        sim = compute(infmat, h, indices, np.shape(infmat)[0], np.shape(h)[0])
    else:
        indices = [gene2index[g] - start_index for g in genelist]
        M = infmat[np.ix_(indices, indices)]
        if directed:
            # h broadcasts along the last axis: column j is multiplied by h[j].
            sim = M * h
        else:
            # Ensure that the influence matrix is symmetric
            M = np.minimum(M, M.transpose())
            # Pairwise max of the heats, applied element-wise; symmetric because M is.
            sim = np.maximum.outer(h, h) * M

    return sim, index2gene
def similarity_matrix(infmat, index2gene, gene2heat, directed=True, verbose=0):
    """Create and return a similarity matrix and index to gene mapping for the given influence
    matrix and heat. Only genes with heat that are in the network will be included in the
    returned similarity matrix and index to gene mapping.

    When the module-level ``fast_similarity_matrix`` flag is truthy the work is delegated to
    ``c_routines``; otherwise it is done in pure NumPy. The formulas below describe the NumPy
    path; the compiled routines are presumably equivalent, but that cannot be confirmed from
    this function alone.

    Arguments:
    infmat -- 2D array-like representing the full influence matrix (converted to float64)
    index2gene -- dict mapping an index in the matrix to the name of the gene represented at
                  that index in the influence matrix; the smallest key is treated as row 0,
                  so both 0-based and 1-based mappings are accepted
    gene2heat -- dict mapping a gene name to the heat score for that gene
    directed -- if True, sim[i][j] = inf(i,j)*heat[j] (column j is scaled by the heat of
                gene j) and in general sim[i][j] != sim[j][i]
                if False, sim[i][j] = min(inf(i,j), inf(j,i))*max(heat(i), heat(j))
    verbose -- verbosity level; the number of retained genes is printed when it exceeds 4

    Returns:
    A tuple (sim, index2gene) where sim is the similarity matrix restricted to the retained
    genes and index2gene maps each 0-based row/column index of sim to its gene name (genes
    are ordered alphabetically).

    Raises:
    ValueError -- if index2gene is empty (there is no smallest index to anchor on)
    """
    start_index = min(index2gene)
    gene2index = {gene: index for index, gene in index2gene.items()}

    # Identify genes in the given list that are also in the network
    genelist = sorted(set(gene2heat).intersection(gene2index))
    index2gene = dict(enumerate(genelist))
    if verbose > 4:
        # A single pre-formatted argument prints the same text under Python 2 and Python 3.
        print("\t- Genes in similarity matrix: %d" % len(genelist))

    infmat = np.asarray(infmat, dtype=np.float64)
    h = np.array([gene2heat[g] for g in genelist], dtype=np.float64)
    # dtype=int is what the removed ``np.int`` alias resolved to.
    indices = np.array([gene2index[g] - start_index for g in genelist], dtype=int)
    m = np.shape(infmat)[0]
    n = np.shape(h)[0]

    if fast_similarity_matrix:
        compute = c_routines.compute_sim if directed else c_routines.compute_sim_classic
        sim = compute(infmat, h, indices, m, n)
    else:
        M = infmat[np.ix_(indices, indices)]
        if directed:
            # h broadcasts along the last axis: column j is multiplied by h[j].
            sim = M * h
        else:
            # Ensure that the influence matrix is symmetric
            M = np.minimum(M, M.transpose())
            # Pairwise max of the heats, applied element-wise; symmetric because M is.
            sim = np.maximum.outer(h, h) * M

    return sim, index2gene