def edge_construct(self):
    """Fill ``self.graph_dict`` with gene pairs that stay correlated under leave-one-out.

    Every gene pair ``(i, j)`` with ``i < j`` is examined, provided each gene
    has at least 8 unmasked columns.  For each column ``k`` the pair's joint
    mask is extended with row ``k`` of ``self.mask_matrix`` and, when at
    least 7 columns remain unmasked, ``r.cor`` is evaluated on the remaining
    values.  If at least one correlation was computed and the smallest one
    is >= 0.6, that minimum is stored in ``self.graph_dict`` under the key
    ``(genelabels[i], genelabels[j])``.

    Side effects: ``self.cor_vector`` is rebound for every pair and ends up
    holding the correlations of the last pair examined.  When ``self.debug``
    is true, skipped genes/columns and accepted vectors are printed.
    """
    n_cols = self.no_of_cols
    n_genes = self.no_of_genes
    for i in range(n_genes):
        mask_i = self.mask_array[i].mask()
        if n_cols - ma.sum(mask_i) < 8:
            # Gene i has fewer than 8 valid data spots: skip all its pairs.
            if self.debug:
                print('jump_out level 0\t' + self.genelabels[i])
            continue
        for j in range(i + 1, n_genes):
            mask_j = self.mask_array[j].mask()
            if n_cols - ma.sum(mask_j) < 8:
                # Gene j has fewer than 8 valid data spots.
                if self.debug:
                    print('jump_out level 1\t' + self.genelabels[j])
                continue
            # Columns missing in either gene are unusable for the pair.
            pair_mask = ma.mask_or(mask_i, mask_j)
            # The same list object is exposed on self and filled in place.
            correlations = self.cor_vector = []
            for k in range(n_cols):
                # Additionally leave column k out.
                loo_mask = ma.mask_or(pair_mask, self.mask_matrix[k])
                if n_cols - ma.sum(loo_mask) < 7:
                    # Fewer than 7 values left: no correlation for this k.
                    if self.debug:
                        print('jump_out level 2\t%s v.s %s at %d' % (
                            self.genelabels[i], self.genelabels[j], k,))
                    continue
                values_i = ma.array(self.mask_array[i], mask=loo_mask)
                values_j = ma.array(self.mask_array[j], mask=loo_mask)
                correlations.append(r.cor(values_i.compressed().tolist(),
                                          values_j.compressed().tolist()))
            if len(correlations) > 0:
                min_cor = min(correlations)
                # Keep the ">=" direction: a NaN minimum must not pass.
                if min_cor >= 0.6:
                    if self.debug:
                        print('cor vector of %s v.s. %s: %s' % (
                            self.genelabels[i], self.genelabels[j],
                            self.cor_vector,))
                    edge = (self.genelabels[i], self.genelabels[j])
                    self.graph_dict[edge] = min_cor
def mask_array_construct(self):
    """Read ``self.dataset_source`` through R and build the masked data matrix.

    The table is read with ``r.read_table`` (``row_names=1``) wrapped in
    ``with_mode(0, ...)`` and converted with ``r.as_matrix``.  The method
    sets:

    * ``self.mask_array``  -- masked array over the matrix (see the mask
      inversion comment in the body)
    * ``self.genelabels``  -- ``r.rownames`` of the table
    * ``self.no_of_genes`` -- number of gene labels
    * ``self.no_of_cols``  -- length of the first matrix row
    * ``self.mask_matrix`` -- ``ma.identity(self.no_of_cols)``

    !Important! (original author's note)
    If the dataset_source has too few data, conversion from R to python
    will be a problem.  The whole data matrix will be converted to a python
    string matrix, and R's NA is not converted to nan in python.
    The cause has been found: r.as_matrix converts a small dataset to
    character type.  r.matrix won't rig the class type, but it rigs the
    structure.  The only way to solve this is to add a colClasses vector
    to r.read_table, such as
        colClasses=c('character',rep('double',11))
    but then no_of_cols has to be known in advance.  As our dataset is
    really big, this problem hasn't appeared.
    """
    # NOTE(review): with_mode(0, ...) presumably keeps the result as an
    # unconverted R object -- confirm against the rpy documentation.
    read_table = with_mode(0, r.read_table)
    table = read_table(self.dataset_source, row_names=1)

    # masked_inside masks every value within [-1e20, 1e20]; per the original
    # note that leaves only nan unmasked, which is the converse of what is
    # wanted, so the mask is negated before building the final array.
    inside_masked = ma.masked_inside(r.as_matrix(table), -1.0e20, 1.0e20)
    negated_mask = ma.logical_not(ma.getmask(inside_masked))
    self.mask_array = ma.array(inside_masked, mask=negated_mask)

    labels = r.rownames(table)
    self.genelabels = labels
    self.no_of_genes = len(labels)

    n_cols = len(inside_masked[0])
    self.no_of_cols = n_cols
    # Identity matrix: row k is non-zero only at column k.
    self.mask_matrix = ma.identity(n_cols)

    del inside_masked, table
def edge_construct(self):
    """Fill ``self.graph_dict`` with gene pairs whose leave-one-out
    correlations are all strong in absolute value.

    For every gene pair ``(i, j)`` with ``i < j``, each column ``k`` is
    masked in turn on top of the pair's joint mask (via row ``k`` of
    ``self.mask_matrix``).  When at least ``self.jk_cor_cut_off`` columns
    remain unmasked, ``r.cor`` is evaluated on the remaining values.  If at
    least one correlation was computed and the smallest absolute
    correlation is >= ``self.cor_cut_off``, the signed correlation at that
    position is stored in ``self.graph_dict`` under the key
    ``(genelabels[i], genelabels[j])``.

    Side effects: ``self.cor_vector`` is rebound for every pair and ends up
    holding the signed correlations of the last pair examined.  When
    ``self.debug`` is true, accepted vectors are written to ``sys.stderr``.
    """
    n_cols = self.no_of_cols
    n_genes = self.no_of_genes
    for i in range(n_genes):
        # After preprocessing, the per-gene filters are of no use: earlier
        # code skipped genes i and j with fewer than self.gene_cut_off valid
        # data spots here; that filtering is disabled.
        for j in range(i + 1, n_genes):
            # Columns missing in either gene are unusable for the pair.
            pair_mask = ma.mask_or(self.mask_array[i].mask(),
                                   self.mask_array[j].mask())
            # The same list object is exposed on self and filled in place.
            signed = self.cor_vector = []
            magnitudes = []    # non-negative version of the signed list
            for k in range(n_cols):
                # Additionally leave column k out.
                loo_mask = ma.mask_or(pair_mask, self.mask_matrix[k])
                if n_cols - ma.sum(loo_mask) < self.jk_cor_cut_off:
                    # Fewer than jk_cor_cut_off values left: no correlation.
                    continue
                values_i = ma.array(self.mask_array[i], mask=loo_mask)
                values_j = ma.array(self.mask_array[j], mask=loo_mask)
                cor = r.cor(values_i.compressed().tolist(),
                            values_j.compressed().tolist())
                signed.append(cor)
                magnitudes.append(math.fabs(cor))
                if n_cols - ma.sum(pair_mask) == self.jk_cor_cut_off:
                    # The two genes share exactly jk_cor_cut_off valid
                    # values, so only an already-missing column can be left
                    # out and every leave-one-out correlation is the same:
                    # one is enough.
                    break
            if len(signed) > 0:
                # Minimum of the non-negative version of the correlations.
                weakest = min(magnitudes)
                # Keep the ">=" direction: a NaN minimum must not pass.
                if weakest >= self.cor_cut_off:
                    if self.debug:
                        sys.stderr.write('cor vector of %s v.s. %s: %s\n' % (
                            self.genelabels[i], self.genelabels[j],
                            self.cor_vector,))
                    edge = (self.genelabels[i], self.genelabels[j])
                    self.graph_dict[edge] = signed[magnitudes.index(weakest)]
def mask_array_construct(self):
    """Read ``self.dataset_source`` through R and build the masked data matrix.

    The table is read with ``r.read_table`` wrapped in ``with_mode(0, ...)``
    and converted with ``r.as_matrix``.  The method sets:

    * ``self.mask_array``  -- masked array over the matrix (see the mask
      inversion comment in the body)
    * ``self.genelabels``  -- ``r.rownames`` of the table
    * ``self.no_of_genes`` -- number of gene labels
    * ``self.no_of_cols``  -- length of the first matrix row
    * ``self.mask_matrix`` -- ``ma.identity(self.no_of_cols)``

    !Important! (original author's note)
    If the dataset_source has too few data, conversion from R to python
    will be a problem.  The whole data matrix will be converted to a python
    string matrix, and R's NA is not converted to nan in python.
    """
    # NOTE(review): with_mode(0, ...) presumably keeps the result as an
    # unconverted R object -- confirm against the rpy documentation.
    read_table = with_mode(0, r.read_table)
    table = read_table(self.dataset_source)

    # masked_inside masks every value within [-1e20, 1e20]; per the original
    # note that leaves only nan unmasked, which is the converse of what is
    # wanted, so the mask is negated before building the final array.
    inside_masked = ma.masked_inside(r.as_matrix(table), -1.0e20, 1.0e20)
    negated_mask = ma.logical_not(ma.getmask(inside_masked))
    self.mask_array = ma.array(inside_masked, mask=negated_mask)

    labels = r.rownames(table)
    self.genelabels = labels
    self.no_of_genes = len(labels)

    n_cols = len(inside_masked[0])
    self.no_of_cols = n_cols
    # Identity matrix: row k is non-zero only at column k.
    self.mask_matrix = ma.identity(n_cols)

    del inside_masked, table