Example #1
0
	def edge_construct(self):
		"""
		Build edges between gene pairs from leave-one-out correlations.
		
		A gene takes part only if it has at least 8 valid (unmasked) data spots.
		For every pair of such genes, the correlation (r.cor) is computed once per
		column k, with column k masked out in addition to the joint mask of the
		two genes. A leave-one-out round is skipped when fewer than 7 valid spots
		remain. If at least one correlation was computed and the smallest one
		is >= 0.6, that minimum is stored in
		self.graph_dict[(genelabels[i], genelabels[j])].
		
		Reads: self.no_of_genes, self.no_of_cols, self.mask_array, self.mask_matrix,
			self.genelabels, self.debug
		Writes: self.cor_vector (reset for every gene pair), self.graph_dict
		
		The debug messages use the single-argument print(...) form, which prints
		the same text under Python 2 and is also valid syntax under Python 3.
		"""
		min_spots_per_gene = 8		#a gene needs at least this many valid data spots
		min_spots_per_round = 7		#a leave-one-out round needs at least this many valid spots
		min_cor_cut_off = 0.6		#the weakest leave-one-out correlation must reach this
		
		#The number of valid spots of a gene doesn't depend on its partner,
		#so count it once per gene instead of once per gene pair.
		valid_counts = [self.no_of_cols - ma.sum(self.mask_array[g].mask()) for g in range(self.no_of_genes)]
		
		for i in range(self.no_of_genes):
			if valid_counts[i] < min_spots_per_gene:
				#less than 8 valid data spots
				if self.debug:
					print('jump_out level 0\t' + self.genelabels[i])
				continue
			mask_i = self.mask_array[i].mask()
			for j in range(i+1, self.no_of_genes):
				if valid_counts[j] < min_spots_per_gene:
					#less than 8 valid data spots
					if self.debug:
						print('jump_out level 1\t' + self.genelabels[j])
					continue
				joint_mask = ma.mask_or(mask_i, self.mask_array[j].mask())		#joint mask
				self.cor_vector = []			#initialization
				for k in range(self.no_of_cols):
					new_mask = ma.mask_or(joint_mask, self.mask_matrix[k])		#leave k out
					if self.no_of_cols - ma.sum(new_mask) < min_spots_per_round:
						#less than 7 valid spots left, no correlation
						if self.debug:
							print('jump_out level 2\t%s v.s %s at %d'%(self.genelabels[i], self.genelabels[j], k,))
						continue
					v1 = ma.array(self.mask_array[i], mask=new_mask).compressed().tolist()
					v2 = ma.array(self.mask_array[j], mask=new_mask).compressed().tolist()
					self.cor_vector.append(r.cor(v1, v2))
				if len(self.cor_vector) > 0:
					min_cor = min(self.cor_vector)
					if min_cor >= min_cor_cut_off:
						if self.debug:
							print('cor vector of %s v.s. %s: %s'%(self.genelabels[i], self.genelabels[j],self.cor_vector,))
						self.graph_dict[(self.genelabels[i],self.genelabels[j])] = min_cor
Example #2
0
	def mask_array_construct(self):
		"""
		Read self.dataset_source through R and set up the masked data structures.
		
		Sets:
			self.mask_array: the data matrix as a masked array
			self.genelabels: the row names of the R table
			self.no_of_genes: number of row names
			self.no_of_cols: length of the first row of the matrix
			self.mask_matrix: identity matrix of size no_of_cols
		
		!Important!
		If the dataset_source has too few data, conversion from R to python will be a problem.
		The whole data matrix will be converted to a python string matrix.
		R's NA is not converted to nan in python.
		
		The cause has been found:
		r.as_matrix converts a small dataset to character type.
		r.matrix won't rig the class type, but it rigs the structure.
		The only way to solve this is to add a colClasses vector to r.read_table,
		such as: colClasses=c('character',rep('double',11))
		But you have to know the no_of_cols in advance.
		
		As our dataset is really big, this problem hasn't appeared.
		"""
		#mode 0 is passed to with_mode so that the table is handed to r.as_matrix/r.rownames below
		r_table = with_mode(0, r.read_table)(self.dataset_source, row_names=1)
		#print r.as_matrix(r_table)
		inside_masked = ma.masked_inside(r.as_matrix(r_table), -1.0e20, 1.0e20)
		#masked_inside leaves everything masked except nan, the opposite of
		#what is wanted here, so the mask is inverted before it is used.
		inverted_mask = ma.logical_not(ma.getmask(inside_masked))
		self.mask_array = ma.array(inside_masked, mask=inverted_mask)
		
		row_labels = r.rownames(r_table)
		self.genelabels = row_labels
		self.no_of_genes = len(row_labels)
		
		column_count = len(inside_masked[0])
		self.no_of_cols = column_count
		#row k of the identity matrix is non-zero only at column k
		self.mask_matrix = ma.identity(column_count)
		
		#drop the references to the temporary objects
		del inside_masked, r_table
Example #3
0
	def edge_construct(self):
		"""
		Build edges between gene pairs from leave-one-out correlations.
		
		For every gene pair (i, j) with i < j, the correlation (r.cor) is computed
		once per column k, with column k masked out in addition to the joint mask
		of the two genes. A round is skipped when fewer than self.jk_cor_cut_off
		valid spots remain. Among the computed correlations, the one with the
		smallest absolute value is picked; if that absolute value is
		>= self.cor_cut_off, the (signed) correlation is stored in
		self.graph_dict[(genelabels[i], genelabels[j])].
		
		No per-gene filter on the number of valid spots is applied here;
		after preprocessing, such filters are of no use.
		
		Reads: self.no_of_genes, self.no_of_cols, self.mask_array, self.mask_matrix,
			self.genelabels, self.jk_cor_cut_off, self.cor_cut_off, self.debug
		Writes: self.cor_vector (reset for every gene pair), self.graph_dict
		"""
		for i in range(self.no_of_genes):
			mask_i = self.mask_array[i].mask()
			for j in range(i+1, self.no_of_genes):
				joint_mask = ma.mask_or(mask_i, self.mask_array[j].mask())		#joint mask
				self.cor_vector = []			#initialization
				#The number of valid spots shared by the two genes doesn't depend on k.
				shared_valid = self.no_of_cols - ma.sum(joint_mask)
				if shared_valid < self.jk_cor_cut_off:
					#Leaving a column out can't add valid spots, so every round
					#would be skipped. No correlation, no edge.
					continue
				#Only jk_cor_cut_off valid quantities shared by two genes.
				#All the leave-one-out cor's are same. You can only leave NA out.
				#So one computed round is enough.
				one_round_only = (shared_valid == self.jk_cor_cut_off)
				nn_cor_vector = [] 			#non-negative version of cor_vector
				for k in range(self.no_of_cols):
					new_mask = ma.mask_or(joint_mask, self.mask_matrix[k])		#leave k out
					if self.no_of_cols - ma.sum(new_mask) < self.jk_cor_cut_off:
						#less than jk_cor_cut_off, no correlation
						continue
					v1 = ma.array(self.mask_array[i], mask=new_mask).compressed().tolist()
					v2 = ma.array(self.mask_array[j], mask=new_mask).compressed().tolist()
					cor = r.cor(v1,v2)
					self.cor_vector.append(cor)
					nn_cor_vector.append(math.fabs(cor))
					if one_round_only:
						break
				
				if len(self.cor_vector) > 0:
					min_cor = min(nn_cor_vector)		#minimum in the non-negative version of cor_vector
					if min_cor >= self.cor_cut_off:
						if self.debug:
							sys.stderr.write('cor vector of %s v.s. %s: %s\n'%(self.genelabels[i], self.genelabels[j],self.cor_vector,))
						#store the signed correlation whose absolute value is the minimum
						self.graph_dict[(self.genelabels[i],self.genelabels[j])] = self.cor_vector[nn_cor_vector.index(min_cor)]
Example #4
0
	def mask_array_construct(self):
		"""
		Read self.dataset_source through R and set up the masked data structures.
		
		Fills in self.mask_array, self.genelabels, self.no_of_genes,
		self.no_of_cols and self.mask_matrix.
		
		!Important!
		If the dataset_source has too few data, conversion from R to python will be a problem.
		The whole data matrix will be converted to a python string matrix.
		R's NA is not converted to nan in python.
		"""
		table = with_mode(0, r.read_table)(self.dataset_source)
		#print r.as_matrix(table)
		
		#Step 1: mask everything inside [-1.0e20, 1.0e20].
		#That leaves all entries masked except nan, which is the reverse of what is needed.
		reversed_array = ma.masked_inside(r.as_matrix(table), -1.0e20, 1.0e20)
		#Step 2: flip the mask.
		self.mask_array = ma.array(
			reversed_array,
			mask=ma.logical_not(ma.getmask(reversed_array)))
		
		#Step 3: labels and dimensions.
		self.genelabels = r.rownames(table)
		self.no_of_genes = len(self.genelabels)
		self.no_of_cols = len(reversed_array[0])
		
		#Step 4: identity matrix with one row per column of the data.
		self.mask_matrix = ma.identity(self.no_of_cols)
		
		#the temporaries are not needed anymore
		del reversed_array, table