Example #1
	def run(self):
		"""
		06-03-05
			
			--<get_edge_data>
			
			--mpi_schedule_jobs()
				--callTightClust()
					--<PreprocessEdgeData>
					--tightClust
			--<netmine_wrapper>
			
		"""
		communicator = MPI.world.duplicate()
		get_edge_data_instance = get_edge_data(self.hostname, self.dbname, self.schema,\
				self.table, self.output_dir, self.min_no_of_edges, self.debug, self.no_of_nas)
		
		if communicator.rank == 0:
			sys.stderr.write("this is node %s\n"%communicator.rank)
			get_edge_data_instance.run()
		
		mpi_synchronize(communicator)
		
		job_list = get_edge_data_instance.go_no_qualified
		parameter_list = [self.output_dir, self.no_of_nas, self.top_percentage, self.targetClustNum, \
			self.min_k, self.max_k, self.alpha, self.beta, self.topNum, self.seqNum, self.resampNum,\
			self.subSampPercent, self.npass]
		if self.debug:
			sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
		of_name_list = mpi_schedule_jobs(communicator, job_list, callTightClust, parameter_list, self.debug)
		
		mpi_synchronize(communicator)
		
		#collecting
		if communicator.rank==0:
			final_ofname = os.path.join(self.output_dir, 'tightClust')
			netmine_wrapper_instance = netmine_wrapper()
			netmine_wrapper_instance.collect_and_merge_output(of_name_list, final_ofname)
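
The run() above leaves the actual scheduling to mpi_schedule_jobs(), which hands each job in job_list to an idle node, runs callTightClust() on it with the shared parameter_list, and returns the per-node output file names that collect_and_merge_output() later merges. Below is a minimal sketch of that master/worker pattern, written against mpi4py rather than the MPI bindings used above; schedule_jobs and process_job are hypothetical names for illustration, not the original API.

# Master/worker scheduling sketch -- an assumption about what mpi_schedule_jobs() does, not its real code.
from mpi4py import MPI

def schedule_jobs(comm, job_list, process_job):
	rank, size = comm.Get_rank(), comm.Get_size()
	results = []
	if rank == 0:
		next_job = 0
		active_workers = size - 1
		# Seed every worker with one job, or a stop signal (tag 0) if there are fewer jobs than workers.
		for worker in range(1, size):
			if next_job < len(job_list):
				comm.send(job_list[next_job], dest=worker, tag=1)
				next_job += 1
			else:
				comm.send(None, dest=worker, tag=0)
				active_workers -= 1
		# Each returned result frees a worker; feed it the next job or tell it to stop.
		while active_workers > 0:
			status = MPI.Status()
			result = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
			results.append(result)
			if next_job < len(job_list):
				comm.send(job_list[next_job], dest=status.Get_source(), tag=1)
				next_job += 1
			else:
				comm.send(None, dest=status.Get_source(), tag=0)
				active_workers -= 1
	else:
		while True:
			status = MPI.Status()
			job = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
			if status.Get_tag() == 0:
				break
			comm.send(process_job(job), dest=0, tag=1)
	return results	# only rank 0 ends up with the collected results, like of_name_list above

Run under mpiexec with at least two ranks; in the example above, process_job would wrap callTightClust() and return the output file name it wrote.
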
Example #2
	def run(self):
		"""
		08-14-05
		"""
		communicator = MPI.world.duplicate()
		fake_no_of_nodes = int((communicator.size-1)*self.times_nodes)	#NOTICE: fake_no_of_nodes is used to enlarge (or shrink) the actual number of nodes,
			#to balance the amount of work on each node
		OffsetLimitList = Numeric.zeros((fake_no_of_nodes,2), Numeric.Int)
		if communicator.rank == 0:
			(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
			OffsetLimitList = self.createOffsetLimitList(curs, self.source_table, fake_no_of_nodes)
			OffsetLimitList = Numeric.array(OffsetLimitList, Numeric.Int)	#transform it into Numeric array to broadcast()
			if self.commit:	#08-14-05	create the gene_table
				instance = gene_stat()
				instance.createGeneTable(curs, self.gene_table)
				curs.execute('end')
			if self.debug:
				sys.stderr.write("OffsetLimitList: %s"%repr(OffsetLimitList))
			del conn, curs
		
		communicator.broadcast(OffsetLimitList, 0)	#share the OffsetLimitList
		
		mpi_synchronize(communicator)
		job_list = range(len(OffsetLimitList))	#corresponding to the indices in the OffsetLimitList
		parameter_list = [self.hostname, self.dbname, self.schema, self.source_table, self.output, \
			self.gene_table, self.commit, OffsetLimitList, self.debug]
		if self.debug:
			sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
		of_name_list = mpi_schedule_jobs(communicator, job_list, node_cluster_stat, parameter_list, self.debug)
		
		mpi_synchronize(communicator)
		
		#collecting 08-14-05 not really necessary, but just to make the number of files small
		if communicator.rank==0:
			netmine_wrapper_instance = netmine_wrapper()
			netmine_wrapper_instance.collect_and_merge_output(of_name_list, self.output)
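
Example #2 builds OffsetLimitList on rank 0 only and then broadcasts it, so every node can look up its own (offset, limit) slice of source_table by index. Below is a minimal sketch of that broadcast step using mpi4py and numpy in place of the older MPI/Numeric modules above; the slice sizes are made up for illustration.

# Broadcast sketch -- illustrates sharing OffsetLimitList, not the original createOffsetLimitList() logic.
import numpy
from mpi4py import MPI

comm = MPI.COMM_WORLD
fake_no_of_nodes = 4	# stand-in for (communicator.size-1)*self.times_nodes
offset_limit_list = numpy.zeros((fake_no_of_nodes, 2), dtype=numpy.int64)
if comm.Get_rank() == 0:
	# Rank 0 decides how to slice the source table: each row is (offset, limit).
	offset_limit_list[:, 0] = numpy.arange(fake_no_of_nodes) * 1000
	offset_limit_list[:, 1] = 1000
comm.Bcast(offset_limit_list, root=0)	# after this call every rank holds the same array
job_list = list(range(len(offset_limit_list)))	# job indices into offset_limit_list, as in run()
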
Example #3
	def run(self):
		"""
		08-06-05
		08-24-05
			read all edge data into matrix
		08-31-05
			the integer returned by encodeOccurrenceBv() could be 138-bit (the human no_of_datasets),
			and Numeric.Int is only 32-bit, so the edge_sig_matrix format was changed
		12-31-05
			no database connection any more
			2 threads on computing node
		01-08-06
			no threads
			back to edge_sig_matrix
		01-11-06
			use the cc module, PostFim
		01-15-06
			add min_line_number and max_line_number
			
			(rank==0)
				--get_no_of_datasets()
				--sendEdgeSigMatrix()
			elif free_computing_nodes:
				--PostFim()
				--receiveEdgeSigMatrix()
			
			--mpi_synchronize()
			
			(rank==0)
				--input_node()
					--input_handler()
			elif free_computing_nodes:
				--computing_node()
					--computing_node_handler()
			else:
				--output_node()
					--output_node_handler()
			
			--mpi_synchronize()
			
			(rank==0)
				--receive node_outputfile
				--netmine_wrapper()
				--collect_and_merge_output()
				--uniqueSort()
			else:
				--return node_outputfile
			
		"""
		communicator = MPI.world.duplicate()
		free_computing_nodes = range(1,communicator.size-1)	#exclude the 1st and last node
		block_size = 10000
		if communicator.rank == 0:
			no_of_datasets = self.get_no_of_datasets(self.sig_vector_fname)
				#no_of_datasets is used in fillEdgeSigMatrix() and patternFormation()
			for node in free_computing_nodes:
				communicator.send(str(no_of_datasets), node, 0)
			self.sendEdgeSigMatrix(communicator, free_computing_nodes, self.sig_vector_fname, \
				no_of_datasets, self.min_sup, self.max_sup, block_size=block_size)
		elif communicator.rank in free_computing_nodes:
			data, source, tag = communicator.receiveString(0, 0)
			no_of_datasets = int(data)	#take the data
			offset = communicator.rank - 1
			node_outputfile = '%s.%s'%(self.outputfile, offset)
			PostFim_instance = PostFim(self.no_cc, no_of_datasets, self.min_cluster_size, node_outputfile)
			self.receiveEdgeSigMatrix(communicator, PostFim_instance, no_of_datasets, block_size)
		
		mpi_synchronize(communicator)
		
		if communicator.rank == 0:
			reader = csv.reader(open(self.inputfile, 'r'), delimiter=' ')
			parameter_list = [reader, self.min_line_number, self.max_line_number]	#01-15-06
			self.line_number = 0	#01-15-06	used in input_handler()
			input_node(communicator, parameter_list, free_computing_nodes, self.queue_size, \
				self.report, input_handler=self.input_handler)
			del reader
		elif communicator.rank in free_computing_nodes:
			parameter_list = [PostFim_instance]
			computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report)
		else:
			parameter_list = []
			output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler, self.report)
			
		mpi_synchronize(communicator)
		
		if communicator.rank == 0:
			#12-31-05 wait until of_name_list is full
			of_name_list = []
			while len(of_name_list)<len(free_computing_nodes):
				data, source, tag = communicator.receiveString(None, 1)
				of_name_list.append(data)
			#collecting
			intermediateFile = '%s.unsorted'%self.outputfile	#intermediateFile to store concatenated results
			netmine_wrapper_instance = netmine_wrapper()
			netmine_wrapper_instance.collect_and_merge_output(of_name_list, intermediateFile)
			self.uniqueSort(intermediateFile, self.outputfile, self.tmpdir)
		elif communicator.rank in free_computing_nodes:
			communicator.send(node_outputfile, 0, 1)	#send back the outputfile
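
This last run() splits the ranks three ways: rank 0 reads the input file and dispatches blocks, the free_computing_nodes process them, and the last rank writes the output, which is the input_node/computing_node/output_node flow listed in the docstring. Below is a minimal sketch of that three-stage layout with mpi4py; the handlers and the round-robin dispatch are simplifications for illustration, not the original functions.

# Pipeline sketch -- an assumption about the input/computing/output node split; needs at least 3 ranks.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
free_computing_nodes = list(range(1, size - 1))	# rank 0 feeds input, the last rank collects output

if rank == 0:
	# Input node: send each block of lines to a computing node, then a stop signal (tag 0) to each.
	blocks = [['line %d' % i] for i in range(10)]	# placeholder for the csv.reader blocks
	for i, block in enumerate(blocks):
		comm.send(block, dest=free_computing_nodes[i % len(free_computing_nodes)], tag=1)
	for node in free_computing_nodes:
		comm.send(None, dest=node, tag=0)
elif rank in free_computing_nodes:
	# Computing node: process each block and forward the result to the output node.
	while True:
		status = MPI.Status()
		block = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
		if status.Get_tag() == 0:
			break
		result = [line.upper() for line in block]	# placeholder for the PostFim-based computation
		comm.send(result, dest=size - 1, tag=1)
	comm.send(None, dest=size - 1, tag=0)	# tell the output node this worker is finished
else:
	# Output node: keep writing results until every computing node has signalled completion.
	finished = 0
	while finished < len(free_computing_nodes):
		status = MPI.Status()
		data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
		if status.Get_tag() == 0:
			finished += 1
		else:
			print(data)	# placeholder for appending to the node output file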