def run(self):
	"""
	06-03-05 MPI driver: fetch edge data on the master node, fan the
	qualified jobs out over all ranks via mpi_schedule_jobs(), then merge
	the per-node output files on rank 0.

	--<get_edge_data>
	--mpi_schedule_jobs()
		--callTightClust()
			--<PreprocessEdgeData>
			--tightClust
	--<netmine_wrapper>
	"""
	comm = MPI.world.duplicate()
	# Every rank constructs the fetcher, but only rank 0 actually runs it.
	edge_data_fetcher = get_edge_data(
		self.hostname, self.dbname, self.schema, self.table,
		self.output_dir, self.min_no_of_edges, self.debug, self.no_of_nas)
	if comm.rank == 0:
		sys.stderr.write("this is node %s\n" % comm.rank)
		edge_data_fetcher.run()
	mpi_synchronize(comm)

	pending_jobs = edge_data_fetcher.go_no_qualified
	# Parameters shared by every callTightClust() invocation.
	shared_params = [
		self.output_dir, self.no_of_nas, self.top_percentage,
		self.targetClustNum, self.min_k, self.max_k, self.alpha,
		self.beta, self.topNum, self.seqNum, self.resampNum,
		self.subSampPercent, self.npass]
	if self.debug:
		sys.stderr.write("The common parameter_list is %s.\n" % repr(shared_params))
	of_name_list = mpi_schedule_jobs(comm, pending_jobs, callTightClust,
		shared_params, self.debug)
	mpi_synchronize(comm)

	# Collecting: rank 0 merges the per-node outputs into one file.
	if comm.rank == 0:
		merged_fname = os.path.join(self.output_dir, 'tightClust')
		merger = netmine_wrapper()
		merger.collect_and_merge_output(of_name_list, merged_fname)
def run(self):
	"""
	08-14-05

	MPI driver: rank 0 pulls (offset, limit) work slices from the database
	and broadcasts them to all ranks; every rank then processes its share
	of slice indices through mpi_schedule_jobs()/node_cluster_stat, and
	rank 0 finally merges the per-node output files into self.output.
	"""
	communicator = MPI.world.duplicate()
	# NOTICE: fake_no_of_nodes is used to enlarge (or shrink) the actual
	# number of nodes, to balance the amount of work on each node.
	# NOTE(review): `times_nodes` is a bare name — presumably a module-level
	# constant defined elsewhere in this file; confirm it is in scope.
	fake_no_of_nodes = int((communicator.size-1)*times_nodes)
	# Pre-allocate the receive buffer on every rank so broadcast() below
	# can fill it in place on the non-root ranks.
	OffsetLimitList = Numeric.zeros((fake_no_of_nodes,2), Numeric.Int)
	if communicator.rank == 0:
		(conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
		OffsetLimitList = self.createOffsetLimitList(curs, self.source_table, fake_no_of_nodes)
		OffsetLimitList = Numeric.array(OffsetLimitList, Numeric.Int)	#transform it into Numeric array to broadcast()
		if self.commit:	#08-14-05 create the gene_table
			instance = gene_stat()
			instance.createGeneTable(curs, self.gene_table)
			curs.execute('end')	# end the transaction so the table creation persists
		if self.debug:
			sys.stderr.write("OffsetLimitList: %s"%repr(OffsetLimitList))
		del conn, curs	# drop the DB handles before entering the MPI phase
	communicator.broadcast(OffsetLimitList, 0)	#share the OffsetLimitList
	mpi_synchronize(communicator)

	job_list = range(len(OffsetLimitList))	#corresponding to the indices in the OffsetLimitList
	# Parameters shared by every node_cluster_stat() invocation.
	parameter_list =[self.hostname, self.dbname, self.schema, self.source_table, self.output, \
		self.gene_table, self.commit, OffsetLimitList, self.debug]
	if self.debug:
		sys.stderr.write("The common parameter_list is %s.\n"%repr(parameter_list))
	of_name_list = mpi_schedule_jobs(communicator, job_list, node_cluster_stat, parameter_list, self.debug)
	mpi_synchronize(communicator)

	#collecting 08-14-05 not really necessary, but just to make the number of files small
	if communicator.rank==0:
		netmine_wrapper_instance = netmine_wrapper()
		netmine_wrapper_instance.collect_and_merge_output(of_name_list, self.output)
def run(self):
	"""
	MPI driver with three roles: rank 0 is the input node, the last rank
	is the output node, and all ranks in between are computing nodes.

	History:
	08-06-05
	08-24-05 read all edge data into matrix
	08-31-05 the integer returned by encodeOccurrenceBv() could be 138-bit
		(human no_of_datasets) and Numeric.Int is only 32 bit, so change
		edge_sig_matrix format
	12-31-05 no database connection any more; 2 threads on computing node
	01-08-06 no threads, back to edge_sig_matrix
	01-11-06 use the cc module, PostFim
	01-15-06 add min_line_number and max_line_number

	(rank==0)
		--get_no_of_datasets()
		--sendEdgeSigMatrix()
	elif free_computing_nodes:
		--PostFim()
		--receiveEdgeSigMatrix()
	--mpi_synchronize()
	(rank==0)
		--input_node()
			--input_handler()
	elif free_computing_nodes:
		--computing_node()
			--computing_node_handler()
	else:
		--output_node()
			--output_node_handler()
	--mpi_synchronize()
	(rank==0)
		--receive node_outputfile
		--netmine_wrapper()
		--collect_and_merge_output()
		--uniqueSort()
	else:
		--return node_outputfile
	"""
	communicator = MPI.world.duplicate()
	free_computing_nodes = range(1, communicator.size-1)	#exclude the 1st and last node
	block_size = 10000	#rows per chunk when streaming the edge-sig matrix
	if communicator.rank == 0:
		#no_of_datasets is used in fillEdgeSigMatrix() and patternFormation()
		no_of_datasets = self.get_no_of_datasets(self.sig_vector_fname)
		for node in free_computing_nodes:
			communicator.send(str(no_of_datasets), node, 0)
		#BUGFIX: pass the shared block_size variable instead of a second
		#hard-coded 10000, so sender and receiver chunk sizes cannot drift.
		self.sendEdgeSigMatrix(communicator, free_computing_nodes, self.sig_vector_fname, \
			no_of_datasets, self.min_sup, self.max_sup, block_size=block_size)
	elif communicator.rank in free_computing_nodes:
		data, source, tag = communicator.receiveString(0, 0)
		no_of_datasets = int(data)	#take the data
		offset = communicator.rank - 1
		node_outputfile = '%s.%s'%(self.outputfile, offset)
		PostFim_instance = PostFim(self.no_cc, no_of_datasets, self.min_cluster_size, node_outputfile)
		self.receiveEdgeSigMatrix(communicator, PostFim_instance, no_of_datasets, block_size)

	mpi_synchronize(communicator)

	if communicator.rank == 0:
		#BUGFIX: keep a handle on the input file so it can be closed
		#explicitly; the original passed an anonymous open() into
		#csv.reader and leaked the file object.
		input_file = open(self.inputfile, 'r')
		reader = csv.reader(input_file, delimiter=' ')
		parameter_list = [reader, self.min_line_number, self.max_line_number]	#01-15-06
		self.line_number = 0	#01-15-06 used in input_handler()
		input_node(communicator, parameter_list, free_computing_nodes, self.queue_size, \
			self.report, input_handler=self.input_handler)
		del reader
		input_file.close()
	elif communicator.rank in free_computing_nodes:
		parameter_list = [PostFim_instance]
		computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report)
	else:
		parameter_list = []
		output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler, self.report)
	mpi_synchronize(communicator)

	if communicator.rank == 0:
		#12-31-05 wait until of_name_list is full
		of_name_list = []
		while len(of_name_list) < len(free_computing_nodes):
			data, source, tag = communicator.receiveString(None, 1)
			of_name_list.append(data)
		#collecting
		intermediateFile = '%s.unsorted'%self.outputfile	#intermediateFile to store concatenated results
		netmine_wrapper_instance = netmine_wrapper()
		netmine_wrapper_instance.collect_and_merge_output(of_name_list, intermediateFile)
		self.uniqueSort(intermediateFile, self.outputfile, self.tmpdir)
	elif communicator.rank in free_computing_nodes:
		communicator.send(node_outputfile, 0, 1)	#send back the outputfile