def run(self):
    """
    Run the MPI pipeline; every rank takes one role based on its position.

    Roles (as selected by the branches below):
        rank 0            -- calls self.input_node() with self.inputfile;
                             after the final barrier it also calls
                             uniqueSort(intermediateFile, self.outputfile).
        ranks 1..size-2   -- call self.computing_node().
        rank size-1       -- calls self.fill_edge2encodedOccurrence() before
                             the first barrier, then runs output_node() with
                             a tab-delimited csv writer on
                             "<self.outputfile>.unsorted".

    History:
    09-05-05
        Watch: when sending via MPI, tag 0 means from node 0, tag 1 means
        goes to the last node.
    10-21-05
        replace output_node() with the one from codense.common for better
        scheduling
    later
        close the intermediate file explicitly before the final barrier.
        The former 'del writer' released nothing because parameter_list
        still referenced the writer, so buffered rows could be missing when
        rank 0 was handed the file.

    Call outline:
        --fill_edge2encodedOccurrence()
        --input_node()
            --get_cluster_block()
        --computing_node()
            --node_fire()
        --output_node()
            --output_cluster()
        --uniqueSort()
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank
    last_rank = communicator.size - 1
    # intermediateFile to store concatenated results
    intermediateFile = "%s.unsorted" % self.outputfile

    # Only the last rank needs the edge lookup; it is consumed by
    # self.output_cluster through parameter_list further down.
    if node_rank == last_rank:
        edge2encodedOccurrence = {}
        no_of_datasets = self.fill_edge2encodedOccurrence(
            self.hostname,
            self.dbname,
            self.schema,
            edge2encodedOccurrence,
            self.min_sup,
            self.max_sup,
        )

    mpi_synchronize(communicator)

    # Branch order matters for tiny communicators (e.g. size 1 makes rank 0
    # also the last rank), so it is kept exactly as before.
    if node_rank == 0:
        self.input_node(
            communicator,
            self.inputfile,
            self.min_size,
            self.cluster_block_size,
            self.cluster_block_edges,
        )
    elif node_rank <= communicator.size - 2:  # exclude the last node
        self.computing_node(
            communicator, self.cluster_block_size, self.min_size, self.min_con
        )
    elif node_rank == last_rank:
        codense2db_instance = codense2db()
        free_computing_nodes = range(1, communicator.size - 1)
        # Keep a handle on the file object itself so it can be closed (and
        # thereby flushed) deterministically before the barrier below.
        intermediate_handle = open(intermediateFile, "w")
        try:
            writer = csv.writer(intermediate_handle, delimiter="\t")
            parameter_list = [
                writer,
                codense2db_instance,
                edge2encodedOccurrence,
                no_of_datasets,
            ]
            output_node(
                communicator,
                free_computing_nodes,
                parameter_list,
                self.output_cluster,
                report=self.report,
                type=Numeric.Int,
            )
        finally:
            intermediate_handle.close()

    mpi_synchronize(communicator)

    # collecting
    if node_rank == 0:
        MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
        MpiFromDatasetSignatureToPattern_instance.uniqueSort(
            intermediateFile, self.outputfile
        )
def run(self):
    """
    Run the MPI pipeline; every rank takes one role based on its position.

    Phase 1 (before the barrier):
        rank 0            -- gets no_of_datasets from self.sig_vector_fname,
                             sends it (as a string, tag 0) to every free
                             computing node, then calls sendEdgeSigMatrix().
        ranks 1..size-2   -- receive no_of_datasets from rank 0, build a
                             johnson_sp instance and call
                             receiveEdgeSigMatrix() with it.
        rank size-1       -- does nothing in this phase.

    Phase 2 (after the barrier):
        rank 0            -- input_node() with a tab-delimited csv reader on
                             self.inputfile.
        ranks 1..size-2   -- computing_node() with
                             [j_instance, self.parser_type].
        rank size-1       -- output_node() with a tab-delimited csv writer
                             on self.outputfile.

    History:
    10-07-05
    10-09-05
        input_node() add mcl_table
    10-24-05
        create new views for splat_table and mcl_table
    10-28-05
        no views, no new pattern_table, read from inputfile, write to
        outputfile
    01-24-06
        copy a whole block from MpiFromDatasetSignatureToPattern.py to read
        in edge sig matrix (the original outline listed PostFim() for the
        computing nodes)
    later
        close the input and output files explicitly. The former 'del inf' /
        'del writer' released nothing because parameter_list still
        referenced the reader/writer.

    Call outline:
        (rank==0)
            --get_no_of_datasets()
            --sendEdgeSigMatrix()
        elif free_computing_nodes:
            --johnson_sp()
            --receiveEdgeSigMatrix()

        mpi_synchronize()

        --input_node()
            --input_handler()
        --computing_node()
            --node_fire()
            --cleanup_handler()
        --output_node()
            --output_handler()
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank
    free_computing_nodes = range(1, communicator.size - 1)  # exclude the last node

    # 01-24-06 following block is directly copied from MpiFromDatasetSignatureToPattern.py
    block_size = 10000
    MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
    if node_rank == 0:
        # no_of_datasets is used in fillEdgeSigMatrix() and patternFormation()
        no_of_datasets = MpiFromDatasetSignatureToPattern_instance.get_no_of_datasets(
            self.sig_vector_fname
        )
        for node in free_computing_nodes:
            communicator.send(str(no_of_datasets), node, 0)
        MpiFromDatasetSignatureToPattern_instance.sendEdgeSigMatrix(
            communicator,
            free_computing_nodes,
            self.sig_vector_fname,
            no_of_datasets,
            self.min_sup,
            self.max_sup,
            block_size,
        )
    elif node_rank in free_computing_nodes:
        data, source, tag = communicator.receiveString(0, 0)
        no_of_datasets = int(data)  # take the data
        j_instance = johnson_sp(no_of_datasets)
        MpiFromDatasetSignatureToPattern_instance.receiveEdgeSigMatrix(
            communicator, j_instance, no_of_datasets, block_size
        )

    mpi_synchronize(communicator)

    if node_rank == 0:
        # Hold the file object so it can be closed once input_node() returns
        # (or raises), instead of lingering until interpreter exit.
        input_handle = open(self.inputfile, 'r')
        try:
            inf = csv.reader(input_handle, delimiter='\t')
            parameter_list = [inf]
            input_node(
                communicator,
                parameter_list,
                free_computing_nodes,
                self.size,
                self.report,
                input_handler=self.input_handler,
            )
        finally:
            input_handle.close()
    elif node_rank in free_computing_nodes:  # exclude the last node
        parameter_list = [j_instance, self.parser_type]
        computing_node(
            communicator,
            parameter_list,
            self.node_fire,
            self.cleanup_handler,
            self.report,
        )
    elif node_rank == communicator.size - 1:
        # Explicit close guarantees buffered rows reach self.outputfile as
        # soon as output_node() is done.
        output_handle = open(self.outputfile, 'w')
        try:
            writer = csv.writer(output_handle, delimiter='\t')
            parameter_list = [writer]
            output_node(
                communicator,
                free_computing_nodes,
                parameter_list,
                self.output_handler,
                self.report,
            )
        finally:
            output_handle.close()