예제 #1
0
    def run(self):
        """
        Drive the whole MPI job, dispatching on this process's rank.

        Roles, as dispatched below:
            rank 0
                Calls input_node() on self.inputfile; after the final
                synchronization it calls uniqueSort() to turn the
                intermediate file into self.outputfile.
            ranks 1 .. size-2
                Call computing_node().
            rank size-1 (the last one)
                Calls fill_edge2encodedOccurrence() before the first
                synchronization, then runs output_node() with
                self.output_cluster as the callback, handing it a
                tab-delimited csv writer on "<outputfile>.unsorted".

        History:
            09-05-05
                Watch: when sending via MPI, tag 0 means from node 0,
                tag 1 means goes to the last node.
            10-21-05
                Replaced the class's own output_node() with the one from
                codense.common for better scheduling.

        Call outline:
            --fill_edge2encodedOccurrence()

            --input_node()
                --get_cluster_block()
            --computing_node()
                --node_fire()
            --output_node()
                --output_cluster()

            --uniqueSort()
        """
        communicator = MPI.world.duplicate()
        node_rank = communicator.rank
        last_rank = communicator.size - 1
        # Intermediate file that stores the concatenated, still unsorted results.
        intermediateFile = "%s.unsorted" % self.outputfile

        if node_rank == last_rank:
            # Only the output node needs the edge -> encoded occurrence mapping.
            edge2encodedOccurrence = {}
            no_of_datasets = self.fill_edge2encodedOccurrence(
                self.hostname, self.dbname, self.schema, edge2encodedOccurrence, self.min_sup, self.max_sup
            )

        mpi_synchronize(communicator)

        if node_rank == 0:
            self.input_node(
                communicator, self.inputfile, self.min_size, self.cluster_block_size, self.cluster_block_edges
            )
        elif node_rank < last_rank:  # exclude the last node
            self.computing_node(communicator, self.cluster_block_size, self.min_size, self.min_con)
        elif node_rank == last_rank:
            codense2db_instance = codense2db()
            free_computing_nodes = range(1, last_rank)
            intermediate_handle = open(intermediateFile, "w")
            try:
                writer = csv.writer(intermediate_handle, delimiter="\t")
                parameter_list = [writer, codense2db_instance, edge2encodedOccurrence, no_of_datasets]
                output_node(
                    communicator,
                    free_computing_nodes,
                    parameter_list,
                    self.output_cluster,
                    report=self.report,
                    type=Numeric.Int,
                )
            finally:
                # Close explicitly: parameter_list keeps the writer (and thus
                # the file) referenced, so dropping the local name alone would
                # not flush it. The data must be on disk before the
                # synchronization below, because rank 0 reads this file in
                # uniqueSort() right after it.
                intermediate_handle.close()

        mpi_synchronize(communicator)

        # Collecting: rank 0 produces the final output from the intermediate file.
        if node_rank == 0:
            MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
            MpiFromDatasetSignatureToPattern_instance.uniqueSort(intermediateFile, self.outputfile)