Example no. 1
0
    def run(self):
        """
        MPI driver for the cluster-pattern pipeline.

        Splits a duplicate of the world communicator into three roles:
          * rank 0            -- input node: reads self.inputfile and dispatches
                                 cluster blocks to the computing nodes
          * ranks 1..size-2   -- computing nodes (self.computing_node)
          * rank size-1       -- output node: collects results and writes them,
                                 unsorted, to "<outputfile>.unsorted"
        After a final barrier, rank 0 merge-sorts the intermediate file into
        self.outputfile via uniqueSort().

        History (kept from original):
        09-05-05
            Watch: when sending via MPI, tag 0 means from node 0, tag 1 means
            goes to the last node.
        10-21-05
            replace output_node() with the one from codense.common for better
            scheduling

            --fill_edge2encodedOccurrence()

            --input_node()
                --get_cluster_block()
            --computing_node()
                --node_fire()
            --output_node()
                --output_cluster()

            --uniqueSort()
        """
        # Duplicate the world communicator so this run's traffic is isolated.
        communicator = MPI.world.duplicate()
        node_rank = communicator.rank
        intermediateFile = "%s.unsorted" % self.outputfile  # intermediateFile to store concatenated results
        # Only the output node (last rank) loads the edge->occurrence map from
        # the database; edge2encodedOccurrence and no_of_datasets are therefore
        # UNDEFINED on every other rank.
        if communicator.rank == (communicator.size - 1):
            edge2encodedOccurrence = {}
            no_of_datasets = self.fill_edge2encodedOccurrence(
                self.hostname, self.dbname, self.schema, edge2encodedOccurrence, self.min_sup, self.max_sup
            )

        # Barrier: make sure the DB load above finishes before messaging starts.
        mpi_synchronize(communicator)

        if node_rank == 0:
            # Input node: feeds blocks of input to the computing nodes.
            self.input_node(
                communicator, self.inputfile, self.min_size, self.cluster_block_size, self.cluster_block_edges
            )
        elif node_rank <= communicator.size - 2:  # exclude the last node
            self.computing_node(communicator, self.cluster_block_size, self.min_size, self.min_con)
        elif node_rank == communicator.size - 1:
            # Output node: gather results from all computing nodes and append
            # them (unsorted) to the intermediate file.
            codense2db_instance = codense2db()
            free_computing_nodes = range(1, communicator.size - 1)
            writer = csv.writer(open(intermediateFile, "w"), delimiter="\t")
            parameter_list = [writer, codense2db_instance, edge2encodedOccurrence, no_of_datasets]
            output_node(
                communicator,
                free_computing_nodes,
                parameter_list,
                self.output_cluster,
                report=self.report,
                type=Numeric.Int,
            )
            # NOTE(review): the file opened above is never closed explicitly;
            # `del writer` presumably drops the last reference so the handle is
            # flushed before the sort below -- confirm on the target runtime.
            del writer
            # 10-21-05self.output_node(communicator, intermediateFile, codense2db_instance, edge2encodedOccurrence, no_of_datasets)

        # Barrier: all output must be written before rank 0 sorts it.
        mpi_synchronize(communicator)
        # collecting
        if node_rank == 0:
            MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
            MpiFromDatasetSignatureToPattern_instance.uniqueSort(intermediateFile, self.outputfile)
Example no. 2
0
	def run(self):
		"""
		MPI driver: distribute the edge signature matrix to the computing
		nodes, then run an input/computing/output pipeline over the world
		communicator.

		Roles:
		  * rank 0            -- reads no_of_datasets and the edge signature
		                         matrix from self.sig_vector_fname, broadcasts
		                         both, then acts as the input node for
		                         self.inputfile
		  * ranks 1..size-2   -- computing nodes (johnson_sp instance fed by
		                         receiveEdgeSigMatrix)
		  * rank size-1       -- output node, writing self.outputfile

		History (kept from original):
		10-07-05
		10-09-05 input_node() add mcl_table
		10-24-05 create new views for splat_table and mcl_table
		10-28-05 no views, no new pattern_table, read from inputfile, write to outputfile
		01-24-06 copy a whole block from MpiFromDatasetSignatureToPattern.py to read in edge sig matrix

			(rank==0)
				--get_no_of_datasets()
				--sendEdgeSigMatrix()
			elif free_computing_nodes:
				--PostFim()
				--receiveEdgeSigMatrix()

			mpi_synchronize()

			--input_node()
				--input_handler()
			--computing_node()
				--node_fire()
				--cleanup_handler()
			--output_node()
				--output_handler()
		"""
		communicator = MPI.world.duplicate()
		node_rank = communicator.rank		
		free_computing_nodes = range(1,communicator.size-1)	#exclude the last node
		
		#01-24-06 following block is directly copied from MpiFromDatasetSignatureToPattern.py
		block_size = 10000	# number of entries sent per MPI message when streaming the matrix
		MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
		if communicator.rank == 0:
			no_of_datasets = MpiFromDatasetSignatureToPattern_instance.get_no_of_datasets(self.sig_vector_fname)
				#no_of_datasets is used in fillEdgeSigMatrix() and patternFormation()
			# Broadcast the dataset count as a string (tag 0) to every worker.
			for node in free_computing_nodes:
				communicator.send(str(no_of_datasets), node, 0)
			MpiFromDatasetSignatureToPattern_instance.sendEdgeSigMatrix(communicator, free_computing_nodes, self.sig_vector_fname, \
				no_of_datasets, self.min_sup, self.max_sup, block_size)
		elif communicator.rank in free_computing_nodes:
			data, source, tag = communicator.receiveString(0, 0)
			no_of_datasets = int(data)	#take the data
			# j_instance exists ONLY on the computing nodes; it is the per-worker
			# solver that later backs node_fire() below.
			j_instance = johnson_sp(no_of_datasets)
			MpiFromDatasetSignatureToPattern_instance.receiveEdgeSigMatrix(communicator, j_instance, no_of_datasets, block_size)
		
		# Barrier: matrix distribution must complete before the pipeline starts.
		mpi_synchronize(communicator)
		
		if node_rank == 0:
			# Input node: stream tab-delimited rows of inputfile to the workers.
			inf = csv.reader(open(self.inputfile,'r'), delimiter='\t')
			parameter_list = [inf]
			input_node(communicator, parameter_list, free_computing_nodes, self.size, self.report, input_handler=self.input_handler)
			del inf
		elif node_rank in free_computing_nodes:	#exclude the last node
			parameter_list = [j_instance, self.parser_type]
			computing_node(communicator, parameter_list, self.node_fire, self.cleanup_handler, self.report)
		elif node_rank==communicator.size-1:
			# Output node: collect worker results into the final output file.
			writer = csv.writer(open(self.outputfile, 'w'), delimiter='\t')
			parameter_list = [writer]
			output_node(communicator, free_computing_nodes, parameter_list, self.output_handler, self.report)
			# NOTE(review): file handle is released only via this del -- the
			# underlying file object is never closed explicitly; confirm flush
			# semantics on the target runtime.
			del writer