Esempi in Python per ParamSweeper.stats

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: execo_engine

Classe/tipologia: ParamSweeper

Metodo/funzione: stats

Esempi su hotexamples.com: 2

ParamSweeper.stats in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per execo_engine.ParamSweeper.stats, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

ParamSweeper(17)

done(12)

get_remaining(11)

get_next(9)

cancel(6)

skip(5)

get_sweeps(2)

get_skipped(1)

stats(1)

Esempio n. 1

Mostra file

File: active_data_tps.py Progetto: PierreVeyre/execo-g5k-tools

	def run(self):
		# Defining experiment parameters
		self.parameters = {
			'n_clients': [400, 450, 500, 550, 600],
			'n_transitions': [10000]
		}
		cluster = 'griffon'
		sweeps = sweep(self.parameters)
		sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
		server_out_path = os.path.join(self.result_dir, "server.out")
		
		self._updateStat(sweeper.stats())
		
		# Loop on the number of nodes
		while True:
			# Taking the next parameter combinations
			comb = sweeper.get_next()
			if not comb: break

			# Performing the submission on G5K
			site = get_cluster_site(cluster)
			self._log("Output will go to " + self.result_dir)
			
			n_nodes = int(math.ceil(float(comb['n_clients']) / EX5.get_host_attributes(cluster + '-1')['architecture']['smt_size'])) + 1
			self._log("Reserving {0} nodes on {1}".format(n_nodes, site))
			
			resources = "{cluster=\\'" + cluster + "\\'}/nodes=" + str(n_nodes)
			submission = EX5.OarSubmission(resources = resources, job_type = 'allow_classic_ssh', walltime ='00:10:00')
			
			job = EX5.oarsub([(submission, site)])
			self.__class__._job = job
			
			# Sometimes oarsub fails silently
			if job[0][0] is None:
				print("\nError: no job was created")
				sys.exit(1)
				
			# Wait for the job to start
			self._log("Waiting for job {0} to start...\n".format(BOLD_MAGENTA + str(job[0][0]) + NORMAL))
			EX5.wait_oar_job_start(job[0][0], job[0][1], prediction_callback = prediction)
			nodes = EX5.get_oar_job_nodes(job[0][0], job[0][1])
			
			# Deploying nodes
			#deployment = EX5.Deployment(hosts = nodes, env_file='path_to_env_file')
			#run_deploy = EX5.deploy(deployment)
			#nodes_deployed = run_deploy.hosts[0]
			
			# Copying active_data program on all deployed hosts
			EX.Put([nodes[0]], '../dist/active-data-lib-0.1.2.jar', connexion_params = {'user': '******'}).run()
			EX.Put([nodes[0]], '../server.policy', connexion_params = {'user': '******'}).run()
			
			# Loop on the number of requests per client process
			while True:
				# Split the nodes
				clients = nodes[1:]
				server = nodes[0] 
				
				self._log("Running experiment with {0} nodes and {1} transitions per client".format(len(clients), comb['n_transitions']))
				
				# Launching Server on one node
				out_handler = FileOutputHandler(server_out_path)
				launch_server = EX.Remote('java -jar active-data-lib-0.1.2.jar', [server], stdout_handler = out_handler, stderr_handler = out_handler).start()
				self._log("Server started on " + server.address)
				time.sleep(2)
				
				# Launching clients
				rank=0
				n_cores = EX5.get_host_attributes(clients[0])['architecture']['smt_size'];
				cores = nodes * n_cores
				cores = cores[0:comb['n_clients']] # Cut out the additional cores
				
				client_connection_params = {
						'taktuk_gateway': 'lyon.grid5000.fr',
						'host_rewrite_func': None
				}
				
				self._log("Launching {0} clients...".format(len(cores)))
				
				client_cmd = "/usr/bin/env java -cp /home/ansimonet/active-data-lib-0.1.2.jar org.inria.activedata.examples.perf.TransitionsPerSecond " + \
								"{0} {1} {2} {3} {4}".format(server.address, 1200, "{{range(len(cores))}}", len(cores), comb['n_transitions'])
				client_out_handler = FileOutputHandler(os.path.join(self.result_dir, "clients.out"))
				client_request = EX.TaktukRemote(client_cmd, cores, connexion_params = client_connection_params, \
									stdout_handler = client_out_handler, stderr_handler = client_out_handler)
				
				client_request.run()
				
				if not client_request.ok():
					# Some client failed, please panic
					self._log("One or more client process failed. Enjoy reading their outputs.")
					self._log("OUTPUT STARTS -------------------------------------------------\n")
					for process in client_request.processes():
						print("----- {0} returned {1}".format(process.host().address, process.exit_code()))
						if not process.stdout() == "": print(GREEN + process.stdout() + NORMAL)
						if not process.stderr() == "": print(RED + process.stderr() + NORMAL)
						print("")
					self._log("OUTPUT ENDS ---------------------------------------------------\n")
					sweeper.skip(comb)
					launch_server.kill()
					launch_server.wait()
				else:
					# Waiting for server to end
					launch_server.wait()
				
					# Getting log files
					distant_path = OUT_FILE_FORMAT.format(len(cores), comb['n_transitions'])
					local_path = distant_path
					
					EX.Get([server], distant_path).run()
					
					EX.Local('mv ' + distant_path + ' ' + os.path.join(self.result_dir, local_path)).run()
					
					EX.Get([server], 'client_*.out', local_location = self.result_dir)
					EX.Remote('rm -f client_*.out', [server])
					
					self._log("Finishing experiment with {0} clients and {1} transitions per client".format(comb['n_clients'], comb['n_transitions']))
					
					sweeper.done(comb)
					
				sub_comb = sweeper.get_next (filtr = lambda r: filter(lambda s: s["n_clients"] == comb['n_clients'], r))
				self._updateStat(sweeper.stats())
				
				if not sub_comb: 
					# Killing job
					EX5.oar.oardel(job)
					self.__class__._job = None
					break
				else: 
					comb = sub_comb
		
		print ""

Esempio n. 2

Mostra file

    def run(self):
        # Defining experiment parameters
        self.parameters = {
            'n_clients': [400, 450, 500, 550, 600],
            'n_transitions': [10000]
        }
        cluster = 'griffon'
        sweeps = sweep(self.parameters)
        sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
        server_out_path = os.path.join(self.result_dir, "server.out")

        self._updateStat(sweeper.stats())

        # Loop on the number of nodes
        while True:
            # Taking the next parameter combinations
            comb = sweeper.get_next()
            if not comb: break

            # Performing the submission on G5K
            site = get_cluster_site(cluster)
            self._log("Output will go to " + self.result_dir)

            n_nodes = int(
                math.ceil(
                    float(comb['n_clients']) / EX5.get_host_attributes(
                        cluster + '-1')['architecture']['smt_size'])) + 1
            self._log("Reserving {0} nodes on {1}".format(n_nodes, site))

            resources = "{cluster=\\'" + cluster + "\\'}/nodes=" + str(n_nodes)
            submission = EX5.OarSubmission(resources=resources,
                                           job_type='allow_classic_ssh',
                                           walltime='00:10:00')

            job = EX5.oarsub([(submission, site)])
            self.__class__._job = job

            # Sometimes oarsub fails silently
            if job[0][0] is None:
                print("\nError: no job was created")
                sys.exit(1)

            # Wait for the job to start
            self._log(
                "Waiting for job {0} to start...\n".format(BOLD_MAGENTA +
                                                           str(job[0][0]) +
                                                           NORMAL))
            EX5.wait_oar_job_start(job[0][0],
                                   job[0][1],
                                   prediction_callback=prediction)
            nodes = EX5.get_oar_job_nodes(job[0][0], job[0][1])

            # Deploying nodes
            #deployment = EX5.Deployment(hosts = nodes, env_file='path_to_env_file')
            #run_deploy = EX5.deploy(deployment)
            #nodes_deployed = run_deploy.hosts[0]

            # Copying active_data program on all deployed hosts
            EX.Put([nodes[0]],
                   '../dist/active-data-lib-0.1.2.jar',
                   connexion_params={
                       'user': '******'
                   }).run()
            EX.Put([nodes[0]],
                   '../server.policy',
                   connexion_params={
                       'user': '******'
                   }).run()

            # Loop on the number of requests per client process
            while True:
                # Split the nodes
                clients = nodes[1:]
                server = nodes[0]

                self._log(
                    "Running experiment with {0} nodes and {1} transitions per client"
                    .format(len(clients), comb['n_transitions']))

                # Launching Server on one node
                out_handler = FileOutputHandler(server_out_path)
                launch_server = EX.Remote(
                    'java -jar active-data-lib-0.1.2.jar', [server],
                    stdout_handler=out_handler,
                    stderr_handler=out_handler).start()
                self._log("Server started on " + server.address)
                time.sleep(2)

                # Launching clients
                rank = 0
                n_cores = EX5.get_host_attributes(
                    clients[0])['architecture']['smt_size']
                cores = nodes * n_cores
                cores = cores[
                    0:comb['n_clients']]  # Cut out the additional cores

                client_connection_params = {
                    'taktuk_gateway': 'lyon.grid5000.fr',
                    'host_rewrite_func': None
                }

                self._log("Launching {0} clients...".format(len(cores)))

                client_cmd = "/usr/bin/env java -cp /home/ansimonet/active-data-lib-0.1.2.jar org.inria.activedata.examples.perf.TransitionsPerSecond " + \
                    "{0} {1} {2} {3} {4}".format(server.address, 1200, "{{range(len(cores))}}", len(cores), comb['n_transitions'])
                client_out_handler = FileOutputHandler(
                    os.path.join(self.result_dir, "clients.out"))
                client_request = EX.TaktukRemote(client_cmd, cores, connexion_params = client_connection_params, \
                     stdout_handler = client_out_handler, stderr_handler = client_out_handler)

                client_request.run()

                if not client_request.ok():
                    # Some client failed, please panic
                    self._log(
                        "One or more client process failed. Enjoy reading their outputs."
                    )
                    self._log(
                        "OUTPUT STARTS -------------------------------------------------\n"
                    )
                    for process in client_request.processes():
                        print("----- {0} returned {1}".format(
                            process.host().address, process.exit_code()))
                        if not process.stdout() == "":
                            print(GREEN + process.stdout() + NORMAL)
                        if not process.stderr() == "":
                            print(RED + process.stderr() + NORMAL)
                        print("")
                    self._log(
                        "OUTPUT ENDS ---------------------------------------------------\n"
                    )
                    sweeper.skip(comb)
                    launch_server.kill()
                    launch_server.wait()
                else:
                    # Waiting for server to end
                    launch_server.wait()

                    # Getting log files
                    distant_path = OUT_FILE_FORMAT.format(
                        len(cores), comb['n_transitions'])
                    local_path = distant_path

                    EX.Get([server], distant_path).run()

                    EX.Local('mv ' + distant_path + ' ' +
                             os.path.join(self.result_dir, local_path)).run()

                    EX.Get([server],
                           'client_*.out',
                           local_location=self.result_dir)
                    EX.Remote('rm -f client_*.out', [server])

                    self._log(
                        "Finishing experiment with {0} clients and {1} transitions per client"
                        .format(comb['n_clients'], comb['n_transitions']))

                    sweeper.done(comb)

                sub_comb = sweeper.get_next(filtr=lambda r: filter(
                    lambda s: s["n_clients"] == comb['n_clients'], r))
                self._updateStat(sweeper.stats())

                if not sub_comb:
                    # Killing job
                    EX5.oar.oardel(job)
                    self.__class__._job = None
                    break
                else:
                    comb = sub_comb

        print ""