# Deploy a single node of an OAR job with a HermitCore NFS environment.
# Note: the submission argument is unused in this snippet.
from execo_g5k import get_oar_job_nodes, Deployment, deploy

def deploy_node(job_id, site, submission):
    node = get_oar_job_nodes(job_id, site)[0]
    deployment = Deployment(
        hosts=[node],
        env_file="unikernels/hermit/debian10-x64-nfs-hermit.env",
        user="******",
        other_options="-k")  # passed through to kadeploy3; -k installs the user's SSH key on the deployed node
    node.user = "******"
    deploy(deployment)
    return node
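A minimal sketch of how deploy_node might be driven end to end, assuming a Grid'5000 site name; the site, resource string and walltime below are illustrative, not from the original:

from execo_g5k import OarSubmission, oarsub, oardel, wait_oar_job_start

site = "nancy"  # illustrative site
submission = OarSubmission(resources="nodes=1", job_type="deploy",
                           walltime="1:00:00")
[(job_id, frontend)] = oarsub([(submission, site)])
if job_id:
    try:
        wait_oar_job_start(job_id, frontend)
        node = deploy_node(job_id, frontend, submission)
        print("Deployed node:", node.address)
    finally:
        oardel([(job_id, frontend)])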
def _launch_kadeploy(self, max_tries=10, check_deploy=True):
    """Create an execo_g5k.Deployment object, launch the deployment and
    return a tuple (deployed_hosts, undeployed_hosts)
    """
    # If the provisioner was given oar_job_ids and no config_file_path,
    # the configs attribute was never created: nothing to deploy.
    if not hasattr(self, 'configs'):
        logger.info('The list of %s hosts: \n%s', len(self.hosts),
                    hosts_list(self.hosts, separator='\n'))
        return

    logger.info('Deploying %s hosts \n%s', len(self.hosts),
                hosts_list(self.hosts, separator='\n'))
    try:
        deployment = Deployment(
            hosts=[Host(canonical_host_name(host)) for host in self.hosts],
            env_file=self.configs['custom_image'],
            env_name=self.configs['cloud_provider_image'])
    except ValueError:
        logger.error(
            "Please put in the config file either custom_image or cloud_provider_image."
        )
        exit()
    # user=self.env_user,
    # vlan=self.kavlan)

    # Activate kadeploy output log if log level is debug
    if logger.getEffectiveLevel() <= 10:
        stdout = [sys.stdout]
        stderr = [sys.stderr]
    else:
        stdout = None
        stderr = None

    # deploy() iterates through each frontend to run kadeploy, so the
    # deployments are performed sequentially, site after site.
    deployed_hosts, undeployed_hosts = deploy(
        deployment,
        stdout_handlers=stdout,
        stderr_handlers=stderr,
        num_tries=max_tries,
        check_deployed_command=check_deploy)
    deployed_hosts = list(deployed_hosts)
    undeployed_hosts = list(undeployed_hosts)

    # # Renaming hosts if a kavlan is used
    # if self.kavlan:
    #     for i, host in enumerate(deployed_hosts):
    #         deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
    #     for i, host in enumerate(undeployed_hosts):
    #         undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)

    logger.info('Deployed %s hosts successfully', len(deployed_hosts))
    cr = '\n' if len(undeployed_hosts) > 0 else ''
    logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                hosts_list(undeployed_hosts))
    return deployed_hosts, undeployed_hosts
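The try/except above relies on execo_g5k.Deployment raising a ValueError when the image settings are inconsistent, so exactly one of the two config keys should be set. An illustrative shape of the configs dict this variant consumes (the provisioner's actual config file format is not shown here, and the path below is hypothetical):

configs = {
    # Path to a kadeploy environment description file (custom image)...
    'custom_image': '/home/user/envs/debian10-custom.env',
    # ...or the name of a registered environment; leave the other key unset.
    'cloud_provider_image': None,
}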
def _launch_kadeploy(self, max_tries=1, check_deploy=True):
    """Create an execo_g5k.Deployment object, launch the deployment and
    return a tuple (deployed_hosts, undeployed_hosts)
    """
    logger.info('Deploying %s hosts \n%s', len(self.hosts),
                hosts_list(self.hosts))
    deployment = Deployment(
        hosts=[Host(canonical_host_name(host)) for host in self.hosts],
        env_file=self.env_file,
        env_name=self.env_name,
        user=self.env_user,
        vlan=self.kavlan)
    # Activate kadeploy output log if log level is debug
    if logger.getEffectiveLevel() <= 10:
        stdout = [sys.stdout]
        stderr = [sys.stderr]
    else:
        stdout = None
        stderr = None

    deployed_hosts, undeployed_hosts = deploy(
        deployment,
        stdout_handlers=stdout,
        stderr_handlers=stderr,
        num_tries=max_tries,
        check_deployed_command=check_deploy)
    deployed_hosts = list(deployed_hosts)
    undeployed_hosts = list(undeployed_hosts)
    # Renaming hosts if a kavlan is used
    if self.kavlan:
        for i, host in enumerate(deployed_hosts):
            deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
        for i, host in enumerate(undeployed_hosts):
            undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
    logger.info('Deployed %s hosts \n%s', len(deployed_hosts),
                hosts_list(deployed_hosts))
    cr = '\n' if len(undeployed_hosts) > 0 else ''
    logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                hosts_list(undeployed_hosts))
    self._update_hosts_state(deployed_hosts, undeployed_hosts)
    return deployed_hosts, undeployed_hosts
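This variant renames hosts after deployment when a KaVLAN is used, since nodes placed in a VLAN are reachable only through VLAN-specific DNS names. A standalone sketch of that step, with an assumed host name and VLAN id:

from execo import Host
from execo_g5k import get_kavlan_host_name

host = Host("graphene-10.nancy.grid5000.fr")  # assumed host name
print(get_kavlan_host_name(host, 4))
# expected form: graphene-10-kavlan-4.nancy.grid5000.fr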
def run(self):
    sweeper = self.create_paramsweeper()

    while True:
        comb = sweeper.get_next()
        if not comb:
            break
        comb_dir = self.result_dir + '/' + slugify(comb)
        if not os.path.isdir(comb_dir):
            os.mkdir(comb_dir)
        comb_file = comb_dir + '/trace'
        g5k_configuration['kadeploy3'] = comb['version']
        logger.info('Treating combination %s', pformat(comb))
        get_version = SshProcess(
            comb['version'] + ' -v',
            comb['site'],
            connection_params=default_frontend_connection_params).run()
        logger.info(get_version.stdout)

        resources = ""
        if comb['kavlan']:
            resources += "{type='kavlan'}/vlan=1+"
        resources += "nodes=" + str(comb['n_nodes'])
        sub = OarSubmission(resources=resources,
                            job_type='deploy',
                            walltime="0:30:00",
                            name='Kadeploy_Tests')
        logger.info('Performing submission of %s on site %s',
                    resources, comb['site'])
        jobs = oarsub([(sub, comb['site'])])

        if jobs[0][0]:
            try:
                logger.info('Waiting for job to start')
                wait_oar_job_start(jobs[0][0], jobs[0][1])
                hosts = get_oar_job_nodes(jobs[0][0], jobs[0][1])
                logger.info('Deployment of %s',
                            ' '.join([host.address for host in hosts]))
                kavlan = get_oar_job_kavlan(jobs[0][0], jobs[0][1])
                if kavlan:
                    logger.info('In kavlan %s', kavlan)
                deployment = Deployment(hosts,
                                        env_name=comb['env'],
                                        vlan=kavlan)
                deployed, undeployed = deploy(deployment,
                                              stdout_handlers=[comb_file],
                                              stderr_handlers=[comb_file])
            finally:
                logger.info('Destroying job %s on %s',
                            str(jobs[0][0]), jobs[0][1])
                oardel([(jobs[0][0], jobs[0][1])])
        else:
            # Submission failed: return the combination to the queue and
            # move on to the next one.
            logger.error('Submission failed on site %s', comb['site'])
            sweeper.cancel(comb)
            continue

        if len(undeployed) == 0:
            logger.info('%s is OK', slugify(comb))
        elif len(deployed) == 0:
            logger.error('%s is KO', slugify(comb))
        else:
            logger.warning('%s encountered problems with some hosts',
                           slugify(comb))
        sweeper.done(comb)
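run() above expects a create_paramsweeper helper. A hypothetical sketch using execo_engine's sweep and ParamSweeper, with parameter names matching the comb keys read in run(); the value lists are illustrative:

from execo_engine import ParamSweeper, sweep

def create_paramsweeper(self):
    parameters = {
        'version': ['kadeploy3'],      # kadeploy binary to test
        'site': ['nancy', 'rennes'],   # illustrative sites
        'n_nodes': [1, 4],
        'env': ['jessie-x64-base'],    # illustrative environment name
        'kavlan': [True, False],
    }
    # Persist sweep state under the engine's result directory
    return ParamSweeper(self.result_dir + '/sweeps', sweep(parameters))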
def run(self): """Inherited method, put here the code for running the engine""" self.define_parameters() self.cluster = self.args[0] self.site = get_cluster_site(self.cluster) if self.options.oar_job_id: self.oar_job_id = self.options.oar_job_id else: self.oar_job_id = None try: # Creation of the main iterator which is used for the first control loop. # You need have a method called define_parameters, that returns a list of parameter dicts self.define_parameters() job_is_dead = False # While they are combinations to treat while len(self.sweeper.get_remaining()) > 0: # If no job, we make a reservation and prepare the hosts for the experiments if job_is_dead or self.oar_job_id is None: self.make_reservation() # Retrieving the hosts and subnets parameters self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend) # Hosts deployment deployed, undeployed = deploy( Deployment(self.hosts, env_file="/home/mliroz/deploys/hadoop6.env")) logger.info("%i deployed, %i undeployed" % (len(deployed), len(undeployed))) if len(deployed) == 0: break # Configuration du systeme => look at the execo_g5k.topology module attr = get_host_attributes(self.cluster + '-1') ## SETUP FINISHED # Getting the next combination comb = self.sweeper.get_next() self.prepare_dataset(comb) self.xp(comb) # subloop over the combinations that have the same sizes while True: newcomb = self.sweeper.get_next(lambda r: filter( lambda subcomb: subcomb['sizes'] == comb['sizes'], r)) if newcomb: try: self.xp(newcomb) except: break else: break if get_oar_job_info(self.oar_job_id, self.frontend)['state'] == 'Error': job_is_dead = True finally: if self.oar_job_id is not None: if not self.options.keep_alive: logger.info('Deleting job') oardel([(self.oar_job_id, self.frontend)]) else: logger.info('Keeping job alive for debugging')
def run(self): """ """ if self.options.oargrid_job_id: self.oargrid_job_id = self.options.oargrid_job_id else: self.oargrid_job_id = None try: # Creation of the main iterator which is used for the first control loop. self.define_parameters() job_is_dead = False # While there are combinations to treat while len(self.sweeper.get_remaining()) > 0: # If no job, we make a reservation and prepare the hosts for the experiments if self.oargrid_job_id is None: self.make_reservation() # Wait that the job starts logger.info('Waiting that the job start') wait_oargrid_job_start(self.oargrid_job_id) # Retrieving the hosts and subnets parameters self.hosts = get_oargrid_job_nodes(self.oargrid_job_id) # Hosts deployment and configuration default_connection_params['user'] = '******' logger.info("Start hosts configuration") ex_log.setLevel('INFO') deployment = Deployment( hosts=self.hosts, env_file='/home/sirimie/env/mywheezy-x64-base.env') self.hosts, _ = deploy(deployment) Remote("rm -f /home/Work/sgcbntier/paasage_demo/csv/REQTASK_*", self.hosts).run() Remote( "rm -f /home/Work/sgcbntier/paasage_demo/platform_aws.xml", self.hosts).run() Remote("rm -f /home/Work/sgcbntier/paasage_demo/cloud_ec2.xml", self.hosts).run() Put(self.hosts, [ "run_all_execo.py", "xml_gen_execo.py", "conf.xml", "platform_aws.xml", "cloud_ec2.xml" ], remote_location="/home/Work/sgcbntier/paasage_demo/").run( ) logger.info("Done") if len(self.hosts) == 0: break # Initializing the resources and threads available_hosts = [ host for host in self.hosts for i in range( get_host_attributes(host)['architecture']['smt_size']) ] threads = {} # Creating the unique folder for storing the results comb_dir = self.result_dir + '/csv_results' if not os.path.exists(comb_dir): os.mkdir(comb_dir) # Checking that the job is running and not in Error while self.is_job_alive() or len(threads.keys()) > 0: job_is_dead = False while self.options.n_nodes > len(available_hosts): tmp_threads = dict(threads) for t in tmp_threads: if not t.is_alive(): available_hosts.append(tmp_threads[t]['host']) del threads[t] sleep(5) if not self.is_job_alive(): job_is_dead = True break if job_is_dead: break # Getting the next combination comb = self.sweeper.get_next() if not comb: while len(threads.keys()) > 0: tmp_threads = dict(threads) for t in tmp_threads: if not t.is_alive(): del threads[t] logger.info('Waiting for threads to complete') sleep(20) break host = available_hosts[0] available_hosts = available_hosts[1:] t = Thread(target=self.workflow, args=(comb, host, comb_dir)) threads[t] = {'host': host} t.daemon = True t.start() if not self.is_job_alive(): job_is_dead = True if job_is_dead: self.oargrid_job_id = None finally: if self.oargrid_job_id is not None: if not self.options.keep_alive: logger.info('Deleting job') oargriddel([self.oargrid_job_id]) else: logger.info('Keeping job alive for debugging')