def deploy_node(job_id, site, submission):
    node = get_oar_job_nodes(job_id, site)[0]
    deployment = Deployment(hosts=[node],
                            env_file="unikernels/hermit/debian10-x64-nfs-hermit.env",
                            user="******",
                            other_options="-k")
    node.user = "******"
    deploy(deployment)
    return node
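# A hedged usage sketch for deploy_node above (not part of the original
# snippet): it assumes an OAR "deploy" job on a hypothetical site and that
# oarsub, OarSubmission and wait_oar_job_start come from execo_g5k. Note
# that the `submission` parameter is unused by deploy_node itself.
jobs = oarsub([(OarSubmission(resources="nodes=1",
                              job_type="deploy",
                              walltime="1:00:00"), "nancy")])
job_id, site = jobs[0]
wait_oar_job_start(job_id, site)
node = deploy_node(job_id, site, None)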
def _deploy(self, conf, nodes, vlans, force_deploy=False):
    provider_conf = conf['provider']
    # We put the nodes in the first vlan we have
    vlan = self._get_primary_vlan(vlans)
    # Deploy all the nodes
    logging.info("Deploying %s on %d nodes %s" % (
        provider_conf['env_name'],
        len(nodes),
        '(forced)' if force_deploy else ''))
    deployed, undeployed = EX5.deploy(
        EX5.Deployment(nodes,
                       env_name=provider_conf['env_name'],
                       vlan=vlan[1]),
        check_deployed_command=not force_deploy)
    # Check the deployment
    if len(undeployed) > 0:
        logging.error("%d nodes were not deployed correctly:" % len(undeployed))
        for n in undeployed:
            logging.error(n)
    deployed_nodes_vlan = sorted(
        self._translate_to_vlan([EX.Host(n) for n in deployed], vlan[1]),
        key=lambda n: n.address)
    logging.info(deployed_nodes_vlan)
    # Check the deployed nodes against the resource distribution policy
    self._check_nodes(nodes=deployed_nodes_vlan,
                      resources=conf['resources'],
                      mode=conf['provider']['role_distribution'])
    return deployed, deployed_nodes_vlan
def deploy(self):
    # We put the nodes in the first vlan we have
    vlan = self._get_primary_vlan()
    # Deploy all the nodes
    logger.info("Deploying %s on %d nodes %s" % (
        self.config['env_name'],
        len(self.nodes),
        '(forced)' if self.force_deploy else ''))
    deployed, undeployed = EX5.deploy(
        EX5.Deployment(self.nodes,
                       env_name=self.config['env_name'],
                       vlan=vlan[1]),
        check_deployed_command=not self.force_deploy)
    # Check the deployment
    if len(undeployed) > 0:
        logger.error("%d nodes were not deployed correctly:" % len(undeployed))
        for n in undeployed:
            logger.error(style.emph(n))
    # Update node names with the vlan suffix
    self.nodes = sorted(translate_to_vlan(self.nodes, vlan[1]),
                        key=lambda n: n.address)
    logger.info(self.nodes)
    self.deployed_nodes = sorted(
        translate_to_vlan([EX.Host(n) for n in deployed], vlan[1]),
        key=lambda n: n.address)
    logger.info(self.deployed_nodes)
    check_nodes(nodes=self.deployed_nodes,
                resources=self.config['resources'],
                mode=self.config['role_distribution'])
    return deployed, undeployed
def _deploy(self):
    # We put the nodes in the first vlan we have
    vlan = self._get_primary_vlan()
    # Deploy all the nodes
    logging.info("Deploying %s on %d nodes %s" % (
        self.config['env_name'],
        len(self.nodes),
        '(forced)' if self.force_deploy else ''))
    deployed, undeployed = EX5.deploy(
        EX5.Deployment(self.nodes,
                       env_name=self.config['env_name'],
                       vlan=vlan[1]),
        check_deployed_command=not self.force_deploy)
    # Check the deployment
    if len(undeployed) > 0:
        logging.error("%d nodes were not deployed correctly:" % len(undeployed))
        for n in undeployed:
            logging.error(n)
    # Update node names with the vlan suffix
    self.nodes = sorted(self._translate_to_vlan(self.nodes, vlan[1]),
                        key=lambda n: n.address)
    logging.info(self.nodes)
    self.deployed_nodes = sorted(
        self._translate_to_vlan([EX.Host(n) for n in deployed], vlan[1]),
        key=lambda n: n.address)
    logging.info(self.deployed_nodes)
    self._check_nodes(nodes=self.deployed_nodes,
                      resources=self.config['resources'],
                      mode=self.config['role_distribution'])
    return deployed, undeployed
def _launch_kadeploy(self, max_tries=10, check_deploy=True):
    """Create an execo_g5k.Deployment object, launch the deployment
    and return a tuple (deployed_hosts, undeployed_hosts)
    """
    # If the provisioner has oar_job_ids and no config_file_path,
    # then the configs variable is not created
    if not hasattr(self, 'configs'):
        logger.info('The list of %s hosts: \n%s', len(self.hosts),
                    hosts_list(self.hosts, separator='\n'))
        return

    logger.info('Deploying %s hosts \n%s', len(self.hosts),
                hosts_list(self.hosts, separator='\n'))
    try:
        deployment = Deployment(
            hosts=[Host(canonical_host_name(host)) for host in self.hosts],
            env_file=self.configs['custom_image'],
            env_name=self.configs['cloud_provider_image'])
    except ValueError:
        logger.error(
            "Please put either custom_image or cloud_provider_image "
            "in the config file.")
        sys.exit(1)
    # user=self.env_user,
    # vlan=self.kavlan)

    # Activate kadeploy output log if log level is debug
    if logger.getEffectiveLevel() <= 10:
        stdout = [sys.stdout]
        stderr = [sys.stderr]
    else:
        stdout = None
        stderr = None

    # deploy() iterates through each frontend to run kadeploy, so the
    # deployments are performed sequentially, site after site
    deployed_hosts, undeployed_hosts = deploy(
        deployment,
        stdout_handlers=stdout,
        stderr_handlers=stderr,
        num_tries=max_tries,
        check_deployed_command=check_deploy)
    deployed_hosts = list(deployed_hosts)
    undeployed_hosts = list(undeployed_hosts)
    # # Renaming hosts if a kavlan is used
    # if self.kavlan:
    #     for i, host in enumerate(deployed_hosts):
    #         deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
    #     for i, host in enumerate(undeployed_hosts):
    #         undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
    logger.info('Deployed %s hosts successfully', len(deployed_hosts))
    cr = '\n' if len(undeployed_hosts) > 0 else ''
    logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                hosts_list(undeployed_hosts))
    return deployed_hosts, undeployed_hosts
def _launch_kadeploy(self, max_tries=1, check_deploy=True):
    """Create an execo_g5k.Deployment object, launch the deployment
    and return a tuple (deployed_hosts, undeployed_hosts)
    """
    logger.info('Deploying %s hosts \n%s', len(self.hosts),
                hosts_list(self.hosts))
    deployment = Deployment(
        hosts=[Host(canonical_host_name(host)) for host in self.hosts],
        env_file=self.env_file,
        env_name=self.env_name,
        user=self.env_user,
        vlan=self.kavlan)
    # Activate kadeploy output log if log level is debug
    if logger.getEffectiveLevel() <= 10:
        stdout = [sys.stdout]
        stderr = [sys.stderr]
    else:
        stdout = None
        stderr = None

    deployed_hosts, undeployed_hosts = deploy(
        deployment,
        stdout_handlers=stdout,
        stderr_handlers=stderr,
        num_tries=max_tries,
        check_deployed_command=check_deploy)
    deployed_hosts = list(deployed_hosts)
    undeployed_hosts = list(undeployed_hosts)
    # Rename hosts if a kavlan is used
    if self.kavlan:
        for i, host in enumerate(deployed_hosts):
            deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
        for i, host in enumerate(undeployed_hosts):
            undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
    logger.info('Deployed %s hosts \n%s', len(deployed_hosts),
                hosts_list(deployed_hosts))
    cr = '\n' if len(undeployed_hosts) > 0 else ''
    logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                hosts_list(undeployed_hosts))
    self._update_hosts_state(deployed_hosts, undeployed_hosts)
    return deployed_hosts, undeployed_hosts
def _deploy(self, conf, nodes, vlans, force_deploy=False):
    provider_conf = conf['provider']
    # We put the nodes in the first vlan we have
    vlan = self._get_primary_vlan(vlans)
    kw = {
        'hosts': nodes,
        'vlan': vlan[1],
    }
    # An env_file takes precedence over an env_name
    if provider_conf.get('env_file'):
        kw.update({'env_file': provider_conf.get('env_file')})
        provider_conf.pop('env_name', None)
    if provider_conf.get('env_name'):
        kw.update({'env_name': provider_conf.get('env_name')})

    logging.info("%s deploying %s nodes with %s" % (
        '(forced)' if force_deploy else '',
        len(nodes),
        kw))
    deployed, undeployed = EX5.deploy(
        EX5.Deployment(**kw),
        check_deployed_command=not force_deploy)
    # Check the deployment
    if len(undeployed) > 0:
        logging.error("%d nodes were not deployed correctly:" % len(undeployed))
        for n in undeployed:
            logging.error(n)
    deployed_nodes_vlan = sorted(
        self._translate_to_vlan([EX.Host(n) for n in deployed], vlan[1]),
        key=lambda n: n.address)
    logging.info(deployed_nodes_vlan)
    # Check the deployed nodes against the resource distribution policy
    self._check_nodes(nodes=deployed_nodes_vlan,
                      resources=conf['resources'],
                      mode=conf['provider']['role_distribution'])
    return deployed, deployed_nodes_vlan
def run(self):
    """ """
    if self.options.oargrid_job_id:
        self.oargrid_job_id = self.options.oargrid_job_id
    else:
        self.oargrid_job_id = None

    try:
        # Creation of the main iterator, used for the first control loop
        self.define_parameters()
        job_is_dead = False
        # While there are combinations to treat
        while len(self.sweeper.get_remaining()) > 0:
            # If no job, make a reservation and prepare the hosts
            # for the experiments
            if self.oargrid_job_id is None:
                self.make_reservation()
            # Wait for the job to start
            logger.info('Waiting for the job to start')
            wait_oargrid_job_start(self.oargrid_job_id)
            # Retrieve the hosts and subnets parameters
            self.hosts = get_oargrid_job_nodes(self.oargrid_job_id)
            # Host deployment and configuration
            default_connection_params['user'] = '******'
            logger.info("Start hosts configuration")
            ex_log.setLevel('INFO')
            deployment = Deployment(
                hosts=self.hosts,
                env_file='/home/sirimie/env/mywheezy-x64-base.env')
            self.hosts, _ = deploy(deployment)

            Remote("rm -f /home/Work/sgcbntier/paasage_demo/csv/REQTASK_*",
                   self.hosts).run()
            Remote("rm -f /home/Work/sgcbntier/paasage_demo/platform_aws.xml",
                   self.hosts).run()
            Remote("rm -f /home/Work/sgcbntier/paasage_demo/cloud_ec2.xml",
                   self.hosts).run()
            Put(self.hosts,
                ["run_all_execo.py", "xml_gen_execo.py", "conf.xml",
                 "platform_aws.xml", "cloud_ec2.xml"],
                remote_location="/home/Work/sgcbntier/paasage_demo/").run()
            logger.info("Done")

            if len(self.hosts) == 0:
                break

            # Initialize the resources and threads
            available_hosts = [
                host for host in self.hosts
                for i in range(
                    get_host_attributes(host)['architecture']['smt_size'])]
            threads = {}

            # Create the unique folder for storing the results
            comb_dir = self.result_dir + '/csv_results'
            if not os.path.exists(comb_dir):
                os.mkdir(comb_dir)

            # Check that the job is running and not in Error
            while self.is_job_alive() or len(threads.keys()) > 0:
                job_is_dead = False
                while self.options.n_nodes > len(available_hosts):
                    tmp_threads = dict(threads)
                    for t in tmp_threads:
                        if not t.is_alive():
                            available_hosts.append(tmp_threads[t]['host'])
                            del threads[t]
                    sleep(5)
                    if not self.is_job_alive():
                        job_is_dead = True
                        break
                if job_is_dead:
                    break

                # Get the next combination
                comb = self.sweeper.get_next()
                if not comb:
                    while len(threads.keys()) > 0:
                        tmp_threads = dict(threads)
                        for t in tmp_threads:
                            if not t.is_alive():
                                del threads[t]
                        logger.info('Waiting for threads to complete')
                        sleep(20)
                    break

                host = available_hosts[0]
                available_hosts = available_hosts[1:]

                t = Thread(target=self.workflow,
                           args=(comb, host, comb_dir))
                threads[t] = {'host': host}
                t.daemon = True
                t.start()

            if not self.is_job_alive():
                job_is_dead = True

            if job_is_dead:
                self.oargrid_job_id = None

    finally:
        if self.oargrid_job_id is not None:
            if not self.options.keep_alive:
                logger.info('Deleting job')
                oargriddel([self.oargrid_job_id])
            else:
                logger.info('Keeping job alive for debugging')
def deploy(host):
    return ex5.deploy(ex5.Deployment([host], env_name=ENV_NAME))
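# Minimal usage sketch for the wrapper above (assumes ENV_NAME is a
# module-level constant naming a registered kadeploy environment);
# ex5.deploy returns a (deployed, undeployed) pair of host collections:
deployed, undeployed = deploy(host)
if undeployed:
    raise RuntimeError("deployment failed on %s" % sorted(undeployed))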
def run(self):
    sweeper = self.create_paramsweeper()

    while True:
        comb = sweeper.get_next()
        if not comb:
            break
        comb_dir = self.result_dir + '/' + slugify(comb)
        if not os.path.isdir(comb_dir):
            os.mkdir(comb_dir)
        comb_file = comb_dir + '/trace'
        g5k_configuration['kadeploy3'] = comb['version']
        logger.info('Treating combination %s', pformat(comb))
        get_version = SshProcess(
            comb['version'] + ' -v',
            comb['site'],
            connection_params=default_frontend_connection_params).run()
        logger.info(get_version.stdout)

        resources = ""
        if comb['kavlan']:
            resources += "{type='kavlan'}/vlan=1+"
        resources += "nodes=" + str(comb['n_nodes'])
        sub = OarSubmission(resources=resources,
                            job_type='deploy',
                            walltime="0:30:00",
                            name='Kadeploy_Tests')
        logger.info('Performing submission of %s on site %s',
                    resources, comb['site'])
        jobs = oarsub([(sub, comb['site'])])

        if jobs[0][0]:
            try:
                logger.info('Waiting for job to start')
                wait_oar_job_start(jobs[0][0], jobs[0][1])
                hosts = get_oar_job_nodes(jobs[0][0], jobs[0][1])
                logger.info('Deployment of %s',
                            ' '.join([host.address for host in hosts]))
                kavlan = get_oar_job_kavlan(jobs[0][0], jobs[0][1])
                if kavlan:
                    logger.info('In kavlan %s', kavlan)
                deployment = Deployment(hosts,
                                        env_name=comb['env'],
                                        vlan=kavlan)
                deployed, undeployed = deploy(deployment,
                                              stdout_handlers=[comb_file],
                                              stderr_handlers=[comb_file])
            finally:
                logger.info('Destroying job %s on %s',
                            str(jobs[0][0]), jobs[0][1])
                oardel([(jobs[0][0], jobs[0][1])])
        else:
            # The submission failed: nothing was deployed
            deployed, undeployed = [], []

        if len(deployed) > 0 and len(undeployed) == 0:
            logger.info('%s is OK', slugify(comb))
        elif len(deployed) == 0:
            logger.error('%s is KO', slugify(comb))
        else:
            logger.warning('%s encountered problems with some hosts',
                           slugify(comb))

        sweeper.done(comb)
def run(self): """Inherited method, put here the code for running the engine""" self.define_parameters() self.cluster = self.args[0] self.site = get_cluster_site(self.cluster) if self.options.oar_job_id: self.oar_job_id = self.options.oar_job_id else: self.oar_job_id = None try: # Creation of the main iterator which is used for the first control loop. # You need have a method called define_parameters, that returns a list of parameter dicts self.define_parameters() job_is_dead = False # While they are combinations to treat while len(self.sweeper.get_remaining()) > 0: # If no job, we make a reservation and prepare the hosts for the experiments if job_is_dead or self.oar_job_id is None: self.make_reservation() # Retrieving the hosts and subnets parameters self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend) # Hosts deployment deployed, undeployed = deploy( Deployment(self.hosts, env_file="/home/mliroz/deploys/hadoop6.env")) logger.info("%i deployed, %i undeployed" % (len(deployed), len(undeployed))) if len(deployed) == 0: break # Configuration du systeme => look at the execo_g5k.topology module attr = get_host_attributes(self.cluster + '-1') ## SETUP FINISHED # Getting the next combination comb = self.sweeper.get_next() self.prepare_dataset(comb) self.xp(comb) # subloop over the combinations that have the same sizes while True: newcomb = self.sweeper.get_next(lambda r: filter( lambda subcomb: subcomb['sizes'] == comb['sizes'], r)) if newcomb: try: self.xp(newcomb) except: break else: break if get_oar_job_info(self.oar_job_id, self.frontend)['state'] == 'Error': job_is_dead = True finally: if self.oar_job_id is not None: if not self.options.keep_alive: logger.info('Deleting job') oardel([(self.oar_job_id, self.frontend)]) else: logger.info('Keeping job alive for debugging')
    sub.walltime = "03:00:00"
    job = ex5.oarsub([(sub, site)])
    job_id = job[0][0]
    job_site = job[0][1]
    host = ex5.get_oar_job_nodes(job_id, job_site)
except Exception as e:
    t, value, tb = sys.exc_info()
    print(str(t) + " " + str(value))
    traceback.print_tb(tb)

try:
    # Deploy
    print("Deploying monubuntu at " + job_site)
    deployment = ex5.kadeploy.Deployment(hosts=host, env_file=envfile)
    deployed_hosts, _ = ex5.deploy(deployment)
    # print("Deployed on " + deployed_host[0])
    if len(deployed_hosts) != 0:
        # Commented out because the slave agent is already downloaded:
        # get the agent started after downloading it
        # print("Downloading and launching the slave agent")
        # ex.action.Remote("wget https://ci.inria.fr/beyondtheclouds/jnlpJars/slave.jar",
        #                  deployed_hosts,
        #                  connection_params={'user': '******'}).run()
        # Replace the current process with the ssh connection launching
        # the slave agent
        os.execl("/usr/bin/ssh", "/usr/bin/ssh",
                 "root@" + list(deployed_hosts)[0],
                 "java -jar /home/ci/slave.jar")
    else:
        raise DeploymentError("Error while deploying")
except Exception as e:
    t, value, tb = sys.exc_info()
def setup_host(self):
    """Deploy a node, install dependencies and Rally"""
    logger.info('Deploying environment %s on %s' %
                (style.emph(self.config['env-name']), self.host) +
                (' (forced)' if self.options.force_deploy else ''))

    deployment = None
    if 'env-user' not in self.config or self.config['env-user'] == '':
        deployment = EX5.Deployment(hosts=[self.host],
                                    env_name=self.config['env-name'])
    else:
        deployment = EX5.Deployment(hosts=[self.host],
                                    env_name=self.config['env-name'],
                                    user=self.config['env-user'])

    deployed_hosts, _ = EX5.deploy(
        deployment,
        check_deployed_command=not self.options.force_deploy)

    # Test if Rally is installed
    test_p = EX.SshProcess('rally version', self.host, {'user': '******'})
    test_p.ignore_exit_code = True
    test_p.nolog_exit_code = True
    test_p.run()

    if test_p.exit_code != 0:
        # Install Rally
        self._run_or_abort(
            "curl -sO %s" % RALLY_INSTALL_URL,
            self.host,
            "Could not download Rally install script from %s" %
            RALLY_INSTALL_URL,
            conn_params={'user': '******'})

        logger.info("Installing dependencies on deployed host")
        self._run_or_abort('apt-get update && apt-get -y update',
                           self.host,
                           'Could not update packages on host',
                           conn_params={'user': '******'})
        self._run_or_abort('apt-get -y install python-pip',
                           self.host,
                           'Could not install pip on host',
                           conn_params={'user': '******'})
        self._run_or_abort('pip install --upgrade setuptools',
                           self.host,
                           'Could not upgrade setuptools',
                           conn_params={'user': '******'})

        logger.info("Installing Rally from %s" %
                    style.emph(self.config['rally-git']))
        self._run_or_abort("bash install_rally.sh -y --url %s" %
                           self.config['rally-git'],
                           self.host,
                           'Could not install Rally on host',
                           conn_params={'user': '******'})
    else:
        logger.info("Rally %s is already installed" %
                    test_p.stdout.rstrip())

    # Set up the deployment file
    vars = {
        "controller": self.config['os-services']['controller'],
        "os_region": self.config['authentication']['os-region'],
        "os_username": self.config['authentication']['os-username'],
        "os_password": self.config['authentication']['os-password'],
        "os_tenant": self.config['authentication']['os-tenant'],
        "os_user_domain": self.config['authentication']['os-user-domain'],
        "os_project_domain": self.config['authentication']['os-project-domain']
    }
    rally_deployment = self._render_template(
        'templates/deployment_existing.json', vars)
    EX.Put([self.host], [rally_deployment],
           remote_location='deployment_existing.json',
           connection_params={'user': '******'}).run()

    # Create a Rally deployment
    self._run_or_abort(
        "rally deployment create --filename deployment_existing.json "
        "--name %s" % self.config['deployment-name'],
        self.host,
        'Could not create the Rally deployment',
        conn_params={'user': '******'})
    self.rally_deployed = True

    logger.info("Rally has been deployed correctly")
EX.logger.setLevel('INFO')

jobs = EX5.get_current_oar_jobs(['reims'])
if len(jobs) == 0:
    jobs = EX5.oarsub([(
        EX5.OarSubmission(resources="{type='kavlan'}/vlan=1+/nodes=2",
                          walltime="3:00:00",
                          job_type='deploy'),
        "reims")])
    EX5.wait_oar_job_start(oar_job_id=jobs[0][0], frontend=jobs[0][1])
print(jobs)

hosts = EX5.get_oar_job_nodes(jobs[0][0], jobs[0][1])
print(hosts)
kavlan_id = EX5.get_oar_job_kavlan(jobs[0][0], jobs[0][1])
print(kavlan_id)

deployment = EX5.Deployment(hosts=hosts,
                            env_file="ubuntu-x64-1204",
                            vlan=kavlan_id)
deployed_hosts, undeployed_hosts = EX5.deploy(deployment)
# deployed_hosts, undeployed_hosts = EX5.deploy(deployment, num_tries=0,
#                                               check_deployed_command=True)

if kavlan_id is not None:
    hosts = [EX5.get_kavlan_host_name(host, kavlan_id)
             for host in deployed_hosts]
print(hosts[0])


def get_kavlan_network(kavlan, site):
    """Retrieve the network parameters for a given kavlan from the API"""
    network, mask_size = None, None
    equips = EX5.get_resource_attributes('/sites/' + site +
                                         '/network_equipments/')
    for equip in equips['items']:
        if 'vlans' in equip and len(equip['vlans']) > 2:
            all_vlans = equip['vlans']
    for info in all_vlans.values():
logger.info("Wait for job to start...") print oargrid_job_id wait_oar_job_start(oar_job_id = oargrid_job_id) logger.info("Wait for job to start...") wait_oar_job_start(oargrid_job_id) #wait_oargrid_job_start(oargrid_job_id) # print oargrid_job_id print ssh_key nodes = get_oar_job_nodes(oargrid_job_id) #nodes = get_oargrid_job_nodes(oargrid_job_id) logger.info("Job has started") print nodes logger.info("Deployment started") #logger.setLevel(1) nodes = deploy(Deployment(hosts = nodes, env_name = "wheezy-x64-diet", user = "******", other_options='-d -V4'), out = True, check_deployed_command=True)#, check_deployed_command = False) deploy_nodes = nodes[0] ko_nodes = nodes[1] logger.info("Deployment completed") if not deploy_nodes: logger.info("No nodes were correctly deployed") logger.info("End of program") sys.exit(0) #Get the "clean" list of nodes nodes = [] for host in deploy_nodes: nodes.append(get_node_name(host)) writeNodesToFile(nodes, nodefile)
def _deploy(nodes, force_deploy, options):
    # For testing purposes
    logger.info("Deploying %s with options %s" % (nodes, options))
    dep = ex5.Deployment(nodes, **options)
    return ex5.deploy(dep, check_deployed_command=not force_deploy)
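# Usage sketch for _deploy above (hypothetical values): the options dict is
# forwarded verbatim to execo_g5k.Deployment, so any of its keyword
# arguments (env_name or env_file, vlan, user, ...) can be supplied.
options = {'env_name': 'debian9-x64-nfs'}
deployed, undeployed = _deploy(nodes, force_deploy=False, options=options)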
def run(self): """Inherited method, put here the code for running the engine""" self.define_parameters() self.cluster = self.args[0] self.site = get_cluster_site(self.cluster) if self.options.oar_job_id: self.oar_job_id = self.options.oar_job_id else: self.oar_job_id = None try: # Creation of the main iterator which is used for the first control loop. # You need have a method called define_parameters, that returns a list of parameter dicts self.define_parameters() job_is_dead = False # While they are combinations to treat while len(self.sweeper.get_remaining()) > 0: # If no job, we make a reservation and prepare the hosts for the experiments if job_is_dead or self.oar_job_id is None: self.make_reservation() # Retrieving the hosts and subnets parameters self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend) # Hosts deployment deployed, undeployed = deploy(Deployment(self.hosts, env_file="/home/mliroz/deploys/hadoop6.env")) logger.info("%i deployed, %i undeployed" % (len(deployed), len(undeployed))) if len(deployed) == 0: break # Configuration du systeme => look at the execo_g5k.topology module attr = get_host_attributes(self.cluster + '-1') ## SETUP FINISHED # Getting the next combination comb = self.sweeper.get_next() self.prepare_dataset(comb) self.xp(comb) # subloop over the combinations that have the same sizes while True: newcomb = self.sweeper.get_next(lambda r: filter(lambda subcomb: subcomb['sizes'] == comb['sizes'], r)) if newcomb: try: self.xp(newcomb) except: break else: break if get_oar_job_info(self.oar_job_id, self.frontend)['state'] == 'Error': job_is_dead = True finally: if self.oar_job_id is not None: if not self.options.keep_alive: logger.info('Deleting job') oardel([(self.oar_job_id, self.frontend)]) else: logger.info('Keeping job alive for debugging')