def _get_jobs_and_vlans(self, conf):
    """Return the OAR jobs, the reserved vlans and the nodes of a deployment.

    The oargrid job named ``conf['provider']['name']`` is reused when
    ``EX5.planning.get_job_by_name`` finds one; otherwise a new reservation
    is made through ``self._make_reservation(conf)``.

    :param conf: configuration dict; reads ``conf['provider']`` (keys
        ``name`` and ``role_distribution``) and ``conf['resources']``.
    :returns: a ``(jobs, vlans, nodes)`` tuple where ``jobs`` is a list of
        ``(site, job_id)``, ``vlans`` a list of ``(site, vlan_id)`` for the
        jobs that hold a vlan, and ``nodes`` the job nodes sorted by address.
    """
    provider_conf = conf['provider']

    # Look if there is a running job or make a new reservation.
    gridjob, _ = EX5.planning.get_job_by_name(provider_conf['name'])
    if gridjob is None:
        gridjob = self._make_reservation(conf)
    else:
        logging.info("Using running oargrid job %s", gridjob)

    # Wait for the job to start before asking for its nodes.
    EX5.wait_oargrid_job_start(gridjob)
    nodes = sorted(EX5.get_oargrid_job_nodes(gridjob),
                   key=lambda n: n.address)

    # Check the number of nodes given the distribution policy.
    self._check_nodes(nodes=nodes,
                      resources=conf['resources'],
                      mode=provider_conf['role_distribution'])

    # Vlans information: query each job once and reuse the answer instead
    # of issuing the same request a second time.
    jobs = []
    vlans = []
    for job_id, site in EX5.get_oargrid_job_oar_jobs(gridjob):
        jobs.append((site, job_id))
        vlan_id = EX5.get_oar_job_kavlan(job_id, site)
        if vlan_id is not None:
            vlans.append((site, vlan_id))

    return (jobs, vlans, nodes)
def _get_job(self):
    """Attach to the oargrid job of this deployment, reserving if needed.

    The job named ``self.config['name']`` is reused when one is found;
    otherwise ``self._make_reservation()`` is called (presumably it sets
    ``self.gridjob`` -- confirm against its definition).

    Sets ``self.gridjob``, ``self.nodes`` (sorted by address),
    ``self.start_date``, ``self.user``, ``self.jobs`` (list of
    ``(site, job_id)``) and ``self.vlans`` (list of ``(site, vlan_id)``).
    """
    # Look if there is a running job or make a new reservation.
    self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])
    if self.gridjob is None:
        self._make_reservation()
    else:
        logging.info("Using running oargrid job %s", self.gridjob)

    # Wait for the job to start.
    EX5.wait_oargrid_job_start(self.gridjob)

    self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                        key=lambda n: n.address)

    # XXX check already done into `_deploy`.
    self._check_nodes(nodes=self.nodes,
                      resources=self.config['resources'],
                      mode=self.config['role_distribution'])

    # Fetch the job information once; both fields below come from it.
    self.start_date = None
    self.user = None
    job_info = EX5.get_oargrid_job_info(self.gridjob)
    # XXX(Ad_rien_) Start_date is never used, deadcode? - August 11th 2016
    if 'start_date' in job_info:
        self.start_date = job_info['start_date']
    if 'user' in job_info:
        self.user = job_info['user']

    # Vlans information: one kavlan query per job, reused for the append.
    self.jobs = []
    self.vlans = []
    for job_id, site in EX5.get_oargrid_job_oar_jobs(self.gridjob):
        self.jobs.append((site, job_id))
        vlan_id = EX5.get_oar_job_kavlan(job_id, site)
        if vlan_id is not None:
            self.vlans.append((site, vlan_id))
def get_job(self):
    """Attach to the oargrid job of this deployment, reserving if needed.

    The job named ``self.config['name']`` is reused when one is found;
    otherwise ``self._make_reservation()`` is called (presumably it sets
    ``self.gridjob`` -- confirm against its definition).

    Sets ``self.nodes`` (sorted by address), ``self.start_date``,
    ``self.user``, ``self.jobs`` (list of ``(site, job_id)``) and
    ``self.vlans`` (list of ``(site, vlan_id)``).

    :returns: ``self.gridjob``.
    """
    # Look if there is a running job or make a new reservation.
    self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])
    if self.gridjob is None:
        self._make_reservation()
    else:
        logger.info("Using running oargrid job %s", style.emph(self.gridjob))

    # Wait for the job to start.
    EX5.wait_oargrid_job_start(self.gridjob)

    # NOTE(review): sorted() never returns None, so this loop body runs at
    # most once and does not actually retry anything. Kept as is to preserve
    # behaviour -- confirm whether a real retry was intended.
    attempts = 0
    self.nodes = None
    while self.nodes is None and attempts < MAX_ATTEMPTS:
        self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                            key=lambda n: n.address)
        attempts += 1

    check_nodes(nodes=self.nodes,
                resources=self.config['resources'],
                mode=self.config['role_distribution'])

    # Fetch the job information once; both fields below come from it.
    self.start_date = None
    self.user = None
    job_info = EX5.get_oargrid_job_info(self.gridjob)
    # TODO - Start_date is never used, deadcode ? Ad_rien_ - August 11th 2016
    if 'start_date' in job_info:
        self.start_date = job_info['start_date']
    if 'user' in job_info:
        self.user = job_info['user']

    # Vlans information: one kavlan query per job, reused for the append.
    self.jobs = []
    self.vlans = []
    for job_id, site in EX5.get_oargrid_job_oar_jobs(self.gridjob):
        self.jobs.append((site, job_id))
        vlan_id = EX5.get_oar_job_kavlan(job_id, site)
        if vlan_id is not None:
            self.vlans.append((site, vlan_id))

    return self.gridjob
def concretize_resources(resources, gridjob, reservation_type):
    """Bind the abstract resource description to what a job really holds.

    The nodes of the job are handed to ``concretize_nodes`` and its vlans
    (as ``{"site": ..., "vlan_id": ...}`` dicts) to ``concretize_networks``.

    :param resources: dict description; its optional ``"machines"`` and
        ``"networks"`` lists are read to detect the site of an OAR job.
    :param gridjob: id of the OAR job or of the oargrid job.
    :param reservation_type: ``"oar"`` for a single OAR job; any other value
        is handled as an oargrid job.
    :raises ValueError: for an OAR job, when the description does not point
        to exactly one site.
    """
    is_oar = reservation_type == "oar"

    if is_oar:
        nodes = ex5.get_oar_job_nodes(gridjob)
    else:
        nodes = ex5.get_oargrid_job_nodes(gridjob)
    concretize_nodes(resources, nodes)

    if is_oar:
        # An OAR job id carries no site information: infer the site from
        # the clusters and networks named in the description.
        site_candidates = [
            ex5.get_cluster_site(machine.get("cluster"))
            for machine in resources.get("machines", [])
        ]
        site_candidates += [
            network.get("site", "unknown")
            for network in resources.get("networks", [])
        ]
        if len(set(site_candidates)) != 1:
            # Raising a plain string is not allowed and would surface as an
            # unrelated TypeError; raise a real exception with the message.
            raise ValueError(
                "Could not detect the g5k site of the oarjob %s" % gridjob)
        job_sites = [(gridjob, site_candidates[0])]
    else:
        job_sites = ex5.get_oargrid_job_oar_jobs(gridjob)

    vlans = []
    for job_id, site in job_sites:
        vlan_ids = ex5.get_oar_job_kavlan(job_id, site)
        vlans.extend(
            {"site": site, "vlan_id": vlan_id} for vlan_id in vlan_ids)

    concretize_networks(resources, vlans)
def get_oar_job_vm5k_resources(jobs):
    """Build the per-site resources dict needed by vm5k_deployment.

    For each ``(oar_job_id, site)`` pair the job is awaited, then its host
    addresses and its (ip, mac) list are collected. When the job has no
    subnet, the (ip, mac) list of its kavlan is used instead.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs.
    :returns: dict mapping each site to
        ``{'hosts': ..., 'ip_mac': ..., 'kavlan': ...}``.
    """
    resources = {}
    for raw_job_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), raw_job_id)
        job_id = int(raw_job_id)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        hosts = []
        for host in get_oar_job_nodes(job_id, site):
            hosts.append(host.address)

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job_id, site)

        kavlan = None
        if len(ip_mac) == 0:
            # No subnet on this job: fall back on its kavlan, if any.
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(job_id, site)
            if kavlan:
                assert (len(kavlan) == 1)
                kavlan = kavlan[0]
                ip_mac = get_kavlan_ip_mac(kavlan, site)

        # The first 300 (ip, mac) entries are left out of the result.
        site_resources = {}
        site_resources['hosts'] = hosts
        site_resources['ip_mac'] = ip_mac[300:]
        site_resources['kavlan'] = kavlan
        resources[site] = site_resources
    return resources
def get_oar_job_vm5k_resources(jobs):
    """Collect hosts and (ip, mac) pairs of OAR jobs for vm5k_deployment.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs; each job is
        awaited before being queried.
    :returns: dict keyed by site, each value holding the ``'hosts'``
        addresses, the ``'ip_mac'`` list (taken from the job subnet or,
        failing that, from its kavlan) and the ``'kavlan'`` id or ``None``.
    """
    resources = {}
    for job, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), job)
        job = int(job)
        wait_oar_job_start(job, site)

        logger.debug('Retrieving hosts')
        nodes = get_oar_job_nodes(job, site)
        hosts = [node.address for node in nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job, site)

        kavlan = None
        has_subnet = len(ip_mac) != 0
        if not has_subnet:
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(job, site)
            if kavlan:
                # Exactly one kavlan is expected on the job.
                assert(len(kavlan) == 1)
                kavlan = kavlan[0]
                ip_mac = get_kavlan_ip_mac(kavlan, site)

        # Entries before index 300 of the (ip, mac) list are skipped.
        resources[site] = dict(hosts=hosts,
                               ip_mac=ip_mac[300:],
                               kavlan=kavlan)
    return resources
def grid_reload_from_id(gridjob):
    """Reload nodes, vlans and subnets from an existing oargrid job.

    :param gridjob: oargrid job id (converted with ``int``).
    :returns: ``(nodes, vlans, subnets)``; ``vlans`` holds
        ``{"site", "vlan_id"}`` dicts and ``subnets`` holds the subnet info
        of each job extended with ``"site"``, ``"ipmac"`` and ``"network"``.
    """
    logger.info("Reloading the resources from oargrid job %s", gridjob)
    gridjob = int(gridjob)

    nodes = ex5.get_oargrid_job_nodes(gridjob)
    job_sites = ex5.get_oargrid_job_oar_jobs(gridjob)

    vlans, subnets = [], []
    for job_id, site in job_sites:
        for vlan_id in ex5.get_oar_job_kavlan(job_id, site):
            vlans.append({"site": site, "vlan_id": vlan_id})

        # NOTE(msimonin): this currently returned only one subnet
        # even if several are reserved
        # We'll need to patch execo the same way it has been patched for vlans
        ipmac, info = ex5.get_oar_job_subnets(job_id, site)
        if ipmac:
            subnet = {"site": site, "ipmac": ipmac}
            subnet.update(info)
            # Mandatory key when it comes to concretize resources
            subnet["network"] = info["ip_prefix"]
            subnets.append(subnet)
        else:
            logger.debug("No subnet information found for this job")

    return nodes, vlans, subnets
def get_resources(self):
    """Retrieve the hosts and (ip, mac) lists of the jobs in oar_result.

    Every ``(oar_job_id, site)`` of ``self.oar_result`` is first awaited;
    the program exits with status 1 as soon as one job cannot be used.
    Then ``self.resources`` is filled with one
    ``{'hosts', 'ip_mac', 'kavlan'}`` dict per site (the dict needed by
    g5k_provisioner) and ``self.hosts`` with all the host addresses.

    :raises SystemExit: when ``wait_oar_job_start`` reports a failure.
    """
    # Local import so the block does not depend on the module's import list.
    import sys

    logger.info("Getting resources specs")
    self.resources = dict()
    self.hosts = list()

    for oar_job_id, site in self.oar_result:
        logger.info('Waiting for the reserved nodes on %s to be up', site)
        if not wait_oar_job_start(oar_job_id, site):
            logger.error('The reserved resources cannot be used.\n'
                         'The program is terminated.')
            # A bare exit() would report success (status 0) to the caller
            # and relies on the `site` builtin; fail explicitly instead.
            sys.exit(1)

    for oar_job_id, site in self.oar_result:
        logger.info('Retrieving resource information on %s', site)

        logger.debug('Retrieving hosts')
        hosts = [host.address
                 for host in get_oar_job_nodes(oar_job_id, site)]

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(oar_job_id, site)

        kavlan = None
        if len(ip_mac) == 0:
            # No subnet on this job: fall back on its kavlan, if any.
            logger.debug('Retrieving kavlan')
            kavlan = get_oar_job_kavlan(oar_job_id, site)
            if kavlan:
                ip_mac = self.get_kavlan_ip_mac(kavlan, site)

        self.resources[site] = {'hosts': hosts,
                                'ip_mac': ip_mac,
                                'kavlan': kavlan}

    for resource in self.resources.values():
        self.hosts += resource['hosts']
def prepare_global_vlan(self):
    """Store the VLAN id of the global VLAN job in ``self.global_vlan``.

    The vlans of ``self.globalvlan_job`` are queried; the first one is
    kept. When the job holds no vlan, an error is logged and the program
    exits with status 1.
    """
    job_vlans = g5k.get_oar_job_kavlan(*self.globalvlan_job)

    if len(job_vlans) == 0:
        logger.error("Could not reserve global VLAN")
        sys.exit(1)

    self.global_vlan = job_vlans[0]
    logger.debug("Global VLAN ID: {}".format(self.global_vlan))
def concretize_resources(resources, gridjob):
    """Bind the resource description to the content of an oargrid job.

    The job nodes are handed to ``concretize_nodes`` and the vlans of its
    OAR jobs, as ``{"site": ..., "vlan_id": ...}`` dicts, to
    ``concretize_networks``.

    :param resources: resource description passed through to both helpers.
    :param gridjob: oargrid job id.
    """
    job_nodes = ex5.get_oargrid_job_nodes(gridjob)
    concretize_nodes(resources, job_nodes)

    vlans = []
    for job_id, site in ex5.get_oargrid_job_oar_jobs(gridjob):
        for vlan_id in ex5.get_oar_job_kavlan(job_id, site):
            vlans.append({"site": site, "vlan_id": vlan_id})

    concretize_networks(resources, vlans)
def get_network_info_from_job_id(job_id, site, vlans, subnets):
    """Append the vlans and the subnet of one OAR job to the given lists.

    :param job_id: OAR job id.
    :param site: site of the job.
    :param vlans: list extended in place with ``{"site", "vlan_id"}`` dicts.
    :param subnets: list extended in place with the subnet description,
        when the job has subnet information.
    :returns: the same ``(vlans, subnets)`` lists.
    """
    for vlan_id in ex5.get_oar_job_kavlan(job_id, site):
        vlans.append({"site": site, "vlan_id": vlan_id})

    # NOTE(msimonin): this currently returned only one subnet
    # even if several are reserved
    # We'll need to patch execo the same way it has been patched for vlans
    ipmac, info = ex5.get_oar_job_subnets(job_id, site)
    if ipmac:
        subnet = dict(site=site, ipmac=ipmac)
        subnet.update(info)
        # Mandatory key when it comes to concretize resources
        subnet["network"] = info["ip_prefix"]
        subnets.append(subnet)
    else:
        logger.debug("No subnet information found for this job")

    return vlans, subnets
def reserve_global_vlan(self):
    """Find or submit the job holding the global VLAN.

    Global VLAN, only used for multi-site experiment (server not on the
    same site as the VM). The first current OAR job that holds a vlan is
    reused; otherwise a new job is submitted. The ``(job_id, frontend)``
    pair is stored in ``self.globalvlan_job``.
    """
    # TODO: integrate that into the "single job reservation" thing.

    # Existing job, look in all currently running jobs
    for job_id, frontend in g5k.get_current_oar_jobs():
        vlans = g5k.get_oar_job_kavlan(job_id, frontend)
        if len(vlans) <= 0:
            continue
        logger.debug(
            "Found existing Kavlan job {} (VLAN ID: {})".format(
                job_id, vlans[0]))
        self.globalvlan_job = (job_id, frontend)
        return

    # New job
    # NOTE(review): this string is not passed through str.format here, so
    # the doubled braces are sent as is -- confirm that this is intended.
    submission = g5k.OarSubmission(
        resources="{{type='kavlan-global'}}/vlan=1",
        name="VLAN {}".format(self.exp_id),
        reservation_date=self.args.start_date,
        walltime=self.args.walltime)
    submitted = g5k.oarsub([(submission, None)])
    # Exactly one (job id, site) pair is expected back.
    [(jobid, site)] = submitted
    self.globalvlan_job = (jobid, site)
def run(self):
    """Run one kadeploy test per combination given by the sweeper.

    For each combination: a result directory is created, the kadeploy3
    command is set from ``comb['version']``, a deploy job is submitted on
    ``comb['site']`` (with a kavlan when ``comb['kavlan']`` is set), the
    environment ``comb['env']`` is deployed on the job nodes and the job is
    destroyed. The outcome is logged as OK, KO or partial, and the
    combination is marked done.
    """
    sweeper = self.create_paramsweeper()

    while True:
        comb = sweeper.get_next()
        if not comb:
            break

        comb_dir = self.result_dir + '/' + slugify(comb)
        if not os.path.isdir(comb_dir):
            os.mkdir(comb_dir)
        comb_file = comb_dir + '/trace'

        g5k_configuration['kadeploy3'] = comb['version']
        logger.info('Treating combination %s', pformat(comb))

        get_version = SshProcess(
            comb['version'] + ' -v',
            comb['site'],
            connection_params=default_frontend_connection_params).run()
        logger.info(get_version.stdout)

        resources = ""
        if comb['kavlan']:
            resources += "{type='kavlan'}/vlan=1+"
        resources += "nodes=" + str(comb['n_nodes'])
        sub = OarSubmission(resources=resources,
                            job_type='deploy',
                            walltime="0:30:00",
                            name='Kadeploy_Tests')
        logger.info('Performing submission of %s on site %s',
                    resources, comb['site'])
        jobs = oarsub([(sub, comb['site'])])
        job_id, job_site = jobs[0][0], jobs[0][1]

        # Reset the results for every combination: without this, a failed
        # submission would read an unbound `undeployed` (first iteration)
        # or the values left over from the previous combination.
        deployed, undeployed = [], []
        if job_id:
            try:
                logger.info('Waiting for job to start')
                wait_oar_job_start(job_id, job_site)
                hosts = get_oar_job_nodes(job_id, job_site)
                logger.info('Deployment of %s',
                            ' '.join([host.address for host in hosts]))
                kavlan = get_oar_job_kavlan(job_id, job_site)
                if kavlan:
                    logger.info('In kavlan %s', kavlan)
                deployment = Deployment(hosts,
                                        env_name=comb['env'],
                                        vlan=kavlan)
                deployed, undeployed = deploy(deployment,
                                              stdout_handlers=[comb_file],
                                              stderr_handlers=[comb_file])
            finally:
                # Always release the job, even when the deployment raised.
                logger.info('Destroying job %s on %s',
                            str(job_id), job_site)
                oardel([(job_id, job_site)])

        if not job_id:
            # No job was obtained, so nothing could be deployed.
            logger.error('%s is KO', slugify(comb))
        elif not undeployed:
            logger.info('%s is OK', slugify(comb))
        elif not deployed:
            logger.error('%s is KO', slugify(comb))
        else:
            logger.warning('%s encountered problems with some hosts',
                           slugify(comb))

        sweeper.done(comb)
# sites = EX5.get_g5k_sites() # sites.remove('bordeaux') EX.logger.setLevel('INFO') jobs = EX5.get_current_oar_jobs(['reims']) if len(jobs) == 0: jobs = EX5.oarsub([( EX5.OarSubmission(resources = "{type=\\'kavlan\\'}/vlan=1+/nodes=2", walltime="3:00:00", job_type ='deploy'), "reims")]) EX5.wait_oar_job_start( oar_job_id=jobs[0][0], frontend=jobs[0][1]) print jobs hosts = EX5.get_oar_job_nodes(jobs[0][0], jobs[0][1]) print hosts kavlan_id = EX5.get_oar_job_kavlan(jobs[0][0], jobs[0][1]) print kavlan_id deployment = EX5.Deployment( hosts = hosts, env_file= "ubuntu-x64-1204", vlan = kavlan_id) deployed_hosts, undeployed_hosts = EX5.deploy(deployment) #deployed_hosts, undeployed_hosts = EX5.deploy(deployment, num_tries=0,check_deployed_command=True) if kavlan_id is not None: hosts = [ EX5.get_kavlan_host_name(host, kavlan_id) for host in deployed_hosts ] print hosts[0] def get_kavlan_network(kavlan, site): """Retrieve the network parameters for a given kavlan from the API""" network, mask_size = None, None equips = EX5.get_resource_attributes('/sites/' + site + '/network_equipments/')
def run(self):
    """Test kadeploy on every combination provided by the sweeper.

    Each combination gets its own result directory and trace file. A deploy
    job is submitted on ``comb['site']`` (plus one kavlan when
    ``comb['kavlan']`` is set), ``comb['env']`` is deployed on its nodes
    with the kadeploy3 command ``comb['version']``, and the job is deleted
    afterwards. The result is logged as OK, KO or partial before the
    combination is marked done.
    """
    sweeper = self.create_paramsweeper()

    while True:
        comb = sweeper.get_next()
        if not comb:
            break

        comb_dir = self.result_dir + '/' + slugify(comb)
        if not os.path.isdir(comb_dir):
            os.mkdir(comb_dir)
        comb_file = comb_dir + '/trace'

        g5k_configuration['kadeploy3'] = comb['version']
        logger.info('Treating combination %s', pformat(comb))

        get_version = SshProcess(
            comb['version'] + ' -v',
            comb['site'],
            connection_params=default_frontend_connection_params).run()
        logger.info(get_version.stdout)

        resources = ""
        if comb['kavlan']:
            resources += "{type='kavlan'}/vlan=1+"
        resources += "nodes=" + str(comb['n_nodes'])
        sub = OarSubmission(resources=resources,
                            job_type='deploy',
                            walltime="0:30:00",
                            name='Kadeploy_Tests')
        logger.info('Performing submission of %s on site %s',
                    resources, comb['site'])
        jobs = oarsub([(sub, comb['site'])])
        job_id, job_site = jobs[0][0], jobs[0][1]

        # Start from empty results on each pass. Previously `undeployed` was
        # never assigned when the submission failed, which either crashed
        # on the first pass or reused the outcome of the previous one.
        deployed, undeployed = [], []
        submitted = bool(job_id)
        if submitted:
            try:
                logger.info('Waiting for job to start')
                wait_oar_job_start(job_id, job_site)
                hosts = get_oar_job_nodes(job_id, job_site)
                logger.info('Deployment of %s',
                            ' '.join([host.address for host in hosts]))
                kavlan = get_oar_job_kavlan(job_id, job_site)
                if kavlan:
                    logger.info('In kavlan %s', kavlan)
                deployment = Deployment(hosts,
                                        env_name=comb['env'],
                                        vlan=kavlan)
                deployed, undeployed = deploy(deployment,
                                              stdout_handlers=[comb_file],
                                              stderr_handlers=[comb_file])
            finally:
                # The job is deleted whatever happened during deployment.
                logger.info('Destroying job %s on %s',
                            str(job_id), job_site)
                oardel([(job_id, job_site)])

        if submitted and not undeployed:
            logger.info('%s is OK', slugify(comb))
        elif not submitted or not deployed:
            # Either no job was obtained or no host could be deployed.
            logger.error('%s is KO', slugify(comb))
        else:
            logger.warning('%s encountered problems with some hosts',
                           slugify(comb))

        sweeper.done(comb)