Beispiel #1
0
    def _get_jobs_and_vlans(self, conf):
        """Get the jobs, vlans and nodes of an existing or a new reservation.

        A grid job named ``conf['provider']['name']`` is looked up; when none
        is found a reservation is made with ``self._make_reservation(conf)``.
        The job start is then awaited before its nodes are listed.

        :param conf: configuration mapping. The keys read here are
            ``conf['provider']['name']``,
            ``conf['provider']['role_distribution']`` and
            ``conf['resources']``.
        :returns: a ``(jobs, vlans, nodes)`` tuple where ``jobs`` is a list of
            ``(site, job_id)``, ``vlans`` a list of ``(site, vlan_id)`` for
            the jobs whose vlan lookup did not return ``None``, and ``nodes``
            the job nodes sorted by address.
        """
        provider_conf = conf['provider']
        # Look if there is a running job or make a new reservation
        gridjob, _ = EX5.planning.get_job_by_name(provider_conf['name'])

        if gridjob is None:
            gridjob = self._make_reservation(conf)
        else:
            logging.info("Using running oargrid job %s", gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(gridjob)

        nodes = sorted(EX5.get_oargrid_job_nodes(gridjob),
                       key=lambda n: n.address)

        # Checking the number of nodes given
        # the distribution policy
        self._check_nodes(nodes=nodes,
                          resources=conf['resources'],
                          mode=provider_conf['role_distribution'])

        # vlans information
        jobs = []
        vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(gridjob):
            jobs.append((site, job_id))
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                # Reuse the value fetched just above rather than querying
                # the same job a second time.
                vlans.append((site, vlan_id))
        return (jobs, vlans, nodes)
Beispiel #2
0
    def _get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary, through
        ``self._make_reservation()``.

        Reads ``self.config['name']``, ``self.config['resources']`` and
        ``self.config['role_distribution']``.

        Sets ``self.gridjob``, ``self.nodes`` (sorted by address),
        ``self.start_date`` and ``self.user`` (``None`` when the job info
        lacks the key), ``self.jobs`` (list of ``(site, job_id)``) and
        ``self.vlans`` (list of ``(site, vlan_id)``).
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably _make_reservation() assigns
            # self.gridjob itself -- confirm, its result is not used here.
            self._make_reservation()
        else:
            logging.info("Using running oargrid job %s", self.gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        # # XXX Still useful?
        # attempts = 0
        # self.nodes = None
        # while self.nodes is None and attempts < MAX_ATTEMPTS:
        #     self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
        #                             key = lambda n: n.address)
        #     attempts += 1

        self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                            key=lambda n: n.address)

        # # XXX check already done into `_deploy`.
        self._check_nodes(nodes=self.nodes,
                          resources=self.config['resources'],
                          mode=self.config['role_distribution'])

        # The job info is fetched once and serves both attributes below.
        job_info = EX5.get_oargrid_job_info(self.gridjob)

        # XXX(Ad_rien_) Start_date is never used, deadcode? - August
        # 11th 2016
        self.start_date = job_info.get('start_date')

        # filling some information about the jobs here
        self.user = job_info.get('user')

        # vlans information
        self.jobs = []
        self.vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(self.gridjob):
            self.jobs.append((site, job_id))
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                # Reuse the value fetched just above rather than querying
                # the same job a second time.
                self.vlans.append((site, vlan_id))
    def get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary, through
        ``self._make_reservation()``.

        Reads ``self.config['name']``, ``self.config['resources']`` and
        ``self.config['role_distribution']``.

        Sets ``self.gridjob``, ``self.nodes``, ``self.start_date`` and
        ``self.user`` (``None`` when the job info lacks the key),
        ``self.jobs`` (list of ``(site, job_id)``) and ``self.vlans`` (list
        of ``(site, vlan_id)``).

        :returns: ``self.gridjob``.
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably _make_reservation() assigns
            # self.gridjob itself -- confirm, its result is not used here.
            self._make_reservation()
        else:
            logger.info("Using running oargrid job %s" % style.emph(self.gridjob))

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        # NOTE: sorted() always returns a list, so the body of this loop
        # runs at most once; self.nodes stays None only if MAX_ATTEMPTS
        # is not positive.
        attempts = 0
        self.nodes = None
        while self.nodes is None and attempts < MAX_ATTEMPTS:
            self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                                key=lambda n: n.address)
            attempts += 1

        check_nodes(
            nodes=self.nodes,
            resources=self.config['resources'],
            mode=self.config['role_distribution'])

        # The job info is fetched once and serves both attributes below.
        job_info = EX5.get_oargrid_job_info(self.gridjob)

        # TODO - Start_date is never used, deadcode ? Ad_rien_ - August 11th 2016
        self.start_date = job_info.get('start_date')

        # filling some information about the jobs here
        self.user = job_info.get('user')

        # vlans information
        self.jobs = []
        self.vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(self.gridjob):
            self.jobs.append((site, job_id))
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                # Reuse the value fetched just above rather than querying
                # the same job a second time.
                self.vlans.append((site, vlan_id))

        return self.gridjob
Beispiel #4
0
def concretize_resources(resources, gridjob, reservation_type):
    """Bind the resources description to the nodes and vlans of a job.

    The nodes of the job are handed to ``concretize_nodes`` and the vlans
    found on each (job, site) pair to ``concretize_networks``.

    :param resources: mapping describing the resources. Its optional
        ``"machines"`` (each entry read for ``"cluster"``) and ``"networks"``
        (each entry read for ``"site"``) lists are used to find the site of
        an oar job.
    :param gridjob: id of the oar job when ``reservation_type == "oar"``,
        of the oargrid job otherwise.
    :param reservation_type: ``"oar"`` for an oar job; any other value is
        handled as an oargrid job.
    :raises ValueError: for an oar job whose machine and network
        descriptions do not all designate one and the same site.
    """
    if reservation_type == "oar":
        nodes = ex5.get_oar_job_nodes(gridjob)
    else:
        nodes = ex5.get_oargrid_job_nodes(gridjob)

    concretize_nodes(resources, nodes)

    if reservation_type == "oar":
        # This block is in charge of detecting the site of the oar reservation
        site_candidates = []
        for machine_description in resources.get("machines", []):
            cluster = machine_description.get("cluster")
            site_candidates.append(ex5.get_cluster_site(cluster))
        for network_description in resources.get("networks", []):
            site_candidates.append(network_description.get("site", "unknown"))
        if len(set(site_candidates)) != 1:
            # Raising a bare string is not a valid exception; raise a real
            # one carrying the message.
            raise ValueError(
                "Could not detect the g5k site of the oarjob %s" % gridjob)
        job_sites = [(gridjob, site_candidates[0])]
    else:
        job_sites = ex5.get_oargrid_job_oar_jobs(gridjob)

    vlans = []
    for (job_id, site) in job_sites:
        vlan_ids = ex5.get_oar_job_kavlan(job_id, site)
        vlans.extend([{
            "site": site,
            "vlan_id": vlan_id
        } for vlan_id in vlan_ids])

    concretize_networks(resources, vlans)
Beispiel #5
0
def get_oar_job_vm5k_resources(jobs):
    """Build the resources dict needed by vm5k_deployment.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs; each job id is
        converted with ``int()`` and the job start is awaited.
    :returns: a dict keyed by site whose values are dicts with the keys
        ``'hosts'`` (node addresses), ``'ip_mac'`` and ``'kavlan'``. A later
        job on the same site replaces the entry of an earlier one.
    """
    per_site = {}
    for raw_job_id, site in jobs:
        logger.detail('Retrieving resources from %s:%s', style.emph(site),
                      raw_job_id)
        job_id = int(raw_job_id)
        wait_oar_job_start(job_id, site)

        logger.debug('Retrieving hosts')
        addresses = []
        for node in get_oar_job_nodes(job_id, site):
            addresses.append(node.address)

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job_id, site)

        vlan = None
        # Without subnet addresses, fall back on the kavlan of the job
        if len(ip_mac) == 0:
            logger.debug('Retrieving kavlan')
            vlan = get_oar_job_kavlan(job_id, site)
            if vlan:
                assert (len(vlan) == 1)
                vlan = vlan[0]
                ip_mac = get_kavlan_ip_mac(vlan, site)

        # The first 300 (ip, mac) entries are dropped; the reason is not
        # visible from this function.
        per_site[site] = dict(hosts=addresses,
                              ip_mac=ip_mac[300:],
                              kavlan=vlan)
    return per_site
Beispiel #6
0
def get_oar_job_vm5k_resources(jobs):
    """Collect, site by site, what vm5k_deployment needs from oar jobs.

    :param jobs: iterable of ``(oar_job_id, site)`` pairs; each job id is
        converted with ``int()`` and the job start is awaited.
    :returns: a dict mapping each site to a dict holding ``'hosts'`` (the
        node addresses), ``'ip_mac'`` and ``'kavlan'``. When a site appears
        several times, the last job wins.
    """
    collected = {}
    for job_ref, site in jobs:
        logger.detail('Retrieving resources from %s:%s',
                      style.emph(site), job_ref)
        job_number = int(job_ref)
        wait_oar_job_start(job_number, site)

        logger.debug('Retrieving hosts')
        job_nodes = get_oar_job_nodes(job_number, site)
        host_addresses = [node.address for node in job_nodes]

        logger.debug('Retrieving subnet')
        ip_mac, _ = get_oar_job_subnets(job_number, site)

        kavlan_id = None
        # No subnet address available: look for a kavlan instead
        if len(ip_mac) == 0:
            logger.debug('Retrieving kavlan')
            kavlan_id = get_oar_job_kavlan(job_number, site)
            if kavlan_id:
                assert(len(kavlan_id) == 1)
                kavlan_id = kavlan_id[0]
                ip_mac = get_kavlan_ip_mac(kavlan_id, site)

        # The first 300 (ip, mac) entries are skipped; why is not visible
        # from this function.
        site_entry = {}
        site_entry['hosts'] = host_addresses
        site_entry['ip_mac'] = ip_mac[300:]
        site_entry['kavlan'] = kavlan_id
        collected[site] = site_entry
    return collected
Beispiel #7
0
def grid_reload_from_id(gridjob):
    """Reload nodes, vlans and subnets from an oargrid job id.

    :param gridjob: oargrid job id; converted with ``int()`` before use.
    :returns: a ``(nodes, vlans, subnets)`` tuple. ``vlans`` holds one
        ``{"site", "vlan_id"}`` dict per vlan id; ``subnets`` holds one dict
        per job that reported subnet addresses, made of ``"site"``,
        ``"ipmac"``, the subnet info and a ``"network"`` key copied from
        ``info["ip_prefix"]``.
    """
    logger.info("Reloading the resources from oargrid job %s", gridjob)
    grid_id = int(gridjob)
    nodes = ex5.get_oargrid_job_nodes(grid_id)

    vlans, subnets = [], []
    for job_id, site in ex5.get_oargrid_job_oar_jobs(grid_id):
        site_vlans = ex5.get_oar_job_kavlan(job_id, site)
        vlans.extend([dict(site=site, vlan_id=vlan_id)
                      for vlan_id in site_vlans])

        # NOTE(msimonin): this currently returned only one subnet
        # even if several are reserved
        # We'll need to patch execo the same way it has been patched for vlans
        ipmac, info = ex5.get_oar_job_subnets(job_id, site)
        if ipmac:
            subnet = dict(site=site, ipmac=ipmac)
            subnet.update(info)
            # Mandatory key when it comes to concretize resources
            subnet["network"] = info["ip_prefix"]
            subnets.append(subnet)
        else:
            logger.debug("No subnet information found for this job")
    return nodes, vlans, subnets
Beispiel #8
0
    def get_resources(self):
        """Collect the hosts and (ip, mac) lists of the reserved oar jobs.

        Iterates over ``self.oar_result`` (pairs of ``(oar_job_id, site)``),
        first waiting for every job to start, then gathering for each site
        the node addresses, the subnet (ip, mac) list and, when no subnet
        address is available, the kavlan and its (ip, mac) list.

        Sets ``self.resources``, a dict keyed by site with the keys
        ``'hosts'``, ``'ip_mac'`` and ``'kavlan'`` (needed by
        g5k_provisioner), and ``self.hosts``, the concatenation of the hosts
        of every site.

        :raises SystemExit: with status 1 when a job could not be started.
        """
        logger.info("Getting resources specs")
        self.resources = dict()
        self.hosts = list()

        for oar_job_id, site in self.oar_result:
            logger.info('Waiting for the reserved nodes on %s to be up' % site)
            if not wait_oar_job_start(oar_job_id, site):
                logger.error('The reserved resources cannot be used.\nThe program is terminated.')
                # Terminate with a failure status; a bare exit() relies on
                # the `site` builtin and reports success (status 0).
                raise SystemExit(1)

        for oar_job_id, site in self.oar_result:
            logger.info('Retrieving resource information on %s' % site)
            logger.debug('Retrieving hosts')
            hosts = [host.address for host in get_oar_job_nodes(oar_job_id, site)]

            logger.debug('Retrieving subnet')
            ip_mac, _ = get_oar_job_subnets(oar_job_id, site)
            kavlan = None
            # Without subnet addresses, fall back on the kavlan of the job
            if len(ip_mac) == 0:
                logger.debug('Retrieving kavlan')
                kavlan = get_oar_job_kavlan(oar_job_id, site)
                if kavlan:
                    ip_mac = self.get_kavlan_ip_mac(kavlan, site)
            self.resources[site] = {'hosts': hosts,
                                    'ip_mac': ip_mac,
                                    'kavlan': kavlan}

        for resource in self.resources.values():
            self.hosts += resource['hosts']
Beispiel #9
0
 def prepare_global_vlan(self):
     """Store the global VLAN id of ``self.globalvlan_job``.

     ``self.globalvlan_job`` is unpacked as the arguments of
     ``g5k.get_oar_job_kavlan``. The first returned id is kept in
     ``self.global_vlan``; when none is returned the error is logged and
     the program exits with status 1.
     """
     job_vlans = g5k.get_oar_job_kavlan(*self.globalvlan_job)
     if not len(job_vlans) > 0:
         logger.error("Could not reserve global VLAN")
         sys.exit(1)
     else:
         self.global_vlan = job_vlans[0]
         logger.debug("Global VLAN ID: {}".format(self.global_vlan))
Beispiel #10
0
def concretize_resources(resources, gridjob):
    """Bind the resources description to an oargrid job.

    The nodes of the job are handed to ``concretize_nodes``; the vlans of
    each (job, site) pair, as ``{"site", "vlan_id"}`` dicts, are handed to
    ``concretize_networks``.

    :param resources: resources description, passed through to both helpers.
    :param gridjob: id of the oargrid job.
    """
    concretize_nodes(resources, ex5.get_oargrid_job_nodes(gridjob))

    site_vlans = []
    for job_id, site in ex5.get_oargrid_job_oar_jobs(gridjob):
        found_ids = ex5.get_oar_job_kavlan(job_id, site)
        site_vlans.extend(
            [dict(site=site, vlan_id=found_id) for found_id in found_ids])

    concretize_networks(resources, site_vlans)
Beispiel #11
0
def get_network_info_from_job_id(job_id, site, vlans, subnets):
    """Append the vlans and the subnet of one oar job to the given lists.

    :param job_id: id of the oar job.
    :param site: site of the oar job.
    :param vlans: list extended in place with one ``{"site", "vlan_id"}``
        dict per vlan id of the job.
    :param subnets: list extended in place with at most one subnet dict
        (``"site"``, ``"ipmac"``, the subnet info and ``"network"`` copied
        from ``info["ip_prefix"]``).
    :returns: the ``(vlans, subnets)`` pair that was passed in.
    """
    found_ids = ex5.get_oar_job_kavlan(job_id, site)
    vlans.extend([dict(site=site, vlan_id=found_id) for found_id in found_ids])

    # NOTE(msimonin): this currently returned only one subnet
    # even if several are reserved
    # We'll need to patch execo the same way it has been patched for vlans
    ipmac, info = ex5.get_oar_job_subnets(job_id, site)
    if ipmac:
        entry = dict(site=site, ipmac=ipmac)
        entry.update(info)
        # Mandatory key when it comes to concretize resources
        entry["network"] = info["ip_prefix"]
        subnets.append(entry)
    else:
        logger.debug("No subnet information found for this job")
    return vlans, subnets
Beispiel #12
0
 def reserve_global_vlan(self):
     """Find or submit the job holding the global VLAN.

     Global VLAN, only used for multi-site experiment (server not on the
     same site as the VM).

     The first current oar job that reports at least one vlan is reused;
     otherwise a new ``kavlan-global`` job is submitted. Either way the
     ``(job_id, site)`` pair is stored in ``self.globalvlan_job``.
     """
     # TODO: integrate that into the "single job reservation" thing.
     # Existing job, look in all currently running jobs
     for (running_id, running_frontend) in g5k.get_current_oar_jobs():
         job_vlans = g5k.get_oar_job_kavlan(running_id, running_frontend)
         if len(job_vlans) <= 0:
             continue
         message = "Found existing Kavlan job {} (VLAN ID: {})".format(
             running_id, job_vlans[0])
         logger.debug(message)
         self.globalvlan_job = (running_id, running_frontend)
         return

     # New job
     vlan_request = g5k.OarSubmission(
         resources="{{type='kavlan-global'}}/vlan=1",
         name="VLAN {}".format(self.exp_id),
         reservation_date=self.args.start_date,
         walltime=self.args.walltime)
     # Exactly one (job id, site) pair is expected back
     [(new_id, new_site)] = g5k.oarsub([(vlan_request, None)])
     self.globalvlan_job = (new_id, new_site)
    def run(self):
        """Run one deployment test per parameter combination.

        For each combination given by the sweeper: create its result
        directory under ``self.result_dir``, select the kadeploy version,
        submit a 30 minute ``deploy`` job on ``comb['site']`` (with a kavlan
        when ``comb['kavlan']`` is set), deploy ``comb['env']`` on the job
        nodes with the output sent to the ``trace`` file of the combination,
        delete the job, and log whether the combination is OK, KO or
        partially failed. The combination is then marked as done.

        The loop stops when the sweeper returns no more combination.
        """
        sweeper = self.create_paramsweeper()

        while True:
            comb = sweeper.get_next()
            if not comb:
                break
            comb_dir = self.result_dir + '/' + slugify(comb)
            if not os.path.isdir(comb_dir):
                os.mkdir(comb_dir)
            comb_file = comb_dir + '/trace'
            g5k_configuration['kadeploy3'] = comb['version']
            logger.info('Treating combination %s', pformat(comb))
            get_version = SshProcess(
                comb['version'] + ' -v',
                comb['site'],
                connection_params=default_frontend_connection_params).run()
            logger.info(get_version.stdout)

            resources = ""
            if comb['kavlan']:
                resources += "{type='kavlan'}/vlan=1+"
            resources += "nodes=" + str(comb['n_nodes'])
            sub = OarSubmission(resources=resources,
                                job_type='deploy',
                                walltime="0:30:00",
                                name='Kadeploy_Tests')
            logger.info('Performing submission of %s on site %s', resources,
                        comb['site'])
            jobs = oarsub([(sub, comb['site'])])
            job_id, job_site = jobs[0][0], jobs[0][1]

            # Reset the outcome for every combination: without this, a
            # failed submission left `undeployed` unbound (NameError) or
            # holding the value of the previous combination.
            deployed, undeployed = [], None

            if job_id:
                try:
                    logger.info('Waiting for job to start')
                    wait_oar_job_start(job_id, job_site)
                    hosts = get_oar_job_nodes(job_id, job_site)
                    logger.info('Deployment of %s',
                                ' '.join([host.address for host in hosts]))
                    kavlan = get_oar_job_kavlan(job_id, job_site)
                    if kavlan:
                        logger.info('In kavlan %s', kavlan)
                    deployment = Deployment(hosts,
                                            env_name=comb['env'],
                                            vlan=kavlan)
                    deployed, undeployed = deploy(deployment,
                                                  stdout_handlers=[comb_file],
                                                  stderr_handlers=[comb_file])

                finally:
                    # The job is always released, even when the deployment
                    # raised.
                    logger.info('Destroying job %s on %s', str(job_id),
                                job_site)
                    oardel([(job_id, job_site)])

            # `undeployed is None` means no deployment was attempted, which
            # must not be reported as a success.
            if undeployed is not None and len(undeployed) == 0:
                logger.info('%s is OK', slugify(comb))
            elif len(deployed) == 0:
                logger.error('%s is KO', slugify(comb))
            else:
                logger.warning('%s encountered problems with some hosts',
                               slugify(comb))

            sweeper.done(comb)
# sites = EX5.get_g5k_sites()
# sites.remove('bordeaux')


# Example script: reuse (or submit) a deploy job with a kavlan on reims,
# deploy an environment on its nodes and print the name of the first host.
EX.logger.setLevel('INFO')
jobs = EX5.get_current_oar_jobs(['reims'])

if not jobs:
    # No current job on reims: submit one and wait for it to start
    jobs = EX5.oarsub([(EX5.OarSubmission(resources="{type=\\'kavlan\\'}/vlan=1+/nodes=2",
                                          walltime="3:00:00",
                                          job_type='deploy'),
                        "reims")])
    EX5.wait_oar_job_start(oar_job_id=jobs[0][0], frontend=jobs[0][1])

# Single-argument print(...) gives the same output on Python 2 and 3
print(jobs)
hosts = EX5.get_oar_job_nodes(jobs[0][0], jobs[0][1])
print(hosts)
kavlan_id = EX5.get_oar_job_kavlan(jobs[0][0], jobs[0][1])
print(kavlan_id)
deployment = EX5.Deployment(hosts=hosts, env_file="ubuntu-x64-1204", vlan=kavlan_id)

deployed_hosts, undeployed_hosts = EX5.deploy(deployment)
#deployed_hosts, undeployed_hosts = EX5.deploy(deployment, num_tries=0,check_deployed_command=True)

if kavlan_id is not None:
    # Inside a kavlan, the deployed hosts are addressed by their kavlan name
    hosts = [EX5.get_kavlan_host_name(host, kavlan_id) for host in deployed_hosts]
print(hosts[0])


def get_kavlan_network(kavlan, site):
    """Retrieve the network parameters for a given kavlan from the API"""
    # NOTE(review): only the beginning of this function is visible here;
    # how `kavlan`, `network` and `mask_size` are used afterwards cannot be
    # told from this excerpt.
    network, mask_size = None, None
    # Fetch the attributes of the network equipments of the site
    equips = EX5.get_resource_attributes('/sites/' + site + '/network_equipments/')
    def run(self):
        """Run one deployment test per parameter combination.

        For each combination given by the sweeper: create its result
        directory under ``self.result_dir``, select the kadeploy version,
        submit a 30 minute ``deploy`` job on ``comb['site']`` (with a kavlan
        when ``comb['kavlan']`` is set), deploy ``comb['env']`` on the job
        nodes with the output sent to the ``trace`` file of the combination,
        delete the job, and log whether the combination is OK, KO or
        partially failed. The combination is then marked as done.

        The loop stops when the sweeper returns no more combination.
        """
        sweeper = self.create_paramsweeper()

        while True:
            comb = sweeper.get_next()
            if not comb:
                break
            comb_dir = self.result_dir + '/' + slugify(comb)
            if not os.path.isdir(comb_dir):
                os.mkdir(comb_dir)
            comb_file = comb_dir + '/trace'
            g5k_configuration['kadeploy3'] = comb['version']
            logger.info('Treating combination %s', pformat(comb))
            get_version = SshProcess(comb['version'] + ' -v',
                                     comb['site'],
                                     connection_params=default_frontend_connection_params).run()
            logger.info(get_version.stdout)

            resources = ""
            if comb['kavlan']:
                resources += "{type='kavlan'}/vlan=1+"
            resources += "nodes=" + str(comb['n_nodes'])
            sub = OarSubmission(resources=resources,
                                job_type='deploy',
                                walltime="0:30:00",
                                name='Kadeploy_Tests')
            logger.info('Performing submission of %s on site %s',
                        resources, comb['site'])
            jobs = oarsub([(sub, comb['site'])])
            job_id, job_site = jobs[0][0], jobs[0][1]

            # Reset the outcome for every combination: without this, a
            # failed submission left `undeployed` unbound (NameError) or
            # holding the value of the previous combination.
            deployed, undeployed = [], None

            if job_id:
                try:
                    logger.info('Waiting for job to start')
                    wait_oar_job_start(job_id, job_site)
                    hosts = get_oar_job_nodes(job_id, job_site)
                    logger.info('Deployment of %s',
                                ' '.join([host.address for host in hosts]))
                    kavlan = get_oar_job_kavlan(job_id, job_site)
                    if kavlan:
                        logger.info('In kavlan %s', kavlan)
                    deployment = Deployment(hosts, env_name=comb['env'],
                                            vlan=kavlan)
                    deployed, undeployed = deploy(deployment,
                                                  stdout_handlers=[comb_file],
                                                  stderr_handlers=[comb_file])

                finally:
                    # The job is always released, even when the deployment
                    # raised.
                    logger.info('Destroying job %s on %s', str(job_id),
                                job_site)
                    oardel([(job_id, job_site)])

            # `undeployed is None` means no deployment was attempted, which
            # must not be reported as a success.
            if undeployed is not None and len(undeployed) == 0:
                logger.info('%s is OK', slugify(comb))
            elif len(deployed) == 0:
                logger.error('%s is KO', slugify(comb))
            else:
                logger.warning('%s encountered problems with some hosts',
                               slugify(comb))

            sweeper.done(comb)