Example #1
def deploy_node(job_id, site, submission):
    # Deploy the hermit environment image on the first node of the OAR job;
    # the submission argument is unused here.
    node = get_oar_job_nodes(job_id, site)[0]
    deployment = Deployment(hosts=[node],
                            env_file="unikernels/hermit/debian10-x64-nfs-hermit.env",
                            user="******",
                            other_options="-k")
    node.user = "******"
    deploy(deployment)
    return node
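
A minimal usage sketch, assuming the usual execo_g5k reservation flow; the site name, walltime and resource string below are placeholders:

# Hypothetical usage of deploy_node; site and resources are placeholders.
from execo_g5k import OarSubmission, oarsub, oardel, wait_oar_job_start

site = "nancy"
sub = OarSubmission(resources="nodes=1", job_type="deploy", walltime="1:00:00")
[(job_id, site)] = oarsub([(sub, site)])
try:
    wait_oar_job_start(job_id, site)
    node = deploy_node(job_id, site, sub)
    print("Deployed node:", node.address)
finally:
    oardel([(job_id, site)])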
Example #2
    def _launch_kadeploy(self, max_tries=10, check_deploy=True):
        """Create an execo_g5k.Deployment object, launch the deployment and
        return a tuple (deployed_hosts, undeployed_hosts)
        """

        # If the provisioner was given oar_job_ids but no config_file_path,
        # the configs attribute was never created, so there is nothing to deploy
        if not hasattr(self, 'configs'):
            logger.info('The list of %s hosts: \n%s', len(self.hosts),
                        hosts_list(self.hosts, separator='\n'))
            return

        logger.info('Deploying %s hosts \n%s', len(self.hosts),
                    hosts_list(self.hosts, separator='\n'))
        try:
            deployment = Deployment(
                hosts=[Host(canonical_host_name(host)) for host in self.hosts],
                env_file=self.configs['custom_image'],
                env_name=self.configs['cloud_provider_image'])
        except ValueError:
            logger.error(
                "Please set either custom_image or cloud_provider_image "
                "in the config file.")
            sys.exit(1)

        # Activate kadeploy output log if log level is debug
        if logger.getEffectiveLevel() <= 10:  # 10 == logging.DEBUG
            stdout = [sys.stdout]
            stderr = [sys.stderr]
        else:
            stdout = None
            stderr = None

        # deploy() iterates over the frontends to run kadeploy, so the
        # deployments are performed sequentially, site after site
        deployed_hosts, undeployed_hosts = deploy(
            deployment,
            stdout_handlers=stdout,
            stderr_handlers=stderr,
            num_tries=max_tries,
            check_deployed_command=check_deploy)
        deployed_hosts = list(deployed_hosts)
        undeployed_hosts = list(undeployed_hosts)
        # Renaming hosts if a kavlan is used
        # if self.kavlan:
        #     for i, host in enumerate(deployed_hosts):
        #         deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
        #     for i, host in enumerate(undeployed_hosts):
        #         undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
        logger.info('Deployed %s hosts successfully', len(deployed_hosts))
        cr = '\n' if len(undeployed_hosts) > 0 else ''
        logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                    hosts_list(undeployed_hosts))

        return deployed_hosts, undeployed_hosts
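
For reference, the debug check above can be triggered by lowering execo's logger level before calling the method; a minimal sketch, assuming execo's shared logger:

# Minimal sketch: stream kadeploy output by enabling debug logging.
import logging
from execo.log import logger  # execo's shared logger (assumed import path)

logger.setLevel(logging.DEBUG)  # getEffectiveLevel() <= 10 now holds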
Example #3
    def _launch_kadeploy(self, max_tries=1, check_deploy=True):
        """Create a execo_g5k.Deployment object, launch the deployment and
        return a tuple (deployed_hosts, undeployed_hosts)
        """
        logger.info('Deploying %s hosts \n%s', len(self.hosts),
                    hosts_list(self.hosts))
        deployment = Deployment(
            hosts=[Host(canonical_host_name(host)) for host in self.hosts],
            env_file=self.env_file,
            env_name=self.env_name,
            user=self.env_user,
            vlan=self.kavlan)
        # Activate kadeploy output log if log level is debug
        if logger.getEffectiveLevel() <= 10:  # 10 == logging.DEBUG
            stdout = [sys.stdout]
            stderr = [sys.stderr]
        else:
            stdout = None
            stderr = None

        deployed_hosts, undeployed_hosts = deploy(
            deployment,
            stdout_handlers=stdout,
            stderr_handlers=stderr,
            num_tries=max_tries,
            check_deployed_command=check_deploy)
        deployed_hosts = list(deployed_hosts)
        undeployed_hosts = list(undeployed_hosts)
        # Renaming hosts if a kavlan is used
        if self.kavlan:
            for i, host in enumerate(deployed_hosts):
                deployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
            for i, host in enumerate(undeployed_hosts):
                undeployed_hosts[i] = get_kavlan_host_name(host, self.kavlan)
        logger.info('Deployed %s hosts \n%s', len(deployed_hosts),
                    hosts_list(deployed_hosts))
        cr = '\n' if len(undeployed_hosts) > 0 else ''
        logger.info('Failed %s hosts %s%s', len(undeployed_hosts), cr,
                    hosts_list(undeployed_hosts))
        self._update_hosts_state(deployed_hosts, undeployed_hosts)
        return deployed_hosts, undeployed_hosts
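
The renaming that was commented out in Example #2 is active here: each host address is rewritten to its VLAN-specific name. An illustrative sketch (hostname and VLAN id are examples):

# Illustration of kavlan renaming (hostname and VLAN id are examples).
from execo import Host
from execo_g5k import get_kavlan_host_name

host = Host("griffon-1.nancy.grid5000.fr")
renamed = get_kavlan_host_name(host, 4)
# expected address form: griffon-1-kavlan-4.nancy.grid5000.fr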
    def run(self):
        sweeper = self.create_paramsweeper()

        while True:
            comb = sweeper.get_next()
            if not comb:
                break
            comb_dir = self.result_dir + '/' + slugify(comb)
            if not os.path.isdir(comb_dir):
                os.mkdir(comb_dir)
            comb_file = comb_dir + '/trace'
            g5k_configuration['kadeploy3'] = comb['version']
            logger.info('Treating combination %s', pformat(comb))
            get_version = SshProcess(
                comb['version'] + ' -v',
                comb['site'],
                connection_params=default_frontend_connection_params).run()
            logger.info(get_version.stdout)

            resources = ""
            if comb['kavlan']:
                resources += "{type='kavlan'}/vlan=1+"
            resources += "nodes=" + str(comb['n_nodes'])
            sub = OarSubmission(resources=resources,
                                job_type='deploy',
                                walltime="0:30:00",
                                name='Kadeploy_Tests')
            logger.info('Performing submission of %s on site %s', resources,
                        comb['site'])
            jobs = oarsub([(sub, comb['site'])])

            if jobs[0][0]:
                try:
                    logger.info('Waiting for job to start')
                    wait_oar_job_start(jobs[0][0], jobs[0][1])
                    hosts = get_oar_job_nodes(jobs[0][0], jobs[0][1])
                    logger.info('Deployment of %s',
                                ' '.join([host.address for host in hosts]))
                    kavlan = get_oar_job_kavlan(jobs[0][0], jobs[0][1])
                    if kavlan:
                        logger.info('In kavlan %s', kavlan)
                    deployment = Deployment(hosts,
                                            env_name=comb['env'],
                                            vlan=kavlan)
                    deployed, undeployed = deploy(deployment,
                                                  stdout_handlers=[comb_file],
                                                  stderr_handlers=[comb_file])

                finally:
                    logger.info('Destroying job %s on %s', str(jobs[0][0]),
                                jobs[0][1])
                    oardel([(jobs[0][0], jobs[0][1])])
            else:
                # Failed submission: treat as nothing deployed
                deployed, undeployed = [], []

            if len(deployed) == 0:
                logger.error('%s is KO', slugify(comb))
            elif len(undeployed) == 0:
                logger.info('%s is OK', slugify(comb))
            else:
                logger.warning('%s encountered problems with some hosts',
                               slugify(comb))

            sweeper.done(comb)
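
For illustration, the OAR resource strings built above take these forms:

# Illustration of the resource strings built in the loop above.
for kavlan, n_nodes in [(True, 4), (False, 4)]:
    resources = ("{type='kavlan'}/vlan=1+" if kavlan else "") + "nodes=%d" % n_nodes
    print(resources)
# prints:
# {type='kavlan'}/vlan=1+nodes=4
# nodes=4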
Example #5
    def run(self):
        """Inherited method, put here the code for running the engine"""
        self.cluster = self.args[0]
        self.site = get_cluster_site(self.cluster)
        if self.options.oar_job_id:
            self.oar_job_id = self.options.oar_job_id
        else:
            self.oar_job_id = None

        try:
            # Creation of the main iterator which is used for the first control loop.
            # You need to have a method called define_parameters that returns a list of parameter dicts.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:
                # If no job, we make a reservation and prepare the hosts for the experiments
                if job_is_dead or self.oar_job_id is None:
                    self.make_reservation()
                # Retrieving the hosts and subnets parameters
                self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)
                # Hosts deployment
                deployed, undeployed = deploy(
                    Deployment(self.hosts,
                               env_file="/home/mliroz/deploys/hadoop6.env"))
                logger.info("%i deployed, %i undeployed" %
                            (len(deployed), len(undeployed)))
                if len(deployed) == 0:
                    break
                # System configuration => see the execo_g5k.topology module
                attr = get_host_attributes(self.cluster + '-1')

                ## SETUP FINISHED

                # Getting the next combination
                comb = self.sweeper.get_next()
                self.prepare_dataset(comb)
                self.xp(comb)
                # subloop over the combinations that have the same sizes
                while True:
                    newcomb = self.sweeper.get_next(
                        lambda r: [subcomb for subcomb in r
                                   if subcomb['sizes'] == comb['sizes']])
                    if newcomb:
                        try:
                            self.xp(newcomb)
                        except Exception:
                            break
                    else:
                        break

                if get_oar_job_info(self.oar_job_id,
                                    self.frontend)['state'] == 'Error':
                    job_is_dead = True

        finally:
            if self.oar_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oardel([(self.oar_job_id, self.frontend)])
                else:
                    logger.info('Keeping job alive for debugging')
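
As the comment in the try block notes, the engine expects a define_parameters method; a hypothetical sketch of such a method, creating the self.sweeper the loop relies on (the 'sizes' key comes from the code above, everything else is illustrative):

# Hypothetical define_parameters; values are illustrative.
import os
from execo_engine import ParamSweeper, sweep

def define_parameters(self):
    parameters = {'sizes': [10, 100], 'replication': [1, 3]}
    self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"),
                                sweep(parameters))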
Example #6
    def run(self):
        """ """
        if self.options.oargrid_job_id:
            self.oargrid_job_id = self.options.oargrid_job_id
        else:
            self.oargrid_job_id = None

        try:
            # Creation of the main iterator which is used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:
                # If no job, we make a reservation and prepare the hosts for the experiments
                if self.oargrid_job_id is None:
                    self.make_reservation()
                # Wait for the job to start
                logger.info('Waiting for the job to start')
                wait_oargrid_job_start(self.oargrid_job_id)
                # Retrieving the hosts and subnets parameters
                self.hosts = get_oargrid_job_nodes(self.oargrid_job_id)
                # Hosts deployment and configuration

                default_connection_params['user'] = '******'

                logger.info("Start hosts configuration")
                ex_log.setLevel('INFO')
                deployment = Deployment(
                    hosts=self.hosts,
                    env_file='/home/sirimie/env/mywheezy-x64-base.env')
                self.hosts, _ = deploy(deployment)

                Remote("rm -f /home/Work/sgcbntier/paasage_demo/csv/REQTASK_*",
                       self.hosts).run()
                Remote(
                    "rm -f /home/Work/sgcbntier/paasage_demo/platform_aws.xml",
                    self.hosts).run()
                Remote("rm -f /home/Work/sgcbntier/paasage_demo/cloud_ec2.xml",
                       self.hosts).run()

                Put(self.hosts,
                    ["run_all_execo.py", "xml_gen_execo.py", "conf.xml",
                     "platform_aws.xml", "cloud_ec2.xml"],
                    remote_location="/home/Work/sgcbntier/paasage_demo/").run()
                logger.info("Done")

                if len(self.hosts) == 0:
                    break

                # Initializing the resources and threads
                available_hosts = [
                    host for host in self.hosts for i in range(
                        get_host_attributes(host)['architecture']['smt_size'])
                ]

                threads = {}

                # Creating the unique folder for storing the results
                comb_dir = self.result_dir + '/csv_results'
                if not os.path.exists(comb_dir):
                    os.mkdir(comb_dir)

                # Checking that the job is running and not in Error
                while self.is_job_alive() or len(threads) > 0:
                    job_is_dead = False
                    while self.options.n_nodes > len(available_hosts):
                        tmp_threads = dict(threads)
                        for t in tmp_threads:
                            if not t.is_alive():
                                available_hosts.append(tmp_threads[t]['host'])
                                del threads[t]
                        sleep(5)
                        if not self.is_job_alive():
                            job_is_dead = True
                            break
                    if job_is_dead:
                        break

                    # Getting the next combination
                    comb = self.sweeper.get_next()
                    if not comb:
                        while len(threads) > 0:
                            tmp_threads = dict(threads)
                            for t in tmp_threads:
                                if not t.is_alive():
                                    del threads[t]
                            logger.info('Waiting for threads to complete')
                            sleep(20)
                        break

                    host = available_hosts[0]
                    available_hosts = available_hosts[1:]

                    t = Thread(target=self.workflow,
                               args=(comb, host, comb_dir))
                    threads[t] = {'host': host}
                    t.daemon = True
                    t.start()

                if not self.is_job_alive():
                    job_is_dead = True

                if job_is_dead:
                    self.oargrid_job_id = None

        finally:
            if self.oargrid_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oargriddel([self.oargrid_job_id])
                else:
                    logger.info('Keeping job alive for debugging')
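
The available_hosts expansion above creates one scheduling slot per hardware thread, so each host can run several workflow threads at once; an illustrative sketch (names and smt_size values are examples):

# Illustration of the host expansion (names and values are examples).
hosts = ["parasilo-1", "parasilo-2"]
# stands in for get_host_attributes(h)['architecture']['smt_size']
smt_size = {"parasilo-1": 2, "parasilo-2": 2}
available_hosts = [h for h in hosts for _ in range(smt_size[h])]
print(available_hosts)  # ['parasilo-1', 'parasilo-1', 'parasilo-2', 'parasilo-2']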