Esempio n. 1
0
File: g5k.py Progetto: ivotron/enos
    def _get_jobs_and_vlans(self, conf):
        """Get the jobs, vlans and nodes from an existing or a new job.

        Looks up a job named ``conf['provider']['name']``; if none is
        found a reservation is made through ``self._make_reservation``.
        The call then blocks until the job has started.

        :param conf: configuration mapping. Reads ``conf['provider']``
            (keys ``name`` and ``role_distribution``) and
            ``conf['resources']``.
        :returns: a ``(jobs, vlans, nodes)`` tuple where ``jobs`` is a
            list of ``(site, job_id)``, ``vlans`` is a list of
            ``(site, vlan_id)`` restricted to the jobs for which a
            kavlan id was returned, and ``nodes`` is the list of job
            nodes sorted by their ``address`` attribute.
        """

        provider_conf = conf['provider']
        # Look if there is a running job or make a new reservation
        gridjob, _ = EX5.planning.get_job_by_name(provider_conf['name'])

        if gridjob is None:
            gridjob = self._make_reservation(conf)
        else:
            logging.info("Using running oargrid job %s" % gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(gridjob)

        nodes = sorted(EX5.get_oargrid_job_nodes(gridjob),
                       key=lambda n: n.address)

        # Checking the number of nodes given
        # the distribution policy
        self._check_nodes(nodes=nodes,
                          resources=conf['resources'],
                          mode=provider_conf['role_distribution'])

        # vlans information
        job_sites = EX5.get_oargrid_job_oar_jobs(gridjob)
        jobs = []
        vlans = []
        for job_id, site in job_sites:
            jobs.append((site, job_id))
            # Query the kavlan once per job and reuse the answer instead
            # of issuing the same request a second time.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                vlans.append((site, vlan_id))
        return (jobs, vlans, nodes)
Esempio n. 2
0
def get_or_create_job(resources, job_name, walltime):
    """Return the job named *job_name*, reserving it first if needed.

    The job is looked up by name; when the lookup yields ``None`` a new
    reservation is made with ``make_reservation``. The function blocks
    until the job has started and then returns it.
    """
    existing, _unused = ex5.planning.get_job_by_name(job_name)
    gridjob = (make_reservation(resources, job_name, walltime)
               if existing is None else existing)
    logging.info("Waiting for oargridjob %s to start" % gridjob)
    ex5.wait_oargrid_job_start(gridjob)
    return gridjob
Esempio n. 3
0
def grid_get_or_create_job(job_name, walltime, reservation_date, queue,
                           job_type, machines, networks):
    """Return the job named *job_name*, reserving it first if needed.

    When no job is found under that name, the remaining arguments are
    forwarded unchanged to ``grid_make_reservation``. The function blocks
    until the job has started and then returns it.
    """
    found, _unused = ex5.planning.get_job_by_name(job_name)
    if found is not None:
        gridjob = found
    else:
        gridjob = grid_make_reservation(
            job_name, walltime, reservation_date, queue, job_type,
            machines, networks)
    logger.info("Waiting for oargridjob %s to start" % gridjob)
    ex5.wait_oargrid_job_start(gridjob)
    return gridjob
Esempio n. 4
0
File: g5k.py Progetto: asimonet/enos
    def _get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary, then block until
        the job has started.

        Reads ``self.config`` (keys ``name``, ``resources`` and
        ``role_distribution``) and sets the following attributes:

        - ``self.gridjob``: the job found by name.
        - ``self.nodes``: the job nodes sorted by ``address``.
        - ``self.start_date`` / ``self.user``: the matching entries of the
          job info, or ``None`` when the entry is absent.
        - ``self.jobs``: list of ``(site, job_id)``.
        - ``self.vlans``: list of ``(site, vlan_id)`` for the jobs for
          which a kavlan id was returned.
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably `_make_reservation` stores the new
            # job in `self.gridjob`; its return value is not used here.
            self._make_reservation()
        else:
            logging.info("Using running oargrid job %s" % self.gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                            key=lambda n: n.address)

        # XXX check already done into `_deploy`.
        self._check_nodes(nodes=self.nodes,
                          resources=self.config['resources'],
                          mode=self.config['role_distribution'])

        # Fetch the job info once; both `start_date` and `user` are read
        # from the same answer.
        job_info = EX5.get_oargrid_job_info(self.gridjob)

        # XXX(Ad_rien_) Start_date is never used, deadcode? - August
        # 11th 2016
        self.start_date = job_info.get('start_date')

        # filling some information about the jobs here
        self.user = job_info.get('user')

        # vlans information
        job_sites = EX5.get_oargrid_job_oar_jobs(self.gridjob)
        self.jobs = []
        self.vlans = []
        for job_id, site in job_sites:
            self.jobs.append((site, job_id))
            # Query the kavlan once per job and reuse the answer.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                self.vlans.append((site, vlan_id))
Esempio n. 5
0
def get_or_create_job(resources, job_name, walltime, reservation_date, queue,
                      reservation_type):
    """Return the job named *job_name*, reserving it first if needed.

    When no job is found under that name, a reservation is made with
    ``make_reservation``. The function then blocks until the job has
    started: the OAR wait is used when *reservation_type* is ``"oar"``,
    the oargrid wait for any other value.
    """
    job, _unused = ex5.planning.get_job_by_name(job_name)
    if job is None:
        job = make_reservation(resources, job_name, walltime,
                               reservation_date, queue, reservation_type)

    # Anything that is not a plain OAR reservation goes through oargrid.
    if reservation_type != "oar":
        logger.info("Waiting for oargridjob %s to start" % job)
        ex5.wait_oargrid_job_start(job)
        return job

    logger.info("Waiting for oarjob %s to start" % job)
    ex5.wait_oar_job_start(job)
    return job
Esempio n. 6
0
    def get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary, then block until
        the job has started.

        Reads ``self.config`` (keys ``name``, ``resources`` and
        ``role_distribution``) and sets ``self.gridjob``, ``self.nodes``,
        ``self.start_date``, ``self.user``, ``self.jobs`` (list of
        ``(site, job_id)``) and ``self.vlans`` (list of
        ``(site, vlan_id)`` for the jobs for which a kavlan id was
        returned).

        :returns: ``self.gridjob``
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably `_make_reservation` stores the new
            # job in `self.gridjob`; its return value is not used here.
            self._make_reservation()
        else:
            logger.info("Using running oargrid job %s" % style.emph(self.gridjob))

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        # Retry the node lookup while it answers None, at most
        # MAX_ATTEMPTS times. The None test must be made on the raw
        # answer: `sorted()` never returns None and raises on None, so
        # testing the sorted result could never trigger a retry.
        nodes = None
        attempts = 0
        while nodes is None and attempts < MAX_ATTEMPTS:
            nodes = EX5.get_oargrid_job_nodes(self.gridjob)
            attempts += 1
        self.nodes = (None if nodes is None
                      else sorted(nodes, key=lambda n: n.address))

        check_nodes(
            nodes=self.nodes,
            resources=self.config['resources'],
            mode=self.config['role_distribution'])

        # Fetch the job info once; both `start_date` and `user` are read
        # from the same answer.
        job_info = EX5.get_oargrid_job_info(self.gridjob)

        # TODO - Start_date is never used, deadcode ? Ad_rien_ - August 11th 2016
        self.start_date = job_info.get('start_date')

        # filling some information about the jobs here
        self.user = job_info.get('user')

        # vlans information
        job_sites = EX5.get_oargrid_job_oar_jobs(self.gridjob)
        self.jobs = []
        self.vlans = []
        for job_id, site in job_sites:
            self.jobs.append((site, job_id))
            # Query the kavlan once per job and reuse the answer.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                self.vlans.append((site, vlan_id))

        return self.gridjob
Esempio n. 7
0
File: utils.py Progetto: badock/vm5k
def get_oargrid_job_vm5k_resources(oargrid_job_id):
    """Retrieve the hosts list and (ip, mac) list by sites from an
    oargrid_job_id and return the resources dict needed by vm5k_deployment,
    with kavlan-global if used in the oargrid job.

    :param oargrid_job_id: the oargrid job id; converted with ``int()``.
    :returns: the dict built by ``get_oar_job_vm5k_resources`` for the
        job's ``(oar_job_id, site)`` pairs. When one entry has a
        ``kavlan`` value of 10 or more, a ``'global'`` entry holding that
        ``kavlan``, the entry's ``ip_mac`` and its ``site`` is added.

    Blocks until the oargrid job has started.
    """
    oargrid_job_id = int(oargrid_job_id)
    logger.info('Waiting job start')
    wait_oargrid_job_start(oargrid_job_id)
    resources = get_oar_job_vm5k_resources([(oar_job_id, site)
                                            for oar_job_id, site in
                                            get_oargrid_job_oar_jobs(oargrid_job_id)])
    kavlan_global = None
    # `items()` works on both Python 2 and 3 (`iteritems()` is Python 2
    # only). The dict is only modified after the loop has ended.
    for site, res in resources.items():
        kavlan = res['kavlan']
        # A missing (None) kavlan never counts as global; comparing None
        # with an int raises TypeError on Python 3.
        # NOTE(review): ids >= 10 presumably denote a global kavlan;
        # confirm against the Grid'5000 kavlan numbering.
        if kavlan is not None and kavlan >= 10:
            kavlan_global = {'kavlan': kavlan,
                             'ip_mac': res['ip_mac'],
                             'site': site}
            break
    if kavlan_global:
        resources['global'] = kavlan_global

    return resources
Esempio n. 8
0
def get_oargrid_job_vm5k_resources(oargrid_job_id):
    """Retrieve the hosts list and (ip, mac) list by sites from an
    oargrid_job_id and return the resources dict needed by vm5k_deployment,
    with kavlan-global if used in the oargrid job.

    :param oargrid_job_id: the oargrid job id; converted with ``int()``.
    :returns: the dict built by ``get_oar_job_vm5k_resources`` for the
        job's ``(oar_job_id, site)`` pairs. When one entry has a
        ``kavlan`` value of 10 or more, a ``'global'`` entry holding that
        ``kavlan``, the entry's ``ip_mac`` and its ``site`` is added.

    Blocks until the oargrid job has started.
    """
    oargrid_job_id = int(oargrid_job_id)
    logger.info('Waiting job start')
    wait_oargrid_job_start(oargrid_job_id)
    resources = get_oar_job_vm5k_resources([(oar_job_id, site)
                                            for oar_job_id, site in
                                            get_oargrid_job_oar_jobs(oargrid_job_id)])
    kavlan_global = None
    # `items()` works on both Python 2 and 3 (`iteritems()` is Python 2
    # only). The dict is only modified after the loop has ended.
    for site, res in resources.items():
        kavlan = res['kavlan']
        # A missing (None) kavlan never counts as global; comparing None
        # with an int raises TypeError on Python 3.
        # NOTE(review): ids >= 10 presumably denote a global kavlan;
        # confirm against the Grid'5000 kavlan numbering.
        if kavlan is not None and kavlan >= 10:
            kavlan_global = {'kavlan': kavlan,
                             'ip_mac': res['ip_mac'],
                             'site': site}
            break
    if kavlan_global:
        resources['global'] = kavlan_global

    return resources
Esempio n. 9
0
    def run(self):
        """Run every remaining parameter combination on reserved hosts.

        Starts from the job id given in ``self.options.oargrid_job_id``
        (if any). While ``self.sweeper`` still has remaining combinations,
        the method makes a reservation when there is no current job, waits
        for the job to start, deploys and configures its hosts, then
        starts one daemon thread running ``self.workflow`` per
        combination. When the job is found dead, the job id is reset so
        the next pass of the outer loop makes a new reservation.

        On exit (normal or through an exception) the current job is
        deleted with ``oargriddel`` unless ``self.options.keep_alive`` is
        set.
        """
        # Reuse the job id given in the options, if any.
        if self.options.oargrid_job_id:
            self.oargrid_job_id = self.options.oargrid_job_id
        else:
            self.oargrid_job_id = None

        try:
            # Creation of the main iterator which is used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:
                # If no job, we make a reservation and prepare the hosts for the experiments
                # NOTE(review): presumably `make_reservation` sets
                # `self.oargrid_job_id`; its return value is not used.
                if self.oargrid_job_id is None:
                    self.make_reservation()
                # Wait for the job to start
                logger.info('Waiting that the job start')
                wait_oargrid_job_start(self.oargrid_job_id)
                # Retrieving the hosts and subnets parameters
                self.hosts = get_oargrid_job_nodes(self.oargrid_job_id)
                # Hosts deployment and configuration

                default_connection_params['user'] = '******'

                logger.info("Start hosts configuration")
                ex_log.setLevel('INFO')
                deployment = Deployment(
                    hosts=self.hosts,
                    env_file='/home/sirimie/env/mywheezy-x64-base.env')
                # Keep only the first element returned by `deploy` as the
                # host list used from here on.
                self.hosts, _ = deploy(deployment)

                # Remove files left on the hosts before uploading the
                # scripts and configuration files.
                Remote("rm -f /home/Work/sgcbntier/paasage_demo/csv/REQTASK_*",
                       self.hosts).run()
                Remote(
                    "rm -f /home/Work/sgcbntier/paasage_demo/platform_aws.xml",
                    self.hosts).run()
                Remote("rm -f /home/Work/sgcbntier/paasage_demo/cloud_ec2.xml",
                       self.hosts).run()

                Put(self.hosts, [
                    "run_all_execo.py", "xml_gen_execo.py", "conf.xml",
                    "platform_aws.xml", "cloud_ec2.xml"
                ],
                    remote_location="/home/Work/sgcbntier/paasage_demo/").run(
                    )
                logger.info("Done")

                # NOTE(review): this emptiness check only happens after
                # the Remote/Put calls above have already been issued.
                if len(self.hosts) == 0:
                    break

                # Initializing the resources and threads
                # Each host appears `smt_size` times in the list, so it
                # can be handed out to that many threads at once.
                available_hosts = [
                    host for host in self.hosts for i in range(
                        get_host_attributes(host)['architecture']['smt_size'])
                ]

                # Maps each started Thread to {'host': <host it runs on>}.
                threads = {}

                # Creating the unique folder for storing the results
                comb_dir = self.result_dir + '/csv_results'
                if not os.path.exists(comb_dir):
                    os.mkdir(comb_dir)

                # Checking that the job is running and not in Error
                while self.is_job_alive() or len(threads.keys()) > 0:
                    job_is_dead = False
                    # Wait until at least `n_nodes` host slots are free,
                    # giving back the slots of the threads that finished.
                    while self.options.n_nodes > len(available_hosts):
                        # Iterate over a copy: entries are deleted from
                        # `threads` inside the loop.
                        tmp_threads = dict(threads)
                        for t in tmp_threads:
                            if not t.is_alive():
                                available_hosts.append(tmp_threads[t]['host'])
                                del threads[t]
                        sleep(5)
                        if not self.is_job_alive():
                            job_is_dead = True
                            break
                    if job_is_dead:
                        break

                    # Getting the next combination
                    comb = self.sweeper.get_next()
                    if not comb:
                        # Nothing left to start: wait for the running
                        # threads to finish, then leave the job loop.
                        while len(threads.keys()) > 0:
                            tmp_threads = dict(threads)
                            for t in tmp_threads:
                                if not t.is_alive():
                                    del threads[t]
                            logger.info('Waiting for threads to complete')
                            sleep(20)
                        break

                    # Take the first free host slot for this combination.
                    host = available_hosts[0]
                    available_hosts = available_hosts[1:]

                    # Run the workflow for this combination in its own
                    # daemon thread.
                    t = Thread(target=self.workflow,
                               args=(comb, host, comb_dir))
                    threads[t] = {'host': host}
                    t.daemon = True
                    t.start()

                if not self.is_job_alive():
                    job_is_dead = True

                # Forget the dead job so that the next pass of the outer
                # loop makes a new reservation.
                if job_is_dead:
                    self.oargrid_job_id = None

        finally:
            # Delete the job on exit unless it must be kept for debugging.
            if self.oargrid_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oargriddel([self.oargrid_job_id])
                else:
                    logger.info('Keeping job alive for debugging')