예제 #1
0
파일: engine.py 프로젝트: badock/vm5k
 def make_reservation(self):
     """Reserve nodes on ``self.cluster`` and submit the matching OAR job.

     The planning is searched in consecutive windows of three days and one
     minute, the first one beginning one minute from now, until
     ``self._get_nodes`` reports a usable slot.  Once a window starting more
     than six weeks from now has been searched, an error is logged and the
     program exits.

     The submitted job is of type ``deploy``, starts at the date of the slot
     found, and has its resource string prefixed with ``slash_22=4+``
     (presumably the subnets providing the "4000 IP" mentioned by the
     original docstring -- confirm against the OAR configuration).

     Sets ``self.oar_job_id`` and ``self.frontend`` from the first result
     returned by ``oarsub``.
     """
     logger.info('Performing reservation')
     one_minute = timedelta_to_seconds(datetime.timedelta(minutes=1))
     window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))

     starttime = int(time.time() + one_minute)
     endtime = int(starttime + window)
     startdate, n_nodes = self._get_nodes(starttime, endtime)

     while not n_nodes:
         logger.info('No enough nodes found between %s and %s, '
                     'increasing time window',
                     format_date(starttime), format_date(endtime))
         # Slide the search window forward by one full window length.
         starttime, endtime = endtime, int(endtime + window)
         startdate, n_nodes = self._get_nodes(starttime, endtime)
         search_limit = int(
             time.time() +
             timedelta_to_seconds(datetime.timedelta(weeks=6)))
         if starttime > search_limit:
             logger.error('There are not enough nodes on %s for your '
                          'experiments, abort ...', self.cluster)
             exit()

     jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                 name=self.__class__.__name__)
     submission = jobs_specs[0][0]
     # Strip backslashes and double quotes from the generated resource
     # string before prepending the subnet request.
     cleaned = str(submission.resources).replace('\\', '').replace('"', '')
     submission.resources = 'slash_22=4+' + cleaned
     submission.walltime = self.options.walltime
     submission.additional_options = '-t deploy'
     submission.reservation_date = startdate

     first_job = oarsub(jobs_specs)[0]
     (self.oar_job_id, self.frontend) = first_job
     logger.info('Startdate: %s, n_nodes: %s', format_date(startdate),
                 str(n_nodes))
 def worker_start(self, cluster, site, oarsubmission, data, worker_index):
     """Submit an OAR job from the current thread and run the worker on it.

     The current thread object is annotated with ``cluster``, ``site``,
     ``worker_index`` and ``jobid``.  The thread is expected to already
     carry ``oarsublock`` and ``willterminate`` attributes (presumably set
     by whoever created the thread -- confirm against the caller).

     Parameters
     ----------
     cluster, site:
         Stored on the thread and forwarded to ``self.worker``; ``site`` is
         also used as the frontend of every OAR call.
     oarsubmission:
         Submission passed to ``oarsub`` and forwarded to ``self.worker``.
     data:
         Opaque payload forwarded to ``self.worker``.
     worker_index:
         Index of this worker, stored on the thread and forwarded.

     If the submission yields no job id, ``self.worker`` is called once
     with ``None`` for both the nodes and the job id, and the method
     returns.  Whatever happens, a job id still recorded on the thread is
     deleted with ``oardel`` before leaving.
     """
     th = current_thread()
     th.cluster = cluster
     th.site = site
     th.worker_index = worker_index
     th.jobid = None
     try:
         with th.oarsublock:
             if th.willterminate:
                 return
             worker_log.detail("submit oar job")
             ((th.jobid, _), ) = oarsub([(oarsubmission, site)])
         if not th.jobid:
             worker_log.detail("job submission failed")
             self.worker(cluster, site, data, None, worker_index,
                         oarsubmission, None)
             # Without this return the code went on to wait for a job id
             # of None and then invoked the worker a second time.
             return
         worker_log.detail("job submitted - wait job start")
         wait_oar_job_start(th.jobid,
                            site,
                            prediction_callback=lambda ts: worker_log.
                            detail("job start prediction: %s" %
                                   (format_date(ts), )))
         th.waiting = False
         worker_log.detail("job started - get job nodes")
         nodes = get_oar_job_nodes(th.jobid, site)
         worker_log.detail("got %i nodes" % (len(nodes), ))
         self.worker(cluster, site, data, nodes, worker_index,
                     oarsubmission, th.jobid)
     finally:
         # Always release the OAR job, even on error or early return.
         with th.oarsublock:
             if th.jobid:
                 worker_log.detail("delete oar job")
                 oardel([(th.jobid, site)])
                 th.jobid = None
         worker_log.detail("exit")
 def worker_start(self, cluster, site, oarsubmission, data, worker_index):
     """Submit an OAR job from the current thread and run the worker on it.

     The current thread object is annotated with ``cluster``, ``site``,
     ``worker_index`` and ``jobid``.  The thread is expected to already
     carry ``oarsublock`` and ``willterminate`` attributes (presumably set
     by whoever created the thread -- confirm against the caller).

     Parameters
     ----------
     cluster, site:
         Stored on the thread and forwarded to ``self.worker``; ``site`` is
         also used as the frontend of every OAR call.
     oarsubmission:
         Submission passed to ``oarsub`` and forwarded to ``self.worker``.
     data:
         Opaque payload forwarded to ``self.worker``.
     worker_index:
         Index of this worker, stored on the thread and forwarded.

     If the submission yields no job id, ``self.worker`` is called once
     with ``None`` for both the nodes and the job id, and the method
     returns.  Whatever happens, a job id still recorded on the thread is
     deleted with ``oardel`` before leaving.
     """
     th = current_thread()
     th.cluster = cluster
     th.site = site
     th.worker_index = worker_index
     th.jobid = None
     try:
         with th.oarsublock:
             if th.willterminate:
                 return
             worker_log.detail("submit oar job")
             ((th.jobid, _),) = oarsub([(oarsubmission, site)])
         if not th.jobid:
             worker_log.detail("job submission failed")
             self.worker(cluster, site, data, None, worker_index, oarsubmission, None)
             # Without this return the code went on to wait for a job id
             # of None and then invoked the worker a second time.
             return
         worker_log.detail("job submitted - wait job start")
         wait_oar_job_start(th.jobid, site,
                            prediction_callback = lambda ts:
                                worker_log.detail("job start prediction: %s" % (format_date(ts),)))
         th.waiting = False
         worker_log.detail("job started - get job nodes")
         nodes = get_oar_job_nodes(th.jobid, site)
         worker_log.detail("got %i nodes" % (len(nodes),))
         self.worker(cluster, site, data, nodes, worker_index, oarsubmission, th.jobid)
     finally:
         # Always release the OAR job, even on error or early return.
         with th.oarsublock:
             if th.jobid:
                 worker_log.detail("delete oar job")
                 oardel([(th.jobid, site)])
                 th.jobid = None
         worker_log.detail("exit")
    def make_reservation_local(self):
        """Submit a best-effort OAR job on the cluster selected in the options.

        ``self.cluster`` is set from ``self.options.selected_cluster``, then
        the planning is searched in consecutive windows of three days and one
        minute (the first one beginning one minute from now) until
        ``self._get_nodes`` reports a usable node count.  Once a window
        starting more than six weeks from now has been searched, an error is
        logged and the program exits.

        The start date of the slot is only used to drive the search: the job
        is submitted with ``-t allow_classic_ssh -t besteffort`` and without
        a reservation date.  Sets ``self.oar_job_id`` and ``self.frontend``
        from the first result returned by ``oarsub``.
        """
        logger.info('Performing reservation')
        one_minute = timedelta_to_seconds(datetime.timedelta(minutes=1))
        window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))

        starttime = int(time.time() + one_minute)
        endtime = int(starttime + window)
        self.cluster = self.options.selected_cluster
        _, n_nodes = self._get_nodes(starttime, endtime)

        while not n_nodes:
            logger.info('No enough nodes found between %s and %s, '
                        'increasing time window',
                        format_date(starttime), format_date(endtime))
            # Slide the search window forward by one full window length.
            starttime, endtime = endtime, int(endtime + window)
            _, n_nodes = self._get_nodes(starttime, endtime)
            search_limit = int(
                time.time() +
                timedelta_to_seconds(datetime.timedelta(weeks=6)))
            if starttime > search_limit:
                logger.error('There are not enough nodes on %s for your '
                             'experiments, abort ...', self.cluster)
                exit()

        jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                    name=self.__class__.__name__)
        submission = jobs_specs[0][0]
        # Strip backslashes and double quotes from the generated resource
        # string.
        submission.resources = (
            str(submission.resources).replace('\\', '').replace('"', ''))
        submission.walltime = self.options.walltime
        submission.additional_options = '-t allow_classic_ssh -t besteffort'

        first_job = oarsub(jobs_specs)[0]
        (self.oar_job_id, self.frontend) = first_job
        logger.info('Startdate: besteffort, n_nodes: %s', str(n_nodes))
 def _get_nodes(self, starttime, endtime):
     """Find the first planning slot with enough nodes on ``self.cluster``.

     The planning of the cluster between ``starttime`` and ``endtime`` is
     turned into slots for ``self.options.walltime``; slots are examined in
     the order returned by ``compute_slots``.

     Parameters
     ----------
     starttime, endtime:
         Bounds of the planning window, forwarded to ``get_planning``.

     Returns
     -------
     tuple
         ``(startdate, n_nodes)`` of the first slot whose node count for
         ``self.cluster`` is at least ``self.options.n_nodes``, or
         ``(False, False)`` when no slot qualifies (including when no slot
         is returned at all).
     """
     planning = get_planning(elements=[self.cluster],
                             starttime=starttime,
                             endtime=endtime,
                             out_of_chart=self.options.outofchart)
     slots = compute_slots(planning, self.options.walltime)
     if not slots:
         # The previous code indexed slots[0] unconditionally.
         return False, False

     first_slot = slots[0]
     logger.info("nodes %s in %s at %s", str(first_slot[2][self.cluster]),
                 str(self.cluster), format_date(first_slot[0]))

     # Examine every slot, and test sufficiency before giving up: the
     # previous index-based loop could run past the end of a one-slot list
     # and discarded a satisfying slot found just before the last one.
     for slot in slots:
         n_nodes = slot[2][self.cluster]
         if n_nodes >= self.options.n_nodes:
             return slot[0], n_nodes
         logger.debug(slot)
     return False, False
예제 #6
0
파일: engine.py 프로젝트: badock/vm5k
 def _get_nodes(self, starttime, endtime):
     """Find the first planning slot with enough nodes on ``self.cluster``.

     The planning of the cluster between ``starttime`` and ``endtime`` is
     turned into slots for ``self.options.walltime``; slots are examined in
     the order returned by ``compute_slots``.  For each slot the node count
     is rounded down to a multiple of ``self.options.n_nodes``.

     Parameters
     ----------
     starttime, endtime:
         Bounds of the planning window, forwarded to ``get_planning``.

     Returns
     -------
     tuple
         ``(startdate, 1)`` for the first slot offering at least
         ``self.options.n_nodes`` nodes, or ``(False, False)`` when no slot
         qualifies (including when no slot is returned at all).
     """
     planning = get_planning(elements=[self.cluster],
                             starttime=starttime,
                             endtime=endtime,
                             out_of_chart=self.options.outofchart)
     slots = compute_slots(planning, self.options.walltime)
     requested = self.options.n_nodes

     # Examine every slot, and test sufficiency before giving up: the
     # previous index-based loop failed on empty or one-slot lists and
     # discarded a satisfying slot found just before the last one.
     for slot in slots:
         n_nodes = requested * (slot[2][self.cluster] // requested)
         if n_nodes >= requested:
             logger.debug('Reserving %s nodes at %s', n_nodes,
                          format_date(slot[0]))
             # NOTE(review): the node count returned is the constant 1, not
             # n_nodes, although the log line above reports n_nodes. Kept
             # as in the original -- confirm whether this is intentional.
             return slot[0], 1
         logger.debug(slot)
     return False, False
예제 #7
0
 def _get_nodes(self, starttime, endtime):
     """Find the first planning slot with enough nodes on ``self.cluster``.

     The planning of the cluster between ``starttime`` and ``endtime`` is
     turned into slots for ``self.options.walltime``; slots are examined in
     the order returned by ``compute_slots``.  For each slot the node count
     is rounded down to a multiple of ``self.options.n_nodes``.

     Parameters
     ----------
     starttime, endtime:
         Bounds of the planning window, forwarded to ``get_planning``.

     Returns
     -------
     tuple
         ``(startdate, n_nodes)`` for the first slot offering at least
         ``self.options.n_nodes`` nodes, where ``n_nodes`` is the rounded
         count, or ``(False, False)`` when no slot qualifies (including
         when no slot is returned at all).
     """
     planning = get_planning(elements=[self.cluster],
                             starttime=starttime,
                             endtime=endtime,
                             out_of_chart=self.options.outofchart)
     slots = compute_slots(planning, self.options.walltime)
     requested = self.options.n_nodes

     # Examine every slot, and test sufficiency before giving up: the
     # previous index-based loop failed on empty or one-slot lists and
     # discarded a satisfying slot found just before the last one.
     for slot in slots:
         n_nodes = requested * (slot[2][self.cluster] // requested)
         if n_nodes >= requested:
             logger.debug('Reserving %s nodes at %s', n_nodes,
                          format_date(slot[0]))
             return slot[0], n_nodes
         logger.debug(slot)
     return False, False
예제 #8
0
    def _get_nodes(self, starttime, endtime):
        """Return the start date of the first slot that fits every cluster.

        A slot fits when, for each ``(cluster, n_nodes)`` pair in
        ``self.clusters``, the slot offers at least ``n_nodes`` nodes on that
        cluster.  Slots are computed for ``self.configs['walltime']`` from
        the planning of all requested clusters.

        Parameters
        ----------
        starttime:
            Start of the planning window, forwarded to ``get_planning``.

        endtime:
            End of the planning window, forwarded to ``get_planning``.

        Returns
        -------
        The start date of the first fitting slot, or ``None`` when no slot
        in the window fits.
        """
        planning = get_planning(elements=self.clusters.keys(),
                                starttime=starttime,
                                endtime=endtime,
                                out_of_chart=self.out_of_chart)
        slots = compute_slots(planning, self.configs['walltime'])

        def fits(slot):
            # slot[2] maps a cluster name to the nodes available in the slot.
            return all(slot[2][cluster_name] >= n_nodes
                       for cluster_name, n_nodes in self.clusters.items())

        startdate = next((slot[0] for slot in slots if fits(slot)), None)
        if startdate is None:
            return None

        logger.info('A slot is found for your request at %s' %
                    format_date(startdate))
        return startdate
def prediction_callback(ts):
    """Log, at info level, the formatted predicted start date ``ts`` of a job."""
    predicted_start = format_date(ts)
    logger.info("job start prediction = %s" % (predicted_start,))
예제 #10
0
def pred_cb(ts):
    """Log, at info level, the formatted predicted start date ``ts`` of a job."""
    predicted_start = format_date(ts)
    logger.info("job start prediction = %s" % (predicted_start,))
예제 #11
0
    def make_reservation(self):
        """Reserve the nodes listed in ``self.clusters`` through OAR.

        If ``self.oar_result`` is already set, the existing job ids and the
        hosts of each job are logged and nothing is submitted.

        Otherwise the planning is searched with ``self._get_nodes`` in
        consecutive windows of three days and one minute, starting at
        ``self.configs['starttime']`` (defaulted to one minute from now when
        missing or ``None``).  When the next window would start more than six
        weeks after the initial start time, an error is logged and the
        program exits.  One ``deploy`` job per job spec is then submitted,
        with ``slash_22=4+`` prepended to its resources and a reservation
        date ten seconds after the slot's start date.  The program exits if
        any submission returns no job id.

        Parameters
        ----------

        Returns
        -------

        """
        if self.oar_result:
            validated = ["Validated OAR_JOB_ID:"]
            validated.extend("%s: %s" % (site, job_id)
                             for job_id, site in self.oar_result)
            logger.info("\n".join(validated))

            host_lines = ["The list of hosts:"]
            for job_id, site in self.oar_result:
                hosts = get_oar_job_nodes(oar_job_id=job_id, frontend=site)
                host_lines.append("--- %s: %s nodes ---" % (site, len(hosts)))
                host_lines.extend("%s" % (host.address) for host in hosts)
            logger.info("\n".join(host_lines))
            return

        requested_walltime = self.configs['walltime']
        if requested_walltime <= 99*3600+99*60+99:
            # Format by hand: time.gmtime() wraps at 24 hours, so a 30 hour
            # walltime used to be displayed as 06:00:00.
            hours, remainder = divmod(int(requested_walltime), 3600)
            minutes, seconds = divmod(remainder, 60)
            walltime = '%02d:%02d:%02d' % (hours, minutes, seconds)
        else:
            walltime = '%s seconds' % requested_walltime

        request = ['You are requesting %s nodes for %s:'
                   % (sum(self.clusters.values()), walltime)]
        request.extend("%s: %s nodes" % (cluster, n_nodes)
                       for cluster, n_nodes in self.clusters.items())
        logger.info("\n".join(request))

        logger.info('Performing reservation .......')
        if 'starttime' not in self.configs or self.configs['starttime'] is None:
            self.configs['starttime'] = int(
                time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1)))

        window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))
        starttime = int(get_unixts(self.configs['starttime']))
        endtime = int(starttime + window)
        # The limit is derived from the converted timestamp: adding seconds
        # to the raw configured value fails when that value is not a number
        # (presumably get_unixts accepts other date forms -- confirm).
        search_limit = int(
            starttime + timedelta_to_seconds(datetime.timedelta(weeks=6)))
        startdate = self._get_nodes(starttime, endtime)

        while startdate is None:
            logger.info('No enough nodes found between %s and %s, '
                        '\nIncreasing the window time....',
                        format_date(starttime), format_date(endtime))
            starttime = endtime
            endtime = int(starttime + window)
            # Test the limit before querying: a slot found past the limit
            # was thrown away anyway.
            if starttime > search_limit:
                logger.error(
                    'What a pity! There is no slot which satisfies your request until %s :(' % format_date(endtime))
                exit()
            startdate = self._get_nodes(starttime, endtime)

        jobs_specs = get_jobs_specs(self.clusters, name=self.job_name)
        for job_spec, site_name in jobs_specs:
            # Strip backslashes and double quotes from the generated
            # resource string before prepending the subnet request.
            cleaned = str(job_spec.resources).replace('\\', '').replace('"', '')
            job_spec.resources = 'slash_22=4+' + cleaned
            job_spec.walltime = self.configs['walltime']
            # -t deploy to reserve node without deploying OS
            job_spec.additional_options = '-t deploy'
            job_spec.reservation_date = startdate + 10

        self.oar_result = oarsub(jobs_specs)

        if any(oar_job_id is None for oar_job_id, _ in self.oar_result):
            logger.info('Performing reservation FAILED')
            exit()

        message = "Reserved nodes successfully!!! \nOAR JOB ID:\n" + "".join(
            "%s:%s," % (each[1], each[0]) for each in self.oar_result)
        logger.info(message)
def prediction_callback(ts):
    """Log, at info level, the formatted predicted start date ``ts`` of a job."""
    predicted_start = format_date(ts)
    logger.info("job start prediction = %s" % (predicted_start, ))