def make_reservation(self):
    """Reserve nodes on ``self.cluster`` and submit the OAR job.

    Searches successive planning windows of 3 days and 1 minute, starting
    one minute from now, by calling ``self._get_nodes`` until it reports a
    non-falsy node count. The search is abandoned (process exit with a
    non-zero status) once the window start lies more than six weeks ahead.

    On success the first job spec returned by ``get_jobs_specs`` is
    adjusted (resources prefixed with ``slash_22=4+``, walltime from
    ``self.options.walltime``, ``-t deploy``, reservation date set to the
    found start date) and submitted with ``oarsub``. The resulting job id
    and frontend are stored in ``self.oar_job_id`` and ``self.frontend``.

    NOTE(review): the original docstring says "with 4000 IP"; presumably
    ``slash_22=4`` requests four /22 subnets -- confirm against OAR docs.
    """
    # Local import: sys.exit is used instead of the interactive ``exit``
    # helper so the failure is reported with a non-zero exit status.
    import sys

    window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))

    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    endtime = int(starttime + window)
    # Do not look for a slot starting later than six weeks from now.
    search_limit = int(time.time() +
                       timedelta_to_seconds(datetime.timedelta(weeks=6)))

    startdate, n_nodes = self._get_nodes(starttime, endtime)
    while not n_nodes:
        logger.info('No enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime + window)
        # Give up *before* querying the planning again: a result found
        # beyond the limit would be discarded anyway.
        if starttime > search_limit:
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            sys.exit(1)
        startdate, n_nodes = self._get_nodes(starttime, endtime)

    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    sub = jobs_specs[0][0]
    # Strip backslashes and double quotes from the generated resource
    # string before prepending the subnet request.
    tmp = str(sub.resources).replace('\\', '')
    sub.resources = 'slash_22=4+' + tmp.replace('"', '')
    sub.walltime = self.options.walltime
    sub.additional_options = '-t deploy'
    sub.reservation_date = startdate
    (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
    logger.info('Startdate: %s, n_nodes: %s',
                format_date(startdate), str(n_nodes))
def worker_start(self, cluster, site, oarsubmission, data, worker_index):
    """Submit an OAR job for the current worker thread and run the worker.

    The current thread object is tagged with ``cluster``, ``site``,
    ``worker_index`` and ``jobid``. The job is submitted while holding
    ``th.oarsublock``; submission is skipped entirely if
    ``th.willterminate`` is set (both attributes are expected to have been
    set on the thread elsewhere).

    If the submission yields no job id, ``self.worker`` is called once
    with ``None`` for both the nodes and the job id, and the method
    returns. Otherwise the method waits for the job to start, fetches its
    nodes and calls ``self.worker`` with them.

    In every case the ``finally`` clause deletes the OAR job if one is
    still recorded on the thread.
    """
    th = current_thread()
    th.cluster = cluster
    th.site = site
    th.worker_index = worker_index
    th.jobid = None
    try:
        with th.oarsublock:
            if th.willterminate:
                return
            worker_log.detail("submit oar job")
            ((th.jobid, _), ) = oarsub([(oarsubmission, site)])
        if not th.jobid:
            worker_log.detail("job submission failed")
            self.worker(cluster, site, data, None, worker_index,
                        oarsubmission, None)
            # There is no job to wait for: stop here instead of falling
            # through to wait_oar_job_start(None, ...) and running the
            # worker a second time.
            return
        worker_log.detail("job submitted - wait job start")
        wait_oar_job_start(
            th.jobid, site,
            prediction_callback=lambda ts: worker_log.detail(
                "job start prediction: %s" % (format_date(ts), )))
        th.waiting = False
        worker_log.detail("job started - get job nodes")
        nodes = get_oar_job_nodes(th.jobid, site)
        worker_log.detail("got %i nodes" % (len(nodes), ))
        self.worker(cluster, site, data, nodes, worker_index,
                    oarsubmission, th.jobid)
    finally:
        # Always release the OAR job, under the same lock used for
        # submission.
        with th.oarsublock:
            if th.jobid:
                worker_log.detail("delete oar job")
                oardel([(th.jobid, site)])
                th.jobid = None
        worker_log.detail("exit")
def worker_start(self, cluster, site, oarsubmission, data, worker_index):
    """Run one worker: submit its OAR job, wait for it, then do the work.

    Records ``cluster``, ``site``, ``worker_index`` and ``jobid`` on the
    current thread object. The submission happens under
    ``th.oarsublock`` and is skipped if ``th.willterminate`` is set (both
    attributes are expected to exist on the thread already).

    When ``oarsub`` returns no job id, ``self.worker`` is invoked once
    with ``None`` nodes and ``None`` job id and the method returns.
    Otherwise it waits for the job start, retrieves the job's nodes and
    invokes ``self.worker`` with them and the job id.

    Whatever happens, the ``finally`` clause deletes the OAR job when a
    job id is still stored on the thread.
    """
    th = current_thread()
    th.cluster = cluster
    th.site = site
    th.worker_index = worker_index
    th.jobid = None

    def log_prediction(ts):
        # Forwarded to wait_oar_job_start as its prediction callback.
        worker_log.detail("job start prediction: %s" % (format_date(ts),))

    try:
        with th.oarsublock:
            if th.willterminate:
                return
            worker_log.detail("submit oar job")
            ((th.jobid, _),) = oarsub([(oarsubmission, site)])
        if not th.jobid:
            worker_log.detail("job submission failed")
            self.worker(cluster, site, data, None, worker_index,
                        oarsubmission, None)
            # Without a job id there is nothing to wait for; returning
            # here avoids waiting on a None job and calling the worker
            # twice.
            return
        worker_log.detail("job submitted - wait job start")
        wait_oar_job_start(th.jobid, site,
                           prediction_callback=log_prediction)
        th.waiting = False
        worker_log.detail("job started - get job nodes")
        nodes = get_oar_job_nodes(th.jobid, site)
        worker_log.detail("got %i nodes" % (len(nodes),))
        self.worker(cluster, site, data, nodes, worker_index,
                    oarsubmission, th.jobid)
    finally:
        # Clean up the job under the submission lock.
        with th.oarsublock:
            if th.jobid:
                worker_log.detail("delete oar job")
                oardel([(th.jobid, site)])
                th.jobid = None
        worker_log.detail("exit")
def make_reservation_local(self):
    """Submit a best-effort OAR job on the cluster selected in the options.

    Sets ``self.cluster`` from ``self.options.selected_cluster``, then
    searches successive planning windows of 3 days and 1 minute (starting
    one minute from now) with ``self._get_nodes`` until a non-falsy node
    count is reported. The search is abandoned (process exit with a
    non-zero status) once the window start lies more than six weeks ahead.

    The first job spec from ``get_jobs_specs`` is then submitted with
    ``-t allow_classic_ssh -t besteffort`` and the walltime from
    ``self.options.walltime``. No reservation date is set, so the start
    date found during the search is only used to determine the node count.
    The job id and frontend are stored in ``self.oar_job_id`` and
    ``self.frontend``.
    """
    # Local import: sys.exit is used instead of the interactive ``exit``
    # helper so the failure is reported with a non-zero exit status.
    import sys

    window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))

    logger.info('Performing reservation')
    starttime = int(time.time() +
                    timedelta_to_seconds(datetime.timedelta(minutes=1)))
    endtime = int(starttime + window)
    # Do not look for a slot starting later than six weeks from now.
    search_limit = int(time.time() +
                       timedelta_to_seconds(datetime.timedelta(weeks=6)))

    self.cluster = self.options.selected_cluster
    _, n_nodes = self._get_nodes(starttime, endtime)
    while not n_nodes:
        logger.info('No enough nodes found between %s and %s, '
                    'increasing time window',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime + window)
        # Give up before issuing another planning query whose result
        # would be thrown away.
        if starttime > search_limit:
            logger.error('There are not enough nodes on %s for your '
                         'experiments, abort ...', self.cluster)
            sys.exit(1)
        _, n_nodes = self._get_nodes(starttime, endtime)

    jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                name=self.__class__.__name__)
    sub = jobs_specs[0][0]
    # Strip backslashes and double quotes from the generated resource
    # string.
    tmp = str(sub.resources).replace('\\', '')
    sub.resources = tmp.replace('"', '')
    sub.walltime = self.options.walltime
    sub.additional_options = '-t allow_classic_ssh -t besteffort'
    (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
    logger.info('Startdate: besteffort, n_nodes: %s', str(n_nodes))
def _get_nodes(self, starttime, endtime):
    """Find the first planning slot with enough nodes on ``self.cluster``.

    Fetches the planning of ``self.cluster`` between ``starttime`` and
    ``endtime`` (both passed straight to ``get_planning``), computes the
    slots for ``self.options.walltime`` and scans them in order.

    Returns
    -------
    tuple
        ``(startdate, n_nodes)`` taken from the first slot whose node
        count for the cluster is at least ``self.options.n_nodes``, or
        ``(False, False)`` when no slot qualifies (including when the
        planning yields no slot at all).
    """
    planning = get_planning(elements=[self.cluster],
                            starttime=starttime,
                            endtime=endtime,
                            out_of_chart=self.options.outofchart)
    slots = compute_slots(planning, self.options.walltime)
    if not slots:
        return False, False

    first = slots[0]
    logger.info("nodes %s in %s at %s", str(first[2][self.cluster]),
                str(self.cluster), format_date(first[0]))

    # Scan every slot, including the last one; each slot is indexed as
    # (start date, ..., {cluster: node count}).
    for slot in slots:
        n_nodes = slot[2][self.cluster]
        if n_nodes >= self.options.n_nodes:
            return slot[0], n_nodes
        logger.debug(slot)
    return False, False
def _get_nodes(self, starttime, endtime):
    """Find the first planning slot able to host ``self.options.n_nodes``.

    Fetches the planning of ``self.cluster`` between ``starttime`` and
    ``endtime`` (both passed straight to ``get_planning``), computes the
    slots for ``self.options.walltime`` and scans them in order. For each
    slot the available node count is rounded down to a multiple of
    ``self.options.n_nodes``.

    Returns
    -------
    tuple
        ``(startdate, 1)`` for the first slot whose rounded node count is
        at least ``self.options.n_nodes`` -- the second element is the
        constant ``1``, not the node count -- or ``(False, False)`` when
        no slot qualifies (including when there is no slot at all).
    """
    planning = get_planning(elements=[self.cluster],
                            starttime=starttime,
                            endtime=endtime,
                            out_of_chart=self.options.outofchart)
    slots = compute_slots(planning, self.options.walltime)
    wanted = self.options.n_nodes

    # Scan every slot, including the last one; each slot is indexed as
    # (start date, ..., {cluster: node count}).
    for slot in slots:
        n_nodes = wanted * (slot[2][self.cluster] // wanted)
        if n_nodes >= wanted:
            startdate = slot[0]
            logger.debug('Reserving %s nodes at %s', n_nodes,
                         format_date(startdate))
            return startdate, 1
        logger.debug(slot)
    return False, False
def _get_nodes(self, starttime, endtime):
    """Find the first slot offering a multiple of ``self.options.n_nodes``.

    Fetches the planning of ``self.cluster`` between ``starttime`` and
    ``endtime`` (both passed straight to ``get_planning``), computes the
    slots for ``self.options.walltime`` and scans them in order. For each
    slot the available node count is rounded down to a multiple of
    ``self.options.n_nodes``.

    Returns
    -------
    tuple
        ``(startdate, n_nodes)`` for the first slot whose rounded node
        count is at least ``self.options.n_nodes`` (``n_nodes`` being
        that rounded count), or ``(False, False)`` when no slot qualifies
        (including when there is no slot at all).
    """
    planning = get_planning(elements=[self.cluster],
                            starttime=starttime,
                            endtime=endtime,
                            out_of_chart=self.options.outofchart)
    slots = compute_slots(planning, self.options.walltime)
    wanted = self.options.n_nodes

    # Scan every slot, including the last one; each slot is indexed as
    # (start date, ..., {cluster: node count}).
    for slot in slots:
        n_nodes = wanted * (slot[2][self.cluster] // wanted)
        if n_nodes >= wanted:
            startdate = slot[0]
            logger.debug('Reserving %s nodes at %s', n_nodes,
                         format_date(startdate))
            return startdate, n_nodes
        logger.debug(slot)
    return False, False
def _get_nodes(self, starttime, endtime):
    """Return the start date of the first slot satisfying every cluster.

    The planning of all clusters in ``self.clusters`` is fetched between
    ``starttime`` and ``endtime`` and turned into slots for the configured
    walltime. A slot qualifies when, for each cluster, it offers at least
    the node count requested in ``self.clusters``.

    Parameters
    ----------
    starttime
        start of the search window, passed to ``get_planning``
    endtime
        end of the search window, passed to ``get_planning``

    Returns
    -------
    The first element of the first qualifying slot, or ``None`` when no
    slot qualifies.
    """
    planning = get_planning(elements=self.clusters.keys(),
                            starttime=starttime,
                            endtime=endtime,
                            out_of_chart=self.out_of_chart)
    slots = compute_slots(planning, self.configs['walltime'])

    def lacks_nodes(slot):
        # True as soon as one cluster offers fewer nodes than requested.
        return any(slot[2][cluster_name] < n_nodes
                   for cluster_name, n_nodes in self.clusters.items())

    startdate = next(
        (slot[0] for slot in slots if not lacks_nodes(slot)), None)
    if startdate is None:
        return startdate
    logger.info('A slot is found for your request at %s' % format_date(startdate))
    return startdate
def prediction_callback(ts):
    """Log the predicted job start ``ts`` at INFO level."""
    message = "job start prediction = %s" % (format_date(ts),)
    logger.info(message)
def pred_cb(ts):
    """Log the predicted job start ``ts`` at INFO level."""
    predicted = format_date(ts)
    logger.info("job start prediction = %s" % (predicted,))
def make_reservation(self):
    """Reserve the nodes requested in ``self.clusters`` through OAR.

    If ``self.oar_result`` is already set, only the known job ids and the
    hosts of each job are logged and the method returns.

    Otherwise the request is logged and ``self._get_nodes`` is called on
    successive windows of 3 days and 1 minute, starting at
    ``self.configs['starttime']`` (one minute from now when unset), until
    it returns a start date. The search is abandoned (process exit with a
    non-zero status) once the window start lies more than six weeks after
    the initial start time.

    Every job spec from ``get_jobs_specs`` then gets its resources
    prefixed with ``slash_22=4+``, the configured walltime, ``-t deploy``
    and a reservation date 10 seconds after the found start date. The
    specs are submitted with ``oarsub``; the result is stored in
    ``self.oar_result``. The process exits with a non-zero status if any
    submission returned no job id.
    """
    # Local import: sys.exit is used instead of the interactive ``exit``
    # helper so failures are reported with a non-zero exit status.
    import sys

    if self.oar_result:
        lines = ["Validated OAR_JOB_ID:"]
        lines.extend("%s: %s" % (site, job_id)
                     for job_id, site in self.oar_result)
        logger.info("\n".join(lines))

        lines = ["The list of hosts:"]
        for job_id, site in self.oar_result:
            hosts = get_oar_job_nodes(oar_job_id=job_id, frontend=site)
            lines.append("--- %s: %s nodes ---" % (site, len(hosts)))
            lines.extend("%s" % (host.address) for host in hosts)
        logger.info("\n".join(lines))
        return

    walltime_seconds = self.configs['walltime']
    if walltime_seconds <= 99 * 3600 + 99 * 60 + 99:
        # Format by hand: time.gmtime() wraps the hour field at 24, so a
        # 25 hour walltime would be displayed as 01:00:00.
        minutes, seconds = divmod(int(walltime_seconds), 60)
        hours, minutes = divmod(minutes, 60)
        walltime = '%02d:%02d:%02d' % (hours, minutes, seconds)
    else:
        walltime = '%s seconds' % walltime_seconds

    lines = ['You are requesting %s nodes for %s:'
             % (sum(self.clusters.values()), walltime)]
    lines.extend("%s: %s nodes" % (cluster, n_nodes)
                 for cluster, n_nodes in self.clusters.items())
    logger.info("\n".join(lines))

    logger.info('Performing reservation .......')
    if 'starttime' not in self.configs or self.configs['starttime'] is None:
        self.configs['starttime'] = int(
            time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1)))

    window = timedelta_to_seconds(datetime.timedelta(days=3, minutes=1))
    starttime = int(get_unixts(self.configs['starttime']))
    endtime = int(starttime + window)
    # Derive the limit from the converted timestamp rather than from the
    # raw configuration value, which is only known to be something
    # get_unixts accepts.
    search_limit = int(
        starttime + timedelta_to_seconds(datetime.timedelta(weeks=6)))

    startdate = self._get_nodes(starttime, endtime)
    while startdate is None:
        logger.info('No enough nodes found between %s and %s, '
                    '\nIncreasing the window time....',
                    format_date(starttime), format_date(endtime))
        starttime = endtime
        endtime = int(starttime + window)
        # Give up before issuing another planning query whose result
        # would be thrown away.
        if starttime > search_limit:
            logger.error(
                'What a pity! There is no slot which satisfies your '
                'request until %s :(' % format_date(endtime))
            sys.exit(1)
        startdate = self._get_nodes(starttime, endtime)

    jobs_specs = get_jobs_specs(self.clusters, name=self.job_name)
    for job_spec, site_name in jobs_specs:
        # Strip backslashes and double quotes from the generated resource
        # string before prepending the subnet request.
        tmp = str(job_spec.resources).replace('\\', '')
        job_spec.resources = 'slash_22=4+' + tmp.replace('"', '')
        job_spec.walltime = self.configs['walltime']
        # -t deploy to reserve node without deploying OS
        job_spec.additional_options = '-t deploy'
        job_spec.reservation_date = startdate + 10

    self.oar_result = oarsub(jobs_specs)
    if any(oar_job_id is None for oar_job_id, _ in self.oar_result):
        logger.info('Performing reservation FAILED')
        sys.exit(1)

    message = "Reserved nodes successfully!!! \nOAR JOB ID:\n"
    message += "".join("%s:%s," % (each[1], each[0])
                       for each in self.oar_result)
    logger.info(message)
def prediction_callback(ts):
    """Report the predicted job start ``ts`` through the module logger."""
    text = "job start prediction = %s" % (format_date(ts), )
    logger.info(text)