def get_oargrid_job_nodes(oargrid_job_id, frontend_connection_params=None, timeout=False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ProcessesFailed: if the oargridstat command fails.
    """
    # only the literal False sentinel triggers the configured default;
    # None still means "no timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # try "oargridstat -wl" first (wait for nodes), fall back to plain
    # "oargridstat -l" for oargridstat versions lacking -w
    process = get_process(
        "oargridstat -wl %i 2>/dev/null || oargridstat -l %i 2>/dev/null"
        % (oargrid_job_id, oargrid_job_id),
        host=get_frontend_host(),
        connection_params=make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        # raw string: avoids the invalid "\S" escape warning on py3.12+
        host_addresses = re.findall(r"(\S+)", process.stdout, re.MULTILINE)
        # deduplicate hosts before returning
        return list(set(Host(host_address)
                        for host_address in host_addresses))
    else:
        raise ProcessesFailed([process])
def get_oar_job_kavlan(oar_job_id=None, frontend=None, frontend_connection_params=None, timeout=False):
    """Return the list of vlan ids of a job (if any).

    :param oar_job_id: the oar job id. If None given, will try to get it from
      ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use default
      frontend.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ValueError: if no job id is given nor found in the environment.
    :raises ProcessesFailed: if the kavlan command fails.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found")
    countdown = Timer(timeout)
    # vlan information is not available before the job starts
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    process = get_process('kavlan -j %s -V ' % oar_job_id,
                          host=get_frontend_host(frontend),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.pty = True
    process.ignore_exit_code = True  # kavlan exit code != 0 if request
    process.nolog_exit_code = True   # is for a job without a vlan
                                     # reservation
    process.run()
    if process.ok:
        try:
            # output is one vlan id per '\r\n'-separated line (pty mode)
            return [int(x) for x in process.stdout.strip().split('\r\n')]
        except ValueError:
            # handles cases where the job has no kavlan resource or when
            # kavlan isn't available (non-numeric / empty output)
            return []
    else:
        raise ProcessesFailed([process])
def get_oar_job_nodes(oar_job_id=None, frontend=None, frontend_connection_params=None, timeout=False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oar job.

    This method waits for the job start (the list of nodes isn't fixed until
    the job start).

    :param oar_job_id: the oar job id. If None given, will try to get it from
      ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use default
      frontend.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ValueError: if no job id is given nor found in the environment.
    :raises ProcessesFailed: if the oarstat/oarprint command fails.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found")
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # only query the node list once the job is Running (or already finished);
    # raw string keeps the grep alternation "\|" intact without escape warnings
    process = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && oarstat -pj %(oar_job_id)i | oarprint host -f -"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.shell = process.pty = True
    process.run()
    if process.ok:
        host_addresses = re.findall(r"(\S+)", process.stdout, re.MULTILINE)
        return [Host(host_address) for host_address in host_addresses]
    else:
        raise ProcessesFailed([process])
def get_current_oargrid_jobs(start_between=None, end_between=None, frontend_connection_params=None, timeout=False):
    """Return a list of current active oargrid job ids.

    :param start_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose start date is in between these endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and returns
      only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ProcessesFailed: if the oargridstat command fails.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # normalize date endpoints to unix timestamps
    if start_between:
        start_between = [get_unixts(t) for t in start_between]
    if end_between:
        end_between = [get_unixts(t) for t in end_between]
    process = get_process("oargridstat",
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        jobs = re.findall(r"Reservation # (\d+):", process.stdout,
                          re.MULTILINE)
        oargrid_job_ids = [int(j) for j in jobs]
        if start_between or end_between:
            filtered_job_ids = []
            for job in oargrid_job_ids:
                info = get_oargrid_job_info(job, timeout)
                # end date is derived as start_date + walltime
                if (_date_in_range(info['start_date'], start_between)
                        and _date_in_range(
                            info['start_date'] + info['walltime'],
                            end_between)):
                    filtered_job_ids.append(job)
            oargrid_job_ids = filtered_job_ids
        return oargrid_job_ids
    else:
        raise ProcessesFailed([process])
def get_oargrid_job_oar_jobs(oargrid_job_id=None, frontend_connection_params=None, timeout=False):
    """Return a list of tuples (oar job id, site), the list of individual
    oar jobs which make an oargrid job.

    :param oargrid_job_id: the oargrid job id.
      NOTE(review): the default None is unusable here — it would raise a
      TypeError in the "%i" formatting below; callers are expected to always
      pass an id.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ProcessesFailed: if the oargridstat command fails.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id,),
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        job_specs = []
        # each sub-job line looks like "<tab><site-or-cluster> --> <job id>";
        # raw string: regex interprets \t as tab itself, so matching is
        # unchanged and escape warnings are avoided
        for m in re.finditer(r"^\t(\w+) --> (\d+)", process.stdout,
                             re.MULTILINE):
            site = m.group(1)
            # oargridstat may print a cluster name instead of a site name;
            # map it back to its site
            if site not in get_g5k_sites():
                site = get_cluster_site(site)
            job_specs.append((int(m.group(2)), site))
        return job_specs
    else:
        raise ProcessesFailed([process])
def get_oar_job_subnets(oar_job_id=None, frontend=None, frontend_connection_params=None, timeout=False):
    """Return a tuple containing an iterable of tuples (IP, MAC) and a dict
    containing the subnet parameters of the reservation (if any).

    subnet parameters dict has keys: 'ip_prefix', 'broadcast', 'netmask',
    'gateway', 'network', 'dns_hostname', 'dns_ip'.

    :param oar_job_id: the oar job id. If None given, will try to get it from
      ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use default
      frontend.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :raises ValueError: if no job id is given nor found in the environment.
    :raises ProcessesFailed: if any of the g5k-subnets commands fails.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found")
    countdown = Timer(timeout)
    # subnet allocation is not known before the job starts
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # Get ip adresses: run g5k-subnets only once the job is Running (or
    # already finished); raw strings keep the grep "\|" alternation intact
    process_ip = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -i -m -j %(oar_job_id)i"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process_ip.timeout = countdown.remaining()
    process_ip.shell = process_ip.pty = True
    process_ip.run()
    # Get network parameters
    process_net = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -a -j %(oar_job_id)i"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process_net.timeout = countdown.remaining()
    process_net.shell = process_net.pty = True
    process_net.run()
    if process_net.ok and process_ip.ok:
        # each line of g5k-subnets -i -m output is "<IP> <MAC>"
        subnet_addresses = re.findall(r"(\S+)\s+(\S+)", process_ip.stdout,
                                      re.MULTILINE)
        process_net_out = process_net.stdout.rstrip().split('\t')
        network_params = dict()
        # g5k-subnets -a outputs 7 tab-separated fields; anything else is
        # treated as "no subnet parameters"
        if len(process_net_out) == 7:
            network_params = {
                "ip_prefix": process_net_out[0],
                "broadcast": process_net_out[1],
                "netmask": process_net_out[2],
                "gateway": process_net_out[3],
                "network": process_net_out[4],
                "dns_hostname": process_net_out[5],
                "dns_ip": process_net_out[6]
            }
        return (subnet_addresses, network_params)
    else:
        raise ProcessesFailed(
            [p for p in [process_net, process_ip] if not p.ok])
def get_current_oar_jobs(frontends=None, start_between=None, end_between=None, frontend_connection_params=None, timeout=False, abort_on_error=False):
    """Return a list of current active oar job ids.

    The list contains tuples (oarjob id, frontend).

    :param frontends: an iterable of frontends to connect to. A frontend with
      value None means default frontend. If frontends == None, means get
      current oar jobs only for default frontend.

    :param start_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose start date is in between these endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and returns
      only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :param abort_on_error: default False. If True, raises an exception on any
      error. If False, will returned the list of job got, even if incomplete
      (some frontends may have failed to answer).

    :raises ProcessesFailed: if abort_on_error and some oarstat command
      failed.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between:
        start_between = [get_unixts(t) for t in start_between]
    if end_between:
        end_between = [get_unixts(t) for t in end_between]
    processes = []
    if frontends is None:
        frontends = [None]
    for frontend in frontends:
        p = get_process("oarstat -u",
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        # remember which frontend this process queried, for the result tuples
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if len(processes) == 0:
        return oar_job_ids
    # start all frontend queries in parallel, then wait for all of them
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    failed_processes = []
    for process in processes:
        if process.ok:
            jobs = re.findall(r"^(\d+)\s", process.stdout, re.MULTILINE)
            oar_job_ids.extend([(int(jobid), process.frontend)
                                for jobid in jobs])
        else:
            failed_processes.append(process)
    if len(failed_processes) > 0 and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        if start_between or end_between:
            filtered_job_ids = []
            for jobfrontend in oar_job_ids:
                info = get_oar_job_info(jobfrontend[0], jobfrontend[1],
                                        frontend_connection_params,
                                        timeout,
                                        nolog_exit_code=True,
                                        nolog_timeout=True,
                                        nolog_error=True)
                # end date is derived as start_date + walltime
                if (_date_in_range(info['start_date'], start_between)
                        and _date_in_range(
                            info['start_date'] + info['walltime'],
                            end_between)):
                    filtered_job_ids.append(jobfrontend)
            oar_job_ids = filtered_job_ids
        return oar_job_ids
def oarsub(job_specs, frontend_connection_params=None, timeout=False, abort_on_error=False):
    """Submit jobs.

    :param job_specs: iterable of tuples (execo_g5k.oar.OarSubmission,
      frontend) with None for default frontend

    :param frontend_connection_params: connection params for connecting to
      frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for submitting. Default is False, which means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None means no
      timeout.

    :param abort_on_error: default False. If True, raises an exception on any
      error. If False, will returned the list of job got, even if incomplete
      (some frontends may have failed to answer).

    Returns a list of tuples (oarjob id, frontend), with frontend == None for
    default frontend. If submission error, oarjob id == None. The returned
    list matches, in the same order, the job_specs parameter.

    :raises ProcessesFailed: if abort_on_error and some submission failed.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    processes = []
    for (spec, frontend) in job_specs:
        oarsub_cmdline = get_oarsub_commandline(spec)
        p = get_process(oarsub_cmdline,
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        # remember the target frontend, for the result tuples
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if len(processes) == 0:
        return oar_job_ids
    # submit all jobs in parallel, then wait for all submissions
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    failed_processes = []
    for process in processes:
        job_id = None
        if process.ok:
            # oarsub prints "OAR_JOB_ID=<id>" on success
            mo = re.search(r"^OAR_JOB_ID=(\d+)\s*$", process.stdout,
                           re.MULTILINE)
            if mo is not None:
                job_id = int(mo.group(1))
        if job_id is None:
            failed_processes.append(process)
        # keep one entry per spec, in order, even on failure (job_id None)
        oar_job_ids.append((job_id, process.frontend))
    if len(failed_processes) > 0 and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        return oar_job_ids