예제 #1
0
파일: oargrid.py 프로젝트: mickours/execo-1
def get_oargrid_job_nodes(oargrid_job_id,
                          frontend_connection_params=None,
                          timeout=False):
    """Return a list of `execo.host.Host` containing the hosts of an oargrid job.

    The hosts are deduplicated through a set, so the order of the
    returned list is unspecified.

    :param oargrid_job_id: the oargrid job id (formatted as an integer
      in the command line).

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the ``oargridstat`` process is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # The shell command first tries ``oargridstat -wl`` and falls back to
    # ``oargridstat -l`` when the first form fails.
    process = get_process(
        "oargridstat -wl %i 2>/dev/null || oargridstat -l %i 2>/dev/null" %
        (oargrid_job_id, oargrid_job_id),
        host=get_frontend_host(),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if not process.ok:
        raise ProcessesFailed([process])
    # Every whitespace-separated token of the output is a host address.
    host_addresses = re.findall(r"\S+", process.stdout)
    return list({Host(host_address) for host_address in host_addresses})
예제 #2
0
파일: oar.py 프로젝트: msimonin/execo
def get_oar_job_kavlan(oar_job_id=None,
                       frontend=None,
                       frontend_connection_params=None,
                       timeout=False):
    """Return the list of vlan ids of a job (if any).

    Waits for the job start, then runs ``kavlan -j <job id> -V`` on the
    frontend and parses one integer vlan id per output line. If the
    output cannot be parsed as integers, an empty list is returned.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ValueError: if no job id is given and ``OAR_JOB_ID`` is not
      set in the environment.

    :raises ProcessesFailed: if the ``kavlan`` process is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        oar_job_id = os.environ.get('OAR_JOB_ID')
        if oar_job_id is None:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    # A single countdown is shared by the wait and by the kavlan query.
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    process = get_process('kavlan -j %s -V ' % oar_job_id,
                          host=get_frontend_host(frontend),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.pty = True
    # kavlan exit code != 0 if the request is for a job without a vlan
    # reservation, so the exit code is neither checked nor logged.
    process.ignore_exit_code = True
    process.nolog_exit_code = True
    process.run()
    if not process.ok:
        raise ProcessesFailed([process])
    try:
        # splitlines() copes with both "\r\n" (pty) and "\n" line endings.
        return [
            int(line) for line in process.stdout.splitlines()
            if line.strip()
        ]
    except ValueError:
        # Non-numeric output: handles cases where the job has no kavlan
        # resource or when kavlan isn't available.
        return []
예제 #3
0
파일: oar.py 프로젝트: msimonin/execo
def get_oar_job_nodes(oar_job_id=None,
                      frontend=None,
                      frontend_connection_params=None,
                      timeout=False):
    """Return a list of `execo.host.Host` containing the hosts of an oar job.

    This method waits for the job start (the list of nodes isn't fixed
    until the job start).

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ValueError: if no job id is given and ``OAR_JOB_ID`` is not
      set in the environment, or if the job id is not a valid integer.

    :raises ProcessesFailed: if the remote command is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        oar_job_id = os.environ.get('OAR_JOB_ID')
        if oar_job_id is None:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    # A single countdown is shared by the wait and by the node query.
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # The host list is only printed when oarstat reports the job state as
    # Running, Terminated or Error.
    # int(): an id read from the environment is a str, which the %i
    # conversion would reject with a TypeError.
    process = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && oarstat -pj %(oar_job_id)i | oarprint host -f -"
        % {'oar_job_id': int(oar_job_id)},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.shell = process.pty = True
    process.run()
    if not process.ok:
        raise ProcessesFailed([process])
    # Every whitespace-separated token of the output is a host address.
    return [
        Host(host_address)
        for host_address in re.findall(r"\S+", process.stdout)
    ]
예제 #4
0
파일: oargrid.py 프로젝트: mickours/execo-1
def get_current_oargrid_jobs(start_between=None,
                             end_between=None,
                             frontend_connection_params=None,
                             timeout=False):
    """Return a list of current active oargrid job ids.

    The ids are parsed from the ``Reservation # <id>:`` lines of the
    ``oargridstat`` output. When a date filter is given, the info of
    each job is fetched to read its start date and walltime.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the ``oargridstat`` process is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between:
        start_between = [get_unixts(t) for t in start_between]
    if end_between:
        end_between = [get_unixts(t) for t in end_between]
    process = get_process("oargridstat",
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if not process.ok:
        raise ProcessesFailed([process])
    oargrid_job_ids = [
        int(job_id)
        for job_id in re.findall(r"Reservation # (\d+):", process.stdout)
    ]
    if not (start_between or end_between):
        return oargrid_job_ids
    filtered_job_ids = []
    for job in oargrid_job_ids:
        # Pass the optional arguments by keyword: given positionally, the
        # timeout would land in the second parameter slot, which in the
        # sibling oargrid helpers is frontend_connection_params.
        # NOTE(review): assumes get_oargrid_job_info accepts these two
        # keywords like its siblings -- confirm against its definition.
        info = get_oargrid_job_info(
            job,
            frontend_connection_params=frontend_connection_params,
            timeout=timeout)
        # The end date is derived as start date + walltime.
        if (_date_in_range(info['start_date'], start_between)
                and _date_in_range(info['start_date'] + info['walltime'],
                                   end_between)):
            filtered_job_ids.append(job)
    return filtered_job_ids
예제 #5
0
파일: oargrid.py 프로젝트: mickours/execo-1
def get_oargrid_job_oar_jobs(oargrid_job_id=None,
                             frontend_connection_params=None,
                             timeout=False):
    """Return a list of tuples (oar job id, site), the list of individual oar jobs which make an oargrid job.

    The tuples are parsed from the tab-indented ``<name> --> <id>``
    lines of the ``oargridstat <id>`` output. A name that is not a
    known site is translated with ``get_cluster_site``.

    :param oargrid_job_id: the oargrid job id. It is formatted as an
      integer, so the default None is not usable (the formatting raises
      TypeError).

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the ``oargridstat`` process is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id, ),
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if not process.ok:
        raise ProcessesFailed([process])
    matches = re.findall(r"^\t(\w+) --> (\d+)", process.stdout,
                         re.MULTILINE)
    # Fetch the site list once rather than once per matched line, and
    # only when there is something to look up.
    known_sites = get_g5k_sites() if matches else ()
    job_specs = []
    for name, oar_job_id in matches:
        site = name if name in known_sites else get_cluster_site(name)
        job_specs.append((int(oar_job_id), site))
    return job_specs
예제 #6
0
파일: oar.py 프로젝트: msimonin/execo
def get_oar_job_subnets(oar_job_id=None,
                        frontend=None,
                        frontend_connection_params=None,
                        timeout=False):
    """Return a tuple containing a list of tuples (IP, MAC) and a dict containing the subnet parameters of the reservation (if any).

    subnet parameters dict has keys: 'ip_prefix', 'broadcast',
    'netmask', 'gateway', 'network', 'dns_hostname', 'dns_ip'. It is
    empty when the ``g5k-subnets -a`` output does not hold exactly
    seven tab-separated fields.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ValueError: if no job id is given and ``OAR_JOB_ID`` is not
      set in the environment, or if the job id is not a valid integer.

    :raises ProcessesFailed: with the processes that are not ok, if any
      of the two ``g5k-subnets`` commands failed.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        oar_job_id = os.environ.get('OAR_JOB_ID')
        if oar_job_id is None:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    # A single countdown is shared by the wait and by both queries.
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # int(): an id read from the environment is a str, which the %i
    # conversion would reject with a TypeError.
    numeric_job_id = int(oar_job_id)

    def run_g5k_subnets(options):
        # Run ``g5k-subnets <options> -j <job id>`` on the frontend, only
        # when oarstat reports the job as Running, Terminated or Error.
        process = get_process(
            r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets %(options)s -j %(oar_job_id)i"
            % {'oar_job_id': numeric_job_id, 'options': options},
            host=get_frontend_host(frontend),
            connection_params=make_connection_params(
                frontend_connection_params,
                default_frontend_connection_params))
        process.timeout = countdown.remaining()
        process.shell = process.pty = True
        process.run()
        return process

    # Get ip addresses
    process_ip = run_g5k_subnets("-i -m")
    # Get network parameters
    process_net = run_g5k_subnets("-a")

    if not (process_net.ok and process_ip.ok):
        raise ProcessesFailed(
            [p for p in [process_net, process_ip] if not p.ok])
    # Each pair of whitespace-separated tokens is an (IP, MAC) tuple.
    subnet_addresses = re.findall(r"(\S+)\s+(\S+)", process_ip.stdout)
    param_names = ("ip_prefix", "broadcast", "netmask", "gateway",
                   "network", "dns_hostname", "dns_ip")
    param_values = process_net.stdout.rstrip().split('\t')
    network_params = {}
    if len(param_values) == len(param_names):
        network_params = dict(zip(param_names, param_values))
    return (subnet_addresses, network_params)
예제 #7
0
파일: oar.py 프로젝트: msimonin/execo
def get_current_oar_jobs(frontends=None,
                         start_between=None,
                         end_between=None,
                         frontend_connection_params=None,
                         timeout=False,
                         abort_on_error=False):
    """Return a list of current active oar job ids.

    The list contains tuples (oarjob id, frontend).

    One ``oarstat -u`` process is started per frontend; all of them are
    started before any is waited for. Job ids are read from the leading
    integer of each output line.

    :param frontends: an iterable of frontends to connect to. A
      frontend with value None means default frontend. If frontends ==
      None, means get current oar jobs only for default frontend.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      on any error. If False, will return the list of jobs got, even
      if incomplete (some frontends may have failed to answer).

    :raises ProcessesFailed: if ``abort_on_error`` is True and at least
      one ``oarstat`` process is not ok.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between:
        start_between = [get_unixts(t) for t in start_between]
    if end_between:
        end_between = [get_unixts(t) for t in end_between]
    if frontends is None:
        frontends = [None]
    processes = []
    for frontend in frontends:
        p = get_process("oarstat -u",
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        # Remember which frontend each process queries.
        p.frontend = frontend
        processes.append(p)
    if not processes:
        return []
    # Start everything first, then wait, so the queries run concurrently.
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    oar_job_ids = []
    failed_processes = []
    for process in processes:
        if process.ok:
            jobs = re.findall(r"^(\d+)\s", process.stdout, re.MULTILINE)
            oar_job_ids.extend(
                (int(jobid), process.frontend) for jobid in jobs)
        else:
            failed_processes.append(process)
    if failed_processes and abort_on_error:
        raise ProcessesFailed(failed_processes)
    if start_between or end_between:
        filtered_job_ids = []
        for jobfrontend in oar_job_ids:
            info = get_oar_job_info(jobfrontend[0],
                                    jobfrontend[1],
                                    frontend_connection_params,
                                    timeout,
                                    nolog_exit_code=True,
                                    nolog_timeout=True,
                                    nolog_error=True)
            # The end date is derived as start date + walltime.
            if (_date_in_range(info['start_date'], start_between)
                    and _date_in_range(
                        info['start_date'] + info['walltime'],
                        end_between)):
                filtered_job_ids.append(jobfrontend)
        oar_job_ids = filtered_job_ids
    return oar_job_ids
예제 #8
0
파일: oar.py 프로젝트: msimonin/execo
def oarsub(job_specs,
           frontend_connection_params=None,
           timeout=False,
           abort_on_error=False):
    """Submit jobs.

    One submission process is created per job spec; all of them are
    started before any is waited for. The job id is read from the
    ``OAR_JOB_ID=<id>`` line of each process output.

    :param job_specs: iterable of tuples (execo_g5k.oar.OarSubmission,
      frontend) with None for default frontend

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for submitting. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      if any submission failed. If False, returns the full result
      list, in which failed submissions have oarjob id == None.

    :raises ProcessesFailed: if ``abort_on_error`` is True and at least
      one submission process is not ok or printed no job id.

    Returns a list of tuples (oarjob id, frontend), with frontend ==
    None for default frontend. If submission error, oarjob id ==
    None. The returned list matches, in the same order, the job_specs
    parameter.
    """
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    processes = []
    for (spec, frontend) in job_specs:
        p = get_process(get_oarsub_commandline(spec),
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        # Remember which frontend each process submits to.
        p.frontend = frontend
        processes.append(p)
    if not processes:
        return []
    # Start everything first, then wait, so the submissions run
    # concurrently.
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    oar_job_ids = []
    failed_processes = []
    for process in processes:
        job_id = None
        if process.ok:
            mo = re.search(r"^OAR_JOB_ID=(\d+)\s*$", process.stdout,
                           re.MULTILINE)
            if mo is not None:
                job_id = int(mo.group(1))
        # A process that is ok but printed no job id also counts as failed.
        if job_id is None:
            failed_processes.append(process)
        oar_job_ids.append((job_id, process.frontend))
    if failed_processes and abort_on_error:
        raise ProcessesFailed(failed_processes)
    return oar_job_ids