Esempio n. 1
0
 def _init_processes(self):
     """Build one kadeploy3 process per frontend.

     Groups the deployment's hosts by their frontend, then creates for
     each frontend a single kadeploy command line listing all of that
     frontend's hosts via ``-m`` options.  Each process is wired with
     stdout / stderr / lifecycle handlers and tagged with bookkeeping
     attributes (``frontend``, ``kadeploy_hosts``, ``deployed_hosts``,
     ``undeployed_hosts``, ``kadeployer``) -- presumably read back while
     tracking deployment progress; confirm against the rest of the class.
     """
     self.processes = []
     # de-duplicate the deployment's hosts, then bucket them by frontend
     self._unique_hosts = get_hosts_set(self.deployment.hosts)
     frontends = dict()
     for host in self._unique_hosts:
         frontend = _get_host_frontend(host)
         if frontend in frontends:
             frontends[frontend].append(host)
         else:
             frontends[frontend] = [host]
     # single lifecycle handler shared by all processes, told up front
     # how many processes to expect (one per frontend)
     lifecycle_handler = ActionNotificationProcessLH(self, len(frontends))
     deploy_stdout_handler = _KadeployStdoutHandler()
     for frontend in frontends:
         # one kadeploy invocation per frontend: common options plus one
         # "-m <host>" per host handled by that frontend
         kadeploy_command = self.deployment._get_common_kadeploy_command_line()
         for host in frontends[frontend]:
             kadeploy_command += " -m %s" % (host.address,)
         p = get_process(kadeploy_command,
                         host = get_frontend_host(frontend),
                         connection_params = make_connection_params(self.frontend_connection_params,
                                                                  default_frontend_connection_params))
         p.pty = True
         p.timeout = self.timeout
         # kadeploy output goes to the parser first, then (prefixed with
         # the frontend name) to the caller-supplied handlers
         p.stdout_handlers.append(deploy_stdout_handler)
         p.stdout_handlers.extend([ FrontendPrefixWrapper(h)
                                    for h in singleton_to_collection(self._stdout_handlers) ])
         p.stderr_handlers.extend([ FrontendPrefixWrapper(h)
                                    for h in singleton_to_collection(self._stderr_handlers) ])
         p.lifecycle_handlers.append(lifecycle_handler)
         # bookkeeping attributes attached to the process object
         p.frontend = frontend
         p.kadeploy_hosts = [ host.address for host in frontends[frontend] ]
         p.deployed_hosts = set()
         p.undeployed_hosts = set()
         p.kadeployer = self
         self.processes.append(p)
Esempio n. 2
0
def oargriddel(job_ids, frontend_connection_params = None, timeout = False):
    """Delete oargrid jobs.

    All errors are ignored, so it is safe to delete nonexistent jobs,
    jobs that were already deleted, or jobs you don't own: such
    deletions are silently skipped.

    :param job_ids: iterable of oar grid job ids.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use ``g5k_configuration['default_timeout']``. None means no
      timeout.
    """
    if isinstance(timeout, bool) and timeout == False:
        timeout = g5k_configuration.get('default_timeout')
    processes = []
    for job_id in job_ids:
        proc = get_process("oargriddel %i" % (job_id,),
                           host = get_frontend_host(),
                           connection_params = make_connection_params(frontend_connection_params,
                                                                      default_frontend_connection_params))
        proc.timeout = timeout
        # failures are deliberately ignored, so don't log exit codes
        proc.nolog_exit_code = True
        proc.pty = True
        processes.append(proc)
    # launch all deletions in parallel, then wait for them all
    for proc in processes:
        proc.start()
    for proc in processes:
        proc.wait()
Esempio n. 3
0
def get_oargrid_job_oar_jobs(oargrid_job_id = None, frontend_connection_params = None, timeout = False):
    """Return a list of tuples (oar job id, site), the list of individual oar jobs which make an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oargridstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id,),
                          host = get_frontend_host(),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        job_specs = []
        # oargridstat lines look like "\t<site_or_cluster> --> <oar job id>".
        # Raw string: \t, \w and \d are invalid escapes in a plain literal
        # and deprecated in modern Python.
        for m in re.finditer(r"^\t(\w+) --> (\d+)", process.stdout, re.MULTILINE):
            site = m.group(1)
            if site not in get_g5k_sites():
                # oargridstat may report a cluster name instead of a site
                site = get_cluster_site(site)
            job_specs.append((int(m.group(2)), site))
        return job_specs
    else:
        raise ProcessesFailed([process])
Esempio n. 4
0
def get_oargrid_job_nodes(oargrid_job_id, frontend_connection_params = None, timeout = False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oargridstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # -wl waits for the job to start; fall back to plain -l for older
    # oargridstat versions that do not support -w
    process = get_process("oargridstat -wl %i 2>/dev/null || oargridstat -l %i 2>/dev/null" % (oargrid_job_id, oargrid_job_id),
                          host = get_frontend_host(),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        # every whitespace-separated token of the output is a host name.
        # Raw string: \S is an invalid escape in a plain literal.
        # re.MULTILINE dropped: it only affects ^/$ anchors, unused here.
        host_addresses = re.findall(r"\S+", process.stdout)
        # de-duplicate while converting to Host objects
        return list({ Host(host_address) for host_address in host_addresses })
    else:
        raise ProcessesFailed([process])
Esempio n. 5
0
def get_oargrid_job_nodes(oargrid_job_id,
                          frontend_connection_params=None,
                          timeout=False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oargridstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # -wl waits for the job to start; fall back to plain -l for older
    # oargridstat versions that do not support -w
    process = get_process(
        "oargridstat -wl %i 2>/dev/null || oargridstat -l %i 2>/dev/null" %
        (oargrid_job_id, oargrid_job_id),
        host=get_frontend_host(),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        # every whitespace-separated token of the output is a host name.
        # Raw string: \S is an invalid escape in a plain literal.
        host_addresses = re.findall(r"\S+", process.stdout)
        # de-duplicate while converting to Host objects
        return list({Host(host_address) for host_address in host_addresses})
    else:
        raise ProcessesFailed([process])
Esempio n. 6
0
def oargriddel(job_ids, frontend_connection_params=None, timeout=False):
    """Delete oargrid jobs.

    Every error is ignored: deleting a nonexistent job, an already
    deleted job, or somebody else's job is a no-op.

    :param job_ids: iterable of oar grid job ids.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use ``g5k_configuration['default_timeout']``. None means no
      timeout.
    """
    if isinstance(timeout, bool) and timeout == False:
        timeout = g5k_configuration.get('default_timeout')
    deleters = []
    for jid in job_ids:
        deleter = get_process("oargriddel %i" % (jid, ),
                              host=get_frontend_host(),
                              connection_params=make_connection_params(
                                  frontend_connection_params,
                                  default_frontend_connection_params))
        deleter.timeout = timeout
        deleter.pty = True
        # errors are intentionally ignored: don't log exit codes
        deleter.nolog_exit_code = True
        deleters.append(deleter)
    # run all deletions concurrently, then join them
    for deleter in deleters: deleter.start()
    for deleter in deleters: deleter.wait()
Esempio n. 7
0
def oarsub(job_specs, frontend_connection_params = None, timeout = False, abort_on_error = False):
    """Submit jobs.

    :param job_specs: iterable of tuples (execo_g5k.oar.OarSubmission,
      frontend) with None for default frontend

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for submitting. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      on any error. If False, will returned the list of job got, even
      if incomplete (some frontends may have failed to answer).

    Returns a list of tuples (oarjob id, frontend), with frontend ==
    None for default frontend. If submission error, oarjob id ==
    None. The returned list matches, in the same order, the job_specs
    parameter.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    processes = []
    for (spec, frontend) in job_specs:
        oarsub_cmdline = get_oarsub_commandline(spec)
        p = get_process(oarsub_cmdline,
                        host = get_frontend_host(frontend),
                        connection_params = make_connection_params(frontend_connection_params,
                                                                   default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        # remembered so results can be paired with their frontend below
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if not processes:
        return oar_job_ids
    # submit everything in parallel, then wait for all submissions
    for process in processes: process.start()
    for process in processes: process.wait()
    failed_processes = []
    for process in processes:
        job_id = None
        if process.ok:
            # oarsub prints "OAR_JOB_ID=<id>" on success. Raw string:
            # \d and \s are invalid escapes in a plain literal.
            mo = re.search(r"^OAR_JOB_ID=(\d+)\s*$", process.stdout, re.MULTILINE)
            if mo is not None:
                job_id = int(mo.group(1))
        if job_id is None:
            failed_processes.append(process)
        oar_job_ids.append((job_id, process.frontend))
    if failed_processes and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        return oar_job_ids
Esempio n. 8
0
def get_oar_job_kavlan(oar_job_id=None,
                       frontend=None,
                       frontend_connection_params=None,
                       timeout=False):
    """Return the list of vlan ids of a job (if any).

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the kavlan process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    # vlan ids are only available once the job is running; the timeout
    # budget is shared between the wait and the kavlan call
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    process = get_process('kavlan -j %s -V ' % oar_job_id,
                          host=get_frontend_host(frontend),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.pty = True
    process.ignore_exit_code = True  # kavlan exit code != 0 if request
    process.nolog_exit_code = True  # is for a job without a vlan
    # reservation
    process.run()
    if process.ok:
        try:
            # pty output lines end in \r\n
            return [
                int(x) for x in process.stdout.strip().rstrip().split('\r\n')
            ]
        except ValueError:
            # narrowed from a bare except: int() raising ValueError is the
            # expected failure when the job has no kavlan resource or
            # kavlan isn't available (output isn't a list of integers)
            return []
    else:
        raise ProcessesFailed([process])
Esempio n. 9
0
 def __enter__(self):
     """Return a (host, port) endpoint reachable from the current machine.

     Inside Grid5000 the target is reachable directly; outside, an ssh
     port forwarding through the site frontend is established and the
     local end of the tunnel is returned instead.
     """
     if 'grid5000.fr' in socket.getfqdn():
         # already inside Grid5000: connect directly
         return self.__host, self.__port
     forwarder = PortForwarder(
         get_frontend_host(self.__site),
         self.__host,
         self.__port,
         connection_params=make_connection_params(default_frontend_connection_params))
     self.__port_forwarder = forwarder
     forwarder.__enter__()
     return "127.0.0.1", forwarder.local_port
Esempio n. 10
0
 def __enter__(self):
     """Context-manager entry: give back a usable (address, port) pair.

     When already running on a Grid5000 machine the original endpoint
     is returned as-is; otherwise the traffic is tunnelled through the
     site frontend and the local end of the tunnel is returned.
     """
     inside_g5k = 'grid5000.fr' in socket.getfqdn()
     if not inside_g5k:
         self.__port_forwarder = PortForwarder(
             get_frontend_host(self.__site),
             self.__host,
             self.__port,
             connection_params=make_connection_params(
                 default_frontend_connection_params))
         self.__port_forwarder.__enter__()
         return "127.0.0.1", self.__port_forwarder.local_port
     return self.__host, self.__port
Esempio n. 11
0
def get_oargrid_job_info(oargrid_job_id=None,
                         frontend_connection_params=None,
                         timeout=False):
    """Return a dict with informations about an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Hash returned contains these keys:

    - ``start_date``: unix timestamp of job's start date

    - ``walltime``: job's walltime in seconds

    - ``user``: job's user
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id, ),
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    job_info = dict()
    # NOTE(review): unlike the sibling getters, a failed process is not
    # turned into ProcessesFailed here; callers just get a dict with
    # whichever keys could be parsed -- confirm this is intentional.
    # Raw strings below: \d and \S are invalid escapes in plain literals.
    start_date_result = re.search(
        r"start date : (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)", process.stdout,
        re.MULTILINE)
    if start_date_result:
        start_date = oar_date_to_unixts(start_date_result.group(1))
        job_info['start_date'] = start_date
    walltime_result = re.search(r"walltime : (\d+:\d?\d:\d?\d)", process.stdout,
                                re.MULTILINE)
    if walltime_result:
        walltime = oar_duration_to_seconds(walltime_result.group(1))
        job_info['walltime'] = walltime
    user_result = re.search(r"user : (\S+)", process.stdout, re.MULTILINE)
    if user_result:
        user = user_result.group(1)
        job_info['user'] = user
    return job_info
Esempio n. 12
0
def get_oar_job_nodes(oar_job_id=None,
                      frontend=None,
                      frontend_connection_params=None,
                      timeout=False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oar job.

    This method waits for the job start (the list of nodes isn't fixed
    until the job start).

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oarstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    # the node list isn't fixed until the job runs; share the timeout
    # budget between the wait and the oarstat call
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # raw string: keeps grep's \| alternation literal without relying on
    # Python's (deprecated) pass-through of invalid escapes
    process = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && oarstat -pj %(oar_job_id)i | oarprint host -f -"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.shell = process.pty = True
    process.run()
    if process.ok:
        # every whitespace-separated token of the output is a host name
        host_addresses = re.findall(r"\S+", process.stdout)
        return [Host(host_address) for host_address in host_addresses]
    else:
        raise ProcessesFailed([process])
Esempio n. 13
0
def get_current_oargrid_jobs(start_between=None,
                             end_between=None,
                             frontend_connection_params=None,
                             timeout=False):
    """Return a list of current active oargrid job ids.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oargridstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between: start_between = [get_unixts(t) for t in start_between]
    if end_between: end_between = [get_unixts(t) for t in end_between]
    process = get_process("oargridstat",
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        # raw string: \d is an invalid escape in a plain literal
        jobs = re.findall(r"Reservation # (\d+):", process.stdout, re.MULTILINE)
        oargrid_job_ids = [int(j) for j in jobs]
        if start_between or end_between:
            filtered_job_ids = []
            for job in oargrid_job_ids:
                # BUG FIX: timeout used to be passed positionally into
                # the frontend_connection_params slot of
                # get_oargrid_job_info; pass both parameters explicitly
                info = get_oargrid_job_info(job, frontend_connection_params,
                                            timeout)
                if (_date_in_range(info['start_date'], start_between)
                        and _date_in_range(
                            info['start_date'] + info['walltime'],
                            end_between)):
                    filtered_job_ids.append(job)
            oargrid_job_ids = filtered_job_ids
        return oargrid_job_ids
    else:
        raise ProcessesFailed([process])
Esempio n. 14
0
def get_current_oargrid_jobs(start_between = None,
                             end_between = None,
                             frontend_connection_params = None,
                             timeout = False):
    """Return a list of current active oargrid job ids.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oargridstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between: start_between = [ get_unixts(t) for t in start_between ]
    if end_between: end_between = [ get_unixts(t) for t in end_between ]
    process = get_process("oargridstat",
                          host = get_frontend_host(),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        # raw string: \d is an invalid escape in a plain literal
        jobs = re.findall(r"Reservation # (\d+):", process.stdout, re.MULTILINE)
        oargrid_job_ids = [ int(j) for j in jobs ]
        if start_between or end_between:
            filtered_job_ids = []
            for job in oargrid_job_ids:
                # BUG FIX: timeout used to be passed positionally into
                # the frontend_connection_params slot of
                # get_oargrid_job_info; pass both parameters explicitly
                info = get_oargrid_job_info(job, frontend_connection_params, timeout)
                if (_date_in_range(info['start_date'], start_between)
                    and _date_in_range(info['start_date'] + info['walltime'], end_between)):
                    filtered_job_ids.append(job)
            oargrid_job_ids = filtered_job_ids
        return oargrid_job_ids
    else:
        raise ProcessesFailed([process])
Esempio n. 15
0
def get_oar_job_kavlan(oar_job_id = None, frontend = None, frontend_connection_params = None, timeout = False):
    """Return the list of vlan ids of a job (if any).

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the kavlan process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError("no oar job id given and no OAR_JOB_ID environment variable found")
    # vlan ids are only available once the job is running; the timeout
    # budget is shared between the wait and the kavlan call
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params, countdown.remaining())
    process = get_process(
        'kavlan -j %s -V ' % oar_job_id,
        host = get_frontend_host(frontend),
        connection_params = make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.pty = True
    process.ignore_exit_code = True # kavlan exit code != 0 if request
    process.nolog_exit_code = True  # is for a job without a vlan
                                    # reservation
    process.run()
    if process.ok:
        try:
            # pty output lines end in \r\n
            return [ int(x) for x in process.stdout.strip().rstrip().split('\r\n') ]
        except ValueError:
            # narrowed from a bare except: int() raising ValueError is
            # the expected failure when the job has no kavlan resource
            # or kavlan isn't available (output isn't integers)
            return []
    else:
        raise ProcessesFailed([process])
Esempio n. 16
0
def get_ssh_scp_auth_options(user=None,
                             keyfile=None,
                             port=None,
                             connection_params=None):
    """Return tuple with ssh / scp authentifications options.

    :param user: the user to connect with. If None, will try to get
      the user from the given connection_params, or fallback to the
      default user in `execo.config.default_connection_params`, or no
      user option at all.

    :param keyfile: the keyfile to connect with. If None, will try to
      get the keyfile from the given connection_params, or fallback to
      the default keyfile in `execo.config.default_connection_params`,
      or no keyfile option at all.

    :param port: the port to connect to. If None, will try to get the
      port from the given connection_params, or fallback to the default
      port in `execo.config.default_connection_params`, or no port
      option at all.

    :param connection_params: a dict similar to
      `execo.config.default_connection_params`, whose values will
      override those in `execo.config.default_connection_params`
    """
    params = make_connection_params(connection_params)
    options = []

    # for each option: an explicit argument always wins, otherwise fall
    # back to a truthy value from the effective connection params
    if user is not None:
        options += ["-o", "User=%s" % (user, )]
    elif params.get('user'):
        options += ["-o", "User=%s" % (params['user'], )]

    if keyfile is not None:
        options += ["-i", str(keyfile)]
    elif params.get('keyfile'):
        options += ["-i", str(params['keyfile'])]

    if port is not None:
        options += ["-o", "Port=%i" % port]
    elif params.get('port'):
        options += ["-o", "Port=%i" % params['port']]

    return tuple(options)
Esempio n. 17
0
def get_oargrid_job_info(oargrid_job_id = None, frontend_connection_params = None, timeout = False):
    """Return a dict with informations about an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Hash returned contains these keys:

    - ``start_date``: unix timestamp of job's start date

    - ``walltime``: job's walltime in seconds

    - ``user``: job's user
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id,),
                          host = get_frontend_host(),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    job_info = dict()
    # NOTE(review): a failed process is not reported here; callers just
    # get a dict with whichever keys could be parsed from the output.
    # Raw strings below: \d and \S are invalid escapes in plain literals.
    start_date_result = re.search(r"start date : (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)", process.stdout, re.MULTILINE)
    if start_date_result:
        start_date = oar_date_to_unixts(start_date_result.group(1))
        job_info['start_date'] = start_date
    walltime_result = re.search(r"walltime : (\d+:\d?\d:\d?\d)", process.stdout, re.MULTILINE)
    if walltime_result:
        walltime = oar_duration_to_seconds(walltime_result.group(1))
        job_info['walltime'] = walltime
    user_result = re.search(r"user : (\S+)", process.stdout, re.MULTILINE)
    if user_result:
        user = user_result.group(1)
        job_info['user'] = user
    return job_info
Esempio n. 18
0
def get_oar_job_nodes(oar_job_id = None, frontend = None,
                      frontend_connection_params = None, timeout = False):
    """Return an iterable of `execo.host.Host` containing the hosts of an oar job.

    This method waits for the job start (the list of nodes isn't fixed
    until the job start).

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :raises ProcessesFailed: if the oarstat process fails.
    """
    # "timeout is False" matches only the False singleton, never 0
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError("no oar job id given and no OAR_JOB_ID environment variable found")
    # the node list isn't fixed until the job runs; share the timeout
    # budget between the wait and the oarstat call
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params, countdown.remaining())
    # raw string: keeps grep's \| alternation literal without relying on
    # Python's (deprecated) pass-through of invalid escapes
    process = get_process(r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && oarstat -pj %(oar_job_id)i | oarprint host -f -" % {'oar_job_id': oar_job_id},
                          host = get_frontend_host(frontend),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = countdown.remaining()
    process.pty = True
    process.run()
    if process.ok:
        # every whitespace-separated token of the output is a host name
        host_addresses = re.findall(r"\S+", process.stdout)
        return [ Host(host_address) for host_address in host_addresses ]
    else:
        raise ProcessesFailed([process])
Esempio n. 19
0
def get_ssh_scp_auth_options(user = None, keyfile = None, port = None, connection_params = None):
    """Return a tuple of ssh / scp authentification options.

    :param user: the user to connect with. If None, will try to get
      the user from the given connection_params, or fallback to the
      default user in `execo.config.default_connection_params`, or no
      user option at all.

    :param keyfile: the keyfile to connect with. If None, will try to
      get the keyfile from the given connection_params, or fallback to
      the default keyfile in `execo.config.default_connection_params`,
      or no keyfile option at all.

    :param port: the port to connect to. If None, will try to get the
      port from the given connection_params, or fallback to the default
      port in `execo.config.default_connection_params`, or no port
      option at all.

    :param connection_params: a dict similar to
      `execo.config.default_connection_params`, whose values will
      override those in `execo.config.default_connection_params`
    """
    cp = make_connection_params(connection_params)
    opts = []

    # explicit argument wins; otherwise fall back to a truthy param value
    if user != None:
        opts.extend(("-o", "User=%s" % (user,)))
    elif cp.get('user'):
        opts.extend(("-o", "User=%s" % (cp['user'],)))

    if keyfile != None:
        opts.extend(("-i", str(keyfile)))
    elif cp.get('keyfile'):
        opts.extend(("-i", str(cp['keyfile'])))

    if port != None:
        opts.extend(("-o", "Port=%i" % port))
    elif cp.get('port'):
        opts.extend(("-o", "Port=%i" % cp['port']))

    return tuple(opts)
Esempio n. 20
0
 def _init_processes(self):
     """Build one kadeploy3 process per frontend for the deployment's hosts."""
     self.processes = []
     self._unique_hosts = get_hosts_set(self.deployment.hosts)
     # group the unique hosts by the frontend that manages them
     hosts_by_frontend = {}
     for h in self._unique_hosts:
         hosts_by_frontend.setdefault(_get_host_frontend(h), []).append(h)
     lifecycle_handler = ActionNotificationProcessLH(self,
                                                     len(hosts_by_frontend))
     deploy_stdout_handler = _KadeployStdoutHandler()
     for frontend, hosts in hosts_by_frontend.items():
         cmd = self.deployment._get_common_kadeploy_command_line()
         cmd += "".join(" -m %s" % (h.address,) for h in hosts)
         proc = get_process(cmd,
                            host = get_frontend_host(frontend),
                            connection_params = make_connection_params(
                                self.frontend_connection_params,
                                default_frontend_connection_params))
         proc.pty = True
         proc.timeout = self.timeout
         proc.stdout_handlers.append(deploy_stdout_handler)
         # wrap user handlers so their output is prefixed per frontend
         for h in singleton_to_collection(self._stdout_handlers):
             proc.stdout_handlers.append(FrontendPrefixWrapper(h))
         for h in singleton_to_collection(self._stderr_handlers):
             proc.stderr_handlers.append(FrontendPrefixWrapper(h))
         proc.lifecycle_handlers.append(lifecycle_handler)
         proc.frontend = frontend
         proc.kadeploy_hosts = [h.address for h in hosts]
         proc.deployed_hosts = set()
         proc.undeployed_hosts = set()
         proc.kadeployer = self
         self.processes.append(proc)
Esempio n. 21
0
def _get_connector_command(connector_params_entry,
                           connector_options_params_entry,
                           user = None,
                           keyfile = None,
                           port = None,
                           connection_params = None):
    """Build an ssh / scp / taktuk connector command line.

    The connector executable and its options are looked up under
    <connector_params_entry> and <connector_options_params_entry> in
    connection_params (falling back to
    `execo.config.default_connection_params`), then the
    authentification options from
    `execo.ssh_utils.get_ssh_scp_auth_options` are appended.

    :param connector_params_entry: name of field in connection_params
      or default_connection_params containing the connector executable
      name

    :param connector_options_params_entry: name of field in
      connection_params or default_connection_params containing the
      connector options

    :param user: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param keyfile: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param port: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param connection_params: see
      `execo.ssh_utils.get_ssh_scp_auth_options`
    """
    cp = make_connection_params(connection_params)
    connector = (cp[connector_params_entry],)
    options = cp[connector_options_params_entry]
    auth = get_ssh_scp_auth_options(user, keyfile, port, connection_params)
    return connector + options + auth
Esempio n. 22
0
def _get_connector_command(connector_params_entry,
                           connector_options_params_entry,
                           user = None,
                           keyfile = None,
                           port = None,
                           connection_params = None):
    """Build an ssh / scp / taktuk connector command line.

    Assembles, in order: the connector executable named by
    <connector_params_entry>, the options named by
    <connector_options_params_entry> (both read from connection_params,
    with `execo.config.default_connection_params` as fallback), and the
    authentification options returned by
    `execo.ssh_utils.get_ssh_scp_auth_options`.

    :param connector_params_entry: name of field in connection_params
      or default_connection_params containing the connector executable
      name

    :param connector_options_params_entry: name of field in
      connection_params or default_connection_params containing the
      connector options

    :param user: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param keyfile: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param port: see `execo.ssh_utils.get_ssh_scp_auth_options`

    :param connection_params: see
      `execo.ssh_utils.get_ssh_scp_auth_options`
    """
    actual = make_connection_params(connection_params)
    parts = [actual[connector_params_entry]]
    parts.extend(actual[connector_options_params_entry])
    parts.extend(get_ssh_scp_auth_options(user, keyfile, port,
                                          connection_params))
    return tuple(parts)
Esempio n. 23
0
def get_oargrid_job_oar_jobs(oargrid_job_id=None,
                             frontend_connection_params=None,
                             timeout=False):
    """Return a list of tuples (oar job id, site), the list of individual oar jobs which make an oargrid job.

    :param oargrid_job_id: the oargrid job id.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.
    """
    # False is a sentinel for "use configured default"; None legitimately
    # means "no timeout", so only the exact bool False is replaced.
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    process = get_process("oargridstat %i" % (oargrid_job_id, ),
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    if process.ok:
        job_specs = []
        # raw string: \t, \w, \d are regex escapes; non-raw \w and \d are
        # invalid python string escapes (SyntaxWarning on 3.12+)
        for m in re.finditer(r"^\t(\w+) --> (\d+)", process.stdout,
                             re.MULTILINE):
            site = m.group(1)
            # oargridstat may report a cluster alias instead of a site name
            if site not in get_g5k_sites():
                site = get_cluster_site(site)
            job_specs.append((int(m.group(2)), site))
        return job_specs
    else:
        raise ProcessesFailed([process])
Esempio n. 24
0
def oargridsub(job_specs, reservation_date = None,
               walltime = None, job_type = None,
               queue = None, directory = None,
               additional_options = None,
               frontend_connection_params = None,
               timeout = False):
    """Submit oargrid jobs.

    :param job_specs: iterable of tuples (OarSubmission,
      clusteralias). Reservation date, walltime, queue, directory,
      project, additional_options, command of the OarSubmission are
      ignored.

    :param reservation_date: grid job reservation date. Default: now.

    :param walltime: grid job walltime.

    :param job_type: type of job for all clusters: deploy, besteffort,
      cosystem, checkpoint, timesharing.

    :param queue: oar queue to use.

    :param directory: directory where the reservation will be
      launched.

    :param additional_options: passed directly to oargridsub on the
      command line.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Returns a tuple (oargrid_job_id, ssh_key), or (None, None) if
    error.

    Note that, as oargrid does not handle correctly quoting sql
    clauses enclosed inside braces, this function tries to
    automatically overcome this limitation by adding some, with the
    right escaping (backslashes). Also, note that oargrid's command
    line parser does not handle correctly commas in sql clauses
    enclosed inside braces, as it considers it as a rdef
    separator. This prevents, for example, using comma separated list
    values for ``NOT IN`` clauses.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    oargridsub_cmdline = get_oargridsub_commandline(job_specs, reservation_date,
                                                    walltime, job_type, queue,
                                                    directory, additional_options)
    process = get_process(oargridsub_cmdline,
                          host = get_frontend_host(),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    job_id = None
    ssh_key = None
    if process.ok:
        # raw strings: \[, \d, \s, \S are regex escapes, not python string
        # escapes (non-raw forms trigger SyntaxWarning on 3.12+)
        mo = re.search(r"^\[OAR_GRIDSUB\] Grid reservation id = (\d+)\s*$", process.stdout, re.MULTILINE)
        if mo != None:
            job_id = int(mo.group(1))
        mo = re.search(r"^\[OAR_GRIDSUB\] SSH KEY : (\S*)\s*$", process.stdout, re.MULTILINE)
        if mo != None:
            ssh_key = mo.group(1)
    if job_id != None:
        return (job_id, ssh_key)
    else:
        return (None, None)
Esempio n. 25
0
def get_oar_job_info(oar_job_id = None, frontend = None,
                     frontend_connection_params = None, timeout = False,
                     nolog_exit_code = False, nolog_timeout = False, nolog_error = False):
    """Return a dict with informations about an oar job.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Hash returned may contain these keys:

    - ``start_date``: unix timestamp of job's start date

    - ``walltime``: job's walltime (seconds)

    - ``scheduled_start``: unix timestamp of job's start prediction
      (may change between invocations)

    - ``state``: job state. Possible states: 'Waiting', 'Hold',
      'toLaunch', 'toError', 'toAckReservation', 'Launching',
      'Running', 'Suspended', 'Resuming', 'Finishing', 'Terminated',
      'Error', see table jobs, column state, in oar documentation
      http://oar.imag.fr/sources/2.5/docs/documentation/OAR-DOCUMENTATION-ADMIN/#jobs

    - ``name``: job name

    But no info may be available as long as the job is not scheduled.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError("no oar job id given and no OAR_JOB_ID environment variable found")
    process = get_process("oarstat -fj %i" % (oar_job_id,),
                          host = get_frontend_host(frontend),
                          connection_params = make_connection_params(frontend_connection_params,
                                                                     default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.nolog_exit_code = nolog_exit_code
    process.nolog_timeout = nolog_timeout
    process.nolog_error = nolog_error
    process.run()
    job_info = dict()
    # raw strings throughout: \s, \d, \w, \S are regex escapes, not python
    # string escapes (non-raw forms trigger SyntaxWarning on 3.12+)
    start_date_result = re.search(r"^\s*startTime = (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)\s*$", process.stdout, re.MULTILINE)
    if start_date_result:
        start_date = oar_date_to_unixts(start_date_result.group(1))
        job_info['start_date'] = start_date
    walltime_result = re.search(r"^\s*walltime = (\d+:\d?\d:\d?\d)\s*$", process.stdout, re.MULTILINE)
    if walltime_result:
        walltime = oar_duration_to_seconds(walltime_result.group(1))
        job_info['walltime'] = walltime
    scheduled_start_result = re.search(r"^\s*scheduledStart = (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)\s*$", process.stdout, re.MULTILINE)
    if scheduled_start_result:
        scheduled_start = oar_date_to_unixts(scheduled_start_result.group(1))
        job_info['scheduled_start'] = scheduled_start
    state_result = re.search(r"^\s*state = (\w*)\s*$", process.stdout, re.MULTILINE)
    if state_result:
        job_info['state'] = state_result.group(1)
    name_result = re.search(r"^\s*name = ([ \t\S]*)\s*$", process.stdout, re.MULTILINE)
    if name_result:
        job_info['name'] = name_result.group(1)
    return job_info
Esempio n. 26
0
def get_oar_job_subnets(oar_job_id=None,
                        frontend=None,
                        frontend_connection_params=None,
                        timeout=False):
    """Return a tuple containing an iterable of tuples (IP, MAC) and a dict containing the subnet parameters of the reservation (if any).

    subnet parameters dict has keys: 'ip_prefix', 'broadcast',
    'netmask', 'gateway', 'network', 'dns_hostname', 'dns_ip'.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params,
                       countdown.remaining())
    # Get ip adresses. Raw command string: \| must reach grep literally,
    # and non-raw \| is an invalid python string escape (SyntaxWarning on
    # 3.12+).
    process_ip = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -i -m -j %(oar_job_id)i"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process_ip.timeout = countdown.remaining()
    process_ip.shell = process_ip.pty = True
    process_ip.run()
    # Get network parameters
    process_net = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -a -j %(oar_job_id)i"
        % {'oar_job_id': oar_job_id},
        host=get_frontend_host(frontend),
        connection_params=make_connection_params(
            frontend_connection_params, default_frontend_connection_params))
    process_net.timeout = countdown.remaining()
    process_net.shell = process_net.pty = True
    process_net.run()

    if process_net.ok and process_ip.ok:
        # raw string: \S and \s are regex escapes, not string escapes
        subnet_addresses = re.findall(r"(\S+)\s+(\S+)", process_ip.stdout,
                                      re.MULTILINE)
        process_net_out = process_net.stdout.rstrip().split('\t')
        network_params = dict()
        if len(process_net_out) == 7:
            network_params = {
                "ip_prefix": process_net_out[0],
                "broadcast": process_net_out[1],
                "netmask": process_net_out[2],
                "gateway": process_net_out[3],
                "network": process_net_out[4],
                "dns_hostname": process_net_out[5],
                "dns_ip": process_net_out[6]
            }
        return (subnet_addresses, network_params)
    else:
        raise ProcessesFailed(
            [p for p in [process_net, process_ip] if not p.ok])
Esempio n. 27
0
def get_oar_job_subnets(oar_job_id = None, frontend = None, frontend_connection_params = None, timeout = False):
    """Return a tuple containing an iterable of tuples (IP, MAC) and a dict containing the subnet parameters of the reservation (if any).

    subnet parameters dict has keys: 'ip_prefix', 'broadcast',
    'netmask', 'gateway', 'network', 'dns_hostname', 'dns_ip'.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError("no oar job id given and no OAR_JOB_ID environment variable found")
    countdown = Timer(timeout)
    wait_oar_job_start(oar_job_id, frontend, frontend_connection_params, countdown.remaining())
    # Get ip adresses. Raw command string: \| must reach grep literally,
    # and non-raw \| is an invalid python string escape (SyntaxWarning on
    # 3.12+).
    process_ip = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -i -m -j %(oar_job_id)i" % {'oar_job_id': oar_job_id},
        host = get_frontend_host(frontend),
        connection_params = make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process_ip.timeout = countdown.remaining()
    process_ip.pty = True
    process_ip.run()
    # Get network parameters
    process_net = get_process(
        r"(oarstat -sj %(oar_job_id)i | grep 'Running\|Terminated\|Error') > /dev/null 2>&1 && g5k-subnets -a -j %(oar_job_id)i" % {'oar_job_id': oar_job_id},
        host = get_frontend_host(frontend),
        connection_params = make_connection_params(
            frontend_connection_params,
            default_frontend_connection_params))
    process_net.timeout = countdown.remaining()
    process_net.pty = True
    process_net.run()

    if process_net.ok and process_ip.ok:
        # raw string: \S and \s are regex escapes, not string escapes
        subnet_addresses = re.findall(r"(\S+)\s+(\S+)", process_ip.stdout, re.MULTILINE)
        process_net_out = process_net.stdout.rstrip().split('\t')
        network_params = dict()
        if len(process_net_out) == 7:
            network_params = {
                "ip_prefix": process_net_out[0],
                "broadcast": process_net_out[1],
                "netmask": process_net_out[2],
                "gateway": process_net_out[3],
                "network": process_net_out[4],
                "dns_hostname": process_net_out[5],
                "dns_ip": process_net_out[6]
                }
        return (subnet_addresses, network_params)
    else:
        raise ProcessesFailed([ p for p in [process_net, process_ip] if not p.ok ])
Esempio n. 28
0
def get_oar_job_info(oar_job_id=None,
                     frontend=None,
                     frontend_connection_params=None,
                     timeout=False,
                     nolog_exit_code=False,
                     nolog_timeout=False,
                     nolog_error=False):
    """Return a dict with informations about an oar job.

    :param oar_job_id: the oar job id. If None given, will try to get
      it from ``OAR_JOB_ID`` environment variable.

    :param frontend: the frontend of the oar job. If None given, use
      default frontend

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Hash returned may contain these keys:

    - ``start_date``: unix timestamp of job's start date

    - ``walltime``: job's walltime (seconds)

    - ``scheduled_start``: unix timestamp of job's start prediction
      (may change between invocations)

    - ``state``: job state. Possible states: 'Waiting', 'Hold',
      'toLaunch', 'toError', 'toAckReservation', 'Launching',
      'Running', 'Suspended', 'Resuming', 'Finishing', 'Terminated',
      'Error', see table jobs, column state, in oar documentation
      http://oar.imag.fr/sources/2.5/docs/documentation/OAR-DOCUMENTATION-ADMIN/#jobs

    - ``name``: job name

    But no info may be available as long as the job is not scheduled.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if oar_job_id is None:
        if 'OAR_JOB_ID' in os.environ:
            oar_job_id = os.environ['OAR_JOB_ID']
        else:
            raise ValueError(
                "no oar job id given and no OAR_JOB_ID environment variable found"
            )
    process = get_process("oarstat -fj %i" % (oar_job_id, ),
                          host=get_frontend_host(frontend),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.nolog_exit_code = nolog_exit_code
    process.nolog_timeout = nolog_timeout
    process.nolog_error = nolog_error
    process.run()
    job_info = dict()
    # raw strings throughout: \s, \d, \w, \S are regex escapes, not python
    # string escapes (non-raw forms trigger SyntaxWarning on 3.12+)
    start_date_result = re.search(
        r"^\s*startTime = (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)\s*$",
        process.stdout, re.MULTILINE)
    if start_date_result:
        start_date = oar_date_to_unixts(start_date_result.group(1))
        job_info['start_date'] = start_date
    walltime_result = re.search(r"^\s*walltime = (\d+:\d?\d:\d?\d)\s*$",
                                process.stdout, re.MULTILINE)
    if walltime_result:
        walltime = oar_duration_to_seconds(walltime_result.group(1))
        job_info['walltime'] = walltime
    scheduled_start_result = re.search(
        r"^\s*scheduledStart = (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)\s*$",
        process.stdout, re.MULTILINE)
    if scheduled_start_result:
        scheduled_start = oar_date_to_unixts(scheduled_start_result.group(1))
        job_info['scheduled_start'] = scheduled_start
    state_result = re.search(r"^\s*state = (\w*)\s*$", process.stdout,
                             re.MULTILINE)
    if state_result:
        job_info['state'] = state_result.group(1)
    name_result = re.search(r"^\s*name = ([ \t\S]*)\s*$", process.stdout,
                            re.MULTILINE)
    if name_result:
        job_info['name'] = name_result.group(1)
    return job_info
Esempio n. 29
0
def get_current_oar_jobs(frontends=None,
                         start_between=None,
                         end_between=None,
                         frontend_connection_params=None,
                         timeout=False,
                         abort_on_error=False):
    """Return a list of current active oar job ids.

    The list contains tuples (oarjob id, frontend).

    :param frontends: an iterable of frontends to connect to. A
      frontend with value None means default frontend. If frontends ==
      None, means get current oar jobs only for default frontend.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      on any error. If False, will returned the list of job got, even
      if incomplete (some frontends may have failed to answer).
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    if start_between: start_between = [get_unixts(t) for t in start_between]
    if end_between: end_between = [get_unixts(t) for t in end_between]
    processes = []
    if frontends == None:
        frontends = [None]
    for frontend in frontends:
        p = get_process("oarstat -u",
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if len(processes) == 0:
        return oar_job_ids
    # start all frontend queries in parallel, then collect
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    failed_processes = []
    for process in processes:
        if process.ok:
            # raw string: \d and \s are regex escapes, not string escapes
            jobs = re.findall(r"^(\d+)\s", process.stdout, re.MULTILINE)
            oar_job_ids.extend([(int(jobid), process.frontend)
                                for jobid in jobs])
        else:
            failed_processes.append(process)
    if len(failed_processes) > 0 and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        if start_between or end_between:
            filtered_job_ids = []
            for jobfrontend in oar_job_ids:
                info = get_oar_job_info(jobfrontend[0],
                                        jobfrontend[1],
                                        frontend_connection_params,
                                        timeout,
                                        nolog_exit_code=True,
                                        nolog_timeout=True,
                                        nolog_error=True)
                # NOTE(review): get_oar_job_info documents that info may lack
                # 'start_date'/'walltime' while a job is not yet scheduled,
                # which would raise KeyError here -- confirm callers only
                # filter on scheduled jobs.
                if (_date_in_range(info['start_date'], start_between)
                        and _date_in_range(
                            info['start_date'] + info['walltime'],
                            end_between)):
                    filtered_job_ids.append(jobfrontend)
            oar_job_ids = filtered_job_ids
        return oar_job_ids
Esempio n. 30
0
def oarsub(job_specs,
           frontend_connection_params=None,
           timeout=False,
           abort_on_error=False):
    """Submit jobs.

    :param job_specs: iterable of tuples (execo_g5k.oar.OarSubmission,
      frontend) with None for default frontend

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for submitting. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      on any error. If False, will returned the list of job got, even
      if incomplete (some frontends may have failed to answer).

    Returns a list of tuples (oarjob id, frontend), with frontend ==
    None for default frontend. If submission error, oarjob id ==
    None. The returned list matches, in the same order, the job_specs
    parameter.
    """
    # False is a sentinel for "use configured default timeout"
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    processes = []
    for (spec, frontend) in job_specs:
        oarsub_cmdline = get_oarsub_commandline(spec)
        p = get_process(oarsub_cmdline,
                        host=get_frontend_host(frontend),
                        connection_params=make_connection_params(
                            frontend_connection_params,
                            default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if len(processes) == 0:
        return oar_job_ids
    # submit to all frontends in parallel, then collect
    for process in processes:
        process.start()
    for process in processes:
        process.wait()
    failed_processes = []
    for process in processes:
        job_id = None
        if process.ok:
            # raw string: \d and \s are regex escapes, not string escapes
            mo = re.search(r"^OAR_JOB_ID=(\d+)\s*$", process.stdout,
                           re.MULTILINE)
            if mo != None:
                job_id = int(mo.group(1))
        if job_id == None:
            failed_processes.append(process)
        oar_job_ids.append((job_id, process.frontend))
    if len(failed_processes) > 0 and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        return oar_job_ids
Esempio n. 31
0
def get_rewritten_host_address(host_addr, connection_params):
    """Based on given connection_params or default_connection_params, return a rewritten host address."""
    rewrite = make_connection_params(connection_params).get('host_rewrite_func')
    return rewrite(host_addr) if rewrite else host_addr
Esempio n. 32
0
def get_rewritten_host_address(host_addr, connection_params):
    """Return *host_addr* rewritten through the effective connection params.

    Looks up 'host_rewrite_func' in the merge of given connection_params
    and default_connection_params; when absent (or falsy), the original
    address is returned as-is.
    """
    params = make_connection_params(connection_params)
    rewriter = params.get('host_rewrite_func')
    if not rewriter:
        return host_addr
    return rewriter(host_addr)
Esempio n. 33
0
def oargridsub(job_specs,
               reservation_date=None,
               walltime=None,
               job_type=None,
               queue=None,
               directory=None,
               additional_options=None,
               frontend_connection_params=None,
               timeout=False):
    """Submit oargrid jobs.

    :param job_specs: iterable of tuples (OarSubmission,
      clusteralias). Reservation date, walltime, queue, directory,
      project, additional_options, command of the OarSubmission are
      ignored.

    :param reservation_date: grid job reservation date. Default: now.

    :param walltime: grid job walltime.

    :param job_type: type of job for all clusters: deploy, besteffort,
      cosystem, checkpoint, timesharing.

    :param queue: oar queue to use.

    :param directory: directory where the reservation will be
      launched.

    :param additional_options: passed directly to oargridsub on the
      command line.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    Returns a tuple (oargrid_job_id, ssh_key), or (None, None) if
    error.

    Note that, as oargrid does not handle correctly quoting sql
    clauses enclosed inside braces, this function tries to
    automatically overcome this limitation by adding some, with the
    right escaping (backslashes). Also, note that oargrid's command
    line parser does not handle correctly commas in sql clauses
    enclosed inside braces, as it considers it as a rdef
    separator. This prevents, for example, using comma separated list
    values for ``NOT IN`` clauses.
    """
    # False (the bool) is the sentinel meaning "use the configured
    # default timeout"; any other value (including None = no timeout)
    # is passed through unchanged.
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    oargridsub_cmdline = get_oargridsub_commandline(job_specs,
                                                    reservation_date, walltime,
                                                    job_type, queue, directory,
                                                    additional_options)
    # oargridsub always runs on the default frontend.
    process = get_process(oargridsub_cmdline,
                          host=get_frontend_host(),
                          connection_params=make_connection_params(
                              frontend_connection_params,
                              default_frontend_connection_params))
    process.timeout = timeout
    process.pty = True
    process.run()
    job_id = None
    ssh_key = None
    if process.ok:
        # Raw strings: \[, \d, \s, \S are invalid escape sequences in
        # plain string literals (DeprecationWarning, and an error in
        # future Python versions).
        mo = re.search(r"^\[OAR_GRIDSUB\] Grid reservation id = (\d+)\s*$",
                       process.stdout, re.MULTILINE)
        if mo is not None:
            job_id = int(mo.group(1))
        mo = re.search(r"^\[OAR_GRIDSUB\] SSH KEY : (\S*)\s*$", process.stdout,
                       re.MULTILINE)
        if mo is not None:
            ssh_key = mo.group(1)
    if job_id is not None:
        return (job_id, ssh_key)
    else:
        return (None, None)
Esempio n. 34
0
def get_current_oar_jobs(frontends = None,
                         start_between = None,
                         end_between = None,
                         frontend_connection_params = None,
                         timeout = False,
                         abort_on_error = False):
    """Return a list of current active oar job ids.

    The list contains tuples (oarjob id, frontend).

    :param frontends: an iterable of frontends to connect to. A
      frontend with value None means default frontend. If frontends ==
      None, means get current oar jobs only for default frontend.

    :param start_between: a tuple (low, high) of endpoints. Filters
      and returns only jobs whose start date is in between these
      endpoints.

    :param end_between: a tuple (low, high) of endpoints. Filters and
      returns only jobs whose end date is in between these endpoints.

    :param frontend_connection_params: connection params for connecting
      to frontends if needed. Values override those in
      `execo_g5k.config.default_frontend_connection_params`.

    :param timeout: timeout for retrieving. Default is False, which
      means use
      ``execo_g5k.config.g5k_configuration['default_timeout']``. None
      means no timeout.

    :param abort_on_error: default False. If True, raises an exception
      on any error. If False, returns the list of jobs retrieved, even
      if incomplete (some frontends may have failed to answer).
    """
    # False (the bool) is the sentinel for "use the configured default
    # timeout"; None (no timeout) and numbers are passed through.
    if timeout is False:
        timeout = g5k_configuration.get('default_timeout')
    # Normalize the filter endpoints to unix timestamps once, up front.
    if start_between: start_between = [ get_unixts(t) for t in start_between ]
    if end_between: end_between = [ get_unixts(t) for t in end_between ]
    if frontends is None:
        frontends = [ None ]
    processes = []
    for frontend in frontends:
        p = get_process("oarstat -u",
                        host = get_frontend_host(frontend),
                        connection_params = make_connection_params(frontend_connection_params,
                                                                   default_frontend_connection_params))
        p.timeout = timeout
        p.pty = True
        p.frontend = frontend
        processes.append(p)
    oar_job_ids = []
    if len(processes) == 0:
        return oar_job_ids
    # Start all oarstat processes in parallel, then wait for all.
    for process in processes: process.start()
    for process in processes: process.wait()
    failed_processes = []
    for process in processes:
        if process.ok:
            # Raw string: \d and \s are invalid escape sequences in a
            # plain string literal (deprecated, error in future Python).
            jobs = re.findall(r"^(\d+)\s", process.stdout, re.MULTILINE)
            oar_job_ids.extend([ (int(jobid), process.frontend) for jobid in jobs ])
        else:
            failed_processes.append(process)
    if len(failed_processes) > 0 and abort_on_error:
        raise ProcessesFailed(failed_processes)
    else:
        if start_between or end_between:
            # One oarstat query per job to retrieve its dates; keep only
            # jobs whose start (and computed end = start + walltime)
            # falls inside the requested ranges.
            filtered_job_ids = []
            for jobfrontend in oar_job_ids:
                info = get_oar_job_info(jobfrontend[0], jobfrontend[1],
                                        frontend_connection_params, timeout,
                                        nolog_exit_code = True, nolog_timeout = True,
                                        nolog_error = True)
                if (_date_in_range(info['start_date'], start_between)
                    and _date_in_range(info['start_date'] + info['walltime'], end_between)):
                    filtered_job_ids.append(jobfrontend)
            oar_job_ids = filtered_job_ids
        return oar_job_ids