Example #1
0
    def _configure(self):
        '''
        Locate an `mpirun` executable and record the version it reports.

        Sets `self.launch_command` to whatever `self._find_executable()`
        returns for the known `mpirun` names, and `self.launch_version` to the
        lower-cased text following the first `version:` line of the launcher
        output.  `hydra-` is prepended for every `HYDRA build details:` line
        seen up to that point.  If nothing is found, the version is set to
        `unknown`.
        '''

        candidates = [
            'mpirun',             # General case
            'mpirun_rsh',         # Gordon @ SDSC
            'mpirun-mpich-mp',    # Mac OSX MacPorts
            'mpirun-openmpi-mp',  # Mac OSX MacPorts
        ]
        self.launch_command = self._find_executable(candidates)

        # the way env variables are transplanted to the target node differs per
        # mpi(run) version, so we need to find out what we are dealing with:
        # ask with `-v` first, and fall back to `-info` if that call fails
        out, _, ret = ru.sh_callout('%s -v' % self.launch_command)
        if ret != 0:
            out, _, ret = ru.sh_callout('%s -info' % self.launch_command)

        self.launch_version = ''
        for text in out.splitlines():

            if 'HYDRA build details:' in text:
                self.launch_version += 'hydra-'

            if 'version:' in text.lower():
                # the first version line ends the search
                self.launch_version += text.split(':')[1].strip().lower()
                break

        self.launch_version = self.launch_version or 'unknown'
Example #2
0
def test_upload_and_download(config):
    '''
    Round-trip a locally created file through the replica service.

    A file of `FILE_SIZE` MiB is created under `/tmp/`, uploaded as logical
    file `replica_url + TEMP_FILENAME`, removed locally, downloaded again to
    the same local path, and finally removed.  `config` is a callable whose
    result provides `replica_url`.
    '''

    cfg = config()
    tmp_dir = '/tmp/'
    replica_url = cfg.replica_url

    # create, upload, download and remove the file at one and the same local
    # path, so that the test does not depend on the current working directory
    local_path = tmp_dir + TEMP_FILENAME

    # the file is opened in binary mode, so the payload must be bytes
    with open(local_path, "wb") as f:
        f.write(b"x" * (FILE_SIZE * pow(2, 20)))

    # clear old file (the return code is not checked)
    ru.sh_callout(["irm", TEMP_FILENAME])

    _ = rs.replica.LogicalDirectory(replica_url)
    f = rs.replica.LogicalFile(replica_url + TEMP_FILENAME)
    f.upload(local_path,
             "irods:///path/is/ignored/?resource=" + IRODS_RESOURCE)

    os.remove(local_path)

    myfile = rs.replica.LogicalFile(replica_url + TEMP_FILENAME)
    myfile.download(local_path)

    os.remove(local_path)
Example #3
0
def test_sh_callout():
    '''
    Check stdout, stderr and return code of `ru.sh_callout()` for a succeeding
    command, a failing command, and a shell snippet writing to stderr.
    '''

    # (command, keyword arguments, expected out, expected err, expected ret)
    cases = [
        ('echo TRUE',               {},              'TRUE\n', '',        0),
        ('false',                   {},              '',       '',        1),
        ('echo FALSE 1>&2; exit 2', {'shell': True}, '',       'FALSE\n', 2),
    ]

    for cmd, kwargs, exp_out, exp_err, exp_ret in cases:

        out, err, ret = ru.sh_callout(cmd, **kwargs)

        assert out == exp_out, out
        assert err == exp_err, err
        assert ret == exp_ret, ret
Example #4
0
def test_upload_and_download(config):
    '''
    Upload a locally created file to the replica service and download it again.

    A file of `FILE_SIZE` MiB is written to `TEMP_FILENAME`, uploaded as
    logical file `replica_url + TEMP_FILENAME`, removed locally, downloaded
    again and removed once more.  `config` is a callable whose result provides
    `replica_url`.

    NOTE(review): `home_dir` is not defined in this function - presumably it is
                  a module level name pointing to the directory the file is
                  created in; confirm.
    '''

    cfg = config()

    replica_url = cfg.replica_url

    # the file is opened in binary mode, so the payload must be bytes
    with open(TEMP_FILENAME, "wb") as f:
        f.write(b"x" * (FILE_SIZE * pow(2, 20)))

    # clear old file (the return code is not checked)
    ru.sh_callout(["irm", TEMP_FILENAME])

    _ = rs.replica.LogicalDirectory(replica_url)
    f = rs.replica.LogicalFile(replica_url + TEMP_FILENAME)
    f.upload(home_dir + TEMP_FILENAME,
             "irods:///this/path/is/ignored/?resource=" + IRODS_RESOURCE)

    # single-argument `print(...)` calls behave the same on Python 2 and 3
    print("Deleting file locally : %s" % (home_dir + TEMP_FILENAME))
    os.remove(home_dir + TEMP_FILENAME)

    print("Downloading logical file %s to current/default directory"
          % (replica_url + TEMP_FILENAME))

    # a handle on the logical file is needed before it can be downloaded
    myfile = rs.replica.LogicalFile(replica_url + TEMP_FILENAME)
    myfile.download(TEMP_FILENAME)

    print("Deleting downloaded file locally : %s"
          % (home_dir + TEMP_FILENAME))
    os.remove(home_dir + TEMP_FILENAME)
Example #5
0
    def _get_mpi_info(self, exe):
        '''
        Determine version and flavor of the MPI installation behind `exe`.

        The executable is called with `-v`, `--version` and `-info` (in that
        order) until one call returns zero; its output is then scanned line by
        line for known marker strings, and the first matching line decides the
        result.

        Returns a `(version, flavor)` tuple.  Without any match, `version` is
        `None` and `flavor` is `self.MPI_FLAVOR_UNKNOWN`.

        Raises `ValueError` if `exe` is empty, and `RuntimeError` if the
        resulting flavor evaluates to false.
        '''

        if not exe:
            raise ValueError('no executable found')

        version = None
        flavor = self.MPI_FLAVOR_UNKNOWN

        # try the known version switches until one of them succeeds
        for template in ('%s -v', '%s --version', '%s -info'):
            out, _, ret = ru.sh_callout(template % exe)
            if not ret:
                break

        if not ret:
            for line in out.splitlines():

                lowered = line.lower()

                if 'hydra build details:' in lowered:
                    version = line.split(':', 1)[1].strip()
                    flavor = self.MPI_FLAVOR_HYDRA

                elif 'mvapich2' in lowered:
                    version = line
                    flavor = self.MPI_FLAVOR_HYDRA

                elif 'version:' in lowered:
                    version = line.split(':', 1)[1].strip()
                    flavor = self.MPI_FLAVOR_OMPI

                elif '(open mpi)' in lowered:
                    version = line.split(')', 1)[1].strip()
                    flavor = self.MPI_FLAVOR_OMPI

                else:
                    # no marker on this line - keep looking
                    continue

                # the first line carrying a marker settles the question
                break

        if not flavor:
            raise RuntimeError('cannot identify MPI flavor [%s]' % exe)

        self._log.debug('mpi version: %s [%s]', version, flavor)

        return version, flavor
Example #6
0
    def start_components(self, cfg=None):
        '''
        Check if any components are defined under `cfg['components']` and
        start them.

        For each component kind, `count` instances (default: 1) are started
        via the `radical-pilot-component` executable, each with its own config
        file `<cfg.path>/<uid>.json`.  After all instances are launched, the
        method waits for their heartbeats to appear.

        `cfg` defaults to `self._cfg`.

        Raises `RuntimeError` if a component executable returns non-zero, or if
        `self._hb.wait_startup()` reports components which failed to start.
        '''

        self._prof.prof('start_components_start', uid=self._uid)

        timeout = self._cfg.heartbeat.timeout

        if cfg is None:
            cfg = self._cfg

        # we pass a copy of the complete session config to all components, but
        # merge it into the component specific config settings (no overwrite),
        # and then remove the `bridges` and `components` sections
        #
        scfg = ru.Config(cfg=cfg)
        if 'bridges' in scfg:
            del scfg['bridges']
        if 'components' in scfg:
            del scfg['components']

        for cname, ccfg in cfg.get('components', {}).items():

            for _ in range(ccfg.get('count', 1)):

                # every instance gets its own uid, and thus its own config file
                ccfg.uid = ru.generate_id(cname, ns=self._sid)
                ccfg.cmgr = self.uid
                ccfg.kind = cname
                ccfg.sid = cfg.sid
                ccfg.base = cfg.base
                ccfg.path = cfg.path
                ccfg.heartbeat = cfg.heartbeat

                ccfg.merge(scfg, policy=ru.PRESERVE, log=self._log)

                fname = '%s/%s.json' % (cfg.path, ccfg.uid)
                ccfg.write(fname)

                self._log.info('create  component %s [%s]', cname, ccfg.uid)

                out, err, ret = ru.sh_callout('radical-pilot-component %s' %
                                              fname)
                self._log.debug('out: %s', out)
                self._log.debug('err: %s', err)
                if ret:
                    # name the component here, not the bridge
                    raise RuntimeError('component startup failed [%s]'
                                       % ccfg.uid)

                self._uids.append(ccfg.uid)
                self._log.info('created component %s [%s]', cname, ccfg.uid)

        # all components should start now, for their heartbeats
        # to appear.
        failed = self._hb.wait_startup(self._uids, timeout=timeout * 10)
        if failed:
            raise RuntimeError('could not start all components %s' % failed)

        self._prof.prof('start_components_stop', uid=self._uid)
Example #7
0
    def _get_mpi_info(self, exe):
        '''
        Return version and flavor of the MPI installation behind `exe`.

        `exe` is called with `-v`, then `--version`, then `-info`, until one of
        the calls returns zero.  The output of the successful call is scanned
        for known marker strings, and the first matching line determines the
        returned `(version, flavor)` tuple.  If no call succeeds or no line
        matches, `version` is `None` and `flavor` is `MPI_FLAVOR_UNKNOWN`.

        Raises `RuntimeError` if the resulting flavor evaluates to false.
        '''

        version = None
        flavor  = self.MPI_FLAVOR_UNKNOWN

        out, _, ret = ru.sh_callout('%s -v' % exe)

        if ret:
            out, _, ret = ru.sh_callout('%s --version' % exe)

        if ret:
            out, _, ret = ru.sh_callout('%s -info' % exe)

        if not ret:
            for line in out.splitlines():
                if 'hydra build details:' in line.lower():
                    version = line.split(':', 1)[1].strip()
                    flavor  = self.MPI_FLAVOR_HYDRA
                    break

                if 'mvapich2' in line.lower():
                    version = line
                    flavor  = self.MPI_FLAVOR_HYDRA
                    break

                if 'version:' in line.lower():
                    version = line.split(':', 1)[1].strip()
                    flavor  = self.MPI_FLAVOR_OMPI
                    break

                # Open MPI prints `mpirun (Open MPI) <version>`: there is no
                # colon after the closing parenthesis, so do not match on one
                if '(open mpi)' in line.lower():
                    version = line.split(')', 1)[1].strip()
                    flavor  = self.MPI_FLAVOR_OMPI
                    break

        if not flavor:
            raise RuntimeError('cannot identify MPI flavor [%s]' % exe)

        self._log.debug('mpi version: %s [%s]', version, flavor)

        return version, flavor
Example #8
0
    def lrms_shutdown_hook(cls, name, cfg, lrms, lm_info, logger, profiler):
        """
        This hook is symmetric to the config hook above, and is called during
        shutdown sequence, for the sake of freeing allocated resources.

        If `lm_info` carries a `dvm_uri`, the DVM is asked to terminate via
        `orterun --hnp <uri> --terminate`.  Success is recorded as profile
        event `orte_dvm_stop`; any failure (no `orterun`, non-zero return code,
        or any other exception) is logged and recorded as `orte_dvm_fail`.
        The hook itself never raises for termination problems.
        """

        if 'dvm_uri' not in lm_info:
            # no DVM was started - nothing to clean up
            return

        try:
            logger.info('terminating dvm')
            orterun = ru.which('orterun')
            if not orterun:
                raise Exception("Couldn't find orterun")

            _, err, ret = ru.sh_callout('%s --hnp %s --terminate' %
                                        (orterun, lm_info['dvm_uri']))
            if ret:
                # do not report a failed termination as successful stop
                raise RuntimeError('orterun --terminate failed [%s]: %s'
                                   % (ret, err))

            profiler.prof(event='orte_dvm_stop', uid=cfg['pilot_id'])

        except Exception:
            # use the same event name as for runtime failures - those are
            # not distinguishable at the moment from termination failures
            profiler.prof(event='orte_dvm_fail', uid=cfg['pilot_id'])
            logger.exception('dvm termination failed')
Example #9
0
    def _configure(self):
        '''
        Locate `srun` and record the version it reports.

        Sets `self.launch_command` to the result of `ru.which('srun')` and
        `self._version` to the last whitespace separated token of the
        `srun -V` output.

        Raises `RuntimeError` if `srun -V` returns non-zero.
        '''

        srun = ru.which('srun')
        self.launch_command = srun

        out, err, ret = ru.sh_callout('%s -V' % srun)
        if ret:
            raise RuntimeError('cannot use srun [%s] [%s]' % (out, err))

        # the version is the last token of the reported string
        tokens = out.split()
        self._version = tokens[-1]

        self._log.debug('using srun from %s [%s]', srun, self._version)
Example #10
0
def _cmd(cmd):
    '''
    Run `cmd` via `ru.sh_callout()` and return `True` if it exits with return
    code 0, `False` otherwise.  The command output is discarded.
    '''

    _, _, ret = ru.sh_callout(cmd)

    return ret == 0
Example #11
0
def test_gtod():
    '''
    Check that the timestamps reported by the `radical-gtod` executable and by
    `rg.gtod()` are both within 0.1 seconds of `time.time()`.
    '''

    out, _, _ = ru.sh_callout('radical-gtod')

    t_exe = float(out)
    t_mod = rg.gtod()
    t_now = time.time()

    lower = t_now - 0.1
    upper = t_now + 0.1

    for stamp in (t_exe, t_mod):
        assert (lower < stamp < upper)
    def lrms_shutdown_hook(cls, name, cfg, lrms, lm_info, logger, profiler):
        """
        This hook is symmetric to the config hook above, and is called during
        shutdown sequence, for the sake of freeing allocated resources.

        If `lm_info` carries a `dvm_uri`, `orterun --hnp <uri> --terminate` is
        called to stop the DVM, and the profile event `orte_dvm_stop` is
        recorded.  If `orterun` cannot be found, returns non-zero, or anything
        else goes wrong, the error is logged and recorded as `orte_dvm_fail`
        (with the exception as `msg`) instead of being raised.
        """

        if 'dvm_uri' in lm_info:
            try:
                logger.info('terminating dvm')
                orterun = ru.which('orterun')
                if not orterun:
                    raise Exception("Couldn't find orterun")

                _, err, ret = ru.sh_callout('%s --hnp %s --terminate'
                                            % (orterun, lm_info['dvm_uri']))

                # a non-zero return code means the DVM was not terminated, so
                # it must not be recorded as a successful stop
                if ret:
                    raise RuntimeError('orterun --terminate failed [%s]: %s'
                                       % (ret, err))

                profiler.prof(event='orte_dvm_stop', uid=cfg['pilot_id'])

            except Exception as e:
                # use the same event name as for runtime failures - those are
                # not distinguishable at the moment from termination failures
                profiler.prof(event='orte_dvm_fail', uid=cfg['pilot_id'], msg=e)
                logger.exception('dvm termination failed')
Example #13
0
    def stage_output(self):
        '''
        Pack the files listed in `./staging_output.txt` into
        `./staging_output.tgz`.

        Nothing is done if the list file does not exist, or if the tarball
        already exists.  A failing `tar` call is logged, but not raised.
        '''

        # no list of output files: nothing to stage
        if not os.path.isfile('./staging_output.txt'):
            return

        # the tarball was created already
        if os.path.isfile('./staging_output.tgz'):
            return

        cmd = 'tar zcvf staging_output.tgz $(cat staging_output.txt)'
        out, err, ret = ru.sh_callout(cmd, shell=True)

        if not ret:
            return

        self._log.debug('out: %s', out)
        self._log.debug('err: %s', err)
        self._log.error('output tarring failed: %s', cmd)
Example #14
0
    def _configure(self):
        '''
        Discover the node list and the per-node resources of a Cobalt
        allocation.

        The number of nodes is read from `$COBALT_PARTSIZE`; host names and
        core counts are obtained by running commands via `aprun` with one rank
        per node.  GPU, local file system and memory settings are taken from
        `self._cfg`.  The results are stored in `self.node_list`,
        `self.cores_per_node`, `self.gpus_per_node`, `self.lfs_per_node` and
        `self.mem_per_node`.

        Raises `KeyError` if `$COBALT_PARTSIZE` is not set, and
        `AssertionError` if the number of reported hosts does not match, or if
        the nodes report differing core counts.
        '''

        # We only support Cobalt on Theta right now, and since Theta is a Cray,
        # `aprun` is available.  `aprun` is also the only way we found so far to
        # determine the list of nodes we have (`COBALT_NODELIST` seems broken):
        # we run it with the number of nodes we *think* we have, place one rank
        # per node (`-N 1`), and let each rank print its `hostname`.
        n_nodes = int(os.environ['COBALT_PARTSIZE'])
        aprun = 'aprun -n %d -N 1' % n_nodes

        out, _, _ = ru.sh_callout('%s hostname' % aprun)
        hostnames = out.split()
        assert (len(hostnames) == n_nodes), hostnames

        # we also want to learn the core count per node
        # NOTE(review): the pipes are part of the string handed to `aprun`, and
        #               no shell is requested for the callout - confirm that
        #               this yields one count per node as intended.
        count_cmd = 'cat /proc/cpuinfo | grep processor | wc -l'
        out, _, _ = ru.sh_callout('%s %s' % (aprun, count_cmd))
        unique_counts = list({int(token) for token in out.split()})
        assert (len(unique_counts) == 1), unique_counts

        gpus = self._cfg.get('gpus_per_node', 0)
        lfs = {'path': ru.expand_env(self._cfg.get('lfs_path_per_node')),
               'size': self._cfg.get('lfs_size_per_node', 0)}
        mem = self._cfg.get('mem_per_node', 0)

        self._log.info("Found unique core counts: %s", unique_counts[0])

        # node names are unique, so can serve as node uids
        self.node_list = [[host, host] for host in hostnames]
        self.cores_per_node = unique_counts[0]
        self.gpus_per_node = gpus
        self.lfs_per_node = lfs
        self.mem_per_node = mem
Example #15
0
    def _shell(self, data):
        '''
        We expect data to have a single entry: 'cmd', containing the command
        line to be called as string.
        '''

        try:
            out, err, ret = ru.sh_callout(data['cmd'])

        except Exception as e:
            self._log.exception('_shell failed: %s' % (data))
            out = None
            err = 'shell failed: %s' % e
            ret = 1

        return out, err, ret
Example #16
0
    def start_bridges(self, cfg=None):
        '''
        Start all bridges defined under `cfg['bridges']` (`cfg` defaults to
        `self._cfg`).

        For each bridge, a config file `<cfg.path>/<bridge name>.json` is
        written and handed to the `radical-pilot-bridge` executable.  Once all
        bridges are launched, the method waits for their heartbeats.

        Raises `RuntimeError` if a bridge executable returns non-zero, or if
        `self._hb.wait_startup()` reports bridges which did not start.
        '''

        self._prof.prof('start_bridges_start', uid=self._uid)

        hb_timeout = self._cfg.heartbeat.timeout
        cfg = self._cfg if cfg is None else cfg

        for bname, bcfg in cfg.get('bridges', {}).items():

            # the bridge name doubles as uid and as channel name
            bcfg.uid = bname
            bcfg.channel = bname
            bcfg.cmgr = self.uid
            bcfg.sid = cfg.sid
            bcfg.path = cfg.path
            bcfg.heartbeat = cfg.heartbeat

            cfg_file = '%s/%s.json' % (cfg.path, bcfg.uid)
            bcfg.write(cfg_file)

            self._log.info('create  bridge %s [%s]', bname, bcfg.uid)

            out, err, ret = ru.sh_callout('radical-pilot-bridge %s' % cfg_file)
            self._log.debug('bridge startup out: %s', out)
            self._log.debug('bridge startup err: %s', err)

            if ret:
                raise RuntimeError('bridge startup failed')

            self._uids.append(bcfg.uid)
            self._log.info('created bridge %s [%s]', bname, bcfg.uid)

        # all bridges should be starting by now, so wait for their heartbeats
        # to appear
        failed = self._hb.wait_startup(self._uids, timeout=hb_timeout)
        if failed:
            raise RuntimeError('could not start all bridges %s' % failed)

        self._prof.prof('start_bridges_stop', uid=self._uid)
Example #17
0
        def _watch_flux(flux_env):
            '''
            Export `flux_env` into `os.environ`, then call `flux ping` every
            0.1 seconds until the call returns non-zero.
            '''

            logger.info('=== starting flux watcher')

            for key, val in flux_env.items():
                os.environ[key] = val

            while True:

                out, err, ret = ru.sh_callout('flux ping -c 1 all')
                logger.debug('=== flux watcher out: %s', out)

                if ret:
                    # the ping failed - stop watching
                    logger.error('=== flux watcher err: %s', err)
                    break

                time.sleep(0.1)

            logger.info('flux stopped?')
Example #18
0
    def lrms_config_hook(cls, name, cfg, lrms, logger, profiler):
        """
        Start an ORTE DVM and return the `lm_info` dict describing it.

        The hook determines the Open RTE version via `orte-info`, starts
        `orte-dvm` (line buffered through `(g)stdbuf`), waits until the DVM
        reports its URI and `DVM ready`, and then starts a thread which
        watches the DVM process.

        Returns a dict with the keys `dvm_uri` and `version_info`.

        Raises `Exception` if `orte-dvm` or `(g)stdbuf` cannot be found, if the
        `VMURI:` line is malformed or missing, or if the DVM process
        disappears during startup; raises `AssertionError` if no Open RTE
        version could be determined.

        FIXME: this config hook will manipulate the LRMS nodelist.  Not a nice
               thing to do, but hey... :P
               What really should be happening is that the LRMS digs information
               on node reservation out of the config and configures the node
               list accordingly.  This config hook should be limited to starting
               the DVM.
        """

        # only needed by the DVM watcher below
        import signal

        dvm_command = ru.which('orte-dvm')
        if not dvm_command:
            raise Exception("Couldn't find orte-dvm")

        # Now that we found the orte-dvm, get ORTE version
        out, _, _ = ru.sh_callout('orte-info | grep "Open RTE"', shell=True)
        orte_info = dict()
        for line in out.split('\n'):

            line = line.strip()
            if ':' not in line:
                # skip empty lines and anything which is not `key: value`
                continue

            key, val = line.split(':', 1)
            key = key.strip()
            if key == 'Open RTE':
                orte_info['version'] = val.strip()
            elif key == 'Open RTE repo revision':
                orte_info['version_detail'] = val.strip()

        assert (orte_info.get('version'))
        logger.info("Found Open RTE: %s / %s", orte_info['version'],
                    orte_info.get('version_detail'))

        # Use (g)stdbuf to disable buffering.
        # We need this to get the "DVM ready",
        # without waiting for orte-dvm to complete.
        # The command seems to be generally available on our Cray's,
        # if not, we can code some home-cooked pty stuff.
        stdbuf_cmd = ru.which(['stdbuf', 'gstdbuf'])
        if not stdbuf_cmd:
            raise Exception("Couldn't find (g)stdbuf")
        stdbuf_arg = "-oL"

        # Base command = (g)stdbuf <args> + orte-dvm + debug_args
        dvm_args = [stdbuf_cmd, stdbuf_arg, dvm_command]

        # Additional (debug) arguments to orte-dvm
        if os.environ.get('RADICAL_PILOT_ORTE_VERBOSE'):
            debug_strings = [
                '--debug-devel', '--mca odls_base_verbose 100',
                '--mca rml_base_verbose 100'
            ]
        else:
            debug_strings = []

        # Split up the debug strings into args and add them to the dvm_args
        for debug_string in debug_strings:
            dvm_args.extend(debug_string.split())

        vm_size = len(lrms.node_list)
        logger.info("Start DVM on %d nodes ['%s']", vm_size,
                    ' '.join(dvm_args))
        profiler.prof(event='orte_dvm_start', uid=cfg['pilot_id'])

        # read the DVM output as text, so that the string comparisons below
        # also work where pipes deliver bytes by default
        dvm_uri = None
        dvm_process = mp.Popen(dvm_args, stdout=mp.PIPE, stderr=mp.STDOUT,
                               universal_newlines=True)

        while True:

            line = dvm_process.stdout.readline().strip()

            if line.startswith('VMURI:'):

                if len(line.split(' ')) != 2:
                    raise Exception("Unknown VMURI format: %s" % line)

                label, dvm_uri = line.split(' ', 1)

                if label != 'VMURI:':
                    raise Exception("Unknown VMURI format: %s" % line)

                logger.info("ORTE DVM URI: %s" % dvm_uri)

            elif line == 'DVM ready':

                if not dvm_uri:
                    raise Exception("VMURI not found!")

                logger.info("ORTE DVM startup successful!")
                profiler.prof(event='orte_dvm_ok', uid=cfg['pilot_id'])
                break

            else:

                # Check if the process is still around,
                # and log output in debug mode.
                if dvm_process.poll() is None:
                    logger.debug("ORTE: %s", line)
                else:
                    # Process is gone: fatal!  Record the failure *before*
                    # raising, otherwise the event is never written.
                    profiler.prof(event='orte_dvm_fail', uid=cfg['pilot_id'])
                    raise Exception("ORTE DVM process disappeared")

        # ----------------------------------------------------------------------
        def _watch_dvm():
            '''
            Forward DVM output to the log until the DVM process ends, and
            signal the main thread if it ended with a non-zero exit code.
            '''

            logger.info('starting DVM watcher')

            retval = dvm_process.poll()
            while retval is None:
                line = dvm_process.stdout.readline().strip()
                if line:
                    logger.debug('dvm output: %s', line)
                else:
                    time.sleep(1.0)

                # re-check the process state, otherwise this loop never ends
                retval = dvm_process.poll()

            if retval != 0:
                # send a kill signal to the main thread.
                # We know that Python and threading are likely not to play well
                # with signals - but this is an exceptional case, and not part
                # of the standard termination sequence.  If the signal is
                # swallowed, the next `orte-submit` call will trigger
                # termination anyway.
                os.kill(os.getpid(), signal.SIGTERM)

            logger.info('DVM stopped (%d)' % retval)

        # ----------------------------------------------------------------------

        dvm_watcher = ru.Thread(target=_watch_dvm, name="DVMWatcher")
        dvm_watcher.start()

        lm_info = {'dvm_uri': dvm_uri, 'version_info': {name: orte_info}}

        # we need to inform the actual LM instance about the DVM URI.  So we
        # pass it back to the LRMS which will keep it in an 'lm_info', which
        # will then be passed as part of the slots via the scheduler
        return lm_info
Example #19
0
def get_backfill(partition=None, max_cores=None, max_walltime=None):
    '''
    Return a list of `[partition, cores, walltime]` lists which fit into the
    current backfill, as reported by `showbf --blocking`.

    Each backfill window is split into chunks of at most `max_cores` cores
    (default: 160), and the walltime (in minutes) is capped at `max_walltime`
    (default: 60).  If `partition` is given, the query is restricted to that
    partition via `-p`.

    Raises `RuntimeError` if `showbf` writes anything to stderr.
    '''

    if max_cores    is None: max_cores    = 160
    if max_walltime is None: max_walltime =  60


    # --------------------------------------------------------------------------
    def _duration_to_walltime(timestr):
        '''
        convert a timestring of the forms:

            00:00:00:00  days:hours:min:sec
               00:00:00       hours:min:sec
                  00:00             min:sec
                     00                 sec
               INFINITY

        into a number of minutes.

        Any result larger than `max_walltime` is truncated to `max_walltime`.
        `INFINITY` is also mapped to `max_walltime`.
        '''
        if timestr == 'INFINITY':
            return max_walltime

        walltime = 0.0
        elems    = timestr.split(':')
        if len(elems) >= 4:  walltime += 24 * 60 * int(elems[-4])
        if len(elems) >= 3:  walltime +=      60 * int(elems[-3])
        if len(elems) >= 2:  walltime +=           int(elems[-2])
        if len(elems) >= 1:  walltime +=           int(elems[-1]) / 60

        return min(walltime, max_walltime)
    # --------------------------------------------------------------------------


    if partition:
        part_opt = '-p %s' % partition
    else:
        part_opt = ''

    out, err, ret = ru.sh_callout('showbf --blocking %s' % part_opt)

    if err:
        raise RuntimeError('showbf failed [%s]: %s' % (ret, err))

    backfill = list()
    for line in out.splitlines():

        # expected columns:
        #   partition, cores, nodes, duration, start_offset, start_date
        fields = line.split()
        if len(fields) != 6:
            # blank or otherwise unparsable line
            continue

        part, cores, _, duration, _, _ = fields

        # skip the table header and the separator line below it
        if part.startswith('-') or part == 'Partition':
            continue

        cores    = int(cores)
        walltime = int(_duration_to_walltime(duration))

        # cut the window into chunks of at most `max_cores` cores
        while cores > max_cores:
            cores -= max_cores
            backfill.append([part, max_cores, walltime])

        if cores:
            backfill.append([part, cores, walltime])

    return backfill
Example #20
0
def _lsfscript_generator(url, logger, jd, ppn, lsf_version, queue):
    """
    Generate an LSF script from a SAGA job description.

    The script consists of a bash shebang, the `#BSUB` directives derived from
    `jd` (queue, name, contact, working directory, wall time, stdout/stderr,
    project/reservation, node count and allocation flags), an `export` line
    carrying `RADICAL_SAGA_SMT` and the job environment, and the command line.
    Dollar signs in the command and double quotes in the whole script are
    backslash-escaped.

    The number of nodes is derived from the requested CPU and GPU counts (each
    defaulting to 1) and from the per-node capacities of the target host, which
    is taken from `url.host`, or from `hostname -f` / `$HOSTNAME` for
    localhost.  `queue`, when set, takes precedence over `jd.queue`.  The
    arguments `logger`, `ppn` and `lsf_version` are not used.

    Returns the script as string.

    Raises `RuntimeError` if the target host cannot be determined, and
    `ValueError` if the host is neither `summit` nor `summitdev`.
    """

    # --------------------------------------------------------------------------
    def _in_workdir(fname):
        # relative names end up in the working directory (if one is set),
        # absolute names are passed as is
        if os.path.isabs(fname) or not jd.working_directory:
            return fname
        return '%s/%s' % (jd.working_directory, fname)
    # --------------------------------------------------------------------------

    lsf_bsubs = ''
    command = ''
    env_string = ''

    if jd.executable: command += "%s " % (jd.executable)
    if jd.arguments: command += ' '.join(jd.arguments)

    # an explicitly passed queue overrules the one from the job description
    target_queue = queue or jd.queue
    if target_queue:
        lsf_bsubs += "#BSUB -q %s \n" % target_queue

    if jd.name: lsf_bsubs += "#BSUB -J %s \n" % jd.name
    if jd.job_contact: lsf_bsubs += "#BSUB -u %s \n" % jd.job_contact
    if jd.working_directory:
        lsf_bsubs += "#BSUB -cwd %s \n" % jd.working_directory
    if jd.wall_time_limit:
        # the limit is given in minutes, LSF expects `hours:minutes` - use
        # integer division so that the hours are never rendered as float
        lsf_bsubs += "#BSUB -W %s:%s \n" % (jd.wall_time_limit // 60,
                                            jd.wall_time_limit %  60)

    # if working directory is set, we want stdout to end up in the
    # working directory as well, unless it contains a specific
    # path name - otherwise we pass `output` as is.
    if jd.output:
        lsf_bsubs += "#BSUB -o %s \n" % _in_workdir(jd.output)

    # same holds for stderr
    if jd.error:
        lsf_bsubs += "#BSUB -e %s \n" % _in_workdir(jd.error)

    env_string += "export RADICAL_SAGA_SMT=%d" % SMT
    if jd.environment:
        # `items()` is available on both Python 2 and Python 3 dicts
        for k, v in jd.environment.items():
            env_string += " %s=%s" % (k, v)

    # a project of the form `account:reservation` sets both
    if jd.project and ':' in jd.project:
        account, reservation = jd.project.split(':', 1)
        lsf_bsubs += "#BSUB -P %s \n" % account
        lsf_bsubs += "#BSUB -U %s \n" % reservation

    elif jd.project:
        lsf_bsubs += "#BSUB -P %s \n" % jd.project

    # Request enough nodes to cater for the number of gpus and cores requested
    if not jd.total_cpu_count: total_cpu_count = 1
    else: total_cpu_count = jd.total_cpu_count

    if not jd.total_gpu_count: total_gpu_count = 1
    else: total_gpu_count = jd.total_gpu_count

    hostname = url.host

    if not hostname or 'localhost' in hostname:
        out, _, ret = ru.sh_callout('hostname -f')
        if ret: hostname = os.environ.get('HOSTNAME', '')
        else: hostname = out.strip()

    if not hostname:
        raise RuntimeError('cannot determine target host for %s' % url)

    # cores and gpus per node; `summitdev` must be checked first, as `summit`
    # is a substring of it
    if 'summitdev' in hostname:
        cpn = 20 * SMT
        gpn = 4
    elif 'summit' in hostname:
        cpn = 42 * SMT
        gpn = 6
    else:
        raise ValueError('LSF host (%s) not yet supported' % hostname)

    # round up to full nodes
    cpu_nodes = int(total_cpu_count / cpn)
    if total_cpu_count > (cpu_nodes * cpn):
        cpu_nodes += 1

    gpu_nodes = int(total_gpu_count / gpn)
    if total_gpu_count > (gpu_nodes * gpn):
        gpu_nodes += 1

    nodes = max(cpu_nodes, gpu_nodes)

    lsf_bsubs += "#BSUB -nnodes %s \n" % str(nodes)
    lsf_bsubs += "#BSUB -alloc_flags 'gpumps smt%d' \n" % SMT

    # escape double quotes and dollar signs, otherwise 'echo |'
    # further down won't work
    # only escape '$' in args and exe. not in the bsubs
    command = command.replace('$', '\\$')
    lsfscript = "\n#!/bin/bash \n%s\n%s\n%s" % (lsf_bsubs, env_string, command)
    lsfscript = lsfscript.replace('"', '\\"')

    return lsfscript
Example #21
0
    def work(self, units):

        if not isinstance(units, list):
            units = [units]

        self.advance(units, rps.UMGR_STAGING_INPUT, publish=True, push=False)

        # we first filter out any units which don't need any input staging, and
        # advance them again as a bulk.  We work over the others one by one, and
        # advance them individually, to avoid stalling from slow staging ops.

        no_staging_units = list()
        staging_units = list()

        for unit in units:

            # no matter if we perform any staging or not, we will push the full
            # unit info to the DB on the next advance, and will pass control to
            # the agent.
            unit['$all'] = True
            unit['control'] = 'agent_pending'

            # check if we have any staging directives to be enacted in this
            # component
            actionables = list()
            for sd in unit['description'].get('input_staging', []):
                if sd['action'] in [rpc.TRANSFER, rpc.TARBALL]:
                    actionables.append(sd)

            if actionables:
                staging_units.append([unit, actionables])
            else:
                no_staging_units.append(unit)

        # Optimization: if we obtained a large bulk of units, we at this point
        # attempt a bulk mkdir for the unit sandboxes, to free the agent of
        # performing that operation.  That implies that the agent needs to check
        # sandbox existence before attempting to create them now.
        #
        # Note that this relies on the umgr scheduler to assigning the sandbox
        # to the unit.
        #
        # Note further that we need to make sure that all units are actually
        # pointing into the same target file system, so we need to cluster by
        # filesystem before checking the bulk size.  For simplicity we actually
        # cluster by pilot ID, which is sub-optimal for unit bulks which go to
        # different pilots on the same resource (think OSG).
        #
        # Note further that we skip the bulk-op for all units for which we
        # actually need to stage data, since the mkdir will then implicitly be
        # done anyways.
        #
        # Caveat: we can actually only (reasonably) do this if we know some
        # details about the pilot, because otherwise we'd have to much guessing
        # to do about the pilot configuration (sandbox, access schema, etc), so
        # we only attempt this optimization for units scheduled to pilots for
        # which we learned those details.
        units_by_pid = dict()
        for unit in no_staging_units:
            sbox = unit['unit_sandbox']
            pid = unit['pilot']
            if pid not in units_by_pid:
                units_by_pid[pid] = list()
            units_by_pid[pid].append(sbox)

        # now trigger the bulk mkdir for all filesystems which have more than
        # a certain units tohandle in this bulk:
        for pid in units_by_pid:

            with self._pilots_lock:
                pilot = self._pilots.get(pid)

            if not pilot:
                # we don't feel inclined to optimize for unknown pilots
                self._log.debug('pid unknown - skip optimizion', pid)
                continue

            session_sbox = self._session._get_session_sandbox(pilot)
            unit_sboxes = units_by_pid[pid]

            if len(unit_sboxes) >= UNIT_BULK_MKDIR_THRESHOLD:

                # no matter the bulk mechanism, we need a SAGA handle to the
                # remote FS
                sbox_fs = ru.Url(session_sbox)  # deep copy
                sbox_fs.path = '/'
                sbox_fs_str = str(sbox_fs)
                if sbox_fs_str not in self._fs_cache:
                    self._fs_cache[sbox_fs_str] = rs.filesystem.Directory(
                        sbox_fs, session=self._session)
                saga_dir = self._fs_cache[sbox_fs_str]

                # we have two options for a bulk mkdir:
                # 1) ask SAGA to create the sandboxes in a bulk op
                # 2) create a tarball with all unit sandboxes, push it over, and
                #    untar it (one untar op then creates all dirs).  We implement
                #    both
                if UNIT_BULK_MKDIR_MECHANISM == 'saga':

                    tc = rs.task.Container()
                    for sbox in unit_sboxes:
                        tc.add(saga_dir.make_dir(sbox, ttype=rs.TASK))
                    tc.run()
                    tc.wait()

                elif UNIT_BULK_MKDIR_MECHANISM == 'tar':

                    tmp_path = tempfile.mkdtemp(prefix='rp_agent_tar_dir')
                    tmp_dir = os.path.abspath(tmp_path)
                    tar_name = '%s.%s.tgz' % (self._session.uid, self.uid)
                    tar_tgt = '%s/%s' % (tmp_dir, tar_name)
                    tar_url = ru.Url('file://localhost/%s' % tar_tgt)

                    for sbox in unit_sboxes:
                        os.makedirs('%s/%s' % (tmp_dir, ru.Url(sbox).path))

                    cmd = "cd %s && tar zchf %s *" % (tmp_dir, tar_tgt)
                    out, err, ret = ru.sh_callout(cmd, shell=True)

                    self._log.debug('tar : %s', cmd)
                    self._log.debug('tar : %s\n---\n%s\n---\n%s', out, err,
                                    ret)

                    if ret:
                        raise RuntimeError('failed callout %s: %s' %
                                           (cmd, err))

                    tar_rem_path = "%s/%s" % (str(session_sbox), tar_name)

                    self._log.debug('sbox: %s [%s]', session_sbox,
                                    type(session_sbox))
                    self._log.debug('copy: %s -> %s', tar_url, tar_rem_path)
                    saga_dir.copy(tar_url,
                                  tar_rem_path,
                                  flags=rs.filesystem.CREATE_PARENTS)

                    # ru.sh_callout('rm -r %s' % tmp_path)

                    # get a job service handle to the target resource and run
                    # the untar command.  Use the hop to skip the batch system
                    js_url = pilot['js_hop']
                    self._log.debug('js  : %s', js_url)

                    if js_url in self._js_cache:
                        js_tmp = self._js_cache[js_url]
                    else:
                        js_tmp = rs.job.Service(js_url, session=self._session)
                        self._js_cache[js_url] = js_tmp

                    cmd = "tar zmxvf %s/%s -C /" % (session_sbox.path,
                                                    tar_name)
                    j = js_tmp.run_job(cmd)
                    j.wait()
                    self._log.debug('untar : %s', cmd)
                    self._log.debug('untar : %s\n---\n%s\n---\n%s',
                                    j.get_stdout_string(),
                                    j.get_stderr_string(), j.exit_code)

        if no_staging_units:

            # nothing to stage, push to the agent
            self.advance(no_staging_units,
                         rps.AGENT_STAGING_INPUT_PENDING,
                         publish=True,
                         push=True)

        for unit, actionables in staging_units:
            self._handle_unit(unit, actionables)
    def lrms_config_hook(cls, name, cfg, lrms, logger, profiler):
        """
        Start an ORTE DVM (`orte-dvm`) and return the info needed to use it.

        The hook locates `orte-dvm`, queries `orte-info` for the Open RTE
        version, starts the DVM wrapped in `(g)stdbuf -oL`, and then reads the
        DVM output until both the `VMURI:` line and the `DVM ready` line have
        been seen.  A watcher thread keeps draining the DVM output afterwards
        and signals this process if the DVM exits with a non-zero status.

        FIXME: this config hook will manipulate the LRMS nodelist.  Not a nice
               thing to do, but hey... :P
               What really should be happening is that the LRMS digs information
               on node reservation out of the config and configures the node
               list accordingly.  This config hook should be limited to starting
               the DVM.

        Arguments:
            name     : key under which the version info is stored in the result
            cfg      : configuration mapping; `cfg['pilot_id']` is used as uid
                       for profiler events
            lrms     : LRMS instance; only `len(lrms.node_list)` is read here
            logger   : logger used for progress and debug messages
            profiler : profiler whose `prof(event=..., uid=...)` is called

        Returns:
            dict with the keys `dvm_uri` (URI reported by the DVM) and
            `version_info` (`{name: {'version': ..., 'version_detail': ...}}`)

        Raises:
            Exception: if `orte-dvm` or `(g)stdbuf` cannot be found, if the
                       Open RTE version cannot be determined, if the `VMURI:`
                       line is malformed or missing, or if the DVM process
                       disappears before it reports readiness.
        """

        # local import: only needed to signal this process from the watcher
        import signal

        dvm_command = ru.which('orte-dvm')
        if not dvm_command:
            raise Exception("Couldn't find orte-dvm")

        # Now that we found the orte-dvm, get ORTE version
        out, _, _ = ru.sh_callout('orte-info | grep "Open RTE"', shell=True)
        orte_info = dict()
        for line in out.split('\n'):

            line = line.strip()

            # skip empty lines and anything which is not a `key: value` pair
            if ':' not in line:
                continue

            key, val = line.split(':', 1)
            key = key.strip()
            if key == 'Open RTE':
                orte_info['version'] = val.strip()
            elif key == 'Open RTE repo revision':
                orte_info['version_detail'] = val.strip()

        # an `assert` would be stripped under `python -O`: raise explicitly
        if not orte_info.get('version'):
            raise Exception("Couldn't determine Open RTE version")

        logger.info("Found Open RTE: %s / %s",
                    orte_info['version'], orte_info.get('version_detail'))

        # Use (g)stdbuf to disable buffering.
        # We need this to get the "DVM ready",
        # without waiting for orte-dvm to complete.
        # The command seems to be generally available on our Cray's,
        # if not, we can code some home-cooked pty stuff.
        stdbuf_cmd = ru.which(['stdbuf', 'gstdbuf'])
        if not stdbuf_cmd:
            raise Exception("Couldn't find (g)stdbuf")
        stdbuf_arg = "-oL"

        # Base command = (g)stdbuf <args> + orte-dvm + debug_args
        dvm_args = [stdbuf_cmd, stdbuf_arg, dvm_command]

        # Additional (debug) arguments to orte-dvm
        if os.environ.get('RADICAL_PILOT_ORTE_VERBOSE'):
            debug_strings = [
                             '--debug-devel',
                             '--mca odls_base_verbose 100',
                             '--mca rml_base_verbose 100'
                            ]
        else:
            debug_strings = []

        # Split up the debug strings into args and add them to the dvm_args
        for ds in debug_strings:
            dvm_args.extend(ds.split())

        vm_size = len(lrms.node_list)
        logger.info("Start DVM on %d nodes ['%s']", vm_size, ' '.join(dvm_args))
        profiler.prof(event='orte_dvm_start', uid=cfg['pilot_id'])

        # `universal_newlines` makes the pipe deliver text, so that the string
        # comparisons below also work where pipes would otherwise yield bytes
        dvm_uri     = None
        dvm_process = mp.Popen(dvm_args, stdout=mp.PIPE, stderr=mp.STDOUT,
                               universal_newlines=True)

        while True:

            line = dvm_process.stdout.readline().strip()

            if line.startswith('VMURI:'):

                # expect exactly `VMURI: <uri>`
                elems = line.split(' ')
                if len(elems) != 2 or elems[0] != 'VMURI:':
                    raise Exception("Unknown VMURI format: %s" % line)

                dvm_uri = elems[1]
                logger.info("ORTE DVM URI: %s", dvm_uri)

            elif line == 'DVM ready':

                if not dvm_uri:
                    raise Exception("VMURI not found!")

                logger.info("ORTE DVM startup successful!")
                profiler.prof(event='orte_dvm_ok', uid=cfg['pilot_id'])
                break

            else:

                # Check if the process is still around,
                # and log output in debug mode.
                if dvm_process.poll() is None:
                    logger.debug("ORTE: %s", line)
                else:
                    # Process is gone: fatal!  Record the failure *before*
                    # raising, otherwise the event is never written.
                    profiler.prof(event='orte_dvm_fail', uid=cfg['pilot_id'])
                    raise Exception("ORTE DVM process disappeared")

        # ----------------------------------------------------------------------
        def _watch_dvm():
            # drain the DVM output until the DVM process terminates

            logger.info('starting DVM watcher')

            retval = dvm_process.poll()
            while retval is None:
                line = dvm_process.stdout.readline().strip()
                if line:
                    logger.debug('dvm output: %s', line)
                else:
                    time.sleep(1.0)

                # refresh the exit status, otherwise this loop never ends
                retval = dvm_process.poll()

            if retval != 0:
                # send a kill signal to the main thread.
                # We know that Python and threading are likely not to play well
                # with signals - but this is an exceptional case, and not part
                # of the standard termination sequence.  If the signal is
                # swallowed, the next `orte-submit` call will trigger
                # termination anyway.
                os.kill(os.getpid(), signal.SIGTERM)

            logger.info('DVM stopped (%d)' % dvm_process.returncode)
        # ----------------------------------------------------------------------

        dvm_watcher = ru.Thread(target=_watch_dvm, name="DVMWatcher")
        dvm_watcher.start()

        lm_info = {'dvm_uri'     : dvm_uri,
                   'version_info': {name: orte_info}}

        # we need to inform the actual LM instance about the DVM URI.  So we
        # pass it back to the LRMS which will keep it in an 'lm_info', which
        # will then be passed as part of the slots via the scheduler
        return lm_info
Example #23
0
def _lsfscript_generator(url, logger, jd, ppn, lsf_version, queue):
    """
    Generate an LSF batch script from a SAGA job description.

    The script consists of a `#!/bin/bash` line, the `#BSUB` directives
    derived from `jd`, one `export` line with the environment, and the command
    line built from `jd.executable` and `jd.arguments`.

    Arguments:
        url         : URL of the target resource; `url.host` selects the entry
                      in `RESOURCES` (falls back to `hostname -f` and then to
                      `$HOSTNAME` if the host is empty or contains
                      'localhost')
        logger      : not used by this function
        jd          : job description to translate
        ppn         : not used by this function
        lsf_version : not used by this function
        queue       : queue name; takes precedence over `jd.queue`

    Returns:
        the script as a string, with `$` escaped in the command line and `"`
        escaped throughout

    Raises:
        RuntimeError: if no target host name can be determined
        ValueError  : if the host matches no entry in `RESOURCES`
    """

    lsf_bsubs  = ''
    command    = ''
    env_string = ''

    if jd.executable:
        command += "%s " % (jd.executable)
    if jd.arguments:
        command += ' '.join(jd.arguments)

    bsub_queue = queue or jd.queue
    if bsub_queue:
        lsf_bsubs += "#BSUB -q %s \n" % bsub_queue

    if jd.name:
        lsf_bsubs += "#BSUB -J %s \n" % jd.name
    if jd.job_contact:
        lsf_bsubs += "#BSUB -u %s \n" % jd.job_contact
    if jd.working_directory:
        lsf_bsubs += "#BSUB -cwd %s \n" % jd.working_directory
    if jd.wall_time_limit:
        # the limit is split into two fields: value / 60 and value % 60
        lsf_bsubs += "#BSUB -W %s:%s \n" \
                   % (int(jd.wall_time_limit / 60),
                      int(jd.wall_time_limit % 60))

    # if working directory is set, we want stdout to end up in the
    # working directory as well, unless it contains a specific
    # path name - otherwise we pass `output` as is.
    if jd.output:
        if os.path.isabs(jd.output):
            path = ''
        elif jd.working_directory:
            path = '%s/' % jd.working_directory
        else:
            path = ''
        lsf_bsubs += "#BSUB -o %s%s \n" % (path, jd.output)

    # same holds for stderr
    if jd.error:
        if os.path.isabs(jd.error):
            path = ''
        elif jd.working_directory:
            path = '%s/' % jd.working_directory
        else:
            path = ''
        lsf_bsubs += "#BSUB -e %s%s \n" % (path, jd.error)

    # a project of the form `account:reservation` sets both `-P` and `-U`
    if jd.project and ':' in jd.project:
        account, reservation = jd.project.split(':', 1)
        lsf_bsubs += "#BSUB -P %s \n" % account
        lsf_bsubs += "#BSUB -U %s \n" % reservation

    elif jd.project:
        lsf_bsubs += "#BSUB -P %s \n" % jd.project

    # Request enough nodes to cater for the number of gpus and cores requested
    if not jd.total_cpu_count:
        total_cpu_count = 1
    else:
        total_cpu_count = jd.total_cpu_count

    if not jd.total_gpu_count:
        total_gpu_count = 1
    else:
        total_gpu_count = jd.total_gpu_count

    hostname = url.host

    if not hostname or 'localhost' in hostname:
        out, _, ret = ru.sh_callout('hostname -f')
        if ret:
            hostname = os.environ.get('HOSTNAME', '')
        else:
            hostname = out.strip()

    if not hostname:
        raise RuntimeError('cannot determine target host for %s' % url)

    cpn, gpn, smt, valid_alloc_flags = 0, 1, SMT_DEFAULT, []
    for resource_name in RESOURCES:
        if resource_name in hostname:
            smt = jd.system_architecture.get('smt') or smt

            # validate the SMT level *before* it is used to derive the number
            # of cores per node - otherwise the node count is computed from a
            # value which differs from the `smt%d` flag emitted further down
            if smt not in SMT_VALID_VALUES:
                smt = SMT_DEFAULT

            cpn = RESOURCES[resource_name]['cpn'] * smt
            gpn = RESOURCES[resource_name]['gpn']
            valid_alloc_flags = RESOURCES[resource_name]['valid_alloc_flags']
            break

    if not cpn:
        raise ValueError('LSF host (%s) not yet supported' % hostname)

    # round up: a partially used node still needs to be allocated
    cpu_nodes = int(total_cpu_count / cpn)
    if total_cpu_count > (cpu_nodes * cpn):
        cpu_nodes += 1

    gpu_nodes = int(total_gpu_count / gpn)
    if total_gpu_count > (gpu_nodes * gpn):
        gpu_nodes += 1

    nodes = max(cpu_nodes, gpu_nodes)
    lsf_bsubs += "#BSUB -nnodes %s \n" % str(nodes)

    # only pass on allocation flags which are known for this resource
    alloc_flags = []
    for flag in jd.system_architecture.get('options', []):
        if flag.lower() in valid_alloc_flags:
            alloc_flags.append(flag.lower())
    alloc_flags.append('smt%d' % smt)
    lsf_bsubs += "#BSUB -alloc_flags '%s' \n" % ' '.join(alloc_flags)

    env_string += "export RADICAL_SAGA_SMT=%d" % smt
    if jd.environment:
        for k, v in jd.environment.items():
            env_string += " %s=%s" % (k, v)

    # escape double quotes and dollar signs, otherwise 'echo |'
    # further down won't work
    # only escape '$' in args and exe. not in the bsubs
    command = command.replace('$', '\\$')
    lsfscript = "\n#!/bin/bash \n%s\n%s\n%s" % (lsf_bsubs, env_string, command)
    lsfscript = lsfscript.replace('"', '\\"')

    return lsfscript
Example #24
0
def _cmd(cmd):
    """
    Run `cmd` via `ru.sh_callout` and report success.

    Returns `True` if the return code of the callout is falsy (e.g. `0`),
    `False` otherwise.  The output of the command is discarded.
    """

    _out, _err, status = ru.sh_callout(cmd)

    if status:
        return False

    return True
Example #25
0
    def rm_config_hook(cls, name, cfg, rm, log, profiler):
        """
        Start a PRRTE DVM (`prte`) and return the info needed to use it.

        The hook locates `prte`, queries `prte_info` for the Open RTE version,
        writes a hosts file (`prrte.hosts`) into the current working directory,
        starts `prte` wrapped in `(g)stdbuf -oL`, and then polls the URI file
        (`prrte.uri`, also in the current working directory) until it contains
        the DVM URI.  A daemon thread keeps draining the `prte` output and
        signals this process if `prte` exits with a non-zero status.

        Arguments:
            name     : not used by this hook
            cfg      : configuration mapping; `cfg['pid']` is used as uid for
                       profiler events
            rm       : resource manager; `node_list`, `cores_per_node` and
                       `smt` are read to create the hosts file
            log      : logger used for progress and debug messages
            profiler : profiler; `enabled` is checked and `prof()` is called

        Returns:
            dict with the keys `dvm_uri`, `version_info` and `cvd_id_mode`

        Raises:
            Exception: if `prte` or `(g)stdbuf` cannot be found, or if no DVM
                       URI shows up in the URI file
            KeyError : if `PRRTE_PREFIX` is not set in the environment
        """

        # local import: only needed to signal this process from the watcher
        import signal

        prte = ru.which('prte')
        if not prte:
            raise Exception("Couldn't find prte")

        # Now that we found the prte, get PRUN version
        out, _, _ = ru.sh_callout('prte_info | grep "Open RTE"', shell=True)
        prte_info = dict()
        for line in out.split('\n'):

            line = line.strip()

            if 'Open RTE:' in line:
                prte_info['version'] = line.split(':')[1].strip()

            elif 'Open RTE repo revision:' in line:
                prte_info['version_detail'] = line.split(':')[1].strip()

        log.info("Found Open RTE: %s [%s]", prte_info.get('version'),
                 prte_info.get('version_detail'))

        # write hosts file
        furi = '%s/prrte.uri' % os.getcwd()
        fhosts = '%s/prrte.hosts' % os.getcwd()
        vm_size = len(rm.node_list)

        with open(fhosts, 'w') as fout:
            for node in rm.node_list:
                # one line per node: first node element plus its slot count
                fout.write('%s slots=%d\n' %
                           (node[0], rm.cores_per_node * rm.smt))

        pre = os.environ['PRRTE_PREFIX']
        prte += ' --prefix %s' % pre
        prte += ' --report-uri %s' % furi
        prte += ' --hostfile %s' % fhosts

        if profiler.enabled:
            prte += ' --pmca orte_state_base_verbose 1'  # prte profiling

        # large tasks imply large message sizes, and we need to account for that
        # FIXME: we should derive the message size from DVM size - smaller DVMs
        #        will never need large messages, as they can't run large tasks)
        prte += ' --pmca ptl_base_max_msg_size %d' % (1024 * 1024 * 1024 * 1)
        # prte += ' --pmca rmaps_base_verbose 5'

        # debug mapper problems for large tasks
        if log.isEnabledFor(logging.DEBUG):
            prte += ' -pmca orte_rmaps_base_verbose 100'

        # we apply two temporary tweaks on Summit which should not be needed in
        # the long run:
        #
        # avoid 64 node limit (ssh connection limit)
        prte += ' --pmca plm_rsh_no_tree_spawn 1'

        # ensure 1 ssh per dvm
        prte += ' --pmca plm_rsh_num_concurrent %d' % vm_size

        # Use (g)stdbuf to disable buffering.  We need this to get the
        # "DVM ready" message to ensure DVM startup completion
        #
        # The command seems to be generally available on our Cray's,
        # if not, we can code some home-cooked pty stuff (TODO)
        stdbuf_cmd = ru.which(['stdbuf', 'gstdbuf'])
        if not stdbuf_cmd:
            raise Exception("Couldn't find (g)stdbuf")
        stdbuf_arg = "-oL"

        # Base command = (g)stdbuf <args> + prte + prte-args + debug_args
        cmdline = '%s %s %s ' % (stdbuf_cmd, stdbuf_arg, prte)
        # cmdline   = prte

        # Additional (debug) arguments to prte
        verbose = bool(os.environ.get('RADICAL_PILOT_PRUN_VERBOSE'))
        if verbose:
            debug_strings = [
                '--debug-devel',
                '--pmca odls_base_verbose 100',
                '--pmca rml_base_verbose 100',
            ]
        else:
            debug_strings = []

        # Split up the debug strings into args and add them to the cmdline
        cmdline += ' '.join(debug_strings)
        cmdline = cmdline.strip()

        log.info("Start prte on %d nodes [%s]", vm_size, cmdline)
        profiler.prof(event='dvm_start', uid=cfg['pid'])

        dvm_uri = None
        dvm_process = mp.Popen(cmdline.split(),
                               stdout=mp.PIPE,
                               stderr=mp.STDOUT)

        # ----------------------------------------------------------------------
        def _watch_dvm():
            # drain the prte output until the prte process terminates

            log.info('starting prte watcher')

            retval = dvm_process.poll()
            while retval is None:
                line = dvm_process.stdout.readline().strip()
                if line:
                    log.debug('prte output: %s', line)
                else:
                    time.sleep(1.0)

                # refresh the exit status, otherwise this loop never ends
                retval = dvm_process.poll()

            if retval != 0:
                # send a kill signal to the main thread.
                # We know that Python and threading are likely not to play well
                # with signals - but this is an exceptional case, and not part
                # of the standard termination sequence.  If the signal is
                # swallowed, the next `prun` call will trigger
                # termination anyway.
                os.kill(os.getpid(), signal.SIGTERM)
                raise RuntimeError('PRTE DVM died')

            log.info('prte stopped (%d)' % dvm_process.returncode)

        # ----------------------------------------------------------------------

        dvm_watcher = mt.Thread(target=_watch_dvm)
        dvm_watcher.daemon = True
        dvm_watcher.start()

        # poll the URI file: up to 100 attempts, each preceded by a short sleep
        for _ in range(100):

            time.sleep(0.5)
            try:
                with open(furi, 'r') as fin:
                    for line in fin.readlines():
                        if '://' in line:
                            dvm_uri = line.strip()
                            break

            except Exception as e:
                log.debug('DVM check: uri file missing: %s...' % str(e)[:24])
                time.sleep(0.5)

            if dvm_uri:
                break

        if not dvm_uri:
            raise Exception("VMURI not found!")

        log.info("prte startup successful: [%s]", dvm_uri)

        # in some cases, the DVM seems to need some additional time to settle.
        # FIXME: this should not be needed, really
        time.sleep(10)
        profiler.prof(event='dvm_ok', uid=cfg['pid'])

        lm_info = {
            'dvm_uri': dvm_uri,
            'version_info': prte_info,
            'cvd_id_mode': 'physical'
        }

        # we need to inform the actual LaunchMethod instance about the prte URI.
        # So we pass it back to the ResourceManager which will keep it in an
        # 'lm_info', which will then be passed as part of the slots via the
        # scheduler
        return lm_info
Example #26
0
def _cmd(cmd):
    """
    Run `cmd` via `ru.sh_callout` and report success.

    Returns `True` if the return code of the callout equals `0`, `False`
    otherwise.  The output of the command is discarded.
    """

    _out, _err, status = ru.sh_callout(cmd)

    return bool(status == 0)
Example #27
0
 def excuse():
     """
     Return the stripped stdout of a shell pipeline which connects via
     `telnet` to `bofh.jeffballard.us` (port 666) and keeps, from the line
     containing 'Your excuse is:', everything after the first colon.
     """
     fetch    = "telnet bofh.jeffballard.us 666 2>&1 "
     select   = "grep 'Your excuse is:' | cut -f 2- -d :"
     pipeline = "%s | %s" % (fetch, select)

     # sh_callout returns (stdout, stderr, retval) - only stdout is of interest
     stdout = ru.sh_callout(pipeline, shell=True)[0]

     return stdout.strip()