Example #1: PBS/Torque job service init_instance
    def init_instance(self, adaptor_state, rm_url, session):
        """ service instance constructor
        """
        self.rm = rm_url
        self.session = session
        self.ppn = None
        self.gpn = 1  # gpus per node  # FIXME: inspect system
        self.is_cray = ""
        self.queue = None
        self.shell = None
        self.jobs = dict()
        self.gres = None

        # the monitoring thread - one per service instance
        self.mt = _job_state_monitor(job_service=self)
        self.mt.start()

        rm_scheme = rm_url.scheme
        pty_url = surl.Url(rm_url)

        # this adaptor supports options that can be passed via the
        # 'query' component of the job service URL.
        if rm_url.query:
            for key, val in parse_qs(rm_url.query).iteritems():
                if key == 'queue':
                    self.queue = val[0]
                elif key == 'craytype':
                    self.is_cray = val[0]
                elif key == 'ppn':
                    self.ppn = int(val[0])
                elif key == 'gres':
                    self.gres = val[0]

        # we need to extract the scheme for PTYShell. That's basically the
        # job.Service Url without the torque+ part. We use the PTYShell to execute
        # pbs commands either locally or via gsissh or ssh.
        if rm_scheme == "torque":
            pty_url.scheme = "fork"
        elif rm_scheme == "torque+ssh":
            pty_url.scheme = "ssh"
        elif rm_scheme == "torque+gsissh":
            pty_url.scheme = "gsissh"

        # these are the commands that we need in order to interact with PBS.
        # the adaptor will try to find them during initialize(self) and bail
        # out in case they are not available.
        self._commands = {
            'pbsnodes': dict(),
            'qstat': dict(),
            'qsub': dict(),
            'qdel': dict()
        }

        self.shell = sups.PTYShell(pty_url, self.session)

        # self.shell.set_initialize_hook(self.initialize)
        # self.shell.set_finalize_hook(self.finalize)

        self.initialize()
        return self.get_api()
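
The query-option handling above can be exercised without any SAGA machinery.
A minimal sketch (plain standard library; the host name is made up) of how a
job service URL encodes both the adaptor options and the PTYShell transport:

import sys

if sys.version_info[0] < 3:
    from urlparse import urlparse, parse_qs      # Python 2, as in the code above
else:
    from urllib.parse import urlparse, parse_qs  # Python 3

url  = urlparse("torque+ssh://host.example.org/?queue=batch&ppn=16")
opts = dict((k, v[0]) for k, v in parse_qs(url.query).items())
assert opts == {'queue': 'batch', 'ppn': '16'}

# the part before '+' names the LRMS; the part after selects the shell transport
pty_scheme = {'torque':        'fork',
              'torque+ssh':    'ssh',
              'torque+gsissh': 'gsissh'}[url.scheme]
assert pty_scheme == 'ssh'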
Example #2: Directory adaptor init_instance with leased shells
    def init_instance(self, adaptor_state, url, flags, session):
        """ Directory instance constructor """

        if flags is None:
            flags = 0

        self.url = saga.Url(url)  # deep copy
        self.flags = flags
        self.session = session
        self.valid = False  # will be set by initialize
        self.lm = session._lease_manager

        def _shell_creator(url):
            return sups.PTYShell(url, self.session, self._logger)

        self.shell_creator = _shell_creator

        # The dir command shell is leased, as the dir seems to be used
        # extensively in some cases.  Note that before each command, we need to
        # perform a 'cd' to the target location, to make sure we operate in the
        # right location (see self._command())

        self.initialize()

        # we create a local shell handle, too, if only to support copy and move
        # to and from local file systems (mkdir for the staging target, removal
        # of the move source).  Note that we do not perform a cd on the local shell -- all
        # operations are assumed to be performed on absolute paths.
        #
        # self.local is not leased -- local shells are always fast and eat
        # little resources
        self.local = sups.PTYShell('fork://localhost/',
                                   saga.Session(default=True), self._logger)

        return self.get_api()
Example #3: get_go_shell for Globus Online
    def get_go_shell(self, session, go_url=None):

        # this basically return a pty shell for
        #
        #   gsissh [email protected]
        #
        # X509 contexts are preferred, but ssh contexts, userpass and myproxy can
        # also be used.  If the given url has username / password encoded, we
        # create a userpass context out of it and add it to the (copy of) the
        # session.

        sid = session._id

        if sid not in self.shells:

            self.shells[sid] = dict()

            if not go_url:
                new_url = saga.Url(GO_DEFAULT_URL)
            else:
                new_url = saga.Url(go_url)  # deep copy

            # create the shell.
            shell = sups.PTYShell(new_url, session, self._logger, posix=False)
            self.shells[sid]['shell'] = shell

            # confirm the user ID for this shell
            self.shells[sid]['user'] = None

            _, out, _ = shell.run_sync('profile')

            for line in out.split('\n'):
                if 'User Name:' in line:
                    self.shells[sid]['user'] = line.split(':', 2)[1].strip()
                    self._logger.debug("using account '%s'" %
                                       self.shells[sid]['user'])
                    break

            if not self.shells[sid]['user']:
                raise saga.NoSuccess("Could not confirm user id")

            if self.notify != 'None':
                if self.notify == 'True':
                    self._logger.debug("enable email notifications")
                    shell.run_sync('profile -n on')
                else:
                    self._logger.debug("disable email notifications")
                    shell.run_sync('profile -n off')

            # for this fresh shell, we get the list of public endpoints.  That list
            # will contain the set of hosts we can potentially connect to.
            self.get_go_endpoint_list(session, shell, fetch=True)

        # pprint.pprint (self.shells)

        # we have the shell for sure by now -- return it!
        return self.shells[session._id]['shell']
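
The 'User Name:' scraping above can be illustrated standalone; the sample
output below is assumed, not captured from a real Globus Online session:

out  = "Output Format: text\nUser Name: jdoe\nE-mail: jdoe@example.org\n"
user = None
for line in out.split('\n'):
    if 'User Name:' in line:
        user = line.split(':', 2)[1].strip()
        break
assert user == 'jdoe'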
Example #4: Directory adaptor init_instance with cwd handling
    def init_instance (self, adaptor_state, url, flags, session) :
        """ Directory instance constructor """

        if  flags is None :
            flags = 0

        self.url         = saga.Url (url) # deep copy
        self.flags       = flags
        self.session     = session
        self.valid       = False # will be set by initialize
        self.lm          = session._lease_manager

        # cwd is the directory this entry lives in, i.e. the path w/o the last element
        path             = self.url.path.rstrip ('/')
        self.cwd         = sumisc.url_get_dirname (path)
        self.cwdurl      = saga.Url (url) # deep copy
        self.cwdurl.path = self.cwd

        def _shell_creator (url) :
            return sups.PTYShell (url, self.session, self._logger)
        self.shell_creator = _shell_creator

        # self.shell is not leased, as we use it for almost every operation.
        # Even more important, that shell has state (it has a pwd).  If
        # performance or scalability becomes a problem, we can also lease-manage
        # it, but then need to keep state separate.
        self.shell         = sups.PTYShell     (self.url, self.session, self._logger)

      # self.shell.set_initialize_hook (self.initialize)
      # self.shell.set_finalize_hook   (self.finalize)

        self.initialize ()

        # we create a local shell handle, too, if only to support copy and move
        # to and from local file systems (mkdir for the staging target, removal
        # of the move source).  Note that we do not perform a cd on the local shell -- all
        # operations are assumed to be performed on absolute paths.
        #
        # self.local is not leased -- local shells are always fast and eat
        # little resources
        self.local = sups.PTYShell ('fork://localhost/', saga.Session(default=True), 
                                    self._logger)

        return self.get_api ()
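
A plain-Python sketch of the cwd derivation above, assuming that
sumisc.url_get_dirname behaves like os.path.dirname on the stripped path:

import os

path = '/data/experiments/run-42/'.rstrip('/')
cwd  = os.path.dirname(path)
assert path == '/data/experiments/run-42'
assert cwd  == '/data/experiments'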
Example #5: test_ptyshell_nok
def test_ptyshell_nok():
    """ Test pty_shell which runs command unsuccessfully """
    conf = rut.get_test_config()
    shell = sups.PTYShell(saga.Url(conf.job_service_url), conf.session)

    txt = "______1______2_____3_____"
    ret, out, _ = shell.run_sync("printf \"%s\" ; false" % txt)
    assert (ret == 1), "%s" % (repr(ret))
    assert (out == txt), "%s == %s" % (repr(out), repr(txt))

    assert (shell.alive())
    shell.finalize(True)
    assert (not shell.alive())
Example #6: test_ptyshell_file_stage
def test_ptyshell_file_stage():
    """ Test pty_shell file staging """
    conf = rut.get_test_config()
    shell = sups.PTYShell(saga.Url(conf.job_service_url), conf.session)

    txt = "______1______2_____3_____"
    shell.write_to_remote(txt, "/tmp/saga-test-staging")
    out = shell.read_from_remote("/tmp/saga-test-staging")

    assert (txt == out), "%s == %s" % (repr(out), repr(txt))

    ret, out, _ = shell.run_sync("rm /tmp/saga-test-staging")
    assert (ret == 0), "%s" % (repr(ret))
    assert (out == ""), "%s == ''" % (repr(out))
Example #7: test_ptyshell_file_stage (fork://localhost variant)
def test_ptyshell_file_stage():
    """ Test pty_shell file staging """
    conf = sutc.TestConfig()
    shell = sups.PTYShell(saga.Url("fork://localhost"), saga.Session())

    txt = "______1______2_____3_____"
    shell.stage_to_remote(txt, "/tmp/saga-test-staging-$$")
    out = shell.read_from_remote("/tmp/saga-test-staging-$$")

    assert (txt == out)

    ret, out, _ = shell.run_sync("rm /tmp/saga-test-staging-111")
    assert (ret == 0)
    assert (out == "")
Example #8: test_ptyshell_async
def test_ptyshell_async():
    """ Test pty_shell which runs command successfully """
    conf = rut.get_test_config()
    shell = sups.PTYShell(saga.Url(conf.job_service_url), conf.session)

    txt = "______1______2_____3_____\n"
    shell.run_async("cat <<EOT")

    shell.send(txt)
    shell.send('EOT\n')

    ret, out = shell.find_prompt()

    assert (ret == 0), "%s" % (repr(ret))
    assert (out == txt), "%s == %s" % (repr(out), repr(txt))

    assert (shell.alive())
    shell.finalize(True)
    assert (not shell.alive())
Example #9: gfal2 adaptor init_instance
    def init_instance(self, adaptor_state, url, flags, session):

        self._url     = url
        self._flags   = flags
        self._session = session

        self._init_check()

        try:
            # open a shell
            self.shell = sups.PTYShell(self._adaptor.pty_url, self._session)
        except:
            raise saga.NoSuccess("Couldn't open shell")

        #
        # Test for valid proxy
        #
        try:
            rc, out, _ = self.shell.run_sync("grid-proxy-info")
        except:
            self.shell.finalize(kill_pty=True)
            raise saga.exceptions.NoSuccess("grid-proxy-info failed")

        if rc != 0:
            raise saga.exceptions.NoSuccess("grid-proxy-info failed")

        if 'timeleft : 0:00:00' in out:
            raise saga.exceptions.AuthenticationFailed("x509 proxy expired.")

        #
        # Test for gfal2 tool
        #
        try:
            rc, _, _ = self.shell.run_sync("gfal2_version")
        except:
            self.shell.finalize(kill_pty=True)
            raise saga.exceptions.NoSuccess("gfal2_version")

        if rc != 0:
            raise saga.exceptions.DoesNotExist("gfal2 client not found")

        return self.get_api()
Example #10: test_ptyshell_prompt
def test_ptyshell_prompt():
    """ Test pty_shell with prompt change """
    conf = rut.get_test_config()
    shell = sups.PTYShell(saga.Url(conf.job_service_url), conf.session)

    txt = "______1______2_____3_____"
    ret, out, _ = shell.run_sync("printf \"%s\"" % txt)
    assert (ret == 0), "%s" % (repr(ret))
    assert (out == txt), "%s == %s" % (repr(out), repr(txt))

    shell.run_sync('export PS1="HALLO-(\\$?)-PROMPT>"',
                   new_prompt=r'HALLO-\((\d)\)-PROMPT>')

    txt = "______1______2_____3_____"
    ret, out, _ = shell.run_sync("printf \"%s\"" % txt)
    assert (ret == 0), "%s" % (repr(ret))
    assert (out == txt), "%s == %s" % (repr(out), repr(txt))

    assert (shell.alive())
    shell.finalize(True)
    assert (not shell.alive())
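
The new_prompt pattern matters because its (\d) group is what PTYShell uses to
recover the exit status once PS1 embeds $?.  A standalone sketch of the match:

import re

prompt_re = re.compile(r'HALLO-\((\d)\)-PROMPT>')
match     = prompt_re.search('HALLO-(0)-PROMPT>')
assert match and match.group(1) == '0'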
Example #11: parse_rwd
import re

import radical.saga.utils.pty_shell as rsups  # assumed import path for 'rsups'


def parse_rwd(rwd, fs_endpoint):
    if '$' not in rwd:
        return rwd

    shell = rsups.PTYShell(fs_endpoint)
    _, out, _ = shell.run_sync('env')

    env = dict()
    for line in out.split('\n'):
        line = line.strip()
        if not line:
            continue
        try:
            k, v = line.split('=', 1)
            env[k] = v
        except:
            pass

    parsed_rwd = rwd
    for k, v in env.iteritems():
        parsed_rwd = re.sub(r'\$%s\b' % k, v, parsed_rwd)

    return parsed_rwd
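
The substitution step can be exercised without a remote shell; the env values
here are made up:

import re

env = {'HOME': '/home/jdoe', 'SCRATCH': '/scratch/jdoe'}
rwd = '$SCRATCH/radical.pilot.sandbox'
for k, v in env.items():
    rwd = re.sub(r'\$%s\b' % k, v, rwd)
assert rwd == '/scratch/jdoe/radical.pilot.sandbox'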
Example #12: _get_resource_sandbox
    def _get_resource_sandbox(self, pilot):
        """
        for a given pilot dict, determine the global RP sandbox, based on the
        pilot's 'resource' attribute.
        """

        self.is_valid()

        # FIXME: this should get 'resource, schema=None' as parameters

        resource = pilot['description'].get('resource')
        schema = pilot['description'].get('access_schema')

        if not resource:
            raise ValueError('Cannot get pilot sandbox w/o resource target')

        # the global sandbox will be the same for all pilots on any resource, so
        # we cache it
        with self._cache_lock:

            if resource not in self._cache['resource_sandbox']:

                # cache miss -- determine sandbox and fill cache
                rcfg = self.get_resource_config(resource, schema)
                fs_url = rs.Url(rcfg['filesystem_endpoint'])

                # Get the sandbox from either the pilot_desc or resource conf
                sandbox_raw = pilot['description'].get('sandbox')
                if not sandbox_raw:
                    sandbox_raw = rcfg.get('default_remote_workdir', "$PWD")

                # If the sandbox contains expandables, we need to resolve those remotely.
                # NOTE: this will only work for (gsi)ssh or shell based access mechanisms
                if '$' not in sandbox_raw and '`' not in sandbox_raw:
                    # no need to expand further
                    sandbox_base = sandbox_raw

                else:
                    js_url = rs.Url(rcfg['job_manager_endpoint'])

                    if 'ssh' in js_url.schema.split('+'):
                        js_url.schema = 'ssh'
                    elif 'gsissh' in js_url.schema.split('+'):
                        js_url.schema = 'gsissh'
                    elif 'fork' in js_url.schema.split('+'):
                        js_url.schema = 'fork'
                    elif '+' not in js_url.schema:
                        # For local access to queueing systems use fork
                        js_url.schema = 'fork'
                    else:
                        raise Exception("unsupported access schema: %s" %
                                        js_url.schema)

                    self._log.debug("rsup.PTYShell('%s')", js_url)
                    shell = rsup.PTYShell(js_url, self)

                    ret, out, err = shell.run_sync(' echo "WORKDIR: %s"' %
                                                   sandbox_raw)
                    if ret == 0 and 'WORKDIR:' in out:
                        sandbox_base = out.split(":")[1].strip()
                        self._log.debug("sandbox base %s: '%s'", js_url,
                                        sandbox_base)
                    else:
                        raise RuntimeError(
                            "Couldn't get remote working directory.")

                # at this point we have determined the remote 'pwd' - the global sandbox
                # is relative to it.
                fs_url.path = "%s/radical.pilot.sandbox" % sandbox_base

                # before returning, keep the URL string in cache
                self._cache['resource_sandbox'][resource] = fs_url

            return self._cache['resource_sandbox'][resource]
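
The schema reduction in the middle of the method is the crux: PTYShell only
understands plain transports, so the job manager part is stripped.  A
standalone sketch of that mapping:

def reduce_schema(schema):
    # 'slurm+ssh' -> 'ssh', 'pbspro' -> 'fork' (local access), etc.
    for transport in ('ssh', 'gsissh', 'fork'):
        if transport in schema.split('+'):
            return transport
    if '+' not in schema:
        return 'fork'
    raise Exception("unsupported access schema: %s" % schema)

assert reduce_schema('slurm+ssh')     == 'ssh'
assert reduce_schema('torque+gsissh') == 'gsissh'
assert reduce_schema('pbspro')        == 'fork'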
Example #13: scp progress monitoring script
__author__ = "Andre Merzky"
__copyright__ = "Copyright 2012-2013, The SAGA Project"
__license__ = "MIT"

import re
import time
import saga
import saga.utils.pty_shell as sups

try:
    shell = sups.PTYShell(saga.Url("fork://localhost"), [])
    shell.run_async("scp ~/downloads/totalview*.sh @localhost:/tmp/t")

    # pat_bof = re.compile (r"(?P<perc>\d+\%).*(?P<time>--:--)\s*ETA")
    pat_bof = re.compile(
        r"(?P<perc>\d+)\%\s+(?P<size>.+?)\s+(?P<perf>.+?)\s+(?P<time>--:--)\s*ETA"
    )
    pat_eta = re.compile(
        r"(?P<perc>\d+)\%\s+(?P<size>.+?)\s+(?P<perf>.+?)\s+(?P<time>\d\d:\d\d)\s*ETA"
    )
    pat_eof = re.compile(
        r"(?P<perc>\d+)\%\s+(?P<size>.+?)\s+(?P<perf>.+?)\s+(?P<time>\d\d:\d\d)\s*\n"
    )

    begin = True
    error = ""

    while True:
        ret, out = shell.find(['ETA', '\n'])

        match = None
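
The three patterns classify scp progress lines by their ETA field; the sample
line below is typical scp output, assumed rather than captured:

import re

pat_eta = re.compile(r"(?P<perc>\d+)\%\s+(?P<size>.+?)\s+(?P<perf>.+?)"
                     r"\s+(?P<time>\d\d:\d\d)\s*ETA")

line  = "t    42%   12MB   1.2MB/s   00:09 ETA"
match = pat_eta.search(line)
assert match
assert match.group('perc') == '42'
assert match.group('time') == '00:09'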
Example #14: register_start_pilot_request
    def register_start_pilot_request(self, pilot, resource_config):
        """Register a new pilot start request with the worker.
        """

        # create a new UID for the pilot
        pilot_uid = ru.generate_id('pilot')

        # switch endpoint type
        fs_url = saga.Url(resource_config['filesystem_endpoint'])

        # Get the sandbox from either the pilot_desc or resource conf
        if pilot.description.sandbox:
            workdir_raw = pilot.description.sandbox
        else:
            workdir_raw = resource_config.get('default_remote_workdir', "$PWD")

        # If the sandbox contains expandables, we need to resolve those remotely.
        # TODO: Note that this will only work for (gsi)ssh or shell based access mechanisms
        if '$' in workdir_raw or '`' in workdir_raw:
            js_url = saga.Url(resource_config['job_manager_endpoint'])

            # The PTYShell cannot handle the job manager part of the scheme, so strip it
            if js_url.scheme.endswith('+ssh'):
                # For remote adaptor usage over ssh, use that here
                js_url.scheme = 'ssh'
            elif js_url.scheme.endswith('+gsissh'):
                # For remote adaptor usage over gsissh, use that here
                js_url.scheme = 'gsissh'
            elif js_url.scheme in ['fork', 'ssh', 'gsissh']:
                # Use the scheme as is for non-queuing adaptor mechanisms
                pass
            elif '+' not in js_url.scheme:
                # For local access to queueing systems use fork
                js_url.scheme = 'fork'
            else:
                raise Exception(
                    "Are there more flavours we need to support?! (%s)" %
                    js_url.scheme)

            # TODO: Why is this 'translation' required?
            if js_url.port is not None:
                url = "%s://%s:%d/" % (js_url.schema, js_url.host, js_url.port)
            else:
                url = "%s://%s/" % (js_url.schema, js_url.host)

            logger.debug("saga.utils.PTYShell ('%s')" % url)
            shell = sup.PTYShell(url, self._session)

            ret, out, err = shell.run_sync(' echo "WORKDIR: %s"' % workdir_raw)
            if ret == 0 and 'WORKDIR:' in out:
                workdir_expanded = out.split(":")[1].strip()
                logger.debug(
                    "Determined remote working directory for %s: '%s'" %
                    (url, workdir_expanded))
            else:
                error_msg = "Couldn't determine remote working directory."
                logger.error(error_msg)
                raise Exception(error_msg)
        else:
            workdir_expanded = workdir_raw

        # At this point we have determined the remote 'pwd'
        fs_url.path = "%s/radical.pilot.sandbox" % workdir_expanded

        # This is the base URL / 'sandbox' for the pilot!
        agent_dir_url = saga.Url("%s/%s-%s/" %
                                 (str(fs_url), self._session.uid, pilot_uid))

        # Create a database entry for the new pilot.
        pilot_uid, pilot_json = self._dbs.insert_pilot(
            pilot_uid=pilot_uid,
            pilot_manager_uid=self._pm_id,
            pilot_description=pilot.description,
            pilot_sandbox=str(agent_dir_url),
            global_sandbox=str(fs_url.path))

        # Create a shared data store entry
        self._shared_data[pilot_uid] = {
            'data': pilot_json,
            'callbacks': [],
            'facade_object': weakref.ref(pilot)
        }

        return pilot_uid
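
The WORKDIR round-trip deserves a note: out.split(":")[1] keeps only the
second colon-separated field, so an expanded path containing ':' would be
truncated.  A sketch with assumed remote output, plus the safer variant:

out = "WORKDIR: /home/jdoe\n"

# as above -- breaks if the expanded path itself contains ':'
workdir = out.split(":")[1].strip()
assert workdir == '/home/jdoe'

# safer: split only once, on the marker itself
workdir = out.split("WORKDIR:", 1)[1].strip()
assert workdir == '/home/jdoe'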
Example #15: register_start_pilot_request (earlier variant)
    def register_start_pilot_request(self, pilot, resource_config):
        """Register a new pilot start request with the worker.
        """

        # create a new UID for the pilot
        pilot_uid = ru.generate_id ('pilot')

        # switch endpoint type
        filesystem_endpoint = resource_config['filesystem_endpoint']

        fs = saga.Url(filesystem_endpoint)

        # get the home directory on the remote machine.
        # Note that this will only work for (gsi)ssh or shell based access
        # mechanisms (FIXME)

        import saga.utils.pty_shell as sup

        if fs.port is not None:
            url = "%s://%s:%d/" % (fs.schema, fs.host, fs.port)
        else:
            url = "%s://%s/" % (fs.schema, fs.host)

        logger.debug ("saga.utils.PTYShell ('%s')" % url)
        shell = sup.PTYShell(url, self._session, logger)

        if pilot.description.sandbox :
            workdir_raw = pilot.description.sandbox
        else :
            workdir_raw = resource_config.get ('default_remote_workdir', "$PWD")

        if '$' in workdir_raw or '`' in workdir_raw :
            ret, out, err = shell.run_sync (' echo "WORKDIR: %s"' % workdir_raw)
            if  ret == 0 and 'WORKDIR:' in out :
                workdir_expanded = out.split(":")[1].strip()
                logger.debug("Determined remote working directory for %s: '%s'" % (url, workdir_expanded))
            else :
                error_msg = "Couldn't determine remote working directory."
                logger.error(error_msg)
                raise Exception(error_msg)
        else :
            workdir_expanded = workdir_raw

        # At this point we have determined 'pwd'
        fs.path = "%s/radical.pilot.sandbox" % workdir_expanded

        # This is the base URL / 'sandbox' for the pilot!
        agent_dir_url = saga.Url("%s/%s-%s/" % (str(fs), self._session.uid, pilot_uid))

        # Create a database entry for the new pilot.
        pilot_uid, pilot_json = self._db.insert_pilot(
            pilot_uid=pilot_uid,
            pilot_manager_uid=self._pm_id,
            pilot_description=pilot.description,
            pilot_sandbox=str(agent_dir_url), 
            global_sandbox=str(fs.path)
            )

        # Create a shared data store entry
        self._shared_data[pilot_uid] = {
            'data':          pilot_json,
            'callbacks':     [],
            'facade_object': weakref.ref(pilot)
        }

        return pilot_uid
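
The endpoint 'translation' boils down to rebuilding a bare URL from scheme,
host and an optional port; the values below are assumed:

scheme, host, port = 'ssh', 'host.example.org', 2222

if port is not None:
    url = "%s://%s:%d/" % (scheme, host, port)
else:
    url = "%s://%s/" % (scheme, host)

assert url == 'ssh://host.example.org:2222/'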
Example #16: initialize_child of a shell-spawning executor
    def initialize_child(self):

        from .... import pilot as rp

        self._pwd = os.getcwd() 

        self.register_input(rps.EXECUTING_PENDING, 
                            rpc.AGENT_EXECUTING_QUEUE, self.work)

        self.register_output(rps.AGENT_STAGING_OUTPUT_PENDING,
                             rpc.AGENT_STAGING_OUTPUT_QUEUE)

        self.register_publisher (rpc.AGENT_UNSCHEDULE_PUBSUB)
        self.register_subscriber(rpc.CONTROL_PUBSUB, self.command_cb)

        # Mimic what virtualenv's "deactivate" would do
        self._deactivate = "\n# deactivate pilot virtualenv\n"

        old_path  = os.environ.get('_OLD_VIRTUAL_PATH',       None)
        old_ppath = os.environ.get('_OLD_VIRTUAL_PYTHONPATH', None)
        old_home  = os.environ.get('_OLD_VIRTUAL_PYTHONHOME', None)
        old_ps1   = os.environ.get('_OLD_VIRTUAL_PS1',        None)

        if old_path : self._deactivate += 'export PATH="%s"\n'        % old_path
        if old_ppath: self._deactivate += 'export PYTHONPATH="%s"\n'  % old_ppath
        if old_home : self._deactivate += 'export PYTHON_HOME="%s"\n' % old_home
        if old_ps1  : self._deactivate += 'export PS1="%s"\n'         % old_ps1

        self._deactivate += 'unset VIRTUAL_ENV\n\n'

        # FIXME: we should not alter the environment of the running agent, but
        #        only make sure that the CU finds a pristine env.  That also
        #        holds for the unsetting below -- AM
        if old_path : os.environ['PATH']        = old_path
        if old_ppath: os.environ['PYTHONPATH']  = old_ppath
        if old_home : os.environ['PYTHON_HOME'] = old_home
        if old_ps1  : os.environ['PS1']         = old_ps1

        if 'VIRTUAL_ENV' in os.environ :
            del(os.environ['VIRTUAL_ENV'])

        # simplify shell startup / prompt detection
        os.environ['PS1'] = '$ '

        # FIXME:
        #
        # The AgentExecutingComponent needs the LaunchMethods to construct
        # commands.  Those need the scheduler for some lookups and helper
        # methods, and the scheduler needs the LRMS.  The LRMS can in general
        # only initialized in the original agent environment -- which ultimately
        # limits our ability to place the CU execution on other nodes.
        #
        # As a temporary workaround we pass a None-Scheduler -- this will only
        # work for some launch methods, and specifically not for ORTE, DPLACE
        # and RUNJOB.
        #
        # The clean solution seems to be to make sure that, on 'allocating', the
        # scheduler derives all information needed to use the allocation and
        # attaches them to the CU, so that the launch methods don't need to look
        # them up again.  This will make the 'opaque_slots' more opaque -- but
        # that is the reason of their existence (and opaqueness) in the first
        # place...

        self._task_launcher = rp.agent.LM.create(
                name    = self._cfg['task_launch_method'],
                cfg     = self._cfg,
                session = self._session)

        self._mpi_launcher = rp.agent.LM.create(
                name    = self._cfg['mpi_launch_method'],
                cfg     = self._cfg,
                session = self._session)

        # TODO: test that this actually works
        # Remove the configured set of environment variables from the
        # environment that we pass to Popen.
        env_removables = list()
        if self._mpi_launcher : env_removables += self._mpi_launcher.env_removables
        if self._task_launcher: env_removables += self._task_launcher.env_removables
        for e in list(os.environ.keys()):
            for r in env_removables:
                if e.startswith(r):
                    os.environ.pop(e, None)

        # if we need to transplant any original env into the CU, we dig the
        # respective keys from the dump made by bootstrap_1.sh
        self._env_cu_export = dict()
        if self._cfg.get('export_to_cu'):
            with open('env.orig', 'r') as f:
                for line in f.readlines():
                    if '=' in line:
                        k,v = line.split('=', 1)
                        key = k.strip()
                        val = v.strip()
                        if key in self._cfg['export_to_cu']:
                            self._env_cu_export[key] = val

        # the registry keeps track of units to watch, indexed by their shell
        # spawner process ID.  As the registry is shared between the spawner and
        # watcher thread, we use a lock while accessing it.
        self._registry      = dict()
        self._registry_lock = threading.RLock()

        self._cus_to_cancel  = list()
        self._cancel_lock    = threading.RLock()

        self._cached_events = list() # keep monitoring events for pids which
                                     # are not yet known

        # get some threads going -- those will do all the work.
        import saga.utils.pty_shell as sups
        self.launcher_shell = sups.PTYShell("fork://localhost/")
        self.monitor_shell  = sups.PTYShell("fork://localhost/")

        # run the spawner on the shells
        # tmp = tempfile.gettempdir()
        # Moving back to shared file system again, until it reaches maturity,
        # as this breaks launch methods with a hop, e.g. ssh.
        # FIXME: see #658
        self._pilot_id    = self._cfg['pilot_id']
        self._spawner_tmp = "/%s/%s-%s" % (self._pwd, self._pilot_id, self.uid)

        ret, out, _  = self.launcher_shell.run_sync \
                           ("/bin/sh %s/agent/executing/shell_spawner.sh %s" \
                           % (os.path.dirname (rp.__file__), self._spawner_tmp))
        if  ret != 0 :
            raise RuntimeError ("failed to bootstrap launcher: (%s)(%s)", ret, out)

        ret, out, _  = self.monitor_shell.run_sync \
                           ("/bin/sh %s/agent/executing/shell_spawner.sh %s" \
                           % (os.path.dirname (rp.__file__), self._spawner_tmp))
        if  ret != 0 :
            raise RuntimeError ("failed to bootstrap monitor: (%s)(%s)", ret, out)

        # run watcher thread
        self._terminate = threading.Event()
        self._watcher   = threading.Thread(target=self._watch, name="Watcher")
        self._watcher.daemon = True
        self._watcher.start ()

        self.gtod = "%s/gtod" % self._pwd
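
The "deactivate" mimicry at the top works because virtualenv's activate script
saves pre-activation values in _OLD_VIRTUAL_* variables; restoring those and
unsetting VIRTUAL_ENV undoes the activation for child shells.  A sketch with
assumed values:

env = {'_OLD_VIRTUAL_PATH': '/usr/bin', 'VIRTUAL_ENV': '/opt/ve'}

deactivate = "\n# deactivate pilot virtualenv\n"
old_path   = env.get('_OLD_VIRTUAL_PATH')
if old_path:
    deactivate += 'export PATH="%s"\n' % old_path
deactivate += 'unset VIRTUAL_ENV\n\n'

assert 'export PATH="/usr/bin"' in deactivate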
Example #17: get_go_shell with shell caching
    def get_go_shell(self, session, go_url=GO_DEFAULT_URL):

        # This returns a pty shell for: '[gsi]ssh [email protected]'
        #
        # X509 contexts are preferred, but ssh contexts, userpass and myproxy can
        # also be used.  If the given url has username / password encoded, we
        # create a userpass context out of it and add it to the (copy of) the
        # session.

        self._logger.debug("Acquiring lock")
        with self.shell_lock:
            self._logger.debug("Acquired lock")

            sid = session._id

            init = False
            create = False

            if sid in self.shells and self.shells[sid]['shell'].alive(recover=False):
                self._logger.debug("Shell in cache and alive, can reuse.")
            elif sid in self.shells:
                self._logger.debug("Shell in cache but not alive.")
                self.shells[sid]['shell'].finalize()
                self._logger.debug("Shell is finalized, need to recreate.")
                create = True
            else:
                self._logger.debug("Shell not in cache, create entry.")
                init = True
                create = True
                self.shells[sid] = {}

            # Acquire new shell
            if create:

                # deep copy the URL (saga.Url(url) copies rather than aliases)
                new_url = saga.Url(go_url)

                # GO specific prompt pattern
                opts = {'prompt_pattern': self.prompt}

                # create the shell.
                shell = sups.PTYShell(new_url, session=session, logger=self._logger, opts=opts, posix=False)
                self.shells[sid]['shell'] = shell

                # For this fresh shell, we get the list of public endpoints.
                # That list will contain the set of hosts we can potentially connect to.
                self.get_go_endpoint_list(session, shell, fetch=True)

            # Initialize other dict members and remote shell
            if init:
                shell = self.shells[sid]['shell']

                # Confirm the user ID for this shell
                self.shells[sid]['user'] = None
                _, out, _ = shell.run_sync('profile')
                for line in out.split('\n'):
                    if 'User Name:' in line:
                        self.shells[sid]['user'] = line.split(':', 2)[1].strip()
                        self._logger.debug("using account '%s'" % self.shells[sid]['user'])
                        break
                if not self.shells[sid]['user']:
                    raise saga.NoSuccess("Could not confirm user id")

                # Toggle notification
                if self.notify == 'True':
                    self._logger.debug("enable email notifications")
                    shell.run_sync('profile -n on')
                elif self.notify == 'False':
                    self._logger.debug("disable email notifications")
                    shell.run_sync('profile -n off')

            self._logger.debug("Release lock")

            # we have the shell for sure by now -- return it!
            return self.shells[sid]['shell']
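
The lock-guarded cache implements a small decision table: reuse a live shell,
finalize and recreate a dead one, and run the full profile initialization only
on a true cache miss.  A sketch of just that table:

def cache_action(in_cache, alive):
    if in_cache and alive:
        return ('reuse',)
    if in_cache:
        return ('finalize', 'recreate')
    return ('recreate', 'initialize')

assert cache_action(True,  True)  == ('reuse',)
assert cache_action(True,  False) == ('finalize', 'recreate')
assert cache_action(False, False) == ('recreate', 'initialize')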
Example #18: _shell_creator helper
 def _shell_creator(url):
     return sups.PTYShell(url, self.get_session(), self._logger)
Example #19: _alive with shell recovery
 def _alive(self):
     alive = self.shell.alive()
     if not alive:
         self.shell = sups.PTYShell(self._adaptor.pty_url)