Example #1
0
 def _merge_delegation_tokens(self, delegation_token_files,
                              delegation_token_dir):
     """
     Use the Credentials Merger utility to combine the delegation token files
     into one delegation token file.

     :param delegation_token_files: list of strings appended to the merge
         command line (the token files to combine).
     :param delegation_token_dir: directory in which the merged file is created.
     :returns: the path of the merged token file (created with ``tempfile.mkstemp``).
     :raises MergeToolException: if the merge command exits with a non-zero status.
     """
     merged_token_file_no, merged_token_file_path = tempfile.mkstemp(
         dir=delegation_token_dir)
     # Only the path is needed here; it is handed to the merge command below.
     os.close(merged_token_file_no)

     merged = False
     try:
         merge_tool_args = [
             hadoop.conf.HDFS_CLUSTERS['default'].HADOOP_BIN.get(), 'jar',
             hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file_path,
         ]
         merge_tool_args += delegation_token_files
         LOG.debug("Merging credentials files with command: '%s'" %
                   (' '.join(merge_tool_args)))
         merge_process = subprocess.Popen(merge_tool_args,
                                          stderr=subprocess.PIPE,
                                          shell=False,
                                          close_fds=True)
         # communicate() drains stderr while waiting, so the child cannot
         # block forever on a full pipe (which poll()/wait() alone allow).
         # Note: ``_`` is the translation function here, so it must not be
         # used as a throwaway name.
         unused_stdout, stderr_output = merge_process.communicate()
         retcode = merge_process.returncode
         if retcode != 0:
             stderr_lines = stderr_output.splitlines()
             first_line = stderr_lines[0] if stderr_lines else ''
             LOG.error("Failed to merge credentials :'%s'..." %
                       (first_line, ))
             # Pass the integer itself: a 1-tuple cannot be formatted with %d.
             raise MergeToolException(
                 _("bin/hadoop return non-zero %(retcode)d while trying to merge credentials."
                   ) % dict(retcode=retcode))
         merged = True
     finally:
         if not merged:
             # Do not leave a useless temp file behind when the merge failed.
             try:
                 os.unlink(merged_token_file_path)
             except OSError:
                 LOG.warning("Could not remove merged delegation token file %s" %
                             merged_token_file_path)
     return merged_token_file_path
Example #2
0
    def spawn_children(self, number=1):
        """Fork ``number`` child processes and register them in ``self.children``.

        Each child replaces itself (``os.execve``) with a fresh interpreter that
        runs ``spawning_child.main()``; it is passed the controller pid, the
        listening socket fd, the child's end of a new pipe, the factory and the
        JSON-encoded args. The controller keeps the other pipe end in a
        ``Child`` record keyed by the child's pid.

        A failed fork is reported and skipped; the pipe created for that child
        is closed so no descriptors leak.
        """
        parent_pid = os.getpid()

        for _ in range(number):
            child_side, parent_side = os.pipe()
            try:
                child_pid = os.fork()
            except OSError:
                # NOTE(review): if print_exc is traceback.print_exc, its first
                # positional argument is ``limit``, so this text is not printed
                # as a message -- confirm.
                print_exc('Could not fork child! Panic!')
                ### TODO: restart
                # Without this, child_pid would be undefined (or stale from a
                # previous iteration) in the code below.
                os.close(child_side)
                os.close(parent_side)
                continue

            if not child_pid:      # child process
                os.close(parent_side)
                command = [sys.executable, '-c',
                    'import sys; from spawning import spawning_child; spawning_child.main()',
                    str(parent_pid),
                    str(self.sock.fileno()),
                    str(child_side),
                    self.factory,
                    json.dumps(self.args)]
                if self.args['reload'] == 'dev':
                    command.append('--reload')
                env = environ()
                tpool_size = int(self.config.get('threadpool_workers', 0))
                assert tpool_size >= 0, (tpool_size, 'Cannot have a negative --threads argument')
                if tpool_size not in (0, 1):
                    env['EVENTLET_THREADPOOL_SIZE'] = str(tpool_size)
                # Does not return on success: the process image is replaced.
                os.execve(sys.executable, command, env)

            # controller process
            os.close(child_side)
            self.children[child_pid] = Child(child_pid, parent_side)
Example #3
0
  def destroy(self):
    """
    Clean up the resources used for this shell.

    Removes the delegation token files, closes the buffers and both pty
    descriptors, then sends SIGKILL to the subprocess and reaps it.
    ``self.destroyed`` is set to True even if one of those steps raises.
    """
    # ``or ()`` tolerates the attribute having already been reset to None.
    for delegation_token_file in self._delegation_token_files or ():
      try:
        os.unlink(delegation_token_file)
      except Exception:
        # Best effort only, but do not swallow KeyboardInterrupt/SystemExit.
        LOG.warning("Could not remove delegation token file %s" % delegation_token_file)

    try:
      self._delegation_token_files = None
      self._write_buffer.close()
      self._read_buffer.close()

      os.close(self._fd)
      os.close(self._child_fd)

      try:
        LOG.debug("Sending SIGKILL to process with PID %d" % (self.pid,))
        os.kill(self.pid, signal.SIGKILL)
        # The second item returned by waitpid is the raw wait status.
        unused_pid, status = os.waitpid(self.pid, 0)
        msg = "%s - shell_id:%s pid:%d - Exited with status %d" % (self.username, self.shell_id, self.pid, status)
      except OSError:
        msg = "%s - shell_id:%s pid:%d - Killed successfully" % (self.username, self.shell_id, self.pid,)
        # This means the subprocess was already killed, which happens if the command was "quit"
        # This can also happen if the waitpid call results in an error, which we don't care about.
      LOG.info(msg)
      SHELL_OUTPUT_LOGGER.info(msg)
      SHELL_INPUT_LOGGER.info(msg)
    finally:
      self.destroyed = True
Example #4
0
    def runloop(self):
        """Supervise the child processes until ``self.keep_going`` is false.

        Each pass sleeps briefly, starts any missing children, then waits for
        a child to exit, drops it from ``self.children`` and closes its kill
        pipe. A non-zero wait status is logged.
        """
        while self.keep_going:
            eventlet.sleep(0.1)
            ## Only start the number of children we need
            number = self.num_processes - self.children_count()
            if number > 0:
                self.log.debug('Should start %d new children', number)
                self.spawn_children(number=number)
                continue

            if not self.children:
                ## If we don't yet have children, let's loop
                continue

            pid, result = None, None
            try:
                pid, result = os.wait()
            except OSError as e:
                # An interrupted wait is not an error; just go around again.
                if e.errno != errno.EINTR:
                    raise

            if pid and self.children.get(pid):
                try:
                    child = self.children.pop(pid)
                    os.close(child.kill_pipe)
                except (IOError, OSError):
                    pass

            # A status of 0 (clean exit) is falsy and is deliberately not logged.
            if result:
                signum = os.WTERMSIG(result)
                exitcode = os.WEXITSTATUS(result)
                self.log.info('(%s) Child died from signal %s with code %s',
                              pid, signum, exitcode)
Example #5
0
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.

    For every cluster with ``security_enabled`` set, a token is fetched while the
    cluster is temporarily switched to ``username`` and written to a new temp file
    in ``delegation_token_dir``.

    Returns the list of paths of the token files that were written.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if not cluster.security_enabled:
        continue
      current_user = cluster.user
      try:
        cluster.setuser(username)
        token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
        token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
        try:
          os.write(token_file_no, token)
        finally:
          # Close the descriptor even when the write fails, so it cannot leak.
          os.close(token_file_no)
        delegation_token_files.append(path)
      finally:
        # Always restore the user the cluster object had before.
        cluster.setuser(current_user)

    return delegation_token_files
Example #6
0
  def destroy(self):
    """
    Clean up the resources used for this shell.

    Removes the delegation token files, closes the buffers and both pty
    descriptors, then sends SIGKILL to the subprocess and reaps it.
    ``self.destroyed`` is set to True even if one of those steps raises.
    """
    # ``or ()`` tolerates the attribute having already been reset to None.
    for delegation_token_file in self._delegation_token_files or ():
      try:
        os.unlink(delegation_token_file)
      except Exception:
        # Best effort only, but do not swallow KeyboardInterrupt/SystemExit.
        LOG.warning("Could not remove delegation token file %s" % delegation_token_file)

    try:
      self._delegation_token_files = None
      self._write_buffer.close()
      self._read_buffer.close()

      os.close(self._fd)
      os.close(self._child_fd)

      try:
        LOG.debug("Sending SIGKILL to process with PID %d" % (self.pid,))
        os.kill(self.pid, signal.SIGKILL)
        # The second item returned by waitpid is the raw wait status.
        unused_pid, status = os.waitpid(self.pid, 0)
        msg = "%s - shell_id:%s pid:%d - Exited with status %d" % (self.username, self.shell_id, self.pid, status)
      except OSError:
        msg = "%s - shell_id:%s pid:%d - Killed successfully" % (self.username, self.shell_id, self.pid,)
        # This means the subprocess was already killed, which happens if the command was "quit"
        # This can also happen if the waitpid call results in an error, which we don't care about.
      LOG.info(msg)
      SHELL_OUTPUT_LOGGER.info(msg)
      SHELL_INPUT_LOGGER.info(msg)
    finally:
      self.destroyed = True
Example #7
0
    def spawn_children(self, number=1):
        """Fork ``number`` child processes and register them in ``self.children``.

        Each child replaces itself (``os.execve``) with a fresh interpreter that
        runs ``spawning_child.main()``; it is passed the controller pid, the
        listening socket fd, the child's end of a new pipe, the factory and the
        JSON-encoded args. The controller keeps the other pipe end in a
        ``Child`` record keyed by the child's pid.

        A failed fork is reported and skipped; the pipe created for that child
        is closed so no descriptors leak.
        """
        parent_pid = os.getpid()
        self.log.debug('Controller.spawn_children(number=%d)' % number)

        for _ in range(number):
            child_side, parent_side = os.pipe()
            try:
                child_pid = os.fork()
            except OSError:
                # NOTE(review): if print_exc is traceback.print_exc, its first
                # positional argument is ``limit``, so this text is not printed
                # as a message -- confirm.
                print_exc('Could not fork child! Panic!')
                ### TODO: restart
                # Without this, child_pid would be undefined (or stale from a
                # previous iteration) in the code below.
                os.close(child_side)
                os.close(parent_side)
                continue

            if not child_pid:  # child process
                os.close(parent_side)
                command = [
                    sys.executable, '-c',
                    'import sys; from spawning import spawning_child; spawning_child.main()',
                    str(parent_pid),
                    str(self.sock.fileno()),
                    str(child_side), self.factory,
                    json.dumps(self.args)
                ]
                if self.args['reload'] == 'dev':
                    command.append('--reload')
                env = environ()
                tpool_size = int(self.config.get('threadpool_workers', 0))
                assert tpool_size >= 0, (
                    tpool_size, 'Cannot have a negative --threads argument')
                if tpool_size not in (0, 1):
                    env['EVENTLET_THREADPOOL_SIZE'] = str(tpool_size)
                # Does not return on success: the process image is replaced.
                os.execve(sys.executable, command, env)

            # controller process
            os.close(child_side)
            self.children[child_pid] = Child(child_pid, parent_side)
Example #8
0
    def runloop(self):
        """Supervise the child processes until ``self.keep_going`` is false.

        Each pass sleeps briefly, starts any missing children, then waits for
        a child to exit, drops it from ``self.children`` and closes its kill
        pipe. A non-zero wait status is logged.
        """
        while self.keep_going:
            eventlet.sleep(0.1)
            ## Only start the number of children we need
            number = self.num_processes - self.children_count()
            if number > 0:
                self.log.debug('Should start %d new children', number)
                self.spawn_children(number=number)
                continue

            if not self.children:
                ## If we don't yet have children, let's loop
                continue

            pid, result = None, None
            try:
                pid, result = os.wait()
            except OSError as e:
                # An interrupted wait is not an error; just go around again.
                if e.errno != errno.EINTR:
                    raise

            if pid and self.children.get(pid):
                try:
                    child = self.children.pop(pid)
                    os.close(child.kill_pipe)
                except (IOError, OSError):
                    pass

            # A status of 0 (clean exit) is falsy and is deliberately not logged.
            if result:
                signum = os.WTERMSIG(result)
                exitcode = os.WEXITSTATUS(result)
                self.log.info('(%s) Child died from signal %s with code %s',
                              pid, signum, exitcode)
Example #9
0
  def __init__(self, shell_command, subprocess_env, shell_id, username, delegation_token_dir):
    """
    Start ``shell_command`` as Unix user ``username`` attached to a new pseudo-terminal.

    ``subprocess_env`` is updated in place (home directory and, when delegation
    tokens were obtained, the token file location) and used as the environment
    of the subprocess.

    Raises KeyError if ``username`` has no Unix account. Any other failure
    before the subprocess exists closes both pty descriptors and is re-raised.
    """
    try:
      user_info = pwd.getpwnam(username)
    except KeyError:
      LOG.error("Unix user account didn't exist at subprocess creation. Was it deleted?")
      raise

    parent, child = pty.openpty()

    # From here on the pty descriptors are ours: release them on ANY failure,
    # not only when Popen itself fails.
    try:
      try:
        tty.setraw(parent)
      except tty.error:
        LOG.debug("Could not set parent fd to raw mode, user will see duplicated input.")

      subprocess_env[constants.HOME] = user_info.pw_dir
      command_to_use = [_SETUID_PROG, str(user_info.pw_uid), str(user_info.pw_gid)]
      command_to_use.extend(shell_command)

      delegation_token_files = self._get_delegation_tokens(username, delegation_token_dir)
      if delegation_token_files:
        merged_token_file = self._merge_delegation_tokens(delegation_token_files, delegation_token_dir)
        delegation_token_files = [merged_token_file]
        # NOTE(review): this expects _merge_delegation_tokens to return an object
        # with a ``.name`` attribute (e.g. a NamedTemporaryFile), not a plain
        # path string -- confirm against the implementation in use.
        subprocess_env[constants.HADOOP_TOKEN_FILE_LOCATION] = merged_token_file.name

      LOG.debug("Starting subprocess with command '%s' and environment '%s'" %
                                                             (command_to_use, subprocess_env,))
      p = subprocess.Popen(command_to_use, stdin=child, stdout=child, stderr=child,
                                                                 env=subprocess_env, close_fds=True)
    except Exception:
      os.close(parent)
      os.close(child)
      raise

    msg_format =  "%s - shell_id:%s pid:%d - args:%s"
    msg_args = (username, shell_id, p.pid, ' '.join(command_to_use))
    msg = msg_format % msg_args
    SHELL_OUTPUT_LOGGER.info(msg)
    SHELL_INPUT_LOGGER.info(msg)

    # State that shouldn't be touched by any other classes.
    self._output_buffer_length = 0
    self._commands = []
    self._fd = parent
    self._child_fd = child
    self.subprocess = p
    self.pid = p.pid
    self._write_buffer = cStringIO.StringIO()
    self._read_buffer = cStringIO.StringIO()
    self._delegation_token_files = delegation_token_files

    # State that's accessed by other classes.
    self.shell_id = shell_id
    self.username = username
    # Timestamp that is updated on shell creation and on every output request. Used so that we know
    # when to kill the shell.
    self.time_received = time.time()
    self.last_output_sent = False
    self.remove_at_next_iteration = False
    self.destroyed = False
Example #10
0
  def _get_delegation_tokens(self, username, delegation_token_dir):
    """
    If operating against Kerberized Hadoop, we'll need to have obtained delegation tokens for
    the user we want to run the subprocess as. We have to do it here rather than in the subprocess
    because the subprocess does not have Kerberos credentials in that case.

    For every cluster with ``security_enabled`` set, a token is fetched while the
    cluster is temporarily switched to ``username`` and written to a new temp file
    in ``delegation_token_dir``.

    Returns the list of paths of the token files that were written.
    """
    delegation_token_files = []
    all_clusters = []
    all_clusters += all_mrclusters().values()
    all_clusters += get_all_hdfs().values()

    LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))

    for cluster in all_clusters:
      if not cluster.security_enabled:
        continue
      current_user = cluster.user
      try:
        cluster.setuser(username)
        token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
        token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
        try:
          os.write(token_file_no, token)
        finally:
          # Close the descriptor even when the write fails, so it cannot leak.
          os.close(token_file_no)
        delegation_token_files.append(path)
      finally:
        # Always restore the user the cluster object had before.
        cluster.setuser(current_user)

    return delegation_token_files
Example #11
0
  def __init__(self, shell_command, shell_id, username, delegation_token_dir):
    """
    Start ``shell_command`` as Unix user ``username`` attached to a new pseudo-terminal.

    The subprocess environment is built from the preserved variables of the
    UTF-8 environment, plus the user's home directory and, when delegation
    tokens were obtained, the comma-separated token file locations.

    Raises KeyError if ``username`` has no Unix account. Any other failure
    before the subprocess exists closes both pty descriptors and is re-raised.
    """
    subprocess_env = {}
    env = desktop.lib.i18n.make_utf8_env()
    for item in constants.PRESERVED_ENVIRONMENT_VARIABLES:
      value = env.get(item)
      if value:
        subprocess_env[item] = value

    try:
      user_info = pwd.getpwnam(username)
    except KeyError:
      LOG.error("Unix user account didn't exist at subprocess creation. Was it deleted?")
      raise

    parent, child = pty.openpty()

    # From here on the pty descriptors are ours: release them on ANY failure,
    # not only when Popen itself fails.
    try:
      try:
        tty.setraw(parent)
      except tty.error:
        LOG.debug("Could not set parent fd to raw mode, user will see echoed input.")

      subprocess_env[constants.HOME] = user_info.pw_dir
      command_to_use = [_SETUID_PROG, str(user_info.pw_uid), str(user_info.pw_gid)]
      command_to_use.extend(shell_command)

      delegation_token_files = self._get_delegation_tokens(username, delegation_token_dir)
      if delegation_token_files:
        # NOTE(review): after this line only the names are kept. If the items are
        # NamedTemporaryFile objects, dropping them may delete the files, and a
        # destroy() that calls .close() on each entry would fail on strings --
        # confirm what _get_delegation_tokens returns and what destroy() expects.
        delegation_token_files = [token_file.name for token_file in delegation_token_files]
        subprocess_env[constants.HADOOP_TOKEN_FILE_LOCATION] = ','.join(delegation_token_files)

      LOG.debug("Starting subprocess with command '%s' and environment '%s'" %
                                                             (command_to_use, subprocess_env,))
      p = subprocess.Popen(command_to_use, stdin=child, stdout=child, stderr=child,
                                                                 env=subprocess_env, close_fds=True)
    except Exception:
      os.close(parent)
      os.close(child)
      raise

    # State that shouldn't be touched by any other classes.
    self._output_buffer_length = 0
    self._commands = []
    self._fd = parent
    self._child_fd = child
    self.subprocess = p
    self.pid = p.pid
    self._write_buffer = cStringIO.StringIO()
    self._read_buffer = cStringIO.StringIO()
    self._delegation_token_files = delegation_token_files

    # State that's accessed by other classes.
    self.shell_id = shell_id
    # Timestamp that is updated on shell creation and on every output request. Used so that we know
    # when to kill the shell.
    self.time_received = time.time()
    self.last_output_sent = False
    self.remove_at_next_iteration = False
    self.destroyed = False
Example #12
0
 def _merge_delegation_tokens(self, delegation_token_files, delegation_token_dir):
   """
   Use the Credentials Merger utility to combine the delegation token files into one delegation token file.

   delegation_token_files: list of strings appended to the merge command line (the token files to combine).
   delegation_token_dir: directory in which the merged file is created.

   Returns the path of the merged token file (created with tempfile.mkstemp).
   Raises MergeToolException if the merge command exits with a non-zero status.
   """
   merged_token_file_no, merged_token_file_path = tempfile.mkstemp(dir=delegation_token_dir)
   # Only the path is needed here; it is handed to the merge command below.
   os.close(merged_token_file_no)

   merged = False
   try:
     merge_tool_args = [hadoop.conf.HDFS_CLUSTERS['default'].HADOOP_BIN.get(), 'jar']
     merge_tool_args += [hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file_path]
     merge_tool_args += delegation_token_files
     LOG.debug("Merging credentials files with command: '%s'" % (' '.join(merge_tool_args)))
     merge_process = subprocess.Popen(merge_tool_args, stderr=subprocess.PIPE, shell=False, close_fds=True)
     # communicate() drains stderr while waiting; a bare wait() can deadlock
     # once the child has filled the pipe. ``_`` is the translation function
     # here, so it must not be used as a throwaway name.
     unused_stdout, stderr_output = merge_process.communicate()
     retcode = merge_process.returncode
     if retcode != 0:
       stderr_lines = stderr_output.splitlines()
       first_line = stderr_lines[0] if stderr_lines else ''
       LOG.error("Failed to merge credentials :'%s'..." % (first_line,))
       # Pass the integer itself: a 1-tuple cannot be formatted with %d.
       raise MergeToolException(_("bin/hadoop return non-zero %(retcode)d while trying to merge credentials.") % dict(retcode=retcode))
     merged = True
   finally:
     if not merged:
       # Do not leave a useless temp file behind when the merge failed.
       try:
         os.unlink(merged_token_file_path)
       except OSError:
         LOG.warning("Could not remove merged delegation token file %s" % merged_token_file_path)
   return merged_token_file_path
Example #13
0
    def runloop(self):
        """Supervise the child processes until ``self.keep_going`` is false.

        Each pass sleeps briefly, starts any missing children, then polls every
        child that is not marked active. A child whose process has finished is
        removed from ``self.children``, its pipes are closed and its exit is
        logged.
        """
        self.log.debug('runloop')
        while self.keep_going:
            eventlet.sleep(0.1)
            ## Only start the number of children we need
            number = self.num_processes - self.children_count()
            if number > 0:
                self.log.debug('Should start %d new children', number)
                self.spawn_children(number=number)
                continue

            if not self.children:
                ## If we don't yet have children, let's loop
                continue

            # Iterate over a snapshot: entries are deleted inside the loop.
            for child in list(self.children.values()):
                if child.active:
                    continue

                result = child.proc.poll()
                if result is None:
                    # Still running.
                    continue

                del self.children[child.pid]

                # Close each pipe on its own so that a failure on the first
                # does not leave the second one open.
                for pipe_fd in (child.kill_pipe, child.notify_pipe):
                    try:
                        os.close(pipe_fd)
                    except (IOError, OSError):
                        pass

                if sys.platform != 'win32':
                    # NOTE(review): if child.proc is a subprocess.Popen, poll()
                    # yields a return code (negative for signals) rather than a
                    # wait() status, so these macros may misdecode it -- confirm.
                    signum = os.WTERMSIG(result)
                    exitcode = os.WEXITSTATUS(result)
                    self.log.info('(%s) Child died from signal %s with code %s',
                                  child.pid, signum, exitcode)
                else:
                    self.log.info('(%s) Child died with code %s',
                                  child.pid, result)
Example #14
0
  def destroy(self):
    """
    Release everything this shell holds.

    Closes the delegation token files, both buffers and both pty descriptors,
    then sends SIGKILL to the subprocess. ``self.destroyed`` ends up True
    whether or not one of those steps raises.
    """
    try:
      for token_file in self._delegation_token_files:
        token_file.close()
      self._delegation_token_files = None

      self._write_buffer.close()
      self._read_buffer.close()
      os.close(self._fd)
      os.close(self._child_fd)

      child_pid = self.subprocess.pid
      try:
        LOG.debug("Sending SIGKILL to process with PID %d" % (child_pid,))
        os.kill(child_pid, signal.SIGKILL)
        # The exit status is deliberately not collected: telling good statuses
        # from bad ones is hard to get right, and os.wait might block.
      except OSError:
        # The subprocess was already gone, which happens if the command was "quit".
        pass
    finally:
      self.destroyed = True
Example #15
0
  def __init__(self, shell_command, subprocess_env, shell_id, username, delegation_token_dir):
    """
    Start ``shell_command`` as Unix user ``username`` attached to a new pseudo-terminal.

    ``subprocess_env`` is updated in place (home directory and, when delegation
    tokens were obtained, the merged token file location) and used as the
    environment of the subprocess. The individual token files are removed once
    they have been merged; only the merged file is remembered for cleanup.

    Raises KeyError if ``username`` has no Unix account. Any other failure
    before the subprocess exists closes both pty descriptors and is re-raised.
    """
    try:
      user_info = pwd.getpwnam(username)
    except KeyError:
      LOG.error("Unix user account didn't exist at subprocess creation. Was it deleted?")
      raise

    parent, child = pty.openpty()

    # From here on the pty descriptors are ours: release them on ANY failure,
    # not only when Popen itself fails.
    try:
      try:
        tty.setraw(parent)
      except tty.error:
        LOG.debug("Could not set parent fd to raw mode, user will see duplicated input.")

      subprocess_env[constants.HOME] = user_info.pw_dir
      command_to_use = [_SETUID_PROG, str(user_info.pw_uid), str(user_info.pw_gid)]
      command_to_use.extend(shell_command)

      delegation_token_files = self._get_delegation_tokens(username, delegation_token_dir)
      if delegation_token_files:
        merged_token_file_path = self._merge_delegation_tokens(delegation_token_files, delegation_token_dir)
        for path in delegation_token_files:
          try:
            os.unlink(path)
          except Exception:
            # Best effort only, but do not swallow KeyboardInterrupt/SystemExit.
            LOG.warning("Could not remove delegation token file %s" % path)
        delegation_token_files = [merged_token_file_path]
        subprocess_env[constants.HADOOP_TOKEN_FILE_LOCATION] = merged_token_file_path

      LOG.debug("Starting subprocess with command '%s' and environment '%s'" %
                                                             (command_to_use, subprocess_env,))
      p = subprocess.Popen(command_to_use, stdin=child, stdout=child, stderr=child,
                                                                 env=subprocess_env, close_fds=True)
    except Exception:
      os.close(parent)
      os.close(child)
      raise

    msg_format =  "%s - shell_id:%s pid:%d - args:%s"
    msg_args = (username, shell_id, p.pid, ' '.join(command_to_use))
    msg = msg_format % msg_args
    SHELL_OUTPUT_LOGGER.info(msg)
    SHELL_INPUT_LOGGER.info(msg)

    # State that shouldn't be touched by any other classes.
    self._output_buffer_length = 0
    self._commands = []
    self._fd = parent
    self._child_fd = child
    self.subprocess = p
    self.pid = p.pid
    self._write_buffer = cStringIO.StringIO()
    self._read_buffer = cStringIO.StringIO()
    self._delegation_token_files = delegation_token_files

    # State that's accessed by other classes.
    self.shell_id = shell_id
    self.username = username
    # Timestamp that is updated on shell creation and on every output request. Used so that we know
    # when to kill the shell.
    self.time_received = time.time()
    self.last_output_sent = False
    self.remove_at_next_iteration = False
    self.destroyed = False
Example #16
0
 def copy_fd(fd):
     """Return an inheritable duplicate of the OS handle behind ``fd``, then close ``fd``."""
     os_handle = msvcrt.get_osfhandle(fd)
     inheritable_handle = duplicate(os_handle, inheritable=True)
     # The duplicate is made first, so closing the descriptor comes last.
     os.close(fd)
     return inheritable_handle
Example #17
0
        restart_args = json.loads(options.restart_args)
        factory = restart_args['factory']
        factory_args = restart_args['factory_args']

        start_delay = restart_args.get('start_delay')
        if start_delay is not None:
            factory_args['start_delay'] = start_delay
            print "(%s) delaying startup by %s" % (os.getpid(), start_delay)
            time.sleep(start_delay)

        fd = restart_args.get('fd')
        if fd is not None:
            sock = socket.fromfd(restart_args['fd'], socket.AF_INET, socket.SOCK_STREAM)
            ## socket.fromfd doesn't result in a socket object that has the same fd.
            ## The old fd is still open however, so we close it so we don't leak.
            os.close(restart_args['fd'])
        return start_controller(sock, factory, factory_args)

    ## We're starting up for the first time.
    if sys.platform != 'win32' and getattr(options,'daemonize'):
        # Do the daemon dance. Note that this isn't what is considered good
        # daemonization, because frankly it's convenient to keep the file
        # descriptors open (especially when there are prints scattered all
        # over the codebase.)
        # What we do instead is fork off, create a new session, fork again.
        # This leaves the process group in a state without a session
        # leader.
        pid = os.fork()
        if not pid:
            os.setsid()
            pid = os.fork()
Example #18
0
        factory = restart_args['factory']
        factory_args = restart_args['factory_args']

        start_delay = restart_args.get('start_delay')
        if start_delay is not None:
            factory_args['start_delay'] = start_delay
            print "(%s) delaying startup by %s" % (os.getpid(), start_delay)
            time.sleep(start_delay)

        fd = restart_args.get('fd')
        if fd is not None:
            sock = socket.fromfd(restart_args['fd'], socket.AF_INET,
                                 socket.SOCK_STREAM)
            ## socket.fromfd doesn't result in a socket object that has the same fd.
            ## The old fd is still open however, so we close it so we don't leak.
            os.close(restart_args['fd'])
        return start_controller(sock, factory, factory_args)

    ## We're starting up for the first time.
    if options.daemonize:
        # Do the daemon dance. Note that this isn't what is considered good
        # daemonization, because frankly it's convenient to keep the file
        # descriptors open (especially when there are prints scattered all
        # over the codebase.)
        # What we do instead is fork off, create a new session, fork again.
        # This leaves the process group in a state without a session
        # leader.
        pid = os.fork()
        if not pid:
            os.setsid()
            pid = os.fork()
Example #19
0
 def close_tail(self, tail):
     """Stop watching ``tail``, close its file descriptor and flush what is left in its buffer.

     Anything still held in ``tail.buffer`` is passed to ``self.handler`` as a
     final ``{'message': ...}`` dict.
     """
     self.watch_manager.rm_watch(tail.watch_descriptor)
     os.close(tail.file_descriptor)

     # Nothing buffered: no final message to emit.
     if not tail.buffer:
         return

     LOG.debug("Generating message from tail buffer")
     self.handler({'message': tail.buffer})
Example #20
0
 def close_tail(self, tail):
     """Stop watching ``tail``, close its file descriptor and flush what is left in its buffer.

     Anything still held in ``tail.buffer`` is passed to ``self.handler`` as a
     final ``{'message': ...}`` dict.
     """
     self.watch_manager.rm_watch(tail.watch_descriptor)
     os.close(tail.file_descriptor)

     # Nothing buffered: no final message to emit.
     if not tail.buffer:
         return

     LOG.debug("Generating message from tail buffer")
     self.handler({'message': tail.buffer})