def test_log_sanitize_correctness(self):
    """Check that heuristic_log_sanitize masks URL and ssh style passwords.

    The inputs are the ``repr`` of the data returned by ``self._gen_data``
    for ``self.URL_SECRET`` and ``self.SSH_SECRET``.
    """
    raw_url = repr(self._gen_data(3, True, True, self.URL_SECRET))
    raw_ssh = repr(self._gen_data(3, True, True, self.SSH_SECRET))

    url_output = heuristic_log_sanitize(raw_url)
    ssh_output = heuristic_log_sanitize(raw_ssh)

    # First the whole password must be gone, then even the part before its
    # embedded ":" (each fragment is checked against both outputs).
    leaked_fragments = ('pas:word', 'pas')
    sanitized_outputs = (url_output, ssh_output)
    try:
        for fragment in leaked_fragments:
            for sanitized in sanitized_outputs:
                self.assertNotIn(fragment, sanitized)
    except AttributeError:
        # python2.6 or less's unittest has no assertNotIn
        for fragment in leaked_fragments:
            for sanitized in sanitized_outputs:
                self.assertFalse(
                    fragment in sanitized,
                    '%s is present in %s' % ('"%s"' % fragment, sanitized))

    # The password is replaced by 8 "*", which is also the length of the
    # password.  For urls the end of the password is detected exactly, so
    # sanitizing must not change the overall length.
    self.assertEqual(len(url_output), len(raw_url))

    # The ssh heuristic is overzealous in many cases, so only the start, the
    # end and the masked credential itself are checked.  At least one ":"
    # precedes the password in the input, which keeps those parts stable.
    begins_as_dict = ssh_output.startswith("{'")
    self.assertTrue(begins_as_dict)
    ends_as_nested_dict = ssh_output.endswith("'}}}}")
    self.assertTrue(ends_as_nested_dict)

    masked_credential = ":********@foo.com/data',"
    try:
        self.assertIn(masked_credential, ssh_output)
    except AttributeError:
        # python2.6 or less's unittest has no assertIn
        self.assertTrue(
            masked_credential in ssh_output,
            '%s is not present in %s' % (masked_credential, ssh_output))
Example #2
0
 def fail_json(self, module):
     """Report this failure through ``module.fail_json``.

     The message is this object's stderr with trailing whitespace removed,
     passed through ``heuristic_log_sanitize`` together with
     ``module.no_log_values``.  The command is passed through
     ``module._clean_args``; rc, stdout and stderr are forwarded unchanged.
     """
     sanitized_msg = heuristic_log_sanitize(
         self.stderr.rstrip(), module.no_log_values
     )
     report_failure = module.fail_json
     failure_details = dict(
         git_cmd=module._clean_args(self.cmd),
         rc=self.rc,
         stdout=self.stdout,
         stderr=self.stderr,
         msg=sanitized_msg,
     )
     report_failure(**failure_details)
Example #3
0
    def test_log_sanitize_correctness(self):
        """Check that heuristic_log_sanitize masks URL and ssh style passwords.

        The inputs are the ``repr`` of the data returned by ``self._gen_data``
        for ``self.URL_SECRET`` and ``self.SSH_SECRET``.
        """
        raw_url = repr(self._gen_data(3, True, True, self.URL_SECRET))
        raw_ssh = repr(self._gen_data(3, True, True, self.SSH_SECRET))

        url_output = heuristic_log_sanitize(raw_url)
        ssh_output = heuristic_log_sanitize(raw_ssh)

        # First the whole password must be gone, then even the part before
        # its embedded ":" (each fragment is checked against both outputs).
        leaked_fragments = ('pas:word', 'pas')
        sanitized_outputs = (url_output, ssh_output)
        try:
            for fragment in leaked_fragments:
                for sanitized in sanitized_outputs:
                    self.assertNotIn(fragment, sanitized)
        except AttributeError:
            # python2.6 or less's unittest has no assertNotIn
            for fragment in leaked_fragments:
                for sanitized in sanitized_outputs:
                    self.assertFalse(
                        fragment in sanitized,
                        '%s is present in %s' % ('"%s"' % fragment, sanitized))

        # The password is replaced by 8 "*", which is also the length of the
        # password.  For urls the end of the password is detected exactly,
        # so sanitizing must not change the overall length.
        self.assertEqual(len(url_output), len(raw_url))

        # The ssh heuristic is overzealous in many cases, so only the start,
        # the end and the masked credential itself are checked.  At least
        # one ":" precedes the password in the input, which keeps those
        # parts stable.
        begins_as_dict = ssh_output.startswith("{'")
        self.assertTrue(begins_as_dict)
        ends_as_nested_dict = ssh_output.endswith("'}}}}")
        self.assertTrue(ends_as_nested_dict)

        masked_credential = ":********@foo.com/data',"
        try:
            self.assertIn(masked_credential, ssh_output)
        except AttributeError:
            # python2.6 or less's unittest has no assertIn
            self.assertTrue(
                masked_credential in ssh_output,
                '%s is not present in %s' % (masked_credential, ssh_output))
    def test_hides_url_secrets(self):
        """Sanitizing ``self.url_data`` must remove the password, length intact."""
        url_output = heuristic_log_sanitize(self.url_data)

        # The whole password must be hidden, and so must the part in front of
        # its embedded ":" -- the sanitizer may not stop at that separator.
        for leaked_fragment in ("pas:word", "pas"):
            self.assertNotIn(leaked_fragment, url_output)

        # The password is replaced by 8 "*", which is also the length of the
        # password.  For urls the end of the password is detected exactly,
        # so sanitizing must not change the overall length.
        sanitized_length = len(url_output)
        original_length = len(self.url_data)
        self.assertEqual(sanitized_length, original_length)
    def test_hides_url_secrets(self):
        """Sanitizing ``self.url_data`` must remove the password, length intact."""
        url_output = heuristic_log_sanitize(self.url_data)

        # The whole password must be hidden, and so must the part in front of
        # its embedded ":" -- the sanitizer may not stop at that separator.
        for leaked_fragment in ('pas:word', 'pas'):
            self.assertNotIn(leaked_fragment, url_output)

        # The password is replaced by 8 "*", which is also the length of the
        # password.  For urls the end of the password is detected exactly,
        # so sanitizing must not change the overall length.
        sanitized_length = len(url_output)
        original_length = len(self.url_data)
        self.assertEqual(sanitized_length, original_length)
    def test_hides_ssh_secrets(self):
        """Sanitizing ``self.ssh_data`` must mask the ssh style password."""
        ssh_output = heuristic_log_sanitize(self.ssh_data)

        # The whole password must be hidden, and so must the part in front of
        # its embedded ":" -- the sanitizer may not stop at that separator.
        for leaked_fragment in ('pas:word', 'pas'):
            self.assertNotIn(leaked_fragment, ssh_output)

        # The ssh heuristic is overzealous in many cases, so only the start,
        # the end and the masked credential itself are checked.  At least
        # one ":" precedes the password in the input, which keeps those
        # parts stable.
        begins_as_dict = ssh_output.startswith("{'")
        self.assertTrue(begins_as_dict)
        ends_as_dict = ssh_output.endswith("}")
        self.assertTrue(ends_as_dict)
        self.assertIn(":********@foo.com/data'", ssh_output)
    def test_hides_ssh_secrets(self):
        """Sanitizing ``self.ssh_data`` must mask the ssh style password."""
        ssh_output = heuristic_log_sanitize(self.ssh_data)

        # The whole password must be hidden, and so must the part in front of
        # its embedded ":" -- the sanitizer may not stop at that separator.
        for leaked_fragment in ("pas:word", "pas"):
            self.assertNotIn(leaked_fragment, ssh_output)

        # The ssh heuristic is overzealous in many cases, so only the start,
        # the end and the masked credential itself are checked.  At least
        # one ":" precedes the password in the input, which keeps those
        # parts stable.
        begins_as_dict = ssh_output.startswith("{'")
        self.assertTrue(begins_as_dict)
        ends_as_dict = ssh_output.endswith("}")
        self.assertTrue(ends_as_dict)
        self.assertIn(":********@foo.com/data'", ssh_output)
 def test_log_sanitize_speed_many_ssh(self):
     """Run heuristic_log_sanitize over ``self.many_ssh``; the result is discarded."""
     payload = self.many_ssh
     heuristic_log_sanitize(payload)
Example #9
0
 def test_log_sanitize_speed_zero_secrets(self):
     """Run heuristic_log_sanitize over ``self.zero_secrets``; the result is discarded."""
     payload = self.zero_secrets
     heuristic_log_sanitize(payload)
Example #10
0
 def test_log_sanitize_speed_one_ssh(self):
     """Run heuristic_log_sanitize over ``self.one_ssh``; the result is discarded."""
     payload = self.one_ssh
     heuristic_log_sanitize(payload)
Example #11
0
 def test_log_sanitize_speed_many_ssh(self):
     """Run heuristic_log_sanitize over ``self.many_ssh``; the result is discarded."""
     payload = self.many_ssh
     heuristic_log_sanitize(payload)
Example #12
0
    def test_module_utils_basic_heuristic_log_sanitize(self):
        """Check that heuristic_log_sanitize masks URL and ssh style passwords.

        Builds a nested hostvars-like dict that carries the secret at the top
        level and inside every host record, sanitizes its ``repr`` and checks
        that no part of the password survives.
        """
        from ansible.module_utils.basic import heuristic_log_sanitize

        # The password is the 8 characters 'pas:word'.  The embedded ":"
        # verifies that the sanitizer does not stop at the first separator,
        # and the length matches the 8 "*" used as the replacement.
        URL_SECRET = 'http://username:pas:word@foo.com/data'
        SSH_SECRET = 'username:pas:word@foo.com/data'

        def _gen_data(records, per_rec, top_level, secret_text):
            # Build {'hostvars': {'host1': {...}, ...}} with records - 1 hosts;
            # the secret is added per host and/or at the top level on request.
            hostvars = {'hostvars': {}}
            for i in range(1, records, 1):
                host_facts = {'host%s' % i:
                                {'pstack':
                                    {'running': '875.1',
                                     'symlinked': '880.0',
                                     'tars': [],
                                     'versions': ['885.0']},
                             }}
                if per_rec:
                    host_facts['host%s' % i]['secret'] = secret_text
                hostvars['hostvars'].update(host_facts)
            if top_level:
                hostvars['secret'] = secret_text
            return hostvars

        url_data = repr(_gen_data(3, True, True, URL_SECRET))
        ssh_data = repr(_gen_data(3, True, True, SSH_SECRET))

        url_output = heuristic_log_sanitize(url_data)
        ssh_output = heuristic_log_sanitize(ssh_data)

        # Basic functionality: Successfully hid the password
        try:
            self.assertNotIn('pas:word', url_output)
            self.assertNotIn('pas:word', ssh_output)

            # Slightly more advanced, we hid all of the password despite the ":"
            self.assertNotIn('pas', url_output)
            self.assertNotIn('pas', ssh_output)
        except AttributeError:
            # python2.6 or less's unittest
            self.assertFalse('pas:word' in url_output, '%s is present in %s' % ('"pas:word"', url_output))
            self.assertFalse('pas:word' in ssh_output, '%s is present in %s' % ('"pas:word"', ssh_output))

            self.assertFalse('pas' in url_output, '%s is present in %s' % ('"pas"', url_output))
            self.assertFalse('pas' in ssh_output, '%s is present in %s' % ('"pas"', ssh_output))

        # In this implementation we replace the password with 8 "*" which is
        # also the length of our password.  The url fields should be able to
        # accurately detect where the password ends so the length should be
        # the same:
        self.assertEqual(len(url_output), len(url_data))

        # ssh checking is harder as the heuristic is overzealous in many
        # cases.  Since the input will have at least one ":" present before
        # the password we can tell some things about the beginning and end of
        # the data, though:
        self.assertTrue(ssh_output.startswith("{'"))
        self.assertTrue(ssh_output.endswith("}"))
        try:
            self.assertIn(":********@foo.com/data'", ssh_output)
        except AttributeError:
            # python2.6 or less's unittest
            self.assertTrue(":********@foo.com/data'" in ssh_output, '%s is not present in %s' % (":********@foo.com/data'", ssh_output))
Example #13
0
def zuul_run_command(self, args, check_rc=False, close_fds=True, executable=None, data=None, binary_data=False, path_prefix=None, cwd=None, use_unsafe_shell=False, prompt_regex=None, environ_update=None):
    '''
    Execute a command, returns rc, stdout, and stderr.

    The child's stderr is redirected into its stdout, and that stream is
    handed to a background thread running ``follow``.  The returned stdout
    and stderr are therefore always empty strings.

    :arg args: is the command to run
        * If args is a list, the command will be run with shell=False.
        * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False
        * If args is a string and use_unsafe_shell=True it runs with shell=True.
    :kw check_rc: Whether to call fail_json in case of non zero RC.
        Default False
    :kw close_fds: See documentation for subprocess.Popen(). Default True
    :kw executable: See documentation for subprocess.Popen(). Default None
    :kw data: If given, information to write to the stdin of the command
    :kw binary_data: If False, append a newline to the data.  Default False
    :kw path_prefix: If given, additional path to find the command in.
        This adds to the PATH environment variable so helper commands in
        the same directory can also be found
    :kw cwd: If given, working directory to run the command inside
    :kw use_unsafe_shell: See `args` parameter.  Default False
    :kw prompt_regex: Regex string (not a compiled regex) which can be
        used to detect prompts in the stdout which would otherwise cause
        the execution to hang (especially if no input data is specified).
        It is only validated here; the output is not scanned for prompts.
    :kwarg environ_update: dictionary to *update* os.environ with
    :returns: A 3-tuple of return code, stdout and stderr, where stdout
        and stderr are always ''.
    '''

    shell = False
    if isinstance(args, list):
        if use_unsafe_shell:
            args = " ".join([pipes.quote(x) for x in args])
            shell = True
    elif isinstance(args, (str, unicode)) and use_unsafe_shell:
        shell = True
    elif isinstance(args, (str, unicode)):
        # On python2.6 and below, shlex has problems with text type
        # ZUUL: Hardcode python2 until we're on ansible 2.2
        if isinstance(args, unicode):
            args = args.encode('utf-8')
        args = shlex.split(args)
    else:
        msg = "Argument 'args' to run_command must be list or string"
        self.fail_json(rc=257, cmd=args, msg=msg)

    prompt_re = None
    if prompt_regex:
        try:
            prompt_re = re.compile(prompt_regex, re.MULTILINE)
        except re.error:
            self.fail_json(msg="invalid prompt regular expression given to run_command")

    # expand things like $HOME and ~
    if not shell:
        args = [ os.path.expanduser(os.path.expandvars(x)) for x in args if x is not None ]

    rc = 0
    msg = None
    st_in = None

    # Manipulate the environ we'll send to the new process
    old_env_vals = {}
    # We can set this from both an attribute and per call.  setdefault()
    # keeps the first recorded value, so a key that is updated more than once
    # is still restored to what it was before this call.
    for key, val in self.run_command_environ_update.items():
        old_env_vals.setdefault(key, os.environ.get(key, None))
        os.environ[key] = val
    if environ_update:
        for key, val in environ_update.items():
            old_env_vals.setdefault(key, os.environ.get(key, None))
            os.environ[key] = val
    if path_prefix:
        old_env_vals.setdefault('PATH', os.environ['PATH'])
        os.environ['PATH'] = "%s:%s" % (path_prefix, os.environ['PATH'])

    # If using test-module and explode, the remote lib path will resemble ...
    #   /tmp/test_module_scratch/debug_dir/ansible/module_utils/basic.py
    # If using ansible or ansible-playbook with a remote system ...
    #   /tmp/ansible_vmweLQ/ansible_modlib.zip/ansible/module_utils/basic.py

    # Clean out python paths set by ansiballz
    if 'PYTHONPATH' in os.environ:
        pypaths = os.environ['PYTHONPATH'].split(':')
        pypaths = [x for x in pypaths \
                    if not x.endswith('/ansible_modlib.zip') \
                    and not x.endswith('/debug_dir')]
        os.environ['PYTHONPATH'] = ':'.join(pypaths)
        if not os.environ['PYTHONPATH']:
            del os.environ['PYTHONPATH']

    # create a printable version of the command for use
    # in reporting later, which strips out things like
    # passwords from the args list
    to_clean_args = args
    # ZUUL: Hardcode python2 until we're on ansible 2.2
    if isinstance(args, (unicode, str)):
        to_clean_args = shlex.split(to_clean_args)

    clean_args = []
    is_passwd = False
    for arg in to_clean_args:
        if is_passwd:
            # The previous argument was a password option: mask its value
            is_passwd = False
            clean_args.append('********')
            continue
        if PASSWD_ARG_RE.match(arg):
            sep_idx = arg.find('=')
            if sep_idx > -1:
                clean_args.append('%s=********' % arg[:sep_idx])
                continue
            else:
                is_passwd = True
        arg = heuristic_log_sanitize(arg, self.no_log_values)
        clean_args.append(arg)
    clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)

    if data:
        st_in = subprocess.PIPE

    # ZUUL: changed stderr to follow stdout
    kwargs = dict(
        executable=executable,
        shell=shell,
        close_fds=close_fds,
        stdin=st_in,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    if cwd and os.path.isdir(cwd):
        kwargs['cwd'] = cwd

    # store the pwd
    prev_dir = os.getcwd()

    # make sure we're in the right working directory
    if cwd and os.path.isdir(cwd):
        try:
            os.chdir(cwd)
        except (OSError, IOError):
            e = get_exception()
            self.fail_json(rc=e.errno, msg="Could not open %s, %s" % (cwd, str(e)))

    try:

        if self._debug:
            # Log the sanitized command line so that passwords given as
            # arguments do not end up in the log.
            self.log('Executing: ' + clean_args)
        # ZUUL: Replaced the execution loop with the zuul_runner run function
        cmd = subprocess.Popen(args, **kwargs)
        t = threading.Thread(target=follow, args=(cmd.stdout,))
        t.daemon = True
        t.start()

        # Feed the requested input to the child and close its stdin so that
        # a command reading until EOF does not block forever.
        if data:
            if not binary_data:
                data += '\n'
            if isinstance(data, unicode):
                data = data.encode('utf-8')
            cmd.stdin.write(data)
            cmd.stdin.close()

        ret = cmd.wait()
        # Give the thread that is writing the console log up to 10 seconds
        # to catch up and exit.  If it hasn't done so by then, it is very
        # likely stuck in readline() because it spawned a child that is
        # holding stdout or stderr open.
        t.join(10)
        with Console() as console:
            if t.is_alive():
                console.addLine("[Zuul] standard output/error still open "
                                "after child exited")
            console.addLine("[Zuul] Task exit code: %s\n" % ret)

        # ZUUL: If the console log follow thread *is* stuck in readline,
        # we can't close stdout (attempting to do so raises an
        # exception) , so this is disabled.
        # cmd.stdout.close()

        # ZUUL: stdout and stderr are in the console log file
        stdout = ''
        stderr = ''

        rc = cmd.returncode
    except (OSError, IOError):
        e = get_exception()
        self.fail_json(rc=e.errno, msg=str(e), cmd=clean_args)
    except Exception:
        e = get_exception()
        self.fail_json(rc=257, msg=str(e), exception=traceback.format_exc(), cmd=clean_args)

    # Restore env settings
    for key, val in old_env_vals.items():
        if val is None:
            # The key may already be gone (e.g. PYTHONPATH removed by the
            # cleanup above), so do not fail when it is missing.
            os.environ.pop(key, None)
        else:
            os.environ[key] = val

    if rc != 0 and check_rc:
        msg = heuristic_log_sanitize(stderr.rstrip(), self.no_log_values)
        self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)

    # reset the pwd
    os.chdir(prev_dir)

    return (rc, stdout, stderr)
Example #14
0
    def test_module_utils_basic_heuristic_log_sanitize(self):
        """Check that heuristic_log_sanitize masks URL and ssh style passwords.

        Builds a nested hostvars-like dict that carries the secret at the top
        level and inside every host record, sanitizes its ``repr`` and checks
        that no part of the password survives.
        """
        from ansible.module_utils.basic import heuristic_log_sanitize

        # The password is the 8 characters 'pas:word'.  The embedded ":"
        # verifies that the sanitizer does not stop at the first separator,
        # and the length matches the 8 "*" used as the replacement.
        URL_SECRET = 'http://username:pas:word@foo.com/data'
        SSH_SECRET = 'username:pas:word@foo.com/data'

        def _gen_data(records, per_rec, top_level, secret_text):
            # Build {'hostvars': {'host1': {...}, ...}} with records - 1 hosts;
            # the secret is added per host and/or at the top level on request.
            hostvars = {'hostvars': {}}
            for i in range(1, records, 1):
                host_facts = {'host%s' % i:
                                {'pstack':
                                    {'running': '875.1',
                                     'symlinked': '880.0',
                                     'tars': [],
                                     'versions': ['885.0']},
                             }}
                if per_rec:
                    host_facts['host%s' % i]['secret'] = secret_text
                hostvars['hostvars'].update(host_facts)
            if top_level:
                hostvars['secret'] = secret_text
            return hostvars

        url_data = repr(_gen_data(3, True, True, URL_SECRET))
        ssh_data = repr(_gen_data(3, True, True, SSH_SECRET))

        url_output = heuristic_log_sanitize(url_data)
        ssh_output = heuristic_log_sanitize(ssh_data)

        # Basic functionality: Successfully hid the password
        try:
            self.assertNotIn('pas:word', url_output)
            self.assertNotIn('pas:word', ssh_output)

            # Slightly more advanced, we hid all of the password despite the ":"
            self.assertNotIn('pas', url_output)
            self.assertNotIn('pas', ssh_output)
        except AttributeError:
            # python2.6 or less's unittest
            self.assertFalse('pas:word' in url_output, '%s is present in %s' % ('"pas:word"', url_output))
            self.assertFalse('pas:word' in ssh_output, '%s is present in %s' % ('"pas:word"', ssh_output))

            self.assertFalse('pas' in url_output, '%s is present in %s' % ('"pas"', url_output))
            self.assertFalse('pas' in ssh_output, '%s is present in %s' % ('"pas"', ssh_output))

        # In this implementation we replace the password with 8 "*" which is
        # also the length of our password.  The url fields should be able to
        # accurately detect where the password ends so the length should be
        # the same:
        self.assertEqual(len(url_output), len(url_data))

        # ssh checking is harder as the heuristic is overzealous in many
        # cases.  Since the input will have at least one ":" present before
        # the password we can tell some things about the beginning and end of
        # the data, though:
        self.assertTrue(ssh_output.startswith("{'"))
        self.assertTrue(ssh_output.endswith("}"))
        try:
            self.assertIn(":********@foo.com/data'", ssh_output)
        except AttributeError:
            # python2.6 or less's unittest
            self.assertTrue(":********@foo.com/data'" in ssh_output, '%s is not present in %s' % (":********@foo.com/data'", ssh_output))
Example #15
0
def zuul_run_command(self, args, zuul_log_id, check_rc=False, close_fds=True, executable=None, data=None, binary_data=False, path_prefix=None, cwd=None,
                     use_unsafe_shell=False, prompt_regex=None, environ_update=None, umask=None, encoding='utf-8', errors='surrogate_or_strict'):
    '''
    Execute a command, returns rc, stdout, and stderr.

    The child's stderr is redirected into its stdout.  Unless ``self.no_log``
    is set, that stream is handed to a background thread running ``follow``
    and the returned stdout is assembled from ``_log_lines``.  The returned
    stderr is always empty.

    :arg args: is the command to run
        * If args is a list, the command will be run with shell=False.
        * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False
        * If args is a string and use_unsafe_shell=True it runs with shell=True.
    :arg zuul_log_id: identifier passed to ``follow`` and ``Console`` for
        the console log of this command
    :kw check_rc: Whether to call fail_json in case of non zero RC.
        Default False
    :kw close_fds: See documentation for subprocess.Popen(). Default True
    :kw executable: See documentation for subprocess.Popen(). Default None
    :kw data: If given, information to write to the stdin of the command
    :kw binary_data: If False, append a newline to the data.  Default False
    :kw path_prefix: If given, additional path to find the command in.
        This adds to the PATH environment variable so helper commands in
        the same directory can also be found
    :kw cwd: If given, working directory to run the command inside
    :kw use_unsafe_shell: See `args` parameter.  Default False
    :kw prompt_regex: Regex string (not a compiled regex) which can be
        used to detect prompts in the stdout which would otherwise cause
        the execution to hang (especially if no input data is specified).
        It is only validated here; the output is not scanned for prompts.
    :kw environ_update: dictionary to *update* os.environ with
    :kw umask: Umask to be used when running the command. Default None
    :kw encoding: Since we return native strings, on python3 we need to
        know the encoding to use to transform from bytes to text.  If you
        want to always get bytes back, use encoding=None.  The default is
        "utf-8".  This does not affect transformation of strings given as
        args.
    :kw errors: Since we return native strings, on python3 we need to
        transform stdout and stderr from bytes to text.  If the bytes are
        undecodable in the ``encoding`` specified, then use this error
        handler to deal with them.  The default is ``surrogate_or_strict``
        which means that the bytes will be decoded using the
        surrogateescape error handler if available (available on all
        python3 versions we support) otherwise a UnicodeError traceback
        will be raised.  This does not affect transformations of strings
        given as args.
    :returns: A 3-tuple of return code (integer), stdout (native string),
        and stderr (native string).  On python2, stdout and stderr are both
        byte strings.  On python3, stdout and stderr are text strings converted
        according to the encoding and errors parameters.  If you want byte
        strings on python3, use encoding=None to turn decoding to text off.
    '''

    if not isinstance(args, (list, binary_type, text_type)):
        msg = "Argument 'args' to run_command must be list or string"
        self.fail_json(rc=257, cmd=args, msg=msg)

    shell = False
    if use_unsafe_shell:

        # stringify args for unsafe/direct shell usage
        if isinstance(args, list):
            args = " ".join([shlex_quote(x) for x in args])

        # not set explicitly, check if set by controller
        if executable:
            args = [executable, '-c', args]
        elif self._shell not in (None, '/bin/sh'):
            args = [self._shell, '-c', args]
        else:
            shell = True
    else:
        # ensure args are a list
        if isinstance(args, (binary_type, text_type)):
            # On python2.6 and below, shlex has problems with text type
            # On python3, shlex needs a text type.
            if PY2:
                args = to_bytes(args, errors='surrogate_or_strict')
            elif PY3:
                args = to_text(args, errors='surrogateescape')
            args = shlex.split(args)

        # expand shellisms
        args = [os.path.expanduser(os.path.expandvars(x)) for x in args if x is not None]

    prompt_re = None
    if prompt_regex:
        if isinstance(prompt_regex, text_type):
            if PY3:
                prompt_regex = to_bytes(prompt_regex, errors='surrogateescape')
            elif PY2:
                prompt_regex = to_bytes(prompt_regex, errors='surrogate_or_strict')
        try:
            prompt_re = re.compile(prompt_regex, re.MULTILINE)
        except re.error:
            self.fail_json(msg="invalid prompt regular expression given to run_command")

    rc = 0
    msg = None
    st_in = None

    # Manipulate the environ we'll send to the new process
    old_env_vals = {}
    # We can set this from both an attribute and per call.  setdefault()
    # keeps the first recorded value, so a key that is updated more than once
    # is still restored to what it was before this call.
    for key, val in self.run_command_environ_update.items():
        old_env_vals.setdefault(key, os.environ.get(key, None))
        os.environ[key] = val
    if environ_update:
        for key, val in environ_update.items():
            old_env_vals.setdefault(key, os.environ.get(key, None))
            os.environ[key] = val
    if path_prefix:
        old_env_vals.setdefault('PATH', os.environ['PATH'])
        os.environ['PATH'] = "%s:%s" % (path_prefix, os.environ['PATH'])

    # If using test-module and explode, the remote lib path will resemble ...
    #   /tmp/test_module_scratch/debug_dir/ansible/module_utils/basic.py
    # If using ansible or ansible-playbook with a remote system ...
    #   /tmp/ansible_vmweLQ/ansible_modlib.zip/ansible/module_utils/basic.py

    # Clean out python paths set by ansiballz
    if 'PYTHONPATH' in os.environ:
        pypaths = os.environ['PYTHONPATH'].split(':')
        pypaths = [x for x in pypaths
                   if not x.endswith('/ansible_modlib.zip') and
                   not x.endswith('/debug_dir')]
        os.environ['PYTHONPATH'] = ':'.join(pypaths)
        if not os.environ['PYTHONPATH']:
            del os.environ['PYTHONPATH']

    # create a printable version of the command for use
    # in reporting later, which strips out things like
    # passwords from the args list
    to_clean_args = args
    if PY2:
        if isinstance(args, text_type):
            to_clean_args = to_bytes(args)
    else:
        if isinstance(args, binary_type):
            to_clean_args = to_text(args)
    if isinstance(args, (text_type, binary_type)):
        to_clean_args = shlex.split(to_clean_args)

    clean_args = []
    is_passwd = False
    for arg in (to_native(a) for a in to_clean_args):
        if is_passwd:
            # The previous argument was a password option: mask its value
            is_passwd = False
            clean_args.append('********')
            continue
        if PASSWD_ARG_RE.match(arg):
            sep_idx = arg.find('=')
            if sep_idx > -1:
                clean_args.append('%s=********' % arg[:sep_idx])
                continue
            else:
                is_passwd = True
        arg = heuristic_log_sanitize(arg, self.no_log_values)
        clean_args.append(arg)
    clean_args = ' '.join(shlex_quote(arg) for arg in clean_args)

    if data:
        st_in = subprocess.PIPE

    # ZUUL: changed stderr to follow stdout
    kwargs = dict(
        executable=executable,
        shell=shell,
        close_fds=close_fds,
        stdin=st_in,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # store the pwd
    prev_dir = os.getcwd()

    # make sure we're in the right working directory
    if cwd and os.path.isdir(cwd):
        cwd = os.path.abspath(os.path.expanduser(cwd))
        kwargs['cwd'] = cwd
        try:
            os.chdir(cwd)
        except (OSError, IOError) as e:
            self.fail_json(rc=e.errno, msg="Could not open %s, %s" % (cwd, to_native(e)),
                           exception=traceback.format_exc())

    old_umask = None
    if umask:
        old_umask = os.umask(umask)

    t = None
    fail_json_kwargs = None

    try:
        if self._debug:
            self.log('Executing: ' + clean_args)

        # ZUUL: Replaced the execution loop with the zuul_runner run function

        cmd = subprocess.Popen(args, **kwargs)
        if self.no_log:
            t = None
        else:
            t = threading.Thread(target=follow, args=(cmd.stdout, zuul_log_id))
            t.daemon = True
            t.start()

        # ZUUL: Our log thread will catch the output so don't do that here.

        # # the communication logic here is essentially taken from that
        # # of the _communicate() function in ssh.py
        #
        # stdout = b('')
        # stderr = b('')
        #
        # # ZUUL: stderr follows stdout
        # rpipes = [cmd.stdout]

        # Feed the requested input to the child and close its stdin so that
        # a command reading until EOF does not block forever.  Convert to
        # bytes first so the newline can be appended to text and bytes alike.
        if data:
            if isinstance(data, text_type):
                data = to_bytes(data)
            if not binary_data:
                data += b'\n'
            cmd.stdin.write(data)
            cmd.stdin.close()

        # while True:
        #     rfds, wfds, efds = select.select(rpipes, [], rpipes, 1)
        #     stdout += self._read_from_pipes(rpipes, rfds, cmd.stdout)
        #
        #     # ZUUL: stderr follows stdout
        #     # stderr += self._read_from_pipes(rpipes, rfds, cmd.stderr)
        #
        #     # if we're checking for prompts, do it now
        #     if prompt_re:
        #         if prompt_re.search(stdout) and not data:
        #             if encoding:
        #                 stdout = to_native(stdout, encoding=encoding, errors=errors)
        #             else:
        #                 stdout = stdout
        #             return (257, stdout, "A prompt was encountered while running a command, but no input data was specified")
        #     # only break out if no pipes are left to read or
        #     # the pipes are completely read and
        #     # the process is terminated
        #     if (not rpipes or not rfds) and cmd.poll() is not None:
        #         break
        #     # No pipes are left to read but process is not yet terminated
        #     # Only then it is safe to wait for the process to be finished
        #     # NOTE: Actually cmd.poll() is always None here if rpipes is empty
        #     elif not rpipes and cmd.poll() is None:
        #         cmd.wait()
        #         # The process is terminated. Since no pipes to read from are
        #         # left, there is no need to call select() again.
        #         break

        # ZUUL: If the console log follow thread *is* stuck in readline,
        # we can't close stdout (attempting to do so raises an
        # exception) , so this is disabled.
        # cmd.stdout.close()
        # cmd.stderr.close()

        rc = cmd.wait()

        # Give the thread that is writing the console log up to 10 seconds
        # to catch up and exit.  If it hasn't done so by then, it is very
        # likely stuck in readline() because it spawned a child that is
        # holding stdout or stderr open.  The "still open" notice itself is
        # written once, in the finally block below.
        if t:
            t.join(10)
            # ZUUL: stdout and stderr are in the console log file
            # ZUUL: return the saved log lines so we can ship them back
            stdout = b('').join(_log_lines)
        else:
            stdout = b('')
        stderr = b('')

    except (OSError, IOError) as e:
        self.log("Error Executing CMD:%s Exception:%s" % (clean_args, to_native(e)))
        # ZUUL: store fail_json_kwargs and fail later in finally
        fail_json_kwargs = dict(rc=e.errno, msg=to_native(e), cmd=clean_args)
    except Exception as e:
        self.log("Error Executing CMD:%s Exception:%s" % (clean_args, to_native(traceback.format_exc())))
        # ZUUL: store fail_json_kwargs and fail later in finally
        fail_json_kwargs = dict(rc=257, msg=to_native(e), exception=traceback.format_exc(), cmd=clean_args)
    finally:
        if t:
            with Console(zuul_log_id) as console:
                # is_alive() exists on python2.6+ and python3; the camelCase
                # isAlive() alias was removed in python3.9.
                if t.is_alive():
                    console.addLine("[Zuul] standard output/error still open "
                                    "after child exited")
                if fail_json_kwargs:
                    # we hit an exception and need to use the rc from
                    # fail_json_kwargs
                    rc = fail_json_kwargs['rc']

                console.addLine("[Zuul] Task exit code: %s\n" % rc)

        if fail_json_kwargs:
            self.fail_json(**fail_json_kwargs)

    # Restore env settings
    for key, val in old_env_vals.items():
        if val is None:
            # The key may already be gone (e.g. PYTHONPATH removed by the
            # cleanup above), so do not fail when it is missing.
            os.environ.pop(key, None)
        else:
            os.environ[key] = val

    # A previous umask of 0 is falsy but still has to be restored
    if old_umask is not None:
        os.umask(old_umask)

    if rc != 0 and check_rc:
        msg = heuristic_log_sanitize(stderr.rstrip(), self.no_log_values)
        self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)

    # reset the pwd
    os.chdir(prev_dir)

    if encoding is not None:
        return (rc, to_native(stdout, encoding=encoding, errors=errors),
                to_native(stderr, encoding=encoding, errors=errors))
    return (rc, stdout, stderr)
 def test_did_not_hide_too_much(self):
     """Sanitizing ``self.clean_data`` must return it unchanged."""
     # assertEqual replaces the deprecated assertEquals alias, which newer
     # unittest versions no longer provide.
     self.assertEqual(heuristic_log_sanitize(self.clean_data),
                      self.clean_data)
Example #17
0
def zuul_run_command(self, args, zuul_log_id, check_rc=False, close_fds=True, executable=None, data=None, binary_data=False, path_prefix=None, cwd=None, use_unsafe_shell=False, prompt_regex=None, environ_update=None, umask=None, encoding='utf-8', errors='surrogate_or_strict'):
    '''
    Execute a command, returns rc, stdout, and stderr.

    The child's stderr is redirected into its stdout, and (unless
    ``self.no_log`` is set) the output is consumed by a ``follow`` thread
    that feeds the Zuul console log.  As a consequence the returned stderr
    is always empty, and the returned stdout is empty when ``self.no_log``
    is set.

    :arg args: is the command to run
        * If args is a list, the command will be run with shell=False.
        * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False
        * If args is a string and use_unsafe_shell=True it runs with shell=True.
    :arg zuul_log_id: identifier handed to the ``follow`` thread and to
        ``Console`` to select the console log being written
    :kw check_rc: Whether to call fail_json in case of non zero RC.
        Default False
    :kw close_fds: See documentation for subprocess.Popen(). Default True
    :kw executable: See documentation for subprocess.Popen(). Default None
    :kw data: If given, information to write to the stdin of the command
    :kw binary_data: If False, append a newline to the data.  Default False
    :kw path_prefix: If given, additional path to find the command in.
        This adds to the PATH environment variable so helper commands in
        the same directory can also be found
    :kw cwd: If given, working directory to run the command inside
    :kw use_unsafe_shell: See `args` parameter.  Default False
    :kw prompt_regex: Regex string (not a compiled regex) which can be
        used to detect prompts in the stdout which would otherwise cause
        the execution to hang (especially if no input data is specified).
        In this implementation the regex is only validated (an invalid
        pattern fails the module); it is not matched against the output.
    :kw environ_update: dictionary to *update* os.environ with
    :kw umask: Umask to be used when running the command. Default None
    :kw encoding: Since we return native strings, on python3 we need to
        know the encoding to use to transform from bytes to text.  If you
        want to always get bytes back, use encoding=None.  The default is
        "utf-8".  This does not affect transformation of strings given as
        args.
    :kw errors: Since we return native strings, on python3 we need to
        transform stdout and stderr from bytes to text.  If the bytes are
        undecodable in the ``encoding`` specified, then use this error
        handler to deal with them.  The default is ``surrogate_or_strict``
        which means that the bytes will be decoded using the
        surrogateescape error handler if available (available on all
        python3 versions we support) otherwise a UnicodeError traceback
        will be raised.  This does not affect transformations of strings
        given as args.
    :returns: A 3-tuple of return code (integer), stdout (native string),
        and stderr (native string).  On python2, stdout and stderr are both
        byte strings.  On python3, stdout and stderr are text strings converted
        according to the encoding and errors parameters.  If you want byte
        strings on python3, use encoding=None to turn decoding to text off.
    '''

    shell = False
    if isinstance(args, list):
        if use_unsafe_shell:
            args = " ".join([pipes.quote(x) for x in args])
            shell = True
    elif isinstance(args, (binary_type, text_type)) and use_unsafe_shell:
        shell = True
    elif isinstance(args, (binary_type, text_type)):
        # On python2.6 and below, shlex has problems with text type
        # On python3, shlex needs a text type.
        if PY2:
            args = to_bytes(args, errors='surrogate_or_strict')
        elif PY3:
            args = to_text(args, errors='surrogateescape')
        args = shlex.split(args)
    else:
        msg = "Argument 'args' to run_command must be list or string"
        self.fail_json(rc=257, cmd=args, msg=msg)

    # The compiled pattern is not consulted later in this function; compiling
    # it here still rejects an invalid expression up front.
    prompt_re = None
    if prompt_regex:
        if isinstance(prompt_regex, text_type):
            if PY3:
                prompt_regex = to_bytes(prompt_regex, errors='surrogateescape')
            elif PY2:
                prompt_regex = to_bytes(prompt_regex, errors='surrogate_or_strict')
        try:
            prompt_re = re.compile(prompt_regex, re.MULTILINE)
        except re.error:
            self.fail_json(msg="invalid prompt regular expression given to run_command")

    # expand things like $HOME and ~
    if not shell:
        args = [os.path.expanduser(os.path.expandvars(x)) for x in args if x is not None]

    rc = 0
    msg = None
    st_in = None

    # Manipulate the environ we'll send to the new process
    old_env_vals = {}
    # We can set this from both an attribute and per call
    for key, val in self.run_command_environ_update.items():
        old_env_vals[key] = os.environ.get(key, None)
        os.environ[key] = val
    if environ_update:
        for key, val in environ_update.items():
            # Only remember the value the first time a key is touched: by
            # now os.environ may already hold our own override for it, and
            # recording that would make the restore below a no-op.
            if key not in old_env_vals:
                old_env_vals[key] = os.environ.get(key, None)
            os.environ[key] = val
    if path_prefix:
        # Same reasoning as above: keep the pre-call PATH if it was already
        # recorded by one of the update loops.
        if 'PATH' not in old_env_vals:
            old_env_vals['PATH'] = os.environ['PATH']
        os.environ['PATH'] = "%s:%s" % (path_prefix, os.environ['PATH'])

    # If using test-module and explode, the remote lib path will resemble ...
    #   /tmp/test_module_scratch/debug_dir/ansible/module_utils/basic.py
    # If using ansible or ansible-playbook with a remote system ...
    #   /tmp/ansible_vmweLQ/ansible_modlib.zip/ansible/module_utils/basic.py

    # Clean out python paths set by ansiballz
    if 'PYTHONPATH' in os.environ:
        pypaths = os.environ['PYTHONPATH'].split(':')
        pypaths = [x for x in pypaths
                   if not x.endswith('/ansible_modlib.zip') and
                   not x.endswith('/debug_dir')]
        os.environ['PYTHONPATH'] = ':'.join(pypaths)
        if not os.environ['PYTHONPATH']:
            del os.environ['PYTHONPATH']

    # create a printable version of the command for use
    # in reporting later, which strips out things like
    # passwords from the args list
    to_clean_args = args
    if PY2:
        if isinstance(args, text_type):
            to_clean_args = to_bytes(args)
    else:
        if isinstance(args, binary_type):
            to_clean_args = to_text(args)
    if isinstance(args, (text_type, binary_type)):
        to_clean_args = shlex.split(to_clean_args)

    clean_args = []
    is_passwd = False
    for arg in to_clean_args:
        if is_passwd:
            # Previous argument was a password option without "=", so this
            # argument is its value.
            is_passwd = False
            clean_args.append('********')
            continue
        if PASSWD_ARG_RE.match(arg):
            sep_idx = arg.find('=')
            if sep_idx > -1:
                clean_args.append('%s=********' % arg[:sep_idx])
                continue
            else:
                is_passwd = True
        arg = heuristic_log_sanitize(arg, self.no_log_values)
        clean_args.append(arg)
    clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)

    if data:
        st_in = subprocess.PIPE

    # ZUUL: changed stderr to follow stdout
    kwargs = dict(
        executable=executable,
        shell=shell,
        close_fds=close_fds,
        stdin=st_in,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # store the pwd
    prev_dir = os.getcwd()

    # make sure we're in the right working directory
    if cwd and os.path.isdir(cwd):
        cwd = os.path.abspath(os.path.expanduser(cwd))
        kwargs['cwd'] = cwd
        try:
            os.chdir(cwd)
        except (OSError, IOError) as e:
            self.fail_json(rc=e.errno, msg="Could not open %s, %s" % (cwd, to_native(e)))

    old_umask = None
    if umask:
        old_umask = os.umask(umask)

    t = None
    fail_json_kwargs = None
    ret = None

    try:
        if self._debug:
            self.log('Executing: ' + clean_args)

        # ZUUL: Replaced the execution loop with the zuul_runner run function
        cmd = subprocess.Popen(args, **kwargs)
        if self.no_log:
            # NOTE(review): with no_log nothing reads cmd.stdout, so a child
            # producing a lot of output could block on a full pipe - confirm.
            t = None
        else:
            t = threading.Thread(target=follow, args=(cmd.stdout, zuul_log_id))
            t.daemon = True
            t.start()

        # Feed the requested input to the child and close the pipe so it
        # sees EOF; otherwise a command reading stdin would wait forever.
        # This is done after the follow thread has started so the child's
        # output keeps draining while we write.
        if data:
            if isinstance(data, text_type):
                data = to_bytes(data)
            if not binary_data:
                data += b('\n')
            cmd.stdin.write(data)
            cmd.stdin.close()

        ret = cmd.wait()

        # Give the thread that is writing the console log up to 10 seconds
        # to catch up and exit.  If it hasn't done so by then, it is very
        # likely stuck in readline() because it spawned a child that is
        # holding stdout or stderr open.
        if t:
            t.join(10)
            # NOTE(review): on the success path these lines are emitted here
            # and again in the finally block below - confirm that the
            # duplicate console output is intended.
            with Console(zuul_log_id) as console:
                if t.is_alive():
                    console.addLine("[Zuul] standard output/error still open "
                                    "after child exited")
                console.addLine("[Zuul] Task exit code: %s\n" % ret)

            # ZUUL: If the console log follow thread *is* stuck in readline,
            # we can't close stdout (attempting to do so raises an
            # exception) , so this is disabled.
            # cmd.stdout.close()
            # cmd.stderr.close()

            # ZUUL: stdout and stderr are in the console log file
            # ZUUL: return the saved log lines so we can ship them back
            stdout = b('').join(_log_lines)
        else:
            stdout = b('')
        stderr = b('')

        rc = cmd.returncode
    except (OSError, IOError) as e:
        self.log("Error Executing CMD:%s Exception:%s" % (clean_args, to_native(e)))
        # ZUUL: store fail_json_kwargs and fail later in finally
        fail_json_kwargs = dict(rc=e.errno, msg=to_native(e), cmd=clean_args)
    except Exception as e:
        self.log("Error Executing CMD:%s Exception:%s" % (clean_args, to_native(traceback.format_exc())))
        # ZUUL: store fail_json_kwargs and fail later in finally
        fail_json_kwargs = dict(rc=257, msg=to_native(e), exception=traceback.format_exc(), cmd=clean_args)
    finally:
        if t:
            with Console(zuul_log_id) as console:
                if t.is_alive():
                    console.addLine("[Zuul] standard output/error still open "
                                    "after child exited")
                if ret is None and fail_json_kwargs:
                    # The child never reported an exit status; log the rc
                    # that is about to be passed to fail_json instead.
                    ret = fail_json_kwargs['rc']
                elif ret is None and not fail_json_kwargs:
                    ret = -1
                console.addLine("[Zuul] Task exit code: %s\n" % ret)
                if ret == -1 and not fail_json_kwargs:
                    self.fail_json(rc=ret, msg="Something went horribly wrong during task execution")

        if fail_json_kwargs:
            self.fail_json(**fail_json_kwargs)

    # Restore env settings
    for key, val in old_env_vals.items():
        if val is None:
            # pop() instead of del: the key may already be gone (for example
            # PYTHONPATH is removed above when it ends up empty).
            os.environ.pop(key, None)
        else:
            os.environ[key] = val

    # A previous umask of 0 is a legitimate value that must be restored too.
    if old_umask is not None:
        os.umask(old_umask)

    if rc != 0 and check_rc:
        msg = heuristic_log_sanitize(stderr.rstrip(), self.no_log_values)
        self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)

    # reset the pwd
    os.chdir(prev_dir)

    if encoding is not None:
        return (rc, to_native(stdout, encoding=encoding, errors=errors),
                to_native(stderr, encoding=encoding, errors=errors))
    return (rc, stdout, stderr)
 def test_did_not_hide_too_much(self):
     # self.clean_data is expected to come back from the sanitizer unchanged.
     # assertEqual replaces the deprecated assertEquals alias, which newer
     # unittest releases no longer provide.
     self.assertEqual(heuristic_log_sanitize(self.clean_data), self.clean_data)
 def test_hides_parameter_secrets(self):
     # Sanitize a parameter string while passing "secret" as a no_log value;
     # the literal must not appear anywhere in the result.
     raw_params = 'token="secret", user="******", token_entry="test=secret"'
     no_log_values = frozenset(["secret"])
     sanitized = heuristic_log_sanitize(raw_params, no_log_values)
     self.assertNotIn("secret", sanitized)
 def test_log_sanitize_speed_one_ssh(self):
     # Runs the sanitizer over the one_ssh fixture; the result is discarded
     # and nothing is asserted about it.
     payload = self.one_ssh
     heuristic_log_sanitize(payload)
 def test_log_sanitize_speed_zero_secrets(self):
     # Runs the sanitizer over the zero_secrets fixture; the result is
     # discarded and nothing is asserted about it.
     payload = self.zero_secrets
     heuristic_log_sanitize(payload)
 def test_hides_parameter_secrets(self):
     # "secret" is supplied as a no_log value, so the sanitized text must not
     # contain it in any of the parameters.
     no_log_values = frozenset(['secret'])
     sanitized = heuristic_log_sanitize(
         'token="secret", user="******", token_entry="test=secret"',
         no_log_values,
     )
     self.assertNotIn('secret', sanitized)
 def test_no_password(self):
     # The sanitizer is expected to hand this input back unchanged.
     plain = 'foo@bar'
     sanitized = heuristic_log_sanitize(plain)
     self.assertEqual(sanitized, plain)