Ejemplo n.º 1
0
def MakeReplacer(pat, replace_str, slash_spid):
  """Helper for ${x/pat/replace}

  Parses 'pat' and returns either a _GlobReplacer or a _ConstStringReplacer.

  Using these objects is more efficient when performing the same operation on
  multiple strings.
  """
  regex, warnings = glob_.GlobToERE(pat)
  if warnings:
    # TODO: Add strict mode and expose warnings.
    pass
  if regex is None:
    return _ConstStringReplacer(pat, replace_str)
  else:
    return _GlobReplacer(regex, replace_str, slash_spid)
Ejemplo n.º 2
0
    def testGlobParser(self):
        CASES = [
            # (glob input, expected AST, expected extended regexp, has error)
            ('*.py', '.*\.py', False),
            ('*.?', '.*\..', False),
            ('<*>', '<.*>', False),
            ('\**+', '\*.*\+', False),
            ('\**', '\*.*', False),
            ('*.[ch]pp', '.*\.[ch]pp', False),

            # not globs
            ('abc', None, False),
            ('\\*', None, False),
            ('c:\\foo', None, False),
            ('strange]one', None, False),

            # character class globs
            ('[[:space:]abc]', '[[:space:]abc]', False),
            ('[abc]', '[abc]', False),
            (r'[\a\b\c]', r'[\a\b\c]', False),
            ('[abc\[]', '[abc\[]', False),
            ('[!not]', '[^not]', False),
            ('[^also_not]', '[^also_not]', False),
            ('[!*?!\\[]', '[^*?!\\[]', False),
            ('[!\]foo]', '[^\]foo]', False),

            # invalid globs
            ('not_closed[a-z', None, True),
            ('[[:spa[ce:]]', None, True),

            # Regression test for IndexError.
            ('[', None, True),
            ('\\', None, True),
            (']', None, False),
        ]
        for glob, expected_ere, expected_err in CASES:
            print('===')
            print(glob)
            regex, warnings = glob_.GlobToERE(glob)
            self.assertEqual(
                expected_ere, regex, 'Expected %r to translate to %r, got %r' %
                (glob, expected_ere, regex))

            print('regex   : %s' % regex)
            print('warnings: %s' % warnings)
Ejemplo n.º 3
0
  def _EvalBracedVarSub(self, part, part_vals, quoted):
    """
    Args:
      part_vals: output param to append to.
    """
    # We have four types of operator that interact.
    #
    # 1. Bracket: value -> (value, bool maybe_decay_array)
    #
    # 2. Then these four cases are mutually exclusive:
    #
    #   a. Prefix length: value -> value
    #   b. Test: value -> part_value[]
    #   c. Other Suffix: value -> value
    #   d. no operator: you have a value
    #
    # That is, we don't have both prefix and suffix operators.
    #
    # 3. Process maybe_decay_array here before returning.

    maybe_decay_array = False  # for $*, ${a[*]}, etc.

    var_name = None  # For ${foo=default}

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
      var_name = part.token.val
      val = self.mem.GetVar(var_name)
      #log('EVAL NAME %s -> %s', var_name, val)

    elif part.token.id == Id.VSub_Number:
      var_num = int(part.token.val)
      val = self._EvalVarNum(var_num)
    else:
      # $* decays
      val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

    # 2. Bracket: value -> (value v, bool maybe_decay_array)
    # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.
    if part.bracket_op:
      if part.bracket_op.tag == bracket_op_e.WholeArray:
        op_id = part.bracket_op.op_id

        if op_id == Id.Lit_At:
          if not quoted:
            maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with @: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            # TODO: Is this a no-op?  Just leave 'val' alone.
            val = value.StrArray(val.strs)

        elif op_id == Id.Arith_Star:
          maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with *: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            # TODO: Is this a no-op?  Just leave 'val' alone.
            # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
            val = value.StrArray(val.strs)

        else:
          raise AssertionError(op_id)  # unknown

      elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
        anode = part.bracket_op.expr

        if val.tag == value_e.Undef:
          pass  # it will be checked later

        elif val.tag == value_e.Str:
          # Bash treats any string as an array, so we can't add our own
          # behavior here without making valid OSH invalid bash.
          e_die("Can't index string %r with integer", part.token.val,
                token=part.token)

        elif val.tag == value_e.StrArray:
          index = self.arith_ev.Eval(anode)
          try:
            # could be None because representation is sparse
            s = val.strs[index]
          except IndexError:
            s = None

          if s is None:
            val = value.Undef()
          else:
            val = value.Str(s)

        elif val.tag == value_e.AssocArray:
          key = self.arith_ev.Eval(anode, int_coerce=False)
          try:
            val = value.Str(val.d[key])
          except KeyError:
            val = value.Undef()

        else:
          raise AssertionError(val.__class__.__name__)

      else:
        raise AssertionError(part.bracket_op.tag)

    if part.prefix_op:
      val = self._EmptyStrOrError(val)  # maybe error
      val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token)
      # NOTE: When applying the length operator, we can't have a test or
      # suffix afterward.  And we don't want to decay the array

    elif part.suffix_op:
      op = part.suffix_op
      if op.tag == suffix_op_e.StringNullary:
        if op.op_id == Id.VOp0_P:
          prompt = self.prompt_ev.EvalPrompt(val)
          val = value.Str(prompt)
        elif op.op_id == Id.VOp0_Q:
          val = value.Str(string_ops.ShellQuote(val.s))
        else:
          raise NotImplementedError(op.op_id)

      elif op.tag == suffix_op_e.StringUnary:
        if LookupKind(part.suffix_op.op_id) == Kind.VTest:
          # TODO: Change style to:
          # if self._ApplyTestOp(...)
          #   return
          # It should return whether anything was done.  If not, we continue to
          # the end, where we might throw an error.

          assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                       quoted, part_vals)

          # NOTE: Splicing part_values is necessary because of code like
          # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
          # do_glob/do_elide setting.
          if effect == effect_e.SpliceParts:
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.SpliceAndAssign:
            if var_name is None:
              # TODO: error context
              e_die("Can't assign to special variable")
            else:
              # NOTE: This decays arrays too!  'set -o strict_array' could
              # avoid it.
              rhs_str = _DecayPartValuesToString(assign_part_vals,
                                                 self.splitter.GetJoinChar())
              state.SetLocalString(self.mem, var_name, rhs_str)
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.Error:
            raise NotImplementedError

          else:
            # The old one
            #val = self._EmptyStringPartOrError(part_val, quoted)
            pass  # do nothing, may still be undefined

        else:
          val = self._EmptyStrOrError(val)  # maybe error
          # Other suffix: value -> value
          val = self._ApplyUnarySuffixOp(val, part.suffix_op)

      elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
        val = self._EmptyStrOrError(val)  # ${undef//x/y}

        # globs are supported in the pattern
        pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
        assert pat_val.tag == value_e.Str, pat_val

        if op.replace:
          replace_val = self.EvalWordToString(op.replace)
          assert replace_val.tag == value_e.Str, replace_val
          replace_str = replace_val.s
        else:
          replace_str = ''

        regex, warnings = glob_.GlobToERE(pat_val.s)
        if warnings:
          # TODO:
          # - Add 'set -o strict-glob' mode and expose warnings.
          #   "Glob is not in CANONICAL FORM".
          # - Propagate location info back to the 'op.pat' word.
          pass
        replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0])

        if val.tag == value_e.Str:
          s = replacer.Replace(val.s, op)
          val = value.Str(s)

        elif val.tag == value_e.StrArray:
          strs = []
          for s in val.strs:
            if s is not None:
              strs.append(replacer.Replace(s, op))
          val = value.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)

      elif op.tag == suffix_op_e.Slice:
        val = self._EmptyStrOrError(val)  # ${undef:3:1}

        if op.begin:
          begin = self.arith_ev.Eval(op.begin)
        else:
          begin = 0

        if op.length:
          length = self.arith_ev.Eval(op.length)
        else:
          length = None

        if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
          s = val.s

          try:
            if begin < 0:
              # It could be negative if we compute unicode length, but that's
              # confusing.

              # TODO: Instead of attributing it to the word part, it would be
              # better if we attributed it to arith_expr begin.
              raise util.InvalidSlice(
                  "The start index of a string slice can't be negative: %d",
                  begin, part=part)

            byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if length is None:
              byte_end = len(s)
            else:
              if length < 0:
                # TODO: Instead of attributing it to the word part, it would be
                # better if we attributed it to arith_expr begin.
                raise util.InvalidSlice(
                    "The length of a string slice can't be negative: %d",
                    length, part=part)

              byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)

          except (util.InvalidSlice, util.InvalidUtf8) as e:
            if self.exec_opts.strict_word_eval:
              raise
            else:
              # TODO:
              # - We don't see the error location here, but we see it when set
              #   -o strict-word-eval.
              # - Doesn't make the command exit with 1.  It just sets the word
              #   to empty string.
              util.warn(e.UserErrorString())
              substr = ''  # error condition
          else:
            substr = s[byte_begin : byte_end]

          val = value.Str(substr)

        elif val.tag == value_e.StrArray:  # Slice array entries.
          # NOTE: unset elements don't count towards the length.
          strs = []
          for s in val.strs[begin:]:
            if s is not None:
              strs.append(s)
              if len(strs) == length:  # never true for unspecified length
                break
          val = value.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)  # Not possible

    # After applying suffixes, process maybe_decay_array here.
    if maybe_decay_array and val.tag == value_e.StrArray:
      val = self._DecayArray(val)

    # For the case where there are no prefix or suffix ops.
    val = self._EmptyStrOrError(val)

    # For example, ${a} evaluates to value_t.Str(), but we want a
    # part_value.StringPartValue.
    part_val = _ValueToPartValue(val, quoted)
    part_vals.append(part_val)