Example #1
    def testTokens(self):
        print(Id.Op_Newline)
        print(syntax_asdl.token(Id.Op_Newline, '\n'))

        print(Id.Op_Newline)

        print(Kind.Eof)
        print(Kind.Left)
        print('--')
        num_kinds = 0
        for name in dir(Kind):
            if name[0].isupper():
                print(name, getattr(Kind, name))
                num_kinds += 1

        print('Number of Kinds:', num_kinds)
        # 233 out of 256 tokens now
        print('Number of IDs:', len(ID_SPEC.id_str2int))

        # Make sure we're not exporting too much
        print(dir(id_kind))

        t = syntax_asdl.token(Id.Arith_Plus, '+')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_CaretEqual, '^=')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_RBrace, '}')
        self.assertEqual(Kind.Arith, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_GlobDEqual, '==')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_Equal, '=')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))
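This test exercises the invariant behind LookupKind(): every token Id belongs to exactly one coarse-grained Kind. A minimal standalone sketch of that relationship, with a hypothetical hand-written mapping in place of Oil's generated tables (the Id and Kind names mirror the test; the dict itself is illustrative):

# Hypothetical sketch: Oil generates the real Id -> Kind table from its spec.
ID_TO_KIND = {
    'Arith_Plus': 'Arith',
    'Arith_CaretEqual': 'Arith',
    'BoolBinary_GlobDEqual': 'BoolBinary',
    'Op_Newline': 'Op',
}

def lookup_kind(id_name):
    # Every Id maps to exactly one Kind; a missing Id is a program error.
    return ID_TO_KIND[id_name]

assert lookup_kind('Arith_Plus') == 'Arith'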
Example #2
def BoolId(node):
    # type: (word_t) -> Id_t
    if isinstance(node, word__String):  # for test/[
        return node.id

    if isinstance(node, word__Token):
        return node.token.id

    # NOTE: I think Empty never happens in this context?
    assert isinstance(node, word__Compound)

    if len(node.parts) != 1:
        return Id.Word_Compound

    token_type = _LiteralId(node.parts[0])
    if token_type == Id.Undefined_Tok:
        return Id.Word_Compound  # It's a regular word

    # This is outside the BoolUnary/BoolBinary namespace, but works the same.
    if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
        return token_type  # special boolean "tokens"

    token_kind = LookupKind(token_type)
    if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
        return token_type  # boolean operators

    return Id.Word_Compound
Example #3
  def _ApplyUnarySuffixOp(self, val, op):
    assert val.tag != value_e.Undef

    op_kind = LookupKind(op.op_id)

    if op_kind == Kind.VOp1:
      #log('%s', op)
      arg_val = self.EvalWordToString(op.arg_word, do_fnmatch=True)
      assert arg_val.tag == value_e.Str

      if val.tag == value_e.Str:
        s = string_ops.DoUnarySuffixOp(val.s, op, arg_val.s)
        new_val = value.Str(s)
      else:  # val.tag == value_e.StrArray:
        # ${a[@]#prefix} is VECTORIZED on arrays.  Oil should have this too.
        strs = []
        for s in val.strs:
          if s is not None:
            strs.append(string_ops.DoUnarySuffixOp(s, op, arg_val.s))
        new_val = value.StrArray(strs)

    else:
      raise AssertionError(op_kind)

    return new_val
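DoUnarySuffixOp applies operators like ${s#pat}, and the StrArray branch shows they are applied element-wise to arrays. A rough sketch of just the '#' case (shortest matching prefix removal), assuming plain fnmatch glob semantics; the real string_ops.DoUnarySuffixOp also covers ##, %, and %%:

import fnmatch

def strip_shortest_prefix(s, pat):
    # Sketch of ${s#pat}: drop the shortest prefix matching the glob pattern.
    # Illustrative only; assumes fnmatch semantics rather than libc globbing.
    for i in range(len(s) + 1):
        if fnmatch.fnmatchcase(s[:i], pat):
            return s[i:]
    return s

assert strip_shortest_prefix('foo.tar.gz', '*.') == 'tar.gz'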
Example #4
def CommandKind(w):
    if w.tag == word_e.TokenWord:
        return LookupKind(w.token.id)

    # NOTE: This is a bit inconsistent with CommandId, because we never return
    # Kind.KW (or Kind.Lit).  But the CommandParser is easier to write this way.
    return Kind.Word
Example #5
def CommandId(node):
    # type: (word_t) -> Id_t
    if isinstance(node, word__Token):
        return node.token.id

    # Assume it's a Compound
    assert isinstance(node, word__Compound)

    # Has to be a single literal part
    if len(node.parts) != 1:
        return Id.Word_Compound

    token_type = _LiteralId(node.parts[0])
    if token_type == Id.Undefined_Tok:
        return Id.Word_Compound

    elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.ControlFlow_Return):
        # Return is for special processing
        return token_type

    token_kind = LookupKind(token_type)
    if token_kind == Kind.KW:
        return token_type

    return Id.Word_Compound
Example #6
def KeywordToken(w):
    # type: (word_t) -> Tuple[Kind_t, Optional[token]]
    """Tests if a word is an assignment or control flow word.

    Returns:
      kind, token
    """
    assert isinstance(w, word__Compound)

    err = (Kind.Undefined, None)
    if len(w.parts) != 1:
        return err

    part0 = w.parts[0]
    token_type = _LiteralId(part0)
    if token_type == Id.Undefined_Tok:
        return err

    assert isinstance(part0, word_part__Literal)  # for MyPy

    token_kind = LookupKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, part0.token

    return err
Example #7
def BoolId(node):
    if node.tag == word_e.StringWord:  # for test/[
        return node.id

    if node.tag == word_e.TokenWord:
        return node.token.id

    # Assume it's a CompoundWord
    #assert node.tag == word_e.CompoundWord

    if len(node.parts) != 1:
        return Id.Word_Compound

    token_type = _LiteralPartId(node.parts[0])
    if token_type == Id.Undefined_Tok:
        return Id.Word_Compound  # It's a regular word

    # This is outside the BoolUnary/BoolBinary namespace, but works the same.
    if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
        return token_type  # special boolean "tokens"

    token_kind = LookupKind(token_type)
    if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
        return token_type  # boolean operators

    return Id.Word_Compound
Example #8
def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser."""
    if isinstance(w, word__Token):
        return LookupKind(w.token.id)

    # NOTE: This is a bit inconsistent with CommandId, because we never return
    # Kind.KW (or Kind.Lit).  But the CommandParser is easier to write this way.
    return Kind.Word
Example #9
    def _Peek(self):
        """Helper method."""
        if self.next_lex_mode is not None:
            self.cur_token = self.lexer.Read(self.next_lex_mode)
            self.token_kind = LookupKind(self.cur_token.id)
            self.token_type = self.cur_token.id
            self.parse_ctx.trail.AppendToken(self.cur_token)  # For completion
            self.next_lex_mode = None
        return self.cur_token
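The pattern here is lazy, cached token reading: _Next() (not shown) only records the lex mode for the next read, and _Peek() performs that read once, caching the token and its kind until it is consumed. A stripped-down sketch of the protocol with a stand-in lexer (class and attribute names are illustrative):

class PeekNextSketch(object):
    # Illustrative skeleton of the _Peek/_Next protocol used above.
    def __init__(self, lexer):
        self.lexer = lexer  # anything with a Read(lex_mode) method
        self.cur_token = None
        self.next_lex_mode = 'Outer'  # non-None means 'read on next _Peek'

    def _Peek(self):
        if self.next_lex_mode is not None:
            self.cur_token = self.lexer.Read(self.next_lex_mode)
            self.next_lex_mode = None  # cache until _Next() is called
        return self.cur_token

    def _Next(self, lex_mode='Outer'):
        self.next_lex_mode = lex_mode  # consume; the next _Peek reads again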
Example #10
    def testTokens(self):
        print(Id.Op_Newline)
        print(syntax_asdl.token(Id.Op_Newline, '\n'))

        print(Id.Op_Newline)

        print(Kind.Eof)
        print(Kind.Left)

        print('--')
        num_kinds = 0
        for name in dir(Kind):
            if name[0].isupper():
                kind = getattr(Kind, name)
                print('%-20s %s' % (name, kind))
                num_kinds += 1

        print()
        print('Number of Kinds:', num_kinds)
        print()

        for name in dir(Id):
            if name[0].isupper():
                id_ = getattr(Id, name)
                print('%-30s %s' % (name, id_))

        # 309 out of 256 tokens now
        print()
        print('Number of IDs:', len(ID_SPEC.id_str2int))

        t = syntax_asdl.token(Id.Arith_Plus, '+')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_CaretEqual, '^=')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_RBrace, '}')
        self.assertEqual(Kind.Arith, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_GlobDEqual, '==')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_Equal, '=')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))
Example #11
    def _Peek(self):
        """Helper method."""
        if self.next_lex_mode is not None:
            self.prev_token = self.cur_token  # for completion
            self.cur_token = self.lexer.Read(self.next_lex_mode)
            self.token_kind = LookupKind(self.cur_token.id)
            self.token_type = self.cur_token.id

            if 0:
                log('cur token = %s', self.cur_token)

            self.next_lex_mode = None
        return self.cur_token
Example #12
    def _NextOne(self, lex_mode=lex_mode_e.DBracket):
        n = len(self.words)
        if n == 2:
            assert lex_mode == lex_mode_e.DBracket
            self.words[0] = self.words[1]
            self.cur_word = self.words[0]
            del self.words[1]
        elif n in (0, 1):
            w = self.w_parser.ReadWord(lex_mode)  # may raise
            if n == 0:
                self.words.append(w)
            else:
                self.words[0] = w
            self.cur_word = w

        assert self.cur_word is not None
        self.op_id = word.BoolId(self.cur_word)
        self.b_kind = LookupKind(self.op_id)
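Here self.words is a buffer of at most two words, so the [[ parser can look one word ahead (see _LookAhead in Example #15) without re-reading from the lexer; when a lookahead word exists, _NextOne shifts it down into the current slot. A toy version of just the buffering logic (stand-in reader; names are illustrative):

class TwoWordBuffer(object):
    # Illustrative: slot 0 holds the current word; slot 1, if present,
    # holds a word that lookahead already fetched.
    def __init__(self, read_word):
        self.read_word = read_word  # callable that returns the next word
        self.words = []
        self.cur_word = None

    def _NextOne(self):
        if len(self.words) == 2:
            self.words.pop(0)  # the lookahead word becomes current
        else:
            w = self.read_word()
            if self.words:
                self.words[0] = w
            else:
                self.words.append(w)
        self.cur_word = self.words[0]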
Example #13
def KeywordToken(w):
    """Tests if a word is an assignment or control flow word.

    Returns:
      kind, token
    """
    assert w.tag == word_e.CompoundWord

    err = (Kind.Undefined, None)
    if len(w.parts) != 1:
        return err

    token_type = _LiteralPartId(w.parts[0])
    if token_type == Id.Undefined_Tok:
        return err

    token_kind = LookupKind(token_type)
    if token_kind in (Kind.Assign, Kind.ControlFlow):
        return token_kind, w.parts[0].token

    return err
Example #14
def CommandId(node):
    if node.tag == word_e.TokenWord:
        return node.token.id

    # Assume it's a CompoundWord
    assert node.tag == word_e.CompoundWord

    # Has to be a single literal part
    if len(node.parts) != 1:
        return Id.Word_Compound

    token_type = _LiteralPartId(node.parts[0])
    if token_type == Id.Undefined_Tok:
        return Id.Word_Compound

    elif token_type in (Id.Lit_LBrace, Id.Lit_RBrace):
        return token_type

    token_kind = LookupKind(token_type)
    if token_kind == Kind.KW:
        return token_type

    return Id.Word_Compound
Example #15
    def ParseFactor(self):
        """
    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'
    """
        if self.b_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            op = self.op_id
            self._Next()
            w = self.cur_word
            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
            if w.tag not in (word_e.CompoundWord, word_e.StringWord):
                p_die('Invalid argument to unary operator', word=w)
            self._Next()
            node = bool_expr.BoolUnary(op, w)
            return node

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            t2 = self._LookAhead()
            t2_op_id = word.BoolId(t2)
            t2_b_kind = LookupKind(t2_op_id)

            #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
            # Redir pun for < and >, -a and -o pun
            if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
                left = self.cur_word

                self._Next()
                op = self.op_id

                # TODO: Need to change to lex_mode_e.BashRegex.
                # _Next(lex_mode) then?
                is_regex = t2_op_id == Id.BoolBinary_EqualTilde
                if is_regex:
                    self._Next(lex_mode=lex_mode_e.BashRegex)
                else:
                    self._Next()

                right = self.cur_word
                if is_regex:
                    # NOTE: StaticEval for checking regex syntax isn't enough.  We would
                    # need to pass do_ere so that the quoted parts get escaped.
                    #ok, s, unused_quoted = word.StaticEval(right)
                    pass

                self._Next()
                return bool_expr.BoolBinary(op, left, right)
            else:
                # [[ foo ]]
                w = self.cur_word
                self._Next()
                return bool_expr.WordTest(w)

        if self.op_id == Id.Op_LParen:
            self._Next()
            node = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                p_die('Expected ), got %s', self.cur_word, word=self.cur_word)
            self._Next()
            return node

        # It's not WORD, UNARY_OP, or '('
        p_die('Unexpected token in boolean expression', word=self.cur_word)
Example #16
  def _EvalBracedVarSub(self, part, part_vals, quoted):
    """
    Args:
      part_vals: output param to append to.
    """
    # We have four types of operator that interact.
    #
    # 1. Bracket: value -> (value, bool maybe_decay_array)
    #
    # 2. Then these four cases are mutually exclusive:
    #
    #   a. Prefix length: value -> value
    #   b. Test: value -> part_value[]
    #   c. Other Suffix: value -> value
    #   d. no operator: you have a value
    #
    # That is, we don't have both prefix and suffix operators.
    #
    # 3. Process maybe_decay_array here before returning.

    maybe_decay_array = False  # for $*, ${a[*]}, etc.

    var_name = None  # For ${foo=default}

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
      var_name = part.token.val
      val = self.mem.GetVar(var_name)
      #log('EVAL NAME %s -> %s', var_name, val)

    elif part.token.id == Id.VSub_Number:
      var_num = int(part.token.val)
      val = self._EvalVarNum(var_num)
    else:
      # $* decays
      val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted)

    # 2. Bracket: value -> (value v, bool maybe_decay_array)
    # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.
    if part.bracket_op:
      if part.bracket_op.tag == bracket_op_e.WholeArray:
        op_id = part.bracket_op.op_id

        if op_id == Id.Lit_At:
          if not quoted:
            maybe_decay_array = True  # ${a[@]} decays but "${a[@]}" doesn't
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with @: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            # TODO: Is this a no-op?  Just leave 'val' alone.
            val = value.StrArray(val.strs)

        elif op_id == Id.Arith_Star:
          maybe_decay_array = True  # both ${a[*]} and "${a[*]}" decay
          if val.tag == value_e.Undef:
            val = self._EmptyStrArrayOrError(part.token)
          elif val.tag == value_e.Str:
            e_die("Can't index string with *: %r", val, part=part)
          elif val.tag == value_e.StrArray:
            # TODO: Is this a no-op?  Just leave 'val' alone.
            # ${a[*]} or "${a[*]}" :  maybe_decay_array is always true
            val = value.StrArray(val.strs)

        else:
          raise AssertionError(op_id)  # unknown

      elif part.bracket_op.tag == bracket_op_e.ArrayIndex:
        anode = part.bracket_op.expr

        if val.tag == value_e.Undef:
          pass  # it will be checked later

        elif val.tag == value_e.Str:
          # Bash treats any string as an array, so we can't add our own
          # behavior here without making valid OSH invalid bash.
          e_die("Can't index string %r with integer", part.token.val,
                token=part.token)

        elif val.tag == value_e.StrArray:
          index = self.arith_ev.Eval(anode)
          try:
            # could be None because representation is sparse
            s = val.strs[index]
          except IndexError:
            s = None

          if s is None:
            val = value.Undef()
          else:
            val = value.Str(s)

        elif val.tag == value_e.AssocArray:
          key = self.arith_ev.Eval(anode, int_coerce=False)
          try:
            val = value.Str(val.d[key])
          except KeyError:
            val = value.Undef()

        else:
          raise AssertionError(val.__class__.__name__)

      else:
        raise AssertionError(part.bracket_op.tag)

    if part.prefix_op:
      val = self._EmptyStrOrError(val)  # maybe error
      val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token)
      # NOTE: When applying the length operator, we can't have a test or
      # suffix afterward.  And we don't want to decay the array

    elif part.suffix_op:
      op = part.suffix_op
      if op.tag == suffix_op_e.StringNullary:
        if op.op_id == Id.VOp0_P:
          prompt = self.prompt_ev.EvalPrompt(val)
          val = value.Str(prompt)
        elif op.op_id == Id.VOp0_Q:
          val = value.Str(string_ops.ShellQuote(val.s))
        else:
          raise NotImplementedError(op.op_id)

      elif op.tag == suffix_op_e.StringUnary:
        if LookupKind(part.suffix_op.op_id) == Kind.VTest:
          # TODO: Change style to:
          # if self._ApplyTestOp(...)
          #   return
          # It should return whether anything was done.  If not, we continue to
          # the end, where we might throw an error.

          assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op,
                                                       quoted, part_vals)

          # NOTE: Splicing part_values is necessary because of code like
          # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
          # do_glob/do_elide setting.
          if effect == effect_e.SpliceParts:
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.SpliceAndAssign:
            if var_name is None:
              # TODO: error context
              e_die("Can't assign to special variable")
            else:
              # NOTE: This decays arrays too!  'set -o strict_array' could
              # avoid it.
              rhs_str = _DecayPartValuesToString(assign_part_vals,
                                                 self.splitter.GetJoinChar())
              state.SetLocalString(self.mem, var_name, rhs_str)
            return  # EARLY RETURN, part_vals mutated

          elif effect == effect_e.Error:
            raise NotImplementedError

          else:
            # The old one
            #val = self._EmptyStringPartOrError(part_val, quoted)
            pass  # do nothing, may still be undefined

        else:
          val = self._EmptyStrOrError(val)  # maybe error
          # Other suffix: value -> value
          val = self._ApplyUnarySuffixOp(val, part.suffix_op)

      elif op.tag == suffix_op_e.PatSub:  # PatSub, vectorized
        val = self._EmptyStrOrError(val)  # ${undef//x/y}

        # globs are supported in the pattern
        pat_val = self.EvalWordToString(op.pat, do_fnmatch=True)
        assert pat_val.tag == value_e.Str, pat_val

        if op.replace:
          replace_val = self.EvalWordToString(op.replace)
          assert replace_val.tag == value_e.Str, replace_val
          replace_str = replace_val.s
        else:
          replace_str = ''

        regex, warnings = glob_.GlobToERE(pat_val.s)
        if warnings:
          # TODO:
          # - Add 'set -o strict-glob' mode and expose warnings.
          #   "Glob is not in CANONICAL FORM".
          # - Propagate location info back to the 'op.pat' word.
          pass
        replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0])

        if val.tag == value_e.Str:
          s = replacer.Replace(val.s, op)
          val = value.Str(s)

        elif val.tag == value_e.StrArray:
          strs = []
          for s in val.strs:
            if s is not None:
              strs.append(replacer.Replace(s, op))
          val = value.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)

      elif op.tag == suffix_op_e.Slice:
        val = self._EmptyStrOrError(val)  # ${undef:3:1}

        if op.begin:
          begin = self.arith_ev.Eval(op.begin)
        else:
          begin = 0

        if op.length:
          length = self.arith_ev.Eval(op.length)
        else:
          length = None

        if val.tag == value_e.Str:  # Slice UTF-8 characters in a string.
          s = val.s

          try:
            if begin < 0:
              # It could be negative if we compute unicode length, but that's
              # confusing.

              # TODO: Instead of attributing it to the word part, it would be
              # better if we attributed it to arith_expr begin.
              raise util.InvalidSlice(
                  "The start index of a string slice can't be negative: %d",
                  begin, part=part)

            byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if length is None:
              byte_end = len(s)
            else:
              if length < 0:
                # TODO: Instead of attributing it to the word part, it would be
                # better if we attributed it to arith_expr begin.
                raise util.InvalidSlice(
                    "The length of a string slice can't be negative: %d",
                    length, part=part)

              byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)

          except (util.InvalidSlice, util.InvalidUtf8) as e:
            if self.exec_opts.strict_word_eval:
              raise
            else:
              # TODO:
              # - We don't see the error location here, but we see it when set
              #   -o strict-word-eval.
              # - Doesn't make the command exit with 1.  It just sets the word
              #   to empty string.
              util.warn(e.UserErrorString())
              substr = ''  # error condition
          else:
            substr = s[byte_begin : byte_end]

          val = value.Str(substr)

        elif val.tag == value_e.StrArray:  # Slice array entries.
          # NOTE: unset elements don't count towards the length.
          strs = []
          for s in val.strs[begin:]:
            if s is not None:
              strs.append(s)
              if len(strs) == length:  # never true for unspecified length
                break
          val = value.StrArray(strs)

        else:
          raise AssertionError(val.__class__.__name__)  # Not possible

    # After applying suffixes, process maybe_decay_array here.
    if maybe_decay_array and val.tag == value_e.StrArray:
      val = self._DecayArray(val)

    # For the case where there are no prefix or suffix ops.
    val = self._EmptyStrOrError(val)

    # For example, ${a} evaluates to value_t.Str(), but we want a
    # part_value.StringPartValue.
    part_val = _ValueToPartValue(val, quoted)
    part_vals.append(part_val)
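One detail worth calling out: the Slice branch counts UTF-8 characters rather than bytes, which is why it goes through string_ops.AdvanceUtf8Chars and rejects negative indices. A sketch of the same ${s:begin:length} semantics on an already-decoded Python string, which sidesteps the byte/character bookkeeping (illustrative only):

def slice_chars(s, begin, length=None):
    # Sketch of ${s:begin:length}; the real code walks UTF-8 bytes with
    # string_ops.AdvanceUtf8Chars and raises util.InvalidSlice on bad input.
    if begin < 0 or (length is not None and length < 0):
        raise ValueError("slice start and length can't be negative")
    return s[begin:] if length is None else s[begin:begin + length]

assert slice_chars('hello', 1, 3) == 'ell'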
Example #17
    def testMode_DBracket(self):
        lexer = _InitLexer('-z foo')
        t = lexer.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(token(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, LookupKind(t.id))
Example #18
def main(argv):
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError('Action required')

    if action == 'c':
        ids = list(ID_SPEC.token_names.iteritems())
        ids.sort(key=lambda pair: pair[0])  # Sort by ID
        for i, name in ids:
            print('#define id__%s %s' % (name, i))

    elif action == 'cpp':
        # For blog post
        try:
            labels = argv[2]
        except IndexError:
            label_lines = []
        else:
            with open(labels) as f:
                label_lines = f.readlines()

        from collections import defaultdict

        id_by_kind_index = defaultdict(list)  # Kind name -> [list of Id names]
        for name in dir(Id):
            if name[0].isupper():
                id_ = getattr(Id, name)
                kind_index = LookupKind(id_)
                id_by_kind_index[kind_index].append(name)

        kinds = []
        for name in dir(Kind):
            if name[0].isupper():
                kind_index = getattr(Kind, name)
                #print(kind, name)
                kinds.append(
                    (name, kind_index, len(id_by_kind_index[kind_index])))

        # Sort descending by length of ID list
        kinds = sorted(kinds, key=lambda p: p[2], reverse=True)

        id_labels = {}  # Id name -> integer
        kind_labels = {}  # Kind name -> integer

        for k, line in enumerate(label_lines):  # descending order by kind size

            parts = line.split()
            id_list_len, _, actual_len, _, kind_label, _ = parts[:6]
            id_list_len = int(id_list_len)
            kind_label = int(kind_label)
            id_list = [int(id_) for id_ in parts[6:]]

            try:
                kind_name, kind_index, len_id_list = kinds[k]
            except IndexError:
                break
            kind_labels[kind_name] = kind_label

            id_names = id_by_kind_index[kind_index]
            #print(id_names)
            for i, name in enumerate(id_names):
                try:
                    id_labels[name] = id_list[i]
                except IndexError:
                    raise RuntimeError('%s %s' % (name, i))

        if 0:  # disable labeling
            id_labels = None
            kind_labels = None

        kind_names = [k[0] for k in kinds]

        id_names = []
        for _, kind_index, _ in kinds:
            n = id_by_kind_index[kind_index]
            id_names.append(n)

        GenCppCode(kind_names,
                   id_names,
                   sys.stdout,
                   id_labels=id_labels,
                   kind_labels=kind_labels)

    else:
        raise RuntimeError('Invalid action %r' % action)
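For the 'c' action, the loop emits one #define per token Id via the '#define id__%s %s' format string, sorted by integer value. With a hypothetical two-token spec, the output would look like this (names and numbers made up for illustration):

#define id__Op_Newline 1
#define id__Arith_Plus 2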