예제 #1
0
파일: string_ops.py 프로젝트: dotmpe/oil
def _Utf8CharLen(starting_byte):
    # type: (int) -> int
    if (starting_byte >> 7) == 0b0:
        return 1
    elif (starting_byte >> 5) == 0b110:
        return 2
    elif (starting_byte >> 4) == 0b1110:
        return 3
    elif (starting_byte >> 3) == 0b11110:
        return 4
    else:
        e_strict(INVALID_START)
예제 #2
0
파일: string_ops.py 프로젝트: dotmpe/oil
def PreviousUtf8Char(s, i):
    # type: (str, int) -> int
    """Return the byte offset of the character that ends just before i.

    s is treated as a UTF-8 encoded byte string and i as a byte offset into
    it.  To locate the first byte of the last character, pass len(s) as i.

    The encoding is validated along the way; problems are reported through
    e_strict(INVALID_START).
    """
    # Byte shapes in well-formed UTF-8:
    #
    #   0xxxxxxx   1-byte character
    #   110xxxxx   first byte of a 2-byte character
    #   1110xxxx   first byte of a 3-byte character
    #   11110xxx   first byte of a 4-byte character
    #   10xxxxxx   continuation byte
    #
    # A byte beginning with 10 can only be a continuation byte; any other
    # byte is either the start of a character or invalid data.
    #
    # The scan moves backward and stops at the first non-continuation byte.
    # That byte is interpreted as a start byte, and the length it announces
    # must match the distance travelled.  Things that can go wrong:
    #   * the byte we stop on is not a valid start byte (e.g. 11111111)
    #   * the start byte announces more or fewer continuation bytes than
    #     were actually skipped
    #   * the beginning of the string is reached without finding a start byte
    #
    # Because the scan runs backward, malformed input is not necessarily
    # rejected at the same position a forward parse would reject it.
    end = i
    pos = i

    while pos > 0:
        pos -= 1
        current = ord(s[pos])
        if (current >> 6) == 0b10:
            continue  # continuation byte: keep walking backward

        # Generic error for now; the exact failing position could be
        # computed if needed.  For invalid input the distance may exceed 4.
        distance = end - pos
        if distance != _Utf8CharLen(current):
            e_strict(INVALID_START)
        return pos

    # Ran off the front of the string without seeing a start byte.
    e_strict(INVALID_START)
예제 #3
0
    def _ValToIntOrError(self, val, span_id=runtime.NO_SPID):
        # type: (value_t, int) -> int
        """Convert a runtime value to an int for use in arithmetic.

        Dispatches on the tag of val:
          Undef -> reported through e_strict
          Int   -> the stored integer (val.i)
          Str   -> parsed by self._StringToInteger
          Obj   -> val.obj when running under Python and it is an int;
                   otherwise AssertionError

        An error.Strict raised during the dispatch is re-raised only when
        self.exec_opts.strict_arith() is true; otherwise 0 is returned.
        Control that leaves the switch without returning or raising reaches
        the unconditional e_die at the end.

        Args:
          val: the value to convert.
          span_id: passed through to the error-reporting calls.
        """
        try:
            # Keep the un-narrowed value so each branch can cast from it.
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef
                        ):  # 'nounset' already handled before got here
                    # Happens upon a[undefined]=42, which unfortunately turns into a[0]=42.
                    #log('blame_word %s   arena %s', blame_word, self.arena)
                    # NOTE(review): e_strict presumably raises error.Strict,
                    # which the handler below catches -- confirm.
                    e_strict('Undefined value in arithmetic context',
                             span_id=span_id)

                elif case(value_e.Int):
                    val = cast(value__Int, UP_val)
                    return val.i

                elif case(value_e.Str):
                    val = cast(value__Str, UP_val)
                    return self._StringToInteger(
                        val.s, span_id=span_id)  # calls e_strict

                elif case(value_e.Obj):
                    # Note: this handles var x = 42; echo $(( x > 2 )).
                    if mylib.PYTHON:
                        val = cast(value__Obj, UP_val)
                        if isinstance(val.obj, int):
                            return val.obj
                    raise AssertionError()  # not in C++

        except error.Strict as e:
            # Outside strict_arith mode a strict error degrades to the value 0.
            if self.exec_opts.strict_arith():
                raise
            else:
                return 0

        # Arrays and associative arrays always fail -- not controlled by
        # strict_arith.
        # In bash, (( a )) is like (( a[0] )), but I don't want that.
        # And returning '0' gives different results.
        e_die("Expected a value convertible to integer, got %s",
              ui.ValType(val),
              span_id=span_id)
예제 #4
0
def _NextUtf8Char(s, i):
    # type: (str, int) -> int
    """Return the byte offset just past the character that starts at i.

    This is normally the offset of the following character; for the last
    character of the string it is len(s).

    The character is validated: its start byte must be a legal UTF-8 start
    byte and every byte after it must be a continuation byte.  A character
    cut off by the end of the string is reported through
    e_strict(INCOMPLETE_CHAR).
    """
    total = len(s)
    assert i < total, i  # callers must pass an in-range offset

    end = i + _Utf8CharLen(ord(s[i]))

    # Check each expected continuation byte in order, so the first problem
    # encountered is the one reported.
    pos = i + 1
    while pos < end:
        if pos >= total:
            e_strict(INCOMPLETE_CHAR)
        _CheckContinuationByte(s[pos])
        pos += 1

    return end
예제 #5
0
파일: string_ops.py 프로젝트: dotmpe/oil
def _NextUtf8Char(s, i):
    # type: (str, int) -> int
    """Return the byte offset just past the character that starts at i.

    This is normally the offset of the following character; for the last
    character of the string it is len(s).

    The character is validated.  Running past the end of the string while
    reading continuation bytes surfaces as IndexError, which is reported
    through e_strict(INCOMPLETE_CHAR).
    """
    first = ord(s[i])  # i is expected to be in range, so no IndexError here

    try:
        char_len = _Utf8CharLen(first)
        pos = i + 1
        stop = i + char_len
        while pos < stop:
            _CheckContinuationByte(s[pos])
            pos += 1
        i = stop
    except IndexError:
        e_strict(INCOMPLETE_CHAR)

    return i
예제 #6
0
파일: sh_expr_eval.py 프로젝트: drwilly/oil
  def _StringToInteger(self, s, span_id=runtime.NO_SPID):
    # type: (str, int) -> int
    """Coerce a string to an integer using bash-like rules.

    Parsing happens at runtime, which is what makes things like
    $(( $(echo 1)$(echo 2) + 1 )) => 13 work.

    Recognized spellings:
      0xAB -- hexadecimal constant
      042  -- octal constant
      42   -- decimal constant
      64#z -- constant in an arbitrary base

    Anything else is either re-parsed and evaluated as an arithmetic
    expression (when eval_unsafe_arith is on and a parse context exists) or
    rejected via e_strict / e_die.

    bare word: variable
    quoted word: string (not done?)
    """
    # Hexadecimal: must be tested before the plain '0' (octal) prefix.
    if s.startswith('0x'):
      try:
        result = int(s, 16)
      except ValueError:
        e_strict('Invalid hex constant %r', s, span_id=span_id)
      return result

    # Octal: any other string with a leading zero.
    if s.startswith('0'):
      try:
        result = int(s, 8)
      except ValueError:
        e_strict('Invalid octal constant %r', s, span_id=span_id)
      return result

    # base#digits
    if '#' in s:
      base_text, digit_chars = mylib.split_once(s, '#')
      try:
        base = int(base_text)
      except ValueError:
        e_strict('Invalid base for numeric constant %r',  base_text, span_id=span_id)

      # Digit values as coded below: IsLower chars count from 10, IsUpper
      # chars from 36, '@' is 62, '_' is 63, decimal digits are themselves.
      result = 0
      for c in digit_chars:
        if IsLower(c):
          place = ord(c) - ord('a') + 10
        elif IsUpper(c):
          place = ord(c) - ord('A') + 36
        elif c == '@':  # horrible syntax
          place = 62
        elif c == '_':
          place = 63
        elif c.isdigit():
          place = int(c)
        else:
          e_strict('Invalid digits for numeric constant %r', digit_chars, span_id=span_id)

        if place >= base:
          e_strict('Digits %r out of range for base %d', digit_chars, base, span_id=span_id)

        result = result * base + place
      return result

    try:
      # Ordinary base-10 integer; negative numbers such as '-42' land here.
      result = int(s)
    except ValueError:
      # Not an integer literal.
      # Note: 'test' and '[' never evaluate recursively.
      if not (self.exec_opts.eval_unsafe_arith() and self.parse_ctx):
        if len(s.strip()) == 0 or match.IsValidVarName(s):
          # Something like x42 could evaluate to 0.
          e_strict("Invalid integer constant %r", s, span_id=span_id)
        else:
          # Something like 42x is always fatal.
          e_die("Invalid integer constant %r", s, span_id=span_id)
      else:
        # Special-case blank input so the parser does not report EOF.
        if len(s.strip()) == 0:
          return 0

        # For compatibility: parse the string as an arithmetic expression
        # and evaluate that instead.
        arena = self.parse_ctx.arena

        arith_parser = self.parse_ctx.MakeArithParser(s)
        with alloc.ctx_Location(arena, source.Variable(span_id)):
          try:
            parsed = arith_parser.Parse()  # may raise error.Parse
          except error.Parse as parse_err:
            ui.PrettyPrintError(parse_err, arena)
            e_die('Parse error in recursive arithmetic', span_id=parse_err.span_id)

        # Guard against infinite recursion on input like $(( 1x )): it
        # parses to a word that evaluates to itself, so it must not be
        # re-parsed as a word again.
        if parsed.tag_() != arith_expr_e.Word:
          result = self.EvalToInt(parsed)
        else:
          e_die("Invalid integer constant %r", s, span_id=span_id)

    return result
예제 #7
0
파일: string_ops.py 프로젝트: dotmpe/oil
def _CheckContinuationByte(byte):
    # type: (str) -> None
    if (ord(byte) >> 6) != 0b10:
        e_strict(INVALID_CONT)