def Run(self, cmd_val):
  # type: (cmd_value__Argv) -> int
  """Run the echo builtin, writing its output to self.f.

  Flags are parsed echo-style by flag_spec.ParseLikeEcho.  With -e every
  argument is decoded token by token; the backslash-c escape stops all
  further output, including the trailing newline.  With -n the trailing
  newline is omitted.

  Returns:
    0.  When simple_echo is on and more than one argument remains, the
    problem is reported through e_usage().
  """
  argv = cmd_val.argv[1:]  # superseded by arg_r.Rest() just below
  attrs, arg_r = flag_spec.ParseLikeEcho('echo', cmd_val)
  arg = arg_types.echo(attrs.attrs)
  argv = arg_r.Rest()

  arg0_spid = cmd_val.arg_spids[0]
  backslash_c = False  # becomes True once \c has been seen

  if arg.e:
    decoded = []  # type: List[str]
    for raw in argv:
      pieces = []  # type: List[str]
      lexer = match.EchoLexer(raw)
      while True:
        id_, tok_val = lexer.Next()
        if id_ == Id.Eol_Tok:  # really a NUL terminator
          break

        tok = Token(id_, arg0_spid, tok_val)
        piece = word_compile.EvalCStringToken(tok)
        if piece is None:
          # '\c' keeps what was decoded so far and aborts processing.
          backslash_c = True
          break
        pieces.append(piece)

      decoded.append(''.join(pieces))
      if backslash_c:  # the remaining args are dropped as well
        break

    argv = decoded

  if self.exec_opts.simple_echo():
    num_words = len(argv)
    if num_words > 1:
      # TODO: span_id could be more accurate
      e_usage(
          "takes at most one arg when simple_echo is on (hint: add quotes)")
    elif num_words == 1:
      self.f.write(argv[0])
  elif argv:
    self.f.write(argv[0])
    for word in argv[1:]:
      self.f.write(' ')  # arg separator
      self.f.write(word)

  if not (arg.n or backslash_c):
    self.f.write('\n')

  return 0
def __call__(self, arg_vec):
  """Run the echo builtin on arg_vec and write to sys.stdout.

  When self.exec_opts.simple_echo is set, flags come from OIL_ECHO_SPEC:
  the arguments are joined with arg.sep and followed by arg.end unless -n
  was given.  Otherwise flags are parsed echo-style with ECHO_SPEC, -e
  decodes backslash escapes, and the backslash-c escape stops all further
  output (no trailing newline either).

  Returns:
    0
  """
  if self.exec_opts.simple_echo:
    arg_r = args.Reader(arg_vec.strs, spids=arg_vec.spids)
    arg_r.Next()  # skip 'echo'
    arg, _ = OIL_ECHO_SPEC.Parse(arg_r)

    first = True
    while not arg_r.AtEnd():
      if not first:
        sys.stdout.write(arg.sep)
      first = False
      sys.stdout.write(arg_r.Peek())
      arg_r.Next()

    if not arg.n and arg.end:
      sys.stdout.write(arg.end)
    return 0

  argv = arg_vec.strs[1:]
  arg, arg_index = ECHO_SPEC.ParseLikeEcho(argv)
  argv = argv[arg_index:]

  saw_backslash_c = False
  if arg.e:
    decoded = []
    for raw in argv:
      pieces = []
      for id_, value in match.ECHO_LEXER.Tokens(raw):
        piece = word_compile.EvalCStringToken(id_, value)
        if piece is None:
          # '\c' prints what is there and aborts processing.
          saw_backslash_c = True
          break
        pieces.append(piece)

      decoded.append(''.join(pieces))
      if saw_backslash_c:
        break
    argv = decoded

  if argv:
    sys.stdout.write(argv[0])
    for word in argv[1:]:
      sys.stdout.write(' ')  # arg separator
      sys.stdout.write(word)

  if not (arg.n or saw_backslash_c):
    sys.stdout.write('\n')
  return 0
def Run(self, cmd_val):
  # type: (cmd_value__Argv) -> int
  """Run the echo builtin and write to sys.stdout.

  Flags are parsed echo-style with ECHO_SPEC.  With -e each argument is
  decoded token by token; the backslash-c escape stops all further output,
  including the trailing newline.  With -n the trailing newline is omitted.

  Returns:
    0

  Raises:
    error.Usage: strict_echo is on and more than one argument remains.
  """
  argv = cmd_val.argv[1:]
  arg, arg_index = ECHO_SPEC.ParseLikeEcho(argv)
  argv = argv[arg_index:]

  backslash_c = False  # becomes True once \c has been seen

  if arg.e:
    decoded = []
    for raw in argv:
      pieces = []  # type: List[str]
      lexer = match.EchoLexer(raw)
      while True:
        id_, tok_val = lexer.Next()
        if id_ == Id.Eol_Tok:  # really a NUL terminator
          break

        piece = word_compile.EvalCStringToken(id_, tok_val)
        if piece is None:
          # '\c' keeps what was decoded so far and aborts processing.
          backslash_c = True
          break
        pieces.append(piece)

      decoded.append(''.join(pieces))
      if backslash_c:  # no more args either
        break

    argv = decoded

  if self.exec_opts.strict_echo():
    num_words = len(argv)
    if num_words > 1:
      # TODO: span_id could be more accurate
      raise error.Usage(
          "takes at most one arg when strict_echo is on (hint: add quotes)")
    if num_words == 1:
      sys.stdout.write(argv[0])
  elif argv:
    sys.stdout.write(argv[0])
    for word in argv[1:]:
      sys.stdout.write(' ')  # arg separator
      sys.stdout.write(word)

  if not (arg.n or backslash_c):
    sys.stdout.write('\n')

  return 0
def Echo(arg_vec):
  """echo builtin.

  Writes arg_vec.strs[1:] to sys.stdout separated by single spaces.  -e
  decodes backslash escapes and -n omits the trailing newline; the
  backslash-c escape stops all further output.

  Ideas for a 'set -o sane-echo' mode:
  - only one arg, no implicit joining.
  - no -e: use a C-style string literal instead
  - no -n: use write 'one' instead
  - multiple args on a line: echo-lines one two three

  Returns:
    0
  """
  # NOTE: both getopt and optparse are unsuitable for 'echo' because:
  # - 'echo -c' should print '-c', not fail
  # - echo '---' should print ---, not fail

  words = arg_vec.strs[1:]
  arg, arg_index = ECHO_SPEC.ParseLikeEcho(words)
  words = words[arg_index:]

  stopped = False  # True once '\c' was seen under -e
  if arg.e:
    decoded = []
    for raw in words:
      pieces = []
      for id_, value in match.ECHO_LEXER.Tokens(raw):
        piece = word_compile.EvalCStringToken(id_, value)
        if piece is None:
          # '\c' prints what is there and aborts processing.
          stopped = True
          break
        pieces.append(piece)

      decoded.append(''.join(pieces))
      if stopped:
        break
    words = decoded

  if words:
    sys.stdout.write(words[0])
    for word in words[1:]:
      sys.stdout.write(' ')  # arg separator
      sys.stdout.write(word)

  if not (arg.n or stopped):
    sys.stdout.write('\n')
  return 0
def __call__(self, cmd_val):
  """Run the echo builtin on cmd_val.argv and write to sys.stdout.

  Flags are parsed echo-style with ECHO_SPEC.  -e decodes backslash
  escapes with match.EchoLexer; the backslash-c escape stops all further
  output.  -n omits the trailing newline.

  Returns:
    0
  """
  words = cmd_val.argv[1:]
  arg, arg_index = ECHO_SPEC.ParseLikeEcho(words)
  words = words[arg_index:]

  stopped = False  # True once '\c' was seen under -e
  if arg.e:
    decoded = []
    for raw in words:
      pieces = []
      lexer = match.EchoLexer(raw)
      while not stopped:
        id_, value = lexer.Next()
        if id_ == Id.Eol_Tok:  # really a NUL terminator
          break

        piece = word_compile.EvalCStringToken(id_, value)
        if piece is None:
          # '\c' prints what is there and aborts processing.
          stopped = True
        else:
          pieces.append(piece)

      decoded.append(''.join(pieces))
      if stopped:
        break
    words = decoded

  if words:
    sys.stdout.write(words[0])
    for word in words[1:]:
      sys.stdout.write(' ')  # arg separator
      sys.stdout.write(word)

  if not (arg.n or stopped):
    sys.stdout.write('\n')
  return 0
def _Line(self, arg, var_name):
  # type: (arg_types.read, str) -> int
  """Read one line with _ReadLine() and store it in the variable var_name.

  Unless arg.with_eol is set, a trailing newline (or CR-LF pair) is removed.
  When arg.q is set and the line starts with a single quote, the line is
  parsed by qsn_native.Parse and decoded before being stored.

  Returns:
    0 on success, 1 on EOF or when the quoted line fails to parse.
  """
  line = _ReadLine()
  if not line:  # EOF
    return 1

  if not arg.with_eol and line.endswith('\n'):
    cut = 2 if line.endswith('\r\n') else 1
    line = line[:-cut]

  # Lines that don't start with a single quote aren't QSN.  They may contain
  # a single quote internally, like:
  #
  # Fool's Gold
  if arg.q and line.startswith("'"):
    arena = self.parse_ctx.arena
    lexer = self.parse_ctx._MakeLexer(reader.StringLineReader(line, arena))

    # The parser only yields valid tokens:
    #     Char_Literals, Char_OneChar, Char_Hex, Char_UBraced
    # So we can use word_compile.EvalCStringToken, which is also used for
    # $''.
    # Important: we don't generate Id.Unknown_Backslash because that is valid
    # in echo -e.  We just make it Id.Unknown_Tok?
    try:
      # TODO: read should know about stdin, and redirects, and pipelines?
      with alloc.ctx_Location(arena, source.Stdin('')):
        tokens = qsn_native.Parse(lexer)
    except error.Parse as e:
      ui.PrettyPrintError(e, arena)
      return 1

    line = ''.join([word_compile.EvalCStringToken(t) for t in tokens])

  self.mem.SetValue(lvalue.Named(var_name), value.Str(line),
                    scope_e.LocalOnly)
  return 0
def _MaybeReplaceLeaf(self, node):
  # type: (re_t) -> Tuple[Optional[re_t], bool]
  """Evaluate a regex leaf node, if it is one that needs evaluating.

  Returns:
    (new_leaf, recurse).  new_leaf is the replacement node, or None when the
    node is left as is.  recurse is False only for PosixClass and PerlClass
    nodes.

  Raises:
    NotImplementedError: for a Speck or Token whose id / value isn't handled.
  """
  tag = node.tag

  if tag == re_e.Speck:
    id_ = node.id
    if id_ == Id.Expr_Dot:
      return re.Primitive(Id.Re_Dot), True
    if id_ == Id.Arith_Caret:  # ^
      return re.Primitive(Id.Re_Start), True
    if id_ == Id.Expr_Dollar:  # $
      return re.Primitive(Id.Re_End), True
    raise NotImplementedError(id_)

  if tag == re_e.Token:
    id_ = node.id
    val = node.val

    if id_ == Id.Expr_Name:
      if val != 'dot':
        raise NotImplementedError(val)
      return re.Primitive(Id.Re_Dot), True

    if id_ == Id.Expr_Symbol:
      if val == '%start':
        return re.Primitive(Id.Re_Start), True
      if val == '%end':
        return re.Primitive(Id.Re_End), True
      raise NotImplementedError(val)

    # Must be Id.Char_{OneChar,Hex,Unicode4,Unicode8}
    kind = consts.GetKind(id_)
    assert kind == Kind.Char, id_
    chars = word_compile.EvalCStringToken(id_, val)
    return re.LiteralChars(chars, node.span_id), True

  if tag == re_e.SingleQuoted:
    chars = word_eval.EvalSingleQuoted(node)
    return re.LiteralChars(chars, node.left.span_id), True

  if tag == re_e.DoubleQuoted:
    chars = self.word_ev.EvalDoubleQuotedToString(node)
    return re.LiteralChars(chars, node.left.span_id), True

  if tag == re_e.BracedVarSub:
    chars = self.word_ev.EvalBracedVarSubToString(node)
    return re.LiteralChars(chars, node.spids[0]), True

  if tag == re_e.SimpleVarSub:
    chars = self.word_ev.EvalSimpleVarSubToString(node.token)
    return re.LiteralChars(chars, node.token.span_id), True

  if tag == re_e.Splice:
    obj = self.LookupVar(node.name.val)
    if not isinstance(obj, objects.Regex):
      e_die("Can't splice object of type %r into regex", obj.__class__,
            token=node.name)
    # Note: we only splice the regex, and ignore flags.
    # Should we warn about this?
    return obj.regex, True

  # These are leaves we don't need to do anything with.
  if tag == re_e.PosixClass or tag == re_e.PerlClass:
    return None, False

  return None, True
def _EvalWordPart(self, part, part_vals, quoted=False):
  """Evaluate one word part and append the result(s) to part_vals.

  Args:
    part: the word part to evaluate; dispatch is on part.tag.
    part_vals: Output parameter; part values are appended to it.
    quoted: whether the part appears in a quoted context.

  Returns:
    None

  Raises:
    AssertionError: for an ArrayLiteralPart (it should have been handled at
      the word level) or any unrecognized part / left token id.
  """
  tag = part.tag

  if tag == word_part_e.ArrayLiteralPart:
    raise AssertionError(
        'Array literal should have been handled at word level')

  if tag == word_part_e.LiteralPart:
    part_vals.append(part_value.String(part.token.val, not quoted))
    return

  if tag == word_part_e.EscapedLiteralPart:
    val = part.token.val
    assert len(val) == 2, val  # e.g. \*
    assert val[0] == '\\'
    part_vals.append(part_value.String(val[1], False))
    return

  if tag == word_part_e.SingleQuotedPart:
    left_id = part.left.id
    if left_id == Id.Left_SingleQuote:
      s = ''.join([t.val for t in part.tokens])
    elif left_id == Id.Left_DollarSingleQuote:
      # NOTE: This could be done at compile time
      s = ''.join(
          [word_compile.EvalCStringToken(t.id, t.val) for t in part.tokens])
    else:
      raise AssertionError(part.left.id)
    part_vals.append(part_value.String(s, False))
    return

  if tag == word_part_e.DoubleQuotedPart:
    self._EvalDoubleQuotedPart(part, part_vals)
    return

  if tag == word_part_e.CommandSubPart:
    id_ = part.left_token.id
    if id_ in (Id.Left_CommandSub, Id.Left_Backtick):
      sub_val = self._EvalCommandSub(part.command_list, quoted)
    elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
      sub_val = self._EvalProcessSub(part.command_list, id_)
    else:
      raise AssertionError(id_)
    part_vals.append(sub_val)
    return

  if tag == word_part_e.SimpleVarSub:
    maybe_decay_array = False
    tok = part.token
    # 1. Evaluate from (var_name, var_num, token) -> defined, value
    if tok.id == Id.VSub_DollarName:
      val = self.mem.GetVar(tok.val[1:])
    elif tok.id == Id.VSub_Number:
      val = self._EvalVarNum(int(tok.val[1:]))
    else:
      val, maybe_decay_array = self._EvalSpecialVar(tok.id, quoted)

    val = self._EmptyStrOrError(val, token=part.token)
    if maybe_decay_array and val.tag == value_e.StrArray:
      val = self._DecayArray(val)
    part_vals.append(_ValueToPartValue(val, quoted))
    return

  if tag == word_part_e.BracedVarSub:
    self._EvalBracedVarSub(part, part_vals, quoted)
    return

  if tag == word_part_e.TildeSubPart:
    # We never parse a quoted string into a TildeSubPart.
    assert not quoted
    s = self._EvalTildeSub(part.token)
    part_vals.append(part_value.String(s, False))
    return

  if tag == word_part_e.ArithSubPart:
    num = self.arith_ev.Eval(part.anode)
    part_vals.append(part_value.String(str(num), False))
    return

  if tag == word_part_e.ExtGlobPart:
    # do_split_glob should be renamed 'unquoted'?  or inverted and renamed
    # 'quoted'?
    part_vals.append(part_value.String(part.op.val, True))
    need_sep = False
    for arm in part.arms:
      if need_sep:
        part_vals.append(part_value.String('|', True))  # separator
      need_sep = True
      # This flattens the tree!
      self._EvalWordToParts(arm, False, part_vals)  # eval like not quoted?
    part_vals.append(part_value.String(')', True))  # closing )
    return

  raise AssertionError(part.__class__.__name__)
def Echo(argv):
  """echo builtin.

  Writes the arguments to sys.stdout separated by single spaces, then
  flushes.  -e decodes backslash escapes and -n omits the trailing newline;
  the backslash-c escape stops all further output (no newline either).

  Ideas for a 'set -o sane-echo' mode:
  - only one arg, no implicit joining.
  - no -e: use a C-style string literal instead
  - no -n: use write 'one' instead
  - multiple args on a line: echo-lines one two three

  Args:
    argv: the arguments after the command name, flags included.

  Returns:
    0
  """
  # NOTE: both getopt and optparse are unsuitable for 'echo' because:
  # - 'echo -c' should print '-c', not fail
  # - echo '---' should print ---, not fail

  arg, arg_index = ECHO_SPEC.ParseLikeEcho(argv)
  argv = argv[arg_index:]

  backslash_c = False  # becomes True once '\c' has been seen under -e
  if arg.e:
    new_argv = []
    for a in argv:
      parts = []
      for id_, value in match.ECHO_LEXER.Tokens(a):
        p = word_compile.EvalCStringToken(id_, value)

        # Unusual behavior: '\c' prints what is there and aborts processing!
        if p is None:
          backslash_c = True
          break

        parts.append(p)
      new_argv.append(''.join(parts))
      if backslash_c:  # the remaining args are dropped as well
        break

    # Replace it
    argv = new_argv

  #log('echo argv %s', argv)
  for i, a in enumerate(argv):
    if i != 0:
      sys.stdout.write(' ')  # arg separator
    sys.stdout.write(a)
  if not arg.n and not backslash_c:
    sys.stdout.write('\n')

  # I think the flush fixes a problem with command sub.  But it causes
  # IOError non-deterministically, when spec/background.test.sh is run in
  # parallel with other tests.  So just silence it.
  #
  # The '\c' path used to return before reaching this point, so its output
  # was the only case left unflushed; every path now goes through the flush.
  try:
    sys.stdout.flush()
  except IOError:
    pass  # deliberate best-effort: e.g. [Errno 32] Broken pipe

  return 0
def Run(self, cmd_val):
  # type: (cmd_value__Argv) -> int
  """
  printf: printf [-v var] format [argument ...]

  Parses the format string (parsed formats are memoized in
  self.parse_cache), formats the arguments, and either writes the result to
  mylib.Stdout() or, with -v, assigns it to the given lvalue.

  The format is applied repeatedly until every argument has been consumed
  ('arg recycling').

  Returns:
    0 on success, 1 for an invalid width / precision / integer argument,
    2 when the format string fails to parse.
  """
  attrs, arg_r = flag_spec.ParseCmdVal('printf', cmd_val)
  arg = arg_types.printf(attrs.attrs)

  fmt, fmt_spid = arg_r.ReadRequired2('requires a format string')
  varargs, spids = arg_r.Rest2()

  #log('fmt %s', fmt)
  #log('vals %s', vals)

  arena = self.parse_ctx.arena
  if fmt in self.parse_cache:
    parts = self.parse_cache[fmt]
  else:
    line_reader = reader.StringLineReader(fmt, arena)
    # TODO: Make public
    lexer = self.parse_ctx._MakeLexer(line_reader)
    parser = _FormatStringParser(lexer)

    with alloc.ctx_Location(arena, source.ArgvWord(fmt_spid)):
      try:
        parts = parser.Parse()
      except error.Parse as e:
        self.errfmt.PrettyPrintError(e)
        return 2  # parse error

    # Only successfully parsed formats are cached.
    self.parse_cache[fmt] = parts

  # Debugging aid, disabled.
  if 0:
    print()
    for part in parts:
      part.PrettyPrint()
      print()

  out = []  # type: List[str]
  arg_index = 0  # index of the next unconsumed entry in varargs
  num_args = len(varargs)
  backslash_c = False  # set when %b sees '\c'

  while True:
    for part in parts:
      UP_part = part
      if part.tag_() == printf_part_e.Literal:
        part = cast(printf_part__Literal, UP_part)
        token = part.token
        if token.id == Id.Format_EscapedPercent:
          s = '%'
        else:
          s = word_compile.EvalCStringToken(token)
        out.append(s)

      elif part.tag_() == printf_part_e.Percent:
        part = cast(printf_part__Percent, UP_part)
        flags = []  # type: List[str]
        if len(part.flags) > 0:
          for flag_token in part.flags:
            flags.append(flag_token.val)

        width = -1  # nonexistent
        if part.width:
          if part.width.id in (Id.Format_Num, Id.Format_Zero):
            width_str = part.width.val
            width_spid = part.width.span_id
          elif part.width.id == Id.Format_Star:
            # '*' takes the width from the next argument, if there is one.
            if arg_index < num_args:
              width_str = varargs[arg_index]
              width_spid = spids[arg_index]
              arg_index += 1
            else:
              width_str = ''  # invalid
              width_spid = runtime.NO_SPID
          else:
            raise AssertionError()

          try:
            width = int(width_str)
          except ValueError:
            # No argument to blame: point at the width in the format instead.
            if width_spid == runtime.NO_SPID:
              width_spid = part.width.span_id
            self.errfmt.Print_("printf got invalid width %r" % width_str,
                               span_id=width_spid)
            return 1

        precision = -1  # nonexistent
        if part.precision:
          if part.precision.id == Id.Format_Dot:
            # A lone '.' is treated as precision 0.
            precision_str = '0'
            precision_spid = part.precision.span_id
          elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
            precision_str = part.precision.val
            precision_spid = part.precision.span_id
          elif part.precision.id == Id.Format_Star:
            # '*' takes the precision from the next argument, if there is one.
            if arg_index < num_args:
              precision_str = varargs[arg_index]
              precision_spid = spids[arg_index]
              arg_index += 1
            else:
              precision_str = ''
              precision_spid = runtime.NO_SPID
          else:
            raise AssertionError()

          try:
            precision = int(precision_str)
          except ValueError:
            if precision_spid == runtime.NO_SPID:
              precision_spid = part.precision.span_id
            self.errfmt.Print_(
                'printf got invalid precision %r' % precision_str,
                span_id=precision_spid)
            return 1

        #log('index=%d n=%d', arg_index, num_args)
        # The value being formatted; a missing argument acts as ''.
        if arg_index < num_args:
          s = varargs[arg_index]
          word_spid = spids[arg_index]
          arg_index += 1
        else:
          s = ''
          word_spid = runtime.NO_SPID

        typ = part.type.val
        if typ == 's':
          if precision >= 0:
            s = s[:precision]  # truncate
        elif typ == 'q':
          s = qsn.maybe_shell_encode(s)
        elif typ == 'b':
          # Process just like echo -e, except \c handling is simpler.

          c_parts = []  # type: List[str]
          lex = match.EchoLexer(s)
          while True:
            id_, tok_val = lex.Next()
            if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
              break

            # TODO: add span_id from argv
            tok = Token(id_, runtime.NO_SPID, tok_val)
            p = word_compile.EvalCStringToken(tok)

            # Unusual behavior: '\c' aborts processing!
            if p is None:
              backslash_c = True
              break

            c_parts.append(p)
          s = ''.join(c_parts)

        elif typ in 'diouxX' or part.type.id == Id.Format_Time:
          try:
            d = int(s)
          except ValueError:
            if len(s) >= 1 and s[0] in '\'"':
              # A leading quote means: use the code of the next character.
              # TODO: utf-8 decode s[1:] to be more correct.  Probably
              # depends on issue #366, a utf-8 library.
              # Note: len(s) == 1 means there is a NUL (0) after the quote..
              d = ord(s[1]) if len(s) >= 2 else 0
            elif part.type.id == Id.Format_Time and len(
                s) == 0 and word_spid == runtime.NO_SPID:
              # Note: No argument means -1 for %(...)T as in Bash Reference
              # Manual 4.2 "If no argument is specified, conversion behaves
              # as if -1 had been given."
              d = -1
            else:
              if word_spid == runtime.NO_SPID:
                # Blame the format string
                blame_spid = part.type.span_id
              else:
                blame_spid = word_spid
              self.errfmt.Print_(
                  'printf expected an integer, got %r' % s,
                  span_id=blame_spid)
              return 1

          if typ in 'di':
            s = str(d)
          elif typ in 'ouxX':
            if d < 0:
              e_die(
                  "Can't format negative number %d with %%%s",
                  d,
                  typ,
                  span_id=part.type.span_id)
            if typ == 'u':
              s = str(d)
            elif typ == 'o':
              s = mylib.octal(d)
            elif typ == 'x':
              s = mylib.hex_lower(d)
            elif typ == 'X':
              s = mylib.hex_upper(d)

          elif part.type.id == Id.Format_Time:
            # %(...)T

            # Initialize timezone:
            #   `localtime' uses the current timezone information initialized
            #   by `tzset'.  The function `tzset' refers to the environment
            #   variable `TZ'.  When the exported variable `TZ' is present,
            #   its value should be reflected in the real environment
            #   variable `TZ' before call of `tzset'.
            #
            # Note: unlike LANG, TZ doesn't seem to change behavior if it's
            # not exported.
            #
            # TODO: In Oil, provide an API that doesn't rely on libc's
            # global state.

            tzcell = self.mem.GetCell('TZ')
            if tzcell and tzcell.exported and tzcell.val.tag_(
            ) == value_e.Str:
              tzval = cast(value__Str, tzcell.val)
              posix.putenv('TZ', tzval.s)

            time_.tzset()

            # Handle special values:
            #   User can specify two special values -1 and -2 as in Bash
            #   Reference Manual 4.2: "Two special argument values may be
            #   used: -1 represents the current time, and -2 represents the
            #   time the shell was invoked." from
            #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
            if d == -1:  # the current time
              ts = time_.time()
            elif d == -2:  # the shell start time
              ts = self.shell_start_time
            else:
              ts = d

            # typ[1:-2] drops the first character and the last two of the
            # type token, leaving the strftime format.
            s = time_.strftime(typ[1:-2], time_.localtime(ts))
            if precision >= 0:
              s = s[:precision]  # truncate

          else:
            raise AssertionError()

        else:
          raise AssertionError()

        if width >= 0:
          if len(flags):
            # '-' (left-justify) takes priority over '0' (zero-pad).
            if '-' in flags:
              s = s.ljust(width, ' ')
            elif '0' in flags:
              s = s.rjust(width, '0')
            else:
              pass
          else:
            s = s.rjust(width, ' ')

        out.append(s)

      else:
        raise AssertionError()

      if backslash_c:  # 'printf %b a\cb xx' - \c terminates processing!
        # NOTE(review): as laid out here this leaves only the inner loop;
        # the outer loop still re-checks arg_index -- confirm that is
        # intended.
        break

    if arg_index >= num_args:
      break
    # Otherwise there are more args.  So cycle through the loop once more to
    # implement the 'arg recycling' behavior.

  result = ''.join(out)
  if arg.v is not None:
    # TODO: get the span_id for arg.v!
    v_spid = runtime.NO_SPID

    # The -v operand is parsed as an arithmetic expression and evaluated as
    # an lvalue.
    arena = self.parse_ctx.arena
    a_parser = self.parse_ctx.MakeArithParser(arg.v)

    with alloc.ctx_Location(arena, source.ArgvWord(v_spid)):
      try:
        anode = a_parser.Parse()
      except error.Parse as e:
        ui.PrettyPrintError(e, arena)  # show parse error
        e_usage('Invalid -v expression', span_id=v_spid)

    lval = self.arith_ev.EvalArithLhs(anode, v_spid)

    if not self.exec_opts.eval_unsafe_arith(
    ) and lval.tag_() != lvalue_e.Named:
      e_usage(
          '-v expected a variable name.  shopt -s eval_unsafe_arith allows expressions',
          span_id=v_spid)

    state.SetRef(self.mem, lval, value.Str(result))
  else:
    mylib.Stdout().write(result)
  return 0
def __call__(self, cmd_val):
  """
  printf: printf [-v var] format [argument ...]

  Parses the format string (parsed formats are memoized in
  self.parse_cache), formats the arguments, and either writes the result to
  sys.stdout or, with -v, stores it in the named variable.

  The format is applied repeatedly until every argument has been consumed
  ('arg recycling').

  Returns:
    0 on success, 1 for an invalid number argument, 2 when the format
    string fails to parse.

  Raises:
    args.UsageError: the -v operand is not a valid variable name.
  """
  arg_r = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids)
  arg_r.Next()  # skip argv[0]
  arg, _ = PRINTF_SPEC.Parse(arg_r)

  fmt, fmt_spid = arg_r.ReadRequired2('requires a format string')
  varargs, spids = arg_r.Rest2()

  #log('fmt %s', fmt)
  #log('vals %s', vals)

  arena = self.parse_ctx.arena
  if fmt in self.parse_cache:
    parts = self.parse_cache[fmt]
  else:
    line_reader = reader.StringLineReader(fmt, arena)
    # TODO: Make public
    lexer = self.parse_ctx._MakeLexer(line_reader)
    p = _FormatStringParser(lexer)
    # The source is pushed for the duration of the parse and always popped.
    arena.PushSource(source.ArgvWord(fmt_spid))
    try:
      parts = p.Parse()
    except error.Parse as e:
      self.errfmt.PrettyPrintError(e)
      return 2  # parse error
    finally:
      arena.PopSource()

    # Only successfully parsed formats are cached.
    self.parse_cache[fmt] = parts

  # Debugging aid, disabled.
  if 0:
    print()
    for part in parts:
      part.PrettyPrint()
      print()

  out = []
  arg_index = 0  # index of the argument used by the next % part
  num_args = len(varargs)

  while True:
    for part in parts:
      if isinstance(part, printf_part.Literal):
        token = part.token
        if token.id == Id.Format_EscapedPercent:
          s = '%'
        else:
          s = word_compile.EvalCStringToken(token.id, token.val)
        out.append(s)

      elif isinstance(part, printf_part.Percent):
        # A missing argument acts as the empty string.
        try:
          s = varargs[arg_index]
          word_spid = spids[arg_index]
        except IndexError:
          s = ''
          word_spid = runtime.NO_SPID

        typ = part.type.val
        if typ == 's':
          if part.precision:
            precision = int(part.precision.val)
            s = s[:precision]  # truncate
        elif typ == 'q':
          s = string_ops.ShellQuoteOneLine(s)
        elif typ in 'diouxX':
          try:
            d = int(s)
          except ValueError:
            if len(s) >= 2 and s[0] in '\'"':
              # A leading quote means: use the code of the next character.
              # TODO: utf-8 decode s[1:] to be more correct.  Probably
              # depends on issue #366, a utf-8 library.
              d = ord(s[1])
            else:
              # This works around the fact that in the arg recycling case, you have no spid.
              if word_spid == runtime.NO_SPID:
                self.errfmt.Print(
                    "printf got invalid number %r for this substitution",
                    s,
                    span_id=part.type.span_id)
              else:
                self.errfmt.Print(
                    "printf got invalid number %r", s, span_id=word_spid)

              return 1

          if typ in 'di':
            s = str(d)
          elif typ in 'ouxX':
            if d < 0:
              e_die(
                  "Can't format negative number %d with %%%s",
                  d,
                  typ,
                  span_id=part.type.span_id)
            if typ == 'u':
              s = str(d)
            elif typ == 'o':
              s = '%o' % d
            elif typ == 'x':
              s = '%x' % d
            elif typ == 'X':
              s = '%X' % d
          else:
            raise AssertionError

        else:
          raise AssertionError

        if part.width:
          width = int(part.width.val)
          if part.flag:
            flag = part.flag.val
            if flag == '-':
              s = s.ljust(width, ' ')
            elif flag == '0':
              s = s.rjust(width, '0')
            else:
              pass
          else:
            s = s.rjust(width, ' ')

        out.append(s)
        # Advanced even when the argument was missing.
        arg_index += 1

      else:
        raise AssertionError

    if arg_index >= num_args:
      break
    # Otherwise there are more args.  So cycle through the loop once more to
    # implement the 'arg recycling' behavior.

  result = ''.join(out)
  if arg.v:
    var_name = arg.v

    # Notes:
    # - bash allows a[i] here (as in unset and ${!x}), but we haven't
    # implemented it.
    # - TODO: get the span_id for arg.v!
    if not match.IsValidVarName(var_name):
      raise args.UsageError('got invalid variable name %r' % var_name)
    state.SetStringDynamic(self.mem, var_name, result)
  else:
    sys.stdout.write(result)
  return 0
def __call__(self, arg_vec):
  """
  printf: printf [-v var] format [argument ...]

  Parses the format string (parsed formats are memoized in
  self.parse_cache), formats the arguments, and either writes the result to
  sys.stdout or, with -v, stores it in the named variable.  Only the %s, %q,
  %d and %i conversions are handled here.

  The format is applied repeatedly until every argument has been consumed
  ('arg recycling').

  Returns:
    0 on success, 1 for an invalid number argument, 2 when the format
    string fails to parse.

  Raises:
    args.UsageError: the -v operand is not a valid variable name.
  """
  arg_r = args.Reader(arg_vec.strs, spids=arg_vec.spids)
  arg_r.Next()  # skip argv[0]
  arg, _ = PRINTF_SPEC.Parse(arg_r)

  fmt, fmt_spid = arg_r.ReadRequired2('requires a format string')
  varargs, spids = arg_r.Rest2()

  #from core.util import log
  #log('fmt %s', fmt)
  #log('vals %s', vals)

  arena = self.parse_ctx.arena
  if fmt in self.parse_cache:
    parts = self.parse_cache[fmt]
  else:
    line_reader = reader.StringLineReader(fmt, arena)
    # TODO: Make public
    lexer = self.parse_ctx._MakeLexer(line_reader)
    p = _FormatStringParser(lexer)
    # The source is pushed for the duration of the parse and always popped.
    arena.PushSource(source.ArgvWord(fmt_spid))
    try:
      parts = p.Parse()
    except util.ParseError as e:
      self.errfmt.PrettyPrintError(e)
      return 2  # parse error
    finally:
      arena.PopSource()

    # Only successfully parsed formats are cached.
    self.parse_cache[fmt] = parts

  # Debugging aid, disabled.
  if 0:
    print()
    for part in parts:
      part.PrettyPrint()
      print()

  out = []
  arg_index = 0  # index of the argument used by the next % part
  num_args = len(varargs)

  while True:
    for part in parts:
      if isinstance(part, printf_part.Literal):
        token = part.token
        if token.id == Id.Format_EscapedPercent:
          s = '%'
        else:
          s = word_compile.EvalCStringToken(token.id, token.val)
        out.append(s)

      elif isinstance(part, printf_part.Percent):
        # A missing argument acts as the empty string.
        try:
          s = varargs[arg_index]
          word_spid = spids[arg_index]
        except IndexError:
          s = ''
          word_spid = const.NO_INTEGER

        typ = part.type.val
        if typ == 's':
          pass  # val remains the same
        elif typ == 'q':
          s = string_ops.ShellQuoteOneLine(s)
        elif typ in 'di':
          try:
            d = int(s)
          except ValueError:
            # This works around the fact that in the arg recycling case, you have no spid.
            if word_spid == const.NO_INTEGER:
              self.errfmt.Print(
                  "printf got invalid number %r for this substitution",
                  s,
                  span_id=part.type.span_id)
            else:
              self.errfmt.Print(
                  "printf got invalid number %r", s, span_id=word_spid)

            return 1
          # Round-trip through int() so the number is normalized.
          s = str(d)
        else:
          raise AssertionError

        if part.width:
          width = int(part.width.val)
          if part.flag:
            flag = part.flag.val
            if flag == '-':
              s = s.ljust(width, ' ')
            elif flag == '0':
              s = s.rjust(width, '0')
            else:
              pass
          else:
            s = s.rjust(width, ' ')

        out.append(s)
        # Advanced even when the argument was missing.
        arg_index += 1

      else:
        raise AssertionError

    if arg_index >= num_args:
      break
    # Otherwise there are more args.  So cycle through the loop once more to
    # implement the 'arg recycling' behavior.

  result = ''.join(out)
  if arg.v:
    var_name = arg.v

    # Notes:
    # - bash allows a[i] here (as in unset and ${!x}), but we haven't
    # implemented it.
    # - TODO: get the span_id for arg.v!
    if not match.IsValidVarName(var_name):
      raise args.UsageError('got invalid variable name %r' % var_name)
    state.SetStringDynamic(self.mem, var_name, result)
  else:
    sys.stdout.write(result)
  return 0
def Run(self, cmd_val):
  # type: (cmd_value__Argv) -> int
  """
  printf: printf [-v var] format [argument ...]

  Parses the format string (parsed formats are memoized in
  self.parse_cache), formats the arguments, and either writes the result to
  sys.stdout or, with -v, stores it in the named variable.

  The format is applied repeatedly until every argument has been consumed
  ('arg recycling'), unless %b sees a backslash-c escape, which ends all
  processing.

  Returns:
    0 on success, 1 for an invalid width / precision / number argument,
    2 when the format string fails to parse.

  Raises:
    error.Usage: the -v operand is not a valid variable name.
  """
  arg_r = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids)
  arg_r.Next()  # skip argv[0]
  arg = PRINTF_SPEC.Parse(arg_r)

  fmt, fmt_spid = arg_r.ReadRequired2('requires a format string')
  varargs, spids = arg_r.Rest2()

  #log('fmt %s', fmt)
  #log('vals %s', vals)

  arena = self.parse_ctx.arena
  if fmt in self.parse_cache:
    parts = self.parse_cache[fmt]
  else:
    line_reader = reader.StringLineReader(fmt, arena)
    # TODO: Make public
    lexer = self.parse_ctx._MakeLexer(line_reader)
    p = _FormatStringParser(lexer)
    # The source is pushed for the duration of the parse and always popped.
    arena.PushSource(source.ArgvWord(fmt_spid))
    try:
      parts = p.Parse()
    except error.Parse as e:
      self.errfmt.PrettyPrintError(e)
      return 2  # parse error
    finally:
      arena.PopSource()

    # Only successfully parsed formats are cached.
    self.parse_cache[fmt] = parts

  # Debugging aid, disabled.
  if 0:
    print()
    for part in parts:
      part.PrettyPrint()
      print()

  out = []
  arg_index = 0  # index of the next unconsumed entry in varargs
  num_args = len(varargs)
  backslash_c = False  # set when %b sees '\c'

  while True:
    for part in parts:
      if isinstance(part, printf_part.Literal):
        token = part.token
        if token.id == Id.Format_EscapedPercent:
          s = '%'
        else:
          s = word_compile.EvalCStringToken(token.id, token.val)
        out.append(s)

      elif isinstance(part, printf_part.Percent):
        flags = None
        if len(part.flags) > 0:
          flags = ''
          for flag_token in part.flags:
            flags += flag_token.val

        width = None
        if part.width:
          if part.width.id in (Id.Format_Num, Id.Format_Zero):
            width_str = part.width.val
            width_spid = part.width.span_id
          elif part.width.id == Id.Format_Star:
            # '*' takes the width from the next argument, if there is one.
            if arg_index < num_args:
              width_str = varargs[arg_index]
              width_spid = spids[arg_index]
              arg_index += 1
            else:
              width_str = ''  # invalid
              width_spid = runtime.NO_SPID
          else:
            raise AssertionError()

          try:
            width = int(width_str)
          except ValueError:
            # No argument to blame: point at the width in the format instead.
            if width_spid == runtime.NO_SPID:
              width_spid = part.width.span_id
            # Report the offending width text.  (The value to format, 's',
            # hasn't been read yet at this point.)
            self.errfmt.Print(
                "printf got invalid number %r for the width",
                width_str,
                span_id=width_spid)
            return 1

        precision = None
        if part.precision:
          if part.precision.id == Id.Format_Dot:
            # A lone '.' is treated as precision 0.
            precision_str = '0'
            precision_spid = part.precision.span_id
          elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
            precision_str = part.precision.val
            precision_spid = part.precision.span_id
          elif part.precision.id == Id.Format_Star:
            # '*' takes the precision from the next argument, if there is one.
            if arg_index < num_args:
              precision_str = varargs[arg_index]
              precision_spid = spids[arg_index]
              arg_index += 1
            else:
              precision_str = ''  # invalid
              precision_spid = runtime.NO_SPID
          else:
            raise AssertionError()

          try:
            precision = int(precision_str)
          except ValueError:
            if precision_spid == runtime.NO_SPID:
              precision_spid = part.precision.span_id
            # Report the offending precision text, not 's' (see above).
            self.errfmt.Print(
                "printf got invalid number %r for the precision",
                precision_str,
                span_id=precision_spid)
            return 1

        # The value being formatted; a missing argument acts as ''.
        if arg_index < num_args:
          s = varargs[arg_index]
          word_spid = spids[arg_index]
          arg_index += 1
        else:
          s = ''
          word_spid = runtime.NO_SPID

        typ = part.type.val
        if typ == 's':
          if precision is not None:
            s = s[:precision]  # truncate
        elif typ == 'q':
          s = qsn.maybe_shell_encode(s)
        elif typ == 'b':
          # Process just like echo -e, except \c handling is simpler.

          # This list must not be called 'parts': that name holds the parsed
          # format, which the next arg-recycling pass iterates over again.
          c_parts = []  # type: List[str]
          lex = match.EchoLexer(s)
          while True:
            id_, tok_val = lex.Next()
            if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
              break

            p = word_compile.EvalCStringToken(id_, tok_val)

            # Unusual behavior: '\c' aborts processing!
            if p is None:
              backslash_c = True
              break

            c_parts.append(p)
          s = ''.join(c_parts)

        elif typ in 'diouxX' or part.type.id == Id.Format_Time:
          try:
            d = int(s)
          except ValueError:
            if len(s) >= 1 and s[0] in '\'"':
              # A leading quote means: use the code of the next character.
              # TODO: utf-8 decode s[1:] to be more correct.  Probably
              # depends on issue #366, a utf-8 library.
              # Note: len(s) == 1 means there is a NUL (0) after the quote..
              d = ord(s[1]) if len(s) >= 2 else 0
            elif (part.type.id == Id.Format_Time and len(s) == 0 and
                  word_spid == runtime.NO_SPID):
              # Note: No argument means -1 for %(...)T as in Bash Reference
              # Manual 4.2 "If no argument is specified, conversion behaves
              # as if -1 had been given."
              d = -1
            else:
              # This works around the fact that in the arg recycling case, you have no spid.
              if word_spid == runtime.NO_SPID:
                self.errfmt.Print(
                    "printf got invalid number %r for this substitution",
                    s,
                    span_id=part.type.span_id)
              else:
                self.errfmt.Print(
                    "printf got invalid number %r", s, span_id=word_spid)

              return 1

          if typ in 'di':
            s = str(d)
          elif typ in 'ouxX':
            if d < 0:
              e_die(
                  "Can't format negative number %d with %%%s",
                  d,
                  typ,
                  span_id=part.type.span_id)
            if typ == 'u':
              s = str(d)
            elif typ == 'o':
              s = '%o' % d
            elif typ == 'x':
              s = '%x' % d
            elif typ == 'X':
              s = '%X' % d

          elif part.type.id == Id.Format_Time:
            # %(...)T

            # Initialize timezone:
            #   `localtime' uses the current timezone information initialized
            #   by `tzset'.  The function `tzset' refers to the environment
            #   variable `TZ'.  When the exported variable `TZ' is present,
            #   its value should be reflected in the real environment
            #   variable `TZ' before call of `tzset'.
            #
            # Note: unlike LANG, TZ doesn't seem to change behavior if it's
            # not exported.
            #
            # TODO: In Oil, provide an API that doesn't rely on libc's
            # global state.

            tzcell = self.mem.GetCell('TZ')
            if (tzcell and tzcell.exported and
                tzcell.val.tag_() == value_e.Str):
              tzval = cast(value__Str, tzcell.val)
              posix.putenv('TZ', tzval.s)

            time.tzset()

            # Handle special values:
            #   User can specify two special values -1 and -2 as in Bash
            #   Reference Manual 4.2: "Two special argument values may be
            #   used: -1 represents the current time, and -2 represents the
            #   time the shell was invoked." from
            #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
            if d == -1:  # the current time
              d = time.time()
            elif d == -2:  # the shell start time
              d = shell_start_time

            # typ[1:-2] drops the first character and the last two of the
            # type token, leaving the strftime format.
            s = time.strftime(typ[1:-2], time.localtime(d))
            if precision is not None:
              s = s[:precision]  # truncate

          else:
            raise AssertionError()

        else:
          raise AssertionError()

        if width is not None:
          if flags:
            # '-' (left-justify) takes priority over '0' (zero-pad).
            if '-' in flags:
              s = s.ljust(width, ' ')
            elif '0' in flags:
              s = s.rjust(width, '0')
            else:
              pass
          else:
            s = s.rjust(width, ' ')

        out.append(s)

      else:
        raise AssertionError()

      if backslash_c:  # 'printf %b a\cb xx' - \c terminates processing!
        break

    # '\c' ends everything, so leftover args must not trigger another pass.
    if backslash_c or arg_index >= num_args:
      break
    # Otherwise there are more args.  So cycle through the loop once more to
    # implement the 'arg recycling' behavior.

  result = ''.join(out)
  if arg.v:
    var_name = arg.v

    # Notes:
    # - bash allows a[i] here (as in unset and ${!x}), but we haven't
    # implemented it.
    # - TODO: get the span_id for arg.v!
    if not match.IsValidVarName(var_name):
      raise error.Usage('got invalid variable name %r' % var_name)
    state.SetStringDynamic(self.mem, var_name, result)
  else:
    sys.stdout.write(result)
  return 0
def _Format(self, parts, varargs, spids, out):
    # type: (List[printf_part_t], List[str], List[int], List[str]) -> int
    """Hairy printf formatting logic.

    Walks the parsed format string and appends one formatted piece to `out`
    per part, consuming strings from `varargs` as widths, precisions and
    conversions require them.  If arguments remain after a full pass over
    `parts`, the format is applied again ("arg recycling").  A backslash-c
    escape inside a %b argument stops all further processing.

    Integer padding follows the C/POSIX printf rules:
      - a negative '*' width means left-justify with the absolute width
      - the '-' flag overrides the '0' flag
      - the '0' flag is ignored when a precision is given
      - the precision is a minimum number of DIGITS, not counting the sign

    Args:
      parts: parsed format string (literal parts and percent parts).
      varargs: the argument strings to be formatted.
      spids: span IDs indexed in parallel with varargs; used to blame errors.
      out: output list; formatted pieces are appended to it.

    Returns:
      0 on success, or 1 after reporting an invalid width, precision or
      integer through self.errfmt.
    """
    arg_index = 0
    num_args = len(varargs)
    backslash_c = False

    while True:  # loop over arguments
        for part in parts:  # loop over parsed format string
            UP_part = part
            if part.tag_() == printf_part_e.Literal:
                part = cast(printf_part__Literal, UP_part)
                token = part.token
                if token.id == Id.Format_EscapedPercent:
                    s = '%'
                else:
                    s = word_compile.EvalCStringToken(token)
                out.append(s)

            elif part.tag_() == printf_part_e.Percent:
                # Note: This case is very long, but hard to refactor because
                # of the error cases and "recycling" of args!  (arg_index,
                # return 1, etc.)
                part = cast(printf_part__Percent, UP_part)

                # TODO: These calculations are independent of the data, so
                # could be cached
                flags = []  # type: List[str]
                if len(part.flags) > 0:
                    for flag_token in part.flags:
                        flags.append(flag_token.val)

                # May also be turned on below by a negative '*' width.
                left_justify = '-' in flags

                width = -1  # nonexistent
                if part.width:
                    if part.width.id in (Id.Format_Num, Id.Format_Zero):
                        width_str = part.width.val
                        width_spid = part.width.span_id
                    elif part.width.id == Id.Format_Star:
                        # The width is taken from the next argument.
                        if arg_index < num_args:
                            width_str = varargs[arg_index]
                            width_spid = spids[arg_index]
                            arg_index += 1
                        else:
                            width_str = ''  # invalid
                            width_spid = runtime.NO_SPID
                    else:
                        raise AssertionError()

                    try:
                        width = int(width_str)
                    except ValueError:
                        if width_spid == runtime.NO_SPID:
                            width_spid = part.width.span_id
                        self.errfmt.Print_(
                            "printf got invalid width %r" % width_str,
                            span_id=width_spid)
                        return 1

                    # C semantics: a negative width is a '-' flag followed
                    # by a positive width.  Doing this here also keeps a
                    # user-supplied -1 distinct from the "nonexistent"
                    # sentinel.
                    if width < 0:
                        width = -width
                        left_justify = True

                # A negative precision (only possible via '*') is treated
                # like a missing precision, as in C.
                precision = -1  # nonexistent
                if part.precision:
                    if part.precision.id == Id.Format_Dot:
                        precision_str = '0'
                        precision_spid = part.precision.span_id
                    elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
                        precision_str = part.precision.val
                        precision_spid = part.precision.span_id
                    elif part.precision.id == Id.Format_Star:
                        # The precision is taken from the next argument.
                        if arg_index < num_args:
                            precision_str = varargs[arg_index]
                            precision_spid = spids[arg_index]
                            arg_index += 1
                        else:
                            precision_str = ''
                            precision_spid = runtime.NO_SPID
                    else:
                        raise AssertionError()

                    try:
                        precision = int(precision_str)
                    except ValueError:
                        if precision_spid == runtime.NO_SPID:
                            precision_spid = part.precision.span_id
                        self.errfmt.Print_(
                            'printf got invalid precision %r' % precision_str,
                            span_id=precision_spid)
                        return 1

                # The value to convert.  A missing argument becomes '', and
                # has_arg records the difference for the integer cases.
                if arg_index < num_args:
                    s = varargs[arg_index]
                    word_spid = spids[arg_index]
                    arg_index += 1
                    has_arg = True
                else:
                    s = ''
                    word_spid = runtime.NO_SPID
                    has_arg = False

                typ = part.type.val
                if typ == 's':
                    if precision >= 0:
                        s = s[:precision]  # truncate

                elif typ == 'q':
                    # TODO: most shells give \' for single quote, while OSH
                    # gives $'\''; this could matter when SSH'ing
                    s = qsn.maybe_shell_encode(s)

                elif typ == 'b':
                    # Process just like echo -e, except \c handling is
                    # simpler.
                    c_parts = []  # type: List[str]
                    lex = match.EchoLexer(s)
                    while True:
                        id_, tok_val = lex.Next()
                        if id_ == Id.Eol_Tok:
                            # Note: This is really a NUL terminator
                            break

                        # TODO: add span_id from argv
                        tok = Token(id_, runtime.NO_SPID, tok_val)
                        p = word_compile.EvalCStringToken(tok)

                        # Unusual behavior: '\c' aborts processing!
                        if p is None:
                            backslash_c = True
                            break

                        c_parts.append(p)
                    s = ''.join(c_parts)

                elif part.type.id == Id.Format_Time or typ in 'diouxX':
                    # %(...)T and %d share this complex integer conversion
                    # logic
                    try:
                        # note: spaces like ' -42 ' accepted and normalized
                        d = int(s)
                    except ValueError:
                        # 'a is interpreted as the ASCII value of 'a'
                        if len(s) >= 1 and s[0] in '\'"':
                            # TODO: utf-8 decode s[1:] to be more correct.
                            # Probably depends on issue #366, a utf-8
                            # library.
                            # Note: len(s) == 1 means there is a NUL (0)
                            # after the quote..
                            d = ord(s[1]) if len(s) >= 2 else 0

                        # No argument means -1 for %(...)T as in Bash
                        # Reference Manual 4.2 "If no argument is specified,
                        # conversion behaves as if -1 had been given."
                        elif not has_arg and part.type.id == Id.Format_Time:
                            d = -1

                        else:
                            # Blame the word if there was one, otherwise the
                            # conversion in the format string.
                            blame_spid = (
                                word_spid if has_arg else part.type.span_id)
                            self.errfmt.Print_(
                                'printf expected an integer, got %r' % s,
                                span_id=blame_spid)
                            return 1

                    if part.type.id == Id.Format_Time:
                        # Initialize timezone:
                        #   `localtime' uses the current timezone
                        #   information initialized by `tzset'.  The
                        #   function `tzset' refers to the environment
                        #   variable `TZ'.  When the exported variable `TZ'
                        #   is present, its value should be reflected in the
                        #   real environment variable `TZ' before call of
                        #   `tzset'.
                        #
                        # Note: unlike LANG, TZ doesn't seem to change
                        # behavior if it's not exported.
                        #
                        # TODO: In Oil, provide an API that doesn't rely on
                        # libc's global state.
                        tzcell = self.mem.GetCell('TZ')
                        if (tzcell and tzcell.exported and
                                tzcell.val.tag_() == value_e.Str):
                            tzval = cast(value__Str, tzcell.val)
                            posix.putenv('TZ', tzval.s)

                        time_.tzset()

                        # Handle special values:
                        #   User can specify two special values -1 and -2 as
                        #   in Bash Reference Manual 4.2: "Two special
                        #   argument values may be used: -1 represents the
                        #   current time, and -2 represents the time the
                        #   shell was invoked." from
                        #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
                        if d == -1:  # the current time
                            ts = time_.time()
                        elif d == -2:  # the shell start time
                            ts = self.shell_start_time
                        else:
                            ts = d

                        # typ looks like '(...)T'; strip '(' and ')T'.
                        s = time_.strftime(typ[1:-2], time_.localtime(ts))
                        if precision >= 0:
                            s = s[:precision]  # truncate

                    else:  # typ in 'diouxX'
                        # Disallowed because it depends on 32- or 64- bit
                        if d < 0 and typ in 'ouxX':
                            e_die(
                                "Can't format negative number %d with %%%s",
                                d, typ, span_id=part.type.span_id)

                        if typ == 'o':
                            s = mylib.octal(d)
                        elif typ == 'x':
                            s = mylib.hex_lower(d)
                        elif typ == 'X':
                            s = mylib.hex_upper(d)
                        else:  # diu
                            s = str(d)  # without spaces like ' -42 '

                        # Split off the sign so that padding decisions are
                        # made on the digits alone.
                        negative = (s[0] == '-')
                        if negative:
                            digits = s[1:]
                            sign = '-'
                        else:
                            digits = s
                            sign = ''

                        # There are TWO different ways to ZERO PAD, and they
                        # differ on the negative sign!  See
                        # spec/builtin-printf
                        #
                        # Style 2 (precision) wins over style 1 ('0' flag)
                        # because C ignores '0' when a precision is given;
                        # '0' is also ignored when left-justifying.
                        zero_pad = 0  # no zero padding
                        if precision >= 0:
                            if len(digits) < precision:
                                zero_pad = 2  # style 2
                        elif width >= 0 and '0' in flags and not left_justify:
                            zero_pad = 1  # style 1

                        if zero_pad == 1:
                            # [%06d] -42 becomes [-00042] (6 TOTAL)
                            n = width - len(sign)
                            s = sign + digits.rjust(n, '0')
                        elif zero_pad == 2:
                            # [%6.6d] -42 becomes [-000042] (1 for '-' + 6)
                            s = sign + digits.rjust(precision, '0')

                else:
                    raise AssertionError()

                # Space padding applies to every conversion type.
                if width >= 0:
                    if left_justify:
                        s = s.ljust(width, ' ')
                    else:
                        s = s.rjust(width, ' ')

                out.append(s)

            else:
                raise AssertionError()

            if backslash_c:
                # 'printf %b a\cb xx' - \c terminates processing!
                break

        if arg_index >= num_args:
            break
        # Otherwise there are more args.  So cycle through the loop once
        # more to implement the 'arg recycling' behavior.

    return 0