def MakeReplacer(pat, replace_str, slash_spid): """Helper for ${x/pat/replace} Parses 'pat' and returns either a _GlobReplacer or a _ConstStringReplacer. Using these objects is more efficient when performing the same operation on multiple strings. """ regex, warnings = glob_.GlobToERE(pat) if warnings: # TODO: Add strict mode and expose warnings. pass if regex is None: return _ConstStringReplacer(pat, replace_str) else: return _GlobReplacer(regex, replace_str, slash_spid)
def testGlobParser(self): CASES = [ # (glob input, expected AST, expected extended regexp, has error) ('*.py', '.*\.py', False), ('*.?', '.*\..', False), ('<*>', '<.*>', False), ('\**+', '\*.*\+', False), ('\**', '\*.*', False), ('*.[ch]pp', '.*\.[ch]pp', False), # not globs ('abc', None, False), ('\\*', None, False), ('c:\\foo', None, False), ('strange]one', None, False), # character class globs ('[[:space:]abc]', '[[:space:]abc]', False), ('[abc]', '[abc]', False), (r'[\a\b\c]', r'[\a\b\c]', False), ('[abc\[]', '[abc\[]', False), ('[!not]', '[^not]', False), ('[^also_not]', '[^also_not]', False), ('[!*?!\\[]', '[^*?!\\[]', False), ('[!\]foo]', '[^\]foo]', False), # invalid globs ('not_closed[a-z', None, True), ('[[:spa[ce:]]', None, True), # Regression test for IndexError. ('[', None, True), ('\\', None, True), (']', None, False), ] for glob, expected_ere, expected_err in CASES: print('===') print(glob) regex, warnings = glob_.GlobToERE(glob) self.assertEqual( expected_ere, regex, 'Expected %r to translate to %r, got %r' % (glob, expected_ere, regex)) print('regex : %s' % regex) print('warnings: %s' % warnings)
def _EvalBracedVarSub(self, part, part_vals, quoted): """ Args: part_vals: output param to append to. """ # We have four types of operator that interact. # # 1. Bracket: value -> (value, bool maybe_decay_array) # # 2. Then these four cases are mutually exclusive: # # a. Prefix length: value -> value # b. Test: value -> part_value[] # c. Other Suffix: value -> value # d. no operator: you have a value # # That is, we don't have both prefix and suffix operators. # # 3. Process maybe_decay_array here before returning. maybe_decay_array = False # for $*, ${a[*]}, etc. var_name = None # For ${foo=default} # 1. Evaluate from (var_name, var_num, token Id) -> value if part.token.id == Id.VSub_Name: var_name = part.token.val val = self.mem.GetVar(var_name) #log('EVAL NAME %s -> %s', var_name, val) elif part.token.id == Id.VSub_Number: var_num = int(part.token.val) val = self._EvalVarNum(var_num) else: # $* decays val, maybe_decay_array = self._EvalSpecialVar(part.token.id, quoted) # 2. Bracket: value -> (value v, bool maybe_decay_array) # maybe_decay_array is for joining ${a[*]} and unquoted ${a[@]} AFTER # suffix ops are applied. If we take the length with a prefix op, the # distinction is ignored. if part.bracket_op: if part.bracket_op.tag == bracket_op_e.WholeArray: op_id = part.bracket_op.op_id if op_id == Id.Lit_At: if not quoted: maybe_decay_array = True # ${a[@]} decays but "${a[@]}" doesn't if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with @: %r", val, part=part) elif val.tag == value_e.StrArray: # TODO: Is this a no-op? Just leave 'val' alone. val = value.StrArray(val.strs) elif op_id == Id.Arith_Star: maybe_decay_array = True # both ${a[*]} and "${a[*]}" decay if val.tag == value_e.Undef: val = self._EmptyStrArrayOrError(part.token) elif val.tag == value_e.Str: e_die("Can't index string with *: %r", val, part=part) elif val.tag == value_e.StrArray: # TODO: Is this a no-op? Just leave 'val' alone. # ${a[*]} or "${a[*]}" : maybe_decay_array is always true val = value.StrArray(val.strs) else: raise AssertionError(op_id) # unknown elif part.bracket_op.tag == bracket_op_e.ArrayIndex: anode = part.bracket_op.expr if val.tag == value_e.Undef: pass # it will be checked later elif val.tag == value_e.Str: # Bash treats any string as an array, so we can't add our own # behavior here without making valid OSH invalid bash. e_die("Can't index string %r with integer", part.token.val, token=part.token) elif val.tag == value_e.StrArray: index = self.arith_ev.Eval(anode) try: # could be None because representation is sparse s = val.strs[index] except IndexError: s = None if s is None: val = value.Undef() else: val = value.Str(s) elif val.tag == value_e.AssocArray: key = self.arith_ev.Eval(anode, int_coerce=False) try: val = value.Str(val.d[key]) except KeyError: val = value.Undef() else: raise AssertionError(val.__class__.__name__) else: raise AssertionError(part.bracket_op.tag) if part.prefix_op: val = self._EmptyStrOrError(val) # maybe error val = self._ApplyPrefixOp(val, part.prefix_op, token=part.token) # NOTE: When applying the length operator, we can't have a test or # suffix afterward. And we don't want to decay the array elif part.suffix_op: op = part.suffix_op if op.tag == suffix_op_e.StringNullary: if op.op_id == Id.VOp0_P: prompt = self.prompt_ev.EvalPrompt(val) val = value.Str(prompt) elif op.op_id == Id.VOp0_Q: val = value.Str(string_ops.ShellQuote(val.s)) else: raise NotImplementedError(op.op_id) elif op.tag == suffix_op_e.StringUnary: if LookupKind(part.suffix_op.op_id) == Kind.VTest: # TODO: Change style to: # if self._ApplyTestOp(...) # return # It should return whether anything was done. If not, we continue to # the end, where we might throw an error. assign_part_vals, effect = self._ApplyTestOp(val, part.suffix_op, quoted, part_vals) # NOTE: Splicing part_values is necessary because of code like # ${undef:-'a b' c 'd # e'}. Each part_value can have a different # do_glob/do_elide setting. if effect == effect_e.SpliceParts: return # EARLY RETURN, part_vals mutated elif effect == effect_e.SpliceAndAssign: if var_name is None: # TODO: error context e_die("Can't assign to special variable") else: # NOTE: This decays arrays too! 'set -o strict_array' could # avoid it. rhs_str = _DecayPartValuesToString(assign_part_vals, self.splitter.GetJoinChar()) state.SetLocalString(self.mem, var_name, rhs_str) return # EARLY RETURN, part_vals mutated elif effect == effect_e.Error: raise NotImplementedError else: # The old one #val = self._EmptyStringPartOrError(part_val, quoted) pass # do nothing, may still be undefined else: val = self._EmptyStrOrError(val) # maybe error # Other suffix: value -> value val = self._ApplyUnarySuffixOp(val, part.suffix_op) elif op.tag == suffix_op_e.PatSub: # PatSub, vectorized val = self._EmptyStrOrError(val) # ${undef//x/y} # globs are supported in the pattern pat_val = self.EvalWordToString(op.pat, do_fnmatch=True) assert pat_val.tag == value_e.Str, pat_val if op.replace: replace_val = self.EvalWordToString(op.replace) assert replace_val.tag == value_e.Str, replace_val replace_str = replace_val.s else: replace_str = '' regex, warnings = glob_.GlobToERE(pat_val.s) if warnings: # TODO: # - Add 'set -o strict-glob' mode and expose warnings. # "Glob is not in CANONICAL FORM". # - Propagate location info back to the 'op.pat' word. pass replacer = string_ops.GlobReplacer(regex, replace_str, op.spids[0]) if val.tag == value_e.Str: s = replacer.Replace(val.s, op) val = value.Str(s) elif val.tag == value_e.StrArray: strs = [] for s in val.strs: if s is not None: strs.append(replacer.Replace(s, op)) val = value.StrArray(strs) else: raise AssertionError(val.__class__.__name__) elif op.tag == suffix_op_e.Slice: val = self._EmptyStrOrError(val) # ${undef:3:1} if op.begin: begin = self.arith_ev.Eval(op.begin) else: begin = 0 if op.length: length = self.arith_ev.Eval(op.length) else: length = None if val.tag == value_e.Str: # Slice UTF-8 characters in a string. s = val.s try: if begin < 0: # It could be negative if we compute unicode length, but that's # confusing. # TODO: Instead of attributing it to the word part, it would be # better if we attributed it to arith_expr begin. raise util.InvalidSlice( "The start index of a string slice can't be negative: %d", begin, part=part) byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0) if length is None: byte_end = len(s) else: if length < 0: # TODO: Instead of attributing it to the word part, it would be # better if we attributed it to arith_expr begin. raise util.InvalidSlice( "The length of a string slice can't be negative: %d", length, part=part) byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin) except (util.InvalidSlice, util.InvalidUtf8) as e: if self.exec_opts.strict_word_eval: raise else: # TODO: # - We don't see the error location here, but we see it when set # -o strict-word-eval. # - Doesn't make the command exit with 1. It just sets the word # to empty string. util.warn(e.UserErrorString()) substr = '' # error condition else: substr = s[byte_begin : byte_end] val = value.Str(substr) elif val.tag == value_e.StrArray: # Slice array entries. # NOTE: unset elements don't count towards the length. strs = [] for s in val.strs[begin:]: if s is not None: strs.append(s) if len(strs) == length: # never true for unspecified length break val = value.StrArray(strs) else: raise AssertionError(val.__class__.__name__) # Not possible # After applying suffixes, process maybe_decay_array here. if maybe_decay_array and val.tag == value_e.StrArray: val = self._DecayArray(val) # For the case where there are no prefix or suffix ops. val = self._EmptyStrOrError(val) # For example, ${a} evaluates to value_t.Str(), but we want a # part_value.StringPartValue. part_val = _ValueToPartValue(val, quoted) part_vals.append(part_val)