def DoUnarySuffixOp(s, op, arg, extglob):
  # type: (str, suffix_op__Unary, str, bool) -> str
  """Evaluate a unary suffix operation such as ${x#prefix} on the string s.

  Args:
    s: the value being operated on.
    op: the operator node; only its op_id is consulted.
    arg: the operator's argument, in glob-escaped form.
    extglob: forwarded unchanged to libc.fnmatch().

  Returns:
    s with the matched prefix/suffix removed or with its case changed; s
    itself when nothing matches.

  Raises:
    AssertionError: a constant argument was given with an unknown operator.
    NotImplementedError: a glob argument was given to an operator other than
      # ## % %%.
  """
  op_id = op.op_id

  # Constant-string fast path.
  if not glob_.LooksLikeGlob(arg):
    # The argument isn't a glob, but it was glob-escaped (e.g. [ -> \[), so
    # undo that.  NOTE: Globber.Expand() performs the same check.  It would be
    # nice to somehow store the original string rather than
    # escaping/unescaping.
    literal = glob_.GlobUnescape(arg)

    if op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
      # The emptiness test is spelled with len() for mycpp.
      if len(literal) and s.startswith(literal):
        return s[len(literal):]
      return s

    if op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
      # The emptiness test is spelled with len() for mycpp.  It also keeps
      # s[:-0] from being evaluated.
      if len(literal) and s.endswith(literal):
        return s[:-len(literal)]
      return s

    # These operators take glob arguments, we don't implement that obscure
    # case.
    if op_id == Id.VOp1_Comma:  # lowercase the first letter only
      if literal != '':
        # TODO: location info for op
        e_die("%s can't have an argument", ui.PrettyId(op_id))
      if len(s):
        return s[0].lower() + s[1:]
      return s

    if op_id == Id.VOp1_DComma:  # lowercase everything
      if literal != '':
        e_die("%s can't have an argument", ui.PrettyId(op_id))
      return s.lower()

    if op_id == Id.VOp1_Caret:  # uppercase the first letter only
      if literal != '':
        e_die("%s can't have an argument", ui.PrettyId(op_id))
      if len(s):
        return s[0].upper() + s[1:]
      return s

    if op_id == Id.VOp1_DCaret:  # uppercase everything
      if literal != '':
        e_die("%s can't have an argument", ui.PrettyId(op_id))
      return s.upper()

    # Every operator handled on the constant path is listed above.
    raise AssertionError(op_id)

  # Glob patterns: call fnmatch() on one candidate slice after another.
  #
  # TODO:
  # - Another potential fast path:
  #
  #   v=aabbccdd
  #   echo ${v#*b}  # strip shortest prefix
  #
  #   If the whole thing doesn't match '*b*', then no test can succeed.  So we
  #   can fail early.  Conversely echo ${v%%c*} and '*c*'.
  #
  # (Although honestly this whole construct is nuts and should be deprecated.)
  end = len(s)

  if op_id == Id.VOp1_Pound:  # shortest prefix
    # 'abcd': try '', 'a', 'ab', 'abc', ...
    pos = 0
    while True:
      assert pos <= end
      if libc.fnmatch(arg, s[:pos], extglob):
        return s[pos:]
      if pos >= end:
        return s
      pos = _NextUtf8Char(s, pos)

  if op_id == Id.VOp1_DPound:  # longest prefix
    # 'abcd': try 'abcd', 'abc', 'ab', 'a', ''
    pos = end
    while True:
      assert pos >= 0
      if libc.fnmatch(arg, s[:pos], extglob):
        return s[pos:]
      if pos == 0:
        return s
      pos = PreviousUtf8Char(s, pos)

  if op_id == Id.VOp1_Percent:  # shortest suffix
    # 'abcd': try '', 'd', 'cd', 'bcd', ...
    pos = end
    while True:
      assert pos >= 0
      if libc.fnmatch(arg, s[pos:], extglob):
        return s[:pos]
      if pos == 0:
        return s
      pos = PreviousUtf8Char(s, pos)

  if op_id == Id.VOp1_DPercent:  # longest suffix
    # 'abcd': try 'abcd', 'bcd', 'cd', 'd', ''
    pos = 0
    while True:
      assert pos <= end
      if libc.fnmatch(arg, s[pos:], extglob):
        return s[:pos]
      if pos >= end:
        return s
      pos = _NextUtf8Char(s, pos)

  raise NotImplementedError(ui.PrettyId(op_id))
def DoUnarySuffixOp(s, op, arg):
  """Helper for ${x#prefix} and family.

  Args:
    s: the value being operated on.
    op: the operator node; only its op_id is consulted.
    arg: the operator's argument, in glob-escaped form.

  Returns:
    s with the matched prefix/suffix removed or with its case changed; s
    itself when nothing matches.

  Raises:
    NotImplementedError: a case operator (, ,, ^ ^^) was given an argument,
      or a glob argument was given to an operator other than # ## % %%.
    AssertionError: a constant argument was given with an unknown operator.
  """
  # Fast path for constant strings.
  if not glob_.LooksLikeGlob(arg):
    # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).  So
    # reverse it.  NOTE: We also do this check in Globber.Expand().  It would
    # be nice to somehow store the original string rather than
    # escaping/unescaping.
    arg = glob_.GlobUnescape(arg)

    if op.op_id in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
      # explicit check for non-empty arg (len for mycpp)
      if len(arg) and s.startswith(arg):
        return s[len(arg):]
      else:
        return s

    elif op.op_id in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
      # need explicit check for non-empty arg (len for mycpp); it also keeps
      # s[:-0] from being evaluated.
      if len(arg) and s.endswith(arg):
        return s[:-len(arg)]
      else:
        return s

    # These operators take glob arguments, we don't implement that obscure
    # case.
    elif op.op_id == Id.VOp1_Comma:  # Only lowercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # An empty string has no first letter; s[0] would raise IndexError.
      if len(s):
        return s[0].lower() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DComma:
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.lower()

    elif op.op_id == Id.VOp1_Caret:  # Only uppercase the first letter
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      # An empty string has no first letter; s[0] would raise IndexError.
      if len(s):
        return s[0].upper() + s[1:]
      else:
        return s

    elif op.op_id == Id.VOp1_DCaret:
      if arg != '':
        raise NotImplementedError("%s can't have an argument" % op.op_id)
      return s.upper()

    else:  # every operator handled on the constant path is listed above
      raise AssertionError(op.op_id)

  # For patterns, do fnmatch() in a loop.
  #
  # TODO: The loop needs to iterate over code points, not bytes!
  # - The forward case can probably be handled in a similar manner.
  # - The backward case might be handled by pre-calculating an array of start
  #   positions with _NextUtf8Char.
  #
  # TODO: Another potential fast path:
  #
  # v=aabbccdd
  # echo ${v#*b}  # strip shortest prefix
  #
  # If the whole thing doesn't match '*b*', then no test can succeed.  So we
  # can fail early.  Conversely echo ${v%%c*} and '*c*'.
  #
  # (Although honestly this whole construct is nuts and should be deprecated.)

  n = len(s)
  if op.op_id == Id.VOp1_Pound:  # shortest prefix
    # 'abcd': match '', 'a', 'ab', 'abc', ...
    # The empty prefix must be tried first: for a pattern like '*' the
    # shortest match is empty, so nothing should be stripped.
    for i in xrange(0, n + 1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_DPound:  # longest prefix
    # 'abcd': match 'abcd', 'abc', 'ab', 'a'
    # The empty prefix is skipped: stripping it would return s anyway.
    for i in xrange(n, 0, -1):
      if libc.fnmatch(arg, s[:i]):
        return s[i:]
    return s

  elif op.op_id == Id.VOp1_Percent:  # shortest suffix
    # 'abcd': match '', 'd', 'cd', 'bcd', ...
    # The empty suffix must be tried first, for the same reason as the
    # shortest-prefix case.
    for i in xrange(n, -1, -1):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  elif op.op_id == Id.VOp1_DPercent:  # longest suffix
    # 'abcd': match 'abcd', 'bcd', 'cd', 'd'
    # The empty suffix is skipped: stripping it would return s anyway.
    for i in xrange(0, n):
      if libc.fnmatch(arg, s[i:]):
        return s[:i]
    return s

  else:
    raise NotImplementedError("Can't use %s with pattern" % op.op_id)