def testRegex(self):
    """Print libc.regex_parse() and libc.regex_match() results for inspection.

    Nothing is asserted; every result is only printed.
    """
    # Patterns handed to the parser, in order.
    parse_inputs = (r'.*\.py', r'*', '\\')
    for pattern in parse_inputs:
        print(libc.regex_parse(pattern))

    # (pattern, string) pairs handed to the matcher, in order.
    match_inputs = (
        (r'.*\.py', 'foo.py'),
        (r'.*\.py', 'abcd'),
        (r'*', 'abcd'),  # noted as the error case
    )
    for pattern, text in match_inputs:
        print(libc.regex_match(pattern, text))
def testRegexParse(self):
    """Check regex_parse() on good and bad patterns, then regex_match() results.

    regex_parse() is expected to return True for a valid pattern and to raise
    RuntimeError for the malformed ones.  regex_match() is compared against a
    table of expected values: a list of strings (whole match first, then the
    groups, as the first row shows) or None.
    """
    self.assertEqual(True, libc.regex_parse(r'.*\.py'))

    # Syntax errors
    for bad_pattern in (r'*', '\\', '{'):
        self.assertRaises(RuntimeError, libc.regex_parse, bad_pattern)

    # Rows are (pattern, input string, expected regex_match() result).
    match_table = [
        ('([a-z]+)([0-9]+)', 'foo123', ['foo123', 'foo', '123']),
        (r'.*\.py', 'foo.py', ['foo.py']),
        (r'.*\.py', 'abcd', None),
        # The match is unanchored
        (r'bc', 'abcd', ['bc']),
        # The match is unanchored
        (r'.c', 'abcd', ['bc']),
        # Empty matches empty
        (r'', '', ['']),
        (r'^$', '', ['']),
        (r'^.$', '', None),
    ]
    for pattern, text, want in match_table:
        got = libc.regex_match(pattern, text)
        self.assertEqual(want, got)
def testRegex(self):
    """Print regex_parse() results and check regex_match() against booleans.

    The parse results and the final (error-case) match are only printed; the
    table in between is asserted with assertEqual.
    """
    for pattern in (r'.*\.py', r'*', '\\'):
        print(libc.regex_parse(pattern))

    # Rows are (pattern, input string, expected regex_match() result).
    expectations = [
        (r'.*\.py', 'foo.py', True),
        (r'.*\.py', 'abcd', False),
        # The match is unanchored
        (r'bc', 'abcd', True),
        # The match is unanchored
        (r'.c', 'abcd', True),
    ]
    for pattern, text, want in expectations:
        got = libc.regex_match(pattern, text)
        self.assertEqual(want, got)

    # Error.
    print(libc.regex_match(r'*', 'abcd'))
def testRegex(self):
    """Check regex_parse() return values and regex_match() results.

    regex_parse() is expected to return True for a valid pattern and False for
    the malformed ones.  regex_match() is compared against a table of expected
    values (a list of strings, or None); the final error-case match is only
    printed.
    """
    self.assertEqual(True, libc.regex_parse(r'.*\.py'))
    for bad_pattern in (r'*', '\\', '{'):
        self.assertEqual(False, libc.regex_parse(bad_pattern))

    # Rows are (pattern, input string, expected regex_match() result).
    match_table = [
        ('([a-z]+)([0-9]+)', 'foo123', ['foo123', 'foo', '123']),
        (r'.*\.py', 'foo.py', ['foo.py']),
        (r'.*\.py', 'abcd', None),
        # The match is unanchored
        (r'bc', 'abcd', ['bc']),
        # The match is unanchored
        (r'.c', 'abcd', ['bc']),
    ]
    for pattern, text, want in match_table:
        print('CASE %s' % pattern)
        got = libc.regex_match(pattern, text)
        self.assertEqual(want, got)

    # Error.
    print(libc.regex_match(r'*', 'abcd'))
def testPatSubRegexesLibc(self):
    """Print the parse and match results for a two-group pattern.

    Nothing is asserted; both results are only printed.
    """
    pattern = '^(.*)git.*(.*)'

    parsed = libc.regex_parse(pattern)
    print(parsed)

    # It matches.  But we need to get the positions out!
    matched = libc.regex_match(pattern, '~/git/oil')
    print(matched)
def ParseFactor(self):
    """Parse one Factor of a boolean expression.

    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'

    Returns:
      An ast.BoolUnary, ast.BoolBinary or ast.WordTest node, or the result of
      self.ParseExpr() for a parenthesized expression.  None is returned as
      soon as self._Next() reports failure, or after an error has been
      recorded with self.AddErrorContext().
    """
    if self.b_kind == Kind.BoolUnary:
        # UNARY_OP WORD.  Only the operator id is saved, not its token.
        unary_op = self.op_id
        if not self._Next():
            return None
        operand = self.cur_word
        if not self._Next():
            return None
        return ast.BoolUnary(unary_op, operand)

    if self.b_kind == Kind.Word:
        # Peek at the following word to tell a binary expression from a
        # lone word.
        next_word = self._LookAhead()
        next_op_id = word.BoolId(next_word)
        next_kind = LookupKind(next_op_id)

        # Redir pun for < and >, -a and -o pun
        if next_kind not in (Kind.BoolBinary, Kind.Redir):
            # [[ foo ]]
            lone_word = self.cur_word
            if not self._Next():
                return None
            return ast.WordTest(lone_word)

        # WORD BINARY_OP WORD
        lhs = self.cur_word
        if not self._Next():
            return None
        binary_op = self.op_id

        # When the operator is Id.BoolBinary_EqualTilde, advance to the right
        # operand with lex_mode=BASH_REGEX.
        is_regex = next_op_id == Id.BoolBinary_EqualTilde
        if is_regex:
            advanced = self._Next(lex_mode=lex_mode_e.BASH_REGEX)
        else:
            advanced = self._Next()
        if not advanced:
            return None

        rhs = self.cur_word
        if is_regex:
            # TODO: Quoted parts need to be regex-escaped, e.g.
            # [[ $a =~ "{" ]].  I don't think libc has a function to do this.
            # Escape these characters:
            # https://www.gnu.org/software/sed/manual/html_node/ERE-syntax.html
            ok, regex_str, unused_quoted = word.StaticEval(rhs)

            # ok means the word doesn't contain $foo, etc., so the regex can
            # be validated right here.
            if ok and not libc.regex_parse(regex_str):
                self.AddErrorContext("Invalid regex: %r" % regex_str, word=rhs)
                return None

        if not self._Next():
            return None
        return ast.BoolBinary(binary_op, lhs, rhs)

    if self.op_id == Id.Op_LParen:
        # '(' Expr ')'
        if not self._Next():
            return None
        inner = self.ParseExpr()
        if self.op_id != Id.Op_RParen:
            self.AddErrorContext(
                'Expected ), got %s', self.cur_word, word=self.cur_word)
            return None
        if not self._Next():
            return None
        return inner

    # TODO: A proper error, e.g. for [[ && ]] or [[ ]]
    self.AddErrorContext(
        'Unexpected token: %s' % self.cur_word, word=self.cur_word)
    return None
def ParseFactor(self):
    """Parse one Factor of a boolean expression.

    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'

    Returns:
      An ast.BoolUnary, ast.BoolBinary or ast.WordTest node, or the result of
      self.ParseExpr() for a parenthesized expression.  None is returned as
      soon as self._Next() reports failure, or after an invalid regex has
      been recorded with self.AddErrorContext().

    Raises:
      RuntimeError: a parenthesized expression is not followed by ')'.
      AssertionError: the current token cannot start a Factor.
    """
    if self.b_kind == Kind.BoolUnary:
        # UNARY_OP WORD.  Only the operator id is saved, not its token.
        op = self.op_id
        if not self._Next():
            return None
        w = self.cur_word
        if not self._Next():
            return None
        return ast.BoolUnary(op, w)

    if self.b_kind == Kind.Word:
        # Peek ahead another token.
        t2 = self._LookAhead()
        t2_op_id = word.BoolId(t2)
        t2_b_kind = LookupKind(t2_op_id)

        # Redir PUN for < and >
        if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
            left = self.cur_word
            if not self._Next():
                return None
            op = self.op_id

            # When the operator is Id.BoolBinary_EqualTilde, advance to the
            # right operand with lex_mode=BASH_REGEX.
            is_regex = t2_op_id == Id.BoolBinary_EqualTilde
            if is_regex:
                if not self._Next(lex_mode=LexMode.BASH_REGEX):
                    return None
            else:
                if not self._Next():
                    return None

            right = self.cur_word
            if is_regex:
                ok, regex_str, unused_quoted = word.StaticEval(right)
                # ok means the word doesn't contain $foo, etc., so the regex
                # can be validated right here.
                if ok and not libc.regex_parse(regex_str):
                    self.AddErrorContext(
                        "Invalid regex: %r" % regex_str, word=right)
                    return None

            if not self._Next():
                return None
            return ast.BoolBinary(op, left, right)
        else:
            # [[ foo ]]
            w = self.cur_word
            if not self._Next():
                return None
            return ast.WordTest(w)

    if self.op_id == Id.Op_LParen:
        # '(' Expr ')'
        if not self._Next():
            return None
        node = self.ParseExpr()
        if self.op_id != Id.Op_RParen:
            # Interpolate the word into the message.  Passing it as a second
            # constructor argument would leave the "%s" unformatted and make
            # the exception render as a tuple.  The one-element tuple keeps
            # "%" safe even if the word object is itself a tuple.
            raise RuntimeError("Expected ), got %s" % (self.cur_word,))
        if not self._Next():
            return None
        return node

    # TODO: A proper error, e.g. for "&&"
    raise AssertionError("Unexpected token: %s" % (self.cur_word,))