def test_charClass(self):
    """Each backslash-letter class fed to charClass() should give a CharClass node."""
    g = VisualRegexGrammar()
    # One check per class letter: the input is a backslash followed by the
    # letter, and the expected node carries that same letter in `cls`.
    for letter in 'AbBdDsSwWZ':
        self._parseStringTest(g.charClass(), '\\' + letter,
                              Schema.CharClass(cls=letter))
def test_choice(self):
    """'abc|def' should parse to a Choice holding two three-character Sequences."""
    g = VisualRegexGrammar()

    def literalRun(chars):
        # A fresh Sequence with one LiteralChar node per character of `chars`.
        return Schema.Sequence(
            subexps=[Schema.LiteralChar(char=c) for c in chars])

    self._parseStringTest(
        g.choice(), 'abc|def',
        Schema.Choice(subexps=[literalRun('abc'), literalRun('def')]))
def __init__(self, regex=None):
    """Store `regex` on self.regex as a Schema.PythonRegEx node.

    `regex` is normalised in stages; each stage feeds the next:
      - None becomes a PythonRegEx whose expr is an UNPARSED node holding
        a single empty string;
      - a compiled pattern object is replaced by its pattern string;
      - a str / unicode string is parsed with VisualRegexGrammar and the
        parse result's value is wrapped in a PythonRegEx;
      - a DMNode is used as-is when it is a PythonRegEx, or wrapped in a
        PythonRegEx when it is some other Schema.Node.

    Raises:
        TypeError: 'Wrong schema' for a DMNode that is not a Schema.Node;
            'Invalid regular expression type' for any other unsupported type.
    """
    if regex is None:
        regex = Schema.PythonRegEx(expr=Schema.UNPARSED(value=['']))

    if isinstance(regex, re._pattern_type):
        # Extract pattern string
        regex = regex.pattern

    if isinstance(regex, str) or isinstance(regex, unicode):
        # Convert to structural form
        g = VisualRegexGrammar()
        x = g.regex().parseStringChars(regex, None)
        regex = Schema.PythonRegEx(expr=x.value)

    # Whatever came in must have been reduced to a document-model node by now.
    if not isinstance(regex, DMNode):
        raise TypeError('Invalid regular expression type')

    if not regex.isInstanceOf(Schema.PythonRegEx):
        if not regex.isInstanceOf(Schema.Node):
            raise TypeError('Wrong schema')
        # A bare expression node: wrap it so self.regex is always a PythonRegEx.
        regex = Schema.PythonRegEx(expr=regex)

    self.regex = regex
def setFlags(self):
    """Rule for an inline flag group such as '(?i)'; its action builds Schema.SetFlags."""
    # NOTE(review): the character class matches exactly one flag letter, so a
    # multi-letter group such as '(?im)' would not match this rule -- confirm
    # whether that is intended.
    def _makeSetFlags(input, begin, end, x, bindings):
        # x[1] is the text matched between '(?' and ')'.
        return Schema.SetFlags(flags=x[1])

    opener = Literal('(?')
    flagLetter = RegEx('[iLmsux]')
    closer = Literal(')')
    return (opener + flagLetter + closer).action(_makeSetFlags)
def _commitExprOuterEmpty(model, parsed):
    """Reset model['expr'] to an UNPARSED node holding a single empty string.

    `parsed` is accepted but not used.
    """
    emptyExpr = Schema.UNPARSED(value=[''])
    model['expr'] = emptyExpr
def charSetItemRange(self):
    """Rule for a 'lo-hi' range inside a character set; builds Schema.CharSetRange."""
    def _makeRange(input, begin, end, x, bindings):
        # x is [low end, '-', high end]; the dash itself is discarded.
        return Schema.CharSetRange(min=x[0], max=x[2])

    lowEnd = self.charSetChar()
    dash = Literal('-')
    highEnd = self.charSetChar()
    return (lowEnd + dash + highEnd).action(_makeRange)
def charSetChar(self):
    """Rule for one character inside a character set.

    Accepts either an escaped character (via escapedChar()), or a plain
    character: anything other than ']', '-' and backslash, or a lone '-'.
    Plain characters are wrapped in Schema.LiteralChar.
    """
    def _makeLiteral(input, begin, end, x, bindings):
        return Schema.LiteralChar(char=x)

    escaped = self.escapedChar()
    plain = RegEx('[^\\]\\-\\\\]') | Literal('-')
    # The action applies to the plain alternative only; escaped characters
    # are passed through as whatever escapedChar() produces.
    return escaped | plain.action(_makeLiteral)
def endOfLine(self):
    """Rule for the '$' anchor; its action builds a Schema.EndOfLine node."""
    def _makeEndOfLine(input, begin, end, x, bindings):
        # The matched text is not needed; the node is built without fields.
        return Schema.EndOfLine()

    dollar = Literal('$')
    return dollar.action(_makeEndOfLine)
def anyChar(self):
    """Rule for the '.' wildcard; its action builds a Schema.AnyChar node."""
    def _makeAnyChar(input, begin, end, x, bindings):
        # The matched text is not needed; the node is built without fields.
        return Schema.AnyChar()

    dot = Literal('.')
    return dot.action(_makeAnyChar)
def test_specials(self):
    """'.', '^' and '$' should parse via specials() to their dedicated node types."""
    g = VisualRegexGrammar()
    # (input text, node class expected for it); a fresh node is built per case.
    cases = (
        ('.', Schema.AnyChar),
        ('^', Schema.StartOfLine),
        ('$', Schema.EndOfLine),
    )
    for text, nodeClass in cases:
        self._parseStringTest(g.specials(), text, nodeClass())
def escapedRegexChar(self):
    """Rule for a backslash followed by one non-digit character; builds Schema.EscapedChar."""
    def _makeEscapedChar(input, begin, end, x, bindings):
        # x[1] is the character that followed the backslash.
        return Schema.EscapedChar(char=x[1])

    backslash = Literal('\\')
    escapee = RegEx('[^0-9]')
    return (backslash + escapee).action(_makeEscapedChar)
def test_literalChar(self):
    """literalChar(): plain characters, escaped specials, and bare specials."""
    g = VisualRegexGrammar()
    # The same characters are checked twice: escaped and bare.
    specials = ['.', '^', '$', '[', '\\', '(', '|', '+', '*', '?']

    self._parseStringTest(g.literalChar(), 'a', Schema.LiteralChar(char='a'))

    # Preceded by a backslash, each special is expected to give an EscapedChar.
    for ch in specials:
        self._parseStringTest(g.literalChar(), '\\' + ch,
                              Schema.EscapedChar(char=ch))

    # On its own, each special goes through the failure check instead.
    for ch in specials:
        self._parseStringFailTest(g.literalChar(), ch)
def choice(self):
    """Rule for '|'-separated alternatives.

    Two or more sequences joined by '|' give a Schema.Choice; otherwise the
    rule falls back to a single sequence().
    """
    def _makeChoice(input, begin, end, x, bindings):
        # x[0] is the first alternative; x[1] holds ('|', sequence) pairs,
        # of which only the sequence part is kept.
        alternatives = [x[0]]
        alternatives.extend([pair[1] for pair in x[1]])
        return Schema.Choice(subexps=alternatives)

    firstAlt = self.sequence()
    moreAlts = (Literal('|') + self.sequence()).oneOrMore()
    alternation = (firstAlt + moreAlts).action(_makeChoice)
    return alternation | self.sequence()
def sequence(self):
    """Rule for one or more repeated items in a row.

    Several items are wrapped in a Schema.Sequence; a single item is
    returned as-is, without a wrapper.
    """
    def _collapse(input, begin, end, x, bindings):
        if len(x) > 1:
            return Schema.Sequence(subexps=x)
        return x[0]

    items = self.repeatedItem().oneOrMore()
    return items.action(_collapse)
def optional(self):
    """Rule for an item followed by '?' (optionally '??'); builds Schema.Optional."""
    def _makeOptional(input, begin, end, x, bindings):
        # x is [item, '?', second '?' or None].
        # NOTE(review): `greedy` is set to '1' when the second '?' IS present
        # -- confirm that this is the intended meaning of the field.
        if x[2] is not None:
            greedyFlag = '1'
        else:
            greedyFlag = None
        return Schema.Optional(subexp=x[0], greedy=greedyFlag)

    operand = self.item()
    marker = Literal('?')
    secondMarker = Literal('?').optional()
    return (operand + marker + secondMarker).action(_makeOptional)
def comment(self):
    """Rule for a '(?#...)' comment group; builds Schema.Comment.

    The comment body is every character up to the first ')'. An empty
    comment, '(?#)', gives a Comment whose text is ''.
    """
    def _makeComment(input, begin, end, x, bindings):
        body = x[1]
        if body is None:
            # The optional body did not match: store an empty string.
            body = ''
        return Schema.Comment(text=body)

    opener = Literal('(?#')
    body = RegEx('[^)]+').optional()
    closer = Literal(')')
    return (opener + body + closer).action(_makeComment)
def test_regex(self):
    """regex(): a single literal, a two-way choice, and an e-mail-like pattern."""
    g = VisualRegexGrammar()

    # Each factory builds fresh nodes on every call, so no node instance is
    # shared between two places in an expected tree.
    def lit(c):
        return Schema.LiteralChar(char=c)

    def literalRun(chars):
        return Schema.Sequence(subexps=[lit(c) for c in chars])

    def wordClassItem():
        return Schema.CharSetChar(char=Schema.CharClass(cls='w'))

    def escapedItem(c):
        return Schema.CharSetChar(char=Schema.EscapedChar(char=c))

    def span(lo, hi):
        return Schema.CharSetRange(min=lit(lo), max=lit(hi))

    def leadSet():
        # Expected node for [\w\-]
        return Schema.CharSet(items=[wordClassItem(), escapedItem('-')])

    def tailSet():
        # Expected node for [\w\-\.]
        return Schema.CharSet(
            items=[wordClassItem(), escapedItem('-'), escapedItem('.')])

    self._parseStringTest(g.regex(), 'a', lit('a'))

    self._parseStringTest(
        g.regex(), 'abc|def',
        Schema.Choice(subexps=[literalRun('abc'), literalRun('def')]))

    self._parseStringTest(
        g.regex(), r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}',
        Schema.Sequence(subexps=[
            leadSet(),
            Schema.OneOrMore(subexp=tailSet()),
            lit('@'),
            leadSet(),
            Schema.OneOrMore(subexp=tailSet()),
            Schema.RepeatRange(
                subexp=Schema.CharSet(items=[span('a', 'z'), span('A', 'Z')]),
                min='1', max='4', greedy='1'),
        ]))
def test_charSet(self):
    """charSet(): plain members, ranges, inversion, and '-' / '^' corner cases."""
    g = VisualRegexGrammar()

    def member(c):
        return Schema.CharSetChar(char=Schema.LiteralChar(char=c))

    def span(lo, hi):
        return Schema.CharSetRange(min=Schema.LiteralChar(char=lo),
                                   max=Schema.LiteralChar(char=hi))

    # '[]' goes through the failure check rather than the parse check.
    self._parseStringFailTest(g.charSet(), '[]')

    # (input text, factory for the expected node). The factory is only
    # called after g.charSet() has been evaluated for that case.
    cases = [
        ('[abc]',
         lambda: Schema.CharSet(
             items=[member('a'), member('b'), member('c')])),
        ('[^abc]',
         lambda: Schema.CharSet(
             invert='1',
             items=[member('a'), member('b'), member('c')])),
        ('[a-z]',
         lambda: Schema.CharSet(items=[span('a', 'z')])),
        ('[a-zA-Z0-9_]',
         lambda: Schema.CharSet(
             items=[span('a', 'z'), span('A', 'Z'), span('0', '9'),
                    member('_')])),
        ('[^^]',
         lambda: Schema.CharSet(invert='1', items=[member('^')])),
        ('[-]',
         lambda: Schema.CharSet(items=[member('-')])),
        ('[a-]',
         lambda: Schema.CharSet(items=[member('a'), member('-')])),
        ('[+--]',
         lambda: Schema.CharSet(items=[span('+', '-')])),
    ]
    for text, makeExpected in cases:
        self._parseStringTest(g.charSet(), text, makeExpected())
def startOfLine(self):
    """Rule for the '^' anchor; its action builds a Schema.StartOfLine node."""
    def _makeStartOfLine(input, begin, end, x, bindings):
        # The matched text is not needed; the node is built without fields.
        return Schema.StartOfLine()

    caret = Literal('^')
    return caret.action(_makeStartOfLine)
def newLine(self):
    """Rule matching the literal '\\n' string (a newline character).

    Its action builds Schema.PythonEscapedChar with char='n'.
    """
    def _makeNewLine(input, begin, end, x, bindings):
        return Schema.PythonEscapedChar(char='n')

    lineBreak = Literal('\n')
    return lineBreak.action(_makeNewLine)
def charClass(self):
    """Rule for a backslash followed by one of A b B d D s S w W Z; builds Schema.CharClass."""
    def _makeCharClass(input, begin, end, x, bindings):
        # x[1] is the class letter that followed the backslash.
        return Schema.CharClass(cls=x[1])

    backslash = Literal('\\')
    classLetter = RegEx('[AbBdDsSwWZ]')
    return (backslash + classLetter).action(_makeCharClass)
def test_group(self):
    """item(): every parenthesised construct should map to its own node type."""
    g = VisualRegexGrammar()

    def a():
        # Fresh LiteralChar for the 'a' used as the body of most groups.
        return Schema.LiteralChar(char='a')

    # (input text, factory for the expected node). The factory is only
    # called after g.item() has been evaluated for that case.
    cases = [
        ('(a)', lambda: Schema.Group(capturing='1', subexp=a())),
        ('(?i)', lambda: Schema.SetFlags(flags='i')),
        ('(?:a)', lambda: Schema.Group(subexp=a())),
        ('(?P<id>a)', lambda: Schema.DefineNamedGroup(subexp=a(), name='id')),
        ('(?P=id)', lambda: Schema.MatchNamedGroup(name='id')),
        ('(?#abc)', lambda: Schema.Comment(text='abc')),
        ('(?=a)', lambda: Schema.Lookahead(subexp=a(), positive='1')),
        ('(?!a)', lambda: Schema.Lookahead(subexp=a())),
        ('(?<=a)', lambda: Schema.Lookbehind(subexp=a(), positive='1')),
        ('(?<!a)', lambda: Schema.Lookbehind(subexp=a())),
    ]
    for text, makeExpected in cases:
        self._parseStringTest(g.item(), text, makeExpected())
def charSetItemChar(self):
    """Rule for a single-character set item: a character class or a set character.

    Whichever alternative matches is wrapped in Schema.CharSetChar.
    """
    def _wrapItem(input, begin, end, x, bindings):
        return Schema.CharSetChar(char=x)

    classOrChar = self.charClass() | self.charSetChar()
    return classOrChar.action(_wrapItem)
def test_repetition(self):
    """repeatedItem(): '*', '+', '?', '{n}' and '{m,n}', with and without a trailing '?'."""
    g = VisualRegexGrammar()

    def a():
        # Fresh LiteralChar for the repeated 'a'.
        return Schema.LiteralChar(char='a')

    # (input text, factory for the expected node). The factory is only
    # called after g.repeatedItem() has been evaluated for that case.
    #
    # NOTE(review): for '*', '+' and '?' the expected node has greedy='1'
    # when the trailing '?' is present, whereas for '{1,2}' it has greedy='1'
    # when the trailing '?' is absent -- confirm this asymmetry is intended.
    cases = [
        ('a*', lambda: Schema.ZeroOrMore(subexp=a())),
        ('a*?', lambda: Schema.ZeroOrMore(subexp=a(), greedy='1')),
        ('a+', lambda: Schema.OneOrMore(subexp=a())),
        ('a+?', lambda: Schema.OneOrMore(subexp=a(), greedy='1')),
        ('a?', lambda: Schema.Optional(subexp=a())),
        ('a??', lambda: Schema.Optional(subexp=a(), greedy='1')),
        ('a{5}', lambda: Schema.Repeat(subexp=a(), repetitions='5')),
        ('a{1,2}',
         lambda: Schema.RepeatRange(subexp=a(), min='1', max='2',
                                    greedy='1')),
        ('a{1,2}?',
         lambda: Schema.RepeatRange(subexp=a(), min='1', max='2')),
    ]
    for text, makeExpected in cases:
        self._parseStringTest(g.repeatedItem(), text, makeExpected())
def charSet(self):
    """Rule for a bracketed character set, '[...]' or '[^...]'; builds Schema.CharSet.

    At least one set item is required between the brackets.
    """
    def _makeCharSet(input, begin, end, x, bindings):
        # x is ['[', '^' or None, list of items, ']'].
        if x[1] is not None:
            inverted = '1'
        else:
            inverted = None
        return Schema.CharSet(invert=inverted, items=x[2])

    opener = Literal('[')
    negation = Literal('^').optional()
    members = self.charSetItem().oneOrMore()
    closer = Literal(']')
    return (opener + negation + members + closer).action(_makeCharSet)
def test_sequence(self):
    """sequence(): runs of literals mixed with a repetition, a set, or a class."""
    g = VisualRegexGrammar()

    def lit(c):
        return Schema.LiteralChar(char=c)

    def star(c):
        return Schema.ZeroOrMore(subexp=lit(c))

    # (input text, factory for the expected list of sub-expressions). The
    # factory is only called after g.sequence() has been evaluated.
    cases = [
        ('abc', lambda: [lit('a'), lit('b'), lit('c')]),
        ('a*bc', lambda: [star('a'), lit('b'), lit('c')]),
        ('ab*c', lambda: [lit('a'), star('b'), lit('c')]),
        ('abc*', lambda: [lit('a'), lit('b'), star('c')]),
        ('[a-z]bc',
         lambda: [
             Schema.CharSet(items=[
                 Schema.CharSetRange(min=lit('a'), max=lit('z'))
             ]),
             lit('b'),
             lit('c'),
         ]),
        ('\\wbc*',
         lambda: [Schema.CharClass(cls='w'), lit('b'), star('c')]),
    ]
    for text, makeParts in cases:
        self._parseStringTest(g.sequence(), text,
                              Schema.Sequence(subexps=makeParts()))
def _commitInnerUnparsed(model, value):
    """Replace `model` with an UNPARSED node built from value's item values."""
    itemValues = value.getItemValues()
    unparsed = Schema.UNPARSED(value=itemValues)
    # The UNPARSED node may contain a reference to @model itself. Because
    # vreReplaceNode calls model.become(), swapping in such a node would
    # create circular references and cause severe problems. Replacing with
    # a deep copy rules that out.
    replacement = deepcopy(unparsed)
    vreReplaceNode(model, replacement)
def literalChar(self):
    """Rule for one literal character outside a character set.

    Accepts an escaped character (via escapedChar()), or any single
    character other than . ^ $ [ \\ ( ) | + * ? -- the latter is wrapped in
    Schema.LiteralChar.
    """
    def _makeLiteral(input, begin, end, x, bindings):
        return Schema.LiteralChar(char=x)

    escaped = self.escapedChar()
    plain = RegEx('[^\\.^$\\[\\\\()|+*?]')
    # The action applies to the plain alternative only.
    return escaped | plain.action(_makeLiteral)
def _commitExprOuterUnparsed(model, value):
    """Store value's item values in model['expr'] as an UNPARSED node.

    When the item values compare equal to an empty list, a single empty
    string is stored instead.
    """
    items = value.getItemValues()
    contents = [''] if items == [] else items
    model['expr'] = Schema.UNPARSED(value=contents)
def negativeLookbehind(self):
    """Rule for a '(?<!...)' group; builds Schema.Lookbehind without the positive flag."""
    def _makeLookbehind(input, begin, end, x, bindings):
        # x is ['(?<!', sub-expression, ')'].
        return Schema.Lookbehind(subexp=x[1])

    opener = Literal('(?<!')
    body = self.choice()
    closer = Literal(')')
    return (opener + body + closer).action(_makeLookbehind)