def test_urlparse1():
    """Trace ``urlparse`` over the sample text and compare the induced grammar.

    Each '\\n'-separated entry of the sample text is parsed inside an
    ``induce.Tracer`` that appends to a shared event list; the events are then
    fed to ``induce.grammar`` and its string form must equal the fixture.
    """
    # NOTE(review): the fixture literals hold everything on one line and
    # contain '[email protected]' placeholders - this looks like extraction
    # damage; confirm against the upstream fixture.
    sample_text = ''' http://www.st.cs.uni-saarland.de/zeller#ref '''[1:-1]
    expected_grammar = ''' $START ::= $PARSERESULT.__NEW__:SCHEME://$PARSERESULT.__NEW__:NETLOC$PARSERESULT.__NEW__:PATH#$PARSERESULT.__NEW__:FRAGMENT $PARSERESULT.__NEW__:FRAGMENT ::= $SPLITRESULT.__NEW__:FRAGMENT $PARSERESULT.__NEW__:NETLOC ::= $SPLITRESULT.__NEW__:NETLOC $PARSERESULT.__NEW__:SCHEME ::= $SPLITRESULT.__NEW__:SCHEME $PARSERESULT.__NEW__:PATH ::= $SPLITRESULT.__NEW__:PATH $SPLITRESULT.__NEW__:FRAGMENT ::= [email protected]:FRAGMENT $SPLITRESULT.__NEW__:NETLOC ::= [email protected]:NETLOC $SPLITRESULT.__NEW__:SCHEME ::= http $SPLITRESULT.__NEW__:PATH ::= /zeller [email protected]:FRAGMENT ::= [email protected]:FRAGMENT [email protected]:NETLOC ::= [email protected]:NETLOC [email protected]:FRAGMENT ::= ref [email protected]:NETLOC ::= www.st.cs.uni-saarland.de '''[1:-1]
    events = []
    for sample in sample_text.split('\n'):
        with induce.Tracer(sample, events):
            urlparse(sample)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_dateparse1():
    """Trace ``datetime.strptime`` over 'date|format' entries and compare grammars.

    Blank entries are skipped; every other entry is split on '|' into the date
    text and its format, and the ``strptime`` call is traced with the whole
    entry as the tracked input.
    """
    # NOTE(review): the expected-grammar literal holds many '::=' rules on a
    # single line - presumably whitespace-collapsed; confirm against upstream.
    sample_text = ''' Jun 1 2005 1:33PM|%b %d %Y %I:%M%p '''[1:-1]
    expected_grammar = ''' $START ::= $@._STRPTIME_DATETIME:DATA_STRING|$@._STRPTIME_DATETIME:FORMAT $@._STRPTIME_DATETIME:DATA_STRING ::= $@._STRPTIME:DATA_STRING $@._STRPTIME_DATETIME:FORMAT ::= $_LOCALIZED_MONTH.FORMAT %d %Y %I:%M%p $@._STRPTIME:DATA_STRING ::= $@._STRPTIME:ARG $_LOCALIZED_MONTH.FORMAT ::= $@.<LAMBDA>:X $@._STRPTIME:ARG ::= $@.<LISTCOMP>:_LOCALIZED_MONTH.__GETITEM___@ 1 $@._STRPTIME:FOUND_DICT.Y 1:$@._STRPTIME:FOUND_DICT.M$@._STRPTIME:FOUND_DICT.P $@.<LAMBDA>:X ::= %b $@.<LISTCOMP>:_LOCALIZED_MONTH.__GETITEM___@ ::= $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.M ::= 33 $@._STRPTIME:FOUND_DICT.Y ::= 2005 $@._STRPTIME:FOUND_DICT.P ::= PM $@._STRPTIME:FOUND_DICT.B ::= Jun '''[1:-1]
    events = []
    for entry in sample_text.split('\n'):
        if not entry.strip():
            continue
        date_text, date_format = entry.split('|')
        with induce.Tracer(entry, events):
            datetime.strptime(date_text, date_format)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_arith1():
    """Trace ``arith.Arith.eval`` over the sample expressions and compare grammars.

    Every non-blank entry is first evaluated with the builtin ``eval`` against
    the variable table, then evaluated by ``arith.Arith`` under the tracer.
    """
    # NOTE(review): the expected-grammar literal holds everything on one line
    # and contains a '[email protected]' placeholder - this looks like
    # extraction damage; confirm against the upstream fixture.
    sample_text = ''' ( AAAA - BBBB ) == 0 '''[1:-1]
    expected_grammar = ''' $START ::= $FOLLOWEDBY._PARSENOCACHE:INSTRING $FOLLOWEDBY._PARSENOCACHE:INSTRING ::= $MATCHFIRST._PARSENOCACHE:INSTRING $MATCHFIRST._PARSENOCACHE:INSTRING ::= $ONEORMORE._PARSENOCACHE:INSTRING $ONEORMORE._PARSENOCACHE:INSTRING ::= $STRINGEND._PARSENOCACHE:INSTRING $STRINGEND._PARSENOCACHE:INSTRING ::= $OPTIONAL._PARSENOCACHE:INSTRING $OPTIONAL._PARSENOCACHE:INSTRING ::= $SUPPRESS._PARSENOCACHE:INSTRING $SUPPRESS._PARSENOCACHE:INSTRING ::= $COMBINE._PARSENOCACHE:INSTRING $COMBINE._PARSENOCACHE:INSTRING ::= $FORWARD._PARSENOCACHE:INSTRING $FORWARD._PARSENOCACHE:INSTRING ::= $LITERAL._PARSENOCACHE:INSTRING $LITERAL._PARSENOCACHE:INSTRING ::= $FOLLOWEDBY.PARSEIMPL:INSTRING $FOLLOWEDBY.PARSEIMPL:INSTRING ::= $FOLLOWEDBY.POSTPARSE:INSTRING $FOLLOWEDBY.POSTPARSE:INSTRING ::= $MATCHFIRST.PARSEIMPL:INSTRING $MATCHFIRST.PARSEIMPL:INSTRING ::= $MATCHFIRST.POSTPARSE:INSTRING $MATCHFIRST.POSTPARSE:INSTRING ::= ( $PARSERESULTS.__INIT__:TOKLIST - $PARSERESULTS.__INIT__:TOKLIST ) $PARSERESULTS.__INIT__:TOKLIST 0 $PARSERESULTS.__INIT__:TOKLIST ::= $PARSERESULTS.__NEW__:TOKLIST $PARSERESULTS.__NEW__:TOKLIST ::= [email protected]:O1 | $WORD._PARSENOCACHE:TOKENS $WORD._PARSENOCACHE:TOKENS ::= $WORD.POSTPARSE:TOKENLIST [email protected]:O1 ::= == $WORD.POSTPARSE:TOKENLIST ::= $EVALCONSTANT.VALUE $EVALCONSTANT.VALUE ::= $EVALCONSTANT.__INIT__:PARSERESULTS.__GETITEM___@ $EVALCONSTANT.__INIT__:PARSERESULTS.__GETITEM___@ ::= $WORD._PARSENOCACHE:WORD.POSTPARSE_@ $WORD._PARSENOCACHE:WORD.POSTPARSE_@ ::= AAAA | BBBB '''[1:-1]
    events = []
    expressions = sample_text.split('\n')
    myvars = {
        'AAAA': 0,
        'BBBB': 1.1,
        'CCCC': 2.2,
        'DDDD': 3.3,
        'EEEE': 4.4,
        'FFFF': 5.5,
        'GGGG': 6.6,
        'HHHH': 7.7,
        'IIII': 8.8,
        'JJJJ': 9.9,
        "abc": 20,
    }
    # Pair each non-blank expression with the value Python itself computes.
    # NOTE(review): eval() executes the text as Python code; the input here is
    # the hard-coded fixture above.
    cases = [(text, eval(text, myvars)) for text in expressions if text.strip()]
    evaluator = arith.Arith(myvars)
    for expression, expected in cases:
        with induce.Tracer(expression, events):
            value = evaluator.eval(expression)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def helper(data):
    """Run ``basic_parse`` on *data* under the tracer and render the grammar.

    Returns a 3-tuple ``(parts, out, grammar)``: the value returned by
    ``basic_parse``, the list the tracer appended to, and ``str()`` of the
    ``induce.grammar(True)`` object after it has handled every list entry.
    """
    events = []
    # Pre-bound so the return statement still works if a context manager
    # suppresses an exception before the assignment happens.
    parsed = None
    rendered = None
    with induce.Tracer(data, events):
        parsed = basic_parse(data)
    with induce.grammar(True) as inferred:
        for event in events:
            inferred.handle_events(event)
        rendered = str(inferred)
    return (parsed, events, rendered)
def test_urlparser2():
    """Trace ``urlparser.urlparse`` over the sample text and compare grammars.

    Each '\\n'-separated entry is parsed under an ``induce.Tracer`` that appends
    to a shared event list; the rendered ``induce.grammar`` must equal the
    fixture.
    """
    # NOTE(review): the fixture literals look whitespace-collapsed and contain
    # '[email protected]' placeholders - probably extraction damage; confirm
    # against the upstream fixture.
    sample_text = ''' http://www.st.cs.uni-saarland.de/zeller#ref https://www.cispa.saarland:80/bar http://[email protected]:8080/bar?q=r#ref2 '''[1:-1]
    expected_grammar = ''' $START ::= $_PARSERESULTSWITHOFFSET.__INIT__:P1:$PARSERESULTS.__INIT__:TOKLIST$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM$_PARSERESULTSWITHOFFSET.__INIT__:P1?$PARSERESULTS._ASSTRINGLIST:ITEM#$PARSERESULTS._ASSTRINGLIST:ITEM | $_PARSERESULTSWITHOFFSET.__INIT__:P1:$PARSERESULTS.__INIT__:TOKLIST$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM$_PARSERESULTSWITHOFFSET.__INIT__:P1 | http://[email protected]:NETLOC/zeller#ref [email protected]:NETLOC ::= www.st.cs.uni-saarland.de $_PARSERESULTSWITHOFFSET.__INIT__:P1 ::= $PARSERESULTS.__DELITEM__:VALUE $PARSERESULTS._ASSTRINGLIST:ITEM ::= $PARSERESULTS.__NEW__:TOKLIST $PARSERESULTS.__INIT__:TOKLIST ::= $LITERAL._PARSENOCACHE:TOKENS $PARSERESULTS.__DELITEM__:VALUE ::= $PARSERESULTS.__SETITEM__:SUB $LITERAL._PARSENOCACHE:TOKENS ::= $LITERAL.POSTPARSE:TOKENLIST $PARSERESULTS.__NEW__:TOKLIST ::= $WORD._PARSENOCACHE:TOKENS $PARSERESULTS.__SETITEM__:SUB ::= $PARSERESULTS.__SETITEM__:V $LITERAL.POSTPARSE:TOKENLIST ::= $LITERAL.MATCH $WORD._PARSENOCACHE:TOKENS ::= $WORD.POSTPARSE:TOKENLIST $PARSERESULTS.__SETITEM__:V ::= $PARSERESULTS.__DELITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $WORD.POSTPARSE:TOKENLIST ::= [email protected]:FRAGMENT | [email protected]:QUERY | $WORD._PARSENOCACHE:WORD.POSTPARSE_@ $LITERAL.MATCH ::= $LITERAL._PARSENOCACHE:LITERAL.POSTPARSE_@ $PARSERESULTS.__DELITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__GETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $LITERAL._PARSENOCACHE:LITERAL.POSTPARSE_@ ::= // $WORD._PARSENOCACHE:WORD.POSTPARSE_@ ::= cispa | com:8080 | foo@google | saarland:80 | www $PARSERESULTS.__GETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__GETATTR__:PARSERESULTS.__GETITEM___@ | 
$PARSERESULTS.__SETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $PARSERESULTS.__SETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__IADD__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $PARSERESULTS.__GETATTR__:PARSERESULTS.__GETITEM___@ ::= [email protected]:PARSERESULTS.__GETATTR___@ $PARSERESULTS.__IADD__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $@.<LISTCOMP>:_PARSERESULTSWITHOFFSET.__GETITEM___@ [email protected]:PARSERESULTS.__GETATTR___@ ::= http | https $@.<LISTCOMP>:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= /bar [email protected]:FRAGMENT ::= [email protected]:PARSERESULTS.__GETITEM___@ [email protected]:QUERY ::= [email protected]:PARSERESULTS.__GETITEM___@ [email protected]:PARSERESULTS.__GETITEM___@ ::= q=r | ref2 '''[1:-1]
    events = []
    for sample in sample_text.split('\n'):
        with induce.Tracer(sample, events):
            urlparser.urlparse(sample)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_arrow1():
    """Trace ``arrow.get`` over the sample timestamps and compare grammars.

    Each '\\n'-separated entry is parsed under an ``induce.Tracer`` that appends
    to a shared event list; the rendered ``induce.grammar`` must equal the
    fixture.
    """
    # NOTE(review): the fixture literals look whitespace-collapsed and contain
    # '[email protected]' placeholders - probably extraction damage; confirm
    # against the upstream fixture.
    sample_text = ''' 2013-05-11T21:23:58.970460+00:00 2013-05-07T04:20:39.369271+00:00 2013-05-07T04:24:24.152325+00:00 2013-05-05T00:00:00-07:00 2013-05-06T21:24:49.552236-07:00 1980-05-01T00:00:00+00:00 '''[1:-1]
    expected_grammar = ''' $START ::= $DATETIMEPARSER._PARSE_MULTIFORMAT:STRING $DATETIMEPARSER._PARSE_MULTIFORMAT:STRING ::= $DATETIMEPARSER.PARSE_ISO:DATE_STRINGT$DATETIMEPARSER.PARSE_ISO:TIME_STRING $DATETIMEPARSER.PARSE_ISO:DATE_STRING ::= $DATETIMEPARSER._PARSE_TOKEN:VALUE-$DATETIMEPARSER._PARSE_TOKEN:VALUE-$DATETIMEPARSER._PARSE_TOKEN:VALUE $DATETIMEPARSER.PARSE_ISO:TIME_STRING ::= $DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE+$DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE | $DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:[email protected]:HOURS:$DATETIMEPARSER._PARSE_TOKEN:VALUE | $DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE:$DATETIMEPARSER._PARSE_TOKEN:VALUE.$DATETIMEPARSER._PARSE_TOKEN:VALUE$DATETIMEPARSER._PARSE_TOKEN:VALUE $DATETIMEPARSER._PARSE_TOKEN:VALUE ::= $DATETIMEPARSER.PARSE:VALUE $DATETIMEPARSER.PARSE:VALUE ::= [email protected]:MINUTES | [email protected]:MINUTES:[email protected]:MINUTES | [email protected]:HOURS:[email protected]:MINUTES | 01 | 04 | 05 | 06 | 07 | 11 | 152325 | 1980 | 20 | 2013 | 21 | 23 | 24 | 369271 | 39 | 49 | 552236 | 58 | 970460 [email protected]:MINUTES ::= [email protected]:HOURS | 00 [email protected]:HOURS ::= 00 | 07 '''[1:-1]
    events = []
    for stamp in sample_text.split('\n'):
        with induce.Tracer(stamp, events):
            arrow.get(stamp)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_basic7():
    """Trace ``basic_parse`` over the sample course text and compare grammars.

    Each '\\n'-separated entry is parsed under an ``induce.Tracer`` that appends
    to a shared event list; the events are handled by ``induce.grammar(True)``
    and its string form must equal the fixture.
    """
    # NOTE(review): the fixture literals look whitespace-collapsed and contain
    # '[email protected]' placeholders - probably extraction damage; confirm
    # against the upstream fixture.
    sample_text = ''' CS 2110 CS 2110 and INFO 3300 CS 2110, INFO 3300 CS 2110, 3300, 3140 CS 2110 or INFO 3300 MATH 2210, 2230, 2310, or 2940 '''[1:-1]
    expected_grammar = ''' $START ::= [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER | [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER [email protected]_PARSE:TOK [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER | [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER and [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER | [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER, [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER | [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER, [email protected]_PARSE:NUMBER, [email protected]_PARSE:NUMBER | [email protected]_PARSE:DEPT [email protected]_PARSE:NUMBER, [email protected]_PARSE:NUMBER, [email protected]_PARSE:NUMBER, [email protected]_PARSE:TOK [email protected]_PARSE:NUMBER [email protected]_PARSE:NUMBER ::= [email protected]_PARSE:TOK | 2110 | 2210 | 2230 | 2310 | 2940 | 3300 [email protected]_PARSE:DEPT ::= [email protected]_PARSE:TOK | CS | INFO | MATH [email protected]_PARSE:TOK ::= 2110 | 3140 | 3300 | CS | INFO | or '''[1:-1]
    events = []
    # Pre-bound so later statements still work if a context manager
    # suppresses an exception before the assignment happens.
    parsed = None
    rendered = None
    for entry in sample_text.split('\n'):
        with induce.Tracer(entry, events):
            parsed = basic_parse(entry)
    with induce.grammar(True) as inferred:
        for event in events:
            inferred.handle_events(event)
        rendered = str(inferred)
        print(rendered)
        assert expected_grammar == rendered
def test_urlparse2():
    """Trace ``urlparse`` over the sample URL text and compare grammars.

    Each '\\n'-separated entry is parsed under an ``induce.Tracer`` that appends
    to a shared event list; the rendered ``induce.grammar`` must equal the
    fixture.
    """
    # NOTE(review): the fixture literals look whitespace-collapsed and contain
    # '[email protected]' placeholders - probably extraction damage; confirm
    # against the upstream fixture.
    sample_text = ''' http://www.st.cs.uni-saarland.de/zeller#ref https://www.cispa.saarland:80/bar http://[email protected]:8080/bar?q=r#ref2 '''[1:-1]
    expected_grammar = ''' $START ::= $PARSERESULT.__NEW__:SCHEME://$PARSERESULT.__NEW__:NETLOC$PARSERESULT.__NEW__:PATH | $PARSERESULT.__NEW__:SCHEME://$PARSERESULT.__NEW__:NETLOC$PARSERESULT.__NEW__:PATH#$PARSERESULT.__NEW__:FRAGMENT | $PARSERESULT.__NEW__:SCHEME://$PARSERESULT.__NEW__:NETLOC$PARSERESULT.__NEW__:PATH?$PARSERESULT.__NEW__:QUERY#$PARSERESULT.__NEW__:FRAGMENT $PARSERESULT.__NEW__:FRAGMENT ::= [email protected]:FRAGMENT | $SPLITRESULT.__NEW__:FRAGMENT $PARSERESULT.__NEW__:NETLOC ::= [email protected]:NETLOC | $SPLITRESULT.__NEW__:NETLOC $PARSERESULT.__NEW__:SCHEME ::= $SPLITRESULT.__NEW__:SCHEME | http $PARSERESULT.__NEW__:PATH ::= $SPLITRESULT.__NEW__:PATH | /zeller [email protected]:FRAGMENT ::= [email protected]:FRAGMENT | ref [email protected]:NETLOC ::= [email protected]:NETLOC | www.st.cs.uni-saarland.de $SPLITRESULT.__NEW__:NETLOC ::= [email protected]:NETLOC $SPLITRESULT.__NEW__:SCHEME ::= http | https $SPLITRESULT.__NEW__:PATH ::= /bar [email protected]:NETLOC ::= [email protected]:8080 | www.cispa.saarland:80 $PARSERESULT.__NEW__:QUERY ::= $SPLITRESULT.__NEW__:QUERY $SPLITRESULT.__NEW__:FRAGMENT ::= [email protected]:FRAGMENT $SPLITRESULT.__NEW__:QUERY ::= [email protected]:QUERY [email protected]:QUERY ::= [email protected]:QUERY [email protected]:FRAGMENT ::= ref2 [email protected]:QUERY ::= q=r '''[1:-1]
    events = []
    for sample in sample_text.split('\n'):
        with induce.Tracer(sample, events):
            urlparse(sample)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_urlparser1():
    """Trace ``urlparser.urlparse`` over the sample text and compare grammars.

    Each '\\n'-separated entry is parsed under an ``induce.Tracer`` that appends
    to a shared event list; the rendered ``induce.grammar`` must equal the
    fixture.
    """
    # NOTE(review): the expected-grammar literal looks whitespace-collapsed and
    # contains '[email protected]' placeholders - probably extraction damage;
    # confirm against the upstream fixture.
    sample_text = ''' http://www.st.cs.uni-saarland.de/zeller#ref '''[1:-1]
    expected_grammar = ''' $START ::= $_PARSERESULTSWITHOFFSET.__INIT__:P1:$PARSERESULTS.__INIT__:TOKLIST$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM.$PARSERESULTS._ASSTRINGLIST:ITEM$_PARSERESULTSWITHOFFSET.__INIT__:P1 $_PARSERESULTSWITHOFFSET.__INIT__:P1 ::= $PARSERESULTS.__DELITEM__:VALUE $PARSERESULTS._ASSTRINGLIST:ITEM ::= $PARSERESULTS.__NEW__:TOKLIST $PARSERESULTS.__INIT__:TOKLIST ::= $LITERAL.__INIT__:MATCHSTRING $PARSERESULTS.__DELITEM__:VALUE ::= $PARSERESULTS.__SETITEM__:SUB $LITERAL.__INIT__:MATCHSTRING ::= $LITERAL._PARSENOCACHE:TOKENS $PARSERESULTS.__NEW__:TOKLIST ::= $WORD._PARSENOCACHE:TOKENS $LITERAL._PARSENOCACHE:TOKENS ::= $LITERAL.POSTPARSE:TOKENLIST $PARSERESULTS.__SETITEM__:SUB ::= $PARSERESULTS.__SETITEM__:V $WORD._PARSENOCACHE:TOKENS ::= $WORD.POSTPARSE:TOKENLIST $LITERAL.POSTPARSE:TOKENLIST ::= $LITERAL.MATCH $PARSERESULTS.__SETITEM__:V ::= $PARSERESULTS.__DELITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $WORD.POSTPARSE:TOKENLIST ::= $WORD._PARSENOCACHE:WORD.POSTPARSE_@ $LITERAL.MATCH ::= $LITERAL._PARSENOCACHE:LITERAL.POSTPARSE_@ $PARSERESULTS.__DELITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__GETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $WORD._PARSENOCACHE:WORD.POSTPARSE_@ ::= cs | de | st | uni-saarland | www $PARSERESULTS.__GETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__GETATTR__:PARSERESULTS.__GETITEM___@ | $PARSERESULTS.__SETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $LITERAL._PARSENOCACHE:LITERAL.POSTPARSE_@ ::= // $PARSERESULTS.__SETITEM__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= $PARSERESULTS.__IADD__:_PARSERESULTSWITHOFFSET.__GETITEM___@ $PARSERESULTS.__GETATTR__:PARSERESULTS.__GETITEM___@ ::= [email protected]:PARSERESULTS.__GETATTR___@ $PARSERESULTS.__IADD__:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= 
$@.<LISTCOMP>:_PARSERESULTSWITHOFFSET.__GETITEM___@ [email protected]:PARSERESULTS.__GETATTR___@ ::= http $@.<LISTCOMP>:_PARSERESULTSWITHOFFSET.__GETITEM___@ ::= /zeller#ref '''[1:-1]
    events = []
    for sample in sample_text.split('\n'):
        with induce.Tracer(sample, events):
            urlparser.urlparse(sample)
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
def test_accesslog1():
    """Trace ``accesslog.LogAnalyzer`` over the sample log text and compare grammars.

    For each '\\n'-separated entry a ``LogAnalyzer(entry, 5)`` is built and
    ``analyze()`` is called, both under the tracer; the rendered
    ``induce.grammar`` must equal the fixture.
    """
    # NOTE(review): the expected-grammar literal holds many '::=' rules on a
    # single line - presumably whitespace-collapsed; confirm against upstream.
    sample_text = ''' 1.1.1.1 - - [21/Feb/2014:06:35:45 +0100] "GET /robots.txt HTTP/1.1" 200 112 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" '''[1:-1]
    expected_grammar = ''' $START ::= $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:35:45 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 112 "-" "$LOGANALYZER.ANALYZE:USERAGENT" $LOGANALYZER.ANALYZE:USERAGENT ::= $LOGANALYZER.SUMMARIZE:COL.USERAGENT $LOGANALYZER.ANALYZE:REQUEST ::= $LOGANALYZER.SUMMARIZE:COL.REQUEST $LOGANALYZER.ANALYZE:IP ::= $LOGANALYZER.SUMMARIZE:COL.IP $LOGANALYZER.SUMMARIZE:COL.USERAGENT ::= Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) $LOGANALYZER.SUMMARIZE:COL.REQUEST ::= GET /robots.txt HTTP/1.1 $LOGANALYZER.SUMMARIZE:COL.IP ::= 1.1.1.1 '''[1:-1]
    events = []
    for entry in sample_text.split('\n'):
        with induce.Tracer(entry, events):
            analyzer = accesslog.LogAnalyzer(entry, 5)
            analyzer.analyze()
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
import arith
import induce

# Sample expressions posted on comp.lang.python, asking for advice
# in safely evaluating them.
rules = induce.slurplstriparg()
myvars = {
    'AAAA': 0,
    'BBBB': 1.1,
    'CCCC': 2.2,
    'DDDD': 3.3,
    'EEEE': 4.4,
    'FFFF': 5.5,
    'GGGG': 6.6,
    'HHHH': 7.7,
    'IIII': 8.8,
    'JJJJ': 9.9,
    "abc": 20,
}

# Define tests from the given rules: pair each non-blank rule with the value
# Python itself computes for it.
# NOTE(review): eval() executes every entry returned by
# induce.slurplstriparg() as Python code - only run this on trusted input.
tests = [(rule, eval(rule, myvars)) for rule in rules if rule.strip()]

# Copy myvars to the EvalConstant lookup dict. From here on the name `arith`
# refers to the evaluator instance, no longer to the module.
arith = arith.Arith(myvars)

# NOTE(review): block nesting was reconstructed from whitespace-collapsed
# text - confirm whether the print belongs inside the Tracer block.
for test, expected in tests:
    with induce.Tracer(test):
        result = arith.eval(test)
        print(test, expected, "<>", result)
def basic_parse(line):
    """Group the (dept, number) pairs found in *line* into 'or'-separated options.

    Commas and every occurrence of the substring 'and' are removed from the
    text before it is handed to ``lexical_split``.  The resulting tokens are
    then scanned left to right:

    * the token 'or' closes the current option list and starts a new one;
    * a purely alphabetic token becomes the current department and clears
      the current number;
    * any other token becomes the current number;
    * whenever both a department and a number are set, the pair is appended
      to the current option list.

    Returns a list of option lists, each holding ``(dept, number)`` tuples.
    """
    astr = line.replace(',','')
    # NOTE(review): str.replace removes 'and' anywhere in the text, including
    # inside longer words - confirm that is intended.
    astr = astr.replace('and','')
    # lexical_split is defined outside this block; presumably it splits on
    # whitespace - verify.
    tokens = lexical_split(astr)
    dept = None
    number = None
    result = []
    option = []
    for tok in tokens:
        if tok == 'or':
            # Close the current group even if it is empty.
            result.append(option)
            option = []
            continue
        if tok.isalpha():
            dept = tok
            number = None
        else:
            number = tok
        if dept and number:
            option.append((dept,number))
    else:
        # The loop has no break, so this always runs once the tokens are
        # exhausted: flush the last, non-empty group.
        if option:
            result.append(option)
    return result

# Driver: parse every non-blank input line under the tracer.
for line in induce.helpers.slurplarg():
    if not line.strip(): continue
    with induce.Tracer(line):
        parts = basic_parse(line)
import selectsql
import helpers, induce

# Parse every input statement with the SELECT grammar while the tracer
# records into the shared grammar object.
with induce.grammar() as g:
    for t in helpers.slurplstriparg():
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function, whereas
        # the bare statement form is a SyntaxError on Python 3.
        print("<%s>" % t)
        with induce.Tracer(t, g):
            selectsql.select_stmt.parseString(t)
import configobj
import induce

# Read the input via induce.slurparg(), then build a ConfigObj from it while
# the tracer is active.
# NOTE(review): block nesting was reconstructed from whitespace-collapsed
# text - confirm whether the print belongs inside the Tracer block.
ini = induce.slurparg()
with induce.Tracer(ini):
    config = configobj.ConfigObj(ini)
    print(config)
# json_choices is a hack to get around mutual recursion
# a json value is one of text, number, mapping, and collection
# text is any characters between quotes
# a number is like the regular expression -?[0-9]+(\.[0-9]+)?
# "parser >> Parser.lift(func)" means "pass the parsed value into func and return a new Parser"
# quoted_collection(start, space, inner, joiner, end)
#   means "a list of inner separated by joiner surrounded by start and end"
# we have to put a lot of "spaces" in since JSON allows lot of optional whitespace
json_choices = []
json = choice(json_choices)
text = quoted_chars("'", "'")
number = group_chars(
    [option_chars(["-"]), digits, option_chars([".", digits])]) >> Parser.lift(float)
joiner = between(spaces, match(","), spaces)
mapping_pair = pair(text, spaces & match(":") & spaces & json)
collection = quoted_collection("[", spaces, json, joiner, "]") >> Parser.lift(list)
mapping = quoted_collection("{", spaces, mapping_pair, joiner, "}") >> Parser.lift(dict)
# Fill the shared list only now that every alternative exists; `json` was
# built from this same list object above.
json_choices.extend([text, number, mapping, collection])

import induce, helpers

jsonstr = "{'a' : -1.0, 'b' : 2.0, 'z' : {'c' : [1.0, [2.0, [3.0]]]}}"
with induce.grammar() as g:
    with induce.Tracer(jsonstr, g):
        # Single parenthesised argument: same output under the Python 2 print
        # statement and the Python 3 print function; the bare statement form
        # is a SyntaxError on Python 3.
        print(json.parseString(jsonstr))
import microc
import helpers, induce

# Parse the input program with MicroC while the tracer records into the
# shared grammar object, then print the generated code.
# NOTE(review): block nesting was reconstructed from whitespace-collapsed
# text - confirm whether the print belongs inside the Tracer block.
with induce.grammar() as g:
    test_program_example = helpers.slurparg()
    mc = microc.MicroC()
    with induce.Tracer(test_program_example, g):
        mc.parse_text(test_program_example)
        # Single parenthesised argument: same output under the Python 2 print
        # statement and the Python 3 print function; the bare statement form
        # is a SyntaxError on Python 3.
        print(mc.codegen.code)
import simplesql
import induce, helpers

# Parse every input statement with simpleSQL while the tracer records into
# the shared grammar object.
with induce.grammar() as g:
    for statement in helpers.slurplstriparg():
        # NOTE(review): the tracer is given the stripped text while the
        # parser receives the unstripped one - confirm this is intended.
        tracked = statement.strip()
        with induce.Tracer(tracked, g):
            simplesql.simpleSQL.parseString(statement)
from datetime import datetime
import induce

# Each non-blank input entry has the form '<date text>|<strptime format>';
# the strptime call is traced with the whole entry as the tracked input.
for entry in induce.slurplstriparg():
    if not entry.strip():
        continue
    date_text, date_format = entry.split('|')
    with induce.Tracer(entry):
        datetime.strptime(date_text, date_format)
# NOTE(review): this script is Python 2 only as written - it imports the
# `httplib` and `StringIO` modules and uses print statements.
from httplib import HTTPResponse
from StringIO import StringIO
import helpers, induce

http_response_str = helpers.slurparg()

class FakeSocket():
    """Minimal stand-in for a socket that serves a fixed response string."""

    def __init__(self, response_str):
        # Wrap the response text in a file-like object.
        self._file = StringIO(response_str)

    def makefile(self, *args, **kwargs):
        # Arguments are ignored; the same in-memory file is always returned.
        return self._file

# Parse the response text with HTTPResponse while the tracer records into
# the shared grammar object.
# NOTE(review): block nesting was reconstructed from whitespace-collapsed
# text - confirm which statements belong inside the Tracer block.
with induce.grammar() as g:
    with induce.Tracer(http_response_str, g):
        source = FakeSocket(http_response_str)
        response = HTTPResponse(source)
        response.begin()
        print "status:", response.status
        print "single header:", response.getheader('Content-Type')
        print "content:", response.read(len(http_response_str)) # the len here will give a 'big enough' value to read the whole content
def test_accesslog2():
    """Trace ``accesslog.LogAnalyzer`` over the sample access log and compare grammars.

    For each '\\n'-separated entry a ``LogAnalyzer(entry, 5)`` is built and
    ``analyze()`` is called, both under the tracer; the rendered
    ``induce.grammar`` must equal the fixture.
    """
    # NOTE(review): both fixture literals look whitespace-collapsed and
    # re-wrapped (line breaks fall in the middle of log records) - probably
    # extraction damage; confirm against the upstream fixture.
    sample_text = ''' 1.1.1.1 - - [21/Feb/2014:06:35:45 +0100] "GET /robots.txt HTTP/1.1" 200 112 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 1.1.1.1 - - [21/Feb/2014:06:35:45 +0100] "GET /blog.css HTTP/1.1" 200 3663 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 2.2.2.2 - - [21/Feb/2014:06:52:04 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 2.2.2.2 - - [21/Feb/2014:06:52:04 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 3.3.3.3 - - [21/Feb/2014:06:58:14 +0100] "/" 200 1664 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 4.4.4.4 - - [21/Feb/2014:07:22:03 +0100] "/" 200 1664 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 5.5.5.5 - - [21/Feb/2014:07:32:48 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:07:32:48 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:08:13:01 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:08:13:01 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 7.7.7.7 - - [21/Feb/2014:08:51:25 +0100] "GET /main.php HTTP/1.1" 200 3681 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Q312461)" 7.7.7.7 - - [21/Feb/2014:08:51:34 +0100] "-" 400 0 "-" "-" 7.7.7.7 - - [21/Feb/2014:08:51:48 +0100] "GET /tag/php.php HTTP/1.1" 200 4673 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Q312461)" 8.8.8.8 - - [21/Feb/2014:08:53:43 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 8.8.8.8 - - [21/Feb/2014:08:53:43 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 9.9.9.9 - - [21/Feb/2014:09:18:40 +0100] "-" 400 0 "-" "-" 9.9.9.9 - - [21/Feb/2014:09:18:40 +0100] "GET /main HTTP/1.1" 200 3681 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 9.9.9.9 - - [21/Feb/2014:09:18:41 +0100] "GET 
/phpMyAdmin/scripts/setup.php HTTP/1.1" 404 27 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 9.9.9.9 - - [21/Feb/2014:09:18:42 +0100] "GET /pma/scripts/setup.php HTTP/1.1" 404 27 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 10.10.10.10 - - [21/Feb/2014:09:21:29 +0100] "-" 400 0 "-" "-" 10.10.10.10 - - [21/Feb/2014:09:21:29 +0100] "GET /main.php HTTP/1.1" 200 3681 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 10.10.10.10 - - [21/Feb/2014:09:21:30 +0100] "GET /about.php HTTP/1.1" 200 2832 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 10.10.10.10 - - [21/Feb/2014:09:21:30 +0100] "GET /tag/nginx.php HTTP/1.1" 200 3295 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 10.10.10.10 - - [21/Feb/2014:09:21:31 +0100] "GET /how-to-setup.php HTTP/1.1" 200 2637 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117" 1.1.1.1 - - [21/Feb/2014:09:27:27 +0100] "GET /robots.txt HTTP/1.1" 200 112 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 1.1.1.1 - - [21/Feb/2014:09:27:27 +0100] "GET /tag/tor.php HTTP/1.1" 200 2041 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 5.5.5.5 - - [21/Feb/2014:10:14:37 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:10:14:37 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 8.8.8.8 - - [21/Feb/2014:10:55:19 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 8.8.8.8 - - [21/Feb/2014:10:55:19 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 1.1.1.1 - - [21/Feb/2014:11:19:05 +0100] "GET /robots.txt HTTP/1.1" 200 112 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 1.1.1.1 - - [21/Feb/2014:11:19:06 +0100] "GET 
/robots.txt HTTP/1.1" 200 112 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 1.1.1.1 - - [21/Feb/2014:11:19:06 +0100] "GET / HTTP/1.1" 200 3649 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" 6.6.6.6 - - [21/Feb/2014:12:16:14 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:12:16:15 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:14:17:52 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:14:17:52 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:14:58:04 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:14:58:04 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:15:38:46 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:15:38:47 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 2.2.2.2 - - [21/Feb/2014:18:20:36 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 2.2.2.2 - - [21/Feb/2014:18:20:37 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:19:42:00 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [21/Feb/2014:19:42:00 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 2.2.2.2 - - [21/Feb/2014:20:22:13 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 2.2.2.2 - - [21/Feb/2014:20:22:13 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:21:02:55 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 6.6.6.6 - - [21/Feb/2014:21:02:55 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 8.8.8.8 - - [22/Feb/2014:01:05:37 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 8.8.8.8 - - [22/Feb/2014:01:05:38 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 8.8.8.8 - - [22/Feb/2014:04:28:10 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 8.8.8.8 - - 
[22/Feb/2014:04:28:10 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 2.2.2.2 - - [22/Feb/2014:05:49:34 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 2.2.2.2 - - [22/Feb/2014:05:49:34 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" 5.5.5.5 - - [22/Feb/2014:06:29:47 +0100] "GET /main/rss HTTP/1.1" 301 178 "-" "Motorola" 5.5.5.5 - - [22/Feb/2014:06:29:47 +0100] "GET /feed/atom.xml HTTP/1.1" 304 0 "-" "Motorola" '''[1:-1]
    expected_grammar = ''' $START ::= $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:35:45 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 112 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:35:45 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3663 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:52:04 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:52:04 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:06:58:14 +0100] "/" 200 1664 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:07:22:03 +0100] "/" 200 1664 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:07:32:48 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:07:32:48 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:13:01 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:13:01 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:51:25 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3681 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:51:48 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 
4673 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:53:43 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:53:43 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:18:40 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3681 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:18:41 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 404 27 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:18:42 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 404 27 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:21:29 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3681 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:21:30 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 2832 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:21:30 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3295 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:21:31 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 2637 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:27:27 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 112 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:27:27 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 2041 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:10:14:37 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:10:14:37 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:10:55:19 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:10:55:19 +0100] 
"$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:11:19:05 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 112 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:11:19:06 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 112 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:11:19:06 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 200 3649 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:12:16:14 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:12:16:15 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:14:17:52 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:14:17:52 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:14:58:04 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:14:58:04 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:15:38:46 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:15:38:47 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:18:20:36 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:18:20:37 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:19:42:00 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - 
[21/Feb/2014:19:42:00 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:20:22:13 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:20:22:13 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:21:02:55 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:21:02:55 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:01:05:37 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:01:05:38 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:04:28:10 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:04:28:10 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:05:49:34 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:05:49:34 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:06:29:47 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 301 178 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.ANALYZE:IP - - [22/Feb/2014:06:29:47 +0100] "$LOGANALYZER.ANALYZE:REQUEST" 304 0 "-" "$LOGANALYZER.ANALYZE:USERAGENT" | $LOGANALYZER.__INIT__:CONTENT $LOGANALYZER.ANALYZE:USERAGENT ::= $LOGANALYZER.SUMMARIZE:COL.USERAGENT $LOGANALYZER.ANALYZE:REQUEST ::= $LOGANALYZER.SUMMARIZE:COL.REQUEST $LOGANALYZER.ANALYZE:IP ::= $LOGANALYZER.SUMMARIZE:COL.IP $LOGANALYZER.SUMMARIZE:COL.USERAGENT ::= Motorola | 
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Q312461) | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 | Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) $LOGANALYZER.SUMMARIZE:COL.REQUEST ::= GET / HTTP/1.1 | GET /about.php HTTP/1.1 | GET /blog.css HTTP/1.1 | GET /feed/atom.xml HTTP/1.1 | GET /how-to-setup.php HTTP/1.1 | GET /main HTTP/1.1 | GET /main.php HTTP/1.1 | GET /main/rss HTTP/1.1 | GET /phpMyAdmin/scripts/setup.php HTTP/1.1 | GET /pma/scripts/setup.php HTTP/1.1 | GET /robots.txt HTTP/1.1 | GET /tag/nginx.php HTTP/1.1 | GET /tag/php.php HTTP/1.1 | GET /tag/tor.php HTTP/1.1 $LOGANALYZER.SUMMARIZE:COL.IP ::= 1.1.1.1 | 10.10.10.10 | 2.2.2.2 | 3.3.3.3 | 4.4.4.4 | 5.5.5.5 | 6.6.6.6 | 7.7.7.7 | 8.8.8.8 | 9.9.9.9 $LOGANALYZER.__INIT__:CONTENT ::= $LOGANALYZER.ANALYZE:LINE $LOGANALYZER.ANALYZE:LINE ::= $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:08:51:34 +0100] "-" 400 0 "-" "-" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:18:40 +0100] "-" 400 0 "-" "-" | $LOGANALYZER.ANALYZE:IP - - [21/Feb/2014:09:21:29 +0100] "-" 400 0 "-" "-" '''[1:-1]
    events = []
    for entry in sample_text.split('\n'):
        with induce.Tracer(entry, events):
            analyzer = accesslog.LogAnalyzer(entry, 5)
            analyzer.analyze()
    with induce.grammar() as inferred:
        for event in events:
            inferred.handle_events(event)
        print(str(inferred))
        assert expected_grammar == str(inferred)
# Driver script: parses one PGN text with pgn.pgnGrammar while induce traces
# the parse.
import pgn
import induce
import helpers

# Whole input text as returned by helpers.slurparg(); presumably the contents
# of a file named on the command line -- verify in helpers.
tpgn = helpers.slurparg()
with induce.grammar() as g:
    # Run the parse under a Tracer bound to the input text and to g.
    with induce.Tracer(tpgn, g):
        tokens = pgn.pgnGrammar.parseString(tpgn)
        # Was the Python 2 statement `print "tokens = ", tokens`, which is a
        # SyntaxError on Python 3.  Formatting into a single string keeps the
        # emitted text identical ("tokens = " + one separator space + tokens)
        # on both Python 2 and Python 3.
        # NOTE(review): the original indentation was lost; this print is
        # assumed to sit inside the Tracer block -- confirm.
        print("%s %s" % ("tokens = ", tokens))
def test_apachelogparse1():
    """Check the grammar induced from parsing one Apache access-log line.

    Each non-blank line of ``log_lines`` is parsed by a parser built with
    ``apache_log_parser.make_parser`` while an ``induce.Tracer(log, result)``
    is active.  The entries collected in ``result`` are then replayed through
    ``g.handle_events`` and ``str(g)`` is compared with the expected
    ``grammar`` literal.
    """
    # NOTE(review): line breaks inside the two literals below were
    # reconstructed (one log record per line, one grammar rule per line);
    # any other whitespace is kept exactly as found.  The assertion is an
    # exact string comparison, so confirm the layout against real str(g)
    # output.
    # NOTE(review): the "[email protected]" fragments look like an e-mail
    # obfuscation artifact rather than genuine nonterminal text -- the
    # expected grammar probably needs to be regenerated.
    # [1:-1] drops the newline right after the opening quotes and the one
    # right before the closing quotes.
    log_lines = '''
127.0.0.1 <<6113>> [16/Aug/2013:15:45:34 +0000] 1966093us "GET / HTTP/1.1" 200 3478 "https://example.com/" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.18)" - -
'''[1:-1]
    grammar = '''
$START ::= $@.<LAMBDA>:[email protected]:PATTERN$@.<LAMBDA>:[email protected]:PATTERN[16/Aug/$USERAGENTPARSER.V1_REPLACEMENT:15:45:34 +$FIXEDOFFSET._FIXEDOFFSET__NAME] $@.<LAMBDA>:[email protected]:[email protected]_REQUEST_FROM_FIRST_LINE:[email protected]:PATTERN$@.<LAMBDA>:MATCHED_STRINGS.STATUS $@.<LAMBDA>:[email protected]:PATTERN$@.<LAMBDA>:[email protected]:PATTERN"$USERAGENTPARSER.PARSE:[email protected]:PATTERN- -
[email protected]_REQUEST_FROM_FIRST_LINE:FIRST_LINE ::= [email protected]:STRING
$USERAGENTPARSER.PARSE:USER_AGENT_STRING ::= $DEVICEPARSER.PARSE:USER_AGENT_STRING
$FIXEDOFFSET._FIXEDOFFSET__NAME ::= $FIXEDOFFSET.__INIT__:STRING
$USERAGENTPARSER.V1_REPLACEMENT ::= $OSPARSER.OS_V1_REPLACEMENT
[email protected]:PATTERN ::= " | << | " | >> | us "
$@.<LAMBDA>:MATCHED_STRINGS.REQUEST_HEADER_REFERER ::= $PARSER.PARSE:RESULTS.REQUEST_HEADER_REFERER
$@.<LAMBDA>:MATCHED_STRINGS.RESPONSE_BYTES_CLF ::= $PARSER.PARSE:RESULTS.RESPONSE_BYTES_CLF
$@.<LAMBDA>:MATCHED_STRINGS.REMOTE_HOST ::= $PARSER.PARSE:RESULTS.REMOTE_HOST
$@.<LAMBDA>:MATCHED_STRINGS.TIME_US ::= $PARSER.PARSE:RESULTS.TIME_US
$@.<LAMBDA>:MATCHED_STRINGS.STATUS ::= $PARSER.PARSE:RESULTS.STATUS
$@.<LAMBDA>:MATCHED_STRINGS.PID ::= $PARSER.PARSE:RESULTS.PID
$DEVICEPARSER.PARSE:USER_AGENT_STRING ::= $USERAGENT.__INIT__:USER_AGENT_STRING
$FIXEDOFFSET.__INIT__:STRING ::= 0000
$OSPARSER.OS_V1_REPLACEMENT ::= 2013
[email protected]:STRING ::= [email protected]_REQUEST_FROM_FIRST_LINE:MATCHED_STRINGS.REQUEST_FIRST_LINE
$PARSER.PARSE:RESULTS.REQUEST_HEADER_REFERER ::= $PARSER.PARSE:VALUES.REQUEST_HEADER_REFERER
$PARSER.PARSE:RESULTS.RESPONSE_BYTES_CLF ::= $PARSER.PARSE:VALUES.RESPONSE_BYTES_CLF
$PARSER.PARSE:RESULTS.REMOTE_HOST ::= $PARSER.PARSE:VALUES.REMOTE_HOST
$PARSER.PARSE:RESULTS.TIME_US ::= $PARSER.PARSE:VALUES.TIME_US
$PARSER.PARSE:RESULTS.STATUS ::= $PARSER.PARSE:VALUES.STATUS
$PARSER.PARSE:RESULTS.PID ::= $PARSER.PARSE:VALUES.PID
$USERAGENT.__INIT__:USER_AGENT_STRING ::= [email protected]:USER_AGENT_STRING
[email protected]_REQUEST_FROM_FIRST_LINE:MATCHED_STRINGS.REQUEST_FIRST_LINE ::= [email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_FIRST_LINE
$PARSER.PARSE:VALUES.REQUEST_HEADER_REFERER ::= [email protected]_APACHELOGPARSE1:[email protected]_HEADER_REFERER
$PARSER.PARSE:VALUES.RESPONSE_BYTES_CLF ::= [email protected]_APACHELOGPARSE1:[email protected]_BYTES_CLF
$PARSER.PARSE:VALUES.REMOTE_HOST ::= [email protected]_APACHELOGPARSE1:[email protected]_HOST
$PARSER.PARSE:VALUES.TIME_US ::= [email protected]_APACHELOGPARSE1:[email protected]_US
$PARSER.PARSE:VALUES.STATUS ::= [email protected]_APACHELOGPARSE1:[email protected]
$PARSER.PARSE:VALUES.PID ::= [email protected]_APACHELOGPARSE1:[email protected]
[email protected]:USER_AGENT_STRING ::= $OSPARSER.PARSE:USER_AGENT_STRING
[email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_FIRST_LINE ::= [email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_METHOD / HTTP/[email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_HTTP_VER
[email protected]_APACHELOGPARSE1:[email protected]_HEADER_REFERER ::= $PARSER.PARSE:@.<LAMBDA>[email protected]_HEADER_REFERER
[email protected]_APACHELOGPARSE1:[email protected]_BYTES_CLF ::= $PARSER.PARSE:@.<LAMBDA>[email protected]_BYTES_CLF
[email protected]_APACHELOGPARSE1:[email protected]_HOST ::= $PARSER.PARSE:@.<LAMBDA>[email protected]_HOST
[email protected]_APACHELOGPARSE1:[email protected]_US ::= $PARSER.PARSE:@.<LAMBDA>[email protected]_US
[email protected]_APACHELOGPARSE1:[email protected] ::= $PARSER.PARSE:@.<LAMBDA>[email protected]
[email protected]_APACHELOGPARSE1:[email protected] ::= $PARSER.PARSE:@.<LAMBDA>[email protected]
$OSPARSER.PARSE:USER_AGENT_STRING ::= [email protected]:USER_AGENT_STRING
[email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_HTTP_VER ::= $PARSER.PARSE:RESULTS.REQUEST_HTTP_VER
[email protected]_REQUEST_FROM_FIRST_LINE:RESULTS.REQUEST_METHOD ::= $PARSER.PARSE:RESULTS.REQUEST_METHOD
$PARSER.PARSE:@.<LAMBDA>[email protected]_HEADER_REFERER ::= https://example.com/
$PARSER.PARSE:@.<LAMBDA>[email protected]_BYTES_CLF ::= 3478
$PARSER.PARSE:@.<LAMBDA>[email protected]_HOST ::= 127.0.0.1
$PARSER.PARSE:@.<LAMBDA>[email protected]_US ::= 1966093
$PARSER.PARSE:@.<LAMBDA>[email protected] ::= 200
$PARSER.PARSE:@.<LAMBDA>[email protected] ::= 6113
[email protected]:USER_AGENT_STRING ::= Mozilla/5.0 (X11; U; [email protected]_OPERATING_SYSTEM:FAMILY x86_64; en-US; rv:1.9.2.18)
$PARSER.PARSE:RESULTS.REQUEST_HTTP_VER ::= $PARSER.PARSE:VALUES.REQUEST_HTTP_VER
$PARSER.PARSE:RESULTS.REQUEST_METHOD ::= $PARSER.PARSE:VALUES.REQUEST_METHOD
[email protected]_OPERATING_SYSTEM:FAMILY ::= $OPERATINGSYSTEM.__NEW__:FAMILY
$PARSER.PARSE:VALUES.REQUEST_HTTP_VER ::= $PARSER.PARSE:@[email protected]_HTTP_VER
$PARSER.PARSE:VALUES.REQUEST_METHOD ::= $PARSER.PARSE:@[email protected]_METHOD
$OPERATINGSYSTEM.__NEW__:FAMILY ::= $OSPARSER.PARSE:OS
$PARSER.PARSE:@[email protected]_HTTP_VER ::= [email protected]_APACHELOGPARSE1:[email protected]_HTTP_VER
$PARSER.PARSE:@[email protected]_METHOD ::= [email protected]_APACHELOGPARSE1:[email protected]_METHOD
$OSPARSER.PARSE:OS ::= [email protected]:OS
[email protected]_APACHELOGPARSE1:[email protected]_HTTP_VER ::= 1.1
[email protected]_APACHELOGPARSE1:[email protected]_METHOD ::= GET
[email protected]:OS ::= $PARSER.PARSE:RESULTS.REQUEST_HEADER_USER_AGENT__OS__FAMILY
$PARSER.PARSE:RESULTS.REQUEST_HEADER_USER_AGENT__OS__FAMILY ::= $PARSER.PARSE:VALUES.REQUEST_HEADER_USER_AGENT__OS__FAMILY
$PARSER.PARSE:VALUES.REQUEST_HEADER_USER_AGENT__OS__FAMILY ::= [email protected]_APACHELOGPARSE1:[email protected]_HEADER_USER_AGENT__OS__FAMILY
[email protected]_APACHELOGPARSE1:[email protected]_HEADER_USER_AGENT__OS__FAMILY ::= $PARSER.PARSE:@[email protected]_HEADER_USER_AGENT__OS__FAMILY
$PARSER.PARSE:@[email protected]_HEADER_USER_AGENT__OS__FAMILY ::= $USERAGENT.__INIT__:UA_DICT.OS.FAMILY
$USERAGENT.__INIT__:UA_DICT.OS.FAMILY ::= [email protected]:@[email protected]
[email protected]:@[email protected] ::= [email protected]:V.OS.FAMILY
[email protected]:V.OS.FAMILY ::= $USERAGENT.__INIT__:@[email protected]
$USERAGENT.__INIT__:@[email protected] ::= Linux
'''[1:-1]
    # Tracer output for every parsed line accumulates in this list.
    result = []
    for log in log_lines.split('\n'):
        # Skip blank lines.
        if log.strip() == '': continue
        # `t` is bound but never read afterwards.
        with induce.Tracer(log, result) as t:
            # The expected grammar mentions TEST_APACHELOGPARSE1, so names in
            # this function apparently surface in the induced grammar; do not
            # rename locals here without regenerating `grammar`.
            line_parser = apache_log_parser.make_parser(
                "%h <<%P>> %t %Dus \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %l %u"
            )
            log_line_data = line_parser(log)
    with induce.grammar() as g:
        # Replay every collected entry into the grammar object.
        for jframe in result:
            g.handle_events(jframe)
        # NOTE(review): print/assert are assumed to sit inside the grammar
        # context (original indentation was lost) -- confirm.
        print(str(g))
        assert (grammar == str(g))
# Driver script: runs dateutil's parser over '|'-separated input records while
# induce traces each parse.
from dateutil import parser
import helpers, induce

with induce.grammar() as g:
    # presumably the stripped input lines named on the command line --
    # verify in helpers.slurplstriparg
    lines = helpers.slurplstriparg()
    for l in lines:
        # A record must contain exactly one '|'; any other count makes this
        # two-name unpacking raise ValueError.  Only the left part is used;
        # fmt is never read.
        dat, fmt = l.split('|')
        # Trace the parse of the date text, reporting into g.
        with induce.Tracer(dat, g):
            # dt is never read; the call is made for the trace it produces.
            dt = parser.parse(dat)
# Driver script: traces urlparser.urlparse on the first whitespace-separated
# word of every input line.
import urlparser
import induce, induce.helpers

# Not referenced again in this script.
base = ''
# presumably yields the input lines named on the command line -- verify in
# induce.helpers.slurplarg
for line in induce.helpers.slurplarg():
    words = line.split()
    # Skip lines that are empty or whitespace-only.
    if not words: continue
    url = words[0]
    # Tracer is given only the input text here (no grammar/result argument).
    with induce.Tracer(url):
        # parts is never read; the call is made for the trace it produces.
        parts = urlparser.urlparse(url)
def test_dateparse2():
    """Check the grammar induced from several ``datetime.strptime`` calls.

    Every non-blank ``date|format`` record in ``date_lines`` is split on
    ``'|'`` and parsed with ``datetime.strptime`` while an
    ``induce.Tracer(line, result)`` is active.  The entries collected in
    ``result`` are replayed through ``g.handle_events`` and ``str(g)`` is
    compared with the expected ``grammar`` literal.
    """
    # NOTE(review): line breaks inside the two literals below were
    # reconstructed (one record per line, one grammar rule per line); any
    # other whitespace is kept exactly as found.  The assertion is an exact
    # string comparison, so confirm the layout against real str(g) output.
    # [1:-1] drops the newline right after the opening quotes and the one
    # right before the closing quotes.
    date_lines = '''
Jun 1 2005 1:33PM|%b %d %Y %I:%M%p
Aug 28 1999 12:00AM|%b %d %Y %I:%M%p
Jun 1 2005 1:33PM|%b %d %Y %I:%M%p
1 Dec 00|%d %b %y
2 Nov 01|%d %b %y
3 Oct 02|%d %b %y
4 Sep 03|%d %b %y
13 Nov 90|%d %b %y
14 Oct 10|%d %b %y
'''[1:-1]
    grammar = '''
$START ::= $@._STRPTIME_DATETIME:DATA_STRING|$@._STRPTIME_DATETIME:FORMAT
$@._STRPTIME_DATETIME:DATA_STRING ::= $@._STRPTIME:DATA_STRING
$@._STRPTIME_DATETIME:FORMAT ::= $@._STRPTIME:FORMAT | $TIMERE.COMPILE:FORMAT
$@._STRPTIME:DATA_STRING ::= $@._STRPTIME:ARG
$@._STRPTIME:FORMAT ::= $@._STRPTIME:ARG | %d %b %y
$@._STRPTIME:ARG ::= $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.D $@._STRPTIME:FOUND_DICT.Y $@._STRPTIME:FOUND_DICT.I:$@._STRPTIME:FOUND_DICT.M$@._STRPTIME:FOUND_DICT.P | $@._STRPTIME:FOUND_DICT.B 1 $@._STRPTIME:FOUND_DICT.Y 1:$@._STRPTIME:FOUND_DICT.M$@._STRPTIME:FOUND_DICT.P | $@._STRPTIME:FOUND_DICT.D $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.Y | %b %d %Y %I:%M%p | %d %b %y | 1 $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.Y | 2 $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.Y | 3 $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.Y | 4 $@._STRPTIME:FOUND_DICT.B $@._STRPTIME:FOUND_DICT.Y
$@._STRPTIME:FOUND_DICT.M ::= 00 | 33
$@._STRPTIME:FOUND_DICT.Y ::= 00 | 01 | 02 | 03 | 10 | 1999 | 2005 | 90
$@._STRPTIME:FOUND_DICT.B ::= Aug | Dec | Jun | Nov | Oct | Sep
$@._STRPTIME:FOUND_DICT.P ::= AM | PM
$@._STRPTIME:FOUND_DICT.I ::= 12
$@._STRPTIME:FOUND_DICT.D ::= 13 | 14 | 28
$TIMERE.COMPILE:FORMAT ::= $TIMERE.PATTERN:FORMAT
$TIMERE.PATTERN:FORMAT ::= $@._STRPTIME:FORMAT
'''[1:-1]
    # Tracer output for every parsed record accumulates in this list.
    result = []
    for line in date_lines.split('\n'):
        # Skip blank lines.
        if line.strip() == '': continue
        # A record must contain exactly one '|' or this unpacking raises
        # ValueError.
        dat, fmt = line.split('|')
        # The Tracer is given the whole record (date and format), not just
        # the date text.  `t` is bound but never read afterwards.
        with induce.Tracer(line, result) as t:
            datetime.strptime(dat, fmt)
    with induce.grammar() as g:
        # Replay every collected entry into the grammar object.
        for jframe in result:
            g.handle_events(jframe)
        # NOTE(review): print/assert are assumed to sit inside the grammar
        # context (original indentation was lost) -- confirm.
        print(str(g))
        assert (grammar == str(g))
# Driver script: parses every input item with simpleselectsql.selectStmt while
# induce traces the parse.
import simpleselectsql
import induce, helpers

with induce.grammar() as g:
    # presumably the stripped input lines named on the command line --
    # verify in helpers.slurplstriparg
    for i in helpers.slurplstriparg():
        # Trace the parse of this item, reporting into g.
        with induce.Tracer(i, g):
            # Return value is discarded; the call is made for the trace.
            simpleselectsql.selectStmt.parseString(i)
# Driver script: builds a mathexpr.Parser for every input expression while
# induce traces the construction.
import mathexpr
import induce, helpers

with induce.grammar() as g:
    # presumably the stripped input lines named on the command line --
    # verify in helpers.slurplstriparg
    exprs = helpers.slurplstriparg()
    for e in exprs:
        # Trace the handling of this expression, reporting into g.
        with induce.Tracer(e, g):
            # Second argument is an empty dict; its meaning is defined by
            # mathexpr.Parser (presumably a variable table -- confirm).
            # The Parser instance itself is discarded.
            mathexpr.Parser(e, {})
# Driver script: decodes one JSON text with simplejson.loads while induce
# traces the call.
import simplejson
import induce

# Whole input text as returned by induce.slurparg(); presumably the contents
# of a file named on the command line -- verify in induce.
myfile = induce.slurparg()
# Tracer is given only the input text here (no grammar/result argument).
with induce.Tracer(myfile):
    # Decoded value is discarded; the call is made for the trace.
    simplejson.loads(myfile)
# Driver script: echoes every input line and parses it with dateparser.parse
# while induce traces the parse.
import dateparser
import helpers, induce

with induce.grammar() as g:
    # presumably the stripped input lines named on the command line --
    # verify in helpers.slurplstriparg
    lines = helpers.slurplstriparg()
    for l in lines:
        # Was the Python 2 statement `print l` (a SyntaxError on Python 3).
        # Parenthesising a single value prints the same text on Python 2 and
        # makes the line valid on Python 3.
        print(l)
        # Trace the parse of this line, reporting into g.
        with induce.Tracer(l, g):
            # dt is never read; the call is made for the trace it produces.
            dt = dateparser.parse(l)
# Driver script: parses every input item with romannumerals.romanNumeral while
# induce traces the parse.
import romannumerals
import induce, helpers

with induce.grammar() as g:
    # presumably the stripped input lines named on the command line --
    # verify in helpers.slurplstriparg
    for rn in helpers.slurplstriparg():
        # Trace the parse of this numeral, reporting into g.
        with induce.Tracer(rn, g):
            # Return value is discarded; the call is made for the trace.
            romannumerals.romanNumeral.parseString(rn)