def __init__(self):
    """Create the lexer, the helper regexes and the pattern lookup tables.

    Every table entry is a short string of one-character codes.
    NOTE(review): the codes look like SQLexer token types ('1' number,
    's' string, 'k' keyword, 'o' operator, ...) -- confirm with the lexer.
    """
    self.lex = SQLexer()

    # Word characters only (letters, digits, underscore), optionally padded
    # with whitespace; the empty string matches as well.
    self.alpha_re = re.compile(r'^\s*[A-Za-z0-9_]*\s*$')

    # A word closed by a single or double quote, e.g. FOO10" or FOO1'.
    # The first character must be a letter so that something like 7"
    # (most likely a measurement, not SQLi) does not match.
    self.alpha_str_re = re.compile(r'^[A-Z][A-Z0-9_]+[\'\"]$')

    # Entries stored in self.pmap_new (up to five codes each).
    pmap_new_entries = (
        '1ok', '1oks,', '1oksc', 'noL1R', '1okLk', '1okfL',
        'sonos', 'sono1', 'sosos', '1ono1', 'sonoo', '1Rono',
        's;n:k', 'k1,1,', 'k1,1k', 'nokLk',
        # unions (label carried over from the original table)
        'okkkn', 'ofL1R', 'fLk,L', '1,1R,', '1,LfL', '1,Lk1', '1,LkL', '1,Lkf',
        '1,fL1', '1,sR,', '1;kL1', '1;kLL', '1;kLo', '1;kfL', '1;kks', '1;knL',
        '1;knc', '1;koL', '1;kok', '1R,L1', '1R;kL', '1R;kf', '1R;kk', '1R;kn',
        '1R;ko', '1RLRs', '1RR;k', '1RRR;', '1RRRR', '1RRRk', '1RRRo', '1RRk1',
        '1RRkk', '1RRo1', '1RRoL', '1RRof', '1RRok', '1RRon', '1RRoo', '1Rk1',
        '1Rk1c', '1Rk1o', '1Rkks', '1Ro1f', '1Ro1k', '1Ro1o', '1RoL1', '1RoLk',
        '1RoLn', '1RofL', '1Rok1', '1RokL', '1RooL', '1k1', '1k1c', '1kfL1',
        '1kkL1', '1kksc', '1o1Ro', '1o1fL', '1o1kf', '1o1o1', '1o1oL', '1o1of',
        '1o1ok', '1o1on', '1o1oo', '1o1ov', '1oL1R', '1oL1o', '1oLL1', '1oLLL',
        '1oLLf', '1oLfL', '1oLk1', '1oLkf', '1oLkn', '1oLnR', '1oLsR', '1ofL1',
        '1ofLR', '1ofLf', '1ofLn', '1ok1', '1ok1,', '1ok1c', '1ok1k', '1okL1',
        '1okv,',
        # '1ono1' is commented out at this position; it is already part of
        # the leading group above.
        '1onos', '1oo1o', '1ooL1', '1oso1', ';kknc', 'fL1,f', 'fL1Ro', 'fL1o1',
        'fLRof', 'fLfL1', 'fLfLR', 'fLkLR', 'fLnLR', 'fLv,1', 'k1kLk', 'k1oLs',
        'kLRok', 'kLk,L', 'kLokL', 'kLvvR', 'kfL1,', 'kfL1R', 'kfLfL', 'kfLn,',
        'kvkL1', 'n,LfL', 'n,Lk1', 'n,LkL', 'n,Lkf', 'n,fL1', 'n;kL1', 'n;kLL',
        'n;kfL', 'n;kks', 'n;knL', 'n;koL', 'n;kok', 'nR;kL', 'nR;kf', 'nR;kk',
        'nR;kn', 'nR;ko', 'nRR;k', 'nRRR;', 'nRRRk', 'nRRRo', 'nRRkk', 'nRRo1',
        'nRRoL', 'nRRof', 'nRRok', 'nRk1o', 'nRkks', 'nRo1f', 'nRo1o', 'nRoLk',
        'nRofL', 'nRokL', 'nkksc', 'no1fL', 'no1o1', 'no1oL', 'no1of', 'noLk1',
        'nofL1', 'nokL1', 'noo1o', 'ofL1o', 'ofLRo', 'ok1o1', 'oo1kf', 's,1R,',
        's;k1,', 's;k1o', 's;k;', 's;kL1', 's;kLL', 's;kLo', 's;k[k', 's;k[n',
        's;kfL', 's;kkn', 's;kks', 's;knL', 's;knc', 's;knk', 's;knn', 's;koL',
        's;kok', 'sR,L1', 'sR;kL', 'sR;kf', 'sR;kk', 'sR;kn', 'sR;ko', 'sRR;k',
        'sRRR;', 'sRRRk', 'sRRRo', 'sRRk1', 'sRRkk', 'sRRo1', 'sRRoL', 'sRRof',
        'sRRok', 'sRRoo', 'sRk1', 'sRk1c', 'sRk1o', 'sRkks', 'sRo1f', 'sRo1k',
        'sRo1o', 'sRoLk', 'sRofL', 'sRok1', 'sRokL', 'sRooL', 'sc', 'sfL1R',
        'sfLn,', 'sfLsR', 'sk1', 'sk1c', 'sk1o1', 'sk1os', 'skR;k', 'skRk1',
        'skRkk', 'skRo1', 'skRoL', 'skRof', 'skRok', 'skks', 'skksc', 'skoL1',
        'skoLk', 'so1c', 'so1fL', 'so1kf', 'so1o1', 'so1oL', 'so1of', 'so1ok',
        'so1on', 'so1oo', 'so1os', 'so1ov', 'soL1R', 'soL1o', 'soLLL', 'soLLk',
        'soLLs', 'soLfL', 'soLk1', 'soLkR', 'soLkk', 'soLkn', 'soLks', 'soLsR',
        'sofL1', 'sofLR', 'sofLf', 'sofLk', 'sok1', 'sok1,', 'sok1c', 'sok1o',
        'sokL1', 'sokLk', 'sokLo', 'sokLs', 'sokc', 'sokfL', 'sokn,', 'soknk',
        'soko1', 'sokoL', 'sokok', 'sokoo', 'sokos', 'son:o',
        # 'sonk1' is commented out (disabled) at this position.
        'soLko', 'soo1o', 'sooL1', 'sooLk', 'sooLo', 'soofL', 'sookc', 'soos',
        'sos', 'sovo1', 'sovok', 'sovoo', 'sovos', 'sovov', 'vok1,',
    )
    self.pmap_new = frozenset(pmap_new_entries)

    # Entries stored in self.pmap_new2 (up to four codes each).
    pmap_new2_entries = (
        '1,1R', '1,Lf', '1,Lk', '1,LL', '1,fL', '1,sR', '1;kL', '1;kf',
        '1;kk', '1;kn', '1;ko', '1R,L', '1R;k', '1RLR', '1RR;', '1RRR',
        '1RRk', '1RRo', '1Rk1', '1Rkk', '1Ro1', '1RoL', '1Rof', '1Rok',
        '1Roo', '1k1', '1k1c', '1kfL', '1kkL', '1kks', '1o1R', '1o1f',
        '1o1k', '1o1o', '1oL1', '1oLf', '1oLL', '1oLk', '1oLn', '1oLs',
        '1ofL', '1ok1', '1okL', '1okk', '1okv', '1ono', '1oo1', '1ooL',
        '1oso', ';kkn', 'fL1,', 'fL1R', 'fL1o', 'fLRo', 'fLfL', 'fLkL',
        'fLnL', 'fLv,', 'fLL1', 'fLLf', 'k1kL', 'k1oL', 'kLRo', 'kLk,',
        'kLok', 'kLvv', 'kfL1', 'kfLL', 'kfLf', 'kfLn', 'kvkL', 'n,Lf',
        'n,LL', 'n,Lk', 'n,fL', 'n;kL', 'n;kf', 'n;kk', 'n;kn', 'n;ko',
        'nR;k', 'nRR;', 'nRRR', 'nRRk', 'nRRo', 'nRk1', 'nRkk', 'nRo1',
        'nRoL', 'nRof', 'nRok', 'nkks', 'no1f', 'no1o', 'noLk', 'nofL',
        'nokL', 'noo1', 'ofL1', 'ofLL', 'ofLR', 'ok1o', 'oo1k', 's,1R',
        's;k1', 's;k;', 's;kL', 's;k[', 's;kf', 's;kk', 's;kn', 's;ko',
        'sR,L', 'sR;k', 'sRR;', 'sRRR', 'sRRk', 'sRRo', 'sRk1', 'sRkk',
        'sRo1', 'sRoL', 'sRof', 'sRok', 'sRoo', 'sc', 'sfL1', 'sfLL',
        'sfLn', 'sfLs', 'sk1', 'sk1c', 'sk1o', 'skR;', 'skRk', 'skRo',
        'skks', 'skoL', 'skoo', 'so1c', 'so1f', 'so1k', 'so1o', 'soL1',
        'soLL', 'soLf', 'soLk', 'soLs', 'sofL', 'sok1', 'sokL', 'sokc',
        'sokf', 'sokk', 'sokn', 'soko', 'son:', 'sonk',
        # 'sono' is commented out (disabled) at this position.
        'soo1', 'sooL', 'soof', 'sook', 'soos', 'sos',
        # 'soso' is commented out (disabled) at this position.
        'sovo', 'vok1', 'voko',
    )
    self.pmap_new2 = frozenset(pmap_new2_entries)

    # Entries stored, in this order, in the self.badsql tuple.
    badsql_entries = (
        '\\',  # backslash not allowed in non-string
        '[1]', 'o[', 'on,n', 'knn', '1:', 'no1n', ';no', ';nn', ';n,', ';[',
        ';s', 'Lnon',
        # "(FOO, BAR...."
        # 'Ln,n' is commented out (disabled) at this position.
        # "(FOO OR BAR...
        'Lnn', 'onon', 'nono', 'nnn', 'nLn', 'o,', 'n1', '1nn', '11', 's1',
        ';1', ';n;', ';s;', 'onn', 'sns', 'LnnR',
    )
    self.badsql = badsql_entries
def testParser(self):
    """Check SQLexer.tokenize() and SQLexer.syntax() on sample inputs.

    tokenize() is expected to return a list of (type_code, text) pairs;
    syntax() takes such a list and returns a post-processed one.  The
    cases cover adjacent string literals, keywords, C-style comments,
    MySQL "/*! ... */" comments, operators, variables and UNION handling.

    All checks use assertEqual: the assertEquals spelling is a deprecated
    unittest alias that was removed in Python 3.12.
    """
    s = SQLexer()

    # Adjacent string literals: separate tokens after tokenize(),
    # folded into a single string token by syntax().
    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual(
        [('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('"foo" "bar" 1')
    self.assertEqual([('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual(
        [('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    # Keywords and plain C-style comments (inputs are upper-cased first).
    tokens = s.tokenize('select 1'.upper())
    self.assertEqual([('k', 'SELECT'), ('1', '1')], tokens)

    tokens = s.tokenize('1 /* foo */ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/* FOO */'), ('1', '2')], tokens)

    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

    # Operators; the body of a MySQL "/*! ... */" comment (with or without
    # a leading version number) is tokenized as ordinary SQL.
    tokens = s.tokenize('1 || select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

    tokens = s.tokenize('1 /*! || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

    tokens = s.tokenize('1 /*!32302 || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

    tokens = s.tokenize('select 1 /*!00000AND 2>1*/'.upper())
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), ('o', 'AND'),
         ('1', '2'), ('o', '>'), ('1', '1')],
        tokens)

    # Variables and "#" end-of-line comments.
    tokens = s.tokenize('@@NEW UNION#SQLMAP'.upper())
    self.assertEqual(
        [('v', '@@NEW'), ('o', 'UNION'), ('c', '#SQLMAP')], tokens)

    # Disabled case, kept for reference:
    #tokens = s.tokenize('"FOO" IN BOOLEAN MODE'.upper())
    #self.assertEqual( [('string', 'FOO'), ('k', 'IN BOOLEAN MODE')], tokens)

    # mysql comments terminate on normal "*/" AND
    # on another C-style start comment /*  !!!!

    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002")
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)

    tokens = s.tokenize("SELECT /*!1,/*!2*/")
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)

    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002*/")
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)

    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), (',', ','),
         ('1', '2'), (',', ','), ('1', '3')],
        tokens)

    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual(
        [('k', 'SELECT'), ('1', '1'), (',', ','),
         ('1', '2'), (',', ','), ('1', '3')],
        tokens)

    tokens = s.tokenize("1+2")
    self.assertEqual([('1', '1'), ('o', '+'), ('1', '2')], tokens)

    # Empty comments used as separators: kept by tokenize(), dropped by
    # syntax().
    tokens = s.tokenize("1 /**/UNION/**/SELECT")
    self.assertEqual(
        [('1', '1'), ('c', '/**/'), ('o', 'UNION'),
         ('c', '/**/'), ('k', 'SELECT')],
        tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('o', 'UNION'), ('k', 'SELECT')], tokens)

    # The raw token stream for this input is not asserted (that check was
    # already disabled); only the syntax() result, where UNION and ALL
    # end up as one operator token, is verified.
    tokens = s.tokenize("1 /**/UNION/**/ALL/**/SELECT")
    tokens = s.syntax(tokens)
    self.assertEqual(
        [('1', '1'), ('o', 'UNION ALL'), ('k', 'SELECT')], tokens)