def __init__(self):
    """Build the pattern tables used to classify token streams as SQLi.

    Each pattern below is a string of token-type letters as emitted by
    SQLexer.  NOTE(review): letter meanings inferred from usage elsewhere
    in this file -- confirm against SQLexer: 1=number, s=string,
    k=keyword, o=operator, f=function, v=variable, n=name/bareword,
    c=comment, L/R=left/right paren, ;=semicolon, ,=comma.
    """
    # Lexer instance shared by all classification calls.
    self.lex = SQLexer()
    # Purely alphanumeric/underscore input -- harmless.
    self.alpha_re = re.compile(r'^\s*[A-Za-z0-9_]*\s*$')
    # foo10" or foo1' must start with a letter to prevent
    #   7"
    # ... which is likely a measurement, not sqli
    self.alpha_str_re = re.compile(r'^[A-Z][A-Z0-9_]+[\'\"]$')
    # 5-letter (and a few shorter) token-type prefixes considered likely
    # SQL injection.  Checked by patmatch() against the first five tokens.
    self.pmap_new = frozenset([
        '1ok', '1oks,', '1oksc', 'noL1R', '1okLk', '1okfL', 'sonos', 'sono1',
        'sosos', '1ono1', 'sonoo', '1Rono', 's;n:k', 'k1,1,', 'k1,1k', 'nokLk',
        # unions
        'okkkn', 'ofL1R', 'fLk,L',
        '1,1R,', '1,LfL', '1,Lk1', '1,LkL', '1,Lkf', '1,fL1', '1,sR,', '1;kL1',
        '1;kLL', '1;kLo', '1;kfL', '1;kks', '1;knL', '1;knc', '1;koL', '1;kok',
        '1R,L1', '1R;kL', '1R;kf', '1R;kk', '1R;kn', '1R;ko', '1RLRs', '1RR;k',
        '1RRR;', '1RRRR', '1RRRk', '1RRRo', '1RRk1', '1RRkk', '1RRo1', '1RRoL',
        '1RRof', '1RRok', '1RRon', '1RRoo', '1Rk1', '1Rk1c', '1Rk1o', '1Rkks',
        '1Ro1f', '1Ro1k', '1Ro1o', '1RoL1', '1RoLk', '1RoLn', '1RofL', '1Rok1',
        '1RokL', '1RooL', '1k1', '1k1c', '1kfL1', '1kkL1', '1kksc', '1o1Ro',
        '1o1fL', '1o1kf', '1o1o1', '1o1oL', '1o1of', '1o1ok', '1o1on', '1o1oo',
        '1o1ov', '1oL1R', '1oL1o', '1oLL1', '1oLLL', '1oLLf', '1oLfL', '1oLk1',
        '1oLkf', '1oLkn', '1oLnR', '1oLsR', '1ofL1', '1ofLR', '1ofLf', '1ofLn',
        '1ok1', '1ok1,', '1ok1c', '1ok1k', '1okL1', '1okv,',
        # '1ono1',
        '1onos', '1oo1o', '1ooL1', '1oso1', ';kknc', 'fL1,f', 'fL1Ro', 'fL1o1',
        'fLRof', 'fLfL1', 'fLfLR', 'fLkLR', 'fLnLR', 'fLv,1', 'k1kLk', 'k1oLs',
        'kLRok', 'kLk,L', 'kLokL', 'kLvvR', 'kfL1,', 'kfL1R', 'kfLfL', 'kfLn,',
        'kvkL1', 'n,LfL', 'n,Lk1', 'n,LkL', 'n,Lkf', 'n,fL1', 'n;kL1', 'n;kLL',
        'n;kfL', 'n;kks', 'n;knL', 'n;koL', 'n;kok', 'nR;kL', 'nR;kf', 'nR;kk',
        'nR;kn', 'nR;ko', 'nRR;k', 'nRRR;', 'nRRRk', 'nRRRo', 'nRRkk', 'nRRo1',
        'nRRoL', 'nRRof', 'nRRok', 'nRk1o', 'nRkks', 'nRo1f', 'nRo1o', 'nRoLk',
        'nRofL', 'nRokL', 'nkksc', 'no1fL', 'no1o1', 'no1oL', 'no1of', 'noLk1',
        'nofL1', 'nokL1', 'noo1o', 'ofL1o', 'ofLRo', 'ok1o1', 'oo1kf', 's,1R,',
        's;k1,', 's;k1o', 's;k;', 's;kL1', 's;kLL', 's;kLo', 's;k[k', 's;k[n',
        's;kfL', 's;kkn', 's;kks', 's;knL', 's;knc', 's;knk', 's;knn', 's;koL',
        's;kok', 'sR,L1', 'sR;kL', 'sR;kf', 'sR;kk', 'sR;kn', 'sR;ko', 'sRR;k',
        'sRRR;', 'sRRRk', 'sRRRo', 'sRRk1', 'sRRkk', 'sRRo1', 'sRRoL', 'sRRof',
        'sRRok', 'sRRoo', 'sRk1', 'sRk1c', 'sRk1o', 'sRkks', 'sRo1f', 'sRo1k',
        'sRo1o', 'sRoLk', 'sRofL', 'sRok1', 'sRokL', 'sRooL', 'sc', 'sfL1R',
        'sfLn,', 'sfLsR', 'sk1', 'sk1c', 'sk1o1', 'sk1os', 'skR;k', 'skRk1',
        'skRkk', 'skRo1', 'skRoL', 'skRof', 'skRok', 'skks', 'skksc', 'skoL1',
        'skoLk', 'so1c', 'so1fL', 'so1kf', 'so1o1', 'so1oL', 'so1of', 'so1ok',
        'so1on', 'so1oo', 'so1os', 'so1ov', 'soL1R', 'soL1o', 'soLLL', 'soLLk',
        'soLLs', 'soLfL', 'soLk1', 'soLkR', 'soLkk', 'soLkn', 'soLks', 'soLsR',
        'sofL1', 'sofLR', 'sofLf', 'sofLk', 'sok1', 'sok1,', 'sok1c', 'sok1o',
        'sokL1', 'sokLk', 'sokLo', 'sokLs', 'sokc', 'sokfL', 'sokn,', 'soknk',
        'soko1', 'sokoL', 'sokok', 'sokoo', 'sokos', 'son:o',
        # 'sonk1',
        'soLko', 'soo1o', 'sooL1', 'sooLk', 'sooLo', 'soofL', 'sookc', 'soos',
        'sos', 'sovo1', 'sovok', 'sovoo', 'sovos', 'sovov', 'vok1,'
    ])
    # 4-letter variant of pmap_new (shorter prefixes of the same shapes).
    self.pmap_new2 = frozenset([
        '1,1R', '1,Lf', '1,Lk', '1,LL', '1,fL', '1,sR', '1;kL', '1;kf',
        '1;kk', '1;kn', '1;ko', '1R,L', '1R;k', '1RLR', '1RR;', '1RRR',
        '1RRk', '1RRo', '1Rk1', '1Rkk', '1Ro1', '1RoL', '1Rof', '1Rok',
        '1Roo', '1k1', '1k1c', '1kfL', '1kkL', '1kks', '1o1R', '1o1f',
        '1o1k', '1o1o', '1oL1', '1oLf', '1oLL', '1oLk', '1oLn', '1oLs',
        '1ofL', '1ok1', '1okL', '1okk', '1okv', '1ono', '1oo1', '1ooL',
        '1oso', ';kkn', 'fL1,', 'fL1R', 'fL1o', 'fLRo', 'fLfL', 'fLkL',
        'fLnL', 'fLv,', 'fLL1', 'fLLf', 'k1kL', 'k1oL', 'kLRo', 'kLk,',
        'kLok', 'kLvv', 'kfL1', 'kfLL', 'kfLf', 'kfLn', 'kvkL', 'n,Lf',
        'n,LL', 'n,Lk', 'n,fL', 'n;kL', 'n;kf', 'n;kk', 'n;kn', 'n;ko',
        'nR;k', 'nRR;', 'nRRR', 'nRRk', 'nRRo', 'nRk1', 'nRkk', 'nRo1',
        'nRoL', 'nRof', 'nRok', 'nkks', 'no1f', 'no1o', 'noLk', 'nofL',
        'nokL', 'noo1', 'ofL1', 'ofLL', 'ofLR', 'ok1o', 'oo1k', 's,1R',
        's;k1', 's;k;', 's;kL', 's;k[', 's;kf', 's;kk', 's;kn', 's;ko',
        'sR,L', 'sR;k', 'sRR;', 'sRRR', 'sRRk', 'sRRo', 'sRk1', 'sRkk',
        'sRo1', 'sRoL', 'sRof', 'sRok', 'sRoo', 'sc', 'sfL1', 'sfLL',
        'sfLn', 'sfLs', 'sk1', 'sk1c', 'sk1o', 'skR;', 'skRk', 'skRo',
        'skks', 'skoL', 'skoo', 'so1c', 'so1f', 'so1k', 'so1o', 'soL1',
        'soLL', 'soLf', 'soLk', 'soLs', 'sofL', 'sok1', 'sokL', 'sokc',
        'sokf', 'sokk', 'sokn', 'soko', 'son:', 'sonk',
        # 'sono',
        'soo1', 'sooL', 'soof', 'sook', 'soos', 'sos',
        # 'soso',
        'sovo', 'vok1', 'voko'])
    # Substrings that can never occur in valid SQL; any occurrence in a
    # full pattern vetoes a positive classification (see is_valid_sql2).
    self.badsql = (
        '\\',  # backslash not allowed in non-string
        '[1]', 'o[', 'on,n', 'knn', '1:', 'no1n', ';no', ';nn', ';n,',
        ';[', ';s',
        'Lnon',  # "(FOO, BAR....
        # 'Ln,n',  # "(FOO OR BAR...
        'Lnn', 'onon', 'nono', 'nnn', 'nLn', 'o,', 'n1', '1nn', '11',
        's1', ';1', ';n;', ';s;', 'onn', 'sns', 'LnnR'
    )
def testParser(self):
    """Exercise SQLexer.tokenize() and SQLexer.syntax() on known inputs.

    Uses assertEqual throughout: assertEquals is a deprecated alias
    that was removed in Python 3.12.
    """
    s = SQLexer()

    # adjacent string literals are merged by syntax()
    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual([('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('"foo" "bar" 1')
    self.assertEqual([('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual([('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('select 1'.upper())
    self.assertEqual([('k', 'SELECT'), ('1', '1')], tokens)

    # C-style comments are kept as 'c' tokens
    tokens = s.tokenize('1 /* foo */ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/* FOO */'), ('1', '2')], tokens)
    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)
    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

    tokens = s.tokenize('1 || select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

    # MySQL conditional comments /*! ... */ expose their contents
    tokens = s.tokenize('1 /*! || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)
    tokens = s.tokenize('1 /*!32302 || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)
    tokens = s.tokenize('select 1 /*!00000AND 2>1*/'.upper())
    self.assertEqual([('k', 'SELECT'), ('1', '1'), ('o', 'AND'), ('1', '2'), ('o', '>'), ('1', '1')], tokens)

    tokens = s.tokenize('@@NEW UNION#SQLMAP'.upper())
    self.assertEqual([('v', '@@NEW'), ('o', 'UNION'), ('c', '#SQLMAP')], tokens)

    #tokens = s.tokenize('"FOO" IN BOOLEAN MODE'.upper())
    #self.assertEqual([('string', 'FOO'), ('k', 'IN BOOLEAN MODE')], tokens)

    # mysql comments terminate on normal "*/" AND
    # on another C-style start comment /* !!!!
    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    tokens = s.tokenize("SELECT /*!1,/*!2*/")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002*/")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'), (',', ','), ('1', '3')], tokens)
    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'), (',', ','), ('1', '3')], tokens)

    tokens = s.tokenize("1+2")
    self.assertEqual([('1', '1'), ('o', '+'), ('1', '2')], tokens)

    # comments around UNION/SELECT are stripped by syntax()
    tokens = s.tokenize("1 /**/UNION/**/SELECT")
    self.assertEqual([('1', '1'), ('c', '/**/'), ('o', 'UNION'), ('c', '/**/'), ('k', 'SELECT')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('o', 'UNION'), ('k', 'SELECT')], tokens)

    # UNION ALL is folded into a single operator token by syntax()
    tokens = s.tokenize("1 /**/UNION/**/ALL/**/SELECT")
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('o', 'UNION ALL'), ('k', 'SELECT')], tokens)
class Attacker:
    """Heuristic SQL-injection detector.

    Tokenizes a candidate string with SQLexer, folds trivial arithmetic,
    then matches the leading token-type pattern against known-SQLi pattern
    tables, with veto rules to suppress common false positives (English
    text, query strings, measurements).
    """

    def __init__(self):
        # Lexer instance shared by all classification calls.
        self.lex = SQLexer()
        # Purely alphanumeric/underscore input -- harmless.
        self.alpha_re = re.compile(r'^\s*[A-Za-z0-9_]*\s*$')
        # foo10" or foo1' must start with a letter to prevent
        #   7"
        # ... which is likely a measurement, not sqli
        self.alpha_str_re = re.compile(r'^[A-Z][A-Z0-9_]+[\'\"]$')
        # 5-letter (and a few shorter) token-type prefixes considered
        # likely SQL injection; checked by patmatch().
        self.pmap_new = frozenset([
            '1ok', '1oks,', '1oksc', 'noL1R', '1okLk', '1okfL', 'sonos', 'sono1',
            'sosos', '1ono1', 'sonoo', '1Rono', 's;n:k', 'k1,1,', 'k1,1k', 'nokLk',
            # unions
            'okkkn', 'ofL1R', 'fLk,L',
            '1,1R,', '1,LfL', '1,Lk1', '1,LkL', '1,Lkf', '1,fL1', '1,sR,', '1;kL1',
            '1;kLL', '1;kLo', '1;kfL', '1;kks', '1;knL', '1;knc', '1;koL', '1;kok',
            '1R,L1', '1R;kL', '1R;kf', '1R;kk', '1R;kn', '1R;ko', '1RLRs', '1RR;k',
            '1RRR;', '1RRRR', '1RRRk', '1RRRo', '1RRk1', '1RRkk', '1RRo1', '1RRoL',
            '1RRof', '1RRok', '1RRon', '1RRoo', '1Rk1', '1Rk1c', '1Rk1o', '1Rkks',
            '1Ro1f', '1Ro1k', '1Ro1o', '1RoL1', '1RoLk', '1RoLn', '1RofL', '1Rok1',
            '1RokL', '1RooL', '1k1', '1k1c', '1kfL1', '1kkL1', '1kksc', '1o1Ro',
            '1o1fL', '1o1kf', '1o1o1', '1o1oL', '1o1of', '1o1ok', '1o1on', '1o1oo',
            '1o1ov', '1oL1R', '1oL1o', '1oLL1', '1oLLL', '1oLLf', '1oLfL', '1oLk1',
            '1oLkf', '1oLkn', '1oLnR', '1oLsR', '1ofL1', '1ofLR', '1ofLf', '1ofLn',
            '1ok1', '1ok1,', '1ok1c', '1ok1k', '1okL1', '1okv,',
            # '1ono1',
            '1onos', '1oo1o', '1ooL1', '1oso1', ';kknc', 'fL1,f', 'fL1Ro', 'fL1o1',
            'fLRof', 'fLfL1', 'fLfLR', 'fLkLR', 'fLnLR', 'fLv,1', 'k1kLk', 'k1oLs',
            'kLRok', 'kLk,L', 'kLokL', 'kLvvR', 'kfL1,', 'kfL1R', 'kfLfL', 'kfLn,',
            'kvkL1', 'n,LfL', 'n,Lk1', 'n,LkL', 'n,Lkf', 'n,fL1', 'n;kL1', 'n;kLL',
            'n;kfL', 'n;kks', 'n;knL', 'n;koL', 'n;kok', 'nR;kL', 'nR;kf', 'nR;kk',
            'nR;kn', 'nR;ko', 'nRR;k', 'nRRR;', 'nRRRk', 'nRRRo', 'nRRkk', 'nRRo1',
            'nRRoL', 'nRRof', 'nRRok', 'nRk1o', 'nRkks', 'nRo1f', 'nRo1o', 'nRoLk',
            'nRofL', 'nRokL', 'nkksc', 'no1fL', 'no1o1', 'no1oL', 'no1of', 'noLk1',
            'nofL1', 'nokL1', 'noo1o', 'ofL1o', 'ofLRo', 'ok1o1', 'oo1kf', 's,1R,',
            's;k1,', 's;k1o', 's;k;', 's;kL1', 's;kLL', 's;kLo', 's;k[k', 's;k[n',
            's;kfL', 's;kkn', 's;kks', 's;knL', 's;knc', 's;knk', 's;knn', 's;koL',
            's;kok', 'sR,L1', 'sR;kL', 'sR;kf', 'sR;kk', 'sR;kn', 'sR;ko', 'sRR;k',
            'sRRR;', 'sRRRk', 'sRRRo', 'sRRk1', 'sRRkk', 'sRRo1', 'sRRoL', 'sRRof',
            'sRRok', 'sRRoo', 'sRk1', 'sRk1c', 'sRk1o', 'sRkks', 'sRo1f', 'sRo1k',
            'sRo1o', 'sRoLk', 'sRofL', 'sRok1', 'sRokL', 'sRooL', 'sc', 'sfL1R',
            'sfLn,', 'sfLsR', 'sk1', 'sk1c', 'sk1o1', 'sk1os', 'skR;k', 'skRk1',
            'skRkk', 'skRo1', 'skRoL', 'skRof', 'skRok', 'skks', 'skksc', 'skoL1',
            'skoLk', 'so1c', 'so1fL', 'so1kf', 'so1o1', 'so1oL', 'so1of', 'so1ok',
            'so1on', 'so1oo', 'so1os', 'so1ov', 'soL1R', 'soL1o', 'soLLL', 'soLLk',
            'soLLs', 'soLfL', 'soLk1', 'soLkR', 'soLkk', 'soLkn', 'soLks', 'soLsR',
            'sofL1', 'sofLR', 'sofLf', 'sofLk', 'sok1', 'sok1,', 'sok1c', 'sok1o',
            'sokL1', 'sokLk', 'sokLo', 'sokLs', 'sokc', 'sokfL', 'sokn,', 'soknk',
            'soko1', 'sokoL', 'sokok', 'sokoo', 'sokos', 'son:o',
            # 'sonk1',
            'soLko', 'soo1o', 'sooL1', 'sooLk', 'sooLo', 'soofL', 'sookc', 'soos',
            'sos', 'sovo1', 'sovok', 'sovoo', 'sovos', 'sovov', 'vok1,'
        ])
        # 4-letter variant of pmap_new (shorter prefixes of the same shapes).
        self.pmap_new2 = frozenset([
            '1,1R', '1,Lf', '1,Lk', '1,LL', '1,fL', '1,sR', '1;kL', '1;kf',
            '1;kk', '1;kn', '1;ko', '1R,L', '1R;k', '1RLR', '1RR;', '1RRR',
            '1RRk', '1RRo', '1Rk1', '1Rkk', '1Ro1', '1RoL', '1Rof', '1Rok',
            '1Roo', '1k1', '1k1c', '1kfL', '1kkL', '1kks', '1o1R', '1o1f',
            '1o1k', '1o1o', '1oL1', '1oLf', '1oLL', '1oLk', '1oLn', '1oLs',
            '1ofL', '1ok1', '1okL', '1okk', '1okv', '1ono', '1oo1', '1ooL',
            '1oso', ';kkn', 'fL1,', 'fL1R', 'fL1o', 'fLRo', 'fLfL', 'fLkL',
            'fLnL', 'fLv,', 'fLL1', 'fLLf', 'k1kL', 'k1oL', 'kLRo', 'kLk,',
            'kLok', 'kLvv', 'kfL1', 'kfLL', 'kfLf', 'kfLn', 'kvkL', 'n,Lf',
            'n,LL', 'n,Lk', 'n,fL', 'n;kL', 'n;kf', 'n;kk', 'n;kn', 'n;ko',
            'nR;k', 'nRR;', 'nRRR', 'nRRk', 'nRRo', 'nRk1', 'nRkk', 'nRo1',
            'nRoL', 'nRof', 'nRok', 'nkks', 'no1f', 'no1o', 'noLk', 'nofL',
            'nokL', 'noo1', 'ofL1', 'ofLL', 'ofLR', 'ok1o', 'oo1k', 's,1R',
            's;k1', 's;k;', 's;kL', 's;k[', 's;kf', 's;kk', 's;kn', 's;ko',
            'sR,L', 'sR;k', 'sRR;', 'sRRR', 'sRRk', 'sRRo', 'sRk1', 'sRkk',
            'sRo1', 'sRoL', 'sRof', 'sRok', 'sRoo', 'sc', 'sfL1', 'sfLL',
            'sfLn', 'sfLs', 'sk1', 'sk1c', 'sk1o', 'skR;', 'skRk', 'skRo',
            'skks', 'skoL', 'skoo', 'so1c', 'so1f', 'so1k', 'so1o', 'soL1',
            'soLL', 'soLf', 'soLk', 'soLs', 'sofL', 'sok1', 'sokL', 'sokc',
            'sokf', 'sokk', 'sokn', 'soko', 'son:', 'sonk',
            # 'sono',
            'soo1', 'sooL', 'soof', 'sook', 'soos', 'sos',
            # 'soso',
            'sovo', 'vok1', 'voko'])
        # Substrings that can never occur in valid SQL; any occurrence in a
        # full pattern vetoes a positive classification (see is_valid_sql2).
        self.badsql = (
            '\\',  # backslash not allowed in non-string
            '[1]', 'o[', 'on,n', 'knn', '1:', 'no1n', ';no', ';nn', ';n,',
            ';[', ';s',
            'Lnon',  # "(FOO, BAR....
            # 'Ln,n',  # "(FOO OR BAR...
            'Lnn', 'onon', 'nono', 'nnn', 'nLn', 'o,', 'n1', '1nn', '11',
            's1', ';1', ';n;', ';s;', 'onn', 'sns', 'LnnR'
        )

    def type_string(self, s, pmap, tname, delim=None):
        """Classify *s* against pattern set *pmap*.

        delim, when given, primes the lexer as if the input started inside
        a string opened by that delimiter (type2/type3 detection).
        Returns (tname, pat, fullpat, tokens) on a match, else None.
        """
        tokens = self.lex.tokenize_reset(s)
        if delim is not None:
            self.lex.parseInitialString(delim)
        tokens = self.lex.tokenize_loop()
        tokens = self.lex.syntax(tokens)
        tokens = self.constant_folding2(tokens)
        (sqli, fullpat, pat, reason) = self.patmatch(tokens, pmap)
        if not sqli:
            return None
        return tname, pat, fullpat, tokens

    def test(self, s):
        """Try the three attack contexts in order; return first match or None."""
        # type1: input used as-is; type2/type3: input lands inside a
        # single- or double-quoted SQL string.
        m = self.type_string(s, self.pmap_new, 'type1')
        if m:
            return m
        m = self.type_string(s, self.pmap_new, 'type2', "'")
        if m:
            return m
        m = self.type_string(s, self.pmap_new, 'type3', '"')
        if m:
            return m
        return None

    def normalize(self, s):
        """Repeatedly URL-decode *s*, upper-casing after each decode,
        then undo common HTML-entity cut-n-paste artifacts."""
        while True:
            snew = unquote(s)
            if s == snew:
                break
            s = snew.upper()
        # common html cut-n-paste problem.
        # NOTE(review): these two replace() calls were garbled in the
        # original source (the entity names had been HTML-decoded, leaving
        # invalid syntax); reconstructed as '&quot;' -> '"' and
        # '&#39;' -> "'" -- confirm against upstream history.
        s = s.replace('&quot;', '"')
        s = s.replace('&#39;', '\'')
        return s

    # simplifies basic arithmetic expressions that might be used
    # as english abbreviations; merges ';;' into ';'
    def constant_folding2(self, tokens):
        """Fold arithmetic runs and repeated unary operators/semicolons.

        Only the first five folded tokens matter to the pattern match;
        once five are collected the remaining tokens are appended raw.
        """
        tlen = len(tokens)
        if tlen == 0:
            return tokens
        # skip all leading left-parens and unary chars
        index = 0
        while index < tlen:
            if tokens[index][0] == 'L':
                index += 1
            elif tokens[index][0] == 'o' and tokens[index][1] in ('-', '+', '~'):
                index += 1
            else:
                break
        newt = []
        last = None
        isunary = False
        for t in tokens[index:]:
            if len(newt) == 5:
                # enough for the 5-char pattern match; flush pending
                # operator and tack on the rest unmodified
                if last and last[0] == 'o':
                    newt.append(last)
                newt += tokens[index:]
                return newt
            index += 1
            # skip over repeated runs of unary operators
            # 1+---+2 -> 1+2 -> 1
            if t[0] == 'o' and t[1] in ('!', '+', '-', '~'):
                if isunary:
                    continue
                else:
                    isunary = True
            else:
                isunary = False
            if t[0] == '1':
                if last is None or last[0] == '1' or last[0] == ';':
                    newt.append(t)
                    last = t
                else:
                    last = t
            elif t[0] == 'x':
                if last is None or last[0] in ('n', ';', '1'):
                    newt.append(t)
                    last = t
                else:
                    last = t
            elif t[0] == 'X':
                if last is None or last[0] in ('n', ';', '1'):
                    newt.append(t)
                    last = t
                else:
                    last = t
            elif t[0] == 'o' and t[1] in ('!', '+', '-', '~', '/', '%', '*', 'MOD', 'DIV'):
                if last and last[0] == '1':
                    # number-operator: hold the operator, may fold away
                    last = t
                elif last and last[0] == 'n':
                    last = t
                elif last and last[0] == 'o' and last[1] in ('!', '+', '-', '~', '/', '%', '*', 'MOD', 'DIV'):
                    pass
                else:
                    newt.append(t)
                    last = None
            elif t[0] == ';':
                if last and last[0] == ';':
                    pass  # collapse ';;' into ';'
                elif last and last[0] == 'o':
                    newt.append(last)
                    last = t
                else:
                    newt.append(t)
                    last = t
            else:
                if last and last[0] == 'o':
                    newt.append(last)
                newt.append(t)
                last = None
        if last and last[0] == 'o':
            newt.append(last)
        return newt

    def is_valid_sql(self, pat, tokens=None):
        """First-stage veto: return None if still plausibly SQLi,
        else a human-readable reason to reject the match."""
        tlen = len(tokens)
        # common english screwups
        if tlen == 5:
            if pat in ('so1on', 'no1oo', 'no1of'):
                return 'too short'
            elif pat in ('no1o1', '1ono1'):
                if tokens[1][1] in ('AND', 'OR', '&&', '||') and tokens[1][1] != tokens[3][1]:
                    return None
                else:
                    return 'bogon'
        pat5 = pat[0:5]
        if pat5 in ('sonos', 'sono1', 'sosos', '1ono1', 'so1on', 'sonoo', 'no1oL', 'no1o1'):
            if tlen == 5 and tokens[1][1] != tokens[3][1] and tokens[1][1] not in ('&',):
                return None
            elif tokens[1][1] in ('UNION', 'UNION ALL'):
                return None
            elif tokens[1][1] in ('AND', 'OR', '&&', '||') and tokens[1][1] != tokens[3][1]:
                return None
            else:
                return "Unlikely"
        elif pat5.endswith('f') and tlen > 5 and pat[5] != 'L':
            return 'function missing left'
        if tlen <= 4:
            if pat == 'sos':
                if tokens[1][1] in ('*', '/', '-', '+'):
                    return 'too small, harmless'
                isnormal_left = tokens[0][1][0] in ('"', "'")
                isnormal_right = tokens[2][1][-1] in ('"', "'")
                isempty_right = len(tokens[2][1]) == 1
                isenglish = tokens[1][1] in ('AND', '&', 'NOT', 'UNION', 'IS', 'MOD')
                if isenglish:
                    # open string ...foo "LIKE"
                    return "pat is string-operator-string and operator is logical"
                elif isnormal_left and isnormal_right:
                    # "FOO" + "BAR", type1 style
                    return "fully formed type1 sos -- ignoring"
                elif not isnormal_left and isempty_right:
                    return "not much of an attack"
                elif isnormal_left and not isnormal_right:
                    return "looks like truncation"
            elif pat == 'soos':
                if tokens[1][1] == tokens[2][1]:
                    return "likely double typing or AND or OR"
            elif pat == 'sc':
                if self.alpha_str_re.match(tokens[0][1]):
                    return None
                else:
                    return "gibberish"
            elif pat in ('sk1', '1k1'):
                if tokens[1][1] not in ('ORDER BY', 'GROUP BY', 'OWN3D BY'):
                    return "pat is string-k-number but k not order/group by"
            elif pat == '1ok':
                # sqli fragment: right start, but too short to sqli
                if tokens[1][1] not in ('UNION', 'UNION ALL'):
                    return 'too short'
            elif pat in ('n;kn', 'no1o'):
                return "too short"
        return None

    def is_valid_sql2(self, pat, tokens=None):
        """Second-stage veto over the FULL pattern: return None if still
        plausibly SQLi, else a human-readable reason to reject."""
        # BUG FIX: tlen was never assigned in the original, so any call
        # reaching the semicolon check below raised NameError.
        tlen = len(tokens)
        # if form of foo'--, foo'#, foo'/* (half open string)+comment
        if pat == 'sc':
            if self.alpha_str_re.match(tokens[0][1]):
                return None
            else:
                return "gibberish"
        # token after a semicolon must start a new SQL statement
        pos = pat.find(';')
        if pos != -1 and pos + 1 < tlen and tlen > 4:
            pos += 1
            if pat[pos:].startswith('fL'):
                return None
            elif tokens[pos][0] == 'c':
                return None
            elif tokens[pos][0] == 'k' and tokens[pos][1] in (
                    'CREATE', 'SELECT', 'SET', 'INSERT', 'UPDATE', 'DECLARE',
                    'BEGIN', 'SHUTDOWN', 'WHILE', 'WAITFOR', 'DROP', 'DELETE',
                    'IF', 'CASE', 'END', 'EXEC', 'EXECUTE'):
                return None
            else:
                return "Token after semicolon isnt a sql verb or function"
        # if function isnt followed by a (    i.e. f[^L]|f$
        for i in range(tlen - 1):
            if tokens[i][0] == 'o' and tokens[i][1] in ('UNION', 'UNION ALL'):
                if tokens[i + 1][0] == 'k' and tokens[i + 1][1] == 'SELECT':
                    return None
            if tokens[i][0] == 'f':
                if tokens[i + 1][0] != 'L':
                    return "function '%s' not followed by L" % (tokens[i][1],)
        # last real token can't be a function or operator,
        # even with a trailing comment:   [fo]c?$
        if tokens[-1][0] in ('f', 'o'):
            return "sql can't end in a operator or function"
        elif tokens[-1][0] == 'c' and tlen > 1 and tokens[-2][0] in ('f', 'o'):
            return "sql can't end in a operator or function"
        if pat[0] == 's' and ';' not in pat:
            if tokens[0][1][0] not in ('"', "'"):
                # type2 or type3
                if pat[-1] == 's' and len(tokens[-1][1]) > 1 and tokens[-1][1][-1] in ('"', "'"):
                    return "Expression started with opens tring, but closed with full string"
        # CASE (the operator form) must be followed by WHEN and THEN
        cstate = 0
        for t in tokens:
            if cstate == 0 and t == ('o', 'CASE'):
                cstate = 1
            elif t == ('k', 'WHEN'):
                if cstate == 1:
                    cstate = 2
                else:
                    return "Found WHEN without CASE"
            elif t == ('k', 'THEN'):
                if cstate == 2:
                    cstate = 3
                else:
                    return "Found THEN without CASE"
        # a CASE without both WHEN..THEN.. isn't a real case expression
        if cstate == 1 or cstate == 2:
            return "Found case without WHEN/THEN"
        # check unary followed by binary operator
        uo = self.lex.unary_operators
        for i in range(tlen - 1):
            if tokens[i][0] == 'o' and tokens[i][1] in uo and \
                    tokens[i + 1][0] == 'o' and tokens[i + 1][1] not in uo:
                return "unary operator can't be followed by a binary operator"
        # reject other simple statements
        for p in self.badsql:
            if p in pat:
                return "Found invalid sql pattern " + p
        if pat.startswith('son') and tokens[1][1] == '+':
            return "Likely false positive, ....'+FOO, due to URL as parameter"
        if pat.startswith('noLn') and (tokens[1][1] not in ('AND', 'OR', '&&', '||')):
            return "starts with n-operator and operator is not logical"
        # ignore query string like things:  foo=1234&bar=fruit
        if pat.startswith('no1o') or pat.startswith('nono') or pat.startswith('soso'):
            if tokens[1][1] == tokens[3][1]:
                return "XoYo where o is the same -- bogus"
            if tokens[1][1] == '&' and tokens[3][1] == '=':
                return "starts with no1o and looks like qstring"
        # foo=1234&bar=fruit
        if pat.startswith('sono') and tokens[1][1] == '&' and tokens[3][1] == '=':
            return 'got ..."&foo=... likely query string'
        # reject stuff like 1/FOO 1-FOO
        if pat.startswith('1on') and tokens[1][1] not in ('AND', 'OR', '&&', '||', 'UNION', 'UNION ALL'):
            return "number-operator-n but operator is not logical or union"
        if pat.startswith('sok') or pat.startswith('nok'):
            if tokens[1][1] == 'NOT':
                return '[string|number]-NOT-keyword rejected'
            if tokens[2][1] == 'WITH':
                return '[string|number]-operator-WITH rejected'
        # common english screwups
        if tlen <= 4:
            if pat == 'sos':
                isnormal_left = tokens[0][1][0] in ('"', "'")
                isnormal_right = tokens[2][1][-1] in ('"', "'")
                isempty_right = len(tokens[2][1]) == 1
                isenglish = tokens[1][1] in ('AND', '&', 'NOT', 'UNION', 'IS', 'MOD')
                if isenglish:
                    # open string ...foo "LIKE"
                    return "pat is string-operator-string and operator is logical"
                elif isnormal_left and isnormal_right:
                    # "FOO" + "BAR", type1 style
                    return "fully formed type1 sos -- ignoring"
                elif not isnormal_left and isempty_right:
                    return "not much of an attack"
                elif isnormal_left and not isnormal_right:
                    return "looks like truncation"
            elif pat in ('sk1', '1k1'):
                if tokens[1][1] not in ('ORDER BY', 'GROUP BY', 'OWN3D BY'):
                    return "pat is string-k-number but k not order/group by"
        return None

    def patmatch(self, tokens, pmap):
        """Match the 5-char token-type prefix against *pmap* and run the
        first-stage veto.  Returns (is_sqli, fullpat, pat, reason)."""
        fullpat = ''.join([t[0] for t in tokens])
        pat = fullpat[0:5]
        if pat in pmap:
            oksql = self.is_valid_sql(fullpat, tokens)
            if oksql is None:
                return (True, fullpat, pat, '')
            else:
                return (False, fullpat, pat, oksql)
        return (False, fullpat, '', 'No starting pattern found')

    def parens_match(self, origtokens):
        """Return True iff parentheses are balanced and never go negative."""
        count = 0
        for t in origtokens:
            if t[0] == 'L':
                count += 1
            elif t[0] == 'R':
                count -= 1
                if count < 0:
                    return False
        return count == 0
def testParser(self):
    """Exercise SQLexer.tokenize() and SQLexer.syntax() on known inputs.

    Uses assertEqual throughout: assertEquals is a deprecated alias
    that was removed in Python 3.12.
    """
    s = SQLexer()

    # adjacent string literals are merged by syntax()
    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual([('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('"foo" "bar" 1')
    self.assertEqual([('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('1 "foo" "bar" 1')
    self.assertEqual([('1', '1'), ('s', '"foo"'), ('s', '"bar"'), ('1', '1')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('s', '"foo""bar"'), ('1', '1')], tokens)

    tokens = s.tokenize('select 1'.upper())
    self.assertEqual([('k', 'SELECT'), ('1', '1')], tokens)

    # C-style comments are kept as 'c' tokens
    tokens = s.tokenize('1 /* foo */ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/* FOO */'), ('1', '2')], tokens)
    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)
    tokens = s.tokenize('1 /*foo*/ 2'.upper())
    self.assertEqual([('1', '1'), ('c', '/*FOO*/'), ('1', '2')], tokens)

    tokens = s.tokenize('1 || select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)

    # MySQL conditional comments /*! ... */ expose their contents
    tokens = s.tokenize('1 /*! || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)
    tokens = s.tokenize('1 /*!32302 || */ select'.upper())
    self.assertEqual([('1', '1'), ('o', '||'), ('k', 'SELECT')], tokens)
    tokens = s.tokenize('select 1 /*!00000AND 2>1*/'.upper())
    self.assertEqual([('k', 'SELECT'), ('1', '1'), ('o', 'AND'), ('1', '2'), ('o', '>'), ('1', '1')], tokens)

    tokens = s.tokenize('@@NEW UNION#SQLMAP'.upper())
    self.assertEqual([('v', '@@NEW'), ('o', 'UNION'), ('c', '#SQLMAP')], tokens)

    #tokens = s.tokenize('"FOO" IN BOOLEAN MODE'.upper())
    #self.assertEqual([('string', 'FOO'), ('k', 'IN BOOLEAN MODE')], tokens)

    # mysql comments terminate on normal "*/" AND
    # on another C-style start comment /* !!!!
    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    tokens = s.tokenize("SELECT /*!1,/*!2*/")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    # ==> select 1,2
    tokens = s.tokenize("SELECT /*!000001,/*!000002*/")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2')], tokens)
    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'), (',', ','), ('1', '3')], tokens)
    # ==> select 1,2,3
    tokens = s.tokenize("SELECT /*!000001,/*!2*/,3")
    self.assertEqual([('k', 'SELECT'), ('1', '1'), (',', ','), ('1', '2'), (',', ','), ('1', '3')], tokens)

    tokens = s.tokenize("1+2")
    self.assertEqual([('1', '1'), ('o', '+'), ('1', '2')], tokens)

    # comments around UNION/SELECT are stripped by syntax()
    tokens = s.tokenize("1 /**/UNION/**/SELECT")
    self.assertEqual([('1', '1'), ('c', '/**/'), ('o', 'UNION'), ('c', '/**/'), ('k', 'SELECT')], tokens)
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('o', 'UNION'), ('k', 'SELECT')], tokens)

    # UNION ALL is folded into a single operator token by syntax()
    tokens = s.tokenize("1 /**/UNION/**/ALL/**/SELECT")
    tokens = s.syntax(tokens)
    self.assertEqual([('1', '1'), ('o', 'UNION ALL'), ('k', 'SELECT')], tokens)