def test_insert_trailing_slash(self):
    '''A URL that has no path should gain a trailing slash when sanitized.'''
    # NOTE(review): both fixture pairs are empty strings in this copy of the
    # file, so each iteration only checks sanitize('http://') == 'http://'.
    # Presumably they once held host names -- confirm against upstream.
    scheme = 'http://'
    cases = (
        ('', ''),
        ('', ''),
    )
    for raw, expected in cases:
        self.assertEqual(Url.sanitize(scheme + raw), scheme + expected)
def test_case_insensitivity(self):
    '''Feed mixed-case host names through sanitize and compare the result.'''
    # NOTE(review): every expected value (and one input) is an empty string
    # in this copy; the intended lower-cased URLs look to have been lost.
    # Verify the fixtures before trusting this test.
    scheme = 'http://'
    cases = (
        ('www.TESTING.coM', ''),
        ('', ''),
        ('WWW.testing.COM/FOOBAR', ''),
    )
    for raw, expected in cases:
        self.assertEqual(Url.sanitize(scheme + raw), scheme + expected)
def test_escaping(self):
    '''Escaped input stays as-is; quotes and spaces get percent-escaped.'''
    # NOTE(review): the base URL is an empty string in this copy, so the
    # fixtures are sanitized as bare paths -- confirm the intended prefix.
    base = ''
    cases = (
        ('hello%20and%20how%20are%20you', 'hello%20and%20how%20are%20you'),
        ('danny\'s pub', 'danny%27s%20pub'),
        ('danny%27s pub?foo=bar&yo', 'danny%27s%20pub?foo=bar&yo'),
    )
    for raw, expected in cases:
        self.assertEqual(Url.sanitize(base + raw), base + expected)
def test_x_robots_header(self):
    '''Check Url.allowed against X-Robots-Tag values for agent "foobot".

    Per the table below: unscoped or foobot-scoped "noindex"/"none"
    directives deny access, while "index" directives and directives scoped
    to a different agent leave it allowed.
    '''
    examples = [
        (['noindex'], False),
        (['none'], False),
        (['noindex,none'], False),
        (['index'], True),
        (['foobot:index'], True),
        (['foobot:none'], False),
        (['barbar:index'], True),
        (['barbot:none'], True),
    ]
    for directives, expected in examples:
        response_headers = {'x-robots-tag': directives}
        verdict = Url.allowed('', 'foobot', headers=response_headers)
        self.assertEqual(verdict, expected)
def test_double_forward_slash(self):
    '''Repeated slashes and "./" or "../" segments are normalised away.'''
    # NOTE(review): the base URL is an empty string in this copy -- confirm
    # the intended prefix against upstream.
    base = ''
    cases = (
        ('howdy', 'howdy'),
        ('hello//how//are', 'hello/how/are'),
        ('hello/../how/are', 'how/are'),
        ('hello//..//how/', 'how/'),
        ('a/b/../../c', 'c'),
        ('../../../c', 'c'),
        ('./hello', 'hello'),
        ('./././hello', 'hello'),
        ('a/b/c/', 'a/b/c/'),
    )
    for raw, expected in cases:
        self.assertEqual(Url.sanitize(base + raw), base + expected)

    # Regression case: the real-world URL that prompted this change.
    # NOTE(review): both literals are empty here, so this currently only
    # checks sanitize('') == '' -- the original example appears to be lost.
    wild_input = ''
    wild_expected = ''
    self.assertEqual(Url.sanitize(wild_input), wild_expected)
def test_multiple_ampersands(self):
    '''Runs of empty "&" / ";" separators are collapsed by sanitize.'''
    # NOTE(review): the base URL is an empty string in this copy -- confirm
    # the intended prefix against upstream.
    base = ''
    cases = (
        ('howdy?&&', 'howdy'),
        ('howdy?&&&foo=bar&&&', 'howdy?foo=bar'),
        ('howdy;;;;foo=bar;', 'howdy;foo=bar'),
        # Disabled cases, taken from the prototype lsapi.
        # In query parameters these characters should be escaped:
        #   ('?foo=\xe4\xb8\xad' , '?foo=%E4%B8%AD'),
        # but in a path they should not be:
        #   ('\xe4\xb8\xad/bar.html', '\xe4\xb8\xadbar.html')
    )
    for raw, expected in cases:
        self.assertEqual(Url.sanitize(base + raw), base + expected)
def test_wild(self):
    '''Sanitize real-world unicode inputs that were reported as failing.

    The inputs are unicode strings containing characters that are not
    allowed in a URL as-is.
    '''
    # NOTE(review): every expected value is an empty string in this copy;
    # the intended sanitized URLs look to have been lost -- verify.
    samples = (
        (u'®-easy.html', ''),
        (u' Fonden för mindre arbetarbos/', ''),
        (u' quest/itinerary/mexico\'s sea of cortés - aquarium of the world (8 days)/itinerary/', ''),
        (u'Υπόλοιπα%20Νησιά/', ''),
    )
    for raw, expected in samples:
        self.assertEqual(Url.sanitize(raw), expected)
def test_x_robots_header(self):
    '''Verify how X-Robots-Tag header values affect access for "foobot".

    The table pairs a header value with the expected Url.allowed result:
    "noindex"/"none" (unscoped or scoped to foobot) deny, "index" allows,
    and directives addressed to other agents do not deny.
    '''
    def allowed_with(tag_values):
        # Build the headers dict for one example and query Url.allowed.
        return Url.allowed('', 'foobot', headers={'x-robots-tag': tag_values})

    examples = (
        (['noindex'], False),
        (['none'], False),
        (['noindex,none'], False),
        (['index'], True),
        (['foobot:index'], True),
        (['foobot:none'], False),
        (['barbar:index'], True),
        (['barbot:none'], True),
    )
    for tag_values, expected in examples:
        self.assertEqual(allowed_with(tag_values), expected)
def test_preserve_order(self):
    '''Removing a blacklisted param must not reorder the remaining ones.'''
    # NOTE(review): the format string and the expected URL are both empty
    # in this copy. '' % name has no placeholder, so it raises TypeError if
    # `banned` (defined outside this block) holds non-empty strings. The
    # original URL literals appear to be lost -- restore before relying on
    # this test.
    for name in banned:
        dirty = '' % name
        expected = ''
        cleaned = Url.sanitize(dirty, param_blacklist=banned)
        self.assertEqual(cleaned, expected)
def test_all_together(self):
    '''Every blacklisted query param can be stripped in a single pass.'''
    # One "name=foo" pair per blacklisted name, joined as a query string.
    pairs = ['%s=foo' % name for name in banned]
    query = '&'.join(pairs)
    # NOTE(review): the format string and the expected URL are both empty
    # in this copy. '' % query has no placeholder, so it raises TypeError
    # for a non-empty query. The original URL literals appear to be lost --
    # restore before relying on this test.
    dirty = '' % query
    expected = ''
    cleaned = Url.sanitize(dirty, param_blacklist=banned)
    self.assertEqual(cleaned, expected)
def test_case_insensitivity(self):
    '''Blacklisted params are still removed when given in upper case.'''
    # NOTE(review): the format string and the expected URL are both empty
    # in this copy. '' % shouted has no placeholder, so it raises TypeError
    # if `banned` (defined outside this block) holds non-empty strings. The
    # original URL literals appear to be lost -- restore before relying on
    # this test.
    for name in banned:
        shouted = name.upper()
        dirty = '' % shouted
        expected = ''
        cleaned = Url.sanitize(dirty, param_blacklist=banned)
        self.assertEqual(cleaned, expected)
def test_pruning_with_other_args(self):
    '''A single blacklisted query param can be stripped from a URL.'''
    # NOTE(review): the format string and the expected URL are both empty
    # in this copy. '' % name has no placeholder, so it raises TypeError if
    # `banned` (defined outside this block) holds non-empty strings. The
    # original URL literals appear to be lost -- restore before relying on
    # this test.
    for name in banned:
        dirty = '' % name
        expected = ''
        cleaned = Url.sanitize(dirty, param_blacklist=banned)
        self.assertEqual(cleaned, expected)
def test_prefix_param_ok_params(self):
    '''A param whose name merely ends with a blacklisted name is kept.'''
    # NOTE(review): unlike the sibling pruning tests, this call does not
    # pass param_blacklist=banned, so it may not exercise the blacklist at
    # all -- confirm whether sanitize applies one by default.
    for name in banned:
        untouched = ';howdy_%s=foo;ok=foo' % name
        result = Url.sanitize(untouched)
        self.assertEqual(result, untouched)
def test_param_values_ok_params(self):
    '''A blacklisted name used as a param *value* is left in place.'''
    # NOTE(review): unlike the sibling pruning tests, this call does not
    # pass param_blacklist=banned, so it may not exercise the blacklist at
    # all -- confirm whether sanitize applies one by default.
    for name in banned:
        untouched = ';foo=%s;ok=foo' % name
        result = Url.sanitize(untouched)
        self.assertEqual(result, untouched)
def test_pruning_alone_params(self):
    '''Stripping the only param must also drop the leading ";".'''
    # NOTE(review): the expected value is an empty string in this copy;
    # presumably a URL prefix was lost from both literals -- verify.
    for name in banned:
        dirty = ';%s=foo' % name
        expected = ''
        cleaned = Url.sanitize(dirty, param_blacklist=banned)
        self.assertEqual(cleaned, expected)
def test_join(self):
    '''sanitize should be able to join the URLs it is given.'''
    # NOTE(review): the second positional argument is presumably the base
    # URL to join against; it and the expected result are empty strings in
    # this copy -- confirm the intended values.
    joined = Url.sanitize('/foo', '')
    self.assertEqual(joined, '')