class TestQueryForOldData(unittest.TestCase):
    """Tests for the ``query_old_data`` rule checker."""

    def setUp(self):
        self.parser = QueryParser()
        self.query_old_data = query_old_data.RuleChecker()

    def _is_ok(self, statement):
        """Parse ``statement``, check it and return the verdict's ``is_ok()``."""
        parsed = self.parser.parse(statement)
        return self.query_old_data.check(parsed).is_ok()

    def test_single_timedelta(self):
        """
        Select queries may not query data that is too old
        """
        template = "select * from 'myseries' where time > now() - {}"

        # Look-back windows the rule is expected to reject.
        for big_timedelta in ("20d", "22w", "50000m", "10010h", "1000000s"):
            self.assertFalse(self._is_ok(template.format(big_timedelta)))

        # Short look-back windows are expected to pass.
        for small_timedelta in ("10s", "1h", "50m"):
            self.assertTrue(self._is_ok(template.format(small_timedelta)))

        # Queries without any time condition pass, in either keyword casing.
        for statement in ('select * from "myseries" limit 1000',
                          'select * from "myseries" LIMIT 1000'):
            self.assertTrue(self._is_ok(statement))

    def test_multiple_timedeltas(self):
        """A query with both a lower and an upper time bound passes."""
        self.assertTrue(self._is_ok(
            "select mean(value) from 'myseries' where time > now() - 2d and time < now() - 1d"
        ))
class TestTooManyDatapoints(unittest.TestCase):
    """Tests for the ``too_many_datapoints`` rule checker."""

    def setUp(self):
        self.parser = QueryParser()
        self.too_many_datapoints = too_many_datapoints.RuleChecker()

    def _passes(self, query):
        """Return ``is_ok()`` of the checker's verdict for an already-built query."""
        return self.too_many_datapoints.check(query).is_ok()

    def test_small_number_of_datapoints(self):
        """
        Select queries with a reasonable number of datapoints shall be allowed
        """
        self.assertTrue(self._passes(
            SelectQuery('*', '/myseries/', where_stmt='time > now() - 24h')))

        for statement in (
            "select * from 'server.search.item.standard-average' where time > now()-24h",
            'select * from /host.df_complex-free/ where time > now()-30d group by time(12h) order asc',
            "select * from /test/ where time > now()-7d group by time(1h)",
            "select * from /^myseries/ where value > -1 and time > now() - 100w GROUP by time(30s) limit 10",
        ):
            self.assertTrue(self._passes(self.parser.parse(statement)))

        self.assertTrue(self._passes(
            SelectQuery('*', '/myseries/', where_stmt='time > now() - 25h')))
        self.assertTrue(self._passes(self.parser.parse(
            "select * from /test/ where time > now()-7d group by time(10m)")))

    def test_big_number_of_datapoints(self):
        """Queries the checker is expected to reject."""
        for statement in (
            "select * from /test/ where time > now()-7d group by time(1m)",
            "select median(value) from /test/ where time > now()-30d group by time(15s)",
            "select * from /^mylongseriesname/ where value > -1 and time > now() - 1w GROUP by time(30s)",
            "select * from /my.dashboard_.*/ where title =~ /.*.*/i",
        ):
            self.assertFalse(self._passes(self.parser.parse(statement)))
class TestGuard(unittest.TestCase):
    """Smoke test: a Guard built from ``all_rules`` accepts a harmless query."""

    def setUp(self):
        self.parser = QueryParser()

    def test_guard(self):
        # Test rules loading
        rule_guard = Guard(all_rules)
        harmless = "select * from 'my.awesome.series' where time > now()-24h"
        parsed = self.parser.parse(harmless)
        # NOTE(review): this relies on the truthiness of whatever
        # is_allowed() returns -- confirm that reflects the verdict.
        self.assertTrue(rule_guard.is_allowed(parsed))
class TestParseDelete(unittest.TestCase):
    """Parser tests for DELETE statements."""

    def setUp(self):
        self.parser = QueryParser()

    def test_delete_query(self):
        """
        Test parsing DELETE queries
        """
        # (statement, expected from_stmt, expected where_stmt)
        cases = (
            ("delete from response_times where time < now() - 1h",
             "response_times", "time < now() - 1h"),
            ("delete from /^stats.*/ where time < now() - 7d",
             "/^stats.*/", "time < now() - 7d"),
        )
        for statement, expected_from, expected_where in cases:
            query = self.parser.parse(statement)
            self.assertEqual(query.get_type(), Keyword.DELETE)
            self.assertEqual(query.from_stmt, expected_from)
            self.assertEqual(query.where_stmt, expected_where)

        # This statement is expected not to parse. assertIsNone takes only the
        # value (plus an optional failure message), so no second argument.
        self.assertIsNone(
            self.parser.parse("delete from response_times where user = '******'"))
class TestGuard(unittest.TestCase):
    """Checks that a Guard loaded with ``all_rules`` lets a simple query through."""

    def setUp(self):
        self.parser = QueryParser()

    def test_guard(self):
        # Test rules loading
        guard_under_test = Guard(all_rules)
        statement = "select * from 'my.awesome.series' where time > now()-24h"
        verdict = guard_under_test.is_allowed(self.parser.parse(statement))
        # NOTE(review): the assertion depends on the truthiness of the value
        # returned by is_allowed() -- confirm that it reflects the verdict.
        self.assertTrue(verdict)
class TestParseDrop(unittest.TestCase):
    """Parser tests for DROP series statements."""

    def setUp(self):
        self.parser = QueryParser()

    def test_drop_query(self):
        """
        Test parsing DROP series queries
        """
        statement = "drop series response_times"
        parsed = self.parser.parse(statement)

        self.assertEqual(parsed.get_type(), Keyword.DROP)
        self.assertEqual(parsed.series_stmt, "response_times")
class TestQueryForOldData(unittest.TestCase):
    """Exercises the ``query_old_data`` rule checker with parsed SELECT queries."""

    def setUp(self):
        self.parser = QueryParser()
        self.query_old_data = query_old_data.RuleChecker()

    def test_single_timedelta(self):
        """
        Select queries may not query data that is too old
        """
        # Each look-back window is paired with whether the rule should accept it.
        windows = [(delta, False)
                   for delta in ["20d", "22w", "50000m", "10010h", "1000000s"]]
        windows += [(delta, True) for delta in ["10s", "1h", "50m"]]

        for delta, should_pass in windows:
            statement = "select * from 'myseries' where time > now() - {}".format(delta)
            verdict = self.query_old_data.check(self.parser.parse(statement))
            if should_pass:
                self.assertTrue(verdict.is_ok())
            else:
                self.assertFalse(verdict.is_ok())

        # No time condition at all: accepted in lower- and upper-case LIMIT form.
        lowercase_limit = self.parser.parse('select * from "myseries" limit 1000')
        self.assertTrue(self.query_old_data.check(lowercase_limit).is_ok())

        uppercase_limit = self.parser.parse('select * from "myseries" LIMIT 1000')
        self.assertTrue(self.query_old_data.check(uppercase_limit).is_ok())

    def test_multiple_timedeltas(self):
        """A query bounded by two time expressions is accepted."""
        bounded = self.parser.parse(
            "select mean(value) from 'myseries' where time > now() - 2d and time < now() - 1d"
        )
        verdict = self.query_old_data.check(bounded)
        self.assertTrue(verdict.is_ok())
class TestParseList(unittest.TestCase):
    """Parser tests for LIST series statements."""

    def setUp(self):
        self.parser = QueryParser()

    def test_list_series_query(self):
        """
        Test LIST series queries
        """
        # (statement, expected series_stmt). The regex case uses raw strings:
        # a backslash followed by a dot is an invalid escape sequence in a
        # normal string literal and triggers a warning on newer Pythons.
        # The runtime value of the strings is unchanged.
        cases = (
            ('list series', ''),
            (r'list series /my_regex\.test/', r'/my_regex\.test/'),
            ('list series "my-awesome.series.name"', '"my-awesome.series.name"'),
        )
        for statement, expected_series in cases:
            query = self.parser.parse(statement)
            self.assertEqual(query.get_type(), Keyword.LIST)
            self.assertEqual(query.series_stmt, expected_series)
            # Printing also exercises the query's string conversion.
            print(query)
class Protector(object):
    """
    The main protector class which checks for malicious queries
    """

    def __init__(self, rules, whitelist=None, safe_mode=True):
        """
        :param rules: A list of rules to evaluate
        :param whitelist: Optional list of regex patterns. A query whose
                          series matches one of them (``re.match``) skips
                          the rules. Defaults to an empty whitelist.
        :param safe_mode: If set to True, allow the query in case it can not be parsed
        :return:
        """
        self.parser = QueryParser()
        self.guard = Guard(rules)
        self.sanitizer = Sanitizer()
        # Build a fresh list per instance. A literal ``[]`` default would be
        # one shared object, so mutating one Protector's whitelist would
        # silently change every other instance created without a whitelist.
        self.whitelist = [] if whitelist is None else whitelist
        self.safe_mode = safe_mode

    def check(self, query_string):
        """
        Sanitize, parse and evaluate a raw query string.

        :param query_string: The query as received
        :return: ``Ok(True)`` for whitelisted queries; otherwise the result of
                 ``Guard.is_allowed``. If the query can not be parsed:
                 ``Ok(True)`` in safe mode, else ``Err`` with a message.
        """
        logging.debug("Checking %s", query_string)
        query_sanitized = self.sanitizer.sanitize(query_string)
        query = self.parser.parse(query_sanitized)

        if not query:
            # The message quotes the original string, not the sanitized one.
            error_msg = "Could not parse query: '{}'".format(query_string)
            logging.info(error_msg)
            if self.safe_mode:
                return Ok(True)
            return Err(error_msg)

        if self.is_whitelisted(query):
            return Ok(True)
        return self.guard.is_allowed(query)

    def is_whitelisted(self, query):
        """
        Tell whether the series addressed by ``query`` matches the whitelist.

        :param query: A parsed query object
        :return: True if any whitelist pattern matches at the start of the
                 series statement, False otherwise
        """
        # LIST and DROP queries carry their target in ``series_stmt``;
        # every other query type is read from ``from_stmt``.
        if query.get_type() in {Keyword.LIST, Keyword.DROP}:
            series = query.series_stmt
        else:
            series = query.from_stmt

        return any(re.match(pattern, series) for pattern in self.whitelist)
class TestNegativeGroupByStatements(unittest.TestCase):
    """Tests for the ``negative_groupby_statement`` rule checker."""

    def setUp(self):
        self.parser = QueryParser()
        self.negative_groupby_statement = negative_groupby_statement.RuleChecker()

    def test_negative_groupby(self):
        """Negative ``group by time(...)`` intervals are rejected, positive ones pass."""
        rejected = (
            "select * from test group by time(-100ms)",
            "select * from test group by time(-1s)",
            "select * from test group by time(-10h)",
            "select * from test group by time(-20w)",
        )
        accepted = (
            "select * from test group by time(100ms)",
            "select * from test group by time(1s)",
            "select * from test group by time(10h)",
            "select * from test group by time(20w)",
        )

        for statement in rejected:
            parsed = self.parser.parse(statement)
            self.assertFalse(self.negative_groupby_statement.check(parsed).is_ok())

        for statement in accepted:
            parsed = self.parser.parse(statement)
            self.assertTrue(self.negative_groupby_statement.check(parsed).is_ok())
class TestNegativeGroupByStatements(unittest.TestCase):
    """Runs the ``negative_groupby_statement`` rule checker on group-by intervals."""

    def setUp(self):
        self.parser = QueryParser()
        self.negative_groupby_statement = negative_groupby_statement.RuleChecker()

    def test_negative_groupby(self):
        """Each statement is paired with whether the checker should accept it."""
        expectations = [
            ("select * from test group by time(-100ms)", False),
            ("select * from test group by time(-1s)", False),
            ("select * from test group by time(-10h)", False),
            ("select * from test group by time(-20w)", False),
            ("select * from test group by time(100ms)", True),
            ("select * from test group by time(1s)", True),
            ("select * from test group by time(10h)", True),
            ("select * from test group by time(20w)", True),
        ]
        for statement, should_pass in expectations:
            q = self.parser.parse(statement)
            # Pick the matching unittest assertion so truthiness is judged
            # exactly as assertTrue / assertFalse would judge it.
            expect = self.assertTrue if should_pass else self.assertFalse
            expect(self.negative_groupby_statement.check(q).is_ok())
class TestParseInvalid(unittest.TestCase):
    """Parser tests for input that must not yield a query object."""

    def setUp(self):
        self.parser = QueryParser()

    def test_invalid_queries(self):
        """
        Test invalid queries
        """
        # None and incomplete statements are all expected to parse to None.
        for broken in (None, "hello", "select", "select *", "select * from"):
            self.assertIsNone(self.parser.parse(broken))

    def test_naughty_strings(self):
        """
        Test malicious user input
        """
        for naughty in load_fixture('naughty-strings/blns.txt'):
            parsed = self.parser.parse(naughty)
            self.assertIsNone(parsed)
class TestTooManyDatapoints(unittest.TestCase):
    """Exercises the ``too_many_datapoints`` rule checker."""

    def setUp(self):
        self.parser = QueryParser()
        self.too_many_datapoints = too_many_datapoints.RuleChecker()

    def test_small_number_of_datapoints(self):
        """
        Select queries with a reasonable number of datapoints shall be allowed
        """
        checker = self.too_many_datapoints

        day_query = SelectQuery('*', '/myseries/', where_stmt='time > now() - 24h')
        self.assertTrue(checker.check(day_query).is_ok())

        parsed_statements = [
            "select * from 'server.search.item.standard-average' where time > now()-24h",
            'select * from /host.df_complex-free/ where time > now()-30d group by time(12h) order asc',
            "select * from /test/ where time > now()-7d group by time(1h)",
            "select * from /^myseries/ where value > -1 and time > now() - 100w GROUP by time(30s) limit 10",
        ]
        for statement in parsed_statements:
            parsed = self.parser.parse(statement)
            self.assertTrue(checker.check(parsed).is_ok())

        longer_query = SelectQuery('*', '/myseries/', where_stmt='time > now() - 25h')
        self.assertTrue(checker.check(longer_query).is_ok())

        week_query = self.parser.parse(
            "select * from /test/ where time > now()-7d group by time(10m)")
        self.assertTrue(checker.check(week_query).is_ok())

    def test_big_number_of_datapoints(self):
        """Queries the checker is expected to reject."""
        checker = self.too_many_datapoints
        rejected_statements = [
            "select * from /test/ where time > now()-7d group by time(1m)",
            "select median(value) from /test/ where time > now()-30d group by time(15s)",
            "select * from /^mylongseriesname/ where value > -1 and time > now() - 1w GROUP by time(30s)",
            "select * from /my.dashboard_.*/ where title =~ /.*.*/i",
        ]
        for statement in rejected_statements:
            parsed = self.parser.parse(statement)
            self.assertFalse(checker.check(parsed).is_ok())
class TestParseSelect(unittest.TestCase):
    """Parser tests for SELECT statements.

    Each test checks the extracted clauses, the resolution, the datapoint
    count and the earliest date of the parsed query.
    """

    def setUp(self):
        self.parser = QueryParser()
        self.default_resolution = Resolution.MAX_RESOLUTION

    def _assert_select(self, parsed, select, source, where, limit, group_by,
                       resolution, datapoints):
        """Assert type, clauses, resolution and datapoints of a parsed SELECT.

        Optional clauses expected to be absent are passed as ``None`` and
        verified with ``assertIsNone``; the others with ``assertEqual``.
        The earliest-date assertion stays in each test so that time-dependent
        expected values are computed at the moment of comparison.
        """
        self.assertEqual(parsed.get_type(), Keyword.SELECT)
        self.assertEqual(parsed.select_stmt, select)
        self.assertEqual(parsed.from_stmt, source)
        optional_clauses = (("where_stmt", where),
                            ("limit_stmt", limit),
                            ("group_by_stmt", group_by))
        for attribute, expected in optional_clauses:
            actual = getattr(parsed, attribute)
            if expected is None:
                self.assertIsNone(actual)
            else:
                self.assertEqual(actual, expected)
        self.assertEqual(parsed.resolution, resolution)
        self.assertEqual(parsed.datapoints, datapoints)

    def test_simple_select_query(self):
        """
        Test simple SELECT Queries
        """
        parsed = self.parser.parse("select * from mymetric")
        self._assert_select(parsed, "*", "mymetric", None, None, None,
                            self.default_resolution, 5 * 24 * 60 * 6)
        self.assertEqual(parsed.get_earliest_date(),
                         TimeExpression.INFLUXDB_EPOCH)

    def test_complex_select_query(self):
        """
        Test complex SELECT queries
        """
        parsed = self.parser.parse(
            "select value, test from /my.regex/ where time > now() - 24h limit 10")
        self._assert_select(parsed, "value, test", "/my.regex/",
                            "time > now() - 24h", '10', None,
                            self.default_resolution, 10)
        self.assertEqual(parsed.get_earliest_date(),
                         datetime.now() - timedelta(days=1))

    def test_upper_lowercase_keywords(self):
        """Keyword casing must not change the parse result."""
        statements = (
            "select value, test from /my.regex/ where time > now() - 24h limit 10",
            "SELECT value, test from /my.regex/ WHERE time > now() - 24h LIMIT 10",
            "select value, test from /my.regex/ WHERE time > now() - 24h LIMIT 10",
            "select value, test FROM /my.regex/ WHERE time > now() - 24h limit 10",
        )
        # Collected in a set, as before; equal query objects collapse.
        parsed_queries = {self.parser.parse(statement) for statement in statements}

        for parsed in parsed_queries:
            self._assert_select(parsed, "value, test", "/my.regex/",
                                "time > now() - 24h", '10', None,
                                self.default_resolution, 10)
            self.assertEqual(parsed.get_earliest_date(),
                             datetime.now() - timedelta(days=1))

    def test_group_by(self):
        """
        Test SELECT queries with group by statement
        """
        parsed = self.parser.parse(
            'select * from "series" where time > now() - 24h group by time(10m)')
        self._assert_select(parsed, "*", '"series"', "time > now() - 24h",
                            None, "time(10m)", 10 * 60,
                            (24 * 60 * 60) / (10 * 60))
        self.assertEqual(parsed.get_earliest_date(),
                         datetime.now() - timedelta(days=1))

        parsed = self.parser.parse(
            'select count(type) from events group by time(10m), type;')
        self._assert_select(parsed, "count(type)", 'events', None, None,
                            "time(10m), type", 10 * 60,
                            (5 * 24 * 60 * 60) / (10 * 60))
        self.assertEqual(parsed.get_earliest_date(),
                         TimeExpression.INFLUXDB_EPOCH)

        parsed = self.parser.parse(
            'select percentile(value, 95) from response_times group by time(30s);')
        self._assert_select(parsed, "percentile(value, 95)", 'response_times',
                            None, None, "time(30s)", 30,
                            (5 * 24 * 60 * 60) / 30)
        self.assertEqual(parsed.get_earliest_date(),
                         TimeExpression.INFLUXDB_EPOCH)

    def test_merge(self):
        """A ``merge`` of two series is kept verbatim in the from statement."""
        parsed = self.parser.parse(
            'select count(type) from user_events merge admin_events group by time(10m)')
        self._assert_select(parsed, "count(type)",
                            'user_events merge admin_events', None, None,
                            "time(10m)", 10 * 60,
                            (5 * 24 * 60 * 60) / (10 * 60))
        self.assertEqual(parsed.get_earliest_date(),
                         TimeExpression.INFLUXDB_EPOCH)

    def test_multiple_times(self):
        """With two time bounds the earliest date follows the older bound."""
        parsed = self.parser.parse(
            "select * from 's' where time > now() - 2d and time < now() - 1d")
        self._assert_select(parsed, "*", "'s'",
                            "time > now() - 2d and time < now() - 1d",
                            None, None, self.default_resolution,
                            (24 * 60 * 60) / 10)
        self.assertEqual(parsed.get_earliest_date(),
                         datetime.now() - timedelta(days=2))
class TestParseSelect(unittest.TestCase):
    """SELECT parsing tests: clauses, resolution, datapoints and earliest date."""

    def setUp(self):
        self.parser = QueryParser()
        self.default_resolution = Resolution.MAX_RESOLUTION

    def _check_parsed(self, result, expected):
        """Compare a parsed SELECT query with a dict of expected values.

        ``expected`` holds the keys ``select``, ``from``, ``where``, ``limit``,
        ``group_by``, ``resolution`` and ``datapoints``. A ``None`` value for
        where/limit/group_by means the clause must be absent (``assertIsNone``).
        The earliest date is asserted by the caller, so that expected values
        based on the current time are computed right at the comparison.
        """
        self.assertEqual(result.get_type(), Keyword.SELECT)
        self.assertEqual(result.select_stmt, expected["select"])
        self.assertEqual(result.from_stmt, expected["from"])
        for key, attribute in (("where", "where_stmt"),
                               ("limit", "limit_stmt"),
                               ("group_by", "group_by_stmt")):
            found = getattr(result, attribute)
            if expected[key] is None:
                self.assertIsNone(found)
            else:
                self.assertEqual(found, expected[key])
        self.assertEqual(result.resolution, expected["resolution"])
        self.assertEqual(result.datapoints, expected["datapoints"])

    def test_simple_select_query(self):
        """
        Test simple SELECT Queries
        """
        result = self.parser.parse("select * from mymetric")
        self._check_parsed(result, {
            "select": "*",
            "from": "mymetric",
            "where": None,
            "limit": None,
            "group_by": None,
            "resolution": self.default_resolution,
            "datapoints": 5 * 24 * 60 * 6,
        })
        self.assertEqual(result.get_earliest_date(), TimeExpression.INFLUXDB_EPOCH)

    def test_complex_select_query(self):
        """
        Test complex SELECT queries
        """
        result = self.parser.parse(
            "select value, test from /my.regex/ where time > now() - 24h limit 10")
        self._check_parsed(result, {
            "select": "value, test",
            "from": "/my.regex/",
            "where": "time > now() - 24h",
            "limit": '10',
            "group_by": None,
            "resolution": self.default_resolution,
            "datapoints": 10,
        })
        self.assertEqual(result.get_earliest_date(), datetime.now() - timedelta(days=1))

    def test_upper_lowercase_keywords(self):
        """The same query in different keyword casings parses identically."""
        variants = set()
        for statement in [
            "select value, test from /my.regex/ where time > now() - 24h limit 10",
            "SELECT value, test from /my.regex/ WHERE time > now() - 24h LIMIT 10",
            "select value, test from /my.regex/ WHERE time > now() - 24h LIMIT 10",
            "select value, test FROM /my.regex/ WHERE time > now() - 24h limit 10",
        ]:
            variants.add(self.parser.parse(statement))

        for result in variants:
            self._check_parsed(result, {
                "select": "value, test",
                "from": "/my.regex/",
                "where": "time > now() - 24h",
                "limit": '10',
                "group_by": None,
                "resolution": self.default_resolution,
                "datapoints": 10,
            })
            self.assertEqual(result.get_earliest_date(), datetime.now() - timedelta(days=1))

    def test_group_by(self):
        """
        Test SELECT queries with group by statement
        """
        result = self.parser.parse(
            'select * from "series" where time > now() - 24h group by time(10m)')
        self._check_parsed(result, {
            "select": "*",
            "from": '"series"',
            "where": "time > now() - 24h",
            "limit": None,
            "group_by": "time(10m)",
            "resolution": 10 * 60,
            "datapoints": (24 * 60 * 60) / (10 * 60),
        })
        self.assertEqual(result.get_earliest_date(), datetime.now() - timedelta(days=1))

        result = self.parser.parse(
            'select count(type) from events group by time(10m), type;')
        self._check_parsed(result, {
            "select": "count(type)",
            "from": 'events',
            "where": None,
            "limit": None,
            "group_by": "time(10m), type",
            "resolution": 10 * 60,
            "datapoints": (5 * 24 * 60 * 60) / (10 * 60),
        })
        self.assertEqual(result.get_earliest_date(), TimeExpression.INFLUXDB_EPOCH)

        result = self.parser.parse(
            'select percentile(value, 95) from response_times group by time(30s);')
        self._check_parsed(result, {
            "select": "percentile(value, 95)",
            "from": 'response_times',
            "where": None,
            "limit": None,
            "group_by": "time(30s)",
            "resolution": 30,
            "datapoints": (5 * 24 * 60 * 60) / 30,
        })
        self.assertEqual(result.get_earliest_date(), TimeExpression.INFLUXDB_EPOCH)

    def test_merge(self):
        """The ``merge`` expression stays intact inside the from statement."""
        result = self.parser.parse(
            'select count(type) from user_events merge admin_events group by time(10m)')
        self._check_parsed(result, {
            "select": "count(type)",
            "from": 'user_events merge admin_events',
            "where": None,
            "limit": None,
            "group_by": "time(10m)",
            "resolution": 10 * 60,
            "datapoints": (5 * 24 * 60 * 60) / (10 * 60),
        })
        self.assertEqual(result.get_earliest_date(), TimeExpression.INFLUXDB_EPOCH)

    def test_multiple_times(self):
        """Two time bounds in the where clause: the earliest date is the older one."""
        result = self.parser.parse(
            "select * from 's' where time > now() - 2d and time < now() - 1d")
        self._check_parsed(result, {
            "select": "*",
            "from": "'s'",
            "where": "time > now() - 2d and time < now() - 1d",
            "limit": None,
            "group_by": None,
            "resolution": self.default_resolution,
            "datapoints": (24 * 60 * 60) / 10,
        })
        self.assertEqual(result.get_earliest_date(), datetime.now() - timedelta(days=2))