def test_valid_expressions(self):
    """Check that every known-good expression parses without raising.

    There are no explicit assertions: the test passes when
    ``parse_expression`` accepts each entry.
    """
    expressions = (
        "1 == 1",
        "false != (1 == 1)",
        'abc != "ghi"',
        "abc > 20",
        "startsWith(abc, 'abc')",
        "concat(a,b,c,d,)",
        "a in (1,2,3,4,)",
        "length(abc) < length(hij)",
        "length(concat(abc))",
        'abc == substring("abc", 1, 3)',
        "1",
        '(1)',
        "true",
        "false",
        "null",
        "not null",
        "abc",
        '"string"',
        'abc and def',
        '(1==abc) and def',
        '1 * 2 + 3 * 4 + 10 / 2',
        'abc == (1 and 2)',
        'abc == (def and 2)',
        'abc == (def and def)',
        'abc == (def and ghi)',
        '"\\b\\t\\r\\n\\f\\\\\\"\\\'"',
    )
    for expression in expressions:
        parse_expression(expression)
def test_type_mismatch_comparisons(self):
    """Check that comparing values of incompatible types raises a mismatch error.

    Every ordered pair of random-value generators is tried; pairs whose
    types are identical or listed as comparable are skipped.
    """
    compatible_pairs = {
        (float, int),
        (int, float),
        (bool, bool),
        (str, str),
    }
    generators = (
        utils.random_bool,
        utils.random_int,
        utils.random_float,
        utils.random_string,
    )
    operators = ("==", "!=", "<", "<=", ">=", ">")

    for make_left, make_right in itertools.product(generators, repeat=2):
        left_value = make_left()
        right_value = make_right()
        left_type, right_type = type(left_value), type(right_value)

        # Pairs that are expected to compare cleanly are not interesting here.
        if left_type is right_type or (left_type, right_type) in compatible_pairs:
            continue

        left_text = utils.unfold(left_value)
        right_text = utils.unfold(right_value)
        for operator in operators:
            expression = "{left} {comp} {right}".format(
                left=left_text, comp=operator, right=right_text)
            self.assertRaises(EqlTypeMismatchError, parse_expression, expression)
def test_query_events(self):
    """Test that ``events[n].<field>`` resolves to event index n plus the plain field."""
    field_texts = ['abc', 'abc[123]', 'abc.def.ghi', 'abc.def[123].ghi[456]']

    # A bare field and its events[0] form must both report index 0.
    for field_text in field_texts:
        plain = parse_expression(field_text)  # type: Field
        prefixed = parse_expression('events[0].' + field_text)  # type: Field

        for candidate in (plain, prefixed):
            position, stripped = candidate.query_multiple_events()
            self.assertEqual(position, 0, "Didn't query from first event")
            self.assertEqual(stripped, plain, "Didn't unconvert query")

    # Non-zero indexes must be reported back unchanged.
    for expected_position, field_text in enumerate(field_texts):
        prefixed_text = 'events[{}].{}'.format(expected_position, field_text)
        plain = parse_expression(field_text)  # type: Field
        prefixed = parse_expression(prefixed_text)  # type: Field

        position, stripped = prefixed.query_multiple_events()
        self.assertEqual(position, expected_position,
                         "Didn't query from {} event".format(expected_position))
        self.assertEqual(stripped, plain, "Didn't unconvert query")
def test_invalid_expressions(self):
    """Test that malformed expressions and bare keywords raise ParseError.

    Each keyword is also parsed in upper case, which must not raise.
    """
    malformed = [
        'a xor b',  # made up comparator
        'def[ghi]',  # index not a number
        'def[-1]',  # negative indexes not supported
        'someFunc().abc',  # can't index these
        '1.2.3',  # invalid number
        'a.1',
        '()',  # nothing inside
        '',
        '"invalid"string"',
        '--100',
        '1000 100',
        '"" 100',
        # literal values as fields
        'true.100',
        'null.abc',
        'abc[0].null',
        # require escape slashes,
        '\\R',
        '\\W',
    ]
    reserved_words = [
        'and', 'by', 'in', 'join', 'macro', 'not',
        'of', 'or', 'sequence', 'until', 'where', 'with',
    ]

    for text in malformed:
        with self.assertRaises(ParseError):
            parse_expression(text)

    for word in reserved_words:
        with self.assertRaises(ParseError):
            parse_expression(word)
        # The upper-cased spelling is parsed without expecting an error.
        parse_expression(word.upper())
def test_walker(self):
    """Check that walkers can copy a tree and rewrite selected nodes."""
    copier = RecursiveWalker()
    tree = parse_expression("process_name == 'net.exe' or file_name == 'abc.txt'")

    def assert_deep_copy(original, duplicate):
        """Assert equality without identity, at the top level and one level down."""
        self.assertEqual(original, duplicate)
        self.assertIsNot(original, duplicate)
        for original_child, duplicate_child in zip(original, duplicate):
            self.assertEqual(original_child, duplicate_child)
            self.assertIsNot(original_child, duplicate_child)

    assert_deep_copy(tree, copier.copy_node(tree))

    class SimpleWalker(RecursiveWalker):
        """Swap the file_name comparison for a user_name one, then rename the user."""

        def _walk_comparison(self, node):
            if node.left == Field('file_name'):
                substitute = parse_expression('user_name == "TEMP_USER"')
                return self.walk(substitute)
            return self._walk_base_node(node)

        def _walk_string(self, node):
            if node == String("TEMP_USER"):
                return String("artemis")
            return node

    rewriter = SimpleWalker()
    wanted = parse_expression('process_name == "net.exe" or user_name == "artemis"')
    self.assertEqual(rewriter.walk(tree), wanted)
def test_unoptimized(self):
    """Test that "1 + 2" stays a MathOperation while optimizations are skipped."""
    # NOTE(review): the same assertion appears twice in the original; both
    # runs are kept inside the context because the expected value is the
    # unfolded node. Confirm this nesting against the upstream file.
    with skip_optimizations:
        for _ in range(2):
            self.assertEqual(parse_expression("1 + 2"),
                             MathOperation(Number(1), "+", Number(2)))
def test_valid_expressions(self):
    """Check that every known-good expression parses without raising.

    There are no explicit assertions: the test passes when
    ``parse_expression`` accepts each entry.
    """
    expressions = (
        "1 == 1",
        "1 == (1 == 1)",
        'abc != "ghi"',
        "abc > 20",
        "f()",
        "somef(a,b,c,d,)",
        "a in (1,2,3,4,)",
        "f(abc) < g(hij)",
        "f(f(f(f(abc))))",
        'abc == f()',
        'f() and g()',
        "1",
        '(1)',
        "true",
        "false",
        "null",
        "not null",
        "abc",
        '"string"',
        'abc and def',
        '(1==abc) and def',
        'abc == (1 and 2)',
        'abc == (def and 2)',
        'abc == (def and def)',
        'abc == (def and ghi)',
        '"\\b\\t\\r\\n\\f\\\\\\"\\\'"',
    )
    for expression in expressions:
        parse_expression(expression)
def test_mixed_definitions(self):
    """Test that macro and constant definitions load and expand together.

    A preprocessor is copied, then each copy receives a different ``ABC``
    macro; only the copy is checked, so the expansions also show that it is
    independent of the original.
    """
    definitions = parse_definitions(""" const magic = 100 macro OR(a, b) a or b """)
    original = PreProcessor(definitions)

    # Confirm that copy and adding is working
    duplicate = original.copy()
    original.add_definition(
        parse_definition("macro ABC(a, b, c) error_error_error"))
    duplicate.add_definition(
        parse_definition("macro ABC(a, b, c) f(a, magic, c)"))

    expansions = [
        ("abc", "abc"),
        ("OR(x, y)", "x or y"),
        ("magic", "100"),
        ("ABC(0,1,2)", "f(0, 100, 2)"),
    ]
    for source_text, expanded_text in expansions:
        source_node = parse_expression(source_text)
        expanded_node = parse_expression(expanded_text)
        self.assertEqual(duplicate.expand(source_node), expanded_node)
def test_comments(self):
    """Test that comments are valid syntax but stripped from the AST.

    Block comments (``/* ... */``) and line comments (``// ...``) are placed
    in queries, an expression and a macro definition; each result must equal
    the comment-free equivalent.
    """
    match = parse_query("process where pid=4 and ppid=0")

    query = parse_query(
        """process where pid = 4 /* multi\nline\ncomment */ and ppid=0""")
    self.assertEqual(match, query)

    query = parse_query(
        """process where pid = 4 // something \n and ppid=0""")
    self.assertEqual(match, query)

    query = parse_query("""process where pid = 4 and ppid=0 """)
    self.assertEqual(match, query)

    # A line comment runs to the end of its line, so each one needs its own
    # newline. The breaks are written as explicit "\n" escapes so that
    # reformatting cannot join the lines and comment out the condition.
    query = parse_query(
        "process where\n"
        "// test\n"
        "//\n"
        "//line\n"
        "//comments\n"
        "pid = 4 and ppid = 0\n"
    )
    self.assertEqual(match, query)

    match = parse_expression("true")
    query = parse_expression(
        "true // something else \r\n /* test\r\n something \r\n*/")
    self.assertEqual(match, query)

    commented = parse_definitions(
        "macro test() pid = 4 and /* comment */ ppid = 0")
    macro = parse_definitions("macro test() pid = 4 and ppid = 0")
    self.assertEqual(commented, macro)
def parse_to(text, path):
    """Assert that ``text`` parses to a Field with ``path`` and survives a render round trip.

    Relies on ``self`` from the enclosing test method.
    """
    parsed = parse_expression(text)
    self.assertIsInstance(parsed, Field)
    self.assertEqual(parsed.full_path, path)

    # Rendering and re-parsing must give back an equal node.
    rendered = parsed.render()
    reparsed = parse_expression(rendered)
    self.assertEqual(reparsed, parsed)
def test_set_static_optimizations(self):
    """Check that static members are dropped from a set tested against a static value."""
    cases = (
        # (expression as written, expected reduced form)
        ('"something" in ("str", "str2", "str3", "str4", someField)',
         '"something" == someField'),
        ('"something" in ("str", "str2", "str3", "str4", field1, field2)',
         '"something" in (field1, field2)'),
    )
    for written, reduced in cases:
        self.assertEqual(parse_expression(written), parse_expression(reduced))
def test_parse_implied_booleans(self):
    """Test that 2 to 9 random values joined by and/or parse under implied_booleans."""
    with implied_booleans:
        for operand_count in range(2, 10):
            operands = []
            for _ in range(operand_count):
                operands.append(utils.unfold(utils.random_value()))

            for joiner in (" and ", " or "):
                parse_expression(joiner.join(operands))
def test_invalid_expressions(self):
    """Test that malformed expressions and bare keywords are rejected.

    Malformed text must raise EqlParseError and bare keywords must raise
    EqlSyntaxError. Each keyword is also parsed in upper case, which must
    not raise.
    """
    malformed = [
        '',  # empty
        'a xor b',  # made up comparator
        'a ^ b',  # made up comparator
        'a % "b"',  # made up comparator
        'a b c d',  # missing syntax
        'def[]',  # no index
        'def[ghi]',  # index not a number
        'def[-1]',  # negative indexes not supported
        'someFunc().abc',  # invalid function
        'length().abc',  # can't index these
        '1.2.3',  # invalid number
        'a.1',
        '(field',  # unclosed paren
        '(field xx',  # unclosed paren and bad syntax
        'field[',  # unclosed bracket
        'field[0',  # unclosed bracket
        '(',
        ')',
        '()',  # nothing inside
        '',
        '"invalid"string"',
        'descendant of [event_type where true',
        '--100',
        '1000 100',
        '"" 100',
        # literal values as fields and functions
        'true.100',
        'true()',
        'null.abc',
        'abc[0].null',
        # require escape slashes,
        '\\R',
        '\\W',
        # minimum of 1 argument
        'length()',
        'concat()',
    ]
    reserved_words = [
        'and', 'by', 'in', 'join', 'macro', 'not',
        'of', 'or', 'sequence', 'until', 'where', 'with',
    ]

    for text in malformed:
        with self.assertRaises(EqlParseError):
            parse_expression(text)

    for word in reserved_words:
        with self.assertRaises(EqlSyntaxError):
            parse_expression(word)
        # The upper-cased spelling is parsed without expecting an error.
        parse_expression(word.upper())
def test_compound_merging_sets(self):
    """Test that set lookups on one field merge inside longer AND/OR chains."""
    cases = (
        # (expression as written, expected merged form, failure message)
        ('opcode=1 and name in ("a", "b", "c", "d") and name in ("b", "d")',
         'opcode=1 and name in ("b", "d")',
         "failed to merge at tail of AND"),
        ('opcode=1 and name in ("a", "b", "c", "d") and name in ("b", "d") and x=1',
         'opcode=1 and name in ("b", "d") and x=1',
         "failed to merge at middle of AND"),
        ('opcode=1 or name in ("a", "b", "c", "d") or name in ("e", "f")',
         'opcode=1 or name in ("a", "b", "c", "d", "e", "f")',
         "failed to merge at tail of OR"),
        ('opcode=1 or name in ("a", "b", "c", "d") or name in ("e", "f") or x=1',
         'opcode=1 or name in ("a", "b", "c", "d", "e", "f") or x=1',
         "failed to merge at middle of OR"),
    )
    for written, merged, message in cases:
        actual = parse_expression(written)
        wanted = parse_expression(merged)
        self.assertEqual(actual, wanted, message)
def test_wildcard_or(self):
    """Test that adjacent wildcard/match checks on one field combine under OR."""
    cases = (
        # (expression as written, expected parsed equivalent, failure message)
        ('name == "foo*" or name == "*bar"',
         'wildcard(name, "foo*", "*bar")',
         "Failed to combine OR with matching adjacent wildcard() calls"),
        ('match(name, "fo[o]") or match(name, "ba[r]?")',
         'match(name, "fo[o]", "ba[r]?")',
         "Failed to combine OR with matching adjacent match() calls"),
        # this isn't necessary as a test, but is worth keeping so the behavior is well defined
        ('name == "foo*" or other_field == "bar" or name == "*baz"',
         'wildcard(name, "foo*") or other_field == "bar" or wildcard(name, "*baz")',
         "Combined nonadjacent matching adjacent wildcard() calls"),
        ('name == "foo*" or title == "*bar"',
         'wildcard(name, "foo*") or wildcard(title, "*bar")',
         "Shouldn't have combined wildcards of different fields"),
    )
    for written, equivalent, message in cases:
        actual = parse_expression(written)
        wanted = parse_expression(equivalent)
        self.assertEqual(actual, wanted, message)
def test_functions(self):
    """Test that a call to an unknown function keeps all three of its arguments."""
    source = 'somefunction( a and c, false, d or g) '

    # Make sure that functions are parsing all arguments
    with ignore_missing_functions:
        call = parse_expression(source)

    self.assertIsInstance(call, FunctionCall)
    argument_count = len(call.arguments)
    self.assertEqual(argument_count, 3)
def assert_kv_match(condition_dict, condition_text, *args):
    """Assert that ``match_kv(condition_dict)`` renders like the parsed ``condition_text``.

    Both nodes are printed before the comparison. Extra positional ``args``
    are forwarded to the assertion (for example a failure message). Relies on
    ``self`` from the enclosing test method.
    """
    condition_node = match_kv(condition_dict)
    parsed_node = parse_expression(condition_text)
    print(condition_node)
    print(parsed_node)
    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists on unittest.TestCase as of Python 3.12.
    self.assertEqual(condition_node.render(), parsed_node.render(), *args)
def test_parse_type_matches(self):
    """Check that expressions mixing these operand types parse without raising."""
    accepted = (
        '1 or 2',
        'abc == null or def == null',
        "false or 1",
        "1 or 'abcdefg'",
        "false or 'string-false'",
        "port == 80 or command_line == 'defghi'",
        "(port != null or command_line != null)",
        "(process_path or process_name) == '*net.exe'",
        "'hello' < 'hELLO'",
        "1 < 2",
        "(data and data.alert_details and data.alert_details.process_path) == 'net.exe'",
    )
    for text in accepted:
        parse_expression(text)
def fold(expr):
    """Fold an expression given as EQL text or as an ``Expression`` node.

    Text is parsed first. Any other input type raises ``TypeError``.
    """
    if is_string(expr):
        parsed = parse_expression(expr)
        return parsed.fold()

    if isinstance(expr, Expression):
        return expr.fold()

    raise TypeError("Unable to fold {}".format(expr))
def __init__(self, config):
    """Create the normalizer from a configuration mapping.

    Reads ``strict``, ``domain``, ``name``, ``timestamp`` (``field`` and
    ``format``), ``fields.mapping`` and ``events`` from ``config``. Every EQL
    snippet in the configuration is parsed once here and stored as a node.
    """
    self.config = config
    self.strict = config['strict']
    self.domain = config['domain']
    self.name = config['name']

    timestamp = config['timestamp']
    self.time_field = timestamp['field']
    self.time_format = timestamp['format']

    # Parse out the EQL field mapping
    field_mapping = {}
    for field, eql_text in config['fields']['mapping'].items():
        field_mapping[field] = parse_expression(eql_text)
    self.field_mapping = field_mapping

    # Parse out the EQL event types
    self.event_filters = OrderedDict()
    self.event_enums = OrderedDict()
    self.event_field_mapping = OrderedDict()

    for event_name, event_config in config['events'].items():
        self.event_filters[event_name] = parse_expression(event_config['filter'])
        enums = self.event_enums[event_name] = OrderedDict()
        event_fields = self.event_field_mapping[event_name] = OrderedDict()

        # Create a lookup for all of the event fields
        for field_name, mapped_expression in event_config.get('mapping', {}).items():
            event_fields[field_name] = parse_expression(mapped_expression)

        # Now loop over all of the enums, and build a mapping for EQL
        for field_name, enum_mapping in event_config.get('enum', {}).items():
            options = enums[field_name] = OrderedDict()
            for enum_option, enum_expr in enum_mapping.items():
                options[enum_option] = parse_expression(enum_expr)

    self._current_event_type = None
    self.data_normalizer = self.get_data_normalizer()
    self.normalize_ast = QueryNormalizer(self).convert
def test_functions(self):
    """Test that a function call spread over several fragments keeps three arguments."""
    # Make sure that functions are parsing all arguments
    source = ('somefunction('
              ' a and c,'
              ' false,'
              ' d or g'
              ')')
    call = parse_expression(source)

    self.assertIsInstance(call, FunctionCall)
    argument_count = len(call.arguments)
    self.assertEqual(argument_count, 3)
def test_method_syntax(self):
    """Test correct parsing and rendering of methods.

    Checks that parenthesizing the receiver changes the parse, and that a
    method chain with its method flag cleared equals the nested-call form.
    """
    parse1 = parse_expression("(a and b):concat():length()")
    parse2 = parse_expression("a and b:concat():length()")
    # assertNotEqual/assertEqual replace the deprecated assertNotEquals and
    # assertEquals aliases, which no longer exist as of Python 3.12.
    self.assertNotEqual(parse1, parse2)

    class Unmethodize(DepthFirstWalker):
        """Strip out the method metadata, so its rendered directly as a node."""

        def _walk_function_call(self, node):
            node.as_method = False
            return node

    without_method = Unmethodize().walk(parse1)
    expected = parse_expression("length(concat(a and b))")

    # The walk must leave parse1 equal to a fresh parse of the same text.
    self.assertEqual(parse1, parse_expression("(a and b):concat():length()"))
    self.assertIsNot(parse1, without_method)
    self.assertEqual(without_method, expected)
def test_literals(self):
    """Test that literals parse to the right node type and value, and round-trip."""
    cases = (
        # (source text, expected python value, expected node class)
        ('true', True, Boolean),
        ('false', False, Boolean),
        ('100', 100, Number),
        ('1.5', 1.5, Number),
        ('.6', .6, Number),
        ('-100', -100, Number),
        ('-15.24', -15.24, Number),
        ('"100"', "100", String),
        ('null', None, Null),
    )
    for source, wanted_value, wanted_class in cases:
        literal = parse_expression(source)
        # Render and re-parse before asserting anything, as a round-trip check.
        round_tripped = parse_expression(literal.render())

        self.assertIsInstance(literal, wanted_class)
        self.assertEqual(literal.value, wanted_value)
        self.assertEqual(literal, round_tripped)
def get_scoper(self):
    """Get a function that extracts the nested object named by the configured scope.

    Returns:
        ``None`` when ``config['fields']`` has no ``scope`` entry. Otherwise a
        callable that walks an input value along the parsed scope field's
        base and path, and returns what it finds or ``{}`` when the walk ends
        on a missing or falsy value.
    """
    scope = self.config['fields'].get('scope')
    if scope is None:
        return None

    field = parse_expression(scope)  # type: Field
    keys = [field.base] + field.path

    def walk_path(value):
        """Follow ``keys`` into ``value``, stopping at the first missing step."""
        for key in keys:
            if value is None:
                break
            elif isinstance(value, dict):
                value = value.get(key)
            else:
                # Index directly instead of comparing ``key < len(value)``:
                # that comparison raises TypeError on Python 3 for a string
                # key against a sequence, and for values that have no length.
                try:
                    value = value[key]
                except (IndexError, KeyError, TypeError):
                    value = None
        return value or {}

    return walk_path
def _walk_comparison(self, node):
    """Replace comparisons on ``file_name`` with a walked ``user_name`` comparison.

    Any other comparison is passed to ``_walk_base_node``.
    """
    targets_file_name = node.left == Field('file_name')
    if targets_file_name:
        substitute = parse_expression('user_name == "TEMP_USER"')
        return self.walk(substitute)
    return self._walk_base_node(node)
def test_set_optimizations(self):
    """Test that set unions, intersections, etc. are correct."""
    # Duplicate members
    with_repeats = parse_expression(
        'fieldname in ("a", "b", "C", "d", 1, "d", "D", "c")')
    deduplicated = parse_expression(
        'fieldname in ("a", "b", "C", "d", 1)')
    self.assertEqual(with_repeats, deduplicated,
                     "duplicate values were not removed")

    # AND of two sets over the same field
    anded = parse_expression(
        'fieldname in ("a", "b", "C", "x") and fieldname in ("d", "c", "g", "X")'
    )
    overlap = parse_expression('fieldname in ("C", "x")')
    self.assertEqual(anded, overlap, "intersection test failed")

    anded = parse_expression(
        '(fieldname in ("a", "b", "C", "x")) and fieldname in ("d", "f", "g", 123)'
    )
    self.assertEqual(anded, Boolean(False),
                     "empty intersection test failed")

    # OR of two sets over the same field
    ored = parse_expression(
        'fieldname in ("a", "b", "C", "x") or fieldname in ("d", "c", "g", "X")'
    )
    combined = parse_expression('fieldname in ("a", "b", "C", "x", "d", "g")')
    self.assertEqual(ored, combined, "union test failed")

    # A literal looked up in a set of literals
    static_lookup = parse_expression('"ABC" in ("a", "ABC", "C")')
    self.assertEqual(static_lookup, Boolean(True),
                     "literal comparison failed")

    static_lookup = parse_expression('"def" in ("a", "ABC", "C")')
    self.assertEqual(static_lookup, Boolean(False),
                     "literal comparison failed")

    # A literal looked up in a set that mixes literals and fields
    mixed_lookup = parse_expression(
        '"abc" in ("a", "b", fieldA, "C", "d", fieldB, fieldC)')
    fields_only = parse_expression('"abc" in (fieldA, fieldB, fieldC)')
    self.assertEqual(mixed_lookup, fields_only,
                     "literal values were not removed")

    # A field looked up in a set that contains that same field
    mixed_lookup = parse_expression(
        'fieldA in ("a", "b", "C", "d", fieldA, fieldB, fieldC)')
    self.assertEqual(mixed_lookup, Boolean(True),
                     "dynamic set lookup not optimized")

    # A set combined with the negation of a second set
    minus = parse_expression(
        'NAME in ("a", "b", "c", "d") and not NAME in ("b", "d")')
    remainder = parse_expression('NAME in ("a", "c")')
    self.assertEqual(minus, remainder, "set subtraction failed")
def test_static_value_optimizations(self):
    """Test that comparisons between static values parse to a Boolean literal."""
    always_true = (
        '10 == 10',
        '10 == 10.0',
        '"abc" == "abc"',
        'true == true',
        'true != false',
        '"" == ""',
        '"" == "*"',
        '"aaaaa" == "*"',
        '"abc" == "*abc*"',
        '"abc" == "*ABC*"',
        '"ABC" == "*abc*"',
        '"abc" != "d*"',
        '"net view" == "net* view*"',
        '"net view" == "net* view"',
        '"net view view" == "net* view"',
        '"net view " == "net* VIEW*"',
        '"Net!!! VIEW view net view" == "net* view*"',
        'not "Net!!! VIEW view net view" != "net* view*"',
        '"Newww!!! VIEW view net view" != "net* view*"',
        '1 < 2',
        '1 <= 2',
        '2 <= 2',
        '1 <= 1.0',
        '1.0 <= 1',
        '2 > 1',
        '2 >= 1',
        '2 >= 2',
        '2 != 1',
        '(1 * 2 + 3 * 4 + 10 / 2) == (2 + 12 + 5)',
        '(1 * 2 + 3 * 4 + 10 / 2) == 19',
        '1 * 2 + 3 * 4 + 10 / 2 == 2 + 12 + 5',
        '"ABC" <= "ABC"',
        "length('abcdefg') == 7",
        "100 in (1, 2, 3, 4, 100, 105)",
        "'rundll' in (abc.def[100], 'RUNDLL')",
        "not 'rundll' in ('100', 'nothing')",
        '1 - -2 == 3',
        '1 - +2 == -1',
        '1 +- length(a) == 1 - length(a)',
        '100:concat():length() == 3',
        '995 == (100 * 10):subtract("hello":length())',
    )
    always_false = (
        '"b" == "a"',
        '1 == 2',
        '1 > 2',
        '5 <= -3',
        '"ABC" = "abcd"',
        '"ABC*DEF" == " ABC DEF "',
        '"abc" > "def"',
        '"abc" != "abc"',
        # check that these aren't left to right
        '1 * 2 + 3 * 4 + 10 / 2 == 15',
    )

    def parse_static(text):
        """Parse ``text`` and require that it was reduced to a Boolean node."""
        node = parse_expression(text)
        self.assertIsInstance(node, Boolean,
                              'Failed to optimize {}'.format(text))
        return node

    for text in always_true:
        folded = parse_static(text)
        self.assertTrue(
            folded.value,
            'Parser did not evaluate {} as true'.format(text))

    for text in always_false:
        folded = parse_static(text)
        self.assertFalse(
            folded.value,
            'Parser did not evaluate {} as false'.format(text))
def test_set_comparison_optimizations(self):
    """Test that a set lookup and a comparison on the same field are merged."""
    cases = (
        # (expression as written, expected merged form, failure message)
        ('name in ("a", "b") or name == "c"',
         'name in ("a", "b", "c")',
         "Failed to OR a set with matching comparison"),
        ('name in ("a", "b") and name == "c"',
         'false',
         "Failed to AND a set with matching missing comparison"),
        ('name in ("a", "b") and name == "b"',
         'name == "b"',
         "Failed to AND a set with matching comparison"),
        # switch the order
        ('name == "c" or name in ("a", "b")',
         'name in ("c", "a", "b")',
         "Failed to OR a comparison with a matching set"),
        ('name == "c" and name in ("a", "b")',
         'false',
         "Failed to AND a comparison with a matching missing set"),
        ('name == "b" and name in ("a", "b")',
         'name == "b"',
         "Failed to AND a comparisong with a matching set"),
        # test that values can be subtracted individually from sets
        ('name in ("a", "b", "c") and name != "c"',
         'name in ("a", "b")',
         "Failed to subtract specific value from set"),
    )
    for written, merged, message in cases:
        actual = parse_expression(written)
        wanted = parse_expression(merged)
        self.assertEqual(actual, wanted, message)
def test_comparisons_to_sets(self):
    """Test that OR-ed equality checks on one field parse to a single set lookup."""
    chained = 'pid == 4 or pid == 8 or pid == 520'
    as_set = "pid in (4, 8, 520)"

    from_chain = parse_expression(chained)
    from_set = parse_expression(as_set)
    self.assertEqual(from_chain, from_set,
                     "Failed to merge comparisons into a set")
def test_backtick_split_lines(self):
    """Confirm that a backtick-quoted name containing a newline is a syntax error."""
    split_name = "`abc \n def`"
    self.assertRaises(EqlSyntaxError, parse_expression, split_name)