def _validate_eql(ecs_versions, indexes, query, name):
    # validate against all specified schemas or the latest if none specified
    parsed = eql.parse_query(query)
    beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
    beat_schema = beats.get_schema_from_eql(parsed, beat_types) if beat_types else None

    ecs_versions = ecs_versions or [ecs_versions]
    schemas = []

    for version in ecs_versions:
        try:
            schemas.append(ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema, version=version))
        except KeyError:
            raise KeyError('Unknown ecs schema version: {} in rule {}.\n'
                           'Do you need to update schemas?'.format(version, name)) from None

    for schema in schemas:
        try:
            with ecs.KqlSchema2Eql(schema):
                eql.parse_query(query)
        except eql.EqlTypeMismatchError:
            raise
        except eql.EqlParseError as exc:
            message = exc.error_msg
            trailer = None
            if "Unknown field" in message and beat_types:
                trailer = "\nTry adding event.module and event.dataset to specify beats module"

            raise type(exc)(exc.error_msg, exc.line, exc.column, exc.source,
                            len(exc.caret.lstrip()), trailer=trailer) from None
def assert_normalization_match(self, standard_query, converted, source="Microsoft Sysmon"):
    parsed_original = eql.parse_query(standard_query)
    parsed_converted = eql.parse_query(converted)
    normalizer = self.config.normalizers[source]
    normalized = normalizer.normalize_ast(parsed_original)
    self.assertEqual(str(parsed_converted), str(normalized))
def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None:
    """Validate an EQL query while checking TOMLRule."""
    _ = self.ast

    if meta.query_schema_validation is False or meta.maturity == "deprecated":
        # syntax only, which is done via self.ast
        return

    indexes = data.index or []
    beats_version = meta.beats_version or beats.get_max_version()
    ecs_versions = meta.ecs_versions or [ecs.get_max_version()]

    # TODO: remove once py-eql supports ipv6 for cidrmatch
    # Or, unregister the cidrMatch function and replace it with one that doesn't validate against strict IPv4
    with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
        parsed = eql.parse_query(self.query)

    beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
    beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version) if beat_types else None

    for version in ecs_versions:
        schema = ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema, version=version)
        eql_schema = ecs.KqlSchema2Eql(schema)

        try:
            # TODO: switch to custom cidrmatch that allows ipv6
            with eql_schema, eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                eql.parse_query(self.query)
        except eql.EqlTypeMismatchError:
            raise
        except eql.EqlParseError as exc:
            message = exc.error_msg
            trailer = None
            if "Unknown field" in message and beat_types:
                trailer = "\nTry adding event.module or event.dataset to specify beats module"

            raise exc.__class__(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None
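# A standalone sketch of the parsing step used above: eql.parser exposes the
# elasticsearch_syntax and ignore_missing_functions context managers, which
# together let py-eql accept Elasticsearch-only functions such as cidrmatch
# (the query text below is illustrative, not taken from a real rule).
import eql

query = 'network where cidrmatch(destination.ip, "10.0.0.0/8")'
with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
    parsed = eql.parse_query(query)
print(parsed)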
def test_invalid_normalization(self):
    original = eql.parse_query("registry where concat(registry_key) == 'blah'")
    with self.assertRaises(eql.EqlCompileError):
        normalizer = self.config.normalizers["Endgame Platform"]
        normalizer.normalize_ast(original)
def _execute_eql_query(self, events, query):
    """
    Execute an EQL query on the provided events.

    :param events: events
    :param query: EQL query

    :return: the result of the query as a list of dictionaries or None when the query did not match the schema
    """
    schema = eql.Schema.learn(events)

    query_result = []

    # this function is used to store the result of the query to 'query_result'
    def store_result(result):
        for event in result.events:
            query_result.append(event.data)

    engine = eql.PythonEngine()
    with schema:
        try:
            eql_query = eql.parse_query(query, implied_any=True, implied_base=True)
            engine.add_query(eql_query)
        except eql.EqlError as e:
            print(e, file=sys.stderr)
            print('\nTake into account the following schema:')
            pprint(schema.schema)
            return None
        engine.add_output_hook(store_result)

    # execute the query
    engine.stream_events(events)

    return query_result
def _execute_eql_query(events, query):
    """
    Execute an EQL query against the provided events

    :param events: events
    :param query: EQL query

    :return: the query results (i.e. filtered events) or None when the query did not match the schema
    """
    # learn and load the schema
    schema = eql.Schema.learn(events)

    query_results = []

    def callback(results):
        for event in results.events:
            query_results.append(event.data)

    # create the engine and parse the query
    engine = eql.PythonEngine()
    with schema:
        try:
            eql_query = eql.parse_query(query, implied_any=True, implied_base=True)
            engine.add_query(eql_query)
        except eql.EqlError as e:
            print(e, file=sys.stderr)
            print('\nTake into account the following schema:')
            pprint(schema.schema)
            # when using an EQL query that does not match the schema, return None.
            return None
        engine.add_output_hook(callback)

    # execute the query
    engine.stream_events(events)

    return query_results
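# A minimal usage sketch for _execute_eql_query. The callbacks above read
# `event.data`, so plain dicts are assumed to be wrapped as eql.Event objects
# first (the event shape here is hypothetical):
import eql

sample = [
    {"event_type": "process", "process_name": "cmd.exe", "pid": 1001},
    {"event_type": "process", "process_name": "explorer.exe", "pid": 1002},
]
events = [eql.Event.from_data(d) for d in sample]
matches = _execute_eql_query(events, "process where process_name == 'cmd.exe'")
# matches -> [{"event_type": "process", "process_name": "cmd.exe", "pid": 1001}]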
def parsed_query(self):
    if self.query:
        if self.contents['language'] == 'kuery':
            return kql.parse(self.query)
        elif self.contents['language'] == 'eql':
            # TODO: remove once py-eql supports ipv6 for cidrmatch
            with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                return eql.parse_query(self.query)
def from_eql(tree, optimize=True):
    if not isinstance(tree, eql.ast.EqlNode):
        try:
            tree = eql.parse_query(tree, implied_any=True)
        except eql.EqlSemanticError:
            tree = eql.parse_expression(tree)

    converted = Eql2Kql().walk(tree)
    return converted.optimize(recursive=True) if optimize else converted
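# An illustrative call to from_eql: a full query goes through eql.parse_query,
# while a bare EQL expression falls back to eql.parse_expression (the query
# text and printed form are assumptions, not exact output):
kql_tree = from_eql('process where process.name == "cmd.exe"')
print(kql_tree)  # a KQL expression roughly equivalent to the EQL input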
def get_unique_query_fields(cls, rule_contents):
    """Get a list of unique fields used in a rule query from rule contents."""
    query = rule_contents.get('query')
    language = rule_contents.get('language')
    if language in ('kuery', 'eql'):
        # TODO: remove once py-eql supports ipv6 for cidrmatch
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            parsed = kql.parse(query) if language == 'kuery' else eql.parse_query(query)

        return sorted(set(str(f) for f in parsed if isinstance(f, (eql.ast.Field, kql.ast.Field))))
def get_unique_query_fields(cls, rule_contents):
    """Get a list of unique fields used in a rule query from rule contents."""
    query = rule_contents.get('query')
    language = rule_contents.get('language')
    if language in ('kuery', 'eql'):
        parsed = kql.parse(query) if language == 'kuery' else eql.parse_query(query)
        return sorted(set(str(f) for f in parsed if isinstance(f, (eql.ast.Field, kql.ast.Field))))
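# Usage sketch, assuming the method above is bound as a classmethod on a
# Rule class (it is called that way in the later search_rules variant):
rule_contents = {
    "language": "eql",
    "query": "process where process.name == 'cmd.exe' and user.name != 'SYSTEM'",
}
fields = Rule.get_unique_query_fields(rule_contents)
# fields -> e.g. ['process.name', 'user.name']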
def search_rules(query, columns, language, verbose=True):
    """Use KQL or EQL to find matching rules."""
    from kql import get_evaluator
    from eql.table import Table
    from eql.build import get_engine
    from eql import parse_query
    from eql.pipes import CountPipe

    flattened_rules = []

    for file_name, rule_doc in rule_loader.load_rule_files().items():
        flat = {"file": os.path.relpath(file_name)}
        flat.update(rule_doc)
        flat.update(rule_doc["metadata"])
        flat.update(rule_doc["rule"])
        attacks = [threat for threat in rule_doc["rule"].get("threat", [])
                   if threat["framework"] == "MITRE ATT&CK"]
        techniques = [t["id"] for threat in attacks for t in threat.get("technique", [])]
        tactics = [threat["tactic"]["name"] for threat in attacks]
        flat.update(techniques=techniques, tactics=tactics)
        flattened_rules.append(flat)

    flattened_rules.sort(key=lambda dct: dct["name"])

    filtered = []
    if language == "kql":
        evaluator = get_evaluator(query) if query else lambda x: True
        filtered = list(filter(evaluator, flattened_rules))
    elif language == "eql":
        parsed = parse_query(query, implied_any=True, implied_base=True)
        evaluator = get_engine(parsed)
        filtered = [result.events[0].data for result in evaluator(flattened_rules)]

        if not columns and any(isinstance(pipe, CountPipe) for pipe in parsed.pipes):
            columns = ["key", "count", "percent"]

    if columns:
        columns = ",".join(columns).split(",")
    else:
        columns = ["rule_id", "file", "name"]

    table = Table.from_list(columns, filtered)

    if verbose:
        click.echo(table)

    return filtered
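# An illustrative invocation: with implied_any/implied_base, a bare condition
# plus a pipe is a valid EQL query over the flattened rule documents (the
# `language` field comes from the rule body merged into each flat dict):
search_rules('language == "eql" | count', columns=None, language='eql')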
def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None:
    """Validate an EQL query while checking TOMLRule."""
    ast = self.ast

    if meta.query_schema_validation is False or meta.maturity == "deprecated":
        # syntax only, which is done via self.ast
        return

    for stack_version, mapping in meta.get_validation_stack_versions().items():
        beats_version = mapping['beats']
        ecs_version = mapping['ecs']
        err_trailer = f'stack: {stack_version}, beats: {beats_version}, ecs: {ecs_version}'

        beat_types = beats.parse_beats_from_index(data.index)
        beat_schema = beats.get_schema_from_kql(ast, beat_types, version=beats_version) if beat_types else None
        schema = ecs.get_kql_schema(version=ecs_version, indexes=data.index or [], beat_schema=beat_schema)
        eql_schema = ecs.KqlSchema2Eql(schema)

        try:
            # TODO: switch to custom cidrmatch that allows ipv6
            with eql_schema, eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                eql.parse_query(self.query)
        except eql.EqlParseError as exc:
            message = exc.error_msg
            trailer = err_trailer
            if "Unknown field" in message and beat_types:
                trailer = f"\nTry adding event.module or event.dataset to specify beats module\n\n{trailer}"

            raise exc.__class__(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None
        except Exception:
            print(err_trailer)
            raise
def validate_query(self, beats_version: str, ecs_versions: List[str]):
    """Validate an EQL query while checking TOMLRule."""
    # TODO: remove once py-eql supports ipv6 for cidrmatch
    # Or, unregister the cidrMatch function and replace it with one that doesn't validate against strict IPv4
    with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
        parsed = eql.parse_query(self.query)

    beat_types = [index.split("-")[0] for index in self.index or [] if "beat-*" in index]
    beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version) if beat_types else None

    for version in ecs_versions:
        schema = ecs.get_kql_schema(indexes=self.index or [], beat_schema=beat_schema, version=version)

        try:
            # TODO: switch to custom cidrmatch that allows ipv6
            with ecs.KqlSchema2Eql(schema), eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                eql.parse_query(self.query)
        except eql.EqlTypeMismatchError:
            raise
        except eql.EqlParseError as exc:
            message = exc.error_msg
            trailer = None
            if "Unknown field" in message and beat_types:
                trailer = "\nTry adding event.module or event.dataset to specify beats module"

            raise exc.__class__(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None
def add_analytic(self, analytic):
    if isinstance(analytic, dict) and list(analytic.keys()) == ['analytic']:
        analytic = analytic['analytic']

    Analytic.validate(analytic)
    analytic['metadata']['_source'] = analytic['query']
    analytic = eql.ast.EqlAnalytic(metadata=analytic['metadata'],
                                   query=eql.parse_query(analytic['query']))

    self.analytic_lookup[analytic.id] = analytic
    self.analytics.append(analytic)

    for tactic in analytic.metadata.get('tactics', []):
        for technique in analytic.metadata.get('techniques', []):
            self.coverage[tactic][technique].append(analytic)
def add_analytic(self, analytic, path=None):
    # type: (dict, str) -> None
    if isinstance(analytic, dict) and list(analytic.keys()) == ['analytic']:
        analytic = analytic['analytic']

    Analytic.validate(analytic)
    analytic['metadata']['_source'] = '\n'.join(line.rstrip() for line in analytic['query'].strip().splitlines())

    if path:
        analytic['metadata']['_path'] = path

    analytic = eql.ast.EqlAnalytic(metadata=analytic['metadata'],
                                   query=eql.parse_query(analytic['query']))

    self.analytic_lookup[analytic.id] = analytic
    self.analytics.append(analytic)

    for tactic in analytic.metadata.get('tactics', []):
        for technique in analytic.metadata.get('techniques', []):
            self.coverage[tactic][technique].append(analytic)
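# A hypothetical analytic document for add_analytic; the keys mirror the
# access patterns above, but the authoritative shape is whatever
# Analytic.validate enforces:
analytic_doc = {
    "metadata": {"id": "example-id", "name": "Example analytic",
                 "tactics": ["Execution"], "techniques": ["T1059"]},
    "query": "process where process_name == 'cmd.exe'",
}
# registry.add_analytic({"analytic": analytic_doc})  # `registry` is illustrative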
def parsed_query(self) -> eql.ast.EqlNode:
    with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
        return eql.parse_query(self.query)
def parse(text):
    try:
        return parse_query(text, implied_base=True, implied_any=True)
    except EqlError as exc:
        print(exc, file=sys.stderr)
        sys.exit(2)
def assert_normalization_match(self, standard_query, sysmon_query):
    parsed_original = eql.parse_query(standard_query)
    parsed_sysmon = eql.parse_query(sysmon_query)
    converted = self.sysmon_normalizer.normalize_ast(parsed_original)
    self.assertEqual(parsed_sysmon, converted)
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from eql import parse_query, ParseError
import argparse
import sys

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='check an eql query for valid syntax against the eql parser')
    parser.add_argument('--query', '-q', help='the eql query to check for valid syntax')
    args = parser.parse_args()

    if not args.query:
        print("Must specify the --query or -q parameter")
        sys.exit(-1)

    eql_query = args.query

    try:
        _ = parse_query(eql_query)
    except ParseError as e:
        print("eql_error: {0}".format(e.message))
        sys.exit(-1)

    sys.exit(0)
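# Hypothetical invocation, assuming the script above is saved as check_eql.py:
#   $ python check_eql.py -q "process where process_name == 'cmd.exe'"
# The process exits 0 when the query parses and non-zero on a syntax error.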
def search_rules(query, columns, language, count, verbose=True, rules: Dict[str, dict] = None, pager=False):
    """Use KQL or EQL to find matching rules."""
    from kql import get_evaluator
    from eql.table import Table
    from eql.build import get_engine
    from eql import parse_query
    from eql.pipes import CountPipe

    flattened_rules = []
    rules = rules or rule_loader.load_rule_files(verbose=verbose)

    for file_name, rule_doc in rules.items():
        flat = {"file": os.path.relpath(file_name)}
        flat.update(rule_doc)
        flat.update(rule_doc["metadata"])
        flat.update(rule_doc["rule"])

        tactic_names = []
        technique_ids = []
        subtechnique_ids = []

        for entry in rule_doc['rule'].get('threat', []):
            if entry["framework"] != "MITRE ATT&CK":
                continue

            techniques = entry.get('technique', [])
            tactic_names.append(entry['tactic']['name'])
            technique_ids.extend([t['id'] for t in techniques])
            subtechnique_ids.extend([st['id'] for t in techniques for st in t.get('subtechnique', [])])

        flat.update(techniques=technique_ids, tactics=tactic_names, subtechniques=subtechnique_ids,
                    unique_fields=Rule.get_unique_query_fields(rule_doc['rule']))
        flattened_rules.append(flat)

    flattened_rules.sort(key=lambda dct: dct["name"])

    filtered = []
    if language == "kql":
        evaluator = get_evaluator(query) if query else lambda x: True
        filtered = list(filter(evaluator, flattened_rules))
    elif language == "eql":
        parsed = parse_query(query, implied_any=True, implied_base=True)
        evaluator = get_engine(parsed)
        filtered = [result.events[0].data for result in evaluator(flattened_rules)]

        if not columns and any(isinstance(pipe, CountPipe) for pipe in parsed.pipes):
            columns = ["key", "count", "percent"]

    if count:
        click.echo(f'{len(filtered)} rules')
        return filtered

    if columns:
        columns = ",".join(columns).split(",")
    else:
        columns = ["rule_id", "file", "name"]

    table = Table.from_list(columns, filtered)

    if verbose:
        click.echo_via_pager(table) if pager else click.echo(table)

    return filtered
def parsed_query(self):
    if self.query:
        if self.contents['language'] == 'kuery':
            return kql.parse(self.query)
        elif self.contents['language'] == 'eql':
            return eql.parse_query(self.query)