Example #1
0
    def _validate_eql(ecs_versions, indexes, query, name):
        """Validate an EQL query against one or more ECS schema versions.

        :param ecs_versions: ECS versions to validate against; when falsy, a single
            ``None`` version is forwarded to ``ecs.get_kql_schema``
            (NOTE(review): presumably resolved there to the latest schema - confirm)
        :param indexes: index patterns of the rule; entries containing ``beat-*``
            select the beats schema
        :param query: EQL query text
        :param name: rule name, only used in the unknown-schema error message
        :raises KeyError: when ``ecs.get_kql_schema`` raises ``KeyError`` for a version
        :raises eql.EqlParseError: when the query does not parse under a schema
        """
        # validate against all specified schemas or the latest if none specified
        parsed = eql.parse_query(query)
        beat_types = [index.split("-")[0] for index in indexes if "beat-*" in index]
        beat_schema = beats.get_schema_from_eql(parsed, beat_types) if beat_types else None

        # The previous fallback was `[ecs_versions]`, i.e. the falsy value wrapped in a
        # list: None became [None] (kept here), but an empty list became [[]] and a
        # list was then passed as the schema version.
        ecs_versions = ecs_versions or [None]
        schemas = []

        # Resolve every schema first so an unknown version is reported before any
        # query validation runs.
        for version in ecs_versions:
            try:
                schemas.append(ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema, version=version))
            except KeyError:
                raise KeyError('Unknown ecs schema version: {} in rule {}.\n'
                               'Do you need to update schemas?'.format(version, name)) from None

        for schema in schemas:
            try:
                with ecs.KqlSchema2Eql(schema):
                    eql.parse_query(query)

            except eql.EqlTypeMismatchError:
                # type mismatches are propagated untouched
                raise

            except eql.EqlParseError as exc:
                trailer = None
                if "Unknown field" in exc.error_msg and beat_types:
                    trailer = "\nTry adding event.module and event.dataset to specify beats module"

                # Re-create the same error type with the extra hint attached.
                raise type(exc)(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None
Example #2
0
    def assert_normalization_match(self, standard_query, converted, source="Microsoft Sysmon"):
        """Assert that normalizing ``standard_query`` for ``source`` produces ``converted``.

        Both query strings are parsed, the parsed standard query is run through
        ``self.config.normalizers[source]``, and the string renderings of the
        expected and normalized trees are compared.
        """
        original_tree = eql.parse_query(standard_query)
        expected_tree = eql.parse_query(converted)

        normalized_tree = self.config.normalizers[source].normalize_ast(original_tree)
        self.assertEqual(str(expected_tree), str(normalized_tree))
Example #3
0
    def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None:
        """Validate an EQL query while checking TOMLRule.

        Reading ``self.ast`` covers the syntax-only check. Unless schema validation
        is disabled or the rule is deprecated, the query is then re-parsed under the
        schema of every ECS version in ``meta.ecs_versions`` (or the max version).

        :raises eql.EqlParseError: when the query fails to parse under a schema
        """
        _ = self.ast

        syntax_only = meta.query_schema_validation is False or meta.maturity == "deprecated"
        if syntax_only:
            # syntax only, which is done via self.ast
            return

        indexes = data.index or []
        beats_version = meta.beats_version or beats.get_max_version()
        ecs_versions = meta.ecs_versions or [ecs.get_max_version()]

        # TODO: remove once py-eql supports ipv6 for cidrmatch
        # Or, unregister the cidrMatch function and replace it with one that doesn't validate against strict IPv4
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            parsed = eql.parse_query(self.query)

        # The prefix before the first "-" of every "*beat-*" index names the beat.
        beat_types = []
        for index in indexes:
            if "beat-*" in index:
                beat_types.append(index.split("-")[0])

        beat_schema = None
        if beat_types:
            beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version)

        for version in ecs_versions:
            kql_schema = ecs.get_kql_schema(indexes=indexes, beat_schema=beat_schema, version=version)
            eql_schema = ecs.KqlSchema2Eql(kql_schema)

            try:
                # TODO: switch to custom cidrmatch that allows ipv6
                with eql_schema, eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                    eql.parse_query(self.query)

            except eql.EqlTypeMismatchError:
                # propagated as-is, without the rewrapping below
                raise

            except eql.EqlParseError as exc:
                hint = None
                if "Unknown field" in exc.error_msg and beat_types:
                    hint = "\nTry adding event.module or event.dataset to specify beats module"

                error_cls = exc.__class__
                raise error_cls(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=hint) from None
Example #4
0
    def test_invalid_normalization(self):
        # Normalizing this query with the "Endgame Platform" normalizer is expected
        # to raise eql.EqlCompileError.
        query_text = "registry where concat(registry_key) == 'blah'"
        parsed = eql.parse_query(query_text)

        with self.assertRaises(eql.EqlCompileError):
            self.config.normalizers["Endgame Platform"].normalize_ast(parsed)
Example #5
0
    def _execute_eql_query(self, events, query):
        """
        Run an EQL query over the provided events using the Python engine.
        :param events: events used both to learn the schema and as engine input
        :param query: EQL query
        :return: list holding the ``data`` of every event of every result, or None when
                 parsing or registering the query raised an ``eql.EqlError``
        """
        learned_schema = eql.Schema.learn(events)
        matches = []

        def collect(result):
            # output hook: keep the raw data of each event in the result
            matches.extend(event.data for event in result.events)

        engine = eql.PythonEngine()
        with learned_schema:
            try:
                parsed = eql.parse_query(query, implied_any=True, implied_base=True)
                engine.add_query(parsed)
            except eql.EqlError as err:
                print(err, file=sys.stderr)
                print('\nTake into account the following schema:')
                pprint(learned_schema.schema)
                return None
            engine.add_output_hook(collect)

        # run the events through the engine; the hook fills `matches`
        engine.stream_events(events)
        return matches
Example #6
0
def _execute_eql_query(events, query):
    """
    Run an EQL query against the provided events.
    :param events: events used both to learn the schema and as engine input
    :param query: EQL query
    :return: list holding the ``data`` of every event of every result, or None when
             parsing or registering the query raised an ``eql.EqlError``
    """
    learned_schema = eql.Schema.learn(events)
    engine = eql.PythonEngine()
    collected = []

    def on_results(results):
        # output hook: store the data of each matched event
        for matched in results.events:
            collected.append(matched.data)

    # the query is parsed and registered while the learned schema is active
    with learned_schema:
        try:
            engine.add_query(eql.parse_query(query, implied_any=True, implied_base=True))
        except eql.EqlError as err:
            print(err, file=sys.stderr)
            print('\nTake into account the following schema:')
            pprint(learned_schema.schema)
            # a query that raises an EqlError yields None
            return None

    engine.add_output_hook(on_results)
    engine.stream_events(events)
    return collected
Example #7
0
 def parsed_query(self):
     """Parse ``self.query`` according to ``self.contents['language']``.

     Returns the ``kql.parse`` result for 'kuery', the ``eql.parse_query`` result
     for 'eql', and None when the query is empty or the language is anything else.
     """
     if not self.query:
         return None

     language = self.contents['language']
     if language == 'kuery':
         return kql.parse(self.query)

     if language == 'eql':
         # TODO: remove once py-eql supports ipv6 for cidrmatch
         with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
             return eql.parse_query(self.query)

     return None
Example #8
0
def from_eql(tree, optimize=True):
    """Convert EQL (an ``eql.ast.EqlNode`` or parseable input) with ``Eql2Kql``.

    Input that is not already an ``EqlNode`` is parsed as a query first; if that
    raises ``eql.EqlSemanticError`` it is parsed as an expression instead.

    :param tree: EQL node or input accepted by the eql parser
    :param optimize: when true, return ``converted.optimize(recursive=True)``
    :return: the walked (and optionally optimized) result
    """
    if isinstance(tree, eql.ast.EqlNode):
        node = tree
    else:
        try:
            node = eql.parse_query(tree, implied_any=True)
        except eql.EqlSemanticError:
            node = eql.parse_expression(tree)

    walked = Eql2Kql().walk(node)
    if not optimize:
        return walked
    return walked.optimize(recursive=True)
Example #9
0
    def get_unique_query_fields(cls, rule_contents):
        """Return the sorted, de-duplicated field names used in a rule's query.

        Only 'kuery' and 'eql' rules are handled; any other language yields None.
        """
        query = rule_contents.get('query')
        language = rule_contents.get('language')
        if language not in ('kuery', 'eql'):
            return None

        # TODO: remove once py-eql supports ipv6 for cidrmatch
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            if language == 'kuery':
                parsed = kql.parse(query)
            else:
                parsed = eql.parse_query(query)

        field_names = set()
        for node in parsed:
            if isinstance(node, (eql.ast.Field, kql.ast.Field)):
                field_names.add(str(node))
        return sorted(field_names)
Example #10
0
 def get_unique_query_fields(cls, rule_contents):
     """Return the sorted, de-duplicated field names used in a rule's query.

     Only 'kuery' and 'eql' rules are handled; any other language yields None.
     """
     query = rule_contents.get('query')
     language = rule_contents.get('language')
     if language not in ('kuery', 'eql'):
         return None

     if language == 'kuery':
         parsed = kql.parse(query)
     else:
         parsed = eql.parse_query(query)

     field_names = set()
     for node in parsed:
         if isinstance(node, (eql.ast.Field, kql.ast.Field)):
             field_names.add(str(node))
     return sorted(field_names)
Example #11
0
def search_rules(query, columns, language, verbose=True):
    """Use KQL or EQL to find matching rules.

    Every loaded rule file is flattened into one dict (file path, document,
    metadata, rule, plus MITRE ATT&CK ``techniques`` / ``tactics``), the dicts are
    sorted by name, and then filtered with the query.

    :param query: KQL or EQL query text; an empty KQL query matches every rule
    :param columns: column names for the table (items may be comma separated)
    :param language: "kql" or "eql"; anything else leaves the result empty
    :param verbose: echo the resulting table when true
    :return: list of matching flattened rules (or EQL result event data)
    """
    from kql import get_evaluator
    from eql.table import Table
    from eql.build import get_engine
    from eql import parse_query
    from eql.pipes import CountPipe

    flattened_rules = []

    for file_name, rule_doc in rule_loader.load_rule_files().items():
        flat = {"file": os.path.relpath(file_name)}
        # later layers override keys of earlier ones
        for layer in (rule_doc, rule_doc["metadata"], rule_doc["rule"]):
            flat.update(layer)

        mitre_threats = []
        for threat in rule_doc["rule"].get("threat", []):
            if threat["framework"] == "MITRE ATT&CK":
                mitre_threats.append(threat)

        technique_ids = []
        for threat in mitre_threats:
            for technique in threat.get("technique", []):
                technique_ids.append(technique["id"])

        tactic_names = [threat["tactic"]["name"] for threat in mitre_threats]
        flat.update(techniques=technique_ids, tactics=tactic_names)
        flattened_rules.append(flat)

    flattened_rules.sort(key=lambda rule: rule["name"])

    filtered = []
    if language == "kql":
        if query:
            matches = get_evaluator(query)
            filtered = [rule for rule in flattened_rules if matches(rule)]
        else:
            # no query: every rule matches
            filtered = list(flattened_rules)
    elif language == "eql":
        parsed = parse_query(query, implied_any=True, implied_base=True)
        run_engine = get_engine(parsed)
        filtered = [result.events[0].data for result in run_engine(flattened_rules)]

        # a count pipe without explicit columns gets the count columns
        if not columns:
            if any(isinstance(pipe, CountPipe) for pipe in parsed.pipes):
                columns = ["key", "count", "percent"]

    # flatten comma-separated column arguments, or fall back to the defaults
    columns = ",".join(columns).split(",") if columns else ["rule_id", "file", "name"]

    table = Table.from_list(columns, filtered)

    if verbose:
        click.echo(table)

    return filtered
Example #12
0
    def validate(self, data: 'QueryRuleData', meta: RuleMeta) -> None:
        """Validate an EQL query while checking TOMLRule.

        Reading ``self.ast`` covers the syntax-only check. Unless schema validation
        is disabled or the rule is deprecated, the query is re-parsed once per entry
        of ``meta.get_validation_stack_versions()`` under that entry's schema.

        :raises eql.EqlParseError: re-raised with a trailer naming the stack, beats
            and ecs versions that failed
        """
        ast = self.ast

        syntax_only = meta.query_schema_validation is False or meta.maturity == "deprecated"
        if syntax_only:
            # syntax only, which is done via self.ast
            return

        stack_versions = meta.get_validation_stack_versions()
        for stack_version, mapping in stack_versions.items():
            beats_version = mapping['beats']
            ecs_version = mapping['ecs']
            err_trailer = f'stack: {stack_version}, beats: {beats_version}, ecs: {ecs_version}'

            beat_types = beats.parse_beats_from_index(data.index)
            if beat_types:
                # NOTE(review): this passes the EQL ast to get_schema_from_kql - confirm
                # whether an EQL-specific helper was intended.
                beat_schema = beats.get_schema_from_kql(ast, beat_types, version=beats_version)
            else:
                beat_schema = None

            kql_schema = ecs.get_kql_schema(version=ecs_version,
                                            indexes=data.index or [],
                                            beat_schema=beat_schema)
            eql_schema = ecs.KqlSchema2Eql(kql_schema)

            try:
                # TODO: switch to custom cidrmatch that allows ipv6
                with eql_schema, eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
                    eql.parse_query(self.query)
            except eql.EqlParseError as exc:
                trailer = err_trailer
                if "Unknown field" in exc.error_msg and beat_types:
                    trailer = f"\nTry adding event.module or event.dataset to specify beats module\n\n{trailer}"

                error_cls = exc.__class__
                raise error_cls(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=trailer) from None
            except Exception:
                # any other failure: print which version set failed, then propagate
                print(err_trailer)
                raise
Example #13
0
    def validate_query(self, beats_version: str, ecs_versions: List[str]):
        """Validate an EQL query while checking TOMLRule.

        The query is parsed once without a schema, then once per ECS version under
        the schema built from ``self.index`` and the detected beats.

        :param beats_version: beats version used when building the beats schema
        :param ecs_versions: ECS versions to validate against
        :raises eql.EqlParseError: when the query fails to parse under a schema
        """
        # TODO: remove once py-eql supports ipv6 for cidrmatch
        # Or, unregister the cidrMatch function and replace it with one that doesn't validate against strict IPv4
        with eql.parser.elasticsearch_syntax, eql.parser.ignore_missing_functions:
            parsed = eql.parse_query(self.query)

        # The prefix before the first "-" of every "*beat-*" index names the beat.
        beat_types = []
        for index in self.index or []:
            if "beat-*" in index:
                beat_types.append(index.split("-")[0])

        beat_schema = None
        if beat_types:
            beat_schema = beats.get_schema_from_eql(parsed, beat_types, version=beats_version)

        for version in ecs_versions:
            kql_schema = ecs.get_kql_schema(indexes=self.index or [], beat_schema=beat_schema, version=version)

            try:
                # TODO: switch to custom cidrmatch that allows ipv6
                with ecs.KqlSchema2Eql(kql_schema), \
                        eql.parser.elasticsearch_syntax, \
                        eql.parser.ignore_missing_functions:
                    eql.parse_query(self.query)

            except eql.EqlTypeMismatchError:
                # propagated as-is, without the rewrapping below
                raise

            except eql.EqlParseError as exc:
                hint = None
                if "Unknown field" in exc.error_msg and beat_types:
                    hint = "\nTry adding event.module or event.dataset to specify beats module"

                error_cls = exc.__class__
                raise error_cls(exc.error_msg, exc.line, exc.column, exc.source,
                                len(exc.caret.lstrip()), trailer=hint) from None
Example #14
0
    def add_analytic(self, analytic):
        """Validate, compile and register an analytic.

        A dict whose only key is 'analytic' is unwrapped first. The raw query text
        is stored under ``metadata['_source']``, the analytic is indexed by id,
        appended to ``self.analytics`` and added to ``self.coverage`` for every
        tactic/technique pair listed in its metadata.
        """
        is_wrapped = isinstance(analytic, dict) and list(analytic.keys()) == ['analytic']
        if is_wrapped:
            analytic = analytic['analytic']

        Analytic.validate(analytic)

        query_text = analytic['query']
        metadata = analytic['metadata']
        metadata['_source'] = query_text

        compiled = eql.ast.EqlAnalytic(metadata=metadata, query=eql.parse_query(query_text))
        self.analytic_lookup[compiled.id] = compiled
        self.analytics.append(compiled)

        techniques = compiled.metadata.get('techniques', [])
        for tactic in compiled.metadata.get('tactics', []):
            by_technique = self.coverage[tactic]
            for technique in techniques:
                by_technique[technique].append(compiled)
Example #15
0
    def add_analytic(self, analytic, path=None):  # type: (dict, str) -> None
        """Validate, compile and register an analytic.

        A dict whose only key is 'analytic' is unwrapped first. The query text, with
        surrounding whitespace stripped and trailing whitespace removed per line, is
        stored under ``metadata['_source']``; ``path`` (when truthy) is stored under
        ``metadata['_path']``. The analytic is indexed by id, appended to
        ``self.analytics`` and added to ``self.coverage`` for every
        tactic/technique pair listed in its metadata.
        """
        is_wrapped = isinstance(analytic, dict) and list(analytic.keys()) == ['analytic']
        if is_wrapped:
            analytic = analytic['analytic']

        Analytic.validate(analytic)

        query_text = analytic['query']
        source_lines = [line.rstrip() for line in query_text.strip().splitlines()]
        metadata = analytic['metadata']
        metadata['_source'] = '\n'.join(source_lines)
        if path:
            metadata['_path'] = path

        compiled = eql.ast.EqlAnalytic(metadata=metadata, query=eql.parse_query(query_text))
        self.analytic_lookup[compiled.id] = compiled
        self.analytics.append(compiled)

        techniques = compiled.metadata.get('techniques', [])
        for tactic in compiled.metadata.get('tactics', []):
            by_technique = self.coverage[tactic]
            for technique in techniques:
                by_technique[technique].append(compiled)
Example #16
0
 def parsed_query(self) -> kql.ast.Expression:
     """Parse ``self.query`` with ``eql.parse_query``.

     Parsing runs inside the ``elasticsearch_syntax`` and
     ``ignore_missing_functions`` parser contexts.
     """
     # NOTE(review): the annotation names a kql type while the eql parser is used - confirm.
     with eql.parser.elasticsearch_syntax:
         with eql.parser.ignore_missing_functions:
             tree = eql.parse_query(self.query)
     return tree
Example #17
0
def parse(text):
    """Parse ``text`` as an EQL query with implied base and implied any enabled.

    On ``EqlError`` the error is printed to stderr and the process exits with
    status 2.
    """
    try:
        tree = parse_query(text, implied_base=True, implied_any=True)
    except EqlError as err:
        print(err, file=sys.stderr)
        sys.exit(2)
    return tree
Example #18
0
    def assert_normalization_match(self, standard_query, sysmon_query):
        """Assert that normalizing ``standard_query`` yields the parsed ``sysmon_query``.

        Both queries are parsed; the parsed standard query is passed through
        ``self.sysmon_normalizer`` and compared with the parsed sysmon query.
        """
        original_tree = eql.parse_query(standard_query)
        expected_tree = eql.parse_query(sysmon_query)

        normalized_tree = self.sysmon_normalizer.normalize_ast(original_tree)
        self.assertEqual(expected_tree, normalized_tree)
Example #19
0
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from eql import parse_query, ParseError
import argparse, sys

if __name__ == '__main__':
    # Command-line entry point: parse the query given with --query/-q and exit
    # with 0 when it parses, or -1 when it is missing or raises ParseError.
    cli = argparse.ArgumentParser(description='check an eql query for valid syntax against the eql parser')
    cli.add_argument('--query', '-q', help='the eql query to check for valid syntax')
    options = cli.parse_args()

    eql_query = options.query
    if not eql_query:
        print("Must specify the --query or -q parameter")
        sys.exit(-1)

    try:
        parse_query(eql_query)
    except ParseError as err:
        # NOTE(review): relies on ParseError exposing `.message` - confirm for the eql version in use
        print("eql_error: {0}".format(err.message))
        sys.exit(-1)
    else:
        sys.exit(0)
Example #20
0
def search_rules(query,
                 columns,
                 language,
                 count,
                 verbose=True,
                 rules: Dict[str, dict] = None,
                 pager=False):
    """Use KQL or EQL to find matching rules.

    Every rule document is flattened into one dict (file path, document,
    metadata, rule, plus MITRE ATT&CK ``techniques`` / ``tactics`` /
    ``subtechniques`` and ``unique_fields``), the dicts are sorted by name, and
    then filtered with the query.

    :param query: KQL or EQL query text; an empty KQL query matches every rule
    :param columns: column names for the table (items may be comma separated)
    :param language: "kql" or "eql"; anything else leaves the result empty
    :param count: when true, only echo the number of matches and return them
    :param verbose: echo the resulting table when true; also forwarded to the rule loader
    :param rules: mapping of file name to rule document; loaded via ``rule_loader`` when falsy
    :param pager: echo the table through the pager instead of directly
    :return: list of matching flattened rules (or EQL result event data)
    """
    from kql import get_evaluator
    from eql.table import Table
    from eql.build import get_engine
    from eql import parse_query
    from eql.pipes import CountPipe

    flattened_rules = []
    rules = rules or rule_loader.load_rule_files(verbose=verbose)

    for file_name, rule_doc in rules.items():
        flat = {"file": os.path.relpath(file_name)}
        # later layers override keys of earlier ones
        for layer in (rule_doc, rule_doc["metadata"], rule_doc["rule"]):
            flat.update(layer)

        tactic_names = []
        technique_ids = []
        subtechnique_ids = []

        for entry in rule_doc['rule'].get('threat', []):
            if entry["framework"] == "MITRE ATT&CK":
                techniques = entry.get('technique', [])
                tactic_names.append(entry['tactic']['name'])
                technique_ids += [technique['id'] for technique in techniques]
                subtechnique_ids += [
                    subtechnique['id']
                    for technique in techniques
                    for subtechnique in technique.get('subtechnique', [])
                ]

        unique_fields = Rule.get_unique_query_fields(rule_doc['rule'])
        flat.update(techniques=technique_ids,
                    tactics=tactic_names,
                    subtechniques=subtechnique_ids,
                    unique_fields=unique_fields)
        flattened_rules.append(flat)

    flattened_rules.sort(key=lambda rule: rule["name"])

    filtered = []
    if language == "kql":
        if query:
            matches = get_evaluator(query)
            filtered = [rule for rule in flattened_rules if matches(rule)]
        else:
            # no query: every rule matches
            filtered = list(flattened_rules)
    elif language == "eql":
        parsed = parse_query(query, implied_any=True, implied_base=True)
        run_engine = get_engine(parsed)
        filtered = [result.events[0].data for result in run_engine(flattened_rules)]

        # a count pipe without explicit columns gets the count columns
        if not columns:
            if any(isinstance(pipe, CountPipe) for pipe in parsed.pipes):
                columns = ["key", "count", "percent"]

    if count:
        click.echo(f'{len(filtered)} rules')
        return filtered

    # flatten comma-separated column arguments, or fall back to the defaults
    columns = ",".join(columns).split(",") if columns else ["rule_id", "file", "name"]

    table = Table.from_list(columns, filtered)

    if verbose:
        if pager:
            click.echo_via_pager(table)
        else:
            click.echo(table)

    return filtered
Example #21
0
 def parsed_query(self):
     """Parse ``self.query`` according to ``self.contents['language']``.

     Returns the ``kql.parse`` result for 'kuery', the ``eql.parse_query`` result
     for 'eql', and None when the query is empty or the language is anything else.
     """
     if not self.query:
         return None

     language = self.contents['language']
     if language == 'kuery':
         return kql.parse(self.query)
     if language == 'eql':
         return eql.parse_query(self.query)
     return None