Ejemplo n.º 1
0
    def test_regular_expression_compile(self):
        """Check compile_regular_expression with flags and parameters."""
        pattern = r"Foo[0-9]bar"
        lowercase_sample = "foo2bar"
        exact_case_sample = "Foo9bar"

        # No flags: only the sample with the exact casing is expected to match.
        case_sensitive = utils.compile_regular_expression(expression_string=pattern)
        self.assertFalse(case_sensitive.match(lowercase_sample))
        self.assertTrue(case_sensitive.match(exact_case_sample))

        # With IGNORECASE the lowercase sample is expected to match as well.
        case_insensitive = utils.compile_regular_expression(
            expression_string=pattern,
            expression_flags=["IGNORECASE"],
        )
        self.assertTrue(case_insensitive.match(lowercase_sample))

        # A pattern carrying a {param} placeholder, compiled together with a
        # parameter mapping, is expected to match the same sample.
        templated_pattern = r"Foo[0-9]{param}"
        parameters = {"param": "bar"}
        parameterized = utils.compile_regular_expression(
            expression_string=templated_pattern,
            expression_flags=["IGNORECASE"],
            expression_parameters=parameters,
        )
        self.assertTrue(parameterized.match(lowercase_sample))
Ejemplo n.º 2
0
    def test_regular_expression_compile(self):
        """Exercise the regular expression compile helper in utils."""
        compile_re = utils.compile_regular_expression
        base_pattern = r'Foo[0-9]bar'
        lower_text = 'foo2bar'
        mixed_text = 'Foo9bar'

        # Compiled without flags: the lowercase text must not match.
        strict = compile_re(expression_string=base_pattern)
        self.assertFalse(strict.match(lower_text))
        self.assertTrue(strict.match(mixed_text))

        # Compiled with IGNORECASE: the lowercase text must match.
        relaxed = compile_re(
            expression_string=base_pattern,
            expression_flags=['IGNORECASE'])
        self.assertTrue(relaxed.match(lower_text))

        # Compiled from a pattern with a {param} placeholder plus a
        # parameter mapping: the lowercase text must still match.
        substitutions = {'param': 'bar'}
        templated = compile_re(
            expression_string=r'Foo[0-9]{param}',
            expression_flags=['IGNORECASE'],
            expression_parameters=substitutions)
        self.assertTrue(templated.match(lower_text))
Ejemplo n.º 3
0
    def extract_feature(self, name, config):
        """Extract features from events.

        Runs the configured regular expression against one attribute of
        every event returned by the query and stores the unique matches in
        the attribute named by the `store_as` setting. Optionally creates a
        view and an aggregation for the result.

        Args:
            name: String with the name describing the feature to be extracted.
            config: A dict that contains the configuration for the feature
                extraction. See data/features.yaml for fields and further
                documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result, or an empty string
            when the configuration lacks the attribute, the store_as field
            or the regular expression.
        """
        query = config.get('query_string')
        query_dsl = config.get('query_dsl')
        attribute = config.get('attribute')
        store_type_list = config.get('store_type_list', False)
        keep_multimatch = config.get('keep_multimatch', False)
        overwrite_store_as = config.get('overwrite_store_as', True)
        overwrite_and_merge_store_as = config.get(
            'overwrite_and_merge_store_as', False)

        if not attribute:
            logger.warning('No attribute defined.')
            return ''

        store_as = config.get('store_as')
        if not store_as:
            logger.warning('No attribute defined to store results in.')
            return ''

        tags = config.get('tags', [])

        expression_string = config.get('re')
        if not expression_string:
            logger.warning('No regular expression defined.')
            return ''

        expression = utils.compile_regular_expression(
            expression_string=expression_string,
            expression_flags=config.get('re_flags'))

        emoji_names = config.get('emojis', [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        return_fields = [attribute, store_as]

        events = self.event_stream(query_string=query,
                                   query_dsl=query_dsl,
                                   return_fields=return_fields)

        event_counter = 0
        for event in events:
            attribute_field = event.source.get(attribute)
            if isinstance(attribute_field, six.text_type):
                attribute_value = attribute_field
            elif isinstance(attribute_field, (list, tuple)):
                # Convert every item to text so that a list holding
                # non-string items does not make join() raise.
                attribute_value = ','.join(
                    six.text_type(item) for item in attribute_field)
            elif isinstance(attribute_field, (int, float)):
                # The regular expression only accepts text, so numbers are
                # converted. Falsy numbers keep being skipped, as before.
                if attribute_field:
                    attribute_value = six.text_type(attribute_field)
                else:
                    attribute_value = None
            else:
                attribute_value = None

            if not attribute_value:
                continue

            result = expression.findall(attribute_value)
            if not result:
                continue
            # Keep each distinct match only once.
            result = list(set(result))

            # The event counts as a hit even if nothing is stored below.
            event_counter += 1
            store_as_current_val = event.source.get(store_as)
            if store_as_current_val and not overwrite_store_as:
                continue

            # The type of the value already stored on this event overrides
            # the configured setting. A per-event variable is used so that
            # the override does not leak into the following events.
            event_store_type_list = store_type_list
            if isinstance(store_as_current_val, six.text_type):
                event_store_type_list = False
            elif isinstance(store_as_current_val, (list, tuple)):
                event_store_type_list = True

            new_value = self._get_attribute_value(
                store_as_current_val, result, keep_multimatch,
                overwrite_and_merge_store_as, event_store_type_list)
            if not new_value:
                continue
            event.add_attributes({store_as: new_value})
            event.add_emojis(emojis_to_add)
            event.add_tags(tags)

            # Commit the event to the datastore.
            event.commit()

        aggregate_results = config.get('aggregate', False)
        create_view = config.get('create_view', False)

        # If aggregation is turned on, we automatically create a view,
        # since the aggregation is attached to it.
        if aggregate_results:
            create_view = True

        if create_view and event_counter:
            view = self.sketch.add_view(name,
                                        self.NAME,
                                        query_string=query,
                                        query_dsl=query_dsl)

            if aggregate_results:
                params = {
                    'field': store_as,
                    'limit': 20,
                    'index': [self.timeline_id],
                }
                self.sketch.add_aggregation(
                    name='Top 20 for: {0:s} [{1:s}]'.format(store_as, name),
                    agg_name='field_bucket',
                    agg_params=params,
                    description='Created by the feature extraction analyzer',
                    view_id=view.id,
                    chart_type='hbarchart')

        return 'Feature extraction [{0:s}] extracted {1:d} features.'.format(
            name, event_counter)
Ejemplo n.º 4
0
    def tagger(self, name, config):
        """Tag and add emojis to events.

        Every event returned by the query gets the configured tags and
        emojis. When both a regular expression and the attribute to apply
        it to are configured, events whose attribute value does not match
        the expression are skipped. Events that lack the attribute (or
        have an empty value) are still tagged.

        Args:
            name: String with the name describing what will be tagged.
            config: A dict that contains the configuration See data/tags.yaml
                for fields and documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result.
        """
        query = config.get('query_string')
        query_dsl = config.get('query_dsl')
        save_search = config.get('save_search', False)
        # For legacy reasons to support both save_search and
        # create_view parameters.
        if not save_search:
            save_search = config.get('create_view', False)

        search_name = config.get('search_name', None)
        # For legacy reasons to support both search_name and view_name.
        if search_name is None:
            search_name = config.get('view_name', name)

        tags = config.get('tags', [])
        emoji_names = config.get('emojis', [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        expression_string = config.get('regular_expression', '')
        attributes = None
        re_attribute = None
        expression = None
        if expression_string:
            expression = utils.compile_regular_expression(
                expression_string=expression_string,
                expression_flags=config.get('re_flags'))

            re_attribute = config.get('re_attribute')
            if re_attribute:
                attributes = [re_attribute]

        event_counter = 0
        events = self.event_stream(query_string=query,
                                   query_dsl=query_dsl,
                                   return_fields=attributes)

        for event in events:
            # Without an attribute to inspect there is nothing to run the
            # expression against, so the filter is skipped instead of
            # failing on a missing attribute name.
            if expression and re_attribute:
                value = event.source.get(re_attribute)
                if value:
                    result = expression.findall(value)
                    if not result:
                        # Skip counting this tag since the regular expression
                        # didn't find anything.
                        continue

            event_counter += 1
            event.add_tags(tags)
            event.add_emojis(emojis_to_add)

            # Commit the event to the datastore.
            event.commit()

        # Only save the search when at least one event was tagged.
        if save_search and event_counter:
            self.sketch.add_view(search_name,
                                 self.NAME,
                                 query_string=query,
                                 query_dsl=query_dsl)

        return '{0:d} events tagged for [{1:s}]'.format(event_counter, name)
Ejemplo n.º 5
0
    def find_data(self):
        """Returns a tuple with a bool on whether data was found and a message.

        Searches the datastore within the configured date range using the
        query from the rule. Without a regular expression the first hit
        counts as a discovery; with one, the rule's attribute must also
        match the expression on at least one event.

        Raises:
            RuntimeError: If the data finder cannot run, that is the rule has
                neither a query string nor a query DSL, or it defines a
                regular expression without an attribute.

        Returns:
            A tuple with two entries:
                bool: whether data was discovered or not.
                str: a message string indicating how the data was found or
                    the reason why it wasn't.
        """
        if not self.can_run():
            return False, "Unable to run the data finder, missing information."

        query_string = self._rule.get("query_string")
        query_dsl = self._rule.get("query_dsl")

        if not query_string and not query_dsl:
            raise RuntimeError(
                "Unable to run, missing either a query string or a DSL to "
                "perform the search.")

        attribute = self._rule.get("attribute")
        regular_expression = self._rule.get("regular_expression")
        if regular_expression:
            if not attribute:
                raise RuntimeError(
                    "Attribute must be set in a rule if a regular expression "
                    "is used.")
            expression = utils.compile_regular_expression(
                expression_string=regular_expression,
                expression_flags=self._rule.get("re_flags"),
                expression_parameters=self._rule.get("re_parameters"),
            )
        else:
            expression = None

        # Restrict the search to the configured date range.
        query_filter = {
            "chips": [{
                "field": "",
                "type": "datetime_range",
                "operator": "must",
                "active": True,
                "value": f"{self._start_date},{self._end_date}",
            }]
        }

        event_generator = self._datastore.search_stream(
            query_string=query_string,
            query_dsl=query_dsl,
            query_filter=query_filter,
            indices=self._indices,
            return_fields=attribute,
            enable_scroll=True,
            timeline_ids=self._timeline_ids,
        )

        for event in event_generator:
            # TODO: Save the result to the Investigation object when that
            # exist in the future.
            if not expression:
                return True, "Data discovered"

            source = event.get("_source", {})
            value = source.get(attribute)
            if not value:
                logger.warning("Attribute: [{0:s}] is empty".format(attribute))
                # Nothing to match against; moving on also avoids handing
                # a missing value to the regular expression.
                continue

            result = expression.findall(value)
            if not result:
                continue

            return True, "Data discovered using Regular Expression"

        return False, "No hits discovered"
Ejemplo n.º 6
0
    def tagger(self, name, config):
        """Tag and add emojis to events.

        Tags starting with "$" are dynamic: the remainder of the tag names
        an event attribute whose value (after the configured modifiers have
        been applied) is added as a tag. All other tags are added as is.
        When a regular expression is configured, events whose `re_attribute`
        value does not match it are skipped; events without a value for
        that attribute are still tagged.

        Args:
            name: String with the name describing what will be tagged.
            config: A dict that contains the configuration See data/tags.yaml
                for fields and documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result.
        """
        query = config.get("query_string")
        query_dsl = config.get("query_dsl")
        save_search = config.get("save_search", False)
        # For legacy reasons to support both save_search and
        # create_view parameters.
        if not save_search:
            save_search = config.get("create_view", False)

        search_name = config.get("search_name", None)
        # For legacy reasons to support both search_name and view_name.
        if search_name is None:
            search_name = config.get("view_name", name)

        # Split the configured tags into static ones and dynamic ones
        # (attribute names, with the leading "$" removed).
        tags = set(config.get("tags", []))
        dynamic_tags = {tag[1:] for tag in tags if tag.startswith("$")}
        tags = {tag for tag in tags if not tag.startswith("$")}

        emoji_names = config.get("emojis", [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        # These settings do not change between events, read them once.
        modifiers = config.get("modifiers", [])
        re_attribute = config.get("re_attribute")

        expression_string = config.get("regular_expression", "")
        attributes = list(dynamic_tags)
        expression = None
        if expression_string:
            expression = utils.compile_regular_expression(
                expression_string=expression_string,
                expression_flags=config.get("re_flags"),
            )

            if re_attribute:
                attributes.append(re_attribute)

        event_counter = 0
        events = self.event_stream(query_string=query,
                                   query_dsl=query_dsl,
                                   return_fields=attributes)

        for event in events:
            if expression:
                value = event.source.get(re_attribute)
                if value:
                    result = expression.findall(value)
                    if not result:
                        # Skip counting this tag since the regular expression
                        # didn't find anything.
                        continue

            event_counter += 1
            event.add_tags(tags)

            # Compute dynamic tag values with modifiers.
            dynamic_tag_values = []
            for attribute in dynamic_tags:
                tag_value = event.source.get(attribute)
                if tag_value is None:
                    # The event lacks this attribute: there is no value to
                    # run the modifiers on or to add as a tag.
                    continue
                for mod in modifiers:
                    tag_value = self.MODIFIERS[mod](tag_value)
                # Strings are iterable too, but a string is one tag and
                # must not be split into one tag per character.
                is_single_value = isinstance(
                    tag_value, (str, bytes)) or not isinstance(
                        tag_value, Iterable)
                if is_single_value:
                    dynamic_tag_values.append(tag_value)
                else:
                    dynamic_tag_values.extend(tag_value)
            event.add_tags(dynamic_tag_values)

            event.add_emojis(emojis_to_add)

            # Commit the event to the datastore.
            event.commit()

        # Only save the search when at least one event was tagged.
        if save_search and event_counter:
            self.sketch.add_view(search_name,
                                 self.NAME,
                                 query_string=query,
                                 query_dsl=query_dsl)
        return "{0:d} events tagged for [{1:s}]".format(event_counter, name)