Example #1
    def test_get_validated_indices(self):
        """Test for validating indices."""
        sketch = self.sketch1
        sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
        valid_indices = ['test']
        invalid_indices = ['test', 'fail']
        self.assertListEqual(sketch_indices,
                             get_validated_indices(valid_indices,
                                                   sketch_indices))
        self.assertFalse('fail' in get_validated_indices(
            invalid_indices, sketch_indices))
Example #2
    def test_get_validated_indices(self):
        """Test for validating indices."""
        sketch = self.sketch1
        sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
        valid_indices = [u'test']
        invalid_indices = [u'test', u'fail']
        self.assertListEqual(
            sketch_indices, get_validated_indices(valid_indices,
                                                  sketch_indices))
        self.assertFalse(
            u'fail' in get_validated_indices(invalid_indices, sketch_indices))
Example #3
    def test_get_validated_indices(self):
        """Test for validating indices."""
        sketch = self.sketch1
        sketch_indices = [t.searchindex.index_name for t in sketch.timelines]

        valid_indices = ["test"]
        invalid_indices = ["test", "fail"]
        test_indices, _ = get_validated_indices(valid_indices, sketch)
        self.assertListEqual(sketch_indices, test_indices)

        test_indices, _ = get_validated_indices(invalid_indices, sketch)
        self.assertFalse("fail" in test_indices)
Example #4
    def post(self, sketch_id):
        """Handles POST request to the resource.
        Handler for /api/v1/sketches/:sketch_id/aggregation/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with aggregation results
        """
        sketch = Sketch.query.get_with_acl(sketch_id)
        form = AggregationForm.build(request)

        if form.validate_on_submit():
            query_filter = form.filter.data
            query_dsl = form.dsl.data
            sketch_indices = [
                t.searchindex.index_name for t in sketch.timelines
            ]
            indices = query_filter.get(u'indices', sketch_indices)

            # If _all in indices then execute the query on all indices
            if u'_all' in indices:
                indices = sketch_indices

            # Make sure that the indices in the filter are part of the sketch.
            # This will also remove any deleted timeline from the search result.
            indices = get_validated_indices(indices, sketch_indices)

            # Make sure we have a query string or star filter
            if not any([form.query.data, query_filter.get(u'star'),
                        query_filter.get(u'events')]):
                abort(HTTP_STATUS_CODE_BAD_REQUEST)

            result = []
            if form.aggtype.data == u'heatmap':
                result = heatmap(
                    es_client=self.datastore,
                    sketch_id=sketch_id,
                    query_string=form.query.data,
                    query_filter=query_filter,
                    query_dsl=query_dsl,
                    indices=indices)
            elif form.aggtype.data == u'histogram':
                result = histogram(
                    es_client=self.datastore,
                    sketch_id=sketch_id,
                    query_string=form.query.data,
                    query_filter=query_filter,
                    query_dsl=query_dsl,
                    indices=indices)

            else:
                abort(HTTP_STATUS_CODE_BAD_REQUEST)

            schema = {u'objects': result}
            return jsonify(schema)
        return abort(HTTP_STATUS_CODE_BAD_REQUEST)
Example #5
    def post(self, sketch_id):
        """Handles POST request to the resource.
        Handler for /api/v1/sketches/:sketch_id/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with list of matched events
        """
        sketch = Sketch.query.get_with_acl(sketch_id)
        if not sketch:
            abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

        if not sketch.has_permission(current_user, 'read'):
            abort(HTTP_STATUS_CODE_FORBIDDEN,
                  'User does not have read access controls on sketch.')

        if sketch.get_status.status == 'archived':
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  'Unable to query on an archived sketch.')

        form = forms.ExploreForm.build(request)

        if not form.validate_on_submit():
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  'Unable to explore data, unable to validate form data')

        # TODO: Remove form and use json instead.
        query_dsl = form.dsl.data
        enable_scroll = form.enable_scroll.data
        scroll_id = form.scroll_id.data
        file_name = form.file_name.data

        query_filter = request.json.get('filter', {})

        return_field_string = form.fields.data
        if return_field_string:
            return_fields = [x.strip() for x in return_field_string.split(',')]
        else:
            return_fields = query_filter.get('fields', [])
            return_fields = [field['field'] for field in return_fields]
            return_fields.extend(DEFAULT_SOURCE_FIELDS)

        sketch_indices = {
            t.searchindex.index_name
            for t in sketch.timelines if t.get_status.status.lower() == 'ready'
        }
        if not query_filter:
            query_filter = {}

        indices = query_filter.get('indices', sketch_indices)

        # If _all in indices then execute the query on all indices
        if '_all' in indices:
            indices = sketch_indices

        # Make sure that the indices in the filter are part of the sketch.
        # This will also remove any deleted timeline from the search result.
        indices = get_validated_indices(indices, sketch_indices)

        # Make sure we have a query string or star filter
        if not any([form.query.data, query_filter.get('star'),
                    query_filter.get('events'), query_dsl]):
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'The request needs a query string/DSL and/or a star filter.')

        # Aggregate hit count per index.
        index_stats_agg = {"indices": {"terms": {"field": "_index"}}}

        if file_name:
            file_object = io.BytesIO()

            form_data = {
                'created_at': datetime.datetime.utcnow().isoformat(),
                'created_by': current_user.username,
                'sketch': sketch_id,
                'query': form.query.data,
                'query_dsl': query_dsl,
                'query_filter': query_filter,
                'return_fields': return_fields,
            }
            with zipfile.ZipFile(file_object, mode='w') as zip_file:
                zip_file.writestr('METADATA', data=json.dumps(form_data))
                fh = export.query_to_filehandle(query_string=form.query.data,
                                                query_dsl=query_dsl,
                                                query_filter=query_filter,
                                                indices=indices,
                                                sketch=sketch,
                                                datastore=self.datastore)
                fh.seek(0)
                zip_file.writestr('query_results.csv', fh.read())
            file_object.seek(0)
            return send_file(file_object,
                             mimetype='zip',
                             attachment_filename=file_name)

        if scroll_id:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.scroll(scroll_id=scroll_id,
                                                  scroll='1m')
        else:
            try:
                result = self.datastore.search(sketch_id,
                                               form.query.data,
                                               query_filter,
                                               query_dsl,
                                               indices,
                                               aggregations=index_stats_agg,
                                               return_fields=return_fields,
                                               enable_scroll=enable_scroll)
            except ValueError as e:
                abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Get number of matching documents per index.
        count_per_index = {}
        try:
            for bucket in result['aggregations']['indices']['buckets']:
                key = bucket.get('key')
                if key:
                    count_per_index[key] = bucket.get('doc_count')
        except KeyError:
            pass

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result['hits']['hits']:
            event['selected'] = False
            event['_source']['label'] = []
            try:
                for label in event['_source']['timesketch_label']:
                    if sketch.id != label['sketch_id']:
                        continue
                    event['_source']['label'].append(label['name'])
                del event['_source']['timesketch_label']
            except KeyError:
                pass

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        # TODO: Add a call to utils.update_sketch_last_activity once new
        # mechanism has been added, instead of relying on user views.
        view = View.get_or_create(user=current_user, sketch=sketch, name='')
        view.update_modification_time()
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter, ensure_ascii=False)
        view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
        db_session.add(view)
        db_session.commit()

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        meta = {
            'es_time': result['took'],
            'es_total_count': result['hits']['total'],
            'timeline_colors': tl_colors,
            'timeline_names': tl_names,
            'count_per_index': count_per_index,
            'scroll_id': result.get('_scroll_id', ''),
        }

        # Elasticsearch version 7.x returns total hits as a dictionary.
        # TODO: Refactor when version 6.x has been deprecated.
        if isinstance(meta['es_total_count'], dict):
            meta['es_total_count'] = meta['es_total_count'].get('value', 0)

        schema = {'meta': meta, 'objects': result['hits']['hits']}
        return jsonify(schema)
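The compatibility shim at the end of this handler normalizes the two shapes Elasticsearch uses for result['hits']['total']: 6.x returns a plain integer, 7.x a dictionary. A self-contained illustration of that normalization (sample values are made up):

    es6_hits = {'total': 1234}
    es7_hits = {'total': {'value': 1234, 'relation': 'eq'}}

    def total_count(hits):
        """Return the hit count regardless of Elasticsearch version."""
        total = hits['total']
        if isinstance(total, dict):
            return total.get('value', 0)
        return total

    assert total_count(es6_hits) == total_count(es7_hits) == 1234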
Example #6
    def post(self, sketch_id):
        """Handles POST request to the resource.
        Handler for /api/v1/sketches/:sketch_id/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with list of matched events
        """
        sketch = Sketch.query.get_with_acl(sketch_id)
        if not sketch:
            abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

        if not sketch.has_permission(current_user, 'read'):
            abort(HTTP_STATUS_CODE_FORBIDDEN,
                  'User does not have read access controls on sketch.')

        if sketch.get_status.status == 'archived':
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  'Unable to query on an archived sketch.')

        form = forms.ExploreForm.build(request)

        if not form.validate_on_submit():
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  'Unable to explore data, unable to validate form data')

        # TODO: Remove form and use json instead.
        query_dsl = form.dsl.data
        enable_scroll = form.enable_scroll.data
        scroll_id = form.scroll_id.data
        file_name = form.file_name.data
        count = bool(form.count.data)

        query_filter = request.json.get('filter', {})
        parent = request.json.get('parent', None)
        incognito = request.json.get('incognito', False)

        return_field_string = form.fields.data
        if return_field_string:
            return_fields = [x.strip() for x in return_field_string.split(',')]
        else:
            return_fields = query_filter.get('fields', [])
            return_fields = [field['field'] for field in return_fields]
            return_fields.extend(DEFAULT_SOURCE_FIELDS)

        if not query_filter:
            query_filter = {}

        all_indices = list(
            {t.searchindex.index_name
             for t in sketch.timelines})
        indices = query_filter.get('indices', all_indices)

        # If _all in indices then execute the query on all indices
        if '_all' in indices:
            indices = all_indices

        # Make sure that the indices in the filter are part of the sketch.
        # This will also remove any deleted timeline from the search result.
        indices, timeline_ids = get_validated_indices(indices, sketch)

        # Remove indices that don't exist from search.
        indices = utils.validate_indices(indices, self.datastore)

        if not indices:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'No valid search indices were found to perform the search on.')

        # Make sure we have a query string or star filter
        if not any([form.query.data, query_filter.get('star'),
                    query_filter.get('events'), query_dsl]):
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'The request needs a query string/DSL and/or a star filter.')

        # Aggregate hit count per index.
        index_stats_agg = {
            'indices': {
                'terms': {
                    'field': '_index',
                    'min_doc_count': 0,
                    'size': len(sketch.timelines)
                }
            },
            'timelines': {
                'terms': {
                    'field': '__ts_timeline_id',
                    'min_doc_count': 0,
                    'size': len(sketch.timelines)
                }
            }
        }
        if count:
            # Count operations do not support size parameters.
            if 'size' in query_filter:
                _ = query_filter.pop('size')
            if 'terminate_after' in query_filter:
                _ = query_filter.pop('terminate_after')

            try:
                result = self.datastore.search(sketch_id=sketch_id,
                                               query_string=form.query.data,
                                               query_filter=query_filter,
                                               query_dsl=query_dsl,
                                               indices=indices,
                                               timeline_ids=timeline_ids,
                                               count=True)
            except ValueError as e:
                abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

            # Get number of matching documents per index.
            schema = {'meta': {'total_count': result}, 'objects': []}
            return jsonify(schema)

        if file_name:
            file_object = io.BytesIO()

            form_data = {
                'created_at': datetime.datetime.utcnow().isoformat(),
                'created_by': current_user.username,
                'sketch': sketch_id,
                'query': form.query.data,
                'query_dsl': query_dsl,
                'query_filter': query_filter,
                'return_fields': return_fields,
            }
            with zipfile.ZipFile(file_object, mode='w') as zip_file:
                zip_file.writestr('METADATA', data=json.dumps(form_data))
                fh = export.query_to_filehandle(query_string=form.query.data,
                                                query_dsl=query_dsl,
                                                query_filter=query_filter,
                                                indices=indices,
                                                sketch=sketch,
                                                datastore=self.datastore,
                                                return_fields=return_fields,
                                                timeline_ids=timeline_ids)
                fh.seek(0)
                zip_file.writestr('query_results.csv', fh.read())
            file_object.seek(0)
            return send_file(file_object,
                             mimetype='zip',
                             attachment_filename=file_name)

        if scroll_id:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.scroll(scroll_id=scroll_id,
                                                  scroll='1m')
        else:
            try:
                result = self.datastore.search(sketch_id=sketch_id,
                                               query_string=form.query.data,
                                               query_filter=query_filter,
                                               query_dsl=query_dsl,
                                               indices=indices,
                                               aggregations=index_stats_agg,
                                               return_fields=return_fields,
                                               enable_scroll=enable_scroll,
                                               timeline_ids=timeline_ids)
            except ValueError as e:
                abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Get number of matching documents per index.
        count_per_index = {}
        try:
            for bucket in result['aggregations']['indices']['buckets']:
                key = bucket.get('key')
                if key:
                    count_per_index[key] = bucket.get('doc_count')
        except KeyError:
            pass

        # Get number of matching documents per timeline.
        count_per_timeline = {}
        try:
            for bucket in result['aggregations']['timelines']['buckets']:
                key = bucket.get('key')
                if key:
                    count_per_timeline[key] = bucket.get('doc_count')
        except KeyError:
            pass

        # Total count for query regardless of returned results.
        count_total_complete = sum(count_per_index.values())

        comments = {}
        if 'comment' in return_fields:
            events = Event.query.filter_by(sketch=sketch).all()
            for event in events:
                for comment in event.comments:
                    comments.setdefault(event.document_id, [])
                    comments[event.document_id].append(comment.comment)

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result['hits']['hits']:
            event['selected'] = False
            event['_source']['label'] = []
            try:
                for label in event['_source']['timesketch_label']:
                    if sketch.id != label['sketch_id']:
                        continue
                    event['_source']['label'].append(label['name'])
                del event['_source']['timesketch_label']
            except KeyError:
                pass

            if 'comment' in return_fields:
                event['_source']['comment'] = comments.get(event['_id'], [])

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        # TODO: Deprecate this and change how last activity is determined, e.g
        # use the new Search History feature instead.
        view = View.get_or_create(user=current_user, sketch=sketch, name='')
        view.update_modification_time()
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter, ensure_ascii=False)
        view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
        db_session.add(view)
        db_session.commit()

        # Search History
        search_node = None
        new_search = SearchHistory(user=current_user, sketch=sketch)

        if parent:
            previous_search = SearchHistory.query.get(parent)
        else:
            previous_search = SearchHistory.query.filter_by(
                user=current_user,
                sketch=sketch).order_by(SearchHistory.id.desc()).first()

        if not incognito:
            is_same_query = False
            is_same_filter = False

            new_search.query_string = form.query.data
            new_search.query_filter = json.dumps(query_filter,
                                                 ensure_ascii=False)

            new_search.query_result_count = count_total_complete
            new_search.query_time = result['took']

            if previous_search:
                new_search.parent = previous_search

                new_query = new_search.query_string
                new_filter = new_search.query_filter
                previous_query = previous_search.query_string
                previous_filter = previous_search.query_filter

                is_same_query = previous_query == new_query
                is_same_filter = previous_filter == new_filter

            if not all([is_same_query, is_same_filter]):
                db_session.add(new_search)
                db_session.commit()
                # Create metric if user creates a new branch.
                if new_search.parent:
                    if len(new_search.parent.children) > 1:
                        METRICS['searchhistory'].labels(action='branch').inc()
            else:
                METRICS['searchhistory'].labels(
                    action='ignore_same_query').inc()
        else:
            METRICS['searchhistory'].labels(action='incognito').inc()

        search_node = new_search if new_search.id else previous_search

        if not search_node:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, 'Unable to save search')

        search_node = search_node.build_tree(search_node, {}, recurse=False)

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        meta = {
            'es_time': result['took'],
            'es_total_count': result['hits']['total'],
            'es_total_count_complete': count_total_complete,
            'timeline_colors': tl_colors,
            'timeline_names': tl_names,
            'count_per_index': count_per_index,
            'count_per_timeline': count_per_timeline,
            'scroll_id': result.get('_scroll_id', ''),
            'search_node': search_node
        }

        # Elasticsearch version 7.x returns total hits as a dictionary.
        # TODO: Refactor when version 6.x has been deprecated.
        if isinstance(meta['es_total_count'], dict):
            meta['es_total_count'] = meta['es_total_count'].get('value', 0)

        schema = {'meta': meta, 'objects': result['hits']['hits']}
        return jsonify(schema)
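Example #6 adds a second filtering pass, utils.validate_indices, which drops index names that do not actually exist in the datastore. A plausible sketch of such a helper, assuming the datastore wraps an elasticsearch-py client exposed as datastore.client (the real implementation may differ):

    def validate_indices(indices, datastore):
        """Return only the indices that exist in the datastore."""
        return [
            index for index in indices
            if datastore.client.indices.exists(index=index)
        ]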
Example #7
    def post(self, sketch_id):
        """Handles POST request to the resource.

        Handler for /api/v1/sketches/<int:sketch_id>/aggregation/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with aggregation results
        """
        form = forms.AggregationExploreForm.build(request)
        if not form.validate_on_submit():
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Not able to run aggregation, unable to validate form data.')

        sketch = Sketch.query.get_with_acl(sketch_id)
        if not sketch:
            abort(
                HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

        if not sketch.has_permission(current_user, 'read'):
            abort(HTTP_STATUS_CODE_FORBIDDEN,
                  'User does not have read access controls on sketch.')

        if sketch.get_status.status == 'archived':
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Not able to run aggregation on an archived sketch.')

        sketch_indices = {
            t.searchindex.index_name
            for t in sketch.timelines
            if t.get_status.status.lower() == 'ready'
        }

        aggregation_dsl = form.aggregation_dsl.data
        aggregator_name = form.aggregator_name.data

        if aggregator_name:
            if isinstance(form.aggregator_parameters.data, dict):
                aggregator_parameters = form.aggregator_parameters.data
            else:
                aggregator_parameters = json.loads(
                    form.aggregator_parameters.data)

            agg_class = aggregator_manager.AggregatorManager.get_aggregator(
                aggregator_name)
            if not agg_class:
                return {}
            if not aggregator_parameters:
                aggregator_parameters = {}

            indices = aggregator_parameters.pop('index', sketch_indices)
            indices, timeline_ids = lib_utils.get_validated_indices(
                indices, sketch)

            aggregator = agg_class(
                sketch_id=sketch_id, indices=indices,
                timeline_ids=timeline_ids)

            chart_type = aggregator_parameters.pop('supported_charts', None)
            chart_color = aggregator_parameters.pop('chart_color', '')
            chart_title = aggregator_parameters.pop(
                'chart_title', aggregator.chart_title)

            time_before = time.time()
            try:
                result_obj = aggregator.run(**aggregator_parameters)
            except NotFoundError:
                abort(
                    HTTP_STATUS_CODE_NOT_FOUND,
                    'Attempting to run an aggregation on a non-existing '
                    'Elastic index, index: {0:s} and parameters: {1!s}'.format(
                        ','.join(indices), aggregator_parameters))
            except ValueError as exc:
                abort(
                    HTTP_STATUS_CODE_BAD_REQUEST,
                    'Unable to run the aggregation, with error: {0!s}'.format(
                        exc))
            time_after = time.time()

            aggregator_description = aggregator.describe

            buckets = result_obj.to_dict()
            buckets['buckets'] = buckets.pop('values')
            result = {
                'aggregation_result': {
                    aggregator_name: buckets
                }
            }
            meta = {
                'method': 'aggregator_run',
                'chart_type': chart_type,
                'name': aggregator_description.get('name'),
                'description': aggregator_description.get('description'),
                'es_time': time_after - time_before,
            }

            if chart_type:
                meta['vega_spec'] = result_obj.to_chart(
                    chart_name=chart_type,
                    chart_title=chart_title, color=chart_color)
                meta['vega_chart_title'] = chart_title

        elif aggregation_dsl:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.search(
                index=','.join(sketch_indices), body=aggregation_dsl, size=0)

            meta = {
                'es_time': result.get('took', 0),
                'es_total_count': result.get('hits', {}).get('total', 0),
                'timed_out': result.get('timed_out', False),
                'method': 'aggregator_query',
                'max_score': result.get('hits', {}).get('max_score', 0.0)
            }
        else:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'An aggregation DSL or an aggregator name needs '
                'to be provided!')

        result_keys = set(result.keys()) - self.REMOVE_FIELDS
        objects = [result[key] for key in result_keys]
        schema = {'meta': meta, 'objects': objects}

        # Update the last activity of a sketch.
        utils.update_sketch_last_activity(sketch)

        return jsonify(schema)
Example #8
    def post(self, sketch_id):
        """Handles POST request to the resource.
        Handler for /api/v1/sketches/:sketch_id/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with list of matched events
        """
        sketch = Sketch.query.get_with_acl(sketch_id)
        form = ExploreForm.build(request)

        if form.validate_on_submit():
            query_dsl = form.dsl.data
            query_filter = form.filter.data
            sketch_indices = {
                t.searchindex.index_name
                for t in sketch.timelines
            }
            indices = query_filter.get(u'indices', sketch_indices)

            # If _all in indices then execute the query on all indices
            if u'_all' in indices:
                indices = sketch_indices

            # Make sure that the indices in the filter are part of the sketch.
            # This will also remove any deleted timeline from the search result.
            indices = get_validated_indices(indices, sketch_indices)

            # Make sure we have a query string or star filter
            if not any([form.query.data, query_filter.get(u'star'),
                        query_filter.get(u'events'), query_dsl]):
                abort(HTTP_STATUS_CODE_BAD_REQUEST)

            result = self.datastore.search(sketch_id,
                                           form.query.data,
                                           query_filter,
                                           query_dsl,
                                           indices,
                                           aggregations=None,
                                           return_results=True)

            # Get labels for each event that matches the sketch.
            # Remove all other labels.
            for event in result[u'hits'][u'hits']:
                event[u'selected'] = False
                event[u'_source'][u'label'] = []
                try:
                    for label in event[u'_source'][u'timesketch_label']:
                        if sketch.id != label[u'sketch_id']:
                            continue
                        event[u'_source'][u'label'].append(label[u'name'])
                    del event[u'_source'][u'timesketch_label']
                except KeyError:
                    pass

            # Update or create user state view. This is used in the UI to let
            # the user get back to the last state in the explore view.
            view = View.get_or_create(user=current_user,
                                      sketch=sketch,
                                      name=u'')
            view.query_string = form.query.data
            view.query_filter = json.dumps(query_filter, ensure_ascii=False)
            view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
            db_session.add(view)
            db_session.commit()

            # Add metadata for the query result. This is used by the UI to
            # render the event correctly and to display timing and hit count
            # information.
            tl_colors = {}
            tl_names = {}
            for timeline in sketch.timelines:
                tl_colors[timeline.searchindex.index_name] = timeline.color
                tl_names[timeline.searchindex.index_name] = timeline.name

            meta = {
                u'es_time': result[u'took'],
                u'es_total_count': result[u'hits'][u'total'],
                u'timeline_colors': tl_colors,
                u'timeline_names': tl_names,
            }
            schema = {u'meta': meta, u'objects': result[u'hits'][u'hits']}
            return jsonify(schema)
        return abort(HTTP_STATUS_CODE_BAD_REQUEST)
Example #9
    def post(self, sketch_id):
        """Handles POST request to the resource.
        Handler for /api/v1/sketches/:sketch_id/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with list of matched events
        """
        sketch = Sketch.query.get_with_acl(sketch_id)
        if not sketch:
            abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.")

        if not sketch.has_permission(current_user, "read"):
            abort(
                HTTP_STATUS_CODE_FORBIDDEN,
                "User does not have read access controls on sketch.",
            )

        if sketch.get_status.status == "archived":
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  "Unable to query on an archived sketch.")

        form = forms.ExploreForm.build(request)

        if not form.validate_on_submit():
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "Unable to explore data, unable to validate form data",
            )

        # TODO: Remove form and use json instead.
        query_dsl = form.dsl.data
        enable_scroll = form.enable_scroll.data
        scroll_id = form.scroll_id.data
        file_name = form.file_name.data
        count = bool(form.count.data)

        query_filter = request.json.get("filter", {})
        parent = request.json.get("parent", None)
        incognito = request.json.get("incognito", False)

        return_field_string = form.fields.data
        if return_field_string:
            return_fields = [x.strip() for x in return_field_string.split(",")]
        else:
            return_fields = query_filter.get("fields", [])
            return_fields = [field["field"] for field in return_fields]
            return_fields.extend(DEFAULT_SOURCE_FIELDS)

        if not query_filter:
            query_filter = {}

        all_indices = list(
            {t.searchindex.index_name
             for t in sketch.timelines})
        indices = query_filter.get("indices", all_indices)

        # If _all in indices then execute the query on all indices
        if "_all" in indices:
            indices = all_indices

        # Make sure that the indices in the filter are part of the sketch.
        # This will also remove any deleted timeline from the search result.
        indices, timeline_ids = get_validated_indices(indices, sketch)

        # Remove indices that don't exist from search.
        indices = utils.validate_indices(indices, self.datastore)

        if not indices:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "No valid search indices were found to perform the search on.",
            )

        # Make sure we have a query string or star filter
        if not any([
                form.query.data,
                query_filter.get("star"),
                query_filter.get("events"),
                query_dsl,
        ]):
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "The request needs a query string/DSL and/or a star filter.",
            )

        # Aggregate hit count per index.
        index_stats_agg = {
            "indices": {
                "terms": {
                    "field": "_index",
                    "min_doc_count": 0,
                    "size": len(sketch.timelines),
                }
            },
            "timelines": {
                "terms": {
                    "field": "__ts_timeline_id",
                    "min_doc_count": 0,
                    "size": len(sketch.timelines),
                }
            },
            "count_over_time": {
                "auto_date_histogram": {
                    "field": "datetime",
                    "buckets": 50,
                }
            },
        }
        if count:
            # Count operations do not support size parameters.
            if "size" in query_filter:
                _ = query_filter.pop("size")
            if "terminate_after" in query_filter:
                _ = query_filter.pop("terminate_after")

            try:
                result = self.datastore.search(
                    sketch_id=sketch_id,
                    query_string=form.query.data,
                    query_filter=query_filter,
                    query_dsl=query_dsl,
                    indices=indices,
                    timeline_ids=timeline_ids,
                    count=True,
                )
            except ValueError as e:
                abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

            # Get number of matching documents per index.
            schema = {"meta": {"total_count": result}, "objects": []}
            return jsonify(schema)

        if file_name:
            file_object = io.BytesIO()

            form_data = {
                "created_at": datetime.datetime.utcnow().isoformat(),
                "created_by": current_user.username,
                "sketch": sketch_id,
                "query": form.query.data,
                "query_dsl": query_dsl,
                "query_filter": query_filter,
                "return_fields": return_fields,
            }
            with zipfile.ZipFile(file_object, mode="w") as zip_file:
                zip_file.writestr("METADATA", data=json.dumps(form_data))
                fh = export.query_to_filehandle(
                    query_string=form.query.data,
                    query_dsl=query_dsl,
                    query_filter=query_filter,
                    indices=indices,
                    sketch=sketch,
                    datastore=self.datastore,
                    return_fields=return_fields,
                    timeline_ids=timeline_ids,
                )
                fh.seek(0)
                zip_file.writestr("query_results.csv", fh.read())
            file_object.seek(0)
            return send_file(file_object,
                             mimetype="zip",
                             attachment_filename=file_name)

        if scroll_id:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.scroll(scroll_id=scroll_id,
                                                  scroll="1m")
        else:
            try:
                result = self.datastore.search(
                    sketch_id=sketch_id,
                    query_string=form.query.data,
                    query_filter=query_filter,
                    query_dsl=query_dsl,
                    indices=indices,
                    aggregations=index_stats_agg,
                    return_fields=return_fields,
                    enable_scroll=enable_scroll,
                    timeline_ids=timeline_ids,
                )
            except ValueError as e:
                abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Get number of matching documents over time.
        histogram_interval = (
            result.get("aggregations", {})
            .get("count_over_time", {})
            .get("interval", "")
        )
        count_over_time = {"data": {}, "interval": histogram_interval}
        try:
            for bucket in result["aggregations"]["count_over_time"]["buckets"]:
                key = bucket.get("key")
                if key:
                    count_over_time["data"][key] = bucket.get("doc_count")
        except KeyError:
            pass

        # Get number of matching documents per index.
        count_per_index = {}
        try:
            for bucket in result["aggregations"]["indices"]["buckets"]:
                key = bucket.get("key")
                if key:
                    count_per_index[key] = bucket.get("doc_count")
        except KeyError:
            pass

        # Get number of matching documents per timeline.
        count_per_timeline = {}
        try:
            for bucket in result["aggregations"]["timelines"]["buckets"]:
                key = bucket.get("key")
                if key:
                    count_per_timeline[key] = bucket.get("doc_count")
        except KeyError:
            pass

        # Total count for query regardless of returned results.
        count_total_complete = sum(count_per_index.values())

        comments = {}
        if "comment" in return_fields:
            events = Event.query.filter_by(sketch=sketch).all()
            for event in events:
                for comment in event.comments:
                    comments.setdefault(event.document_id, [])
                    comments[event.document_id].append(comment.comment)

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result["hits"]["hits"]:
            event["selected"] = False
            event["_source"]["label"] = []
            try:
                for label in event["_source"]["timesketch_label"]:
                    if sketch.id != label["sketch_id"]:
                        continue
                    event["_source"]["label"].append(label["name"])
                del event["_source"]["timesketch_label"]
            except KeyError:
                pass

            if "comment" in return_fields:
                event["_source"]["comment"] = comments.get(event["_id"], [])

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        # TODO: Deprecate this and change how last activity is determined,
        # e.g. use the new Search History feature instead.
        view = View.get_or_create(user=current_user, sketch=sketch, name="")
        view.update_modification_time()
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter, ensure_ascii=False)
        view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
        db_session.add(view)
        db_session.commit()

        # Search History
        search_node = None
        new_search = SearchHistory(user=current_user, sketch=sketch)

        if parent:
            previous_search = SearchHistory.query.get(parent)
        else:
            previous_search = (SearchHistory.query.filter_by(
                user=current_user,
                sketch=sketch).order_by(SearchHistory.id.desc()).first())

        if not incognito:
            is_same_query = False
            is_same_filter = False

            new_search.query_string = form.query.data
            new_search.query_filter = json.dumps(query_filter,
                                                 ensure_ascii=False)

            new_search.query_result_count = count_total_complete
            new_search.query_time = result["took"]

            if previous_search:
                new_search.parent = previous_search

                new_query = new_search.query_string
                new_filter = new_search.query_filter
                previous_query = previous_search.query_string
                previous_filter = previous_search.query_filter

                is_same_query = previous_query == new_query
                is_same_filter = previous_filter == new_filter

            if not all([is_same_query, is_same_filter]):
                db_session.add(new_search)
                db_session.commit()
                # Create metric if user creates a new branch.
                if new_search.parent:
                    if len(new_search.parent.children) > 1:
                        METRICS["searchhistory"].labels(action="branch").inc()
            else:
                METRICS["searchhistory"].labels(
                    action="ignore_same_query").inc()
        else:
            METRICS["searchhistory"].labels(action="incognito").inc()

        search_node = new_search if new_search.id else previous_search

        if not search_node:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, "Unable to save search")

        search_node = search_node.build_tree(search_node, {}, recurse=False)

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        meta = {
            "es_time": result["took"],
            "es_total_count": result["hits"]["total"],
            "es_total_count_complete": count_total_complete,
            "timeline_colors": tl_colors,
            "timeline_names": tl_names,
            "count_per_index": count_per_index,
            "count_per_timeline": count_per_timeline,
            "count_over_time": count_over_time,
            "scroll_id": result.get("_scroll_id", ""),
            "search_node": search_node,
        }

        # Elasticsearch version 7.x returns total hits as a dictionary.
        # TODO: Refactor when version 6.x has been deprecated.
        if isinstance(meta["es_total_count"], dict):
            meta["es_total_count"] = meta["es_total_count"].get("value", 0)

        schema = {"meta": meta, "objects": result["hits"]["hits"]}
        return jsonify(schema)
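Example #9 extends the aggregation with count_over_time, an auto_date_histogram in which Elasticsearch picks an interval that fits roughly 50 buckets. The parsing above expects a response shaped like this (values are illustrative):

    sample_aggregations = {
        "count_over_time": {
            "interval": "1h",  # chosen by Elasticsearch, echoed to the UI
            "buckets": [
                {"key": 1609459200000, "doc_count": 42},
                {"key": 1609462800000, "doc_count": 7},
            ],
        }
    }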
Example #10
    def post(self, sketch_id):
        """Handles POST request to the resource.

        Handler for /api/v1/sketches/<int:sketch_id>/aggregation/explore/

        Args:
            sketch_id: Integer primary key for a sketch database model

        Returns:
            JSON with aggregation results
        """
        form = forms.AggregationExploreForm.build(request)
        if not form.validate_on_submit():
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "Not able to run aggregation, unable to validate form data.",
            )

        sketch = Sketch.query.get_with_acl(sketch_id)
        if not sketch:
            abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.")

        if not sketch.has_permission(current_user, "read"):
            abort(
                HTTP_STATUS_CODE_FORBIDDEN,
                "User does not have read access controls on sketch.",
            )

        if sketch.get_status.status == "archived":
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "Not able to run aggregation on an archived sketch.",
            )

        sketch_indices = {
            t.searchindex.index_name
            for t in sketch.timelines
            if t.get_status.status.lower() == "ready"
        }

        aggregation_dsl = form.aggregation_dsl.data
        aggregator_name = form.aggregator_name.data

        if aggregator_name:
            if isinstance(form.aggregator_parameters.data, dict):
                aggregator_parameters = form.aggregator_parameters.data
            else:
                aggregator_parameters = json.loads(form.aggregator_parameters.data)

            agg_class = aggregator_manager.AggregatorManager.get_aggregator(
                aggregator_name
            )
            if not agg_class:
                return {}
            if not aggregator_parameters:
                aggregator_parameters = {}

            indices = aggregator_parameters.pop("index", sketch_indices)
            indices, timeline_ids = lib_utils.get_validated_indices(indices, sketch)

            if not (indices or timeline_ids):
                abort(HTTP_STATUS_CODE_BAD_REQUEST, "No indices to aggregate on")

            aggregator = agg_class(
                sketch_id=sketch_id, indices=indices, timeline_ids=timeline_ids
            )

            chart_type = aggregator_parameters.pop("supported_charts", None)
            chart_color = aggregator_parameters.pop("chart_color", "")
            chart_title = aggregator_parameters.pop(
                "chart_title", aggregator.chart_title
            )

            time_before = time.time()
            try:
                result_obj = aggregator.run(**aggregator_parameters)
            except NotFoundError:
                abort(
                    HTTP_STATUS_CODE_NOT_FOUND,
                    "Attempting to run an aggregation on a non-existing "
                    "index, index: {0:s} and parameters: {1!s}".format(
                        ",".join(indices), aggregator_parameters
                    ),
                )
            except ValueError as exc:
                abort(
                    HTTP_STATUS_CODE_BAD_REQUEST,
                    "Unable to run the aggregation, with error: {0!s}".format(exc),
                )
            time_after = time.time()

            aggregator_description = aggregator.describe

            buckets = result_obj.to_dict()
            buckets["buckets"] = buckets.pop("values")
            result = {"aggregation_result": {aggregator_name: buckets}}
            meta = {
                "method": "aggregator_run",
                "chart_type": chart_type,
                "name": aggregator_description.get("name"),
                "description": aggregator_description.get("description"),
                "es_time": time_after - time_before,
            }

            if chart_type:
                meta["vega_spec"] = result_obj.to_chart(
                    chart_name=chart_type, chart_title=chart_title, color=chart_color
                )
                meta["vega_chart_title"] = chart_title

        elif aggregation_dsl:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.search(
                index=",".join(sketch_indices), body=aggregation_dsl, size=0
            )

            meta = {
                "es_time": result.get("took", 0),
                "es_total_count": result.get("hits", {}).get("total", 0),
                "timed_out": result.get("timed_out", False),
                "method": "aggregator_query",
                "max_score": result.get("hits", {}).get("max_score", 0.0),
            }
        else:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "An aggregation DSL or a name for an aggregator name needs "
                "to be provided!",
            )

        result_keys = set(result.keys()) - self.REMOVE_FIELDS
        objects = [result[key] for key in result_keys]
        schema = {"meta": meta, "objects": objects}

        # Update the last activity of a sketch.
        utils.update_sketch_last_activity(sketch)

        return jsonify(schema)
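For orientation, a hypothetical client call against the aggregation explore handler above. The endpoint path comes from the docstring and the field names from the form attributes used in the example; the host, aggregator name, parameters, and authentication are placeholders, not a documented API contract:

    import requests

    response = requests.post(
        "https://timesketch.example.com/api/v1/sketches/1/aggregation/explore/",
        json={
            "aggregator_name": "field_bucket",  # placeholder aggregator
            "aggregator_parameters": {"field": "domain", "limit": 10},
        },
        headers={"Authorization": "Bearer <token>"},  # auth varies by setup
    )
    print(response.json()["objects"])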