def test_get_validated_indices(self):
    """Test for validating indices."""
    sketch = self.sketch1
    sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
    valid_indices = ['test']
    invalid_indices = ['test', 'fail']
    self.assertListEqual(
        sketch_indices, get_validated_indices(valid_indices, sketch_indices))
    self.assertFalse('fail' in get_validated_indices(
        invalid_indices, sketch_indices))
def test_get_validated_indices(self):
    """Test for validating indices."""
    sketch = self.sketch1
    sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
    valid_indices = [u'test']
    invalid_indices = [u'test', u'fail']
    self.assertListEqual(
        sketch_indices,
        get_validated_indices(valid_indices, sketch_indices))
    self.assertFalse(
        u'fail' in get_validated_indices(invalid_indices, sketch_indices))
def test_get_validated_indices(self):
    """Test for validating indices."""
    sketch = self.sketch1
    sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
    valid_indices = ["test"]
    invalid_indices = ["test", "fail"]
    test_indices, _ = get_validated_indices(valid_indices, sketch)
    self.assertListEqual(sketch_indices, test_indices)
    test_indices, _ = get_validated_indices(invalid_indices, sketch)
    self.assertFalse("fail" in test_indices)
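# The three test versions above track a signature change: the older
# get_validated_indices() took a list of sketch index names, while the newer
# one takes the sketch object and returns an (indices, timeline_ids) tuple.
# Below is a minimal illustrative sketch of that newer helper, not the
# project's actual implementation; it only assumes the timeline attributes
# already used in the handlers further down (searchindex.index_name,
# get_status.status, id).
def get_validated_indices_sketch(indices, sketch):
    """Keep only requested indices that belong to 'ready' timelines."""
    index_to_timeline_ids = {}
    for timeline in sketch.timelines:
        if timeline.get_status.status.lower() != 'ready':
            continue
        index_name = timeline.searchindex.index_name
        index_to_timeline_ids.setdefault(index_name, []).append(timeline.id)

    valid_indices = [name for name in indices if name in index_to_timeline_ids]
    timeline_ids = [
        timeline_id
        for name in valid_indices
        for timeline_id in index_to_timeline_ids[name]
    ]
    return valid_indices, timeline_ids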
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/aggregation/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with aggregation results
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    form = AggregationForm.build(request)
    if form.validate_on_submit():
        query_filter = form.filter.data
        query_dsl = form.dsl.data
        sketch_indices = [
            t.searchindex.index_name for t in sketch.timelines
        ]
        indices = query_filter.get(u'indices', sketch_indices)

        # If _all in indices then execute the query on all indices
        if u'_all' in indices:
            indices = sketch_indices

        # Make sure that the indices in the filter are part of the sketch.
        # This will also remove any deleted timeline from the search result.
        indices = get_validated_indices(indices, sketch_indices)

        # Make sure we have a query string or star filter
        if not (form.query.data, query_filter.get(u'star'),
                query_filter.get(u'events')):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        result = []
        if form.aggtype.data == u'heatmap':
            result = heatmap(
                es_client=self.datastore,
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices)
        elif form.aggtype.data == u'histogram':
            result = histogram(
                es_client=self.datastore,
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices)
        else:
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        schema = {u'objects': result}
        return jsonify(schema)
    return abort(HTTP_STATUS_CODE_BAD_REQUEST)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

    if not sketch.has_permission(current_user, 'read'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have read access controls on sketch.')

    if sketch.get_status.status == 'archived':
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to query on an archived sketch.')

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to explore data, unable to validate form data')

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data

    query_filter = request.json.get('filter', {})

    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(',')]
    else:
        return_fields = query_filter.get('fields', [])
        return_fields = [field['field'] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    sketch_indices = {
        t.searchindex.index_name
        for t in sketch.timelines
        if t.get_status.status.lower() == 'ready'
    }

    if not query_filter:
        query_filter = {}

    indices = query_filter.get('indices', sketch_indices)

    # If _all in indices then execute the query on all indices
    if '_all' in indices:
        indices = sketch_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices = get_validated_indices(indices, sketch_indices)

    # Make sure we have a query string or star filter
    if not (form.query.data, query_filter.get('star'),
            query_filter.get('events'), query_dsl):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'The request needs a query string/DSL and or a star filter.')

    # Aggregate hit count per index.
    index_stats_agg = {"indices": {"terms": {"field": "_index"}}}

    if file_name:
        file_object = io.BytesIO()
        form_data = {
            'created_at': datetime.datetime.utcnow().isoformat(),
            'created_by': current_user.username,
            'sketch': sketch_id,
            'query': form.query.data,
            'query_dsl': query_dsl,
            'query_filter': query_filter,
            'return_fields': return_fields,
        }
        with zipfile.ZipFile(file_object, mode='w') as zip_file:
            zip_file.writestr('METADATA', data=json.dumps(form_data))
            fh = export.query_to_filehandle(
                query_string=form.query.data,
                query_dsl=query_dsl,
                query_filter=query_filter,
                indices=indices,
                sketch=sketch,
                datastore=self.datastore)
            fh.seek(0)
            zip_file.writestr('query_results.csv', fh.read())
        file_object.seek(0)

        return send_file(
            file_object, mimetype='zip', attachment_filename=file_name)

    if scroll_id:
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(
            scroll_id=scroll_id, scroll='1m')
    else:
        try:
            result = self.datastore.search(
                sketch_id,
                form.query.data,
                query_filter,
                query_dsl,
                indices,
                aggregations=index_stats_agg,
                return_fields=return_fields,
                enable_scroll=enable_scroll)
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, e)

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result['aggregations']['indices']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_index[key] = bucket.get('doc_count')
    except KeyError:
        pass

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result['hits']['hits']:
        event['selected'] = False
        event['_source']['label'] = []
        try:
            for label in event['_source']['timesketch_label']:
                if sketch.id != label['sketch_id']:
                    continue
                event['_source']['label'].append(label['name'])
            del event['_source']['timesketch_label']
        except KeyError:
            pass

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Add a call to utils.update_sketch_last_activity once new
    # mechanism has been added, instead of relying on user views.
    view = View.get_or_create(user=current_user, sketch=sketch, name='')
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        'es_time': result['took'],
        'es_total_count': result['hits']['total'],
        'timeline_colors': tl_colors,
        'timeline_names': tl_names,
        'count_per_index': count_per_index,
        'scroll_id': result.get('_scroll_id', ''),
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta['es_total_count'], dict):
        meta['es_total_count'] = meta['es_total_count'].get('value', 0)

    schema = {'meta': meta, 'objects': result['hits']['hits']}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

    if not sketch.has_permission(current_user, 'read'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have read access controls on sketch.')

    if sketch.get_status.status == 'archived':
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to query on an archived sketch.')

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to explore data, unable to validate form data')

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data
    count = bool(form.count.data)

    query_filter = request.json.get('filter', {})
    parent = request.json.get('parent', None)
    incognito = request.json.get('incognito', False)

    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(',')]
    else:
        return_fields = query_filter.get('fields', [])
        return_fields = [field['field'] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    if not query_filter:
        query_filter = {}

    all_indices = list(
        {t.searchindex.index_name for t in sketch.timelines})
    indices = query_filter.get('indices', all_indices)

    # If _all in indices then execute the query on all indices
    if '_all' in indices:
        indices = all_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices, timeline_ids = get_validated_indices(indices, sketch)

    # Remove indices that don't exist from search.
    indices = utils.validate_indices(indices, self.datastore)

    if not indices:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'No valid search indices were found to perform the search on.')

    # Make sure we have a query string or star filter
    if not (form.query.data, query_filter.get('star'),
            query_filter.get('events'), query_dsl):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'The request needs a query string/DSL and or a star filter.')

    # Aggregate hit count per index.
    index_stats_agg = {
        'indices': {
            'terms': {
                'field': '_index',
                'min_doc_count': 0,
                'size': len(sketch.timelines)
            }
        },
        'timelines': {
            'terms': {
                'field': '__ts_timeline_id',
                'min_doc_count': 0,
                'size': len(sketch.timelines)
            }
        }
    }

    if count:
        # Count operations do not support size parameters.
        if 'size' in query_filter:
            _ = query_filter.pop('size')
        if 'terminate_after' in query_filter:
            _ = query_filter.pop('terminate_after')

        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                timeline_ids=timeline_ids,
                count=True)
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Get number of matching documents per index.
        schema = {'meta': {'total_count': result}, 'objects': []}
        return jsonify(schema)

    if file_name:
        file_object = io.BytesIO()
        form_data = {
            'created_at': datetime.datetime.utcnow().isoformat(),
            'created_by': current_user.username,
            'sketch': sketch_id,
            'query': form.query.data,
            'query_dsl': query_dsl,
            'query_filter': query_filter,
            'return_fields': return_fields,
        }
        with zipfile.ZipFile(file_object, mode='w') as zip_file:
            zip_file.writestr('METADATA', data=json.dumps(form_data))
            fh = export.query_to_filehandle(
                query_string=form.query.data,
                query_dsl=query_dsl,
                query_filter=query_filter,
                indices=indices,
                sketch=sketch,
                datastore=self.datastore,
                return_fields=return_fields,
                timeline_ids=timeline_ids)
            fh.seek(0)
            zip_file.writestr('query_results.csv', fh.read())
        file_object.seek(0)

        return send_file(
            file_object, mimetype='zip', attachment_filename=file_name)

    if scroll_id:
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(
            scroll_id=scroll_id, scroll='1m')
    else:
        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                aggregations=index_stats_agg,
                return_fields=return_fields,
                enable_scroll=enable_scroll,
                timeline_ids=timeline_ids)
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result['aggregations']['indices']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_index[key] = bucket.get('doc_count')
    except KeyError:
        pass

    # Get number of matching documents per timeline.
    count_per_timeline = {}
    try:
        for bucket in result['aggregations']['timelines']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_timeline[key] = bucket.get('doc_count')
    except KeyError:
        pass

    # Total count for query regardless of returned results.
    count_total_complete = sum(count_per_index.values())

    comments = {}
    if 'comment' in return_fields:
        events = Event.query.filter_by(sketch=sketch).all()
        for event in events:
            for comment in event.comments:
                comments.setdefault(event.document_id, [])
                comments[event.document_id].append(comment.comment)

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result['hits']['hits']:
        event['selected'] = False
        event['_source']['label'] = []
        try:
            for label in event['_source']['timesketch_label']:
                if sketch.id != label['sketch_id']:
                    continue
                event['_source']['label'].append(label['name'])
            del event['_source']['timesketch_label']
        except KeyError:
            pass

        if 'comment' in return_fields:
            event['_source']['comment'] = comments.get(event['_id'], [])

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Deprecate this and change how last activity is determined, e.g
    # use the new Search History feature instead.
    view = View.get_or_create(user=current_user, sketch=sketch, name='')
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Search History
    search_node = None
    new_search = SearchHistory(user=current_user, sketch=sketch)

    if parent:
        previous_search = SearchHistory.query.get(parent)
    else:
        previous_search = SearchHistory.query.filter_by(
            user=current_user,
            sketch=sketch).order_by(SearchHistory.id.desc()).first()

    if not incognito:
        is_same_query = False
        is_same_filter = False

        new_search.query_string = form.query.data
        new_search.query_filter = json.dumps(query_filter, ensure_ascii=False)
        new_search.query_result_count = count_total_complete
        new_search.query_time = result['took']

        if previous_search:
            new_search.parent = previous_search

            new_query = new_search.query_string
            new_filter = new_search.query_filter
            previous_query = previous_search.query_string
            previous_filter = previous_search.query_filter

            is_same_query = previous_query == new_query
            is_same_filter = previous_filter == new_filter

        if not all([is_same_query, is_same_filter]):
            db_session.add(new_search)
            db_session.commit()
            # Create metric if user creates a new branch.
            if new_search.parent:
                if len(new_search.parent.children) > 1:
                    METRICS['searchhistory'].labels(action='branch').inc()
        else:
            METRICS['searchhistory'].labels(
                action='ignore_same_query').inc()
    else:
        METRICS['searchhistory'].labels(action='incognito').inc()

    search_node = new_search if new_search.id else previous_search

    if not search_node:
        abort(HTTP_STATUS_CODE_BAD_REQUEST, 'Unable to save search')

    search_node = search_node.build_tree(search_node, {}, recurse=False)

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        'es_time': result['took'],
        'es_total_count': result['hits']['total'],
        'es_total_count_complete': count_total_complete,
        'timeline_colors': tl_colors,
        'timeline_names': tl_names,
        'count_per_index': count_per_index,
        'count_per_timeline': count_per_timeline,
        'scroll_id': result.get('_scroll_id', ''),
        'search_node': search_node
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta['es_total_count'], dict):
        meta['es_total_count'] = meta['es_total_count'].get('value', 0)

    schema = {'meta': meta, 'objects': result['hits']['hits']}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/<int:sketch_id>/aggregation/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with aggregation results
    """
    form = forms.AggregationExploreForm.build(request)
    if not form.validate_on_submit():
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Not able to run aggregation, unable to validate form data.')

    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(
            HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')
    if not sketch.has_permission(current_user, 'read'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have read access controls on sketch.')

    if sketch.get_status.status == 'archived':
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Not able to run aggregation on an archived sketch.')

    sketch_indices = {
        t.searchindex.index_name
        for t in sketch.timelines
        if t.get_status.status.lower() == 'ready'
    }

    aggregation_dsl = form.aggregation_dsl.data
    aggregator_name = form.aggregator_name.data

    if aggregator_name:
        if isinstance(form.aggregator_parameters.data, dict):
            aggregator_parameters = form.aggregator_parameters.data
        else:
            aggregator_parameters = json.loads(
                form.aggregator_parameters.data)

        agg_class = aggregator_manager.AggregatorManager.get_aggregator(
            aggregator_name)
        if not agg_class:
            return {}
        if not aggregator_parameters:
            aggregator_parameters = {}

        indices = aggregator_parameters.pop('index', sketch_indices)
        indices, timeline_ids = lib_utils.get_validated_indices(
            indices, sketch)

        aggregator = agg_class(
            sketch_id=sketch_id, indices=indices, timeline_ids=timeline_ids)

        chart_type = aggregator_parameters.pop('supported_charts', None)
        chart_color = aggregator_parameters.pop('chart_color', '')
        chart_title = aggregator_parameters.pop(
            'chart_title', aggregator.chart_title)

        time_before = time.time()
        try:
            result_obj = aggregator.run(**aggregator_parameters)
        except NotFoundError:
            abort(
                HTTP_STATUS_CODE_NOT_FOUND,
                'Attempting to run an aggregation on a non-existing '
                'Elastic index, index: {0:s} and parameters: {1!s}'.format(
                    ','.join(indices), aggregator_parameters))
        except ValueError as exc:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Unable to run the aggregation, with error: {0!s}'.format(
                    exc))
        time_after = time.time()

        aggregator_description = aggregator.describe

        buckets = result_obj.to_dict()
        buckets['buckets'] = buckets.pop('values')
        result = {
            'aggregation_result': {
                aggregator_name: buckets
            }
        }
        meta = {
            'method': 'aggregator_run',
            'chart_type': chart_type,
            'name': aggregator_description.get('name'),
            'description': aggregator_description.get('description'),
            'es_time': time_after - time_before,
        }
        if chart_type:
            meta['vega_spec'] = result_obj.to_chart(
                chart_name=chart_type,
                chart_title=chart_title,
                color=chart_color)
            meta['vega_chart_title'] = chart_title

    elif aggregation_dsl:
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.search(
            index=','.join(sketch_indices),
            body=aggregation_dsl,
            size=0)

        meta = {
            'es_time': result.get('took', 0),
            'es_total_count': result.get('hits', {}).get('total', 0),
            'timed_out': result.get('timed_out', False),
            'method': 'aggregator_query',
            'max_score': result.get('hits', {}).get('max_score', 0.0)
        }
    else:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'An aggregation DSL or a name for an aggregator name needs '
            'to be provided!')

    result_keys = set(result.keys()) - self.REMOVE_FIELDS
    objects = [result[key] for key in result_keys]
    schema = {'meta': meta, 'objects': objects}

    # Update the last activity of a sketch.
    utils.update_sketch_last_activity(sketch)

    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    form = ExploreForm.build(request)

    if form.validate_on_submit():
        query_dsl = form.dsl.data
        query_filter = form.filter.data
        sketch_indices = {
            t.searchindex.index_name for t in sketch.timelines
        }
        indices = query_filter.get(u'indices', sketch_indices)

        # If _all in indices then execute the query on all indices
        if u'_all' in indices:
            indices = sketch_indices

        # Make sure that the indices in the filter are part of the sketch.
        # This will also remove any deleted timeline from the search result.
        indices = get_validated_indices(indices, sketch_indices)

        # Make sure we have a query string or star filter
        if not (form.query.data, query_filter.get(u'star'),
                query_filter.get(u'events'), query_dsl):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        result = self.datastore.search(
            sketch_id,
            form.query.data,
            query_filter,
            query_dsl,
            indices,
            aggregations=None,
            return_results=True)

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result[u'hits'][u'hits']:
            event[u'selected'] = False
            event[u'_source'][u'label'] = []
            try:
                for label in event[u'_source'][u'timesketch_label']:
                    if sketch.id != label[u'sketch_id']:
                        continue
                    event[u'_source'][u'label'].append(label[u'name'])
                del event[u'_source'][u'timesketch_label']
            except KeyError:
                pass

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        view = View.get_or_create(
            user=current_user, sketch=sketch, name=u'')
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter, ensure_ascii=False)
        view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
        db_session.add(view)
        db_session.commit()

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        meta = {
            u'es_time': result[u'took'],
            u'es_total_count': result[u'hits'][u'total'],
            u'timeline_colors': tl_colors,
            u'timeline_names': tl_names,
        }
        schema = {u'meta': meta, u'objects': result[u'hits'][u'hits']}
        return jsonify(schema)
    return abort(HTTP_STATUS_CODE_BAD_REQUEST)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.")

    if not sketch.has_permission(current_user, "read"):
        abort(
            HTTP_STATUS_CODE_FORBIDDEN,
            "User does not have read access controls on sketch.",
        )

    if sketch.get_status.status == "archived":
        abort(HTTP_STATUS_CODE_BAD_REQUEST, "Unable to query on an archived sketch.")

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "Unable to explore data, unable to validate form data",
        )

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data
    count = bool(form.count.data)

    query_filter = request.json.get("filter", {})
    parent = request.json.get("parent", None)
    incognito = request.json.get("incognito", False)

    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(",")]
    else:
        return_fields = query_filter.get("fields", [])
        return_fields = [field["field"] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    if not query_filter:
        query_filter = {}

    all_indices = list(
        {t.searchindex.index_name for t in sketch.timelines})
    indices = query_filter.get("indices", all_indices)

    # If _all in indices then execute the query on all indices
    if "_all" in indices:
        indices = all_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices, timeline_ids = get_validated_indices(indices, sketch)

    # Remove indices that don't exist from search.
    indices = utils.validate_indices(indices, self.datastore)

    if not indices:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "No valid search indices were found to perform the search on.",
        )

    # Make sure we have a query string or star filter
    if not (
        form.query.data,
        query_filter.get("star"),
        query_filter.get("events"),
        query_dsl,
    ):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "The request needs a query string/DSL and or a star filter.",
        )

    # Aggregate hit count per index.
    index_stats_agg = {
        "indices": {
            "terms": {
                "field": "_index",
                "min_doc_count": 0,
                "size": len(sketch.timelines),
            }
        },
        "timelines": {
            "terms": {
                "field": "__ts_timeline_id",
                "min_doc_count": 0,
                "size": len(sketch.timelines),
            }
        },
        "count_over_time": {
            "auto_date_histogram": {
                "field": "datetime",
                "buckets": 50,
            }
        },
    }

    if count:
        # Count operations do not support size parameters.
        if "size" in query_filter:
            _ = query_filter.pop("size")
        if "terminate_after" in query_filter:
            _ = query_filter.pop("terminate_after")

        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                timeline_ids=timeline_ids,
                count=True,
            )
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Get number of matching documents per index.
        schema = {"meta": {"total_count": result}, "objects": []}
        return jsonify(schema)

    if file_name:
        file_object = io.BytesIO()
        form_data = {
            "created_at": datetime.datetime.utcnow().isoformat(),
            "created_by": current_user.username,
            "sketch": sketch_id,
            "query": form.query.data,
            "query_dsl": query_dsl,
            "query_filter": query_filter,
            "return_fields": return_fields,
        }
        with zipfile.ZipFile(file_object, mode="w") as zip_file:
            zip_file.writestr("METADATA", data=json.dumps(form_data))
            fh = export.query_to_filehandle(
                query_string=form.query.data,
                query_dsl=query_dsl,
                query_filter=query_filter,
                indices=indices,
                sketch=sketch,
                datastore=self.datastore,
                return_fields=return_fields,
                timeline_ids=timeline_ids,
            )
            fh.seek(0)
            zip_file.writestr("query_results.csv", fh.read())
        file_object.seek(0)

        return send_file(file_object, mimetype="zip", attachment_filename=file_name)

    if scroll_id:
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(scroll_id=scroll_id, scroll="1m")
    else:
        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                aggregations=index_stats_agg,
                return_fields=return_fields,
                enable_scroll=enable_scroll,
                timeline_ids=timeline_ids,
            )
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

    # Get number of matching documents over time.
    histogram_interval = (
        result.get("aggregations", {}).get("count_over_time", {}).get("interval", "")
    )
    count_over_time = {"data": {}, "interval": histogram_interval}
    try:
        for bucket in result["aggregations"]["count_over_time"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_over_time["data"][key] = bucket.get("doc_count")
    except KeyError:
        pass

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result["aggregations"]["indices"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_per_index[key] = bucket.get("doc_count")
    except KeyError:
        pass

    # Get number of matching documents per timeline.
    count_per_timeline = {}
    try:
        for bucket in result["aggregations"]["timelines"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_per_timeline[key] = bucket.get("doc_count")
    except KeyError:
        pass

    # Total count for query regardless of returned results.
    count_total_complete = sum(count_per_index.values())

    comments = {}
    if "comment" in return_fields:
        events = Event.query.filter_by(sketch=sketch).all()
        for event in events:
            for comment in event.comments:
                comments.setdefault(event.document_id, [])
                comments[event.document_id].append(comment.comment)

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result["hits"]["hits"]:
        event["selected"] = False
        event["_source"]["label"] = []
        try:
            for label in event["_source"]["timesketch_label"]:
                if sketch.id != label["sketch_id"]:
                    continue
                event["_source"]["label"].append(label["name"])
            del event["_source"]["timesketch_label"]
        except KeyError:
            pass

        if "comment" in return_fields:
            event["_source"]["comment"] = comments.get(event["_id"], [])

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Deprecate this and change how last activity is determined, e.g
    # use the new Search History feature instead.
    view = View.get_or_create(user=current_user, sketch=sketch, name="")
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Search History
    search_node = None
    new_search = SearchHistory(user=current_user, sketch=sketch)

    if parent:
        previous_search = SearchHistory.query.get(parent)
    else:
        previous_search = (
            SearchHistory.query.filter_by(user=current_user, sketch=sketch)
            .order_by(SearchHistory.id.desc())
            .first()
        )

    if not incognito:
        is_same_query = False
        is_same_filter = False

        new_search.query_string = form.query.data
        new_search.query_filter = json.dumps(query_filter, ensure_ascii=False)
        new_search.query_result_count = count_total_complete
        new_search.query_time = result["took"]

        if previous_search:
            new_search.parent = previous_search

            new_query = new_search.query_string
            new_filter = new_search.query_filter
            previous_query = previous_search.query_string
            previous_filter = previous_search.query_filter

            is_same_query = previous_query == new_query
            is_same_filter = previous_filter == new_filter

        if not all([is_same_query, is_same_filter]):
            db_session.add(new_search)
            db_session.commit()
            # Create metric if user creates a new branch.
            if new_search.parent:
                if len(new_search.parent.children) > 1:
                    METRICS["searchhistory"].labels(action="branch").inc()
        else:
            METRICS["searchhistory"].labels(action="ignore_same_query").inc()
    else:
        METRICS["searchhistory"].labels(action="incognito").inc()

    search_node = new_search if new_search.id else previous_search

    if not search_node:
        abort(HTTP_STATUS_CODE_BAD_REQUEST, "Unable to save search")

    search_node = search_node.build_tree(search_node, {}, recurse=False)

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        "es_time": result["took"],
        "es_total_count": result["hits"]["total"],
        "es_total_count_complete": count_total_complete,
        "timeline_colors": tl_colors,
        "timeline_names": tl_names,
        "count_per_index": count_per_index,
        "count_per_timeline": count_per_timeline,
        "count_over_time": count_over_time,
        "scroll_id": result.get("_scroll_id", ""),
        "search_node": search_node,
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta["es_total_count"], dict):
        meta["es_total_count"] = meta["es_total_count"].get("value", 0)

    schema = {"meta": meta, "objects": result["hits"]["hits"]}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/<int:sketch_id>/aggregation/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with aggregation results
    """
    form = forms.AggregationExploreForm.build(request)
    if not form.validate_on_submit():
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "Not able to run aggregation, unable to validate form data.",
        )

    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.")
    if not sketch.has_permission(current_user, "read"):
        abort(
            HTTP_STATUS_CODE_FORBIDDEN,
            "User does not have read access controls on sketch.",
        )

    if sketch.get_status.status == "archived":
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "Not able to run aggregation on an archived sketch.",
        )

    sketch_indices = {
        t.searchindex.index_name
        for t in sketch.timelines
        if t.get_status.status.lower() == "ready"
    }

    aggregation_dsl = form.aggregation_dsl.data
    aggregator_name = form.aggregator_name.data

    if aggregator_name:
        if isinstance(form.aggregator_parameters.data, dict):
            aggregator_parameters = form.aggregator_parameters.data
        else:
            aggregator_parameters = json.loads(form.aggregator_parameters.data)

        agg_class = aggregator_manager.AggregatorManager.get_aggregator(
            aggregator_name
        )
        if not agg_class:
            return {}
        if not aggregator_parameters:
            aggregator_parameters = {}

        indices = aggregator_parameters.pop("index", sketch_indices)
        indices, timeline_ids = lib_utils.get_validated_indices(indices, sketch)

        if not (indices or timeline_ids):
            abort(HTTP_STATUS_CODE_BAD_REQUEST, "No indices to aggregate on")

        aggregator = agg_class(
            sketch_id=sketch_id, indices=indices, timeline_ids=timeline_ids
        )

        chart_type = aggregator_parameters.pop("supported_charts", None)
        chart_color = aggregator_parameters.pop("chart_color", "")
        chart_title = aggregator_parameters.pop(
            "chart_title", aggregator.chart_title
        )

        time_before = time.time()
        try:
            result_obj = aggregator.run(**aggregator_parameters)
        except NotFoundError:
            abort(
                HTTP_STATUS_CODE_NOT_FOUND,
                "Attempting to run an aggregation on a non-existing "
                "index, index: {0:s} and parameters: {1!s}".format(
                    ",".join(indices), aggregator_parameters
                ),
            )
        except ValueError as exc:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                "Unable to run the aggregation, with error: {0!s}".format(exc),
            )
        time_after = time.time()

        aggregator_description = aggregator.describe

        buckets = result_obj.to_dict()
        buckets["buckets"] = buckets.pop("values")
        result = {"aggregation_result": {aggregator_name: buckets}}
        meta = {
            "method": "aggregator_run",
            "chart_type": chart_type,
            "name": aggregator_description.get("name"),
            "description": aggregator_description.get("description"),
            "es_time": time_after - time_before,
        }
        if chart_type:
            meta["vega_spec"] = result_obj.to_chart(
                chart_name=chart_type, chart_title=chart_title, color=chart_color
            )
            meta["vega_chart_title"] = chart_title

    elif aggregation_dsl:
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.search(
            index=",".join(sketch_indices), body=aggregation_dsl, size=0
        )

        meta = {
            "es_time": result.get("took", 0),
            "es_total_count": result.get("hits", {}).get("total", 0),
            "timed_out": result.get("timed_out", False),
            "method": "aggregator_query",
            "max_score": result.get("hits", {}).get("max_score", 0.0),
        }
    else:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "An aggregation DSL or a name for an aggregator name needs "
            "to be provided!",
        )

    result_keys = set(result.keys()) - self.REMOVE_FIELDS
    objects = [result[key] for key in result_keys]
    schema = {"meta": meta, "objects": objects}

    # Update the last activity of a sketch.
    utils.update_sketch_last_activity(sketch)

    return jsonify(schema)
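# A hypothetical client-side call against the explore handler shown above,
# assuming an already authenticated requests.Session (cookie/CSRF handling is
# deployment specific and omitted here); the URL and sketch id are
# placeholders. The payload keys mirror what the handler reads: 'query' and
# 'fields' via the form, plus the JSON 'filter' and 'incognito' values.
import requests

session = requests.Session()  # assumed to be authenticated already
api_url = 'https://timesketch.example.com/api/v1/sketches/1/explore/'
payload = {
    'query': 'message:"failed login"',           # query string for form.query
    'filter': {'indices': ['_all'], 'size': 40},  # '_all' expands to all sketch indices
    'fields': 'datetime,message',                 # comma-separated return fields
    'incognito': False,                           # keep search-history bookkeeping
}
response = session.post(api_url, json=payload)
data = response.json()
print(data['meta']['es_total_count'], 'matching events')
for event in data['objects']:
    print(event['_source'].get('datetime'), event['_source'].get('message'))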