def add_view(self, view_name, analyzer_name, query_string=None,
             query_dsl=None, query_filter=None):
    """Add saved view to the Sketch.

    Args:
        view_name: The name of the view.
        analyzer_name: The name of the analyzer.
        query_string: Elasticsearch query string.
        query_dsl: Dictionary with Elasticsearch DSL query.
        query_filter: Dictionary with Elasticsearch filters.

    Raises:
        ValueError: If both query_string and query_dsl are missing.

    Returns:
        An instance of a SQLAlchemy View object.
    """
    # BUG FIX: the previous check `not query_string or query_dsl` parsed as
    # `(not query_string) or query_dsl`, so supplying a DSL query (without a
    # query string) incorrectly raised ValueError. We must raise only when
    # BOTH are missing, matching the docstring and the newer add_view
    # implementation.
    if not (query_string or query_dsl):
        raise ValueError('Both query_string and query_dsl are missing.')

    # Default to searching all indices when no filter was given.
    if not query_filter:
        query_filter = {'indices': '_all'}

    # Views created by analyzers are prefixed with the analyzer name.
    name = '[{0:s}] {1:s}'.format(analyzer_name, view_name)
    view = View.get_or_create(name=name, sketch=self.sql_sketch, user=None)
    view.query_string = query_string
    view.query_filter = view.validate_filter(query_filter)
    view.query_dsl = query_dsl
    view.searchtemplate = None

    db_session.add(view)
    db_session.commit()
    return view
def update_sketch_last_activity(sketch):
    """Update the last activity date of a sketch.

    Touches the modification time of the current user's unnamed state view
    for the given sketch (creating it if needed) and persists the change.

    Args:
        sketch: The sketch whose last-activity timestamp should be updated.
    """
    state_view = View.get_or_create(user=current_user, sketch=sketch, name='')
    state_view.update_modification_time()

    db_session.add(state_view)
    db_session.commit()
def add_view(
    self,
    view_name,
    analyzer_name,
    query_string=None,
    query_dsl=None,
    query_filter=None,
    additional_fields=None,
):
    """Add saved view to the Sketch.

    Args:
        view_name: The name of the view.
        analyzer_name: The name of the analyzer.
        query_string: OpenSearch query string.
        query_dsl: Dictionary with OpenSearch DSL query.
        query_filter: Dictionary with OpenSearch filters.
        additional_fields: A list with field names to include in the
            view output.

    Raises:
        ValueError: If both query_string an query_dsl are missing.

    Returns:
        An instance of a SQLAlchemy View object.
    """
    # At least one way of querying has to be provided.
    if not (query_string or query_dsl):
        raise ValueError("Both query_string and query_dsl are missing.")

    # Fall back to searching all indices when no filter was supplied.
    query_filter = query_filter or {"indices": "_all"}

    if additional_fields:
        query_filter["fields"] = [
            {"field": field_name.strip()} for field_name in additional_fields
        ]

    description = f"analyzer: {analyzer_name:s}"
    saved_view = View.get_or_create(
        name=view_name,
        description=description,
        sketch=self.sql_sketch,
        user=None,
    )
    saved_view.description = description
    saved_view.query_string = query_string
    saved_view.query_filter = saved_view.validate_filter(query_filter)
    saved_view.query_dsl = query_dsl
    saved_view.searchtemplate = None
    saved_view.set_status(status="new")

    db_session.add(saved_view)
    db_session.commit()
    return saved_view
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Runs a search against the datastore (or continues an existing scroll,
    or exports the results to a zip file) and returns matching events
    together with metadata the UI needs for rendering.

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

    if not sketch.has_permission(current_user, 'read'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have read access controls on sketch.')

    if sketch.get_status.status == 'archived':
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to query on an archived sketch.')

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to explore data, unable to validate form data')

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data

    query_filter = request.json.get('filter', {})

    # Fields requested explicitly on the form win; otherwise take the
    # fields from the filter and append the default source fields.
    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(',')]
    else:
        return_fields = query_filter.get('fields', [])
        return_fields = [field['field'] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    # Only search timelines whose status is ready.
    sketch_indices = {
        t.searchindex.index_name
        for t in sketch.timelines
        if t.get_status.status.lower() == 'ready'
    }

    if not query_filter:
        query_filter = {}

    indices = query_filter.get('indices', sketch_indices)

    # If _all in indices then execute the query on all indices
    if '_all' in indices:
        indices = sketch_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices = get_validated_indices(indices, sketch_indices)

    # Make sure we have a query string or star filter.
    # BUG FIX: the previous code tested `not (a, b, c, d)` — a non-empty
    # tuple is always truthy, so this abort could never trigger.
    if not (form.query.data or query_filter.get('star')
            or query_filter.get('events') or query_dsl):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'The request needs a query string/DSL and or a star filter.')

    # Aggregate hit count per index.
    index_stats_agg = {"indices": {"terms": {"field": "_index"}}}

    if file_name:
        # Export path: stream the query results as a CSV inside a zip
        # archive together with a METADATA document describing the query.
        file_object = io.BytesIO()
        form_data = {
            'created_at': datetime.datetime.utcnow().isoformat(),
            'created_by': current_user.username,
            'sketch': sketch_id,
            'query': form.query.data,
            'query_dsl': query_dsl,
            'query_filter': query_filter,
            'return_fields': return_fields,
        }
        with zipfile.ZipFile(file_object, mode='w') as zip_file:
            zip_file.writestr('METADATA', data=json.dumps(form_data))
            fh = export.query_to_filehandle(query_string=form.query.data,
                                            query_dsl=query_dsl,
                                            query_filter=query_filter,
                                            indices=indices,
                                            sketch=sketch,
                                            datastore=self.datastore)
            fh.seek(0)
            zip_file.writestr('query_results.csv', fh.read())
        # The archive must be finalized (with-block exited) before the
        # buffer is rewound and sent.
        file_object.seek(0)

        return send_file(file_object, mimetype='zip',
                         attachment_filename=file_name)

    if scroll_id:
        # Continue an existing scroll session instead of a fresh search.
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(scroll_id=scroll_id,
                                              scroll='1m')
    else:
        try:
            result = self.datastore.search(sketch_id,
                                           form.query.data,
                                           query_filter,
                                           query_dsl,
                                           indices,
                                           aggregations=index_stats_agg,
                                           return_fields=return_fields,
                                           enable_scroll=enable_scroll)
        except ValueError as e:
            # Consistency fix: pass str(e) so the HTTP error carries the
            # message text, matching the other explore handlers.
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result['aggregations']['indices']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_index[key] = bucket.get('doc_count')
    except KeyError:
        # Scroll continuations have no aggregations in the response.
        pass

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result['hits']['hits']:
        event['selected'] = False
        event['_source']['label'] = []
        try:
            for label in event['_source']['timesketch_label']:
                if sketch.id != label['sketch_id']:
                    continue
                event['_source']['label'].append(label['name'])
            del event['_source']['timesketch_label']
        except KeyError:
            pass

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Add a call to utils.update_sketch_last_activity once new
    # mechanism has been added, instead of relying on user views.
    view = View.get_or_create(user=current_user, sketch=sketch, name='')
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        'es_time': result['took'],
        'es_total_count': result['hits']['total'],
        'timeline_colors': tl_colors,
        'timeline_names': tl_names,
        'count_per_index': count_per_index,
        'scroll_id': result.get('_scroll_id', ''),
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta['es_total_count'], dict):
        meta['es_total_count'] = meta['es_total_count'].get('value', 0)

    schema = {'meta': meta, 'objects': result['hits']['hits']}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    form = ExploreForm.build(request)

    if form.validate_on_submit():
        query_dsl = form.dsl.data
        query_filter = form.filter.data
        sketch_indices = {
            t.searchindex.index_name for t in sketch.timelines
        }
        indices = query_filter.get(u'indices', sketch_indices)

        # If _all in indices then execute the query on all indices
        if u'_all' in indices:
            indices = sketch_indices

        # Make sure that the indices in the filter are part of the sketch
        if set(indices) - set(sketch_indices):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        # Make sure we have a query string or star filter.
        # BUG FIX: the previous code tested `not (a, b, c, d)` — a
        # non-empty tuple is always truthy, so this abort could never
        # trigger.
        if not (form.query.data or query_filter.get(u'star') or
                query_filter.get(u'events') or query_dsl):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        result = self.datastore.search(sketch_id, form.query.data,
                                       query_filter, query_dsl, indices,
                                       aggregations=None,
                                       return_results=True)

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result[u'hits'][u'hits']:
            event[u'selected'] = False
            event[u'_source'][u'label'] = []
            try:
                for label in event[u'_source'][u'timesketch_label']:
                    if sketch.id != label[u'sketch_id']:
                        continue
                    event[u'_source'][u'label'].append(label[u'name'])
                del event[u'_source'][u'timesketch_label']
            except KeyError:
                pass

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        view = View.get_or_create(user=current_user, sketch=sketch,
                                  name=u'')
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter, ensure_ascii=False)
        view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
        db_session.add(view)
        db_session.commit()

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        # Sum the histogram buckets (when present) into an unfiltered
        # total hit count.
        try:
            buckets = result[u'aggregations'][u'field_aggregation'][
                u'buckets']
        except KeyError:
            buckets = None

        es_total_count_unfiltered = 0
        if buckets:
            for bucket in buckets:
                es_total_count_unfiltered += bucket[u'doc_count']

        meta = {
            u'es_time': result[u'took'],
            u'es_total_count': result[u'hits'][u'total'],
            u'es_total_count_unfiltered': es_total_count_unfiltered,
            u'timeline_colors': tl_colors,
            u'timeline_names': tl_names,
            u'histogram': buckets
        }
        schema = {u'meta': meta, u'objects': result[u'hits'][u'hits']}
        return jsonify(schema)
    return abort(HTTP_STATUS_CODE_BAD_REQUEST)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    form = ExploreForm.build(request)

    if form.validate_on_submit():
        query_filter = form.filter.data
        # Default to every index in the sketch when the filter does not
        # name specific indices.
        sketch_indices = [
            t.searchindex.index_name for t in sketch.timelines]
        indices = query_filter.get(u'indices', sketch_indices)

        # Make sure that the indices in the filter are part of the sketch
        if set(indices) - set(sketch_indices):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        # Make sure we have a query string or star filter
        if not form.query.data and not query_filter.get(u'star'):
            abort(HTTP_STATUS_CODE_BAD_REQUEST)

        result = self.datastore.search(
            sketch_id, form.query.data, query_filter, indices)

        # Get labels for each event that matches the sketch.
        # Remove all other labels.
        for event in result[u'hits'][u'hits']:
            event[u'selected'] = False
            event[u'_source'][u'label'] = []
            try:
                for label in event[u'_source'][u'timesketch_label']:
                    if sketch.id != label[u'sketch_id']:
                        continue
                    event[u'_source'][u'label'].append(label[u'name'])
                del event[u'_source'][u'timesketch_label']
            except KeyError:
                # Events without a timesketch_label field are left as-is.
                pass

        # Update or create user state view. This is used in the UI to let
        # the user get back to the last state in the explore view.
        view = View.get_or_create(
            user=current_user, sketch=sketch, name=u'')
        view.query_string = form.query.data
        view.query_filter = json.dumps(query_filter)
        db_session.add(view)
        db_session.commit()

        # Add metadata for the query result. This is used by the UI to
        # render the event correctly and to display timing and hit count
        # information.
        tl_colors = {}
        tl_names = {}
        for timeline in sketch.timelines:
            tl_colors[timeline.searchindex.index_name] = timeline.color
            tl_names[timeline.searchindex.index_name] = timeline.name

        meta = {
            u'es_time': result[u'took'],
            u'es_total_count': result[u'hits'][u'total'],
            u'timeline_colors': tl_colors,
            u'timeline_names': tl_names
        }
        schema = {
            u'meta': meta,
            u'objects': result[u'hits'][u'hits']
        }
        return jsonify(schema)
    # Form validation failed.
    return abort(HTTP_STATUS_CODE_BAD_REQUEST)
def get(self, sketch_id):
    """Handles GET request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    args = self.parser.parse_args()
    sketch = Sketch.query.get_with_acl(sketch_id)
    # The filter arrives as a JSON encoded string in the query arguments.
    query_filter = json.loads(args.get(u'filter'))
    sketch_indices = [t.searchindex.index_name for t in sketch.timelines]
    indices = query_filter.get(u'indices', sketch_indices)

    # Make sure that the indices in the filter is part of the sketch
    if set(indices) - set(sketch_indices):
        abort(HTTP_STATUS_CODE_BAD_REQUEST)

    # Make sure we have a query string or star filter
    if not args.get(u'q') and not query_filter.get(u'star'):
        abort(HTTP_STATUS_CODE_BAD_REQUEST)

    result = self.datastore.search(sketch_id, args[u'q'], query_filter,
                                   indices)

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result[u'hits'][u'hits']:
        event[u'_source'][u'label'] = []
        try:
            for label in event[u'_source'][u'timesketch_label']:
                if sketch.id != label[u'sketch_id']:
                    continue
                event[u'_source'][u'label'].append(label[u'name'])
            del event[u'_source'][u'timesketch_label']
        except KeyError:
            # Events without a timesketch_label field are left as-is.
            pass

    # Update or create user state view. This is used in the UI to let the
    # user get back to the last state in the explore view.
    view = View.get_or_create(user=current_user, sketch=sketch, name=u'',
                              query_string=u'', query_filter=u'')
    view.query_string = args.get(u'q')
    view.query_filter = args.get(u'filter')
    db_session.add(view)
    db_session.commit()

    # Add metadata for the query result. This is used by the UI to render
    # the event correctly and to display timing and hit count information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        u'es_time': result[u'took'],
        u'es_total_count': result[u'hits'][u'total'],
        u'timeline_colors': tl_colors,
        u'timeline_names': tl_names
    }
    schema = {u'meta': meta, u'objects': result[u'hits'][u'hits']}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Runs a search (or count, scroll continuation, or zip export) against
    the datastore, records the query in the user's search history and
    returns matching events with UI metadata.

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')
    if not sketch.has_permission(current_user, 'read'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have read access controls on sketch.')
    if sketch.get_status.status == 'archived':
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to query on an archived sketch.')

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to explore data, unable to validate form data')

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data
    count = bool(form.count.data)

    query_filter = request.json.get('filter', {})
    parent = request.json.get('parent', None)
    incognito = request.json.get('incognito', False)

    # Fields requested explicitly on the form win; otherwise take the
    # fields from the filter and append the default source fields.
    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(',')]
    else:
        return_fields = query_filter.get('fields', [])
        return_fields = [field['field'] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    if not query_filter:
        query_filter = {}

    all_indices = list(
        {t.searchindex.index_name for t in sketch.timelines})
    indices = query_filter.get('indices', all_indices)

    # If _all in indices then execute the query on all indices
    if '_all' in indices:
        indices = all_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices, timeline_ids = get_validated_indices(indices, sketch)

    # Remove indices that don't exist from search.
    indices = utils.validate_indices(indices, self.datastore)

    if not indices:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'No valid search indices were found to perform the search on.')

    # Make sure we have a query string or star filter.
    # BUG FIX: the previous code tested `not (a, b, c, d)` — a non-empty
    # tuple is always truthy, so this abort could never trigger.
    if not (form.query.data or query_filter.get('star')
            or query_filter.get('events') or query_dsl):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'The request needs a query string/DSL and or a star filter.')

    # Aggregate hit count per index and per timeline.
    index_stats_agg = {
        'indices': {
            'terms': {
                'field': '_index',
                'min_doc_count': 0,
                'size': len(sketch.timelines)
            }
        },
        'timelines': {
            'terms': {
                'field': '__ts_timeline_id',
                'min_doc_count': 0,
                'size': len(sketch.timelines)
            }
        }
    }

    if count:
        # Count operations do not support size parameters.
        if 'size' in query_filter:
            _ = query_filter.pop('size')
        if 'terminate_after' in query_filter:
            _ = query_filter.pop('terminate_after')

        try:
            result = self.datastore.search(sketch_id=sketch_id,
                                           query_string=form.query.data,
                                           query_filter=query_filter,
                                           query_dsl=query_dsl,
                                           indices=indices,
                                           timeline_ids=timeline_ids,
                                           count=True)
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Count searches only return the total hit count, no events.
        schema = {'meta': {'total_count': result}, 'objects': []}
        return jsonify(schema)

    if file_name:
        # Export path: stream the query results as a CSV inside a zip
        # archive together with a METADATA document describing the query.
        file_object = io.BytesIO()
        form_data = {
            'created_at': datetime.datetime.utcnow().isoformat(),
            'created_by': current_user.username,
            'sketch': sketch_id,
            'query': form.query.data,
            'query_dsl': query_dsl,
            'query_filter': query_filter,
            'return_fields': return_fields,
        }
        with zipfile.ZipFile(file_object, mode='w') as zip_file:
            zip_file.writestr('METADATA', data=json.dumps(form_data))
            fh = export.query_to_filehandle(query_string=form.query.data,
                                            query_dsl=query_dsl,
                                            query_filter=query_filter,
                                            indices=indices,
                                            sketch=sketch,
                                            datastore=self.datastore,
                                            return_fields=return_fields,
                                            timeline_ids=timeline_ids)
            fh.seek(0)
            zip_file.writestr('query_results.csv', fh.read())
        # The archive must be finalized (with-block exited) before the
        # buffer is rewound and sent.
        file_object.seek(0)

        return send_file(file_object, mimetype='zip',
                         attachment_filename=file_name)

    if scroll_id:
        # Continue an existing scroll session instead of a fresh search.
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(scroll_id=scroll_id,
                                              scroll='1m')
    else:
        try:
            result = self.datastore.search(sketch_id=sketch_id,
                                           query_string=form.query.data,
                                           query_filter=query_filter,
                                           query_dsl=query_dsl,
                                           indices=indices,
                                           aggregations=index_stats_agg,
                                           return_fields=return_fields,
                                           enable_scroll=enable_scroll,
                                           timeline_ids=timeline_ids)
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result['aggregations']['indices']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_index[key] = bucket.get('doc_count')
    except KeyError:
        # Scroll continuations have no aggregations in the response.
        pass

    # Get number of matching documents per timeline.
    count_per_timeline = {}
    try:
        for bucket in result['aggregations']['timelines']['buckets']:
            key = bucket.get('key')
            if key:
                count_per_timeline[key] = bucket.get('doc_count')
    except KeyError:
        pass

    # Total count for query regardless of returned results.
    count_total_complete = sum(count_per_index.values())

    # Collect comments per document when requested as a return field.
    comments = {}
    if 'comment' in return_fields:
        events = Event.query.filter_by(sketch=sketch).all()
        for event in events:
            for comment in event.comments:
                comments.setdefault(event.document_id, [])
                comments[event.document_id].append(comment.comment)

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result['hits']['hits']:
        event['selected'] = False
        event['_source']['label'] = []
        try:
            for label in event['_source']['timesketch_label']:
                if sketch.id != label['sketch_id']:
                    continue
                event['_source']['label'].append(label['name'])
            del event['_source']['timesketch_label']
        except KeyError:
            pass

        if 'comment' in return_fields:
            event['_source']['comment'] = comments.get(event['_id'], [])

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Deprecate this and change how last activity is determined, e.g
    # use the new Search History feature instead.
    view = View.get_or_create(user=current_user, sketch=sketch, name='')
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Search History
    search_node = None
    new_search = SearchHistory(user=current_user, sketch=sketch)

    # The parent node is either given explicitly by the client or is the
    # user's most recent search for this sketch.
    if parent:
        previous_search = SearchHistory.query.get(parent)
    else:
        previous_search = SearchHistory.query.filter_by(
            user=current_user, sketch=sketch).order_by(
                SearchHistory.id.desc()).first()

    if not incognito:
        is_same_query = False
        is_same_filter = False

        new_search.query_string = form.query.data
        new_search.query_filter = json.dumps(query_filter,
                                             ensure_ascii=False)
        new_search.query_result_count = count_total_complete
        new_search.query_time = result['took']

        if previous_search:
            new_search.parent = previous_search

            new_query = new_search.query_string
            new_filter = new_search.query_filter
            previous_query = previous_search.query_string
            previous_filter = previous_search.query_filter

            is_same_query = previous_query == new_query
            is_same_filter = previous_filter == new_filter

        # Only record the search when it differs from the previous one.
        if not all([is_same_query, is_same_filter]):
            db_session.add(new_search)
            db_session.commit()
            # Create metric if user creates a new branch.
            if new_search.parent:
                if len(new_search.parent.children) > 1:
                    METRICS['searchhistory'].labels(action='branch').inc()
        else:
            METRICS['searchhistory'].labels(
                action='ignore_same_query').inc()
    else:
        METRICS['searchhistory'].labels(action='incognito').inc()

    # If the new search was not committed it has no id; fall back to the
    # previous node.
    search_node = new_search if new_search.id else previous_search

    if not search_node:
        abort(HTTP_STATUS_CODE_BAD_REQUEST, 'Unable to save search')

    search_node = search_node.build_tree(search_node, {}, recurse=False)

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        'es_time': result['took'],
        'es_total_count': result['hits']['total'],
        'es_total_count_complete': count_total_complete,
        'timeline_colors': tl_colors,
        'timeline_names': tl_names,
        'count_per_index': count_per_index,
        'count_per_timeline': count_per_timeline,
        'scroll_id': result.get('_scroll_id', ''),
        'search_node': search_node
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta['es_total_count'], dict):
        meta['es_total_count'] = meta['es_total_count'].get('value', 0)

    schema = {'meta': meta, 'objects': result['hits']['hits']}
    return jsonify(schema)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/explore/

    Runs a search (or count, scroll continuation, or zip export) against
    the datastore, records the query in the user's search history and
    returns matching events with UI metadata.

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        JSON with list of matched events
    """
    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.")
    if not sketch.has_permission(current_user, "read"):
        abort(
            HTTP_STATUS_CODE_FORBIDDEN,
            "User does not have read access controls on sketch.",
        )
    if sketch.get_status.status == "archived":
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              "Unable to query on an archived sketch.")

    form = forms.ExploreForm.build(request)
    if not form.validate_on_submit():
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "Unable to explore data, unable to validate form data",
        )

    # TODO: Remove form and use json instead.
    query_dsl = form.dsl.data
    enable_scroll = form.enable_scroll.data
    scroll_id = form.scroll_id.data
    file_name = form.file_name.data
    count = bool(form.count.data)

    query_filter = request.json.get("filter", {})
    parent = request.json.get("parent", None)
    incognito = request.json.get("incognito", False)

    # Fields requested explicitly on the form win; otherwise take the
    # fields from the filter and append the default source fields.
    return_field_string = form.fields.data
    if return_field_string:
        return_fields = [x.strip() for x in return_field_string.split(",")]
    else:
        return_fields = query_filter.get("fields", [])
        return_fields = [field["field"] for field in return_fields]
        return_fields.extend(DEFAULT_SOURCE_FIELDS)

    if not query_filter:
        query_filter = {}

    all_indices = list(
        {t.searchindex.index_name for t in sketch.timelines})
    indices = query_filter.get("indices", all_indices)

    # If _all in indices then execute the query on all indices
    if "_all" in indices:
        indices = all_indices

    # Make sure that the indices in the filter are part of the sketch.
    # This will also remove any deleted timeline from the search result.
    indices, timeline_ids = get_validated_indices(indices, sketch)

    # Remove indices that don't exist from search.
    indices = utils.validate_indices(indices, self.datastore)

    if not indices:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "No valid search indices were found to perform the search on.",
        )

    # Make sure we have a query string or star filter.
    # BUG FIX: the previous code tested `not (a, b, c, d)` — a non-empty
    # tuple is always truthy, so this abort could never trigger.
    if not (
        form.query.data
        or query_filter.get("star")
        or query_filter.get("events")
        or query_dsl
    ):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            "The request needs a query string/DSL and or a star filter.",
        )

    # Aggregate hit count per index and per timeline, plus a histogram
    # of matching documents over time.
    index_stats_agg = {
        "indices": {
            "terms": {
                "field": "_index",
                "min_doc_count": 0,
                "size": len(sketch.timelines),
            }
        },
        "timelines": {
            "terms": {
                "field": "__ts_timeline_id",
                "min_doc_count": 0,
                "size": len(sketch.timelines),
            }
        },
        "count_over_time": {
            "auto_date_histogram": {
                "field": "datetime",
                "buckets": 50,
            }
        },
    }

    if count:
        # Count operations do not support size parameters.
        if "size" in query_filter:
            _ = query_filter.pop("size")
        if "terminate_after" in query_filter:
            _ = query_filter.pop("terminate_after")

        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                timeline_ids=timeline_ids,
                count=True,
            )
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

        # Count searches only return the total hit count, no events.
        schema = {"meta": {"total_count": result}, "objects": []}
        return jsonify(schema)

    if file_name:
        # Export path: stream the query results as a CSV inside a zip
        # archive together with a METADATA document describing the query.
        file_object = io.BytesIO()
        form_data = {
            "created_at": datetime.datetime.utcnow().isoformat(),
            "created_by": current_user.username,
            "sketch": sketch_id,
            "query": form.query.data,
            "query_dsl": query_dsl,
            "query_filter": query_filter,
            "return_fields": return_fields,
        }
        with zipfile.ZipFile(file_object, mode="w") as zip_file:
            zip_file.writestr("METADATA", data=json.dumps(form_data))
            fh = export.query_to_filehandle(
                query_string=form.query.data,
                query_dsl=query_dsl,
                query_filter=query_filter,
                indices=indices,
                sketch=sketch,
                datastore=self.datastore,
                return_fields=return_fields,
                timeline_ids=timeline_ids,
            )
            fh.seek(0)
            zip_file.writestr("query_results.csv", fh.read())
        # The archive must be finalized (with-block exited) before the
        # buffer is rewound and sent.
        file_object.seek(0)

        return send_file(file_object, mimetype="zip",
                         attachment_filename=file_name)

    if scroll_id:
        # Continue an existing scroll session instead of a fresh search.
        # pylint: disable=unexpected-keyword-arg
        result = self.datastore.client.scroll(scroll_id=scroll_id,
                                              scroll="1m")
    else:
        try:
            result = self.datastore.search(
                sketch_id=sketch_id,
                query_string=form.query.data,
                query_filter=query_filter,
                query_dsl=query_dsl,
                indices=indices,
                aggregations=index_stats_agg,
                return_fields=return_fields,
                enable_scroll=enable_scroll,
                timeline_ids=timeline_ids,
            )
        except ValueError as e:
            abort(HTTP_STATUS_CODE_BAD_REQUEST, str(e))

    # Get number of matching documents over time.
    histogram_interval = (result.get("aggregations",
                                     {}).get("count_over_time",
                                             {}).get("interval", ""))
    count_over_time = {"data": {}, "interval": histogram_interval}
    try:
        for bucket in result["aggregations"]["count_over_time"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_over_time["data"][key] = bucket.get("doc_count")
    except KeyError:
        # Scroll continuations have no aggregations in the response.
        pass

    # Get number of matching documents per index.
    count_per_index = {}
    try:
        for bucket in result["aggregations"]["indices"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_per_index[key] = bucket.get("doc_count")
    except KeyError:
        pass

    # Get number of matching documents per timeline.
    count_per_timeline = {}
    try:
        for bucket in result["aggregations"]["timelines"]["buckets"]:
            key = bucket.get("key")
            if key:
                count_per_timeline[key] = bucket.get("doc_count")
    except KeyError:
        pass

    # Total count for query regardless of returned results.
    count_total_complete = sum(count_per_index.values())

    # Collect comments per document when requested as a return field.
    comments = {}
    if "comment" in return_fields:
        events = Event.query.filter_by(sketch=sketch).all()
        for event in events:
            for comment in event.comments:
                comments.setdefault(event.document_id, [])
                comments[event.document_id].append(comment.comment)

    # Get labels for each event that matches the sketch.
    # Remove all other labels.
    for event in result["hits"]["hits"]:
        event["selected"] = False
        event["_source"]["label"] = []
        try:
            for label in event["_source"]["timesketch_label"]:
                if sketch.id != label["sketch_id"]:
                    continue
                event["_source"]["label"].append(label["name"])
            del event["_source"]["timesketch_label"]
        except KeyError:
            pass

        if "comment" in return_fields:
            event["_source"]["comment"] = comments.get(event["_id"], [])

    # Update or create user state view. This is used in the UI to let
    # the user get back to the last state in the explore view.
    # TODO: Deprecate this and change how last activity is determined, e.g
    # use the new Search History feature instead.
    view = View.get_or_create(user=current_user, sketch=sketch, name="")
    view.update_modification_time()
    view.query_string = form.query.data
    view.query_filter = json.dumps(query_filter, ensure_ascii=False)
    view.query_dsl = json.dumps(query_dsl, ensure_ascii=False)
    db_session.add(view)
    db_session.commit()

    # Search History
    search_node = None
    new_search = SearchHistory(user=current_user, sketch=sketch)

    # The parent node is either given explicitly by the client or is the
    # user's most recent search for this sketch.
    if parent:
        previous_search = SearchHistory.query.get(parent)
    else:
        previous_search = (SearchHistory.query.filter_by(
            user=current_user,
            sketch=sketch).order_by(SearchHistory.id.desc()).first())

    if not incognito:
        is_same_query = False
        is_same_filter = False

        new_search.query_string = form.query.data
        new_search.query_filter = json.dumps(query_filter,
                                             ensure_ascii=False)
        new_search.query_result_count = count_total_complete
        new_search.query_time = result["took"]

        if previous_search:
            new_search.parent = previous_search

            new_query = new_search.query_string
            new_filter = new_search.query_filter
            previous_query = previous_search.query_string
            previous_filter = previous_search.query_filter

            is_same_query = previous_query == new_query
            is_same_filter = previous_filter == new_filter

        # Only record the search when it differs from the previous one.
        if not all([is_same_query, is_same_filter]):
            db_session.add(new_search)
            db_session.commit()
            # Create metric if user creates a new branch.
            if new_search.parent:
                if len(new_search.parent.children) > 1:
                    METRICS["searchhistory"].labels(action="branch").inc()
        else:
            METRICS["searchhistory"].labels(
                action="ignore_same_query").inc()
    else:
        METRICS["searchhistory"].labels(action="incognito").inc()

    # If the new search was not committed it has no id; fall back to the
    # previous node.
    search_node = new_search if new_search.id else previous_search

    if not search_node:
        abort(HTTP_STATUS_CODE_BAD_REQUEST, "Unable to save search")

    search_node = search_node.build_tree(search_node, {}, recurse=False)

    # Add metadata for the query result. This is used by the UI to
    # render the event correctly and to display timing and hit count
    # information.
    tl_colors = {}
    tl_names = {}
    for timeline in sketch.timelines:
        tl_colors[timeline.searchindex.index_name] = timeline.color
        tl_names[timeline.searchindex.index_name] = timeline.name

    meta = {
        "es_time": result["took"],
        "es_total_count": result["hits"]["total"],
        "es_total_count_complete": count_total_complete,
        "timeline_colors": tl_colors,
        "timeline_names": tl_names,
        "count_per_index": count_per_index,
        "count_per_timeline": count_per_timeline,
        "count_over_time": count_over_time,
        "scroll_id": result.get("_scroll_id", ""),
        "search_node": search_node,
    }

    # Elasticsearch version 7.x returns total hits as a dictionary.
    # TODO: Refactor when version 6.x has been deprecated.
    if isinstance(meta["es_total_count"], dict):
        meta["es_total_count"] = meta["es_total_count"].get("value", 0)

    schema = {"meta": meta, "objects": result["hits"]["hits"]}
    return jsonify(schema)