def export_aggregation(aggregation, sketch, zip_file):
    """Export an aggregation from a sketch and write it to a ZIP file.

    Args:
        aggregation (timesketch.models.sketch.Aggregation): an aggregation
            object.
        sketch (timesketch.models.sketch.Sketch): a sketch object.
        zip_file (ZipFile): a zip file handle that can be used to write
            content to.
    """
    name = '{0:04d}_{1:s}'.format(aggregation.id, aggregation.name)
    parameters = json.loads(aggregation.parameters)

    result_obj, meta = utils.run_aggregator(
        sketch.id, aggregator_name=aggregation.agg_type,
        aggregator_parameters=parameters)

    zip_file.writestr(
        'aggregations/{0:s}.meta'.format(name), data=json.dumps(meta))

    html = result_obj.to_chart(
        chart_name=meta.get('chart_type'),
        chart_title=aggregation.name,
        color=meta.get('chart_color'),
        interactive=True, as_html=True)
    zip_file.writestr('aggregations/{0:s}.html'.format(name), data=html)

    string_io = io.StringIO()
    data_frame = result_obj.to_pandas()
    data_frame.to_csv(string_io, index=False)
    string_io.seek(0)

    zip_file.writestr(
        'aggregations/{0:s}.csv'.format(name), data=string_io.read())
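# Minimal usage sketch (illustrative only, not part of the module above):
# write every stored aggregation in a sketch to a single archive. Assumes
# `sketch` is a timesketch.models.sketch.Sketch instance (e.g. obtained via
# Sketch.query.get(sketch_id)) whose `aggregations` relationship holds the
# stored Aggregation rows; the archive name is arbitrary.
import zipfile

with zipfile.ZipFile('sketch_export.zip', mode='w') as zip_file:
    for aggregation in sketch.aggregations:
        export_aggregation(aggregation, sketch, zip_file)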
def _export_tagged_events(self, sketch, zip_file):
    """Export all events that have been tagged and store in a ZIP file."""
    result = self.datastore.search(
        sketch_id=sketch.id,
        query_string='_exists_:tag',
        query_filter=self.DEFAULT_QUERY_FILTER,
        query_dsl='',
        indices=self.sketch_indices)

    fh = _query_results_to_filehandle(result, sketch)
    zip_file.writestr('events/tagged_events.csv', data=fh.read())

    parameters = {
        'limit': 100,
        'field': 'tag',
    }
    result_obj, meta = utils.run_aggregator(
        sketch.id, aggregator_name='field_bucket',
        aggregator_parameters=parameters)

    zip_file.writestr(
        'events/tagged_event_stats.meta', data=json.dumps(meta))

    html = result_obj.to_chart(
        chart_name='hbarchart',
        chart_title='Top 100 identified tags',
        interactive=True, as_html=True)
    zip_file.writestr('events/tagged_event_stats.html', data=html)

    string_io = io.StringIO()
    data_frame = result_obj.to_pandas()
    data_frame.to_csv(string_io, index=False)
    string_io.seek(0)

    zip_file.writestr(
        'events/tagged_event_stats.csv', data=string_io.read())
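# Note on the '_exists_:tag' query string used above: in Lucene/Elasticsearch
# query-string syntax, '_exists_:field' matches every document in which the
# field has any value. A rough DSL equivalent is sketched below for
# illustration only; the datastore layer builds the real request itself, and
# the variable names here are hypothetical.
query_string_body = {
    'query': {'query_string': {'query': '_exists_:tag'}}
}
equivalent_exists_body = {
    'query': {'exists': {'field': 'tag'}}
}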
def get(self, sketch_id):
    """Handles GET request to the resource.

    Returns:
        A sketch in JSON (instance of flask.wrappers.Response)
    """
    if current_user.admin:
        sketch = Sketch.query.get(sketch_id)
        if sketch and not sketch.has_permission(current_user, 'read'):
            return self._get_sketch_for_admin(sketch)
    else:
        sketch = Sketch.query.get_with_acl(sketch_id)

    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

    aggregators = {}
    for _, cls in aggregator_manager.AggregatorManager.get_aggregators():
        aggregators[cls.NAME] = {
            'form_fields': cls.FORM_FIELDS,
            'display_name': cls.DISPLAY_NAME,
            'description': cls.DESCRIPTION
        }

    # Get mappings for all indices in the sketch. This is used to set
    # columns shown in the event list.
    sketch_indices = [
        t.searchindex.index_name
        for t in sketch.active_timelines
        if t.get_status.status != 'archived'
    ]

    # Get event count and size on disk for each index in the sketch.
    stats_per_index = {}
    for timeline in sketch.active_timelines:
        if timeline.get_status.status == 'archived':
            continue
        stats_per_index[timeline.searchindex.index_name] = {
            'count': 0,
            'bytes': 0,
            'data_types': []
        }

    if sketch_indices:
        try:
            es_stats = self.datastore.client.indices.stats(
                index=sketch_indices, metric='docs,store')
        except elasticsearch.NotFoundError:
            es_stats = {}
            logger.error(
                'Unable to find index in datastore', exc_info=True)

        # Stats for index. Num docs per shard and size on disk.
        for index_name, stats in es_stats.get('indices', {}).items():
            doc_count_all_shards = stats.get(
                'total', {}).get('docs', {}).get('count', 0)
            bytes_on_disk = stats.get(
                'total', {}).get('store', {}).get('size_in_bytes', 0)
            num_shards = stats.get('_shards', {}).get('total', 1)
            doc_count = int(doc_count_all_shards / num_shards)

            stats_per_index[index_name] = {
                'count': doc_count,
                'bytes': bytes_on_disk
            }

            # Stats per data type in the index.
            parameters = {'limit': '100', 'field': 'data_type'}
            result_obj, _ = utils.run_aggregator(
                sketch.id, aggregator_name='field_bucket',
                aggregator_parameters=parameters, index=[index_name])
            stats_per_index[index_name]['data_types'] = result_obj.values

    if not sketch_indices:
        mappings_settings = {}
    else:
        try:
            mappings_settings = self.datastore.client.indices.get_mapping(
                index=sketch_indices)
        except elasticsearch.NotFoundError:
            logger.error(
                'Unable to get indices mapping in datastore', exc_info=True)
            mappings_settings = {}

    mappings = []
    for _, value in mappings_settings.items():
        # The structure is different in ES version 6.x and lower. This
        # check makes sure we support both old and new versions.
        properties = value['mappings'].get('properties')
        if not properties:
            properties = next(
                iter(value['mappings'].values())).get('properties')

        for field, value_dict in properties.items():
            mapping_dict = {}
            # Exclude internal fields
            if field.startswith('__'):
                continue
            if field == 'timesketch_label':
                continue
            mapping_dict['field'] = field
            mapping_dict['type'] = value_dict.get('type', 'n/a')
            mappings.append(mapping_dict)

    # Make the list of dicts unique
    mappings = {v['field']: v for v in mappings}.values()

    views = []
    for view in sketch.get_named_views:
        if not view.user:
            username = 'System'
        else:
            username = view.user.username
        view = {
            'name': view.name,
            'description': view.description,
            'id': view.id,
            'query': view.query_string,
            'filter': view.query_filter,
            'user': username,
            'created_at': view.created_at,
            'updated_at': view.updated_at
        }
        views.append(view)

    meta = dict(
        aggregators=aggregators,
        views=views,
        searchtemplates=[{
            'name': searchtemplate.name,
            'id': searchtemplate.id
        } for searchtemplate in SearchTemplate.query.all()],
        emojis=get_emojis_as_dict(),
        permissions={
            'public': bool(sketch.is_public),
            'read': bool(sketch.has_permission(current_user, 'read')),
            'write': bool(sketch.has_permission(current_user, 'write')),
            'delete': bool(sketch.has_permission(current_user, 'delete')),
        },
        collaborators={
            'users': [user.username for user in sketch.collaborators],
            'groups': [group.name for group in sketch.groups],
        },
        analyzers=[
            x for x, y in analyzer_manager.AnalysisManager.get_analyzers()
        ],
        attributes=utils.get_sketch_attributes(sketch),
        mappings=list(mappings),
        stats=stats_per_index,
        filter_labels=self.datastore.get_filter_labels(
            sketch.id, sketch_indices),
        sketch_labels=[label.label for label in sketch.labels])

    return self.to_json(sketch, meta=meta)
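# Hypothetical usage sketch: exercising the handler above through Flask's
# test client. The route and the meta field names follow the code above;
# the authenticated `client` fixture, the sketch id, and the exact response
# envelope are assumptions.
response = client.get('/api/v1/sketches/1/')
data = response.get_json()

meta = data['meta']
print(meta['permissions'])           # e.g. {'public': False, 'read': True, ...}
print(sorted(meta['aggregators']))   # registered aggregator names
for saved_view in meta['views']:
    print(saved_view['name'], saved_view['user'])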