Example #1
0
    def _export_tagged_events(self, sketch, zip_file):
        """Write all tagged events and their tag statistics to a ZIP file.

        Four entries are added below ``events/``: the tagged events as CSV,
        followed by the metadata, an HTML chart and a CSV table produced by
        running the ``field_bucket`` aggregator over the ``tag`` field.

        Args:
            sketch: the sketch to export from; its ``id`` is used for both
                the search and the aggregation.
            zip_file: a handle with a ``writestr`` method that receives the
                exported content.
        """
        # Every event carrying a tag, dumped as CSV.
        search_result = self.datastore.search(
            sketch_id=sketch.id,
            query_string='_exists_:tag',
            query_filter=self.DEFAULT_QUERY_FILTER,
            query_dsl='',
            indices=self.sketch_indices)
        events_handle = export.query_results_to_filehandle(
            search_result, sketch)
        zip_file.writestr(
            'events/tagged_events.csv', data=events_handle.read())

        # Bucket the events by tag to get the statistics.
        aggregation, aggregation_meta = utils.run_aggregator(
            sketch.id,
            aggregator_name='field_bucket',
            aggregator_parameters={'limit': 100, 'field': 'tag'})
        zip_file.writestr(
            'events/tagged_event_stats.meta',
            data=json.dumps(aggregation_meta))

        chart_html = aggregation.to_chart(
            chart_name='hbarchart',
            chart_title='Top 100 identified tags',
            interactive=True,
            as_html=True)
        zip_file.writestr('events/tagged_event_stats.html', data=chart_html)

        # Same statistics again, this time as a plain CSV table.
        csv_buffer = io.StringIO()
        aggregation.to_pandas().to_csv(csv_buffer, index=False)
        zip_file.writestr(
            'events/tagged_event_stats.csv', data=csv_buffer.getvalue())
Example #2
0
    def _export_view(self, view, sketch, zip_file):
        """Export a view from a sketch and write it to a ZIP file.

        Two entries are written, both named after the zero padded view ID
        and the view name: ``views/<name>.csv`` holds the events matching the
        view and ``views/<name>.meta`` holds a JSON description of the view.

        Args:
            view (timesketch.models.sketch.View): a View object.
            sketch (timesketch.models.sketch.Sketch): a sketch object.
            zip_file (ZipFile): a zip file handle that can be used to write
                content to.
        """
        name = "{0:04d}_{1:s}".format(view.id, view.name)

        # Guard the parse the same way query_dsl is guarded further down, so
        # a view without a stored filter falls back to the default.
        query_filter = json.loads(view.query_filter) if view.query_filter else None
        if not query_filter:
            # Work on a copy: the size overrides below must not leak into the
            # shared DEFAULT_QUERY_FILTER object.
            query_filter = dict(self.DEFAULT_QUERY_FILTER)

        indices = query_filter.get("indices", self.sketch_indices)
        if not indices or "_all" in indices:
            indices = self.sketch_indices

        # Ignoring the size limits in views to reduce the amount of queries
        # needed to get all the data.
        query_filter["terminate_after"] = 10000
        query_filter["size"] = 10000

        # A DSL that decodes to an empty document is treated as "no DSL".
        query_dsl = view.query_dsl
        if query_dsl and not json.loads(query_dsl):
            query_dsl = None

        result = self.datastore.search(
            sketch_id=sketch.id,
            query_string=view.query_string,
            query_filter=query_filter,
            query_dsl=query_dsl,
            enable_scroll=True,
            indices=indices,
        )

        scroll_id = result.get("_scroll_id", "")
        if scroll_id:
            # Collect one frame per page and concatenate once at the end;
            # concatenating inside the loop copies all earlier rows each time.
            frames = [export.query_results_to_dataframe(result, sketch)]

            total_count = result.get("hits", {}).get("total", {}).get("value", 0)

            if isinstance(total_count, str):
                try:
                    total_count = int(total_count, 10)
                except ValueError:
                    total_count = 0

            event_count = len(result["hits"]["hits"])

            while event_count < total_count:
                # pylint: disable=unexpected-keyword-arg
                result = self.datastore.client.scroll(scroll_id=scroll_id, scroll="1m")
                # Each response may carry a new scroll ID; always continue
                # with the most recent one.
                scroll_id = result.get("_scroll_id", scroll_id)

                hits = result["hits"]["hits"]
                event_count += len(hits)
                add_frame = export.query_results_to_dataframe(result, sketch)
                if add_frame.shape[0]:
                    frames.append(add_frame)
                else:
                    # Lazy %-formatting also copes with a query that is None,
                    # which "{:s}".format() would reject with a TypeError.
                    logger.warning(
                        "Data Frame returned from a search operation was "
                        "empty, count %d out of %d total. Query is: "
                        '"%s"',
                        event_count,
                        total_count,
                        view.query_string or query_dsl,
                    )

                if not hits:
                    # A page without hits means the scroll is exhausted even
                    # though the reported total was not reached. Stop here,
                    # otherwise event_count never grows and the loop spins
                    # forever.
                    break

            if len(frames) == 1:
                data_frame = frames[0]
            else:
                data_frame = pd.concat(frames, sort=False)

            fh = io.StringIO()
            data_frame.to_csv(fh, index=False)
            fh.seek(0)
        else:
            fh = export.query_results_to_filehandle(result, sketch)

        zip_file.writestr("views/{0:s}.csv".format(name), data=fh.read())

        # Views without an owner get a masked username in the metadata.
        username = view.user.username if view.user else "******"
        meta = {
            "name": view.name,
            "view_id": view.id,
            "description": view.description,
            "query_string": view.query_string,
            "query_filter": view.query_filter,
            "query_dsl": view.query_dsl,
            "username": username,
            "sketch_id": view.sketch_id,
        }
        zip_file.writestr("views/{0:s}.meta".format(name), data=json.dumps(meta))
Example #3
0
    def _export_view(self, view, sketch, zip_file):
        """Export a view from a sketch and write it to a ZIP file.

        Two entries are written, both named after the zero padded view ID
        and the view name: ``views/<name>.csv`` holds the events matching the
        view and ``views/<name>.meta`` holds a JSON description of the view.

        Args:
            view (timesketch.models.sketch.View): a View object.
            sketch (timesketch.models.sketch.Sketch): a sketch object.
            zip_file (ZipFile): a zip file handle that can be used to write
                content to.
        """
        name = '{0:04d}_{1:s}'.format(view.id, view.name)

        # Guard the parse the same way query_dsl is guarded further down, so
        # a view without a stored filter falls back to the default.
        query_filter = None
        if view.query_filter:
            query_filter = json.loads(view.query_filter)
        if not query_filter:
            # Work on a copy: the size overrides below must not leak into the
            # shared DEFAULT_QUERY_FILTER object.
            query_filter = dict(self.DEFAULT_QUERY_FILTER)

        indices = query_filter.get('indices', self.sketch_indices)
        if not indices or '_all' in indices:
            indices = self.sketch_indices

        # Ignoring the size limits in views to reduce the amount of queries
        # needed to get all the data.
        query_filter['terminate_after'] = 10000
        query_filter['size'] = 10000

        # A DSL that decodes to an empty document is treated as "no DSL".
        query_dsl = view.query_dsl
        if query_dsl and not json.loads(query_dsl):
            query_dsl = None

        result = self.datastore.search(
            sketch_id=sketch.id,
            query_string=view.query_string,
            query_filter=query_filter,
            query_dsl=query_dsl,
            enable_scroll=True,
            indices=indices)

        scroll_id = result.get('_scroll_id', '')
        if scroll_id:
            # Collect one frame per page and concatenate once at the end;
            # concatenating inside the loop copies all earlier rows each time.
            frames = [export.query_results_to_dataframe(result, sketch)]

            total_count = result.get(
                'hits', {}).get('total', {}).get('value', 0)

            if isinstance(total_count, str):
                try:
                    total_count = int(total_count, 10)
                except ValueError:
                    total_count = 0

            event_count = len(result['hits']['hits'])

            while event_count < total_count:
                # pylint: disable=unexpected-keyword-arg
                result = self.datastore.client.scroll(
                    scroll_id=scroll_id, scroll='1m')
                # Each response may carry a new scroll ID; always continue
                # with the most recent one.
                scroll_id = result.get('_scroll_id', scroll_id)

                hits = result['hits']['hits']
                event_count += len(hits)
                add_frame = export.query_results_to_dataframe(result, sketch)
                if add_frame.shape[0]:
                    frames.append(add_frame)
                else:
                    # Lazy %-formatting also copes with a query that is None,
                    # which '{:s}'.format() would reject with a TypeError.
                    logger.warning(
                        'Data Frame returned from a search operation was '
                        'empty, count %d out of %d total. Query is: '
                        '"%s"',
                        event_count, total_count,
                        view.query_string or query_dsl)

                if not hits:
                    # A page without hits means the scroll is exhausted even
                    # though the reported total was not reached. Stop here,
                    # otherwise event_count never grows and the loop spins
                    # forever.
                    break

            if len(frames) == 1:
                data_frame = frames[0]
            else:
                data_frame = pd.concat(frames, sort=False)

            fh = io.StringIO()
            data_frame.to_csv(fh, index=False)
            fh.seek(0)
        else:
            fh = export.query_results_to_filehandle(result, sketch)

        zip_file.writestr(
            'views/{0:s}.csv'.format(name), data=fh.read())

        # Views without an owner get a masked username in the metadata.
        username = view.user.username if view.user else '******'
        meta = {
            'name': view.name,
            'view_id': view.id,
            'description': view.description,
            'query_string': view.query_string,
            'query_filter': view.query_filter,
            'query_dsl': view.query_dsl,
            'username': username,
            'sketch_id': view.sketch_id,
        }
        zip_file.writestr(
            'views/{0:s}.meta'.format(name), data=json.dumps(meta))