Esempio n. 1
0
    def col_split(dataset, request: Request,
                  column_split_spec: ColumnSplitSpec, *args, **kwargs):
        """
        Run the query in two passes.

        Pass 1 runs ``query_func`` on a copy of the request whose selected
        columns are replaced by ``column_split_spec.get_min_columns()``.

        If pass 1 returns rows, pass 2 runs ``query_func`` on a second copy
        of the request, narrowed to what pass 1 found:
            - an ``IN`` condition on the id column, with the offset reset to
              0 and the limit set to the number of distinct ids;
            - the distinct project ids, written to the "project" extension;
            - the min/max timestamps, written to the "timeseries" extension.

        If pass 1 returns no rows, ``query_func`` is run on the request as
        it was passed in.
        """
        # query_func may mutate the request it is handed while evaluating
        # the query, so the first pass gets its own deep copy; the caller's
        # request must still be pristine when the full query is built.
        probe_request = copy.deepcopy(request)
        probe_request.query.set_selected_columns(
            column_split_spec.get_min_columns())
        probe = query_func(dataset, probe_request, *args, **kwargs)
        del probe_request

        rows = probe.result["data"]
        if not rows:
            # Nothing to narrow down to: run the request unchanged.
            return query_func(dataset, request, *args, **kwargs)

        narrowed = copy.deepcopy(request)

        id_column = column_split_spec.id_column
        unique_ids = list({row[id_column] for row in rows})
        narrowed.query.add_conditions([(id_column, "IN", unique_ids)])
        # The id condition already pins the exact rows, so paging restarts
        # from the beginning and is capped at the number of ids.
        narrowed.query.set_offset(0)
        narrowed.query.set_limit(len(unique_ids))

        project_column = column_split_spec.project_column
        unique_projects = list({row[project_column] for row in rows})
        narrowed.extensions["project"]["project"] = unique_projects

        timestamp_column = column_split_spec.timestamp_column
        seen_timestamps = [row[timestamp_column] for row in rows]
        timeseries = narrowed.extensions["timeseries"]
        earliest = util.parse_datetime(min(seen_timestamps))
        timeseries["from_date"] = earliest.isoformat()
        # to_date is translated to ('timestamp', '<', to_date) and events
        # are stored with 1 second granularity, so push the upper bound out
        # by one second to keep the latest event inside the range.
        latest = util.parse_datetime(max(seen_timestamps))
        timeseries["to_date"] = (latest + timedelta(seconds=1)).isoformat()

        return query_func(dataset, narrowed, *args, **kwargs)
Esempio n. 2
0
 def get_split_query_spec(self) -> Union[None, ColumnSplitSpec]:
     """Return the ColumnSplitSpec naming the id, project and timestamp columns."""
     split_spec = ColumnSplitSpec(
         id_column="events.event_id",
         project_column="events.project_id",
         timestamp_column="events.timestamp",
     )
     return split_spec