Example #1
    def _get_scalar_average(
        self,
        metrics_interval: MetricIntervalGroup,
        task_id: str,
        company_id: str,
        event_type: EventType,
        key: ScalarKey,
    ) -> Sequence[MetricData]:
        """
        Retrieve scalar histograms for several metric variants that share the same interval
        """
        interval, metrics = metrics_interval
        aggregation = self._add_aggregation_average(
            key.get_aggregation(interval))
        aggs = {
            "metrics": {
                "terms": {
                    "field": "metric",
                    "size": EventSettings.max_metrics_count,
                    "order": {
                        "_key": "asc"
                    },
                },
                "aggs": {
                    "variants": {
                        "terms": {
                            "field": "variant",
                            "size": EventSettings.max_variants_count,
                            "order": {
                                "_key": "asc"
                            },
                        },
                        "aggs": aggregation,
                    }
                },
            }
        }
        aggs_result = self._query_aggregation_for_task_metrics(
            company_id=company_id,
            event_type=event_type,
            aggs=aggs,
            task_id=task_id,
            metrics=metrics,
        )

        if not aggs_result:
            return []

        metrics = [(
            metric["key"],
            {
                variant["key"]: {
                    "name": variant["key"],
                    **key.get_iterations_data(variant),
                }
                for variant in metric["variants"]["buckets"]
            },
        ) for metric in aggs_result["metrics"]["buckets"]]
        return metrics
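A minimal standalone sketch of the flattening step above, run against a hypothetical aggregation response (the "avg" sub-aggregation and its value stand in for whatever key.get_iterations_data extracts from each variant bucket):

# Hypothetical Elasticsearch terms-aggregation response (sample data)
aggs_result = {
    "metrics": {
        "buckets": [
            {
                "key": "loss",
                "variants": {
                    "buckets": [
                        {"key": "train", "avg": {"value": 0.42}},
                        {"key": "val", "avg": {"value": 0.58}},
                    ]
                },
            }
        ]
    }
}

# Flatten the nested metric/variant buckets into (metric, variants) tuples
metrics = [
    (
        metric["key"],
        {
            variant["key"]: {"name": variant["key"], "value": variant["avg"]["value"]}
            for variant in metric["variants"]["buckets"]
        },
    )
    for metric in aggs_result["metrics"]["buckets"]
]
print(metrics[0][0])           # loss
print(metrics[0][1]["train"])  # {'name': 'train', 'value': 0.42}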
Example #2
    def get_scalar_metrics_average_per_iter(
        self,
        company_id: str,
        task_id: str,
        samples: int,
        key: ScalarKeyEnum,
        metric_variants: MetricVariants = None,
    ) -> dict:
        """
        Get a scalar metric histogram per metric and variant.
        The number of points in each histogram will not exceed
        the requested samples
        """
        event_type = EventType.metrics_scalar
        if check_empty_data(self.es,
                            company_id=company_id,
                            event_type=event_type):
            return {}

        return self._get_scalar_average_per_iter_core(
            task_id=task_id,
            company_id=company_id,
            event_type=event_type,
            samples=samples,
            key=ScalarKey.resolve(key),
            metric_variants=metric_variants,
        )
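The samples cap is enforced by bucketing the x-axis into fixed-width intervals; a simplified pure-Python analogue of choosing a bucket width so the point count stays under the cap (the real ScalarKey aggregation may compute this differently):

import math

def histogram_interval(min_iter: int, max_iter: int, samples: int) -> int:
    # Pick a bucket width so that span / width never exceeds `samples`
    span = max(max_iter - min_iter, 1)
    return max(1, math.ceil(span / samples))

# e.g. 10_000 iterations downsampled to at most 500 buckets -> width 20
print(histogram_interval(0, 10_000, 500))  # 20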
Example #3
    def get_task_events(
        self,
        event_type: EventType,
        company_id: str,
        task_id: str,
        batch_size: int,
        navigate_earlier: bool = True,
        from_key_value: Optional[Any] = None,
        metric_variants: MetricVariants = None,
        key: ScalarKeyEnum = ScalarKeyEnum.timestamp,
        **kwargs,
    ) -> TaskEventsResult:
        if check_empty_data(self.es, company_id, event_type):
            return TaskEventsResult()

        from_key_value = kwargs.pop("from_timestamp", from_key_value)

        res = TaskEventsResult()
        res.events, res.total_events = self._get_events(
            event_type=event_type,
            company_id=company_id,
            task_id=task_id,
            batch_size=batch_size,
            navigate_earlier=navigate_earlier,
            from_key_value=from_key_value,
            metric_variants=metric_variants,
            key=ScalarKey.resolve(key),
        )
        return res
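from_key_value drives keyset pagination: each page starts strictly past the last key returned by the previous page, in the direction set by navigate_earlier. A runnable toy version of the pattern with made-up events:

events = [{"iter": i, "value": i * 0.1} for i in range(10)]

def get_page(from_key_value, batch_size, navigate_earlier=False):
    # Keep only events strictly past the last key seen, in scroll direction
    if navigate_earlier:
        page = sorted(
            (e for e in events if from_key_value is None or e["iter"] < from_key_value),
            key=lambda e: e["iter"],
            reverse=True,
        )
    else:
        page = sorted(
            (e for e in events if from_key_value is None or e["iter"] > from_key_value),
            key=lambda e: e["iter"],
        )
    return page[:batch_size]

from_key = None
while True:
    page = get_page(from_key, batch_size=4)
    if not page:
        break
    print([e["iter"] for e in page])  # [0, 1, 2, 3], then [4, 5, 6, 7], then [8, 9]
    from_key = page[-1]["iter"]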
Example #4
    def compare_scalar_metrics_average_per_iter(
        self,
        company_id,
        task_ids: Sequence[str],
        samples,
        key: ScalarKeyEnum,
        allow_public=True,
    ):
        """
        Compare scalar metrics for different tasks per metric and variant
        The number of points in each histogram will not exceed the requested samples
        """
        with translate_errors_context():
            task_objs = Task.get_many(
                company=company_id,
                query=Q(id__in=task_ids),
                allow_public=allow_public,
                override_projection=("id", "name", "company",
                                     "company_origin"),
                return_dicts=False,
            )
            if len(task_objs) < len(task_ids):
                invalid = tuple(set(task_ids) - set(r.id for r in task_objs))
                raise errors.bad_request.InvalidTaskId(company=company_id,
                                                       ids=invalid)
            task_name_by_id = {t.id: t.name for t in task_objs}

        companies = {t.get_index_company() for t in task_objs}
        if len(companies) > 1:
            raise errors.bad_request.InvalidTaskId(
                "only tasks from the same company are supported")

        event_type = EventType.metrics_scalar
        company_id = next(iter(companies))
        if check_empty_data(self.es,
                            company_id=company_id,
                            event_type=event_type):
            return {}

        get_scalar_average_per_iter = partial(
            self._get_scalar_average_per_iter_core,
            company_id=company_id,
            event_type=event_type,
            samples=samples,
            key=ScalarKey.resolve(key),
            run_parallel=False,
        )
        with ThreadPoolExecutor(max_workers=EventSettings.max_workers) as pool:
            task_metrics = zip(task_ids,
                               pool.map(get_scalar_average_per_iter, task_ids))

        res = defaultdict(lambda: defaultdict(dict))
        for task_id, task_data in task_metrics:
            task_name = task_name_by_id[task_id]
            for metric_key, metric_data in task_data.items():
                for variant_key, variant_data in metric_data.items():
                    variant_data["name"] = task_name
                    res[metric_key][variant_key][task_id] = variant_data

        return res
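The merge step regroups the per-task histograms as res[metric][variant][task_id] and renames each variant entry after its task, so the same variant from different tasks can be overlaid. A runnable sketch with made-up data (the x/y histogram shape is an assumption):

from collections import defaultdict

task_metrics = [
    ("task-1", {"loss": {"train": {"x": [0, 1], "y": [0.9, 0.7]}}}),
    ("task-2", {"loss": {"train": {"x": [0, 1], "y": [0.8, 0.6]}}}),
]
task_name_by_id = {"task-1": "baseline", "task-2": "tuned"}

res = defaultdict(lambda: defaultdict(dict))
for task_id, task_data in task_metrics:
    for metric_key, metric_data in task_data.items():
        for variant_key, variant_data in metric_data.items():
            variant_data["name"] = task_name_by_id[task_id]  # label by task name
            res[metric_key][variant_key][task_id] = variant_data

print(res["loss"]["train"]["task-2"]["name"])  # tuned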
Example #5
def scalar_metrics_iter_raw(call: APICall, company_id: str,
                            request: ScalarMetricsIterRawRequest):
    key = request.key or ScalarKeyEnum.iter
    scalar_key = ScalarKey.resolve(key)
    if request.batch_size and request.batch_size < 0:
        raise errors.bad_request.ValidationError(
            "batch_size should be non negative number")

    if not request.scroll_id:
        from_key_value = None
        total = None
        request.batch_size = request.batch_size or 10_000
    else:
        try:
            scroll = ScalarMetricsIterRawScroll.from_scroll_id(
                request.scroll_id)
        except ValueError:
            raise errors.bad_request.InvalidScrollId(
                scroll_id=request.scroll_id)

        if scroll.from_key_value is None:
            return make_response(scroll_id=request.scroll_id,
                                 total=scroll.total,
                                 variants={})

        from_key_value = scalar_key.cast_value(scroll.from_key_value)
        total = scroll.total
        request.batch_size = request.batch_size or scroll.request.batch_size

    task_id = request.task

    task = task_bll.assert_exists(
        company_id,
        task_id,
        allow_public=True,
        only=("company", ),
    )[0]

    metric_variants = _get_metric_variants_from_request([request.metric])

    if request.count_total and total is None:
        total = event_bll.events_iterator.count_task_events(
            event_type=EventType.metrics_scalar,
            company_id=task.company,
            task_id=task_id,
            metric_variants=metric_variants,
        )

    batch_size = min(
        request.batch_size,
        int(
            config.get("services.events.events_retrieval.max_raw_scalars_size",
                       200_000)),
    )

    events = []
    for _ in range(math.ceil(batch_size / 10_000)):
        res = event_bll.events_iterator.get_task_events(
            event_type=EventType.metrics_scalar,
            company_id=task.company,
            task_id=task_id,
            batch_size=min(batch_size, 10_000),
            navigate_earlier=False,
            from_key_value=from_key_value,
            metric_variants=metric_variants,
            key=key,
        )
        if not res.events:
            break
        events.extend(res.events)
        from_key_value = str(events[-1][scalar_key.field])
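The capped batch is fetched in pages of at most 10,000 events. A quick sketch of the arithmetic with hypothetical numbers; note that the page size is not shrunk on the final pass, so the last page can overshoot the cap by up to one page:

import math

requested, configured_max = 35_000, 200_000
batch_size = min(requested, configured_max)  # 35_000
pages = math.ceil(batch_size / 10_000)       # 4
page_size = min(batch_size, 10_000)          # 10_000 on every pass
print(batch_size, pages, pages * page_size)  # 35000 4 40000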
Example #6
def get_task_events(call, company_id, request: TaskEventsRequest):
    task_id = request.task

    task = task_bll.assert_exists(
        company_id,
        task_id,
        allow_public=True,
        only=("company", ),
    )[0]

    key = ScalarKeyEnum.iter
    scalar_key = ScalarKey.resolve(key)

    if not request.scroll_id:
        from_key_value = None if (request.order == LogOrderEnum.desc) else 0
        total = None
    else:
        try:
            scroll = GetTaskEventsScroll.from_scroll_id(request.scroll_id)
        except ValueError:
            raise errors.bad_request.InvalidScrollId(
                scroll_id=request.scroll_id)

        if scroll.from_key_value is None:
            return make_response(scroll_id=request.scroll_id,
                                 total=scroll.total,
                                 events=[])

        from_key_value = scalar_key.cast_value(scroll.from_key_value)
        total = scroll.total

        scroll.request.batch_size = request.batch_size or scroll.request.batch_size
        request = scroll.request

    navigate_earlier = request.order == LogOrderEnum.desc
    metric_variants = _get_metric_variants_from_request(request.metrics)

    if request.count_total and total is None:
        total = event_bll.events_iterator.count_task_events(
            event_type=request.event_type,
            company_id=task.company,
            task_id=task_id,
            metric_variants=metric_variants,
        )

    batch_size = min(
        request.batch_size,
        int(
            config.get("services.events.events_retrieval.max_raw_scalars_size",
                       10_000)),
    )

    res = event_bll.events_iterator.get_task_events(
        event_type=request.event_type,
        company_id=task.company,
        task_id=task_id,
        batch_size=batch_size,
        key=ScalarKeyEnum.iter,
        navigate_earlier=navigate_earlier,
        from_key_value=from_key_value,
        metric_variants=metric_variants,
    )

    scroll = GetTaskEventsScroll(
        from_key_value=str(res.events[-1][scalar_key.field])
        if res.events else None,
        total=total,
        request=request,
    )

    return make_response(
        returned=len(res.events),
        total=total,
        scroll_id=scroll.get_scroll_id(),
        events=res.events,
    )
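The scroll id round-trips the resume state (last key value, running total, original request) between calls. A pure-Python sketch of the idea; the real GetTaskEventsScroll serialization is not shown above, so this encoding is purely illustrative:

import base64
import json

def to_scroll_id(state: dict) -> str:
    return base64.urlsafe_b64encode(json.dumps(state).encode()).decode()

def from_scroll_id(scroll_id: str) -> dict:
    return json.loads(base64.urlsafe_b64decode(scroll_id.encode()))

scroll_id = to_scroll_id({"from_key_value": "41", "total": 120})
state = from_scroll_id(scroll_id)
print(state["from_key_value"])  # next page resumes after iteration 41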