Beispiel #1
0
    def compare_scalar_metrics_average_per_iter(
        self,
        company_id,
        task_ids: Sequence[str],
        samples,
        key: ScalarKeyEnum,
        allow_public=True,
    ):
        """
        Compare scalar metrics for different tasks per metric and variant
        The amount of points in each histogram should not exceed the requested samples
        """
        task_name_by_id = {}
        with translate_errors_context():
            task_objs = Task.get_many(
                company=company_id,
                query=Q(id__in=task_ids),
                allow_public=allow_public,
                override_projection=("id", "name", "company"),
                return_dicts=False,
            )
            if len(task_objs) < len(task_ids):
                invalid = tuple(set(task_ids) - set(r.id for r in task_objs))
                raise errors.bad_request.InvalidTaskId(company=company_id, ids=invalid)
            task_name_by_id = {t.id: t.name for t in task_objs}

        companies = {t.company for t in task_objs}
        if len(companies) > 1:
            raise errors.bad_request.InvalidTaskId(
                "only tasks from the same company are supported"
            )

        es_index = self.get_index_name(next(iter(companies)), "training_stats_scalar")
        if not self.es.indices.exists(es_index):
            return {}

        get_scalar_average_per_iter = partial(
            self._get_scalar_average_per_iter_core,
            es_index=es_index,
            samples=samples,
            key=ScalarKey.resolve(key),
            run_parallel=False,
        )
        with ThreadPoolExecutor(max_workers=self._max_concurrency) as pool:
            task_metrics = zip(
                task_ids, pool.map(get_scalar_average_per_iter, task_ids)
            )

        res = defaultdict(lambda: defaultdict(dict))
        for task_id, task_data in task_metrics:
            task_name = task_name_by_id[task_id]
            for metric_key, metric_data in task_data.items():
                for variant_key, variant_data in metric_data.items():
                    variant_data["name"] = task_name
                    res[metric_key][variant_key][task_id] = variant_data

        return res
    def compare_scalar_metrics_average_per_iter(
        self,
        company_id,
        task_ids: Sequence[str],
        samples,
        key: ScalarKeyEnum,
        allow_public=True,
    ):
        """
        Compare scalar metrics for different tasks per metric and variant
        The amount of points in each histogram should not exceed the requested samples
        """
        if len(task_ids) > self.MAX_TASKS_COUNT:
            raise errors.BadRequest(
                f"Up to {self.MAX_TASKS_COUNT} tasks supported for comparison",
                len(task_ids),
            )

        task_name_by_id = {}
        with translate_errors_context():
            task_objs = Task.get_many(
                company=company_id,
                query=Q(id__in=task_ids),
                allow_public=allow_public,
                override_projection=("id", "name", "company"),
                return_dicts=False,
            )
            if len(task_objs) < len(task_ids):
                invalid = tuple(set(task_ids) - set(r.id for r in task_objs))
                raise errors.bad_request.InvalidTaskId(company=company_id, ids=invalid)

            task_name_by_id = {t.id: t.name for t in task_objs}

        companies = {t.company for t in task_objs}
        if len(companies) > 1:
            raise errors.bad_request.InvalidTaskId(
                "only tasks from the same company are supported"
            )

        ret = self._run_get_scalar_metrics_as_parallel(
            next(iter(companies)),
            task_ids=task_ids,
            samples=samples,
            key=ScalarKey.resolve(key),
            get_func=self._get_scalar_average_per_task,
        )

        for metric_data in ret.values():
            for variant_data in metric_data.values():
                for task_id, task_data in variant_data.items():
                    task_data["name"] = task_name_by_id[task_id]

        return ret
Beispiel #3
0
    def get_scalar_metrics_average_per_iter(
        self, company_id: str, task_id: str, samples: int, key: ScalarKeyEnum
    ) -> dict:
        """
        Get scalar metric histogram per metric and variant
        The amount of points in each histogram should not exceed
        the requested samples
        """
        es_index = self.get_index_name(company_id, "training_stats_scalar")
        if not self.es.indices.exists(es_index):
            return {}

        return self._get_scalar_average_per_iter_core(
            task_id, es_index, samples, ScalarKey.resolve(key)
        )
    def get_scalar_metrics_average_per_iter(
        self, company_id: str, task_id: str, samples: int, key: ScalarKeyEnum
    ) -> dict:
        """
        Get scalar metric histogram per metric and variant
        The amount of points in each histogram should not exceed
        the requested samples
        """

        return self._run_get_scalar_metrics_as_parallel(
            company_id,
            task_ids=[task_id],
            samples=samples,
            key=ScalarKey.resolve(key),
            get_func=self._get_scalar_average,
        )