Code example #1
    def compare_scalar_metrics_average_per_iter(
        self,
        company_id,
        task_ids: Sequence[str],
        samples,
        key: ScalarKeyEnum,
        allow_public=True,
    ):
        """
        Compare scalar metrics for different tasks per metric and variant
        The amount of points in each histogram should not exceed the requested samples
        """
        if len(task_ids) > self.MAX_TASKS_COUNT:
            raise errors.BadRequest(
                f"Up to {self.MAX_TASKS_COUNT} tasks supported for comparison",
                len(task_ids),
            )

        task_name_by_id = {}
        with translate_errors_context():
            task_objs = Task.get_many(
                company=company_id,
                query=Q(id__in=task_ids),
                allow_public=allow_public,
                override_projection=("id", "name", "company"),
                return_dicts=False,
            )
            if len(task_objs) < len(task_ids):
                invalid = tuple(set(task_ids) - set(r.id for r in task_objs))
                raise errors.bad_request.InvalidTaskId(company=company_id, ids=invalid)

            task_name_by_id = {t.id: t.name for t in task_objs}

        companies = {t.company for t in task_objs}
        if len(companies) > 1:
            raise errors.bad_request.InvalidTaskId(
                "only tasks from the same company are supported"
            )

        ret = self._run_get_scalar_metrics_as_parallel(
            next(iter(companies)),
            task_ids=task_ids,
            samples=samples,
            key=ScalarKey.resolve(key),
            get_func=self._get_scalar_average_per_task,
        )

        for metric_data in ret.values():
            for variant_data in metric_data.values():
                for task_id, task_data in variant_data.items():
                    task_data["name"] = task_name_by_id[task_id]

        return ret
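
A minimal usage sketch for example #1 (the `event_bll` instance, company object, and task ids below are assumptions, not part of the snippet): the returned value is nested metric -> variant -> task_id, with each leaf dict carrying the task name filled in above.

# Hypothetical usage; event_bll, company and the task ids are placeholders.
# ScalarKeyEnum is assumed to be importable from the project's event models.
histograms = event_bll.compare_scalar_metrics_average_per_iter(
    company_id=company.id,
    task_ids=["<task-a>", "<task-b>"],
    samples=500,
    key=ScalarKeyEnum.iter,
)
for metric, variants in histograms.items():
    for variant, tasks in variants.items():
        for task_id, task_data in tasks.items():
            print(metric, variant, task_id, task_data["name"])
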
Code example #2
File: event_bll.py  Project: juanlp/trains-server
    def add_events(self, company_id, events, worker, allow_locked_tasks=False):
        actions = []
        task_ids = set()
        task_iteration = defaultdict(lambda: 0)
        task_last_events = nested_dict(3, dict)  # task_id -> metric_hash -> variant_hash -> MetricEvent

        for event in events:
            if "type" not in event:
                raise errors.BadRequest("Event must have a 'type' field",
                                        event=event)

            # remove spaces from event type
            event_type = event["type"].replace(" ", "_")
            if event_type not in EVENT_TYPES:
                raise errors.BadRequest(
                    "Invalid event type {}".format(event_type),
                    event=event,
                    types=EVENT_TYPES,
                )

            event["type"] = event_type

            # @timestamp indicates the time the event is written, not when it happened
            event["@timestamp"] = es_factory.get_es_timestamp_str()

            # for backward compatibility
            if "ts" in event:
                event["timestamp"] = event.pop("ts")

            # set timestamp and worker if not sent
            if "timestamp" not in event:
                event["timestamp"] = es_factory.get_timestamp_millis()

            if "worker" not in event:
                event["worker"] = worker

            # force iter to be a long int
            iter = event.get("iter")
            if iter is not None:
                iter = int(iter)
                event["iter"] = iter

            # used to have "values" to indicate array. no need anymore
            if "values" in event:
                event["value"] = event["values"]
                del event["values"]

            index_name = EventMetrics.get_index_name(company_id, event_type)
            es_action = {
                "_op_type": "index",  # overwrite if exists with same ID
                "_index": index_name,
                "_type": "event",
                "_source": event,
            }

            # for "log" events, don't assign a custom _id - whatever is sent is written (not overwritten)
            if event_type != "log":
                es_action["_id"] = self._get_event_id(event)
            else:
                es_action["_id"] = dbutils.id()

            task_id = event.get("task")
            if task_id is not None:
                es_action["_routing"] = task_id
                task_ids.add(task_id)
                if (iter is not None and event.get("metric")
                        not in self._skip_iteration_for_metric):
                    task_iteration[task_id] = max(iter,
                                                  task_iteration[task_id])

                if event_type == EventType.metrics_scalar.value:
                    self._update_last_metric_event_for_task(
                        task_last_events=task_last_events,
                        task_id=task_id,
                        event=event)
            else:
                es_action["_routing"] = task_id

            actions.append(es_action)

        if task_ids:
            # verify task_ids
            with translate_errors_context(), TimingContext(
                    "mongo", "task_by_ids"):
                extra_msg = None
                query = Q(id__in=task_ids, company=company_id)
                if not allow_locked_tasks:
                    query &= Q(status__nin=LOCKED_TASK_STATUSES)
                    extra_msg = "or task published"
                res = Task.objects(query).only("id")
                if len(res) < len(task_ids):
                    invalid_task_ids = tuple(
                        set(task_ids) - set(r.id for r in res))
                    raise errors.bad_request.InvalidTaskId(
                        extra_msg, company=company_id, ids=invalid_task_ids)

        errors_in_bulk = []
        added = 0
        chunk_size = 500
        with translate_errors_context(), TimingContext("es",
                                                       "events_add_batch"):
            # TODO: replace it with helpers.parallel_bulk in the future once the parallel pool leak is fixed
            with closing(
                    helpers.streaming_bulk(
                        self.es,
                        actions,
                        chunk_size=chunk_size,
                        # thread_count=8,
                        refresh=True,
                    )) as it:
                for success, info in it:
                    if success:
                        added += chunk_size
                    else:
                        errors_in_bulk.append(info)

            remaining_tasks = set()
            now = datetime.utcnow()
            for task_id in task_ids:
                # Update related tasks. For performance reasons we prefer to update all of them,
                #  not only those whose events were successful

                updated = self._update_task(
                    company_id=company_id,
                    task_id=task_id,
                    now=now,
                    iter_max=task_iteration.get(task_id),
                    last_events=task_last_events.get(task_id),
                )

                if not updated:
                    remaining_tasks.add(task_id)
                    continue

            if remaining_tasks:
                TaskBLL.set_last_update(remaining_tasks,
                                        company_id,
                                        last_update=now)

        # Compensate for always adding chunk_size on success (last chunk is probably smaller)
        added = min(added, len(actions))

        return added, errors_in_bulk
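
A hedged usage sketch for `add_events` (the `event_bll` instance, the ids, and the worker name are assumptions): a scalar event is indexed into Elasticsearch and the owning task's last iteration is updated.

# Hypothetical usage; event_bll, company_id and task_id are placeholders.
events = [
    {
        "type": "training_stats_scalar",  # assumed value of EventType.metrics_scalar
        "task": task_id,
        "iter": 10,
        "metric": "loss",
        "variant": "total",
        "value": 0.125,
    },
]
added, errors_in_bulk = event_bll.add_events(
    company_id=company_id, events=events, worker="worker-01"
)
# 'added' approximates the number of indexed events (see the chunk_size compensation above);
# 'errors_in_bulk' lists failed Elasticsearch actions.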