def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]:
    """
    Delete the tasks that belong to the passed projects together with their events.

    Models are not deleted here. Tasks and models from projects outside of the
    passed list that reference one of the deleted tasks are detached first
    (their ``parent`` / ``task`` reference is set to None).

    :param company: company ID, used for collecting the urls and deleting the events
    :param projects: IDs of the projects whose tasks should be deleted
    :return: tuple of (result of the tasks delete call, event urls, output artifact urls)
    """
    project_tasks = Task.objects(project__in=projects).only(
        "id", "execution__artifacts"
    )
    if not project_tasks:
        return 0, set(), set()

    ids = {entry.id for entry in project_tasks}
    with TimingContext("mongo", "delete_tasks_update_children"):
        # detach the entities that stay alive in other projects
        Task.objects(parent__in=ids, project__nin=projects).update(parent=None)
        Model.objects(task__in=ids, project__nin=projects).update(task=None)

    event_urls = set()
    artifact_urls = set()
    for entry in project_tasks:
        event_urls.update(collect_debug_image_urls(company, entry.id))
        event_urls.update(collect_plot_image_urls(company, entry.id))
        if not (entry.execution and entry.execution.artifacts):
            continue
        # only output artifacts that actually have a uri are reported
        for artifact in entry.execution.artifacts.values():
            if artifact.mode == ArtifactModes.output and artifact.uri:
                artifact_urls.add(artifact.uri)

    event_bll.delete_multi_task_events(company, list(ids))
    deleted = project_tasks.delete()
    return deleted, event_urls, artifact_urls
def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Model]:
    """
    Check that the task can be deleted and collect its output models.

    Unless ``force`` is set, the call fails if the task has published children
    or published (ready) output models. Draft models that are used as input
    models by some task are removed from the returned draft group.

    :param task: the task that is about to be deleted
    :param force: when True the published children/models do not block the deletion
    :raise errors.bad_request.TaskCannotBeDeleted: published children or models
        exist and force is False
    :return: the task output models split by their ``ready`` flag
    """
    if not force:
        with TimingContext("mongo", "count_published_children"):
            children_count = Task.objects(
                parent=task.id, status=TaskStatus.published
            ).count()
            if children_count:
                raise errors.bad_request.TaskCannotBeDeleted(
                    "has children, use force=True",
                    task=task.id,
                    children=children_count,
                )

    with TimingContext("mongo", "get_task_models"):
        task_models_query = Model.objects(task=task.id).only("id", "task", "ready")
        models = TaskOutputs(attrgetter("ready"), Model, task_models_query)
        if models.published and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    if task.models and task.models.output:
        with TimingContext("mongo", "get_task_output_model"):
            output_ids = [item.model for item in task.models.output]
            for output_model in Model.objects(id__in=output_ids):
                if not output_model.ready:
                    models.draft.append(output_model)
                    continue
                if not force:
                    raise errors.bad_request.TaskCannotBeDeleted(
                        "has output model, use force=True",
                        task=task.id,
                        model=output_model.id,
                    )
                models.published.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            draft_ids = models.draft.ids
            dependent_tasks = Task.objects(models__input__model__in=draft_ids).only(
                "id", "models"
            )
            # ids of all the models that the dependent tasks use as input
            input_models = set()
            for dependent in dependent_tasks:
                if dependent.models:
                    input_models.update(item.model for item in dependent.models.input)
            if input_models:
                models.draft = DocumentGroup(
                    Model,
                    (draft for draft in models.draft if draft.id not in input_models),
                )

    return models
def enqueue(call: APICall, company_id, req_model: EnqueueRequest):
    """
    Move a task to the queued status and add it to a queue.

    If the request does not name a queue, the company default queue is used.
    If adding the task to the queue fails, the status change is reverted and
    the original exception is re-raised.

    :param call: the API call, its ``result.data_model`` receives the response
    :param company_id: ID of the company that owns the task
    :param req_model: request with the task, queue and status message/reason
    :raise errors.bad_request.InvalidTaskId: the task was not found for writing
    """
    # fall back to the default queue when none was requested
    queue_id = req_model.queue or queue_bll.get_default(company_id).id

    with translate_errors_context():
        query = {"id": req_model.task, "company": company_id}
        task = Task.get_for_writing(
            _only=("type", "script", "execution", "status", "project", "id"), **query
        )
        if not task:
            raise errors.bad_request.InvalidTaskId(**query)

        status_change = ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.queued,
            status_reason=req_model.status_reason,
            status_message=req_model.status_message,
            allow_same_state_transition=False,
        )
        res = EnqueueResponse(**status_change.execute())

        try:
            queue_bll.add_task(
                company_id=company_id, queue_id=queue_id, task_id=task.id
            )
        except Exception:
            # failed enqueueing, revert to previous state
            revert = ChangeStatusRequest(
                task=task,
                current_status_override=TaskStatus.queued,
                new_status=task.status,
                force=True,
                status_reason="failed enqueueing",
            )
            revert.execute()
            raise

        # set the current queue ID in the task
        task_query = Task.objects(**query)
        if task.execution:
            task_query.update(execution__queue=queue_id, multi=False)
        else:
            task_query.update(execution=Execution(queue=queue_id), multi=False)

        res.queued = 1
        res.fields.update(**{"execution.queue": queue_id})

        call.result.data_model = res
def enqueue_task(
    task_id: str,
    company_id: str,
    queue_id: str,
    status_message: str,
    status_reason: str,
    validate: bool = False,
) -> Tuple[int, dict]:
    """
    Move a task to the queued status and add it to a queue.

    :param task_id: ID of the task to enqueue
    :param company_id: ID of the company that owns the task
    :param queue_id: target queue; when empty the company default queue is used
    :param status_message: status message stored with the status change
    :param status_reason: status reason stored with the status change
    :param validate: when True the task is passed to TaskBLL.validate first
    :raise errors.bad_request.InvalidTaskId: the task was not found for writing
    :return: tuple of (1, status change result with the queue ID set under
        ``fields`` -> ``execution.queue``)
    """
    if not queue_id:
        # try to get default queue
        queue_id = queue_bll.get_default(company_id).id

    query = {"id": task_id, "company": company_id}
    task = Task.get_for_writing(**query)
    if not task:
        raise errors.bad_request.InvalidTaskId(**query)

    if validate:
        TaskBLL.validate(task)

    status_change = ChangeStatusRequest(
        task=task,
        new_status=TaskStatus.queued,
        status_reason=status_reason,
        status_message=status_message,
        allow_same_state_transition=False,
    )
    res = status_change.execute(enqueue_status=task.status)

    try:
        queue_bll.add_task(company_id=company_id, queue_id=queue_id, task_id=task.id)
    except Exception:
        # failed enqueueing, revert to previous state
        revert = ChangeStatusRequest(
            task=task,
            current_status_override=TaskStatus.queued,
            new_status=task.status,
            force=True,
            status_reason="failed enqueueing",
        )
        revert.execute(enqueue_status=None)
        raise

    # set the current queue ID in the task
    task_query = Task.objects(**query)
    if task.execution:
        task_query.update(execution__queue=queue_id, multi=False)
    else:
        task_query.update(execution=Execution(queue=queue_id), multi=False)

    nested_set(res, ("fields", "execution.queue"), queue_id)
    return 1, res
def publish_model(
    cls,
    model_id: str,
    company_id: str,
    force_publish_task: bool = False,
    publish_task_func: Callable[[str, str, bool], dict] = None,
) -> Tuple[int, ModelTaskPublishResponse]:
    """
    Mark a model as ready and optionally publish the task that the model references.

    :param model_id: ID of the model to publish
    :param company_id: ID of the company that owns the model
    :param force_publish_task: passed as the third argument to publish_task_func
    :param publish_task_func: callable used for publishing the model task; when not
        passed the task is left untouched
    :raise errors.bad_request.ModelIsReady: the model is already ready
    :return: tuple of (model update result, task publish response or None when
        no task was published)
    """
    model = cls.get_company_model_by_id(company_id=company_id, model_id=model_id)
    if model.ready:
        raise errors.bad_request.ModelIsReady(company=company_id, model=model_id)

    published_task = None
    if model.task and publish_task_func:
        task_query = Task.objects(id=model.task, company=company_id)
        task = task_query.only("id", "status").first()
        # publish only an existing task that is not published yet
        if task and task.status != TaskStatus.published:
            publish_result = publish_task_func(
                model.task, company_id, force_publish_task
            )
            published_task = ModelTaskPublishResponse(
                id=model.task, data=publish_result
            )

    updated = model.update(upsert=False, ready=True, last_update=datetime.utcnow())
    return updated, published_task
def model_set_ready(
    cls,
    model_id: str,
    company_id: str,
    publish_task: bool,
    force_publish_task: bool = False,
) -> tuple:
    """
    Mark a model as ready and optionally publish the task that the model references.

    :param model_id: ID of the model
    :param company_id: ID of the company that owns the model
    :param publish_task: whether the model task should be published too
    :param force_publish_task: passed as ``force`` to the task publishing
    :raise errors.bad_request.InvalidModelId: the model was not found
    :raise errors.bad_request.ModelIsReady: the model is already ready
    :return: tuple of (model update result, dict with the ``data`` and ``id`` of
        the published task, empty when no task was published)
    """
    with translate_errors_context():
        query = {"id": model_id, "company": company_id}
        model = Model.objects(**query).first()
        if not model:
            raise errors.bad_request.InvalidModelId(**query)
        if model.ready:
            raise errors.bad_request.ModelIsReady(**query)

        published_task_data = {}
        if model.task and publish_task:
            task_query = Task.objects(id=model.task, company=company_id)
            task = task_query.only("id", "status").first()
            # publish only an existing task that is not published yet
            if task and task.status != TaskStatus.published:
                publish_result = cls.publish_task(
                    task_id=model.task,
                    company_id=company_id,
                    publish_model=False,
                    force=force_publish_task,
                )
                published_task_data["data"] = publish_result
                published_task_data["id"] = model.task

        updated = model.update(upsert=False, ready=True)
        return updated, published_task_data
def get_parent_tasks(
    cls,
    company_id: str,
    projects: Sequence[str],
    state: Optional[EntityVisibility] = None,
) -> Sequence[dict]:
    """
    Return the distinct parents of the company tasks, sorted by the parent name.

    :param company_id: ID of the company whose tasks are inspected
    :param projects: limit the inspected tasks to these projects; when None or
        empty the tasks of all the company projects are inspected
    :param state: when archived/active only tasks with/without the archived
        system tag are inspected; any other value applies no such filter
    :return: list of parent task dicts (id, name and project name are requested)
    """
    task_filter = Q(company=company_id)
    if projects:
        task_filter &= Q(project__in=projects)

    if state == EntityVisibility.archived:
        task_filter &= Q(system_tags__in=[EntityVisibility.archived.value])
    elif state == EntityVisibility.active:
        task_filter &= Q(system_tags__nin=[EntityVisibility.archived.value])

    parent_ids = set(Task.objects(task_filter).distinct("parent"))
    if not parent_ids:
        return []

    parents = Task.get_many_with_join(
        company_id,
        query=Q(id__in=parent_ids),
        allow_public=True,
        override_projection=("id", "name", "project.name"),
    )
    return sorted(parents, key=itemgetter("name"))
def delete_task_events(self, company_id, task_id, allow_locked=False):
    """
    Delete all the events of a task.

    :param company_id: ID of the company that owns the task
    :param task_id: ID of the task whose events are deleted
    :param allow_locked: when False, tasks whose status is one of
        LOCKED_TASK_STATUSES are treated as not found
    :raise errors.bad_request.InvalidTaskId: no matching task was found
    :return: the ``deleted`` value of the delete response (0 when it is missing)
    """
    with translate_errors_context():
        task_filter = Q(id=task_id, company=company_id)
        if allow_locked:
            extra_msg = None
        else:
            task_filter &= Q(status__nin=LOCKED_TASK_STATUSES)
            extra_msg = "or task published"

        found = Task.objects(task_filter).only("id").first()
        if not found:
            raise errors.bad_request.InvalidTaskId(
                extra_msg, company=company_id, id=task_id
            )

    es_req = {"query": {"term": {"task": task_id}}}
    with translate_errors_context(), TimingContext("es", "delete_task_events"):
        es_res = delete_company_events(
            es=self.es,
            company_id=company_id,
            event_type=EventType.all,
            body=es_req,
            refresh=True,
        )

    return es_res.get("deleted", 0)
def cleanup_tasks(cls, threshold_sec):
    """
    Force-stop the in-progress tasks that were not updated for too long.

    Each task is updated only if it still has the status that was read, so a task
    that changed its status in the meantime is left untouched. The projects of
    the stopped tasks are passed to update_project_time.

    :param threshold_sec: tasks whose last update is older than this number of
        seconds are considered non-responsive
    :return: the number of tasks that were actually moved to the stopped status
    """
    relevant_status = (TaskStatus.in_progress,)
    ref_time = datetime.utcnow() - timedelta(seconds=threshold_sec)
    log.info(
        f"Starting cleanup cycle for running tasks last updated before {ref_time}"
    )

    tasks = list(
        Task.objects(status__in=relevant_status, last_update__lt=ref_time).only(
            "id", "name", "status", "project", "last_update"
        )
    )
    log.info(f"{len(tasks)} non-responsive tasks found")
    if not tasks:
        return 0

    stopped_count = 0
    project_ids = set()
    now = datetime.utcnow()
    for task in tasks:
        log.info(
            f"Stopping {task.id} ({task.name}), last updated at {task.last_update}"
        )
        # noinspection PyBroadException
        try:
            updated = Task.objects(id=task.id, status=task.status).update(
                status=TaskStatus.stopped,
                status_reason="Forced stop (non-responsive)",
                status_message="Forced stop (non-responsive)",
                status_changed=now,
                last_update=now,
                last_change=now,
            )
        except Exception as ex:
            # Best effort: a failure on one task must not abort the whole cycle,
            # but the task must not be reported as stopped either
            log.error("Failed setting status: %s", str(ex))
            continue

        if updated:
            stopped_count += 1
            project_ids.add(task.project)

    update_project_time(list(project_ids))
    return stopped_count
def get_types(cls, company, project_ids: Optional[Sequence]) -> set:
    """
    Return the distinct task types in use that are also external task types.

    :param company: company passed to get_company_or_none_constraint for
        building the base tasks filter
    :param project_ids: when passed and not empty, only tasks from these
        projects are considered
    :return: set of the used task types that appear in external_task_types
    """
    task_filter = get_company_or_none_constraint(company)
    if project_ids:
        task_filter &= Q(project__in=project_ids)

    used_types = set(Task.objects(task_filter).distinct(field="type"))
    return used_types.intersection(external_task_types)
def _get_valid_tasks(company_id, task_ids: Set, allow_locked_tasks=False) -> Set: """Verify that task exists and can be updated""" if not task_ids: return set() with translate_errors_context(), TimingContext("mongo", "task_by_ids"): query = Q(id__in=task_ids, company=company_id) if not allow_locked_tasks: query &= Q(status__nin=LOCKED_TASK_STATUSES) res = Task.objects(query).only("id") return {r.id for r in res}
def get_outputs_for_deletion(task, force=False):
    """
    Collect the output models and the children of a task that is about to be deleted.

    Draft models that are referenced as the execution model of some task are
    removed from the draft list.

    :param task: the task that is about to be deleted
    :param force: when True published models/children do not block the deletion
    :raise errors.bad_request.TaskCannotBeDeleted: published output models or
        published children exist and force is False
    :return: tuple of (task output models split by ``ready``, children tasks query)
    """
    with TimingContext("mongo", "get_task_models"):
        models = TaskOutputs(
            attrgetter("ready"),
            Model,
            Model.objects(task=task.id).only("id", "task", "ready"),
        )
        if models.published and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    if task.output.model:
        output_model = get_output_model(task, force)
        if output_model:
            group = models.published if output_model.ready else models.draft
            group.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            draft_ids = [draft.id for draft in models.draft]
            dependent_tasks = Task.objects(execution__model__in=draft_ids).only(
                "id", "execution.model"
            )
            busy_models = [
                dependent.execution.model for dependent in dependent_tasks
            ]
            # keep only the drafts that no task uses as its execution model
            models.draft[:] = [
                draft for draft in models.draft if draft.id not in busy_models
            ]

    with TimingContext("mongo", "get_task_children"):
        tasks = Task.objects(parent=task.id).only("id", "parent", "status")
        published_tasks = [
            child for child in tasks if child.status == TaskStatus.published
        ]
        if published_tasks and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has children, use force=True", task=task.id, children=published_tasks
            )

    return models, tasks
def set_last_update(
    task_ids: Collection[str],
    company_id: str,
    last_update: datetime,
    **extra_updates,
):
    """
    Set the last update and last change times of the passed company tasks.

    For a task that is in progress and has a start time, ``active_duration`` is
    also set to the seconds elapsed since the start (unless extra_updates
    provides its own value for it).

    :param task_ids: IDs of the tasks to update
    :param company_id: ID of the company that owns the tasks
    :param last_update: the value stored in ``last_update`` and ``last_change``
    :param extra_updates: additional update arguments applied to each task
    """
    matching = Task.objects(id__in=task_ids, company=company_id).only(
        "status", "started"
    )
    for task in matching:
        if task.status == TaskStatus.in_progress and task.started:
            elapsed = datetime.utcnow() - task.started
            updates = {"active_duration": elapsed.total_seconds(), **extra_updates}
        else:
            updates = extra_updates

        Task.objects(id=task.id, company=company_id).update(
            upsert=False,
            last_update=last_update,
            last_change=last_update,
            **updates,
        )
def execute(self, **kwargs):
    """
    Change the task status to the requested one.

    The update is applied only if the task still has the expected current
    status, and the task project is passed to update_project_time afterwards.

    :param kwargs: additional update fields; a key that collides with one of the
        update control arguments is prefixed with a double underscore
    :raise errors.bad_request.FailedChangingTaskStatus: no task was updated
    :return: dict with the ``updated`` result and the ``fields`` that were set
    """
    current_status = self.current_status_override or self.task.status
    project_id = self.task.project

    # Verify new status is allowed from current status (will throw exception if not valid)
    self.validate_transition(current_status)

    control = {
        "upsert": False,
        "multi": False,
        "write_concern": None,
        "full_result": False,
    }

    now = datetime.utcnow()
    fields = {
        "status": self.new_status,
        "status_reason": self.status_reason,
        "status_message": self.status_message,
        "status_changed": now,
        "last_update": now,
        "last_change": now,
    }

    if self.new_status == TaskStatus.queued:
        fields["pull__system_tags"] = TaskSystemTags.development

    for key, value in kwargs.items():
        # avoid clashing with the control arguments of the update call
        safe_key = f"__{key}" if key in control else key
        fields[safe_key] = value

    with translate_errors_context(), TimingContext("mongo", "task_status"):
        # atomic change of task status by querying the task with the EXPECTED status before modifying it
        params = {**fields, **control}
        updated = Task.objects(id=self.task.id, status=current_status).update(**params)

    if not updated:
        # failed to change status (someone else beat us to it?)
        raise errors.bad_request.FailedChangingTaskStatus(
            task_id=self.task.id,
            current_status=current_status,
            new_status=self.new_status,
        )

    update_project_time(project_id)

    # make sure that _raw_ queries are not returned back to the client
    fields.pop("__raw__", None)

    return {"updated": updated, "fields": fields}
def get_next_task(call: APICall, company_id, req_model: GetNextTaskRequest):
    """
    Fetch the next entry of a queue and place it in the call result.

    When no entry is returned the call result data is left untouched.

    :param call: the API call, its ``result.data`` receives the response
    :param company_id: ID of the company that owns the queue
    :param req_model: request with the queue ID and the ``get_task_info`` flag;
        when the flag is set and the entry task is found, its company and user
        are returned under ``task_info``
    """
    entry = queue_bll.get_next_task(company_id=company_id, queue_id=req_model.queue)
    if not entry:
        return

    data = {"entry": entry.to_proper_dict()}
    if req_model.get_task_info:
        task = Task.objects(id=entry.task).first()
        if task:
            data["task_info"] = {"company": task.company, "user": task.user}

    call.result.data = data
def _update_task_name(task: Task):
    """
    Rename a task after the last path component of its project name.

    The number of "pipeline"-tagged tasks in the project whose name is the base
    name, optionally followed by " #<number>", is counted. When the count is
    positive the new name is "<base name> #<count>", otherwise the base name.
    Nothing is done when the task, its project reference or the project itself
    is missing.

    :param task: the task to rename
    """
    if not task or not task.project:
        return

    project = Project.objects(id=task.project).only("name").first()
    if not project:
        return

    base_name = project.name.rpartition("/")[2]
    name_mask = re.compile(rf"{re.escape(base_name)}( #\d+)?$")
    count = Task.objects(
        project=task.project, system_tags__in=["pipeline"], name=name_mask
    ).count()

    if count > 0:
        new_name = f"{base_name} #{count}"
    else:
        new_name = base_name
    task.update(name=new_name)
def _reinit_outdated_metric_states(self, company_id, state: DebugImageEventsScrollState):
    """
    Re-initialize the metric states for which newer events were reported.

    A metric state is outdated when the task metric stats hold, for the event
    type of this class, a last update time that is later than the state
    timestamp. Outdated states are replaced with the result of
    ``_init_metric_states`` and placed after the states that were kept.

    :param company_id: ID of the company that owns the tasks
    :param state: the scroll state whose ``metrics`` are refreshed in place
    """
    event_type = self.EVENT_TYPE.value
    task_ids = {metric.task for metric in state.metrics}
    tasks = Task.objects(id__in=list(task_ids), company=company_id).only(
        "id", "metric_stats"
    )

    # (task id, metric name) -> last update time of the events of the handled type
    update_times = {}
    for task in tasks:
        metric_stats = task.metric_stats
        if not metric_stats:
            continue
        for stats in metric_stats.values():
            if event_type in stats.event_stats_by_type:
                update_times[(task.id, stats.metric)] = stats.event_stats_by_type[
                    event_type
                ].last_update

    outdated_metrics = []
    for metric in state.metrics:
        key = (metric.task, metric.name)
        if key in update_times and update_times[key] > metric.timestamp:
            outdated_metrics.append(metric)

    kept_metrics = [
        metric for metric in state.metrics if metric not in outdated_metrics
    ]
    refreshed_metrics = self._init_metric_states(
        company_id,
        [(metric.task, metric.name) for metric in outdated_metrics],
    )
    state.metrics = [*kept_metrics, *refreshed_metrics]
def move(call: APICall, company_id: str, request: MoveRequest):
    """
    Move tasks under another project.

    The cached tags are reset and the project time is updated for both the
    projects that the tasks previously belonged to and the target project.

    :param call: the API call, used for the identity of the calling user
    :param company_id: ID of the company
    :param request: request with the task ``ids`` and either ``project`` or
        ``project_name`` of the target project
    :raise errors.bad_request.MissingRequiredFields: neither project nor
        project_name was passed
    :return: dict with the ``project_id`` returned by the move
    """
    if not request.project and not request.project_name:
        raise errors.bad_request.MissingRequiredFields(
            "project or project_name is required"
        )

    # projects that the tasks belong to before the move
    source_projects = {
        task.project
        for task in Task.objects(id__in=request.ids).only("project")
        if task.project
    }

    project_id = project_bll.move_under_project(
        entity_cls=Task,
        user=call.identity.user,
        company=company_id,
        ids=request.ids,
        project=request.project,
        project_name=request.project_name,
    )

    projects = list(source_projects | {project_id})
    _reset_cached_tags(company_id, projects=projects)
    update_project_time(projects)

    return {"project_id": project_id}
def add_or_update_model(_: APICall, company_id: str, request: AddUpdateModelRequest):
    """
    Replace the task model item that has the requested name or add a new one.

    An in-place replacement of the item with the same name is attempted first.
    If nothing was updated, the item is pushed to the models list as part of
    the task statistics update.

    :param _: the API call (not used)
    :param company_id: ID of the company that owns the task
    :param request: request with the task, the models list ``type``, the model
        ``name`` and ``model`` ID and the ``iteration``
    :return: dict with the ``updated`` result of the statistics update
    """
    get_task_for_update(company_id=company_id, task_id=request.task, force=True)

    models_field = f"models__{request.type}"
    model = ModelItem(
        name=request.name, model=request.model, updated=datetime.utcnow()
    )

    # try to replace an existing item with the same name
    query = {"id": request.task, f"{models_field}__name": request.name}
    replaced = Task.objects(**query).update_one(
        **{f"set__{models_field}__S": model}
    )

    extra_updates = {} if replaced else {f"push__{models_field}": model}
    updated = TaskBLL.update_statistics(
        task_id=request.task,
        company_id=company_id,
        last_iteration_max=request.iteration,
        **extra_updates,
    )
    return {"updated": updated}
def _reinit_outdated_task_states(
    self,
    company_id,
    state: DebugImageEventsScrollState,
    task_metrics: Mapping[str, dict],
):
    """
    Re-initialize the task metric states for which newer events were reported.

    A metric needs recalculation when it has no state yet or when its state
    timestamp is earlier than the last update time in the task metric stats
    (for the event type of this class). Task states without any recalculated
    metric are reset; the others are rebuilt with the recalculated metrics
    first, followed by the old metrics that were not recalculated.

    :param company_id: ID of the company that owns the tasks
    :param state: the scroll state whose ``tasks`` are refreshed in place
    :param task_metrics: mapping of task ID to the metrics requested for it;
        an empty value means that all the task metrics are considered
    """
    event_type = self.EVENT_TYPE.value
    tasks = Task.objects(id__in=list(task_metrics), company=company_id).only(
        "id", "metric_stats"
    )

    # task id -> {metric name: last update time} for the handled event type
    update_times = {}
    for task in tasks:
        metric_times = {}
        metric_stats = task.metric_stats
        if metric_stats:
            requested_metrics = task_metrics[task.id]
            for stats in metric_stats.values():
                if event_type not in stats.event_stats_by_type:
                    continue
                if requested_metrics and stats.metric not in requested_metrics:
                    continue
                metric_times[stats.metric] = stats.event_stats_by_type[
                    event_type
                ].last_update
        update_times[task.id] = metric_times

    # task id -> {metric name: current metric state}
    task_metric_states = {}
    for task_state in state.tasks:
        task_metric_states[task_state.task] = {
            metric_state.metric: metric_state for metric_state in task_state.metrics
        }

    task_metrics_to_recalc = {}
    for task_id, metric_times in update_times.items():
        old_metric_states = task_metric_states[task_id]
        metrics_to_recalc = {}
        for metric, last_update in metric_times.items():
            if (
                metric not in old_metric_states
                or old_metric_states[metric].timestamp < last_update
            ):
                metrics_to_recalc[metric] = task_metrics[task_id].get(metric)
        if metrics_to_recalc:
            task_metrics_to_recalc[task_id] = metrics_to_recalc

    updated_task_states = self._init_task_states(company_id, task_metrics_to_recalc)

    merged_states = []
    for old_state in state.tasks:
        task_id = old_state.task
        updated_state = first(
            uts for uts in updated_task_states if uts.task == task_id
        )
        if not updated_state:
            # nothing was recalculated for this task
            old_state.reset()
            merged_states.append(old_state)
            continue

        updated_metrics = [m.metric for m in updated_state.metrics]
        kept_metrics = (
            old_metric
            for old_metric in old_state.metrics
            if old_metric.metric not in updated_metrics
        )
        merged_states.append(
            TaskScrollState(
                task=task_id,
                metrics=[*updated_state.metrics, *kept_metrics],
            )
        )

    state.tasks = merged_states
def delete_model(cls, model_id: str, company_id: str, force: bool) -> Tuple[int, Model]:
    """
    Delete a model and mark its references in tasks as deleted.

    Tasks that use the model as an input model get the model ID replaced with
    the "deleted" ID (deleted_prefix + model ID). If the task referenced by the
    model is published, its output model reference is replaced in the same way
    and an error note is stored on the task output.

    :param model_id: ID of the model to delete
    :param company_id: ID of the company that owns the model
    :param force: allow the deletion of a model that is used as an input model
        or whose creating task is published
    :raise errors.bad_request.ModelInUse: the model is used as an input model and
        force is False
    :raise errors.bad_request.ModelCreatingTaskExists: the model task is
        published and force is False
    :return: tuple of (models delete result, the model that was loaded before
        the deletion)
    """
    model = cls.get_company_model_by_id(
        company_id=company_id,
        model_id=model_id,
        only_fields=("id", "task", "project", "uri"),
    )
    deleted_model_id = f"{deleted_prefix}{model_id}"

    using_tasks = Task.objects(models__input__model=model_id).only("id")
    if using_tasks:
        if not force:
            raise errors.bad_request.ModelInUse(
                "as execution model, use force=True to delete",
                num_tasks=len(using_tasks),
            )
        # update deleted model id in using tasks
        Task._get_collection().update_many(
            filter={"_id": {"$in": [t.id for t in using_tasks]}},
            update={"$set": {"models.input.$[elem].model": deleted_model_id}},
            array_filters=[{"elem.model": model_id}],
            upsert=False,
        )

    if model.task:
        task = Task.objects(id=model.task).first()
        if task and task.status == TaskStatus.published:
            if not force:
                raise errors.bad_request.ModelCreatingTaskExists(
                    "and published, use force=True to delete", task=model.task
                )
            # NOTE(review): the membership test compares the model ID with the
            # items of task.models.output - confirm that the items compare equal
            # to a plain ID, otherwise this update never runs
            if (
                task.models
                and task.models.output
                and model_id in task.models.output
            ):
                now = datetime.utcnow()
                Task._get_collection().update_one(
                    filter={"_id": model.task, "models.output.model": model_id},
                    update={
                        # all the modified fields must be under an update operator:
                        # a plain top-level field next to "$set" is not a valid
                        # update document
                        "$set": {
                            "models.output.$[elem].model": deleted_model_id,
                            "output.error": f"model deleted on {now.isoformat()}",
                            "last_change": now,
                        },
                    },
                    array_filters=[{"elem.model": model_id}],
                    upsert=False,
                )

    del_count = Model.objects(id=model_id, company=company_id).delete()
    return del_count, model
def status_report(
    self,
    company_id: str,
    user_id: str,
    ip: str,
    report: StatusReportRequest,
    tags: Sequence[str] = None,
) -> None:
    """
    Write worker status report

    The worker entry is always saved at the end, also when the processing fails.

    :param company_id: worker's company ID
    :param user_id: user_id ID under which this worker is running
    :param ip: worker IP
    :param report: the report itself
    :param tags: tags for this worker; when None the worker tags are left as is
    :raise bad_request.InvalidTaskId: the reported task was not found
    :raise server_error.DataError: any other (non API) failure while processing
        the report
    """
    entry = self._get_worker(company_id, user_id, report.worker)

    try:
        entry.ip = ip
        now = datetime.utcnow()
        entry.last_activity_time = now

        if tags is not None:
            entry.tags = tags

        if report.machine_stats:
            self._log_stats_to_es(
                company_id=company_id,
                company_name=entry.company.name,
                worker=report.worker,
                timestamp=report.timestamp,
                task=report.task,
                machine_stats=report.machine_stats,
            )

        entry.queue = report.queue

        if report.queues:
            entry.queues = report.queues

        if not report.task:
            entry.task = None
            entry.project = None
        else:
            with translate_errors_context():
                query = dict(id=report.task, company=company_id)
                update = dict(
                    last_worker=report.worker,
                    last_worker_report=now,
                    last_update=now,
                    last_change=now,
                )
                # modify(new=True, ...) returns the modified object
                task = Task.objects(**query).modify(new=True, **update)
                if not task:
                    raise bad_request.InvalidTaskId(**query)
                entry.task = IdNameEntry(id=task.id, name=task.name)

                entry.project = None
                if task.project:
                    project = Project.objects(id=task.project).only("name").first()
                    if project:
                        entry.project = IdNameEntry(id=project.id, name=project.name)

        entry.last_report_time = now
    except APIError:
        raise
    except Exception as e:
        msg = "Failed processing worker status report"
        log.exception(msg)
        # an exception may carry no args: do not let an IndexError raised here
        # hide the original failure
        err = e.args[0] if e.args else str(e)
        raise server_error.DataError(msg, err=err) from e
    finally:
        self._save_worker(entry)
def get_all_with_projection(
    self, company_id: str, last_seen: int
) -> Sequence[WorkerResponseEntry]:
    """
    Return the company workers enriched with queue and task information.

    For each worker queue the name, the number of entries and the next task
    are filled in; for the worker task the running time (active duration
    multiplied by 1000) and the last iteration are filled in.

    :param company_id: ID of the company whose workers are returned
    :param last_seen: passed as is to ``get_all``
    :return: list of the worker response entries
    """
    helpers = [
        WorkerConversionHelper.from_worker_entry(worker_entry)
        for worker_entry in self.get_all(company_id=company_id, last_seen=last_seen)
    ]

    task_ids = {helper.task_id for helper in helpers if helper.task_id}
    all_queues = set()
    for helper in helpers:
        all_queues.update(helper.queue_ids)

    queues_info = {}
    if all_queues:
        projection = [
            {"$match": {"_id": {"$in": list(all_queues)}}},
            {
                "$project": {
                    "name": 1,
                    "next_entry": {"$arrayElemAt": ["$entries", 0]},
                    "num_entries": {"$size": "$entries"},
                }
            },
        ]
        queues_info = {
            res["_id"]: res for res in Queue.objects.aggregate(projection)
        }
        # the next task of each queue should be resolved to its name too
        next_task_ids = (
            safe_get(info, "next_entry/task") for info in queues_info.values()
        )
        task_ids = task_ids.union(filter(None, next_task_ids))

    tasks_info = {}
    if task_ids:
        tasks_query = Task.objects(id__in=task_ids).only(
            "name", "started", "last_iteration", "active_duration"
        )
        tasks_info = {task.id: task for task in tasks_query}

    def update_queue_entries(*entries):
        """Fill in the name, tasks count and next task of the passed queue entries"""
        for entry in entries:
            if not entry:
                continue
            info = queues_info.get(entry.id, None)
            if not info:
                continue
            entry.name = info.get("name", None)
            entry.num_tasks = info.get("num_entries", 0)
            next_task_id = safe_get(info, "next_entry/task")
            if not next_task_id:
                continue
            next_task = tasks_info.get(next_task_id, None)
            entry.next_task = IdNameEntry(
                id=next_task_id, name=next_task.name if next_task else None
            )

    for helper in helpers:
        worker = helper.worker
        if helper.task_id:
            task = tasks_info.get(helper.task_id, None)
            if task:
                worker.task.running_time = (task.active_duration or 0) * 1000
                worker.task.last_iteration = task.last_iteration

        update_queue_entries(worker.queue)
        if worker.queues:
            update_queue_entries(*worker.queues)

    return [helper.worker for helper in helpers]
def cleanup_task(
    task: Task,
    force: bool = False,
    update_children=True,
    return_file_urls=False,
    delete_output_models=True,
) -> CleanupResult:
    """
    Validate task deletion and delete/modify all its output.

    :param task: task object
    :param force: whether to delete task with published outputs
    :param update_children: whether the children tasks and the published models
        should be re-pointed to the "deleted" task ID (deleted_prefix + task ID)
    :param return_file_urls: whether the urls of the task events, output
        artifacts and draft models should be collected and returned
    :param delete_output_models: whether the draft output models should be deleted
    :return: count of delete and modified items
    """
    models = verify_task_children_and_ouptuts(task, force)

    event_urls, artifact_urls, model_urls = set(), set(), set()
    if return_file_urls:
        event_urls = collect_debug_image_urls(task.company, task.id)
        event_urls.update(collect_plot_image_urls(task.company, task.id))
        if task.execution and task.execution.artifacts:
            artifact_urls = {
                artifact.uri
                for artifact in task.execution.artifacts.values()
                if artifact.mode == ArtifactModes.output and artifact.uri
            }
        model_urls = {
            draft.uri for draft in models.draft.objects().only("uri") if draft.uri
        }

    deleted_task_id = f"{deleted_prefix}{task.id}"

    updated_children = 0
    if update_children:
        with TimingContext("mongo", "update_task_children"):
            updated_children = Task.objects(parent=task.id).update(
                parent=deleted_task_id
            )

    deleted_models = 0
    if models.draft and delete_output_models:
        with TimingContext("mongo", "delete_models"):
            deleted_models = models.draft.objects().delete()

    updated_models = 0
    if models.published and update_children:
        with TimingContext("mongo", "update_task_models"):
            updated_models = models.published.objects().update(task=deleted_task_id)

    event_bll.delete_task_events(task.company, task.id, allow_locked=force)

    urls = None
    if return_file_urls:
        urls = TaskUrls(
            event_urls=list(event_urls),
            artifact_urls=list(artifact_urls),
            model_urls=list(model_urls),
        )

    return CleanupResult(
        deleted_models=deleted_models,
        updated_children=updated_children,
        updated_models=updated_models,
        urls=urls,
    )
def get_task_hyperparam_distinct_values(
    self,
    company_id: str,
    project_ids: Sequence[str],
    section: str,
    name: str,
    include_subprojects: bool,
    allow_public: bool = True,
) -> ParamValues:
    """
    Return the distinct values of a hyper parameter in the matching tasks.

    The result is cached in redis. The cache lookup is given the last update
    time of the most recently updated matching task, together with the allowed
    outdate period from the configuration.

    :param company_id: ID of the company
    :param project_ids: IDs of the projects whose tasks are inspected
    :param section: hyper parameter section
    :param name: hyper parameter name
    :param include_subprojects: passed to the project constraint builder
    :param allow_public: passed to the company constraint builder
    :return: tuple of (number of returned values, sorted distinct values);
        the values are limited to the configured maximal count
    """
    company_constraint = self._get_company_constraint(company_id, allow_public)
    project_constraint = self._get_project_constraint(
        project_ids, include_subprojects
    )
    key_path = f"hyperparams.{ParameterKeyEscaper.escape(section)}.{ParameterKeyEscaper.escape(name)}"
    last_updated_task = (
        Task.objects(
            **company_constraint,
            **project_constraint,
            **{f"{key_path.replace('.', '__')}__exists": True},
        )
        .only("last_update")
        .order_by("-last_update")
        .limit(1)
        .first()
    )
    if not last_updated_task:
        return 0, []

    # Every argument that affects the query must be a part of the cache key,
    # otherwise calls that differ only in include_subprojects share the same
    # cached values. An empty/None projects list is allowed.
    redis_key = (
        f"hyperparam_values_{company_id}_{'_'.join(project_ids or ())}"
        f"_{section}_{name}_{allow_public}_{include_subprojects}"
    )
    last_update = last_updated_task.last_update or datetime.utcnow()
    cached_res = self._get_cached_param_values(
        key=redis_key,
        last_update=last_update,
        allowed_delta_sec=config.get(
            "services.tasks.hyperparam_values.cache_allowed_outdate_sec", 60
        ),
    )
    if cached_res:
        return cached_res

    max_values = config.get("services.tasks.hyperparam_values.max_count", 100)
    pipeline = [
        {
            "$match": {
                **company_constraint,
                **project_constraint,
                key_path: {"$exists": True},
            }
        },
        {"$project": {"value": f"${key_path}.value"}},
        # distinct values, sorted and limited
        {"$group": {"_id": "$value"}},
        {"$sort": {"_id": 1}},
        {"$limit": max_values},
        # collapse the values into a single document with their count
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT._id"},
            }
        },
    ]

    result = next(Task.aggregate(pipeline, collation=Task._numeric_locale), None)
    if not result:
        return 0, []

    total = int(result.get("total", 0))
    values = result.get("results", [])

    ttl = config.get("services.tasks.hyperparam_values.cache_ttl_sec", 86400)
    cached = dict(last_update=last_update.timestamp(), total=total, values=values)
    self.redis.setex(redis_key, ttl, json.dumps(cached))
    return total, values