def params_unprepare_from_saved(fields, copy_to_legacy=False):
    """
    Restore the original (unescaped) section and parameter names for the
    "hyperparams" and "configuration" fields of a saved task document.

    When copy_to_legacy is True, also mirror the data into the legacy
    execution.parameters / execution.model_desc locations for old clients.
    """

    def _unescape_inner(value):
        # Only dict values carry escaped inner keys; scalar values pass through.
        if isinstance(value, dict):
            return {ParameterKeyEscaper.unescape(k): v for k, v in value.items()}
        return value

    for field_name in ("hyperparams", "configuration"):
        stored = fields.get(field_name)
        if not stored:
            continue
        fields[field_name] = {
            ParameterKeyEscaper.unescape(section): _unescape_inner(value)
            for section, value in stored.items()
        }

    if not copy_to_legacy:
        return

    for source_field, legacy_path, with_sections in (
        ("hyperparams", ("execution", "parameters"), True),
        ("configuration", ("execution", "model_desc"), False),
    ):
        legacy = _get_legacy_params(
            fields.get(source_field), with_sections=with_sections
        )
        if legacy:
            nested_set(
                fields,
                legacy_path,
                {_get_full_param_name(p): p["value"] for p in legacy},
            )
def unescape_dict_field(fields: dict, path: Union[str, Sequence[str]]):
    """Unescape the keys of the dict located at *path* inside *fields*, in place."""
    key_path = (path,) if isinstance(path, str) else path
    current = nested_get(fields, key_path)
    # Missing, empty, or non-dict values are left untouched.
    if isinstance(current, dict) and current:
        nested_set(fields, key_path, unescape_dict(current))
def artifacts_prepare_for_save(fields: dict):
    """Convert the execution.artifacts list into a dict keyed by artifact id, in place."""
    path = ("execution", "artifacts")
    artifacts = nested_get(fields, path)
    if artifacts is not None:
        indexed = {get_artifact_id(item): item for item in artifacts}
        nested_set(fields, path, value=indexed)
def prepare_for_save(cls, call: APICall, fields: dict):
    """
    Split a legacy docker_cmd string into container.image / container.arguments
    for calls made with an endpoint version older than cls.max_version.
    """
    if call.requested_endpoint_version >= cls.max_version:
        return
    docker_cmd = nested_get(fields, cls.field)
    if docker_cmd is None:
        return
    # Everything before the first space is the image; the remainder (possibly
    # empty) becomes the arguments string.
    image, _, arguments = docker_cmd.partition(" ")
    nested_set(fields, ("container", "image"), value=image)
    nested_set(fields, ("container", "arguments"), value=arguments)
    nested_delete(fields, cls.field)
def artifacts_unprepare_from_saved(fields):
    """Convert the execution.artifacts dict back into a list, sorted by artifact key."""
    path = ("execution", "artifacts")
    stored = nested_get(fields, path)
    if stored is None:
        return
    as_list = sorted(stored.values(), key=itemgetter("key"))
    nested_set(fields, path, value=as_list)
def params_prepare_for_save(fields: dict, previous_task: Task = None):
    """
    If legacy hyper params or configuration is passed then replace the corresponding section in the new structure
    Escape all the section and param names for hyper params and configuration to make it mongo safe

    :param fields: Task fields dict, modified in place
    :param previous_task: Previously stored task document; used to merge in the
        existing new-style params when only legacy fields are sent
    """
    for old_params_field, new_params_field, default_section in (
        (("execution", "parameters"), "hyperparams", hyperparams_default_section),
        (("execution", "model_desc"), "configuration", None),
    ):
        legacy_params = nested_get(fields, old_params_field)
        if legacy_params is None:
            # Legacy field not present in this request at all — nothing to migrate
            continue

        if (
            not fields.get(new_params_field)
            and previous_task
            and previous_task[new_params_field]
        ):
            # The caller sent only legacy params: start from the previously
            # stored new-style data, minus its legacy-typed entries, and merge
            # the incoming legacy params on top of it below.
            previous_data = previous_task.to_proper_dict().get(new_params_field)
            removed = _remove_legacy_params(
                previous_data, with_sections=default_section is not None
            )
            if not legacy_params and not removed:
                # if we only need to delete legacy fields from the db
                # but they are not there then there is no point to proceed
                continue

            # previous_data is stored escaped; unescape before merging so the
            # final escaping pass below is applied uniformly.
            fields_update = {new_params_field: previous_data}
            params_unprepare_from_saved(fields_update)
            fields.update(fields_update)

        for full_name, value in legacy_params.items():
            # Legacy names may be "section/name" or bare names that fall into
            # the default section (None for configuration).
            section, name = split_param_name(full_name, default_section)
            new_path = list(filter(None, (new_params_field, section, name)))
            new_param = dict(name=name, type=hyperparams_legacy_type, value=str(value))
            if section is not None:
                new_param["section"] = section
            nested_set(fields, new_path, new_param)
        nested_delete(fields, old_params_field)

    # Escape section and parameter names so they are safe to use as mongo keys
    for param_field in ("hyperparams", "configuration"):
        params = fields.get(param_field)
        if params:
            escaped_params = {
                ParameterKeyEscaper.escape(key): {
                    ParameterKeyEscaper.escape(k): v for k, v in value.items()
                }
                if isinstance(value, dict)
                else value
                for key, value in params.items()
            }
            fields[param_field] = escaped_params
def enqueue_task(
    task_id: str,
    company_id: str,
    queue_id: str,
    status_message: str,
    status_reason: str,
    validate: bool = False,
) -> Tuple[int, dict]:
    """
    Move a task into the queued state and add it to the given queue.

    :param queue_id: Target queue; falls back to the company default queue if empty
    :param validate: When True, run TaskBLL.validate on the task first
    :return: Tuple of (number of tasks affected, status-change result fields)
    :raises errors.bad_request.InvalidTaskId: if the task cannot be fetched for writing
    """
    if not queue_id:
        # try to get default queue
        queue_id = queue_bll.get_default(company_id).id

    query = dict(id=task_id, company=company_id)
    task = Task.get_for_writing(**query)
    if not task:
        raise errors.bad_request.InvalidTaskId(**query)

    if validate:
        TaskBLL.validate(task)

    # Transition to queued BEFORE adding to the queue, so a task never sits in
    # a queue while still in its previous status.
    res = ChangeStatusRequest(
        task=task,
        new_status=TaskStatus.queued,
        status_reason=status_reason,
        status_message=status_message,
        allow_same_state_transition=False,
    ).execute(enqueue_status=task.status)

    try:
        queue_bll.add_task(company_id=company_id, queue_id=queue_id, task_id=task.id)
    except Exception:
        # failed enqueueing, revert to previous state
        ChangeStatusRequest(
            task=task,
            current_status_override=TaskStatus.queued,
            new_status=task.status,
            force=True,
            status_reason="failed enqueueing",
        ).execute(enqueue_status=None)
        raise

    # set the current queue ID in the task
    if task.execution:
        Task.objects(**query).update(execution__queue=queue_id, multi=False)
    else:
        Task.objects(**query).update(execution=Execution(queue=queue_id), multi=False)

    # Reflect the queue assignment in the returned fields as well
    nested_set(res, ("fields", "execution.queue"), queue_id)
    return 1, res
def _apply_multi_dict(body: dict, md: ImmutableMultiDict):
    """
    Merge the query-string style multi-dict *md* into the nested *body* dict.

    Dotted keys become nested paths; repeated keys or a "[]" suffix produce
    list values. String values are coerced to int/float/bool where possible.

    :param body: Target dict, modified in place
    :param md: Source multi-dict (e.g. parsed request args)
    """

    def convert_value(v: str):
        # NOTE(review): negative numbers are not recognized and remain strings —
        # presumably acceptable for these parameters; confirm if signed values appear.
        if v.replace(".", "", 1).isdigit():
            return float(v) if "." in v else int(v)
        if v in ("true", "True", "TRUE"):
            return True
        if v in ("false", "False", "FALSE"):
            return False
        return v

    for k, v in md.lists():
        is_list = len(v) > 1 or k.endswith("[]")
        value = [convert_value(x) for x in v] if is_list else convert_value(v[0])
        # Strip only an exact trailing "[]" marker. The previous rstrip("[]")
        # removed any trailing run of '[' / ']' characters, which corrupted
        # keys such as "a[0]" (-> "a[0") or "tags[]]" (-> "tags").
        key = k[:-2] if k.endswith("[]") else k
        nested_set(body, key.split("."), value)
def unprepare_from_saved(cls, call: APICall, tasks_data: Union[Sequence[dict], dict]):
    """
    Rebuild the legacy docker_cmd string from container.image and
    container.arguments for clients older than cls.max_version.
    """
    if call.requested_endpoint_version >= cls.max_version:
        return
    items = [tasks_data] if isinstance(tasks_data, dict) else tasks_data
    for task in items:
        container = task.get("container")
        # Only tasks with a container image carry docker information.
        if not container or not container.get("image"):
            continue
        parts = (container.get("image"), container.get("arguments"))
        docker_cmd = " ".join(p for p in parts if p)
        if docker_cmd:
            nested_set(task, cls.field, docker_cmd)
def unprepare_from_saved(
    cls, call: APICall, tasks_data: Union[Sequence[dict], dict]
):
    """
    Populate the legacy single-model task fields from the new models lists
    for clients older than cls.max_version.
    """
    if call.requested_endpoint_version >= cls.max_version:
        return
    items = [tasks_data] if isinstance(tasks_data, dict) else tasks_data
    for task in items:
        for mode, legacy_field in cls.mode_to_fields.items():
            entries = nested_get(task, (cls.models_field, mode))
            if not entries:
                continue
            # Input mode exposes the first entry; output mode the last one.
            index = 0 if mode == TaskModelTypes.input else -1
            entry = entries[index]
            if entry:
                nested_set(task, legacy_field, entry.get("model"))
def prepare_for_save(cls, call: APICall, fields: dict):
    """
    Convert legacy single-model task fields into entries in the new models
    structure for calls made with an endpoint version older than cls.max_version.
    """
    if call.requested_endpoint_version >= cls.max_version:
        return
    for mode, legacy_field in cls.mode_to_fields.items():
        model_id = nested_get(fields, legacy_field)
        if model_id is None:
            continue
        if model_id:
            entries = [
                dict(
                    name=TaskModelNames[mode],
                    model=model_id,
                    updated=datetime.utcnow(),
                )
            ]
        else:
            # A present-but-falsy legacy value clears the models list for this mode.
            entries = []
        nested_set(fields, (cls.models_field, mode), value=entries)
        nested_delete(fields, legacy_field)
def _upgrade_task_data(task_data: dict) -> dict:
    """
    Migrate from execution/parameters and model_desc to hyperparams and configuration fields
    Upgrade artifacts list to dict
    Migrate from execution.model and output.model to the new models field
    Move docker_cmd contents into the container field
    :param task_data: Upgraded in place
    :return: The upgraded task data
    """
    # --- legacy params / model_desc -> hyperparams / configuration ---
    for old_param_field, new_param_field, default_section in (
        ("execution.parameters", "hyperparams", hyperparams_default_section),
        ("execution.model_desc", "configuration", None),
    ):
        legacy_path = old_param_field.split(".")
        legacy = nested_get(task_data, legacy_path)
        if legacy:
            for full_name, value in legacy.items():
                section, name = split_param_name(full_name, default_section)
                new_path = list(filter(None, (new_param_field, section, name)))
                # Do not overwrite an already-migrated entry
                if not nested_get(task_data, new_path):
                    new_param = dict(
                        name=name, type=hyperparams_legacy_type, value=str(value)
                    )
                    if section is not None:
                        new_param["section"] = section
                    nested_set(task_data, path=new_path, value=new_param)
            nested_delete(task_data, legacy_path)

    # --- artifacts list -> dict keyed by artifact id ---
    artifacts_path = ("execution", "artifacts")
    artifacts = nested_get(task_data, artifacts_path)
    if isinstance(artifacts, list):
        nested_set(
            task_data,
            path=artifacts_path,
            value={get_artifact_id(a): a for a in artifacts},
        )

    # --- execution.model / output.model -> models.input / models.output ---
    models = task_data.get("models", {})
    now = datetime.utcnow()
    for old_field, type_ in (
        ("execution.model", TaskModelTypes.input),
        ("output.model", TaskModelTypes.output),
    ):
        old_path = old_field.split(".")
        old_model = nested_get(task_data, old_path)
        new_models = models.get(type_, [])
        name = TaskModelNames[type_]
        # Skip if the same model (by id or by reserved name) was already migrated
        if old_model and not any(
            m
            for m in new_models
            if m.get("model") == old_model or m.get("name") == name
        ):
            model_item = {"model": old_model, "name": name, "updated": now}
            # Input models are prepended; output models appended
            if type_ == TaskModelTypes.input:
                new_models = [model_item, *new_models]
            else:
                new_models = [*new_models, model_item]
            models[type_] = new_models
        nested_delete(task_data, old_path)
    task_data["models"] = models

    # --- execution.docker_cmd -> container.image / container.arguments ---
    docker_cmd_path = ("execution", "docker_cmd")
    docker_cmd = nested_get(task_data, docker_cmd_path)
    if docker_cmd and not task_data.get("container"):
        image, _, arguments = docker_cmd.partition(" ")
        task_data["container"] = {"image": image, "arguments": arguments}
        nested_delete(task_data, docker_cmd_path)

    return task_data
def _import_entity(
    cls,
    f: IO[bytes],
    full_name: str,
    company_id: str,
    user_id: str,
    metadata: Mapping[str, Any],
) -> Optional[Sequence[Task]]:
    """
    Import entities of the type encoded in *full_name* from a JSON-lines stream
    into the database, re-owning them to the given company and user.

    :param f: JSON-lines stream, one serialized document per line
    :param metadata: May supply project_name/logo_url/logo_blob overrides for
        imported projects
    :return: The imported task documents, or None if no tasks were imported
    """
    cls_ = cls._get_entity_type(full_name)
    print(f"Writing {cls_.__name__.lower()}s into database")
    tasks = []
    override_project_count = 0
    for item in cls.json_lines(f):
        if cls_ == cls.task_cls:
            # Older exports store artifacts as a list; convert to the
            # dict-by-artifact-id form before deserialization
            task_data = json.loads(item)
            artifacts_path = ("execution", "artifacts")
            artifacts = nested_get(task_data, artifacts_path)
            if isinstance(artifacts, list):
                nested_set(
                    task_data,
                    artifacts_path,
                    value={get_artifact_id(a): a for a in artifacts},
                )
            item = json.dumps(task_data)
        doc = cls_.from_json(item, created=True)
        if hasattr(doc, "user"):
            doc.user = user_id
        if hasattr(doc, "company"):
            doc.company = company_id
        if isinstance(doc, cls.project_cls):
            override_project_name = metadata.get("project_name", None)
            if override_project_name:
                # Subsequent projects get a numeric suffix to keep names unique
                if override_project_count:
                    override_project_name = (
                        f"{override_project_name} {override_project_count + 1}"
                    )
                override_project_count += 1
                doc.name = override_project_name
            doc.logo_url = metadata.get("logo_url", None)
            doc.logo_blob = metadata.get("logo_blob", None)
            # Rename any existing same-named project (timestamp suffix) so the
            # imported one can take the name
            cls_.objects(
                company=company_id, name=doc.name, id__ne=doc.id
            ).update(
                set__name=
                f"{doc.name}_{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}"
            )
        doc.save()
        if isinstance(doc, cls.task_cls):
            tasks.append(doc)
            # Drop any stale events belonging to the imported task id
            cls.event_bll.delete_task_events(company_id, doc.id, allow_locked=True)
    if tasks:
        return tasks