def get_status_counts(project_id, section):
    """Return the total runtime and per-status counts for one project section."""
    key = (project_id, section)
    total_runtime = nested_get(runtime, key, 0)
    counts = nested_get(status_count, key, default_counts)
    return {
        "total_runtime": total_runtime,
        "status_count": counts,
    }
def sum_status_count(a: Mapping[str, Mapping], b: Mapping[str, Mapping]) -> Dict[str, dict]:
    """Merge two section -> status -> count mappings by summing the counts.

    A status missing from either side contributes 0.
    """
    merged: Dict[str, dict] = {}
    for section in set(a) | set(b):
        statuses = set(a.get(section, {})) | set(b.get(section, {}))
        merged[section] = {
            status: nested_get(a, (section, status), default=0)
            + nested_get(b, (section, status), default=0)
            for status in statuses
        }
    return merged
def _migrate_docker_cmd(db: Database):
    """Replace the legacy execution.docker_cmd string with the container field.

    The docker_cmd value "image args..." is split on the first space into
    container.image and container.arguments; the legacy field is always unset.
    """
    tasks: Collection = db["task"]
    docker_cmd_field = "execution.docker_cmd"
    query = {docker_cmd_field: {"$exists": True}}
    for doc in tasks.find(filter=query, projection=(docker_cmd_field, )):
        docker_cmd = nested_get(doc, docker_cmd_field.split("."))
        update = {"$unset": {docker_cmd_field: 1}}
        if docker_cmd:
            image, _, arguments = docker_cmd.partition(" ")
            update["$set"] = {
                "container": {"image": image, "arguments": arguments}
            }
        tasks.update_one({"_id": doc["_id"]}, update)
def unescape_dict_field(fields: dict, path: Union[str, Sequence[str]]):
    """Unescape, in place, the dict stored under path inside fields.

    A string path is treated as a single-element path. Missing, empty or
    non-dict values are left untouched.
    """
    if isinstance(path, str):
        path = (path,)
    data = nested_get(fields, path)
    if data and isinstance(data, dict):
        nested_set(fields, path, unescape_dict(data))
def _migrate_task_models(db: Database):
    """
    Move the execution and output models to new models.input and output lists
    """
    tasks: Collection = db["task"]
    models_field = "models"
    now = datetime.utcnow()
    # legacy single-model field per model type
    fields = {
        TaskModelTypes.input: "execution.model",
        TaskModelTypes.output: "output.model",
    }
    # only touch docs that still carry at least one legacy field
    query = {"$or": [{field: {"$exists": True}} for field in fields.values()]}
    for doc in tasks.find(filter=query, projection=[*fields.values(), models_field]):
        set_commands = {}
        for mode, field in fields.items():
            value = nested_get(doc, field.split("."))
            if value:
                name = TaskModelNames[mode]
                model_item = {"model": value, "name": name, "updated": now}
                existing_models = nested_get(doc, (models_field, mode), default=[])
                # drop any existing entry that clashes with the migrated one
                # by reserved name or by model id
                existing_models = (
                    m for m in existing_models
                    if m.get("name") != name and m.get("model") != value)
                # input models go first in the list, output models last
                if mode == TaskModelTypes.input:
                    updated_models = [model_item, *existing_models]
                else:
                    updated_models = [*existing_models, model_item]
                set_commands[f"{models_field}.{mode}"] = updated_models
        tasks.update_one(
            {"_id": doc["_id"]},
            {
                # legacy fields are removed even when they held falsy values
                "$unset": {field: 1 for field in fields.values()},
                **({
                    "$set": set_commands
                } if set_commands else {}),
            },
        )
def artifacts_prepare_for_save(fields: dict):
    """Convert the execution.artifacts list, in place, to a dict keyed by artifact id."""
    artifacts_field = ("execution", "artifacts")
    artifacts = nested_get(fields, artifacts_field)
    if artifacts is None:
        return
    by_id = {get_artifact_id(a): a for a in artifacts}
    nested_set(fields, artifacts_field, value=by_id)
def artifacts_unprepare_from_saved(fields):
    """Convert the stored execution.artifacts dict, in place, back to a list sorted by key."""
    artifacts_field = ("execution", "artifacts")
    artifacts = nested_get(fields, artifacts_field)
    if artifacts is None:
        return
    as_list = sorted(artifacts.values(), key=itemgetter("key"))
    nested_set(fields, artifacts_field, value=as_list)
def prepare_for_save(cls, call: APICall, fields: dict):
    """For callers on an old API version, split the legacy docker_cmd value
    into container.image / container.arguments and delete the legacy field."""
    if call.requested_endpoint_version >= cls.max_version:
        # caller already uses the new schema; nothing to translate
        return
    docker_cmd = nested_get(fields, cls.field)
    if docker_cmd is None:
        return
    image, _, arguments = docker_cmd.partition(" ")
    nested_set(fields, ("container", "image"), value=image)
    nested_set(fields, ("container", "arguments"), value=arguments)
    nested_delete(fields, cls.field)
def params_prepare_for_save(fields: dict, previous_task: Task = None):
    """
    If legacy hyper params or configuration is passed then replace the
    corresponding section in the new structure
    Escape all the section and param names for hyper params and configuration
    to make it mongo safe
    :param fields: Updated in place
    :param previous_task: When provided, its existing hyperparams/configuration
        are merged in before the legacy values are applied
    """
    for old_params_field, new_params_field, default_section in (
        (("execution", "parameters"), "hyperparams", hyperparams_default_section),
        (("execution", "model_desc"), "configuration", None),
    ):
        legacy_params = nested_get(fields, old_params_field)
        if legacy_params is None:
            continue

        # when the caller did not send the new-style field, start from the
        # previous task's data with its legacy-typed params stripped out
        if (not fields.get(new_params_field)
                and previous_task and previous_task[new_params_field]):
            previous_data = previous_task.to_proper_dict().get(new_params_field)
            removed = _remove_legacy_params(
                previous_data, with_sections=default_section is not None)
            if not legacy_params and not removed:
                # if we only need to delete legacy fields from the db
                # but they are not there then there is no point to proceed
                continue

            fields_update = {new_params_field: previous_data}
            params_unprepare_from_saved(fields_update)
            fields.update(fields_update)

        # write each legacy param into the new nested structure
        for full_name, value in legacy_params.items():
            section, name = split_param_name(full_name, default_section)
            new_path = list(filter(None, (new_params_field, section, name)))
            new_param = dict(
                name=name, type=hyperparams_legacy_type, value=str(value))
            if section is not None:
                new_param["section"] = section
            nested_set(fields, new_path, new_param)
        nested_delete(fields, old_params_field)

    # escape section and param names so they are safe as mongo keys
    for param_field in ("hyperparams", "configuration"):
        params = fields.get(param_field)
        if params:
            escaped_params = {
                ParameterKeyEscaper.escape(key):
                    {ParameterKeyEscaper.escape(k): v for k, v in value.items()}
                    if isinstance(value, dict) else value
                for key, value in params.items()
            }
            fields[param_field] = escaped_params
def migrate_backend(db: Database):
    """Rewrite task documents whose execution.artifacts is still an array
    into the dict form keyed by artifact id."""
    collection: Collection = db["task"]
    artifacts_field = "execution.artifacts"
    # BSON type 4 matches array-valued fields only
    query = {artifacts_field: {"$type": 4}}
    for doc in collection.find(filter=query, projection=(artifacts_field, )):
        artifacts = nested_get(doc, artifacts_field.split("."))
        if not isinstance(artifacts, list):
            continue
        keyed = {get_artifact_id(a): a for a in artifacts}
        collection.update_one(
            {"_id": doc["_id"]}, {"$set": {artifacts_field: keyed}}
        )
def unprepare_from_saved(
    cls, call: APICall, tasks_data: Union[Sequence[dict], dict]
):
    """For callers on an old API version, copy a model id from the new models
    lists back into each task's legacy model field."""
    if call.requested_endpoint_version >= cls.max_version:
        return
    if isinstance(tasks_data, dict):
        tasks_data = [tasks_data]
    for task in tasks_data:
        for mode, field in cls.mode_to_fields.items():
            models = nested_get(task, (cls.models_field, mode))
            if not models:
                continue
            # the legacy field maps to the first input model / last output model
            index = 0 if mode == TaskModelTypes.input else -1
            model = models[index]
            if model:
                nested_set(task, field, model.get("model"))
def prepare_for_save(cls, call: APICall, fields: dict):
    """For callers on an old API version, convert each legacy model field into
    an entry of the corresponding models list and delete the legacy field."""
    if call.requested_endpoint_version >= cls.max_version:
        return
    for mode, field in cls.mode_to_fields.items():
        value = nested_get(fields, field)
        if value is None:
            continue
        if value:
            models = [
                dict(
                    name=TaskModelNames[mode],
                    model=value,
                    updated=datetime.utcnow(),
                )
            ]
        else:
            # an explicitly falsy value clears the list
            models = []
        nested_set(fields, (cls.models_field, mode), value=models)
        nested_delete(fields, field)
def _migrate_model_labels(db: Database):
    """Escape the keys of stored model_labels and container dicts so they are
    safe as mongo field names; only docs whose data actually changes are written."""
    tasks: Collection = db["task"]
    fields = ("execution.model_labels", "container")
    query = {"$or": [{field: {"$nin": [None, {}]}} for field in fields]}
    for doc in tasks.find(filter=query, projection=fields):
        set_commands = {}
        for field in fields:
            data = nested_get(doc, field.split("."))
            if not data:
                continue
            escaped = escape_dict(data)
            if data != escaped:
                set_commands[field] = escaped
        if set_commands:
            tasks.update_one({"_id": doc["_id"]}, {"$set": set_commands})
def get_status_counts(project_id, section):
    """Assemble per-section task statistics for a single project:
    status counts, totals, recent completions and last run time."""
    project_runtime = runtime.get(project_id, {})
    counts = nested_get(
        status_count, (project_id, section), default=default_counts)

    def none_if_sentinel(value):
        # datetime.min is the placeholder for "no task ever started"
        return None if value == datetime.min else value

    last_started = project_runtime.get(
        f"{section}_max_task_started", datetime.min)
    return {
        "status_count": counts,
        "total_tasks": sum(counts.values()),
        "total_runtime": project_runtime.get(section, 0),
        "completed_tasks_24h": project_runtime.get(
            f"{section}_recently_completed", 0),
        "last_task_run": none_if_sentinel(last_started),
    }
def _upgrade_task_data(task_data: dict) -> dict:
    """
    Migrate from execution/parameters and model_desc to hyperparams and
    configuration fields
    Upgrade artifacts list to dict
    Migrate from execution.model and output.model to the new models field
    Move docker_cmd contents into the container field
    :param task_data: Upgraded in place
    :return: The upgraded task data
    """
    # 1. legacy flat parameter dicts -> structured hyperparams/configuration
    for old_param_field, new_param_field, default_section in (
        ("execution.parameters", "hyperparams", hyperparams_default_section),
        ("execution.model_desc", "configuration", None),
    ):
        legacy_path = old_param_field.split(".")
        legacy = nested_get(task_data, legacy_path)
        if legacy:
            for full_name, value in legacy.items():
                section, name = split_param_name(full_name, default_section)
                new_path = list(
                    filter(None, (new_param_field, section, name)))
                # never overwrite a value already present in the new structure
                if not nested_get(task_data, new_path):
                    new_param = dict(name=name,
                                     type=hyperparams_legacy_type,
                                     value=str(value))
                    if section is not None:
                        new_param["section"] = section
                    nested_set(task_data, path=new_path, value=new_param)
        # the legacy field is removed whether or not it held data
        nested_delete(task_data, legacy_path)

    # 2. artifacts stored as a list -> dict keyed by artifact id
    artifacts_path = ("execution", "artifacts")
    artifacts = nested_get(task_data, artifacts_path)
    if isinstance(artifacts, list):
        nested_set(
            task_data,
            path=artifacts_path,
            value={get_artifact_id(a): a for a in artifacts},
        )

    # 3. legacy single model fields -> models.input / models.output lists
    models = task_data.get("models", {})
    now = datetime.utcnow()
    for old_field, type_ in (
        ("execution.model", TaskModelTypes.input),
        ("output.model", TaskModelTypes.output),
    ):
        old_path = old_field.split(".")
        old_model = nested_get(task_data, old_path)
        new_models = models.get(type_, [])
        name = TaskModelNames[type_]
        # only add the legacy model when no entry with the same model id
        # or reserved name is already present
        if old_model and not any(
                m for m in new_models
                if m.get("model") == old_model or m.get("name") == name):
            model_item = {"model": old_model, "name": name, "updated": now}
            # input models are prepended, output models appended
            if type_ == TaskModelTypes.input:
                new_models = [model_item, *new_models]
            else:
                new_models = [*new_models, model_item]
            models[type_] = new_models
        nested_delete(task_data, old_path)
    task_data["models"] = models

    # 4. docker_cmd string -> container {image, arguments}
    docker_cmd_path = ("execution", "docker_cmd")
    docker_cmd = nested_get(task_data, docker_cmd_path)
    # an existing container field takes precedence over the legacy value
    if docker_cmd and not task_data.get("container"):
        image, _, arguments = docker_cmd.partition(" ")
        task_data["container"] = {"image": image, "arguments": arguments}
    nested_delete(task_data, docker_cmd_path)
    return task_data
def get_aggregated_project_parameters(
    cls,
    company_id,
    project_ids: Sequence[str],
    include_subprojects: bool,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    """
    Return one page of distinct (section, name) hyperparameter pairs
    aggregated over the tasks of the given projects.
    :return: (total, remaining, results) where results is a list of
        dicts with unescaped "section" and "name" keys
    """
    # clamp paging inputs to sane values
    page = max(0, page)
    page_size = max(1, page_size)
    pipeline = [
        {
            "$match": {
                **cls._get_company_constraint(company_id),
                **cls._get_project_constraint(project_ids, include_subprojects),
                # only tasks with a non-empty hyperparams document
                "hyperparams": {
                    "$exists": True,
                    "$gt": {}
                },
            }
        },
        # flatten hyperparams: section dict -> array of {k, v}
        {
            "$project": {
                "sections": {
                    "$objectToArray": "$hyperparams"
                }
            }
        },
        {
            "$unwind": "$sections"
        },
        # flatten each section's params the same way
        {
            "$project": {
                "section": "$sections.k",
                "names": {
                    "$objectToArray": "$sections.v"
                },
            }
        },
        {
            "$unwind": "$names"
        },
        # deduplicate (section, name) pairs
        {
            "$group": {
                "_id": {
                    "section": "$section",
                    "name": "$names.k"
                }
            }
        },
        {
            "$sort": OrderedDict({
                "_id.section": 1,
                "_id.name": 1
            })
        },
        {
            "$skip": page * page_size
        },
        {
            "$limit": page_size
        },
        # NOTE(review): this count runs after $skip/$limit, so "total" is the
        # size of the current page, not of the whole result set — confirm
        # this is the intended paging contract
        {
            "$group": {
                "_id": 1,
                "total": {
                    "$sum": 1
                },
                "results": {
                    "$push": "$$ROOT"
                },
            }
        },
    ]
    result = next(Task.aggregate(pipeline), None)
    total = 0
    remaining = 0
    results = []
    if result:
        total = int(result.get("total", -1))
        # unescape the mongo-safe key names before returning them
        results = [{
            "section":
            ParameterKeyEscaper.unescape(nested_get(r, ("_id", "section"))),
            "name":
            ParameterKeyEscaper.unescape(nested_get(r, ("_id", "name"))),
        } for r in result.get("results", [])]
        remaining = max(0, total - (len(results) + page * page_size))
    return total, remaining, results
def _import_entity(
    cls,
    f: IO[bytes],
    full_name: str,
    company_id: str,
    user_id: str,
    metadata: Mapping[str, Any],
) -> Optional[Sequence[Task]]:
    """
    Import entities of the type named by full_name from a JSON-lines stream
    into the database, rewriting ownership to the given company/user.
    :return: the imported Task documents, or None when none were tasks
    """
    cls_ = cls._get_entity_type(full_name)
    print(f"Writing {cls_.__name__.lower()}s into database")
    tasks = []
    override_project_count = 0
    for item in cls.json_lines(f):
        if cls_ == cls.task_cls:
            # upgrade exported task artifacts from list form to dict form
            # before deserializing
            task_data = json.loads(item)
            artifacts_path = ("execution", "artifacts")
            artifacts = nested_get(task_data, artifacts_path)
            if isinstance(artifacts, list):
                nested_set(
                    task_data,
                    artifacts_path,
                    value={get_artifact_id(a): a for a in artifacts},
                )
            item = json.dumps(task_data)

        doc = cls_.from_json(item, created=True)
        # reassign ownership to the importing user/company
        if hasattr(doc, "user"):
            doc.user = user_id
        if hasattr(doc, "company"):
            doc.company = company_id
        if isinstance(doc, cls.project_cls):
            override_project_name = metadata.get("project_name", None)
            if override_project_name:
                # number subsequent projects: "name 2", "name 3", ...
                if override_project_count:
                    override_project_name = (
                        f"{override_project_name} {override_project_count + 1}"
                    )
                override_project_count += 1
                doc.name = override_project_name
            doc.logo_url = metadata.get("logo_url", None)
            doc.logo_blob = metadata.get("logo_blob", None)
            # rename any pre-existing project with the same name so the
            # imported one can keep it
            cls_.objects(
                company=company_id, name=doc.name, id__ne=doc.id
            ).update(
                set__name=
                f"{doc.name}_{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}"
            )

        doc.save()
        if isinstance(doc, cls.task_cls):
            tasks.append(doc)
            # drop any stale events tied to the imported task id
            cls.event_bll.delete_task_events(company_id, doc.id, allow_locked=True)
    if tasks:
        return tasks