Esempio n. 1
0
def migrate_backend(db: Database):
    """
    Convert list-valued ``execution.artifacts`` fields of task documents into dicts.

    Every document of the ``task`` collection whose artifacts field is stored
    as an array is updated so that the field holds a mapping from
    ``get_artifact_id(artifact)`` to the artifact itself.

    :param db: Database holding the ``task`` collection
    """
    artifacts_field = "execution.artifacts"
    artifacts_path = tuple(artifacts_field.split("."))
    tasks: Collection = db["task"]

    # BSON type 4 is "array": only documents still using the list layout match
    array_artifacts = {artifacts_field: {"$type": 4}}
    cursor = tasks.find(filter=array_artifacts, projection=(artifacts_field,))
    for task in cursor:
        old_artifacts = nested_get(task, artifacts_path)
        if isinstance(old_artifacts, list):
            by_id = {}
            for artifact in old_artifacts:
                by_id[get_artifact_id(artifact)] = artifact
            tasks.update_one(
                {"_id": task["_id"]},
                {"$set": {artifacts_field: by_id}},
            )
Esempio n. 2
0
    def _upgrade_task_data(task_data: dict) -> dict:
        """
        Bring legacy task data up to the current task layout.

        The following steps are applied, in this order:
        - execution.parameters and execution.model_desc entries are moved into
          the hyperparams and configuration fields (entries already present in
          the new fields are kept as they are)
        - an execution.artifacts list is converted into a dict keyed by
          get_artifact_id() of each artifact
        - execution.model and output.model are moved into the models field
        - execution.docker_cmd is split into the container field

        :param task_data: Task data, upgraded in place
        :return: The same task_data object after the upgrade
        """
        legacy_param_fields = (
            ("execution.parameters", "hyperparams", hyperparams_default_section),
            ("execution.model_desc", "configuration", None),
        )
        for source_field, target_field, fallback_section in legacy_param_fields:
            source_path = source_field.split(".")
            legacy_values = nested_get(task_data, source_path)
            for full_name, raw_value in (legacy_values or {}).items():
                section, name = split_param_name(full_name, fallback_section)
                # Empty/None components (e.g. a missing section) are left out
                target_path = [
                    part for part in (target_field, section, name) if part
                ]
                if nested_get(task_data, target_path):
                    # Never override a value already stored in the new layout
                    continue
                entry = {
                    "name": name,
                    "type": hyperparams_legacy_type,
                    "value": str(raw_value),
                }
                if section is not None:
                    entry["section"] = section
                nested_set(task_data, path=target_path, value=entry)
            # The legacy field is dropped whether or not anything was migrated
            nested_delete(task_data, source_path)

        artifacts_path = ("execution", "artifacts")
        artifact_list = nested_get(task_data, artifacts_path)
        if isinstance(artifact_list, list):
            artifacts_by_id = {}
            for artifact in artifact_list:
                artifacts_by_id[get_artifact_id(artifact)] = artifact
            nested_set(task_data, path=artifacts_path, value=artifacts_by_id)

        models = task_data.get("models", {})
        now = datetime.utcnow()
        legacy_model_fields = (
            ("execution.model", TaskModelTypes.input),
            ("output.model", TaskModelTypes.output),
        )
        for legacy_field, model_type in legacy_model_fields:
            legacy_path = legacy_field.split(".")
            legacy_model = nested_get(task_data, legacy_path)
            current = models.get(model_type, [])
            name = TaskModelNames[model_type]
            if legacy_model:
                # Entries that already reference this model or use its name
                clashing = (
                    entry
                    for entry in current
                    if entry.get("model") == legacy_model
                    or entry.get("name") == name
                )
                if not any(clashing):
                    migrated = {
                        "model": legacy_model,
                        "name": name,
                        "updated": now,
                    }
                    # Input models are put first, output models last
                    current = (
                        [migrated, *current]
                        if model_type == TaskModelTypes.input
                        else [*current, migrated]
                    )
            models[model_type] = current
            nested_delete(task_data, legacy_path)
        task_data["models"] = models

        docker_cmd_path = ("execution", "docker_cmd")
        docker_cmd = nested_get(task_data, docker_cmd_path)
        if docker_cmd and not task_data.get("container"):
            # The first space separates the image from its arguments
            image, arguments = docker_cmd.partition(" ")[::2]
            task_data["container"] = dict(image=image, arguments=arguments)
        nested_delete(task_data, docker_cmd_path)

        return task_data
Esempio n. 3
0
    def _import_entity(
        cls,
        f: IO[bytes],
        full_name: str,
        company_id: str,
        user_id: str,
        metadata: Mapping[str, Any],
    ) -> Optional[Sequence[Task]]:
        """
        Load entities of a single type from a JSON-lines stream and save them.

        Each line is parsed into a document of the entity class resolved from
        full_name. Documents that have ``user`` / ``company`` attributes get
        them overwritten with user_id / company_id before being saved.

        Task lines: a list-valued execution.artifacts is converted into a dict
        keyed by get_artifact_id() before parsing, and the events of each saved
        task are deleted.

        Project documents: the name may be replaced by metadata["project_name"]
        (suffixed with a running number from the second project on), logo
        fields are taken from metadata, and other documents of the same company
        carrying the same name are renamed with a timestamp suffix.

        :param f: Stream handed to cls.json_lines()
        :param full_name: Name handed to cls._get_entity_type() to resolve the entity class
        :param company_id: Company assigned to the imported documents
        :param user_id: User assigned to the imported documents
        :param metadata: Optional overrides: project_name, logo_url, logo_blob
        :return: The saved task documents, or None if no task was imported
        """
        cls_ = cls._get_entity_type(full_name)
        print(f"Writing {cls_.__name__.lower()}s into database")

        tasks = []
        override_project_count = 0
        for line in cls.json_lines(f):
            payload = line
            if cls_ == cls.task_cls:
                task_data = json.loads(line)
                artifacts_path = ("execution", "artifacts")
                artifact_list = nested_get(task_data, artifacts_path)
                if isinstance(artifact_list, list):
                    # Legacy list layout: re-key by artifact id and re-serialize
                    artifacts_by_id = {}
                    for artifact in artifact_list:
                        artifacts_by_id[get_artifact_id(artifact)] = artifact
                    nested_set(task_data, artifacts_path, value=artifacts_by_id)
                    payload = json.dumps(task_data)

            doc = cls_.from_json(payload, created=True)
            for attribute, owner in (("user", user_id), ("company", company_id)):
                if hasattr(doc, attribute):
                    setattr(doc, attribute, owner)

            if isinstance(doc, cls.project_cls):
                project_name = metadata.get("project_name", None)
                if project_name:
                    override_project_count += 1
                    if override_project_count > 1:
                        # Only the first overridden project keeps the bare name
                        project_name = f"{project_name} {override_project_count}"
                    doc.name = project_name

                doc.logo_url = metadata.get("logo_url", None)
                doc.logo_blob = metadata.get("logo_blob", None)

                # Rename other documents of this company using the same name
                same_name_others = cls_.objects(
                    company=company_id, name=doc.name, id__ne=doc.id
                )
                timestamp = datetime.utcnow().strftime("%Y-%m-%d_%H-%M-%S")
                same_name_others.update(set__name=f"{doc.name}_{timestamp}")

            doc.save()

            if isinstance(doc, cls.task_cls):
                tasks.append(doc)
                cls.event_bll.delete_task_events(
                    company_id, doc.id, allow_locked=True
                )

        return tasks or None