Esempio n. 1
0
def params_unprepare_from_saved(fields, copy_to_legacy=False):
    """
    Restore the original (unescaped) section and parameter names in-place.

    For each of ``fields["hyperparams"]`` and ``fields["configuration"]`` that
    is present and non-empty, the top-level keys are unescaped; when a
    top-level value is itself a dict, its keys are unescaped as well.

    If ``copy_to_legacy`` is set, the hyperparams and configuration data are
    additionally copied to the legacy ``execution.parameters`` and
    ``execution.model_desc`` locations for the old clients.
    """
    for field_name in ("hyperparams", "configuration"):
        stored = fields.get(field_name)
        if not stored:
            continue

        restored = {}
        for raw_key, entry in stored.items():
            clean_key = ParameterKeyEscaper.unescape(raw_key)
            if isinstance(entry, dict):
                # Nested level (e.g. params inside a section): unescape too
                entry = {
                    ParameterKeyEscaper.unescape(name): item
                    for name, item in entry.items()
                }
            restored[clean_key] = entry
        fields[field_name] = restored

    if not copy_to_legacy:
        return

    # (new field, legacy path inside fields, whether sections are used)
    legacy_targets = (
        ("hyperparams", ("execution", "parameters"), True),
        ("configuration", ("execution", "model_desc"), False),
    )
    for source_field, legacy_path, sectioned in legacy_targets:
        legacy_items = _get_legacy_params(fields.get(source_field),
                                          with_sections=sectioned)
        if not legacy_items:
            continue

        flattened = {
            _get_full_param_name(item): item["value"]
            for item in legacy_items
        }
        nested_set(fields, legacy_path, flattened)
Esempio n. 2
0
def _process_path(path: str):
    """
    Normalize a dotted task field path coming from the frontend.

    The frontend applies only a partial escaping to the path, so that all '.'
    characters inside section and key names are escaped. Each path part is
    therefore unescaped first and then fully escaped for mongo.

    Raises a bad-request ValidationError unless the path has 2 to 4 parts.
    """
    segments = path.split(".")
    if not 2 <= len(segments) <= 4:
        raise errors.bad_request.ValidationError("invalid task field",
                                                 path=path)

    normalized = []
    for segment in segments:
        plain = ParameterKeyEscaper.unescape(segment)
        normalized.append(ParameterKeyEscaper.escape(plain))
    return ".".join(normalized)
Esempio n. 3
0
    def get_configuration_names(cls, company_id: str,
                                task_ids: Sequence[str]) -> Dict[str, list]:
        """
        Collect the configuration item names of the requested tasks.

        Only tasks whose id is in ``task_ids`` and whose company is either
        ``company_id``, empty or missing are considered.

        Returns a mapping of task id to ``{"names": [...]}`` where the list
        holds the unescaped configuration names in sorted order.
        NOTE(review): the declared return annotation says ``Dict[str, list]``
        although each value is a dict wrapping the list.
        """
        with TimingContext("mongo", "get_configuration_names"):
            match_stage = {
                "$match": {
                    "company": {"$in": [None, "", company_id]},
                    "_id": {"$in": task_ids},
                }
            }
            # Turn the configuration object into an array of {k, v} items
            to_items_stage = {
                "$project": {"items": {"$objectToArray": "$configuration"}}
            }
            unwind_stage = {"$unwind": "$items"}
            # Gather the distinct item keys back per task
            collect_stage = {
                "$group": {
                    "_id": "$_id",
                    "names": {"$addToSet": "$items.k"},
                }
            }

            names_by_task = {}
            for entry in Task.aggregate(
                [match_stage, to_items_stage, unwind_stage, collect_stage]
            ):
                unescaped = [
                    ParameterKeyEscaper.unescape(name)
                    for name in entry["names"]
                ]
                unescaped.sort()
                names_by_task[entry["_id"]] = {"names": unescaped}

            return names_by_task
Esempio n. 4
0
def unescape_metadata(call: APICall, documents: Union[dict, Sequence[dict]]):
    """
    Unescape special characters in the metadata keys of the passed documents.

    ``documents`` may be a single dict or a sequence of dicts; each document
    is updated in-place. For calls whose requested endpoint version is 2.16
    or lower, an existing "metadata" entry is replaced with an empty list
    instead (presumably the format older clients expect - TODO confirm).
    """
    docs = [documents] if isinstance(documents, dict) else documents
    is_old_client = call.requested_endpoint_version <= PartialVersion("2.16")

    for doc in docs:
        if is_old_client:
            if "metadata" in doc:
                doc["metadata"] = []
                continue

        stored = doc.get("metadata")
        if stored:
            unescaped = {}
            for key, value in stored.items():
                unescaped[ParameterKeyEscaper.unescape(key)] = value
            doc["metadata"] = unescaped
Esempio n. 5
0
    def get_aggregated_project_parameters(
        cls,
        company_id,
        project_ids: Sequence[str],
        include_subprojects: bool,
        page: int = 0,
        page_size: int = 500,
    ) -> Tuple[int, int, Sequence[dict]]:
        """
        Return one page of the distinct hyper parameter (section, name) pairs
        used by the tasks matching the company and project constraints.

        :param company_id: passed to ``cls._get_company_constraint``
        :param project_ids: passed to ``cls._get_project_constraint``
        :param include_subprojects: passed to ``cls._get_project_constraint``
        :param page: zero-based page index (negative values are treated as 0)
        :param page_size: items per page (values below 1 are treated as 1)
        :return: a ``(total, remaining, results)`` tuple where ``total`` is the
            number of distinct pairs over all pages, ``remaining`` is the
            number of pairs that follow the returned page and ``results`` is
            a list of ``{"section": ..., "name": ...}`` dicts with unescaped
            names, sorted by section and then by name
        """
        page = max(0, page)
        page_size = max(1, page_size)
        skip = page * page_size
        pipeline = [
            {
                "$match": {
                    **cls._get_company_constraint(company_id),
                    **cls._get_project_constraint(project_ids, include_subprojects),
                    "hyperparams": {
                        "$exists": True,
                        "$gt": {}
                    },
                }
            },
            # Expand hyperparams into one document per section ...
            {
                "$project": {
                    "sections": {
                        "$objectToArray": "$hyperparams"
                    }
                }
            },
            {
                "$unwind": "$sections"
            },
            # ... and then into one document per parameter of each section
            {
                "$project": {
                    "section": "$sections.k",
                    "names": {
                        "$objectToArray": "$sections.v"
                    },
                }
            },
            {
                "$unwind": "$names"
            },
            # Keep each (section, name) pair once
            {
                "$group": {
                    "_id": {
                        "section": "$section",
                        "name": "$names.k"
                    }
                }
            },
            # Count ALL the distinct pairs and cut the requested page in
            # separate branches. Counting after $skip/$limit would only
            # count the rows of the current page, which makes "total" wrong
            # and "remaining" always 0.
            {
                "$facet": {
                    "total": [{
                        "$count": "count"
                    }],
                    "results": [
                        {
                            "$sort": OrderedDict({
                                "_id.section": 1,
                                "_id.name": 1
                            })
                        },
                        {
                            "$skip": skip
                        },
                        {
                            "$limit": page_size
                        },
                    ],
                }
            },
        ]

        facets = next(Task.aggregate(pipeline), None) or {}

        # "total" is an empty array when nothing matched
        count_docs = facets.get("total") or []
        total = int(count_docs[0].get("count", 0)) if count_docs else 0

        results = [{
            "section":
            ParameterKeyEscaper.unescape(nested_get(r, ("_id", "section"))),
            "name":
            ParameterKeyEscaper.unescape(nested_get(r, ("_id", "name"))),
        } for r in facets.get("results") or []]
        remaining = max(0, total - (len(results) + skip))

        return total, remaining, results
Esempio n. 6
0
    def get_model_metadata_keys(
        cls,
        company_id,
        project_ids: Sequence[str],
        include_subprojects: bool,
        page: int = 0,
        page_size: int = 500,
    ) -> Tuple[int, int, Sequence[dict]]:
        """
        Return one page of the distinct metadata keys used by the models
        matching the company and project constraints.

        :param company_id: passed to ``cls._get_company_constraint``
        :param project_ids: passed to ``cls._get_project_constraint``
        :param include_subprojects: passed to ``cls._get_project_constraint``
        :param page: zero-based page index (negative values are treated as 0)
        :param page_size: items per page (values below 1 are treated as 1)
        :return: a ``(total, remaining, results)`` tuple where ``total`` is the
            number of distinct keys over all pages, ``remaining`` is the
            number of keys that follow the returned page and ``results`` is
            the sorted list of unescaped keys of the requested page
        """
        page = max(0, page)
        page_size = max(1, page_size)
        skip = page * page_size
        pipeline = [
            {
                "$match": {
                    **cls._get_company_constraint(company_id),
                    **cls._get_project_constraint(project_ids, include_subprojects),
                    "metadata": {
                        "$exists": True,
                        "$gt": {}
                    },
                }
            },
            # Expand the metadata object into one document per key
            {
                "$project": {
                    "metadata": {
                        "$objectToArray": "$metadata"
                    }
                }
            },
            {
                "$unwind": "$metadata"
            },
            # Keep each key once
            {
                "$group": {
                    "_id": "$metadata.k"
                }
            },
            # Count ALL the distinct keys and cut the requested page in
            # separate branches. Counting after $skip/$limit would only
            # count the rows of the current page, which makes "total" wrong
            # and "remaining" always 0.
            {
                "$facet": {
                    "total": [{
                        "$count": "count"
                    }],
                    "results": [
                        {
                            "$sort": {
                                "_id": 1
                            }
                        },
                        {
                            "$skip": skip
                        },
                        {
                            "$limit": page_size
                        },
                    ],
                }
            },
        ]

        facets = next(Model.aggregate(pipeline), None) or {}

        # "total" is an empty array when nothing matched
        count_docs = facets.get("total") or []
        total = int(count_docs[0].get("count", 0)) if count_docs else 0

        results = [
            ParameterKeyEscaper.unescape(r.get("_id"))
            for r in facets.get("results") or []
        ]
        remaining = max(0, total - (len(results) + skip))

        return total, remaining, results
Esempio n. 7
0
def unescape_dict(data: dict) -> dict:
    """
    Return a copy of ``data`` whose top-level keys are unescaped.

    Values are kept as they are. A falsy input (``None`` or an empty dict)
    is returned unchanged.
    """
    if data:
        return dict(
            (ParameterKeyEscaper.unescape(key), value)
            for key, value in data.items()
        )

    return data
Esempio n. 8
0
    def get_aggregated_project_parameters(
        company_id,
        project_ids: Sequence[str] = None,
        page: int = 0,
        page_size: int = 500,
    ) -> Tuple[int, int, Sequence[dict]]:
        """
        Return one page of the distinct hyper parameter (section, name) pairs
        of the tasks whose company is ``company_id``, empty or missing.

        :param company_id: company to match
        :param project_ids: when provided and non-empty, only tasks of these
            projects are considered
        :param page: zero-based page index (negative values are treated as 0)
        :param page_size: items per page (values below 1 are treated as 1)
        :return: a ``(total, remaining, results)`` tuple where ``total`` is the
            number of distinct pairs, ``remaining`` is the number of pairs
            that follow the returned page and ``results`` is a list of
            ``{"section": ..., "name": ...}`` dicts with unescaped names
        """
        page = max(0, page)
        page_size = max(1, page_size)
        offset = page * page_size

        task_filter = {
            "company": {"$in": [None, "", company_id]},
            "hyperparams": {"$exists": True, "$gt": {}},
        }
        if project_ids:
            task_filter["project"] = {"$in": project_ids}

        # One document per hyperparams section
        split_sections = [
            {"$project": {"sections": {"$objectToArray": "$hyperparams"}}},
            {"$unwind": "$sections"},
        ]
        # One document per parameter inside each section
        split_names = [
            {
                "$project": {
                    "section": "$sections.k",
                    "names": {"$objectToArray": "$sections.v"},
                }
            },
            {"$unwind": "$names"},
        ]
        # Distinct (section, name) pairs in a stable order
        distinct_sorted = [
            {"$group": {"_id": {"section": "$section", "name": "$names.k"}}},
            {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})},
        ]
        # Gather everything into one document, count it and cut the page
        count_and_slice = [
            {
                "$group": {
                    "_id": 1,
                    "total": {"$sum": 1},
                    "results": {"$push": "$$ROOT"},
                }
            },
            {
                "$project": {
                    "total": 1,
                    "results": {"$slice": ["$results", offset, page_size]},
                }
            },
        ]
        pipeline = [
            {"$match": task_filter},
            *split_sections,
            *split_names,
            *distinct_sorted,
            *count_and_slice,
        ]

        with translate_errors_context():
            result = next(Task.aggregate(pipeline), None)

        if not result:
            return 0, 0, []

        total = int(result.get("total", -1))
        results = []
        for row in result.get("results", []):
            results.append({
                "section":
                ParameterKeyEscaper.unescape(dpath.get(row, "_id/section")),
                "name":
                ParameterKeyEscaper.unescape(dpath.get(row, "_id/name")),
            })
        remaining = max(0, total - (len(results) + offset))

        return total, remaining, results