Example 1
def _process_include_subprojects(call_data: dict):
    include_subprojects = call_data.pop("include_subprojects", False)
    project_ids = call_data.get("project")
    if not project_ids or not include_subprojects:
        return

    if not isinstance(project_ids, list):
        project_ids = [project_ids]
    call_data["project"] = project_ids_with_children(project_ids)
Example 2
    def _resolve_entities(
        cls,
        experiments: Sequence[str] = None,
        projects: Sequence[str] = None,
        task_statuses: Sequence[str] = None,
    ) -> Dict[Type[mongoengine.Document], Set[mongoengine.Document]]:
        """
        Resolve experiment and project identifiers into the full set of DB
        documents they imply: the requested projects (expanded with their
        sub-projects) and their non-archived tasks, the explicitly requested
        tasks and their containing projects, plus any input/output models
        referenced by the collected tasks.

        :param experiments: task identifiers to resolve (resolution semantics
            are defined by cls._resolve_type)
        :param projects: project identifiers to resolve; each is expanded with
            all of its child projects
        :param task_statuses: when provided, only project tasks in these
            statuses are collected (does not filter explicit experiments)
        :return: mapping from document class to the set of resolved documents
        """
        entities = defaultdict(set)

        if projects:
            print("Reading projects...")
            # Include the whole sub-project tree of every requested project
            projects = project_ids_with_children(projects)
            entities[cls.project_cls].update(
                cls._resolve_type(cls.project_cls, projects)
            )
            print("--> Reading project experiments...")
            # All non-archived tasks that live under the resolved projects
            query = Q(
                project__in=list(
                    set(filter(None, (p.id for p in entities[cls.project_cls])))
                ),
                system_tags__nin=[EntityVisibility.archived.value],
            )
            if task_statuses:
                query &= Q(status__in=list(set(task_statuses)))
            objs = cls.task_cls.objects(query)
            # Skip tasks that were requested explicitly; those are added in
            # the `experiments` branch below to avoid double handling
            entities[cls.task_cls].update(
                o for o in objs if o.id not in (experiments or [])
            )

        if experiments:
            print("Reading experiments...")
            entities[cls.task_cls].update(cls._resolve_type(cls.task_cls, experiments))
            print("--> Reading experiments projects...")
            # Pull in the containing projects of all resolved tasks
            objs = cls.project_cls.objects(
                id__in=list(
                    set(filter(None, (p.project for p in entities[cls.task_cls])))
                )
            )
            # Only add projects not already collected in the branch above
            project_ids = {p.id for p in entities[cls.project_cls]}
            entities[cls.project_cls].update(o for o in objs if o.id not in project_ids)

        cls._check_projects_hierarchy(entities[cls.project_cls])

        # Gather the ids of every input/output model referenced by the tasks
        task_models = chain.from_iterable(
            models
            for task in entities[cls.task_cls]
            if task.models
            for models in (task.models.input, task.models.output)
            if models
        )
        model_ids = {tm.model for tm in task_models}
        if model_ids:
            print("Reading models...")
            entities[cls.model_cls] = set(cls.model_cls.objects(id__in=list(model_ids)))

        return entities
Example 3
    def _get_tags_from_db(
        self,
        company_id: str,
        field: str,
        project: str = None,
        filter_: Dict[str, Sequence[str]] = None,
    ) -> set:
        """
        Return the distinct values of *field* over the company's documents,
        optionally narrowed by list-field filters and by a project (the
        project scope includes all of its sub-projects).
        """
        conditions = Q(company=company_id)
        for list_field, values in (filter_ or {}).items():
            if values:
                conditions &= GetMixin.get_list_field_query(list_field, values)
        if project:
            scoped_projects = project_ids_with_children([project])
            conditions &= Q(project__in=scoped_projects)

        return self.db_cls.objects(conditions).distinct(field)
Example 4
    def get_hyperparam_distinct_values(
        self,
        company_id: str,
        project_ids: Sequence[str],
        section: str,
        name: str,
        include_subprojects: bool,
        allow_public: bool = True,
    ) -> HyperParamValues:
        """
        Return (total, values): the distinct values of the hyperparam
        <section>.<name> across the matching tasks, capped at a configured
        maximum and sorted with a numeric-aware collation. Results are cached
        in redis and reused while no matching task was updated since the
        cached snapshot.

        :param project_ids: projects to search; expanded with sub-projects
            when include_subprojects is set
        :param allow_public: also match tasks with no/empty company (public)
        """
        if allow_public:
            company_constraint = {"company": {"$in": [None, "", company_id]}}
        else:
            company_constraint = {"company": company_id}
        if project_ids:
            if include_subprojects:
                project_ids = project_ids_with_children(project_ids)
            project_constraint = {"project": {"$in": project_ids}}
        else:
            project_constraint = {}

        # Hyperparam section/name are escaped the same way they are stored
        key_path = f"hyperparams.{ParameterKeyEscaper.escape(section)}.{ParameterKeyEscaper.escape(name)}"
        # Cheap freshness probe: the most recently updated task that has this
        # hyperparam decides whether the cached result is still valid
        last_updated_task = (Task.objects(
            **company_constraint,
            **project_constraint,
            **{
                f"{key_path.replace('.', '__')}__exists": True
            },
        ).only("last_update").order_by("-last_update").limit(1).first())
        if not last_updated_task:
            return 0, []

        # NOTE(review): looks like this raises TypeError if project_ids is
        # passed as None (join over None) — callers seem to pass a sequence;
        # confirm before relying on a None argument here
        redis_key = f"hyperparam_values_{company_id}_{'_'.join(project_ids)}_{section}_{name}_{allow_public}"
        last_update = last_updated_task.last_update or datetime.utcnow()
        cached_res = self._get_cached_hyperparam_values(
            key=redis_key, last_update=last_update)
        if cached_res:
            return cached_res

        max_values = config.get("services.tasks.hyperparam_values.max_count",
                                100)
        # Aggregation: project out the value, deduplicate, sort, cap, then
        # fold everything into a single {total, results} document
        pipeline = [
            {
                "$match": {
                    **company_constraint,
                    **project_constraint,
                    key_path: {
                        "$exists": True
                    },
                }
            },
            {
                "$project": {
                    "value": f"${key_path}.value"
                }
            },
            {
                "$group": {
                    "_id": "$value"
                }
            },
            {
                "$sort": {
                    "_id": 1
                }
            },
            {
                "$limit": max_values
            },
            {
                "$group": {
                    "_id": 1,
                    "total": {
                        "$sum": 1
                    },
                    "results": {
                        "$push": "$$ROOT._id"
                    },
                }
            },
        ]

        # Numeric-aware collation so e.g. "10" sorts after "2"
        result = next(Task.aggregate(pipeline, collation=Task._numeric_locale),
                      None)
        if not result:
            return 0, []

        total = int(result.get("total", 0))
        values = result.get("results", [])

        # Store the snapshot together with its freshness timestamp
        ttl = config.get("services.tasks.hyperparam_values.cache_ttl_sec",
                         86400)
        cached = dict(last_update=last_update.timestamp(),
                      total=total,
                      values=values)
        self.redis.setex(redis_key, ttl, json.dumps(cached))

        return total, values
Example 5
    def get_aggregated_project_parameters(
        company_id,
        project_ids: Sequence[str],
        include_subprojects: bool,
        page: int = 0,
        page_size: int = 500,
    ) -> Tuple[int, int, Sequence[dict]]:
        """
        Return (total, remaining, results) for the distinct hyperparam
        (section, name) pairs found in the company's (and public) tasks,
        paginated and sorted by section then name.

        :param project_ids: projects to search; expanded with sub-projects
            when include_subprojects is set
        :param page: zero-based page index (negative values clamp to 0)
        :param page_size: page length (values below 1 clamp to 1)
        """
        if not project_ids:
            project_filter = {}
        else:
            scoped_ids = (
                project_ids_with_children(project_ids)
                if include_subprojects
                else project_ids
            )
            project_filter = {"project": {"$in": scoped_ids}}
        page = max(0, page)
        page_size = max(1, page_size)

        # Unwind hyperparams into (section, name) pairs, deduplicate, sort,
        # paginate, then fold into a single {total, results} document
        stages = [
            {
                "$match": {
                    "company": {"$in": [None, "", company_id]},
                    "hyperparams": {"$exists": True, "$gt": {}},
                    **project_filter,
                }
            },
            {"$project": {"sections": {"$objectToArray": "$hyperparams"}}},
            {"$unwind": "$sections"},
            {
                "$project": {
                    "section": "$sections.k",
                    "names": {"$objectToArray": "$sections.v"},
                }
            },
            {"$unwind": "$names"},
            {"$group": {"_id": {"section": "$section", "name": "$names.k"}}},
            {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})},
            {"$skip": page * page_size},
            {"$limit": page_size},
            {
                "$group": {
                    "_id": 1,
                    "total": {"$sum": 1},
                    "results": {"$push": "$$ROOT"},
                }
            },
        ]

        agg_result = next(Task.aggregate(stages), None)
        if not agg_result:
            return 0, 0, []

        total = int(agg_result.get("total", -1))
        # Keys were escaped for storage; unescape them for the caller
        results = [
            {
                "section": ParameterKeyEscaper.unescape(dpath.get(r, "_id/section")),
                "name": ParameterKeyEscaper.unescape(dpath.get(r, "_id/name")),
            }
            for r in agg_result.get("results", [])
        ]
        remaining = max(0, total - (len(results) + page * page_size))

        return total, remaining, results
Example 6
    def get_unique_metric_variants(company_id, project_ids: Sequence[str],
                                   include_subprojects: bool):
        """
        Return the unique (metric, variant) combinations present in the last
        metrics of the company's (and public) tasks, each entry carrying the
        metric/variant display values and their stored hash keys, sorted by
        metric then variant.

        :param project_ids: projects to search; expanded with sub-projects
            when include_subprojects is set
        """
        match_stage = dict(company={"$in": [None, "", company_id]})
        if project_ids:
            scoped_ids = (
                project_ids_with_children(project_ids)
                if include_subprojects
                else project_ids
            )
            match_stage["project"] = {"$in": scoped_ids}

        # Unwind last_metrics into per-variant documents and collect each
        # unique (metric, variant) pair together with its hash keys
        pipeline = [
            {"$match": match_stage},
            {"$project": {"metrics": {"$objectToArray": "$last_metrics"}}},
            {"$unwind": "$metrics"},
            {
                "$project": {
                    "metric": "$metrics.k",
                    "variants": {"$objectToArray": "$metrics.v"},
                }
            },
            {"$unwind": "$variants"},
            {
                "$group": {
                    "_id": {
                        "metric": "$variants.v.metric",
                        "variant": "$variants.v.variant",
                    },
                    "metrics": {
                        "$addToSet": {
                            "metric": "$variants.v.metric",
                            "metric_hash": "$metric",
                            "variant": "$variants.v.variant",
                            "variant_hash": "$variants.k",
                        }
                    },
                }
            },
            {"$sort": OrderedDict({"_id.metric": 1, "_id.variant": 1})},
        ]

        with translate_errors_context():
            docs = Task.aggregate(pipeline)
            return [doc["metrics"][0] for doc in docs]