def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]:
    """
    Delete all tasks that belong to the given projects.

    Before the deletion, tasks and models located in *other* projects that
    reference one of the deleted tasks get their ``parent`` / ``task``
    reference reset to None. Events of the deleted tasks are removed through
    ``event_bll``.

    :return: tuple of (result of the queryset ``delete()`` call,
        urls collected from debug images and plots,
        uris of the output artifacts of the deleted tasks)
    """
    project_tasks = Task.objects(project__in=projects).only(
        "id", "execution__artifacts"
    )
    if not project_tasks:
        return 0, set(), set()

    task_ids = {doc.id for doc in project_tasks}
    with TimingContext("mongo", "delete_tasks_update_children"):
        # detach children/models that live outside of the deleted projects
        Task.objects(parent__in=task_ids, project__nin=projects).update(parent=None)
        Model.objects(task__in=task_ids, project__nin=projects).update(task=None)

    event_urls = set()
    artifact_urls = set()
    for doc in project_tasks:
        event_urls.update(collect_debug_image_urls(company, doc.id))
        event_urls.update(collect_plot_image_urls(company, doc.id))
        if not (doc.execution and doc.execution.artifacts):
            continue
        # only output artifacts with a uri are reported back to the caller
        for artifact in doc.execution.artifacts.values():
            if artifact.mode == ArtifactModes.output and artifact.uri:
                artifact_urls.add(artifact.uri)

    event_bll.delete_multi_task_events(company, list(task_ids))
    deleted = project_tasks.delete()
    return deleted, event_urls, artifact_urls
def create(call: APICall, company_id, req_model: CreateModelRequest):
    """
    Create a new model from the request and store its id in the call result.

    When the request is flagged as public the model is stored with an empty
    company. The referenced project and task (if any) are validated before
    the model document is saved.
    """
    if req_model.public:
        company_id = ""

    with translate_errors_context():
        if req_model.project:
            validate_id(Project, company=company_id, project=req_model.project)

        has_task = req_model.task
        payload = req_model.to_struct()
        if has_task:
            validate_task(company_id, payload)

        fields = filter_fields(Model, payload)
        conform_tag_fields(call, fields, validate=True)
        validate_metadata(fields.get("metadata"))

        # creation and last update share the same timestamp for a new model
        timestamp = datetime.utcnow()
        new_model = Model(
            id=database.utils.id(),
            user=call.identity.user,
            company=company_id,
            created=timestamp,
            last_update=timestamp,
            **fields,
        )
        new_model.save()
        _update_cached_tags(company_id, project=new_model.project, fields=fields)

        call.result.data_model = CreateModelResponse(id=new_model.id, created=True)
def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Model]:
    """
    Check that a task may be deleted and collect the models it produced.

    Without ``force`` the call raises ``TaskCannotBeDeleted`` when the task has
    published children or published (ready) output models. Draft models that
    are used as input by any task are removed from the returned ``draft`` group.

    :return: the task models split by their ``ready`` flag
    """
    if not force:
        with TimingContext("mongo", "count_published_children"):
            published_children_count = Task.objects(
                parent=task.id, status=TaskStatus.published
            ).count()
            if published_children_count:
                raise errors.bad_request.TaskCannotBeDeleted(
                    "has children, use force=True",
                    task=task.id,
                    children=published_children_count,
                )

    with TimingContext("mongo", "get_task_models"):
        created_by_task = Model.objects(task=task.id).only("id", "task", "ready")
        models = TaskOutputs(attrgetter("ready"), Model, created_by_task)
        if models.published and not force:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    if task.models and task.models.output:
        with TimingContext("mongo", "get_task_output_model"):
            output_ids = [item.model for item in task.models.output]
            for output_model in Model.objects(id__in=output_ids):
                if not output_model.ready:
                    models.draft.append(output_model)
                    continue
                if not force:
                    raise errors.bad_request.TaskCannotBeDeleted(
                        "has output model, use force=True",
                        task=task.id,
                        model=output_model.id,
                    )
                models.published.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            draft_ids = models.draft.ids
            dependent_tasks = Task.objects(models__input__model__in=draft_ids).only(
                "id", "models"
            )
            # ids of all models used as input by the dependent tasks
            input_models = {
                item.model
                for dependent in dependent_tasks
                if dependent.models
                for item in dependent.models.input
            }
            if input_models:
                models.draft = DocumentGroup(
                    Model, (m for m in models.draft if m.id not in input_models)
                )

    return models
def delete_metadata(_: APICall, company_id: str, request: DeleteMetadataRequest):
    """
    Remove the requested metadata keys from a company model.

    The model's ``last_update`` is refreshed only when ``metadata_delete``
    reports a truthy result.

    :return: dict with the ``updated`` value returned by ``metadata_delete``
    """
    target_id = request.model
    # lookup is done only for its validation effect; the document is not used
    ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=target_id, only_fields=("id",)
    )

    changed = metadata_delete(cls=Model, _id=target_id, keys=request.keys)
    if not changed:
        return {"updated": changed}

    Model.objects(id=target_id).update_one(last_update=datetime.utcnow())
    return {"updated": changed}
def add_or_update_metadata(
    _: APICall, company_id: str, request: AddOrUpdateMetadataRequest
):
    """
    Add or update metadata items on a company model.

    The model's ``last_update`` is refreshed only when
    ``metadata_add_or_update`` reports a truthy result.

    :return: dict with the ``updated`` value returned by
        ``metadata_add_or_update``
    """
    model_id = request.model
    # The lookup is only an existence/ownership check and its result is
    # discarded, so fetch just the id (same as delete_metadata does).
    ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=model_id, only_fields=("id",)
    )

    updated = metadata_add_or_update(
        cls=Model,
        _id=model_id,
        items=get_metadata_from_api(request.metadata),
    )
    if updated:
        Model.objects(id=model_id).update_one(last_update=datetime.utcnow())

    return {"updated": updated}
def parse_model_fields(call, valid_fields):
    """
    Extract the model fields from the call data and validate them.

    Tag fields are conformed (with validation) and metadata, when present and
    non-empty, is passed through ``validate_metadata``.

    :return: the parsed fields
    """
    parsed = parse_from_call(call.data, valid_fields, Model.get_fields())
    conform_tag_fields(call, parsed, validate=True)

    model_metadata = parsed.get("metadata")
    if not model_metadata:
        return parsed

    validate_metadata(model_metadata)
    return parsed
def model_set_ready(
    cls,
    model_id: str,
    company_id: str,
    publish_task: bool,
    force_publish_task: bool = False,
) -> tuple:
    """
    Mark a company model as ready, optionally publishing its creating task.

    Raises ``InvalidModelId`` when the model is not found for the company and
    ``ModelIsReady`` when it is already ready.

    :return: tuple of (result of the model update, dict describing the
        published task; empty when no task was published)
    """
    with translate_errors_context():
        lookup = dict(id=model_id, company=company_id)
        model = Model.objects(**lookup).first()
        if not model:
            raise errors.bad_request.InvalidModelId(**lookup)
        if model.ready:
            raise errors.bad_request.ModelIsReady(**lookup)

        published_task_data = {}
        if model.task and publish_task:
            creating_task = (
                Task.objects(id=model.task, company=company_id)
                .only("id", "status")
                .first()
            )
            # publish only a task that exists and is not published yet
            if creating_task and creating_task.status != TaskStatus.published:
                published_task_data["data"] = cls.publish_task(
                    task_id=model.task,
                    company_id=company_id,
                    publish_model=False,
                    force=force_publish_task,
                )
                published_task_data["id"] = model.task

        updated = model.update(upsert=False, ready=True)
        return updated, published_task_data
def get_by_task_id(call: APICall, company_id, _):
    """
    Return the last output model of a task (legacy endpoint).

    Rejected with ``NotSupported`` for endpoint versions above
    ``ModelsBackwardsCompatibility.max_version``. The model is looked up among
    the models allowed by ``get_company_or_none_constraint``.
    """
    if call.requested_endpoint_version > ModelsBackwardsCompatibility.max_version:
        raise errors.moved_permanently.NotSupported(
            "use models.get_by_id/get_all apis")

    task_id = call.data["task"]

    with translate_errors_context():
        lookup = dict(id=task_id, company=company_id)
        task = Task.get(_only=["models"], **lookup)
        if not task:
            raise errors.bad_request.InvalidTaskId(**lookup)

        has_output = task.models and task.models.output
        if not has_output:
            raise errors.bad_request.MissingTaskFields(field="models.output")

        # the most recently registered output model is the last one
        model_id = task.models.output[-1].model
        allowed = Q(id=model_id) & get_company_or_none_constraint(company_id)
        model = Model.objects(allowed).first()
        if not model:
            raise errors.bad_request.InvalidModelId(
                "no such public or company model",
                id=model_id,
                company=company_id,
            )

        as_dict = model.to_proper_dict()
        conform_output_tags(call, as_dict)
        call.result.data = {"model": as_dict}
def _update_model(call: APICall, company_id, model_id=None):
    """
    Apply the update fields found in the call data to a company model.

    When both a task and an iteration are supplied, the task statistics are
    updated with the iteration. After a successful update the cached tags are
    either reset (project changed) or updated (same project).

    :param model_id: id of the model; taken from ``call.data["model"]`` when
        not provided
    :return: ``UpdateResponse`` with the update count and the updated fields
    """
    if not model_id:
        model_id = call.data["model"]

    model = ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=model_id
    )
    data = prepare_update_fields(call, company_id, call.data)

    task_id, iteration = data.get("task"), data.get("iteration")
    if task_id and iteration is not None:
        TaskBLL.update_statistics(
            task_id=task_id,
            company_id=company_id,
            last_iteration_max=iteration,
        )

    updated_count, updated_fields = Model.safe_update(company_id, model.id, data)
    if updated_count:
        # bump last_update only when one of the tracked fields was changed
        for tracked in last_update_fields:
            if tracked in updated_fields:
                model.update(upsert=False, last_update=datetime.utcnow())
                break

        new_project = updated_fields.get("project", model.project)
        if new_project == model.project:
            _update_cached_tags(
                company_id, project=model.project, fields=updated_fields
            )
        else:
            _reset_cached_tags(company_id, projects=[new_project, model.project])

    conform_output_tags(call, updated_fields)
    unescape_metadata(call, updated_fields)
    return UpdateResponse(updated=updated_count, fields=updated_fields)
def get_output_model(task, force=False):
    """
    Fetch the model referenced by ``task.output.model``.

    Raises ``TaskCannotBeDeleted`` when the model exists, is ready and
    ``force`` is not set.

    :return: the output model, or None when it was not found
    """
    with TimingContext("mongo", "get_task_output_model"):
        output_model = Model.objects(id=task.output.model).first()

    if force or not output_model or not output_model.ready:
        return output_model

    raise errors.bad_request.TaskCannotBeDeleted(
        "has output model, use force=True", task=task.id, model=task.output.model
    )
def get_by_id_ex(call: APICall, company_id, _):
    """
    Query models (public ones included) using the call data as the query and
    place the result list in the call result under ``models``.
    """
    conform_tag_fields(call, call.data)

    with translate_errors_context(), TimingContext("mongo", "models_get_by_id_ex"):
        found = Model.get_many_with_join(
            company=company_id, query_dict=call.data, allow_public=True
        )
        conform_output_tags(call, found)
        call.result.data = {"models": found}
def get_frameworks(self, company, project_ids: Optional[Sequence]) -> Sequence:
    """
    Return the distinct ``framework`` values of the models selected by
    ``get_company_or_none_constraint(company)``.

    When project ids are passed, only models from these projects are
    considered.
    """
    constraint = get_company_or_none_constraint(company)
    if project_ids:
        constraint = constraint & Q(project__in=project_ids)

    matching_models = Model.objects(constraint)
    return matching_models.distinct(field="framework")
def get_by_id_ex(call: APICall, company_id, _):
    """
    Query models (public ones included) using the call data as the query and
    place the result list in the call result under ``models``.

    Metadata query parameters are escaped before the query and the metadata of
    the returned models is unescaped afterwards.
    """
    conform_tag_fields(call, call.data)
    Metadata.escape_query_parameters(call)

    with TimingContext("mongo", "models_get_by_id_ex"):
        found = Model.get_many_with_join(
            company=company_id, query_dict=call.data, allow_public=True
        )

    conform_output_tags(call, found)
    unescape_metadata(call, found)
    call.result.data = {"models": found}
def unarchive_model(cls, model_id: str, company_id: str):
    """
    Remove the archived system tag from a company model and refresh its
    ``last_update``.

    :return: the result of the queryset ``update()`` call
    """
    # lookup is done only for its validation effect; the document is not used
    cls.get_company_model_by_id(
        company_id=company_id, model_id=model_id, only_fields=("id",)
    )

    company_model = Model.objects(company=company_id, id=model_id)
    return company_model.update(
        pull__system_tags=EntityVisibility.archived.value,
        last_update=datetime.utcnow(),
    )
def get_company_model_by_id(
    cls, company_id: str, model_id: str, only_fields=None
) -> Model:
    """
    Return the model with the given id that belongs to the given company.

    :param only_fields: optional field names to load instead of the whole
        document
    :raises errors.bad_request.InvalidModelId: when no such model exists
    """
    lookup = dict(company=company_id, id=model_id)
    queryset = Model.objects(**lookup)
    model = (queryset.only(*only_fields) if only_fields else queryset).first()
    if model:
        return model

    raise errors.bad_request.InvalidModelId(**lookup)
def validate_execution_model(task, allow_only_public=False):
    """
    Verify that the execution model of a task exists.

    Nothing is checked (and None is returned) when the task has no execution
    model. With ``allow_only_public`` the lookup is constrained with a None
    company instead of the task company.

    :raises errors.bad_request.InvalidModelId: when the model is not found
    :return: the execution model
    """
    execution = task.execution
    if not (execution and execution.model):
        return

    if allow_only_public:
        company = None
    else:
        company = task.company

    model_id = task.execution.model
    constraint = Q(id=model_id) & get_company_or_none_constraint(company)
    model = Model.objects(constraint).first()
    if model:
        return model

    raise errors.bad_request.InvalidModelId(model=model_id)
def publish_task(
    cls,
    task_id: str,
    company_id: str,
    publish_model: bool,
    force: bool,
    status_reason: str = "",
    status_message: str = "",
) -> dict:
    """
    Publish a task, optionally setting its output model to ready first.

    The task is first moved to the ``publishing`` status. If anything fails
    afterwards, the previous status is restored and the error is re-raised.

    :param publish_model: when True, an existing output model that is not yet
        ready is set ready via ``cls.model_set_ready``
    :param force: skip the status change validation
    :return: the result of executing the status change request
    """
    task = cls.get_task_with_access(
        task_id, company_id=company_id, requires_write_access=True
    )
    if not force:
        validate_status_change(task.status, TaskStatus.published)

    previous_task_status = task.status
    # the task may have no output; fall back to an empty one
    output = task.output or Output()
    try:
        # set state to publishing
        task.status = TaskStatus.publishing
        task.save()

        # publish task models
        # NOTE: read the model id from the None-safe ``output`` and not from
        # ``task.output``, which may be None for a task without output
        if output.model and publish_model:
            output_model = (
                Model.objects(id=output.model).only("id", "task", "ready").first()
            )
            if output_model and not output_model.ready:
                cls.model_set_ready(
                    model_id=output.model,
                    company_id=company_id,
                    publish_task=False,
                )

        # set task status to published, and update (or set) it's new output
        # (view and models)
        return ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.published,
            force=force,
            status_reason=status_reason,
            status_message=status_message,
        ).execute(published=datetime.utcnow(), output=output)
    except Exception:
        # restore the status the task had before the publish attempt;
        # bare raise keeps the original traceback intact
        task.status = previous_task_status
        task.save()
        raise
def publish_task(
    task_id: str,
    company_id: str,
    force: bool,
    publish_model_func: Callable[[str, str], Any] = None,
    status_message: str = "",
    status_reason: str = "",
) -> dict:
    """
    Publish a task, optionally publishing its last output model first.

    The task is first moved to the ``publishing`` status. If anything fails
    afterwards, the previous status is restored and the error is re-raised.

    :param publish_model_func: optional callable invoked with
        (model id, company id) for the last output model when that model
        exists for the company and is not ready
    :param force: skip the status change validation
    :return: the result of executing the status change request
    """
    task = TaskBLL.get_task_with_access(
        task_id, company_id=company_id, requires_write_access=True
    )
    if not force:
        validate_status_change(task.status, TaskStatus.published)

    status_before = task.status
    output = task.output or Output()
    try:
        # mark the task as being published
        task.status = TaskStatus.publishing
        task.save()

        if publish_model_func and task.models and task.models.output:
            last_output_id = task.models.output[-1].model
            queryset = Model.objects(id=last_output_id, company=company_id)
            model = queryset.only("id", "ready").first()
            if model and not model.ready:
                publish_model_func(model.id, company_id)

        # move to published and update (or set) the task output
        status_change = ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.published,
            force=force,
            status_reason=status_reason,
            status_message=status_message,
        )
        return status_change.execute(published=datetime.utcnow(), output=output)
    except Exception as ex:
        # roll back to the status the task had before the attempt
        task.status = status_before
        task.save()
        raise ex
def validate_input_models(task, allow_only_public=False):
    """
    Verify that all the input models of a task exist.

    Nothing is checked when the task has no models section or no input
    models. With ``allow_only_public`` the lookup is constrained with a None
    company instead of the task company.

    :raises errors.bad_request.InvalidModelId: listing the ids that were not
        found
    """
    # task.models may be unset, so guard it before reading its input list
    if not task.models or not task.models.input:
        return

    company = None if allow_only_public else task.company
    model_ids = {m.model for m in task.models.input}
    models = Model.objects(
        Q(id__in=model_ids) & get_company_or_none_constraint(company)
    ).only("id")

    missing = model_ids - {m.id for m in models}
    if missing:
        raise errors.bad_request.InvalidModelId(models=missing)
    return
def get_all(call: APICall, company_id, _):
    """
    Query models (public ones included) using the call data both as the
    parameters and as the query.

    The result holds the models under ``models`` merged with whatever
    ``Model.get_many`` placed in its ``ret_params`` dict.
    """
    conform_tag_fields(call, call.data)
    Metadata.escape_query_parameters(call)

    extra_params = {}
    with TimingContext("mongo", "models_get_all"):
        found = Model.get_many(
            company=company_id,
            parameters=call.data,
            query_dict=call.data,
            allow_public=True,
            ret_params=extra_params,
        )

    conform_output_tags(call, found)
    unescape_metadata(call, found)

    response = {"models": found}
    response.update(extra_params)
    call.result.data = response
def get_by_id(call: APICall, company_id, _):
    """
    Return a single model (public ones included) by the id found in
    ``call.data["model"]``.

    :raises errors.bad_request.InvalidModelId: when no model matched
    """
    model_id = call.data["model"]

    with translate_errors_context():
        found = Model.get_many(
            company=company_id,
            query_dict=call.data,
            query=Q(id=model_id),
            allow_public=True,
        )
        if not found:
            raise errors.bad_request.InvalidModelId(
                "no such public or company model",
                id=model_id,
                company=company_id,
            )

        model = found[0]
        conform_output_tags(call, model)
        call.result.data = {"model": model}
def get_by_id(call: APICall, company_id, _):
    """
    Return a single model (public ones included) by the id found in
    ``call.data["model"]``.

    Metadata query parameters are escaped before the query and the metadata of
    the returned model is unescaped afterwards.

    :raises errors.bad_request.InvalidModelId: when no model matched
    """
    model_id = call.data["model"]
    Metadata.escape_query_parameters(call)

    found = Model.get_many(
        company=company_id,
        query_dict=call.data,
        query=Q(id=model_id),
        allow_public=True,
    )
    if not found:
        raise errors.bad_request.InvalidModelId(
            "no such public or company model",
            id=model_id,
            company=company_id,
        )

    model = found[0]
    conform_output_tags(call, model)
    unescape_metadata(call, model)
    call.result.data = {"model": model}
def get_outputs_for_deletion(task, force=False):
    """
    Collect the models and child tasks affected by deleting a task.

    Without ``force`` the call raises ``TaskCannotBeDeleted`` when the task has
    published (ready) output models or published children. Draft models that
    are used as the execution model of any task are removed from the returned
    ``draft`` list.

    :return: tuple of (task models split by their ``ready`` flag,
        queryset of the task children)
    """
    with TimingContext("mongo", "get_task_models"):
        models = TaskOutputs(
            attrgetter("ready"),
            Model,
            Model.objects(task=task.id).only("id", "task", "ready"),
        )
        if not force and models.published:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    # task.output may be unset, so guard it before reading the model id
    if task.output and task.output.model:
        output_model = get_output_model(task, force)
        if output_model:
            if output_model.ready:
                models.published.append(output_model)
            else:
                models.draft.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            model_ids = [m.id for m in models.draft]
            dependent_tasks = Task.objects(execution__model__in=model_ids).only(
                "id", "execution.model"
            )
            # a set gives constant-time membership tests in the filter below
            busy_models = {t.execution.model for t in dependent_tasks}
            models.draft[:] = [m for m in models.draft if m.id not in busy_models]

    with TimingContext("mongo", "get_task_children"):
        tasks = Task.objects(parent=task.id).only("id", "parent", "status")
        # distinct loop name: do not shadow the ``task`` parameter used below
        published_tasks = [
            child for child in tasks if child.status == TaskStatus.published
        ]
        if not force and published_tasks:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has children, use force=True", task=task.id, children=published_tasks
            )

    return models, tasks
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
    """
    Delete the models of the given projects.

    Tasks from other projects that reference one of the deleted models, as an
    input model or as an output model, get that reference set to None.

    :return: tuple of (result of the queryset ``delete()`` call,
        uris of the deleted models)
    """
    with TimingContext("mongo", "delete_models"):
        project_models = Model.objects(project__in=projects).only("task", "id", "uri")
        if not project_models:
            return 0, set()

        model_ids = list({doc.id for doc in project_models})
        outside_projects = {"$nin": projects}
        in_deleted_models = {"$in": model_ids}
        task_collection = Task._get_collection()

        # input model references held by tasks of other projects
        task_collection.update_many(
            filter={
                "project": outside_projects,
                "models.input.model": in_deleted_models,
            },
            update={"$set": {"models.input.$[elem].model": None}},
            array_filters=[{"elem.model": in_deleted_models}],
            upsert=False,
        )

        # output model references held by the creating tasks of other projects
        model_tasks = list({doc.task for doc in project_models if doc.task})
        if model_tasks:
            Task._get_collection().update_many(
                filter={
                    "_id": {"$in": model_tasks},
                    "project": outside_projects,
                    "models.output.model": in_deleted_models,
                },
                update={"$set": {"models.output.$[elem].model": None}},
                array_filters=[{"elem.model": in_deleted_models}],
                upsert=False,
            )

        urls = {doc.uri for doc in project_models if doc.uri}
        deleted = project_models.delete()
        return deleted, urls
def _cleanup_model(cls, model: Model):
    """
    Blank the company and user of a model and replace its tags with the
    result of ``cls._filter_out_export_tags``. The model is modified in place.
    """
    model.company = model.user = ""
    remaining_tags = cls._filter_out_export_tags(model.tags)
    model.tags = remaining_tags
def delete_model(
    cls, model_id: str, company_id: str, force: bool
) -> Tuple[int, Model]:
    """
    Delete a company model and mark the references to it as deleted.

    Tasks that use the model as an input model get the reference replaced by
    the model id prefixed with ``deleted_prefix``. When the creating task is
    published and lists the model among its output models, that reference is
    replaced as well, and the task's ``output.error`` and ``last_change`` are
    set.

    :param force: required in order to delete a model that is used as input
        by other tasks or that was created by a published task
    :raises errors.bad_request.ModelInUse: model is used as an input model and
        ``force`` is not set
    :raises errors.bad_request.ModelCreatingTaskExists: the creating task is
        published and ``force`` is not set
    :return: tuple of (result of the queryset ``delete()`` call, the model
        document loaded with the id, task, project and uri fields)
    """
    model = cls.get_company_model_by_id(
        company_id=company_id,
        model_id=model_id,
        only_fields=("id", "task", "project", "uri"),
    )
    deleted_model_id = f"{deleted_prefix}{model_id}"

    using_tasks = Task.objects(models__input__model=model_id).only("id")
    if using_tasks:
        if not force:
            raise errors.bad_request.ModelInUse(
                "as execution model, use force=True to delete",
                num_tasks=len(using_tasks),
            )
        # update deleted model id in using tasks
        Task._get_collection().update_many(
            filter={"_id": {"$in": [t.id for t in using_tasks]}},
            update={"$set": {"models.input.$[elem].model": deleted_model_id}},
            array_filters=[{"elem.model": model_id}],
            upsert=False,
        )

    if model.task:
        task = Task.objects(id=model.task).first()
        if task and task.status == TaskStatus.published:
            if not force:
                raise errors.bad_request.ModelCreatingTaskExists(
                    "and published, use force=True to delete", task=model.task
                )
            # The output list holds items whose ``model`` attribute is the
            # model id, so compare against that attribute and not against the
            # items themselves. task.models may also be unset.
            output_items = (task.models.output if task.models else None) or []
            if any(item.model == model_id for item in output_items):
                now = datetime.utcnow()
                Task._get_collection().update_one(
                    filter={"_id": model.task, "models.output.model": model_id},
                    update={
                        # every assigned field must live under an update
                        # operator; a bare top-level field is not a valid
                        # update document
                        "$set": {
                            "models.output.$[elem].model": deleted_model_id,
                            "output.error": f"model deleted on {now.isoformat()}",
                            "last_change": now,
                        },
                    },
                    array_filters=[{"elem.model": model_id}],
                    upsert=False,
                )

    del_count = Model.objects(id=model_id, company=company_id).delete()
    return del_count, model
def make_public(call: APICall, company_id, request: MakePublicRequest):
    """
    Make the requested company models public.

    The value returned by ``Model.set_public`` is stored as the call result.
    """
    # enabled=True turns the public state on. The original passed False,
    # which contradicts the purpose of this endpoint.
    # NOTE(review): confirm against Model.set_public that ``enabled`` is the
    # on/off switch of the public state.
    call.result.data = Model.set_public(
        company_id, request.ids, invalid_cls=InvalidModelId, enabled=True
    )
def get_model_metadata_distinct_values(
    self,
    company_id: str,
    project_ids: Sequence[str],
    key: str,
    include_subprojects: bool,
    allow_public: bool = True,
) -> ParamValues:
    """
    Return the distinct values stored under a metadata key in the models
    selected by the company and project constraints.

    The result is cached in redis. The ``last_update`` of the most recently
    updated model having the key is passed to the cache lookup. At most
    ``services.models.metadata_values.max_count`` values are returned.

    :return: tuple of (number of values, list of values); (0, []) when no
        model has the key
    """
    company_constraint = self._get_company_constraint(company_id, allow_public)
    project_constraint = self._get_project_constraint(
        project_ids, include_subprojects
    )
    key_path = f"metadata.{ParameterKeyEscaper.escape(key)}"

    # mongoengine style lookup: dots in the path are written as "__"
    exists_lookup = key_path.replace(".", "__") + "__exists"
    models_with_key = Model.objects(
        **company_constraint,
        **project_constraint,
        **{exists_lookup: True},
    )
    last_updated_model = (
        models_with_key.only("last_update").order_by("-last_update").limit(1).first()
    )
    if not last_updated_model:
        return 0, []

    joined_projects = "_".join(project_ids)
    redis_key = (
        f"modelmetadata_values_{company_id}_{joined_projects}_{key}_{allow_public}"
    )
    last_update = last_updated_model.last_update or datetime.utcnow()
    cached_res = self._get_cached_param_values(
        key=redis_key, last_update=last_update
    )
    if cached_res:
        return cached_res

    max_values = config.get("services.models.metadata_values.max_count", 100)
    match_stage = {
        **company_constraint,
        **project_constraint,
        key_path: {"$exists": True},
    }
    pipeline = [
        {"$match": match_stage},
        {"$project": {"value": f"${key_path}.value"}},
        {"$group": {"_id": "$value"}},
        {"$sort": {"_id": 1}},
        {"$limit": max_values},
        # fold the distinct values into a single document
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT._id"},
            }
        },
    ]

    aggregated = Model.aggregate(pipeline, collation=Model._numeric_locale)
    result = next(aggregated, None)
    if not result:
        return 0, []

    total = int(result.get("total", 0))
    values = result.get("results", [])

    ttl = config.get("services.models.metadata_values.cache_ttl_sec", 86400)
    cache_entry = dict(
        last_update=last_update.timestamp(), total=total, values=values
    )
    self.redis.setex(redis_key, ttl, json.dumps(cache_entry))
    return total, values
def get_model_metadata_keys(
    cls,
    company_id,
    project_ids: Sequence[str],
    include_subprojects: bool,
    page: int = 0,
    page_size: int = 500,
) -> Tuple[int, int, Sequence[dict]]:
    """
    Return one page of the distinct metadata keys used by the models selected
    by the company and project constraints, sorted by key.

    :param page: zero based page number; negative values are treated as 0
    :param page_size: number of keys per page; values below 1 are treated as 1
    :return: tuple of (total number of distinct keys,
        number of keys remaining after this page,
        unescaped keys of the requested page)
    """
    page = max(0, page)
    page_size = max(1, page_size)
    offset = page * page_size
    pipeline = [
        {
            "$match": {
                **cls._get_company_constraint(company_id),
                **cls._get_project_constraint(project_ids, include_subprojects),
                "metadata": {"$exists": True, "$gt": {}},
            }
        },
        {"$project": {"metadata": {"$objectToArray": "$metadata"}}},
        {"$unwind": "$metadata"},
        {"$group": {"_id": "$metadata.k"}},
        {"$sort": {"_id": 1}},
        # Count ALL the distinct keys before paging. Counting after
        # $skip/$limit yields only the page length, which made ``remaining``
        # always evaluate to 0.
        {
            "$group": {
                "_id": 1,
                "total": {"$sum": 1},
                "results": {"$push": "$$ROOT"},
            }
        },
        # cut the requested page out of the full sorted key list
        {
            "$project": {
                "total": 1,
                "results": {"$slice": ["$results", offset, page_size]},
            }
        },
    ]

    result = next(Model.aggregate(pipeline), None)

    total = 0
    remaining = 0
    results = []

    if result:
        total = int(result.get("total", -1))
        results = [
            ParameterKeyEscaper.unescape(r.get("_id"))
            for r in result.get("results", [])
        ]
        remaining = max(0, total - (len(results) + offset))

    return total, remaining, results
def update_for_task(call: APICall, company_id, _):
    """
    Create or update the output model of a task (legacy endpoint).

    Rejected with ``NotSupported`` for endpoint versions above
    ``ModelsBackwardsCompatibility.max_version``. Exactly one of ``uri`` and
    ``override_model_id`` must be passed in the call data, and the task must
    be in the created or in_progress status.

    - With ``override_model_id`` the existing company model is registered as
      the task output model.
    - Otherwise, when the task already has an output model, that model is
      updated from the call data and the call returns right away.
    - Otherwise a new model is created from the call data and the task fields.
    """
    if call.requested_endpoint_version > ModelsBackwardsCompatibility.max_version:
        raise errors.moved_permanently.NotSupported(
            "use tasks.add_or_update_model")

    task_id = call.data["task"]
    uri = call.data.get("uri")
    iteration = call.data.get("iteration")
    override_model_id = call.data.get("override_model_id")

    # both missing or both present is an error: exactly one must be given
    if bool(uri) == bool(override_model_id):
        raise errors.bad_request.MissingRequiredFields(
            "exactly one field is required", fields=("uri", "override_model_id"))

    with translate_errors_context():
        lookup = dict(id=task_id, company=company_id)
        task = Task.get_for_writing(
            id=task_id,
            company=company_id,
            _only=["models", "execution", "name", "status", "project"],
        )
        if not task:
            raise errors.bad_request.InvalidTaskId(**lookup)

        allowed_states = [TaskStatus.created, TaskStatus.in_progress]
        if task.status not in allowed_states:
            raise errors.bad_request.InvalidTaskStatus(
                f"model can only be updated for tasks in the {allowed_states} states",
                **lookup,
            )

        if override_model_id:
            model = ModelBLL.get_company_model_by_id(
                company_id=company_id, model_id=override_model_id)
        else:
            # fall back to values derived from the task when not provided
            if "name" not in call.data:
                call.data["name"] = task.name
            if "comment" not in call.data:
                call.data["comment"] = f"Created by task `{task.name}` ({task.id})"

            if task.models and task.models.output:
                # the task already has an output model: update it and finish
                existing_id = task.models.output[-1].model
                update_result = _update_model(
                    call, company_id, model_id=existing_id
                ).to_struct()
                update_result.update({"id": existing_id, "created": False})
                call.result.data = update_result
                return

            # no output model yet: create a new one
            fields = parse_model_fields(call, create_fields)

            if task.models and task.models.input:
                parent_model = task.models.input[0].model
            else:
                parent_model = None

            timestamp = datetime.utcnow()
            model = Model(
                id=database.utils.id(),
                created=timestamp,
                last_update=timestamp,
                user=call.identity.user,
                company=company_id,
                project=task.project,
                framework=task.execution.framework,
                parent=parent_model,
                design=task.execution.model_desc,
                labels=task.execution.model_labels,
                ready=(task.status == TaskStatus.published),
                **fields,
            )
            model.save()
            _update_cached_tags(company_id, project=model.project, fields=fields)

        # register the model as the task output model
        output_item = ModelItem(
            model=model.id,
            name=TaskModelNames[TaskModelTypes.output],
            updated=datetime.utcnow(),
        )
        TaskBLL.update_statistics(
            task_id=task_id,
            company_id=company_id,
            last_iteration_max=iteration,
            models__output=[output_item],
        )

        call.result.data = {"id": model.id, "created": True}