def _delete_tasks(company: str, projects: Sequence[str]) -> Tuple[int, Set, Set]:
    """
    Delete the tasks that belong to the given projects.

    Tasks and models that live in *other* projects but point at one of the
    deleted tasks (through ``parent`` / ``task``) get that reference reset to
    None first. Debug image urls, plot image urls and the uris of output
    artifacts are gathered for the caller, and
    ``event_bll.delete_multi_task_events`` is invoked for the deleted task ids.

    :param company: company id, forwarded to the url collectors and event_bll
    :param projects: ids of the projects whose tasks are removed
    :return: (value returned by the queryset ``delete()``, event urls, artifact urls);
        ``(0, set(), set())`` when the projects have no tasks
    """
    project_tasks = Task.objects(project__in=projects).only(
        "id", "execution__artifacts"
    )
    if not project_tasks:
        return 0, set(), set()

    ids = {item.id for item in project_tasks}

    with TimingContext("mongo", "delete_tasks_update_children"):
        # Only entities outside of the deleted projects need to be detached
        Task.objects(parent__in=ids, project__nin=projects).update(parent=None)
        Model.objects(task__in=ids, project__nin=projects).update(task=None)

    event_urls: Set = set()
    artifact_urls: Set = set()
    for item in project_tasks:
        event_urls.update(collect_debug_image_urls(company, item.id))
        event_urls.update(collect_plot_image_urls(company, item.id))

        execution = item.execution
        if not (execution and execution.artifacts):
            continue
        for artifact in execution.artifacts.values():
            # Only output artifacts that actually carry a uri are reported
            if artifact.mode == ArtifactModes.output and artifact.uri:
                artifact_urls.add(artifact.uri)

    event_bll.delete_multi_task_events(company, list(ids))
    deleted = project_tasks.delete()
    return deleted, event_urls, artifact_urls
def verify_task_children_and_ouptuts(task: Task, force: bool) -> TaskOutputs[Model]:
    """
    Check that a task may be deleted and collect its output models.

    Without ``force`` the call raises ``TaskCannotBeDeleted`` when the task has
    published children, or when any model that references it (or is listed in
    ``task.models.output``) is ready. Models are split by their ``ready`` flag
    into ``published`` and ``draft``; draft models that are used as an input
    model by some task are removed from the ``draft`` group.

    :param task: the task being examined
    :param force: when True, skip the "cannot be deleted" checks
    :return: the collected ``TaskOutputs``
    """
    if not force:
        with TimingContext("mongo", "count_published_children"):
            children_query = Task.objects(parent=task.id, status=TaskStatus.published)
            published_children = children_query.count()
            if published_children:
                raise errors.bad_request.TaskCannotBeDeleted(
                    "has children, use force=True",
                    task=task.id,
                    children=published_children,
                )

    with TimingContext("mongo", "get_task_models"):
        referencing_models = Model.objects(task=task.id).only("id", "task", "ready")
        outputs = TaskOutputs(attrgetter("ready"), Model, referencing_models)
        if not force and outputs.published:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(outputs.published),
            )

    if task.models and task.models.output:
        with TimingContext("mongo", "get_task_output_model"):
            output_ids = [item.model for item in task.models.output]
            for candidate in Model.objects(id__in=output_ids):
                if not candidate.ready:
                    outputs.draft.append(candidate)
                    continue
                if not force:
                    raise errors.bad_request.TaskCannotBeDeleted(
                        "has output model, use force=True",
                        task=task.id,
                        model=candidate.id,
                    )
                outputs.published.append(candidate)

    if outputs.draft:
        with TimingContext("mongo", "get_execution_models"):
            draft_ids = outputs.draft.ids
            consumers = Task.objects(models__input__model__in=draft_ids).only(
                "id", "models"
            )
            # Ids of every input model of the tasks that consume a draft model
            in_use = set()
            for consumer in consumers:
                if consumer.models:
                    in_use.update(entry.model for entry in consumer.models.input)
            if in_use:
                outputs.draft = DocumentGroup(
                    Model, (m for m in outputs.draft if m.id not in in_use)
                )

    return outputs
def delete_metadata(_: APICall, company_id: str, request: DeleteMetadataRequest):
    """
    Remove the requested metadata keys from a model.

    The model is first looked up for the calling company (the lookup result is
    discarded, it serves as an access check). When ``metadata_delete`` reports
    a change, the model's ``last_update`` is set to the current UTC time.

    :return: ``{"updated": <value returned by metadata_delete>}``
    """
    target_id = request.model
    ModelBLL.get_company_model_by_id(
        company_id=company_id,
        model_id=target_id,
        only_fields=("id",),
    )

    removed = metadata_delete(cls=Model, _id=target_id, keys=request.keys)
    if not removed:
        return {"updated": removed}

    Model.objects(id=target_id).update_one(last_update=datetime.utcnow())
    return {"updated": removed}
def add_or_update_metadata(
    _: APICall, company_id: str, request: AddOrUpdateMetadataRequest
):
    """
    Add or update metadata items on a model.

    The model is looked up for the calling company purely as an access check;
    the returned document is not used, so only its ``id`` is fetched. When
    ``metadata_add_or_update`` reports a change, the model's ``last_update``
    is set to the current UTC time.

    :return: ``{"updated": <value returned by metadata_add_or_update>}``
    """
    model_id = request.model
    # The result is discarded, so avoid loading the whole document
    ModelBLL.get_company_model_by_id(
        company_id=company_id, model_id=model_id, only_fields=("id",)
    )

    updated = metadata_add_or_update(
        cls=Model,
        _id=model_id,
        items=get_metadata_from_api(request.metadata),
    )
    if updated:
        Model.objects(id=model_id).update_one(last_update=datetime.utcnow())

    return {"updated": updated}
def model_set_ready(
    cls,
    model_id: str,
    company_id: str,
    publish_task: bool,
    force_publish_task: bool = False,
) -> tuple:
    """
    Mark a company model as ready, optionally publishing the task it references.

    :param model_id: id of the model to update
    :param company_id: company the model must belong to
    :param publish_task: when True and the model references a task that is not
        yet published, publish that task as well (without re-publishing the model)
    :param force_publish_task: passed as ``force`` to ``cls.publish_task``
    :return: (result of the model update, dict describing the published task);
        the dict holds ``data`` and ``id`` only when a task was published
    :raise errors.bad_request.InvalidModelId: no such model for the company
    :raise errors.bad_request.ModelIsReady: the model is already ready
    """
    with translate_errors_context():
        lookup = dict(id=model_id, company=company_id)
        model = Model.objects(**lookup).first()
        if not model:
            raise errors.bad_request.InvalidModelId(**lookup)
        if model.ready:
            raise errors.bad_request.ModelIsReady(**lookup)

        published_task_data = {}
        linked_task = None
        if model.task and publish_task:
            task_query = Task.objects(id=model.task, company=company_id)
            linked_task = task_query.only("id", "status").first()

        if linked_task and linked_task.status != TaskStatus.published:
            published_task_data["data"] = cls.publish_task(
                task_id=model.task,
                company_id=company_id,
                publish_model=False,
                force=force_publish_task,
            )
            published_task_data["id"] = model.task

        return model.update(upsert=False, ready=True), published_task_data
def get_by_task_id(call: APICall, company_id, _):
    """
    Return the model referenced by the last entry of a task's ``models.output``.

    The endpoint is rejected for API versions above
    ``ModelsBackwardsCompatibility.max_version``. The resulting model dict is
    stored in ``call.result.data`` under the ``model`` key.

    :raise errors.moved_permanently.NotSupported: endpoint version too new
    :raise errors.bad_request.InvalidTaskId: the task was not found
    :raise errors.bad_request.MissingTaskFields: the task has no output models
    :raise errors.bad_request.InvalidModelId: the output model was not found
    """
    version = call.requested_endpoint_version
    if version > ModelsBackwardsCompatibility.max_version:
        raise errors.moved_permanently.NotSupported("use models.get_by_id/get_all apis")

    task_id = call.data["task"]

    with translate_errors_context():
        task_lookup = dict(id=task_id, company=company_id)
        task = Task.get(_only=["models"], **task_lookup)
        if not task:
            raise errors.bad_request.InvalidTaskId(**task_lookup)

        has_output = bool(task.models) and bool(task.models.output)
        if not has_output:
            raise errors.bad_request.MissingTaskFields(field="models.output")

        # The most recently listed output model is the one reported
        model_id = task.models.output[-1].model
        model_filter = Q(id=model_id) & get_company_or_none_constraint(company_id)
        model = Model.objects(model_filter).first()
        if not model:
            raise errors.bad_request.InvalidModelId(
                "no such public or company model",
                id=model_id,
                company=company_id,
            )

        payload = model.to_proper_dict()
        conform_output_tags(call, payload)
        call.result.data = {"model": payload}
def get_output_model(task, force=False):
    """
    Fetch the model referenced by ``task.output.model``.

    :param task: task whose output model is looked up
    :param force: when False, a ready output model blocks the deletion
    :return: the model document, or None when it does not exist
    :raise errors.bad_request.TaskCannotBeDeleted: the model is ready and
        ``force`` is not set
    """
    with TimingContext("mongo", "get_task_output_model"):
        found = Model.objects(id=task.output.model).first()

    if not found or not found.ready or force:
        return found

    raise errors.bad_request.TaskCannotBeDeleted(
        "has output model, use force=True", task=task.id, model=task.output.model
    )
def get_frameworks(self, company, project_ids: Optional[Sequence]) -> Sequence:
    """
    List the distinct ``framework`` values of the models matched by
    ``get_company_or_none_constraint(company)``.

    :param company: company id used to build the company constraint
    :param project_ids: when non-empty, only models of these projects are considered
    :return: the distinct framework values
    """
    constraint = get_company_or_none_constraint(company)
    if project_ids:
        constraint &= Q(project__in=project_ids)

    matching_models = Model.objects(constraint)
    return matching_models.distinct(field="framework")
def unarchive_model(cls, model_id: str, company_id: str):
    """
    Remove the "archived" system tag from a company model.

    The model is looked up first (the result is discarded), then the
    ``EntityVisibility.archived`` value is pulled from ``system_tags`` and
    ``last_update`` is set to the current UTC time.

    :return: the value returned by the queryset ``update`` call
    """
    cls.get_company_model_by_id(
        company_id=company_id,
        model_id=model_id,
        only_fields=("id",),
    )

    target = Model.objects(company=company_id, id=model_id)
    return target.update(
        pull__system_tags=EntityVisibility.archived.value,
        last_update=datetime.utcnow(),
    )
def validate_execution_model(task, allow_only_public=False):
    """
    Verify that the task's execution model exists and return it.

    :param task: task whose ``execution.model`` is validated
    :param allow_only_public: when True the company constraint is built with
        ``None`` instead of the task's company
    :return: the model document, or None when the task has no execution model
    :raise errors.bad_request.InvalidModelId: the model was not found
    """
    execution = task.execution
    if not (execution and execution.model):
        return

    if allow_only_public:
        company = None
    else:
        company = task.company

    model_id = task.execution.model
    model_filter = Q(id=model_id) & get_company_or_none_constraint(company)
    found = Model.objects(model_filter).first()
    if found:
        return found

    raise errors.bad_request.InvalidModelId(model=model_id)
def get_company_model_by_id(
    cls, company_id: str, model_id: str, only_fields=None
) -> Model:
    """
    Fetch a model by id, restricted to the given company.

    :param company_id: company the model must belong to
    :param model_id: id of the requested model
    :param only_fields: optional field names to load instead of the whole document
    :return: the model document
    :raise errors.bad_request.InvalidModelId: no matching model exists
    """
    lookup = dict(company=company_id, id=model_id)
    queryset = Model.objects(**lookup)
    if only_fields:
        queryset = queryset.only(*only_fields)

    found = queryset.first()
    if found:
        return found

    raise errors.bad_request.InvalidModelId(**lookup)
def publish_task(
    cls,
    task_id: str,
    company_id: str,
    publish_model: bool,
    force: bool,
    status_reason: str = "",
    status_message: str = "",
) -> dict:
    """
    Publish a task and, optionally, its output model.

    The task is moved to ``publishing``, its output model is marked ready when
    requested, and the status is then changed to ``published``. If any
    ``Exception`` is raised along the way, the task status is restored to its
    previous value and the exception is re-raised.

    :param task_id: id of the task to publish
    :param company_id: company used for the access check and model publishing
    :param publish_model: when True, a not-yet-ready output model is set ready
    :param force: skip the status transition validation and force the change
    :param status_reason: forwarded to the status change request
    :param status_message: forwarded to the status change request
    :return: the result of the status change request
    """
    task = cls.get_task_with_access(
        task_id, company_id=company_id, requires_write_access=True
    )
    if not force:
        validate_status_change(task.status, TaskStatus.published)

    previous_task_status = task.status
    output = task.output or Output()
    try:
        # set state to publishing
        task.status = TaskStatus.publishing
        task.save()

        # publish task models
        # Read the model id from ``output``: ``task.output`` itself may be empty
        if output.model and publish_model:
            output_model = (
                Model.objects(id=output.model).only("id", "task", "ready").first()
            )
            if output_model and not output_model.ready:
                cls.model_set_ready(
                    model_id=output.model,
                    company_id=company_id,
                    publish_task=False,
                )

        # set task status to published, and update (or set) its new output (view and models)
        return ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.published,
            force=force,
            status_reason=status_reason,
            status_message=status_message,
        ).execute(published=datetime.utcnow(), output=output)
    except Exception:
        # Roll the status back so the task is not left in "publishing"
        task.status = previous_task_status
        task.save()
        raise
def publish_task(
    task_id: str,
    company_id: str,
    force: bool,
    publish_model_func: Callable[[str, str], Any] = None,
    status_message: str = "",
    status_reason: str = "",
) -> dict:
    """
    Publish a task, optionally publishing its last output model through a callback.

    The task is moved to ``publishing`` and then to ``published``. When an
    ``Exception`` is raised in between, the previous status is written back
    and the exception is re-raised.

    :param task_id: id of the task to publish
    :param company_id: company used for the access check and the model lookup
    :param force: skip the status transition validation and force the change
    :param publish_model_func: called as ``func(model_id, company_id)`` for the
        last output model when it exists and is not ready; no model is
        published when omitted
    :param status_message: forwarded to the status change request
    :param status_reason: forwarded to the status change request
    :return: the result of the status change request
    """
    task = TaskBLL.get_task_with_access(
        task_id, company_id=company_id, requires_write_access=True
    )
    if not force:
        validate_status_change(task.status, TaskStatus.published)

    status_before = task.status
    current_output = task.output or Output()
    rollback_needed = False
    try:
        # Mark the task as being published
        task.status = TaskStatus.publishing
        task.save()

        # Publish the most recent output model, if a publisher was supplied
        if task.models and task.models.output and publish_model_func:
            last_output_id = task.models.output[-1].model
            model_query = Model.objects(id=last_output_id, company=company_id)
            candidate = model_query.only("id", "ready").first()
            if candidate and not candidate.ready:
                publish_model_func(candidate.id, company_id)

        # Switch the task to published and store its output
        status_change = ChangeStatusRequest(
            task=task,
            new_status=TaskStatus.published,
            force=force,
            status_reason=status_reason,
            status_message=status_message,
        )
        return status_change.execute(
            published=datetime.utcnow(), output=current_output
        )
    except Exception as ex:
        rollback_needed = True
        raise ex
    finally:
        if rollback_needed:
            task.status = status_before
            task.save()
def validate_input_models(task, allow_only_public=False):
    """
    Verify that every input model referenced by the task exists.

    :param task: task whose ``models.input`` entries are validated
    :param allow_only_public: when True the company constraint is built with
        ``None`` instead of the task's company
    :return: None; nothing is checked when the task has no input models
    :raise errors.bad_request.InvalidModelId: one or more models were not found
    """
    # ``task.models`` may be unset, so guard it before reading ``input``
    if not task.models or not task.models.input:
        return

    company = None if allow_only_public else task.company
    model_ids = {m.model for m in task.models.input}
    models = Model.objects(
        Q(id__in=model_ids) & get_company_or_none_constraint(company)
    ).only("id")

    missing = model_ids - {m.id for m in models}
    if missing:
        raise errors.bad_request.InvalidModelId(models=missing)
def get_outputs_for_deletion(task, force=False):
    """
    Collect the models and child tasks affected by deleting a task.

    Models that reference the task, plus the task's output model, are split by
    their ``ready`` flag into ``published`` and ``draft``. Draft models that are
    used as the execution model of some task are removed from ``draft``.

    :param task: the task about to be deleted
    :param force: when False, published models or published children block
        the deletion
    :return: (the collected ``TaskOutputs``, queryset of the task's children)
    :raise errors.bad_request.TaskCannotBeDeleted: the task has published
        output models or published children and ``force`` is not set
    """
    with TimingContext("mongo", "get_task_models"):
        models = TaskOutputs(
            attrgetter("ready"),
            Model,
            Model.objects(task=task.id).only("id", "task", "ready"),
        )
        if not force and models.published:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has output models, use force=True",
                task=task.id,
                models=len(models.published),
            )

    # ``task.output`` may be unset; only look the model up when one is referenced
    if task.output and task.output.model:
        output_model = get_output_model(task, force)
        if output_model:
            if output_model.ready:
                models.published.append(output_model)
            else:
                models.draft.append(output_model)

    if models.draft:
        with TimingContext("mongo", "get_execution_models"):
            model_ids = [m.id for m in models.draft]
            dependent_tasks = Task.objects(execution__model__in=model_ids).only(
                "id", "execution.model"
            )
            # A set keeps the membership test below constant-time
            busy_models = {t.execution.model for t in dependent_tasks}
            models.draft[:] = [m for m in models.draft if m.id not in busy_models]

    with TimingContext("mongo", "get_task_children"):
        tasks = Task.objects(parent=task.id).only("id", "parent", "status")
        published_tasks = [
            child for child in tasks if child.status == TaskStatus.published
        ]
        if not force and published_tasks:
            raise errors.bad_request.TaskCannotBeDeleted(
                "has children, use force=True", task=task.id, children=published_tasks
            )

    return models, tasks
def _delete_models(projects: Sequence[str]) -> Tuple[int, Set[str]]:
    """
    Delete the models of the given projects.

    Tasks from other projects that list one of these models under
    ``models.input`` or ``models.output`` have that model reference set to None.

    :param projects: ids of the projects whose models are removed
    :return: (value returned by the queryset ``delete()``, uris of the deleted
        models); ``(0, set())`` when the projects have no models
    """
    with TimingContext("mongo", "delete_models"):
        project_models = Model.objects(project__in=projects).only("task", "id", "uri")
        if not project_models:
            return 0, set()

        model_ids = list({item.id for item in project_models})

        # Clear input references held by tasks outside of the deleted projects
        input_filter = {
            "project": {"$nin": projects},
            "models.input.model": {"$in": model_ids},
        }
        Task._get_collection().update_many(
            filter=input_filter,
            update={"$set": {"models.input.$[elem].model": None}},
            array_filters=[{"elem.model": {"$in": model_ids}}],
            upsert=False,
        )

        # Clear output references on the creating tasks outside of the deleted projects
        model_tasks = list({item.task for item in project_models if item.task})
        if model_tasks:
            output_filter = {
                "_id": {"$in": model_tasks},
                "project": {"$nin": projects},
                "models.output.model": {"$in": model_ids},
            }
            Task._get_collection().update_many(
                filter=output_filter,
                update={"$set": {"models.output.$[elem].model": None}},
                array_filters=[{"elem.model": {"$in": model_ids}}],
                upsert=False,
            )

        urls = set()
        for item in project_models:
            if item.uri:
                urls.add(item.uri)

        deleted = project_models.delete()
        return deleted, urls
def delete_model(
    cls, model_id: str, company_id: str, force: bool
) -> Tuple[int, Model]:
    """
    Delete a company model and rewrite the task references that point at it.

    Tasks using the model as an input model, and a published creating task
    listing it as an output model, have the reference replaced with
    ``f"{deleted_prefix}{model_id}"``.

    :param model_id: id of the model to delete
    :param company_id: company the model must belong to
    :param force: when False, deletion is refused if the model is used as an
        input model or if its creating task is published
    :return: (value returned by the queryset ``delete()``, the model document
        loaded with the ``id``, ``task``, ``project`` and ``uri`` fields)
    :raise errors.bad_request.ModelInUse: model is used by tasks and not forced
    :raise errors.bad_request.ModelCreatingTaskExists: creating task is
        published and not forced
    """
    model = cls.get_company_model_by_id(
        company_id=company_id,
        model_id=model_id,
        only_fields=("id", "task", "project", "uri"),
    )
    deleted_model_id = f"{deleted_prefix}{model_id}"

    using_tasks = Task.objects(models__input__model=model_id).only("id")
    if using_tasks:
        if not force:
            raise errors.bad_request.ModelInUse(
                "as execution model, use force=True to delete",
                num_tasks=len(using_tasks),
            )
        # update deleted model id in using tasks
        Task._get_collection().update_many(
            filter={"_id": {"$in": [t.id for t in using_tasks]}},
            update={"$set": {"models.input.$[elem].model": deleted_model_id}},
            array_filters=[{"elem.model": model_id}],
            upsert=False,
        )

    if model.task:
        task = Task.objects(id=model.task).first()
        if task and task.status == TaskStatus.published:
            if not force:
                raise errors.bad_request.ModelCreatingTaskExists(
                    "and published, use force=True to delete", task=model.task
                )
            # Output entries are items exposing the model id as ``.model``;
            # compare against that attribute rather than the item itself
            output_items = (task.models.output if task.models else None) or []
            if any(item.model == model_id for item in output_items):
                now = datetime.utcnow()
                Task._get_collection().update_one(
                    filter={"_id": model.task, "models.output.model": model_id},
                    update={
                        # All field assignments must live under an update operator
                        "$set": {
                            "models.output.$[elem].model": deleted_model_id,
                            "output.error": f"model deleted on {now.isoformat()}",
                            "last_change": now,
                        },
                    },
                    array_filters=[{"elem.model": model_id}],
                    upsert=False,
                )

    del_count = Model.objects(id=model_id, company=company_id).delete()
    return del_count, model
def get_model_metadata_distinct_values(
    self,
    company_id: str,
    project_ids: Sequence[str],
    key: str,
    include_subprojects: bool,
    allow_public: bool = True,
) -> ParamValues:
    """
    Return the distinct values stored under a model metadata key.

    The values are computed with an aggregation over the models matching the
    company and project constraints, capped at
    ``services.models.metadata_values.max_count`` entries, and cached in redis
    together with the latest ``last_update`` of the matching models.

    :param company_id: company used to build the company constraint
    :param project_ids: projects used to build the project constraint
    :param key: metadata key whose values are requested
    :param include_subprojects: forwarded to the project constraint builder
    :param allow_public: forwarded to the company constraint builder
    :return: (number of returned values, the values); ``(0, [])`` when no model
        has the key
    """
    company_filter = self._get_company_constraint(company_id, allow_public)
    project_filter = self._get_project_constraint(project_ids, include_subprojects)
    key_path = f"metadata.{ParameterKeyEscaper.escape(key)}"
    exists_lookup = f"{key_path.replace('.', '__')}__exists"

    # The most recently updated matching model is used for cache validation
    newest_model = (
        Model.objects(
            **company_filter,
            **project_filter,
            **{exists_lookup: True},
        )
        .only("last_update")
        .order_by("-last_update")
        .limit(1)
        .first()
    )
    if not newest_model:
        return 0, []

    # NOTE(review): the cache key does not include ``include_subprojects`` -
    # confirm that calls differing only in that flag may share an entry
    redis_key = f"modelmetadata_values_{company_id}_{'_'.join(project_ids)}_{key}_{allow_public}"
    last_update = newest_model.last_update or datetime.utcnow()
    cached_res = self._get_cached_param_values(key=redis_key, last_update=last_update)
    if cached_res:
        return cached_res

    max_values = config.get("services.models.metadata_values.max_count", 100)
    match_stage = {
        "$match": {
            **company_filter,
            **project_filter,
            key_path: {"$exists": True},
        }
    }
    collect_stage = {
        "$group": {
            "_id": 1,
            "total": {"$sum": 1},
            "results": {"$push": "$$ROOT._id"},
        }
    }
    pipeline = [
        match_stage,
        {"$project": {"value": f"${key_path}.value"}},
        # One document per distinct value, sorted and capped
        {"$group": {"_id": "$value"}},
        {"$sort": {"_id": 1}},
        {"$limit": max_values},
        # Fold the capped values into a single summary document
        collect_stage,
    ]

    cursor = Model.aggregate(pipeline, collation=Model._numeric_locale)
    summary = next(cursor, None)
    if not summary:
        return 0, []

    total = int(summary.get("total", 0))
    values = summary.get("results", [])

    ttl = config.get("services.models.metadata_values.cache_ttl_sec", 86400)
    cache_entry = dict(last_update=last_update.timestamp(), total=total, values=values)
    self.redis.setex(redis_key, ttl, json.dumps(cache_entry))
    return total, values