def create_experiment(self, experiment: schemas.ExperimentCreate, project_id: str):
    """
    Stores a new experiment and places it last in the project's experiment list.

    Parameters
    ----------
    experiment: projects.schemas.experiment.ExperimentCreate
        When `copy_from` is set, the new experiment is built by `copy_experiment`.
    project_id : str

    Returns
    -------
    projects.schemas.experiment.Experiment

    Raises
    ------
    BadRequest
        When name is not a str instance.
        When the project already has an experiment with that name.
    """
    requested_name = experiment.name
    if not isinstance(requested_name, str):
        raise BadRequest("name is required")

    duplicate = (
        self.session.query(models.Experiment)
        .filter(models.Experiment.project_id == project_id)
        .filter_by(name=requested_name)
        .first()
    )
    if duplicate:
        raise BadRequest("an experiment with that name already exists")

    if experiment.copy_from:
        created = self.copy_experiment(experiment=experiment, project_id=project_id)
    else:
        created = models.Experiment(
            uuid=uuid_alpha(),
            name=requested_name,
            project_id=project_id,
        )

    self.session.add(created)
    self.session.flush()

    # sys.maxsize as the requested position: will add to end of list
    self.fix_positions(
        project_id=project_id,
        experiment_id=created.uuid,
        new_position=sys.maxsize,
    )

    self.session.commit()
    self.session.refresh(created)

    return schemas.Experiment.from_orm(created)
def raise_if_dependencies_are_invalid(self,
                                      project_id: str,
                                      dependencies: List,
                                      experiment_id: Optional[str] = None,
                                      deployment_id: Optional[str] = None,
                                      operator_id: Optional[str] = None):
    """
    Raises an exception if the specified dependencies are not valid.

    Dependencies are invalid when they are not a list, contain duplicate
    (or unhashable) elements, include the operator itself, reference an
    operator that does not exist, or are cyclical.

    Parameters
    ----------
    project_id : str
    dependencies : list
    experiment_id : str or None
    deployment_id : str or None
    operator_id : str or None

    Raises
    ------
    BadRequest
        When dependencies is not a list or has duplicates.
        When a dependency is the operator itself.
        When any dependency does not exist.
        Cyclic dependencies are checked by `raise_if_has_cycles`.
    """
    if not isinstance(dependencies, list):
        raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)

    # check if dependencies has duplicates
    try:
        unique_count = len(set(dependencies))
    except TypeError as err:
        # Unhashable items (e.g. dicts or lists) can never be operator ids;
        # report them as a client error instead of an unhandled TypeError.
        raise BadRequest(DEPENDENCIES_EXCEPTION_MSG) from err
    if unique_count != len(dependencies):
        raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)

    for dependency in dependencies:
        # an operator must not depend on itself
        if dependency == operator_id:
            raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)
        try:
            self.raise_if_operator_does_not_exist(dependency)
        except NotFound as err:
            raise BadRequest(DEPENDENCIES_EXCEPTION_MSG) from err

    self.raise_if_has_cycles(
        project_id=project_id,
        experiment_id=experiment_id,
        deployment_id=deployment_id,
        operator_id=operator_id,
        dependencies=dependencies,
    )
def task_category_is_not_none(self, task_cat):
    """
    Validates the category of a task, when one is informed.

    A category of None is accepted without further checks.

    Parameters
    ----------
    task_cat : object
        Any object exposing a `category` attribute.

    Raises
    ------
    BadRequest
        When category is informed and is not one of VALID_CATEGORIES.
    """
    category = task_cat.category

    # Guard clause: nothing to report for a missing or known category
    if category is None or category in VALID_CATEGORIES:
        return

    valid_str = ",".join(VALID_CATEGORIES)
    raise BadRequest(
        code="InvalidCategory",
        message=f"Invalid category. Choose any of {valid_str}",
    )
def delete_multiple_projects(self, project_ids):
    """
    Removes the projects of the current tenant whose uuid is in project_ids.

    Ids that do not match a project of this tenant are ignored.

    Parameters
    ----------
    project_ids : list
        The list of project ids.

    Returns
    -------
    projects.schemas.message.Message

    Raises
    ------
    BadRequest
        When project_ids is empty.
    """
    if len(project_ids) < 1:
        raise BadRequest(
            code="MissingRequiredProjectId",
            message="inform at least one project",
        )

    tenant_projects = (
        self.session.query(models.Project)
        .filter(models.Project.uuid.in_(project_ids))
        .filter_by(tenant=self.kubeflow_userid)
    )
    for stored_project in tenant_projects.all():
        self.session.delete(stored_project)
    self.session.commit()

    return schemas.Message(message="Successfully removed projects")
def list_tasks(self,
               page: Optional[int] = None,
               page_size: Optional[int] = None,
               order_by: Optional[str] = None,
               **filters):
    """
    Lists tasks. Supports pagination, and sorting.

    Parameters
    ----------
    page : int
        The page number. First page is 1.
    page_size : int
        The page size.
    order_by : str
        Order by instruction. Format is "column [asc|desc]".
        A "+" is accepted in place of the space. Defaults to "name asc".
    **filters : dict
        Column name to substring; each one is applied as a
        case-insensitive "contains" filter.

    Returns
    -------
    projects.schemas.task.TaskList

    Raises
    ------
    BadRequest
        When order_by is invalid.
    """
    query = self.session.query(models.Task)
    query_total = self.session.query(func.count(models.Task.uuid))

    for column, value in filters.items():
        condition = getattr(models.Task, column).ilike(f"%{value}%")
        query = query.filter(condition)
        query_total = query_total.filter(condition)

    # total counts every record matching the filters, regardless of the page
    total = query_total.scalar()

    # Default sort is name in ascending order
    if not order_by:
        order_by = "name asc"

    # Sorts records. Validation uses explicit checks rather than `assert`,
    # which is stripped when Python runs with -O.
    try:
        column, sort = order_by.replace("+", " ").strip().split()
    except ValueError:
        raise BadRequest("Invalid order argument") from None

    sort = sort.lower()
    if sort not in ("asc", "desc") or column not in models.Task.__table__.columns.keys():
        raise BadRequest("Invalid order argument")

    direction = asc if sort == "asc" else desc
    query = query.order_by(direction(getattr(models.Task, column)))

    if page and page_size:
        # Applies pagination
        query = query.limit(page_size).offset((page - 1) * page_size)

    tasks = query.all()
    container_state = get_notebook_state()
    return schemas.TaskList.from_orm(container_state, tasks, total)
def retry_run(run_id, experiment_id):
    """
    Re-initiates a failed run in Kubeflow Pipelines.

    Parameters
    ----------
    run_id : str
        The run to retry. The value "latest" is resolved with
        `get_latest_run_id(experiment_id)`.
    experiment_id : str

    Returns
    -------
    dict
        Retry response confirmation.

    Raises
    ------
    BadRequest
        When the run status is not "Failed".
    """
    target_run_id = get_latest_run_id(experiment_id) if run_id == "latest" else run_id

    kfp_run = kfp_client().get_run(run_id=target_run_id)

    # Guard clause: only failed runs may be retried
    if kfp_run.run.status != "Failed":
        raise BadRequest("Not a failed run")

    kfp_client().runs.retry_run(run_id=kfp_run.run.id)

    return {"message": "Run re-initiated successfully"}
def delete_multiple_templates(self, template_ids):
    """
    Removes the templates of the current tenant whose uuid is in template_ids.

    Ids that do not match a template of this tenant are ignored.

    Parameters
    ----------
    template_ids : list
        The list of template ids.

    Returns
    -------
    projects.schemas.message.Message

    Raises
    ------
    BadRequest
        When template_ids is empty.
    """
    if len(template_ids) < 1:
        raise BadRequest(
            code="MissingRequiredTemplateId",
            message="inform at least one template",
        )

    tenant_templates = (
        self.session.query(models.Template)
        .filter(models.Template.uuid.in_(template_ids))
        .filter_by(tenant=self.kubeflow_userid)
    )
    for stored_template in tenant_templates.all():
        self.session.delete(stored_template)
    self.session.commit()

    return schemas.Message(message="Successfully removed templates")
def create_run(self, project_id: str, deployment_id: str):
    """
    Starts a new run of a deployment in Kubeflow Pipelines.

    After the run starts, every monitoring of the deployment is scheduled
    as a background task and the deployment operators are set to "Pending".

    Parameters
    ----------
    project_id : str
    deployment_id : str

    Returns
    -------
    dict
        The value returned by `kfp_runs.start_run`, with "deploymentId" added.

    Raises
    ------
    NotFound
        When deployment_id does not exist.
    BadRequest
        When `kfp_runs.start_run` raises ValueError.
    """
    deployment = self.session.query(models.Deployment).get(deployment_id)
    if deployment is None:
        raise NotFound("The specified deployment does not exist")

    # Removes operators that don't have a deployment_notebook (eg. Upload de Dados).
    # Then, fix dependencies in their children.
    deployable_operators = self.remove_non_deployable_operators(deployment.operators)

    start_arguments = {
        "operators": deployable_operators,
        "project_id": deployment.project_id,
        "experiment_id": deployment.experiment_id,
        "deployment_id": deployment_id,
        "deployment_name": deployment.name,
    }
    try:
        run = kfp_runs.start_run(**start_arguments)
    except ValueError as e:
        raise BadRequest(str(e))

    # Deploy monitoring tasks
    monitoring_list = self.monitoring_controller.list_monitorings(
        project_id=project_id,
        deployment_id=deployment_id,
    )
    for monitoring in monitoring_list.monitorings or []:
        self.background_tasks.add_task(
            deploy_monitoring,
            deployment_id=deployment_id,
            experiment_id=deployment.experiment_id,
            run_id=run["uuid"],
            task_id=monitoring.task_id,
            monitoring_id=monitoring.uuid,
        )

    # Every operator of the deployment goes back to "Pending"
    deployment_operators = self.session.query(models.Operator).filter_by(
        deployment_id=deployment_id)
    deployment_operators.update({"status": "Pending"})
    self.session.commit()

    run["deploymentId"] = deployment_id
    return run
def create_deployments_from_experiments(self, experiments: list, project_id: str):
    """
    Builds one deployment for each of the given experiments.

    Each deployment takes the name of its experiment, receives a copy of
    the experiment operators and is placed at the end of the list.

    Parameters
    ----------
    experiments : list
        List of experiments uuids to copy.
    project_id : str

    Returns
    -------
    list
        A list of projects.models.deployment.Deployment.

    Raises
    ------
    BadRequest
        When any of the experiments does not belong to project_id.
    """
    project_experiments = self.session.query(models.Experiment).filter_by(
        project_id=project_id)
    experiments_by_id = {stored.uuid: stored for stored in project_experiments}

    # Validate every id up front, before anything is created
    if any(experiment_id not in experiments_by_id for experiment_id in experiments):
        raise BadRequest(code="InvalidExperiments",
                         message="some experiments do not exist")

    created = []
    for experiment_id in experiments:
        source = experiments_by_id[experiment_id]

        new_deployment = models.Deployment(
            uuid=uuid_alpha(),
            experiment_id=experiment_id,
            name=source.name,
            project_id=project_id,
            created_at=now(),
            updated_at=now(),
        )
        self.session.add(new_deployment)
        self.session.flush()
        created.append(new_deployment)

        self.copy_operators(deployment_id=new_deployment.uuid,
                            stored_operators=source.operators)

        # sys.maxsize as the requested position: will add to end of list
        self.fix_positions(
            project_id=project_id,
            deployment_id=new_deployment.uuid,
            new_position=sys.maxsize,
        )

    return created
def update_experiment_from_template(
    self, experiment: schemas.ExperimentUpdate, experiment_id: str
):
    """
    Recreates the operators of experiment using a template.

    The current operators of the experiment are deleted, then one operator
    is created for each task of the template. Dependencies stored in the
    template refer to template uuids; they are translated to the uuids of
    the newly created operators.

    Parameters
    ----------
    experiment : projects.schemas.experiment.ExperimentUpdate
    experiment_id : str

    Returns
    -------
    projects.schemas.experiment.Experiment

    Raises
    ------
    BadRequest
        When `experiment.template_id` does not exist.
        When a template task depends on a task that does not precede it.
    """
    template = self.session.query(models.Template).get(experiment.template_id)

    if template is None:
        raise BadRequest(
            code="InvalidTemplateId",
            message="The specified template does not exist",
        )

    # remove operators
    self.session.query(models.Operator).filter(
        models.Operator.experiment_id == experiment_id
    ).delete()

    # Maps the uuid stored in the template to the uuid of the operator created
    # for it. A separate dict is used so the template's task dicts are never
    # mutated in place.
    created_uuids = {}
    new_operators = []
    for task in template.tasks:
        task_dependencies = task["dependencies"]
        try:
            dependencies = [created_uuids[d] for d in task_dependencies]
        except KeyError as err:
            # Tasks are expected to be ordered so dependencies come first;
            # a missing entry means the template data is inconsistent.
            raise BadRequest(
                code="InvalidTemplateDependencies",
                message="The specified template has invalid dependencies",
            ) from err

        operator_id = uuid_alpha()
        new_operators.append(
            models.Operator(
                uuid=operator_id,
                experiment_id=experiment_id,
                task_id=task["task_id"],
                dependencies=dependencies,
                position_x=task["position_x"],
                position_y=task["position_y"],
            )
        )
        # setdefault keeps the first operator created for a given template uuid
        created_uuids.setdefault(task.get("uuid"), operator_id)

    self.session.bulk_save_objects(new_operators)
    self.session.commit()

    experiment = self.session.query(models.Experiment).get(experiment_id)

    return schemas.Experiment.from_orm(experiment)
def raise_if_parameters_are_invalid(self, parameters: Dict):
    """
    Raises an exception if the specified parameters are not valid.

    Parameters
    ----------
    parameters : dict
        Maps parameter names to values.

    Raises
    ------
    BadRequest
        When parameters is not a dict.
        When any parameter value is not None, str, int, float, bool, list, or dict.
    """
    if not isinstance(parameters, dict):
        raise BadRequest(PARAMETERS_EXCEPTION_MSG)

    allowed_types = (str, int, float, bool, list, dict)
    # Only the values are validated; None is always accepted
    for value in parameters.values():
        if value is not None and not isinstance(value, allowed_types):
            raise BadRequest(PARAMETERS_EXCEPTION_MSG)
def update_deployment(self, deployment: schemas.DeploymentUpdate,
                      project_id: str, deployment_id: str):
    """
    Applies the informed fields to a deployment and, when a position is
    informed, adjusts the position of the others.

    Parameters
    ----------
    deployment : projects.schemas.deployment.DeploymentUpdate
    project_id : str
    deployment_id : str

    Returns
    -------
    projects.schemas.deployment.Deployment

    Raises
    ------
    NotFound
        Propagated from `raise_if_deployment_does_not_exist`
        (presumably when deployment_id does not exist).
    BadRequest
        When name is already the name of another deployment in the project.
    """
    self.raise_if_deployment_does_not_exist(deployment_id)

    deployment_model = models.deployment.Deployment
    same_name = (
        self.session.query(deployment_model)
        .filter(deployment_model.project_id == project_id)
        .filter_by(name=deployment.name)
        .first()
    )
    # The deployment being updated may keep its own name
    if same_name and same_name.uuid != deployment_id:
        raise BadRequest(
            code="DeploymentNameExists",
            message="a deployment with that name already exists",
        )

    # Only fields explicitly set by the caller are written
    changes = deployment.dict(exclude_unset=True)
    changes["updated_at"] = datetime.utcnow()
    self.session.query(models.Deployment).filter_by(uuid=deployment_id).update(changes)

    if deployment.position:
        self.fix_positions(
            project_id=project_id,
            deployment_id=deployment_id,
            new_position=deployment.position,
        )

    self.session.commit()

    updated = self.session.query(models.Deployment).get(deployment_id)
    return schemas.Deployment.from_orm(updated)
def update_experiment(self, experiment: schemas.ExperimentUpdate,
                      project_id: str, experiment_id: str):
    """
    Applies the informed fields to an experiment and, when a position is
    informed, adjusts the position of the others.

    When `experiment.template_id` is informed, the work is delegated to
    `update_experiment_from_template` and no other field is written here.

    Parameters
    ----------
    experiment: projects.schemas.experiment.ExperimentUpdate
    project_id : str
    experiment_id : str

    Returns
    -------
    projects.schemas.experiment.Experiment

    Raises
    ------
    NotFound
        Propagated from `raise_if_experiment_does_not_exist`
        (presumably when experiment_id does not exist).
    BadRequest
        When name is already the name of another experiment in the project.
    """
    self.raise_if_experiment_does_not_exist(experiment_id)

    same_name = (
        self.session.query(models.Experiment)
        .filter(models.Experiment.project_id == project_id)
        .filter_by(name=experiment.name)
        .first()
    )
    # The experiment being updated may keep its own name
    if same_name and same_name.uuid != experiment_id:
        raise BadRequest("an experiment with that name already exists")

    if experiment.template_id:
        return self.update_experiment_from_template(
            experiment=experiment,
            experiment_id=experiment_id,
        )

    # Only fields explicitly set by the caller are written
    changes = experiment.dict(exclude_unset=True)
    changes["updated_at"] = datetime.utcnow()
    self.session.query(models.Experiment).filter_by(uuid=experiment_id).update(changes)

    if experiment.position:
        self.fix_positions(
            project_id=project_id,
            experiment_id=experiment_id,
            new_position=experiment.position,
        )

    self.session.commit()

    updated = self.session.query(models.Experiment).get(experiment_id)
    return schemas.Experiment.from_orm(updated)
def create_monitoring(self, monitoring: schemas.MonitoringCreate,
                      deployment_id: str):
    """
    Stores a new monitoring for a deployment and schedules its deploy
    as a background task.

    Parameters
    ----------
    monitoring : projects.schemas.monitoring.MonitoringCreate
    deployment_id : str

    Returns
    -------
    projects.schemas.monitoring.Monitoring

    Raises
    ------
    BadRequest
        When `monitoring.task_id` does not exist.
    """
    matching_task = self.session.query(models.Task.uuid).filter_by(
        uuid=monitoring.task_id).scalar()
    if matching_task is None:
        raise BadRequest(
            code="InvalidTaskId",
            message="The specified task does not exist",
        )

    stored_monitoring = models.Monitoring(
        uuid=uuid_alpha(),
        deployment_id=deployment_id,
        task_id=monitoring.task_id,
    )
    self.session.add(stored_monitoring)
    self.session.commit()
    self.session.refresh(stored_monitoring)

    # NOTE(review): deployment_id is not validated here; a missing deployment
    # would fail on `deployment.experiment_id` below — confirm callers check it.
    deployment = self.session.query(models.Deployment).get(deployment_id)

    # Uses empty run_id if a deployment does not have a run
    run = self.run_controller.get_run(deployment_id) or {"runId": ""}

    # Deploy the new monitoring
    self.background_tasks.add_task(
        deploy_monitoring,
        deployment_id=deployment_id,
        experiment_id=deployment.experiment_id,
        run_id=run["runId"],
        task_id=stored_monitoring.task_id,
        monitoring_id=stored_monitoring.uuid,
    )

    return schemas.Monitoring.from_orm(stored_monitoring)
def create_project(self, project: schemas.ProjectCreate):
    """
    Stores a new project together with a first, default experiment.

    Parameters
    ----------
    project: projects.schemas.project.ProjectCreate

    Returns
    -------
    project: projects.schemas.project.Project

    Raises
    ------
    BadRequest
        When name is not a str instance.
        When a project with that name already exists.
    """
    requested_name = project.name
    if not isinstance(requested_name, str):
        raise BadRequest("name is required")

    duplicate = self.session.query(models.Project).filter_by(name=requested_name).first()
    if duplicate:
        raise BadRequest("a project with that name already exists")

    created = models.Project(
        uuid=uuid_alpha(),
        name=requested_name,
        description=project.description,
    )
    self.session.add(created)
    self.session.flush()

    # Every new project starts with one experiment
    first_experiment = schemas.ExperimentCreate(name="Experimento 1")
    self.experiment_controller.create_experiment(
        experiment=first_experiment,
        project_id=created.uuid,
    )

    self.session.commit()
    self.session.refresh(created)

    return schemas.Project.from_orm(created)
async def handle_post_prediction(
    project_id: str,
    deployment_id: str,
    request: Request,
    background_tasks: BackgroundTasks,
    file: Optional[UploadFile] = File(None),
    session: Session = Depends(session_scope),
    kubeflow_userid: Optional[str] = Header("anonymous"),
):
    """
    Handles POST request to /.

    The prediction input comes either from an uploaded file (form-data)
    or from the JSON body of the request.

    Parameters
    ----------
    project_id : str
    deployment_id : str
    request : starlette.requests.Request
    background_tasks : fastapi.BackgroundTasks
    file : starlette.datastructures.UploadFile
    session : sqlalchemy.orm.session.Session
    kubeflow_userid : fastapi.Header

    Returns
    -------
    Prediction: projects.schemas.prediction.Prediction

    Raises
    ------
    BadRequest
        When no file is sent and the body is not valid JSON.
    """
    ProjectController(
        session, kubeflow_userid=kubeflow_userid
    ).raise_if_project_does_not_exist(project_id)

    DeploymentController(session).raise_if_deployment_does_not_exist(deployment_id)

    # at this endpoint, we can accept both form-data and json as the request content-type
    if file is None:
        try:
            prediction_input = await request.json()
        except JSONDecodeError:
            raise BadRequest(
                code="MissingRequiredFormDataOrJson",
                message="either form-data or json is required",
            )
    else:
        prediction_input = {"upload_file": file}

    prediction_controller = PredictionController(session, background_tasks)
    return prediction_controller.create_prediction(
        deployment_id=deployment_id,
        **prediction_input,
    )
def copy_deployment(self, deployment_id: str, name: str, project_id: str):
    """
    Duplicates a deployment, including its operators, under a new name.

    The copy is placed at the end of the deployment list.

    Parameters
    ----------
    deployment_id : str
        The deployment to copy from.
    name : str
        The name of the copy.
    project_id : str

    Returns
    -------
    list
        A list of projects.models.deployment.Deployment (a single element).

    Raises
    ------
    BadRequest
        When deployment_id does not exist.
    """
    source = self.session.query(models.Deployment).get(deployment_id)
    if source is None:
        raise BadRequest(code="InvalidDeploymentId",
                         message="source deployment does not exist")

    duplicate = models.Deployment(
        uuid=uuid_alpha(),
        experiment_id=source.experiment_id,
        name=name,
        project_id=project_id,
        created_at=now(),
        updated_at=now(),
    )
    self.session.add(duplicate)
    self.session.flush()

    self.copy_operators(deployment_id=duplicate.uuid,
                        stored_operators=source.operators)

    # sys.maxsize as the requested position: will add to end of list
    self.fix_positions(
        project_id=project_id,
        deployment_id=duplicate.uuid,
        new_position=sys.maxsize,
    )

    return [duplicate]
def deploy_run(self, deployment):
    """
    Starts a new run in Kubeflow Pipelines.

    Parameters
    ----------
    deployment : projects.models.deployment.Deployment

    Returns
    -------
    projects.schemas.run.Run

    Raises
    ------
    NotFound
        When deployment is None.
    BadRequest
        When `kfp_runs.start_run` raises ValueError.
    """
    if deployment is None:
        raise NotFound(
            code="DeploymentNotFound",
            message="The specified deployment does not exist",
        )

    # Removes operators that don't have a deployment_notebook (eg. Upload de Dados).
    # Then, fix dependencies in their children.
    operators = self.remove_non_deployable_operators(deployment.operators)

    try:
        run = kfp_runs.start_run(
            operators=operators,
            project_id=deployment.project_id,
            experiment_id=deployment.experiment_id,
            deployment_id=deployment.uuid,
            deployment_name=deployment.name,
        )
    except ValueError as err:
        # Keep the original error as the cause so it is visible in tracebacks
        raise BadRequest(
            code="MissingRequiredOperatorId",
            message="Necessary at least one operator.",
        ) from err

    # Remove the object from the operator session in order not to update the database,
    # Just need to remove the dependencies for the runs.
    for operator in deployment.operators:
        self.session.expunge(operator)

    return schemas.Run.from_orm(run)
def update_comparison(
    self, comparison: schemas.ComparisonUpdate, project_id: str, comparison_id: str
):
    """
    Applies the informed fields to a comparison.

    Parameters
    ----------
    comparison: projects.schemas.comparison.ComparisonUpdate
    project_id : str
    comparison_id : str

    Returns
    -------
    projects.schemas.comparison.Comparison

    Raises
    ------
    BadRequest
        When `comparison.experiment_id` is informed but does not exist.
    NotFound
        Propagated from `raise_if_comparison_does_not_exist`
        (presumably when comparison_id does not exist).
    """
    self.raise_if_comparison_does_not_exist(comparison_id)

    source_experiment_id = comparison.experiment_id
    if source_experiment_id:
        source_experiment = self.session.query(models.Experiment).get(source_experiment_id)
        if source_experiment is None:
            raise BadRequest(
                code="InvalidExperimentId",
                message="source experiment does not exist",
            )

    # Only fields explicitly set by the caller are written
    changes = comparison.dict(exclude_unset=True)
    changes["updated_at"] = datetime.utcnow()
    self.session.query(models.Comparison).filter_by(uuid=comparison_id).update(changes)
    self.session.commit()

    updated = self.session.query(models.Comparison).get(comparison_id)
    return schemas.Comparison.from_orm(updated)
def delete_multiple_projects(self, project_ids):
    """
    Delete multiple projects and the stored objects of their experiments.

    Ids that do not match a project are ignored. Removal of stored objects
    is best-effort: failures are logged and do not abort the request.

    Parameters
    ----------
    project_ids : list
        The list of project ids.

    Returns
    -------
    projects.schemas.message.Message

    Raises
    ------
    BadRequest
        When project_ids is empty.
    """
    import logging

    if len(project_ids) < 1:
        raise BadRequest("inform at least one project")

    experiments = self.session.query(models.Experiment) \
        .filter(models.Experiment.project_id.in_(project_ids)) \
        .all()
    # Capture the uuids before deleting anything.
    # NOTE(review): depending on cascade settings the ORM instances may be
    # expired or gone after the commit below, so they are not read afterwards.
    experiment_ids = [experiment.uuid for experiment in experiments]

    projects = self.session.query(models.Project) \
        .filter(models.Project.uuid.in_(project_ids)) \
        .all()
    for project in projects:
        self.session.delete(project)
    self.session.commit()

    logger = logging.getLogger(__name__)
    for experiment_id in experiment_ids:
        prefix = join("experiments", experiment_id)
        try:
            remove_objects(prefix=prefix)
        except Exception:
            # Best-effort cleanup: the projects are already deleted, so the
            # failure is recorded instead of being silently discarded.
            logger.warning("Failed to remove objects with prefix %r",
                           prefix, exc_info=True)

    return schemas.Message(message="Successfully removed projects")
def update_project(self, project: schemas.ProjectUpdate, project_id: str):
    """
    Applies the informed fields to a project of the current tenant.

    Parameters
    ----------
    project: projects.schemas.project.ProjectUpdate
    project_id: str

    Returns
    -------
    project: projects.schemas.project.Project

    Raises
    ------
    NotFound
        Propagated from `raise_if_project_does_not_exist`
        (presumably when project_id does not exist).
    BadRequest
        When name is already the name of another project of the tenant.
    """
    self.raise_if_project_does_not_exist(project_id)

    tenant = self.kubeflow_userid

    same_name = (
        self.session.query(models.Project)
        .filter_by(name=project.name)
        .filter_by(tenant=tenant)
        .first()
    )
    # The project being updated may keep its own name
    if same_name and same_name.uuid != project_id:
        raise BadRequest(
            code="ProjectNameExists",
            message="a project with that name already exists",
        )

    # Only fields explicitly set by the caller are written
    changes = project.dict(exclude_unset=True)
    changes["updated_at"] = datetime.utcnow()

    (
        self.session.query(models.Project)
        .filter_by(uuid=project_id)
        .filter_by(tenant=tenant)
        .update(changes)
    )
    self.session.commit()

    updated = (
        self.session.query(models.Project)
        .filter_by(uuid=project_id)
        .filter_by(tenant=tenant)
        .first()
    )
    return schemas.Project.from_orm(updated)
def create_project(self, project: schemas.ProjectCreate):
    """
    Stores a new project for the current tenant, together with a first,
    default experiment.

    Parameters
    ----------
    project: projects.schemas.project.ProjectCreate

    Returns
    -------
    project: projects.schemas.project.Project

    Raises
    ------
    BadRequest
        When the tenant already has a project with that name.
    """
    tenant = self.kubeflow_userid

    duplicate = (
        self.session.query(models.Project)
        .filter_by(name=project.name)
        .filter_by(tenant=tenant)
        .first()
    )
    if duplicate:
        raise BadRequest(
            code="ProjectNameExists",
            message="a project with that name already exists",
        )

    created = models.Project(
        uuid=uuid_alpha(),
        name=project.name,
        description=project.description,
        tenant=tenant,
        created_at=now(),
        updated_at=now(),
    )
    self.session.add(created)
    self.session.flush()

    # Every new project starts with one experiment
    first_experiment = schemas.ExperimentCreate(name="Experimento 1")
    self.experiment_controller.create_experiment(
        experiment=first_experiment,
        project_id=created.uuid,
    )

    self.session.commit()
    self.session.refresh(created)

    return schemas.Project.from_orm(created)
def raise_if_invalid_docker_image(self, image):
    """
    Raise an error if a str does not meet the standards for a docker image name.

    Example: (username/organization)/name-of-the-image:tag

    The whole string must match: one or more "/"-separated segments after
    the first one, and an optional ":tag". Only lowercase letters, digits,
    "." and "-" are accepted in each part.

    Parameters
    ----------
    image : str or None
        The image name. None and the empty string are not validated.

    Raises
    ------
    BadRequest
        When a given image is a invalid one.
    """
    pattern = re.compile(r"[a-z0-9.-]+([/]{1}[a-z0-9.-]+)+([:]{1}[a-z0-9.-]+){0,1}")

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "name/image\n" through.
    if image and pattern.fullmatch(image) is None:
        raise BadRequest("invalid docker image name")
def update_template(self, template: schemas.TemplateUpdate, template_id: str):
    """
    Applies the informed fields to a template of the current tenant.

    Parameters
    ----------
    template: projects.schemas.template.TemplateUpdate
    template_id : str

    Returns
    -------
    projects.schemas.template.Template

    Raises
    ------
    NotFound
        Propagated from `raise_if_template_does_not_exist`
        (presumably when template_id does not exist).
    BadRequest
        When name is already the name of another template of the tenant.
    """
    self.raise_if_template_does_not_exist(template_id)

    tenant = self.kubeflow_userid

    same_name = (
        self.session.query(models.Template)
        .filter_by(name=template.name)
        .filter_by(tenant=tenant)
        .first()
    )
    # The template being updated may keep its own name
    if same_name and same_name.uuid != template_id:
        raise BadRequest(
            code="TemplateNameExists",
            message="a template with that name already exists",
        )

    # Only fields explicitly set by the caller are written
    changes = template.dict(exclude_unset=True)
    changes["updated_at"] = datetime.utcnow()

    (
        self.session.query(models.Template)
        .filter_by(uuid=template_id)
        .filter_by(tenant=tenant)
        .update(changes)
    )
    self.session.commit()

    updated = (
        self.session.query(models.Template)
        .filter_by(uuid=template_id)
        .filter_by(tenant=tenant)
        .first()
    )
    return schemas.Template.from_orm(updated)
def raise_if_has_cycles(
    self,
    project_id: str,
    operator_id: str,
    dependencies: List[str],
    experiment_id: Optional[str] = None,
    deployment_id: Optional[str] = None,
):
    """
    Raises an exception if the operator dependencies are cyclical.

    The operators matching both experiment_id and deployment_id are loaded
    and each one not yet visited is handed to `has_cycles_util`.

    Parameters
    ----------
    project_id : str
    operator_id : str
    dependencies : list or None
    experiment_id : str or None
    deployment_id : str or None

    Returns
    -------
    bool
        Always False when no exception is raised.

    Raises
    ------
    BadRequest
        When `has_cycles_util` reports a cycle.
    """
    stored_operators = (
        self.session.query(models.Operator)
        .filter_by(experiment_id=experiment_id)
        .filter_by(deployment_id=deployment_id)
        .all()
    )
    operator_ids = [stored.uuid for stored in stored_operators]

    # Bookkeeping handed to has_cycles_util (presumably updated by it as it
    # walks the graph — confirm against its implementation)
    visited = dict.fromkeys(operator_ids, False)
    recursion_stack = dict.fromkeys(operator_ids, False)

    for op_uuid in operator_ids:
        if visited[op_uuid] is not False:
            continue

        found_cycle = self.has_cycles_util(
            op_uuid, visited, recursion_stack, dependencies, operator_id)
        if found_cycle is True:
            raise BadRequest(code="InvalidCyclicalDependencies",
                             message="Cyclical dependencies.")

    return False
from projects import models, schemas from projects.controllers.tasks import TaskController from projects.controllers.utils import uuid_alpha from projects.exceptions import BadRequest, NotFound from projects.kubernetes.kube_config import load_kube_config from projects.agent.utils import list_resource_version from projects.kfp import KF_PIPELINES_NAMESPACE from projects.kfp.runs import get_latest_run_id from projects.utils import now from kubernetes import client from kubernetes.watch import Watch NOT_FOUND = NotFound(code="OperatorNotFound", message="The specified operator does not exist") INVALID_PARAMETERS = BadRequest( code="InvalidParameters", message="The specified parameters are not valid") INVALID_DEPENDENCIES = BadRequest( code="InvalidDependencies", message="The specified dependencies are not valid.") class OperatorController: def __init__(self, session): self.session = session self.task_controller = TaskController(session) def raise_if_operator_does_not_exist(self, operator_id: str): """ Raises an exception if the specified operator does not exist. Parameters
def create_template(self, template: schemas.TemplateCreate):
    """
    Stores a new template built from the operators of an experiment
    or of a deployment.

    Parameters
    ----------
    template : projects.schemas.template.TemplateCreate
        `experiment_id` takes precedence over `deployment_id` when both
        are informed.

    Returns
    -------
    projects.schemas.template.Template

    Raises
    ------
    BadRequest
        When name is not a str instance.
        When neither experiment_id nor deployment_id is informed.
        When a template with that name already exists.
    NotFound
        When the informed experiment or deployment does not exist.
    """
    if not isinstance(template.name, str):
        raise BadRequest("name is required")

    if template.experiment_id:
        found = self.session.query(models.Experiment.uuid) \
            .filter_by(uuid=template.experiment_id) \
            .scalar()
        if found is None:
            raise NotFound("The specified experiment does not exist")
        operator_filter = {"experiment_id": template.experiment_id}
    elif template.deployment_id:
        found = self.session.query(models.Deployment.uuid) \
            .filter_by(uuid=template.deployment_id) \
            .scalar()
        if found is None:
            raise NotFound("The specified deployment does not exist")
        operator_filter = {"deployment_id": template.deployment_id}
    else:
        raise BadRequest(
            "experimentId or deploymentId needed to create template.")

    operators = self.session.query(models.Operator) \
        .filter_by(**operator_filter) \
        .all()

    same_name = self.session.query(models.Template) \
        .filter_by(name=template.name) \
        .first()
    if same_name:
        raise BadRequest("a template with that name already exists")

    # order operators by dependencies
    # NOTE(review): this relies on order_operators_by_dependencies appending
    # uuids to operators_ordered until every operator is listed — confirm it
    # always makes progress, otherwise this loop does not terminate.
    operators_ordered = []
    while len(operators) != len(operators_ordered):
        for operator in operators:
            self.order_operators_by_dependencies(operators_ordered, operator)

    operators_by_uuid = {stored.uuid: stored for stored in operators}

    # JSON array order of elements are preserved, so there is no need to save positions
    tasks = []
    for uuid in operators_ordered:
        operator = operators_by_uuid.get(uuid)
        tasks.append({
            "uuid": operator.uuid,
            "task_id": operator.task_id,
            "dependencies": operator.dependencies,
            "position_x": operator.position_x,
            "position_y": operator.position_y,
        })

    created = models.Template(uuid=uuid_alpha(), name=template.name, tasks=tasks)
    self.session.add(created)
    self.session.commit()
    self.session.refresh(created)

    return schemas.Template.from_orm(created)
def list_projects(
    self,
    page: Optional[int] = 1,
    page_size: Optional[int] = 10,
    order_by: Optional[str] = None,
    **filters,
):
    """
    Lists projects of the current tenant. Supports pagination, and sorting.

    Parameters
    ----------
    page : int
        The page number. First page is 1. None is treated as the first page.
    page_size : int
        The page size. Default value is 10. None disables pagination.
    order_by : str
        Order by instruction. Format is "column [asc|desc]".
        Defaults to "name asc".
    **filters : dict
        Column name to substring; each one is applied as a
        case-insensitive "contains" filter.

    Returns
    -------
    projects.schemas.project.ProjectList

    Raises
    ------
    BadRequest
        When order_by is invalid.
    """
    query = self.session.query(
        models.Project).filter_by(tenant=self.kubeflow_userid)
    query_total = self.session.query(func.count(
        models.Project.uuid)).filter_by(tenant=self.kubeflow_userid)

    for column, value in filters.items():
        condition = getattr(models.Project, column).ilike(
            f"%{value}%").collate("utf8mb4_bin")
        query = query.filter(condition)
        query_total = query_total.filter(condition)

    # BUG (kept from the original implementation)
    # query_total.limit(page_size) didn't work, so the count is capped here.
    # This solution uses an unoptimized query, and should be improved.
    matching = query_total.scalar()
    # Without a page size there is nothing to cap against
    total = matching if page_size is None else min(page_size, matching)

    # Default sort is name in ascending order
    if not order_by:
        order_by = "name asc"

    # Sorts records. Validation uses explicit checks rather than `assert`,
    # which is stripped when Python runs with -O.
    try:
        column, sort = order_by.strip().split()
    except ValueError:
        raise BadRequest(code="InvalidOrderBy",
                         message="Invalid order argument") from None

    sort = sort.lower()
    if sort not in ("asc", "desc") or column not in models.Project.__table__.columns.keys():
        raise BadRequest(code="InvalidOrderBy",
                         message="Invalid order argument")

    direction = asc if sort == "asc" else desc
    query = query.order_by(direction(getattr(models.Project, column)))

    # Applies pagination
    if page_size is not None:
        page_number = 1 if page is None else page
        query = query.limit(page_size).offset((page_number - 1) * page_size)

    projects = query.all()
    return schemas.ProjectList.from_orm(projects, total)
def create_operator(
    self,
    operator: schemas.OperatorCreate,
    project_id: str,
    experiment_id: Optional[str] = None,
    deployment_id: Optional[str] = None,
):
    """
    Stores a new operator that belongs to an experiment or to a deployment.

    Missing dependencies and parameters default to an empty list and an
    empty dict; both defaults are written back to the `operator` argument.

    Parameters
    ----------
    operator: projects.schemas.operator.OperatorCreate
    project_id : str
    experiment_id : str or None
    deployment_id : str or None

    Returns
    -------
    projects.schemas.operator.Operator

    Raises
    ------
    BadRequest
        When task_id is not a str instance or the task does not exist.
        When both experiment_id and deployment_id are informed.
        Dependencies (only when experiment_id is informed) and parameters
        are checked by `raise_if_dependencies_are_invalid` and
        `raise_if_parameters_are_invalid`.
    """
    task_id = operator.task_id
    if not isinstance(task_id, str):
        raise BadRequest(code="MissingRequiredTaskId",
                         message="taskId is required")

    try:
        self.task_controller.raise_if_task_does_not_exist(task_id)
    except NotFound:
        raise BadRequest(code="InvalidTaskId",
                         message="source task does not exist")

    if operator.dependencies is None:
        operator.dependencies = []

    if experiment_id:
        self.raise_if_dependencies_are_invalid(
            project_id=project_id,
            experiment_id=experiment_id,
            deployment_id=deployment_id,
            dependencies=operator.dependencies,
        )

    if experiment_id and deployment_id:
        raise BadRequest(
            code="InvalidOperatorRequestBody",
            message="Operator cannot contain an experiment and a deployment simultaneously",
        )

    if operator.parameters is None:
        operator.parameters = {}

    self.raise_if_parameters_are_invalid(operator.parameters)

    operator_fields = {
        "uuid": uuid_alpha(),
        "name": operator.name,
        "experiment_id": experiment_id,
        "deployment_id": deployment_id,
        "task_id": task_id,
        "dependencies": operator.dependencies,
        "status": operator.status,
        "parameters": operator.parameters,
        "position_x": operator.position_x,
        "position_y": operator.position_y,
        "created_at": now(),
        "updated_at": now(),
    }
    stored_operator = models.Operator(**operator_fields)

    self.session.add(stored_operator)
    self.session.commit()
    self.session.refresh(stored_operator)

    return schemas.Operator.from_orm(stored_operator)
def copy_experiment(self, experiment: schemas.ExperimentCreate, project_id: str):
    """
    Duplicates an experiment, including its operators and their dependencies.

    Operators are first copied without dependencies; a second pass sets the
    dependencies using the uuids of the copies.

    Parameters
    ----------
    experiment: projects.schemas.experiment.ExperimentCreate
        `copy_from` is the uuid of the source experiment.
    project_id: str

    Returns
    -------
    projects.models.experiment.Experiment
        The newly created experiment record.

    Raises
    ------
    BadRequest
        When copy_from does not exist.
    """
    source = self.session.query(models.Experiment).get(experiment.copy_from)
    if source is None:
        raise BadRequest("source experiment does not exist")

    duplicate = models.Experiment(
        uuid=uuid_alpha(),
        name=experiment.name,
        project_id=project_id,
    )
    self.session.add(duplicate)
    self.session.flush()

    # Creates a dict to map source operator_id to its copy operator_id.
    # This map will be used to build the dependencies using new operator_ids
    copies_map = {}
    for source_operator in source.operators:
        blueprint = schemas.OperatorCreate(
            task_id=source_operator.task_id,
            experiment_id=duplicate.uuid,
            parameters=source_operator.parameters,
            position_x=source_operator.position_x,
            position_y=source_operator.position_y,
        )
        copied = self.operator_controller.create_operator(
            operator=blueprint,
            project_id=project_id,
            experiment_id=duplicate.uuid,
        )
        copies_map[source_operator.uuid] = {
            "copy_uuid": copied.uuid,
            "dependencies": source_operator.dependencies,
        }

    # sets dependencies on new operators
    for entry in copies_map.values():
        remapped = [copies_map[d]["copy_uuid"] for d in entry["dependencies"]]
        self.operator_controller.update_operator(
            project_id=project_id,
            experiment_id=duplicate.uuid,
            operator_id=entry["copy_uuid"],
            operator=schemas.OperatorUpdate(dependencies=remapped),
        )

    return duplicate