Beispiel #1
0
    def create_experiment(self, experiment: schemas.ExperimentCreate,
                          project_id: str):
        """
        Stores a new experiment and places it at the end of the project's list.

        When `experiment.copy_from` is set the new experiment is produced by
        `self.copy_experiment`; otherwise an empty experiment is inserted.

        Parameters
        ----------
        experiment : projects.schemas.experiment.ExperimentCreate
        project_id : str

        Returns
        -------
        projects.schemas.experiment.Experiment

        Raises
        ------
        BadRequest
            When name is not a str instance.
            When another experiment of the same project already has that name.

        Notes
        -----
        Earlier documentation listed NotFound for an unknown project_id; no
        such check is visible in this method (TODO confirm whether the caller
        or `copy_experiment` performs it).
        """
        requested_name = experiment.name
        if not isinstance(requested_name, str):
            raise BadRequest("name is required")

        # Experiment names must be unique inside a project.
        name_owner = (
            self.session.query(models.Experiment)
            .filter(models.Experiment.project_id == project_id)
            .filter_by(name=requested_name)
            .first()
        )
        if name_owner:
            raise BadRequest("an experiment with that name already exists")

        if experiment.copy_from:
            created = self.copy_experiment(experiment=experiment,
                                           project_id=project_id)
        else:
            created = models.Experiment(
                uuid=uuid_alpha(),
                name=requested_name,
                project_id=project_id,
            )
            self.session.add(created)
            # flush so the row exists before positions are recomputed
            self.session.flush()

        # sys.maxsize as position means "append to the end of the list"
        self.fix_positions(
            project_id=project_id,
            experiment_id=created.uuid,
            new_position=sys.maxsize,
        )

        self.session.commit()
        self.session.refresh(created)

        return schemas.Experiment.from_orm(created)
Beispiel #2
0
    def raise_if_dependencies_are_invalid(self,
                                          project_id: str,
                                          dependencies: List,
                                          experiment_id: Optional[str] = None,
                                          deployment_id: Optional[str] = None,
                                          operator_id: Optional[str] = None):
        """
        Raises an exception if the specified dependencies are not valid.

        Dependencies are rejected when they are not a list, contain duplicate
        or unhashable elements, include `operator_id` itself, reference an
        operator that does not exist, or (as decided by
        `self.raise_if_has_cycles`) are cyclical.

        Parameters
        ----------
        project_id : str
        dependencies : list
        experiment_id : str or None
        deployment_id : str or None
        operator_id : str or None

        Raises
        ------
        BadRequest
            When dependencies is not a list or has duplicate elements.
            When a dependency is the operator itself.
            When any dependency does not exist.
            When dependencies are cyclic.
        """
        if not isinstance(dependencies, list):
            raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)

        # set() raises TypeError for unhashable elements (e.g. nested lists
        # or dicts). Such a payload is a client error, not a server error.
        try:
            unique_count = len(set(dependencies))
        except TypeError as err:
            raise BadRequest(DEPENDENCIES_EXCEPTION_MSG) from err

        # check if dependencies has duplicates
        if unique_count != len(dependencies):
            raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)

        for dependency_id in dependencies:
            # An operator cannot depend on itself. Checked first because it
            # needs no database round trip and yields the same error.
            if dependency_id == operator_id:
                raise BadRequest(DEPENDENCIES_EXCEPTION_MSG)
            try:
                self.raise_if_operator_does_not_exist(dependency_id)
            except NotFound as err:
                raise BadRequest(DEPENDENCIES_EXCEPTION_MSG) from err

        self.raise_if_has_cycles(
            project_id=project_id,
            experiment_id=experiment_id,
            deployment_id=deployment_id,
            operator_id=operator_id,
            dependencies=dependencies,
        )
Beispiel #3
0
 def task_category_is_not_none(self, task_cat):
     """
     Validates the category of a task, when one was informed.

     Parameters
     ----------
     task_cat : object
         Any object exposing a `category` attribute.

     Raises
     ------
     BadRequest
         When category is not None and is not one of VALID_CATEGORIES.
     """
     category = task_cat.category
     # A missing category is accepted; only informed values are validated.
     if category is None or category in VALID_CATEGORIES:
         return

     choices = ",".join(VALID_CATEGORIES)
     raise BadRequest(
         code="InvalidCategory",
         message=f"Invalid category. Choose any of {choices}",
     )
Beispiel #4
0
    def delete_multiple_projects(self, project_ids):
        """
        Deletes every project of the current tenant whose uuid was informed.

        Ids that match no project of the tenant are ignored.

        Parameters
        ----------
        project_ids : list
            The list of project ids.

        Returns
        -------
        projects.schemas.message.Message

        Raises
        ------
        BadRequest
            When project_ids is empty.
        """
        if len(project_ids) < 1:
            raise BadRequest(code="MissingRequiredProjectId",
                             message="inform at least one project")

        # Restricted to the tenant so other users' projects stay untouched.
        tenant_projects = (
            self.session.query(models.Project)
            .filter(models.Project.uuid.in_(project_ids))
            .filter_by(tenant=self.kubeflow_userid)
            .all()
        )

        for stored_project in tenant_projects:
            self.session.delete(stored_project)

        self.session.commit()

        return schemas.Message(message="Successfully removed projects")
Beispiel #5
0
    def list_tasks(self,
                   page: Optional[int] = None,
                   page_size: Optional[int] = None,
                   order_by: Optional[str] = None,
                   **filters):
        """
        Lists tasks. Supports filtering, pagination, and sorting.

        Parameters
        ----------
        page : int
            The page number. First page is 1.
        page_size : int
            The page size.
        order_by : str or None
            Order by instruction. Format is "column [asc|desc]" (a "+" is
            accepted in place of the space). Defaults to "name asc".
        **filters : dict
            Column name -> text. Each entry adds a case-insensitive
            "contains" (ILIKE %text%) condition on that column.

        Returns
        -------
        projects.schemas.task.TaskList

        Raises
        ------
        BadRequest
            When order_by is invalid.
        """
        query = self.session.query(models.Task)
        query_total = self.session.query(func.count(models.Task.uuid))

        for column, value in filters.items():
            condition = getattr(models.Task, column).ilike(f"%{value}%")
            query = query.filter(condition)
            query_total = query_total.filter(condition)

        # The total ignores pagination on purpose: it counts all matches.
        total = query_total.scalar()

        # Default sort is name in ascending order
        if not order_by:
            order_by = "name asc"

        # Explicit checks rather than `assert`: assertions are stripped when
        # Python runs with -O, which would let invalid input through.
        try:
            column, sort = order_by.replace("+", " ").strip().split()
        except ValueError as err:
            raise BadRequest("Invalid order argument") from err

        sort = sort.lower()
        if (sort not in ("asc", "desc")
                or column not in models.Task.__table__.columns.keys()):
            raise BadRequest("Invalid order argument")

        direction = asc if sort == "asc" else desc
        query = query.order_by(direction(getattr(models.Task, column)))

        if page and page_size:
            # Applies pagination
            query = query.limit(page_size).offset((page - 1) * page_size)

        tasks = query.all()
        container_state = get_notebook_state()
        return schemas.TaskList.from_orm(container_state, tasks, total)
Beispiel #6
0
def retry_run(run_id, experiment_id):
    """
    Retries a failed run in Kubeflow Pipelines.

    Parameters
    ----------
    run_id : str
        The run to retry. The special value "latest" is resolved through
        `get_latest_run_id(experiment_id)`.
    experiment_id : str

    Returns
    -------
    dict
        Retry response confirmation.

    Raises
    ------
    ApiException
    BadRequest
        When the run status is not "Failed".
    """
    target_run_id = (
        get_latest_run_id(experiment_id) if run_id == "latest" else run_id
    )

    run_details = kfp_client().get_run(run_id=target_run_id)

    # Only failed runs can be retried.
    if run_details.run.status != "Failed":
        raise BadRequest("Not a failed run")

    kfp_client().runs.retry_run(run_id=run_details.run.id)

    return {"message": "Run re-initiated successfully"}
Beispiel #7
0
    def delete_multiple_templates(self, template_ids):
        """
        Deletes every template of the current tenant whose uuid was informed.

        Ids that match no template of the tenant are ignored.

        Parameters
        ----------
        template_ids : list
            The list of template ids.

        Returns
        -------
        projects.schemas.message.Message

        Raises
        ------
        BadRequest
            When template_ids is empty.
        """
        if len(template_ids) < 1:
            raise BadRequest(code="MissingRequiredTemplateId",
                             message="inform at least one template")

        # Restricted to the tenant so other users' templates stay untouched.
        tenant_templates = (
            self.session.query(models.Template)
            .filter(models.Template.uuid.in_(template_ids))
            .filter_by(tenant=self.kubeflow_userid)
            .all()
        )

        for stored_template in tenant_templates:
            self.session.delete(stored_template)

        self.session.commit()

        return schemas.Message(message="Successfully removed templates")
Beispiel #8
0
    def create_run(self, project_id: str, deployment_id: str):
        """
        Starts a new run of a deployment in Kubeflow Pipelines.

        After the run is started, one background task per monitoring of the
        deployment is scheduled and all operators of the deployment are
        marked as "Pending".

        Parameters
        ----------
        project_id : str
        deployment_id : str

        Returns
        -------
        dict
            The value returned by `kfp_runs.start_run`, with the key
            "deploymentId" added.

        Raises
        ------
        NotFound
            When deployment_id does not exist.
        BadRequest
            When `kfp_runs.start_run` raises ValueError.
        """
        deployment = self.session.query(models.Deployment).get(deployment_id)
        if deployment is None:
            raise NotFound("The specified deployment does not exist")

        # Removes operators that don't have a deployment_notebook (eg. Upload de Dados).
        # Then, fix dependencies in their children.
        deployable_operators = self.remove_non_deployable_operators(
            deployment.operators)

        try:
            run = kfp_runs.start_run(
                operators=deployable_operators,
                project_id=deployment.project_id,
                experiment_id=deployment.experiment_id,
                deployment_id=deployment_id,
                deployment_name=deployment.name,
            )
        except ValueError as e:
            raise BadRequest(str(e))

        # Deploy monitoring tasks
        monitoring_list = self.monitoring_controller.list_monitorings(
            project_id=project_id,
            deployment_id=deployment_id,
        )
        for monitoring in monitoring_list.monitorings or ():
            self.background_tasks.add_task(
                deploy_monitoring,
                deployment_id=deployment_id,
                experiment_id=deployment.experiment_id,
                run_id=run["uuid"],
                task_id=monitoring.task_id,
                monitoring_id=monitoring.uuid,
            )

        # Every operator of the deployment starts over as "Pending".
        operators_of_deployment = self.session.query(models.Operator) \
            .filter_by(deployment_id=deployment_id)
        operators_of_deployment.update({"status": "Pending"})
        self.session.commit()

        run["deploymentId"] = deployment_id
        return run
Beispiel #9
0
    def create_deployments_from_experiments(self, experiments: list,
                                            project_id: str):
        """
        Creates one deployment for each of the given experiments.

        Each deployment takes the name of its experiment, receives a copy of
        the experiment's operators and is placed at the end of the list.
        This method flushes but does not commit the session.

        Parameters
        ----------
        experiments : list
            List of experiments uuids to copy.
        project_id : str

        Returns
        -------
        list
            A list of projects.models.deployment.Deployment.

        Raises
        ------
        BadRequest
            When any of the experiments does not belong to project_id.
        """
        project_experiments = {}
        stored_experiments = self.session.query(models.Experiment).filter_by(
            project_id=project_id)
        for stored in stored_experiments:
            project_experiments[stored.uuid] = stored

        # Validate everything first so nothing is created for a bad request.
        if any(requested not in project_experiments
               for requested in experiments):
            raise BadRequest(code="InvalidExperiments",
                             message="some experiments do not exist")

        created_deployments = []

        for source_id in experiments:
            source = project_experiments[source_id]
            new_deployment = models.Deployment(
                uuid=uuid_alpha(),
                experiment_id=source_id,
                name=source.name,
                project_id=project_id,
                created_at=now(),
                updated_at=now(),
            )
            self.session.add(new_deployment)
            self.session.flush()

            created_deployments.append(new_deployment)

            self.copy_operators(deployment_id=new_deployment.uuid,
                                stored_operators=source.operators)

            # sys.maxsize as position means "append to the end of the list"
            self.fix_positions(
                project_id=project_id,
                deployment_id=new_deployment.uuid,
                new_position=sys.maxsize,
            )

        return created_deployments
Beispiel #10
0
    def update_experiment_from_template(
        self, experiment: schemas.ExperimentUpdate, experiment_id: str
    ):
        """
        Recreates the operators of experiment using a template.

        All current operators of the experiment are deleted and one new
        operator is created per task of the template. Dependencies between
        template tasks are translated to the uuids of the new operators.

        The template's task dicts are not modified, and a dependency may
        reference a task that appears later in the template.

        Parameters
        ----------
        experiment : projects.schemas.experiment.ExperimentUpdate
        experiment_id : str

        Returns
        -------
        projects.schemas.experiment.Experiment

        Raises
        ------
        BadRequest
            When `experiment.template_id` does not exist.
            When a template task depends on a task that is not in the template.
        """
        template = self.session.query(models.Template).get(experiment.template_id)

        if template is None:
            raise BadRequest(
                code="InvalidTemplateId",
                message="The specified template does not exist",
            )

        tasks = template.tasks

        # Give every template task its new operator uuid up front, so
        # dependencies resolve by dict lookup regardless of task order.
        new_ids = [uuid_alpha() for _ in tasks]
        new_id_by_template_uuid = {}
        for task, new_id in zip(tasks, new_ids):
            if "uuid" in task:
                new_id_by_template_uuid.setdefault(task["uuid"], new_id)

        # Build (and thereby validate) all operators before touching the
        # database, so an invalid template leaves existing operators in place.
        operators = []
        for task, new_id in zip(tasks, new_ids):
            task_dependencies = task["dependencies"]
            try:
                dependencies = [
                    new_id_by_template_uuid[d] for d in task_dependencies
                ]
            except KeyError as err:
                raise BadRequest(
                    code="InvalidDependencies",
                    message="The specified dependencies are not valid.",
                ) from err

            operators.append(
                models.Operator(
                    uuid=new_id,
                    experiment_id=experiment_id,
                    task_id=task["task_id"],
                    dependencies=dependencies,
                    position_x=task["position_x"],
                    position_y=task["position_y"],
                )
            )

        # remove operators
        self.session.query(models.Operator).filter(
            models.Operator.experiment_id == experiment_id
        ).delete()

        if operators:
            self.session.bulk_save_objects(operators)

        self.session.commit()

        experiment = self.session.query(models.Experiment).get(experiment_id)

        return schemas.Experiment.from_orm(experiment)
Beispiel #11
0
    def raise_if_parameters_are_invalid(self, parameters: Dict):
        """
        Raises an exception if the specified parameters are not valid.

        Parameters
        ----------
        parameters : dict
            Mapping of parameter name to value. Only the values are checked.

        Raises
        ------
        BadRequest
            When parameters is not a dict.
            When any parameter value is not None, str, int, float, bool,
            list, or dict.
        """
        if not isinstance(parameters, dict):
            raise BadRequest(PARAMETERS_EXCEPTION_MSG)

        allowed_types = (str, int, float, bool, list, dict)
        for value in parameters.values():
            # None is accepted: it represents a parameter with no value.
            if value is not None and not isinstance(value, allowed_types):
                raise BadRequest(PARAMETERS_EXCEPTION_MSG)
Beispiel #12
0
    def update_deployment(self, deployment: schemas.DeploymentUpdate,
                          project_id: str, deployment_id: str):
        """
        Updates a deployment and, when a position is informed, repositions it.

        Only the fields explicitly set in `deployment` are written, together
        with a fresh `updated_at` timestamp.

        Parameters
        ----------
        deployment : projects.schemas.deployment.DeploymentUpdate
        project_id : str
        deployment_id : str

        Returns
        -------
        projects.schemas.deployment.Deployment

        Raises
        ------
        NotFound
            When deployment_id does not exist.
        BadRequest
            When name is already the name of another deployment.
        """
        self.raise_if_deployment_does_not_exist(deployment_id)

        # The name may only be in use by the deployment being updated.
        name_owner = (
            self.session.query(models.deployment.Deployment)
            .filter(models.deployment.Deployment.project_id == project_id)
            .filter_by(name=deployment.name)
            .first()
        )
        if name_owner and name_owner.uuid != deployment_id:
            raise BadRequest(
                code="DeploymentNameExists",
                message="a deployment with that name already exists",
            )

        changes = deployment.dict(exclude_unset=True)
        changes["updated_at"] = datetime.utcnow()

        target_rows = self.session.query(models.Deployment).filter_by(
            uuid=deployment_id)
        target_rows.update(changes)

        if deployment.position:
            self.fix_positions(
                project_id=project_id,
                deployment_id=deployment_id,
                new_position=deployment.position,
            )

        self.session.commit()

        updated = self.session.query(models.Deployment).get(deployment_id)

        return schemas.Deployment.from_orm(updated)
Beispiel #13
0
    def update_experiment(self, experiment: schemas.ExperimentUpdate,
                          project_id: str, experiment_id: str):
        """
        Updates an experiment and, when a position is informed, repositions it.

        When `experiment.template_id` is informed, the update is delegated
        entirely to `update_experiment_from_template` and no other field is
        written by this method.

        Parameters
        ----------
        experiment: projects.schemas.experiment.ExperimentUpdate
        project_id : str
        experiment_id : str

        Returns
        -------
        projects.schemas.experiment.Experiment

        Raises
        ------
        NotFound
            When experiment_id does not exist.
        BadRequest
            When name is already the name of another experiment.
            When `experiment.template_id` is informed but it does not exist.
        """
        self.raise_if_experiment_does_not_exist(experiment_id)

        # The name may only be in use by the experiment being updated.
        name_owner = (
            self.session.query(models.Experiment)
            .filter(models.Experiment.project_id == project_id)
            .filter_by(name=experiment.name)
            .first()
        )
        if name_owner and name_owner.uuid != experiment_id:
            raise BadRequest("an experiment with that name already exists")

        if experiment.template_id:
            return self.update_experiment_from_template(
                experiment=experiment,
                experiment_id=experiment_id,
            )

        changes = experiment.dict(exclude_unset=True)
        changes["updated_at"] = datetime.utcnow()

        target_rows = self.session.query(models.Experiment).filter_by(
            uuid=experiment_id)
        target_rows.update(changes)

        if experiment.position:
            self.fix_positions(
                project_id=project_id,
                experiment_id=experiment_id,
                new_position=experiment.position,
            )

        self.session.commit()

        updated = self.session.query(models.Experiment).get(experiment_id)

        return schemas.Experiment.from_orm(updated)
Beispiel #14
0
    def create_monitoring(self, monitoring: schemas.MonitoringCreate,
                          deployment_id: str):
        """
        Creates a new monitoring in our database and schedules its deploy.

        The deploy runs as a background task. It receives the "runId" of the
        run returned by `self.run_controller.get_run`, or an empty string
        when the deployment has no run.

        Parameters
        ----------
        monitoring : projects.schemas.monitoring.MonitoringCreate
        deployment_id : str

        Returns
        -------
        projects.schemas.monitoring.Monitoring

        Raises
        ------
        BadRequest
            When `monitoring.task_id` does not exist.
        """
        found_task_uuid = (
            self.session.query(models.Task.uuid)
            .filter_by(uuid=monitoring.task_id)
            .scalar()
        )
        if found_task_uuid is None:
            raise BadRequest(
                code="InvalidTaskId",
                message="The specified task does not exist",
            )

        stored_monitoring = models.Monitoring(
            uuid=uuid_alpha(),
            deployment_id=deployment_id,
            task_id=monitoring.task_id,
        )
        self.session.add(stored_monitoring)
        self.session.commit()
        self.session.refresh(stored_monitoring)

        deployment = self.session.query(models.Deployment).get(deployment_id)

        # Uses empty run_id if a deployment does not have a run
        run = self.run_controller.get_run(deployment_id) or {"runId": ""}

        # Deploy the new monitoring
        self.background_tasks.add_task(
            deploy_monitoring,
            deployment_id=deployment_id,
            experiment_id=deployment.experiment_id,
            run_id=run["runId"],
            task_id=stored_monitoring.task_id,
            monitoring_id=stored_monitoring.uuid,
        )

        return schemas.Monitoring.from_orm(stored_monitoring)
Beispiel #15
0
    def create_project(self, project: schemas.ProjectCreate):
        """
        Creates a new project together with its first experiment.

        Parameters
        ----------
        project: projects.schemas.project.ProjectCreate

        Returns
        -------
        project: projects.schemas.project.Project

        Raises
        ------
        BadRequest
            When name is not a str instance.
            When name is already the name of another project.
        """
        requested_name = project.name
        if not isinstance(requested_name, str):
            raise BadRequest("name is required")

        name_owner = (
            self.session.query(models.Project)
            .filter_by(name=requested_name)
            .first()
        )
        if name_owner:
            raise BadRequest("a project with that name already exists")

        new_project = models.Project(
            uuid=uuid_alpha(),
            name=requested_name,
            description=project.description,
        )
        self.session.add(new_project)
        # flush so the project row exists before the experiment refers to it
        self.session.flush()

        # Every project starts with a default experiment.
        first_experiment = schemas.ExperimentCreate(name="Experimento 1")
        self.experiment_controller.create_experiment(
            experiment=first_experiment,
            project_id=new_project.uuid,
        )

        self.session.commit()
        self.session.refresh(new_project)

        return schemas.Project.from_orm(new_project)
Beispiel #16
0
async def handle_post_prediction(
        project_id: str,
        deployment_id: str,
        request: Request,
        background_tasks: BackgroundTasks,
        file: Optional[UploadFile] = File(None),
        session: Session = Depends(session_scope),
        kubeflow_userid: Optional[str] = Header("anonymous"),
):
    """
    Handles POST request to /.

    Accepts either an uploaded file (form-data) or a JSON body and forwards
    it to `PredictionController.create_prediction`.

    Parameters
    ----------
    project_id : str
    deployment_id : str
    request : starlette.requests.Request
    background_tasks : fastapi.BackgroundTasks
    file : starlette.datastructures.UploadFile
    session : sqlalchemy.orm.session.Session
    kubeflow_userid : fastapi.Header

    Returns
    -------
    Prediction: projects.schemas.prediction.Prediction

    Raises
    ------
    BadRequest
        When no file was sent and the body is not valid JSON.
    """
    ProjectController(
        session,
        kubeflow_userid=kubeflow_userid,
    ).raise_if_project_does_not_exist(project_id)

    DeploymentController(session).raise_if_deployment_does_not_exist(
        deployment_id)

    # at this endpoint, we can accept both form-data and json as the request content-type
    if file is not None:
        prediction_args = {"upload_file": file}
    else:
        try:
            prediction_args = await request.json()
        except JSONDecodeError:
            raise BadRequest(
                code="MissingRequiredFormDataOrJson",
                message="either form-data or json is required",
            )

    controller = PredictionController(session, background_tasks)
    return controller.create_prediction(deployment_id=deployment_id,
                                        **prediction_args)
Beispiel #17
0
    def copy_deployment(self, deployment_id: str, name: str, project_id: str):
        """
        Makes a copy of a deployment in our database.

        The copy keeps the experiment of the source deployment, receives a
        copy of its operators and is placed at the end of the list. This
        method flushes but does not commit the session.

        Parameters
        ----------
        deployment_id : str
            The uuid of the deployment to copy.
        name : str
            The name of the new deployment.
        project_id : str

        Returns
        -------
        list
            A list with the single new projects.models.deployment.Deployment.

        Raises
        ------
        BadRequest
            When deployment_id does not exist.
        """
        source = self.session.query(models.Deployment).get(deployment_id)
        if source is None:
            raise BadRequest(code="InvalidDeploymentId",
                             message="source deployment does not exist")

        duplicate = models.Deployment(
            uuid=uuid_alpha(),
            experiment_id=source.experiment_id,
            name=name,
            project_id=project_id,
            created_at=now(),
            updated_at=now(),
        )
        self.session.add(duplicate)
        self.session.flush()

        self.copy_operators(deployment_id=duplicate.uuid,
                            stored_operators=source.operators)

        # sys.maxsize as position means "append to the end of the list"
        self.fix_positions(
            project_id=project_id,
            deployment_id=duplicate.uuid,
            new_position=sys.maxsize,
        )

        return [duplicate]
Beispiel #18
0
    def deploy_run(self, deployment):
        """
        Starts a new run of the given deployment in Kubeflow Pipelines.

        Parameters
        ----------
        deployment : projects.models.deployment.Deployment

        Returns
        -------
        projects.schemas.run.Run

        Raises
        ------
        NotFound
            When deployment is None.
        BadRequest
            When `kfp_runs.start_run` raises ValueError.
        """
        if deployment is None:
            raise NotFound(
                code="DeploymentNotFound",
                message="The specified deployment does not exist",
            )

        # Removes operators that don't have a deployment_notebook (eg. Upload de Dados).
        # Then, fix dependencies in their children.
        deployable_operators = self.remove_non_deployable_operators(
            deployment.operators)

        run_arguments = dict(
            operators=deployable_operators,
            project_id=deployment.project_id,
            experiment_id=deployment.experiment_id,
            deployment_id=deployment.uuid,
            deployment_name=deployment.name,
        )
        try:
            run = kfp_runs.start_run(**run_arguments)
        except ValueError:
            raise BadRequest(
                code="MissingRequiredOperatorId",
                message="Necessary at least one operator.",
            )

        # Remove the object from the operator session in order not to update the database,
        # Just need to remove the dependencies for the runs.
        for stored_operator in deployment.operators:
            self.session.expunge(stored_operator)

        return schemas.Run.from_orm(run)
Beispiel #19
0
    def update_comparison(
        self, comparison: schemas.ComparisonUpdate, project_id: str, comparison_id: str
    ):
        """
        Updates a comparison in our database.

        Only the fields explicitly set in `comparison` are written, together
        with a fresh `updated_at` timestamp. `project_id` is accepted but not
        used by this method.

        Parameters
        ----------
        comparison: projects.schemas.comparison.ComparisonUpdate
        project_id : str
        comparison_id : str

        Returns
        -------
        projects.schemas.comparison.Comparison

        Raises
        ------
        BadRequest
            When `comparison.experiment_id` is informed but does not exist.
        NotFound
            When comparison_id does not exist.
        """
        self.raise_if_comparison_does_not_exist(comparison_id)

        source_experiment_id = comparison.experiment_id
        if source_experiment_id:
            source_experiment = self.session.query(models.Experiment).get(
                source_experiment_id)
            if source_experiment is None:
                raise BadRequest(
                    code="InvalidExperimentId",
                    message="source experiment does not exist",
                )

        changes = comparison.dict(exclude_unset=True)
        changes["updated_at"] = datetime.utcnow()

        target_rows = self.session.query(models.Comparison).filter_by(
            uuid=comparison_id)
        target_rows.update(changes)
        self.session.commit()

        updated = self.session.query(models.Comparison).get(comparison_id)

        return schemas.Comparison.from_orm(updated)
Beispiel #20
0
    def delete_multiple_projects(self, project_ids):
        """
        Deletes multiple projects and the stored objects of their experiments.

        After the projects are deleted from the database, the objects under
        the prefix "experiments/<experiment uuid>" of every experiment that
        belonged to them are removed. Object removal is best effort: a
        failure is logged and does not abort the request.

        Parameters
        ----------
        project_ids : list
            The list of project ids.

        Returns
        -------
        projects.schemas.message.Message

        Raises
        ------
        BadRequest
            When project_ids is empty.
        """
        import logging

        total_elements = len(project_ids)
        if total_elements < 1:
            raise BadRequest("inform at least one project")

        # Read the experiment uuids as plain values BEFORE deleting anything.
        # ORM instances are expired by commit() (default expire_on_commit),
        # and reading an attribute of an instance whose row was deleted
        # with its project can raise instead of returning the old value.
        experiment_ids = [
            row.uuid
            for row in self.session.query(models.Experiment.uuid)
            .filter(models.Experiment.project_id.in_(project_ids))
            .all()
        ]

        projects = self.session.query(models.Project) \
            .filter(models.Project.uuid.in_(project_ids)) \
            .all()

        for project in projects:
            self.session.delete(project)

        self.session.commit()

        for experiment_id in experiment_ids:
            prefix = join("experiments", experiment_id)
            try:
                remove_objects(prefix=prefix)
            except Exception:
                # Best effort: the database rows are already gone, so a
                # storage failure must not fail the request. Log it rather
                # than silently leaving orphaned objects behind.
                logging.getLogger(__name__).warning(
                    "Failed to remove objects with prefix %s",
                    prefix,
                    exc_info=True,
                )

        return schemas.Message(message="Successfully removed projects")
Beispiel #21
0
    def update_project(self, project: schemas.ProjectUpdate, project_id: str):
        """
        Updates a project of the current tenant in our database.

        Only the fields explicitly set in `project` are written, together
        with a fresh `updated_at` timestamp.

        Parameters
        ----------
        project: projects.schemas.project.ProjectUpdate
        project_id: str

        Returns
        -------
        project: projects.schemas.project.Project

        Raises
        ------
        NotFound
            When project_id does not exist.
        BadRequest
            When name is already the name of another project of the tenant.
        """
        self.raise_if_project_does_not_exist(project_id)

        tenant = self.kubeflow_userid

        # The name may only be in use by the project being updated.
        name_owner = (
            self.session.query(models.Project)
            .filter_by(name=project.name)
            .filter_by(tenant=tenant)
            .first()
        )
        if name_owner and name_owner.uuid != project_id:
            raise BadRequest(
                code="ProjectNameExists",
                message="a project with that name already exists",
            )

        changes = project.dict(exclude_unset=True)
        changes["updated_at"] = datetime.utcnow()

        target_rows = (
            self.session.query(models.Project)
            .filter_by(uuid=project_id)
            .filter_by(tenant=tenant)
        )
        target_rows.update(changes)
        self.session.commit()

        updated = (
            self.session.query(models.Project)
            .filter_by(uuid=project_id)
            .filter_by(tenant=tenant)
            .first()
        )

        return schemas.Project.from_orm(updated)
Beispiel #22
0
    def create_project(self, project: schemas.ProjectCreate):
        """
        Creates a new project for the current tenant, with a first experiment.

        Parameters
        ----------
        project: projects.schemas.project.ProjectCreate

        Returns
        -------
        project: projects.schemas.project.Project

        Raises
        ------
        BadRequest
            When name is already the name of another project of the tenant.
        """
        tenant = self.kubeflow_userid

        # Project names must be unique per tenant.
        name_owner = (
            self.session.query(models.Project)
            .filter_by(name=project.name)
            .filter_by(tenant=tenant)
            .first()
        )
        if name_owner:
            raise BadRequest(
                code="ProjectNameExists",
                message="a project with that name already exists",
            )

        new_project = models.Project(
            uuid=uuid_alpha(),
            name=project.name,
            description=project.description,
            tenant=tenant,
            created_at=now(),
            updated_at=now(),
        )
        self.session.add(new_project)
        # flush so the project row exists before the experiment refers to it
        self.session.flush()

        # Every project starts with a default experiment.
        first_experiment = schemas.ExperimentCreate(name="Experimento 1")
        self.experiment_controller.create_experiment(
            experiment=first_experiment,
            project_id=new_project.uuid,
        )

        self.session.commit()
        self.session.refresh(new_project)

        return schemas.Project.from_orm(new_project)
Beispiel #23
0
    def raise_if_invalid_docker_image(self, image):
        """
        Raise an error if a str does not meet the standards for a docker image name.

        The accepted form is one or more "/"-separated path segments after a
        leading segment, optionally followed by ":tag". Every segment and the
        tag may only contain lowercase letters, digits, "." and "-".

        Example: (username/organization)/name-of-the-image:tag

        Parameters
        ----------
        image : str or None
            The image name. None and the empty string are accepted.

        Raises
        ------
        BadRequest
            When a given image is a invalid one.
        """
        pattern = re.compile(
            r"[a-z0-9.-]+(?:/[a-z0-9.-]+)+(?::[a-z0-9.-]+)?"
        )

        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, which would let "user/image\n" through.
        if image and pattern.fullmatch(image) is None:
            raise BadRequest("invalid docker image name")
Beispiel #24
0
    def update_template(self, template: schemas.TemplateUpdate,
                        template_id: str):
        """
        Updates a template of the current tenant in our database.

        Only the fields explicitly set in `template` are written, together
        with a fresh `updated_at` timestamp.

        Parameters
        ----------
        template: projects.schemas.template.TemplateUpdate
        template_id : str

        Returns
        -------
        projects.schemas.template.Template

        Raises
        ------
        NotFound
            When template_id does not exist.
        BadRequest
            When name is already the name of another template of the tenant.
        """
        self.raise_if_template_does_not_exist(template_id)

        tenant = self.kubeflow_userid

        # The name may only be in use by the template being updated.
        name_owner = (
            self.session.query(models.Template)
            .filter_by(name=template.name)
            .filter_by(tenant=tenant)
            .first()
        )
        if name_owner and name_owner.uuid != template_id:
            raise BadRequest(
                code="TemplateNameExists",
                message="a template with that name already exists",
            )

        changes = template.dict(exclude_unset=True)
        changes["updated_at"] = datetime.utcnow()

        target_rows = (
            self.session.query(models.Template)
            .filter_by(uuid=template_id)
            .filter_by(tenant=tenant)
        )
        target_rows.update(changes)
        self.session.commit()

        updated = (
            self.session.query(models.Template)
            .filter_by(uuid=template_id)
            .filter_by(tenant=tenant)
            .first()
        )

        return schemas.Template.from_orm(updated)
Beispiel #25
0
    def raise_if_has_cycles(
        self,
        project_id: str,
        operator_id: str,
        dependencies: List[str],
        experiment_id: Optional[str] = None,
        deployment_id: Optional[str] = None,
    ):
        """
        Raises an exception if the operator dependencies are cyclical.

        The operators matching both experiment_id and deployment_id are
        loaded, and `self.has_cycles_util` is called for each one that is
        still unvisited, receiving the proposed `dependencies` of
        `operator_id`. `project_id` is accepted but not used by this method.

        Parameters
        ----------
        project_id : str
        operator_id : str
        dependencies : list or None
        experiment_id : str or None
        deployment_id : str or None

        Returns
        -------
        bool
            Always False (no cycle was found).

        Raises
        ------
        BadRequest
            When dependencies are cyclic.
        """
        stored_operators = (
            self.session.query(models.Operator)
            .filter_by(experiment_id=experiment_id)
            .filter_by(deployment_id=deployment_id)
            .all()
        )
        operator_ids = [stored.uuid for stored in stored_operators]

        # Both maps are shared with has_cycles_util across the iterations.
        visited = dict.fromkeys(operator_ids, False)
        recursion_stack = dict.fromkeys(operator_ids, False)

        for current_id in operator_ids:
            if visited[current_id] is not False:
                continue

            found_cycle = self.has_cycles_util(
                current_id, visited, recursion_stack, dependencies, operator_id)
            if found_cycle is True:
                raise BadRequest(code="InvalidCyclicalDependencies",
                                 message="Cyclical dependencies.")

        return False
Beispiel #26
0
from projects import models, schemas
from projects.controllers.tasks import TaskController
from projects.controllers.utils import uuid_alpha
from projects.exceptions import BadRequest, NotFound
from projects.kubernetes.kube_config import load_kube_config
from projects.agent.utils import list_resource_version
from projects.kfp import KF_PIPELINES_NAMESPACE
from projects.kfp.runs import get_latest_run_id
from projects.utils import now

from kubernetes import client
from kubernetes.watch import Watch

# Exception instances built once at import time, one per error case
# (operator not found, invalid parameters, invalid dependencies).
NOT_FOUND = NotFound(code="OperatorNotFound",
                     message="The specified operator does not exist")
INVALID_PARAMETERS = BadRequest(
    code="InvalidParameters", message="The specified parameters are not valid")
INVALID_DEPENDENCIES = BadRequest(
    code="InvalidDependencies",
    message="The specified dependencies are not valid.")


class OperatorController:
    def __init__(self, session):
        """
        Keeps the given session and builds a task controller around it.

        Parameters
        ----------
        session
            Session object stored on the instance; the same object is
            handed to ``TaskController``.
        """
        self.task_controller = TaskController(session)
        self.session = session

    def raise_if_operator_does_not_exist(self, operator_id: str):
        """
        Raises an exception if the specified operator does not exist.

        Parameters
Beispiel #27
0
    def create_template(self, template: schemas.TemplateCreate):
        """
        Creates a new template from the operators of an experiment or deployment.

        The operators are ordered through ``order_operators_by_dependencies``
        and stored, in that order, as the ``tasks`` list of the new template.

        Parameters
        ----------
        template : projects.schemas.template.TemplateCreate
            ``experiment_id`` takes precedence over ``deployment_id`` when
            both are set.

        Returns
        -------
        projects.schemas.template.Template

        Raises
        ------
        BadRequest
            When name is not a str instance.
            When neither experiment_id nor deployment_id is given.
            When name is already the name of another template.
        NotFound
            When the referenced experiment or deployment does not exist.
        """
        if not isinstance(template.name, str):
            raise BadRequest("name is required")

        # Pick the parent (experiment first, then deployment) whose operators
        # will become the template tasks.
        if template.experiment_id:
            parent_id = template.experiment_id
            parent_uuid_column = models.Experiment.uuid
            operator_filter = {"experiment_id": parent_id}
            missing_message = "The specified experiment does not exist"
        elif template.deployment_id:
            parent_id = template.deployment_id
            parent_uuid_column = models.Deployment.uuid
            operator_filter = {"deployment_id": parent_id}
            missing_message = "The specified deployment does not exist"
        else:
            raise BadRequest(
                "experimentId or deploymentId needed to create template.")

        found_uuid = self.session.query(parent_uuid_column) \
            .filter_by(uuid=parent_id) \
            .scalar()
        if found_uuid is None:
            raise NotFound(missing_message)

        operators = self.session.query(models.Operator) \
            .filter_by(**operator_filter) \
            .all()

        same_name = self.session.query(models.Template) \
            .filter_by(name=template.name) \
            .first()
        if same_name:
            raise BadRequest("a template with that name already exists")

        # Keep sweeping over the operators until the ordered list has as many
        # entries as there are operators.
        # NOTE(review): termination relies on order_operators_by_dependencies
        # eventually placing every operator - confirm it cannot stall.
        operators_ordered = []
        while len(operators_ordered) != len(operators):
            for candidate in operators:
                self.order_operators_by_dependencies(operators_ordered,
                                                     candidate)

        # A JSON array preserves element order, so positions need not be saved.
        task_fields = ("uuid", "task_id", "dependencies", "position_x",
                       "position_y")
        ordered_sources = (
            next((op for op in operators if op.uuid == ordered_uuid), None)
            for ordered_uuid in operators_ordered)
        tasks = [{field: getattr(source, field)
                  for field in task_fields}
                 for source in ordered_sources]

        new_template = models.Template(uuid=uuid_alpha(),
                                       name=template.name,
                                       tasks=tasks)
        self.session.add(new_template)
        self.session.commit()
        self.session.refresh(new_template)

        return schemas.Template.from_orm(new_template)
Beispiel #28
0
    def list_projects(
        self,
        page: Optional[int] = 1,
        page_size: Optional[int] = 10,
        order_by: Optional[str] = None,
        **filters,
    ):
        """
        Lists the projects of the current tenant. Supports filtering,
        pagination, and sorting.

        Parameters
        ----------
        page : int
            The page number. First page is 1.
        page_size : int
            The page size. Default value is 10.
        order_by : str
            Order by instruction. Format is "column [asc|desc]".
            Defaults to "name asc" when empty.
        **filters : dict
            Maps a ``models.Project`` attribute name to a value; each entry
            adds an ``ILIKE '%value%'`` condition on that attribute.

        Returns
        -------
        projects.schemas.project.ProjectList

        Raises
        ------
        BadRequest
            When order_by is invalid.
        """
        query = self.session.query(
            models.Project).filter_by(tenant=self.kubeflow_userid)
        query_total = self.session.query(func.count(
            models.Project.uuid)).filter_by(tenant=self.kubeflow_userid)

        # The same condition goes to both queries so the count matches the
        # rows that are listed.
        for column, value in filters.items():
            condition = getattr(
                models.Project,
                column).ilike(f"%{value}%").collate("utf8mb4_bin")
            query = query.filter(condition)
            query_total = query_total.filter(condition)

        # BUG (kept from the original implementation)
        # query_total.limit(page_size) didn't work, so the count of all
        # matching rows is fetched and then capped at page_size.
        # This solution uses an unoptimized query, and should be improved.
        total = min(page_size, query_total.scalar())

        # Default sort is name in ascending order
        if not order_by:
            order_by = "name asc"

        # Validate with explicit checks instead of `assert`: assertions are
        # removed when Python runs with -O, which would let an unknown column
        # or sort direction slip through.
        parts = order_by.strip().split()
        is_valid = (len(parts) == 2
                    and parts[1].lower() in ("asc", "desc")
                    and parts[0] in models.Project.__table__.columns.keys())
        if not is_valid:
            raise BadRequest(code="InvalidOrderBy",
                             message="Invalid order argument")

        column, sort = parts[0], parts[1].lower()

        # Sorts records
        direction = asc if sort == "asc" else desc
        query = query.order_by(direction(getattr(models.Project, column)))

        # Applies pagination
        query = query.limit(page_size).offset((page - 1) * page_size)
        projects = query.all()

        return schemas.ProjectList.from_orm(projects, total)
Beispiel #29
0
    def create_operator(
        self,
        operator: schemas.OperatorCreate,
        project_id: str,
        experiment_id: Optional[str] = None,
        deployment_id: Optional[str] = None,
    ):
        """
        Validates the given operator data and stores a new operator.

        Missing ``dependencies`` and ``parameters`` are replaced, on the
        received ``operator`` object itself, by an empty list and an empty
        dict respectively.

        Parameters
        ----------
        operator: projects.schemas.operator.OperatorCreate
        project_id : str
        experiment_id : str or None
            When set, the dependencies are validated before saving.
        deployment_id : str or None

        Returns
        -------
        projects.schemas.operator.Operator

        Raises
        ------
        BadRequest
            When task_id is not a str instance.
            When the task does not exist.
            When both experiment_id and deployment_id are given.
            Validation errors raised by the dependency/parameter checks
            propagate unchanged.
        """
        task_id = operator.task_id
        if not isinstance(task_id, str):
            raise BadRequest(code="MissingRequiredTaskId",
                             message="taskId is required")

        # A missing task is reported to the caller as an invalid request.
        try:
            self.task_controller.raise_if_task_does_not_exist(task_id)
        except NotFound:
            raise BadRequest(code="InvalidTaskId",
                             message="source task does not exist")

        if operator.dependencies is None:
            operator.dependencies = []

        if experiment_id:
            self.raise_if_dependencies_are_invalid(
                project_id=project_id,
                experiment_id=experiment_id,
                deployment_id=deployment_id,
                dependencies=operator.dependencies,
            )

        both_parents_given = bool(experiment_id and deployment_id)
        if both_parents_given:
            raise BadRequest(
                code="InvalidOperatorRequestBody",
                message=
                "Operator cannot contain an experiment and a deployment simultaneously",
            )

        if operator.parameters is None:
            operator.parameters = {}

        self.raise_if_parameters_are_invalid(operator.parameters)

        # Column values for the new row, listed in the model's keyword order.
        fields = {
            "uuid": uuid_alpha(),
            "name": operator.name,
            "experiment_id": experiment_id,
            "deployment_id": deployment_id,
            "task_id": operator.task_id,
            "dependencies": operator.dependencies,
            "status": operator.status,
            "parameters": operator.parameters,
            "position_x": operator.position_x,
            "position_y": operator.position_y,
            "created_at": now(),
            "updated_at": now(),
        }
        record = models.Operator(**fields)

        self.session.add(record)
        self.session.commit()
        self.session.refresh(record)

        return schemas.Operator.from_orm(record)
Beispiel #30
0
    def copy_experiment(self, experiment: schemas.ExperimentCreate,
                        project_id: str):
        """
        Makes a copy of an experiment, including its operators, in our database.

        Each operator of the source experiment is re-created under the new
        experiment; afterwards the dependencies of every copy are rewritten
        to point at the copied operators instead of the source ones.

        Parameters
        ----------
        experiment: projects.schemas.experiment.ExperimentCreate
            ``copy_from`` identifies the source experiment and ``name`` is
            given to the copy.
        project_id: str

        Returns
        -------
        projects.models.Experiment
            The new experiment instance, added to the session and flushed
            by this method.

        Raises
        ------
        BadRequest
            When copy_from does not exist.
        """
        source = self.session.query(models.Experiment).get(
            experiment.copy_from)
        if source is None:
            raise BadRequest("source experiment does not exist")

        clone = models.Experiment(uuid=uuid_alpha(),
                                  name=experiment.name,
                                  project_id=project_id)
        self.session.add(clone)
        self.session.flush()

        # source operator uuid -> (uuid of its copy, dependencies of the source).
        # Used below to express the dependencies with the new operator uuids.
        copied = {}

        for source_operator in source.operators:
            payload = schemas.OperatorCreate(
                task_id=source_operator.task_id,
                experiment_id=clone.uuid,
                parameters=source_operator.parameters,
                position_x=source_operator.position_x,
                position_y=source_operator.position_y,
            )
            created = self.operator_controller.create_operator(
                operator=payload,
                project_id=project_id,
                experiment_id=clone.uuid)

            copied[source_operator.uuid] = (created.uuid,
                                            source_operator.dependencies)

        # sets dependencies on new operators
        for copy_uuid, source_dependencies in copied.values():
            remapped = [copied[dep][0] for dep in source_dependencies]
            patch = schemas.OperatorUpdate(dependencies=remapped)
            self.operator_controller.update_operator(
                project_id=project_id,
                experiment_id=clone.uuid,
                operator_id=copy_uuid,
                operator=patch)

        return clone