Example #1
0
    def _collateral(self, project_uuid: str, pipeline_uuid: str,
                    pipeline_name: str, **kwargs):
        """Create the pipeline on disk and register it in the orchest-api.

        Registers the pipeline with the orchest-api first; only on a
        successful registration is the (clean, empty) pipeline
        definition written to the pipeline's JSON file.
        """
        target_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        definition_path = get_pipeline_path(pipeline_uuid, project_uuid)
        os.makedirs(target_dir, exist_ok=True)

        # Register first so that no pipeline file is written when the
        # orchest-api rejects the pipeline.
        api_url = (
            f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/'
        )
        response = requests.post(
            api_url,
            json={"project_uuid": project_uuid, "uuid": pipeline_uuid},
        )
        if response.status_code != 201:
            raise Exception("Orchest-api pipeline creation failed.")

        # A clean pipeline definition with default settings.
        definition = {
            "name": pipeline_name,
            "version": "1.0.0",
            "uuid": pipeline_uuid,
            "settings": {
                "auto_eviction": False,
                "data_passing_memory_size": "1GB",
            },
            "steps": {},
            "parameters": {},
        }

        with open(definition_path, "w") as handle:
            json.dump(definition, handle, indent=4, sort_keys=True)
Example #2
0
    def pipelines_json(project_uuid, pipeline_uuid):
        """Read (GET) or save (POST) a pipeline's JSON definition.

        POST: creates the step files referenced by the definition, sets
        the correct kernel for each Notebook step, and writes the
        definition to disk.
        GET: returns the raw pipeline file content, or 404 when the
        file does not exist.
        """
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("experiment_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("experiment_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            # NOTE(review): request.form.get("pipeline_json") may be
            # None if the form field is missing — verify the caller
            # always sends it.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # First create all files that are part of the pipeline_json
            # definition.
            # TODO: consider removing other files (no way to do this
            # reliably, special case might be rename).
            create_pipeline_files(pipeline_json, pipeline_directory,
                                  project_uuid)

            # Side effect: for each Notebook in the pipeline.json set
            # the correct kernel.
            pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                          project_uuid)

            with open(pipeline_json_path, "w") as json_file:
                json_file.write(json.dumps(pipeline_json, indent=4))

            # Analytics call.
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success":
                        False,
                        "reason":
                        ".orchest file doesn't exist at location %s" %
                        pipeline_json_path,
                    }),
                    404,
                )
            else:
                with open(pipeline_json_path) as json_file:
                    return jsonify({
                        "success": True,
                        "pipeline_json": json_file.read()
                    })
        # Bug fix: removed an unreachable `return ""` that followed the
        # GET branch — both of its paths already return.
Example #3
0
    def _collateral(
        self,
        new_uuid: bool,
        project_uuid: str,
        pipeline_uuid: str,
        pipeline_path: str,
        pipeline_json: dict,
    ):
        """Register the pipeline in the orchest-api; persist a new uuid.

        Args:
            new_uuid: True when the transaction generated a fresh uuid
                for the pipeline, which then still needs to be written
                to the pipeline file on disk.
            project_uuid: Project the pipeline belongs to.
            pipeline_uuid: Uuid the pipeline should carry.
            pipeline_path: Project-relative path of the pipeline file.
            pipeline_json: Parsed pipeline definition (mutated below,
                so this is a dict — the previous `str` annotation was
                incorrect).

        Raises:
            Exception: If the orchest-api did not accept the pipeline.
        """
        resp = requests.post(
            f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/',
            json={
                "project_uuid": project_uuid,
                "uuid": pipeline_uuid
            },
        )
        if resp.status_code != 201:
            raise Exception("Orchest-api pipeline creation failed.")

        if new_uuid:
            pipeline_json_path = get_pipeline_path(None,
                                                   project_uuid,
                                                   pipeline_path=pipeline_path)

            with open(pipeline_json_path, "w") as json_file:
                # Persist the newly assigned uuid into the file so the
                # on-disk definition matches the db entry.
                pipeline_json["uuid"] = pipeline_uuid
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)
Example #4
0
    def _collateral(self, project_uuid: str, pipeline_uuid: str,
                    pipeline_name: str, **kwargs):
        """Write a clean, empty pipeline definition to disk."""
        target_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        definition_path = get_pipeline_path(pipeline_uuid, project_uuid)

        os.makedirs(target_dir, exist_ok=True)

        # Minimal pipeline definition with default settings.
        definition = {
            "name": pipeline_name,
            "version": "1.0.0",
            "uuid": pipeline_uuid,
            "settings": {
                "auto_eviction": False,
                "data_passing_memory_size": "1GB",
            },
            "steps": {},
            "parameters": {},
        }

        with open(definition_path, "w") as handle:
            json.dump(definition, handle, indent=4, sort_keys=True)
Example #5
0
    def pipelines_json(project_uuid, pipeline_uuid):
        """Read (GET) or save (POST) a pipeline's JSON definition.

        POST: sets the correct kernel for each Notebook step and writes
        the definition to disk.
        GET: returns the pipeline definition as a JSON string, or 404
        when the file does not exist.
        """
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            # NOTE(review): request.form.get("pipeline_json") may be
            # None if the form field is missing — verify the caller
            # always sends it.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # Side effect: for each Notebook in the pipeline.json set
            # the correct kernel.
            pipeline_set_notebook_kernels(
                pipeline_json, pipeline_directory, project_uuid
            )

            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            # Analytics call.
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify(
                        {
                            "success": False,
                            "reason": ".orchest file doesn't exist at location %s"
                            % pipeline_json_path,
                        }
                    ),
                    404,
                )

            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            # json.dumps because the front end expects it as a
            # string.
            return jsonify(
                {"success": True, "pipeline_json": json.dumps(pipeline_json)}
            )
        # Bug fix: removed an unreachable `return ""` that followed the
        # GET branch — both of its paths already return.
Example #6
0
    def file_viewer(project_uuid, pipeline_uuid, step_uuid):
        """Return a pipeline step's file content for viewing.

        Notebooks (.ipynb) are rendered to HTML; any other file is
        returned verbatim. Responds with the extension, content, step
        title and filename, or a 404 when the pipeline file, step entry
        or notebook is missing.
        """
        experiment_uuid = request.args.get("experiment_uuid")
        pipeline_run_uuid = request.args.get("pipeline_run_uuid")

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                               experiment_uuid,
                                               pipeline_run_uuid)
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                              experiment_uuid,
                                              pipeline_run_uuid)

        if not os.path.isfile(pipeline_json_path):
            return return_404("Could not find pipeline.json for pipeline %s" %
                              pipeline_json_path)

        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        try:
            step = pipeline_json["steps"][step_uuid]
            file_path = os.path.join(pipeline_dir, step["file_path"])
            filename = step["file_path"]
            step_title = step["title"]
        except Exception as e:
            logging.info(e)
            # Bug fix: the original used the "%e" conversion, which
            # expects a number and raised a TypeError when formatting
            # the exception object; "%s" is the correct conversion.
            return return_404("Invalid JSON for pipeline %s error: %s" %
                              (pipeline_json_path, e))

        file_ext = file_path.split(".")[-1]
        file_content = ""

        if file_ext == "ipynb":
            if os.path.isfile(file_path):
                try:
                    html_exporter = HTMLExporter()
                    (file_content, _) = html_exporter.from_filename(file_path)
                except IOError as error:
                    logging.info("Error opening notebook file %s error: %s" %
                                 (file_path, error))
                    return return_404("Could not find notebook file %s" %
                                      file_path)
        else:
            try:
                with open(file_path) as file:
                    file_content = file.read()
            # `(IOError, Exception)` was redundant: IOError is a
            # subclass of Exception.
            except Exception:
                return jsonify({"message": "Could not read file."}), 500

        return jsonify({
            "ext": file_ext,
            "content": file_content,
            "step_title": step_title,
            "filename": filename,
        })
Example #7
0
    def pipelines_delete(project_uuid, pipeline_uuid):
        """Delete a pipeline: remove its file and clean up Orchest state."""
        pipeline = (
            Pipeline.query
            .filter(Pipeline.uuid == pipeline_uuid)
            .filter(Pipeline.project_uuid == project_uuid)
            .one_or_none()
        )
        if pipeline is None:
            return jsonify({"message": "Pipeline could not be found."}), 404

        path = get_pipeline_path(pipeline.uuid, project_uuid)
        os.remove(path)
        cleanup_pipeline_from_orchest(pipeline)

        return jsonify({"success": True})
Example #8
0
    def _transaction(self, project_uuid: str, pipeline_uuid: str):
        """Remove a pipeline from the db."""
        # Resolve the file path first: get_pipeline_path queries the db
        # row, which no longer exists once the delete below has run.
        path = get_pipeline_path(pipeline_uuid, project_uuid)

        # Deleting the pipeline cascades to jobs -> pipeline runs.
        Pipeline.query.filter_by(
            project_uuid=project_uuid,
            uuid=pipeline_uuid,
        ).delete()

        self.collateral_kwargs.update({
            "project_uuid": project_uuid,
            "pipeline_uuid": pipeline_uuid,
            "pipeline_json_path": path,
        })
Example #9
0
    def _revert(self):
        """Undo the collateral effects of a failed pipeline move."""
        kwargs = self.collateral_kwargs
        project_uuid = kwargs["project_uuid"]
        pipeline_uuid = kwargs["pipeline_uuid"]

        old_path = get_pipeline_path(
            None,
            project_uuid,
            pipeline_path=kwargs["old_path"])

        # If the file was already renamed, move it back so that it is
        # not discovered as a new pipeline.
        if kwargs.get("moved", False):
            new_path = get_pipeline_path(
                None,
                project_uuid,
                pipeline_path=kwargs["new_path"])
            try:
                os.rename(new_path, old_path)
            except Exception as e:
                current_app.logger.error(
                    f"Error while reverting pipeline move: {e}")

        # Restore the original pipeline definition, i.e. the step file
        # paths relative to the old location.
        backup = kwargs.get("pipeline_def_backup")
        if backup is not None:
            with open(old_path, "w") as json_file:
                json.dump(backup, json_file, indent=4, sort_keys=True)

        Pipeline.query.filter_by(
            project_uuid=project_uuid,
            uuid=pipeline_uuid,
        ).update({"status": "READY", "path": kwargs["old_path"]})
        db.session.commit()
Example #10
0
 def _collateral(
     self,
     new_uuid: bool,
     project_uuid: str,
     pipeline_uuid: str,
     pipeline_path: str,
     pipeline_json: dict,
 ):
     """Write the newly assigned uuid into the pipeline file on disk.

     Args:
         new_uuid: True when the transaction generated a fresh uuid for
             the pipeline; only then does the file need rewriting.
         project_uuid: Project the pipeline belongs to.
         pipeline_uuid: Uuid the pipeline should carry.
         pipeline_path: Project-relative path of the pipeline file.
         pipeline_json: Parsed pipeline definition (mutated below, so
             this is a dict — the previous `str` annotation was
             incorrect).
     """
     if new_uuid:
         pipeline_json_path = get_pipeline_path(None,
                                                project_uuid,
                                                pipeline_path=pipeline_path)
         with open(pipeline_json_path, "w") as json_file:
             # Keep the on-disk definition in sync with the db entry.
             pipeline_json["uuid"] = pipeline_uuid
             json.dump(pipeline_json, json_file, indent=4, sort_keys=True)
Example #11
0
    def pipelines_delete(project_uuid, pipeline_uuid):
        """Delete a pipeline: remove its file and its db entry.

        Returns 404 when the pipeline is not found.
        """
        # Fetch the row once with one_or_none() instead of the previous
        # count()-then-first() pair: one query instead of two, and no
        # race window between the existence check and the fetch. This
        # also matches the sibling delete handler's style.
        pipeline = (Pipeline.query.filter(
            Pipeline.uuid == pipeline_uuid).filter(
                Pipeline.project_uuid == project_uuid).one_or_none())
        if pipeline is None:
            return jsonify({"message": "Pipeline could not be found."}), 404

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid)
        os.remove(pipeline_json_path)

        db.session.delete(pipeline)
        db.session.commit()

        return jsonify({"success": True})
Example #12
0
    def _transaction(self, project_uuid: str, pipeline_path: str):
        """Register a pipeline discovered on the filesystem in the db.

        If the uuid found in the pipeline file is missing or already
        taken by a READY pipeline of the project, a new uuid is
        generated; writing it to disk is left to the collateral effect.
        """
        pipeline_json_path = get_pipeline_path(None,
                                               project_uuid,
                                               pipeline_path=pipeline_path)

        # Check the uuid of the pipeline. If the uuid is taken by
        # another pipeline in the project then generate a new uuid for
        # the pipeline.
        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)
        file_pipeline_uuid = pipeline_json.get("uuid")

        # Defaults, in case the transaction fails midway.
        for key in ("new_uuid", "project_uuid", "pipeline_uuid",
                    "pipeline_path", "pipeline_json"):
            self.collateral_kwargs[key] = None

        # If the pipeline has its own uuid and the uuid is not in the
        # DB already then the pipeline does not need a new uuid written
        # to disk.
        uuid_is_usable = (
            file_pipeline_uuid is not None
            and Pipeline.query.filter_by(project_uuid=project_uuid,
                                         uuid=file_pipeline_uuid,
                                         status="READY").count() == 0)
        if uuid_is_usable:
            self.collateral_kwargs["new_uuid"] = False
        else:
            self.collateral_kwargs["new_uuid"] = True
            # Generate a new uuid for the pipeline.
            file_pipeline_uuid = str(uuid.uuid4())

        self.collateral_kwargs["project_uuid"] = project_uuid
        self.collateral_kwargs["pipeline_uuid"] = file_pipeline_uuid
        self.collateral_kwargs["pipeline_path"] = pipeline_path
        self.collateral_kwargs["pipeline_json"] = pipeline_json

        # Add the pipeline to the db.
        db.session.add(
            Pipeline(
                uuid=file_pipeline_uuid,
                path=pipeline_path,
                project_uuid=project_uuid,
            ))
Example #13
0
    def pipelines_create(project_uuid):
        """Create a new pipeline at the requested path within a project.

        Registers the pipeline in the db and writes a clean pipeline
        definition to disk; 409 when the path is already taken.
        """
        pipeline_path = request.json["pipeline_path"]

        path_taken = (Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).filter(
                Pipeline.path == pipeline_path).count() > 0)
        if path_taken:
            return (
                jsonify({
                    "message":
                    "Pipeline already exists at path '%s'." % pipeline_path
                }),
                409,
            )

        pipeline_uuid = str(uuid.uuid4())
        db.session.add(
            Pipeline(path=pipeline_path,
                     uuid=pipeline_uuid,
                     project_uuid=project_uuid))
        db.session.commit()

        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid)
        os.makedirs(pipeline_dir, exist_ok=True)

        # Generate a clean pipeline.json.
        definition = {
            "name": request.json["name"],
            "version": "1.0.0",
            "uuid": pipeline_uuid,
            "settings": {
                "auto_eviction": False,
                "data_passing_memory_size": "1GB",
            },
            "steps": {},
        }

        with open(pipeline_json_path, "w") as pipeline_json_file:
            pipeline_json_file.write(json.dumps(definition, indent=4))

        return jsonify({"success": True})
Example #14
0
    def notebook_html_get(project_uuid, pipeline_uuid, step_uuid):
        """Render a pipeline step's notebook to HTML.

        Returns the rendered HTML body, or a 404 when the pipeline
        file, the step entry, or the notebook file is missing.
        """
        experiment_uuid = request.args.get("experiment_uuid")
        pipeline_run_uuid = request.args.get("pipeline_run_uuid")

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                               experiment_uuid,
                                               pipeline_run_uuid)
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                              experiment_uuid,
                                              pipeline_run_uuid)

        if not os.path.isfile(pipeline_json_path):
            return return_404("Could not find pipeline.json for pipeline %s" %
                              pipeline_json_path)

        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        try:
            notebook_path = os.path.join(
                pipeline_dir,
                pipeline_json["steps"][step_uuid]["file_path"])
        except Exception as e:
            logging.info(e)
            # Bug fix: the original used the "%e" conversion, which
            # expects a number and raised a TypeError when formatting
            # the exception object; "%s" is the correct conversion.
            return return_404("Invalid JSON for pipeline %s error: %s" %
                              (pipeline_json_path, e))

        # Bug fix: the original fell through and implicitly returned
        # None when the notebook file did not exist; return a 404 like
        # the IOError branch below does.
        if not os.path.isfile(notebook_path):
            return return_404("Could not find notebook file %s" %
                              notebook_path)

        try:
            html_exporter = HTMLExporter()
            (body, _) = html_exporter.from_filename(notebook_path)
            return body
        except IOError as error:
            logging.info("Error opening notebook file %s error: %s" %
                         (notebook_path, error))
            return return_404("Could not find notebook file %s" %
                              notebook_path)
Example #15
0
    def _collateral(
        self,
        new_uuid: bool,
        project_uuid: str,
        pipeline_uuid: str,
        pipeline_path: str,
        pipeline_json: dict,
    ):
        """Ensure the orchest-api knows the pipeline; persist a new uuid.

        Args:
            new_uuid: True when the transaction generated a fresh uuid
                for the pipeline, which then still needs to be written
                to the pipeline file on disk.
            project_uuid: Project the pipeline belongs to.
            pipeline_uuid: Uuid the pipeline should carry.
            pipeline_path: Project-relative path of the pipeline file.
            pipeline_json: Parsed pipeline definition (mutated below,
                so this is a dict — the previous `str` annotation was
                incorrect).

        Raises:
            Exception: If registration in the orchest-api failed.
        """
        # At the project level, pipeline files with the same UUID are
        # considered to be the same pipeline. If we are "replacing" the
        # pipeline it's because the previous pipeline was deleted and
        # this new pipeline has been discovered through the FS. DELETEs
        # of a pipeline to the orchest-api don't actually delete the
        # pipeline, so we don't need to POST, since the old entry will
        # still be there. Currently, we don't need to PUT since no field
        # of the pipeline entry in the orchest-api needs to be updated
        # when replacing.
        resp = requests.get(
            f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/'
            f"{project_uuid}/{pipeline_uuid}", )
        if resp.status_code == 404:
            # Unknown to the orchest-api: register it now.
            resp = requests.post(
                f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/',
                json={
                    "project_uuid": project_uuid,
                    "uuid": pipeline_uuid
                },
            )
            if resp.status_code != 201:
                raise Exception("Orchest-api pipeline creation failed.")

        if new_uuid:
            pipeline_json_path = get_pipeline_path(None,
                                                   project_uuid,
                                                   pipeline_path=pipeline_path)

            with open(pipeline_json_path, "w") as json_file:
                # Keep the on-disk definition in sync with the db entry.
                pipeline_json["uuid"] = pipeline_uuid
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)
Example #16
0
    def pipelines_get(project_uuid):
        """List a project's pipelines, augmented with their names.

        First synchronizes the filesystem state with the db, then reads
        each pipeline's name from its file on disk.
        """
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                SyncProjectPipelinesDBState(tpe).transaction(project_uuid)
        except Exception as e:
            msg = ("Error during project pipelines synchronization of "
                   f"{project_uuid}: {str(e)}.")
            return jsonify({"message": msg}), 500

        pipelines_augmented = []
        for pipeline in Pipeline.query.filter(
                Pipeline.project_uuid == project_uuid).all():

            entry = {"uuid": pipeline.uuid, "path": pipeline.path}

            json_path = get_pipeline_path(pipeline.uuid,
                                          pipeline.project_uuid)
            if os.path.isfile(json_path):
                with open(json_path, "r") as json_file:
                    entry["name"] = json.load(json_file)["name"]
            else:
                entry["name"] = "Warning: pipeline file was not found."

            pipelines_augmented.append(entry)

        json_string = json.dumps(
            {"success": True, "result": pipelines_augmented})
        return json_string, 200, {"content-type": "application/json"}
Example #17
0
    def pipelines_rename(project_uuid, pipeline_uuid):
        """Rename a pipeline by rewriting the "name" key of its file.

        Returns 404 when the pipeline is unknown or its file is gone.
        """
        # Consistency fix: also filter on project_uuid like every other
        # pipeline endpoint does; filtering on the uuid alone could
        # match a pipeline belonging to a different project.
        exists = (Pipeline.query.filter(Pipeline.uuid == pipeline_uuid).filter(
            Pipeline.project_uuid == project_uuid).count() > 0)
        if not exists:
            return "", 404

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid)
        if not os.path.isfile(pipeline_json_path):
            return "", 404

        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        # NOTE(review): request.form.get("name") is None when the form
        # field is missing, which would write a null name — verify the
        # caller always sends it.
        pipeline_json["name"] = request.form.get("name")

        with open(pipeline_json_path, "w") as json_file:
            json_file.write(json.dumps(pipeline_json, indent=2))

        json_string = json.dumps({"success": True})
        return json_string, 200, {"content-type": "application/json"}
Example #18
0
    def pipelines_get(project_uuid):
        """List a project's pipelines, augmented with their names.

        First synchronizes the filesystem state with the db, then reads
        each pipeline's name from its file on disk.
        """
        try:
            sync_project_pipelines_db_state(project_uuid)
        except Exception as e:
            return jsonify({"message": str(e)}), 500

        pipelines_augmented = []
        for pipeline in Pipeline.query.filter(
                Pipeline.project_uuid == project_uuid).all():

            entry = {"uuid": pipeline.uuid, "path": pipeline.path}

            json_path = get_pipeline_path(pipeline.uuid,
                                          pipeline.project_uuid)
            if os.path.isfile(json_path):
                with open(json_path, "r") as json_file:
                    entry["name"] = json.load(json_file)["name"]
            else:
                entry["name"] = "Warning: pipeline file was not found."

            pipelines_augmented.append(entry)

        json_string = json.dumps(
            {"success": True, "result": pipelines_augmented})
        return json_string, 200, {"content-type": "application/json"}
Example #19
0
    def _transaction(self, project_uuid):
        """Synchronizes the state of the pipelines of a project.

        Synchronizes the state of the filesystem with the db when it
        comes to the pipelines of a project. Pipelines removed from the
        filesystem are removed, new pipelines (or pipelines that were
        there after, for example a project import) are registered in
        the db.

        Args:
            project_uuid:

        Raises:
            FileNotFoundError: If the project directory is not found.
        """

        project_path = project_uuid_to_path(project_uuid)
        project_dir = safe_join(current_app.config["USER_DIR"], "projects",
                                project_path)

        # Lock the project to avoid race conditions in pipeline deletion
        # or creation.
        Project.query.with_for_update().filter_by(uuid=project_uuid).one()

        if not os.path.isdir(project_dir):
            raise FileNotFoundError("Project directory not found")

        # Find all pipelines in the project directory.
        pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

        # Cleanup pipelines that have been manually removed. Note:
        # .all() already returns a list — the wrapping list
        # comprehension the original used was redundant.
        fs_removed_pipelines = Pipeline.query.filter(
            Pipeline.path.notin_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid,
                Pipeline.status == "READY",
            ).all()
        for pip in fs_removed_pipelines:
            DeletePipeline(self.tpe).transaction(pip.project_uuid,
                                                 pip.uuid,
                                                 remove_file=False)

        # Identify all pipeline paths that are not yet a pipeline, that
        # is, pipelines that were added through the filesystem.
        existing_pipeline_paths = [
            pipeline.path for pipeline in Pipeline.query.filter(
                Pipeline.path.in_(pipeline_paths)).filter(
                    Pipeline.project_uuid == project_uuid).all()
        ]
        # TODO: handle existing pipeline assignments.
        new_pipelines_from_fs = set(pipeline_paths) - set(
            existing_pipeline_paths)

        for path in new_pipelines_from_fs:
            pipeline_json_path = get_pipeline_path(None,
                                                   project_uuid,
                                                   pipeline_path=path)
            with open(pipeline_json_path, "r") as json_file:
                pipeline_uuid = json.load(json_file)["uuid"]
            # This is not a new pipeline, the pipeline is being moved.
            is_moving = (Pipeline.query.filter_by(project_uuid=project_uuid,
                                                  uuid=pipeline_uuid,
                                                  status="MOVING").count() > 0)
            if not is_moving:
                AddPipelineFromFS(self.tpe).transaction(project_uuid, path)
Example #20
0
    def _collateral(
        self,
        project_uuid: str,
        pipeline_uuid: str,
        old_path: str,
        new_path: str,
    ):
        """Move a pipeline to another path, i.e. rename it.

        Rewrites the step file paths inside the pipeline definition so
        they stay relative to the (moved) pipeline file, renames the
        file on disk and marks the db entry READY again.

        Args:
            project_uuid: Project the pipeline belongs to.
            pipeline_uuid: Uuid of the pipeline being moved.
            old_path: Current project-relative path of the pipeline.
            new_path: Desired project-relative path of the pipeline.

        Raises:
            error.OutOfProjectError: If the new pipeline path or a
                rewritten step path points outside the project.
            error.PipelineFileDoesNotExist: If the file to move is gone.
            error.PipelineFileExists: If the destination already exists.
            Exception: If the rewritten pipeline fails the correctness
                check.
        """

        if not is_valid_project_relative_path(project_uuid, new_path):
            raise error.OutOfProjectError(
                "New pipeline path points outside of the project directory.")

        # Resolve the project-relative paths to absolute file paths.
        old_path = get_pipeline_path(None,
                                     project_uuid,
                                     pipeline_path=old_path)
        new_path = get_pipeline_path(None,
                                     project_uuid,
                                     pipeline_path=new_path)

        if not os.path.exists(old_path):
            raise error.PipelineFileDoesNotExist()

        if os.path.exists(new_path) and old_path != new_path:
            raise error.PipelineFileExists()

        # Update the pipeline definition by adjusting the step file
        # paths, since they should be relative to the pipeline file.
        rel_path = os.path.relpath(
            os.path.split(old_path)[0],
            os.path.split(new_path)[0])
        if rel_path != ".":
            with open(old_path, "r") as json_file:
                pipeline_def = json.load(json_file)
                # Deep copy so that _revert can restore the original
                # definition if a later part of the collateral fails.
                self.collateral_kwargs["pipeline_def_backup"] = copy.deepcopy(
                    pipeline_def)
            for step in pipeline_def["steps"].values():
                step_f_prefix, step_f_name = os.path.split(step["file_path"])
                file_path = os.path.normpath(
                    # Get to the "previous" position + use the relative
                    # path of the notebook w.r.t. the previous position,
                    # then normalize to cleanup paths such as
                    # 1/2/3/../../2 , that would become 1/2.
                    os.path.join(rel_path, step_f_prefix, step_f_name))
                step["file_path"] = file_path
                if not is_valid_pipeline_relative_path(
                        project_uuid, pipeline_uuid, file_path):
                    raise error.OutOfProjectError(
                        "Step path points outside of the project directory.")

            # Validate before overwriting the file in place.
            with open(old_path, "w") as json_file:
                errors = check_pipeline_correctness(pipeline_def)
                if errors:
                    raise Exception("Incorrect pipeline.")
                json.dump(pipeline_def, json_file, indent=4, sort_keys=True)

        # Create the parent directories if needed.
        directories, _ = os.path.split(new_path)
        if directories:
            os.makedirs(directories, exist_ok=True)
        os.rename(old_path, new_path)

        # So that the moving can be reverted in case of failure of the
        # rest of the collateral.
        self.collateral_kwargs["moved"] = True

        Pipeline.query.filter_by(
            project_uuid=project_uuid,
            uuid=pipeline_uuid,
        ).update({"status": "READY"})
        db.session.commit()
Example #21
0
    def pipelines_json(project_uuid, pipeline_uuid):
        """Read (GET) or save (POST) a pipeline's JSON definition.

        POST: creates the step files referenced by the definition, sets
        the correct kernel for each Notebook step, and writes the
        definition to disk.
        GET: returns the pipeline definition as a JSON string (adding
        an empty "parameters" object for old pipelines), or 404 when
        the file does not exist.
        """
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            # NOTE(review): request.form.get("pipeline_json") may be
            # None if the form field is missing — verify the caller
            # always sends it.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # First create all files part of pipeline_json definition
            # TODO: consider removing other files (no way to do this
            # reliably, special case might be rename).
            create_pipeline_files(pipeline_json, pipeline_directory,
                                  project_uuid)

            # Side effect: for each Notebook in the pipeline.json set
            # the correct kernel.
            pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                          project_uuid)

            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            # Analytics call.
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success":
                        False,
                        "reason":
                        ".orchest file doesn't exist at location %s" %
                        pipeline_json_path,
                    }),
                    404,
                )

            with open(pipeline_json_path) as json_file:
                pipeline_json = json.load(json_file)
                # Take care of old pipelines with no defined params.
                if "parameters" not in pipeline_json:
                    pipeline_json["parameters"] = {}
                # json.dumps because the front end expects it as a
                # string.
                return jsonify({
                    "success": True,
                    "pipeline_json": json.dumps(pipeline_json)
                })
        # Bug fix: removed an unreachable `return ""` that followed the
        # GET branch — both of its paths already return.
Exemple #22
0
    def sync_project_pipelines_db_state(project_uuid):
        """Synchronizes the state of the pipelines of a project (fs/db).

        Synchronizes the state of the filesystem with the db
        when it comes to the pipelines of a project. Pipelines removed
        from the file system are removed, new pipelines (or pipelines
        that were there after, for example, a project import) are
        registered in the db.

        Args:
            project_uuid:

        Raises:
            FileNotFoundError: If the project directory is not found.
        """
        project_path = project_uuid_to_path(project_uuid)
        project_dir = os.path.join(app.config["USER_DIR"], "projects",
                                   project_path)

        if not os.path.isdir(project_dir):
            raise FileNotFoundError("Project directory not found")

        # Find all pipelines in the project directory.
        pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

        # Cleanup pipelines that have been manually removed from the
        # file system.
        fs_removed_pipelines = Pipeline.query.filter(
            Pipeline.path.notin_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid).all()
        for fs_removed_pipeline in fs_removed_pipelines:
            cleanup_pipeline_from_orchest(fs_removed_pipeline)

        # Identify all pipeline paths that are not yet registered as a
        # pipeline in the db.
        existing_pipeline_paths = [
            pipeline.path for pipeline in Pipeline.query.filter(
                Pipeline.path.in_(pipeline_paths)).filter(
                    Pipeline.project_uuid == project_uuid).all()
        ]

        # TODO: handle existing pipeline assignments
        new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

        for new_pipeline_path in new_pipeline_paths:

            # Write pipeline uuid to file.
            pipeline_json_path = get_pipeline_path(
                None, project_uuid, pipeline_path=new_pipeline_path)

            try:
                with open(pipeline_json_path, "r") as json_file:
                    pipeline_json = json.load(json_file)

                # Default to "" so that a pipeline file without a "uuid"
                # key is treated like an empty uuid instead of raising a
                # TypeError on len(None) below (which would silently
                # skip registering the pipeline).
                file_pipeline_uuid = pipeline_json.get("uuid", "")

                new_pipeline_uuid = file_pipeline_uuid

                # Generate a fresh uuid if the one in the file is taken
                # by another pipeline of this project, or is empty.
                if (Pipeline.query.filter(
                        Pipeline.uuid == file_pipeline_uuid).filter(
                            Pipeline.project_uuid == project_uuid).count() > 0
                        or len(file_pipeline_uuid) == 0):
                    new_pipeline_uuid = str(uuid.uuid4())

                with open(pipeline_json_path, "w") as json_file:
                    pipeline_json["uuid"] = new_pipeline_uuid
                    json_file.write(json.dumps(pipeline_json, indent=4))

                # Only commit if writing succeeds.
                new_pipeline = Pipeline(
                    uuid=new_pipeline_uuid,
                    path=new_pipeline_path,
                    project_uuid=project_uuid,
                )
                db.session.add(new_pipeline)
                db.session.commit()

            except Exception as e:
                # Best effort per pipeline: one unreadable pipeline file
                # should not abort syncing the rest.
                logging.info(e)
Exemple #23
0
    def pipelines_get(project_uuid):
        """List all pipelines of a project, registering new ones first.

        Scans the project directory for pipeline files, registers any
        pipelines not yet in the db (assigning a fresh uuid when the
        file's uuid is taken or missing), and returns all db pipelines
        augmented with the name from their pipeline.json.

        Returns:
            A ``(body, status, headers)`` tuple with a JSON body of the
            form ``{"success": True, "result": [...]}`` or a 404 JSON
            response if the project directory does not exist.
        """
        project_path = project_uuid_to_path(project_uuid)
        project_dir = os.path.join(app.config["USER_DIR"], "projects",
                                   project_path)

        if not os.path.isdir(project_dir):
            return jsonify({"message": "Project directory not found."}), 404

        # Find all pipelines in the project directory.
        pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

        # Identify all pipeline paths that are not yet registered as a
        # pipeline in the db.
        existing_pipeline_paths = [
            pipeline.path for pipeline in Pipeline.query.filter(
                Pipeline.path.in_(pipeline_paths)).filter(
                    Pipeline.project_uuid == project_uuid).all()
        ]

        # TODO: handle existing pipeline assignments
        new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

        for new_pipeline_path in new_pipeline_paths:

            # Write pipeline uuid to file.
            pipeline_json_path = get_pipeline_path(
                None, project_uuid, pipeline_path=new_pipeline_path)

            try:
                with open(pipeline_json_path, "r") as json_file:
                    pipeline_json = json.load(json_file)

                # Default to "" so that a pipeline file without a "uuid"
                # key is treated like an empty uuid instead of raising a
                # TypeError on len(None) below (which would silently
                # skip registering the pipeline).
                file_pipeline_uuid = pipeline_json.get("uuid", "")

                new_pipeline_uuid = file_pipeline_uuid

                # Generate a fresh uuid if the one in the file is taken
                # by another pipeline of this project, or is empty.
                if (Pipeline.query.filter(
                        Pipeline.uuid == file_pipeline_uuid).filter(
                            Pipeline.project_uuid == project_uuid).count() > 0
                        or len(file_pipeline_uuid) == 0):
                    new_pipeline_uuid = str(uuid.uuid4())

                # indent=4 for consistency with the other pipeline.json
                # writes in this module.
                with open(pipeline_json_path, "w") as json_file:
                    pipeline_json["uuid"] = new_pipeline_uuid
                    json_file.write(json.dumps(pipeline_json, indent=4))

                # Only commit if writing succeeds.
                new_pipeline = Pipeline(
                    uuid=new_pipeline_uuid,
                    path=new_pipeline_path,
                    project_uuid=project_uuid,
                )
                db.session.add(new_pipeline)
                db.session.commit()

            except Exception as e:
                # Best effort per pipeline: one unreadable pipeline file
                # should not abort listing the rest.
                logging.info(e)

        pipelines = Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).all()
        pipelines_augmented = []

        for pipeline in pipelines:

            pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                                   pipeline.project_uuid)

            pipeline_augmented = {
                "uuid": pipeline.uuid,
                "path": pipeline.path,
            }
            if os.path.isfile(pipeline_json_path):
                with open(pipeline_json_path, "r") as json_file:
                    pipeline_json = json.load(json_file)
                    pipeline_augmented["name"] = pipeline_json["name"]
            else:
                pipeline_augmented[
                    "name"] = "Warning: pipeline file was not found."

            pipelines_augmented.append(pipeline_augmented)

        json_string = json.dumps({
            "success": True,
            "result": pipelines_augmented
        })

        return json_string, 200, {"content-type": "application/json"}
Exemple #24
0
    def file_viewer(project_uuid, pipeline_uuid, step_uuid):
        """Return the content of a pipeline step's file for viewing.

        Resolves the step's ``file_path`` from the pipeline.json
        (rejecting paths that escape the project directory), renders
        ``.ipynb`` files to HTML via nbconvert and returns any other
        file as raw text.

        Returns:
            JSON with ``ext``, ``content``, ``step_title`` and
            ``filename``, or a 404/500 response on failure.
        """
        job_uuid = request.args.get("job_uuid")
        pipeline_run_uuid = request.args.get("pipeline_run_uuid")

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                               job_uuid, pipeline_run_uuid)
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                              job_uuid, pipeline_run_uuid)

        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)
            try:
                step_file_path = pipeline_json["steps"][step_uuid]["file_path"]
                if not is_valid_pipeline_relative_path(
                        project_uuid, pipeline_uuid, step_file_path):
                    raise app_error.OutOfProjectError(
                        "Step path points outside of the project directory.")

                if step_file_path.startswith("/"):
                    file_path = resolve_absolute_path(step_file_path)
                else:
                    file_path = safe_join(pipeline_dir, step_file_path)

                filename = pipeline_json["steps"][step_uuid]["file_path"]
                step_title = pipeline_json["steps"][step_uuid]["title"]
            except Exception as e:
                app.logger.info(e)
                # NOTE: was "%e", which is a float conversion specifier
                # and raises a TypeError when formatting an exception.
                return return_404("Invalid JSON for pipeline %s error: %s" %
                                  (pipeline_json_path, e))
        else:
            return return_404("Could not find pipeline.json for pipeline %s" %
                              pipeline_json_path)

        file_ext = file_path.split(".")[-1]
        file_content = ""

        if file_ext == "ipynb":
            if os.path.isfile(file_path):
                try:

                    html_exporter = HTMLExporter()
                    (file_content, _) = html_exporter.from_filename(file_path)

                    # Custom CSS so code cells scroll instead of clip.
                    custom_style = "<style>.CodeMirror pre {overflow: auto}</style>"
                    file_content = file_content.replace(
                        "</head>", custom_style + "</head>", 1)

                except IOError as error:
                    app.logger.info(
                        "Error opening notebook file %s error: %s" %
                        (file_path, error))
                    return return_404(
                        ("Could not find notebook file %s") % file_path)
        else:
            try:
                with open(file_path) as file:
                    file_content = file.read()
            except Exception:
                # IOError is a subclass of Exception; the original
                # (IOError, Exception) tuple was redundant.
                return jsonify({"message": "Could not read file."}), 500

        return jsonify({
            "ext": file_ext,
            "content": file_content,
            "step_title": step_title,
            "filename": filename,
        })
Exemple #25
0
    def pipelines_json(project_uuid, pipeline_uuid):
        """Save (POST) or fetch (GET) a pipeline definition (pipeline.json).

        POST validates and normalizes the definition from the
        ``pipeline_json`` form field, writes it to disk, notifies the
        orchest-api of a name change and emits an analytics event.
        GET returns the stored definition as a JSON string.
        """
        if request.method == "POST":

            pipeline_json_path = get_pipeline_path(
                pipeline_uuid,
                project_uuid,
                None,
                request.args.get("pipeline_run_uuid"),
            )

            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                None,
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # Normalize relative paths.
            for step in pipeline_json["steps"].values():

                is_project_file = is_valid_pipeline_relative_path(
                    project_uuid, pipeline_uuid, step["file_path"])

                is_data_file = is_valid_data_path(step["file_path"])

                # Reject step files outside of both allowed roots
                # (project directory and data directory).
                if not (is_project_file or is_data_file):
                    raise app_error.OutOfAllowedDirectoryError(
                        "File is neither in the project, nor in the data directory."
                    )

                if not step["file_path"].startswith("/"):
                    step["file_path"] = normalize_project_relative_path(
                        step["file_path"])

            # Validation failures are reported back to the client as a
            # comma-separated list of the offending keys.
            errors = check_pipeline_correctness(pipeline_json)
            if errors:
                msg = {}
                msg = {"success": False}
                reason = ", ".join([key for key in errors])
                reason = f"Invalid value: {reason}."
                msg["reason"] = reason
                return jsonify(msg), 400

            # Side effect: for each Notebook in the pipeline.json set
            # the correct kernel.
            try:
                pipeline_set_notebook_kernels(pipeline_json,
                                              pipeline_directory, project_uuid)
            except KeyError:
                msg = {
                    "success": False,
                    "reason": "Invalid Notebook metadata structure.",
                }
                return jsonify(msg), 400

            # Read the previous definition first so the name change can
            # be detected after saving.
            with open(pipeline_json_path, "r") as json_file:
                old_pipeline_json = json.load(json_file)

            # Save the pipeline JSON again to make sure its keys are
            # sorted.
            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            # Propagate a pipeline rename to the orchest-api.
            if old_pipeline_json["name"] != pipeline_json["name"]:
                resp = requests.put(
                    (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
                     f"/api/pipelines/{project_uuid}/{pipeline_uuid}"),
                    json={"name": pipeline_json["name"]},
                )
                if resp.status_code != 200:
                    return (
                        jsonify(
                            {"message": "Failed to PUT name to orchest-api."}),
                        resp.status_code,
                    )

            # Analytics call.
            analytics.send_event(
                app,
                analytics.Event.PIPELINE_SAVE,
                {"pipeline_definition": pipeline_json},
            )
            return jsonify({
                "success": True,
                "message": "Successfully saved pipeline."
            })

        elif request.method == "GET":
            pipeline_json_path = get_pipeline_path(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success":
                        False,
                        "reason":
                        ".orchest file doesn't exist at location " +
                        pipeline_json_path,
                    }),
                    404,
                )
            else:
                pipeline_json = get_pipeline_json(pipeline_uuid, project_uuid)

                # json.dumps because the front end expects the
                # definition as a string.
                return jsonify({
                    "success": True,
                    "pipeline_json": json.dumps(pipeline_json)
                })