def get(self, user_id):
    """Retrieve all projects that belong to a user."""

    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    if not is_current_or_admin(user_id, current_user_id, current_user_roles):
        return dict(status='fail', message='unauthorised'), 403

    project_schema = ProjectSchema(many=True)

    user = User.get_by_id(user_id)
    if not user:
        return dict(status='fail', message=f'user {user_id} not found'), 404

    projects = user.projects

    projects_json, errors = project_schema.dumps(projects)
    if errors:
        return dict(status='fail', message='Internal server error'), 500

    return dict(status='success',
                data=dict(projects=json.loads(projects_json))), 200
def patch(self, project_id):
    """Update a project's name and/or description."""

    try:
        current_user_id = get_jwt_identity()
        current_user_roles = get_jwt_claims()['roles']

        project_schema = ProjectSchema(only=("name", "description"),
                                       partial=True)

        project_data = request.get_json()
        validated_project_data, errors = project_schema.load(project_data)
        if errors:
            return dict(status='fail', message=errors), 400

        existing_project = False
        if 'name' in validated_project_data:
            existing_project = Project.find_first(
                name=validated_project_data['name'],
                owner_id=current_user_id)

        if existing_project:
            return dict(
                status='fail',
                message=(f'project with name '
                         f'{validated_project_data["name"]} already exists')
            ), 409

        project = Project.get_by_id(project_id)
        if not project:
            return dict(status='fail',
                        message=f'Project {project_id} not found'), 404

        if not is_owner_or_admin(project, current_user_id,
                                 current_user_roles):
            return dict(status='fail', message='unauthorised'), 403

        updated = Project.update(project, **validated_project_data)
        if not updated:
            return dict(status='fail',
                        message='internal server error'), 500

        return dict(status='success',
                    message=f'project {project_id} updated successfully'), 200

    except Exception as e:
        return dict(status='fail', message=str(e)), 500
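# Illustrative request for the PATCH handler above. Since the schema is
# ProjectSchema(only=("name", "description"), partial=True), only these two
# fields are accepted and both are optional; the route itself is an
# assumption, as the registration code is not part of this excerpt:
#
#   PATCH /projects/<project_id>
#   {"name": "renamed-project", "description": "updated description"}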
def get(self):
    """List projects: all of them for administrators, otherwise only
    the projects owned by the current user."""

    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    project_schema = ProjectSchema(many=True)

    if has_role(current_user_roles, 'administrator'):
        projects = Project.find_all()
    else:
        projects = Project.find_all(owner_id=current_user_id)

    project_data, errors = project_schema.dumps(projects)
    if errors:
        return dict(status='fail', message=errors), 500

    return dict(status='success',
                data=dict(projects=json.loads(project_data))), 200
def get(self, project_id):
    """Retrieve a single project by id."""

    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    project_schema = ProjectSchema()

    project = Project.get_by_id(project_id)
    if not project:
        return dict(status='fail',
                    message=f'project {project_id} not found'), 404

    if not is_owner_or_admin(project, current_user_id, current_user_roles):
        return dict(status='fail', message='unauthorised'), 403

    project_data, errors = project_schema.dumps(project)
    if errors:
        return dict(status='fail', message=errors), 500

    return dict(status='success',
                data=dict(project=json.loads(project_data))), 200
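# A minimal sketch of how handlers like the ones above are typically wired
# up with Flask-RESTful. The class names (UserProjectsView, ProjectsView,
# ProjectDetailView) and routes are illustrative assumptions only; the
# original module's registration code is not shown in this excerpt.
from flask import Flask
from flask_restful import Api

app = Flask(__name__)
api = Api(app)

api.add_resource(UserProjectsView, "/users/<string:user_id>/projects")
api.add_resource(ProjectsView, "/projects")
api.add_resource(ProjectDetailView, "/projects/<string:project_id>")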
from typing import Any, Tuple

from flask import jsonify, request, Blueprint
from flask_jwt_extended import jwt_optional
from marshmallow import ValidationError
from webargs.flaskparser import use_args
from webargs.fields import Integer, String

from app import db
from app.models import Project, Directory, BlockFile
from app.schemas import ProjectSchema
from app.utils import get_user
from app.utils.responses import (make_resp, NOT_FOUND, NO_JSON, FORBIDDEN,
                                 UNAUTHORIZED)

projects_bp = Blueprint("projects", __name__)
project_schema = ProjectSchema()


@projects_bp.route('/projects/', methods=["GET", "POST"])
@projects_bp.route('/user/<int:user_id>/projects/', methods=["GET"])
@projects_bp.route('/user/<string:username>/projects/', methods=["GET"])
@jwt_optional
def projects(user_id: int = None, username: str = None) -> Tuple[Any, int]:
    user = get_user(
        user_id if user_id else username.lower() if username else None)
    if not user:
        return make_resp(NOT_FOUND)

    if request.method == "GET":
        return jsonify(data=project_schema.dump(
            Project.query.filter(Project.user_id == user.id).all(),
            many=True)), 200
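# A minimal usage sketch, assuming an application-factory setup: the
# blueprint above only serves requests once it is registered on an app.
# `create_app` is an illustrative name, not part of the original module.
from flask import Flask


def create_app() -> Flask:
    app = Flask(__name__)
    app.register_blueprint(projects_bp)
    return app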
def register_views(app, db):
    errors = {}
    api = Api(app, errors=errors)

    projects_schema = ProjectSchema(many=True)

    environment_schema = EnvironmentSchema()
    environments_schema = EnvironmentSchema(many=True)

    background_task_schema = BackgroundTaskSchema()

    def register_environments(db, api):
        class EnvironmentsResource(Resource):
            def get(self, project_uuid):
                return environments_schema.dump(
                    get_environments(project_uuid,
                                     language=request.args.get("language")))

        class EnvironmentResource(Resource):
            def put(self, project_uuid, environment_uuid):
                return self.post(project_uuid, environment_uuid)

            def get(self, project_uuid, environment_uuid):
                return environment_schema.dump(
                    get_environment(environment_uuid, project_uuid))

            def delete(self, project_uuid, environment_uuid):
                delete_environment(app, project_uuid, environment_uuid)
                # refresh kernels after change in environments
                populate_kernels(app, db, project_uuid)

                return jsonify(
                    {"message": "Environment deletion was successful."})

            def post(self, project_uuid, environment_uuid):
                # create a new environment in the project
                environment_json = request.json.get("environment")

                e = Environment(
                    uuid=str(uuid.uuid4()),
                    name=environment_json["name"],
                    project_uuid=project_uuid,
                    language=environment_json["language"],
                    setup_script=environment_json["setup_script"],
                    base_image=environment_json["base_image"],
                    gpu_support=environment_json["gpu_support"],
                )

                # use specified uuid if it's not keyword 'new'
                if environment_uuid != "new":
                    e.uuid = environment_uuid

                environment_dir = get_environment_directory(
                    e.uuid, project_uuid)

                os.makedirs(environment_dir, exist_ok=True)
                serialize_environment_to_disk(e, environment_dir)

                # refresh kernels after change in environments
                populate_kernels(app, db, project_uuid)

                return environment_schema.dump(e)

        api.add_resource(EnvironmentsResource,
                         "/store/environments/<string:project_uuid>")
        api.add_resource(
            EnvironmentResource,
            "/store/environments/<string:project_uuid>/<string:environment_uuid>",
        )

    register_environments(db, api)

    def return_404(reason=""):
        json_string = json.dumps({"success": False, "reason": reason})
        return json_string, 404, {"content-type": "application/json"}

    @app.route("/", methods=["GET"])
    def index():
        js_bundle_path = os.path.join(app.config["STATIC_DIR"], "js", "dist",
                                      "main.bundle.js")
        css_bundle_path = os.path.join(app.config["STATIC_DIR"], "css",
                                       "dist", "main.css")

        front_end_config = [
            "FLASK_ENV",
            "TELEMETRY_DISABLED",
            "ENVIRONMENT_DEFAULTS",
            "ORCHEST_WEB_URLS",
        ]
        front_end_config_internal = [
            "ORCHEST_SOCKETIO_ENV_BUILDING_NAMESPACE",
            "PIPELINE_PARAMETERS_RESERVED_KEY",
        ]

        return render_template(
            "index.html",
            javascript_bundle_hash=get_hash(js_bundle_path),
            css_bundle_hash=get_hash(css_bundle_path),
            user_config=get_user_conf(),
            config_json=flask_json.htmlsafe_dumps({
                **{key: app.config[key] for key in front_end_config},
                **{
                    key: getattr(_config, key)
                    for key in front_end_config_internal
                },
            }),
        )

    @app.route("/async/spawn-update-server", methods=["GET"])
    def spawn_update_server():
        client = docker.from_env()
        run_orchest_ctl(client, ["updateserver"])
        return ""

    @app.route("/heartbeat", methods=["GET"])
    def heartbeat():
        return ""

    @app.route("/async/restart", methods=["POST"])
    def restart_server():
        client = docker.from_env()

        if request.args.get("mode") == "dev":
            run_orchest_ctl(client, ["restart", "--mode=dev"])
        else:
            run_orchest_ctl(client, ["restart"])

        return ""

    @app.route("/async/version", methods=["GET"])
    def version():
        return get_repo_tag()

    @app.route("/async/user-config", methods=["GET", "POST"])
    def user_config():
        if request.method == "POST":
            config = request.form.get("config")

            try:
                # only save if parseable JSON
                json.loads(config)
                save_user_conf_raw(config)
            except json.JSONDecodeError as e:
                app.logger.debug(e)

            return ""
        else:
            return get_user_conf_raw()

    @app.route("/async/pipelines/delete/<project_uuid>/<pipeline_uuid>",
               methods=["DELETE"])
    def pipelines_delete(project_uuid, pipeline_uuid):
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                DeletePipeline(tpe).transaction(project_uuid, pipeline_uuid)
        except Exception as e:
            return {"message": str(e)}, 500

        return jsonify({"success": True})

    @app.route("/async/pipelines/create/<project_uuid>", methods=["POST"])
    def pipelines_create(project_uuid):
        pipeline_path = request.json["pipeline_path"]
        pipeline_name = request.json["name"]

        try:
            with TwoPhaseExecutor(db.session) as tpe:
                CreatePipeline(tpe).transaction(project_uuid, pipeline_name,
                                                pipeline_path)
        except Exception as e:
            return jsonify({"message": str(e)}), 409

        return jsonify({"success": True})

    class ImportGitProjectListResource(Resource):
        def post(self):
            try:
                with TwoPhaseExecutor(db.session) as tpe:
                    task = ImportGitProject(tpe).transaction(
                        request.json["url"],
                        request.json.get("project_name"))
            except Exception as e:
                return jsonify({"message": str(e)}), 500

            return background_task_schema.dump(task)

    api.add_resource(ImportGitProjectListResource,
                     "/async/projects/import-git")

    def discoverFSDeletedProjects():
        """Cleanup projects that were deleted from the filesystem."""

        project_paths = [
            entry.name for entry in os.scandir(app.config["PROJECTS_DIR"])
            if entry.is_dir()
        ]

        fs_removed_projects = Project.query.filter(
            Project.path.notin_(project_paths),
            # This way we do not delete a project that is already being
            # deleted twice, and avoid considering a project that is
            # being initialized as deleted from the filesystem.
            Project.status.in_(["READY"]),
        ).all()

        # Use a TwoPhaseExecutor for each project so that issues in one
        # project do not hinder the deletion of others.
        for proj_uuid in [project.uuid for project in fs_removed_projects]:
            try:
                with TwoPhaseExecutor(db.session) as tpe:
                    DeleteProject(tpe).transaction(proj_uuid)
            except Exception as e:
                current_app.logger.error(
                    ("Error during project deletion (discovery) of "
                     f"{proj_uuid}: {e}."))

    def discoverFSCreatedProjects():
        """Detect projects that were added through the file system."""

        # Detect new projects by detecting directories that were not
        # registered in the db as projects.
        existing_project_paths = [
            project.path for project in Project.query.all()
        ]
        project_paths = [
            entry.name for entry in os.scandir(app.config["PROJECTS_DIR"])
            if entry.is_dir()
        ]
        new_project_paths = set(project_paths) - set(existing_project_paths)

        # Use a TwoPhaseExecutor for each project so that issues in one
        # project do not hinder the discovery of others.
        for new_project_path in new_project_paths:
            try:
                with TwoPhaseExecutor(db.session) as tpe:
                    CreateProject(tpe).transaction(new_project_path)
            except Exception as e:
                current_app.logger.error(
                    ("Error during project initialization (discovery) of "
                     f"{new_project_path}: {e}."))

    @app.route("/async/projects/<project_uuid>", methods=["GET"])
    def project_get(project_uuid):
        project = Project.query.filter(Project.uuid == project_uuid).first()

        if project is None:
            return jsonify({"message": "Project doesn't exist."}), 404

        resp = requests.get(
            (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
             f"/api/projects/{project_uuid}"))

        if resp.status_code == 404:
            return (
                jsonify(
                    {"message": "Project doesn't exist in the orchest-api."}),
                404,
            )
        elif resp.status_code != 200:
            return (
                jsonify({"message": "Orchest-api project retrieval failed."}),
                resp.status_code,
            )
        else:
            # Merge the project data coming from the orchest-api.
            counts = project_entity_counts(project_uuid)
            project = {**project.as_dict(), **resp.json(), **counts}

            return jsonify(project)

    @app.route("/async/projects/<project_uuid>", methods=["PUT"])
    def project_put(project_uuid):
        # While this seems suited to be in the orchest_api.py module,
        # I've left it here because some project data lives in the web
        # server as well, and this PUT request might eventually update
        # that.
        resp = requests.put(
            (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
             f"/api/projects/{project_uuid}"),
            json=request.json,
        )
        return resp.content, resp.status_code, resp.headers.items()

    @app.route("/async/projects", methods=["GET"])
    def projects_get():
        discoverFSDeletedProjects()
        discoverFSCreatedProjects()

        # Projects that are in an INITIALIZING or DELETING state won't
        # be shown until ready.
        projects = projects_schema.dump(
            Project.query.filter_by(status="READY").all())

        for project in projects:
            # Discover both pipelines of newly initialized projects and
            # manually initialized pipelines of existing projects. Use
            # a TwoPhaseExecutor for each project so that issues in one
            # project do not hinder the pipeline synchronization of
            # others.
            try:
                with TwoPhaseExecutor(db.session) as tpe:
                    SyncProjectPipelinesDBState(tpe).transaction(
                        project["uuid"])
            except Exception as e:
                current_app.logger.error(
                    ("Error during project pipelines synchronization of "
                     f'{project["path"]}: {e}.'))

            counts = project_entity_counts(project["uuid"])
            project.update(counts)

        return jsonify(projects)

    @app.route("/async/projects", methods=["POST"])
    def projects_post():
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                CreateProject(tpe).transaction(request.json["name"])
        except Exception as e:
            # The sql integrity error message can be quite ugly.
            if isinstance(e, sqlalchemy.exc.IntegrityError):
                msg = f'Project "{request.json["name"]}" already exists.'
            else:
                msg = str(e)

            return (
                jsonify({"message": msg}),
                500,
            )

        return jsonify({"message": "Project created."})

    @app.route("/async/projects", methods=["DELETE"])
    def projects_delete():
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                DeleteProject(tpe).transaction(request.json["project_uuid"])
        except Exception as e:
            return (
                jsonify({
                    "message": f"Failed to delete the project. Error: {e}"
                }),
                500,
            )
        return jsonify({"message": "Project deleted."})

    @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>",
               methods=["GET"])
    def pipeline_get(project_uuid, pipeline_uuid):
        pipeline = (Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).filter(
                Pipeline.uuid == pipeline_uuid).first())

        if pipeline is None:
            return jsonify({"message": "Pipeline doesn't exist."}), 404

        resp = requests.get(
            (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
             f"/api/pipelines/{project_uuid}/{pipeline_uuid}"))

        if resp.status_code == 404:
            return (
                jsonify(
                    {"message": "Pipeline doesn't exist in the orchest-api."}),
                404,
            )
        elif resp.status_code != 200:
            return (
                jsonify({"message": "Orchest-api pipeline retrieval failed."}),
                resp.status_code,
            )
        else:
            # Merge the pipeline data coming from the orchest-api.
            pipeline = {**pipeline.as_dict(), **resp.json()}
            return jsonify(pipeline)

    @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>",
               methods=["PUT"])
    def pipeline_put(project_uuid, pipeline_uuid):
        # While this seems suited to be in the orchest_api.py module,
        # I've left it here because some pipeline data lives in the web
        # server as well, and this PUT request might eventually update
        # that.
        resp = requests.put(
            (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
             f"/api/pipelines/{project_uuid}/{pipeline_uuid}"),
            json=request.json,
        )
        return resp.content, resp.status_code, resp.headers.items()

    @app.route("/async/pipelines/<project_uuid>", methods=["GET"])
    def pipelines_get(project_uuid):
        try:
            with TwoPhaseExecutor(db.session) as tpe:
                SyncProjectPipelinesDBState(tpe).transaction(project_uuid)
        except Exception as e:
            msg = ("Error during project pipelines synchronization of "
                   f"{project_uuid}: {str(e)}.")
            return jsonify({"message": msg}), 500

        pipelines = Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).all()

        pipelines_augmented = []
        for pipeline in pipelines:
            pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                                   pipeline.project_uuid)

            pipeline_augmented = {
                "uuid": pipeline.uuid,
                "path": pipeline.path,
            }
            if os.path.isfile(pipeline_json_path):
                with open(pipeline_json_path, "r") as json_file:
                    pipeline_json = json.load(json_file)
                    pipeline_augmented["name"] = pipeline_json["name"]
            else:
                pipeline_augmented[
                    "name"] = "Warning: pipeline file was not found."
            pipelines_augmented.append(pipeline_augmented)

        json_string = json.dumps({
            "success": True,
            "result": pipelines_augmented
        })
        return json_string, 200, {"content-type": "application/json"}

    @app.route(
        "/async/file-viewer/<project_uuid>/<pipeline_uuid>/<step_uuid>",
        methods=["GET"],
    )
    def file_viewer(project_uuid, pipeline_uuid, step_uuid):
        job_uuid = request.args.get("job_uuid")
        pipeline_run_uuid = request.args.get("pipeline_run_uuid")

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                               job_uuid, pipeline_run_uuid)
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                              job_uuid, pipeline_run_uuid)

        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            try:
                file_path = os.path.join(
                    pipeline_dir,
                    pipeline_json["steps"][step_uuid]["file_path"])
                filename = pipeline_json["steps"][step_uuid]["file_path"]
                step_title = pipeline_json["steps"][step_uuid]["title"]
            except Exception as e:
                app.logger.info(e)
                return return_404("Invalid JSON for pipeline %s error: %s" %
                                  (pipeline_json_path, e))
        else:
            return return_404("Could not find pipeline.json for pipeline %s" %
                              pipeline_json_path)

        file_ext = file_path.split(".")[-1]
        file_content = ""

        if file_ext == "ipynb":
            if os.path.isfile(file_path):
                try:
                    html_exporter = HTMLExporter()
                    (file_content, _) = html_exporter.from_filename(file_path)
                except IOError as error:
                    app.logger.info(
                        "Error opening notebook file %s error: %s" %
                        (file_path, error))
                    return return_404("Could not find notebook file %s" %
                                      file_path)
        else:
            try:
                with open(file_path) as file:
                    file_content = file.read()
            except (IOError, Exception):
                return jsonify({"message": "Could not read file."}), 500

        return jsonify({
            "ext": file_ext,
            "content": file_content,
            "step_title": step_title,
            "filename": filename,
        })

    @app.route("/async/pipelines/json/<project_uuid>/<pipeline_uuid>",
               methods=["GET", "POST"])
    def pipelines_json(project_uuid, pipeline_uuid):
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":
            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("job_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # Parse JSON.
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # First create all files part of the pipeline_json definition.
            # TODO: consider removing other files (no way to do this
            # reliably, special case might be rename).
            create_pipeline_files(pipeline_json, pipeline_directory,
                                  project_uuid)

            # Side effect: for each Notebook in the pipeline.json, set
            # the correct kernel.
            pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                          project_uuid)

            with open(pipeline_json_path, "w") as json_file:
                json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

            # Analytics call.
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":
            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success": False,
                        "reason": ".orchest file doesn't exist at location %s"
                        % pipeline_json_path,
                    }),
                    404,
                )
            else:
                with open(pipeline_json_path) as json_file:
                    pipeline_json = json.load(json_file)

                # Take care of old pipelines with no defined params.
                if "parameters" not in pipeline_json:
                    pipeline_json["parameters"] = {}

                # json.dumps because the front end expects it as a
                # string.
                return jsonify({
                    "success": True,
                    "pipeline_json": json.dumps(pipeline_json),
                })

        return ""

    @app.route(
        "/async/file-picker-tree/pipeline-cwd/<project_uuid>/<pipeline_uuid>",
        methods=["GET"],
    )
    def pipeline_cwd(project_uuid, pipeline_uuid):
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        project_dir = get_project_directory(project_uuid)
        cwd = pipeline_dir.replace(project_dir, "")

        return jsonify({"cwd": cwd})

    @app.route("/async/file-picker-tree/<project_uuid>", methods=["GET"])
    def get_file_picker_tree(project_uuid):
        allowed_file_extensions = ["ipynb", "R", "py", "sh"]

        project_dir = get_project_directory(project_uuid)

        if not os.path.isdir(project_dir):
            return jsonify(
                {"message": "Project dir %s not found." % project_dir}), 404

        tree = {"type": "directory", "root": True, "name": "/", "children": []}

        dir_nodes = {}
        dir_nodes[project_dir] = tree

        for root, dirs, files in os.walk(project_dir):
            # exclude directories that start with "." from file_picker
            dirs[:] = [
                dirname for dirname in dirs if not dirname.startswith(".")
            ]

            for dirname in dirs:
                dir_path = os.path.join(root, dirname)
                dir_node = {
                    "type": "directory",
                    "name": dirname,
                    "children": [],
                }
                dir_nodes[dir_path] = dir_node
                dir_nodes[root]["children"].append(dir_node)

            for filename in files:
                if filename.split(".")[-1] in allowed_file_extensions:
                    file_node = {
                        "type": "file",
                        "name": filename,
                    }

                    # this key should always exist
                    try:
                        dir_nodes[root]["children"].append(file_node)
                    except KeyError as e:
                        app.logger.error(
                            "Key %s does not exist in dir_nodes %s. Error: %s"
                            % (root, dir_nodes, e))
                    except Exception as e:
                        app.logger.error("Error: %s" % e)

        return jsonify(tree)

    @app.route("/async/project-files/create/<project_uuid>",
               methods=["POST"])
    def create_project_file(project_uuid):
        """Create project file in specified directory within project."""

        project_dir = get_project_directory(project_uuid)

        # Client sends absolute path relative to project root, hence the
        # starting / character is removed.
        file_path = os.path.join(project_dir, request.json["file_path"][1:])

        if os.path.isfile(file_path):
            return jsonify({"message": "File already exists."}), 409

        try:
            open(file_path, "a").close()
            return jsonify({"message": "File created."})
        except IOError as e:
            app.logger.error("Could not create file at %s. Error: %s" %
                             (file_path, e))

    @app.route("/async/project-files/exists/<project_uuid>/<pipeline_uuid>",
               methods=["POST"])
    def project_file_exists(project_uuid, pipeline_uuid):
        """Check whether a file exists."""

        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        file_path = os.path.join(pipeline_dir, request.json["relative_path"])

        if os.path.isfile(file_path):
            return jsonify({"message": "File exists."})
        else:
            return jsonify({"message": "File does not exist."}), 404
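# A minimal usage sketch, assuming register_views(app, db) is called once
# from the application factory at startup; the in-memory SQLite URI is an
# illustrative stand-in for the real database configuration.
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
db = SQLAlchemy(app)

register_views(app, db)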
def post(self):
    """Create a new project, together with its cluster namespace and
    default ingress."""

    current_user_id = get_jwt_identity()
    current_user_roles = get_jwt_claims()['roles']

    project_schema = ProjectSchema()

    project_data = request.get_json()
    validated_project_data, errors = project_schema.load(project_data)
    if errors:
        return dict(status='fail', message=errors), 400

    if not has_role(current_user_roles, 'administrator'):
        validated_project_data['owner_id'] = current_user_id

    # check if project already exists
    existing_project = Project.find_first(
        name=validated_project_data['name'],
        owner_id=validated_project_data['owner_id'])
    if existing_project:
        return dict(
            status='fail',
            message=(f'project with name {validated_project_data["name"]} '
                     'already exists')), 409

    try:
        validated_project_data['alias'] = \
            create_alias(validated_project_data['name'])
        namespace_name = validated_project_data['alias']

        cluster_id = validated_project_data['cluster_id']
        cluster = Cluster.get_by_id(cluster_id)
        if not cluster:
            return dict(status='fail',
                        message=f'cluster {cluster_id} not found'), 404

        kube_host = cluster.host
        kube_token = cluster.token
        kube_client = create_kube_clients(kube_host, kube_token)

        # create namespace in cluster
        cluster_namespace = kube_client.kube.create_namespace(
            client.V1Namespace(
                metadata=client.V1ObjectMeta(name=namespace_name)))

        # create project in database
        if cluster_namespace:
            ingress_name = f"{validated_project_data['alias']}-ingress"
            ingress_meta = client.V1ObjectMeta(name=ingress_name)
            ingress_default_rule = client.ExtensionsV1beta1IngressRule(
                host="traefik-ui.cranecloud.io",
                http=client.ExtensionsV1beta1HTTPIngressRuleValue(paths=[
                    client.ExtensionsV1beta1HTTPIngressPath(
                        path="/*",
                        backend=client.ExtensionsV1beta1IngressBackend(
                            service_name="traefik-web-ui-ext",
                            service_port=80))
                ]))
            ingress_spec = client.ExtensionsV1beta1IngressSpec(
                rules=[ingress_default_rule])
            ingress_body = client.ExtensionsV1beta1Ingress(
                metadata=ingress_meta, spec=ingress_spec)

            kube_client.extension_api.create_namespaced_ingress(
                namespace=namespace_name, body=ingress_body)

            project = Project(**validated_project_data)

            saved = project.save()
            if not saved:
                # roll back: delete the namespace
                kube_client.kube.delete_namespace(namespace_name)
                return dict(status='fail',
                            message='Internal Server Error'), 500

            new_project_data, errors = project_schema.dump(project)

            return dict(status='success',
                        data=dict(project=new_project_data)), 201

    except client.rest.ApiException as e:
        return dict(status='fail', message=e.body), e.status

    except Exception as err:
        return dict(status='fail', message=str(err)), 500
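# Illustrative request payload for the POST handler above. The fields used
# by the code are name, cluster_id and owner_id; any further fields required
# by ProjectSchema are not visible in this excerpt, and the route itself is
# an assumption:
#
#   POST /projects
#   {
#       "name": "my-project",
#       "cluster_id": "<cluster-id>",
#       "owner_id": "<user-id>"   # overridden with the caller's id unless
#                                 # the caller is an administrator
#   }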
def register_views(app, db):
    errors = {
        "DataSourceNameInUse": {
            "message": "A data source with this name already exists.",
            "status": 409,
        },
    }

    api = Api(app, errors=errors)

    class DataSourceNameInUse(HTTPException):
        pass

    project_schema = ProjectSchema()
    projects_schema = ProjectSchema(many=True)

    pipeline_schema = PipelineSchema()
    pipelines_schema = PipelineSchema(many=True)

    datasource_schema = DataSourceSchema()
    datasources_schema = DataSourceSchema(many=True)

    environment_schema = EnvironmentSchema()
    environments_schema = EnvironmentSchema(many=True)

    experiment_schema = ExperimentSchema()
    experiments_schema = ExperimentSchema(many=True)

    background_task_schema = BackgroundTaskSchema()

    def register_environments(db, api):
        class EnvironmentsResource(Resource):
            def get(self, project_uuid):
                return environments_schema.dump(
                    get_environments(project_uuid,
                                     language=request.args.get("language")))

        class EnvironmentResource(Resource):
            def put(self, project_uuid, environment_uuid):
                return self.post(project_uuid, environment_uuid)

            def get(self, project_uuid, environment_uuid):
                return environment_schema.dump(
                    get_environment(environment_uuid, project_uuid))

            def delete(self, project_uuid, environment_uuid):
                delete_environment(app, project_uuid, environment_uuid)
                # refresh kernels after change in environments
                populate_kernels(app, db)

                return jsonify(
                    {"message": "Environment deletion was successful."})

            def post(self, project_uuid, environment_uuid):
                # create a new environment in the project
                environment_json = request.json.get("environment")

                e = Environment(
                    uuid=str(uuid.uuid4()),
                    name=environment_json["name"],
                    project_uuid=project_uuid,
                    language=environment_json["language"],
                    setup_script=environment_json["setup_script"],
                    base_image=environment_json["base_image"],
                    gpu_support=environment_json["gpu_support"],
                )

                # use specified uuid if it's not keyword 'new'
                if environment_uuid != "new":
                    e.uuid = environment_uuid

                environment_dir = get_environment_directory(
                    e.uuid, project_uuid)

                os.makedirs(environment_dir, exist_ok=True)
                serialize_environment_to_disk(e, environment_dir)

                # refresh kernels after change in environments
                populate_kernels(app, db)

                return environment_schema.dump(e)

        api.add_resource(EnvironmentsResource,
                         "/store/environments/<string:project_uuid>")
        api.add_resource(
            EnvironmentResource,
            "/store/environments/<string:project_uuid>/<string:environment_uuid>",
        )

    def register_datasources(db, api):
        class DataSourcesResource(Resource):
            def get(self):
                show_internal = True
                if request.args.get("show_internal") == "false":
                    show_internal = False

                if show_internal:
                    datasources = DataSource.query.all()
                else:
                    datasources = DataSource.query.filter(
                        ~DataSource.name.like(r"\_%", escape="\\")).all()

                return datasources_schema.dump(datasources)

        class DataSourceResource(Resource):
            def put(self, name):
                ds = DataSource.query.filter(DataSource.name == name).first()

                if ds is None:
                    return "", 404

                ds.name = request.json["name"]
                ds.source_type = request.json["source_type"]
                ds.connection_details = request.json["connection_details"]
                db.session.commit()

                return datasource_schema.dump(ds)

            def get(self, name):
                ds = DataSource.query.filter(DataSource.name == name).first()

                if ds is None:
                    return "", 404

                return datasource_schema.dump(ds)

            def delete(self, name):
                ds = DataSource.query.filter(DataSource.name == name).first()

                if ds is None:
                    return "", 404

                db.session.delete(ds)
                db.session.commit()

                return jsonify(
                    {"message": "Data source deletion was successful"})

            def post(self, name):
                if DataSource.query.filter(
                        DataSource.name == name).count() > 0:
                    raise DataSourceNameInUse()
                new_ds = DataSource(
                    name=name,
                    source_type=request.json["source_type"],
                    connection_details=request.json["connection_details"],
                )

                db.session.add(new_ds)
                db.session.commit()

                return datasource_schema.dump(new_ds)

        api.add_resource(DataSourcesResource, "/store/datasources")
        api.add_resource(DataSourceResource,
                         "/store/datasources/<string:name>")

    def register_experiments(db, api):
        class ExperimentsResource(Resource):
            def get(self):
                experiment_query = Experiment.query

                project_uuid = request.args.get("project_uuid")
                if project_uuid is not None:
                    experiment_query = experiment_query.filter(
                        Experiment.project_uuid == project_uuid)

                experiments = experiment_query.all()
                return experiments_schema.dump(experiments)

        class ExperimentResource(Resource):
            def put(self, experiment_uuid):
                ex = Experiment.query.filter(
                    Experiment.uuid == experiment_uuid).first()

                if ex is None:
                    return "", 404

                ex.name = request.json["name"]
                ex.pipeline_uuid = request.json["pipeline_uuid"]
                ex.pipeline_name = request.json["pipeline_name"]
                ex.strategy_json = request.json["strategy_json"]
                ex.draft = request.json["draft"]

                db.session.commit()

                return experiment_schema.dump(ex)

            def get(self, experiment_uuid):
                ex = Experiment.query.filter(
                    Experiment.uuid == experiment_uuid).first()

                if ex is None:
                    return "", 404

                return experiment_schema.dump(ex)

            def delete(self, experiment_uuid):
                # remove experiment directory
                ex = Experiment.query.filter(
                    Experiment.uuid == experiment_uuid).first()

                if ex is None:
                    return "", 404

                # tell the orchest-api that the experiment does
                # not exist anymore, will be stopped if necessary,
                # then cleaned up from the orchest-api db
                url = (f"http://{app.config['ORCHEST_API_ADDRESS']}"
                       f"/api/experiments/cleanup/{ex.uuid}")
                app.config["SCHEDULER"].add_job(requests.delete, args=[url])

                remove_experiment_directory(ex.uuid, ex.pipeline_uuid,
                                            ex.project_uuid)

                db.session.delete(ex)
                db.session.commit()

                return jsonify(
                    {"message": "Experiment termination was successful"})

            def post(self, experiment_uuid):
                experiment_uuid = str(uuid.uuid4())

                pipeline_path = pipeline_uuid_to_path(
                    request.json["pipeline_uuid"],
                    request.json["project_uuid"])

                new_ex = Experiment(
                    uuid=experiment_uuid,
                    name=request.json["name"],
                    pipeline_uuid=request.json["pipeline_uuid"],
                    project_uuid=request.json["project_uuid"],
                    pipeline_name=request.json["pipeline_name"],
                    pipeline_path=pipeline_path,
                    strategy_json="{}",
                    draft=request.json["draft"],
                )

                db.session.add(new_ex)
                db.session.commit()

                create_experiment_directory(
                    experiment_uuid,
                    request.json["pipeline_uuid"],
                    request.json["project_uuid"],
                )

                return experiment_schema.dump(new_ex)

        api.add_resource(ExperimentsResource, "/store/experiments")
        api.add_resource(ExperimentResource,
                         "/store/experiments/<string:experiment_uuid>")

    register_datasources(db, api)
    register_experiments(db, api)
    register_environments(db, api)

    def return_404(reason=""):
        json_string = json.dumps({"success": False, "reason": reason})
        return json_string, 404, {"content-type": "application/json"}

    def generate_gateway_kernel_name(environment_uuid):
        return _config.KERNEL_NAME.format(environment_uuid=environment_uuid)

    def build_environments(environment_uuids, project_uuid):
        project_path = project_uuid_to_path(project_uuid)

        environment_build_requests = [{
            "project_uuid": project_uuid,
            "project_path": project_path,
            "environment_uuid": environment_uuid,
        } for environment_uuid in environment_uuids]

        return api_proxy_environment_builds(
            environment_build_requests, app.config["ORCHEST_API_ADDRESS"])

    def build_environments_for_project(project_uuid):
        environments = get_environments(project_uuid)

        return build_environments(
            [environment.uuid for environment in environments], project_uuid)

    def populate_default_environments(project_uuid):
        for env_spec in app.config["DEFAULT_ENVIRONMENTS"]:
            e = Environment(**env_spec)

            e.uuid = str(uuid.uuid4())
            e.project_uuid = project_uuid

            environment_dir = get_environment_directory(e.uuid, project_uuid)
            os.makedirs(environment_dir, exist_ok=True)

            serialize_environment_to_disk(e, environment_dir)

    def pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                      project_uuid):
        # for each step set correct notebook kernel if it exists
        steps = pipeline_json["steps"].keys()

        for key in steps:
            step = pipeline_json["steps"][key]

            if "ipynb" == step["file_path"].split(".")[-1]:
                notebook_path = os.path.join(pipeline_directory,
                                             step["file_path"])

                if os.path.isfile(notebook_path):
                    gateway_kernel = generate_gateway_kernel_name(
                        step["environment"])

                    with open(notebook_path, "r") as file:
                        notebook_json = json.load(file)

                    notebook_changed = False

                    if (notebook_json["metadata"]["kernelspec"]["name"] !=
                            gateway_kernel):
                        notebook_changed = True
                        notebook_json["metadata"]["kernelspec"][
                            "name"] = gateway_kernel

                    environment = get_environment(step["environment"],
                                                  project_uuid)
                    if environment is not None:
                        if (notebook_json["metadata"]["kernelspec"]
                            ["display_name"] != environment.name):
                            notebook_changed = True
                            notebook_json["metadata"]["kernelspec"][
                                "display_name"] = environment.name
                    else:
                        logging.warning(
                            "Could not find environment [%s] while setting "
                            "notebook kernelspec for notebook %s." %
                            (step["environment"], notebook_path))

                    if notebook_changed:
                        with open(notebook_path, "w") as file:
                            file.write(json.dumps(notebook_json, indent=4))

                else:
                    logging.info(
                        "pipeline_set_notebook_kernels called on "
                        "notebook_path that doesn't exist %s" % notebook_path)

    def generate_ipynb_from_template(step, project_uuid):
        # TODO: support additional languages to Python and R
        if "python" in step["kernel"]["name"].lower():
            template_json = json.load(
                open(
                    os.path.join(app.config["RESOURCE_DIR"],
                                 "ipynb_template.json"), "r"))
        elif "julia" in step["kernel"]["name"]:
            template_json = json.load(
                open(
                    os.path.join(app.config["RESOURCE_DIR"],
                                 "ipynb_template_julia.json"),
                    "r",
                ))
        else:
            template_json = json.load(
                open(
                    os.path.join(app.config["RESOURCE_DIR"],
                                 "ipynb_template_r.json"),
                    "r",
                ))

        template_json["metadata"]["kernelspec"]["display_name"] = step[
            "kernel"]["display_name"]
        template_json["metadata"]["kernelspec"][
            "name"] = generate_gateway_kernel_name(step["environment"])

        return json.dumps(template_json, indent=4)

    def create_pipeline_files(pipeline_json, pipeline_directory,
                              project_uuid):
        # Currently, we check per step whether the file exists.
        # If not, we create it (empty by default).
        # In case the file has an .ipynb extension we generate the file
        # from a template with a kernel based on the kernel description
        # in the JSON step.
        # Iterate over steps
        steps = pipeline_json["steps"].keys()

        for key in steps:
            step = pipeline_json["steps"][key]

            file_name = step["file_path"]

            full_file_path = os.path.join(pipeline_directory, file_name)
            file_name_split = file_name.split(".")
            file_name_without_ext = ".".join(file_name_split[:-1])
            ext = file_name_split[-1]

            file_content = None

            if not os.path.isfile(full_file_path):
                if len(file_name_without_ext) > 0:
                    file_content = ""

                if ext == "ipynb":
                    file_content = generate_ipynb_from_template(
                        step, project_uuid)

            elif ext == "ipynb":
                # check for empty .ipynb, for which we also generate a
                # template notebook
                if os.stat(full_file_path).st_size == 0:
                    file_content = generate_ipynb_from_template(
                        step, project_uuid)

            if file_content is not None:
                with open(full_file_path, "w") as file:
                    file.write(file_content)

    def create_experiment_directory(experiment_uuid, pipeline_uuid,
                                    project_uuid):
        def ignore_patterns(path, fnames):
            """
            Example:
                path, fnames = \
                    'docker/catching-error/testing', ['hello.txt', 'some-dir']
            """
            # Ignore the ".orchest/pipelines" directory containing the
            # logs and data directories.
            if path.endswith(".orchest"):
                return ["pipelines"]

            # Ignore nothing.
            return []

        snapshot_path = os.path.join(
            get_experiment_directory(pipeline_uuid, project_uuid,
                                     experiment_uuid),
            "snapshot",
        )

        os.makedirs(os.path.split(snapshot_path)[0], exist_ok=True)

        project_dir = os.path.join(app.config["USER_DIR"], "projects",
                                   project_uuid_to_path(project_uuid))

        shutil.copytree(project_dir, snapshot_path, ignore=ignore_patterns)

    def remove_experiment_directory(experiment_uuid, pipeline_uuid,
                                    project_uuid):
        experiment_project_path = os.path.join(app.config["USER_DIR"],
                                               "experiments", project_uuid)
        experiment_pipeline_path = os.path.join(experiment_project_path,
                                                pipeline_uuid)
        experiment_path = os.path.join(experiment_pipeline_path,
                                       experiment_uuid)

        if os.path.isdir(experiment_path):
            shutil.rmtree(experiment_path)

        # clean up parent directory if this experiment removal created
        # empty directories
        remove_dir_if_empty(experiment_pipeline_path)
        remove_dir_if_empty(experiment_project_path)

    def cleanup_project_from_orchest(project):
        """Cleanup a project at the orchest level.

        Removes references of the project in the webserver db, and
        issues a cleanup request to the orchest-api.

        Args:
            project:

        Returns:

        """
        url = (f"http://{app.config['ORCHEST_API_ADDRESS']}"
               f"/api/projects/{project.uuid}")
        app.config["SCHEDULER"].add_job(requests.delete, args=[url])

        experiments = Experiment.query.filter(
            Experiment.project_uuid == project.uuid).all()
        for ex in experiments:
            remove_experiment_directory(ex.uuid, ex.pipeline_uuid,
                                        ex.project_uuid)

        # will delete cascade
        # pipeline
        # experiment -> pipeline run
        db.session.delete(project)
        db.session.commit()

    def cleanup_pipeline_from_orchest(pipeline):
        """Cleanup a pipeline at the orchest level.

        Removes references of the pipeline in the webserver db, and
        issues a cleanup request to the orchest-api.

        Args:
            pipeline:

        Returns:

        """
        url = (f"http://{app.config['ORCHEST_API_ADDRESS']}"
               f"/api/pipelines/{pipeline.project_uuid}/{pipeline.uuid}")
        app.config["SCHEDULER"].add_job(requests.delete, args=[url])

        # will delete cascade
        # experiment -> pipeline run
        db.session.delete(pipeline)
        db.session.commit()

    def init_project(project_path: str) -> str:
        """Inits an orchest project.

        Given a directory it will detect what parts are missing from
        the .orchest directory for the project to be considered
        initialized, e.g. the actual .orchest directory, .gitignore
        file, environments directory, etc.
        As part of the initialization process, environments are built
        and kernels are refreshed.

        Args:
            project_path: Directory of the project

        Returns:
            UUID of the newly initialized project.

        """
        projects_dir = os.path.join(app.config["USER_DIR"], "projects")
        full_project_path = os.path.join(projects_dir, project_path)

        new_project = Project(
            uuid=str(uuid.uuid4()),
            path=project_path,
        )
        db.session.add(new_project)
        db.session.commit()

        try:
            # this would actually be created as a collateral effect
            # when populating with default environments, let's not
            # rely on that
            expected_internal_dir = os.path.join(full_project_path,
                                                 ".orchest")
            if os.path.isfile(expected_internal_dir):
                raise NotADirectoryError(
                    "The expected internal directory (.orchest) is a file.")
            elif not os.path.isdir(expected_internal_dir):
                os.makedirs(expected_internal_dir)

            # init the .gitignore file if it is not there already
            expected_git_ignore_file = os.path.join(full_project_path,
                                                    ".orchest", ".gitignore")
            if os.path.isdir(expected_git_ignore_file):
                raise FileExistsError(".orchest/.gitignore is a directory")
            elif not os.path.isfile(expected_git_ignore_file):
                with open(expected_git_ignore_file, "w") as ign_file:
                    ign_file.write(
                        app.config["PROJECT_ORCHEST_GIT_IGNORE_CONTENT"])

            # initialize with default environments only if the project
            # has no environments directory
            expected_env_dir = os.path.join(full_project_path, ".orchest",
                                            "environments")
            if os.path.isfile(expected_env_dir):
                raise NotADirectoryError(
                    "The expected environments directory "
                    "(.orchest/environments) is a file.")
            elif not os.path.isdir(expected_env_dir):
                populate_default_environments(new_project.uuid)

            # refresh kernels after change in environments, given that
            # either we added the default environments or the project
            # has environments of its own
            populate_kernels(app, db)

            # build environments on project creation
            build_environments_for_project(new_project.uuid)

        # some calls rely on the project being in the db, like
        # populate_default_environments or populate_kernels; for this
        # reason we need to commit the project to the db before the
        # init actually finishes. If an exception is raised during
        # project init we have to cleanup the newly added project from
        # the db.
        # TODO: make use of the complete cleanup of a project from
        # orchest once that is implemented, so that we use the same
        # code path
        except Exception as e:
            db.session.delete(new_project)
            db.session.commit()
            populate_kernels(app, db)

            raise e

        return new_project.uuid

    def sync_project_pipelines_db_state(project_uuid):
        """Synchronizes the state of the pipelines of a project (fs/db).

        Synchronizes the state of the filesystem with the db when it
        comes to the pipelines of a project. Pipelines removed from the
        filesystem are removed, while new pipelines (or pipelines that
        appeared after, for example, a project import) are registered
        in the db.

        Args:
            project_uuid:

        Raises:
            FileNotFoundError: If the project directory is not found.
""" project_path = project_uuid_to_path(project_uuid) project_dir = os.path.join(app.config["USER_DIR"], "projects", project_path) if not os.path.isdir(project_dir): raise FileNotFoundError("Project directory not found") # find all pipelines in project dir pipeline_paths = find_pipelines_in_dir(project_dir, project_dir) # cleanup pipelines that have been manually removed fs_removed_pipelines = [ pipeline for pipeline in Pipeline.query.filter( Pipeline.path.notin_(pipeline_paths)).filter( Pipeline.project_uuid == project_uuid).all() ] for fs_removed_pipeline in fs_removed_pipelines: cleanup_pipeline_from_orchest(fs_removed_pipeline) # identify all pipeline paths that are not yet a pipeline existing_pipeline_paths = [ pipeline.path for pipeline in Pipeline.query.filter( Pipeline.path.in_(pipeline_paths)).filter( Pipeline.project_uuid == project_uuid).all() ] # TODO: handle existing pipeline assignments new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths) for new_pipeline_path in new_pipeline_paths: # write pipeline uuid to file pipeline_json_path = get_pipeline_path( None, project_uuid, pipeline_path=new_pipeline_path) try: with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) file_pipeline_uuid = pipeline_json.get("uuid") new_pipeline_uuid = file_pipeline_uuid # see if pipeline_uuid is taken if (Pipeline.query.filter( Pipeline.uuid == file_pipeline_uuid).filter( Pipeline.project_uuid == project_uuid).count() > 0 or len(file_pipeline_uuid) == 0): new_pipeline_uuid = str(uuid.uuid4()) with open(pipeline_json_path, "w") as json_file: pipeline_json["uuid"] = new_pipeline_uuid json_file.write(json.dumps(pipeline_json, indent=4)) # only commit if writing succeeds new_pipeline = Pipeline( uuid=new_pipeline_uuid, path=new_pipeline_path, project_uuid=project_uuid, ) db.session.add(new_pipeline) db.session.commit() except Exception as e: logging.info(e) @app.route("/", methods=["GET"]) def index(): js_bundle_path = os.path.join(app.config["STATIC_DIR"], "js", "dist", "main.bundle.js") css_bundle_path = os.path.join(app.config["STATIC_DIR"], "css", "dist", "main.css") front_end_config = [ "FLASK_ENV", "TELEMETRY_DISABLED", "ENVIRONMENT_DEFAULTS", "ORCHEST_WEB_URLS", ] front_end_config_internal = ["ORCHEST_SOCKETIO_ENV_BUILDING_NAMESPACE"] return render_template( "index.html", javascript_bundle_hash=get_hash(js_bundle_path), css_bundle_hash=get_hash(css_bundle_path), user_config=get_user_conf(), config_json=flask_json.htmlsafe_dumps({ **{key: app.config[key] for key in front_end_config}, **{ key: getattr(_config, key) for key in front_end_config_internal }, }), ) @app.route("/async/spawn-update-server", methods=["GET"]) def spawn_update_server(): client = docker.from_env() run_orchest_ctl(client, ["updateserver"]) return "" @app.route("/heartbeat", methods=["GET"]) def heartbeat(): return "" @app.route("/async/restart", methods=["POST"]) def restart_server(): client = docker.from_env() if request.args.get("mode") == "dev": run_orchest_ctl(client, ["restart", "--mode=dev"]) else: run_orchest_ctl(client, ["restart"]) return "" @app.route("/async/version", methods=["GET"]) def version(): return get_repo_tag() @app.route("/async/user-config", methods=["GET", "POST"]) def user_config(): if request.method == "POST": config = request.form.get("config") try: # only save if parseable JSON json.loads(config) save_user_conf_raw(config) except json.JSONDecodeError as e: logging.debug(e) return "" else: return get_user_conf_raw() 
@app.route("/async/pipelines/delete/<project_uuid>/<pipeline_uuid>", methods=["DELETE"]) def pipelines_delete(project_uuid, pipeline_uuid): pipeline = (Pipeline.query.filter( Pipeline.uuid == pipeline_uuid).filter( Pipeline.project_uuid == project_uuid).one_or_none()) if pipeline is not None: pipeline_json_path = get_pipeline_path(pipeline.uuid, project_uuid) os.remove(pipeline_json_path) cleanup_pipeline_from_orchest(pipeline) return jsonify({"success": True}) else: return jsonify({"message": "Pipeline could not be found."}), 404 @app.route("/async/pipelineruns/create", methods=["POST"]) def pipelineruns_create(): experiment_uuid = request.json["experiment_uuid"] # Convert a list like [0, 1, 0, 1] to [1, 3]. selected_indices = [ i for i, val in enumerate(request.json["selected_indices"]) if val == 1 ] # A list of all the generated runs (even the ones that are not # selected). The values of the `selected_indices` correspond to # the selected run. generated_runs = request.json["generated_pipeline_runs"] for i, idx in enumerate(selected_indices): # NOTE: the order of the `pipeline_runs` property # corresponds to the order of the `selected_indices`. pipeline_run = request.json["experiment_json"]["pipeline_runs"][i] pr = PipelineRun( uuid=pipeline_run["run_uuid"], experiment=experiment_uuid, parameter_json=generated_runs[idx], id=pipeline_run["pipeline_run_id"], ) db.session.add(pr) db.session.commit() return jsonify({"success": True}) @app.route("/async/pipelines/create/<project_uuid>", methods=["POST"]) def pipelines_create(project_uuid): pipeline_path = request.json["pipeline_path"] if (Pipeline.query.filter( Pipeline.project_uuid == project_uuid).filter( Pipeline.path == pipeline_path).count() == 0): pipeline_uuid = str(uuid.uuid4()) pipeline = Pipeline(path=pipeline_path, uuid=pipeline_uuid, project_uuid=project_uuid) db.session.add(pipeline) db.session.commit() pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid) os.makedirs(pipeline_dir, exist_ok=True) # generate clean pipeline.json pipeline_json = { "name": request.json["name"], "version": "1.0.0", "uuid": pipeline_uuid, "settings": { "auto_eviction": False, "data_passing_memory_size": "1GB", }, "steps": {}, } with open(pipeline_json_path, "w") as pipeline_json_file: pipeline_json_file.write(json.dumps(pipeline_json, indent=4)) return jsonify({"success": True}) else: return ( jsonify({ "message": "Pipeline already exists at path '%s'." 
    class ImportGitProjectListResource(Resource):
        def post(self):
            n_uuid = str(uuid.uuid4())
            new_task = BackgroundTask(task_uuid=n_uuid,
                                      task_type="GIT_CLONE_PROJECT",
                                      status="PENDING")
            db.session.add(new_task)
            db.session.commit()

            # start the background process in charge of cloning
            file_dir = os.path.dirname(os.path.realpath(__file__))
            args = [
                "python3",
                "-m",
                "scripts.background_tasks",
                "--type",
                "git_clone_project",
                "--uuid",
                n_uuid,
                "--url",
                request.json["url"],
            ]

            project_name = request.json.get("project_name", None)
            if project_name:
                args.append("--path")
                args.append(str(project_name))

            background_task_process = Popen(
                args,
                cwd=os.path.join(file_dir, "../.."),
                stderr=subprocess.STDOUT,
            )

            return background_task_schema.dump(new_task)

    api.add_resource(ImportGitProjectListResource,
                     "/async/projects/import-git")

    @app.route("/async/projects", methods=["GET", "POST", "DELETE"])
    def projects():
        projects_dir = os.path.join(app.config["USER_DIR"], "projects")
        project_paths = [
            name for name in os.listdir(projects_dir)
            if os.path.isdir(os.path.join(projects_dir, name))
        ]

        # look for projects that have been removed through the
        # filesystem by the user, and clean up dangling resources
        fs_removed_projects = Project.query.filter(
            Project.path.notin_(project_paths)).all()
        for fs_removed_project in fs_removed_projects:
            cleanup_project_from_orchest(fs_removed_project)

        if len(fs_removed_projects) > 0:
            # refresh kernels after change in environments
            populate_kernels(app, db)

        # detect new projects by detecting directories that were not
        # registered in the db as projects
        existing_project_paths = [
            project.path for project in Project.query.filter(
                Project.path.in_(project_paths)).all()
        ]
        new_project_paths = set(project_paths) - set(existing_project_paths)
        for new_project_path in new_project_paths:
            try:
                init_project(new_project_path)
            except Exception as e:
                logging.error(
                    f"Error during project initialization of "
                    f"{new_project_path}: {e}")

        if request.method == "GET":
            projects = projects_schema.dump(Project.query.all())

            # Get counts for: pipelines, experiments and environments
            for project in projects:
                # catch both pipelines of newly initialized projects
                # and manually initialized pipelines of existing
                # projects
                sync_project_pipelines_db_state(project["uuid"])

                project["pipeline_count"] = Pipeline.query.filter(
                    Pipeline.project_uuid == project["uuid"]).count()
                project["experiment_count"] = Experiment.query.filter(
                    Experiment.project_uuid == project["uuid"]).count()
                project["environment_count"] = len(
                    get_environments(project["uuid"]))

            return jsonify(projects)

        elif request.method == "DELETE":
            project_uuid = request.json["project_uuid"]

            project = Project.query.filter(
                Project.uuid == project_uuid).first()

            if project is not None:
                project_path = project_uuid_to_path(project_uuid)
                full_project_path = os.path.join(projects_dir, project_path)

                shutil.rmtree(full_project_path)

                cleanup_project_from_orchest(project)

                # refresh kernels after change in environments
                populate_kernels(app, db)

                return jsonify({"message": "Project deleted."})
            else:
                return (
                    jsonify({
                        "message": "Project not found for UUID %s." %
                        project_uuid
                    }),
                    404,
                )
        elif request.method == "POST":
            project_path = request.json["name"]

            if project_path not in project_paths:
                full_project_path = os.path.join(projects_dir, project_path)
                if not os.path.isdir(full_project_path):
                    os.makedirs(full_project_path)
                    # note that given the current POST->GET pattern we
                    # have in the GUI, this line does not strictly need
                    # to be there, since the new directory will be
                    # picked up on the GET request and initialized;
                    # placing it here is more explicit and relies less
                    # on that POST->GET pattern from the GUI
                    try:
                        init_project(project_path)
                    except Exception as e:
                        return (
                            jsonify({
                                "message":
                                "Failed to create the project. Error: %s" % e
                            }),
                            500,
                        )
                else:
                    return (
                        jsonify(
                            {"message": "Project directory already exists."}),
                        409,
                    )
            else:
                return (
                    jsonify({"message": "Project name already exists."}),
                    409,
                )

            return jsonify({"message": "Project created."})

    @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>",
               methods=["GET"])
    def pipeline_get(project_uuid, pipeline_uuid):
        pipeline = (Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).filter(
                Pipeline.uuid == pipeline_uuid).first())

        if pipeline is None:
            return jsonify({"message": "Pipeline doesn't exist."}), 404
        else:
            return jsonify(pipeline_schema.dump(pipeline))

    @app.route("/async/pipelines/<project_uuid>", methods=["GET"])
    def pipelines_get(project_uuid):
        try:
            sync_project_pipelines_db_state(project_uuid)
        except Exception as e:
            return jsonify({"message": str(e)}), 500

        pipelines = Pipeline.query.filter(
            Pipeline.project_uuid == project_uuid).all()

        pipelines_augmented = []
        for pipeline in pipelines:
            pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                                   pipeline.project_uuid)

            pipeline_augmented = {
                "uuid": pipeline.uuid,
                "path": pipeline.path,
            }
            if os.path.isfile(pipeline_json_path):
                with open(pipeline_json_path, "r") as json_file:
                    pipeline_json = json.load(json_file)
                    pipeline_augmented["name"] = pipeline_json["name"]
            else:
                pipeline_augmented[
                    "name"] = "Warning: pipeline file was not found."
            pipelines_augmented.append(pipeline_augmented)

        json_string = json.dumps({
            "success": True,
            "result": pipelines_augmented
        })
        return json_string, 200, {"content-type": "application/json"}

    @app.route(
        "/async/file-viewer/<project_uuid>/<pipeline_uuid>/<step_uuid>",
        methods=["GET"],
    )
    def file_viewer(project_uuid, pipeline_uuid, step_uuid):
        experiment_uuid = request.args.get("experiment_uuid")
        pipeline_run_uuid = request.args.get("pipeline_run_uuid")

        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                               experiment_uuid,
                                               pipeline_run_uuid)
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                              experiment_uuid,
                                              pipeline_run_uuid)

        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            try:
                file_path = os.path.join(
                    pipeline_dir,
                    pipeline_json["steps"][step_uuid]["file_path"])
                filename = pipeline_json["steps"][step_uuid]["file_path"]
                step_title = pipeline_json["steps"][step_uuid]["title"]
            except Exception as e:
                logging.info(e)
                return return_404("Invalid JSON for pipeline %s error: %s" %
                                  (pipeline_json_path, e))
        else:
            return return_404("Could not find pipeline.json for pipeline %s" %
                              pipeline_json_path)

        file_ext = file_path.split(".")[-1]
        file_content = ""

        if file_ext == "ipynb":
            if os.path.isfile(file_path):
                try:
                    html_exporter = HTMLExporter()
                    (file_content, _) = html_exporter.from_filename(file_path)
                except IOError as error:
                    logging.info("Error opening notebook file %s error: %s" %
                                 (file_path, error))
                    return return_404("Could not find notebook file %s" %
                                      file_path)
        else:
            try:
                with open(file_path) as file:
                    file_content = file.read()
            except (IOError, Exception) as e:
                return jsonify({"message": "Could not read file."}), 500

        return jsonify({
            "ext": file_ext,
            "content": file_content,
            "step_title": step_title,
            "filename": filename,
        })

    @app.route("/async/pipelines/json/<project_uuid>/<pipeline_uuid>",
               methods=["GET", "POST"])
    def pipelines_json(project_uuid, pipeline_uuid):
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("experiment_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if request.method == "POST":
            pipeline_directory = get_pipeline_directory(
                pipeline_uuid,
                project_uuid,
                request.args.get("experiment_uuid"),
                request.args.get("pipeline_run_uuid"),
            )

            # parse JSON
            pipeline_json = json.loads(request.form.get("pipeline_json"))

            # first create all files part of the pipeline_json definition
            # TODO: consider removing other files (no way to do this
            # reliably, special case might be rename)
            create_pipeline_files(pipeline_json, pipeline_directory,
                                  project_uuid)

            # side effect: for each Notebook in the pipeline.json set
            # the correct kernel
            pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                          project_uuid)

            with open(pipeline_json_path, "w") as json_file:
                json_file.write(json.dumps(pipeline_json, indent=4))

            # Analytics call
            send_anonymized_pipeline_definition(app, pipeline_json)

            return jsonify({"message": "Successfully saved pipeline."})

        elif request.method == "GET":
            if not os.path.isfile(pipeline_json_path):
                return (
                    jsonify({
                        "success": False,
                        "reason": ".orchest file doesn't exist at location %s"
                        % pipeline_json_path,
                    }),
                    404,
                )
            else:
                with open(pipeline_json_path) as json_file:
                    return jsonify({
                        "success": True,
                        "pipeline_json": json_file.read(),
                    })

        return ""

    @app.route(
        "/async/file-picker-tree/pipeline-cwd/<project_uuid>/<pipeline_uuid>",
        methods=["GET"],
    )
    def pipeline_cwd(project_uuid, pipeline_uuid):
        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        project_dir = get_project_directory(project_uuid)
        cwd = pipeline_dir.replace(project_dir, "")

        return jsonify({"cwd": cwd})

    @app.route("/async/file-picker-tree/<project_uuid>", methods=["GET"])
    def get_file_picker_tree(project_uuid):
        allowed_file_extensions = ["ipynb", "R", "py", "sh"]

        project_dir = get_project_directory(project_uuid)

        if not os.path.isdir(project_dir):
            return jsonify(
                {"message": "Project dir %s not found." % project_dir}), 404

        tree = {"type": "directory", "root": True, "name": "/", "children": []}

        dir_nodes = {}
        dir_nodes[project_dir] = tree

        for root, dirs, files in os.walk(project_dir):
            # exclude directories that start with "." from file_picker
            dirs[:] = [
                dirname for dirname in dirs if not dirname.startswith(".")
            ]

            for dirname in dirs:
                dir_path = os.path.join(root, dirname)
                dir_node = {
                    "type": "directory",
                    "name": dirname,
                    "children": [],
                }
                dir_nodes[dir_path] = dir_node
                dir_nodes[root]["children"].append(dir_node)

            for filename in files:
                if filename.split(".")[-1] in allowed_file_extensions:
                    file_node = {
                        "type": "file",
                        "name": filename,
                    }

                    # this key should always exist
                    try:
                        dir_nodes[root]["children"].append(file_node)
                    except KeyError as e:
                        logging.error(
                            "Key %s does not exist in dir_nodes %s. Error: %s"
                            % (root, dir_nodes, e))
                    except Exception as e:
                        logging.error("Error: %s" % e)

        return jsonify(tree)

    @app.route("/async/project-files/create/<project_uuid>",
               methods=["POST"])
    def create_project_file(project_uuid):
        """Create project file in specified directory within project."""

        project_dir = get_project_directory(project_uuid)

        # Client sends absolute path relative to project root, hence the
        # starting / is removed.
        file_path = os.path.join(project_dir, request.json["file_path"][1:])

        if os.path.isfile(file_path):
            return jsonify({"message": "File already exists."}), 409

        try:
            open(file_path, "a").close()
            return jsonify({"message": "File created."})
        except IOError as e:
            logging.error("Could not create file at %s. Error: %s" %
                          (file_path, e))

    @app.route("/async/project-files/exists/<project_uuid>/<pipeline_uuid>",
               methods=["POST"])
    def project_file_exists(project_uuid, pipeline_uuid):
        """Check whether a file exists."""

        pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid)
        file_path = os.path.join(pipeline_dir, request.json["relative_path"])

        if os.path.isfile(file_path):
            return jsonify({"message": "File exists."})
        else:
            return jsonify({"message": "File does not exist."}), 404
def register_views(app, db): errors = {} api = Api(app, errors=errors) projects_schema = ProjectSchema(many=True) environment_schema = EnvironmentSchema() environments_schema = EnvironmentSchema(many=True) background_task_schema = BackgroundTaskSchema() def register_environments(db, api): class EnvironmentsResource(Resource): def get(self, project_uuid): # NOTE: project_exists() is assumed to return True when the project is missing, matching how it is used for 404s throughout this file. if project_exists(project_uuid): return {"message": "Project could not be found."}, 404 return environments_schema.dump( get_environments(project_uuid, language=request.args.get("language"))) class EnvironmentResource(Resource): def put(self, project_uuid, environment_uuid): return self.post(project_uuid, environment_uuid) def get(self, project_uuid, environment_uuid): environment = get_environment(environment_uuid, project_uuid) if environment is None: return {"message": "Environment could not be found."}, 404 else: return environment_schema.dump(environment) def delete(self, project_uuid, environment_uuid): delete_environment(app, project_uuid, environment_uuid) # refresh kernels after change in environments populate_kernels(app, db, project_uuid) return jsonify( {"message": "Environment deletion was successful."}) def post(self, project_uuid, environment_uuid): # create a new environment in the project environment_json = request.json.get("environment") e = Environment( uuid=str(uuid.uuid4()), name=environment_json["name"], project_uuid=project_uuid, language=environment_json["language"], setup_script=preprocess_script( environment_json["setup_script"]), base_image=environment_json["base_image"], gpu_support=environment_json["gpu_support"], ) # use specified uuid if it's not keyword 'new' if environment_uuid != "new": e.uuid = environment_uuid else: url = (f'http://{app.config["ORCHEST_API_ADDRESS"]}' f"/api/environments/{project_uuid}") resp = requests.post(url, json={"uuid": e.uuid}) if resp.status_code != 201: return {}, resp.status_code, resp.headers.items() environment_dir = get_environment_directory( e.uuid, project_uuid) os.makedirs(environment_dir, exist_ok=True) serialize_environment_to_disk(e, environment_dir) # refresh kernels after change in environments populate_kernels(app, db, project_uuid) return environment_schema.dump(e) api.add_resource(EnvironmentsResource, "/store/environments/<string:project_uuid>") api.add_resource( EnvironmentResource, "/store/environments/<string:project_uuid>/<string:environment_uuid>", ) register_environments(db, api) def return_404(reason=""): json_string = json.dumps({"success": False, "reason": reason}) return json_string, 404, {"content-type": "application/json"} @app.route("/async/orchest-examples", methods=["GET"]) def orchest_examples(): return get_orchest_examples_json() @app.route("/async/orchest-update-info", methods=["GET"]) def orchest_update_info(): return get_orchest_update_info_json() @app.route("/async/server-config", methods=["GET"]) def server_config(): front_end_config = [ "FLASK_ENV", "TELEMETRY_DISABLED", "ENVIRONMENT_DEFAULTS", "ORCHEST_WEB_URLS", "CLOUD", "GPU_ENABLED_INSTANCE", "CLOUD_UNMODIFIABLE_CONFIG_VALUES", "INTERCOM_APP_ID", "INTERCOM_DEFAULT_SIGNUP_DATE", ] front_end_config_internal = [ "ORCHEST_SOCKETIO_ENV_IMG_BUILDING_NAMESPACE", "ORCHEST_SOCKETIO_JUPYTER_IMG_BUILDING_NAMESPACE", "PIPELINE_PARAMETERS_RESERVED_KEY", ] user_config = requests.get( f'http://{app.config["ORCHEST_API_ADDRESS"]}/api/ctl/orchest-settings' ).json() return jsonify({ "user_config": user_config, "config": { **{key: app.config[key] for key in front_end_config}, **{ key: getattr(_config,
key) for key in front_end_config_internal }, }, }) @app.route("/async/restart", methods=["POST"]) def restart(): resp = requests.post( f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/ctl/restart' ) return resp.content, resp.status_code, resp.headers.items() @app.route("/async/start-update", methods=["POST"]) def start_update(): resp = requests.post( f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/ctl' "/start-update") return resp.content, resp.status_code, resp.headers.items() @app.route("/heartbeat", methods=["GET"]) def heartbeat(): # Don't bubble up the fact that the heartbeat is proxied to the # orchest-api to the client. requests.get( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/info/' "client-heartbeat")) return "" @app.route("/async/version", methods=["GET"]) def version(): return {"version": get_repo_tag()} @app.route("/async/user-config", methods=["GET", "POST"]) def user_config(): current_config = requests.get( f'http://{app.config["ORCHEST_API_ADDRESS"]}/api/ctl/orchest-settings' ).json() if request.method == "GET": return { "user_config": current_config, } if request.method == "POST": # Updated config, from client. config = request.form.get("config") if config is None: return {"message": "No config was given."}, 400 try: # Only save if parseable JSON. config = json.loads(config) except json.JSONDecodeError as e: app.logger.debug(e, exc_info=True) return {"message": "Given config is invalid JSON."}, 400 resp = requests.post( f'http://{app.config["ORCHEST_API_ADDRESS"]}/api/ctl/orchest-settings', json=config, ) return resp.content, resp.status_code, resp.headers.items() @app.route("/async/host-info", methods=["GET"]) def host_info(): disk_info = subprocess.getoutput( "df -BKB /userdir --output=size,avail,itotal,fstype | sed -n '2{p;q}'" ) # Incoming data is in kB (-BKB) size, avail, total_inodes, fstype = disk_info.strip().split() # Remove the "B" total_inodes = int(total_inodes[:-1]) # Inode sizes in bytes by filesystem type; default to ext4's 256. inode_size = { "ext4": 256, "small": 128, "floppy": 128, "hurd": 128, }.get(fstype, 256) total_inodes_size = total_inodes * inode_size * 1e-9 # Remove the "kB" avail = int(avail[:-2]) * 1e-6 # Account for the 5% reserved root space, so that used + avail # add up to the total disk size the user would see in a file # explorer. Moreover, account for inodes. total = int(size[:-2]) * 1e-6 + total_inodes_size used = total - avail host_info = { "disk_info": { "used_GB": int(used), "avail_GB": int(avail), "used_pcent": (used / total) * 100, } } return host_info @app.route("/async/jupyter-setup-script", methods=["GET", "POST"]) def jupyter_setup_script(): setup_script_path = os.path.join(app.config["USER_DIR"], _config.JUPYTER_SETUP_SCRIPT) if request.method == "POST": setup_script = request.form.get("setup_script") try: with open(setup_script_path, "w") as f: f.write(preprocess_script(setup_script)) except IOError as io_error: current_app.logger.error("Failed to write setup_script %s" % io_error) return "" else: try: with open(setup_script_path, "r") as f: script = f.read() return jsonify({"script": script if script else ""}) except FileNotFoundError as fnf_error: current_app.logger.error( f"Failed to read setup_script {fnf_error}") return "" # Deprecated: With the new FileManager, this endpoint is no longer # used by FE.
@app.route("/async/pipelines/delete/<project_uuid>/<pipeline_uuid>", methods=["DELETE"]) def pipelines_delete(project_uuid, pipeline_uuid): try: with TwoPhaseExecutor(db.session) as tpe: DeletePipeline(tpe).transaction(project_uuid, pipeline_uuid) except Exception as e: return {"message": str(e)}, 500 return jsonify({"success": True}) @app.route("/async/pipelines/create/<project_uuid>", methods=["POST"]) def pipelines_create(project_uuid): pipeline_path = request.json["pipeline_path"] pipeline_name = request.json["name"] try: with TwoPhaseExecutor(db.session) as tpe: pipeline_uuid = CreatePipeline(tpe).transaction( project_uuid, pipeline_name, pipeline_path) return jsonify({"pipeline_uuid": pipeline_uuid}) except FileExistsError: return ( jsonify({ "message": "A pipeline with the given path already exists." }), 400, ) except Exception as e: return jsonify({"message": str(e)}), 409 class ImportGitProjectListResource(Resource): def post(self): try: with TwoPhaseExecutor(db.session) as tpe: task = ImportGitProject(tpe).transaction( request.json["url"], request.json.get("project_name")) except Exception as e: return jsonify({"message": str(e)}), 500 return background_task_schema.dump(task) api.add_resource(ImportGitProjectListResource, "/async/projects/import-git") @app.route("/async/projects/<project_uuid>", methods=["GET"]) def project_get(project_uuid): project = Project.query.filter(Project.uuid == project_uuid).first() if project is None: return jsonify({"message": "Project doesn't exist."}), 404 resp = requests.get( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}' f"/api/projects/{project_uuid}")) if resp.status_code == 404: return ( jsonify( {"message": "Project doesn't exist in the orchest-api."}), 404, ) elif resp.status_code != 200: return ( jsonify({"message": "Orchest-api project retrieval failed."}), resp.status_code, ) else: # Merge the project data coming from the orchest-api. counts = project_entity_counts(project_uuid, get_job_count=True) project = { **project.as_dict(), **resp.json(), **counts, "project_snapshot_size": get_project_snapshot_size(project_uuid), } return jsonify(project) @app.route("/async/projects/<project_uuid>", methods=["PUT"]) def project_put(project_uuid): # Move the project on the FS and update the db. new_name = request.json.get("name") if new_name is not None: try: with TwoPhaseExecutor(db.session) as tpe: RenameProject(tpe).transaction(project_uuid, new_name) except app_error.ActiveSession: return ( jsonify({ "message": "Can't rename a project with active sessions.", # TODO: we need a standardized way of # communicating with the frontend. "code": 0, }), 409, ) except sqlalchemy.exc.IntegrityError: return ( jsonify({ "message": "A project with this name already exists.", "code": 1, }), 409, ) except app_error.InvalidProjectName: return ( jsonify({ "message": "Invalid project name.", "code": 2, }), 400, ) except OSError as e: if e.errno == 39: return ( jsonify({ "message": "Directory exists.", "code": 3, }), 409, ) # else raise e except NoResultFound: return jsonify({"message": "Project doesn't exist."}), 404 except Exception as e: return ( jsonify({ "message": f"Failed to rename project: {e}. 
{type(e)}" }), 500, ) resp = requests.put( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}' f"/api/projects/{project_uuid}"), json=request.json, ) return resp.content, resp.status_code, resp.headers.items() @app.route("/async/projects", methods=["GET"]) def projects_get(): if request.args.get("skip_discovery") != "true": discoverFSDeletedProjects() discoverFSCreatedProjects() # Projects that are in a INITIALIZING or DELETING state won't # be shown until ready. projects = projects_schema.dump( Project.query.filter_by(status="READY").all()) if request.args.get("session_counts") == "true": session_counts = get_session_counts() if request.args.get("job_counts") == "true": job_counts = get_job_counts() for project in projects: # Discover both pipelines of newly initialized projects and # manually initialized pipelines of existing projects. Use a # a TwoPhaseExecutor for each project so that issues in one # project do not hinder the pipeline synchronization of # others. if request.args.get("skip_discovery") != "true": try: with TwoPhaseExecutor(db.session) as tpe: SyncProjectPipelinesDBState(tpe).transaction( project["uuid"]) except Exception as e: current_app.logger.error( ("Error during project pipelines synchronization of " f'{project["path"]}: {e}.')) counts = project_entity_counts(project["uuid"]) project.update(counts) if request.args.get("session_counts") == "true": project.update( {"session_count": session_counts.get(project["uuid"], 0)}) if request.args.get("job_counts") == "true": project.update( {"job_count": job_counts.get(project["uuid"], 0)}) return jsonify(projects) @app.route("/async/projects", methods=["POST"]) def projects_post(): try: with TwoPhaseExecutor(db.session) as tpe: project_uuid = CreateProject(tpe).transaction( request.json["name"]) return jsonify({"project_uuid": project_uuid}) except app_error.InvalidProjectName as e: return ( jsonify({"message": str(e)}), 400, ) except Exception as e: # The sql integrity error message can be quite ugly. if isinstance(e, sqlalchemy.exc.IntegrityError): msg = f'Project "{request.json["name"]}" already exists.' else: msg = str(e) return ( jsonify({"message": msg}), 500, ) @app.route("/async/projects", methods=["DELETE"]) def projects_delete(): try: with TwoPhaseExecutor(db.session) as tpe: DeleteProject(tpe).transaction(request.json["project_uuid"]) except Exception as e: return ( jsonify( {"message": f"Failed to delete the project. Error: {e}"}), 500, ) return jsonify({"message": "Project deleted."}) @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>", methods=["GET"]) def pipeline_get(project_uuid, pipeline_uuid): pipeline = (Pipeline.query.filter( Pipeline.project_uuid == project_uuid).filter( Pipeline.uuid == pipeline_uuid).first()) if pipeline is None: return jsonify({"message": "Pipeline doesn't exist."}), 404 resp = requests.get( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}' f"/api/pipelines/{project_uuid}/{pipeline_uuid}")) if resp.status_code == 404: return ( jsonify( {"message": "Pipeline doesn't exist in the orchest-api."}), 404, ) elif resp.status_code != 200: return ( jsonify({"message": "Orchest-api pipeline retrieval failed."}), resp.status_code, ) else: # Merge the pipeline data coming from the orchest-api. 
pipeline = {**pipeline.as_dict(), **resp.json()} return jsonify(pipeline) @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>", methods=["PUT"]) def pipeline_put(project_uuid, pipeline_uuid): path = request.json.get("path") if path is not None: try: with TwoPhaseExecutor(db.session) as tpe: MovePipeline(tpe).transaction(project_uuid, pipeline_uuid, path) except app_error.ActiveSession: return ( jsonify({ "message": "Can't move a pipeline with active sessions.", "code": 1, }), 409, ) except app_error.PipelineFileExists: return ( jsonify({ "message": "File exists.", "code": 2 }), 409, ) except NoResultFound: return jsonify({ "message": "Pipeline doesn't exist.", "code": 3 }), 404 except ValueError: return jsonify({ "message": "Invalid file name.", "code": 4 }), 409 except app_error.PipelineFileDoesNotExist: return ( jsonify({ "message": "Pipeline file doesn't exist.", "code": 5 }), 409, ) except app_error.OutOfProjectError: return ( jsonify({ "message": "Can't move outside of the project.", "code": 6 }), 409, ) except Exception as e: return ( jsonify({ "message": f"Failed to move pipeline: {e}.", "code": 0 }), 500, ) resp = requests.put( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}' f"/api/pipelines/{project_uuid}/{pipeline_uuid}"), json=request.json, ) return resp.content, resp.status_code, resp.headers.items() @app.route("/async/pipelines/<project_uuid>", methods=["GET"]) def pipelines_get(project_uuid): if project_exists(project_uuid): return jsonify({"message": "Project could not be found."}), 404 try: with TwoPhaseExecutor(db.session) as tpe: SyncProjectPipelinesDBState(tpe).transaction(project_uuid) except Exception as e: msg = ("Error during project pipelines synchronization of " f"{project_uuid}: {str(e)}.") return jsonify({"message": msg}), 500 pipelines = Pipeline.query.filter( Pipeline.project_uuid == project_uuid).all() pipelines_augmented = [] for pipeline in pipelines: pipeline_augmented = { "uuid": pipeline.uuid, "path": pipeline.path, } pipeline_json = get_pipeline_json(pipeline.uuid, pipeline.project_uuid) if pipeline_json is not None: pipeline_augmented["name"] = pipeline_json["name"] else: pipeline_augmented[ "name"] = "Warning: pipeline file was not found." pipelines_augmented.append(pipeline_augmented) json_string = json.dumps({ "success": True, "result": pipelines_augmented }) return json_string, 200, {"content-type": "application/json"} @app.route("/async/pipelines", methods=["GET"]) def pipelines_get_all(): pipelines = Pipeline.query.all() pipelines_augmented = [] for pipeline in pipelines: pipeline_augmented = { "uuid": pipeline.uuid, "path": pipeline.path, "project_uuid": pipeline.project_uuid, } pipeline_json = get_pipeline_json(pipeline.uuid, pipeline.project_uuid) if pipeline_json is not None: pipeline_augmented["name"] = pipeline_json["name"] else: pipeline_augmented[ "name"] = "Warning: pipeline file was not found." 
pipelines_augmented.append(pipeline_augmented) json_string = json.dumps({ "success": True, "result": pipelines_augmented }) return json_string, 200, {"content-type": "application/json"} @app.route( "/async/file-viewer/<project_uuid>/<pipeline_uuid>/<step_uuid>", methods=["GET"], ) def file_viewer(project_uuid, pipeline_uuid, step_uuid): job_uuid = request.args.get("job_uuid") pipeline_run_uuid = request.args.get("pipeline_run_uuid") pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid, job_uuid, pipeline_run_uuid) pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid, job_uuid, pipeline_run_uuid) if os.path.isfile(pipeline_json_path): with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) try: step_file_path = pipeline_json["steps"][step_uuid]["file_path"] if not is_valid_pipeline_relative_path( project_uuid, pipeline_uuid, step_file_path): raise app_error.OutOfProjectError( "Step path points outside of the project directory.") if step_file_path.startswith("/"): file_path = resolve_absolute_path(step_file_path) else: file_path = safe_join(pipeline_dir, step_file_path) filename = pipeline_json["steps"][step_uuid]["file_path"] step_title = pipeline_json["steps"][step_uuid]["title"] except Exception as e: app.logger.info(e) return return_404("Invalid JSON for pipeline %s error: %s" % (pipeline_json_path, e)) else: return return_404("Could not find pipeline.json for pipeline %s" % pipeline_json_path) file_ext = file_path.split(".")[-1] file_content = "" if file_ext == "ipynb": if os.path.isfile(file_path): try: html_exporter = HTMLExporter() (file_content, _) = html_exporter.from_filename(file_path) # custom CSS custom_style = "<style>.CodeMirror pre {overflow: auto}</style>" file_content = file_content.replace( "</head>", custom_style + "</head>", 1) except IOError as error: app.logger.info( "Error opening notebook file %s error: %s" % (file_path, error)) return return_404( "Could not find notebook file %s" % file_path) else: try: with open(file_path) as file: file_content = file.read() except Exception: return jsonify({"message": "Could not read file."}), 500 return jsonify({ "ext": file_ext, "content": file_content, "step_title": step_title, "filename": filename, }) @app.route("/async/pipelines/json/<project_uuid>/<pipeline_uuid>", methods=["GET", "POST"]) def pipelines_json(project_uuid, pipeline_uuid): if request.method == "POST": pipeline_json_path = get_pipeline_path( pipeline_uuid, project_uuid, None, request.args.get("pipeline_run_uuid"), ) pipeline_directory = get_pipeline_directory( pipeline_uuid, project_uuid, None, request.args.get("pipeline_run_uuid"), ) # Parse JSON. pipeline_json = json.loads(request.form.get("pipeline_json")) # Normalize relative paths. for step in pipeline_json["steps"].values(): is_project_file = is_valid_pipeline_relative_path( project_uuid, pipeline_uuid, step["file_path"]) is_data_file = is_valid_data_path(step["file_path"]) if not (is_project_file or is_data_file): raise app_error.OutOfAllowedDirectoryError( "File is neither in the project nor in the data directory." ) if not step["file_path"].startswith("/"): step["file_path"] = normalize_project_relative_path( step["file_path"]) errors = check_pipeline_correctness(pipeline_json) if errors: msg = {"success": False} reason = ", ".join(errors) reason = f"Invalid value: {reason}." msg["reason"] = reason return jsonify(msg), 400 # Side effect: for each Notebook in the pipeline.json set the # correct kernel.
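# pipeline_set_notebook_kernels touches each step's notebook on disk; a missing metadata.kernelspec structure surfaces as the KeyError handled below.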
try: pipeline_set_notebook_kernels(pipeline_json, pipeline_directory, project_uuid) except KeyError: msg = { "success": False, "reason": "Invalid Notebook metadata structure.", } return jsonify(msg), 400 with open(pipeline_json_path, "r") as json_file: old_pipeline_json = json.load(json_file) # Save the pipeline JSON again to make sure its keys are # sorted. with open(pipeline_json_path, "w") as json_file: json.dump(pipeline_json, json_file, indent=4, sort_keys=True) if old_pipeline_json["name"] != pipeline_json["name"]: resp = requests.put( (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}' f"/api/pipelines/{project_uuid}/{pipeline_uuid}"), json={"name": pipeline_json["name"]}, ) if resp.status_code != 200: return ( jsonify( {"message": "Failed to PUT name to orchest-api."}), resp.status_code, ) # Analytics call. analytics.send_event( app, analytics.Event.PIPELINE_SAVE, {"pipeline_definition": pipeline_json}, ) return jsonify({ "success": True, "message": "Successfully saved pipeline." }) elif request.method == "GET": pipeline_json_path = get_pipeline_path( pipeline_uuid, project_uuid, request.args.get("job_uuid"), request.args.get("pipeline_run_uuid"), ) if not os.path.isfile(pipeline_json_path): return ( jsonify({ "success": False, "reason": ".orchest file doesn't exist at location " + pipeline_json_path, }), 404, ) else: pipeline_json = get_pipeline_json(pipeline_uuid, project_uuid) return jsonify({ "success": True, "pipeline_json": json.dumps(pipeline_json) }) @app.route( "/async/file-picker-tree/pipeline-cwd/<project_uuid>/<pipeline_uuid>", methods=["GET"], ) def pipeline_cwd(project_uuid, pipeline_uuid): pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) project_dir = get_project_directory(project_uuid) cwd = pipeline_dir.replace(project_dir, "") return jsonify({"cwd": cwd}) @app.route("/async/file-management/create", methods=["POST"]) def filemanager_create(): """ Create an empty file with the given path within `/project-dir` or `/data`. """ root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 file_path = safe_join(root_dir_path, path[1:]) if file_path.split(".")[-1] not in _config.ALLOWED_FILE_EXTENSIONS: return jsonify({"message": "Given file type is not supported."}), 409 directory, _ = os.path.split(file_path) if directory: os.makedirs(directory, exist_ok=True) if os.path.isfile(file_path): return jsonify({"message": "File already exists."}), 409 try: create_empty_file(file_path) return jsonify({"message": "File created."}) except IOError as e: app.logger.error( f"Could not create file at {file_path}.
Error: {e}") @app.route("/async/file-management/exists", methods=["GET"]) def filemanager_exists(): """Check whether file exists.""" path = request.args.get("path") project_uuid = request.args.get("project_uuid") pipeline_uuid = request.args.get("pipeline_uuid") # currently this endpoint only handles "/data" # if path is absolute if path.startswith("/") and not path.startswith("/data"): return jsonify({"message": "Illegal file path prefix."}), 400 file_path = None if path.startswith("/"): file_path = resolve_absolute_path(path) if not is_valid_data_path(file_path, True): raise app_error.OutOfDataDirectoryError( "Path points outside of the data directory.") else: pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) file_path = normalize_project_relative_path(path) file_path = os.path.join(pipeline_dir, file_path) if file_path is None: return jsonify({"message": "Failed to process file_path."}), 500 if os.path.isfile(file_path): return jsonify({"message": "File exists."}) else: return jsonify({"message": "File does not exists."}), 404 @app.route("/async/file-management/delete", methods=["POST"]) def filemanager_delete(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 # Make absolute path relative target_path = safe_join(root_dir_path, path[1:]) if target_path == root_dir_path: return ( jsonify({ "message": ("It is not allowed to delete roots " "through the file-manager.") }), 403, ) if os.path.exists(target_path): try: rmtree(target_path) except Exception: return jsonify({"message": "Deletion failed."}), 500 else: return jsonify( {"message": "No file or directory at path %s" % path}), 500 return jsonify({"message": "Success"}) @app.route("/async/file-management/duplicate", methods=["POST"]) def filemanager_duplicate(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 # Make absolute path relative target_path = safe_join(root_dir_path, path[1:]) if os.path.isfile(target_path) or os.path.isdir(target_path): new_path = find_unique_duplicate_filepath(target_path) try: if os.path.isfile(target_path): copytree(target_path, new_path) else: copytree(target_path, new_path, use_gitignore=False) except Exception as e: app.logger.error(e) return jsonify({"message": "Copy of file/directory failed"}), 500 else: return jsonify( {"message": "No file or directory at path %s" % path}), 500 return jsonify({"message": "Success"}) @app.route("/async/file-management/create-dir", methods=["POST"]) def filemanager_create_dir(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 # Make absolute path relative path = "/".join(path.split("/")[1:]) full_path = safe_join(root_dir_path, path) if os.path.isdir(full_path) or os.path.isfile(full_path): return jsonify({"message": "Path already exists"}), 500 # even if name ends like an extension, e.g. 
"my-folder.txt" # it will be seen as a folder name os.makedirs(full_path, exist_ok=True) return jsonify({"message": "Success"}) @app.route("/async/file-management/upload", methods=["POST"]) def filemanager_upload(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 app.logger.debug(path) # check if the post request has the file part if "file" not in request.files or request.files["file"].filename == "": return jsonify({"message": "No file found"}), 500 file = request.files["file"] if file and allowed_file(file.filename): filename = file.filename.split(os.sep)[-1] # Trim path for joining (up until this point paths always # start and end with a "/") path = path[1:] dir_path = safe_join(root_dir_path, path) # Create directory if it doesn't exist if not os.path.isdir(dir_path): os.makedirs(dir_path, exist_ok=True) file_path = safe_join(dir_path, filename) file.save(file_path) return jsonify({"file_path": file_path}) @app.route("/async/file-management/rename", methods=["POST"]) def filemanager_rename(): old_path = request.args.get("old_path") new_path = request.args.get("new_path") old_root = request.args.get("old_root") new_root = request.args.get("new_root") project_uuid = request.args.get("project_uuid") try: old_root_path, _ = process_request(root=old_root, path=new_path, project_uuid=project_uuid) new_root_path, _ = process_request(root=new_root, path=new_path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 abs_old_path = safe_join(old_root_path, old_path[1:]) abs_new_path = safe_join(new_root_path, new_path[1:]) try: os.rename(abs_old_path, abs_new_path) return jsonify({"message": "Success"}) except Exception: return jsonify({"message": "Failed to rename"}), 500 @app.route("/async/file-management/download", methods=["GET"]) def filemanager_download(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 target_path = safe_join(root_dir_path, path[1:]) if os.path.isfile(target_path): return send_file(target_path, as_attachment=True) else: memory_file = io.BytesIO() with zipfile.ZipFile(memory_file, "w", zipfile.ZIP_STORED) as zf: zipdir(target_path, zf) memory_file.seek(0) return send_file( memory_file, mimetype="application/zip", as_attachment=True, attachment_filename=os.path.basename(target_path[:-1]) + ".zip", ) @app.route("/async/file-management/import-project-from-data", methods=["POST"]) def filemanager_import_project_from_data(): """Import a project from the data directory. A temporary workaround to import uploaded projects correctly. """ name = request.args.get("name") if name is None or os.path.sep in name: return jsonify({"message": f"Invalid name: {name}"}), 400 from_path = safe_join("/userdir/data", name) to_path = safe_join("/userdir/projects", name) os.rename(from_path, to_path) # Pick up the project from the fs. 
with TwoPhaseExecutor(db.session) as tpe: project_uuid = CreateProject(tpe).transaction(name) return {"project_uuid": project_uuid}, 201 @app.route("/async/file-management/extension-search", methods=["GET"]) def filemanager_extension_search(): root = request.args.get("root") path = request.args.get("path") project_uuid = request.args.get("project_uuid") extensions = request.args.get("extensions") try: root_dir_path, _ = process_request(root=root, path=path, project_uuid=project_uuid) except Exception as e: return jsonify({"message": str(e)}), 400 if extensions is None: return jsonify({"message": "extensions is required."}), 400 path_filter = path extensions = extensions.split(",") # Make absolute path relative path_filter = path_filter[1:] app.logger.info(f"Path filter {path_filter}") matches = [] for extension in extensions: matches += list( pathlib.Path(safe_join(root_dir_path, path_filter)).glob( "**/*.{}".format(extension))) return jsonify({ "files": [os.path.relpath(str(match), root_dir_path) for match in matches] }) @app.route("/async/file-management/browse", methods=["GET"]) def browse_files(): root = request.args.get("root") path = request.args.get("path") depth_as_string = request.args.get("depth") project_uuid = request.args.get("project_uuid") try: root_dir_path, depth = process_request( root=root, path=path, project_uuid=project_uuid, depth=depth_as_string, is_path_required=False, ) except Exception as e: return jsonify({"message": str(e)}), 400 # Path path_filter = path if path else "/" app.logger.info(f"Path filter {path_filter}") return jsonify( generate_tree(root_dir_path, path_filter=path_filter, depth=depth))
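# Hedged usage sketch for the file-management endpoints above: clients pass
# `root`, `path` and `project_uuid` as query parameters, with `path` always
# starting with "/" (the handlers strip the leading slash before joining it
# onto the resolved root). The base URL and uuid below are placeholder
# assumptions, not values from the app.
import requests

BASE_URL = "http://localhost:8000"  # assumed address of the webserver
params = {
    "root": "/project-dir",  # or "/data"
    "path": "/src/train.py",
    "project_uuid": "<project-uuid>",
}
resp = requests.post(f"{BASE_URL}/async/file-management/create", params=params)
# e.g. 200 {"message": "File created."} or 409 if it already exists
print(resp.status_code, resp.json())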
def register_views(app, db): errors = { "DataSourceNameInUse": { "message": "A data source with this name already exists.", "status": 409, }, } api = Api(app, errors=errors) class DataSourceNameInUse(HTTPException): pass project_schema = ProjectSchema() projects_schema = ProjectSchema(many=True) pipeline_schema = PipelineSchema() pipelines_schema = PipelineSchema(many=True) datasource_schema = DataSourceSchema() datasources_schema = DataSourceSchema(many=True) environment_schema = EnvironmentSchema() environments_schema = EnvironmentSchema(many=True) experiment_schema = ExperimentSchema() experiments_schema = ExperimentSchema(many=True) background_task_schema = BackgroundTaskSchema() def register_environments(db, api): class EnvironmentsResource(Resource): def get(self, project_uuid): return environments_schema.dump( get_environments(project_uuid, language=request.args.get("language"))) class EnvironmentResource(Resource): def put(self, project_uuid, environment_uuid): return self.post(project_uuid, environment_uuid) def get(self, project_uuid, environment_uuid): environment_dir = get_environment_directory( environment_uuid, project_uuid) return read_environment_from_disk(environment_dir) def delete(self, project_uuid, environment_uuid): environment_dir = get_environment_directory( environment_uuid, project_uuid) shutil.rmtree(environment_dir) # refresh kernels after change in environments populate_kernels(app, db) try: requests.delete("http://" + app.config["ORCHEST_API_ADDRESS"] + "/api/environment-images/%s/%s" % (project_uuid, environment_uuid)) except Exception as e: logging.warning("Failed to delete EnvironmentImage: %s" % e) return jsonify( {"message": "Environment deletion was successful."}) def post(self, project_uuid, environment_uuid): # create a new environment in the project environment_json = request.json.get("environment") e = Environment( uuid=str(uuid.uuid4()), name=environment_json["name"], project_uuid=project_uuid, language=environment_json["language"], startup_script=environment_json["startup_script"], base_image=environment_json["base_image"], gpu_support=environment_json["gpu_support"], ) # use specified uuid if it's not keyword 'new' if environment_uuid != "new": e.uuid = environment_uuid environment_dir = get_environment_directory( e.uuid, project_uuid) os.makedirs(environment_dir, exist_ok=True) serialize_environment_to_disk(e, environment_dir) # refresh kernels after change in environments populate_kernels(app, db) return environment_schema.dump(e) api.add_resource(EnvironmentsResource, "/store/environments/<string:project_uuid>") api.add_resource( EnvironmentResource, "/store/environments/<string:project_uuid>/<string:environment_uuid>", ) def register_datasources(db, api): class DataSourcesResource(Resource): def get(self): show_internal = True if request.args.get("show_internal") == "false": show_internal = False if show_internal: datasources = DataSource.query.all() else: datasources = DataSource.query.filter( ~DataSource.name.like("\_%", escape="\\")).all() return datasources_schema.dump(datasources) class DataSourceResource(Resource): def put(self, name): ds = DataSource.query.filter(DataSource.name == name).first() if ds is None: return "", 404 ds.name = request.json["name"] ds.source_type = request.json["source_type"] ds.connection_details = request.json["connection_details"] db.session.commit() return datasource_schema.dump(ds) def get(self, name): ds = DataSource.query.filter(DataSource.name == name).first() if ds is None: return "", 404 return
datasource_schema.dump(ds) def delete(self, name): ds = DataSource.query.filter(DataSource.name == name).first() if ds is None: return "", 404 db.session.delete(ds) db.session.commit() return jsonify( {"message": "Data source deletion was successful"}) def post(self, name): if DataSource.query.filter( DataSource.name == name).count() > 0: raise DataSourceNameInUse() new_ds = DataSource( name=name, source_type=request.json["source_type"], connection_details=request.json["connection_details"], ) db.session.add(new_ds) db.session.commit() return datasource_schema.dump(new_ds) api.add_resource(DataSourcesResource, "/store/datasources") api.add_resource(DataSourceResource, "/store/datasources/<string:name>") def register_experiments(db, api): class ExperimentsResource(Resource): def get(self): experiments = Experiment.query.all() return experiments_schema.dump(experiments) class ExperimentResource(Resource): def put(self, experiment_uuid): ex = Experiment.query.filter( Experiment.uuid == experiment_uuid).first() if ex is None: return "", 404 ex.name = request.json["name"] ex.pipeline_uuid = request.json["pipeline_uuid"] ex.pipeline_name = request.json["pipeline_name"] ex.strategy_json = request.json["strategy_json"] ex.draft = request.json["draft"] db.session.commit() return experiment_schema.dump(ex) def get(self, experiment_uuid): ex = Experiment.query.filter( Experiment.uuid == experiment_uuid).first() if ex is None: return "", 404 return experiment_schema.dump(ex) def delete(self, experiment_uuid): # remove experiment directory ex = Experiment.query.filter( Experiment.uuid == experiment_uuid).first() if ex is None: return "", 404 remove_experiment_directory(ex.uuid, ex.pipeline_uuid, ex.project_uuid) db.session.delete(ex) db.session.commit() return jsonify( {"message": "Experiment termination was successful"}) def post(self, experiment_uuid): new_ex = Experiment( uuid=experiment_uuid, name=request.json["name"], pipeline_uuid=request.json["pipeline_uuid"], pipeline_name=request.json["pipeline_name"], strategy_json=request.json["strategy_json"], draft=request.json["draft"], ) db.session.add(new_ex) db.session.commit() return experiment_schema.dump(new_ex) api.add_resource(ExperimentsResource, "/store/experiments") api.add_resource(ExperimentResource, "/store/experiments/<string:experiment_uuid>") register_datasources(db, api) register_experiments(db, api) register_environments(db, api) def return_404(reason=""): json_string = json.dumps({"success": False, "reason": reason}) return json_string, 404, {"content-type": "application/json"} def generate_gateway_kernel_name(project_uuid, environment_uuid): return _config.ENVIRONMENT_IMAGE_NAME.format( project_uuid=project_uuid, environment_uuid=environment_uuid) def build_environments(environment_uuids, project_uuid): project_path = project_uuid_to_path(project_uuid) environment_build_requests = [{ "project_uuid": project_uuid, "project_path": project_path, "environment_uuid": environment_uuid, } for environment_uuid in environment_uuids] return api_proxy_environment_builds(environment_build_requests, app.config["ORCHEST_API_ADDRESS"]) def build_environments_for_project(project_uuid): environments = get_environments(project_uuid) return build_environments( [environment.uuid for environment in environments], project_uuid) def populate_default_environments(project_uuid): for env_spec in app.config["DEFAULT_ENVIRONMENTS"]: e = Environment(**env_spec) e.uuid = str(uuid.uuid4()) e.project_uuid = project_uuid environment_dir = 
get_environment_directory(e.uuid, project_uuid) os.makedirs(environment_dir, exist_ok=True) serialize_environment_to_disk(e, environment_dir) def pipeline_set_notebook_kernels(pipeline_json, pipeline_directory, project_uuid): # for each step set the correct notebook kernel if it exists steps = pipeline_json["steps"].keys() for key in steps: step = pipeline_json["steps"][key] if step["file_path"].split(".")[-1] == "ipynb": notebook_path = os.path.join(pipeline_directory, step["file_path"]) if os.path.isfile(notebook_path): gateway_kernel = generate_gateway_kernel_name( project_uuid, step["environment"]) with open(notebook_path, "r") as file: notebook_json = json.load(file) notebook_changed = False if (notebook_json["metadata"]["kernelspec"]["name"] != gateway_kernel): notebook_changed = True notebook_json["metadata"]["kernelspec"][ "name"] = gateway_kernel environment = (Environment.query.filter( Environment.uuid == step["environment"]).filter( Environment.project_uuid == project_uuid).first()) if environment is not None: if (notebook_json["metadata"]["kernelspec"] ["display_name"] != environment.name): notebook_changed = True notebook_json["metadata"]["kernelspec"][ "display_name"] = environment.name else: logging.warning( "Could not find environment [%s] while setting notebook kernelspec for notebook %s." % (step["environment"], notebook_path)) if notebook_changed: with open(notebook_path, "w") as file: file.write(json.dumps(notebook_json, indent=2)) else: logging.info( "pipeline_set_notebook_kernels called on notebook_path that doesn't exist %s" % notebook_path) def generate_ipynb_from_template(step, project_uuid): # TODO: support additional languages besides Python and R if "python" in step["kernel"]["name"].lower(): template_path = os.path.join(app.config["RESOURCE_DIR"], "ipynb_template.json") else: template_path = os.path.join(app.config["RESOURCE_DIR"], "ipynb_template_r.json") with open(template_path, "r") as template_file: template_json = json.load(template_file) template_json["metadata"]["kernelspec"]["display_name"] = step[ "kernel"]["display_name"] template_json["metadata"]["kernelspec"][ "name"] = generate_gateway_kernel_name(project_uuid, step["environment"]) return json.dumps(template_json, indent=2) def create_pipeline_files(pipeline_json, pipeline_directory, project_uuid): # Currently, we check per step whether the file exists. # If not, we create it (empty by default). # In case the file has an .ipynb extension we generate the file from a # template with a kernel based on the kernel description in the JSON step.
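# Note: empty .ipynb files already on disk are also re-seeded from the template (see the empty-file check below).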
# Iterate over steps steps = pipeline_json["steps"].keys() for key in steps: step = pipeline_json["steps"][key] file_name = step["file_path"] full_file_path = os.path.join(pipeline_directory, file_name) file_name_split = file_name.split(".") file_name_without_ext = ".".join(file_name_split[:-1]) ext = file_name_split[-1] file_content = None if not os.path.isfile(full_file_path): if len(file_name_without_ext) > 0: file_content = "" if ext == "ipynb": file_content = generate_ipynb_from_template( step, project_uuid) elif ext == "ipynb": # check for empty .ipynb, for which we also generate a template notebook if os.stat(full_file_path).st_size == 0: file_content = generate_ipynb_from_template( step, project_uuid) if file_content is not None: with open(full_file_path, "w") as file: file.write(file_content) def create_experiment_directory(experiment_uuid, pipeline_uuid, project_uuid): experiment_path = os.path.join( app.config["USER_DIR"], "experiments", project_uuid, pipeline_uuid, experiment_uuid, ) os.makedirs(experiment_path) snapshot_path = os.path.join(experiment_path, "snapshot") project_dir = os.path.join(app.config["USER_DIR"], "projects", project_uuid_to_path(project_uuid)) os.system("cp -R %s %s" % (project_dir, snapshot_path)) def remove_experiment_directory(experiment_uuid, pipeline_uuid, project_uuid): experiment_project_path = os.path.join(app.config["USER_DIR"], "experiments", project_uuid) experiment_pipeline_path = os.path.join(experiment_project_path, pipeline_uuid) experiment_path = os.path.join(experiment_pipeline_path, experiment_uuid) if os.path.isdir(experiment_path): shutil.rmtree(experiment_path) # clean up parent directory if this experiment removal created empty directories remove_dir_if_empty(experiment_pipeline_path) remove_dir_if_empty(experiment_project_path) @app.route("/", methods=["GET"]) def index(): js_bundle_path = os.path.join(app.config["STATIC_DIR"], "js", "dist", "main.bundle.js") css_bundle_path = os.path.join(app.config["STATIC_DIR"], "css", "main.css") return render_template( "index.html", javascript_bundle_hash=get_hash(js_bundle_path), css_bundle_hash=get_hash(css_bundle_path), user_config=get_user_conf(), DOCS_ROOT=app.config["DOCS_ROOT"], FLASK_ENV=app.config["FLASK_ENV"], ) @app.route("/async/spawn-update-server", methods=["GET"]) def spawn_update_server(): client = docker.from_env() run_orchest_ctl(client, ["updateserver"]) return "" @app.route("/heartbeat", methods=["GET"]) def heartbeat(): return "" @app.route("/async/restart", methods=["POST"]) def restart_server(): client = docker.from_env() if request.args.get("mode") == "dev": run_orchest_ctl(client, ["restart", "--mode=dev"]) else: run_orchest_ctl(client, ["restart"]) return "" @app.route("/async/version", methods=["GET"]) def version(): git_proc = subprocess.Popen( 'echo "$(git describe --abbrev=0 --tags) "', cwd="/orchest-host", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, ) outs, _ = git_proc.communicate() return outs @app.route("/async/user-config", methods=["GET", "POST"]) def user_config(): if request.method == "POST": config = request.form.get("config") try: # only save if parseable JSON json.loads(config) save_user_conf_raw(config) except json.JSONDecodeError as e: logging.debug(e) return "" else: return get_user_conf_raw() @app.route("/async/pipelines/delete/<project_uuid>/<pipeline_uuid>", methods=["DELETE"]) def pipelines_delete(project_uuid, pipeline_uuid): if (Pipeline.query.filter(Pipeline.uuid == pipeline_uuid).filter( Pipeline.project_uuid == 
project_uuid).count() > 0): pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid) os.remove(pipeline_json_path) pipeline = (Pipeline.query.filter( Pipeline.uuid == pipeline_uuid).filter( Pipeline.project_uuid == project_uuid).first()) db.session.delete(pipeline) db.session.commit() return jsonify({"success": True}) else: return jsonify({"message": "Pipeline could not be found."}), 404 @app.route("/async/experiments/create", methods=["POST"]) def experiments_create(): experiment_uuid = str(uuid.uuid4()) new_ex = Experiment( uuid=experiment_uuid, name=request.json["name"], pipeline_uuid=request.json["pipeline_uuid"], project_uuid=request.json["project_uuid"], pipeline_name=request.json["pipeline_name"], strategy_json="", draft=True, ) db.session.add(new_ex) db.session.commit() create_experiment_directory(experiment_uuid, request.json["pipeline_uuid"], request.json["project_uuid"]) return jsonify(experiment_schema.dump(new_ex)) @app.route("/async/pipelineruns/create", methods=["POST"]) def pipelineruns_create(): experiment_uuid = request.json["experiment_uuid"] for idx, pipeline_run in enumerate( request.json["generated_pipeline_runs"]): pr = PipelineRun( uuid=request.json["experiment_json"]["pipeline_runs"][idx] ["run_uuid"], experiment=experiment_uuid, parameter_json=pipeline_run, id=request.json["pipeline_run_ids"][idx], ) db.session.add(pr) db.session.commit() return jsonify({"success": True}) @app.route("/async/pipelines/create/<project_uuid>", methods=["POST"]) def pipelines_create(project_uuid): pipeline_path = request.json["pipeline_path"] if (Pipeline.query.filter( Pipeline.project_uuid == project_uuid).filter( Pipeline.path == pipeline_path).count() == 0): pipeline_uuid = str(uuid.uuid4()) pipeline = Pipeline(path=pipeline_path, uuid=pipeline_uuid, project_uuid=project_uuid) db.session.add(pipeline) db.session.commit() pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid) os.makedirs(pipeline_dir, exist_ok=True) # generate clean pipeline.json pipeline_json = { "name": request.json["name"], "version": "1.0.0", "uuid": pipeline_uuid, "steps": {}, } with open(pipeline_json_path, "w") as pipeline_json_file: pipeline_json_file.write(json.dumps(pipeline_json, indent=2)) return jsonify({"success": True}) else: return ( jsonify({ "message": "Pipeline already exists at path '%s'." 
% pipeline_path }), 409, ) # Note: only pipelines in project directories can be renamed (not in experiments) @app.route("/async/pipelines/rename/<project_uuid>/<pipeline_uuid>", methods=["POST"]) def pipelines_rename(project_uuid, pipeline_uuid): if Pipeline.query.filter(Pipeline.uuid == pipeline_uuid).count() > 0: pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid) if os.path.isfile(pipeline_json_path): with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) pipeline_json["name"] = request.form.get("name") with open(pipeline_json_path, "w") as json_file: json_file.write(json.dumps(pipeline_json, indent=2)) json_string = json.dumps({"success": True}) return json_string, 200, {"content-type": "application/json"} else: return "", 404 else: return "", 404 class ImportGitProjectListResource(Resource): def post(self): n_uuid = str(uuid.uuid4()) new_task = BackgroundTask(task_uuid=n_uuid, task_type="GIT_CLONE_PROJECT", status="PENDING") db.session.add(new_task) db.session.commit() # start the background process in charge of cloning file_dir = os.path.dirname(os.path.realpath(__file__)) args = [ "python3", "-m", "scripts.background_tasks", "--type", "git_clone_project", "--uuid", n_uuid, "--url", request.json["url"], ] project_name = request.json.get("project_name", None) if project_name: args.append("--path") args.append(str(project_name)) background_task_process = Popen( args, cwd=os.path.join(file_dir, "../.."), stderr=subprocess.STDOUT, ) return background_task_schema.dump(new_task) api.add_resource(ImportGitProjectListResource, "/async/projects/import-git") @app.route("/async/projects", methods=["GET", "POST", "DELETE"]) def projects(): project_dir = os.path.join(app.config["USER_DIR"], "projects") project_paths = [ name for name in os.listdir(project_dir) if os.path.isdir(os.path.join(project_dir, name)) ] # create UUID entry for all projects that do not yet exist existing_project_paths = [ project.path for project in Project.query.filter( Project.path.in_(project_paths)).all() ] new_project_paths = set(project_paths) - set(existing_project_paths) for new_project_path in new_project_paths: new_project = Project( uuid=str(uuid.uuid4()), path=new_project_path, ) db.session.add(new_project) db.session.commit() # build environments on project detection build_environments_for_project(new_project.uuid) # end of UUID creation if request.method == "GET": projects = projects_schema.dump(Project.query.all()) # Get counts for: pipelines, experiments and environments for project in projects: project["pipeline_count"] = Pipeline.query.filter( Pipeline.project_uuid == project["uuid"]).count() project["experiment_count"] = Experiment.query.filter( Experiment.project_uuid == project["uuid"]).count() project["environment_count"] = len( get_environments(project["uuid"])) return jsonify(projects) elif request.method == "DELETE": project_uuid = request.json["project_uuid"] project = Project.query.filter( Project.uuid == project_uuid).first() if project is not None: project_path = project_uuid_to_path(project_uuid) full_project_path = os.path.join(project_dir, project_path) shutil.rmtree(full_project_path) db.session.delete(project) db.session.commit() # refresh kernels after change in environments populate_kernels(app, db) return jsonify({"message": "Project deleted."}) else: return ( jsonify({ "message": "Project not found for UUID %s."
% project_uuid }), 404, ) elif request.method == "POST": project_path = request.json["name"] if project_path not in project_paths: full_project_path = os.path.join(project_dir, project_path) if not os.path.isdir(full_project_path): new_project = Project( uuid=str(uuid.uuid4()), path=project_path, ) db.session.add(new_project) db.session.commit() os.makedirs(full_project_path) # initialize with default environments populate_default_environments(new_project.uuid) # refresh kernels after change in environments populate_kernels(app, db) # build environments on project creation build_environments_for_project(new_project.uuid) else: return ( jsonify( {"message": "Project directory already exists."}), 409, ) else: return ( jsonify({"message": "Project name already exists."}), 409, ) return jsonify({"message": "Project created."}) @app.route("/async/pipelines/<project_uuid>/<pipeline_uuid>", methods=["GET"]) def pipeline_get(project_uuid, pipeline_uuid): pipeline = (Pipeline.query.filter( Pipeline.project_uuid == project_uuid).filter( Pipeline.uuid == pipeline_uuid).first()) if pipeline is None: return jsonify({"message": "Pipeline doesn't exist."}), 404 else: return jsonify(pipeline_schema.dump(pipeline)) @app.route("/async/pipelines/<project_uuid>", methods=["GET"]) def pipelines_get(project_uuid): project_path = project_uuid_to_path(project_uuid) project_dir = os.path.join(app.config["USER_DIR"], "projects", project_path) if not os.path.isdir(project_dir): return jsonify({"message": "Project directory not found."}), 404 # find all pipelines in project dir pipeline_paths = find_pipelines_in_dir(project_dir, project_dir) # identify all pipeline paths that are not yet a pipeline existing_pipeline_paths = [ pipeline.path for pipeline in Pipeline.query.filter( Pipeline.path.in_(pipeline_paths)).filter( Pipeline.project_uuid == project_uuid).all() ] # TODO: handle existing pipeline assignments new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths) for new_pipeline_path in new_pipeline_paths: # write pipeline uuid to file pipeline_json_path = get_pipeline_path( None, project_uuid, pipeline_path=new_pipeline_path) try: with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) file_pipeline_uuid = pipeline_json.get("uuid") new_pipeline_uuid = file_pipeline_uuid # see if pipeline_uuid is taken or missing/empty if (not file_pipeline_uuid or Pipeline.query.filter( Pipeline.uuid == file_pipeline_uuid).filter( Pipeline.project_uuid == project_uuid).count() > 0): new_pipeline_uuid = str(uuid.uuid4()) with open(pipeline_json_path, "w") as json_file: pipeline_json["uuid"] = new_pipeline_uuid json_file.write(json.dumps(pipeline_json, indent=2)) # only commit if writing succeeds new_pipeline = Pipeline( uuid=new_pipeline_uuid, path=new_pipeline_path, project_uuid=project_uuid, ) db.session.add(new_pipeline) db.session.commit() except Exception as e: logging.info(e) pipelines = Pipeline.query.filter( Pipeline.project_uuid == project_uuid).all() pipelines_augmented = [] for pipeline in pipelines: pipeline_json_path = get_pipeline_path(pipeline.uuid, pipeline.project_uuid) pipeline_augmented = { "uuid": pipeline.uuid, "path": pipeline.path, } if os.path.isfile(pipeline_json_path): with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) pipeline_augmented["name"] = pipeline_json["name"] else: pipeline_augmented[ "name"] = "Warning: pipeline file was not found."
pipelines_augmented.append(pipeline_augmented) json_string = json.dumps({ "success": True, "result": pipelines_augmented }) return json_string, 200, {"content-type": "application/json"} @app.route( "/async/notebook_html/<project_uuid>/<pipeline_uuid>/<step_uuid>", methods=["GET"], ) def notebook_html_get(project_uuid, pipeline_uuid, step_uuid): experiment_uuid = request.args.get("experiment_uuid") pipeline_run_uuid = request.args.get("pipeline_run_uuid") pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid, experiment_uuid, pipeline_run_uuid) pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid, experiment_uuid, pipeline_run_uuid) if os.path.isfile(pipeline_json_path): with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) try: notebook_path = os.path.join( pipeline_dir, pipeline_json["steps"][step_uuid]["file_path"]) except Exception as e: logging.info(e) return return_404("Invalid JSON for pipeline %s error: %s" % (pipeline_json_path, e)) else: return return_404("Could not find pipeline.json for pipeline %s" % pipeline_json_path) if os.path.isfile(notebook_path): try: html_exporter = HTMLExporter() (body, _) = html_exporter.from_filename(notebook_path) return body except IOError as error: logging.info("Error opening notebook file %s error: %s" % (notebook_path, error)) return return_404("Could not find notebook file %s" % notebook_path) @app.route("/async/pipelines/json/<project_uuid>/<pipeline_uuid>", methods=["GET", "POST"]) def pipelines_json_get(project_uuid, pipeline_uuid): pipeline_json_path = get_pipeline_path( pipeline_uuid, project_uuid, request.args.get("experiment_uuid"), request.args.get("pipeline_run_uuid"), ) if request.method == "POST": pipeline_directory = get_pipeline_directory( pipeline_uuid, project_uuid, request.args.get("experiment_uuid"), request.args.get("pipeline_run_uuid"), ) # parse JSON pipeline_json = json.loads(request.form.get("pipeline_json")) # first create all files that are part of the pipeline_json definition # TODO: consider removing other files (no way to do this reliably, # special case might be rename) create_pipeline_files(pipeline_json, pipeline_directory, project_uuid) # side effect: for each Notebook in the pipeline.json set the correct kernel pipeline_set_notebook_kernels(pipeline_json, pipeline_directory, project_uuid) with open(pipeline_json_path, "w") as json_file: json_file.write(json.dumps(pipeline_json, indent=2)) return jsonify({"success": True}) elif request.method == "GET": if not os.path.isfile(pipeline_json_path): return ( jsonify({ "success": False, "reason": ".orchest file doesn't exist at location %s" % pipeline_json_path, }), 404, ) else: with open(pipeline_json_path) as json_file: return jsonify({ "success": True, "pipeline_json": json_file.read() }) return "" @app.route( "/async/file-picker-tree/pipeline-cwd/<project_uuid>/<pipeline_uuid>", methods=["GET"], ) def pipeline_cwd(project_uuid, pipeline_uuid): pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) project_dir = get_project_directory(project_uuid) cwd = pipeline_dir.replace(project_dir, "") return jsonify({"cwd": cwd}) @app.route("/async/file-picker-tree/<project_uuid>", methods=["GET"]) def get_file_picker_tree(project_uuid): allowed_file_extensions = ["ipynb", "R", "py", "sh"] project_dir = get_project_directory(project_uuid) if not os.path.isdir(project_dir): return jsonify( {"message": "Project dir %s not found."
% project_dir}), 404 tree = {"type": "directory", "root": True, "name": "/", "children": []} dir_nodes = {} dir_nodes[project_dir] = tree for root, dirs, files in os.walk(project_dir): # prune hidden directories in-place so os.walk does not descend into them dirs[:] = [ dirname for dirname in dirs if not dirname.startswith(".") ] for dirname in dirs: dir_path = os.path.join(root, dirname) dir_node = { "type": "directory", "name": dirname, "children": [], } dir_nodes[dir_path] = dir_node dir_nodes[root]["children"].append(dir_node) for filename in files: if filename.split(".")[-1] in allowed_file_extensions: file_node = { "type": "file", "name": filename, } # this key should always exist try: dir_nodes[root]["children"].append(file_node) except KeyError as e: logging.error( "Key %s does not exist in dir_nodes %s. Error: %s" % (root, dir_nodes, e)) except Exception as e: logging.error("Error: %s" % e) return jsonify(tree) @app.route("/async/project-files/create/<project_uuid>", methods=["POST"]) def create_project_file(project_uuid): """Create project file in specified directory within project.""" project_dir = get_project_directory(project_uuid) # Client sends absolute path relative to project root, hence starting / # is removed. file_path = os.path.join(project_dir, request.json["file_path"][1:]) if os.path.isfile(file_path): return jsonify({"message": "File already exists."}), 409 try: open(file_path, "a").close() return jsonify({"message": "File created."}) except IOError as e: logging.error("Could not create file at %s. Error: %s" % (file_path, e)) return jsonify({"message": "Could not create file."}), 500 @app.route("/async/project-files/exists/<project_uuid>/<pipeline_uuid>", methods=["POST"]) def project_file_exists(project_uuid, pipeline_uuid): """Check whether file exists.""" pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid) file_path = os.path.join(pipeline_dir, request.json["relative_path"]) if os.path.isfile(file_path): return jsonify({"message": "File exists."}) else: return jsonify({"message": "File does not exist."}), 404
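# Minimal sketch of the kernelspec stamping that pipeline_set_notebook_kernels
# performs: only metadata.kernelspec.name/display_name are rewritten, and the
# notebook is written back only when something actually changed. The helper
# name and arguments are illustrative, not part of the app.
import json

def stamp_kernel(notebook_path, kernel_name, display_name):
    with open(notebook_path, "r") as f:
        notebook_json = json.load(f)
    spec = notebook_json["metadata"]["kernelspec"]
    changed = (spec.get("name") != kernel_name
               or spec.get("display_name") != display_name)
    spec["name"] = kernel_name
    spec["display_name"] = display_name
    if changed:
        with open(notebook_path, "w") as f:
            f.write(json.dumps(notebook_json, indent=2))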