def _collateral(self, project_uuid: str, pipeline_uuid: str, pipeline_name: str, **kwargs):
    """Materialize a newly created pipeline.

    Ensures the pipeline directory exists, registers the pipeline with
    the orchest-api, then writes a clean definition file to disk.
    """
    directory = get_pipeline_directory(pipeline_uuid, project_uuid)
    os.makedirs(directory, exist_ok=True)

    # Minimal, clean pipeline definition.
    definition = {
        "name": pipeline_name,
        "version": "1.0.0",
        "uuid": pipeline_uuid,
        "settings": {
            "auto_eviction": False,
            "data_passing_memory_size": "1GB",
        },
        "steps": {},
        "parameters": {},
    }

    # Register with the orchest-api before persisting to disk.
    resp = requests.post(
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/',
        json={"project_uuid": project_uuid, "uuid": pipeline_uuid},
    )
    if resp.status_code != 201:
        raise Exception("Orchest-api pipeline creation failed.")

    with open(get_pipeline_path(pipeline_uuid, project_uuid), "w") as f:
        json.dump(definition, f, indent=4, sort_keys=True)
def pipelines_json(project_uuid, pipeline_uuid):
    """Read (GET) or persist (POST) a pipeline's .orchest definition file.

    Optional ``experiment_uuid``/``pipeline_run_uuid`` query args resolve
    the pipeline file of an experiment's pipeline run instead of the
    project-level pipeline file.
    """
    pipeline_json_path = get_pipeline_path(
        pipeline_uuid,
        project_uuid,
        request.args.get("experiment_uuid"),
        request.args.get("pipeline_run_uuid"),
    )

    if request.method == "POST":
        pipeline_directory = get_pipeline_directory(
            pipeline_uuid,
            project_uuid,
            request.args.get("experiment_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        # parse JSON
        pipeline_json = json.loads(request.form.get("pipeline_json"))

        # first create all files part of pipeline_json definition
        # TODO: consider removing other files (no way to do this reliably,
        # special case might be rename)
        create_pipeline_files(pipeline_json, pipeline_directory, project_uuid)

        # side effect: for each Notebook in de pipeline.json set the correct kernel
        pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                      project_uuid)

        with open(pipeline_json_path, "w") as json_file:
            json_file.write(json.dumps(pipeline_json, indent=4))

        # Analytics call
        send_anonymized_pipeline_definition(app, pipeline_json)

        return jsonify({"message": "Successfully saved pipeline."})

    elif request.method == "GET":
        if not os.path.isfile(pipeline_json_path):
            return (
                jsonify({
                    "success": False,
                    "reason": ".orchest file doesn't exist at location %s" %
                    pipeline_json_path,
                }),
                404,
            )
        else:
            # The raw definition string is returned; the front end
            # parses it itself.
            with open(pipeline_json_path) as json_file:
                return jsonify({
                    "success": True,
                    "pipeline_json": json_file.read()
                })

    return ""
def _collateral(
    self,
    new_uuid: bool,
    project_uuid: str,
    pipeline_uuid: str,
    pipeline_path: str,
    pipeline_json: str,
):
    """Register the pipeline with the orchest-api and, when a fresh uuid
    was assigned, write that uuid back into the pipeline file."""
    resp = requests.post(
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/',
        json={"project_uuid": project_uuid, "uuid": pipeline_uuid},
    )
    if resp.status_code != 201:
        raise Exception("Orchest-api pipeline creation failed.")

    if not new_uuid:
        return

    # Persist the newly generated uuid to disk.
    path = get_pipeline_path(None, project_uuid, pipeline_path=pipeline_path)
    pipeline_json["uuid"] = pipeline_uuid
    with open(path, "w") as f:
        json.dump(pipeline_json, f, indent=4, sort_keys=True)
def _collateral(self, project_uuid: str, pipeline_uuid: str, pipeline_name: str, **kwargs):
    """Write a clean definition file for a newly created pipeline."""
    directory = get_pipeline_directory(pipeline_uuid, project_uuid)
    os.makedirs(directory, exist_ok=True)

    # Minimal, clean pipeline definition.
    definition = {
        "name": pipeline_name,
        "version": "1.0.0",
        "uuid": pipeline_uuid,
        "settings": {
            "auto_eviction": False,
            "data_passing_memory_size": "1GB",
        },
        "steps": {},
        "parameters": {},
    }

    with open(get_pipeline_path(pipeline_uuid, project_uuid), "w") as f:
        json.dump(definition, f, indent=4, sort_keys=True)
def pipelines_json(project_uuid, pipeline_uuid):
    """Read (GET) or persist (POST) a pipeline's .orchest definition file.

    Optional ``job_uuid``/``pipeline_run_uuid`` query args resolve the
    pipeline file of a job's pipeline run instead of the project-level
    pipeline file.
    """
    pipeline_json_path = get_pipeline_path(
        pipeline_uuid,
        project_uuid,
        request.args.get("job_uuid"),
        request.args.get("pipeline_run_uuid"),
    )

    if request.method == "POST":
        pipeline_directory = get_pipeline_directory(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        # Parse JSON.
        pipeline_json = json.loads(request.form.get("pipeline_json"))

        # Side effect: for each Notebook in de pipeline.json set the
        # correct kernel.
        pipeline_set_notebook_kernels(
            pipeline_json, pipeline_directory, project_uuid
        )

        with open(pipeline_json_path, "w") as json_file:
            json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

        # Analytics call.
        send_anonymized_pipeline_definition(app, pipeline_json)

        return jsonify({"message": "Successfully saved pipeline."})

    elif request.method == "GET":
        if not os.path.isfile(pipeline_json_path):
            return (
                jsonify(
                    {
                        "success": False,
                        "reason": ".orchest file doesn't exist at location %s"
                        % pipeline_json_path,
                    }
                ),
                404,
            )
        else:
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            # json.dumps because the front end expects it as a
            # string.
            return jsonify(
                {"success": True, "pipeline_json": json.dumps(pipeline_json)}
            )

    return ""
def file_viewer(project_uuid, pipeline_uuid, step_uuid):
    """Return the content of a pipeline step's file.

    Notebooks are converted to HTML; any other file is returned
    verbatim. Optional ``experiment_uuid``/``pipeline_run_uuid`` query
    args resolve files of an experiment's pipeline run.
    """
    experiment_uuid = request.args.get("experiment_uuid")
    pipeline_run_uuid = request.args.get("pipeline_run_uuid")

    pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                           experiment_uuid, pipeline_run_uuid)
    pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                          experiment_uuid, pipeline_run_uuid)

    if os.path.isfile(pipeline_json_path):
        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        try:
            file_path = os.path.join(
                pipeline_dir, pipeline_json["steps"][step_uuid]["file_path"])
            filename = pipeline_json["steps"][step_uuid]["file_path"]
            step_title = pipeline_json["steps"][step_uuid]["title"]
        except Exception as e:
            logging.info(e)
            # BUGFIX: "%e" is a float conversion specifier and raised a
            # TypeError when formatting the exception; "%s" was meant.
            return return_404("Invalid JSON for pipeline %s error: %s" %
                              (pipeline_json_path, e))
    else:
        return return_404("Could not find pipeline.json for pipeline %s" %
                          pipeline_json_path)

    file_ext = file_path.split(".")[-1]
    file_content = ""

    if file_ext == "ipynb":
        if os.path.isfile(file_path):
            try:
                html_exporter = HTMLExporter()
                (file_content, _) = html_exporter.from_filename(file_path)
            except IOError as error:
                logging.info("Error opening notebook file %s error: %s" %
                             (file_path, error))
                return return_404("Could not find notebook file %s" %
                                  file_path)
    else:
        try:
            with open(file_path) as file:
                file_content = file.read()
        except Exception:
            # "(IOError, Exception)" was redundant; Exception already
            # subsumes IOError.
            return jsonify({"message": "Could not read file."}), 500

    return jsonify({
        "ext": file_ext,
        "content": file_content,
        "step_title": step_title,
        "filename": filename,
    })
def pipelines_delete(project_uuid, pipeline_uuid):
    """Delete a pipeline: its definition file and its orchest state."""
    pipeline = (Pipeline.query.filter(
        Pipeline.uuid == pipeline_uuid).filter(
            Pipeline.project_uuid == project_uuid).one_or_none())

    if pipeline is not None:
        pipeline_json_path = get_pipeline_path(pipeline.uuid, project_uuid)
        # Guard the removal: the file may already have been deleted
        # through the filesystem, which should not turn the request
        # into a 500.
        if os.path.isfile(pipeline_json_path):
            os.remove(pipeline_json_path)

        cleanup_pipeline_from_orchest(pipeline)

        return jsonify({"success": True})
    else:
        return jsonify({"message": "Pipeline could not be found."}), 404
def _transaction(self, project_uuid: str, pipeline_uuid: str):
    """Remove a pipeline from the db"""
    # Resolve the path first: get_pipeline_path queries the db entry,
    # which no longer exists once the row below is deleted.
    path = get_pipeline_path(pipeline_uuid, project_uuid)

    # Will delete cascade job -> pipeline run.
    Pipeline.query.filter_by(project_uuid=project_uuid,
                             uuid=pipeline_uuid).delete()

    # Hand everything the collateral phase needs over explicitly.
    self.collateral_kwargs.update(
        project_uuid=project_uuid,
        pipeline_uuid=pipeline_uuid,
        pipeline_json_path=path,
    )
def _revert(self):
    """Undo a failed pipeline move: restore file location and db state."""
    project_uuid = self.collateral_kwargs["project_uuid"]
    pipeline_uuid = self.collateral_kwargs["pipeline_uuid"]

    old_path = get_pipeline_path(
        None, project_uuid,
        pipeline_path=self.collateral_kwargs["old_path"])

    # Move it back if necessary. This avoids the pipeline being
    # discovered as a new one.
    if self.collateral_kwargs.get("moved", False):
        new_path = get_pipeline_path(
            None, project_uuid,
            pipeline_path=self.collateral_kwargs["new_path"])
        try:
            os.rename(new_path, old_path)
        except Exception as e:
            # Best effort: log and continue restoring what we can.
            current_app.logger.error(
                f"Error while reverting pipeline move: {e}")

    # Restore the original pipeline step relative paths.
    pp_bk = self.collateral_kwargs.get("pipeline_def_backup")
    if pp_bk is not None:
        with open(old_path, "w") as json_file:
            json.dump(pp_bk, json_file, indent=4, sort_keys=True)

    # Put the db entry back into a usable state, pointing at the
    # original path.
    Pipeline.query.filter_by(
        project_uuid=project_uuid,
        uuid=pipeline_uuid,
    ).update({
        "status": "READY",
        "path": self.collateral_kwargs["old_path"]
    })
    db.session.commit()
def _collateral(
    self,
    new_uuid: bool,
    project_uuid: str,
    pipeline_uuid: str,
    pipeline_path: str,
    pipeline_json: str,
):
    """Persist the newly assigned uuid to the pipeline file, if any."""
    if not new_uuid:
        return

    path = get_pipeline_path(None, project_uuid, pipeline_path=pipeline_path)
    pipeline_json["uuid"] = pipeline_uuid
    with open(path, "w") as f:
        json.dump(pipeline_json, f, indent=4, sort_keys=True)
def pipelines_delete(project_uuid, pipeline_uuid):
    """Delete a pipeline's definition file and its db entry."""
    # Fetch the row once with one_or_none() instead of the previous
    # count() + first() pair, which issued two queries and was racy
    # between them.
    pipeline = (Pipeline.query.filter(
        Pipeline.uuid == pipeline_uuid).filter(
            Pipeline.project_uuid == project_uuid).one_or_none())

    if pipeline is not None:
        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid)
        os.remove(pipeline_json_path)

        db.session.delete(pipeline)
        db.session.commit()

        return jsonify({"success": True})
    else:
        return jsonify({"message": "Pipeline could not be found."}), 404
def _transaction(self, project_uuid: str, pipeline_path: str): pipeline_json_path = get_pipeline_path(None, project_uuid, pipeline_path=pipeline_path) # Check the uuid of the pipeline. If the uuid is taken by # another pipeline in the project then generate a new uuid for # the pipeline. with open(pipeline_json_path, "r") as json_file: pipeline_json = json.load(json_file) file_pipeline_uuid = pipeline_json.get("uuid") self.collateral_kwargs["new_uuid"] = None self.collateral_kwargs["project_uuid"] = None self.collateral_kwargs["pipeline_uuid"] = None self.collateral_kwargs["pipeline_path"] = None self.collateral_kwargs["pipeline_json"] = None # If the pipeline has its own uuid and the uuid is not in # the DB already then the pipeline does not need to have a # new uuid assigned and written to disk. if (file_pipeline_uuid is not None and Pipeline.query.filter_by(project_uuid=project_uuid, uuid=file_pipeline_uuid, status="READY").count() == 0): self.collateral_kwargs["new_uuid"] = False else: self.collateral_kwargs["new_uuid"] = True # Generate a new uuid for the pipeline. file_pipeline_uuid = str(uuid.uuid4()) self.collateral_kwargs["project_uuid"] = project_uuid self.collateral_kwargs["pipeline_uuid"] = file_pipeline_uuid self.collateral_kwargs["pipeline_path"] = pipeline_path self.collateral_kwargs["pipeline_json"] = pipeline_json # Add the pipeline to the db. new_pipeline = Pipeline( uuid=file_pipeline_uuid, path=pipeline_path, project_uuid=project_uuid, ) db.session.add(new_pipeline)
def pipelines_create(project_uuid):
    """Create a new pipeline at the requested path within the project."""
    pipeline_path = request.json["pipeline_path"]

    # Reject the request when a pipeline already occupies the path.
    path_taken = (Pipeline.query.filter(
        Pipeline.project_uuid == project_uuid).filter(
            Pipeline.path == pipeline_path).count() > 0)

    if path_taken:
        return (
            jsonify({
                "message":
                "Pipeline already exists at path '%s'." % pipeline_path
            }),
            409,
        )

    pipeline_uuid = str(uuid.uuid4())
    db.session.add(
        Pipeline(path=pipeline_path,
                 uuid=pipeline_uuid,
                 project_uuid=project_uuid))
    db.session.commit()

    os.makedirs(get_pipeline_directory(pipeline_uuid, project_uuid),
                exist_ok=True)

    # generate clean pipeline.json
    definition = {
        "name": request.json["name"],
        "version": "1.0.0",
        "uuid": pipeline_uuid,
        "settings": {
            "auto_eviction": False,
            "data_passing_memory_size": "1GB",
        },
        "steps": {},
    }

    with open(get_pipeline_path(pipeline_uuid, project_uuid), "w") as f:
        f.write(json.dumps(definition, indent=4))

    return jsonify({"success": True})
def notebook_html_get(project_uuid, pipeline_uuid, step_uuid):
    """Render a pipeline step's notebook to HTML and return the body.

    Optional ``experiment_uuid``/``pipeline_run_uuid`` query args
    resolve the notebook of an experiment's pipeline run.
    """
    experiment_uuid = request.args.get("experiment_uuid")
    pipeline_run_uuid = request.args.get("pipeline_run_uuid")

    pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                           experiment_uuid, pipeline_run_uuid)
    pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                          experiment_uuid, pipeline_run_uuid)

    if os.path.isfile(pipeline_json_path):
        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        try:
            notebook_path = os.path.join(
                pipeline_dir, pipeline_json["steps"][step_uuid]["file_path"])
        except Exception as e:
            logging.info(e)
            # BUGFIX: "%e" is a float conversion specifier and raised a
            # TypeError when formatting the exception; "%s" was meant.
            return return_404("Invalid JSON for pipeline %s error: %s" %
                              (pipeline_json_path, e))
    else:
        return return_404("Could not find pipeline.json for pipeline %s" %
                          pipeline_json_path)

    if os.path.isfile(notebook_path):
        try:
            html_exporter = HTMLExporter()
            (body, _) = html_exporter.from_filename(notebook_path)
            return body
        except IOError as error:
            logging.info("Error opening notebook file %s error: %s" %
                         (notebook_path, error))

    # Either the file is missing or exporting it failed.
    return return_404("Could not find notebook file %s" % notebook_path)
def _collateral(
    self,
    new_uuid: bool,
    project_uuid: str,
    pipeline_uuid: str,
    pipeline_path: str,
    pipeline_json: str,
):
    """Ensure the pipeline exists in the orchest-api and persist a newly
    assigned uuid to disk when one was generated.

    At the project level, pipeline files with the same UUID are
    considered to be the same pipeline. If we are "replacing" the
    pipeline it's because the previous pipeline was deleted and this
    new pipeline has been discovered through the FS. DELETEs of a
    pipeline to the orchest-api don't actually delete the pipeline, so
    we don't need to POST, since the old entry will still be there.
    Currently, we don't need to PUT since no field of the pipeline
    entry in the orchest-api needs to be updated when replacing.
    """
    base_url = (
        f'http://{current_app.config["ORCHEST_API_ADDRESS"]}/api/pipelines/')

    resp = requests.get(base_url + f"{project_uuid}/{pipeline_uuid}")
    if resp.status_code == 404:
        # Unknown to the orchest-api: register it.
        resp = requests.post(
            base_url,
            json={"project_uuid": project_uuid, "uuid": pipeline_uuid},
        )
        if resp.status_code != 201:
            raise Exception("Orchest-api pipeline creation failed.")

    if not new_uuid:
        return

    # Write the generated uuid into the definition on disk.
    path = get_pipeline_path(None, project_uuid, pipeline_path=pipeline_path)
    pipeline_json["uuid"] = pipeline_uuid
    with open(path, "w") as f:
        json.dump(pipeline_json, f, indent=4, sort_keys=True)
def pipelines_get(project_uuid):
    """List all pipelines of a project, augmented with their names.

    First synchronizes the db state with the pipeline files on disk.
    """
    try:
        with TwoPhaseExecutor(db.session) as tpe:
            SyncProjectPipelinesDBState(tpe).transaction(project_uuid)
    except Exception as e:
        msg = ("Error during project pipelines synchronization of "
               f"{project_uuid}: {str(e)}.")
        return jsonify({"message": msg}), 500

    pipelines = Pipeline.query.filter(
        Pipeline.project_uuid == project_uuid).all()

    pipelines_augmented = []
    for pipeline in pipelines:
        pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                               pipeline.project_uuid)

        pipeline_augmented = {
            "uuid": pipeline.uuid,
            "path": pipeline.path,
        }
        # The pipeline name lives in the definition file, not the db.
        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)
                pipeline_augmented["name"] = pipeline_json["name"]
        else:
            pipeline_augmented[
                "name"] = "Warning: pipeline file was not found."

        pipelines_augmented.append(pipeline_augmented)

    json_string = json.dumps({
        "success": True,
        "result": pipelines_augmented
    })

    return json_string, 200, {"content-type": "application/json"}
def pipelines_rename(project_uuid, pipeline_uuid):
    """Rename a pipeline, i.e. update the "name" in its definition file."""
    # BUGFIX: also filter on project_uuid; filtering on the pipeline
    # uuid alone could match a pipeline of a different project, as the
    # sibling endpoints all scope their queries to the project.
    if (Pipeline.query.filter(Pipeline.uuid == pipeline_uuid).filter(
            Pipeline.project_uuid == project_uuid).count() > 0):
        pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid)

        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            pipeline_json["name"] = request.form.get("name")

            with open(pipeline_json_path, "w") as json_file:
                json_file.write(json.dumps(pipeline_json, indent=2))

            json_string = json.dumps({"success": True})
            return json_string, 200, {"content-type": "application/json"}
        else:
            return "", 404
    else:
        return "", 404
def pipelines_get(project_uuid):
    """List all pipelines of a project, augmented with their names.

    First synchronizes the db state with the pipeline files on disk.
    """
    try:
        sync_project_pipelines_db_state(project_uuid)
    except Exception as e:
        return jsonify({"message": str(e)}), 500

    pipelines = Pipeline.query.filter(
        Pipeline.project_uuid == project_uuid).all()

    pipelines_augmented = []
    for pipeline in pipelines:
        pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                               pipeline.project_uuid)

        pipeline_augmented = {
            "uuid": pipeline.uuid,
            "path": pipeline.path,
        }
        # The pipeline name lives in the definition file, not the db.
        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)
                pipeline_augmented["name"] = pipeline_json["name"]
        else:
            pipeline_augmented[
                "name"] = "Warning: pipeline file was not found."

        pipelines_augmented.append(pipeline_augmented)

    json_string = json.dumps({
        "success": True,
        "result": pipelines_augmented
    })

    return json_string, 200, {"content-type": "application/json"}
def _transaction(self, project_uuid):
    """Synchronizes the state of the pipelines of a project.

    Synchronizes the state of the filesystem with the db when it
    comes to the pipelines of a project. Pipelines removed from the
    filesystem are removed, new pipelines (or pipelines that where
    there after, for example a project import) are registered in
    the db.

    Args:
        project_uuid:

    Raises:
        FileNotFoundError: If the project directory is not found.
    """
    project_path = project_uuid_to_path(project_uuid)
    project_dir = safe_join(current_app.config["USER_DIR"], "projects",
                            project_path)

    # Lock the project to avoid race conditions in pipeline deletion
    # or creation.
    Project.query.with_for_update().filter_by(uuid=project_uuid).one()

    if not os.path.isdir(project_dir):
        raise FileNotFoundError("Project directory not found")

    # Find all pipelines in the project directory.
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # Cleanup pipelines that have been manually removed.
    fs_removed_pipelines = [
        pipeline for pipeline in Pipeline.query.filter(
            Pipeline.path.notin_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid,
                Pipeline.status == "READY",
            ).all()
    ]
    for pip in fs_removed_pipelines:
        # The file is already gone from the fs, hence
        # remove_file=False.
        DeletePipeline(self.tpe).transaction(pip.project_uuid,
                                             pip.uuid,
                                             remove_file=False)

    # Identify all pipeline paths that are not yet a pipeline, that
    # is, pipelines that were added through the filesystem.
    existing_pipeline_paths = [
        pipeline.path for pipeline in Pipeline.query.filter(
            Pipeline.path.in_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid).all()
    ]

    # TODO: handle existing pipeline assignments.
    new_pipelines_from_fs = set(pipeline_paths) - set(
        existing_pipeline_paths)

    for path in new_pipelines_from_fs:
        pipeline_json_path = get_pipeline_path(None,
                                               project_uuid,
                                               pipeline_path=path)
        with open(pipeline_json_path, "r") as json_file:
            pipeline_uuid = json.load(json_file)["uuid"]

        # This is not a new pipeline, the pipeline is being moved.
        is_moving = (Pipeline.query.filter_by(project_uuid=project_uuid,
                                              uuid=pipeline_uuid,
                                              status="MOVING").count() > 0)
        if not is_moving:
            AddPipelineFromFS(self.tpe).transaction(project_uuid, path)
def _collateral(
    self,
    project_uuid: str,
    pipeline_uuid: str,
    old_path: str,
    new_path: str,
):
    """Move a pipeline to another path, i.e. rename it."""
    if not is_valid_project_relative_path(project_uuid, new_path):
        raise error.OutOfProjectError(
            "New pipeline path points outside of the project directory.")

    old_path = get_pipeline_path(None, project_uuid, pipeline_path=old_path)
    new_path = get_pipeline_path(None, project_uuid, pipeline_path=new_path)

    if not os.path.exists(old_path):
        raise error.PipelineFileDoesNotExist()

    if os.path.exists(new_path) and old_path != new_path:
        raise error.PipelineFileExists()

    # Update the pipeline definition by adjusting the step file
    # paths, since they should be relative to the pipeline file.
    rel_path = os.path.relpath(
        os.path.split(old_path)[0],
        os.path.split(new_path)[0])
    if rel_path != ".":
        with open(old_path, "r") as json_file:
            pipeline_def = json.load(json_file)

        # Keep a backup so _revert can restore the original step
        # paths if the rest of the collateral fails.
        self.collateral_kwargs["pipeline_def_backup"] = copy.deepcopy(
            pipeline_def)

        for step in pipeline_def["steps"].values():
            step_f_prefix, step_f_name = os.path.split(step["file_path"])
            file_path = os.path.normpath(
                # Get to the "previous" position + use the relative
                # path of the notebook w.r.t. the previous position,
                # then normalize to cleanup paths such as
                # 1/2/3/../../2 , that would become 1/2.
                os.path.join(rel_path, step_f_prefix, step_f_name))
            step["file_path"] = file_path
            if not is_valid_pipeline_relative_path(
                    project_uuid, pipeline_uuid, file_path):
                raise error.OutOfProjectError(
                    "Step path points outside of the project directory.")

        with open(old_path, "w") as json_file:
            # Validate before writing so a broken definition is never
            # persisted.
            errors = check_pipeline_correctness(pipeline_def)
            if errors:
                raise Exception("Incorrect pipeline.")
            json.dump(pipeline_def, json_file, indent=4, sort_keys=True)

    # Create the parent directories if needed.
    directories, _ = os.path.split(new_path)
    if directories:
        os.makedirs(directories, exist_ok=True)

    os.rename(old_path, new_path)
    # So that the moving can be reverted in case of failure of the
    # rest of the collateral.
    self.collateral_kwargs["moved"] = True

    Pipeline.query.filter_by(
        project_uuid=project_uuid,
        uuid=pipeline_uuid,
    ).update({"status": "READY"})
    db.session.commit()
def pipelines_json(project_uuid, pipeline_uuid):
    """Read (GET) or persist (POST) a pipeline's .orchest definition file.

    Optional ``job_uuid``/``pipeline_run_uuid`` query args resolve the
    pipeline file of a job's pipeline run instead of the project-level
    pipeline file.
    """
    pipeline_json_path = get_pipeline_path(
        pipeline_uuid,
        project_uuid,
        request.args.get("job_uuid"),
        request.args.get("pipeline_run_uuid"),
    )

    if request.method == "POST":
        pipeline_directory = get_pipeline_directory(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        # Parse JSON.
        pipeline_json = json.loads(request.form.get("pipeline_json"))

        # First create all files part of pipeline_json definition
        # TODO: consider removing other files (no way to do this
        # reliably, special case might be rename).
        create_pipeline_files(pipeline_json, pipeline_directory, project_uuid)

        # Side effect: for each Notebook in de pipeline.json set the
        # correct kernel.
        pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                      project_uuid)

        with open(pipeline_json_path, "w") as json_file:
            json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

        # Analytics call.
        send_anonymized_pipeline_definition(app, pipeline_json)

        return jsonify({"message": "Successfully saved pipeline."})

    elif request.method == "GET":
        if not os.path.isfile(pipeline_json_path):
            return (
                jsonify({
                    "success": False,
                    "reason": ".orchest file doesn't exist at location %s" %
                    pipeline_json_path,
                }),
                404,
            )
        else:
            with open(pipeline_json_path) as json_file:
                pipeline_json = json.load(json_file)
                # Take care of old pipelines with no defined params.
                if "parameters" not in pipeline_json:
                    pipeline_json["parameters"] = {}

                # json.dumps because the front end expects it as a
                # string.
                return jsonify({
                    "success": True,
                    "pipeline_json": json.dumps(pipeline_json)
                })

    return ""
def sync_project_pipelines_db_state(project_uuid):
    """Synchronizes the state of the pipelines of a project (fs/db).

    Synchronizes the state of the filesystem with the db when it comes
    to the pipelines of a project. Pipelines removed from the file
    system are removed, new pipelines (or pipelines that were there
    after, for example, a project import) are registered in the db.

    Args:
        project_uuid:

    Raises:
        FileNotFoundError: If the project directory is not found.
    """
    project_path = project_uuid_to_path(project_uuid)
    project_dir = os.path.join(app.config["USER_DIR"], "projects",
                               project_path)

    if not os.path.isdir(project_dir):
        raise FileNotFoundError("Project directory not found")

    # find all pipelines in project dir
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # cleanup pipelines that have been manually removed
    fs_removed_pipelines = [
        pipeline for pipeline in Pipeline.query.filter(
            Pipeline.path.notin_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid).all()
    ]
    for fs_removed_pipeline in fs_removed_pipelines:
        cleanup_pipeline_from_orchest(fs_removed_pipeline)

    # identify all pipeline paths that are not yet a pipeline
    existing_pipeline_paths = [
        pipeline.path for pipeline in Pipeline.query.filter(
            Pipeline.path.in_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid).all()
    ]

    # TODO: handle existing pipeline assignments
    new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

    for new_pipeline_path in new_pipeline_paths:
        # write pipeline uuid to file
        pipeline_json_path = get_pipeline_path(
            None, project_uuid, pipeline_path=new_pipeline_path)

        try:
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            file_pipeline_uuid = pipeline_json.get("uuid")
            new_pipeline_uuid = file_pipeline_uuid

            # see if pipeline_uuid is taken
            # NOTE(review): if the file has no "uuid" key,
            # file_pipeline_uuid is None and len(None) raises a
            # TypeError, which is swallowed (and logged) by the
            # except below — verify this is intended.
            if (Pipeline.query.filter(
                    Pipeline.uuid == file_pipeline_uuid).filter(
                        Pipeline.project_uuid == project_uuid).count() > 0
                    or len(file_pipeline_uuid) == 0):
                new_pipeline_uuid = str(uuid.uuid4())

            with open(pipeline_json_path, "w") as json_file:
                pipeline_json["uuid"] = new_pipeline_uuid
                json_file.write(json.dumps(pipeline_json, indent=4))

            # only commit if writing succeeds
            new_pipeline = Pipeline(
                uuid=new_pipeline_uuid,
                path=new_pipeline_path,
                project_uuid=project_uuid,
            )
            db.session.add(new_pipeline)
            db.session.commit()

        except Exception as e:
            logging.info(e)
def pipelines_get(project_uuid):
    """List all pipelines of a project, augmented with their names.

    Inline-synchronizes the db with pipeline files discovered on disk
    before building the response.
    """
    project_path = project_uuid_to_path(project_uuid)
    project_dir = os.path.join(app.config["USER_DIR"], "projects",
                               project_path)

    if not os.path.isdir(project_dir):
        return jsonify({"message": "Project directory not found."}), 404

    # find all pipelines in project dir
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # identify all pipeline paths that are not yet a pipeline
    existing_pipeline_paths = [
        pipeline.path for pipeline in Pipeline.query.filter(
            Pipeline.path.in_(pipeline_paths)).filter(
                Pipeline.project_uuid == project_uuid).all()
    ]

    # TODO: handle existing pipeline assignments
    new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

    for new_pipeline_path in new_pipeline_paths:
        # write pipeline uuid to file
        pipeline_json_path = get_pipeline_path(
            None, project_uuid, pipeline_path=new_pipeline_path)

        try:
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            file_pipeline_uuid = pipeline_json.get("uuid")
            new_pipeline_uuid = file_pipeline_uuid

            # see if pipeline_uuid is taken
            # NOTE(review): a missing "uuid" key makes
            # file_pipeline_uuid None and len(None) raises; the
            # exception is swallowed (and logged) by the except
            # below — verify this is intended.
            if (Pipeline.query.filter(
                    Pipeline.uuid == file_pipeline_uuid).filter(
                        Pipeline.project_uuid == project_uuid).count() > 0
                    or len(file_pipeline_uuid) == 0):
                new_pipeline_uuid = str(uuid.uuid4())

            with open(pipeline_json_path, "w") as json_file:
                pipeline_json["uuid"] = new_pipeline_uuid
                json_file.write(json.dumps(pipeline_json, indent=2))

            # only commit if writing succeeds
            new_pipeline = Pipeline(
                uuid=new_pipeline_uuid,
                path=new_pipeline_path,
                project_uuid=project_uuid,
            )
            db.session.add(new_pipeline)
            db.session.commit()

        except Exception as e:
            logging.info(e)

    pipelines = Pipeline.query.filter(
        Pipeline.project_uuid == project_uuid).all()

    pipelines_augmented = []
    for pipeline in pipelines:
        pipeline_json_path = get_pipeline_path(pipeline.uuid,
                                               pipeline.project_uuid)

        pipeline_augmented = {
            "uuid": pipeline.uuid,
            "path": pipeline.path,
        }
        # The pipeline name lives in the definition file, not the db.
        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)
                pipeline_augmented["name"] = pipeline_json["name"]
        else:
            pipeline_augmented[
                "name"] = "Warning: pipeline file was not found."

        pipelines_augmented.append(pipeline_augmented)

    json_string = json.dumps({
        "success": True,
        "result": pipelines_augmented
    })

    return json_string, 200, {"content-type": "application/json"}
def file_viewer(project_uuid, pipeline_uuid, step_uuid):
    """Return the content of a pipeline step's file.

    Notebooks are converted to HTML (with a small CSS tweak); other
    files are returned verbatim. Optional ``job_uuid``/
    ``pipeline_run_uuid`` query args resolve files of a job's pipeline
    run.
    """
    job_uuid = request.args.get("job_uuid")
    pipeline_run_uuid = request.args.get("pipeline_run_uuid")

    pipeline_json_path = get_pipeline_path(pipeline_uuid, project_uuid,
                                           job_uuid, pipeline_run_uuid)
    pipeline_dir = get_pipeline_directory(pipeline_uuid, project_uuid,
                                          job_uuid, pipeline_run_uuid)

    if os.path.isfile(pipeline_json_path):
        with open(pipeline_json_path, "r") as json_file:
            pipeline_json = json.load(json_file)

        try:
            step_file_path = pipeline_json["steps"][step_uuid]["file_path"]

            if not is_valid_pipeline_relative_path(
                    project_uuid, pipeline_uuid, step_file_path):
                raise app_error.OutOfProjectError(
                    "Step path points outside of the project directory.")

            # Absolute paths point at the data directory; relative
            # paths are resolved against the pipeline directory.
            if step_file_path.startswith("/"):
                file_path = resolve_absolute_path(step_file_path)
            else:
                file_path = safe_join(pipeline_dir, step_file_path)

            filename = pipeline_json["steps"][step_uuid]["file_path"]
            step_title = pipeline_json["steps"][step_uuid]["title"]
        except Exception as e:
            app.logger.info(e)
            # BUGFIX: "%e" is a float conversion specifier and raised a
            # TypeError when formatting the exception; "%s" was meant.
            return return_404("Invalid JSON for pipeline %s error: %s" %
                              (pipeline_json_path, e))
    else:
        return return_404("Could not find pipeline.json for pipeline %s" %
                          pipeline_json_path)

    file_ext = file_path.split(".")[-1]
    file_content = ""

    if file_ext == "ipynb":
        if os.path.isfile(file_path):
            try:
                html_exporter = HTMLExporter()
                (file_content, _) = html_exporter.from_filename(file_path)

                # custom CSS
                custom_style = "<style>.CodeMirror pre {overflow: auto}</style>"
                file_content = file_content.replace(
                    "</head>", custom_style + "</head>", 1)
            except IOError as error:
                app.logger.info(
                    "Error opening notebook file %s error: %s" %
                    (file_path, error))
                return return_404(
                    ("Could not find notebook file %s") % file_path)
    else:
        try:
            with open(file_path) as file:
                file_content = file.read()
        except Exception:
            # "(IOError, Exception)" was redundant; Exception already
            # subsumes IOError.
            return jsonify({"message": "Could not read file."}), 500

    return jsonify({
        "ext": file_ext,
        "content": file_content,
        "step_title": step_title,
        "filename": filename,
    })
def pipelines_json(project_uuid, pipeline_uuid):
    """Read (GET) or persist (POST) a pipeline definition.

    POST validates and normalizes step paths, sets notebook kernels,
    writes the definition to disk, keeps the orchest-api name in sync
    and emits an analytics event. GET returns the definition as a JSON
    string.
    """
    if request.method == "POST":
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            None,
            request.args.get("pipeline_run_uuid"),
        )
        pipeline_directory = get_pipeline_directory(
            pipeline_uuid,
            project_uuid,
            None,
            request.args.get("pipeline_run_uuid"),
        )

        # Parse JSON.
        pipeline_json = json.loads(request.form.get("pipeline_json"))

        # Normalize relative paths.
        for step in pipeline_json["steps"].values():
            is_project_file = is_valid_pipeline_relative_path(
                project_uuid, pipeline_uuid, step["file_path"])

            is_data_file = is_valid_data_path(step["file_path"])

            if not (is_project_file or is_data_file):
                raise app_error.OutOfAllowedDirectoryError(
                    "File is neither in the project, nor in the data directory."
                )

            if not step["file_path"].startswith("/"):
                step["file_path"] = normalize_project_relative_path(
                    step["file_path"])

        errors = check_pipeline_correctness(pipeline_json)
        if errors:
            msg = {}
            msg = {"success": False}
            reason = ", ".join([key for key in errors])
            reason = f"Invalid value: {reason}."
            msg["reason"] = reason
            return jsonify(msg), 400

        # Side effect: for each Notebook in de pipeline.json set the
        # correct kernel.
        try:
            pipeline_set_notebook_kernels(pipeline_json, pipeline_directory,
                                          project_uuid)
        except KeyError:
            msg = {
                "success": False,
                "reason": "Invalid Notebook metadata structure.",
            }
            return jsonify(msg), 400

        # Keep the previous definition around to detect a rename.
        with open(pipeline_json_path, "r") as json_file:
            old_pipeline_json = json.load(json_file)

        # Save the pipeline JSON again to make sure its keys are
        # sorted.
        with open(pipeline_json_path, "w") as json_file:
            json.dump(pipeline_json, json_file, indent=4, sort_keys=True)

        if old_pipeline_json["name"] != pipeline_json["name"]:
            # Propagate the rename to the orchest-api.
            resp = requests.put(
                (f'http://{current_app.config["ORCHEST_API_ADDRESS"]}'
                 f"/api/pipelines/{project_uuid}/{pipeline_uuid}"),
                json={"name": pipeline_json["name"]},
            )
            if resp.status_code != 200:
                return (
                    jsonify(
                        {"message": "Failed to PUT name to orchest-api."}),
                    resp.status_code,
                )

        # Analytics call.
        analytics.send_event(
            app,
            analytics.Event.PIPELINE_SAVE,
            {"pipeline_definition": pipeline_json},
        )
        return jsonify({
            "success": True,
            "message": "Successfully saved pipeline."
        })

    elif request.method == "GET":
        pipeline_json_path = get_pipeline_path(
            pipeline_uuid,
            project_uuid,
            request.args.get("job_uuid"),
            request.args.get("pipeline_run_uuid"),
        )

        if not os.path.isfile(pipeline_json_path):
            return (
                jsonify({
                    "success": False,
                    "reason": ".orchest file doesn't exist at location " +
                    pipeline_json_path,
                }),
                404,
            )
        else:
            pipeline_json = get_pipeline_json(pipeline_uuid, project_uuid)

            # json.dumps because the front end expects it as a string.
            return jsonify({
                "success": True,
                "pipeline_json": json.dumps(pipeline_json)
            })