def _prepare_transformer_assets(fn: Callable, assets: Dict = None):
    notebook_path = jputils.get_notebook_path()
    processor = NotebookProcessor(nb_path=notebook_path,
                                  skip_validation=True)
    fn_source = astutils.get_function_source(fn, strip_signature=False)
    missing_names = flakeutils.pyflakes_report(
        processor.get_imports_and_functions() + "\n" + fn_source)

    if not assets:
        assets = dict()
    if not isinstance(assets, dict):
        raise ValueError("Please provide preprocessing assets as a dictionary"
                         " mapping variable *names* to their objects")
    missing_assets = [x not in assets.keys() for x in missing_names]
    if any(missing_assets):
        raise RuntimeError(
            "The following objects are a dependency for the"
            " provided preprocessing function. Please add them"
            " to the `preprocessing_assets` dictionary: %s"
            % [a for a, m in zip(missing_names, missing_assets) if m])

    # save function and assets
    utils.clean_dir(TRANSFORMER_ASSETS_DIR)
    marshal.set_data_dir(TRANSFORMER_ASSETS_DIR)
    marshal.save(fn, TRANSFORMER_FN_ASSET_NAME)
    for asset_name, asset_value in assets.items():
        marshal.save(asset_value, asset_name)

    # save the source notebook as well
    shutil.copy(
        notebook_path,
        os.path.join(TRANSFORMER_ASSETS_DIR, TRANSFORMER_SRC_NOTEBOOK_NAME))
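# A minimal usage sketch (not part of the original module): how a user-defined
# preprocessing function and its dependencies might be handed to
# `_prepare_transformer_assets`. The names `scaler` and `preprocess` are
# hypothetical; the only requirement is that every missing global name
# referenced by the function appears as a key in the assets dictionary.
#
#     from sklearn.preprocessing import StandardScaler
#
#     scaler = StandardScaler().fit(train_data)
#
#     def preprocess(instance):
#         # relies on the global `scaler`, so it must be shipped as an asset
#         return scaler.transform([instance])[0]
#
#     _prepare_transformer_assets(preprocess, assets={"scaler": scaler})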
def validate_notebook(request, source_notebook_path,
                      notebook_metadata_overrides=None):
    """Validate notebook metadata."""
    # Notebook metadata is validated at class instantiation
    NotebookProcessor(source_notebook_path, notebook_metadata_overrides)
    return True
def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL."""
    processor = NotebookProcessor(source_notebook_path,
                                  notebook_metadata_overrides)
    pipeline = processor.to_pipeline()
    script_path = Compiler(pipeline).compile()
    # FIXME: Why were we tapping into the Kale logger?
    # instance = Kale(source_notebook_path, notebook_metadata_overrides,
    #                 debug)
    # instance.logger = request.log if hasattr(request, "log") else logger
    package_path = kfputils.compile_pipeline(script_path,
                                             pipeline.config.pipeline_name)
    return {"pipeline_package_path": os.path.relpath(package_path),
            "pipeline_metadata": pipeline.config.to_dict()}
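# Illustrative sketch (an assumption, not taken from the original sources): an
# RPC caller would invoke `compile_notebook` with a request object and a
# notebook path, and receive back the two keys built above.
#
#     result = compile_notebook(request, "notebook.ipynb")
#     result["pipeline_package_path"]  # relative path to the compiled package
#     result["pipeline_metadata"]      # dict form of the pipeline config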
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook."""
    # read notebook
    log = request.log if hasattr(request, "log") else logger
    try:
        processor = NotebookProcessor(
            os.path.expanduser(source_notebook_path),
            skip_validation=True)
        metrics_source = processor.get_pipeline_metrics_source()
        if metrics_source == '':
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # get a dict from the 'pipeline metrics' cell source code
        metrics = astutils.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    log.info("Pipeline metrics: {}".format(metrics))
    return metrics
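# Hedged example (the exact cell contents are an assumption): based on the
# name of `parse_metrics_print_statements`, the metrics are expected to come
# from a notebook cell tagged `pipeline-metrics` that prints the metric
# variables, which is then turned into a name -> value mapping.
#
#     # cell tagged `pipeline-metrics`
#     print(accuracy)
#     print(loss)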
def test_notebook_to_dsl(random_string, notebook_path, dsl_path):
    """Test code generation end to end from notebook to DSL."""
    random_string.return_value = "rnd"
    overrides = {"abs_working_dir": "/kale"}
    pipeline = NotebookProcessor(notebook_path, overrides).to_pipeline()
    dsl_script_path = Compiler(pipeline).compile()
    expected_result = open(dsl_path).read()
    result = open(dsl_script_path).read()
    assert result == expected_result
def get_pipeline_parameters(request, source_notebook_path):
    """Get the pipeline parameters tagged in the notebook."""
    # read notebook
    log = request.log if hasattr(request, "log") else logger
    try:
        processor = NotebookProcessor(
            os.path.expanduser(source_notebook_path),
            skip_validation=True)
        params_source = processor.get_pipeline_parameters_source()
        if params_source == '':
            raise ValueError("No pipeline parameters found. Please tag a cell"
                             " of the notebook with the `pipeline-parameters`"
                             " tag.")
        # get a dict from the 'pipeline parameters' cell source code
        params_dict = astutils.parse_assignments_expressions(params_source)
    except ValueError as e:
        log.exception("Value Error during parsing of pipeline parameters")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    # convert the dict to a list so it is easier to parse in JS
    params = [[k, *v] for k, v in params_dict.items()]
    log.info("Pipeline parameters:")
    for ln in tabulate(params, headers=["name", "type", "value"]).split("\n"):
        log.info(ln)
    return params
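# Hedged example (assumed cell contents): a cell tagged `pipeline-parameters`
# contains plain assignments, which `parse_assignments_expressions` turns into
# a dict of name -> (type, value); the list comprehension above then flattens
# it into rows for the JS frontend.
#
#     # cell tagged `pipeline-parameters`
#     LR = 0.003
#     EPOCHS = 10
#
#     # resulting rows, e.g.: [["LR", "float", 0.003], ["EPOCHS", "int", 10]]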
def _load_transformer_assets(self):
    marshal.set_data_dir(serveutils.TRANSFORMER_ASSETS_DIR)
    log.info("Loading transformer function...")
    _fn = marshal.load(serveutils.TRANSFORMER_FN_ASSET_NAME)
    # Create a new function, monkey patching the original function's
    # __globals__. The marshalled function would not be scoped under
    # the current module, thus its __globals__ dict would be empty.
    # In this way we create the same function but bind it to the
    # module's globals().
    self.fn = types.FunctionType(_fn.__code__,
                                 globals(),
                                 _fn.__name__,
                                 _fn.__defaults__,
                                 _fn.__closure__)

    log.info("Processing source notebook for imports and functions...")
    processor = NotebookProcessor(
        nb_path=os.path.join(serveutils.TRANSFORMER_ASSETS_DIR,
                             serveutils.TRANSFORMER_SRC_NOTEBOOK_NAME),
        skip_validation=True)
    self.init_code = processor.get_imports_and_functions()
    log.info("Initialization code:\n%s" % self.init_code)
    log.info("Running initialization code...")
    exec(self.init_code, globals())

    log.info("Loading transformer's assets...")
    for file in os.listdir(serveutils.TRANSFORMER_ASSETS_DIR):
        if file in [serveutils.TRANSFORMER_SRC_NOTEBOOK_NAME,
                    serveutils.TRANSFORMER_FN_ASSET_NAME]:
            continue
        # The marshal mechanism works by looking at the names of the
        # files without their extensions.
        basename = os.path.splitext(file)[0]  # remove extension
        self.assets[basename] = marshal.load(basename)
    log.info("Assets successfully loaded: %s" % self.assets.keys())

    log.info("Initializing assets...")
    for asset_name, asset_value in self.assets.items():
        globals()[asset_name] = asset_value
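# Expected on-disk layout (a sketch; file names other than the two constants
# are hypothetical): everything under TRANSFORMER_ASSETS_DIR that is not the
# source notebook or the marshalled function is loaded as a named asset and
# published to globals(), so the transformer function can reference it by name.
#
#     TRANSFORMER_ASSETS_DIR/
#     ├── <TRANSFORMER_SRC_NOTEBOOK_NAME>   # copied source notebook
#     ├── <TRANSFORMER_FN_ASSET_NAME>.*     # marshalled preprocessing function
#     └── scaler.*                          # e.g. an asset saved as "scaler"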
def main():
    """Entry-point of CLI command."""
    parser = argparse.ArgumentParser(description=ARGS_DESC,
                                     formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb', type=str,
                               help='Path to source JupyterNotebook',
                               required=True)
    # use store_const instead of store_true because we want None instead of
    # False in case the flag is missing
    general_group.add_argument('--upload_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--run_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--debug', action='store_true')

    metadata_group = parser.add_argument_group('Notebook Metadata Overrides',
                                               METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name', type=str,
                                help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name', type=str,
                                help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description', type=str,
                                help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image', type=str,
                                help='Docker base image used to build the'
                                     ' pipeline steps')
    metadata_group.add_argument('--kfp_host', type=str,
                                help='KFP endpoint. Provide address as'
                                     ' <host>:<port>.')
    metadata_group.add_argument('--storage-class-name', type=str,
                                help='The storage class name for the created'
                                     ' volumes')
    metadata_group.add_argument('--volume-access-mode', type=str,
                                help='The access mode for the created'
                                     ' volumes')

    args = parser.parse_args()

    # get the notebook metadata args group
    mt_overrides_group = next(
        filter(lambda x: x.title == 'Notebook Metadata Overrides',
               parser._action_groups))
    # get the single args of that group
    mt_overrides_group_dict = {
        a.dest: getattr(args, a.dest, None)
        for a in mt_overrides_group._group_actions
        if getattr(args, a.dest, None) is not None
    }

    # FIXME: We are removing the `debug` arg. This shouldn't be an issue
    processor = NotebookProcessor(args.nb, mt_overrides_group_dict)
    pipeline = processor.run()
    dsl_script_path = Compiler(pipeline).compile()
    pipeline_name = pipeline.config.pipeline_name
    pipeline_package_path = kfputils.compile_pipeline(dsl_script_path,
                                                      pipeline_name)

    if args.upload_pipeline:
        kfputils.upload_pipeline(
            pipeline_package_path=pipeline_package_path,
            pipeline_name=pipeline_name,
            host=pipeline.config.kfp_host)

    if args.run_pipeline:
        run_name = kfputils.generate_run_name(pipeline_name)
        kfputils.run_pipeline(
            run_name=run_name,
            experiment_name=pipeline.config.experiment_name,
            pipeline_package_path=pipeline_package_path,
            host=pipeline.config.kfp_host)
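# Illustrative CLI invocation (a sketch; it assumes the console entry point is
# named `kale`, and the notebook path and KFP host are placeholders). Only
# flags defined by the parser above are used.
#
#     kale --nb pipeline.ipynb \
#          --pipeline_name my-pipeline \
#          --kfp_host <host>:<port> \
#          --upload_pipeline --run_pipeline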
def notebook_processor(dummy_nb_config):
    """Return a notebook processor over a dummy in-memory notebook."""
    with patch.object(NotebookProcessor, '_read_notebook',
                      lambda _: nbformat.v4.new_notebook()):
        return NotebookProcessor("path/to/nb", dummy_nb_config)