def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC,
                                     formatter_class=RawTextHelpFormatter)
    parser.add_argument('--nb', type=str,
                        help='Path to source JupyterNotebook', required=True)
    parser.add_argument('--experiment_name', type=str,
                        help='Name of the created experiment')
    parser.add_argument('--pipeline_name', type=str,
                        help='Name of the deployed pipeline')
    parser.add_argument('--pipeline_description', type=str,
                        help='Description of the deployed pipeline')
    parser.add_argument('--docker_image', type=str,
                        help='Docker base image used to build the pipeline steps')
    # important to have default=None, otherwise it would default to False
    # and would always override notebook_metadata
    parser.add_argument('--upload_pipeline', action='store_true', default=None)
    parser.add_argument('--run_pipeline', action='store_true', default=None)
    parser.add_argument('--kfp_dns', type=str,
                        help='DNS to KFP service. Provide address as <host>:<port>. '
                             '`/pipeline` will be appended automatically')
    parser.add_argument('--jupyter_args', type=str,
                        help='YAML file with Jupyter parameters as defined by Papermill')
    parser.add_argument('--debug', action='store_true')

    args = parser.parse_args()

    notebook_metadata = nb.read(args.nb, as_version=nb.NO_CONVERT) \
        .metadata.get(KALE_NOTEBOOK_METADATA_KEY, dict())

    # convert args to dict removing all None elements,
    # and overwrite keys into notebook_metadata
    metadata_arguments = {**notebook_metadata,
                          **{k: v for k, v in vars(args).items() if v is not None}}

    for r in REQUIRED_ARGUMENTS:
        if r not in metadata_arguments:
            raise ValueError(f"Required argument not found: {r}")

    # if jupyter_args is set, generate first a set of temporary notebooks
    # based on the input yml parameters (via Papermill)
    if 'jupyter_args' in metadata_arguments:
        generated_notebooks = generate_notebooks_from_yml(
            input_nb_path=args.nb,
            yml_parameters_path=metadata_arguments['jupyter_args'])
        # Run KaleCore over each generated notebook
        for n, params in generated_notebooks:
            Kale(
                source_notebook_path=n,
                experiment_name=metadata_arguments['experiment_name'] + params,
                pipeline_name=metadata_arguments['pipeline_name'] + params,
                pipeline_descr=metadata_arguments['pipeline_description'] + " params" + params,
                docker_image=metadata_arguments['docker_image'],
                upload_pipeline=metadata_arguments['upload_pipeline'],
                run_pipeline=metadata_arguments['run_pipeline'],
                volumes=metadata_arguments['volumes'],
                debug=args.debug
            ).run()
    else:
        Kale(
            source_notebook_path=args.nb,
            experiment_name=metadata_arguments['experiment_name'],
            pipeline_name=metadata_arguments['pipeline_name'],
            pipeline_descr=metadata_arguments['pipeline_description'],
            docker_image=metadata_arguments['docker_image'],
            upload_pipeline=metadata_arguments['upload_pipeline'],
            run_pipeline=metadata_arguments['run_pipeline'],
            volumes=metadata_arguments['volumes'],
            debug=args.debug
        ).run()
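# A minimal, self-contained sketch of the override semantics used above:
# CLI arguments that are not None take precedence over the notebook's
# metadata, while unset (None) arguments fall through to the notebook values.
# The two dicts below are hypothetical illustration data, not real metadata.
notebook_metadata = {"pipeline_name": "from-notebook", "docker_image": "img:1"}
cli_args = {"pipeline_name": "from-cli", "docker_image": None}
merged = {**notebook_metadata,
          **{k: v for k, v in cli_args.items() if v is not None}}
assert merged == {"pipeline_name": "from-cli", "docker_image": "img:1"}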
def main():
    parser = argparse.ArgumentParser(
        description='KALE: Kubeflow Automated pipeLines Engine')
    parser.add_argument('--nb', type=str,
                        help='Path to source JupyterNotebook', required=True)
    parser.add_argument('--deploy', action='store_true')
    parser.add_argument('--kfp_port', type=int, default=8080,
                        help='Local port map to remote KFP instance. '
                             'KFP assumed to be at localhost:<port>/pipeline')
    parser.add_argument('--pipeline_name', type=str,
                        help='Name of the deployed pipeline')
    parser.add_argument('--pipeline_descr', type=str,
                        help='Description of the deployed pipeline')
    parser.add_argument('--docker_image', type=str,
                        help='Docker base image used to build the pipeline steps')
    parser.add_argument('--jupyter_args', type=str,
                        help='YAML file with Jupyter parameters as defined by Papermill')

    args = parser.parse_args()

    # if jupyter_args is set, generate first a set of temporary notebooks
    # based on the input yml parameters (via Papermill)
    if args.jupyter_args is not None:
        generated_notebooks = generate_notebooks_from_yml(
            input_nb_path=args.nb, yml_parameters_path=args.jupyter_args)
        # Run KaleCore over each generated notebook
        for n, params in generated_notebooks:
            Kale(source_notebook_path=n,
                 pipeline_name=args.pipeline_name + params,
                 pipeline_descr=args.pipeline_descr + " params" + params,
                 docker_image=args.docker_image,
                 auto_deploy=args.deploy,
                 kfp_port=args.kfp_port)
    else:
        Kale(source_notebook_path=args.nb,
             pipeline_name=args.pipeline_name,
             pipeline_descr=args.pipeline_descr,
             docker_image=args.docker_image,
             auto_deploy=args.deploy,
             kfp_port=args.kfp_port)
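# Hedged sketch of what generate_notebooks_from_yml plausibly does with the
# Papermill-style YAML passed via --jupyter_args: expand list-valued
# parameters into one (params, suffix) pair per combination. The cartesian
# expansion strategy and the suffix format are assumptions for illustration;
# only the (notebook, params-suffix) return shape is implied by the callers
# above.
import itertools

def expand_parameters(parameters):
    """Yield (params_dict, suffix) for every combination of list values."""
    keys = sorted(parameters)
    value_lists = [v if isinstance(v, list) else [v]
                   for v in (parameters[k] for k in keys)]
    for combo in itertools.product(*value_lists):
        params = dict(zip(keys, combo))
        suffix = "".join(f"-{k}{v}" for k, v in params.items())
        yield params, suffix

# e.g. {'lr': [0.1, 0.01], 'epochs': 10} expands to two combinations
for params, suffix in expand_parameters({'lr': [0.1, 0.01], 'epochs': 10}):
    print(params, suffix)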
def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL."""
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug)
    instance.logger = request.log if hasattr(request, "log") else logger
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)
    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfputils.compile_pipeline(script_path, pipeline_name)
    return {"pipeline_package_path": os.path.relpath(package_path),
            "pipeline_metadata": instance.pipeline_metadata}
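# A usage sketch for compile_notebook outside the RPC layer. DummyRequest and
# its `log` attribute are assumptions standing in for the real RPC request
# object; the function above only reads an optional `log` attribute from it.
# The notebook path is hypothetical.
import logging

class DummyRequest:
    log = logging.getLogger("kale.rpc")

result = compile_notebook(DummyRequest(), "pipeline.ipynb")
print(result["pipeline_package_path"])
print(result["pipeline_metadata"]["pipeline_name"])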
def validate_notebook(request, source_notebook_path,
                      notebook_metadata_overrides=None):
    """Validate notebook metadata."""
    # Notebook metadata is validated at class instantiation
    Kale(source_notebook_path, notebook_metadata_overrides)
    return True
def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC,
                                     formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb', type=str,
                               help='Path to source JupyterNotebook',
                               required=True)
    # use store_const instead of store_true because we want None
    # instead of False in case the flag is missing
    general_group.add_argument('--upload_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--run_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--debug', action='store_true')

    metadata_group = parser.add_argument_group('Notebook Metadata Overrides',
                                               METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name', type=str,
                                help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name', type=str,
                                help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description', type=str,
                                help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image', type=str,
                                help='Docker base image used to build the pipeline steps')
    metadata_group.add_argument('--kfp_host', type=str,
                                help='KFP endpoint. Provide address as <host>:<port>.')

    args = parser.parse_args()

    # get the notebook metadata args group
    metadata_overrides_group = next(
        filter(lambda x: x.title == 'Notebook Metadata Overrides',
               parser._action_groups))
    # get the single args of that group
    metadata_overrides_group_dict = {
        a.dest: getattr(args, a.dest, None)
        for a in metadata_overrides_group._group_actions}

    kale = Kale(
        source_notebook_path=args.nb,
        notebook_metadata_overrides=metadata_overrides_group_dict,
        debug=args.debug
    )
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters)

    # compile the pipeline to kfp tar package
    pipeline_package_path = kfp_utils.compile_pipeline(
        script_path, kale.pipeline_metadata['pipeline_name'])

    if args.upload_pipeline:
        kfp_utils.upload_pipeline(
            pipeline_package_path=pipeline_package_path,
            pipeline_name=kale.pipeline_metadata['pipeline_name'],
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
    if args.run_pipeline:
        kfp_utils.run_pipeline(
            run_name=kale.pipeline_metadata['pipeline_name'] + '_run',
            experiment_name=kale.pipeline_metadata['experiment_name'],
            pipeline_package_path=pipeline_package_path,
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
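# A hedged example of driving this CLI entry point programmatically by
# patching sys.argv, which argparse reads by default. The notebook path,
# pipeline name, and experiment name are hypothetical values.
import sys
from unittest import mock

argv = ["kale", "--nb", "notebook.ipynb",
        "--pipeline_name", "demo-pipeline",
        "--experiment_name", "demo-experiment",
        "--upload_pipeline"]
with mock.patch.object(sys, "argv", argv):
    main()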
def compile_notebook(source_notebook_path, notebook_metadata_overrides=None,
                     debug=False, auto_snapshot=False):
    instance = Kale(source_notebook_path, notebook_metadata_overrides,
                    debug, auto_snapshot)
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)
    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfp_utils.compile_pipeline(script_path, pipeline_name)
    return {
        "pipeline_package_path": package_path,
        "pipeline_metadata": instance.pipeline_metadata
    }
def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('nb', type=str,
                        help='Source notebook content')
    parser.add_argument('deploy', type=inputs.boolean,
                        help='True to deploy the pipeline to a running KFP instance')
    parser.add_argument('kfp_port', type=int, default=1234,
                        help='Local port map to remote KFP instance. '
                             'KFP assumed to be at localhost:<port>/pipeline')
    parser.add_argument('pipeline_name', required=True, type=str,
                        help='Name of the deployed pipeline')
    parser.add_argument('pipeline_descr', required=True, type=str,
                        help='Description of the deployed pipeline')
    parser.add_argument('docker_image',
                        default='stefanofioravanzo/kale-kfp-examples:0.1',
                        type=str,
                        help='Docker base image used to build the pipeline steps')
    parser.add_argument('jupyter_args', type=str,
                        help='YAML file with Jupyter parameters as defined by Papermill')
    args = parser.parse_args()

    # create a tmp folder
    tmp_dir = tempfile.mkdtemp()
    tmp_notebook_path = f"{tmp_dir}/kale_generated_notebook.ipynb"
    if args['nb'] is None:
        # notebook was sent as a file upload
        f = request.files['notebook_file']
        f.save(tmp_notebook_path)
    else:
        # notebook was sent inline as a string
        with open(tmp_notebook_path, 'w+') as f:
            f.write(args['nb'])

    Kale(source_notebook_path=tmp_notebook_path,
         pipeline_name=args['pipeline_name'] + "_" + self.random_string(4),
         pipeline_descr=args['pipeline_descr'],
         docker_image=args['docker_image'],
         auto_deploy=args['deploy'],
         kfp_port=args['kfp_port'])

    return {'data': args['nb']}
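# A client-side sketch for the resource above using the `requests` library.
# The endpoint URL is an assumption (the route is not shown in this section);
# the form fields mirror the reqparse arguments, and `notebook_file` matches
# the file-upload branch taken when 'nb' is absent.
import requests

resp = requests.post(
    "http://localhost:5000/kale",  # hypothetical route
    data={
        "pipeline_name": "demo-pipeline",
        "pipeline_descr": "Demo pipeline compiled from a notebook",
        "deploy": "true",
        "kfp_port": 8080,
    },
    files={"notebook_file": open("notebook.ipynb", "rb")},
)
print(resp.json())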
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)
    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
def test_pipeline_generation_from_github(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_url = EX_REPO + "titanic-ml-dataset/titanic_dataset_ml.ipynb"
    # download notebook to tmp dir
    notebook_path, response = urlretrieve(notebook_url)
    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/', 'titanic.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)
    kale = Kale(source_notebook_path=notebook_path)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)
    # TODO: Need to suppress log generation when running tests
    os.remove(os.path.join(os.getcwd(), 'kale.log'))

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
def test_metadata_generation(tag_parsing_notebook):
    result = [
        {'block_names': ['imports'], 'in': [], 'out': []},
        {'block_names': ['sum'], 'in': [], 'out': []},
        {'block_names': ['cumsum'], 'in': [], 'out': [],
         'previous_blocks': ['sum']},
        {'block_names': [], 'in': [], 'out': []},
        {'block_names': ['imports'], 'in': [], 'out': []},
        {'block_names': ['os'], 'in': [], 'out': [],
         'previous_blocks': ['sum', 'cumsum']},
    ]

    parsed_tags = list()
    for c in tag_parsing_notebook.cells:
        # parse only source code cells
        if c.cell_type != "code":
            continue
        tags = Kale.parse_metadata(c.metadata)
        parsed_tags.append(tags)

    pairs = zip(result, parsed_tags)
    assert all(x == y for x, y in pairs)
def test_tag_block_error():
    tag = {'metadata': {'tags': ["block:processing:dataset"]}}
    with pytest.raises(ValueError):
        Kale.parse_metadata(tag['metadata'])
def test_tag_block():
    tag = {'metadata': {'tags': ["block:processing"]}}
    target = {'block_names': ["processing"], 'in': [], 'out': []}
    res = Kale.parse_metadata(tag['metadata'])
    assert target == res
def test_tag_skip():
    tag = {'metadata': {'tags': ['skip']}}
    target = None
    res = Kale.parse_metadata(tag['metadata'])
    assert target == res
def test_empty_tag():
    tag = {'metadata': {}}
    target = {'block_names': [], 'in': [], 'out': []}
    res = Kale.parse_metadata(tag['metadata'])
    assert target == res