Example 1
def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC, formatter_class=RawTextHelpFormatter)
    parser.add_argument('--nb', type=str, help='Path to source JupyterNotebook', required=True)
    parser.add_argument('--experiment_name', type=str, help='Name of the created experiment')
    parser.add_argument('--pipeline_name', type=str, help='Name of the deployed pipeline')
    parser.add_argument('--pipeline_description', type=str, help='Description of the deployed pipeline')
    parser.add_argument('--docker_image', type=str, help='Docker base image used to build the pipeline steps')
    # use store_const so an omitted flag stays None; store_true would default
    # to False and always override the notebook metadata in the merge below
    parser.add_argument('--upload_pipeline', action='store_const', const=True)
    parser.add_argument('--run_pipeline', action='store_const', const=True)
    parser.add_argument('--kfp_dns', type=str,
                        help='DNS to KFP service. Provide address as <host>:<port>. `/pipeline` will be appended automatically')
    parser.add_argument('--jupyter_args', type=str, help='YAML file with Jupyter parameters as defined by Papermill')
    parser.add_argument('--debug', action='store_true')

    args = parser.parse_args()

    notebook_metadata = nb.read(args.nb, as_version=nb.NO_CONVERT).metadata.get(KALE_NOTEBOOK_METADATA_KEY, dict())
    # convert args to a dict, drop all None values, and overlay the remaining keys onto notebook_metadata
    metadata_arguments = {**notebook_metadata, **{k: v for k, v in vars(args).items() if v is not None}}
    for r in REQUIRED_ARGUMENTS:
        if r not in metadata_arguments:
            raise ValueError(f"Required argument not found: {r}")

    # if jupyter_args is set, generate first a set of temporary notebooks
    # based on the input yml parameters (via Papermill)
    if 'jupyter_args' in metadata_arguments:
        generated_notebooks = generate_notebooks_from_yml(input_nb_path=args.nb,
                                                          yml_parameters_path=metadata_arguments['jupyter_args'])

        # Run KaleCore over each generated notebook
        for n, params in generated_notebooks:
            Kale(
                source_notebook_path=n,
                experiment_name=metadata_arguments['experiment_name'] + params,
                pipeline_name=metadata_arguments['pipeline_name'] + params,
                pipeline_descr=metadata_arguments['pipeline_description'] + " params" + params,
                docker_image=metadata_arguments['docker_image'],
                upload_pipeline=metadata_arguments['upload_pipeline'],
                run_pipeline=metadata_arguments['run_pipeline'],
                volumes=metadata_arguments['volumes'],
                debug=args.debug
            ).run()
    else:
        Kale(
            source_notebook_path=args.nb,
            experiment_name=metadata_arguments['experiment_name'],
            pipeline_name=metadata_arguments['pipeline_name'],
            pipeline_descr=metadata_arguments['pipeline_description'],
            docker_image=metadata_arguments['docker_image'],
            upload_pipeline=metadata_arguments['upload_pipeline'],
            run_pipeline=metadata_arguments['run_pipeline'],
            volumes=metadata_arguments['volumes'],
            debug=args.debug
        ).run()
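The key step in this example is the metadata merge: the notebook metadata provides the defaults, and only CLI flags that were actually set (non-None) override them. A standalone sketch of that behavior (values are hypothetical, not Kale code):

notebook_metadata = {'pipeline_name': 'from-notebook', 'docker_image': 'base:latest'}
cli_args = {'pipeline_name': 'from-cli', 'docker_image': None, 'debug': True}

# None values are dropped first, so unset flags never clobber notebook values
merged = {**notebook_metadata, **{k: v for k, v in cli_args.items() if v is not None}}
print(merged)  # {'pipeline_name': 'from-cli', 'docker_image': 'base:latest', 'debug': True}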
Example 2
def main():
    parser = argparse.ArgumentParser(
        description='KALE: Kubeflow Automated pipeLines Engine')
    parser.add_argument('--nb',
                        type=str,
                        help='Path to source JupyterNotebook',
                        required=True)
    parser.add_argument('--deploy', action='store_true')
    parser.add_argument(
        '--kfp_port',
        type=int,
        default=8080,
        help='Local port map to remote KFP instance. '
             'KFP assumed to be at localhost:<port>/pipeline')
    parser.add_argument('--pipeline_name',
                        type=str,
                        help='Name of the deployed pipeline')
    parser.add_argument('--pipeline_descr',
                        type=str,
                        help='Description of the deployed pipeline')
    parser.add_argument(
        '--docker_image',
        type=str,
        help='Docker base image used to build the pipeline steps')
    parser.add_argument(
        '--jupyter_args',
        type=str,
        help='YAML file with Jupyter parameters as defined by Papermill')

    args = parser.parse_args()

    # if jupyter_args is set, generate first a set of temporary notebooks
    # based on the input yml parameters (via Papermill)
    if args.jupyter_args is not None:
        generated_notebooks = generate_notebooks_from_yml(
            input_nb_path=args.nb, yml_parameters_path=args.jupyter_args)

        # Run KaleCore over each generated notebook
        for n, params in generated_notebooks:
            Kale(source_notebook_path=n,
                 pipeline_name=args.pipeline_name + params,
                 pipeline_descr=args.pipeline_descr + " params" + params,
                 docker_image=args.docker_image,
                 auto_deploy=args.deploy,
                 kfp_port=args.kfp_port)
    else:
        Kale(source_notebook_path=args.nb,
             pipeline_name=args.pipeline_name,
             pipeline_descr=args.pipeline_descr,
             docker_image=args.docker_image,
             auto_deploy=args.deploy,
             kfp_port=args.kfp_port)
Example 3
File: nb.py, Project: sylus/kale
def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL."""
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug)
    instance.logger = request.log if hasattr(request, "log") else logger

    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)

    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfputils.compile_pipeline(script_path, pipeline_name)

    return {"pipeline_package_path": os.path.relpath(package_path),
            "pipeline_metadata": instance.pipeline_metadata}
def validate_notebook(request,
                      source_notebook_path,
                      notebook_metadata_overrides=None):
    """Validate notebook metadata."""
    # Notebook metadata is validated at class instantiation
    Kale(source_notebook_path, notebook_metadata_overrides)
    return True
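For context, compile_notebook can also be exercised outside the RPC layer; since hasattr(None, "log") is False, passing request=None falls back to the module-level logger. A minimal sketch (the path is hypothetical):

# Hypothetical invocation of the compile_notebook defined above
result = compile_notebook(request=None,
                          source_notebook_path='pipeline.ipynb',
                          debug=True)
print(result['pipeline_package_path'])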
Example 5
def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC, formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb', type=str, help='Path to source JupyterNotebook', required=True)
    # use store_const instead of store_true so that a missing flag yields None instead of False
    general_group.add_argument('--upload_pipeline', action='store_const', const=True)
    general_group.add_argument('--run_pipeline', action='store_const', const=True)
    general_group.add_argument('--debug', action='store_true')

    metadata_group = parser.add_argument_group('Notebook Metadata Overrides', METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name', type=str, help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name', type=str, help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description', type=str, help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image', type=str, help='Docker base image used to build the pipeline steps')
    metadata_group.add_argument('--kfp_host', type=str, help='KFP endpoint. Provide address as <host>:<port>.')

    args = parser.parse_args()

    # get the notebook metadata args group
    metadata_overrides_group = next(filter(lambda x: x.title == 'Notebook Metadata Overrides', parser._action_groups))
    # get the single args of that group
    metadata_overrides_group_dict = {a.dest: getattr(args, a.dest, None) for a in metadata_overrides_group._group_actions}

    kale = Kale(
        source_notebook_path=args.nb,
        notebook_metadata_overrides=metadata_overrides_group_dict,
        debug=args.debug
    )
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph, pipeline_parameters)
    # compile the pipeline to kfp tar package
    pipeline_package_path = kfp_utils.compile_pipeline(script_path, kale.pipeline_metadata['pipeline_name'])

    if args.upload_pipeline:
        kfp_utils.upload_pipeline(
            pipeline_package_path=pipeline_package_path,
            pipeline_name=kale.pipeline_metadata['pipeline_name'],
            host=kale.pipeline_metadata.get('kfp_host', None)
        )

    if args.run_pipeline:
        kfp_utils.run_pipeline(
            run_name=kale.pipeline_metadata['pipeline_name'] + '_run',
            experiment_name=kale.pipeline_metadata['experiment_name'],
            pipeline_package_path=pipeline_package_path,
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
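The store_const choice above is easy to verify in isolation: with action='store_true' an omitted flag parses to False, which would count as an explicit metadata override, while store_const leaves it as None:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--a', action='store_true')               # omitted flag -> False
p.add_argument('--b', action='store_const', const=True)  # omitted flag -> None

print(vars(p.parse_args([])))       # {'a': False, 'b': None}
print(vars(p.parse_args(['--b'])))  # {'a': False, 'b': True}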
Example 6
File: nb.py, Project: zlapp/kale
def compile_notebook(source_notebook_path,
                     notebook_metadata_overrides=None,
                     debug=False,
                     auto_snapshot=False):
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug,
                    auto_snapshot)
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)

    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfp_utils.compile_pipeline(script_path, pipeline_name)

    return {
        "pipeline_package_path": package_path,
        "pipeline_metadata": instance.pipeline_metadata
    }
Example 7
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('nb',
                            type=str,
                            help='Source notebook content; if omitted, a '
                                 'notebook_file upload is expected')
        parser.add_argument(
            'deploy',
            type=inputs.boolean,
            help='True to deploy the pipeline to a running KFP instance')
        parser.add_argument(
            'kfp_port',
            type=int,
            default=1234,
            help='Local port map to remote KFP instance. '
                 'KFP assumed to be at localhost:<port>/pipeline')
        parser.add_argument('pipeline_name',
                            required=True,
                            type=str,
                            help='Name of the deployed pipeline')
        parser.add_argument('pipeline_descr',
                            required=True,
                            type=str,
                            help='Description of the deployed pipeline')
        parser.add_argument(
            'docker_image',
            default='stefanofioravanzo/kale-kfp-examples:0.1',
            type=str,
            help='Docker base image used to build the pipeline steps')
        parser.add_argument(
            'jupyter_args',
            type=str,
            help='YAML file with Jupyter parameters as defined by Papermill')

        args = parser.parse_args()

        # create a tmp folder
        tmp_dir = tempfile.mkdtemp()
        tmp_notebook_path = f"{tmp_dir}/kale_generated_notebook.ipynb"

        if args['nb'] is None:
            f = request.files['notebook_file']
            f.save(tmp_notebook_path)
        else:
            with open(tmp_notebook_path, 'w+') as f:
                f.write(args['nb'])

        Kale(source_notebook_path=tmp_notebook_path,
             pipeline_name=args['pipeline_name'] + "_" + self.random_string(4),
             pipeline_descr=args['pipeline_descr'],
             docker_image=args['docker_image'],
             auto_deploy=args['deploy'],
             kfp_port=args['kfp_port'])

        return {'data': args['nb']}
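The resource above accepts the notebook either as an 'nb' field or as a 'notebook_file' upload. A hedged client sketch using requests (the URL and route are assumptions; the route registration is not part of this snippet):

import requests

# Hypothetical endpoint; the actual route registration is not shown above
with open('notebook.ipynb', 'rb') as f:
    resp = requests.post('http://localhost:5000/kale',
                         data={'pipeline_name': 'demo',
                               'pipeline_descr': 'demo pipeline'},
                         files={'notebook_file': f})
print(resp.json())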
Example 8
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)

    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
Example 9
def test_pipeline_generation_from_github(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_url = EX_REPO + "titanic-ml-dataset/titanic_dataset_ml.ipynb"
    # download notebook to tmp dir
    notebook_path, response = urlretrieve(notebook_url)

    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/', 'titanic.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
Example 10
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)

    kale = Kale(source_notebook_path=notebook_path)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)
    # TODO: Need to suppress log generation when running tests
    os.remove(os.path.join(os.getcwd(), 'kale.log'))

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
Example 11
def test_metadata_generation(tag_parsing_notebook):
    result = [{
        'block_names': ['imports'],
        'in': [],
        'out': []
    }, {
        'block_names': ['sum'],
        'in': [],
        'out': []
    }, {
        'block_names': ['cumsum'],
        'in': [],
        'out': [],
        'previous_blocks': ['sum']
    }, {
        'block_names': [],
        'in': [],
        'out': []
    }, {
        'block_names': ['imports'],
        'in': [],
        'out': []
    }, {
        'block_names': ['os'],
        'in': [],
        'out': [],
        'previous_blocks': ['sum', 'cumsum']
    }]

    parsed_tags = list()
    for c in tag_parsing_notebook.cells:
        # parse only source code cells
        if c.cell_type != "code":
            continue

        tags = Kale.parse_metadata(c.metadata)
        parsed_tags.append(tags)
    # strict list equality also catches length mismatches that zip() would hide
    assert parsed_tags == result
Example 12
def test_tag_block_error():
    tag = {'metadata': {'tags': ["block:processing:dataset"]}}

    with pytest.raises(ValueError):
        Kale.parse_metadata(tag['metadata'])
Example 13
def test_tag_block():
    tag = {'metadata': {'tags': ["block:processing"]}}
    target = {'block_names': ["processing"], 'in': [], 'out': []}

    res = Kale.parse_metadata(tag['metadata'])
    assert target == res
Example 14
def test_tag_skip():
    tag = {'metadata': {'tags': ['skip']}}
    target = None

    res = Kale.parse_metadata(tag['metadata'])
    assert target == res
Example 15
def test_empty_tag():
    tag = {'metadata': {}}
    target = {'block_names': [], 'in': [], 'out': []}

    res = Kale.parse_metadata(tag['metadata'])
    assert target == res
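Taken together, these last tests pin down the contract of Kale.parse_metadata: an empty metadata dict yields empty block/in/out lists, a 'skip' tag returns None, 'block:<name>' appends a block name, and a block tag with extra ':' segments raises ValueError. A minimal sketch of a parser satisfying them (not Kale's actual implementation; the 'prev:' spelling for previous_blocks is an assumption inferred from test_metadata_generation):

def parse_metadata(metadata):
    # Minimal sketch reconstructed from the tests above; not Kale's real code
    parsed = {'block_names': [], 'in': [], 'out': []}
    for tag in metadata.get('tags', []):
        if tag == 'skip':
            return None            # skip-tagged cells are dropped entirely
        parts = tag.split(':')
        if parts[0] == 'block':
            if len(parts) != 2:    # e.g. "block:processing:dataset" is invalid
                raise ValueError(f"Malformed block tag: {tag}")
            parsed['block_names'].append(parts[1])
        elif parts[0] == 'prev':   # ASSUMPTION: tag syntax for previous_blocks
            parsed.setdefault('previous_blocks', []).append(parts[1])
    return parsed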