Exemple #1
0
def test_log(tmp_nbs, monkeypatch):
    """Passing ``--log info`` configures logging with level ``'INFO'``."""
    # replace the logging module referenced by parsers so the calls made
    # while loading the entry point can be inspected
    fake_logging = Mock()
    monkeypatch.setattr(parsers, 'logging', fake_logging)

    argv = ['python', '--log', 'info', '--entry-point', 'pipeline.yaml']
    monkeypatch.setattr(sys, 'argv', argv)

    cli = CustomParser()

    # no command-specific arguments to register
    with cli:
        pass

    cli.load_from_entry_point_arg()

    fake_logging.basicConfig.assert_called_with(level='INFO')
Exemple #2
0
def main():
    """Entry point for ``ploomber interact``.

    Loads the DAG from the entry point, attempts to render it and starts an
    IPython session where the DAG is available as ``dag``. A render failure
    is reported (stdout and telemetry) but does not prevent the session from
    starting.
    """
    started = datetime.datetime.now()

    cli = CustomParser(
        description='Call an entry point '
        '(pipeline.yaml or dotted path to factory)',
        prog='ploomber interact',
    )

    with cli:
        # this command has no static args
        pass

    dag, _ = cli.load_from_entry_point_arg()

    try:
        dag.render()
    except Exception:
        message = ('Your dag failed to render, but you can still inspect the '
                   'object to debug it.\n')
        failure = {'type': 'dag_render_failed', 'exception': message}
        telemetry.log_api("interact_error", dag=dag, metadata=failure)
        print(message)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_interact",
                      total_runtime=str(elapsed),
                      dag=dag)

    # NOTE: do not use embed here, we must use start_ipython, see here:
    # https://github.com/ipython/ipython/issues/8918
    session_namespace = {'dag': dag}
    start_ipython(argv=[], user_ns=session_namespace)
Exemple #3
0
def test_log_file_with_factory_entry_point(backup_test_pkg, monkeypatch, opt):
    """Logging to a file works when the entry point is a factory function.

    ``opt`` is the command line flag placed right before ``my.log``.
    """
    fake_logging = Mock()
    monkeypatch.setattr(parsers, 'logging', fake_logging)

    argv = ['python', '--log', 'info', opt, 'my.log']
    argv += ['--entry-point', 'test_pkg.entry.plain_function']
    monkeypatch.setattr(sys, 'argv', argv)

    cli = CustomParser()

    # no command-specific arguments to register
    with cli:
        pass

    cli.load_from_entry_point_arg()

    # level configured, and a handler for the file attached to the logger
    fake_logging.basicConfig.assert_called_with(level='INFO')
    fake_logging.FileHandler.assert_called_with('my.log')
    fake_logging.getLogger().addHandler.assert_called()
Exemple #4
0
def test_log_file(tmp_nbs, monkeypatch, opt):
    """Logging to a file works when the entry point is ``pipeline.yaml``.

    ``opt`` is the command line flag placed right before ``my.log``.
    """
    logging_double = Mock()
    cli_args = [
        'python',
        '--log', 'info',
        opt, 'my.log',
        '--entry-point', 'pipeline.yaml',
    ]

    monkeypatch.setattr(sys, 'argv', cli_args)
    monkeypatch.setattr(parsers, 'logging', logging_double)

    custom_parser = CustomParser()

    # no command-specific arguments to register
    with custom_parser:
        pass

    custom_parser.load_from_entry_point_arg()

    logging_double.basicConfig.assert_called_with(level='INFO')
    logging_double.FileHandler.assert_called_with('my.log')
    logging_double.getLogger().addHandler.assert_called()
Exemple #5
0
def test_entry_point_from_factory_in_environment_variable(
        backup_test_pkg, monkeypatch):
    """A factory named in ``ENTRY_POINT`` is used when no args are passed."""
    # no --entry-point in argv, only the environment variable is set
    monkeypatch.setenv('ENTRY_POINT', 'test_pkg.entry.plain_function')
    monkeypatch.setattr(sys, 'argv', ['python'])

    cli = CustomParser()

    with cli:
        pass

    dag, _ = cli.load_from_entry_point_arg()

    assert isinstance(dag, DAG)
Exemple #6
0
def main():
    """Entry point for ``ploomber task``.

    Renders the DAG, picks the task named by the ``task_name`` argument and
    runs the actions selected by the flags. When none of ``--build``,
    ``--status``, ``--source`` or ``--on-finish`` is passed, the task is
    built.
    """
    started = datetime.datetime.now()
    cli = CustomParser(description='Build tasks', prog='ploomber task')

    # (long flag, short flag, help text); all of them are boolean switches
    switches = (
        ('--source', '-s', 'Print task source code'),
        ('--build', '-b',
         'Build task (default if no other option is passed)'),
        ('--force', '-f', 'Force task build (ignore up-to-date status)'),
        ('--status', '-st', 'Get task status'),
        ('--on-finish', '-of', 'Only execute on_finish hook'),
    )

    with cli:
        cli.add_argument('task_name')

        for long_flag, short_flag, description in switches:
            cli.add_argument(long_flag,
                             short_flag,
                             help=description,
                             action='store_true')

    dag, args = cli.load_from_entry_point_arg()

    dag.render()
    selected = dag[args.task_name]

    if args.source:
        print(selected.source)

    if args.status:
        print(selected.status())

    if args.on_finish:
        selected._run_on_finish()

    # the task is built by default, but when --source, --status or
    # --on-finish are passed, the --build flag is required
    other_actions = (args.status, args.source, args.on_finish)

    if args.build or not any(other_actions):
        selected.build(force=args.force)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_task",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})
Exemple #7
0
def main():
    """Entry point for ``ploomber status``: print the pipeline status."""
    started = datetime.datetime.now()

    cli = CustomParser(prog='ploomber status',
                       description='Show pipeline status')

    with cli:
        # this command has no static args
        pass

    dag, _ = cli.load_from_entry_point_arg()
    status_report = dag.status()
    print(status_report)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_status",
                      total_runtime=str(elapsed),
                      dag=dag)
Exemple #8
0
def main():
    """Entry point for ``ploomber report``.

    Writes the pipeline report to the path given by ``--output``/``-o``
    (``pipeline.html`` when omitted) and prints where it was saved.
    """
    started = datetime.datetime.now()

    cli = CustomParser(prog='ploomber report',
                       description='Make a pipeline report')

    with cli:
        cli.add_argument(
            '--output',
            '-o',
            default='pipeline.html',
            help='Where to save the report, defaults to pipeline.html')

    dag, args = cli.load_from_entry_point_arg()

    destination = args.output
    dag.to_markup(path=destination)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_report",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Report saved at:', destination)
Exemple #9
0
def test_dagspec_initialization_from_yaml(tmp_nbs_nested, monkeypatch):
    """
    DAGSpec accepts either a path to a spec or a dictionary, and the two
    behave slightly differently. Verify the parser initializes it with the
    path
    """
    monkeypatch.setattr(sys, 'argv', ['python'])

    # wrap the real class so it keeps working while recording its calls
    spy_dagspec = Mock(wraps=parsers.DAGSpec)
    monkeypatch.setattr(parsers, 'DAGSpec', spy_dagspec)

    cli = CustomParser()

    with cli:
        pass

    cli.load_from_entry_point_arg()

    spy_dagspec.assert_called_once_with('pipeline.yaml')
Exemple #10
0
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    DAGSpec accepts either a path to a spec or a dictionary, and the two
    behave slightly differently. Verify the cli passes the path (not a
    dictionary) together with an EnvDict loaded from env.yaml
    """
    # wrap the real objects so they keep working while recording their calls
    spy_dagspec = Mock(wraps=parsers.DAGSpec)
    spy_path_to_env = Mock(wraps=parsers.default.path_to_env_from_spec)
    spy_envdict = Mock(wraps=parsers.EnvDict)

    # ensure current timestamp does not change
    frozen = Mock()
    frozen.datetime.now().isoformat.return_value = 'current-timestamp'

    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', spy_dagspec)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        spy_path_to_env)
    monkeypatch.setattr(parsers, 'EnvDict', spy_envdict)
    monkeypatch.setattr(expand, "datetime", frozen)

    cli = CustomParser()

    with cli:
        pass

    cli.load_from_entry_point_arg()

    # ensure called using the path to the yaml spec
    expected_env = EnvDict({'sample': False}, path_to_here='.')
    spy_dagspec.assert_called_once_with('pipeline.yaml', env=expected_env)

    # and EnvDict initialized from env.yaml
    env_yaml = str(Path('env.yaml').resolve())
    spy_envdict.assert_called_once_with(env_yaml, path_to_here=Path('.'))
Exemple #11
0
def main():
    """Entry point for ``ploomber plot``.

    Saves a plot of the pipeline. The destination is ``--output``/``-o``
    when given; otherwise it is ``pipeline.png``, or ``pipeline.{name}.png``
    when ``extract_name`` returns a name for the entry point.
    """
    started = datetime.datetime.now()
    cli = CustomParser(prog='ploomber plot', description='Plot a pipeline')

    with cli:
        cli.add_argument(
            '--output',
            '-o',
            default=None,
            help='Where to save the plot, defaults to pipeline.png')

    dag, args = cli.load_from_entry_point_arg()

    output = args.output

    if output is None:
        # no explicit destination: derive it from the entry point
        name = extract_name(args.entry_point)
        output = f'pipeline.{name}.png' if name is not None else 'pipeline.png'

    dag.plot(output=output)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_plot",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Plot saved at:', output)
Exemple #12
0
def main():
    """Entry point for ``ploomber nb``: manage scripts and notebooks.

    Options fall in two groups. ``--single-click``,
    ``--single-click-disable``, ``--install-hook`` and ``--uninstall-hook``
    run without loading a DAG. ``--inject``, ``--remove``, ``--format``,
    ``--pair`` and ``--sync`` operate on a DAG, so the entry point is loaded
    and rendered only when at least one of them is passed.

    Raises
    ------
    RuntimeError
        If an option that needs a DAG is passed and loading the DAG fails
        (the original exception is chained as the cause)
    NotADirectoryError
        If ``--install-hook`` is passed and there is no ``.git`` directory
        in the current working directory
    """
    parser = CustomParser(description='Manage scripts and notebooks',
                          prog='ploomber nb')

    with parser:
        # The next options do not require a valid entry point

        # opening .py files as notebooks in JupyterLab with a single click
        single_click = parser.add_mutually_exclusive_group()
        single_click.add_argument(
            '--single-click',
            '-S',
            action='store_true',
            help=('Override JupyterLab defaults to open '
                  'scripts as notebook with a single click'))
        single_click.add_argument(
            '--single-click-disable',
            '-d',
            action='store_true',
            help=('Disables opening scripts as notebook with a single '
                  'click in JupyterLab'))

        # install/uninstall hook
        hook = parser.add_mutually_exclusive_group()
        hook.add_argument('--install-hook',
                          '-I',
                          action='store_true',
                          help='Install git pre-commit hook')
        hook.add_argument('--uninstall-hook',
                          '-u',
                          action='store_true',
                          help='Uninstall git pre-commit hook')

        # The next options require a valid entry point

        # inject/remove cell
        cell = parser.add_mutually_exclusive_group()
        cell.add_argument('--inject',
                          '-i',
                          action='store_true',
                          help='Inject cell to all script/notebook tasks')
        cell.add_argument(
            '--remove',
            '-r',
            action='store_true',
            help='Remove injected cell in all script/notebook tasks')

        # re-format
        parser.add_argument('--format',
                            '-f',
                            help='Re-format all script/notebook tasks')

        # pair scripts and nbs
        parser.add_argument('--pair',
                            '-p',
                            help='Pair scripts with ipynb files')

        # sync scripts and nbs
        parser.add_argument('--sync',
                            '-s',
                            action='store_true',
                            help='Sync scripts with ipynb files')

    # commands that need an entry point to work
    needs_entry_point = {'format', 'inject', 'remove', 'sync', 'pair'}

    # first pass: only used to decide whether the DAG has to be loaded
    args_ = parser.parse_args()

    if any(getattr(args_, arg) for arg in needs_entry_point):
        try:
            dag, args = parser.load_from_entry_point_arg()
        except Exception as e:
            err = 'Could not run nb command: the DAG failed to load'
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': err + f' {e}',
                                  'argv': sys.argv
                              })
            raise RuntimeError(err) from e
        else:
            # kept out of the try block: only loading errors are wrapped
            # in RuntimeError, rendering errors propagate unchanged
            dag.render(show_progress=False)
    else:
        dag = None
        args = args_

    # options that do not need a DAG

    if args.single_click:
        _py_with_single_click_enable()

    if args.single_click_disable:
        _py_with_single_click_disable()

    if args.install_hook:
        if not Path('.git').is_dir():
            err = ('Expected a .git/ directory in the current working '
                   'directory. Run this from the repository root directory.')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'no_git_config',
                                  'exception': err,
                                  'argv': sys.argv
                              })
            raise NotADirectoryError(err)

        parent = Path('.git', 'hooks')
        parent.mkdir(exist_ok=True)

        # pre-commit: remove injected cells
        _install_hook(parent / 'pre-commit', pre_commit_hook, args.entry_point)
        click.echo('Successfully installed pre-commit git hook')

        # post-commit: inject cells
        _install_hook(parent / 'post-commit', post_commit_hook,
                      args.entry_point)
        click.echo('Successfully installed post-commit git hook')

    if args.uninstall_hook:
        _delete_hook(Path('.git', 'hooks', 'pre-commit'))
        _delete_hook(Path('.git', 'hooks', 'post-commit'))

    # options that need a valid DAG

    if args.format:
        # entries that are None are dropped; the remaining ones are
        # reported below as tasks whose extension changed
        new_paths = [
            str(p) for p in _call_in_source(
                dag,
                'format',
                'Formatted notebooks',
                dict(fmt=args.format),
            ) if p is not None
        ]

        if new_paths:
            click.echo('Extension changed for the following '
                       f'tasks: {", ".join(new_paths)}. Update your '
                       'pipeline declaration.')

    if args.inject:
        _call_in_source(
            dag,
            'save_injected_cell',
            'Injected cell',
            dict(),
        )

        click.secho(
            'Finished cell injection. Re-run this command if your '
            'pipeline.yaml changes.',
            fg='green')

    if args.remove:
        _call_in_source(
            dag,
            'remove_injected_cell',
            'Removed injected cell',
            dict(),
        )

    if args.sync:
        # maybe its more efficient to pass all notebook paths at once?
        _call_in_source(dag, 'sync', 'Synced notebooks')

    # can pair give trouble if we're reformatting?
    if args.pair:
        _call_in_source(
            dag,
            'pair',
            'Paired notebooks',
            dict(base_path=args.pair),
        )
        click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to '
                   'your .gitignore to keep your repository clean')

    telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv})
Exemple #13
0
def main(render_only=False):
    """Entry point for ``ploomber build``.

    Parameters
    ----------
    render_only : bool, default=False
        If True, the DAG is rendered but not built

    Returns
    -------
    dag
        The DAG loaded from the entry point

    Notes
    -----
    If the first command line argument is positional (does not start with
    ``-``), ``parser.error`` is called with a message suggesting
    ``ploomber task {task-name}``.
    """
    started = datetime.datetime.now()
    cli = CustomParser(prog='ploomber build', description='Build pipeline')

    with cli:
        cli.add_argument('--force',
                         '-f',
                         action='store_true',
                         default=False,
                         help='Force execution by ignoring status')
        cli.add_argument('--skip-upstream',
                         '-su',
                         action='store_true',
                         default=False,
                         help='Skip building upstream dependencies. '
                         'Only applicable when using --partially')
        cli.add_argument(
            '--partially',
            '-p',
            default=None,
            help='Build a pipeline partially until certain task')
        cli.add_argument(
            '--debug',
            '-d',
            action='store_true',
            default=False,
            help='Drop a debugger session if an exception happens')

    # users may try to run "ploomber build {name}" to build a single task
    first_is_positional = (len(sys.argv) > 1
                           and not sys.argv[1].startswith('-'))

    if first_is_positional:
        suggestion = 'ploomber task {task-name}'
        cmd_name = cli.prog
        details = {
            'cmd_name': cmd_name,
            'suggestion': suggestion,
            'argv': sys.argv
        }
        telemetry.log_api("unsupported_build_cmd", metadata=details)
        cli.error(f'{cmd_name!r} does not take positional arguments.\n'
                  f'To build a single task, try: {suggestion!r}')

    dag, args = cli.load_from_entry_point_arg()

    # when using the parallel executor from the CLI, ensure we print progress
    # to stdout
    if isinstance(dag.executor, Parallel):
        dag.executor.print_progress = True

    # stays None when only rendering, so nothing is printed in that case
    report = None

    if render_only:
        dag.render()
    elif args.partially:
        report = dag.build_partially(args.partially,
                                     force=args.force,
                                     debug=args.debug,
                                     skip_upstream=args.skip_upstream)
    else:
        report = dag.build(force=args.force, debug=args.debug)

    if report:
        print(report)

    elapsed = datetime.datetime.now() - started
    telemetry.log_api("ploomber_build",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})
    return dag