def test_log(tmp_nbs, monkeypatch):
    """Check that ``--log info`` configures logging with level ``'INFO'``."""
    logging_mock = Mock()
    cli_args = ['python', '--log', 'info', '--entry-point', 'pipeline.yaml']

    monkeypatch.setattr(sys, 'argv', cli_args)
    # replace the logging module referenced by ``parsers`` so that calls made
    # while loading the entry point can be inspected
    monkeypatch.setattr(parsers, 'logging', logging_mock)

    parser = CustomParser()

    with parser:
        # no extra arguments are registered for this test
        pass

    parser.load_from_entry_point_arg()

    logging_mock.basicConfig.assert_called_with(level='INFO')
def main():
    """Load the DAG from the entry point and start an IPython session.

    The DAG is rendered on a best-effort basis: if rendering raises, a
    message is printed (and reported through telemetry) but the interactive
    session is still started so the ``dag`` object can be inspected.
    """
    started_at = datetime.datetime.now()

    parser = CustomParser(
        description='Call an entry point '
        '(pipeline.yaml or dotted path to factory)',
        prog='ploomber interact',
    )

    with parser:
        # this command has no static args
        pass

    dag, _ = parser.load_from_entry_point_arg()

    try:
        dag.render()
    except Exception:
        # deliberately not re-raised: the session below is still useful for
        # debugging a DAG that fails to render
        message = ('Your dag failed to render, but you can still inspect the '
                   'object to debug it.\n')
        failure_metadata = {
            'type': 'dag_render_failed',
            'exception': message,
        }
        telemetry.log_api("interact_error",
                          dag=dag,
                          metadata=failure_metadata)
        print(message)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_interact",
                      total_runtime=str(elapsed),
                      dag=dag)

    # NOTE: do not use embed here, we must use start_ipython, see here:
    # https://github.com/ipython/ipython/issues/8918
    start_ipython(argv=[], user_ns={'dag': dag})
def test_log_file_with_factory_entry_point(backup_test_pkg, monkeypatch, opt):
    """Check log-file handling when the entry point is a dotted path.

    ``opt`` is the command-line flag placed right before ``'my.log'``; its
    possible values are supplied by the caller and are not visible here.
    """
    logging_mock = Mock()
    cli_args = [
        'python',
        '--log',
        'info',
        opt,
        'my.log',
        '--entry-point',
        'test_pkg.entry.plain_function',
    ]

    monkeypatch.setattr(sys, 'argv', cli_args)
    # replace the logging module referenced by ``parsers`` so calls made
    # while loading the entry point can be inspected
    monkeypatch.setattr(parsers, 'logging', logging_mock)

    parser = CustomParser()

    with parser:
        pass

    parser.load_from_entry_point_arg()

    # level configured, file handler created and attached to a logger
    logging_mock.basicConfig.assert_called_with(level='INFO')
    logging_mock.FileHandler.assert_called_with('my.log')
    logging_mock.getLogger().addHandler.assert_called()
def test_log_file(tmp_nbs, monkeypatch, opt):
    """Check log-file handling when the entry point is ``pipeline.yaml``.

    ``opt`` is the command-line flag placed right before ``'my.log'``; its
    possible values are supplied by the caller and are not visible here.
    """
    logging_mock = Mock()
    cli_args = [
        'python',
        '--log',
        'info',
        opt,
        'my.log',
        '--entry-point',
        'pipeline.yaml',
    ]

    monkeypatch.setattr(sys, 'argv', cli_args)
    # replace the logging module referenced by ``parsers`` so calls made
    # while loading the entry point can be inspected
    monkeypatch.setattr(parsers, 'logging', logging_mock)

    parser = CustomParser()

    with parser:
        pass

    parser.load_from_entry_point_arg()

    # level configured, file handler created and attached to a logger
    logging_mock.basicConfig.assert_called_with(level='INFO')
    logging_mock.FileHandler.assert_called_with('my.log')
    logging_mock.getLogger().addHandler.assert_called()
def test_entry_point_from_factory_in_environment_variable(
        backup_test_pkg, monkeypatch):
    """Check a DAG is loaded when only ``ENTRY_POINT`` names the factory.

    No ``--entry-point`` argument is passed on the command line; the dotted
    path is provided through the ``ENTRY_POINT`` environment variable.
    """
    monkeypatch.setenv('ENTRY_POINT', 'test_pkg.entry.plain_function')
    monkeypatch.setattr(sys, 'argv', ['python'])

    parser = CustomParser()

    with parser:
        pass

    loaded, _ = parser.load_from_entry_point_arg()

    assert isinstance(loaded, DAG)
def main():
    """Entry point for ``ploomber task``: inspect or build a single task.

    Depending on the flags passed, prints the task source, prints its
    status, runs its on_finish hook and/or builds it. The task is built when
    ``--build`` is passed or when none of ``--source``, ``--status`` and
    ``--on-finish`` are passed.
    """
    started_at = datetime.datetime.now()
    parser = CustomParser(description='Build tasks', prog='ploomber task')

    # (long flag, short flag, help text) for every boolean option, in the
    # order they are registered
    boolean_flags = (
        ('--source', '-s', 'Print task source code'),
        ('--build', '-b',
         'Build task (default if no other option is passed)'),
        ('--force', '-f', 'Force task build (ignore up-to-date status)'),
        ('--status', '-st', 'Get task status'),
        ('--on-finish', '-of', 'Only execute on_finish hook'),
    )

    with parser:
        parser.add_argument('task_name')

        for long_flag, short_flag, help_text in boolean_flags:
            parser.add_argument(long_flag,
                                short_flag,
                                help=help_text,
                                action='store_true')

    dag, args = parser.load_from_entry_point_arg()
    dag.render()

    task = dag[args.task_name]

    if args.source:
        print(task.source)

    if args.status:
        print(task.status())

    if args.on_finish:
        task._run_on_finish()

    # the task is built by default, but once --source, --status or
    # --on-finish is passed, building requires an explicit --build
    inspect_only = args.status or args.source or args.on_finish

    if args.build or not inspect_only:
        task.build(force=args.force)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_task",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})
def main():
    """Entry point for ``ploomber status``: print the pipeline status."""
    started_at = datetime.datetime.now()
    parser = CustomParser(description='Show pipeline status',
                          prog='ploomber status')

    with parser:
        # this command has no static args
        pass

    # the parsed arguments are not needed by this command
    dag, _ = parser.load_from_entry_point_arg()

    status = dag.status()
    print(status)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_status",
                      total_runtime=str(elapsed),
                      dag=dag)
def main():
    """Entry point for ``ploomber report``: write a pipeline report.

    The destination comes from ``--output``/``-o`` and defaults to
    ``pipeline.html``.
    """
    started_at = datetime.datetime.now()
    parser = CustomParser(description='Make a pipeline report',
                          prog='ploomber report')

    with parser:
        parser.add_argument(
            '--output',
            '-o',
            default='pipeline.html',
            help='Where to save the report, defaults to pipeline.html',
        )

    dag, args = parser.load_from_entry_point_arg()

    report_path = args.output
    dag.to_markup(path=report_path)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_report",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Report saved at:', report_path)
def test_dagspec_initialization_from_yaml(tmp_nbs_nested, monkeypatch):
    """
    DAGSpec accepts either a path to a spec or a dictionary, and the two
    behave slightly differently. This verifies the parser initializes it
    with the path
    """
    dagspec_spy = Mock(wraps=parsers.DAGSpec)

    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', dagspec_spy)

    parser = CustomParser()

    with parser:
        pass

    parser.load_from_entry_point_arg()

    dagspec_spy.assert_called_once_with('pipeline.yaml')
def test_dagspec_initialization_from_yaml_and_env(tmp_nbs, monkeypatch):
    """
    DAGSpec accepts either a path to a spec or a dictionary, and the two
    behave slightly differently. This ensures the CLI passes the path rather
    than a dictionary, and that EnvDict is initialized from env.yaml
    """
    # spies: keep the real behavior while recording how each one is called
    dagspec_spy = Mock(wraps=parsers.DAGSpec)
    path_to_env_spy = Mock(wraps=parsers.default.path_to_env_from_spec)
    envdict_spy = Mock(wraps=parsers.EnvDict)

    monkeypatch.setattr(sys, 'argv', ['python'])
    monkeypatch.setattr(parsers, 'DAGSpec', dagspec_spy)
    monkeypatch.setattr(parsers.default, 'path_to_env_from_spec',
                        path_to_env_spy)
    monkeypatch.setattr(parsers, 'EnvDict', envdict_spy)

    # ensure current timestamp does not change
    frozen_datetime = Mock()
    frozen_datetime.datetime.now().isoformat.return_value = (
        'current-timestamp')
    monkeypatch.setattr(expand, "datetime", frozen_datetime)

    parser = CustomParser()

    with parser:
        pass

    parser.load_from_entry_point_arg()

    # ensure called using the path to the yaml spec
    expected_env = EnvDict({'sample': False}, path_to_here='.')
    dagspec_spy.assert_called_once_with('pipeline.yaml', env=expected_env)

    # and EnvDict initialized from env.yaml
    expected_env_path = str(Path('env.yaml').resolve())
    envdict_spy.assert_called_once_with(expected_env_path,
                                        path_to_here=Path('.'))
def main():
    """Entry point for ``ploomber plot``: save a plot of the pipeline.

    When ``--output``/``-o`` is not passed, the file name is derived from
    the entry point: ``pipeline.{name}.png`` if ``extract_name`` returns a
    name, ``pipeline.png`` otherwise.
    """
    started_at = datetime.datetime.now()
    parser = CustomParser(description='Plot a pipeline', prog='ploomber plot')

    with parser:
        parser.add_argument(
            '--output',
            '-o',
            default=None,
            help='Where to save the plot, defaults to pipeline.png',
        )

    dag, args = parser.load_from_entry_point_arg()

    output = args.output

    if output is None:
        # no explicit destination: build one from the entry point's name
        name = extract_name(args.entry_point)
        output = f'pipeline.{name}.png' if name is not None else 'pipeline.png'

    dag.plot(output=output)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_plot",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Plot saved at:', output)
def main():
    """Entry point for ``ploomber nb``: manage scripts and notebooks.

    Options fall in two groups:

    * Options that do not need a DAG: ``--single-click``,
      ``--single-click-disable``, ``--install-hook``, ``--uninstall-hook``.
    * Options that need a valid entry point: ``--inject``, ``--remove``,
      ``--format``, ``--pair``, ``--sync``. The DAG is only loaded (and
      rendered) when at least one of these is passed.

    Raises
    ------
    RuntimeError
        If an option that needs an entry point was passed but the DAG failed
        to load (the original exception is chained as the cause).
    NotADirectoryError
        If ``--install-hook`` is passed and there is no ``.git`` directory in
        the current working directory.
    """
    parser = CustomParser(description='Manage scripts and notebooks',
                          prog='ploomber nb')

    with parser:
        # The next options do not require a valid entry point

        # opening .py files as notebooks in JupyterLab with a single click
        single_click = parser.add_mutually_exclusive_group()
        single_click.add_argument(
            '--single-click',
            '-S',
            action='store_true',
            help=('Override JupyterLab defaults to open '
                  'scripts as notebook with a single click'))
        single_click.add_argument(
            '--single-click-disable',
            '-d',
            action='store_true',
            help=('Disables opening scripts as notebook with a single '
                  'click in JupyterLab'))

        # install/uninstall hook
        hook = parser.add_mutually_exclusive_group()
        hook.add_argument('--install-hook',
                          '-I',
                          action='store_true',
                          help='Install git pre-commit hook')
        hook.add_argument('--uninstall-hook',
                          '-u',
                          action='store_true',
                          help='Uninstall git pre-commit hook')

        # The next options require a valid entry point

        # inject/remove cell
        cell = parser.add_mutually_exclusive_group()
        cell.add_argument('--inject',
                          '-i',
                          action='store_true',
                          help='Inject cell to all script/notebook tasks')
        cell.add_argument(
            '--remove',
            '-r',
            action='store_true',
            help='Remove injected cell in all script/notebook tasks')

        # re-format
        parser.add_argument('--format',
                            '-f',
                            help='Re-format all script/notebook tasks')

        # pair scripts and nbs
        parser.add_argument('--pair',
                            '-p',
                            help='Pair scripts with ipynb files')

        # sync scripts and nbs
        parser.add_argument('--sync',
                            '-s',
                            action='store_true',
                            help='Sync scripts with ipynb files')

    loading_error = None

    # commands that need an entry point to work
    needs_entry_point = {'format', 'inject', 'remove', 'sync', 'pair'}

    args_ = parser.parse_args()

    if any(getattr(args_, arg) for arg in needs_entry_point):
        try:
            dag, args = parser.load_from_entry_point_arg()
        except Exception as e:
            # keep a reference: the name bound by "except ... as" is cleared
            # when the except block ends
            loading_error = e
        else:
            dag.render(show_progress=False)

        # compare against None instead of relying on truthiness: an exception
        # type that evaluates as falsy would otherwise skip this branch and
        # leave ``dag`` unbound
        if loading_error is not None:
            err = ('Could not run nb command: the DAG '
                   'failed to load')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': err + f' {loading_error}',
                                  'argv': sys.argv
                              })
            raise RuntimeError(err) from loading_error
    else:
        dag = None
        args = args_

    # options that do not need a DAG

    if args.single_click:
        _py_with_single_click_enable()

    if args.single_click_disable:
        _py_with_single_click_disable()

    if args.install_hook:
        if not Path('.git').is_dir():
            err = ('Expected a .git/ directory in the current working '
                   'directory. Run this from the repository root directory.')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'no_git_config',
                                  'exception': err,
                                  'argv': sys.argv
                              })
            raise NotADirectoryError(err)

        parent = Path('.git', 'hooks')
        parent.mkdir(exist_ok=True)

        # pre-commit: remove injected cells
        _install_hook(parent / 'pre-commit', pre_commit_hook, args.entry_point)
        click.echo('Successfully installed pre-commit git hook')

        # post-commit: inject cells
        _install_hook(parent / 'post-commit', post_commit_hook,
                      args.entry_point)
        click.echo('Successfully installed post-commit git hook')

    if args.uninstall_hook:
        _delete_hook(Path('.git', 'hooks', 'pre-commit'))
        _delete_hook(Path('.git', 'hooks', 'post-commit'))

    # options that need a valid DAG

    if args.format:
        # entries that are None are dropped; the remaining ones are reported
        # as tasks whose extension changed
        new_paths = [
            str(p) for p in _call_in_source(
                dag,
                'format',
                'Formatted notebooks',
                dict(fmt=args.format),
            ) if p is not None
        ]

        if new_paths:
            click.echo('Extension changed for the following '
                       f'tasks: {", ".join(new_paths)}. Update your '
                       'pipeline declaration.')

    if args.inject:
        _call_in_source(
            dag,
            'save_injected_cell',
            'Injected cell',
            dict(),
        )

        click.secho(
            'Finished cell injection. Re-run this command if your '
            'pipeline.yaml changes.',
            fg='green')

    if args.remove:
        _call_in_source(
            dag,
            'remove_injected_cell',
            'Removed injected cell',
            dict(),
        )

    if args.sync:
        # maybe its more efficient to pass all notebook paths at once?
        _call_in_source(dag, 'sync', 'Synced notebooks')

    # can pair give trouble if we're reformatting?
    if args.pair:
        _call_in_source(
            dag,
            'pair',
            'Paired notebooks',
            dict(base_path=args.pair),
        )
        click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to '
                   'your .gitignore to keep your repository clean')

    telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv})
def main(render_only=False):
    """Entry point for ``ploomber build``: build (or only render) a pipeline.

    Parameters
    ----------
    render_only : bool, default=False
        If True, the DAG is rendered but not built.

    Returns
    -------
    The DAG loaded from the entry point.
    """
    started_at = datetime.datetime.now()
    parser = CustomParser(description='Build pipeline', prog='ploomber build')

    with parser:
        parser.add_argument('--force',
                            '-f',
                            action='store_true',
                            default=False,
                            help='Force execution by ignoring status')
        parser.add_argument('--skip-upstream',
                            '-su',
                            action='store_true',
                            default=False,
                            help='Skip building upstream dependencies. '
                            'Only applicable when using --partially')
        parser.add_argument(
            '--partially',
            '-p',
            default=None,
            help='Build a pipeline partially until certain task')
        parser.add_argument(
            '--debug',
            '-d',
            action='store_true',
            default=False,
            help='Drop a debugger session if an exception happens')

    # users may try to run "ploomber build {name}" to build a single task
    cli_args = sys.argv[1:]

    if cli_args and not cli_args[0].startswith('-'):
        suggestion = 'ploomber task {task-name}'
        cmd_name = parser.prog
        unsupported_metadata = {
            'cmd_name': cmd_name,
            'suggestion': suggestion,
            'argv': sys.argv,
        }
        telemetry.log_api("unsupported_build_cmd",
                          metadata=unsupported_metadata)
        parser.error(f'{cmd_name!r} does not take positional arguments.\n'
                     f'To build a single task, try: {suggestion!r}')

    dag, args = parser.load_from_entry_point_arg()

    # when using the parallel executor from the CLI, ensure we print progress
    # to stdout
    if isinstance(dag.executor, Parallel):
        dag.executor.print_progress = True

    if render_only:
        dag.render()
    else:
        report = (dag.build_partially(args.partially,
                                      force=args.force,
                                      debug=args.debug,
                                      skip_upstream=args.skip_upstream)
                  if args.partially else dag.build(force=args.force,
                                                   debug=args.debug))

        if report:
            print(report)

    elapsed = datetime.datetime.now() - started_at
    telemetry.log_api("ploomber_build",
                      total_runtime=str(elapsed),
                      dag=dag,
                      metadata={'argv': sys.argv})

    return dag