def test_stats_off(monkeypatch):
    """log_api must not emit any event when telemetry is disabled.

    The original version called ``mock.patch(...)`` on a plain ``Mock()``,
    which records the call and does nothing, so nothing was ever patched
    and ``posthog_mock`` was never wired in — the assertion passed
    vacuously. Use the pytest ``monkeypatch`` fixture so the patch is
    applied for real and undone after the test.
    """
    posthog_mock = Mock()
    # telemetry disabled: _get_telemetry_info reports (stats_enabled, uid)
    monkeypatch.setattr(telemetry, '_get_telemetry_info',
                        lambda *args, **kwargs: (False, 'TestUID'))
    # NOTE(review): assumes log_api emits through telemetry.posthog —
    # confirm against the telemetry module's implementation
    monkeypatch.setattr(telemetry, 'posthog', posthog_mock)

    telemetry.log_api("test_action")

    assert posthog_mock.capture.call_count == 0
def test_offline_stats(monkeypatch):
    """log_api must not emit any event when the machine is offline.

    Same defect as ``test_stats_off``: ``mock.patch(...)`` on a ``Mock()``
    is a no-op and ``posthog_mock`` was never injected, so the assertion
    could not fail. Patch for real via the ``monkeypatch`` fixture.
    """
    posthog_mock = Mock()
    # simulate no network connectivity
    monkeypatch.setattr(telemetry, 'is_online', False)
    # NOTE(review): assumes log_api emits through telemetry.posthog —
    # confirm against the telemetry module's implementation
    monkeypatch.setattr(telemetry, 'posthog', posthog_mock)

    telemetry.log_api("test_action")

    assert posthog_mock.capture.call_count == 0
def main():
    """Load the pipeline and start an IPython session with ``dag`` in scope."""
    t0 = datetime.datetime.now()

    parser = CustomParser(description='Call an entry point '
                          '(pipeline.yaml or dotted path to factory)',
                          prog='ploomber interact')

    # this command has no static args
    with parser:
        pass

    dag, _ = parser.load_from_entry_point_arg()

    try:
        dag.render()
    except Exception:
        # a broken DAG is still useful for interactive debugging, so we
        # warn instead of aborting
        err = ('Your dag failed to render, but you can still inspect the '
               'object to debug it.\n')
        telemetry.log_api("interact_error",
                          dag=dag,
                          metadata={
                              'type': 'dag_render_failed',
                              'exception': err
                          })
        print(err)

    telemetry.log_api("ploomber_interact",
                      total_runtime=str(datetime.datetime.now() - t0),
                      dag=dag)

    # NOTE: do not use embed here, we must use start_ipython, see here:
    # https://github.com/ipython/ipython/issues/8918
    start_ipython(argv=[], user_ns={'dag': dag})
def _next_steps(cmdr, cmd_activate, start_time):
    """Log install success and print the activate/build follow-up commands."""
    runtime = datetime.datetime.now() - start_time
    telemetry.log_api("install-success", total_runtime=str(runtime))

    cmdr.success('Next steps')
    cmdr.print(f'$ {cmd_activate}\n$ ploomber build')
    cmdr.success()
def main():
    """CLI handler for ``ploomber task``: inspect or build a single task."""
    started_at = datetime.datetime.now()

    parser = CustomParser(description='Build tasks', prog='ploomber task')

    with parser:
        parser.add_argument('task_name')
        parser.add_argument('--source',
                            '-s',
                            help='Print task source code',
                            action='store_true')
        parser.add_argument(
            '--build',
            '-b',
            help='Build task (default if no other option is passed)',
            action='store_true')
        parser.add_argument('--force',
                            '-f',
                            help='Force task build (ignore up-to-date status)',
                            action='store_true')
        parser.add_argument('--status',
                            '-st',
                            help='Get task status',
                            action='store_true')
        parser.add_argument('--on-finish',
                            '-of',
                            help='Only execute on_finish hook',
                            action='store_true')

    dag, args = parser.load_from_entry_point_arg()
    dag.render()
    task = dag[args.task_name]

    if args.source:
        print(task.source)

    if args.status:
        print(task.status())

    if args.on_finish:
        task._run_on_finish()

    # building is the default action, but once --source/--status/--on-finish
    # are passed, an explicit --build is required
    any_flag = args.build or args.status or args.source or args.on_finish

    if args.build or not any_flag:
        task.build(force=args.force)

    telemetry.log_api("ploomber_task",
                      total_runtime=str(datetime.datetime.now() - started_at),
                      dag=dag,
                      metadata={'argv': sys.argv})
def main():
    """CLI handler for ``ploomber status``: print the pipeline status table."""
    began = datetime.datetime.now()

    parser = CustomParser(description='Show pipeline status',
                          prog='ploomber status')

    # this command has no static args
    with parser:
        pass

    dag, args = parser.load_from_entry_point_arg()

    print(dag.status())

    telemetry.log_api("ploomber_status",
                      total_runtime=str(datetime.datetime.now() - began),
                      dag=dag)
def _find_conda_root(conda_bin): conda_bin = Path(conda_bin) for parent in conda_bin.parents: # I've seen variations of this. on windows: Miniconda3 and miniconda3 # on linux miniconda3, anaconda and miniconda if parent.name.lower() in {'miniconda3', 'miniconda', 'anaconda3'}: return parent err = ('Failed to locate conda root from ' f'directory: {str(conda_bin)!r}. Please submit an issue: ' 'https://github.com/ploomber/ploomber/issues/new') telemetry.log_api("install-error", metadata={ 'type': 'no_conda_root', 'exception': err }) raise RuntimeError(err)
def main():
    """CLI handler for ``ploomber report``: export the pipeline as a report."""
    t0 = datetime.datetime.now()

    parser = CustomParser(description='Make a pipeline report',
                          prog='ploomber report')

    with parser:
        parser.add_argument(
            '--output',
            '-o',
            help='Where to save the report, defaults to pipeline.html',
            default='pipeline.html')

    dag, args = parser.load_from_entry_point_arg()

    dag.to_markup(path=args.output)

    telemetry.log_api("ploomber_report",
                      total_runtime=str(datetime.datetime.now() - t0),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Report saved at:', args.output)
def _locate_pip_inside_conda(env_name):
    """Return the path to the pip binary inside the conda env ``env_name``.

    Raises FileNotFoundError when the environment has no pip installed.
    """
    pip = _path_to_pip_in_env_with_name(shutil.which('conda'), env_name)

    # guard clause: a missing pip means the env was created without
    # python/pip in its dependency list
    if Path(pip).exists():
        return pip

    err = (f'Could not locate pip in environment {env_name!r}, make sure '
           'it is included in your environment.yml and try again')
    telemetry.log_api("install-error",
                      metadata={
                          'type': 'no_pip_env',
                          'exception': err
                      })
    raise FileNotFoundError(err)
def cmd_router():
    """Dispatch the first CLI argument to a custom handler or the click app."""
    cmd_name = None if len(sys.argv) < 2 else sys.argv[1]

    handlers = {
        'build': cli_module.build.main,
        'plot': cli_module.plot.main,
        'task': cli_module.task.main,
        'report': cli_module.report.main,
        'interact': cli_module.interact.main,
        'status': cli_module.status.main,
        'nb': cli_module.nb.main,
    }

    # users may attempt to run execute/run, suggest to use build instead
    aliases = {'execute': 'build', 'run': 'build'}

    if cmd_name in handlers:
        # NOTE: we don't use the argument here, it is parsed by _main
        # pop the second element ('entry') to make the CLI behave as expected
        sys.argv.pop(1)
        # Add the current working directory, this is done automatically when
        # calling "python -m ploomber.build" but not here ("ploomber build")
        sys.path.insert(0, os.path.abspath('.'))
        handlers[cmd_name]()
    elif cmd_name in aliases:
        suggestion = aliases[cmd_name]
        telemetry.log_api("unsupported_build_cmd",
                          metadata={
                              'cmd_name': cmd_name,
                              'suggestion': suggestion,
                              'argv': sys.argv
                          })
        _exit_with_error_message("Try 'ploomber --help' for help.\n\n"
                                 f"Error: {cmd_name!r} is not a valid command."
                                 f" Did you mean {suggestion!r}?")
    else:
        # commands with their own telemetry are excluded from this event
        if cmd_name not in ['examples', 'scaffold', 'install']:
            telemetry.log_api("unsupported-api-call",
                              metadata={'argv': sys.argv})
        cli()
def examples(name, force, branch, output):
    """Get sample projects. Run "ploomber examples" to list them
    """
    try:
        cli_module.examples.main(name=name,
                                 force=force,
                                 branch=branch,
                                 output=output)
    # click-level errors already carry a user-friendly message; re-raise as-is
    except click.ClickException:
        raise
    except Exception as exc:
        telemetry.log_api("examples_error",
                          metadata={
                              'type': 'runtime_error',
                              'exception': exc,
                              'argv': sys.argv
                          })
        raise RuntimeError(
            'An error happened when executing the examples command. Check out '
            'the full error message for details. Downloading the examples '
            'again or upgrading Ploomber may fix the '
            'issue.\nDownload: ploomber examples -f\n'
            'Update: pip install ploomber -U\n'
            'Update [conda]: conda update ploomber -c conda-forge') from exc
def main():
    """CLI handler for ``ploomber plot``: save the DAG diagram to an image."""
    began = datetime.datetime.now()

    parser = CustomParser(description='Plot a pipeline', prog='ploomber plot')

    with parser:
        parser.add_argument(
            '--output',
            '-o',
            help='Where to save the plot, defaults to pipeline.png',
            default=None)

    dag, args = parser.load_from_entry_point_arg()

    # derive a default filename from the entry point when -o is not given
    output = args.output
    if output is None:
        name = extract_name(args.entry_point)
        output = f'pipeline.{name}.png' if name is not None else 'pipeline.png'

    dag.plot(output=output)

    telemetry.log_api("ploomber_plot",
                      total_runtime=str(datetime.datetime.now() - began),
                      dag=dag,
                      metadata={'argv': sys.argv})

    print('Plot saved at:', output)
def main():
    """CLI handler for ``ploomber nb``: manage scripts and notebooks.

    Handles JupyterLab single-click settings, git hook (un)installation,
    cell injection/removal, re-formatting, pairing and syncing of
    script/notebook tasks. Only the options listed in
    ``needs_entry_point`` require loading the pipeline.

    FIX: the cell-injection progress message previously read
    'Injected celll' (typo).
    """
    parser = CustomParser(description='Manage scripts and notebooks',
                          prog='ploomber nb')

    with parser:
        # The next options do not require a valid entry point

        # opening .py files as notebooks in JupyterLab with a single click
        single_click = parser.add_mutually_exclusive_group()
        single_click.add_argument(
            '--single-click',
            '-S',
            action='store_true',
            help=('Override JupyterLab defaults to open '
                  'scripts as notebook with a single click'))
        single_click.add_argument(
            '--single-click-disable',
            '-d',
            action='store_true',
            help=('Disables opening scripts as notebook with a single '
                  'click in JupyterLab'))

        # install/uninstall hook
        hook = parser.add_mutually_exclusive_group()
        hook.add_argument('--install-hook',
                          '-I',
                          action='store_true',
                          help='Install git pre-commit hook')
        hook.add_argument('--uninstall-hook',
                          '-u',
                          action='store_true',
                          help='Uninstall git pre-commit hook')

        # The next options require a valid entry point

        # inject/remove cell
        cell = parser.add_mutually_exclusive_group()
        cell.add_argument('--inject',
                          '-i',
                          action='store_true',
                          help='Inject cell to all script/notebook tasks')
        cell.add_argument(
            '--remove',
            '-r',
            action='store_true',
            help='Remove injected cell in all script/notebook tasks')

        # re-format
        parser.add_argument('--format',
                            '-f',
                            help='Re-format all script/notebook tasks')

        # pair scripts and nbs
        parser.add_argument('--pair',
                            '-p',
                            help='Pair scripts with ipynb files')

        # sync scripts and nbs
        parser.add_argument('--sync',
                            '-s',
                            action='store_true',
                            help='Sync scripts with ipynb files')

    loading_error = None

    # commands that need an entry point to work
    needs_entry_point = {'format', 'inject', 'remove', 'sync', 'pair'}

    args_ = parser.parse_args()

    if any(getattr(args_, arg) for arg in needs_entry_point):
        try:
            dag, args = parser.load_from_entry_point_arg()
        except Exception as e:
            loading_error = e
        else:
            dag.render(show_progress=False)

        if loading_error:
            err = ('Could not run nb command: the DAG '
                   'failed to load')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': err + f' {loading_error}',
                                  'argv': sys.argv
                              })
            raise RuntimeError(err) from loading_error
    else:
        dag = None
        args = args_

    # options that do not need a DAG
    if args.single_click:
        _py_with_single_click_enable()

    if args.single_click_disable:
        _py_with_single_click_disable()

    if args.install_hook:
        if not Path('.git').is_dir():
            err = ('Expected a .git/ directory in the current working '
                   'directory. Run this from the repository root directory.')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'no_git_config',
                                  'exception': err,
                                  'argv': sys.argv
                              })
            raise NotADirectoryError(err)

        parent = Path('.git', 'hooks')
        parent.mkdir(exist_ok=True)

        # pre-commit: remove injected cells
        _install_hook(parent / 'pre-commit', pre_commit_hook, args.entry_point)
        click.echo('Successfully installed pre-commit git hook')

        # post-commit: inject cells
        _install_hook(parent / 'post-commit', post_commit_hook,
                      args.entry_point)
        click.echo('Successfully installed post-commit git hook')

    if args.uninstall_hook:
        _delete_hook(Path('.git', 'hooks', 'pre-commit'))
        _delete_hook(Path('.git', 'hooks', 'post-commit'))

    # options that need a valid DAG
    if args.format:
        new_paths = [
            str(p) for p in _call_in_source(
                dag,
                'format',
                'Formatted notebooks',
                dict(fmt=args.format),
            ) if p is not None
        ]

        if len(new_paths):
            click.echo('Extension changed for the following '
                       f'tasks: {", ".join(new_paths)}. Update your '
                       'pipeline declaration.')

    if args.inject:
        _call_in_source(
            dag,
            'save_injected_cell',
            'Injected cell',
            dict(),
        )

        click.secho(
            'Finished cell injection. Re-run this command if your '
            'pipeline.yaml changes.',
            fg='green')

    if args.remove:
        _call_in_source(
            dag,
            'remove_injected_cell',
            'Removed injected cell',
            dict(),
        )

    if args.sync:
        # maybe its more efficient to pass all notebook paths at once?
        _call_in_source(dag, 'sync', 'Synced notebooks')

    # can pair give trouble if we're reformatting?
    if args.pair:
        _call_in_source(
            dag,
            'pair',
            'Paired notebooks',
            dict(base_path=args.pair),
        )
        click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to '
                   'your .gitignore to keep your repository clean')

    telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv})
def scaffold(conda, package, entry_point, empty):
    """Create new projects (if no pipeline.yaml exists) or add missing tasks
    """
    def _incompatible_flag(flag, error_type):
        # the three -e/--entry-point conflicts below only differed in the
        # flag name and telemetry type; log the conflict and abort
        err = f'-e/--entry-point is not compatible with the {flag} flag'
        telemetry.log_api("scaffold_error",
                          metadata={
                              'type': error_type,
                              'exception': err,
                              'argv': sys.argv
                          })
        raise click.ClickException(err)

    if entry_point and conda:
        _incompatible_flag('--conda', 'entry_and_conda_flag')

    if entry_point and package:
        _incompatible_flag('--package', 'entry_and_package_flag')

    if entry_point and empty:
        _incompatible_flag('--empty', 'entry_and_empty_flag')

    # try to load a dag by looking in default places
    if entry_point is None:
        loaded = _scaffold.load_dag()
    else:
        try:
            loaded = (
                DAGSpec(entry_point, lazy_import='skip'),
                Path(entry_point).parent,
                Path(entry_point),
            )
        except Exception as e:
            telemetry.log_api("scaffold_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': e,
                                  'argv': sys.argv
                              })
            raise click.ClickException(e) from e

    if loaded:
        # existing pipeline, add tasks
        spec, _, path_to_spec = loaded
        _scaffold.add(spec, path_to_spec)
        # NOTE(review): dag=loaded passes the (spec, parent, path) tuple,
        # not a DAG object — confirm this is what log_api expects
        telemetry.log_api("ploomber_scaffold",
                          dag=loaded,
                          metadata={
                              'type': 'add_task',
                              'argv': sys.argv
                          })
    else:
        # no pipeline, create base project
        telemetry.log_api("ploomber_scaffold",
                          metadata={
                              'type': 'base_project',
                              'argv': sys.argv
                          })
        scaffold_project.cli(project_path=None,
                             conda=conda,
                             package=package,
                             empty=empty)
def main(name, force=False, branch=None, output=None):
    """
    Entry point for examples
    """
    manager = _ExamplesManager(home=_home, branch=branch)
    writer = TerminalWriter()

    # refresh the local copy when it is missing, stale, or the user forced it
    if not manager.examples.exists() or manager.outdated() or force:
        if not manager.examples.exists():
            click.echo('Local copy does not exist...')
        elif force:
            click.echo('Forcing download...')

        manager.clone()

    if name:
        example = manager.path_to(name)

        if example.exists():
            output = output or name
            writer.sep('=', f'Copying example {name} to {output}/', blue=True)

            # refuse to clobber an existing directory with the same name
            if Path(output).exists():
                telemetry.log_api("examples_error",
                                  metadata={
                                      'type': 'duplicated_output',
                                      'output_name': output
                                  })
                raise click.ClickException(
                    f"{output!r} already exists in the current working "
                    "directory, please rename it or move it "
                    "to another location and try again.")

            shutil.copytree(example, output)

            path_to_readme = Path(output, 'README.md')
            out_dir = output + ('\\'
                                if platform.system() == 'Windows' else '/')

            writer.write('Next steps:\n\n'
                         f'$ cd {out_dir}'
                         f'\n$ ploomber install')
            writer.write(f'\n\nOpen {str(path_to_readme)} for details.\n',
                         blue=True)
        else:
            telemetry.log_api("examples_error",
                              metadata={
                                  'type': 'wrong_example_name',
                                  'example_name': name
                              })
            click.echo(f'There is no example named {name!r}.\n'
                       'To list examples: ploomber examples\n'
                       'To update local copy: ploomber examples -f')
    else:
        manager.list()

    telemetry.log_api("ploomber_examples",
                      metadata={
                          'argv': sys.argv,
                          'example_name': name
                      })
def main(render_only=False):
    """CLI handler for ``ploomber build``; with render_only=True it only renders."""
    t_start = datetime.datetime.now()

    parser = CustomParser(description='Build pipeline', prog='ploomber build')

    with parser:
        parser.add_argument('--force',
                            '-f',
                            help='Force execution by ignoring status',
                            action='store_true',
                            default=False)
        parser.add_argument('--skip-upstream',
                            '-su',
                            help='Skip building upstream dependencies. '
                            'Only applicable when using --partially',
                            action='store_true',
                            default=False)
        parser.add_argument(
            '--partially',
            '-p',
            help='Build a pipeline partially until certain task',
            default=None)
        parser.add_argument(
            '--debug',
            '-d',
            help='Drop a debugger session if an exception happens',
            action='store_true',
            default=False)

    # users may try to run "ploomber build {name}" to build a single task
    if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
        suggestion = 'ploomber task {task-name}'
        cmd_name = parser.prog
        telemetry.log_api("unsupported_build_cmd",
                          metadata={
                              'cmd_name': cmd_name,
                              'suggestion': suggestion,
                              'argv': sys.argv
                          })
        parser.error(f'{cmd_name!r} does not take positional arguments.\n'
                     f'To build a single task, try: {suggestion!r}')

    dag, args = parser.load_from_entry_point_arg()

    # when using the parallel executor from the CLI, ensure we print progress
    # to stdout
    if isinstance(dag.executor, Parallel):
        dag.executor.print_progress = True

    if render_only:
        dag.render()
    else:
        if args.partially:
            build_report = dag.build_partially(
                args.partially,
                force=args.force,
                debug=args.debug,
                skip_upstream=args.skip_upstream)
        else:
            build_report = dag.build(force=args.force, debug=args.debug)

        if build_report:
            print(build_report)

    telemetry.log_api("ploomber_build",
                      total_runtime=str(datetime.datetime.now() - t_start),
                      dag=dag,
                      metadata={'argv': sys.argv})

    return dag
def main(use_lock):
    """
    Install project, automatically detecting if it's a conda-based or
    pip-based project.

    Parameters
    ----------
    use_lock : bool
        If True Uses requirements.lock.txt/environment.lock.yml and
        requirements.dev.lock.txt/environment.dev.lock.yml files. Otherwise it
        uses regular files and creates the lock ones after installing
        dependencies

    Raises
    ------
    exceptions.ClickException
        If the dependency files required by ``use_lock`` are missing, or a
        conda file exists but conda is not installed

    FIX: first error message previously read "Expected and ..." (typo).
    """
    start_time = datetime.datetime.now()
    telemetry.log_api("install-started")

    # detect which package manager / dependency files are available so we
    # can route to main_conda or main_pip
    HAS_CONDA = shutil.which('conda')
    HAS_ENV_YML = Path(_ENV_YML).exists()
    HAS_ENV_LOCK_YML = Path(_ENV_LOCK_YML).exists()
    HAS_REQS_TXT = Path(_REQS_TXT).exists()
    HAS_REQS_LOCK_TXT = Path(_REQS_LOCK_TXT).exists()

    if use_lock and not HAS_ENV_LOCK_YML and not HAS_REQS_LOCK_TXT:
        err = ("Expected an environment.lock.yaml "
               "(conda) or requirements.lock.txt (pip) in the current "
               "directory. Add one of them and try again.")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_lock',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif not use_lock and not HAS_ENV_YML and not HAS_REQS_TXT:
        err = ("Expected an environment.yaml (conda)"
               " or requirements.txt (pip) in the current directory."
               " Add one of them and try again.")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_env_requirements',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif (not HAS_CONDA and use_lock and HAS_ENV_LOCK_YML
          and not HAS_REQS_LOCK_TXT):
        err = ("Found env environment.lock.yaml "
               "but conda is not installed. Install conda or add a "
               "requirements.lock.txt to use pip instead")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_conda',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif not HAS_CONDA and not use_lock and HAS_ENV_YML and not HAS_REQS_TXT:
        err = ("Found environment.yaml but conda is not installed."
               " Install conda or add a requirements.txt to use pip instead")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_conda2',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif HAS_CONDA and use_lock and HAS_ENV_LOCK_YML:
        main_conda(start_time, use_lock=True)
    elif HAS_CONDA and not use_lock and HAS_ENV_YML:
        main_conda(start_time, use_lock=False)
    else:
        main_pip(start_time, use_lock=use_lock)
def main_conda(start_time, use_lock):
    """
    Install conda-based project, looks for environment.yml files

    Parameters
    ----------
    start_time : datetime
        The initial runtime of the function.

    use_lock : bool
        If True Uses environment.lock.yml and environment.dev.lock.yml files
    """
    # pick the lock file or the regular spec depending on use_lock
    env_yml = _ENV_LOCK_YML if use_lock else _ENV_YML

    # TODO: ensure ploomber-scaffold includes dependency file (including
    # lock files in MANIFEST.in
    cmdr = Commander()

    # TODO: provide helpful error messages on each command

    # the env spec must declare a 'name' key (conda requires it)
    with open(env_yml) as f:
        env_name = yaml.safe_load(f)['name']

    # name of the currently active environment, inferred from the python
    # binary location (env root is two levels above the interpreter)
    current_env = Path(shutil.which('python')).parents[1].name

    # refuse to (re)create the environment we are currently running in
    if env_name == current_env:
        err = (f'{env_yml} will create an environment '
               f'named {env_name!r}, which is the current active '
               'environment. Move to a different one and try '
               'again (e.g., "conda activate base")')
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'env_running_conflict',
                              'exception': err
                          })
        raise RuntimeError(err)

    # get current installed envs
    conda = shutil.which('conda')
    mamba = shutil.which('mamba')

    # if already installed and running on windows, ask to delete first,
    # otherwise it might lead to an intermittent error (permission denied
    # on vcruntime140.dll)
    if os.name == 'nt':
        envs = cmdr.run(conda, 'env', 'list', '--json', capture_output=True)
        already_installed = any([
            env for env in json.loads(envs)['envs']
            # only check in the envs folder, ignore envs in other locations
            if 'envs' in env and env_name in env
        ])

        if already_installed:
            err = (f'Environment {env_name!r} already exists, '
                   f'delete it and try again '
                   f'(conda env remove --name {env_name})')
            telemetry.log_api("install-error",
                              metadata={
                                  'type': 'duplicate_env',
                                  'exception': err
                              })
            raise ValueError(err)

    # prefer mamba (faster drop-in replacement) when available
    pkg_manager = mamba if mamba else conda
    cmdr.run(pkg_manager,
             'env',
             'create',
             '--file',
             env_yml,
             '--force',
             description='Creating env')

    # install the project itself when it is a package
    if Path(_SETUP_PY).exists():
        _pip_install_setup_py_conda(cmdr, env_name)

    # when installing from the regular spec, export the solved env as the
    # lock file so future installs are reproducible
    if not use_lock:
        env_lock = cmdr.run(conda,
                            'env',
                            'export',
                            '--no-build',
                            '--name',
                            env_name,
                            description='Locking dependencies',
                            capture_output=True)
        Path(_ENV_LOCK_YML).write_text(env_lock)

    # NOTE(review): presumably installs/locks the dev dependency file —
    # confirm against _try_conda_install_and_lock_dev
    _try_conda_install_and_lock_dev(cmdr,
                                    pkg_manager,
                                    env_name,
                                    use_lock=use_lock)

    cmd_activate = f'conda activate {env_name}'
    _next_steps(cmdr, cmd_activate, start_time)