def test_save_injected_cell_in_paired_notebooks(tmp_nbs, prefix):
    """After pairing, the injected cell must be saved to both paired files."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.pair(prefix)

    # reload so the source picks up the new pairing metadata
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.save_injected_cell()

    assert get_injected_cell(jupytext.read(Path(prefix, 'load.ipynb')))
    assert get_injected_cell(jupytext.read(Path('load.py')))
def test_remove_injected_cell(tmp_nbs):
    """remove_injected_cell must undo save_injected_cell and clear metadata."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.save_injected_cell()

    marker = '# + tags=["injected-parameters"]'
    assert marker in Path('load.py').read_text()

    # reload and remove the injected cell
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.remove_injected_cell()

    notebook = jupytext.read('load.py')
    assert marker not in Path('load.py').read_text()
    assert notebook.metadata.ploomber == {}
def test_sync(tmp_nbs):
    """sync() must propagate edits in the script to the paired notebook."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.pair(base_path='nbs')

    # append a new cell to the script
    script = jupytext.reads(Path('load.py').read_text(), fmt='py:light')
    script.cells.append(nbformat.v4.new_code_cell(source='x = 42'))
    jupytext.write(script, 'load.py', fmt='py:light')

    # reload and sync the pair
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.sync()

    paired = jupytext.reads(Path('nbs', 'load.ipynb').read_text(), fmt='ipynb')
    assert paired.cells[-1]['source'] == 'x = 42'
def _default_spec_load(starting_dir=None, lazy_import=False, reload=False):
    """
    NOTE: this is a private API. Use DAGSpec.find() instead

    Looks for a pipeline.yaml and initializes a DAGSpec from it. Currently,
    this is only used by the PloomberContentsManager, this is not intended
    to be a public API since initializing specs from paths where we have to
    recursively look for a pipeline.yaml has some considerations regarding
    relative paths that make this confusing, inside the contents manager,
    all those things are all handled for that use case.

    The pipeline.yaml parent folder is temporarily added to sys.path when
    calling DAGSpec.to_dag() to make sure imports work as expected

    Parameters
    ----------
    starting_dir : str, default=None
        Directory where the recursive lookup starts, defaults to the
        current working directory
    lazy_import : bool, default=False
        Passed to DAGSpec, delays importing task sources
    reload : bool, default=False
        Passed to DAGSpec, reloads modules before importing

    Returns
    -------
    tuple
        (DAGSpec, parent directory of the entry point, path to entry point).
        Note: this returns the spec, not a DAG — callers must call
        spec.to_dag() themselves.

    Raises
    ------
    DAGSpecInitializationError
        If initializing the spec from the located entry point fails
    """
    root_path = starting_dir or os.getcwd()
    path_to_entry_point = default.entry_point(root_path=root_path)

    try:
        spec = DAGSpec(path_to_entry_point,
                       env=None,
                       lazy_import=lazy_import,
                       reload=reload)

        path_to_spec = Path(path_to_entry_point)
        return spec, path_to_spec.parent, path_to_spec
    except Exception as e:
        # chain the original error so the root cause is visible
        exc = DAGSpecInitializationError('Error initializing DAG from '
                                         f'{path_to_entry_point!s}')
        raise exc from e
def scaffold(conda, package, entry_point, empty):
    """Create new projects (if no pipeline.yaml exists) or add missings tasks
    """
    incompatible = '-e/--entry-point is not compatible with the {flag} flag'

    # -e/--entry-point cannot be combined with any of these flags
    for enabled, flag in ((conda, '--conda'), (package, '--package'),
                          (empty, '--empty')):
        if entry_point and enabled:
            raise click.ClickException(incompatible.format(flag=flag))

    # try to load a dag by looking in default places
    if not entry_point:
        loaded = _scaffold.load_dag()
    else:
        try:
            loaded = DAGSpec(entry_point, lazy_import=True), Path(entry_point)
        except Exception as e:
            raise click.ClickException(e) from e

    if loaded:
        # add scaffold tasks
        spec, path_to_spec = loaded
        _scaffold.add(spec, path_to_spec)
    else:
        scaffold_project.cli(project_path=None,
                             conda=conda,
                             package=package,
                             empty=empty)
def get_partial():
    """Build a DAG from the partial feature pipeline spec on disk."""
    with open('pipeline-features.yaml') as f:
        task_list = yaml.safe_load(f)

    # the partial only has tasks, so products/upstream cannot be extracted
    metadata = {'extract_product': False, 'extract_upstream': False}
    spec = DAGSpec({'tasks': task_list, 'meta': metadata})
    return spec.to_dag()
def test_format(tmp_nbs):
    """format() must rewrite the script in the requested jupytext format."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()

    before = Path('load.py').read_text()
    assert '# + tags=["parameters"]' in before

    dag['load'].source.format(fmt='py:percent')

    after = Path('load.py').read_text()
    assert '# %% tags=["parameters"]' in after
def test_dag_on_render_with_params(tmp_directory, tmp_imports,
                                   write_dag_hooks_spec):
    """The on_render hook must run and write its marker file."""
    dag = DAGSpec('pipeline.yaml').to_dag()
    dag.executor = Serial(build_in_subprocess=False)

    dag.render()

    assert Path('hook').read_text() == 'on render'
def test_pair(tmp_nbs):
    """pair() must create the notebook and register the formats metadata."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.pair(base_path='nbs')

    script = jupytext.reads(Path('load.py').read_text(), fmt='py:light')

    assert Path('nbs', 'load.ipynb').is_file()
    assert script.metadata.jupytext.formats == 'nbs//ipynb,py:light'
def test_does_not_delete_injected_cell_on_save_if_manually_injected(tmp_nbs):
    """Saving via the contents manager must keep a manually injected cell."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.save_injected_cell()

    # round-trip the file through the Jupyter contents manager
    manager = PloomberContentsManager()
    model = manager.get('load.py')
    manager.save(model, path='/load.py')

    assert get_injected_cell(jupytext.read('load.py'))
def test_dag_manager_root_folder(backup_simple):
    """The manager must expose a single directory model at the root."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    manager = JupyterDAGManager(dag)

    # jupyter represents the root folder with the empty string '', make sure
    # that it correctly returns the appropriate models
    models = manager.get_by_parent('')

    assert len(models) == 1
    assert models[0]['name'] == 'tasks_simple.py (functions)'
    assert models[0]['type'] == 'directory'
def test_dag_manager(backup_spec_with_functions):
    """Function-backed tasks must appear as virtual notebook paths."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    manager = JupyterDAGManager(dag)

    expected = {
        'my_tasks/raw/functions.py (functions)',
        'my_tasks/raw/functions.py (functions)/raw',
        'my_tasks/clean/functions.py (functions)',
        'my_tasks/clean/functions.py (functions)/clean',
    }
    assert set(manager) == expected
def test_dag_on_failure_with_params(tmp_directory, tmp_imports,
                                    write_dag_hooks_spec):
    """The on_failure hook must run and write its marker file."""
    # a task source that always fails, to trigger the on_failure hook
    Path('my_module.py').write_text("""
def touch(product):
    raise Exception
""")

    dag = DAGSpec('pipeline.yaml').to_dag()
    dag.executor = Serial(build_in_subprocess=False)

    with pytest.raises(DAGBuildError):
        dag.build()

    assert Path('hook').read_text() == 'on failure'
def test_dag_manager_flat_structure(backup_spec_with_functions_flat):
    """Function-backed tasks in a flat layout must appear as virtual paths."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    m = JupyterDAGManager(dag)

    assert set(m) == {
        'my_tasks_flat/raw.py (functions)',
        'my_tasks_flat/raw.py (functions)/raw',
        'my_tasks_flat/raw.py (functions)/raw2',
        'my_tasks_flat/clean.py (functions)',
        'my_tasks_flat/clean.py (functions)/clean',
    }

    # membership must hold with or without a leading slash
    # (original had the leading-slash assertion duplicated; the copy-paste
    # duplicate was removed)
    assert 'my_tasks_flat/raw.py (functions)/' in m
    assert '/my_tasks_flat/raw.py (functions)/' in m
def test_renders_valid_script(name, extract_product, extract_upstream,
                              tmp_directory):
    """Scaffolded task sources must produce a buildable pipeline."""
    loader = scaffold.ScaffoldLoader()
    rendered = loader.render(name,
                             params=dict(extract_product=extract_product,
                                         extract_upstream=extract_upstream))

    # test it generates a valid pipelines
    if Path(name).suffix != '.sql':
        Path(name).write_text(rendered)
        Path('pipeline.yaml').write_text(
            Template(template).render(name=name,
                                      extract_product=extract_product,
                                      extract_upstream=extract_upstream))
        DAGSpec('pipeline.yaml').to_dag().build()
def test_save_injected_cell_ipynb(tmp_nbs):
    """save_injected_cell must work for tasks defined as .ipynb files."""
    # modify the spec so it has one ipynb task
    with open('pipeline.yaml') as f:
        spec = yaml.safe_load(f)

    spec['tasks'][0]['source'] = 'load.ipynb'
    Path('pipeline.yaml').write_text(yaml.dump(spec))

    # generate notebook in ipynb format
    jupytext.write(jupytext.read('load.py'), 'load.ipynb')

    dag = DAGSpec('pipeline.yaml').to_dag().render()
    nb = jupytext.read('load.py')
    marker = '"injected-parameters"'

    # nothing injected yet
    assert marker not in Path('load.ipynb').read_text()
    assert nb.metadata.get('ploomber') is None

    dag['load'].source.save_injected_cell()
    nb = jupytext.read('load.ipynb')

    assert marker in Path('load.ipynb').read_text()
    assert nb.metadata.ploomber.injected_manually
def init_dag_from_partial(cls, partial):
    """Initialize partial returned by get_partial()
    """
    if isinstance(partial, (str, Path)):
        with open(partial) as f:
            tasks = yaml.safe_load(f)

        # cannot extract upstream because this is an incomplete DAG
        meta = {'extract_product': False, 'extract_upstream': False}
        spec = DAGSpec(
            {
                'tasks': tasks,
                'meta': meta
            },
            parent_path=Path(partial).parent,
        )
        return spec.to_dag()

    if isinstance(partial, DAG):
        return partial

    raise TypeError(f'Expected {cls.__name__}.get_partial() to '
                    'return a str, pathlib.Path or ploomber.DAG, '
                    f'got {type(partial).__name__}')
def test_format_with_extension_change(tmp_nbs):
    """Formatting to ipynb must replace the .py source with a notebook."""
    dag = DAGSpec('pipeline.yaml').to_dag().render()
    dag['load'].source.format(fmt='ipynb')

    assert not Path('load.py').exists()
    assert jupytext.read('load.ipynb')
def scaffold(conda, package, entry_point, empty):
    """Create new projects (if no pipeline.yaml exists) or add missings tasks
    """
    incompatible = '-e/--entry-point is not compatible with the {flag} flag'

    # -e/--entry-point cannot be combined with any of these flags; log each
    # rejected combination before raising
    flag_checks = (
        (conda, '--conda', 'entry_and_conda_flag'),
        (package, '--package', 'entry_and_package_flag'),
        (empty, '--empty', 'entry_and_empty_flag'),
    )

    for enabled, flag, error_type in flag_checks:
        if entry_point and enabled:
            err = incompatible.format(flag=flag)
            telemetry.log_api("scaffold_error",
                              metadata={
                                  'type': error_type,
                                  'exception': err,
                                  'argv': sys.argv
                              })
            raise click.ClickException(err)

    # try to load a dag by looking in default places
    if entry_point is None:
        loaded = _scaffold.load_dag()
    else:
        try:
            loaded = (
                DAGSpec(entry_point, lazy_import='skip'),
                Path(entry_point).parent,
                Path(entry_point),
            )
        except Exception as e:
            telemetry.log_api("scaffold_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': e,
                                  'argv': sys.argv
                              })
            raise click.ClickException(e) from e

    if loaded:
        # existing pipeline, add tasks
        spec, _, path_to_spec = loaded
        _scaffold.add(spec, path_to_spec)
        telemetry.log_api("ploomber_scaffold",
                          dag=loaded,
                          metadata={
                              'type': 'add_task',
                              'argv': sys.argv
                          })
    else:
        # no pipeline, create base project
        telemetry.log_api("ploomber_scaffold",
                          metadata={
                              'type': 'base_project',
                              'argv': sys.argv
                          })
        scaffold_project.cli(project_path=None,
                             conda=conda,
                             package=package,
                             empty=empty)