def test_deletes_metadata_on_save_for_file_used_multiple_times(tmp_directory):
    """
    Saving a script that backs more than one task must delete the metadata
    file of every product generated from it.

    The pipeline declares a single entry with a ``grid`` of two ``param``
    values; the test expects one metadata file per generated product
    (``.my-task-0.ipynb.metadata`` and ``.my-task-1.ipynb.metadata``).
    """
    # the script needs real line breaks: the "# +" marker opens the
    # 'parameters' cell and "upstream = None" is that cell's content
    Path('my-task.py').write_text("""
# + tags=['parameters']
upstream = None
""")

    # generate two tasks with the same script (but different params)
    spec = {
        'tasks': [{
            'source': 'my-task.py',
            'name': 'my-task-',
            'product': 'my-task.ipynb',
            'grid': {
                'param': [1, 2]
            }
        }]
    }

    Path('pipeline.yaml').write_text(yaml.dump(spec))

    # create the metadata files that saving is expected to remove
    m1 = Path('.my-task-0.ipynb.metadata')
    m2 = Path('.my-task-1.ipynb.metadata')
    m1.touch()
    m2.touch()

    cm = PloomberContentsManager()
    model = cm.get('my-task.py')
    cm.save(model, path='/my-task.py')

    assert not m1.exists()
    assert not m2.exists()
def test_deletes_metadata_on_save(tmp_nbs):
    """
    Saving ``plot.py`` through the contents manager must remove
    ``output/.plot.ipynb.metadata``.
    """
    # arrange: an output folder holding a pre-existing metadata file
    Path('output').mkdir()
    metadata_file = Path('output/.plot.ipynb.metadata')
    metadata_file.touch()

    # act: load the script and save it back unchanged
    manager = PloomberContentsManager()
    loaded = manager.get('plot.py')
    manager.save(loaded, path='/plot.py')

    # assert: the metadata file is gone
    assert not metadata_file.exists()
def test_jupyter_workflow_with_functions(backup_spec_with_functions):
    """
    Tests a typical workflow with a pipeline where some tasks are functions:
    browse the directory listings, open the notebooks generated from the
    function tasks, append a cell, save, and check the ``functions.py``
    sources on disk contain the new code
    """
    manager = PloomberContentsManager()

    def listed_names(listing):
        # names of all the entries in a directory model
        return set(entry['name'] for entry in listing['content'])

    assert listed_names(manager.get('')) == {'my_tasks', 'pipeline.yaml'}
    assert listed_names(manager.get('my_tasks')) == {
        '__init__.py',
        'clean',
        'raw',
    }

    # check new notebooks appear, which are generated from the function tasks
    assert listed_names(manager.get('my_tasks/raw')) == {
        '__init__.py',
        'functions.py',
        'functions.py (functions)',
    }
    assert listed_names(manager.get('my_tasks/clean')) == {
        '__init__.py',
        'functions.py',
        'functions.py (functions)',
        'util.py',
    }

    # get notebooks generated from task functions
    raw_path = 'my_tasks/raw/functions.py (functions)/raw'
    clean_path = 'my_tasks/clean/functions.py (functions)/clean'
    raw_model = manager.get(raw_path)
    clean_model = manager.get(clean_path)

    # add some new code (the same cell object goes into both notebooks)
    nb_api = nbformat.versions[nbformat.current_nbformat]
    extra_cell = nb_api.new_code_cell('1 + 1')
    for nb_model in (raw_model, clean_model):
        nb_model['content']['cells'].append(extra_cell)

    # overwrite the original function
    for nb_model, nb_path in ((raw_model, raw_path), (clean_model,
                                                      clean_path)):
        manager.save(nb_model, path=nb_path)

    # make sure source code was updated
    tasks_dir = backup_spec_with_functions / 'my_tasks'
    raw_source = (tasks_dir / 'raw' / 'functions.py').read_text()
    clean_source = (tasks_dir / 'clean' / 'functions.py').read_text()

    assert '1 + 1' in raw_source
    assert '1 + 1' in clean_source
def test_save(tmp_nbs):
    """
    Saving a modified ``plot.py`` model writes the modification to disk and
    leaves no injected cell in the stored script, even when the model has no
    ``path`` key.
    """
    manager = PloomberContentsManager()
    model = manager.get('plot.py')

    # When saving a .py file from jupyter notebook, the model handed to
    # .save was observed to come without a path. That could not be
    # reproduced from this test, so the key is removed on purpose to
    # simulate the behavior - the root cause is unknown
    model.pop('path')

    # prepend a marker comment to the first cell
    first_cell = model['content']['cells'][0]
    first_cell['source'] = '# modification\n' + first_cell['source']

    manager.save(model, path='/plot.py')

    saved_nb = jupytext.read('plot.py')
    saved_text = Path('plot.py').read_text()

    assert get_injected_cell(saved_nb) is None
    assert '# modification' in saved_text