Example #1
0
def test_hot_reload_when_adding_function_task_in_existing_module(
        backup_spec_with_functions_flat):
    """A function appended to an existing module is listed after hot reload.

    Both ``jupyter_functions_as_notebooks`` and ``jupyter_hot_reload`` are
    switched on in ``pipeline.yaml`` before the contents manager is created.
    Afterwards a new function is appended to ``my_tasks_flat/raw.py`` and
    registered as a task; the listing of ``my_tasks_flat`` must then contain
    the ``raw.py (functions)`` entry.
    """
    spec_file = Path('pipeline.yaml')
    module_file = Path('my_tasks_flat', 'raw.py')

    # setup: enable the jupyter settings and persist the spec
    spec = yaml.safe_load(spec_file.read_text())
    meta = spec['meta']
    meta['jupyter_functions_as_notebooks'] = True
    meta['jupyter_hot_reload'] = True
    spec_file.write_text(yaml.dump(spec))

    # creating the manager simulates the user starting "jupyter notebook"
    cm = PloomberContentsManager()

    # the user appends a new function to a module that already exists...
    existing_source = module_file.read_text()
    module_file.write_text(
        existing_source + 'def function_new(product):\n    pass\n')

    # ...and declares it as a task in the spec
    new_task = {
        'source': 'my_tasks_flat.raw.function_new',
        'product': 'file.csv'
    }
    spec['tasks'].append(new_task)
    spec_file.write_text(yaml.dump(spec))

    # the directory listing must now expose the functions entry
    listed_names = [
        entry['name'] for entry in cm.get('my_tasks_flat')['content']
    ]
    assert 'raw.py (functions)' in listed_names
Example #2
0
def test_injects_cell_if_file_in_dag(tmp_nbs):
    """Opening a script that is part of the DAG yields an injected cell.

    The source of the injected cell is parsed statement by statement; the
    values assigned to ``upstream`` and ``product`` are compared against
    absolute paths built from the current working directory.
    """
    # local import: keeps this test independent of the module-level imports
    import ast

    def resolve(path):
        return str(Path('.').resolve() / path)

    def assigned_value(code):
        # Split on the FIRST "=" only: the right-hand side holds path
        # literals, which may legitimately contain "=" characters.
        # literal_eval is enough here (the expected values are plain
        # str/dict literals) and avoids executing arbitrary code.
        return ast.literal_eval(code.split('=', 1)[1].strip())

    cm = PloomberContentsManager()
    model = cm.get('plot.py')

    injected = get_injected_cell(model['content'])

    assert injected

    upstream_expected = {
        "clean": {
            "nb": resolve("output/clean.ipynb"),
            "data": resolve("output/clean.csv")
        }
    }
    product_expected = resolve("output/plot.ipynb")

    upstream = None
    product = None

    for node in parso.parse(injected['source']).children:
        code = node.get_code()
        if 'upstream' in code:
            upstream = code
        elif 'product' in code:
            product = code

    # fail with a clear message instead of an obscure error further down
    assert upstream is not None, 'injected cell does not assign upstream'
    assert product is not None, 'injected cell does not assign product'

    assert upstream_expected == assigned_value(upstream)
    assert product_expected == assigned_value(product)
Example #3
0
def test_deletes_metadata_on_save_for_file_used_multiple_times(tmp_directory):
    """Saving a script used by several tasks removes each metadata file.

    A single script is declared once with a ``grid`` over ``param``; two
    metadata files (suffixes ``0`` and ``1``) are created by hand and must be
    gone after the script is saved through the contents manager.
    """
    script_source = "\n# + tags=['parameters']\nupstream = None\n"
    Path('my-task.py').write_text(script_source)

    # one entry with a two-value grid: same script, different params
    grid_task = {
        'source': 'my-task.py',
        'name': 'my-task-',
        'product': 'my-task.ipynb',
        'grid': {
            'param': [1, 2]
        }
    }
    Path('pipeline.yaml').write_text(yaml.dump({'tasks': [grid_task]}))

    metadata_files = [
        Path('.my-task-0.ipynb.metadata'),
        Path('.my-task-1.ipynb.metadata'),
    ]
    for metadata in metadata_files:
        metadata.touch()

    cm = PloomberContentsManager()
    cm.save(cm.get('my-task.py'), path='/my-task.py')

    for metadata in metadata_files:
        assert not metadata.exists()
Example #4
0
def test_hot_reload_when_adding_function_task(backup_spec_with_functions_flat):
    """A function task defined in a brand-new module appears after hot reload.

    The jupyter settings are enabled first and the contents manager is
    created. Then ``new_task.py`` is written and registered in the spec. The
    root listing must contain ``new_task.py (functions)`` and that entry must
    list exactly ``new_task``.
    """
    spec_file = Path('pipeline.yaml')

    # setup: enable the jupyter settings and persist the spec
    spec = yaml.safe_load(spec_file.read_text())
    meta = spec['meta']
    meta['jupyter_functions_as_notebooks'] = True
    meta['jupyter_hot_reload'] = True
    spec_file.write_text(yaml.dump(spec))

    # creating the manager simulates the user starting "jupyter notebook"
    cm = PloomberContentsManager()

    # the user writes a new module containing a single task function...
    Path('new_task.py').write_text('\ndef new_task(product):\n    pass\n')

    # ...and declares it in the spec
    new_entry = {'source': 'new_task.new_task', 'product': 'file.csv'}
    spec['tasks'].append(new_entry)
    spec_file.write_text(yaml.dump(spec))

    # the root listing must expose the functions entry
    root_names = [entry['name'] for entry in cm.get('')['content']]
    assert 'new_task.py (functions)' in root_names

    # and the entry must contain only the new function
    function_names = [
        entry['name']
        for entry in cm.get('new_task.py (functions)')['content']
    ]
    assert ['new_task'] == function_names
Example #5
0
def test_hot_reload(tmp_nbs):
    """Cell injection follows edits made to the script while the manager runs.

    With ``jupyter_hot_reload`` and ``extract_upstream`` enabled, ``plot.py``
    gets an injected cell. Pointing its upstream at a task that does not
    exist must suppress the injection, and restoring the original code must
    bring it back.
    """
    spec_file = Path('pipeline.yaml')
    script = Path('plot.py')

    # modify the base pipeline.yaml to enable hot reload
    spec = yaml.load(spec_file.read_text(), Loader=yaml.SafeLoader)
    spec['meta']['jupyter_hot_reload'] = True
    spec['meta']['extract_upstream'] = True

    # upstream is extracted from the source, so drop it from the spec
    for task in spec['tasks']:
        task.pop('upstream', None)

    spec_file.write_text(yaml.dump(spec))

    # creating the manager simulates running "jupyter notebook"
    cm = PloomberContentsManager()

    # initially the script must come with an injected cell
    assert get_injected_cell(cm.get('plot.py')['content'])

    # point the upstream dependency at a task that does not exist
    original_code = script.read_text()
    broken_code = original_code.replace("{'clean': None}",
                                        "{'no_task': None}")
    script.write_text(broken_code)

    # this time no cell should be injected
    assert not get_injected_cell(cm.get('plot.py')['content'])

    # restore the script: the cell must be injected again
    script.write_text(original_code)
    assert get_injected_cell(cm.get('plot.py')['content'])
Example #6
0
def test_skips_if_file_not_in_dag(tmp_nbs):
    """A script outside the pipeline is returned without extra cells."""
    on_disk = jupytext.read('dummy.py')
    served = PloomberContentsManager().get('dummy.py')

    # dummy.py is not part of the pipeline, so the model served by the
    # contents manager must have as many cells as the file on disk
    served_cells = served['content']['cells']
    assert len(served_cells) == len(on_disk.cells)
Example #7
0
def test_dag_from_dotted_path(monkeypatch, tmp_nbs, add_current_to_sys_path,
                              no_sys_modules_cache):
    """A cell is injected when ENTRY_POINT is the dotted path factory.make."""
    monkeypatch.setenv('ENTRY_POINT', 'factory.make')

    contents = PloomberContentsManager().get('plot.py')['content']

    assert get_injected_cell(contents)
Example #8
0
def test_dag_from_env_var_with_custom_name(monkeypatch, tmp_nbs_nested, cwd,
                                           file_to_get):
    """A cell is injected when ENTRY_POINT names a non-default spec file.

    ``cwd`` is the directory to switch into before creating the manager and
    ``file_to_get`` the path requested from it (both are supplied from
    outside this function; presumably by a parametrization not shown here).
    """
    monkeypatch.setenv('ENTRY_POINT', 'pipeline.another.yaml')
    os.chdir(cwd)

    contents = PloomberContentsManager().get(file_to_get)['content']

    assert get_injected_cell(contents)
Example #9
0
def test_deletes_metadata_on_save(tmp_nbs):
    """Saving plot.py removes output/.plot.ipynb.metadata."""
    output_dir = Path('output')
    output_dir.mkdir()

    metadata_file = output_dir / '.plot.ipynb.metadata'
    metadata_file.touch()

    cm = PloomberContentsManager()
    cm.save(cm.get('plot.py'), path='/plot.py')

    assert not metadata_file.exists()
Example #10
0
def test_injects_cell_when_initialized_from_sub_directory(tmp_nbs_nested):
    """A cell is injected when the manager is created inside ``load/``."""
    # start from a sub-directory, simulating the case where pipeline.yaml
    # has to be looked up recursively
    os.chdir('load')

    contents = PloomberContentsManager().get('load.py')['content']

    assert get_injected_cell(contents)
Example #11
0
def test_dag_from_directory(monkeypatch, tmp_nbs):
    """A cell is injected when ENTRY_POINT is the current directory."""
    # these files are not needed for this test case, remove them
    for unneeded in ('pipeline.yaml', 'factory.py'):
        Path(unneeded).unlink()

    monkeypatch.setenv('ENTRY_POINT', '.')

    contents = PloomberContentsManager().get('plot.py')['content']

    assert get_injected_cell(contents)
Example #12
0
def test_save(tmp_nbs):
    """Saving a modified model writes the change and no injected cell."""
    cm = PloomberContentsManager()
    model = cm.get('plot.py')

    # When a .py file is saved from jupyter notebook, the model handed to
    # .save was observed to lack the "path" key. That could not be
    # reproduced from this test, so the key is removed on purpose to
    # simulate it (the root cause is unknown).
    del model['path']

    first_cell = model['content']['cells'][0]
    first_cell['source'] = '# modification\n' + first_cell['source']
    cm.save(model, path='/plot.py')

    saved_notebook = jupytext.read('plot.py')
    saved_text = Path('plot.py').read_text()

    assert get_injected_cell(saved_notebook) is None
    assert '# modification' in saved_text
Example #13
0
def test_disable_functions_as_notebooks(backup_spec_with_functions):
    """
    With jupyter_functions_as_notebooks set to False, directory listings
    contain only the regular files (no "(functions)" entries)
    """
    spec_file = Path('pipeline.yaml')

    spec = yaml.safe_load(spec_file.read_text())
    spec['meta']['jupyter_functions_as_notebooks'] = False
    spec_file.write_text(yaml.dump(spec))

    cm = PloomberContentsManager()

    def names_in(directory):
        return {entry['name'] for entry in cm.get(directory)['content']}

    assert names_in('') == {'my_tasks', 'pipeline.yaml'}
    assert names_in('my_tasks') == {'__init__.py', 'clean', 'raw'}

    # the feature is off: only the files themselves must be listed
    assert names_in('my_tasks/raw') == {
        '__init__.py',
        'functions.py',
    }
    assert names_in('my_tasks/clean') == {
        '__init__.py',
        'functions.py',
        'util.py',
    }
Example #14
0
def test_injects_cell_even_if_pipeline_yaml_in_subdirectory(tmp_nbs):
    """A cell is injected when the manager starts one directory above."""
    # move to the parent directory before creating the manager; the script
    # is then requested through the "content/" prefix
    os.chdir('..')

    contents = PloomberContentsManager().get('content/plot.py')['content']

    assert get_injected_cell(contents)
Example #15
0
def test_import(tmp_nbs):
    """The manager can be created after swapping in pipeline-w-location.yaml.

    Checks that modules located in the current working directory can be
    imported; the test passes if creating the manager does not raise.
    """
    default_spec = Path('pipeline.yaml')

    # replace the default spec with the alternative one
    default_spec.unlink()
    os.rename('pipeline-w-location.yaml', str(default_spec))

    PloomberContentsManager()
Example #16
0
def test_jupyter_workflow_with_functions(backup_spec_with_functions):
    """
    Tests a typical workflow with a pipeline where some tasks are functions:
    list directories, open the function notebooks, edit and save them
    """
    cm = PloomberContentsManager()

    def names_in(directory):
        return {entry['name'] for entry in cm.get(directory)['content']}

    assert names_in('') == {'my_tasks', 'pipeline.yaml'}
    assert names_in('my_tasks') == {'__init__.py', 'clean', 'raw'}

    # each module with function tasks gets an extra "(functions)" entry
    assert names_in('my_tasks/raw') == {
        '__init__.py',
        'functions.py',
        'functions.py (functions)',
    }
    assert names_in('my_tasks/clean') == {
        '__init__.py',
        'functions.py',
        'functions.py (functions)',
        'util.py',
    }

    # open the notebooks generated from the task functions
    raw_path = 'my_tasks/raw/functions.py (functions)/raw'
    clean_path = 'my_tasks/clean/functions.py (functions)/clean'
    raw = cm.get(raw_path)
    clean = cm.get(clean_path)

    # append the same new code cell to both notebooks
    nbformat_module = nbformat.versions[nbformat.current_nbformat]
    cell = nbformat_module.new_code_cell('1 + 1')
    raw['content']['cells'].append(cell)
    clean['content']['cells'].append(cell)

    # saving overwrites the original functions
    cm.save(raw, path=raw_path)
    cm.save(clean, path=clean_path)

    # the new code must now be present in the source files
    tasks_dir = backup_spec_with_functions / 'my_tasks'
    raw_source = (tasks_dir / 'raw' / 'functions.py').read_text()
    clean_source = (tasks_dir / 'clean' / 'functions.py').read_text()

    assert '1 + 1' in raw_source
    assert '1 + 1' in clean_source
Example #17
0
 def setUp(self):
     """Create a temporary directory and a contents manager rooted in it."""
     temp_dir = TemporaryDirectory()
     root = temp_dir.name

     # keep a reference to the TemporaryDirectory object on the instance
     self._temp_dir = temp_dir
     self.td = root
     self.contents_manager = PloomberContentsManager(root_dir=root)