예제 #1
0
def test_should_raise_if_target_does_not_exist(base_steps):
    """
        Resolving the dependencies of a target DVC file which cannot be
        read must raise a MlVToolException whose cause is an IOError
    """
    missing_target = 'does_not_exit.dvc'

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=missing_target,
                             dvc_files=base_steps)

    # The wrapping exception is expected to chain the original I/O failure
    root_cause = raised.value.__cause__
    assert isinstance(root_cause, IOError)
예제 #2
0
def test_should_raise_dvc_file_step_not_found(base_steps):
    """
        A MlVToolException caused by an IOError must be raised when one of
        the provided DVC files (here an extra, unknown step) cannot be read
    """
    unknown_step = './does_not_exist_step.dvc'
    # Register the unknown step next to the valid ones (in-place on purpose)
    base_steps.append(unknown_step)
    first_step = base_steps[0]

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=first_step,
                             dvc_files=base_steps)

    root_cause = raised.value.__cause__
    assert isinstance(root_cause, IOError)
예제 #3
0
def test_should_raise_if_target_format_error(work_dir, base_steps):
    """
        A target DVC file with an invalid content must raise a
        MlVToolException whose cause is a YAMLError
    """
    malformed_target = join(work_dir, 'format_error.dvc')
    invalid_content = b'k:v:\n\t\t-'

    # Write raw bytes so the broken content reaches the parser untouched
    with open(malformed_target, 'wb') as dvc_file:
        dvc_file.write(invalid_content)

    with pytest.raises(MlVToolException) as raised:
        get_dvc_dependencies(target_file_path=malformed_target,
                             dvc_files=base_steps)

    root_cause = raised.value.__cause__
    assert isinstance(root_cause, YAMLError)
예제 #4
0
def test_should_get_dependencies_steps(base_steps):
    """
        Test that all the steps the target step depends on are returned,
        in a valid topological order

             +-----------+              +---------------+
             | step1.dvc |              | isolated1.dvc |
             +-----------+              +---------------+
                    *
             +-----------+              +---------------+
             | step2.dvc |              | isolated2.dvc |
             +-----------+              +---------------+
         **                  **
+-----------+             +-----------+
| step3.dvc |             | step4.dvc |
+-----------+             +-----------+
                **    **
             +-----------+
             | step5.dvc |
             +-----------+
    """
    # The last step of the fixture is used as the target
    target_step = base_steps[-1]
    resolved_names = [
        dvc_meta.name
        for dvc_meta in get_dvc_dependencies(target_file_path=target_step,
                                             dvc_files=base_steps)
    ]

    # step3 and step4 are interchangeable, so two topological sorts are valid:
    #   1 -> 2 -> 3 -> 4 -> 5   or   1 -> 2 -> 4 -> 3 -> 5
    valid_index_orders = ((0, 1, 2, 3, 4), (0, 1, 3, 2, 4))
    valid_name_orders = tuple(
        [basename(base_steps[position]) for position in index_order]
        for index_order in valid_index_orders)

    assert resolved_names in valid_name_orders
def export_pipeline(dvc_meta_file: str, output: str, work_dir: str):
    """
     Generate an executable script to run a whole pipeline

     The commands of the DVC metas returned by get_dvc_dependencies for
     dvc_meta_file are collected, in the returned order, and handed to the
     pipeline export template together with work_dir. The rendered
     template is written to output.
    """
    logging.info(f'Export pipeline from step {dvc_meta_file} to {output}')
    logging.debug(f'Work directory {work_dir}')

    # Candidate DVC files are looked up first, then filtered/ordered
    candidate_dvc_files = get_dvc_files(dvc_meta_file)
    pipeline_metas = get_dvc_dependencies(dvc_meta_file, candidate_dvc_files)

    pipeline_cmds = [step_meta.cmd for step_meta in pipeline_metas]
    template_data = {'work_dir': work_dir, 'cmds': pipeline_cmds}
    logging.debug(f'Template data: {template_data}')

    template_dir = join(CURRENT_DIR, '..', 'template')
    write_template(output,
                   join(template_dir, PIPELINE_EXPORT_TEMPLATE_NAME),
                   info=template_data)

    # NOTE(review): logged one level above WARNING, presumably so the
    # confirmation stays visible with a warning-level logger - confirm
    success_level = logging.WARNING + 1
    logging.log(success_level,
                f'Pipeline successfully exported in {abspath(output)}')
예제 #6
0
def test_should_remove_not_targeted_steps(work_dir, base_steps):
    """
        Test that only the steps the target depends on are returned; steps
        outside of the target dependency chain must be left out
             +-----------+
             | step1.dvc |
             +-----------+**
                    *       **********
             +-----------+           +---------------+
             | step2.dvc |           | step2_bis.dvc |
             +-----------+           +---------------+
         **                  **
+-----------+             +-----------+
| step3.dvc |             | step4.dvc |
+-----------+             +-----------+
                **    **            **
             +-----------+            +-----------+
             | step5.dvc |            | step6.dvc |
             +-----------+            +-----------+
    """
    step2_bis = join(work_dir, 'step2_bis.dvc')
    target_step = join(work_dir, 'step6.dvc')
    # Extend the fixture in place with the two extra steps
    base_steps.extend([step2_bis, target_step])

    # step2_bis consumes the output of step1, step6 the output of step4
    write_dvc_file(step2_bis,
                   'cmd2_bis',
                   deps=['./s1_out'],
                   outs=['./s2_bis_out'])
    write_dvc_file(target_step,
                   'cmd6',
                   deps=['./s4_out'],
                   outs=['./s6_out'])

    # Topological solution is 1 -> 2 -> 4 -> 6
    expected_names = [
        basename(step_path)
        for step_path in (base_steps[0], base_steps[1], base_steps[3],
                          target_step)
    ]

    resolved_names = [
        dvc_meta.name
        for dvc_meta in get_dvc_dependencies(target_file_path=target_step,
                                             dvc_files=base_steps)
    ]

    assert resolved_names == expected_names