def test_generate_commands_should_raise_if_missing_variable(work_dir):
    """
        Check dvc bash command generation raises MlVToolException when the script
        docstring uses a template variable the docstring conf does not provide
    """
    script_path = join(work_dir, 'script_python.py')
    dc_conf_path = join(work_dir, 'dc_conf.yml')
    dvc_cmd_path = join(work_dir, 'dvc_cmd')

    # The docstring references both conf.output_file and conf.locale
    script_lines = (
        'def my_funct(subset: str, rate: int):\n',
        '\t"""\n',
        ':param str locale: The data locale\n',
        ':param output_file: the output_file\n',
        ':dvc-out output_file: {{ conf.output_file }}\n',
        ':dvc-extra:  --locale {{ conf.locale }}\n',
        '\t"""\n',
        "\tprint('toto')\n",
    )
    with open(script_path, 'w') as script_fd:
        script_fd.write(''.join(script_lines))

    # The docstring conf only defines output_file; locale is left out
    with open(dc_conf_path, 'w') as conf_fd:
        yaml.dump({'output_file': './data/other.txt'}, conf_fd)

    with pytest.raises(MlVToolException):
        MlScriptToCmd().run('-i', script_path,
                            '--out-dvc-cmd', dvc_cmd_path,
                            '--working-directory', work_dir,
                            '--docstring-conf', dc_conf_path)
# Example 2
def test_should_overwrite_conf_for_docstring_conf(work_dir):
    """
        Check the --docstring-conf argument is used instead of the docstring conf
        given to the main configuration setup
    """
    # Docstring conf handed to the main configuration setup
    base_dc_conf_path = join(work_dir, 'conf_dc_path.yml')
    write_docstring_conf(base_dc_conf_path, './output_file_base.txt')
    _, script_path = setup_with_conf(work_dir,
                                     conf_path=join(work_dir,
                                                    DEFAULT_CONF_FILENAME),
                                     docstring_conf_path=base_dc_conf_path)

    # Docstring conf passed on the command line
    override_dc_conf_path = join(work_dir, 'new_dc_path.yml')
    write_docstring_conf(override_dc_conf_path,
                         './output_file_overwritten.txt')

    dvc_cmd_path = join(work_dir, 'new_place_dvc')
    MlScriptToCmd().run('-i', script_path,
                        '--working-directory', work_dir,
                        '--docstring-conf', override_dc_conf_path,
                        '--out-dvc-cmd', dvc_cmd_path)

    # Only the value from the command line docstring conf must show up
    with open(dvc_cmd_path, 'r') as dvc_fd:
        generated = dvc_fd.read()
    assert './output_file_base.txt' not in generated
    assert './output_file_overwritten.txt' in generated
# Example 3
def test_should_raise_if_missing_dvc_command_output_path_argument_and_no_conf():
    """
        Check the command raises MlVToolException when called without
        --out-dvc-cmd and without a configuration argument
    """
    with pytest.raises(MlVToolException):
        MlScriptToCmd().run('-i', './test.py', '--working-directory', './')
# Example 4
def test_should_raise_if_dvc_command_output_path_exist_and_no_force(work_dir):
    """
        Check the command raises MlVToolException when the dvc output file is
        already present and --force is not given
    """
    existing_output = join(work_dir, 'dvc_out')
    # Create an empty file at the requested output location
    with open(existing_output, 'w'):
        pass

    with pytest.raises(MlVToolException):
        MlScriptToCmd().run('-i', './test.py',
                            '--working-directory', work_dir,
                            '--out-dvc-cmd', existing_output)
# Example 5
def test_should_get_output_path_from_auto_detected_conf(work_dir):
    """
        Check the dvc command file is written to the expected location when no
        --conf-path is given and the conf uses the default filename
    """
    default_conf = join(work_dir, DEFAULT_CONF_FILENAME)
    _, script_path = setup_with_conf(work_dir, conf_path=default_conf)

    # No --conf-path and no --out-dvc-cmd on the command line
    MlScriptToCmd().run('-i', script_path, '--working-directory', work_dir)

    # Expected location: <work_dir>/dvc_cmd/script_path_dvc
    expected_dvc_cmd = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert exists(expected_dvc_cmd)
def test_should_generate_commands(work_dir):
    """
        Check a dvc bash command file is generated from a python script whose
        docstring declares dvc inputs and outputs, one of them through a
        docstring conf template variable.
    """
    script_path = join(work_dir, 'script_python.py')
    dc_conf_path = join(work_dir, 'dc_conf.yml')
    dvc_cmd_path = join(work_dir, 'dvc_cmd')

    # Python script with dvc directives in its docstring
    script_lines = (
        'def my_funct(subset, rate):\n',
        '\t"""\n',
        ':param str input_file: the input file\n',
        ':param output_file: the output_file\n',
        ':param rate: the rate\n',
        ':param int retry:\n',
        ':param List[int] threshold:\n',
        ':dvc-in input_file: ./data/train_set.csv\n',
        ':dvc-out output_file: {{ conf.output_file }}\n',
        ':dvc-out: ./data/other.txt\n',
        '\t"""\n',
        "\tprint('toto')\n",
    )
    with open(script_path, 'w') as script_fd:
        script_fd.write(''.join(script_lines))

    # Docstring conf providing the value for conf.output_file
    with open(dc_conf_path, 'w') as conf_fd:
        yaml.dump({'output_file': './data/other.txt'}, conf_fd)

    MlScriptToCmd().run('-i', script_path,
                        '--out-dvc-cmd', dvc_cmd_path,
                        '--working-directory', work_dir,
                        '--docstring-conf', dc_conf_path)

    assert exists(dvc_cmd_path)
    # The generated file is expected to carry rwxr-xr-x permissions
    permission_bits = stat.S_IMODE(os_stat(dvc_cmd_path).st_mode)
    assert permission_bits == 0o755

    with open(dvc_cmd_path, 'r') as dvc_fd:
        dvc_bash_content = dvc_fd.read()

    # Every fragment below must appear in the generated bash content
    expected_fragments = (
        'OUTPUT_FILE="./data/other.txt"',
        'MLV_DVC_META_FILENAME="script_python.dvc"',
        'dvc run${NO_CACHE_OPT} --overwrite-dvcfile -f $MLV_DVC_META_FILENAME',
        '-o $OUTPUT_FILE',
        '-o ./data/other.txt',
        '-d $INPUT_FILE',
    )
    for fragment in expected_fragments:
        assert fragment in dvc_bash_content
# Example 7
def test_should_get_output_path_from_conf(work_dir):
    """
        Check the dvc command file is written to the expected location when the
        configuration file is given through --conf-path
    """
    custom_conf = join(work_dir, 'my_conf')
    conf_path, script_path = setup_with_conf(work_dir, conf_path=custom_conf)

    MlScriptToCmd().run('-i', script_path,
                        '--working-directory', work_dir,
                        '--conf-path', conf_path)

    # Expected location: <work_dir>/dvc_cmd/script_path_dvc
    expected_dvc_cmd = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert exists(expected_dvc_cmd)
# Example 8
def test_should_overwrite_with_force_argument(work_dir):
    """
        Check an already existing dvc command file is overwritten when --force
        is given
    """
    script_path = join(work_dir, 'script_path.py')
    write_min_script(script_path)

    # Start from an empty file at the output location
    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    with open(dvc_cmd_path, 'w'):
        pass

    MlScriptToCmd().run('-i', script_path,
                        '--out-dvc-cmd', dvc_cmd_path,
                        '--working-directory', work_dir,
                        '--force')

    # The file must no longer be empty once the command has run
    with open(dvc_cmd_path, 'r') as dvc_fd:
        regenerated = dvc_fd.read()
    assert regenerated
# Example 9
def test_should_overwrite_conf_for_path(work_dir):
    """
        Check the --out-dvc-cmd argument is used instead of the conf based
        output location
    """
    default_conf = join(work_dir, DEFAULT_CONF_FILENAME)
    _, script_path = setup_with_conf(work_dir, conf_path=default_conf)

    arg_dvc_cmd_path = join(work_dir, 'new_place_dvc')
    MlScriptToCmd().run('-i', script_path,
                        '--working-directory', work_dir,
                        '--out-dvc-cmd', arg_dvc_cmd_path)

    # The file exists at the argument location only, not at the conf based one
    assert exists(arg_dvc_cmd_path)
    conf_based_location = join(work_dir, 'dvc_cmd', 'script_path_dvc')
    assert not exists(conf_based_location)
# Example 10
def test_should_generate_dvc_with_whole_cmd(work_dir):
    """
        Check the dvc bash command file contains the whole dvc command given
        through the :dvc-cmd: docstring directive
    """
    cmd = ''.join((
        'dvc run -o ./out_train.csv \n',
        '-o ./out_test.csv\n',
        '$MLV_PY_CMD_PATH -m train --out ./out_train.csv &&\n',
        './python/${MLV_PY_CMD_NAME} -m test --out ./out_test.csv',
    ))
    script_lines = (
        'def my_funct(subset: str, rate: int):\n',
        '\t"""\n',
        ':param str input_file: the input file\n',
        ':param output_file: the output_file\n',
        f':dvc-cmd:{cmd}\n',
        '\t"""\n',
        "\tprint('toto')\n",
    )

    script_path = join(work_dir, 'script_python.py')
    with open(script_path, 'w') as script_fd:
        script_fd.write(''.join(script_lines))

    makedirs(join(work_dir, 'python'))
    dvc_cmd_path = join(work_dir, 'dvc_cmd')
    MlScriptToCmd().run('-i', script_path,
                        '--out-dvc-cmd', dvc_cmd_path,
                        '--working-directory', work_dir)

    assert exists(dvc_cmd_path)

    with open(dvc_cmd_path, 'r') as dvc_fd:
        dvc_bash_content = dvc_fd.read()

    relative_py_cmd_path = relpath(script_path, work_dir)

    # Script path variables, then the command with each newline turned into
    # a bash line continuation
    expected_fragments = (
        f'MLV_PY_CMD_PATH="{relative_py_cmd_path}"',
        f'MLV_PY_CMD_NAME="{basename(relative_py_cmd_path)}"',
        cmd.replace('\n', ' \\\n'),
    )
    for fragment in expected_fragments:
        assert fragment in dvc_bash_content
# Example 11
def test_should_get_docstring_conf_from_main_conf(work_dir):
    """
        Check docstring template values come from the docstring conf given to
        the main configuration setup when no --docstring-conf argument is passed
    """
    main_dc_conf_path = join(work_dir, 'conf_dc_path.yml')
    write_docstring_conf(main_dc_conf_path, './output_file_base.txt')
    _, script_path = setup_with_conf(work_dir,
                                     conf_path=join(work_dir,
                                                    DEFAULT_CONF_FILENAME),
                                     docstring_conf_path=main_dc_conf_path)

    dvc_cmd_path = join(work_dir, 'new_place_dvc')
    MlScriptToCmd().run('-i', script_path,
                        '--working-directory', work_dir,
                        '--out-dvc-cmd', dvc_cmd_path)

    # The value written in the docstring conf must appear in the output
    with open(dvc_cmd_path, 'r') as dvc_fd:
        generated = dvc_fd.read()
    assert './output_file_base.txt' in generated