Beispiel #1
0
def test_should_get_dvc_params():
    """
        Check that dvc-in and dvc-out entries, declared with or without a
        related parameter, are extracted from a parsed docstring
    """
    docstring_str = (':param str param1: Param1 description\n'
                     ':param param2: input file\n'
                     ':dvc-out: path/to/file.txt\n'
                     ':dvc-out param1: path/to/other\n'
                     ':dvc-in param2: path/to/in/file\n'
                     ':dvc-in: path/to/other/infile.test')
    docstring = parse_docstring(docstring_str)
    # Two :param entries plus four dvc entries
    assert len(docstring.meta) == 6

    dvc_params = get_dvc_params(docstring)

    # Inputs: one anonymous path and one bound to param2
    assert len(dvc_params.dvc_in) == 2
    expected_inputs = (
        DocstringDvcIn('path/to/other/infile.test'),
        DocstringDvcIn('path/to/in/file', related_param='param2'),
    )
    for expected_input in expected_inputs:
        assert expected_input in dvc_params.dvc_in

    # Outputs: one anonymous path and one bound to param1
    assert len(dvc_params.dvc_out) == 2
    expected_outputs = (
        DocstringDvcOut('path/to/file.txt'),
        DocstringDvcOut('path/to/other', related_param='param1'),
    )
    for expected_output in expected_outputs:
        assert expected_output in dvc_params.dvc_out

    assert not dvc_params.dvc_extra
def get_dvc_template_data(docstring_info: DocstringInfo,
                          python_cmd_path: str,
                          meta_file_variable_name: str,
                          extra_variables: dict = None):
    """
        Build the data dictionary used to render the dvc bash command
        template.

        :param docstring_info: holder of the parsed docstring the dvc
            parameters are read from
        :param python_cmd_path: path of the python script, stored under
            'python_script' and used to derive the meta file name when the
            docstring does not provide one
        :param meta_file_variable_name: name of the bash variable which
            receives the meta file name
        :param extra_variables: optional name/value pairs rendered as
            additional bash variable assignments
        :return: dictionary with keys 'variables',
            'meta_file_name_var_assign', 'meta_file_name_var',
            'whole_command', 'python_script', 'dvc_inputs', 'dvc_outputs'
            and 'python_params'
    """
    logging.info('Build data for DVC command generation using template')
    dvc_params = get_dvc_params(docstring_info.docstring)

    variables = []
    if extra_variables:
        for name, value in extra_variables.items():
            variables.append(f'{name}="{value}"')

    # Meta file name from the docstring wins; otherwise derive it from the
    # script path
    meta_file_name = dvc_params.meta_file_name
    if not meta_file_name:
        meta_file_name = to_dvc_meta_filename(python_cmd_path)

    info = {
        'variables': variables,
        'meta_file_name_var_assign':
        f'{meta_file_variable_name}="{meta_file_name}"',
        'meta_file_name_var': meta_file_variable_name,
        'whole_command': None,
        'python_script': python_cmd_path,
        'dvc_inputs': [],
        'dvc_outputs': [],
        'python_params': ''
    }

    if dvc_params.dvc_cmd:
        # A whole command was provided: inputs, outputs and python params
        # are left empty
        logging.info('DVC mode: whole command provided')
        raw_cmd = dvc_params.dvc_cmd.cmd
        # Turn each line break into a bash line continuation
        info['whole_command'] = raw_cmd.replace('\n', ' \\\n')
        logging.debug(f'Custom command {info["whole_command"]}')
        return info

    logging.info('DVC mode: generate command from parameters')
    # Extra parameters come first on the python command line
    python_params = [extra_param.extra for extra_param in dvc_params.dvc_extra]

    def collect(entries: List[DocstringDvc], target_key: str):
        """
            Register each dvc entry under info[target_key]
        """
        for entry in entries:
            if not entry.related_param:
                # No related parameter: the raw path is used as is
                info[target_key].append(entry.file_path)
                continue
            # Related parameter: declare a bash variable holding the path and
            # pass it to the python script through the matching option
            variable_name = to_bash_variable(entry.related_param)
            py_cmd_param = to_cmd_param(entry.related_param)
            info['variables'].append(f'{variable_name}="{entry.file_path}"')
            python_params.append(f'--{py_cmd_param} ${variable_name}')
            info[target_key].append(f'${variable_name}')

    collect(dvc_params.dvc_in, 'dvc_inputs')
    collect(dvc_params.dvc_out, 'dvc_outputs')

    info['python_params'] = ' '.join(python_params)
    logging.debug(f'Template info: {info}')
    return info
Beispiel #3
0
def test_should_get_dvc_command():
    """
        Check that a multi-line dvc-cmd entry is extracted unchanged and
        that no dvc-in, dvc-out or dvc-extra entries are reported with it
    """
    cmd = ('dvc run -o ./out_train.csv \n'
           '-o ./out_test.csv\n'
           './py_cmd -m train --out ./out_train.csv &&\n'
           './py_cmd -m test --out ./out_test.csv')
    docstring = parse_docstring(f':dvc-cmd: {cmd}')

    dvc_params = get_dvc_params(docstring)

    # Only the whole command is expected
    assert not dvc_params.dvc_in
    assert not dvc_params.dvc_out
    assert not dvc_params.dvc_extra

    assert dvc_params.dvc_cmd.cmd == cmd
Beispiel #4
0
def test_should_raise_if_dvc_command_and_others():
    """
        Check that a dvc-cmd entry combined with a dvc-in, dvc-out or
        dvc-extra entry makes the dvc parameters extraction raise
    """
    docstring_template = (
        '{}'
        ':dvc-cmd: dvc run -o ./out_train.csv -o ./out_test.csv\n'
        ' ./py_cmd -m train --out ./out_train.csv && ./py_cmd -m test --out ./out_test.csv'
    )
    conflicting_entries = (
        ':dvc-in: ./file.csv\n',
        ':dvc-out: /file.csv\n',
        ':dvc-extra: --dry \n',
    )

    for conflicting_entry in conflicting_entries:
        # Parsing happens outside the raises block: only the extraction is
        # expected to fail
        docstring = parse_docstring(
            docstring_template.format(conflicting_entry))
        with pytest.raises(MlVToolException):
            get_dvc_params(docstring)