def test_should_get_dvc_params():
    """
        Check that dvc-in and dvc-out entries are extracted from a docstring,
        both with and without a related python parameter
    """
    raw_docstring = (
        ':param str param1: Param1 description\n'
        ':param param2: input file\n'
        ':dvc-out: path/to/file.txt\n'
        ':dvc-out param1: path/to/other\n'
        ':dvc-in param2: path/to/in/file\n'
        ':dvc-in: path/to/other/infile.test'
    )
    parsed = parse_docstring(raw_docstring)
    # Two :param entries plus four dvc entries
    assert len(parsed.meta) == 6

    extracted = get_dvc_params(parsed)

    # Inputs: one standalone path and one bound to param2
    assert len(extracted.dvc_in) == 2
    expected_inputs = (
        DocstringDvcIn('path/to/other/infile.test'),
        DocstringDvcIn('path/to/in/file', related_param='param2'),
    )
    for expected_in in expected_inputs:
        assert expected_in in extracted.dvc_in

    # Outputs: one standalone path and one bound to param1
    assert len(extracted.dvc_out) == 2
    expected_outputs = (
        DocstringDvcOut('path/to/file.txt'),
        DocstringDvcOut('path/to/other', related_param='param1'),
    )
    for expected_out in expected_outputs:
        assert expected_out in extracted.dvc_out

    # No :dvc-extra: entry was declared
    assert not extracted.dvc_extra
def get_dvc_template_data(docstring_info: DocstringInfo, python_cmd_path: str, meta_file_variable_name: str,
                          extra_variables: dict = None):
    """
        Build the data dictionary used to render the dvc bash command template.

        The DVC parameters are read from docstring_info.docstring through
        get_dvc_params. Two modes exist:
         - a whole command was provided (dvc_cmd): it is stored under
           'whole_command' with every newline turned into a bash line
           continuation, and the data is returned immediately
         - otherwise the command parts are generated from the dvc-in, dvc-out
           and dvc-extra entries

        :param docstring_info: object whose 'docstring' attribute holds the parsed docstring
        :param python_cmd_path: path stored under 'python_script'; also given to
            to_dvc_meta_filename when the docstring provides no meta file name
        :param meta_file_variable_name: name of the variable which receives the meta file name
        :param extra_variables: optional mapping name -> value, each one emitted
            as a name="value" variable assignment
        :return: dict with keys 'variables', 'meta_file_name_var_assign',
            'meta_file_name_var', 'whole_command', 'python_script',
            'dvc_inputs', 'dvc_outputs' and 'python_params'
    """
    logging.info('Build data for DVC command generation using template')
    dvc_params = get_dvc_params(docstring_info.docstring)

    if extra_variables:
        variables = [f'{name}="{value}"' for name, value in extra_variables.items()]
    else:
        variables = []

    # A meta file name found in the docstring takes precedence over the derived one
    meta_file_name = dvc_params.meta_file_name
    if not meta_file_name:
        meta_file_name = to_dvc_meta_filename(python_cmd_path)

    info = {
        'variables': variables,
        'meta_file_name_var_assign': f'{meta_file_variable_name}="{meta_file_name}"',
        'meta_file_name_var': meta_file_variable_name,
        'whole_command': None,
        'python_script': python_cmd_path,
        'dvc_inputs': [],
        'dvc_outputs': [],
        'python_params': ''
    }

    custom_cmd = dvc_params.dvc_cmd
    if custom_cmd:
        logging.info('DVC mode: whole command provided')
        # Each newline becomes ' \' + newline so the command stays a single bash statement
        info['whole_command'] = custom_cmd.cmd.replace('\n', ' \\\n')
        logging.debug(f'Custom command {info["whole_command"]}')
        return info

    logging.info('DVC mode: generate command from parameters')

    # Extra parameters come first on the python command line
    python_params = [extra_param.extra for extra_param in dvc_params.dvc_extra]

    # Inputs are handled before outputs; this fixes the order of the generated
    # variables and python parameters
    labelled_entries = (
        ('dvc_inputs', dvc_params.dvc_in),
        ('dvc_outputs', dvc_params.dvc_out),
    )
    for label, dvc_entries in labelled_entries:
        for entry in dvc_entries:
            if not entry.related_param:
                # No related python parameter: the raw path is used as is
                info[label].append(entry.file_path)
                continue

            # Entry bound to a python parameter: declare a variable holding
            # the path and reference it everywhere else
            variable_name = to_bash_variable(entry.related_param)
            py_cmd_param = to_cmd_param(entry.related_param)
            variables.append(f'{variable_name}="{entry.file_path}"')
            python_params.append(f'--{py_cmd_param} ${variable_name}')
            info[label].append(f'${variable_name}')

    info['python_params'] = ' '.join(python_params)
    logging.debug(f'Template info: {info}')
    return info
def test_should_get_dvc_command():
    """
        Check that a multi-line :dvc-cmd: entry is extracted unchanged and
        that no other dvc parameter is reported
    """
    cmd = (
        'dvc run -o ./out_train.csv \n'
        '-o ./out_test.csv\n'
        './py_cmd -m train --out ./out_train.csv &&\n'
        './py_cmd -m test --out ./out_test.csv'
    )
    parsed = parse_docstring(f':dvc-cmd: {cmd}')

    extracted = get_dvc_params(parsed)

    # Only the whole command is expected, every other collection stays empty
    for other_params in (extracted.dvc_in, extracted.dvc_out, extracted.dvc_extra):
        assert not other_params
    assert extracted.dvc_cmd.cmd == cmd
def test_should_raise_if_dvc_command_and_others():
    """
        Check that mixing :dvc-cmd: with a dvc-in, dvc-out or dvc-extra entry
        makes get_dvc_params raise a MlVToolException
    """
    docstring_template = (
        '{}'
        ':dvc-cmd: dvc run -o ./out_train.csv -o ./out_test.csv\n'
        ' ./py_cmd -m train --out ./out_train.csv && ./py_cmd -m test --out ./out_test.csv'
    )
    conflicting_entries = (
        ':dvc-in: ./file.csv\n',
        ':dvc-out: /file.csv\n',
        ':dvc-extra: --dry \n',
    )

    for conflicting_entry in conflicting_entries:
        # Parsing stays outside the raises block: only the extraction must fail
        parsed = parse_docstring(docstring_template.format(conflicting_entry))
        with pytest.raises(MlVToolException):
            get_dvc_params(parsed)