def test_should_include_dataset_subset_name_in_output_file_list(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'dataset': {
            **DEFAULT_EXPERIMENT_DATA['dataset'],
            'subset_name': DATASET_SUBSET_NAME_1
        }
    })['output_file_list'] == (
        f'file-list-{DATASET_SUBSET_NAME_1}.lst')

def test_should_include_eval_name_in_run_name(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'dataset': {
            **DEFAULT_EXPERIMENT_DATA['dataset'],
            'eval_name': DATASET_EVAL_NAME_1
        }
    })['run_name'] == (
        f'{DEFAULT_DATASET_NAME}-{DATASET_EVAL_NAME_1}_{DEFAULT_MODEL_NAME}'
    )

def test_should_determine_autocut_train_input_source_file_list(self):
    input_model = AUTOCUT_TRAIN_CONFIG_1['autocut']['input_model']
    input_model_name = input_model['name']
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'train': AUTOCUT_TRAIN_CONFIG_1
    })['train']['autocut']['input_source'].get('file_list') == (
        f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
        f'{input_model_name}/file-list.lst')

def test_should_include_eval_name_in_eval_output_path(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'dataset': {
            **DEFAULT_EXPERIMENT_DATA['dataset'],
            'eval_name': DATASET_EVAL_NAME_1
        }
    })['eval_output_path'] == (
        f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
        f'{DEFAULT_MODEL_NAME}/evaluation-results/{DATASET_EVAL_NAME_1}'
    )

def test_should_include_image_config(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'xyz_image': IMAGE_1
    })['xyz_image'] == IMAGE_1

def test_should_include_config_property(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'config': CUSTOM_CONFIG_1
    })['config'] == CUSTOM_CONFIG_1

def test_should_include_train_config(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'train': TRAIN_CONFIG_1
    })['train'] == TRAIN_CONFIG_1

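# Note: experiment data values originate from an external source, so
# booleans such as 'resume' arrive as strings; the two tests below assume
# get_conf_for_experiment_data coerces them to real booleans, defaulting to
# True when the key is absent.
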
def test_should_enable_resume_by_default(self):
    # pylint: disable=singleton-comparison
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA
    })['resume'] is True

def test_should_disable_resume_if_disabled_by_experiment(self):
    # pylint: disable=singleton-comparison
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'resume': 'false'
    })['resume'] is False

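# 'get_default_limit_mock' below is assumed to be injected by a patch
# fixture or decorator defined earlier in this module (not shown in this
# excerpt).
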
def test_should_fallback_to_environ_default_limit(
        self, get_default_limit_mock):
    get_default_limit_mock.return_value = '123'
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA
    })['limit'] == '123'

def test_should_determine_source_file_list(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA}
    )['source_file_list'] == DEFAULT_FILE_LIST

def test_should_raise_exception_with_unknown_config(self):
    with pytest.raises(InvalidConfigError):
        get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'train': UNKNOWN_TRAIN_CONFIG_1
        })

def test_should_use_default_to_grobid_train_tasks_with_grobid_train_config(
        self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'train': GROBID_TRAIN_CONFIG_1
    })['tasks'] == DEFAULT_GROBID_TRAIN_TASKS

def test_should_determine_run_name(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA})['run_name'] == (
        f'{DEFAULT_DATASET_NAME}_{DEFAULT_MODEL_NAME}')

def test_should_determine_eval_output_path(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA})['eval_output_path'] == (
        f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
        f'{DEFAULT_MODEL_NAME}/evaluation-results/all')

def test_should_determine_output_suffix(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA})['output_suffix'] == '.xml.gz'

def test_should_include_limit_in_output_file_list(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'limit': f'{LIMIT_1}'
    })['output_file_list'] == f'file-list-{LIMIT_1}.lst'

def test_should_include_tasks(self):
    tasks = ['task1', 'task2']
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'tasks': tasks
    })['tasks'] == tasks

def test_should_determine_source_data_path(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA}
    )['source_data_path'] == DEFAULT_SOURCE_DATA_PATH

def test_should_use_default_to_convert_tasks_without_train_config(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA}
    )['tasks'] == DEFAULT_CONVERT_AND_EVALUATE_TASKS

def test_should_determine_output_file_list(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA}
    )['output_file_list'] == 'file-list.lst'

def test_should_use_default_to_autocut_train_tasks_with_autocut_train_config(
        self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'train': AUTOCUT_TRAIN_CONFIG_1
    })['tasks'] == DEFAULT_AUTOCUT_TRAIN_TASKS

def test_should_use_limit_from_experiment(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'limit': '123'
    })['limit'] == '123'

def test_should_fallback_to_dataset_limit(self):
    assert get_conf_for_experiment_data({
        **DEFAULT_EXPERIMENT_DATA,
        'dataset': {
            **DEFAULT_EXPERIMENT_DATA['dataset'],
            'limit': '123'
        }
    })['limit'] == '123'

def test_should_pass_through_model(self):
    assert get_conf_for_experiment_data(
        {**DEFAULT_EXPERIMENT_DATA}
    )['model'] == DEFAULT_EXPERIMENT_DATA['model']