    def test_should_include_dataset_subset_name_in_output_file_list(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'dataset': {
                **DEFAULT_EXPERIMENT_DATA['dataset'],
                'subset_name': DATASET_SUBSET_NAME_1
            }
        })['output_file_list'] == f'file-list-{DATASET_SUBSET_NAME_1}.lst'

    def test_should_include_eval_name_in_run_name(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'dataset': {
                **DEFAULT_EXPERIMENT_DATA['dataset'],
                'eval_name': DATASET_EVAL_NAME_1
            }
        })['run_name'] == (
            f'{DEFAULT_DATASET_NAME}-{DATASET_EVAL_NAME_1}_{DEFAULT_MODEL_NAME}'
        )

    def test_should_determine_autocut_train_input_source_file_list(self):
        input_model = AUTOCUT_TRAIN_CONFIG_1['autocut']['input_model']
        input_model_name = input_model['name']
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'train': AUTOCUT_TRAIN_CONFIG_1
        })['train']['autocut']['input_source'].get('file_list') == (
            f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
            f'{input_model_name}/file-list.lst'
        )

    def test_should_include_eval_name_in_eval_output_path(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'dataset': {
                **DEFAULT_EXPERIMENT_DATA['dataset'],
                'eval_name': DATASET_EVAL_NAME_1
            }
        })['eval_output_path'] == (
            f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
            f'{DEFAULT_MODEL_NAME}/evaluation-results/{DATASET_EVAL_NAME_1}'
        )

    def test_should_include_image_config(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'xyz_image': IMAGE_1
        })['xyz_image'] == IMAGE_1

    def test_should_include_config_property(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'config': CUSTOM_CONFIG_1
        })['config'] == CUSTOM_CONFIG_1

    def test_should_include_train_config(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'train': TRAIN_CONFIG_1
        })['train'] == TRAIN_CONFIG_1

    def test_should_enable_resume_by_default(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['resume'] is True

    def test_should_disable_resume_if_disabled_by_experiment(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'resume': 'false'
        })['resume'] is False

    def test_should_fallback_to_environ_default_limit(
            self, get_default_limit_mock):
        get_default_limit_mock.return_value = '123'
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['limit'] == '123'

    def test_should_determine_source_file_list(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['source_file_list'] == DEFAULT_FILE_LIST

    def test_should_raise_exception_with_unknown_config(self):
        with pytest.raises(InvalidConfigError):
            get_conf_for_experiment_data({
                **DEFAULT_EXPERIMENT_DATA,
                'train': UNKNOWN_TRAIN_CONFIG_1
            })

    def test_should_use_default_to_grobid_train_tasks_with_grobid_train_config(
            self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'train': GROBID_TRAIN_CONFIG_1
        })['tasks'] == DEFAULT_GROBID_TRAIN_TASKS

    def test_should_determine_run_name(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['run_name'] == f'{DEFAULT_DATASET_NAME}_{DEFAULT_MODEL_NAME}'

    def test_should_determine_eval_output_path(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['eval_output_path'] == (
            f'{DEFAULT_SOURCE_DATA_PATH}-results/{NAMESPACE_1}/'
            f'{DEFAULT_MODEL_NAME}/evaluation-results/all'
        )

    def test_should_determine_output_suffix(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['output_suffix'] == '.xml.gz'

    def test_should_include_limit_in_output_file_list(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'limit': f'{LIMIT_1}'
        })['output_file_list'] == f'file-list-{LIMIT_1}.lst'

    def test_should_include_tasks(self):
        tasks = ['task1', 'task2']
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'tasks': tasks
        })['tasks'] == tasks

    def test_should_determine_source_data_path(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['source_data_path'] == DEFAULT_SOURCE_DATA_PATH

    def test_should_use_default_to_convert_tasks_without_train_config(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['tasks'] == DEFAULT_CONVERT_AND_EVALUATE_TASKS

    def test_should_determine_output_file_list(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['output_file_list'] == 'file-list.lst'

    def test_should_use_default_to_autocut_train_tasks_with_autocut_train_config(
            self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'train': AUTOCUT_TRAIN_CONFIG_1
        })['tasks'] == DEFAULT_AUTOCUT_TRAIN_TASKS

    def test_should_use_limit_from_experiment(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'limit': '123'
        })['limit'] == '123'

    def test_should_fallback_to_dataset_limit(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA,
            'dataset': {
                **DEFAULT_EXPERIMENT_DATA['dataset'],
                'limit': '123'
            }
        })['limit'] == '123'

    def test_should_pass_through_model(self):
        assert get_conf_for_experiment_data({
            **DEFAULT_EXPERIMENT_DATA
        })['model'] == DEFAULT_EXPERIMENT_DATA['model']