def test_set_context(self):
    context = 'rsmtool'
    new_context = 'rsmcompare'
    config = Configuration({"flag_column": "[advisories]"}, context=context)
    config.context = new_context
    eq_(config.context, new_context)

def test_pop_value(self):
    dictionary = {
        'experiment_id': '001',
        'train_file': 'path/to/train.tsv',
        'test_file': 'path/to/test.tsv',
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    value = config.pop("experiment_id")
    eq_(value, '001')

def test_get_rater_error_variance_none(self):
    dictionary = {
        "experiment_id": 'abs',
        "model": 'LinearRegression',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv"
    }
    config = Configuration(dictionary)
    rater_error_variance = config.get_rater_error_variance()
    eq_(rater_error_variance, None)

def test_save(self):
    dictionary = {"experiment_id": '001', "flag_column": "abc"}
    config = Configuration(dictionary)
    config.save()
    out_path = 'output/001_rsmtool.json'
    with open(out_path) as buff:
        config_new = json.loads(buff.read())
    rmtree('output')
    # compare only the keys we passed in, since the saved
    # configuration may contain additional default fields
    for key in dictionary:
        eq_(config_new[key], dictionary[key])

def test_check_exclude_listwise_false(self):
    dictionary = {
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    exclude_list_wise = config.check_exclude_listwise()
    eq_(exclude_list_wise, False)

@raises(ValueError)
def test_check_flag_column_wrong_format(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column": "[advisories]",
        "model": 'LinearRegression'
    })
    config.check_flag_column()

def test_get_trim_tolerance_no_min_max(self):
    dictionary = {
        "experiment_id": '001',
        "trim_tolerance": 0.49,
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    trim_min_max_tolerance = config.get_trim_min_max_tolerance()
    eq_(trim_min_max_tolerance, (None, None, 0.49))

def test_save_rsmsummarize(self):
    dictionary = {"summary_id": '001', 'experiment_dirs': ['a', 'b', 'c']}
    config = Configuration(dictionary, context='rsmsummarize')
    config.save()
    out_path = 'output/001_rsmsummarize.json'
    with open(out_path) as buff:
        config_new = json.loads(buff.read())
    rmtree('output')
    for key in dictionary:
        eq_(config_new[key], dictionary[key])

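# NOTE (assumption): as the save() tests above suggest, Configuration.save()
# appears to write the configuration to output/{id}_{context}.json under the
# current working directory, which is why each save test cleans up with
# rmtree('output').
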
def test_check_flag_column_no_values(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column": None,
        "model": 'LinearRegression'
    })
    flag_dict = config.check_flag_column()
    eq_(flag_dict, {})

def test_copy_not_deep(self):
    dictionary = {
        "experiment_id": '001',
        'trim_min': 1,
        'trim_max': 6,
        "object": [1, 2, 3]
    }
    config = Configuration(dictionary)
    config_copy = config.copy(deep=False)
    assert_not_equal(id(config), id(config_copy))
    for key in config.keys():
        # check to make sure this is a shallow copy
        if key == "object":
            assert_equal(id(config[key]), id(config_copy[key]))
        assert_equal(config[key], config_copy[key])

@raises(ValueError)
def test_check_flag_column_wrong_partition(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column_test": {"advisories": 123},
        "model": 'LinearRegression'
    })
    config.check_flag_column(partition='eval')

def test_check_flag_column_keep_numeric(self):
    input_dict = {"advisory flag": [1, 2, 3]}
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column": input_dict,
        "model": 'LinearRegression'
    })
    output_dict = config.check_flag_column()
    eq_(output_dict, {"advisory flag": [1, 2, 3]})

def test_get_trim_min_max_no_tolerance(self):
    dictionary = {
        "experiment_id": '001',
        "trim_min": 1,
        "trim_max": 6,
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    trim_min_max_tolerance = config.get_trim_min_max_tolerance()
    eq_(trim_min_max_tolerance, (1.0, 6.0, 0.4998))

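# NOTE (assumption): judging from this test and test_get_trim_tolerance_no_min_max
# above (which sets an explicit 0.49), 0.4998 appears to be the default trim
# tolerance that get_trim_min_max_tolerance() falls back to when
# "trim_tolerance" is absent from the configuration.
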
def test_check_exclude_listwise_true(self):
    dictionary = {
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "min_items_per_candidate": 4,
        "candidate_column": "candidate",
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    exclude_list_wise = config.check_exclude_listwise()
    eq_(exclude_list_wise, True)

def test_get(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "trim_min": 1,
        "trim_max": 6,
        "flag_column": "[advisories]",
        "model": 'LinearRegression'
    })
    eq_(config.get('flag_column'), "[advisories]")
    eq_(config.get('fasdfasfasdfa', 'hi'), 'hi')

@raises(ValueError)
def test_check_flag_column_mismatched_partition_both(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column_test": {"advisories": 123},
        "model": 'LinearRegression'
    })
    config.check_flag_column(flag_column='flag_column_test', partition='both')

def test_check_flag_column_convert_to_list_keep_numeric(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column": {"advisories": 123},
        "model": 'LinearRegression'
    })
    flag_dict = config.check_flag_column()
    eq_(flag_dict, {"advisories": [123]})

def test_check_flag_column_flag_column_test(self):
    input_dict = {"advisory flag": ['0']}
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column_test": input_dict,
        "flag_column": input_dict,
        "model": 'LinearRegression'
    })
    output_dict = config.check_flag_column("flag_column_test")
    eq_(input_dict, output_dict)

def test_keys(self):
    configdict = {
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "trim_min": 1,
        "trim_max": 6,
        "flag_column": "abc",
        "model": 'LinearRegression'
    }
    config = Configuration(configdict)
    given_keys = configdict.keys()
    computed_keys = config.keys()
    assert all([given_key in computed_keys for given_key in given_keys])

def test_get_trim_min_max_tolerance_none(self):
    dictionary = {
        'experiment_id': '001',
        'id_column': 'A',
        'candidate_column': 'B',
        'train_file': 'path/to/train.tsv',
        'test_file': 'path/to/test.tsv',
        'features': 'path/to/features.csv',
        "model": 'LinearRegression',
        'subgroups': ['C']
    }
    config = Configuration(dictionary)
    trim_min_max_tolerance = config.get_trim_min_max_tolerance()
    eq_(trim_min_max_tolerance, (None, None, 0.4998))

def test_set_context(self):
    context = 'rsmtool'
    new_context = 'rsmcompare'
    config = Configuration(
        {
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "trim_min": 1,
            "trim_max": 6,
            "flag_column": "[advisories]",
            "model": 'LinearRegression'
        },
        context=context)
    config.context = new_context
    eq_(config.context, new_context)

def test_run_experiment_lr_compare_with_object():
    """Test rsmcompare using a Configuration object rather than a file."""
    source = 'lr-self-compare-object'
    experiment_id = 'lr_self_compare_object'
    configdir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_dict = {
        "comparison_id": "lr_self_compare_object",
        "experiment_dir_old": "lr-subgroups",
        "experiment_id_old": "lr_subgroups",
        "description_old": "Using all features with a LinearRegression model.",
        "use_scaled_predictions_old": True,
        "experiment_dir_new": "lr-subgroups",
        "experiment_id_new": "lr_subgroups",
        "description_new": "Using all features with a LinearRegression model.",
        "use_scaled_predictions_new": True,
        "subgroups": ["QUESTION"]
    }
    config_obj = Configuration(config_dict, context='rsmcompare', configdir=configdir)
    check_run_comparison(source, experiment_id, config_obj_or_dict=config_obj)

def test_check_flag_column_convert_to_list_test(self):
    config = Configuration({"flag_column": {"advisories": "0"}})
    flag_dict = self.check_logging_output('evaluating',
                                          config.check_flag_column,
                                          partition='test')
    eq_(flag_dict, {"advisories": ['0']})

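# NOTE (assumption): check_logging_output is a helper assumed to be defined
# elsewhere on this test class. A minimal sketch of what such a helper might
# look like, capturing log records emitted while calling `func` and asserting
# that `expected_msg` occurs somewhere in the captured output (all names here
# are hypothetical):
#
#     def check_logging_output(self, expected_msg, func, *args, **kwargs):
#         with LogCapture() as logs:  # e.g. testfixtures.LogCapture
#             result = func(*args, **kwargs)
#         assert expected_msg in str(logs)
#         return result
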
def test_run_experiment_lr_eval_with_object():
    """Test rsmeval using a Configuration object rather than a file."""
    source = 'lr-eval-object'
    experiment_id = 'lr_eval_object'
    configdir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_dict = {
        "predictions_file": "../../files/predictions_scaled_with_subgroups.csv",
        "system_score_column": "score",
        "description": "An evaluation of LinearRegression predictions.",
        "human_score_column": "h1",
        "id_column": "id",
        "experiment_id": "lr_eval_object",
        "subgroups": "QUESTION",
        "scale_with": "asis",
        "trim_min": 1,
        "trim_max": 6
    }
    config_obj = Configuration(config_dict, context='rsmeval', configdir=configdir)
    check_run_evaluation(source, experiment_id, config_obj_or_dict=config_obj)

def test_run_experiment_lr_with_object_no_configdir():
    """Test rsmtool using a Configuration object and no specified configdir."""
    source = 'lr-object-no-path'
    experiment_id = 'lr_object_no_path'

    # set up a temporary directory since
    # we will be using getcwd
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-object-no-path/features.csv'}
    temp_dir = tempfile.TemporaryDirectory(prefix=getcwd())
    new_file_dict = copy_data_files(temp_dir.name, old_file_dict, rsmtool_test_dir)

    config_dict = {"train_file": new_file_dict['train'],
                   "id_column": "ID",
                   "use_scaled_predictions": True,
                   "test_label_column": "score",
                   "train_label_column": "score",
                   "test_file": new_file_dict['test'],
                   "trim_max": 6,
                   "features": new_file_dict['features'],
                   "trim_min": 1,
                   "model": "LinearRegression",
                   "experiment_id": "lr_object_no_path",
                   "description": "Using all features with a LinearRegression model."}
    config_obj = Configuration(config_dict)
    check_run_experiment(source, experiment_id, config_obj_or_dict=config_obj)

def test_run_experiment_lr_with_object_and_configdir():
    """Test rsmtool using a Configuration object and specified configdir."""
    source = 'lr-object'
    experiment_id = 'lr_object'
    configdir = join(rsmtool_test_dir, 'data', 'experiments', source)
    config_dict = {"train_file": "../../files/train.csv",
                   "id_column": "ID",
                   "use_scaled_predictions": True,
                   "test_label_column": "score",
                   "train_label_column": "score",
                   "test_file": "../../files/test.csv",
                   "trim_max": 6,
                   "features": "features.csv",
                   "trim_min": 1,
                   "model": "LinearRegression",
                   "experiment_id": "lr_object",
                   "description": "Using all features with a LinearRegression model."}
    config_obj = Configuration(config_dict, configdir=configdir)
    check_run_experiment(source, experiment_id, config_obj_or_dict=config_obj)

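# NOTE (assumption): taken together, the two rsmtool object tests above suggest
# that relative paths inside the configuration dictionary (e.g.
# "../../files/train.csv") are resolved against `configdir` when one is given,
# and against the current working directory when it is not.
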
def test_check_len(self):
    expected_len = 2
    config = Configuration({"flag_column": {"advisories": 123},
                            'other_column': 5})
    eq_(len(config), expected_len)

def test_set_configdir(self):
    configdir = '/path/to/dir/'
    new_configdir = 'path/that/is/new/'
    config = Configuration(
        {
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "trim_min": 1,
            "trim_max": 6,
            "flag_column": "[advisories]",
            "model": 'LinearRegression'
        },
        configdir=configdir)
    config.configdir = new_configdir
    eq_(config.configdir, abspath(new_configdir))

def test_save(self):
    dictionary = {
        'experiment_id': '001',
        'train_file': 'path/to/train.tsv',
        'test_file': 'path/to/test.tsv',
        "flag_column": "abc",
        "model": 'LinearRegression'
    }
    config = Configuration(dictionary)
    config.save()
    with open('output/001_rsmtool.json', 'r') as buff:
        config_new = json.loads(buff.read())
    rmtree('output')
    for key in dictionary:
        eq_(config_new[key], dictionary[key])

def test_str_correct(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "flag_column": "[advisories]",
        "model": 'LinearRegression'
    })
    eq_(config['flag_column'], '[advisories]')

def test_copy_not_deep(self):
    config = Configuration({
        "experiment_id": '001',
        "train_file": "/foo/train.csv",
        "test_file": "/foo/test.csv",
        "trim_min": 1,
        "trim_max": 6,
        "flag_column": [1, 2, 3],
        "model": 'LinearRegression'
    })
    config_copy = config.copy(deep=False)
    assert_not_equal(id(config), id(config_copy))
    for key in config.keys():
        # check to make sure this is a shallow copy
        if key == "flag_column":
            assert_equal(id(config[key]), id(config_copy[key]))
        assert_equal(config[key], config_copy[key])

def test_get_names_and_paths_with_feature_list(self):
    filepaths = ['path/to/train.tsv', 'path/to/test.tsv']
    filenames = ['train', 'test']
    expected = (filenames, filepaths)
    dictionary = {'id_column': 'A',
                  'candidate_column': 'B',
                  'train_file': 'path/to/train.tsv',
                  'test_file': 'path/to/test.tsv',
                  'features': ['FEATURE1', 'FEATURE2'],
                  'subgroups': ['C']}
    config = Configuration(dictionary)
    values_for_reader = config.get_names_and_paths(['train_file', 'test_file', 'features'],
                                                   ['train', 'test', 'feature_specs'])
    eq_(values_for_reader, expected)

def test_check_flag_column_convert_to_list(self):
    config = Configuration({"flag_column": {"advisories": "0"}})
    flag_dict = config.check_flag_column()
    eq_(flag_dict, {"advisories": ['0']})

def test_check_flag_column_convert_to_list_keep_numeric(self):
    config = Configuration({"flag_column": {"advisories": 123}})
    flag_dict = config.check_flag_column()
    eq_(flag_dict, {"advisories": [123]})

@raises(ValueError)
def test_check_flag_column_wrong_format(self):
    config = Configuration({"flag_column": "[advisories]"})
    config.check_flag_column()

def test_check_exclude_listwise_true(self):
    dictionary = {"experiment_id": '001', "min_items_per_candidate": 4}
    config = Configuration(dictionary)
    exclude_list_wise = config.check_exclude_listwise()
    eq_(exclude_list_wise, True)

def test_check_exclude_listwise_false(self):
    dictionary = {"experiment_id": '001'}
    config = Configuration(dictionary)
    exclude_list_wise = config.check_exclude_listwise()
    eq_(exclude_list_wise, False)

def test_get_trim_min_max_none(self):
    dictionary = {"experiment_id": '001'}
    config = Configuration(dictionary)
    trim_min_max = config.get_trim_min_max()
    eq_(trim_min_max, (None, None))

def test_pop_value_default(self):
    dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
    config = Configuration(dictionary)
    value = config.pop("foo", "bar")
    eq_(value, 'bar')

def test_check_flag_column_keep_numeric(self):
    input_dict = {"advisory flag": [1, 2, 3]}
    config = Configuration({"flag_column": input_dict})
    output_dict = config.check_flag_column()
    eq_(output_dict, {"advisory flag": [1, 2, 3]})

def test_to_dict(self):
    dictionary = {"flag_column": "abc", "other_column": 'xyz'}
    config = Configuration(dictionary)
    eq_(config.to_dict(), dictionary)

def test_keys(self):
    dictionary = {"flag_column": "abc", "other_column": 'xyz'}
    keys = ['flag_column', 'other_column']
    config = Configuration(dictionary)
    eq_(sorted(config.keys()), sorted(keys))

def test_get_trim_min_max_values(self):
    dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
    config = Configuration(dictionary)
    trim_min_max = config.get_trim_min_max()
    eq_(trim_min_max, (1.0, 6.0))

def test_check_flag_column_no_values(self):
    config = Configuration({"flag_column": None})
    flag_dict = config.check_flag_column()
    eq_(flag_dict, {})

def test_get(self):
    config = Configuration({"flag_column": "[advisories]"})
    eq_(config.get('flag_column'), "[advisories]")
    eq_(config.get('fasdfasfasdfa', 'hi'), 'hi')

def test_check_flag_column_flag_column_test(self):
    input_dict = {"advisory flag": ['0']}
    config = Configuration({"flag_column_test": input_dict})
    output_dict = config.check_flag_column("flag_column_test")
    eq_(input_dict, output_dict)

def test_set_filepath(self):
    filepath = '/path/to/file.json'
    new_file_path = 'path/that/is/new.json'
    config = Configuration({"flag_column": "[advisories]"}, filepath)
    config.filepath = new_file_path
    eq_(config.filepath, new_file_path)

def test_pop_value(self):
    dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
    config = Configuration(dictionary)
    value = config.pop("experiment_id")
    eq_(value, '001')

def test_str_correct(self):
    config_dict = {'flag_column': '[advisories]'}
    config = Configuration(config_dict)
    eq_(config.__str__(), 'flag_column')

@raises(ValueError)
def test_check_flag_column_mismatched_partition_both(self):
    config = Configuration({"flag_column_test": {"advisories": 123}})
    config.check_flag_column(flag_column='flag_column_test', partition='both')

@raises(ValueError)
def test_check_flag_column_wrong_partition(self):
    config = Configuration({"flag_column_test": {"advisories": 123}})
    config.check_flag_column(partition='eval')

def test_items(self):
    dictionary = {"flag_column": "abc", "other_column": 'xyz'}
    items = [('flag_column', 'abc'), ('other_column', 'xyz')]
    config = Configuration(dictionary)
    eq_(sorted(config.items()), sorted(items))

def test_values(self):
    dictionary = {"flag_column": "abc", "other_column": 'xyz'}
    values = ['abc', 'xyz']
    config = Configuration(dictionary)
    eq_(sorted(config.values()), sorted(values))