    def test_set_context(self):
        context = 'rsmtool'
        new_context = 'rsmcompare'
        config = Configuration({"flag_column": "[advisories]"},
                               context=context)
        config.context = new_context
        eq_(config.context, new_context)

    def test_pop_value(self):
        dictionary = {
            'experiment_id': '001',
            'train_file': 'path/to/train.tsv',
            'test_file': 'path/to/test.tsv',
            "model": 'LinearRegression'
        }
        config = Configuration(dictionary)
        value = config.pop("experiment_id")
        eq_(value, '001')

    def test_get_rater_error_variance_none(self):
        dictionary = {
            "experiment_id": 'abs',
            "model": 'LinearRegression',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv"
        }
        config = Configuration(dictionary)
        rater_error_variance = config.get_rater_error_variance()
        eq_(rater_error_variance, None)
    def test_save(self):
        dictionary = {"experiment_id": '001', "flag_column": "abc"}
        config = Configuration(dictionary)
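        # save() writes the configuration to output/<id>_<context>.json
        # under the current working directory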
        config.save()

        out_path = 'output/001_rsmtool.json'
        with open(out_path) as buff:
            config_new = json.loads(buff.read())
        rmtree('output')
        eq_(config_new, dictionary)
    def test_check_exclude_listwise_false(self):
        dictionary = {
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "model": 'LinearRegression'
        }
        config = Configuration(dictionary)
        exclude_list_wise = config.check_exclude_listwise()
        eq_(exclude_list_wise, False)
    def test_check_flag_column_wrong_format(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column": "[advisories]",
            "model": 'LinearRegression'
        })

        config.check_flag_column()
    def test_get_trim_tolerance_no_min_max(self):
        dictionary = {
            "experiment_id": '001',
            "trim_tolerance": 0.49,
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "model": 'LinearRegression'
        }
        config = Configuration(dictionary)
        trim_min_max_tolerance = config.get_trim_min_max_tolerance()
        eq_(trim_min_max_tolerance, (None, None, 0.49))
    def test_save_rsmsummarize(self):
        dictionary = {"summary_id": '001', 'experiment_dirs': ['a', 'b', 'c']}
        config = Configuration(dictionary, context='rsmsummarize')
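        # in the 'rsmsummarize' context the saved file is named after summary_id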
        config.save()

        out_path = 'output/001_rsmsummarize.json'
        with open(out_path) as buff:
            config_new = json.loads(buff.read())
        rmtree('output')
        for key in dictionary:
            eq_(config_new[key], dictionary[key])
    def test_check_flag_column_no_values(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column": None,
            "model": 'LinearRegression'
        })

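        # a flag_column value of None should yield an empty dictionary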
        flag_dict = config.check_flag_column()
        eq_(flag_dict, {})
    def test_copy_not_deep(self):
        dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6,
                      "object": [1, 2, 3]}
        config = Configuration(dictionary)
        config_copy = config.copy(deep=False)
        assert_not_equal(id(config), id(config_copy))
        for key in config.keys():

            # check to make sure this is a shallow copy
            if key == "object":
                assert_equal(id(config[key]), id(config_copy[key]))
            assert_equal(config[key], config_copy[key])
    def test_check_flag_column_wrong_partition(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column_test": {
                "advisories": 123
            },
            "model": 'LinearRegression'
        })

        config.check_flag_column(partition='eval')
    def test_check_flag_column_keep_numeric(self):
        input_dict = {"advisory flag": [1, 2, 3]}
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column": input_dict,
            "model": 'LinearRegression'
        })

        output_dict = config.check_flag_column()
        eq_(output_dict, {"advisory flag": [1, 2, 3]})
    def test_get_trim_min_max_no_tolerance(self):
        dictionary = {
            "experiment_id": '001',
            "trim_min": 1,
            "trim_max": 6,
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "model": 'LinearRegression'
        }
        config = Configuration(dictionary)
        trim_min_max_tolerance = config.get_trim_min_max_tolerance()
        eq_(trim_min_max_tolerance, (1.0, 6.0, 0.4998))
    def test_check_exclude_listwise_true(self):
        dictionary = {
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "min_items_per_candidate": 4,
            "candidate_column": "candidate",
            "model": 'LinearRegression'
        }
        config = Configuration(dictionary)
        exclude_list_wise = config.check_exclude_listwise()
        eq_(exclude_list_wise, True)
    def test_get(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "trim_min": 1,
            "trim_max": 6,
            "flag_column": "[advisories]",
            "model": 'LinearRegression'
        })

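        # get() returns the stored value, or the supplied default for a missing key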
        eq_(config.get('flag_column'), "[advisories]")
        eq_(config.get('fasdfasfasdfa', 'hi'), 'hi')
    def test_check_flag_column_mismatched_partition_both(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column_test": {
                "advisories": 123
            },
            "model": 'LinearRegression'
        })

        config.check_flag_column(flag_column='flag_column_test',
                                 partition='both')
    def test_check_flag_column_convert_to_list_keep_numeric(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column": {
                "advisories": 123
            },
            "model": 'LinearRegression'
        })

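        # scalar advisory values are wrapped in a list and numeric types are preserved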
        flag_dict = config.check_flag_column()
        eq_(flag_dict, {"advisories": [123]})
    def test_check_flag_column_flag_column_test(self):
        input_dict = {"advisory flag": ['0']}
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column_test": input_dict,
            "flag_column": input_dict,
            "model": 'LinearRegression'
        })

        output_dict = config.check_flag_column("flag_column_test")
        eq_(input_dict, output_dict)
    def test_keys(self):
        configdict = {
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "trim_min": 1,
            "trim_max": 6,
            "flag_column": "abc",
            "model": 'LinearRegression'
        }

        config = Configuration(configdict)
        given_keys = configdict.keys()
        computed_keys = config.keys()
        assert all([given_key in computed_keys for given_key in given_keys])
    def test_get_trim_min_max_tolerance_none(self):
        dictionary = {
            'experiment_id': '001',
            'id_column': 'A',
            'candidate_column': 'B',
            'train_file': 'path/to/train.tsv',
            'test_file': 'path/to/test.tsv',
            'features': 'path/to/features.csv',
            "model": 'LinearRegression',
            'subgroups': ['C']
        }

        config = Configuration(dictionary)
        trim_min_max_tolerance = config.get_trim_min_max_tolerance()
        eq_(trim_min_max_tolerance, (None, None, 0.4998))
    def test_set_context(self):
        context = 'rsmtool'
        new_context = 'rsmcompare'
        config = Configuration(
            {
                "experiment_id": '001',
                "train_file": "/foo/train.csv",
                "test_file": "/foo/test.csv",
                "trim_min": 1,
                "trim_max": 6,
                "flag_column": "[advisories]",
                "model": 'LinearRegression'
            },
            context=context)
        config.context = new_context
        eq_(config.context, new_context)
def test_run_experiment_lr_compare_with_object():
    """Test rsmcompare using a Configuration object rather than a file."""

    source = 'lr-self-compare-object'
    experiment_id = 'lr_self_compare_object'

    configdir = join(rsmtool_test_dir, 'data', 'experiments', source)

    config_dict = {
        "comparison_id": "lr_self_compare_object",
        "experiment_dir_old": "lr-subgroups",
        "experiment_id_old": "lr_subgroups",
        "description_old": "Using all features with a LinearRegression model.",
        "use_scaled_predictions_old": True,
        "experiment_dir_new": "lr-subgroups",
        "experiment_id_new": "lr_subgroups",
        "description_new": "Using all features with a LinearRegression model.",
        "use_scaled_predictions_new": True,
        "subgroups": ["QUESTION"]
    }

    config_obj = Configuration(config_dict,
                               context='rsmcompare',
                               configdir=configdir)

    check_run_comparison(source, experiment_id, config_obj_or_dict=config_obj)
    def test_copy_not_deep(self):
        dictionary = {
            "experiment_id": '001',
            'trim_min': 1,
            'trim_max': 6,
            "object": [1, 2, 3]
        }
        config = Configuration(dictionary)
        config_copy = config.copy(deep=False)
        assert_not_equal(id(config), id(config_copy))
        for key in config.keys():

            # check to make sure this is a shallow copy
            if key == "object":
                assert_equal(id(config[key]), id(config_copy[key]))
            assert_equal(config[key], config_copy[key])
    def test_check_flag_column_convert_to_list_test(self):
        config = Configuration({"flag_column": {"advisories": "0"}})

        flag_dict = self.check_logging_output('evaluating',
                                              config.check_flag_column,
                                              partition='test')
        eq_(flag_dict, {"advisories": ['0']})
def test_run_experiment_lr_eval_with_object():
    """Test rsmeval using a Configuration object rather than a file."""

    source = 'lr-eval-object'
    experiment_id = 'lr_eval_object'

    configdir = join(rsmtool_test_dir, 'data', 'experiments', source)

    config_dict = {
        "predictions_file":
        "../../files/predictions_scaled_with_subgroups.csv",
        "system_score_column": "score",
        "description": "An evaluation of LinearRegression predictions.",
        "human_score_column": "h1",
        "id_column": "id",
        "experiment_id": "lr_eval_object",
        "subgroups": "QUESTION",
        "scale_with": "asis",
        "trim_min": 1,
        "trim_max": 6
    }

    config_obj = Configuration(config_dict,
                               context='rsmeval',
                               configdir=configdir)

    check_run_evaluation(source, experiment_id, config_obj_or_dict=config_obj)
def test_run_experiment_lr_with_object_no_configdir():
    """Test rsmtool using a Configuration object and no specified configdir."""
    source = 'lr-object-no-path'
    experiment_id = 'lr_object_no_path'

    # set up a temporary directory since
    # we will be using getcwd
    old_file_dict = {'train': 'data/files/train.csv',
                     'test': 'data/files/test.csv',
                     'features': 'data/experiments/lr-object-no-path/features.csv'}

    temp_dir = tempfile.TemporaryDirectory(prefix=getcwd())
    new_file_dict = copy_data_files(temp_dir.name,
                                    old_file_dict,
                                    rsmtool_test_dir)

    config_dict = {"train_file": new_file_dict['train'],
                   "id_column": "ID",
                   "use_scaled_predictions": True,
                   "test_label_column": "score",
                   "train_label_column": "score",
                   "test_file": new_file_dict['test'],
                   "trim_max": 6,
                   "features": new_file_dict['features'],
                   "trim_min": 1,
                   "model": "LinearRegression",
                   "experiment_id": "lr_object_no_path",
                   "description": "Using all features with an LinearRegression model."}

    config_obj = Configuration(config_dict)

    check_run_experiment(source,
                         experiment_id,
                         config_obj_or_dict=config_obj)
def test_run_experiment_lr_with_object_and_configdir():
    """Test rsmtool using a Configuration object and specified configdir."""
    source = 'lr-object'
    experiment_id = 'lr_object'

    configdir = join(rsmtool_test_dir,
                     'data',
                     'experiments',
                     source)

    config_dict = {"train_file": "../../files/train.csv",
                   "id_column": "ID",
                   "use_scaled_predictions": True,
                   "test_label_column": "score",
                   "train_label_column": "score",
                   "test_file": "../../files/test.csv",
                   "trim_max": 6,
                   "features": "features.csv",
                   "trim_min": 1,
                   "model": "LinearRegression",
                   "experiment_id": "lr_object",
                   "description": "Using all features with an LinearRegression model."}

    config_obj = Configuration(config_dict, configdir=configdir)

    check_run_experiment(source,
                         experiment_id,
                         config_obj_or_dict=config_obj)
    def test_check_len(self):
        expected_len = 2
        config = Configuration({
            "flag_column": {
                "advisories": 123
            },
            'other_column': 5
        })
        eq_(len(config), expected_len)
    def test_set_configdir(self):
        configdir = '/path/to/dir/'
        new_configdir = 'path/that/is/new/'

        config = Configuration(
            {
                "experiment_id": '001',
                "train_file": "/foo/train.csv",
                "test_file": "/foo/test.csv",
                "trim_min": 1,
                "trim_max": 6,
                "flag_column": "[advisories]",
                "model": 'LinearRegression'
            },
            configdir=configdir)

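        # the configdir setter stores the absolute form of the new path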
        config.configdir = new_configdir
        eq_(config.configdir, abspath(new_configdir))
    def test_save(self):
        dictionary = {
            'experiment_id': '001',
            'train_file': 'path/to/train.tsv',
            'test_file': 'path/to/test.tsv',
            "flag_column": "abc",
            "model": 'LinearRegression'
        }

        config = Configuration(dictionary)
        config.save()

        with open('output/001_rsmtool.json', 'r') as buff:
            config_new = json.loads(buff.read())

        rmtree('output')
        for key in dictionary:
            eq_(config_new[key], dictionary[key])
    def test_str_correct(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "flag_column": "[advisories]",
            "model": 'LinearRegression'
        })

        eq_(config['flag_column'], '[advisories]')
    def test_copy_not_deep(self):
        config = Configuration({
            "experiment_id": '001',
            "train_file": "/foo/train.csv",
            "test_file": "/foo/test.csv",
            "trim_min": 1,
            "trim_max": 6,
            "flag_column": [1, 2, 3],
            "model": 'LinearRegression'
        })

        config_copy = config.copy(deep=False)
        assert_not_equal(id(config), id(config_copy))
        for key in config.keys():

            # check to make sure this is a shallow copy
            if key == "flag_column":
                assert_equal(id(config[key]), id(config_copy[key]))
            assert_equal(config[key], config_copy[key])
    def test_get_names_and_paths_with_feature_list(self):

        filepaths = ['path/to/train.tsv',
                     'path/to/test.tsv']
        filenames = ['train', 'test']

        expected = (filenames, filepaths)

        dictionary = {'id_column': 'A',
                      'candidate_column': 'B',
                      'train_file': 'path/to/train.tsv',
                      'test_file': 'path/to/test.tsv',
                      'features': ['FEATURE1', 'FEATURE2'],
                      'subgroups': ['C']}
        config = Configuration(dictionary)
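        # 'features' is an inline list rather than a file path, so it contributes no name/path pair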
        values_for_reader = config.get_names_and_paths(['train_file', 'test_file',
                                                        'features'],
                                                       ['train', 'test',
                                                        'feature_specs'])
        eq_(values_for_reader, expected)
    def test_check_flag_column_convert_to_list(self):
        config = Configuration({"flag_column": {"advisories": "0"}})
        flag_dict = config.check_flag_column()
        eq_(flag_dict, {"advisories": ['0']})

    def test_check_flag_column_convert_to_list_keep_numeric(self):
        config = Configuration({"flag_column": {"advisories": 123}})
        flag_dict = config.check_flag_column()
        eq_(flag_dict, {"advisories": [123]})

    def test_check_flag_column_wrong_format(self):
        config = Configuration({"flag_column": "[advisories]"})
        config.check_flag_column()

    def test_check_exclude_listwise_true(self):
        dictionary = {"experiment_id": '001', "min_items_per_candidate": 4}
        config = Configuration(dictionary)
        exclude_list_wise = config.check_exclude_listwise()
        eq_(exclude_list_wise, True)

    def test_check_exclude_listwise_false(self):
        dictionary = {"experiment_id": '001'}
        config = Configuration(dictionary)
        exclude_list_wise = config.check_exclude_listwise()
        eq_(exclude_list_wise, False)

    def test_get_trim_min_max_none(self):
        dictionary = {"experiment_id": '001'}
        config = Configuration(dictionary)
        trim_min_max = config.get_trim_min_max()
        eq_(trim_min_max, (None, None))

    def test_pop_value_default(self):
        dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
        config = Configuration(dictionary)
        value = config.pop("foo", "bar")
        eq_(value, 'bar')

    def test_check_flag_column_keep_numeric(self):
        input_dict = {"advisory flag": [1, 2, 3]}
        config = Configuration({"flag_column": input_dict})
        output_dict = config.check_flag_column()
        eq_(output_dict, {"advisory flag": [1, 2, 3]})

    def test_to_dict(self):
        dictionary = {"flag_column": "abc", "other_column": 'xyz'}
        config = Configuration(dictionary)
        eq_(config.to_dict(), dictionary)

    def test_keys(self):
        dictionary = {"flag_column": "abc", "other_column": 'xyz'}
        keys = ['flag_column', 'other_column']
        config = Configuration(dictionary)
        eq_(sorted(config.keys()), sorted(keys))

    def test_get_trim_min_max_values(self):
        dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
        config = Configuration(dictionary)
        trim_min_max = config.get_trim_min_max()
        eq_(trim_min_max, (1.0, 6.0))

    def test_check_flag_column_no_values(self):
        config = Configuration({"flag_column": None})
        flag_dict = config.check_flag_column()
        eq_(flag_dict, {})

    def test_get(self):
        config = Configuration({"flag_column": "[advisories]"})
        eq_(config.get('flag_column'), "[advisories]")
        eq_(config.get('fasdfasfasdfa', 'hi'), 'hi')

    def test_check_flag_column_flag_column_test(self):
        input_dict = {"advisory flag": ['0']}
        config = Configuration({"flag_column_test": input_dict})
        output_dict = config.check_flag_column("flag_column_test")
        eq_(input_dict, output_dict)

    def test_set_filepath(self):
        filepath = '/path/to/file.json'
        new_file_path = 'path/that/is/new.json'
        config = Configuration({"flag_column": "[advisories]"}, filepath)
        config.filepath = new_file_path
        eq_(config.filepath, new_file_path)

    def test_pop_value(self):
        dictionary = {"experiment_id": '001', 'trim_min': 1, 'trim_max': 6}
        config = Configuration(dictionary)
        value = config.pop("experiment_id")
        eq_(value, '001')

    def test_str_correct(self):
        config_dict = {'flag_column': '[advisories]'}
        config = Configuration(config_dict)
        print(config)
        eq_(config.__str__(), 'flag_column')

    def test_check_flag_column_mismatched_partition_both(self):
        config = Configuration({"flag_column_test": {"advisories": 123}})
        config.check_flag_column(flag_column='flag_column_test',
                                 partition='both')

    def test_check_flag_column_wrong_partition(self):
        config = Configuration({"flag_column_test": {"advisories": 123}})
        config.check_flag_column(partition='eval')

    def test_items(self):
        dictionary = {"flag_column": "abc", "other_column": 'xyz'}
        items = [('flag_column', 'abc'), ('other_column', 'xyz')]
        config = Configuration(dictionary)
        eq_(sorted(config.items()), sorted(items))

    def test_values(self):
        dictionary = {"flag_column": "abc", "other_column": 'xyz'}
        values = ['abc', 'xyz']
        config = Configuration(dictionary)
        eq_(sorted(config.values()), sorted(values))