Exemple #1
0
def test_run_experiment_lr_with_object():
    # Basic experiment with a LinearRegression model in which the
    # configuration is built from a dictionary and handed to
    # `do_run_experiment` as a parsed configuration object rather than as a
    # path to a JSON file.
    #
    # This is a generator test: each `yield` emits one check (a callable
    # followed by its arguments).

    source = 'lr-object'
    experiment_id = 'lr_object'

    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       '{}.json'.format(experiment_id))

    config_dict = {
        "train_file": "../../files/train.csv",
        "id_column": "ID",
        "use_scaled_predictions": True,
        "test_label_column": "score",
        "train_label_column": "score",
        "test_file": "../../files/test.csv",
        "trim_max": 6,
        "features": "features.csv",
        "trim_min": 1,
        "model": "LinearRegression",
        "experiment_id": "lr_object",
        "description": "Using all features with an LinearRegression model."
    }

    config_parser = ConfigurationParser()
    config_parser.load_config_from_dict(config_dict)
    config_obj = config_parser.normalize_validate_and_process_config()

    # The previous code rebound `config_obj` to `config_file` at this point,
    # which discarded the parsed object and ran the experiment from the JSON
    # path, so the object code path was never exercised. Record the path on
    # the object instead and pass the object itself.
    # NOTE(review): assumes the configuration object exposes a settable
    # `filepath` attribute that is used to resolve the relative paths in
    # `config_dict` -- confirm against the rsmtool version under test.
    config_obj.filepath = config_file

    do_run_experiment(source, experiment_id, config_obj)

    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source,
                               'output')
    html_report = join('test_outputs', source, 'report',
                       '{}_report.html'.format(experiment_id))

    # Compare each generated CSV against its expected counterpart; generated
    # files without an expected counterpart are skipped.
    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        expected_csv_file = join(expected_output_dir, basename(csv_file))
        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_generated_output, csv_files, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, html_report
def test_run_experiment_lr_with_object():
    # Basic experiment with a LinearRegression model, configured through a
    # parsed configuration object (built from a dictionary) instead of a
    # JSON file path. Generator test: every `yield` emits one check.

    source = 'lr-object'
    experiment_id = 'lr_object'

    config_file = join(rsmtool_test_dir,
                       'data',
                       'experiments',
                       source,
                       '{}.json'.format(experiment_id))

    config_dict = {"train_file": "../../files/train.csv",
                   "id_column": "ID",
                   "use_scaled_predictions": True,
                   "test_label_column": "score",
                   "train_label_column": "score",
                   "test_file": "../../files/test.csv",
                   "trim_max": 6,
                   "features": "features.csv",
                   "trim_min": 1,
                   "model": "LinearRegression",
                   "experiment_id": "lr_object",
                   "description": "Using all features with an LinearRegression model."}

    config_parser = ConfigurationParser()
    config_parser.load_config_from_dict(config_dict)
    config_obj = config_parser.normalize_validate_and_process_config()

    # Bug fix: `config_obj` used to be overwritten with `config_file` here, so
    # the parsed object was thrown away and the file path was tested instead.
    # Keep the object and only attach the path to it.
    # NOTE(review): relies on a settable `filepath` attribute on the
    # configuration object (presumably used to resolve the relative paths
    # above) -- verify against the installed rsmtool.
    config_obj.filepath = config_file

    do_run_experiment(source, experiment_id, config_obj)

    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source, 'output')
    html_report = join('test_outputs', source, 'report', '{}_report.html'.format(experiment_id))

    # Only generated CSV files that have an expected counterpart are compared.
    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_generated_output, csv_files, experiment_id, 'rsmtool'
    yield check_scaled_coefficients, source, experiment_id
    yield check_report, html_report
Exemple #3
0
def test_run_experiment_lr_summary_with_object():
    # Basic rsmsummarize experiment comparing several rsmtool experiments,
    # configured through a parsed configuration object (built from a
    # dictionary) rather than a path to a JSON file.
    #
    # This is a generator test: each `yield` emits one check (a callable
    # followed by its arguments).

    source = 'lr-self-summary-object'

    config_file = join(rsmtool_test_dir, 'data', 'experiments', source,
                       'rsmsummarize.json')

    config_dict = {
        "summary_id": "model_comparison",
        "experiment_dirs": ["lr-subgroups", "lr-subgroups", "lr-subgroups"],
        "description": "Comparison of rsmtool experiment with itself."
    }

    config_parser = ConfigurationParser()
    config_parser.load_config_from_dict(config_dict)
    config_obj = config_parser.normalize_validate_and_process_config(
        context='rsmsummarize')

    # The previous code rebound `config_obj` to `config_file` at this point,
    # which discarded the parsed object and ran the summary from the JSON
    # path. Record the path on the object instead and pass the object itself.
    # NOTE(review): assumes the configuration object exposes a settable
    # `filepath` attribute used to resolve the relative experiment
    # directories -- confirm against the rsmtool version under test.
    config_obj.filepath = config_file

    do_run_summary(source, config_obj)

    html_report = join('test_outputs', source, 'report',
                       'model_comparison_report.html')

    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source,
                               'output')

    # Compare each generated CSV against its expected counterpart; generated
    # files without an expected counterpart are skipped.
    for csv_file in glob(join(output_dir, '*.csv')):
        expected_csv_file = join(expected_output_dir, basename(csv_file))
        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_report, html_report
def test_run_experiment_lr_summary_with_object():
    # Basic rsmsummarize experiment comparing several rsmtool experiments,
    # driven by a parsed configuration object instead of a JSON file path.
    # Generator test: every `yield` emits one check.

    source = 'lr-self-summary-object'

    config_file = join(rsmtool_test_dir,
                       'data',
                       'experiments',
                       source,
                       'rsmsummarize.json')

    config_dict = {"summary_id": "model_comparison",
                   "experiment_dirs": ["lr-subgroups", "lr-subgroups", "lr-subgroups"],
                   "description": "Comparison of rsmtool experiment with itself."}

    config_parser = ConfigurationParser()
    config_parser.load_config_from_dict(config_dict)
    config_obj = config_parser.normalize_validate_and_process_config(context='rsmsummarize')

    # Bug fix: `config_obj` used to be overwritten with `config_file` here, so
    # the parsed object was thrown away and the file path was tested instead.
    # Keep the object and only attach the path to it.
    # NOTE(review): relies on a settable `filepath` attribute on the
    # configuration object (presumably used to resolve the relative
    # experiment directories) -- verify against the installed rsmtool.
    config_obj.filepath = config_file

    do_run_summary(source, config_obj)

    html_report = join('test_outputs', source, 'report', 'model_comparison_report.html')

    output_dir = join('test_outputs', source, 'output')
    expected_output_dir = join(rsmtool_test_dir, 'data', 'experiments', source, 'output')

    # Only generated CSV files that have an expected counterpart are compared.
    csv_files = glob(join(output_dir, '*.csv'))
    for csv_file in csv_files:
        csv_filename = basename(csv_file)
        expected_csv_file = join(expected_output_dir, csv_filename)

        if exists(expected_csv_file):
            yield check_file_output, csv_file, expected_csv_file

    yield check_report, html_report
class TestConfigurationParser:
    """Tests for normalizing, validating and processing configurations
    with ``ConfigurationParser``."""

    # Long identifier shared by the ID tests 5, 6, 7 and 9 below, all of
    # which expect a ``ValueError``.
    _TOO_LONG_ID = ('this_is_a_really_really_really_'
                    'really_really_really_really_really_really_really_'
                    'really_really_really_really_really_really_really_'
                    'really_really_really_really_really_really_really_'
                    'really_really_really_long_id')

    def setUp(self):
        # every test starts from a fresh parser
        self.parser = ConfigurationParser()

    def _load_and_validate(self, settings, **validate_kwargs):
        # Load `settings` into the parser and return whatever
        # `validate_config` returns for the given keyword arguments.
        self.parser.load_config_from_dict(settings)
        return self.parser.validate_config(**validate_kwargs)

    def test_normalize_config(self):
        # old-style field names, including a valid `scale` value
        legacy = {'expID': 'experiment_1',
                  'train': 'data/rsmtool_smTrain.csv',
                  'LRmodel': 'empWt',
                  'feature': 'feature/feature_list.json',
                  'description': 'A sample model with 9 features '
                                 'trained using average score and tested using r1.',
                  'test': 'data/rsmtool_smEval.csv',
                  'train.lab': 'sc1',
                  'crossvalidate': 'yes',
                  'test.lab': 'r1',
                  'scale': 'scale'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)

            self.parser.load_config_from_dict(legacy)
            normalized = self.parser.normalize_config()

            ok_('experiment_id' in normalized.keys())
            assert_equal(normalized['experiment_id'], 'experiment_1')
            assert_equal(normalized['use_scaled_predictions'], True)

        # a non-standard scaling value is expected to raise
        bad_scale = {'expID': 'experiment_1',
                     'train': 'data/rsmtool_smTrain.csv',
                     'LRmodel': 'LinearRegression',
                     'scale': 'Yes'}
        with warnings.catch_warnings():
            # set the configuration directly on the parser
            self.parser._config = bad_scale

            warnings.filterwarnings('ignore', category=DeprecationWarning)
            assert_raises(ValueError, self.parser.normalize_config)

        # no scaling specified at all
        unscaled = {'expID': 'experiment_1',
                    'train': 'data/rsmtool_smTrain.csv',
                    'LRmodel': 'LinearRegression',
                    'feature': 'feature/feature_list.json',
                    'description': 'A sample model with 9 features '
                                   'trained using average score and tested using r1.',
                    'test': 'data/rsmtool_smEval.csv',
                    'train.lab': 'sc1',
                    'crossvalidate': 'yes',
                    'test.lab': 'r1'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)

            # set the configuration directly on the parser
            self.parser._config = unscaled
            normalized = self.parser.normalize_config()
            ok_('use_scaled_predictions' not in normalized.keys())

    @raises(ValueError)
    def test_validate_config_missing_fields(self):
        # only an un-normalized `expID` is supplied
        self._load_and_validate({'expID': 'test'})

    @raises(ValueError)
    def test_validate_config_min_responses_but_no_candidate(self):
        # `min_responses_per_candidate` is given without a candidate column
        self._load_and_validate({'experiment_id': 'experiment_1',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'model': 'LinearRegression',
                                 'min_responses_per_candidate': 5})

    def test_validate_config_unspecified_fields(self):
        # fields left out of the configuration come back with default values
        minimal = {'experiment_id': 'experiment_1',
                   'train_file': 'data/rsmtool_smTrain.csv',
                   'test_file': 'data/rsmtool_smEval.csv',
                   'model': 'LinearRegression'}

        # set the configuration directly on the parser
        self.parser._config = minimal
        validated = self.parser.validate_config()

        assert_equal(validated['id_column'], 'spkitemid')
        assert_equal(validated['use_scaled_predictions'], False)
        assert_equal(validated['select_transformations'], False)
        assert_equal(validated['general_sections'], 'all')
        assert_equal(validated['description'], '')

    @raises(ValueError)
    def test_validate_config_unknown_fields(self):
        # `output` is not among the fields used by the other tests
        self._load_and_validate({'experiment_id': 'experiment_1',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'description': 'Test',
                                 'model': 'LinearRegression',
                                 'output': 'foobar'})

    @raises(ValueError)
    def test_validate_config_experiment_id_1(self):
        # experiment ID containing a space, default context
        self._load_and_validate({'experiment_id': 'test experiment',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'model': 'LinearRegression'})

    @raises(ValueError)
    def test_validate_config_experiment_id_2(self):
        # experiment ID containing a space, rsmeval context
        self._load_and_validate({'experiment_id': 'test experiment',
                                 'predictions_file': 'data/foo',
                                 'system_score_column': 'h1',
                                 'trim_min': 1,
                                 'trim_max': 5},
                                context='rsmeval')

    @raises(ValueError)
    def test_validate_config_experiment_id_3(self):
        # comparison ID containing spaces, rsmcompare context
        self._load_and_validate({'comparison_id': 'old vs new',
                                 'experiment_id_old': 'old_experiment',
                                 'experiment_dir_old': 'data/old',
                                 'experiment_id_new': 'new_experiment',
                                 'experiment_dir_new': 'data/new'},
                                context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_4(self):
        # comparison ID and old experiment ID both contain spaces
        self._load_and_validate({'comparison_id': 'old vs new',
                                 'experiment_id_old': 'old experiment',
                                 'experiment_dir_old': 'data/old',
                                 'experiment_id_new': 'new_experiment',
                                 'experiment_dir_new': 'data/new'},
                                context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_5(self):
        # very long experiment ID, default context
        self._load_and_validate({'experiment_id': self._TOO_LONG_ID,
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'model': 'LinearRegression'})

    @raises(ValueError)
    def test_validate_config_experiment_id_6(self):
        # very long experiment ID, rsmeval context
        self._load_and_validate({'experiment_id': self._TOO_LONG_ID,
                                 'predictions_file': 'data/foo',
                                 'system_score_column': 'h1',
                                 'trim_min': 1,
                                 'trim_max': 5},
                                context='rsmeval')

    @raises(ValueError)
    def test_validate_config_experiment_id_7(self):
        # very long comparison ID, rsmcompare context
        self._load_and_validate({'comparison_id': self._TOO_LONG_ID,
                                 'experiment_id_old': 'old_experiment',
                                 'experiment_dir_old': 'data/old',
                                 'experiment_id_new': 'new_experiment',
                                 'experiment_dir_new': 'data/new'},
                                context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_8(self):
        # summary ID containing a space, rsmsummarize context
        self._load_and_validate({'summary_id': 'model summary',
                                 'experiment_dirs': []},
                                context='rsmsummarize')

    @raises(ValueError)
    def test_validate_config_experiment_id_9(self):
        # very long summary ID, rsmsummarize context
        self._load_and_validate({'summary_id': self._TOO_LONG_ID,
                                 'experiment_dirs': []},
                                context='rsmsummarize')

    def test_process_fields(self):
        # comma-separated strings come back as lists, boolean strings as bools
        settings = {'experiment_id': 'experiment_1',
                    'train_file': 'data/rsmtool_smTrain.csv',
                    'test_file': 'data/rsmtool_smEval.csv',
                    'description': 'Test',
                    'model': 'empWt',
                    'use_scaled_predictions': 'True',
                    'feature_prefix': '1gram, 2gram',
                    'subgroups': 'native language, GPA_range',
                    'exclude_zero_scores': 'false'}

        # feed the validated configuration back into the parser
        self.parser._config = self._load_and_validate(settings, inplace=False)
        processed = self.parser.process_config(inplace=False)

        assert_array_equal(processed['feature_prefix'], ['1gram', '2gram'])
        assert_array_equal(processed['subgroups'], ['native language', 'GPA_range'])
        eq_(type(processed['use_scaled_predictions']), bool)
        eq_(processed['use_scaled_predictions'], True)
        eq_(processed['exclude_zero_scores'], False)

    @raises(ValueError)
    def test_process_fields_with_non_boolean(self):
        # `exclude_zero_scores` is the string 'Yes'
        settings = {'experiment_id': 'experiment_1',
                    'train_file': 'data/rsmtool_smTrain.csv',
                    'test_file': 'data/rsmtool_smEval.csv',
                    'description': 'Test',
                    'model': 'empWt',
                    'use_scaled_predictions': 'True',
                    'feature_prefix': '1gram, 2gram',
                    'subgroups': 'native language, GPA_range',
                    'exclude_zero_scores': 'Yes'}

        # feed the validated configuration back into the parser
        self.parser._config = self._load_and_validate(settings)
        self.parser.process_config()

    @raises(ValueError)
    def test_process_fields_with_integer(self):
        # `exclude_zero_scores` is the integer 1
        settings = {'experiment_id': 'experiment_1',
                    'train_file': 'data/rsmtool_smTrain.csv',
                    'test_file': 'data/rsmtool_smEval.csv',
                    'description': 'Test',
                    'model': 'empWt',
                    'use_scaled_predictions': 'True',
                    'feature_prefix': '1gram, 2gram',
                    'subgroups': 'native language, GPA_range',
                    'exclude_zero_scores': 1}

        # feed the validated configuration back into the parser
        self.parser._config = self._load_and_validate(settings)
        self.parser.process_config()

    @raises(ValueError)
    def test_invalid_skll_objective(self):
        # `skll_objective` is 'squared_error' with a LinearSVR model
        self._load_and_validate({'experiment_id': 'experiment_1',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'description': 'Test',
                                 'model': 'LinearSVR',
                                 'skll_objective': 'squared_error'})

    @raises(ValueError)
    def test_wrong_skll_model_for_expected_scores(self):
        # expected scores requested with a LinearSVR model
        self._load_and_validate({'experiment_id': 'experiment_1',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'description': 'Test',
                                 'model': 'LinearSVR',
                                 'predict_expected_scores': 'true'})

    @raises(ValueError)
    def test_builtin_model_for_expected_scores(self):
        # expected scores requested with the NNLR model
        self._load_and_validate({'experiment_id': 'experiment_1',
                                 'train_file': 'data/rsmtool_smTrain.csv',
                                 'test_file': 'data/rsmtool_smEval.csv',
                                 'description': 'Test',
                                 'model': 'NNLR',
                                 'predict_expected_scores': 'true'})

    def test_get_correct_configparser_cfg(self):
        # a `.cfg` file name should yield a CFGConfigurationParser
        chosen = ConfigurationParser.get_configparser('config.cfg')
        assert isinstance(chosen, CFGConfigurationParser)

    def test_get_correct_configparser_json(self):
        # a `.json` file name should yield a JSONConfigurationParser
        chosen = ConfigurationParser.get_configparser('config.json')
        assert isinstance(chosen, JSONConfigurationParser)
class TestConfigurationParser:
    """Checks for ``ConfigurationParser``: normalization, validation,
    field processing and parser selection by file name."""

    def setUp(self):
        # a new parser for each test
        self.parser = ConfigurationParser()

    def test_normalize_config(self):
        parser = self.parser

        # old-style field names with a valid `scale` value
        cfg = {'expID': 'experiment_1',
               'train': 'data/rsmtool_smTrain.csv',
               'LRmodel': 'empWt',
               'feature': 'feature/feature_list.json',
               'description': ('A sample model with 9 features '
                               'trained using average score and tested using r1.'),
               'test': 'data/rsmtool_smEval.csv',
               'train.lab': 'sc1',
               'crossvalidate': 'yes',
               'test.lab': 'r1',
               'scale': 'scale'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)

            parser.load_config_from_dict(cfg)
            result = parser.normalize_config()
            ok_('experiment_id' in result.keys())
            assert_equal(result['experiment_id'], 'experiment_1')
            assert_equal(result['use_scaled_predictions'], True)

        # test for non-standard scaling value
        cfg = {'expID': 'experiment_1',
               'train': 'data/rsmtool_smTrain.csv',
               'LRmodel': 'LinearRegression',
               'scale': 'Yes'}
        with warnings.catch_warnings():
            # assign the configuration straight onto the parser
            parser._config = cfg

            warnings.filterwarnings('ignore', category=DeprecationWarning)
            assert_raises(ValueError, parser.normalize_config)

        # test when no scaling is specified
        cfg = {'expID': 'experiment_1',
               'train': 'data/rsmtool_smTrain.csv',
               'LRmodel': 'LinearRegression',
               'feature': 'feature/feature_list.json',
               'description': ('A sample model with 9 features '
                               'trained using average score and tested using r1.'),
               'test': 'data/rsmtool_smEval.csv',
               'train.lab': 'sc1',
               'crossvalidate': 'yes',
               'test.lab': 'r1'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)

            # assign the configuration straight onto the parser
            parser._config = cfg
            result = parser.normalize_config()
            ok_('use_scaled_predictions' not in result.keys())

    @raises(ValueError)
    def test_validate_config_missing_fields(self):
        # only an un-normalized `expID` is supplied
        cfg = {'expID': 'test'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_validate_config_min_responses_but_no_candidate(self):
        # `min_responses_per_candidate` is given without a candidate column
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'model': 'LinearRegression',
               'min_responses_per_candidate': 5}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    def test_validate_config_unspecified_fields(self):
        # fields that were not supplied are expected to get default values
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'model': 'LinearRegression'}

        # assign the configuration straight onto the parser
        self.parser._config = cfg
        result = self.parser.validate_config()

        expected_defaults = [('id_column', 'spkitemid'),
                             ('use_scaled_predictions', False),
                             ('select_transformations', False),
                             ('general_sections', 'all'),
                             ('description', '')]
        for field, default in expected_defaults:
            assert_equal(result[field], default)

    @raises(ValueError)
    def test_validate_config_unknown_fields(self):
        # `output` is not among the fields used by the other tests
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'LinearRegression',
               'output': 'foobar'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_validate_config_experiment_id_1(self):
        # experiment ID with a space, default context
        cfg = {'experiment_id': 'test experiment',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'model': 'LinearRegression'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_validate_config_experiment_id_2(self):
        # experiment ID with a space, rsmeval context
        cfg = {'experiment_id': 'test experiment',
               'predictions_file': 'data/foo',
               'system_score_column': 'h1',
               'trim_min': 1,
               'trim_max': 5}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmeval')

    @raises(ValueError)
    def test_validate_config_experiment_id_3(self):
        # comparison ID with spaces, rsmcompare context
        cfg = {'comparison_id': 'old vs new',
               'experiment_id_old': 'old_experiment',
               'experiment_dir_old': 'data/old',
               'experiment_id_new': 'new_experiment',
               'experiment_dir_new': 'data/new'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_4(self):
        # comparison ID and old experiment ID both contain spaces
        cfg = {'comparison_id': 'old vs new',
               'experiment_id_old': 'old experiment',
               'experiment_dir_old': 'data/old',
               'experiment_id_new': 'new_experiment',
               'experiment_dir_new': 'data/new'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_5(self):
        # very long experiment ID, default context
        cfg = {'experiment_id': ('this_is_a_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_long_id'),
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'model': 'LinearRegression'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_validate_config_experiment_id_6(self):
        # very long experiment ID, rsmeval context
        cfg = {'experiment_id': ('this_is_a_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_long_id'),
               'predictions_file': 'data/foo',
               'system_score_column': 'h1',
               'trim_min': 1,
               'trim_max': 5}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmeval')

    @raises(ValueError)
    def test_validate_config_experiment_id_7(self):
        # very long comparison ID, rsmcompare context
        cfg = {'comparison_id': ('this_is_a_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_really_really_really_really_'
                                 'really_really_really_long_id'),
               'experiment_id_old': 'old_experiment',
               'experiment_dir_old': 'data/old',
               'experiment_id_new': 'new_experiment',
               'experiment_dir_new': 'data/new'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmcompare')

    @raises(ValueError)
    def test_validate_config_experiment_id_8(self):
        # summary ID with a space, rsmsummarize context
        cfg = {'summary_id': 'model summary',
               'experiment_dirs': []}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmsummarize')

    @raises(ValueError)
    def test_validate_config_experiment_id_9(self):
        # very long summary ID, rsmsummarize context
        cfg = {'summary_id': ('this_is_a_really_really_really_'
                              'really_really_really_really_really_really_really_'
                              'really_really_really_really_really_really_really_'
                              'really_really_really_really_really_really_really_'
                              'really_really_really_long_id'),
               'experiment_dirs': []}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config(context='rsmsummarize')

    def test_process_fields(self):
        # comma-separated strings come back as lists, boolean strings as bools
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'empWt',
               'use_scaled_predictions': 'True',
               'feature_prefix': '1gram, 2gram',
               'subgroups': 'native language, GPA_range',
               'exclude_zero_scores': 'false'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        validated = parser.validate_config(inplace=False)

        # hand the validated configuration back to the parser
        parser._config = validated
        processed = parser.process_config(inplace=False)

        assert_array_equal(processed['feature_prefix'], ['1gram', '2gram'])
        assert_array_equal(processed['subgroups'],
                           ['native language', 'GPA_range'])
        scaled = processed['use_scaled_predictions']
        eq_(type(scaled), bool)
        eq_(scaled, True)
        eq_(processed['exclude_zero_scores'], False)

    @raises(ValueError)
    def test_process_fields_with_non_boolean(self):
        # `exclude_zero_scores` is the string 'Yes'
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'empWt',
               'use_scaled_predictions': 'True',
               'feature_prefix': '1gram, 2gram',
               'subgroups': 'native language, GPA_range',
               'exclude_zero_scores': 'Yes'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        # hand the validated configuration back to the parser
        parser._config = parser.validate_config()
        parser.process_config()

    @raises(ValueError)
    def test_process_fields_with_integer(self):
        # `exclude_zero_scores` is the integer 1
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'empWt',
               'use_scaled_predictions': 'True',
               'feature_prefix': '1gram, 2gram',
               'subgroups': 'native language, GPA_range',
               'exclude_zero_scores': 1}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        # hand the validated configuration back to the parser
        parser._config = parser.validate_config()
        parser.process_config()

    @raises(ValueError)
    def test_invalid_skll_objective(self):
        # `skll_objective` is 'squared_error' with a LinearSVR model
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'LinearSVR',
               'skll_objective': 'squared_error'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_wrong_skll_model_for_expected_scores(self):
        # expected scores requested with a LinearSVR model
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'LinearSVR',
               'predict_expected_scores': 'true'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    @raises(ValueError)
    def test_builtin_model_for_expected_scores(self):
        # expected scores requested with the NNLR model
        cfg = {'experiment_id': 'experiment_1',
               'train_file': 'data/rsmtool_smTrain.csv',
               'test_file': 'data/rsmtool_smEval.csv',
               'description': 'Test',
               'model': 'NNLR',
               'predict_expected_scores': 'true'}

        parser = self.parser
        parser.load_config_from_dict(cfg)
        parser.validate_config()

    def test_get_correct_configparser_cfg(self):
        # a `.cfg` file name should select the CFG parser class
        selected = ConfigurationParser.get_configparser('config.cfg')
        assert isinstance(selected, CFGConfigurationParser)

    def test_get_correct_configparser_json(self):
        # a `.json` file name should select the JSON parser class
        selected = ConfigurationParser.get_configparser('config.json')
        assert isinstance(selected, JSONConfigurationParser)