Beispiel #1
0
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Check nested k-fold cross validation when given index lists.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with the sklearn
        random forest model class, passing three explicit index lists as the
        last argument instead of an integer, and compares the aggregate
        statistics, top model parameters and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['none'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features,
            [[0, 3, 2], [8, 6, 5], [4, 1, 7]])

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.26666666666666666, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.15272340058922063, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.22222222222222221, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.452887116343635, places=4)

        expected_top_model_param = {
            'norm_type': 'none',
            'n_estimators': 10,
            'max_depth': None,
            'random_state': 0,
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #2
0
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Check nested k-fold cross validation with the random forest model.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with ``3`` as the
        last argument (presumably the fold count -- confirm against the
        callee), then compares the aggregate statistics, top model parameters
        and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.40167715620274708, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.11009919053282299, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.3681348274719265, places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 10,
            'max_depth': None,
            'random_state': 0,
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #3
0
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Check nested k-fold cross validation with the libsvm nu-SVR model.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with ``3`` as the
        last argument (presumably the fold count -- confirm against the
        callee), searching over three normalization types and two ``C``
        values, then compares the aggregate statistics, top model parameters
        and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
            'kernel': ['rbf'],
            'nu': [0.5],
            'C': [1, 2],
            'gamma': [0.0],
        }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.30962614123961751, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], -0.1535643705229309, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.5853397658781734, places=4)

        expected_top_model_param = {
            'norm_type': 'clip_0to1',
            'kernel': 'rbf',
            'nu': 0.5,
            'C': 1,
            'gamma': 0.0,
        }
        expected_top_ratio = 1.0

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Check nested k-fold cross validation when given index lists.

        Loads the sample feature-extraction fixture into a DataFrame, keeps
        the first 200 rows, and calls
        ``ModelCrossValidation.run_nested_kfold_cross_validation`` with three
        explicit index ranges as the last argument instead of an integer.
        The aggregate statistics, top model parameters and top ratio are
        compared with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df,
            [range(0,50), range(130, 200), range(50, 130)]
        )

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12). Default tolerance (7 places) is kept as in the
        # original.
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.92549459243170684)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.93070443071372855)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.76385104263763215)
        self.assertAlmostEqual(aggr_stats['RMSE'], 0.43223946862572299)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': 3,
            'random_state': 0,
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Check nested k-fold cross validation with the libsvm nu-SVR model.

        Loads the sample feature-extraction fixture into a DataFrame and
        calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` with
        ``6`` as the last argument (presumably the fold count -- confirm
        against the callee). The aggregate statistics, top model parameters
        and top ratio are compared with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
            'kernel': ['rbf'],
            'nu': [0.5, 1.0],
            'C': [1, 2],
            'gamma': [0.0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12). Default tolerance (7 places) is kept as in the
        # original.
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.93704238362264514)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.94734024567912978)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.77785381654919195)
        self.assertAlmostEqual(aggr_stats['RMSE'], 0.34039563991411448)

        expected_top_model_param = {
            'norm_type': 'clip_0to1',
            'kernel': 'rbf',
            'nu': 1.0,
            'C': 1,
            'gamma': 0.0,
        }
        expected_top_ratio = 0.5

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Check nested k-fold cross validation with the random forest model.

        Loads the sample feature-extraction fixture into a DataFrame and
        calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` with
        ``6`` as the last argument (presumably the fold count -- confirm
        against the callee). The aggregate statistics and top ratio are
        compared with recorded values; the top-model-parameter comparison is
        currently disabled.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12). Default tolerance (7 places) is kept as in the
        # original.
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.92805802153293737)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.94066838465382363)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.76196220071567478)
        self.assertAlmostEqual(aggr_stats['RMSE'], 0.37660623901376861)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': None,
            'random_state': 0,
        }
        expected_top_ratio = 0.5
        # NOTE(review): this check was already disabled; the reason is not
        # visible here -- confirm before re-enabling or deleting it together
        # with expected_top_model_param.
        # self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #7
0
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Check nested k-fold cross validation when given index lists.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with the sklearn
        random forest model class, passing three explicit index lists as the
        last argument instead of an integer, and compares the aggregate
        statistics, top model parameters and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['none'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        fold_indices = [[0, 3, 2], [8, 6, 5], [4, 1, 7]]
        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features,
            fold_indices)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.26666666666666666, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.15272340058922063, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.22222222222222221, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.452887116343635, places=4)

        expected_top_model_param = {
            'norm_type': 'none',
            'n_estimators': 10,
            'max_depth': None,
            'random_state': 0,
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #8
0
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Check nested k-fold cross validation with the libsvm nu-SVR model.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with ``3`` as the
        last argument (presumably the fold count -- confirm against the
        callee), searching over three normalization types and two ``C``
        values, then compares the aggregate statistics, top model parameters
        and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
            'kernel': ['rbf'],
            'nu': [0.5],
            'C': [1, 2],
            'gamma': [0.0],
        }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.30962614123961751, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], -0.1535643705229309, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.5853397658781734, places=4)

        expected_top_model_param = {
            'norm_type': 'clip_0to1',
            'kernel': 'rbf',
            'nu': 0.5,
            'C': 1,
            'gamma': 0.0,
        }
        expected_top_ratio = 1.0

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #9
0
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Check nested k-fold cross validation with the random forest model.

        Calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` on
        ``self.features`` (populated outside this method) with ``3`` as the
        last argument (presumably the fold count -- confirm against the
        callee), then compares the aggregate statistics, top model parameters
        and top ratio with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'], 0.40167715620274708, places=4)
        self.assertAlmostEqual(aggr_stats['PCC'], 0.11009919053282299, places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'], 1.3681348274719265, places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 10,
            'max_depth': None,
            'random_state': 0,
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Check nested k-fold cross validation when given index lists.

        Loads the sample feature-extraction fixture into a DataFrame, keeps
        the first 200 rows, and calls
        ``ModelCrossValidation.run_nested_kfold_cross_validation`` with three
        explicit index ranges as the last argument instead of an integer.
        The aggregate statistics (to 4 decimal places), top model parameters
        and top ratio are compared with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df,
            [range(0, 50), range(130, 200),
             range(50, 130)])

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'],
                               0.92549459243170684,
                               places=4)
        self.assertAlmostEqual(aggr_stats['PCC'],
                               0.93070443071372855,
                               places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'],
                               0.76385104263763215,
                               places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'],
                               0.43223946862572299,
                               places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': 3,
            'random_state': 0
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Check nested k-fold cross validation with the libsvm nu-SVR model.

        Loads the sample feature-extraction fixture into a DataFrame and
        calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` with
        ``6`` as the last argument (presumably the fold count -- confirm
        against the callee). The aggregate statistics (to 4 decimal places),
        top model parameters and top ratio are compared with recorded values.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
            'kernel': ['rbf'],
            'nu': [0.5, 1.0],
            'C': [1, 2],
            'gamma': [0.0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'],
                               0.93704238362264514,
                               places=4)
        self.assertAlmostEqual(aggr_stats['PCC'],
                               0.94734024567912978,
                               places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'],
                               0.77785381654919195,
                               places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'],
                               0.34039563991411448,
                               places=4)

        expected_top_model_param = {
            'norm_type': 'clip_0to1',
            'kernel': 'rbf',
            'nu': 1.0,
            'C': 1,
            'gamma': 0.0,
        }
        expected_top_ratio = 0.5

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Check nested k-fold cross validation with the random forest model.

        Loads the sample feature-extraction fixture into a DataFrame and
        calls ``ModelCrossValidation.run_nested_kfold_cross_validation`` with
        ``6`` as the last argument (presumably the fold count -- confirm
        against the callee). The aggregate statistics (to 4 decimal places)
        and top ratio are compared with recorded values; the
        top-model-parameter comparison is currently disabled.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = {
            'norm_type': ['normalize'],
            'n_estimators': [10, 90],
            'max_depth': [None, 3],
            'random_state': [0],
        }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval() executes the fixture file as Python code; this
        # is only safe while the file is a trusted, repo-controlled resource.
        # The context manager closes the handle the original left open.
        with open(feature_df_file, "r") as feature_file:
            feature_df = pd.DataFrame.from_dict(eval(feature_file.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # Non-deprecated assertion names (the *Equals aliases are removed in
        # Python 3.12).
        aggr_stats = output['aggr_stats']
        self.assertAlmostEqual(aggr_stats['SRCC'],
                               0.92805802153293737,
                               places=4)
        self.assertAlmostEqual(aggr_stats['PCC'],
                               0.94066838465382363,
                               places=4)
        self.assertAlmostEqual(aggr_stats['KENDALL'],
                               0.76196220071567478,
                               places=4)
        self.assertAlmostEqual(aggr_stats['RMSE'],
                               0.37660623901376861,
                               places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': None,
            'random_state': 0
        }
        expected_top_ratio = 0.5
        # NOTE(review): this check was already disabled; the reason is not
        # visible here -- confirm before re-enabling or deleting it together
        # with expected_top_model_param.
        # self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)