Beispiel #1
0
    def test_sample_model_param_list(self):
        """Check _sample_model_param_list draws the expected parameter dicts
        (seeded RNG) for both list-valued and low/high-range specifications."""
        import random
        random.seed(0)  # fix the RNG so the sampled params are reproducible

        model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                    'n_estimators': [10, 50],
                                    'random_state': [0]}
        dicts = ModelCrossValidation._sample_model_param_list(
            model_param_search_range, 4)
        expected_dicts = [
            {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
            {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
        ]
        # assertEqual: assertEquals is a deprecated unittest alias
        self.assertEqual(dicts, expected_dicts)

        # 'n_estimators' given as a continuous low/high range instead of a list
        model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                    'n_estimators': {'low': 10, 'high': 50,
                                                     'decimal': 0},
                                    'random_state': [0]}
        dicts = ModelCrossValidation._sample_model_param_list(
            model_param_search_range, 4)
        expected_dicts = [
            {'norm_type': 'clip_0to1', 'n_estimators': 21, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 20, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 42, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 39, 'random_state': 0},
        ]
        self.assertEqual(dicts, expected_dicts)
    def test_sample_model_param_list(self):
        """Verify seeded random sampling of model parameter combinations,
        for discrete-list and low/high-range search specifications."""
        import random
        random.seed(0)  # deterministic sampling for reproducible expectations

        model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                    'n_estimators': [10, 50],
                                    'random_state': [0]}
        dicts = ModelCrossValidation._sample_model_param_list(
            model_param_search_range, 4)
        expected_dicts = [
            {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
            {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
        ]
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(dicts, expected_dicts)

        # second case: 'n_estimators' sampled from an integer range
        model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                    'n_estimators': {'low': 10, 'high': 50,
                                                     'decimal': 0},
                                    'random_state': [0]}
        dicts = ModelCrossValidation._sample_model_param_list(
            model_param_search_range, 4)
        expected_dicts = [
            {'norm_type': 'clip_0to1', 'n_estimators': 21, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 20, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 42, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 39, 'random_state': 0},
        ]
        self.assertEqual(dicts, expected_dicts)
Beispiel #3
0
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Nested k-fold CV over a random forest with explicit per-fold
        index lists instead of a fold count."""
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['none'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0],
             }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features,
            [[0, 3, 2], [8, 6, 5], [4, 1, 7]])

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.26666666666666666, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.15272340058922063, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.22222222222222221, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.452887116343635, places=4)

        expected_top_model_param = {'norm_type': 'none',
                                    'n_estimators': 10,
                                    'max_depth': None,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_cross_validation(self):
        """Single train/test split CV on a random forest over a fixture
        feature DataFrame; checks stats and the resulting model type."""
        print("test cross validation...")

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # use a context manager so the handle is not leaked.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        indices_train = range(250)
        indices_test = range(250, 300)

        output = ModelCrossValidation.run_cross_validation(
            train_test_model_class, model_param, feature_df, indices_train,
            indices_test)
        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['stats']['SRCC'],
                               0.93493301443051136,
                               places=4)
        self.assertAlmostEqual(output['stats']['PCC'],
                               0.9413390374529329,
                               places=4)
        self.assertAlmostEqual(output['stats']['KENDALL'],
                               0.78029280419726044,
                               places=4)
        self.assertAlmostEqual(output['stats']['RMSE'],
                               0.32357946626958406,
                               places=4)
        self.assertEqual(output['model'].TYPE, "RANDOMFOREST")
Beispiel #5
0
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Nested 3-fold CV on a random forest over self.features; checks
        aggregate stats and the winning hyper-parameter combination."""
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0]}

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.40167715620274708, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.11009919053282299, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.3681348274719265, places=4)

        expected_top_model_param = {'norm_type': 'normalize',
                                    'n_estimators': 10,
                                    'max_depth': None,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_kfold_cross_validation_with_list_input(self):
        """K-fold CV with explicit index-list folds over the first 200 rows
        of the fixture feature DataFrame."""
        print("test k-fold cross validation with list input...")

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df,
            [range(0, 50), range(130, 200),
             range(50, 130)])

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'],
                               0.90636761259756715)
        self.assertAlmostEqual(output['aggr_stats']['PCC'],
                               0.90819953685397914)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'],
                               0.72937284548325965)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'],
                               0.49899297305829415)
Beispiel #7
0
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Nested 3-fold CV on libsvm nu-SVR over self.features; checks
        aggregate stats and the unanimously-selected top parameters."""
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
             'kernel': ['rbf'],
             'nu': [0.5],
             'C': [1, 2],
             'gamma': [0.0],
             }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.30962614123961751, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], -0.1535643705229309, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.5853397658781734, places=4)

        expected_top_model_param = {'norm_type': 'clip_0to1',
                                    'kernel': 'rbf',
                                    'nu': 0.5,
                                    'C': 1,
                                    'gamma': 0.0,
                                    }
        expected_top_ratio = 1.0  # same params won in every outer fold

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #8
0
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Nested 3-fold CV on a random forest; asserts aggregate stats and
        the most frequently selected hyper-parameters."""
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0]}

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.40167715620274708, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.11009919053282299, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.3681348274719265, places=4)

        expected_top_model_param = {'norm_type': 'normalize',
                                    'n_estimators': 10,
                                    'max_depth': None,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Nested 6-fold CV on libsvm nu-SVR over the fixture feature
        DataFrame; checks aggregate stats and the winning parameters."""
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
             'kernel': ['rbf'],
             'nu': [0.5, 1.0],
             'C': [1, 2],
             'gamma': [0.0],
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.93704238362264514)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.94734024567912978)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.77785381654919195)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 0.34039563991411448)

        expected_top_model_param = {'norm_type': 'clip_0to1',
                                    'kernel': 'rbf',
                                    'nu': 1.0,
                                    'C': 1,
                                    'gamma': 0.0,
                                    }
        expected_top_ratio = 0.5

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_kfold_cross_validation_randomforest(self):
        """6-fold CV on a random forest over the fixture feature DataFrame."""
        print("test k-fold cross validation on random forest...")

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'],
                               0.92695443548602008,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'],
                               0.93189074441713937,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'],
                               0.76031309571294092,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'],
                               0.40381451586590256,
                               places=4)
Beispiel #11
0
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Nested CV on a random forest, with folds given as explicit index
        lists rather than a fold count."""
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['none'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0],
             }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features,
            [[0, 3, 2], [8, 6, 5], [4, 1, 7]])

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.26666666666666666, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.15272340058922063, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.22222222222222221, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.452887116343635, places=4)

        expected_top_model_param = {'norm_type': 'none',
                                    'n_estimators': 10,
                                    'max_depth': None,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_randomforest(self):
        """Nested 6-fold CV on a random forest over the fixture feature
        DataFrame; the top-params assertion is intentionally disabled."""
        print("test nested k-fold cross validation on random forest...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0],
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.92805802153293737)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.94066838465382363)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.76196220071567478)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 0.37660623901376861)

        expected_top_model_param = {'norm_type': 'normalize',
                                    'n_estimators': 90,
                                    'max_depth': None,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.5
        # disabled in the original — presumably flaky; kept for reference
        # self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #13
0
    def test_unroll_dict_of_lists(self):
        """_unroll_dict_of_lists expands a dict of option lists into the
        full cartesian product of parameter dicts, in a fixed order."""
        model_param_search_range = {
            'norm_type': ['normalize', 'clip_0to1'],
            'n_estimators': [10, 50],
            'random_state': [0],
        }

        dicts = ModelCrossValidation._unroll_dict_of_lists(
            model_param_search_range)

        expected_dicts = [
            {
                'norm_type': 'normalize',
                'n_estimators': 10,
                'random_state': 0,
            },
            {
                'norm_type': 'clip_0to1',
                'n_estimators': 10,
                'random_state': 0,
            },
            {
                'norm_type': 'normalize',
                'n_estimators': 50,
                'random_state': 0,
            },
            {
                'norm_type': 'clip_0to1',
                'n_estimators': 50,
                'random_state': 0,
            },
        ]

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(dicts, expected_dicts)
    def test_run_kfold_cross_validation_libsvmnusvr(self):
        """6-fold CV on libsvm nu-SVR over the fixture feature DataFrame."""
        print("test k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'],
                               0.92387451180595015,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'],
                               0.92481147926825724,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'],
                               0.75416215405673581,
                               places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'],
                               0.42231775639097513,
                               places=4)
Beispiel #15
0
    def test_run_cross_validation(self):
        """Train and test on the same 9 indices of self.features (sanity
        check of run_cross_validation and the resulting model type)."""
        print("test cross validation...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        indices_train = range(9)
        indices_test = range(9)

        output = ModelCrossValidation.run_cross_validation(
            train_test_model_class, model_param, self.features, indices_train,
            indices_test)
        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['stats']['SRCC'],
                               0.93333333333333324,
                               places=4)
        self.assertAlmostEqual(output['stats']['PCC'],
                               0.97754442316039469,
                               places=4)
        self.assertAlmostEqual(output['stats']['KENDALL'],
                               0.83333333333333337,
                               places=4)
        self.assertAlmostEqual(output['stats']['RMSE'],
                               0.17634739353518517,
                               places=4)
        self.assertEqual(output['model'].TYPE, "RANDOMFOREST")
Beispiel #16
0
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):
        """Nested 3-fold CV on libsvm nu-SVR over self.features."""
        print("test nested k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize', 'clip_0to1', 'clip_minus1to1'],
             'kernel': ['rbf'],
             'nu': [0.5],
             'C': [1, 2],
             'gamma': [0.0],
             }

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.30962614123961751, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], -0.1535643705229309, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.5853397658781734, places=4)

        expected_top_model_param = {'norm_type': 'clip_0to1',
                                    'kernel': 'rbf',
                                    'nu': 0.5,
                                    'C': 1,
                                    'gamma': 0.0,
                                    }
        expected_top_ratio = 1.0  # same params won in every outer fold

        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_with_list_input(self):
        """Nested CV on a random forest with explicit index-list folds over
        the first 200 rows of the fixture feature DataFrame."""
        print("test nested k-fold cross validation with list input...")

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type': ['normalize'],
             'n_estimators': [10, 90],
             'max_depth': [None, 3],
             'random_state': [0],
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        # NOTE(review): eval on a repo-local fixture file (trusted input);
        # context manager avoids leaking the file handle.
        with open(feature_df_file, "r") as f:
            feature_df = pd.DataFrame.from_dict(eval(f.read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df,
            [range(0, 50), range(130, 200), range(50, 130)]
        )

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.92549459243170684)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.93070443071372855)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.76385104263763215)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 0.43223946862572299)

        expected_top_model_param = {'norm_type': 'normalize',
                                    'n_estimators': 90,
                                    'max_depth': 3,
                                    'random_state': 0,
                                    }
        expected_top_ratio = 0.6666666666666666
        self.assertEqual(output['top_model_param'], expected_top_model_param)
        self.assertEqual(output['top_ratio'], expected_top_ratio)
Beispiel #18
0
def cv_on_dataset(dataset,
                  feature_param,
                  model_param,
                  ax,
                  result_store,
                  contentid_groups,
                  logger=None,
                  aggregate_method=np.mean):
    """Run k-fold cross validation of one model on one dataset.

    Extracts features for the dataset's assets with FeatureAssembler,
    runs k-fold CV with folds grouped by content id, prints a summary,
    and optionally draws a scatter plot on *ax*.

    :param dataset: dataset object understood by read_dataset
    :param feature_param: object with a feature_dict attribute
    :param model_param: object with model_type and model_param_dict
    :param ax: matplotlib axes for the scatter plot, or None to skip plotting
    :param result_store: result store passed through to FeatureAssembler
    :param contentid_groups: grouping used to construct the k-fold list
    :param logger: optional logger passed to feature extraction and CV
    :param aggregate_method: per-result score aggregation (default np.mean)
    :return: (assets, cv_output) tuple
    """
    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    # apply the chosen score aggregation before training/testing
    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)
    # run k-fold cv for each parameter combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class,
        model_param.model_param_dict,
        results,
        kfold,
        logger=logger,
    )

    print('Feature parameters: {}'.format(feature_param.feature_dict))
    print('Model type: {}'.format(model_param.model_type))
    print('Model parameters: {}'.format(model_param.model_param_dict))
    print('Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats'])))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output['aggr_stats'],
                                 cv_output['contentids'])
        ax.set_xlabel('True Score')
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title("Dataset: {dataset}, Model: {model},\n{stats}".format(
            dataset=dataset.dataset_name,
            model=model_param.model_type,
            stats=model_class.format_stats(cv_output['aggr_stats'])))

    return assets, cv_output
Beispiel #19
0
def cv_on_dataset(
    dataset, feature_param, model_param, ax, result_store, contentid_groups, logger=None, aggregate_method=np.mean
):
    """Run k-fold cross validation of one model on one dataset.

    Extracts features for the dataset's assets, runs k-fold CV with folds
    grouped by content id, prints a summary, and optionally draws a
    scatter plot on *ax* (pass None to skip plotting).

    :return: (assets, cv_output) tuple
    """
    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    # apply the chosen score aggregation before training/testing
    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)
    # run k-fold cv for each parameter combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param.model_param_dict, results, kfold, logger=logger
    )

    print("Feature parameters: {}".format(feature_param.feature_dict))
    print("Model type: {}".format(model_param.model_type))
    print("Model parameters: {}".format(model_param.model_param_dict))
    print("Stats: {}".format(model_class.format_stats(cv_output["aggr_stats"])))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output["aggr_stats"], cv_output["contentids"])
        ax.set_xlabel("True Score")
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title(
            "Dataset: {dataset}, Model: {model},\n{stats}".format(
                dataset=dataset.dataset_name,
                model=model_param.model_type,
                stats=model_class.format_stats(cv_output["aggr_stats"]),
            )
        )

    return assets, cv_output
Beispiel #20
0
    def test_run_kfold_cross_validation_libsvmnusvr(self):
        """3-fold CV on libsvm nu-SVR over self.features."""
        print("test k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.31666666666666665, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.33103132578536021, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.27777777777777779, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.2855099934718619, places=4)
Beispiel #21
0
    def test_run_kfold_cross_validation_randomforest(self):
        """3-fold CV on a random forest over self.features."""
        print("test k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.28452131897694583, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.1689046198483892, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.084515425472851652, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.344683833136588, places=4)
    def test_unroll_dict_of_lists(self):
        """_unroll_dict_of_lists expands a dict of option lists into the
        full cartesian product of parameter dicts, in a fixed order."""
        model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                    'n_estimators': [10, 50],
                                    'random_state': [0]}

        dicts = ModelCrossValidation._unroll_dict_of_lists(model_param_search_range)

        expected_dicts = [
            {'norm_type': 'normalize', 'n_estimators': 10, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
            {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
            {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
        ]

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(dicts, expected_dicts)
Beispiel #23
0
    def test_run_kfold_cross_validation_extratrees(self):
        """3-fold CV on extra trees over self.features."""
        print("test k-fold cross validation on extra trees...")

        train_test_model_class = SklearnExtraTreesTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.17320508075688773, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.33023719320146966, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14907119849998599, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.3279056191361394, places=4)
Beispiel #24
0
    def test_run_kfold_cross_validation_extratrees(self):
        """3-fold CV on extra trees over self.features (fixed seed)."""
        print("test k-fold cross validation on extra trees...")

        train_test_model_class = SklearnExtraTreesTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.17320508075688773, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.33023719320146966, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.14907119849998599, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.3279056191361394, places=4)
Beispiel #25
0
    def test_run_kfold_cross_validation_libsvmnusvr(self):
        """3-fold CV on libsvm nu-SVR over self.features (normalize)."""
        print("test k-fold cross validation on libsvmnusvr...")

        train_test_model_class = LibsvmNusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.31666666666666665, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.33103132578536021, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.27777777777777779, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.2855099934718619, places=4)
Beispiel #26
0
    def test_run_kfold_cross_validation_randomforest(self):
        """3-fold CV on a random forest over self.features (fixed seed)."""
        print("test k-fold cross validation on random forest...")

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type': 'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features, 3)

        # singular assert names: the plural forms are deprecated aliases
        self.assertAlmostEqual(output['aggr_stats']['SRCC'], 0.28452131897694583, places=4)
        self.assertAlmostEqual(output['aggr_stats']['PCC'], 0.1689046198483892, places=4)
        self.assertAlmostEqual(output['aggr_stats']['KENDALL'], 0.084515425472851652, places=4)
        self.assertAlmostEqual(output['aggr_stats']['RMSE'], 1.344683833136588, places=4)
# Beispiel #27
# 0
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features,
            [[0, 3, 8], [2, 1, 5], [4, 6, 7]])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.18333333333333335, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.35513638509959689, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.1111111111111111, places=3)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2740400878438387, places=3)
# Beispiel #28
# 0
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, self.features,
            [[0, 3, 8], [2, 1, 5], [4, 6, 7]])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.18333333333333335, places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.35513638509959689, places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.1111111111111111, places=3)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2740400878438387, places=3)
    def test_find_most_frequent_dict(self):
        # Verifies _find_most_frequent_dict returns the dict occurring most
        # often in the list, together with its occurrence count.
        # NOTE(review): a method with this same name is defined again later
        # in the file, so this definition is shadowed at class-creation time.
        dicts = [
         {'norm_type':'normalize', 'n_estimators':10, 'random_state':0},
         {'norm_type':'normalize', 'n_estimators':50, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':10, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0},
        ]

        # Fix: renamed result variable from `dict`, which shadowed the builtin.
        most_frequent_dict, count = ModelCrossValidation._find_most_frequent_dict(dicts)
        expected_dict = {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0}
        expected_count = 2

        self.assertEquals(most_frequent_dict, expected_dict)
        self.assertEquals(count, expected_count)
# Beispiel #30
# 0
    def test_find_most_frequent_dict(self):
        # Verifies _find_most_frequent_dict returns the dict occurring most
        # often in the list, together with its occurrence count.
        # NOTE(review): duplicate of an earlier method with the same name;
        # this later definition is the one that survives class creation.
        dicts = [
         {'norm_type':'normalize', 'n_estimators':10, 'random_state':0},
         {'norm_type':'normalize', 'n_estimators':50, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':10, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0},
         {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0},
        ]

        # Fix: renamed result variable from `dict`, which shadowed the builtin.
        most_frequent_dict, count = ModelCrossValidation._find_most_frequent_dict(dicts)
        expected_dict = {'norm_type':'clip_0to1', 'n_estimators':50, 'random_state':0}
        expected_count = 2

        self.assertEquals(most_frequent_dict, expected_dict)
        self.assertEquals(count, expected_count)
    def test_run_nested_kfold_cross_validation_libsvmnusvr(self):

        print "test nested k-fold cross validation on libsvmnusvr..."

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param_search_range = \
            {'norm_type':['normalize', 'clip_0to1', 'clip_minus1to1'],
             'kernel':['rbf'],
             'nu': [0.5, 1.0],
             'C': [1, 2],
             'gamma': [0.0]
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.93704238362264514,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.94734024567912978,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.77785381654919195,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.34039563991411448,
                                places=4)

        expected_top_model_param = {
            'norm_type': 'clip_0to1',
            'kernel': 'rbf',
            'nu': 1.0,
            'C': 1,
            'gamma': 0.0,
        }
        expected_top_ratio = 0.5

        self.assertEquals(output['top_model_param'], expected_top_model_param)
        self.assertEquals(output['top_ratio'], expected_top_ratio)
    def test_run_nested_kfold_cross_validation_with_list_input(self):

        print "test nested k-fold cross validation with list input..."

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type':['normalize'],
             'n_estimators':[10, 90],
             'max_depth':[None, 3],
             'random_state': [0]
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df,
            [range(0, 50), range(130, 200),
             range(50, 130)])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.92549459243170684,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.93070443071372855,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.76385104263763215,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.43223946862572299,
                                places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': 3,
            'random_state': 0
        }
        expected_top_ratio = 0.6666666666666666
        self.assertEquals(output['top_model_param'], expected_top_model_param)
        self.assertEquals(output['top_ratio'], expected_top_ratio)
    def test_run_kfold_cross_validation_randomforest(self):

        print "test k-fold cross validation on random forest..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92695443548602008)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93189074441713937)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76031309571294092)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.40381451586590256)
    def test_run_kfold_cross_validation_libsvmnusvr(self):

        print "test k-fold cross validation on libsvmnusvr..."

        train_test_model_class = LibsvmnusvrTrainTestModel
        model_param = {'norm_type': 'normalize'}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92387451180595015)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.92481147926825724)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.75416215405673581)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.42231775639097513)
# Beispiel #35
# 0
    def test_run_cross_validation(self):

        print "test cross validation..."

        train_test_model_class = SklearnRandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        indices_train = range(9)
        indices_test = range(9)

        output = ModelCrossValidation.run_cross_validation(
            train_test_model_class, model_param, self.features,
            indices_train, indices_test)
        self.assertAlmostEquals(output['stats']['SRCC'], 0.93333333333333324, places=4)
        self.assertAlmostEquals(output['stats']['PCC'], 0.97754442316039469, places=4)
        self.assertAlmostEquals(output['stats']['KENDALL'], 0.83333333333333337, places=4)
        self.assertAlmostEquals(output['stats']['RMSE'], 0.17634739353518517, places=4)
        self.assertEquals(output['model'].TYPE, "RANDOMFOREST")
    def test_run_nested_kfold_cross_validation_randomforest(self):

        print "test nested k-fold cross validation on random forest..."

        train_test_model_class = RandomForestTrainTestModel
        model_param_search_range = \
            {'norm_type':['normalize'],
             'n_estimators':[10, 90],
             'max_depth':[None, 3],
             'random_state': [0]
             }

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(
            eval(open(feature_df_file, "r").read()))

        output = ModelCrossValidation.run_nested_kfold_cross_validation(
            train_test_model_class, model_param_search_range, feature_df, 6)

        self.assertAlmostEquals(output['aggr_stats']['SRCC'],
                                0.92805802153293737,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['PCC'],
                                0.94066838465382363,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'],
                                0.76196220071567478,
                                places=4)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'],
                                0.37660623901376861,
                                places=4)

        expected_top_model_param = {
            'norm_type': 'normalize',
            'n_estimators': 90,
            'max_depth': None,
            'random_state': 0
        }
        expected_top_ratio = 0.5
        # self.assertEquals(output['top_model_param'], expected_top_model_param)
        self.assertEquals(output['top_ratio'], expected_top_ratio)
    def test_run_kfold_cross_validation_with_list_input(self):

        print "test k-fold cross validation with list input..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        feature_df = feature_df[:200]
        output = ModelCrossValidation.run_kfold_cross_validation(
            train_test_model_class, model_param, feature_df,
            [range(0,50), range(130, 200), range(50, 130)])

        self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.90636761259756715)
        self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.90819953685397914)
        self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.72937284548325965)
        self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.49899297305829415)
    def test_run_cross_validation(self):

        print "test cross validation..."

        train_test_model_class = RandomForestTrainTestModel
        model_param = {'norm_type':'normalize', 'random_state': 0}

        feature_df_file = config.ROOT + \
            "/python/test/resource/sample_feature_extraction_results.json"
        feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read()))

        indices_train = range(250)
        indices_test = range(250, 300)

        output = ModelCrossValidation.run_cross_validation(
            train_test_model_class, model_param, feature_df,
            indices_train, indices_test)
        self.assertAlmostEquals(output['stats']['SRCC'], 0.93493301443051136)
        self.assertAlmostEquals(output['stats']['PCC'], 0.9413390374529329)
        self.assertAlmostEquals(output['stats']['KENDALL'], 0.78029280419726044)
        self.assertAlmostEquals(output['stats']['RMSE'], 0.32357946626958406)
        self.assertEquals(output['model'].TYPE, "RANDOMFOREST")