def test_sample_model_param_list(self):
    """_sample_model_param_list draws deterministic samples (seeded RNG) from
    both list-style and low/high range-style search ranges."""
    import random
    random.seed(0)  # fix the RNG so the sampled params are reproducible

    # list-style range: each value is drawn from the given list
    model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                'n_estimators': [10, 50],
                                'random_state': [0]}
    dicts = ModelCrossValidation._sample_model_param_list(
        model_param_search_range, 4)
    expected_dicts = [
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
    ]
    self.assertEqual(dicts, expected_dicts)

    # dict-style range: value is drawn between 'low' and 'high', rounded
    # to 'decimal' places
    model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                'n_estimators': {'low': 10, 'high': 50, 'decimal': 0},
                                'random_state': [0]}
    dicts = ModelCrossValidation._sample_model_param_list(
        model_param_search_range, 4)
    expected_dicts = [
        {'norm_type': 'clip_0to1', 'n_estimators': 21, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 20, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 42, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 39, 'random_state': 0},
    ]
    self.assertEqual(dicts, expected_dicts)
def test_run_nested_kfold_cross_validation_with_list_input(self): print "test nested k-fold cross validation with list input..." train_test_model_class = SklearnRandomForestTrainTestModel model_param_search_range = \ {'norm_type':['none'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0] } output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, self.features, [[0, 3, 2], [8, 6, 5], [4, 1, 7]]) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.26666666666666666, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.15272340058922063, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.22222222222222221, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.452887116343635, places=4) expected_top_model_param = {'norm_type':'none', 'n_estimators':10, 'max_depth':None, 'random_state':0 } expected_top_ratio = 0.6666666666666666 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_cross_validation(self): print "test cross validation..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type': 'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) indices_train = range(250) indices_test = range(250, 300) output = ModelCrossValidation.run_cross_validation( train_test_model_class, model_param, feature_df, indices_train, indices_test) self.assertAlmostEquals(output['stats']['SRCC'], 0.93493301443051136, places=4) self.assertAlmostEquals(output['stats']['PCC'], 0.9413390374529329, places=4) self.assertAlmostEquals(output['stats']['KENDALL'], 0.78029280419726044, places=4) self.assertAlmostEquals(output['stats']['RMSE'], 0.32357946626958406, places=4) self.assertEquals(output['model'].TYPE, "RANDOMFOREST")
def test_run_nested_kfold_cross_validation_randomforest(self): print "test nested k-fold cross validation on random forest..." train_test_model_class = SklearnRandomForestTrainTestModel model_param_search_range = \ {'norm_type':['normalize'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0]} output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, self.features, 3) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.40167715620274708, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.11009919053282299, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.3681348274719265, places=4) expected_top_model_param = {'norm_type':'normalize', 'n_estimators':10, 'max_depth':None, 'random_state':0 } expected_top_ratio = 0.6666666666666666 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_kfold_cross_validation_with_list_input(self): print "test k-fold cross validation with list input..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type': 'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) feature_df = feature_df[:200] output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, [range(0, 50), range(130, 200), range(50, 130)]) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.90636761259756715) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.90819953685397914) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.72937284548325965) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.49899297305829415)
def test_run_nested_kfold_cross_validation_libsvmnusvr(self): print "test nested k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmNusvrTrainTestModel model_param_search_range = \ {'norm_type':['normalize', 'clip_0to1', 'clip_minus1to1'], 'kernel':['rbf'], 'nu': [0.5], 'C': [1, 2], 'gamma': [0.0] } output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, self.features, 3) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.30962614123961751, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], -0.1535643705229309, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.14085904245475275, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.5853397658781734, places=4) expected_top_model_param = {'norm_type':'clip_0to1', 'kernel':'rbf', 'nu':0.5, 'C':1, 'gamma':0.0, } expected_top_ratio = 1.0 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_nested_kfold_cross_validation_libsvmnusvr(self): print "test nested k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmnusvrTrainTestModel model_param_search_range = \ {'norm_type':['normalize', 'clip_0to1', 'clip_minus1to1'], 'kernel':['rbf'], 'nu': [0.5, 1.0], 'C': [1, 2], 'gamma': [0.0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.93704238362264514) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.94734024567912978) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.77785381654919195) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.34039563991411448) expected_top_model_param = {'norm_type':'clip_0to1', 'kernel':'rbf', 'nu':1.0, 'C':1, 'gamma':0.0, } expected_top_ratio = 0.5 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_kfold_cross_validation_randomforest(self): print "test k-fold cross validation on random forest..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type': 'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92695443548602008, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93189074441713937, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76031309571294092, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.40381451586590256, places=4)
def test_run_nested_kfold_cross_validation_randomforest(self): print "test nested k-fold cross validation on random forest..." train_test_model_class = RandomForestTrainTestModel model_param_search_range = \ {'norm_type':['normalize'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92805802153293737) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.94066838465382363) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76196220071567478) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.37660623901376861) expected_top_model_param = {'norm_type':'normalize', 'n_estimators':90, 'max_depth':None, 'random_state':0 } expected_top_ratio = 0.5 # self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_unroll_dict_of_lists(self):
    """_unroll_dict_of_lists expands a dict of value lists into the full
    cartesian product of parameter dicts, in a deterministic order."""
    model_param_search_range = {
        'norm_type': ['normalize', 'clip_0to1'],
        'n_estimators': [10, 50],
        'random_state': [0]
    }
    dicts = ModelCrossValidation._unroll_dict_of_lists(
        model_param_search_range)
    expected_dicts = [
        {'norm_type': 'normalize', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
    ]
    self.assertEqual(dicts, expected_dicts)
def test_run_kfold_cross_validation_libsvmnusvr(self): print "test k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmnusvrTrainTestModel model_param = {'norm_type': 'normalize'} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92387451180595015, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.92481147926825724, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.75416215405673581, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.42231775639097513, places=4)
def test_run_cross_validation(self): print "test cross validation..." train_test_model_class = SklearnRandomForestTrainTestModel model_param = {'norm_type': 'normalize', 'random_state': 0} indices_train = range(9) indices_test = range(9) output = ModelCrossValidation.run_cross_validation( train_test_model_class, model_param, self.features, indices_train, indices_test) self.assertAlmostEquals(output['stats']['SRCC'], 0.93333333333333324, places=4) self.assertAlmostEquals(output['stats']['PCC'], 0.97754442316039469, places=4) self.assertAlmostEquals(output['stats']['KENDALL'], 0.83333333333333337, places=4) self.assertAlmostEquals(output['stats']['RMSE'], 0.17634739353518517, places=4) self.assertEquals(output['model'].TYPE, "RANDOMFOREST")
def test_run_nested_kfold_cross_validation_with_list_input(self): print "test nested k-fold cross validation with list input..." train_test_model_class = RandomForestTrainTestModel model_param_search_range = \ {'norm_type':['normalize'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) feature_df = feature_df[:200] output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, [range(0,50), range(130, 200), range(50, 130)] ) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92549459243170684) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93070443071372855) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76385104263763215) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.43223946862572299) expected_top_model_param = {'norm_type':'normalize', 'n_estimators':90, 'max_depth':3, 'random_state':0 } expected_top_ratio = 0.6666666666666666 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def cv_on_dataset(dataset, feature_param, model_param, ax, result_store, contentid_groups, logger=None, aggregate_method=np.mean): assets = read_dataset(dataset) kfold = construct_kfold_list(assets, contentid_groups) fassembler = FeatureAssembler( feature_dict=feature_param.feature_dict, feature_option_dict=None, assets=assets, logger=logger, delete_workdir=True, result_store=result_store, optional_dict=None, optional_dict2=None, parallelize=True, fifo_mode=True, # parallelize=False, fifo_mode=False, # VQM ) fassembler.run() results = fassembler.results for result in results: result.set_score_aggregate_method(aggregate_method) model_class = TrainTestModel.find_subclass(model_param.model_type) # run nested kfold cv for each combintation cv_output = ModelCrossValidation.run_kfold_cross_validation( model_class, model_param.model_param_dict, results, kfold, logger=logger, ) print 'Feature parameters: {}'.format(feature_param.feature_dict) print 'Model type: {}'.format(model_param.model_type) print 'Model parameters: {}'.format(model_param.model_param_dict) print 'Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats'])) if ax is not None: model_class.plot_scatter(ax, cv_output['aggr_stats'], cv_output['contentids']) ax.set_xlabel('True Score') ax.set_ylabel("Predicted Score") ax.grid() ax.set_title("Dataset: {dataset}, Model: {model},\n{stats}".format( dataset=dataset.dataset_name, model=model_param.model_type, stats=model_class.format_stats(cv_output['aggr_stats']))) return assets, cv_output
def cv_on_dataset( dataset, feature_param, model_param, ax, result_store, contentid_groups, logger=None, aggregate_method=np.mean ): assets = read_dataset(dataset) kfold = construct_kfold_list(assets, contentid_groups) fassembler = FeatureAssembler( feature_dict=feature_param.feature_dict, feature_option_dict=None, assets=assets, logger=logger, delete_workdir=True, result_store=result_store, optional_dict=None, optional_dict2=None, parallelize=True, fifo_mode=True, # parallelize=False, fifo_mode=False, # VQM ) fassembler.run() results = fassembler.results for result in results: result.set_score_aggregate_method(aggregate_method) model_class = TrainTestModel.find_subclass(model_param.model_type) # run nested kfold cv for each combintation cv_output = ModelCrossValidation.run_kfold_cross_validation( model_class, model_param.model_param_dict, results, kfold, logger=logger ) print "Feature parameters: {}".format(feature_param.feature_dict) print "Model type: {}".format(model_param.model_type) print "Model parameters: {}".format(model_param.model_param_dict) print "Stats: {}".format(model_class.format_stats(cv_output["aggr_stats"])) if ax is not None: model_class.plot_scatter(ax, cv_output["aggr_stats"], cv_output["contentids"]) ax.set_xlabel("True Score") ax.set_ylabel("Predicted Score") ax.grid() ax.set_title( "Dataset: {dataset}, Model: {model},\n{stats}".format( dataset=dataset.dataset_name, model=model_param.model_type, stats=model_class.format_stats(cv_output["aggr_stats"]), ) ) return assets, cv_output
def test_run_kfold_cross_validation_libsvmnusvr(self): print "test k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmNusvrTrainTestModel model_param = {'norm_type': 'normalize'} output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, self.features, 3) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.31666666666666665, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.33103132578536021, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.27777777777777779, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2855099934718619, places=4)
def test_run_kfold_cross_validation_randomforest(self): print "test k-fold cross validation on random forest..." train_test_model_class = SklearnRandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, self.features, 3) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.28452131897694583, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.1689046198483892, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.084515425472851652, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.344683833136588, places=4)
def test_unroll_dict_of_lists(self):
    """_unroll_dict_of_lists expands a dict of value lists into the full
    cartesian product of parameter dicts, in a deterministic order."""
    model_param_search_range = {'norm_type': ['normalize', 'clip_0to1'],
                                'n_estimators': [10, 50],
                                'random_state': [0]}
    dicts = ModelCrossValidation._unroll_dict_of_lists(model_param_search_range)
    expected_dicts = [
        {'norm_type': 'normalize', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
    ]
    self.assertEqual(dicts, expected_dicts)
def test_run_kfold_cross_validation_extratrees(self): print "test k-fold cross validation on extra trees..." train_test_model_class = SklearnExtraTreesTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, self.features, 3) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.17320508075688773, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.33023719320146966, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.14907119849998599, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.3279056191361394, places=4)
def test_run_kfold_cross_validation_with_list_input(self): print "test k-fold cross validation with list input..." train_test_model_class = SklearnRandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, self.features, [[0, 3, 8], [2, 1, 5], [4, 6, 7]]) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.18333333333333335, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.35513638509959689, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.1111111111111111, places=3) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 1.2740400878438387, places=3)
def test_find_most_frequent_dict(self):
    """_find_most_frequent_dict returns the dict occurring most often in a
    list, together with its occurrence count."""
    dicts = [
        {'norm_type': 'normalize', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'normalize', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 10, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
        {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0},
    ]
    # renamed from 'dict' which shadowed the builtin
    most_freq_dict, count = ModelCrossValidation._find_most_frequent_dict(dicts)
    expected_dict = {'norm_type': 'clip_0to1', 'n_estimators': 50, 'random_state': 0}
    expected_count = 2
    self.assertEqual(most_freq_dict, expected_dict)
    self.assertEqual(count, expected_count)
def test_run_nested_kfold_cross_validation_libsvmnusvr(self): print "test nested k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmnusvrTrainTestModel model_param_search_range = \ {'norm_type':['normalize', 'clip_0to1', 'clip_minus1to1'], 'kernel':['rbf'], 'nu': [0.5, 1.0], 'C': [1, 2], 'gamma': [0.0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.93704238362264514, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.94734024567912978, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.77785381654919195, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.34039563991411448, places=4) expected_top_model_param = { 'norm_type': 'clip_0to1', 'kernel': 'rbf', 'nu': 1.0, 'C': 1, 'gamma': 0.0, } expected_top_ratio = 0.5 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_nested_kfold_cross_validation_with_list_input(self): print "test nested k-fold cross validation with list input..." train_test_model_class = RandomForestTrainTestModel model_param_search_range = \ {'norm_type':['normalize'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) feature_df = feature_df[:200] output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, [range(0, 50), range(130, 200), range(50, 130)]) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92549459243170684, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93070443071372855, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76385104263763215, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.43223946862572299, places=4) expected_top_model_param = { 'norm_type': 'normalize', 'n_estimators': 90, 'max_depth': 3, 'random_state': 0 } expected_top_ratio = 0.6666666666666666 self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_kfold_cross_validation_randomforest(self): print "test k-fold cross validation on random forest..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92695443548602008) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.93189074441713937) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76031309571294092) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.40381451586590256)
def test_run_kfold_cross_validation_libsvmnusvr(self): print "test k-fold cross validation on libsvmnusvr..." train_test_model_class = LibsvmnusvrTrainTestModel model_param = {'norm_type': 'normalize'} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92387451180595015) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.92481147926825724) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.75416215405673581) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.42231775639097513)
def test_run_cross_validation(self): print "test cross validation..." train_test_model_class = SklearnRandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} indices_train = range(9) indices_test = range(9) output = ModelCrossValidation.run_cross_validation( train_test_model_class, model_param, self.features, indices_train, indices_test) self.assertAlmostEquals(output['stats']['SRCC'], 0.93333333333333324, places=4) self.assertAlmostEquals(output['stats']['PCC'], 0.97754442316039469, places=4) self.assertAlmostEquals(output['stats']['KENDALL'], 0.83333333333333337, places=4) self.assertAlmostEquals(output['stats']['RMSE'], 0.17634739353518517, places=4) self.assertEquals(output['model'].TYPE, "RANDOMFOREST")
def test_run_nested_kfold_cross_validation_randomforest(self): print "test nested k-fold cross validation on random forest..." train_test_model_class = RandomForestTrainTestModel model_param_search_range = \ {'norm_type':['normalize'], 'n_estimators':[10, 90], 'max_depth':[None, 3], 'random_state': [0] } feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict( eval(open(feature_df_file, "r").read())) output = ModelCrossValidation.run_nested_kfold_cross_validation( train_test_model_class, model_param_search_range, feature_df, 6) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.92805802153293737, places=4) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.94066838465382363, places=4) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.76196220071567478, places=4) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.37660623901376861, places=4) expected_top_model_param = { 'norm_type': 'normalize', 'n_estimators': 90, 'max_depth': None, 'random_state': 0 } expected_top_ratio = 0.5 # self.assertEquals(output['top_model_param'], expected_top_model_param) self.assertEquals(output['top_ratio'], expected_top_ratio)
def test_run_kfold_cross_validation_with_list_input(self): print "test k-fold cross validation with list input..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) feature_df = feature_df[:200] output = ModelCrossValidation.run_kfold_cross_validation( train_test_model_class, model_param, feature_df, [range(0,50), range(130, 200), range(50, 130)]) self.assertAlmostEquals(output['aggr_stats']['SRCC'], 0.90636761259756715) self.assertAlmostEquals(output['aggr_stats']['PCC'], 0.90819953685397914) self.assertAlmostEquals(output['aggr_stats']['KENDALL'], 0.72937284548325965) self.assertAlmostEquals(output['aggr_stats']['RMSE'], 0.49899297305829415)
def test_run_cross_validation(self): print "test cross validation..." train_test_model_class = RandomForestTrainTestModel model_param = {'norm_type':'normalize', 'random_state': 0} feature_df_file = config.ROOT + \ "/python/test/resource/sample_feature_extraction_results.json" feature_df = pd.DataFrame.from_dict(eval(open(feature_df_file, "r").read())) indices_train = range(250) indices_test = range(250, 300) output = ModelCrossValidation.run_cross_validation( train_test_model_class, model_param, feature_df, indices_train, indices_test) self.assertAlmostEquals(output['stats']['SRCC'], 0.93493301443051136) self.assertAlmostEquals(output['stats']['PCC'], 0.9413390374529329) self.assertAlmostEquals(output['stats']['KENDALL'], 0.78029280419726044) self.assertAlmostEquals(output['stats']['RMSE'], 0.32357946626958406) self.assertEquals(output['model'].TYPE, "RANDOMFOREST")