def test_get_xs_ys(self):
    """Verify extraction of feature vectors (xs) and labels (ys) from
    self.features, both with explicit row indices and with the default
    of all rows.

    Fix: use assertEqual/assertAlmostEqual — the plural-form aliases
    (assertEquals/assertAlmostEquals) are deprecated and removed in
    Python 3.12.
    """
    # subset of rows [0, 1, 2]
    xs = TrainTestModel.get_xs_from_results(self.features, [0, 1, 2])
    self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 3)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                           128.26146851380497, places=4)
    self.assertEqual(len(xs['Moment_noref_feature_var_score']), 3)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                           1569.2395085695462, places=4)

    # all rows (no indices argument)
    xs = TrainTestModel.get_xs_from_results(self.features)
    self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 9)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                           111.59099599173773, places=4)
    self.assertEqual(len(xs['Moment_noref_feature_var_score']), 9)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                           1806.8620377229011, places=4)

    ys = TrainTestModel.get_ys_from_results(self.features, [0, 1, 2])
    expected_ys = {
        'label': np.array([2.5, 3.9, 5.0]),
        'content_id': np.array([0, 1, 2])
    }
    self.assertTrue(all(ys['label'] == expected_ys['label']))
    self.assertTrue(all(ys['content_id'] == expected_ys['content_id']))
def run_cross_validation(train_test_model_class, model_param, results_or_df,
                         train_indices, test_indices):
    """
    Simple cross validation.

    :param train_test_model_class:
    :param model_param:
    :param results_or_df: list of BasicResult, or pandas.DataFrame
    :param train_indices:
    :param test_indices:
    :return:
    """
    # split features into a training fold and a held-out testing fold
    xys_train = TrainTestModel.get_xys_from_results(results_or_df, train_indices)
    xs_test = TrainTestModel.get_xs_from_results(results_or_df, test_indices)
    ys_test = TrainTestModel.get_ys_from_results(results_or_df, test_indices)

    # fit a fresh model on the training fold, then score the testing fold
    model = train_test_model_class(model_param, None)
    model.train(xys_train)

    return {
        'stats': model.evaluate(xs_test, ys_test),
        'model': model,
        'contentids': ys_test['content_id'],  # for plotting purpose
    }
def test_get_xs_ys(self):
    """Check get_xs_from_results / get_ys_from_results on self.features,
    with an explicit index list and with all rows by default.

    Fix: replace deprecated assertEquals/assertAlmostEquals aliases
    (removed in Python 3.12) with assertEqual/assertAlmostEqual.
    """
    xs = TrainTestModel.get_xs_from_results(self.features, [0, 1, 2])
    self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 3)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                           128.26146851380497, places=4)
    self.assertEqual(len(xs['Moment_noref_feature_var_score']), 3)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                           1569.2395085695462, places=4)

    xs = TrainTestModel.get_xs_from_results(self.features)
    self.assertEqual(len(xs['Moment_noref_feature_1st_score']), 9)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_1st_score']),
                           111.59099599173773, places=4)
    self.assertEqual(len(xs['Moment_noref_feature_var_score']), 9)
    self.assertAlmostEqual(np.mean(xs['Moment_noref_feature_var_score']),
                           1806.8620377229011, places=4)

    ys = TrainTestModel.get_ys_from_results(self.features, [0, 1, 2])
    expected_ys = {'label': np.array([2.5, 3.9, 5.0]),
                   'content_id': np.array([0, 1, 2])}
    self.assertTrue(all(ys['label'] == expected_ys['label']))
    self.assertTrue(all(ys['content_id'] == expected_ys['content_id']))
def train_test_on_dataset(train_dataset, test_dataset,
                          feature_param, model_param,
                          train_ax, test_ax, result_store,
                          parallelize=True, logger=None, fifo_mode=True,
                          output_model_filepath=None):
    """Train a model on train_dataset and optionally evaluate it on
    test_dataset, plotting prediction-vs-label scatters on the given axes.

    Fixes:
    - the testing-branch FeatureAssembler previously hard-coded
      parallelize=True, ignoring the `parallelize` argument that the
      training branch honors; it now uses the argument consistently.
    - content-id sequences are materialized as lists, since map() returns
      a one-shot iterator on Python 3.

    :param train_dataset: dataset object to train on
    :param test_dataset: dataset object to test on, or None to skip testing
    :param feature_param: object carrying feature_dict for FeatureAssembler
    :param model_param: object carrying model_type and model_param_dict
    :param train_ax: matplotlib axis for the training scatter plot, or None
    :param test_ax: matplotlib axis for the testing scatter plot, or None
    :param result_store: result store passed through to FeatureAssembler
    :param parallelize: run feature extraction in parallel
    :param logger:
    :param fifo_mode:
    :param output_model_filepath: if not None, save the trained model here
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats)
    """
    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict
    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)
    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)
    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)
    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        # list, not map(): plot_scatter may iterate more than once on Py3
        train_content_ids = [asset.content_id for asset in train_assets]
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title(
            "Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=train_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(train_stats)
            ))

    # === test model on test dataset ===
    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            parallelize=parallelize,  # was hard-coded True; honor argument
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)
        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)
        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title(
                "Dataset: {dataset}, Model: {model}\n{stats}".format(
                    dataset=test_dataset.dataset_name,
                    model=model.model_id,
                    stats=TrainTestModel.format_stats(test_stats)
                ))

    return train_fassembler, train_assets, train_stats, \
        test_fassembler, test_assets, test_stats
def train_test_on_dataset(train_dataset, test_dataset,
                          feature_param, model_param,
                          train_ax, test_ax, result_store,
                          parallelize=True, logger=None, fifo_mode=True,
                          output_model_filepath=None):
    """Train a model on train_dataset; if test_dataset is given, evaluate
    the trained model on it. Scatter plots are drawn on train_ax/test_ax
    when those axes are provided.

    Fixes:
    - the testing-branch FeatureAssembler hard-coded parallelize=True
      instead of respecting the `parallelize` parameter (the training
      branch already respected it); now consistent.
    - content ids are built with a list comprehension rather than map(),
      which returns a single-pass iterator on Python 3.

    :param train_dataset: dataset object to train on
    :param test_dataset: dataset object to test on, or None to skip testing
    :param feature_param: object carrying feature_dict for FeatureAssembler
    :param model_param: object carrying model_type and model_param_dict
    :param train_ax: matplotlib axis for the training scatter plot, or None
    :param test_ax: matplotlib axis for the testing scatter plot, or None
    :param result_store: result store passed through to FeatureAssembler
    :param parallelize: run feature extraction in parallel
    :param logger:
    :param fifo_mode:
    :param output_model_filepath: if not None, save the trained model here
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats)
    """
    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict
    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)
    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)
    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)
    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        # list comprehension: map() on Py3 is a one-shot iterator
        train_content_ids = [asset.content_id for asset in train_assets]
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title(
            "Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=train_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(train_stats)))

    # === test model on test dataset ===
    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            parallelize=parallelize,  # was hard-coded True; honor argument
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)
        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)
        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title(
                "Dataset: {dataset}, Model: {model}\n{stats}".format(
                    dataset=test_dataset.dataset_name,
                    model=model.model_id,
                    stats=TrainTestModel.format_stats(test_stats)))

    return train_fassembler, train_assets, train_stats, \
        test_fassembler, test_assets, test_stats