def _run_corrupt_probs(dataset, corrupt_probs, model_class, seed, perf_type='rmse'): def run_one_corrput_prob(corrupt_prob, dataset, seed): np.random.seed(seed) info_dict = { 'corrupt_probability': corrupt_prob, } dataset_reader = CorruptDataRawDatasetReader(dataset, input_dict=info_dict) subjective_model = model_class(dataset_reader) try: result = subjective_model.run_modeling(normalize_final=False) except ValueError as e: print 'Warning: {}, return result None'.format(e) result = None return dataset_reader, result inputs = [] for corrupt_prob in corrupt_probs: input = [corrupt_prob, dataset, seed] inputs.append(input) outputs = map(lambda input: run_one_corrput_prob(*input), inputs) result0 = model_class( RawDatasetReader(dataset)).run_modeling(normalize_final=False) result0_qs = np.array(result0['quality_scores']) result0_qs_mean = np.mean(result0_qs) result0_qs_std = np.std(result0_qs) result0_qs = (result0_qs - result0_qs_mean) / result0_qs_std perfs = [] datasizes = [] for input, output in zip(inputs, outputs): corrupt_prob, dataset, seed = input reader, result = output result_qs = np.array(result['quality_scores']) result_qs = (result_qs - result0_qs_mean) / result0_qs_std if result is None: perf = float('NaN') else: if perf_type == 'pcc': perf, _ = scipy.stats.pearsonr(result_qs, result0_qs) elif perf_type == 'rmse': perf = np.sqrt( np.mean(np.power(result_qs - result0_qs, 2.0))) else: assert False datasize = corrupt_prob perfs.append(perf) datasizes.append(datasize) return datasizes, perfs
def test_mos_subjective_model_normalize_final(self):
    """MOS quality scores are standardized (zero mean) with normalize_final=True."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = MosModel(reader)
    modeling_result = model.run_modeling(normalize_final=True)
    quality_scores = modeling_result['quality_scores']
    self.assertAlmostEquals(quality_scores[0], 1.1318646945818083, places=4)
    self.assertAlmostEquals(quality_scores[10], -1.2400334499143002, places=4)
    self.assertAlmostEquals(np.mean(quality_scores), 0.0, places=4)
def test_mos_subjective_model(self):
    """Default MOS modeling reproduces known per-video and mean scores."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = MosModel(reader)
    modeling_result = model.run_modeling()
    quality_scores = modeling_result['quality_scores']
    self.assertAlmostEquals(quality_scores[0], 4.884615384615385, places=4)
    self.assertAlmostEquals(quality_scores[10], 2.0769230769230771, places=4)
    self.assertAlmostEquals(np.mean(quality_scores), 3.544790652385589, places=4)
def test_mos_subjective_model_transform_final(self):
    """transform_final applies the linear map p1 * score + p0 to MOS output."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = MosModel(reader)
    modeling_result = model.run_modeling(transform_final={'p1': 10, 'p0': 1})
    quality_scores = modeling_result['quality_scores']
    self.assertAlmostEquals(quality_scores[0], 49.84615384615385, places=4)
    self.assertAlmostEquals(quality_scores[10], 21.769230769230771, places=4)
    self.assertAlmostEquals(np.mean(quality_scores), 36.44790652385589, places=4)
def test_mos_subjective_model_output2(self):
    """Aggregated dataset replaces raw 'os' scores with a 'groundtruth' field."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = MosModel(reader)
    model.run_modeling()
    aggregated = model.to_aggregated_dataset()
    first_dis_video = aggregated.dis_videos[0]
    self.assertTrue('groundtruth' in first_dis_video)
    self.assertTrue('os' not in first_dis_video)
    self.assertAlmostEquals(first_dis_video['groundtruth'], 4.884615384615385, places=4)
def _run_partial_corrupt_nums(dataset, subject_nums, model_class, seed, perf_type='rmse'):
    """Evaluate model_class when a subset of subjects has corrupted scores.

    For each count in subject_nums, randomly pick that many subjects, corrupt
    their scores with probability 0.5, re-run the model, and compare its
    standardized quality scores against the clean-dataset baseline.

    :return: (datasizes, perfs) — one entry per subject count; datasizes are
             the subject counts, perfs the 'rmse' or 'pcc' metric values.
    """

    def run_single(num_subject, dataset, seed):
        # Re-seed per run so subject selection is reproducible across counts.
        np.random.seed(seed)
        num_total = len(dataset.dis_videos[0]['os'])
        info_dict = {
            'selected_subjects': np.random.permutation(num_total)[:num_subject],
            'corrupt_probability': 0.5,
        }
        reader = CorruptSubjectRawDatasetReader(dataset, input_dict=info_dict)
        model = model_class(reader)
        modeling_result = model.run_modeling(normalize_final=False)
        return reader, modeling_result

    arg_lists = [[num, dataset, seed] for num in subject_nums]
    outputs = [run_single(*args) for args in arg_lists]

    # Baseline on the clean dataset; standardize with its mean/std so both
    # metrics compare on the same scale.
    baseline = model_class(
        RawDatasetReader(dataset)).run_modeling(normalize_final=False)
    baseline_qs = np.array(baseline['quality_scores'])
    baseline_mean = np.mean(baseline_qs)
    baseline_std = np.std(baseline_qs)
    baseline_qs = (baseline_qs - baseline_mean) / baseline_std

    perfs = []
    datasizes = []
    for args, (reader, modeling_result) in zip(arg_lists, outputs):
        num_subject = args[0]

        current_qs = np.array(modeling_result['quality_scores'])
        current_qs = (current_qs - baseline_mean) / baseline_std

        if perf_type == 'pcc':
            perf, _ = scipy.stats.pearsonr(current_qs, baseline_qs)
        elif perf_type == 'rmse':
            perf = np.sqrt(np.mean(np.power(current_qs - baseline_qs, 2.0)))
        else:
            assert False

        perfs.append(perf)
        datasizes.append(np.prod(num_subject))

    return datasizes, perfs
class RawDatasetReaderTest(unittest.TestCase):
    """Tests for RawDatasetReader against the public NFLX raw dataset fixture."""

    def setUp(self):
        fixture_path = config.ROOT + '/python/test/resource/NFLX_dataset_public_raw.py'
        self.dataset = import_python_file(fixture_path)
        self.dataset_reader = RawDatasetReader(self.dataset)

    def test_read_dataset_stats(self):
        reader = self.dataset_reader
        self.assertEquals(reader.num_ref_videos, 9)
        self.assertEquals(reader.num_dis_videos, 79)
        self.assertEquals(reader.num_observers, 26)

    def test_opinion_score_2darray(self):
        scores_2d = self.dataset_reader.opinion_score_2darray
        self.assertAlmostEquals(np.mean(scores_2d), 3.544790652385589, places=4)
        self.assertAlmostEquals(np.mean(np.std(scores_2d, axis=1)),
                                0.64933186478291516, places=4)

    def test_dis_videos_content_ids(self):
        ids = self.dataset_reader.content_id_of_dis_videos
        self.assertAlmostEquals(np.mean(ids), 3.8607594936708862, places=4)

    def test_disvideo_is_refvideo(self):
        flags = self.dataset_reader.disvideo_is_refvideo
        # Reference videos are expected to occupy the first 9 slots.
        self.assertItemsEqual(indices(flags, lambda e: e is True), range(9))

    def test_ref_score(self):
        self.assertEqual(self.dataset_reader.ref_score, 5.0)

    def test_to_persubject_dataset_wrong_dim(self):
        # A flat array of the wrong shape must be rejected.
        with self.assertRaises(AssertionError):
            dataset = self.dataset_reader.to_persubject_dataset(np.zeros(3000))
            self.assertEqual(len(dataset.dis_videos), 2054)

    def test_to_persubject_dataset(self):
        dataset = self.dataset_reader.to_persubject_dataset(np.zeros([79, 26]))
        # 79 videos x 26 subjects = 2054 per-subject entries.
        self.assertEqual(len(dataset.dis_videos), 2054)
def test_persubject_subjective_model_output(self):
    """PerSubjectModel writes an aggregated dataset file with transformed groundtruth."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = PerSubjectModel(reader)
    model.run_modeling(transform_final={'p1': 25, 'p0': -25})
    model.to_aggregated_dataset_file(self.output_dataset_filepath)
    self.assertTrue(os.path.exists(self.output_dataset_filepath))
    written = import_python_file(self.output_dataset_filepath)
    first_dis_video = written.dis_videos[0]
    self.assertTrue('groundtruth' in first_dis_video)
    self.assertTrue('os' not in first_dis_video)
    self.assertAlmostEquals(first_dis_video['groundtruth'], 100.0, places=4)
def _run_subject_nums(dataset, subject_nums, model_class, seed, perf_type='rmse'):
    """Evaluate model_class as a function of the number of subjects kept.

    For each count in subject_nums, randomly select that many subjects,
    re-run the model on the reduced dataset, and compare its standardized
    quality scores against the full-dataset baseline.

    :return: (datasizes, perfs) — datasizes are the actual subject counts
             used by each reader; perfs the 'rmse' or 'pcc' metric values.
    """

    def run_single(num_subject, dataset, seed):
        # Re-seed per run so the subject selection is reproducible.
        np.random.seed(seed)
        total_subject = len(dataset.dis_videos[0]['os'])
        info_dict = {
            'selected_subjects': np.random.permutation(total_subject)[:num_subject]
        }
        reader = SelectSubjectRawDatasetReader(dataset, input_dict=info_dict)
        model = model_class(reader)
        modeling_result = model.run_modeling(normalize_final=False)
        return reader, modeling_result

    arg_lists = [[num, dataset, seed] for num in subject_nums]
    outputs = [run_single(*args) for args in arg_lists]

    # Baseline on the full dataset; standardize with its mean/std so both
    # metrics compare on the same scale.
    baseline = model_class(
        RawDatasetReader(dataset)).run_modeling(normalize_final=False)
    baseline_qs = np.array(baseline['quality_scores'])
    baseline_mean = np.mean(baseline_qs)
    baseline_std = np.std(baseline_qs)
    baseline_qs = (baseline_qs - baseline_mean) / baseline_std

    perfs = []
    datasizes = []
    for reader, modeling_result in outputs:
        current_qs = np.array(modeling_result['quality_scores'])
        current_qs = (current_qs - baseline_mean) / baseline_std

        if perf_type == 'pcc':
            perf, _ = scipy.stats.pearsonr(current_qs, baseline_qs)
        elif perf_type == 'rmse':
            perf = np.sqrt(np.mean(np.power(current_qs - baseline_qs, 2.0)))
        else:
            assert False

        # Number of subject columns actually present in the reduced dataset.
        perfs.append(perf)
        datasizes.append(reader.opinion_score_2darray.shape[1])

    return datasizes, perfs
def test_mos_subjective_model_output_custom_resampling(self):
    """Custom resampling_type is written to the aggregated dataset file."""
    dataset = import_python_file(self.dataset_filepath)
    reader = RawDatasetReader(dataset)
    model = MosModel(reader)
    model.run_modeling()
    model.to_aggregated_dataset_file(self.output_dataset_filepath,
                                     resampling_type='lanczos')
    self.assertTrue(os.path.exists(self.output_dataset_filepath))
    written = import_python_file(self.output_dataset_filepath)
    self.assertFalse(hasattr(written, 'quality_height'))
    self.assertFalse(hasattr(written, 'quality_width'))
    self.assertEquals(written.resampling_type, 'lanczos')
    first_dis_video = written.dis_videos[0]
    self.assertTrue('groundtruth' in first_dis_video)
    self.assertTrue('os' not in first_dis_video)
    self.assertAlmostEquals(first_dis_video['groundtruth'], 4.884615384615385, places=4)
def train_test_vmaf_on_dataset(train_dataset, test_dataset, feature_param, model_param, train_ax, test_ax, result_store, parallelize=True, logger=None, fifo_mode=True, output_model_filepath=None, aggregate_method=np.mean, **kwargs): train_assets = read_dataset(train_dataset, **kwargs) train_raw_assets = None try: for train_asset in train_assets: assert train_asset.groundtruth is not None except AssertionError: # no groundtruth, try do subjective modeling subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel subjective_model = subj_model_class(RawDatasetReader(train_dataset)) subjective_model.run_modeling(**kwargs) train_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs) train_raw_assets = train_assets train_assets = read_dataset(train_dataset_aggregate, **kwargs) train_fassembler = FeatureAssembler( feature_dict = feature_param.feature_dict, feature_option_dict = None, assets = train_assets, logger=logger, fifo_mode=fifo_mode, delete_workdir=True, result_store=result_store, optional_dict=None, optional_dict2=None, parallelize=parallelize, ) train_fassembler.run() train_features = train_fassembler.results for result in train_features: result.set_score_aggregate_method(aggregate_method) model_type = model_param.model_type model_param_dict = model_param.model_param_dict model_class = TrainTestModel.find_subclass(model_type) train_xys = model_class.get_xys_from_results(train_features) train_xs = model_class.get_xs_from_results(train_features) train_ys = model_class.get_ys_from_results(train_features) model = model_class(model_param_dict, logger) model.train(train_xys) # append additional information to model before saving, so that # VmafQualityRunner can read and process model.append_info('feature_dict', feature_param.feature_dict) if 'score_clip' in model_param_dict: VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip']) if 'score_transform' in 
model_param_dict: VmafQualityRunner.set_transform_score(model, model_param_dict['score_transform']) train_ys_pred = VmafQualityRunner.predict_with_model(model, train_xs, **kwargs) raw_groundtruths = None if train_raw_assets is None else \ map(lambda asset: asset.raw_groundtruth, train_raw_assets) train_stats = model.get_stats(train_ys['label'], train_ys_pred, ys_label_raw=raw_groundtruths) log = 'Stats on training data: {}'.format(model.format_stats(train_stats)) if logger: logger.info(log) else: print log # save model if output_model_filepath is not None: model.to_file(output_model_filepath) if train_ax is not None: train_content_ids = map(lambda asset: asset.content_id, train_assets) model_class.plot_scatter(train_ax, train_stats, train_content_ids) train_ax.set_xlabel('True Score') train_ax.set_ylabel("Predicted Score") train_ax.grid() train_ax.set_title( "Dataset: {dataset}, Model: {model}\n{stats}".format( dataset=train_dataset.dataset_name, model=model.model_id, stats=model_class.format_stats(train_stats) )) # === test model on test dataset === if test_dataset is None: test_assets = None test_stats = None test_fassembler = None else: test_assets = read_dataset(test_dataset, **kwargs) test_raw_assets = None try: for test_asset in test_assets: assert test_asset.groundtruth is not None except AssertionError: # no groundtruth, try do subjective modeling subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel subjective_model = subj_model_class(RawDatasetReader(test_dataset)) subjective_model.run_modeling(**kwargs) test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs) test_raw_assets = test_assets test_assets = read_dataset(test_dataset_aggregate, **kwargs) test_fassembler = FeatureAssembler( feature_dict = feature_param.feature_dict, feature_option_dict = None, assets = test_assets, logger=logger, fifo_mode=fifo_mode, delete_workdir=True, result_store=result_store, 
optional_dict=None, optional_dict2=None, parallelize=True, ) test_fassembler.run() test_features = test_fassembler.results for result in test_features: result.set_score_aggregate_method(aggregate_method) test_xs = model_class.get_xs_from_results(test_features) test_ys = model_class.get_ys_from_results(test_features) test_ys_pred = VmafQualityRunner.predict_with_model(model, test_xs, **kwargs) raw_groundtruths = None if test_raw_assets is None else \ map(lambda asset: asset.raw_groundtruth, test_raw_assets) test_stats = model_class.get_stats(test_ys['label'], test_ys_pred, ys_label_raw=raw_groundtruths) log = 'Stats on testing data: {}'.format(model_class.format_stats(test_stats)) if logger: logger.info(log) else: print log if test_ax is not None: test_content_ids = map(lambda asset: asset.content_id, test_assets) model_class.plot_scatter(test_ax, test_stats, test_content_ids) test_ax.set_xlabel('True Score') test_ax.set_ylabel("Predicted Score") test_ax.grid() test_ax.set_title( "Dataset: {dataset}, Model: {model}\n{stats}".format( dataset=test_dataset.dataset_name, model=model.model_id, stats=model_class.format_stats(test_stats) )) return train_fassembler, train_assets, train_stats, \ test_fassembler, test_assets, test_stats, model
def test_on_dataset(test_dataset, runner_class, ax, result_store, model_filepath, parallelize=True, fifo_mode=True, aggregate_method=np.mean, type = 'regressor', **kwargs): if type == 'regressor': model_type = RegressorMixin elif type == 'classifier': model_type = ClassifierMixin else: assert False test_assets = read_dataset(test_dataset, **kwargs) test_raw_assets = None try: for test_asset in test_assets: assert test_asset.groundtruth is not None except AssertionError: # no groundtruth, try do subjective modeling subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel subjective_model = subj_model_class(RawDatasetReader(test_dataset)) subjective_model.run_modeling(**kwargs) test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs) test_raw_assets = test_assets test_assets = read_dataset(test_dataset_aggregate, **kwargs) if model_filepath is not None: optional_dict = {'model_filepath': model_filepath} else: optional_dict = None # run runner = runner_class( test_assets, None, fifo_mode=fifo_mode, delete_workdir=True, result_store=result_store, optional_dict=optional_dict, optional_dict2=None, ) runner.run(parallelize=parallelize) results = runner.results for result in results: result.set_score_aggregate_method(aggregate_method) # plot groundtruths = map(lambda asset: asset.groundtruth, test_assets) predictions = map(lambda result: result[runner_class.get_score_key()], results) raw_grountruths = None if test_raw_assets is None else \ map(lambda asset: asset.raw_groundtruth, test_raw_assets) stats = model_type.get_stats(groundtruths, predictions, ys_label_raw=raw_grountruths) print 'Stats on testing data: {}'.format(model_type.format_stats(stats)) if ax is not None: content_ids = map(lambda asset: asset.content_id, test_assets) model_type.plot_scatter(ax, stats, content_ids) ax.set_xlabel('True Score') ax.set_ylabel("Predicted Score") ax.grid() ax.set_title( 
"{runner}\n{stats}".format( dataset=test_assets[0].dataset, runner=runner_class.TYPE, stats=model_type.format_stats(stats), )) return test_assets, results
def from_dataset_file(cls, dataset_filepath):
    """Construct an instance directly from a dataset file path."""
    imported_dataset = import_python_file(dataset_filepath)
    reader = RawDatasetReader(imported_dataset)
    return cls(reader)
def setUp(self):
    """Load the shared NFLX public raw dataset fixture before each test."""
    fixture_path = config.ROOT + '/python/test/resource/NFLX_dataset_public_raw.py'
    self.dataset = import_python_file(fixture_path)
    self.dataset_reader = RawDatasetReader(self.dataset)