def setUp(self):
    # Resolve the raw input dataset path plus the two workdir output targets
    # (the exported .py dataset and its compiled .pyc sibling).
    self.dataset_filepath = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
    workdir = SurealConfig.workdir_path
    self.output_dataset_filepath = workdir('NFLX_dataset_public_test.py')
    self.output_dataset_pyc_filepath = workdir('NFLX_dataset_public_test.pyc')
def setUp(self):
    # Convert the raw NFLX dataset into a within-subject paired-comparison
    # dataset and wrap it in a PairedCompDatasetReader for the tests.
    raw_path = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
    raw_dataset = import_python_file(raw_path)
    raw_reader = RawDatasetReader(raw_dataset)
    pc_dataset = raw_reader.to_pc_dataset(pc_type='within_subject')
    self.pc_dataset_reader = PairedCompDatasetReader(pc_dataset)
def setUp(self):
    # Reader that randomly corrupts 10% of the raw scores; seed fixed so
    # the corruption pattern is reproducible across test runs.
    raw = import_python_file(SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))
    np.random.seed(0)
    self.dataset_reader = CorruptDataRawDatasetReader(
        raw, input_dict={'corrupt_probability': 0.1})
def setUp(self):
    # Reader that corrupts the scores of the first five subjects; seeded
    # for deterministic corruption.
    raw = import_python_file(SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))
    np.random.seed(0)
    self.dataset_reader = CorruptSubjectRawDatasetReader(
        raw, input_dict={'selected_subjects': range(5)})
def setUp(self):
    # Reader restricted to the first 15 distorted videos of the raw dataset.
    raw = import_python_file(SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))
    np.random.seed(0)
    self.dataset_reader = SelectDisVideoRawDatasetReader(
        raw, input_dict={'selected_dis_videos': range(15)})
def setUp(self):
    # Reader over the AGH TV dataset restricted to its first 13 subjects.
    agh_path = SurealConfig.test_resource_path('quality_variation_2017_agh_tv_dataset.py')
    agh_dataset = import_python_file(agh_path)
    np.random.seed(0)
    self.dataset2_reader = SelectSubjectRawDatasetReader(
        agh_dataset, input_dict={'selected_subjects': list(range(13))})
def setUp(self):
    # Reader over the dict-keyed opinion-score dataset, keeping subjects 1 and 2.
    dict_os_path = SurealConfig.test_resource_path('test_dataset_os_as_dict.py')
    dict_os_dataset = import_python_file(dict_os_path)
    np.random.seed(0)
    self.dataset2_reader = SelectSubjectRawDatasetReader(
        dict_os_dataset, input_dict={'selected_subjects': np.array([1, 2])})
def main():
    # Driver: plot sample subjective-model results on the bundled datasets.
    # The trend experiments below are kept commented out; uncomment to run.
    filepaths = [
        SurealConfig.resource_path('dataset', 'NFLX_dataset_public_raw_last4outliers.py'),
        SurealConfig.resource_path('dataset', 'VQEGHD3_dataset_raw.py'),
    ]

    # ===== sample results =====
    model_classes = [
        MaximumLikelihoodEstimationModel,
        MosModel,
        # MaximumLikelihoodEstimationDmosModel,
        # DmosModel,
    ]
    plot_sample_results(filepaths, model_classes)

    # ===== plot trends (disabled) =====
    # datasize growth:
    # run_datasize_growth(filepaths)
    # corruption growth:
    # run_subject_corruption_growth(filepaths)
    # run_random_corruption_growth(filepaths)
    # run_subject_partial_corruption_growth(filepaths)
    # random missing growth:
    # run_missing_growth(filepaths)
    # synthetic data:
    # validate_with_synthetic_dataset()

    DisplayConfig.show()
def setUp(self):
    # Synthetic reader seeded with fixed ground-truth parameters.
    # NOTE: the order of the np.random calls below must not change — with a
    # fixed seed it determines the exact synthetic values generated.
    raw = import_python_file(SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))
    np.random.seed(0)
    synthetic_params = {
        'quality_scores': np.random.randint(1, 6, 79),
        'observer_bias': np.random.normal(0, 1, 26),
        'observer_inconsistency': np.abs(np.random.normal(0, 0.1, 26)),
        'content_bias': np.zeros(9),
        'content_ambiguity': np.zeros(9),
    }
    self.dataset_reader = SyntheticRawDatasetReader(raw, input_dict=synthetic_params)
def validate_with_synthetic_dataset():
    # The dataset file is used only for its dimensions and reference-video
    # mapping; the scores themselves are synthesized below.
    # NOTE: keep the np.random call order — with seed 0 it fixes the values.
    filepath = SurealConfig.resource_path('dataset', 'NFLX_dataset_public_raw_last4outliers.py')
    np.random.seed(0)
    synthetic = {
        'quality_scores': np.random.uniform(1, 5, 79),
        'observer_bias': np.random.normal(0, 1, 30),
        'observer_inconsistency': np.abs(np.random.uniform(0.0, 0.4, 30)),
        'content_bias': np.random.normal(0, 0.00001, 9),
        'content_ambiguity': np.abs(np.random.uniform(0.4, 0.6, 9)),
    }
    _validate_with_synthetic_dataset(
        subjective_model_classes=[MaximumLikelihoodEstimationModel],
        dataset_filepath=filepath,
        synthetic_result=synthetic)
def setUp(self):
    # Load the partial (incomplete-scores) NFLX dataset and a plain reader over it.
    partial_path = SurealConfig.test_resource_path('NFLX_dataset_public_raw_PARTIAL.py')
    self.dataset = import_python_file(partial_path)
    self.dataset_reader = RawDatasetReader(self.dataset)
def setUp(self):
    # Load the raw NFLX dataset and fix the RNG seed for reproducible tests.
    raw_path = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
    self.dataset = import_python_file(raw_path)
    np.random.seed(0)
def setUp(self):
    # Reader over the Lukas paired-comparison dataset.
    lukas_path = SurealConfig.test_resource_path('lukas_pc_dataset.py')
    self.pc_dataset_reader = PairedCompDatasetReader(import_python_file(lukas_path))
def run_subject_partial_corruption_growth(dataset_filepaths):
    """For each dataset, plot how each subjective model's quality-score RMSE
    (against the model's own run on the uncorrupted data) degrades as an
    increasing number of subjects have their scores partially corrupted
    (corrupt_probability=0.5).

    Fixes vs. original: Python-2-only ``print`` statement replaced with the
    ``print(...)`` call form (identical output on Python 2 and 3), the local
    name ``input`` no longer shadows the builtin, and the manual ``i`` counter
    is replaced by ``enumerate``.
    """

    @persist_to_file(SurealConfig.workdir_path('_run_partial_corrupt_nums.json'))
    def _run_partial_corrupt_nums(dataset, subject_nums, model_class, seed, perf_type='rmse'):

        def run_one_num_subject(num_subject, dataset, seed):
            # Pick num_subject subjects at random and corrupt half their scores.
            np.random.seed(seed)
            info_dict = {
                'selected_subjects': np.random.permutation(
                    len(dataset.dis_videos[0]['os']))[:num_subject],
                'corrupt_probability': 0.5,
            }
            dataset_reader = CorruptSubjectRawDatasetReader(dataset, input_dict=info_dict)
            subjective_model = model_class(dataset_reader)
            result = subjective_model.run_modeling(normalize_final=False)
            return dataset_reader, result

        inputs = [[subject_num, dataset, seed] for subject_num in subject_nums]
        outputs = map(lambda args: run_one_num_subject(*args), inputs)

        # Baseline: same model on the uncorrupted dataset; z-normalize its scores
        # so corrupted runs can be compared on the same scale.
        result0 = model_class(RawDatasetReader(dataset)).run_modeling(normalize_final=False)
        result0_qs = np.array(result0['quality_scores'])
        result0_qs_mean = np.mean(result0_qs)
        result0_qs_std = np.std(result0_qs)
        result0_qs = (result0_qs - result0_qs_mean) / result0_qs_std

        perfs = []
        datasizes = []
        for args, output in zip(inputs, outputs):
            subject_num, dataset, seed = args
            reader, result = output
            result_qs = np.array(result['quality_scores'])
            result_qs = (result_qs - result0_qs_mean) / result0_qs_std
            if perf_type == 'pcc':
                perf, _ = scipy.stats.pearsonr(result_qs, result0_qs)
            elif perf_type == 'rmse':
                perf = np.sqrt(np.mean(np.power(result_qs - result0_qs, 2.0)))
            else:
                assert False
            # datasize = np.prod(subject_num * len(reader.dataset.dis_videos))
            datasize = np.prod(subject_num)
            perfs.append(perf)
            datasizes.append(datasize)
        return datasizes, perfs

    for dataset_filepath in dataset_filepaths:
        subject_nums = np.arange(0, 20, 1)
        model_classes = [
            MosModel,
            SubjrejMosModel,
            ZscoringSubjrejMosModel,
            MaximumLikelihoodEstimationModel,
        ]
        # MLE is deterministic enough to need only one seed; MOS variants
        # are averaged over 10 corruption seeds.
        seedss = [
            range(10),
            range(10),
            range(10),
            range(1),
        ]
        linestyles = ['--', '-.', ':', '-']
        linewidths = [1.5, 1.5, 3, 1.5]

        dataset = import_python_file(dataset_filepath)
        datasizesss = [[] for _ in model_classes]
        perfsss = [[] for _ in model_classes]
        for i, (model_class, seeds) in enumerate(zip(model_classes, seedss)):
            print('class {}...'.format(model_class.__name__))
            for seed in seeds:
                datasizes, perfs = _run_partial_corrupt_nums(
                    dataset, subject_nums, model_class, seed)
                datasizesss[i].append(datasizes)
                perfsss[i].append(perfs)

        plt.figure(figsize=(5, 3.5))
        for i, model_class in enumerate(model_classes):
            # Average datasizes/perfs over seeds before plotting.
            plt.plot(
                np.mean(np.asarray(datasizesss[i]), axis=0),
                np.mean(np.asarray(perfsss[i]), axis=0),
                label=model_class.TYPE,
                linestyle=linestyles[i],
                linewidth=linewidths[i],
                color='black',
            )
        plt.xlabel('No. Corrupted Subjects')
        plt.ylabel(r'RMSE of Quality Scores ($x_e$)')
        plt.legend(loc=2, ncol=2, prop={'size': 12}, frameon=False)
        plt.tight_layout()
def run_random_corruption_growth(dataset_filepaths):
    """For each dataset, plot how each subjective model's quality-score RMSE
    (against the model's own run on the uncorrupted data) degrades as the
    per-score corruption probability grows.

    Bug fix vs. original: ``result['quality_scores']`` was indexed *before*
    the ``result is None`` guard, so a model run that failed with ValueError
    (result recorded as None) crashed with TypeError instead of plotting NaN.
    The indexing/normalization now happens only when result is not None.
    Also: Python-2-only ``print`` statement replaced with the ``print(...)``
    call form, and the local name ``input`` no longer shadows the builtin.
    """

    @persist_to_file(SurealConfig.workdir_path('_run_corrupt_probs.json'))
    def _run_corrupt_probs(dataset, corrupt_probs, model_class, seed, perf_type='rmse'):

        def run_one_corrput_prob(corrupt_prob, dataset, seed):
            np.random.seed(seed)
            info_dict = {
                'corrupt_probability': corrupt_prob,
            }
            dataset_reader = CorruptDataRawDatasetReader(dataset, input_dict=info_dict)
            subjective_model = model_class(dataset_reader)
            try:
                result = subjective_model.run_modeling(normalize_final=False)
            except ValueError as e:
                # Model failed on this corruption level; record as missing.
                print('Warning: {}, return result None'.format(e))
                result = None
            return dataset_reader, result

        inputs = [[corrupt_prob, dataset, seed] for corrupt_prob in corrupt_probs]
        outputs = map(lambda args: run_one_corrput_prob(*args), inputs)

        # Baseline: same model on the uncorrupted dataset; z-normalize its scores
        # so corrupted runs can be compared on the same scale.
        result0 = model_class(RawDatasetReader(dataset)).run_modeling(normalize_final=False)
        result0_qs = np.array(result0['quality_scores'])
        result0_qs_mean = np.mean(result0_qs)
        result0_qs_std = np.std(result0_qs)
        result0_qs = (result0_qs - result0_qs_mean) / result0_qs_std

        perfs = []
        datasizes = []
        for args, output in zip(inputs, outputs):
            corrupt_prob, dataset, seed = args
            reader, result = output
            if result is None:
                # Failed run: plot a gap rather than crashing.
                perf = float('NaN')
            else:
                result_qs = np.array(result['quality_scores'])
                result_qs = (result_qs - result0_qs_mean) / result0_qs_std
                if perf_type == 'pcc':
                    perf, _ = scipy.stats.pearsonr(result_qs, result0_qs)
                elif perf_type == 'rmse':
                    perf = np.sqrt(np.mean(np.power(result_qs - result0_qs, 2.0)))
                else:
                    assert False
            datasize = corrupt_prob
            perfs.append(perf)
            datasizes.append(datasize)
        return datasizes, perfs

    for dataset_filepath in dataset_filepaths:
        corrupt_probs = np.linspace(0.7, 0.0, num=20)
        model_classes = [
            MosModel,
            SubjrejMosModel,
            ZscoringSubjrejMosModel,
            MaximumLikelihoodEstimationModel,
        ]
        # MLE is deterministic enough to need only one seed; MOS variants
        # are averaged over 10 corruption seeds.
        seedss = [
            range(10),
            range(10),
            range(10),
            range(1),
        ]
        linestyles = ['--', '-.', ':', '-']
        linewidths = [1.5, 1.5, 3, 1.5]

        dataset = import_python_file(dataset_filepath)
        datasizesss = [[] for _ in model_classes]
        perfsss = [[] for _ in model_classes]
        for i, (model_class, seeds) in enumerate(zip(model_classes, seedss)):
            print('class {}...'.format(model_class.__name__))
            for seed in seeds:
                datasizes, perfs = _run_corrupt_probs(dataset, corrupt_probs,
                                                      model_class, seed)
                datasizesss[i].append(datasizes)
                perfsss[i].append(perfs)

        plt.figure(figsize=(5, 3.5))
        for i, model_class in enumerate(model_classes):
            # Average datasizes/perfs over seeds before plotting.
            plt.plot(
                np.mean(np.asarray(datasizesss[i]), axis=0),
                np.mean(np.asarray(perfsss[i]), axis=0),
                label=model_class.TYPE,
                linestyle=linestyles[i],
                linewidth=linewidths[i],
                color='black',
            )
        plt.xlabel('Data Corruption Probability')
        plt.ylabel(r'RMSE of Quality Scores ($x_e$)')
        plt.legend(loc=2, ncol=2, prop={'size': 12}, frameon=False)
        plt.tight_layout()