@classmethod
def _post_process_result(cls, result):
    # override Executor._post_process_result
    result = super(MomentFeatureExtractor, cls)._post_process_result(result)

    # calculate refvar and disvar from ref1st, ref2nd, dis1st, dis2nd
    refvar_scores_key = cls.get_scores_key('refvar')
    ref1st_scores_key = cls.get_scores_key('ref1st')
    ref2nd_scores_key = cls.get_scores_key('ref2nd')
    disvar_scores_key = cls.get_scores_key('disvar')
    dis1st_scores_key = cls.get_scores_key('dis1st')
    dis2nd_scores_key = cls.get_scores_key('dis2nd')
    get_var = lambda m: m[1] - m[0] * m[0]
    result.result_dict[refvar_scores_key] = \
        to_list(map(get_var, zip(result.result_dict[ref1st_scores_key],
                                 result.result_dict[ref2nd_scores_key])))
    result.result_dict[disvar_scores_key] = \
        to_list(map(get_var, zip(result.result_dict[dis1st_scores_key],
                                 result.result_dict[dis2nd_scores_key])))

    # validate
    for feature in cls.DERIVED_ATOM_FEATURES:
        assert cls.get_scores_key(feature) in result.result_dict

    return result
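# A minimal standalone sketch (not part of the extractor) of the identity the
# derived variance features rely on: Var(X) = E[X^2] - (E[X])^2, applied per
# frame to the first and second moments. The moment values below are made up.
first_moments = [0.5, 2.0]    # hypothetical per-frame E[X]
second_moments = [0.75, 5.0]  # hypothetical per-frame E[X^2]
get_var = lambda m: m[1] - m[0] * m[0]
assert list(map(get_var, zip(first_moments, second_moments))) == [0.5, 1.0]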
def rebuf_indices(self):
    if 'rebuf_indices' in self.asset_dict:
        assert isinstance(self.asset_dict['rebuf_indices'], list), 'Rebuffering indices need to be in a list.'
        # check for negative rebuffering indices
        assert len(to_list(filter(lambda x: x < 0, self.asset_dict['rebuf_indices']))) == 0, 'All rebuffering indices have to be >= 0.'
        return self.asset_dict['rebuf_indices']
    else:
        return None
def _assert_asset_dict(self):
    # perform necessary assertions on asset properties of asset dict
    if 'fps' in self.asset_dict:
        assert self.asset_dict['fps'] > 0.0, 'Frame rate has to be positive.'
    if 'rebuf_indices' in self.asset_dict:
        assert isinstance(self.asset_dict['rebuf_indices'], list), 'Rebuffering indices need to be in a list.'
        # check for negative rebuffering indices
        assert len(to_list(filter(lambda x: x < 0, self.asset_dict['rebuf_indices']))) == 0, 'All rebuffering indices have to be >= 0.'
def construct_kfold_list(assets, contentid_groups):
    # construct cross validation kfold input list
    content_ids = to_list(map(lambda asset: asset.content_id, assets))

    kfold = []
    for curr_content_group in contentid_groups:
        curr_indices = indices(content_ids, lambda x: x in curr_content_group)
        kfold.append(curr_indices)

    return kfold
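# A hypothetical usage sketch for construct_kfold_list: each fold collects the
# positions of all assets whose content_id falls in that fold's content group,
# so no content straddles folds. FakeAsset is a stand-in for the real Asset
# class (only .content_id is exercised), and the indices() helper is assumed
# to return the positions where the predicate holds.
from collections import namedtuple

FakeAsset = namedtuple('FakeAsset', ['content_id'])
fake_assets = [FakeAsset(0), FakeAsset(1), FakeAsset(2), FakeAsset(1)]
fake_contentid_groups = [[0, 2], [1]]
assert construct_kfold_list(fake_assets, fake_contentid_groups) == [[0, 2], [1, 3]]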
def parallel_map(func, list_args, processes=None):
    """
    Build my own parallelized map function since multiprocessing's Process()
    and Pool.map() cannot meet both of my needs:
    1) be able to control the maximum number of processes in parallel
    2) be able to take in non-picklable objects as arguments
    """

    # get maximum number of active processes that can be used
    max_active_procs = processes if processes is not None else multiprocessing.cpu_count()

    # create shared dictionary
    return_dict = multiprocessing.Manager().dict()

    # define runner function
    def func_wrapper(idx_args):
        idx, args = idx_args
        executor = func(args)
        return_dict[idx] = executor

    # add idx to args
    list_idx_args = []
    for idx, args in enumerate(list_args):
        list_idx_args.append((idx, args))

    procs = []
    for idx_args in list_idx_args:
        proc = multiprocessing.Process(target=func_wrapper, args=(idx_args,))
        procs.append(proc)

    waiting_procs = set(procs)
    active_procs = set([])

    # processing
    while True:

        # check if any procs in active_procs is done; if yes, remove them
        for p in active_procs.copy():
            if not p.is_alive():
                active_procs.remove(p)

        # check if can add a proc to active_procs (add gradually one per loop)
        if len(active_procs) < max_active_procs and len(waiting_procs) > 0:
            # move one proc from waiting_procs to active_procs
            p = waiting_procs.pop()
            active_procs.add(p)
            p.start()

        # if both waiting_procs and active_procs are empty, can terminate
        if len(waiting_procs) == 0 and len(active_procs) == 0:
            break

        sleep(0.01)  # poll every 0.01 sec

    # finally, collect results
    rets = map(lambda idx: return_dict[idx], range(len(list_args)))
    return to_list(rets)
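# A hypothetical usage sketch for parallel_map. Because each argument reaches
# its child process via the process's inherited state rather than pickling,
# the arguments need not be picklable; only the per-index return values pass
# through the Manager dict. This sketch assumes a fork-based start method
# (e.g. Linux), since func here is a lambda and would not survive pickling.
if __name__ == '__main__':
    squares = parallel_map(lambda x: x * x, [1, 2, 3, 4], processes=2)
    assert squares == [1, 4, 9, 16]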
def to_dataframe(self):
    """
    Export to pandas dataframe with columns: dataset, content_id, asset_id,
    ref_name, dis_name, asset, executor_id, scores_key, scores
    Example:
                                                   asset  asset_id  content_id  \
    0  {"asset_dict": {"height": 1080, "width": 1920}...         0           0
    1  {"asset_dict": {"height": 1080, "width": 1920}...         0           0
    2  {"asset_dict": {"height": 1080, "width": 1920}...         0           0
    3  {"asset_dict": {"height": 1080, "width": 1920}...         0           0
    4  {"asset_dict": {"height": 1080, "width": 1920}...         0           0

      dataset                             dis_name executor_id  \
    0    test  checkerboard_1920_1080_10_3_1_0.yuv   VMAF_V0.1
    1    test  checkerboard_1920_1080_10_3_1_0.yuv   VMAF_V0.1
    2    test  checkerboard_1920_1080_10_3_1_0.yuv   VMAF_V0.1
    3    test  checkerboard_1920_1080_10_3_1_0.yuv   VMAF_V0.1
    4    test  checkerboard_1920_1080_10_3_1_0.yuv   VMAF_V0.1

                                  ref_name  \
    0  checkerboard_1920_1080_10_3_0_0.yuv
    1  checkerboard_1920_1080_10_3_0_0.yuv
    2  checkerboard_1920_1080_10_3_0_0.yuv
    3  checkerboard_1920_1080_10_3_0_0.yuv
    4  checkerboard_1920_1080_10_3_0_0.yuv

                                               scores          scores_key
    0                   [0.798588, 0.84287, 0.800122]     VMAF_adm_scores
    1                [12.420815, 12.41775, 12.416308]   VMAF_ansnr_scores
    2                     [0.0, 18.489031, 18.542355]  VMAF_motion_scores
    3  [42.1117149479, 47.6544689539, 40.6168118533]         VMAF_scores
    4                  [0.156106, 0.156163, 0.156119]     VMAF_vif_scores

    [5 rows x 9 columns]
    :return:
    """
    import pandas as pd

    asset = self.asset
    executor_id = self.executor_id
    list_scores_key = self.get_ordered_list_scores_key()
    list_scores = to_list(map(lambda key: self.result_dict[key], list_scores_key))

    rows = []
    for scores_key, scores in zip(list_scores_key, list_scores):
        row = [asset.dataset,
               asset.content_id,
               asset.asset_id,
               get_file_name_with_extension(asset.ref_path),
               get_file_name_with_extension(asset.dis_path),
               repr(asset),
               executor_id,
               scores_key,
               scores]
        rows.append(row)

    # zip rows into a dict, and wrap with df
    df = pd.DataFrame(dict(zip(self.DATAFRAME_COLUMNS, zip(*rows))))
    return df
def _get_aggregate_score_str(self):
    list_score_key = self.get_ordered_list_score_key()
    str_aggregate = "Aggregate ({}): ".format(self.score_aggregate_method.__name__) + (
        ", ".join(
            map(
                lambda tscore: "{score_key}:{score:.6f}".format(score_key=tscore[0], score=tscore[1]),
                zip(list_score_key,
                    to_list(map(lambda score_key: self[score_key], list_score_key)))
            )
        )
    )
    return str_aggregate
@classmethod
def _preprocess(cls, groundtruths, predictions, **kwargs):
    aggre_method = kwargs['aggr_method'] if 'aggr_method' in kwargs else np.mean
    enable_mapping = kwargs['enable_mapping'] if 'enable_mapping' in kwargs else False

    groundtruths_ = to_list(map(
        lambda x: aggre_method(x) if hasattr(x, '__len__') else x,
        groundtruths))

    if enable_mapping:
        predictions_ = cls.sigmoid_adjust(predictions, groundtruths_)
    else:
        predictions_ = predictions

    return groundtruths_, predictions_
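# A minimal sketch of the groundtruth aggregation performed above: per-content
# lists of raw opinion scores are collapsed with aggr_method (np.mean by
# default), while scalar groundtruths pass through untouched. Data is made up.
import numpy as np

example_groundtruths = [[3, 4, 5], 4.5]  # one raw score list, one scalar MOS
aggregated = [np.mean(x) if hasattr(x, '__len__') else x for x in example_groundtruths]
assert aggregated == [4.0, 4.5]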
def _get_scores_str(self, unit_name='Frame'):
    list_scores_key = self.get_ordered_list_scores_key()
    list_score_key = self.get_ordered_list_score_key()
    list_scores = to_list(map(lambda key: self.result_dict[key], list_scores_key))
    str_perframe = "\n".join(
        map(
            lambda tframe_scores: "{unit} {num}: ".format(unit=unit_name, num=tframe_scores[0]) + (
                ", ".join(
                    map(
                        lambda tscore: "{score_key}:{score:.6f}".format(score_key=tscore[0], score=tscore[1]),
                        zip(list_score_key, tframe_scores[1])))),
            enumerate(zip(*list_scores))
        )
    )
    str_perframe += '\n'
    return str_perframe
@classmethod
def _post_process_result(cls, result):
    # override Executor._post_process_result
    result = super(MomentNorefFeatureExtractor, cls)._post_process_result(result)

    # calculate var from 1st, 2nd
    var_scores_key = cls.get_scores_key('var')
    first_scores_key = cls.get_scores_key('1st')
    second_scores_key = cls.get_scores_key('2nd')
    value = map(lambda m: m[1] - m[0] * m[0],
                zip(result.result_dict[first_scores_key],
                    result.result_dict[second_scores_key]))
    result.result_dict[var_scores_key] = to_list(value)

    # validate
    for feature in cls.DERIVED_ATOM_FEATURES:
        assert cls.get_scores_key(feature) in result.result_dict

    return result
def run(self, **kwargs):
    """
    Do all the computation here.
    :return:
    """
    if self.logger:
        self.logger.info(
            "For each asset, if {type} result has not been generated, run "
            "and generate {type} result...".format(type=self.executor_id))

    if 'parallelize' in kwargs:
        parallelize = kwargs['parallelize']
    else:
        parallelize = False

    if parallelize:
        # create locks for unique assets (uniqueness is identified by str(asset))
        map_asset_lock = {}
        locks = []
        for asset in self.assets:
            asset_str = str(asset)
            if asset_str not in map_asset_lock:
                map_asset_lock[asset_str] = multiprocessing.Lock()
            locks.append(map_asset_lock[asset_str])

        # pack key arguments to be used as inputs to map function
        list_args = []
        for asset, lock in zip(self.assets, locks):
            list_args.append([asset, lock])

        def _run(asset_lock):
            asset, lock = asset_lock
            lock.acquire()
            result = self._run_on_asset(asset)
            lock.release()
            return result

        self.results = parallel_map(_run, list_args)
    else:
        self.results = to_list(map(self._run_on_asset, self.assets))
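# A standalone sketch (outside the class) of the locking pattern used above:
# duplicate assets (same str(asset)) share one multiprocessing.Lock, so two
# workers never process the same asset concurrently, while distinct assets
# still proceed in parallel. The asset strings below are hypothetical.
import multiprocessing

asset_strs = ['asset_a', 'asset_b', 'asset_a']
map_asset_lock = {}
locks = []
for asset_str in asset_strs:
    if asset_str not in map_asset_lock:
        map_asset_lock[asset_str] = multiprocessing.Lock()
    locks.append(map_asset_lock[asset_str])
assert locks[0] is locks[2] and locks[0] is not locks[1]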
@classmethod
def _evaluate(cls, groundtruths, predictions, **kwargs):

    # function [resolving_power] = vqm_accuracy (vqm, num_viewers, mos, std, deg_of_freedom)
    #
    # % MATLAB function [resolving_power] = ...
    # %     vqm_accuracy (vqm, num_viewers, mos, std, deg_of_freedom)
    # %
    # % Compute resolving power for one model.
    # %
    # % vqm is the video quality metric score for this src_id x hrc_id
    # % num_viewers is the number of viewers that rated this src_id x hrc_id
    # % mos is the mean opinion score of this src_id x hrc_id
    # % std is the standard-deviation of this src_id x hrc_id
    # %
    # % All of the above arrays must be the same length. The VQM must already be
    # % fitted to the MOS.
    # %
    # % deg_of_freedom is the number of degrees of freedom for the fit between
    # % VQM and MOS prior to calling this routine.
    # %
    # % returned data contains:
    # % resolving_power(1) = 95% Resolving Power
    # % resolving_power(2) = 90% Resolving Power
    # % resolving_power(3) = 75% Resolving Power
    # % resolving_power(4) = 68% Resolving Power

    if isinstance(groundtruths, (list, tuple)) and isinstance(groundtruths[0], dict):
        raise TypeError("{} cannot handle dictionary-style dataset yet.".format(cls.__name__))

    deg_of_freedom = kwargs['ddof'] if 'ddof' in kwargs else 0

    vqm = np.array(predictions)
    num_viewers = np.array(to_list(map(lambda groundtruth: len(groundtruth), groundtruths)))
    mos = np.mean(groundtruths, axis=1)
    std = np.std(groundtruths, axis=1, ddof=deg_of_freedom)

    # variance = std.^2;
    variance = std**2

    # num_comb = length(vqm);
    num_comb = len(vqm)

    # % Perform the vqm RMSE calculation using vqm.
    # vqm_rmse = (sum((vqm-mos).^2)/(num_comb - deg_of_freedom))^0.5;
    vqm_rmse = (sum((vqm - mos)**2) / (num_comb - deg_of_freedom))**0.5

    # % Perform the vqm resolution measurement using both vqm and mos.
    # vqm_pairs = repmat(vqm,1,num_comb)-repmat(vqm',num_comb,1);
    # mos_pairs = repmat(mos,1,num_comb)-repmat(mos',num_comb,1);
    # stand_err_diff = sqrt(repmat(variance./num_viewers,1,num_comb) + repmat((variance./num_viewers)',num_comb,1));
    # z_pairs = mos_pairs./stand_err_diff;
    vqm_pairs = np.tile(vqm, (num_comb, 1))
    vqm_pairs = vqm_pairs - vqm_pairs.T
    mos_pairs = np.tile(mos, (num_comb, 1))
    mos_pairs = mos_pairs - mos_pairs.T
    stand_err_diff = np.tile(variance / num_viewers, (num_comb, 1))
    stand_err_diff = np.sqrt(stand_err_diff + stand_err_diff.T)
    z_pairs = mos_pairs / stand_err_diff

    # % Include everything above the diagonal.
    # delta_vqm = [];
    # z = [];
    # for col = 2:num_comb
    #     delta_vqm = [delta_vqm; vqm_pairs(1:col-1,col)];
    #     z = [z; z_pairs(1:col-1,col)];
    # end
    delta_vqm = []
    z = []
    for col in range(2, num_comb + 1):
        delta_vqm = np.hstack([delta_vqm, vqm_pairs[0:col - 1, col - 1]])
        z = np.hstack([z, z_pairs[0:col - 1, col - 1]])

    # % Switch on z and delta_vqm for negative delta_vqm
    # z_vqm = z;
    # negs_vqm = find(delta_vqm < 0);
    # delta_vqm(negs_vqm) = -delta_vqm(negs_vqm);
    # z_vqm(negs_vqm) = -z_vqm(negs_vqm);
    z_vqm = z
    negs_vqm = indices(delta_vqm, lambda x: x < 0)
    delta_vqm[negs_vqm] = -delta_vqm[negs_vqm]
    z_vqm[negs_vqm] = -z_vqm[negs_vqm]

    # % Compute the average confidence that vqm(2) is worse than vqm(1) in mean_cdf_z_vqm.
    # cdf_z_vqm = .5+erf(z_vqm/sqrt(2))/2;
    cdf_z_vqm = .5 + scipy.special.erf(z_vqm / np.sqrt(2)) / 2

    # === original binning logic: ===
    # % One control parameter for delta_vqm resolution plot; number of vqm bins,
    # % equally spaced from min(delta_vqm) to max(delta_vqm).
    # % Sliding neighborhood filter with 50% overlap means that there will actually
    # % be vqm_bins*2-1 points on the delta_vqm resolution plot.
    # vqm_bins = 10; % How many bins to divide full vqm range for local averaging
    # vqm_low = min(delta_vqm); % lower limit on delta_vqm
    # vqm_high = max(delta_vqm); % upper limit on delta_vqm
    # vqm_step = (vqm_high-vqm_low)/vqm_bins; % size of delta_vqm bins
    vqm_bins = 10
    vqm_low = min(delta_vqm)
    vqm_high = max(delta_vqm)
    vqm_step = (vqm_high - vqm_low) / vqm_bins

    # % lower, upper, and center bin locations
    # low_limits = [vqm_low:vqm_step/2:vqm_high-vqm_step];
    # high_limits = [vqm_low+vqm_step:vqm_step/2:vqm_high];
    # centers = [vqm_low+vqm_step/2:vqm_step/2:vqm_high-vqm_step/2];
    low_limits = np.arange(vqm_low, vqm_high - vqm_step, step=vqm_step / 2)
    centers = low_limits.copy() + vqm_step / 2
    high_limits = low_limits.copy() + vqm_step

    # patch to cover entire range
    if high_limits[-1] < vqm_high:
        low_limits = np.hstack([low_limits, vqm_high - vqm_step])
        high_limits = np.hstack([high_limits, vqm_high])
        centers = np.hstack([centers, vqm_high - vqm_step / 2])

    len_centers = len(centers)
    assert len_centers == len(low_limits) == len(high_limits)

    # mean_cdf_z_vqm = zeros(1,2*vqm_bins-1);
    # for i=1:2*vqm_bins-1
    #     in_bin = find(low_limits(i) <= delta_vqm & delta_vqm < high_limits(i));
    #     mean_cdf_z_vqm(i) = mean(cdf_z_vqm(in_bin));
    # end
    mean_cdf_z_vqm = np.zeros(len_centers)
    for i in range(0, len_centers):
        in_bin = indices(delta_vqm, lambda x: low_limits[i] <= x < high_limits[i])
        mean_cdf_z_vqm[i] = np.mean(cdf_z_vqm[in_bin])

    centers__mean_cdf_z_vqm = filter(lambda p: not np.isnan(p[1]),
                                     zip(centers, mean_cdf_z_vqm))
    centers, mean_cdf_z_vqm = zip(*centers__mean_cdf_z_vqm)

    # # % % Optional code to plot resolving power curve.
    # # % % The x-axis is vqm(2)-vqm(1). The Y-axis is always the average
    # # % % confidence that vqm(2) is worse than vqm(1).
    # # % figure(1)
    # # % plot(centers,mean_cdf_z_vqm)
    # # % grid
    # # % set(gca,'LineWidth',1)
    # #
    # # % set(gca,'FontName','Ariel')
    # # % set(gca,'fontsize',11)
    # # % xlabel('VQM (2) - VQM (1)')
    # # % ylabel('Average Confidence VQM (2) is worse than VQM (1)')
    # # % title('VQM Resolving Power')
    #
    # # % Compute each resolving power by interpolating the mean_cdf_z_vqm graph
    #
    # # % 95% resolving power
    # # i = length(centers) - 1;
    # # while mean_cdf_z_vqm(i) > 0.95 && i > 1,
    # #     i = i -1;
    # # end
    # # j = min(length(centers), i+1);
    # # resolving_power(1) = interp1(mean_cdf_z_vqm(i:j),centers(i:j), 0.95);
    #
    # # % 90% resolving power
    # # i = length(centers) - 1;
    # # while mean_cdf_z_vqm(i) > 0.90 && i > 1,
    # #     i = i -1;
    # # end
    # # j = min(length(centers), i+1);
    # # resolving_power(2) = interp1(mean_cdf_z_vqm(i:j),centers(i:j), 0.90);
    #
    # # % 75% resolving power
    # # i = length(centers) - 1;
    # # while mean_cdf_z_vqm(i) > 0.75 && i > 1,
    # #     i = i -1;
    # # end
    # # j = min(length(centers), i+1);
    # # resolving_power(3) = interp1(mean_cdf_z_vqm(i:j),centers(i:j), 0.75);
    #
    # # % 68% resolving power
    # # i = length(centers) - 1;
    # # while mean_cdf_z_vqm(i) > 0.68 && i > 1,
    # #     i = i -1;
    # # end
    # # j = min(length(centers), i+1);
    # # resolving_power(4) = interp1(mean_cdf_z_vqm(i:j),centers(i:j), 0.68);
    #
    # resolving_powers = []
    # for perc in [0.95, 0.90, 0.75, 0.68]:
    #     i = len(centers) - 1
    #     while mean_cdf_z_vqm[i-1] > perc and i > 1:
    #         i -= 1
    #     j = min(len(centers), i+1)
    #     resolving_power = scipy.interpolate.interp1d(mean_cdf_z_vqm[i-1:j], centers[i-1:j])(perc)
    #     resolving_powers.append(resolving_power)

    try:
        res_pow_95 = scipy.interpolate.interp1d(mean_cdf_z_vqm, centers, kind='linear')([0.95])[0]
    except ValueError:
        res_pow_95 = float('NaN')

    # % return infinity if can't compute
    # resolving_power(isnan(resolving_power)) = inf;

    result = dict()
    result['resolving_power_95perc'] = res_pow_95
    result['score'] = res_pow_95
    return result
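# A small standalone check of the confidence formula used in _evaluate above:
# 0.5 + erf(z / sqrt(2)) / 2 is exactly the standard normal CDF Phi(z), i.e.
# the probability that the second stimulus is rated worse than the first,
# given the observed MOS difference and its standard error.
import numpy as np
import scipy.special
import scipy.stats

z = np.array([-1.0, 0.0, 1.96])
cdf = .5 + scipy.special.erf(z / np.sqrt(2)) / 2
assert np.allclose(cdf, scipy.stats.norm.cdf(z))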
def get_ordered_list_score_key(self):
    # e.g. ['VMAF_score', 'VMAF_vif_score']
    list_scores_key = self.get_ordered_list_scores_key()
    return to_list(map(lambda scores_key: scores_key[:-1], list_scores_key))
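# The [:-1] slice above relies on the naming convention that a per-frame key
# ends in 'scores' while its aggregate counterpart ends in 'score', e.g.:
assert 'VMAF_scores'[:-1] == 'VMAF_score'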
def get_ordered_list_multimodel_score_key(self):
    # e.g. ['BOOTSTRAP_VMAF_all_models_score']
    list_scores_key = self.get_ordered_list_multimodel_scores_key()
    return to_list(map(lambda scores_key: scores_key[:-1], list_scores_key))
def train_test_vmaf_on_dataset(train_dataset, test_dataset,
                               feature_param, model_param,
                               train_ax, test_ax, result_store,
                               parallelize=True, logger=None, fifo_mode=True,
                               output_model_filepath=None,
                               aggregate_method=np.mean,
                               **kwargs):

    train_assets = read_dataset(train_dataset, **kwargs)
    train_raw_assets = None
    try:
        for train_asset in train_assets:
            assert train_asset.groundtruth is not None
    except AssertionError:
        # no groundtruth, try to do subjective modeling
        from sureal.dataset_reader import RawDatasetReader
        from sureal.subjective_model import DmosModel
        subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel
        dataset_reader_class = kwargs['dataset_reader_class'] if 'dataset_reader_class' in kwargs else RawDatasetReader
        subjective_model = subj_model_class(dataset_reader_class(train_dataset))
        subjective_model.run_modeling(**kwargs)
        train_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
        train_raw_assets = train_assets
        train_assets = read_dataset(train_dataset_aggregate, **kwargs)

    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    for result in train_features:
        result.set_score_aggregate_method(aggregate_method)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict

    model_class = TrainTestModel.find_subclass(model_type)

    train_xys = model_class.get_xys_from_results(train_features)
    train_xs = model_class.get_xs_from_results(train_features)
    train_ys = model_class.get_ys_from_results(train_features)

    model = model_class(model_param_dict, logger)
    model.train(train_xys, **kwargs)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])
    if 'score_transform' in model_param_dict:
        VmafQualityRunner.set_transform_score(model, model_param_dict['score_transform'])

    train_ys_pred = VmafQualityRunner.predict_with_model(model, train_xs, **kwargs)['ys_pred']

    raw_groundtruths = None if train_raw_assets is None else \
        to_list(map(lambda asset: asset.raw_groundtruth, train_raw_assets))

    train_stats = model.get_stats(train_ys['label'], train_ys_pred, ys_label_raw=raw_groundtruths)

    log = 'Stats on training data: {}'.format(model.format_stats_for_print(train_stats))
    if logger:
        logger.info(log)
    else:
        print(log)

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        train_content_ids = to_list(map(lambda asset: asset.content_id, train_assets))
        model_class.plot_scatter(train_ax, train_stats, content_ids=train_content_ids)
        train_ax.set_xlabel('True Score')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
            dataset=train_dataset.dataset_name,
            model=model.model_id,
            stats=model_class.format_stats_for_plot(train_stats)
        ))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset, **kwargs)
        test_raw_assets = None
        try:
            for test_asset in test_assets:
                assert test_asset.groundtruth is not None
        except AssertionError:
            # no groundtruth, try to do subjective modeling
            from sureal.dataset_reader import RawDatasetReader
            from sureal.subjective_model import DmosModel
            subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel
            dataset_reader_class = kwargs['dataset_reader_class'] if 'dataset_reader_class' in kwargs else RawDatasetReader
            subjective_model = subj_model_class(dataset_reader_class(test_dataset))
            subjective_model.run_modeling(**kwargs)
            test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
            test_raw_assets = test_assets
            test_assets = read_dataset(test_dataset_aggregate, **kwargs)

        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            optional_dict=None,
            optional_dict2=None,
            parallelize=parallelize,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        for result in test_features:
            result.set_score_aggregate_method(aggregate_method)

        test_xs = model_class.get_xs_from_results(test_features)
        test_ys = model_class.get_ys_from_results(test_features)

        test_ys_pred = VmafQualityRunner.predict_with_model(model, test_xs, **kwargs)['ys_pred']

        raw_groundtruths = None if test_raw_assets is None else \
            to_list(map(lambda asset: asset.raw_groundtruth, test_raw_assets))

        test_stats = model.get_stats(test_ys['label'], test_ys_pred, ys_label_raw=raw_groundtruths)

        log = 'Stats on testing data: {}'.format(model_class.format_stats_for_print(test_stats))
        if logger:
            logger.info(log)
        else:
            print(log)

        if test_ax is not None:
            test_content_ids = to_list(map(lambda asset: asset.content_id, test_assets))
            model_class.plot_scatter(test_ax, test_stats, content_ids=test_content_ids)
            test_ax.set_xlabel('True Score')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=test_dataset.dataset_name,
                model=model.model_id,
                stats=model_class.format_stats_for_plot(test_stats)
            ))

    return train_fassembler, train_assets, train_stats, test_fassembler, test_assets, test_stats, model
def run_test_on_dataset(test_dataset, runner_class, ax, result_store, model_filepath,
                        parallelize=True, fifo_mode=True,
                        aggregate_method=np.mean,
                        type='regressor',
                        **kwargs):

    test_assets = read_dataset(test_dataset, **kwargs)
    test_raw_assets = None
    try:
        for test_asset in test_assets:
            assert test_asset.groundtruth is not None
    except AssertionError:
        # no groundtruth, try to do subjective modeling
        from sureal.dataset_reader import RawDatasetReader
        from sureal.subjective_model import DmosModel
        subj_model_class = kwargs['subj_model_class'] if 'subj_model_class' in kwargs and kwargs['subj_model_class'] is not None else DmosModel
        dataset_reader_class = kwargs['dataset_reader_class'] if 'dataset_reader_class' in kwargs else RawDatasetReader
        subjective_model = subj_model_class(dataset_reader_class(test_dataset))
        subjective_model.run_modeling(**kwargs)
        test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
        test_raw_assets = test_assets
        test_assets = read_dataset(test_dataset_aggregate, **kwargs)

    if model_filepath is not None:
        optional_dict = {'model_filepath': model_filepath}
        if 'model_720_filepath' in kwargs and kwargs['model_720_filepath'] is not None:
            optional_dict['720model_filepath'] = kwargs['model_720_filepath']
        if 'model_480_filepath' in kwargs and kwargs['model_480_filepath'] is not None:
            optional_dict['480model_filepath'] = kwargs['model_480_filepath']
    else:
        optional_dict = None

    if 'enable_transform_score' in kwargs and kwargs['enable_transform_score'] is not None:
        if not optional_dict:
            optional_dict = {}
        optional_dict['enable_transform_score'] = kwargs['enable_transform_score']

    if 'disable_clip_score' in kwargs and kwargs['disable_clip_score'] is not None:
        if not optional_dict:
            optional_dict = {}
        optional_dict['disable_clip_score'] = kwargs['disable_clip_score']

    if 'subsample' in kwargs and kwargs['subsample'] is not None:
        if not optional_dict:
            optional_dict = {}
        optional_dict['subsample'] = kwargs['subsample']

    # run
    runner = runner_class(
        test_assets,
        None,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=optional_dict,
        optional_dict2=None,
    )
    runner.run(parallelize=parallelize)
    results = runner.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    try:
        model_type = runner.get_train_test_model_class()
    except Exception:
        if type == 'regressor':
            model_type = RegressorMixin
        elif type == 'classifier':
            model_type = ClassifierMixin
        else:
            assert False

    # plot
    groundtruths = to_list(map(lambda asset: asset.groundtruth, test_assets))
    predictions = to_list(map(lambda result: result[runner_class.get_score_key()], results))
    raw_groundtruths = None if test_raw_assets is None else \
        to_list(map(lambda asset: asset.raw_groundtruth, test_raw_assets))
    groundtruths_std = None if test_assets is None else \
        to_list(map(lambda asset: asset.groundtruth_std, test_assets))

    try:
        predictions_bagging = to_list(map(lambda result: result[runner_class.get_bagging_score_key()], results))
        predictions_stddev = to_list(map(lambda result: result[runner_class.get_stddev_score_key()], results))
        predictions_ci95_low = to_list(map(lambda result: result[runner_class.get_ci95_low_score_key()], results))
        predictions_ci95_high = to_list(map(lambda result: result[runner_class.get_ci95_high_score_key()], results))
        predictions_all_models = to_list(map(lambda result: result[runner_class.get_all_models_score_key()], results))

        # need to revert the list of lists, so that the outer list has the predictions for each model separately
        predictions_all_models = np.array(predictions_all_models).T.tolist()
        num_models = np.shape(predictions_all_models)[0]

        stats = model_type.get_stats(groundtruths, predictions,
                                     ys_label_raw=raw_groundtruths,
                                     ys_label_pred_bagging=predictions_bagging,
                                     ys_label_pred_stddev=predictions_stddev,
                                     ys_label_pred_ci95_low=predictions_ci95_low,
                                     ys_label_pred_ci95_high=predictions_ci95_high,
                                     ys_label_pred_all_models=predictions_all_models,
                                     ys_label_stddev=groundtruths_std)
    except Exception as e:
        print('Stats calculation failed, using default stats calculation. Error cause: ')
        print(e)
        stats = model_type.get_stats(groundtruths, predictions,
                                     ys_label_raw=raw_groundtruths,
                                     ys_label_stddev=groundtruths_std)
        num_models = 1

    print('Stats on testing data: {}'.format(model_type.format_stats_for_print(stats)))

    # print stats if multiple models are present
    if 'SRCC_across_model_distribution' in stats \
            and 'PCC_across_model_distribution' in stats \
            and 'RMSE_across_model_distribution' in stats:
        print('Stats on testing data (across multiple models, using all test indices): {}'.format(
            model_type.format_across_model_stats_for_print(model_type.extract_across_model_stats(stats))))

    if ax is not None:
        content_ids = to_list(map(lambda asset: asset.content_id, test_assets))

        if 'point_label' in kwargs:
            if kwargs['point_label'] == 'asset_id':
                point_labels = to_list(map(lambda asset: asset.asset_id, test_assets))
            elif kwargs['point_label'] == 'dis_path':
                point_labels = to_list(map(lambda asset: get_file_name_without_extension(asset.dis_path), test_assets))
            else:
                raise AssertionError("Unknown point_label {}".format(kwargs['point_label']))
        else:
            point_labels = None

        model_type.plot_scatter(ax, stats, content_ids=content_ids, point_labels=point_labels, **kwargs)
        ax.set_xlabel('True Score')
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title("{runner}{num_models}\n{stats}".format(
            dataset=test_assets[0].dataset,
            runner=runner_class.TYPE,
            stats=model_type.format_stats_for_plot(stats),
            num_models=", {} models".format(num_models) if num_models > 1 else "",
        ))

    return test_assets, results
def nonemean(my_list):
    return np.mean(to_list(filter(lambda x: x is not None, my_list)))
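# Usage sketch for nonemean: None entries are dropped before averaging, which
# is useful when optional fields (e.g. a missing groundtruth_std) are absent.
assert nonemean([1.0, None, 3.0]) == 2.0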
def to_dict(self):
    """Example:
    {
        "executorId": "SSIM_V1.0",
        "asset": {
            "identifier": "test_0_0_checkerboard_1920_1080_10_3_0_0_1920x1080_vs_checkerboard_1920_1080_10_3_1_0_1920x1080_q_1920x1080"
        },
        "frames": [
            {
                "frameNum": 0,
                "SSIM_feature_ssim_c_score": 0.997404,
                "SSIM_feature_ssim_l_score": 0.965512,
                "SSIM_feature_ssim_s_score": 0.935803,
                "SSIM_score": 0.901161
            },
            {
                "frameNum": 1,
                "SSIM_feature_ssim_c_score": 0.997404,
                "SSIM_feature_ssim_l_score": 0.965512,
                "SSIM_feature_ssim_s_score": 0.935803,
                "SSIM_score": 0.90116
            },
            {
                "frameNum": 2,
                "SSIM_feature_ssim_c_score": 0.997404,
                "SSIM_feature_ssim_l_score": 0.965514,
                "SSIM_feature_ssim_s_score": 0.935804,
                "SSIM_score": 0.901163
            }
        ],
        "aggregate": {
            "SSIM_feature_ssim_c_score": 0.99740399999999996,
            "SSIM_feature_ssim_l_score": 0.96551266666666669,
            "SSIM_feature_ssim_s_score": 0.93580333333333332,
            "SSIM_score": 0.90116133333333337
        }
    }
    """
    list_scores_key = self.get_ordered_list_scores_key()
    list_score_key = self.get_ordered_list_score_key()
    list_scores = to_list(map(lambda key: self.result_dict[key], list_scores_key))
    list_aggregate_score = to_list(map(lambda key: self[key], list_score_key))
    list_multimodel_scores_key = self.get_ordered_list_multimodel_scores_key()
    list_multimodel_score_key = self.get_ordered_list_multimodel_score_key()
    # here we need to transpose, since printing is per frame and not per model;
    # we also need to turn the 2D array into a list of lists, for unpacking to work as expected
    list_multimodel_scores = to_list(map(lambda key: self.result_dict[key].T.tolist(), list_multimodel_scores_key))
    list_aggregate_multimodel_score = to_list(map(lambda key: self[key], list_multimodel_score_key))

    # append multimodel scores and keys (if any)
    list_scores_key += list_multimodel_scores_key
    list_score_key += list_multimodel_score_key
    list_scores += list_multimodel_scores
    list_aggregate_score += list_aggregate_multimodel_score

    list_scores_reordered = zip(*list_scores)

    top = OrderedDict()
    top['executorId'] = self.executor_id
    top['asset'] = {'identifier': str(self.asset)}

    top['frames'] = []
    for i, list_score in enumerate(list_scores_reordered):
        frame = OrderedDict()
        frame['frameNum'] = i
        for score_key, score in zip(list_score_key, list_score):
            frame[score_key] = score
        top['frames'].append(frame)

    top['aggregate'] = OrderedDict()
    for score_key, score in zip(list_score_key, list_aggregate_score):
        top['aggregate'][score_key] = score
    top['aggregate']['method'] = self.score_aggregate_method.__name__

    return top
def to_xml(self):
    """Example:
    <?xml version="1.0" ?>
    <result executorId="SSIM_V1.0">
      <asset identifier="test_0_0_checkerboard_1920_1080_10_3_0_0_1920x1080_vs_checkerboard_1920_1080_10_3_1_0_1920x1080_q_1920x1080"/>
      <frames>
        <frame SSIM_feature_ssim_c_score="0.997404" SSIM_feature_ssim_l_score="0.965512" SSIM_feature_ssim_s_score="0.935803" SSIM_score="0.901161" frameNum="0"/>
        <frame SSIM_feature_ssim_c_score="0.997404" SSIM_feature_ssim_l_score="0.965512" SSIM_feature_ssim_s_score="0.935803" SSIM_score="0.90116" frameNum="1"/>
        <frame SSIM_feature_ssim_c_score="0.997404" SSIM_feature_ssim_l_score="0.965514" SSIM_feature_ssim_s_score="0.935804" SSIM_score="0.901163" frameNum="2"/>
      </frames>
      <aggregate SSIM_feature_ssim_c_score="0.997404" SSIM_feature_ssim_l_score="0.965512666667" SSIM_feature_ssim_s_score="0.935803333333" SSIM_score="0.901161333333"/>
    </result>
    """
    from xml.etree import ElementTree
    from xml.dom import minidom

    list_scores_key = self.get_ordered_list_scores_key()
    list_score_key = self.get_ordered_list_score_key()
    list_scores = to_list(map(lambda key: self.result_dict[key], list_scores_key))
    list_aggregate_score = to_list(map(lambda key: self[key], list_score_key))
    list_multimodel_scores_key = self.get_ordered_list_multimodel_scores_key()
    list_multimodel_score_key = self.get_ordered_list_multimodel_score_key()
    # here we need to transpose, since printing is per frame and not per model;
    # we also need to turn the 2D array into a list of lists, for unpacking to work as expected
    list_multimodel_scores = to_list(map(lambda key: self.result_dict[key].T.tolist(), list_multimodel_scores_key))
    list_aggregate_multimodel_score = to_list(map(lambda key: self[key], list_multimodel_score_key))

    # append multimodel scores and keys (if any)
    list_scores_key += list_multimodel_scores_key
    list_score_key += list_multimodel_score_key
    list_scores += list_multimodel_scores
    list_aggregate_score += list_aggregate_multimodel_score

    list_scores_reordered = zip(*list_scores)

    def prettify(elem):
        rough_string = ElementTree.tostring(elem, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        return reparsed.toprettyxml(indent="  ")

    top = ElementTree.Element('result')
    top.set('executorId', self.executor_id)

    asset = ElementTree.SubElement(top, 'asset')
    asset.set('identifier', str(self.asset))

    frames = ElementTree.SubElement(top, 'frames')
    for i, list_score in enumerate(list_scores_reordered):
        frame = ElementTree.SubElement(frames, 'frame')
        frame.set('frameNum', str(i))
        for score_key, score in zip(list_score_key, list_score):
            frame.set(score_key, str(score))

    aggregate = ElementTree.SubElement(top, 'aggregate')
    aggregate.set('method', self.score_aggregate_method.__name__)
    for score_key, score in zip(list_score_key, list_aggregate_score):
        aggregate.set(score_key, str(score))

    return prettify(top)