def cv_on_dataset(dataset, feature_param, model_param, ax, result_store,
                  contentid_groups, logger=None, aggregate_method=np.mean):
    """Run nested k-fold cross validation of a model on one dataset.

    Extracts features for all assets in the dataset, then cross-validates
    the model defined by model_param over folds grouped by content id.

    :param dataset: dataset object accepted by read_dataset; must expose
        dataset_name (used in the plot title).
    :param feature_param: object with a feature_dict attribute describing
        the features to extract.
    :param model_param: object with model_type and model_param_dict attributes.
    :param ax: matplotlib axis to draw a predicted-vs-true scatter on, or None
        to skip plotting.
    :param result_store: result store passed through to FeatureAssembler.
    :param contentid_groups: content-id groups used to build the k-fold list.
    :param logger: optional logger passed to the feature assembler and CV.
    :param aggregate_method: per-result score aggregation (default np.mean).
    :return: (assets, cv_output) where cv_output contains at least the
        'aggr_stats' and 'contentids' keys consumed below.
    """
    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True, fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)

    # run nested kfold cv for each combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class,
        model_param.model_param_dict,
        results,
        kfold,
        logger=logger,
    )

    # parenthesized form works on both Python 2 and Python 3
    print('Feature parameters: {}'.format(feature_param.feature_dict))
    print('Model type: {}'.format(model_param.model_type))
    print('Model parameters: {}'.format(model_param.model_param_dict))
    print('Stats: {}'.format(model_class.format_stats(cv_output['aggr_stats'])))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output['aggr_stats'], cv_output['contentids'])
        ax.set_xlabel('True Score')
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title("Dataset: {dataset}, Model: {model},\n{stats}".format(
            dataset=dataset.dataset_name,
            model=model_param.model_type,
            stats=model_class.format_stats(cv_output['aggr_stats'])
        ))

    return assets, cv_output
def cv_on_dataset(
    dataset, feature_param, model_param, ax, result_store, contentid_groups, logger=None, aggregate_method=np.mean
):
    """Run nested k-fold cross validation of a model on one dataset.

    Extracts features for every asset, then cross-validates the model given
    by model_param using folds grouped by content id.

    :param dataset: dataset object accepted by read_dataset; must expose
        dataset_name (used in the plot title).
    :param feature_param: object carrying a feature_dict attribute.
    :param model_param: object carrying model_type and model_param_dict.
    :param ax: matplotlib axis for a predicted-vs-true scatter, or None.
    :param result_store: result store forwarded to FeatureAssembler.
    :param contentid_groups: content-id groups for constructing the folds.
    :param logger: optional logger.
    :param aggregate_method: per-result score aggregation (default np.mean).
    :return: (assets, cv_output); cv_output contains at least the
        "aggr_stats" and "contentids" keys used below.
    """
    assets = read_dataset(dataset)
    kfold = construct_kfold_list(assets, contentid_groups)

    fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=assets,
        logger=logger,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=True,
        fifo_mode=True,
        # parallelize=False, fifo_mode=False, # VQM
    )
    fassembler.run()
    results = fassembler.results

    for result in results:
        result.set_score_aggregate_method(aggregate_method)

    model_class = TrainTestModel.find_subclass(model_param.model_type)

    # run nested kfold cv for each combination
    cv_output = ModelCrossValidation.run_kfold_cross_validation(
        model_class, model_param.model_param_dict, results, kfold, logger=logger
    )

    # parenthesized form works on both Python 2 and Python 3
    print("Feature parameters: {}".format(feature_param.feature_dict))
    print("Model type: {}".format(model_param.model_type))
    print("Model parameters: {}".format(model_param.model_param_dict))
    print("Stats: {}".format(model_class.format_stats(cv_output["aggr_stats"])))

    if ax is not None:
        model_class.plot_scatter(ax, cv_output["aggr_stats"], cv_output["contentids"])
        ax.set_xlabel("True Score")
        ax.set_ylabel("Predicted Score")
        ax.grid()
        ax.set_title(
            "Dataset: {dataset}, Model: {model},\n{stats}".format(
                dataset=dataset.dataset_name,
                model=model_param.model_type,
                stats=model_class.format_stats(cv_output["aggr_stats"]),
            )
        )

    return assets, cv_output
def train_test_vmaf_on_dataset(train_dataset, test_dataset, feature_param, model_param,
                               train_ax, test_ax, result_store,
                               parallelize=True, logger=None, fifo_mode=True,
                               output_model_filepath=None, aggregate_method=np.mean,
                               **kwargs):
    """Train a VMAF model on train_dataset and optionally evaluate it on test_dataset.

    If a dataset lacks per-asset groundtruth, a subjective model (DmosModel by
    default, overridable via kwargs['subj_model_class']) is fitted first and its
    aggregated output is used as the training/testing dataset; the original raw
    groundtruth is then passed to get_stats via ys_label_raw.

    :param train_dataset: dataset used for training; must expose dataset_name.
    :param test_dataset: dataset for evaluation, or None to skip testing.
    :param feature_param: object with a feature_dict attribute.
    :param model_param: object with model_type and model_param_dict attributes.
    :param train_ax: matplotlib axis for the training scatter plot, or None.
    :param test_ax: matplotlib axis for the testing scatter plot, or None.
    :param result_store: result store forwarded to FeatureAssembler.
    :param parallelize: parallelize training feature extraction.
    :param logger: optional logger; when None, stats are printed to stdout.
    :param fifo_mode: forwarded to FeatureAssembler.
    :param output_model_filepath: when given, the trained model is saved there.
    :param aggregate_method: per-result score aggregation (default np.mean).
    :param kwargs: forwarded to read_dataset, subjective modeling and prediction.
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats, model)
    """
    train_assets = read_dataset(train_dataset, **kwargs)
    train_raw_assets = None
    try:
        for train_asset in train_assets:
            assert train_asset.groundtruth is not None
    except AssertionError:
        # no groundtruth, try do subjective modeling
        # (a class is always truthy, so `or` falls back on missing or None)
        subj_model_class = kwargs.get('subj_model_class') or DmosModel
        subjective_model = subj_model_class(RawDatasetReader(train_dataset))
        subjective_model.run_modeling(**kwargs)
        train_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
        train_raw_assets = train_assets
        train_assets = read_dataset(train_dataset_aggregate, **kwargs)

    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    for result in train_features:
        result.set_score_aggregate_method(aggregate_method)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict
    model_class = TrainTestModel.find_subclass(model_type)

    train_xys = model_class.get_xys_from_results(train_features)
    train_xs = model_class.get_xs_from_results(train_features)
    train_ys = model_class.get_ys_from_results(train_features)

    model = model_class(model_param_dict, logger)
    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])
    if 'score_transform' in model_param_dict:
        VmafQualityRunner.set_transform_score(model, model_param_dict['score_transform'])

    train_ys_pred = VmafQualityRunner.predict_with_model(model, train_xs, **kwargs)

    # list comprehension instead of map(): yields a real list on Python 3 too
    raw_groundtruths = None if train_raw_assets is None else \
        [asset.raw_groundtruth for asset in train_raw_assets]

    train_stats = model.get_stats(train_ys['label'], train_ys_pred, ys_label_raw=raw_groundtruths)

    log = 'Stats on training data: {}'.format(model.format_stats(train_stats))
    if logger:
        logger.info(log)
    else:
        print(log)

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        train_content_ids = [asset.content_id for asset in train_assets]
        model_class.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('True Score')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
            dataset=train_dataset.dataset_name,
            model=model.model_id,
            stats=model_class.format_stats(train_stats)
        ))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset, **kwargs)
        test_raw_assets = None
        try:
            for test_asset in test_assets:
                assert test_asset.groundtruth is not None
        except AssertionError:
            # no groundtruth, try do subjective modeling
            subj_model_class = kwargs.get('subj_model_class') or DmosModel
            subjective_model = subj_model_class(RawDatasetReader(test_dataset))
            subjective_model.run_modeling(**kwargs)
            test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
            test_raw_assets = test_assets
            test_assets = read_dataset(test_dataset_aggregate, **kwargs)

        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            optional_dict=None,
            optional_dict2=None,
            # NOTE(review): hard-coded True here ignores the parallelize
            # argument, unlike the training assembler -- confirm intended
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        for result in test_features:
            result.set_score_aggregate_method(aggregate_method)

        test_xs = model_class.get_xs_from_results(test_features)
        test_ys = model_class.get_ys_from_results(test_features)

        test_ys_pred = VmafQualityRunner.predict_with_model(model, test_xs, **kwargs)

        raw_groundtruths = None if test_raw_assets is None else \
            [asset.raw_groundtruth for asset in test_raw_assets]

        test_stats = model_class.get_stats(test_ys['label'], test_ys_pred, ys_label_raw=raw_groundtruths)

        log = 'Stats on testing data: {}'.format(model_class.format_stats(test_stats))
        if logger:
            logger.info(log)
        else:
            print(log)

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            model_class.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('True Score')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=test_dataset.dataset_name,
                model=model.model_id,
                stats=model_class.format_stats(test_stats)
            ))

    return train_fassembler, train_assets, train_stats, \
        test_fassembler, test_assets, test_stats, model
def train_test_on_dataset(train_dataset, test_dataset, feature_param, model_param,
                          train_ax, test_ax, result_store,
                          parallelize=True, logger=None, fifo_mode=True,
                          output_model_filepath=None):
    """Train a quality model on train_dataset and optionally evaluate it on test_dataset.

    :param train_dataset: dataset used for training; must expose dataset_name.
    :param test_dataset: dataset for evaluation, or None to skip testing.
    :param feature_param: object with a feature_dict attribute.
    :param model_param: object with model_type and model_param_dict attributes.
    :param train_ax: matplotlib axis for the training scatter plot, or None.
    :param test_ax: matplotlib axis for the testing scatter plot, or None.
    :param result_store: result store forwarded to FeatureAssembler.
    :param parallelize: parallelize training feature extraction.
    :param logger: optional logger for stats reporting.
    :param fifo_mode: forwarded to FeatureAssembler.
    :param output_model_filepath: when given, the trained model is saved there.
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats)
    """
    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict

    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)

    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)

    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)

    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        # list comprehension instead of map(): yields a real list on Python 3 too
        train_content_ids = [asset.content_id for asset in train_assets]
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
            dataset=train_dataset.dataset_name,
            model=model.model_id,
            stats=TrainTestModel.format_stats(train_stats)
        ))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            # NOTE(review): hard-coded True here ignores the parallelize
            # argument, unlike the training assembler -- confirm intended
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)

        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)

        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=test_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(test_stats)
            ))

    return train_fassembler, train_assets, train_stats, \
        test_fassembler, test_assets, test_stats
def train_test_on_dataset(train_dataset, test_dataset, feature_param, model_param,
                          train_ax, test_ax, result_store,
                          parallelize=True, logger=None, fifo_mode=True,
                          output_model_filepath=None):
    """Train a quality model on train_dataset and optionally evaluate it on test_dataset.

    :param train_dataset: dataset used for training; must expose dataset_name.
    :param test_dataset: dataset for evaluation, or None to skip testing.
    :param feature_param: object with a feature_dict attribute.
    :param model_param: object with model_type and model_param_dict attributes.
    :param train_ax: matplotlib axis for the training scatter plot, or None.
    :param test_ax: matplotlib axis for the testing scatter plot, or None.
    :param result_store: result store forwarded to FeatureAssembler.
    :param parallelize: parallelize training feature extraction.
    :param logger: optional logger for stats reporting.
    :param fifo_mode: forwarded to FeatureAssembler.
    :param output_model_filepath: when given, the trained model is saved there.
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats)
    """
    train_assets = read_dataset(train_dataset)
    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    train_xys = TrainTestModel.get_xys_from_results(train_features)
    train_xs = TrainTestModel.get_xs_from_results(train_features)
    train_ys = TrainTestModel.get_ys_from_results(train_features)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict

    model_class = TrainTestModel.find_subclass(model_type)
    model = model_class(model_param_dict, logger)

    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info('feature_dict', feature_param.feature_dict)
    if 'score_clip' in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict['score_clip'])

    train_ys_pred = model.predict(train_xs)

    # apply instructions indicated in the appended info
    train_ys_pred = VmafQualityRunner.clip_score(model, train_ys_pred)

    train_stats = TrainTestModel.get_stats(train_ys['label'], train_ys_pred)

    if logger:
        logger.info('Stats on training data: {}'.format(
            TrainTestModel.format_stats(train_stats)))

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        # list comprehension instead of map(): yields a real list on Python 3 too
        train_content_ids = [asset.content_id for asset in train_assets]
        TrainTestModel.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel('DMOS')
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
            dataset=train_dataset.dataset_name,
            model=model.model_id,
            stats=TrainTestModel.format_stats(train_stats)))

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset)
        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            # NOTE(review): hard-coded True here ignores the parallelize
            # argument, unlike the training assembler -- confirm intended
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        test_xs = TrainTestModel.get_xs_from_results(test_features)
        test_ys = TrainTestModel.get_ys_from_results(test_features)

        test_ys_pred = model.predict(test_xs)

        # apply instructions indicated in the appended info
        test_ys_pred = VmafQualityRunner.clip_score(model, test_ys_pred)

        test_stats = TrainTestModel.get_stats(test_ys['label'], test_ys_pred)

        if logger:
            logger.info('Stats on testing data: {}'.format(
                TrainTestModel.format_stats(test_stats)))

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            TrainTestModel.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel('DMOS')
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title("Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=test_dataset.dataset_name,
                model=model.model_id,
                stats=TrainTestModel.format_stats(test_stats)))

    return train_fassembler, train_assets, train_stats, \
        test_fassembler, test_assets, test_stats
def train_test_vmaf_on_dataset(
    train_dataset, test_dataset, feature_param, model_param, train_ax, test_ax, result_store,
    parallelize=True, logger=None, fifo_mode=True, output_model_filepath=None,
    aggregate_method=np.mean, **kwargs
):
    """Train a VMAF model on train_dataset and optionally evaluate it on test_dataset.

    If a dataset lacks per-asset groundtruth, a subjective model (DmosModel by
    default, overridable via kwargs["subj_model_class"]) is fitted first and its
    aggregated output is used as the training/testing dataset; the original raw
    groundtruth is then passed to get_stats via ys_label_raw.

    :param train_dataset: dataset used for training; must expose dataset_name.
    :param test_dataset: dataset for evaluation, or None to skip testing.
    :param feature_param: object with a feature_dict attribute.
    :param model_param: object with model_type and model_param_dict attributes.
    :param train_ax: matplotlib axis for the training scatter plot, or None.
    :param test_ax: matplotlib axis for the testing scatter plot, or None.
    :param result_store: result store forwarded to FeatureAssembler.
    :param parallelize: parallelize training feature extraction.
    :param logger: optional logger; when None, stats are printed to stdout.
    :param fifo_mode: forwarded to FeatureAssembler.
    :param output_model_filepath: when given, the trained model is saved there.
    :param aggregate_method: per-result score aggregation (default np.mean).
    :param kwargs: forwarded to read_dataset, subjective modeling and prediction.
    :return: (train_fassembler, train_assets, train_stats,
              test_fassembler, test_assets, test_stats, model)
    """
    train_assets = read_dataset(train_dataset, **kwargs)
    train_raw_assets = None
    try:
        for train_asset in train_assets:
            assert train_asset.groundtruth is not None
    except AssertionError:
        # no groundtruth, try do subjective modeling
        # (a class is always truthy, so `or` falls back on missing or None)
        subj_model_class = kwargs.get("subj_model_class") or DmosModel
        subjective_model = subj_model_class(RawDatasetReader(train_dataset))
        subjective_model.run_modeling(**kwargs)
        train_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
        train_raw_assets = train_assets
        train_assets = read_dataset(train_dataset_aggregate, **kwargs)

    train_fassembler = FeatureAssembler(
        feature_dict=feature_param.feature_dict,
        feature_option_dict=None,
        assets=train_assets,
        logger=logger,
        fifo_mode=fifo_mode,
        delete_workdir=True,
        result_store=result_store,
        optional_dict=None,
        optional_dict2=None,
        parallelize=parallelize,
    )
    train_fassembler.run()
    train_features = train_fassembler.results

    for result in train_features:
        result.set_score_aggregate_method(aggregate_method)

    model_type = model_param.model_type
    model_param_dict = model_param.model_param_dict
    model_class = TrainTestModel.find_subclass(model_type)

    train_xys = model_class.get_xys_from_results(train_features)
    train_xs = model_class.get_xs_from_results(train_features)
    train_ys = model_class.get_ys_from_results(train_features)

    model = model_class(model_param_dict, logger)
    model.train(train_xys)

    # append additional information to model before saving, so that
    # VmafQualityRunner can read and process
    model.append_info("feature_dict", feature_param.feature_dict)
    if "score_clip" in model_param_dict:
        VmafQualityRunner.set_clip_score(model, model_param_dict["score_clip"])

    train_ys_pred = VmafQualityRunner.predict_with_model(model, train_xs, **kwargs)

    # list comprehension instead of map(): yields a real list on Python 3 too
    raw_groundtruths = (
        None if train_raw_assets is None
        else [asset.raw_groundtruth for asset in train_raw_assets]
    )

    train_stats = model.get_stats(train_ys["label"], train_ys_pred, ys_label_raw=raw_groundtruths)

    log = "Stats on training data: {}".format(model.format_stats(train_stats))
    if logger:
        logger.info(log)
    else:
        print(log)

    # save model
    if output_model_filepath is not None:
        model.to_file(output_model_filepath)

    if train_ax is not None:
        train_content_ids = [asset.content_id for asset in train_assets]
        model_class.plot_scatter(train_ax, train_stats, train_content_ids)
        train_ax.set_xlabel("True Score")
        train_ax.set_ylabel("Predicted Score")
        train_ax.grid()
        train_ax.set_title(
            "Dataset: {dataset}, Model: {model}\n{stats}".format(
                dataset=train_dataset.dataset_name,
                model=model.model_id,
                stats=model_class.format_stats(train_stats)
            )
        )

    # === test model on test dataset ===

    if test_dataset is None:
        test_assets = None
        test_stats = None
        test_fassembler = None
    else:
        test_assets = read_dataset(test_dataset, **kwargs)
        test_raw_assets = None
        try:
            for test_asset in test_assets:
                assert test_asset.groundtruth is not None
        except AssertionError:
            # no groundtruth, try do subjective modeling
            subj_model_class = kwargs.get("subj_model_class") or DmosModel
            subjective_model = subj_model_class(RawDatasetReader(test_dataset))
            subjective_model.run_modeling(**kwargs)
            test_dataset_aggregate = subjective_model.to_aggregated_dataset(**kwargs)
            test_raw_assets = test_assets
            test_assets = read_dataset(test_dataset_aggregate, **kwargs)

        test_fassembler = FeatureAssembler(
            feature_dict=feature_param.feature_dict,
            feature_option_dict=None,
            assets=test_assets,
            logger=logger,
            fifo_mode=fifo_mode,
            delete_workdir=True,
            result_store=result_store,
            optional_dict=None,
            optional_dict2=None,
            # NOTE(review): hard-coded True here ignores the parallelize
            # argument, unlike the training assembler -- confirm intended
            parallelize=True,
        )
        test_fassembler.run()
        test_features = test_fassembler.results

        for result in test_features:
            result.set_score_aggregate_method(aggregate_method)

        test_xs = model_class.get_xs_from_results(test_features)
        test_ys = model_class.get_ys_from_results(test_features)

        test_ys_pred = VmafQualityRunner.predict_with_model(model, test_xs, **kwargs)

        raw_groundtruths = (
            None if test_raw_assets is None
            else [asset.raw_groundtruth for asset in test_raw_assets]
        )

        test_stats = model_class.get_stats(test_ys["label"], test_ys_pred, ys_label_raw=raw_groundtruths)

        log = "Stats on testing data: {}".format(model_class.format_stats(test_stats))
        if logger:
            logger.info(log)
        else:
            print(log)

        if test_ax is not None:
            test_content_ids = [asset.content_id for asset in test_assets]
            model_class.plot_scatter(test_ax, test_stats, test_content_ids)
            test_ax.set_xlabel("True Score")
            test_ax.set_ylabel("Predicted Score")
            test_ax.grid()
            test_ax.set_title(
                "Dataset: {dataset}, Model: {model}\n{stats}".format(
                    dataset=test_dataset.dataset_name,
                    model=model.model_id,
                    stats=model_class.format_stats(test_stats)
                )
            )

    return train_fassembler, train_assets, train_stats, test_fassembler, test_assets, test_stats, model