Example #1
    def setUp(self):
        dataset_filepath = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
        dataset = import_python_file(dataset_filepath)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.1,
        }

        self.dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)
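
As a point of reference, the setUp above can be turned into a small standalone script. The sketch below is an illustration rather than project code: the import paths are assumptions about the sureal package layout, and the opinion_score_2darray attribute it inspects is the one exercised in Example #2.

# Standalone sketch of the setUp above (not from the project source).
# The import paths are assumptions about the sureal package layout.
import numpy as np

from sureal.config import SurealConfig
from sureal.dataset_reader import MissingDataRawDatasetReader
from sureal.tools.misc import import_python_file

dataset_filepath = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
dataset = import_python_file(dataset_filepath)

np.random.seed(0)  # fixed seed so the randomly dropped scores are reproducible
info_dict = {
    'missing_probability': 0.1,  # each raw score is dropped with (roughly) this probability
}

dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)

# The dropped entries show up as NaN in the score matrix (see Example #2).
os_2darray = dataset_reader.opinion_score_2darray
print('missing entries: {}'.format(np.isnan(os_2darray).sum()))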
Example #2
class MissingDatasetReaderTest(unittest.TestCase):
    def setUp(self):
        dataset_filepath = SurealConfig.test_resource_path(
            'NFLX_dataset_public_raw.py')
        dataset = import_python_file(dataset_filepath)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.1,
        }

        self.dataset_reader = MissingDataRawDatasetReader(dataset,
                                                          input_dict=info_dict)

    def test_opinion_score_2darray(self):
        os_2darray = self.dataset_reader.opinion_score_2darray
        self.assertTrue(np.isnan(np.mean(os_2darray)))
        self.assertEqual(np.isnan(os_2darray).sum(), 201)

    def test_to_dataset(self):
        dataset = self.dataset_reader.to_dataset()

        old_scores = [
            dis_video['os']
            for dis_video in self.dataset_reader.dataset.dis_videos
        ]
        new_scores = [dis_video['os'] for dis_video in dataset.dis_videos]

        self.assertNotEqual(old_scores, new_scores)
Example #3
    def test_observer_content_aware_subjective_model_missingdata(self):

        dataset = import_python_file(self.dataset_filepath)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.1,
        }
        dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)

        subjective_model = MaximumLikelihoodEstimationModel(dataset_reader)
        result = subjective_model.run_modeling()

        self.assertAlmostEqual(np.sum(result['content_ambiguity']), 3.9104244772977128, places=4)
        self.assertAlmostEqual(np.var(result['content_ambiguity']), 0.0037713583509767193, places=4)

        self.assertAlmostEqual(np.sum(result['observer_bias']), -0.21903272050455846, places=4)
        self.assertAlmostEqual(np.var(result['observer_bias']), 0.084353684687185043, places=4)

        self.assertAlmostEqual(np.sum(result['observer_inconsistency']), 9.8168943054654481, places=4)
        self.assertAlmostEqual(np.var(result['observer_inconsistency']), 0.028159236075789944, places=4)

        self.assertAlmostEqual(np.sum(result['quality_scores']), 280.05548186797336, places=4)
        self.assertAlmostEqual(np.var(result['quality_scores']), 1.4339487982797514, places=4)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.5,
        }
        dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)

        subjective_model = MaximumLikelihoodEstimationModel(dataset_reader)
        result = subjective_model.run_modeling()

        self.assertAlmostEqual(np.sum(result['content_ambiguity']), 2.63184284168883, places=4)
        self.assertAlmostEqual(np.var(result['content_ambiguity']), 0.019164097909450246, places=4)

        self.assertAlmostEqual(np.sum(result['observer_bias']), 0.2263148440748638, places=4)
        self.assertAlmostEqual(np.var(result['observer_bias']), 0.070613033112114504, places=4)

        self.assertAlmostEqual(np.sum(result['observer_inconsistency']), 12.317917502439435, places=4)
        self.assertAlmostEqual(np.var(result['observer_inconsistency']), 0.029455722248727296, places=4)

        self.assertAlmostEqual(np.sum(result['quality_scores']), 280.29962156788139, places=4)
        self.assertAlmostEqual(np.var(result['quality_scores']), 1.4717366222424826, places=4)
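
The test above checks summary statistics at two corruption levels; one natural follow-up is to compare the quality scores recovered under light and heavy data loss directly. The sketch below is an illustration rather than project code; as before, the import paths are assumptions about the sureal layout.

# Sketch: compare MLE quality scores recovered at 10% vs. 50% missing data.
# Not project code; import paths are assumptions about the sureal layout.
import numpy as np

from sureal.config import SurealConfig
from sureal.dataset_reader import MissingDataRawDatasetReader
from sureal.subjective_model import MaximumLikelihoodEstimationModel
from sureal.tools.misc import import_python_file

dataset = import_python_file(
    SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))


def recover_quality_scores(missing_probability, seed=0):
    # Same recipe as the test above: seed, drop scores, fit the MLE model.
    np.random.seed(seed)
    reader = MissingDataRawDatasetReader(
        dataset, input_dict={'missing_probability': missing_probability})
    result = MaximumLikelihoodEstimationModel(reader).run_modeling()
    return np.asarray(result['quality_scores'])


light = recover_quality_scores(0.1)
heavy = recover_quality_scores(0.5)

# A correlation close to 1 would indicate the recovered scores stay stable
# even when half of the raw scores are missing.
print('correlation: {:.4f}'.format(np.corrcoef(light, heavy)[0, 1]))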
Example #4
def run_one_missing_prob(missing_prob, dataset, seed):
    np.random.seed(seed)
    info_dict = {
        'missing_probability': missing_prob,
    }
    dataset_reader = MissingDataRawDatasetReader(dataset,
                                                 input_dict=info_dict)
    subjective_model = model_class(dataset_reader)
    try:
        result = subjective_model.run_modeling(normalize_final=False)
    except ValueError as e:
        print('Warning: {}, return result None'.format(e))
        result = None
    return dataset_reader, result
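
Note that model_class in the snippet above is a free variable resolved from an enclosing scope. A hedged usage sketch follows, with the model class passed in explicitly; the import paths and the choice of MaximumLikelihoodEstimationModel are assumptions, not taken from the surrounding project code.

# Sketch: drive run_one_missing_prob over several missing probabilities.
# The model class is passed explicitly here instead of being captured from an
# enclosing scope; import paths are assumptions about the sureal layout.
import numpy as np

from sureal.config import SurealConfig
from sureal.dataset_reader import MissingDataRawDatasetReader
from sureal.subjective_model import MaximumLikelihoodEstimationModel
from sureal.tools.misc import import_python_file


def run_one_missing_prob(model_class, missing_prob, dataset, seed):
    np.random.seed(seed)
    info_dict = {
        'missing_probability': missing_prob,
    }
    dataset_reader = MissingDataRawDatasetReader(dataset,
                                                 input_dict=info_dict)
    subjective_model = model_class(dataset_reader)
    try:
        result = subjective_model.run_modeling(normalize_final=False)
    except ValueError as e:
        print('Warning: {}, return result None'.format(e))
        result = None
    return dataset_reader, result


dataset = import_python_file(
    SurealConfig.test_resource_path('NFLX_dataset_public_raw.py'))

for missing_prob in (0.1, 0.3, 0.5):
    _, result = run_one_missing_prob(
        MaximumLikelihoodEstimationModel, missing_prob, dataset, seed=0)
    if result is not None:
        print('p={}: sum of quality scores = {:.2f}'.format(
            missing_prob, np.sum(result['quality_scores'])))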
Example #5
    def test_observer_aware_subjective_model_missingdata(self):

        dataset = import_python_file(self.dataset_filepath)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.1,
        }
        dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)
        subjective_model = MaximumLikelihoodEstimationModelReduced(dataset_reader)
        result = subjective_model.run_modeling()

        self.assertAlmostEqual(np.sum(result['observer_bias']), -0.18504017984241944, places=4)
        self.assertAlmostEqual(np.var(result['observer_bias']), 0.087350553292201705, places=4)

        self.assertAlmostEqual(np.sum(result['observer_inconsistency']), 15.520738471447299, places=4)
        self.assertAlmostEqual(np.var(result['observer_inconsistency']), 0.010940587327083341, places=4)

        self.assertAlmostEqual(np.sum(result['quality_scores']), 279.94975274863879, places=4)
        self.assertAlmostEqual(np.var(result['quality_scores']), 1.4325574378911554, places=4)

        np.random.seed(0)
        info_dict = {
            'missing_probability': 0.5,
        }
        dataset_reader = MissingDataRawDatasetReader(dataset, input_dict=info_dict)
        subjective_model = MaximumLikelihoodEstimationModelReduced(dataset_reader)
        result = subjective_model.run_modeling()

        self.assertAlmostEqual(np.sum(result['observer_bias']), 0.057731868199093525, places=4)
        self.assertAlmostEqual(np.var(result['observer_bias']), 0.081341845650928557, places=4)

        self.assertAlmostEqual(np.sum(result['observer_inconsistency']), 14.996238224489693, places=4)
        self.assertAlmostEqual(np.var(result['observer_inconsistency']), 0.013666025579465165, places=4)

        self.assertAlmostEqual(np.sum(result['quality_scores']), 280.67100837103203, places=4)
        self.assertAlmostEqual(np.var(result['quality_scores']), 1.4637917512768972, places=4)
Example #6
def validate_with_synthetic_dataset(synthetic_dataset_reader_class,
                                    subjective_model_classes, dataset_filepath,
                                    synthetic_result, ax_dict, **more):
    ret = {}

    color_dict = more.get('color_dict', {})
    marker_dict = more.get('marker_dict', {})
    output_synthetic_dataset_filepath = more.get('output_synthetic_dataset_filepath', None)

    missing_probability = more.get('missing_probability', None)
    assert missing_probability is None or 0 <= missing_probability < 1

    measure_runtime = more.get('measure_runtime', False)
    assert isinstance(measure_runtime, bool)

    dataset = import_python_file(dataset_filepath)
    dataset_reader = synthetic_dataset_reader_class(
        dataset, input_dict=synthetic_result)

    if missing_probability is not None:
        dataset = dataset_reader.to_dataset()
        synthetic_result2 = copy.deepcopy(synthetic_result)
        synthetic_result2.update({'missing_probability': missing_probability})
        dataset_reader = MissingDataRawDatasetReader(
            dataset, input_dict=synthetic_result2)

    if output_synthetic_dataset_filepath is not None:
        dataset_reader.write_out_dataset(dataset_reader.to_dataset(),
                                         output_synthetic_dataset_filepath)
        ret['output_synthetic_dataset_filepath'] = output_synthetic_dataset_filepath

    subjective_models = [subjective_model_class(dataset_reader)
                         for subjective_model_class in subjective_model_classes]

    def run_modeling(subjective_model):
        if measure_runtime:
            with Timer() as t:
                ret = subjective_model.run_modeling(**more)
            ret['runtime'] = t.interval
        else:
            ret = subjective_model.run_modeling(**more)
        return ret

    results = [run_modeling(subjective_model)
               for subjective_model in subjective_models]

    if ax_dict is None:
        ax_dict = dict()

    do_errorbar = more.get('do_errorbar', False)
    assert isinstance(do_errorbar, bool)

    ret['results'] = dict()
    for subjective_model_class, result in zip(subjective_model_classes,
                                              results):
        ret['results'][subjective_model_class.TYPE] = result

    for ax in ax_dict.values():
        ax.set_xlabel('Synthetic')
        ax.set_ylabel('Recovered')
        ax.grid()

    for subjective_model_class, result, idx in zip(subjective_model_classes,
                                                   results,
                                                   range(len(results))):

        model_name = subjective_model_class.TYPE

        if 'quality_scores' in result and 'quality_scores' in synthetic_result and 'quality_scores_ci95' in result:
            ci_perc = get_ci_percentage(synthetic_result, result,
                                        'quality_scores',
                                        'quality_scores_ci95')
            ret['results'][model_name]['quality_scores_ci_perc'] = ci_perc

        if 'observer_bias' in result and 'observer_bias' in synthetic_result and 'observer_bias_ci95' in result:
            ci_perc = get_ci_percentage(synthetic_result, result,
                                        'observer_bias', 'observer_bias_ci95')
            ret['results'][model_name]['observer_bias_ci_perc'] = ci_perc

        if 'observer_inconsistency' in result and 'observer_inconsistency' in synthetic_result \
                and 'observer_inconsistency_ci95' in result:
            ci_perc = get_ci_percentage(synthetic_result, result,
                                        'observer_inconsistency',
                                        'observer_inconsistency_ci95')
            ret['results'][model_name][
                'observer_inconsistency_ci_perc'] = ci_perc

        if 'quality_scores' in ax_dict:
            ax = ax_dict['quality_scores']
            ax.set_title(r'Quality Score ($\psi_j$)')
            if 'quality_scores' in result and 'quality_scores' in synthetic_result:
                color = color_dict.get(model_name, 'black')
                marker = marker_dict.get(model_name, '.')
                x = synthetic_result['quality_scores']
                y = result['quality_scores']
                if 'quality_scores_ci95' in result:
                    yerr = result['quality_scores_ci95']
                    ci_perc = get_ci_percentage(synthetic_result, result,
                                                'quality_scores',
                                                'quality_scores_ci95')
                else:
                    yerr = None
                    ci_perc = None
                if do_errorbar is True and 'quality_scores_ci95' in result:
                    ax.errorbar(
                        x,
                        y,
                        fmt='.',
                        yerr=yerr,
                        color=color,
                        capsize=2,
                        marker=marker,
                        label='{sm} (RMSE {rmse:.4f}, CI% {ci_perc:.1f})'.
                        format(
                            sm=model_name,
                            rmse=RmsePerfMetric(
                                x, y).evaluate(enable_mapping=False)['score'],
                            ci_perc=ci_perc,
                        ))
                else:
                    ax.scatter(x,
                               y,
                               color=color,
                               marker=marker,
                               label='{sm} (RMSE {rmse:.4f})'.format(
                                   sm=model_name,
                                   rmse=RmsePerfMetric(x, y).evaluate(
                                       enable_mapping=False)['score']))
                ax.legend()
                diag_line = np.arange(min(x), max(x), step=0.01)
                ax.plot(
                    diag_line,
                    diag_line,
                    '-',
                    color='gray',
                )

        if 'quality_scores_std' in ax_dict:
            ax = ax_dict['quality_scores_std']
            ax.set_title(r'Std of Quality Score ($\sigma(\psi_j)$)')
            if 'quality_scores_std' in result and 'quality_scores_std' in synthetic_result:
                color = color_dict.get(model_name, 'black')
                marker = marker_dict.get(model_name, '.')
                x = synthetic_result['quality_scores_std']
                y = result['quality_scores_std']
                ax.scatter(x,
                           y,
                           color=color,
                           marker=marker,
                           label='{sm} (RMSE {rmse:.4f})'.format(
                               sm=model_name,
                               rmse=RmsePerfMetric(
                                   x,
                                   y).evaluate(enable_mapping=False)['score']))
                ax.legend()
                diag_line = np.arange(min(x), max(x), step=0.01)
                ax.plot(
                    diag_line,
                    diag_line,
                    '-',
                    color='gray',
                )

        if 'observer_bias' in ax_dict:
            ax = ax_dict['observer_bias']
            ax.set_title(r'Subject Bias ($\Delta_i$)')
            if 'observer_bias' in result and 'observer_bias' in synthetic_result:
                color = color_dict.get(model_name, 'black')
                marker = marker_dict.get(model_name, '.')
                x = synthetic_result['observer_bias']
                y = result['observer_bias']
                if 'observer_bias_ci95' in result:
                    yerr = result['observer_bias_ci95']
                    ci_perc = get_ci_percentage(synthetic_result, result,
                                                'observer_bias',
                                                'observer_bias_ci95')
                else:
                    yerr = None
                    ci_perc = None
                min_xy = np.min([len(x), len(y)])
                x = x[:min_xy]
                y = y[:min_xy]
                if do_errorbar is True and 'observer_bias_ci95' in result:
                    ax.errorbar(
                        x,
                        y,
                        fmt='.',
                        yerr=yerr,
                        color=color,
                        capsize=2,
                        marker=marker,
                        label='{sm} (RMSE {rmse:.4f}, CI% {ci_perc:.1f})'.
                        format(
                            sm=model_name,
                            rmse=RmsePerfMetric(
                                x, y).evaluate(enable_mapping=False)['score'],
                            ci_perc=ci_perc,
                        ))
                else:
                    ax.scatter(x,
                               y,
                               color=color,
                               marker=marker,
                               label='{sm} (RMSE {rmse:.4f})'.format(
                                   sm=model_name,
                                   rmse=RmsePerfMetric(x, y).evaluate(
                                       enable_mapping=False)['score']))
                ax.legend()
                diag_line = np.arange(min(x), max(x), step=0.01)
                ax.plot(
                    diag_line,
                    diag_line,
                    '-',
                    color='gray',
                )

        if 'observer_inconsistency' in ax_dict:
            ax = ax_dict['observer_inconsistency']
            ax.set_title(r'Subject Inconsistency ($\upsilon_i$)')
            if 'observer_inconsistency' in result and 'observer_inconsistency' in synthetic_result:
                color = color_dict.get(model_name, 'black')
                marker = marker_dict.get(model_name, '.')
                x = synthetic_result['observer_inconsistency']
                y = result['observer_inconsistency']
                if 'observer_inconsistency_ci95' in result:
                    yerr = np.array(result['observer_inconsistency_ci95'])
                    ci_perc = get_ci_percentage(synthetic_result, result,
                                                'observer_inconsistency',
                                                'observer_inconsistency_ci95')
                else:
                    yerr = None
                    ci_perc = None
                min_xy = np.min([len(x), len(y)])
                x = x[:min_xy]
                y = y[:min_xy]
                if do_errorbar is True and 'observer_inconsistency_ci95' in result:
                    ax.errorbar(
                        x,
                        y,
                        fmt='.',
                        yerr=yerr,
                        color=color,
                        capsize=2,
                        marker=marker,
                        label='{sm} (RMSE {rmse:.4f}, CI% {ci_perc:.1f})'.
                        format(
                            sm=model_name,
                            rmse=RmsePerfMetric(
                                x, y).evaluate(enable_mapping=False)['score'],
                            ci_perc=ci_perc,
                        ))
                else:
                    ax.scatter(x,
                               y,
                               color=color,
                               marker=marker,
                               label='{sm} (RMSE {rmse:.4f})'.format(
                                   sm=model_name,
                                   rmse=RmsePerfMetric(x, y).evaluate(
                                       enable_mapping=False)['score']))
                ax.legend()
                diag_line = np.arange(min(x), max(x), step=0.01)
                ax.plot(
                    diag_line,
                    diag_line,
                    '-',
                    color='gray',
                )

        if 'quality_ambiguity' in ax_dict:
            ax = ax_dict['quality_ambiguity']
            ax.set_title(r'Quality Ambiguity ($\phi_j$)')
            if 'quality_ambiguity' in result and 'quality_ambiguity' in synthetic_result:
                color = color_dict.get(model_name, 'black')
                marker = marker_dict.get(model_name, '.')
                x = synthetic_result['quality_ambiguity']
                y = result['quality_ambiguity']
                min_xy = np.min([len(x), len(y)])
                x = x[:min_xy]
                y = y[:min_xy]
                ax.scatter(x,
                           y,
                           color=color,
                           marker=marker,
                           label='{sm} (RMSE {rmse:.4f})'.format(
                               sm=model_name,
                               rmse=RmsePerfMetric(
                                   x,
                                   y).evaluate(enable_mapping=False)['score']))
                ax.legend()
                diag_line = np.arange(min(x), max(x), step=0.01)
                ax.plot(
                    diag_line,
                    diag_line,
                    '-',
                    color='gray',
                )

    return ret
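
For context, a sketch of how the function above might be invoked. It assumes the function (and the helpers it uses, such as RmsePerfMetric, get_ci_percentage, and Timer) are importable in the current scope; SyntheticRawDatasetReader, the import paths, and the structure of synthetic_result are assumptions inferred from the keys the function reads.

# Sketch: one plausible invocation of validate_with_synthetic_dataset.
# Assumes the function above and its helpers are in scope; the reader classes,
# import paths and synthetic_result keys are assumptions, not project code.
import matplotlib.pyplot as plt

from sureal.config import SurealConfig
from sureal.dataset_reader import RawDatasetReader, SyntheticRawDatasetReader
from sureal.subjective_model import MaximumLikelihoodEstimationModel
from sureal.tools.misc import import_python_file

dataset_filepath = SurealConfig.test_resource_path('NFLX_dataset_public_raw.py')
dataset = import_python_file(dataset_filepath)

# Fit a model to the real dataset and treat its estimates as the synthetic
# ground truth that the synthetic reader will regenerate scores from.
fitted = MaximumLikelihoodEstimationModel(RawDatasetReader(dataset)).run_modeling()
synthetic_result = {
    'quality_scores': fitted['quality_scores'],
    'observer_bias': fitted['observer_bias'],
    'observer_inconsistency': fitted['observer_inconsistency'],
    'content_ambiguity': fitted['content_ambiguity'],
}

fig, axs = plt.subplots(1, 3, figsize=(15, 4))
ax_dict = {
    'quality_scores': axs[0],
    'observer_bias': axs[1],
    'observer_inconsistency': axs[2],
}

ret = validate_with_synthetic_dataset(
    synthetic_dataset_reader_class=SyntheticRawDatasetReader,
    subjective_model_classes=[MaximumLikelihoodEstimationModel],
    dataset_filepath=dataset_filepath,
    synthetic_result=synthetic_result,
    ax_dict=ax_dict,
    missing_probability=0.1,  # exercises the MissingDataRawDatasetReader branch
)
plt.show()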