예제 #1
0
 def test_prmse_sparse_matrix_array_as_input(self):
     """Check ``prmse_true`` on the sparse data when both inputs are arrays.

     The human score columns are converted with ``to_numpy()`` and the
     system scores with ``np.array`` before being passed in; the result
     is compared with ``assert_almost_equal`` using 7 as the precision
     argument.
     """
     rater_columns = self.human_score_columns
     sparse = self.data_sparse

     # hand plain arrays (not pandas objects) to the function under test
     humans_array = sparse[rater_columns].to_numpy()
     system_array = np.array(sparse['system'])

     computed = prmse_true(system_array, humans_array)
     assert_almost_equal(computed, 0.538748, 7)
예제 #2
0
 def test_prmse_sparse_matrix_computed_ve(self):
     """Check ``prmse_true`` on the sparse data without a variance argument.

     Only the system scores and the human score columns are passed, so
     no explicit third argument is supplied to ``prmse_true``.
     """
     rater_columns = self.human_score_columns
     sparse = self.data_sparse

     computed = prmse_true(sparse['system'], sparse[rater_columns])
     assert_almost_equal(computed, 0.538748, 7)
예제 #3
0
 def test_prmse_full_matrix_computed_ve(self):
     """Check ``prmse_true`` on the full data without a variance argument.

     Only the system scores and the human score columns are passed, so
     no explicit third argument is supplied to ``prmse_true``.
     """
     rater_columns = self.human_score_columns
     full = self.data_full

     computed = prmse_true(full['system'], full[rater_columns])
     assert_almost_equal(computed, 0.5409673, 7)
예제 #4
0
 def test_prmse_sparse_matrix_given_ve(self):
     """Check ``prmse_true`` on the sparse data with a supplied variance.

     The variance of human errors (0.5150882) is passed as the third
     positional argument instead of being left out.
     """
     rater_columns = self.human_score_columns
     sparse = self.data_sparse

     # pre-specified variance of errors for the human scores
     given_variance = 0.5150882

     computed = prmse_true(sparse['system'],
                           sparse[rater_columns],
                           given_variance)
     assert_almost_equal(computed, 0.538748, 7)
예제 #5
0
 def test_prmse_full_matrix_given_ve(self):
     """Check ``prmse_true`` on the full data with a supplied variance.

     The variance of human errors (0.509375) is passed as the third
     positional argument instead of being left out.
     """
     rater_columns = self.human_score_columns
     full = self.data_full

     # pre-specified variance of errors for the human scores
     given_variance = 0.509375

     computed = prmse_true(full['system'],
                           full[rater_columns],
                           given_variance)
     assert_almost_equal(computed, 0.5409673, 7)
예제 #6
0
def test_prmse_all_single_scored():
    """Check ``prmse_true`` when no response is scored by both raters.

    Each row of the data frame has a score from ``sc1`` or from ``sc2``
    but never from both. The test expects ``prmse_true`` to return
    ``None`` and to emit a ``UserWarning``.
    """
    system_scores = [1, 2, 3, 4, 5]
    sc1 = [1, 2, 3, None, None]
    sc2 = [None, None, None, 2, 3]
    df = pd.DataFrame({'sc1': sc1, 'sc2': sc2, 'system': system_scores})

    with warnings.catch_warnings(record=True) as warning_list:
        # Force every warning to be recorded: under the default filters a
        # warning that was already triggered from the same location earlier
        # in the test run may be suppressed, leaving the list empty.
        warnings.simplefilter('always')
        prmse = prmse_true(df['system'], df[['sc1', 'sc2']])

    ok_(prmse is None)
    # Look for the UserWarning anywhere in the recorded list rather than
    # only in the last position, so unrelated warnings emitted afterwards
    # cannot mask it and an empty list fails cleanly.
    ok_(any(issubclass(w.category, UserWarning) for w in warning_list))
def compute_prmse_one_system_multiple_rater_pairs(df_scores, system_id, rater_pairs):
    """
    Compute PRMSE for one system against each of several rater pairs.

    For every pair in ``rater_pairs``, ``prmse_true`` is called with the
    ``system_id`` column of ``df_scores`` as the system scores and the
    two rater columns of ``df_scores`` as the human scores. The results
    are collected in the same order as the pairs.

    Parameters
    ----------
    df_scores : pandas.DataFrame
        The data frame containing the simulated scores.
        This is usually one of the data frames returned
        by the ``simulation.dataset.Dataset.to_frame()``
        method.
    system_id : str
        The ID for the simulated system to be evaluated.
        This must be a column in ``df_scores``.
    rater_pairs : list of lists of str
        The rater pairs against which the system is to be
        evaluated. Each pair must contain exactly two rater
        IDs, e.g., ``[h_1, h_33]``, both of which are used
        as column names in ``df_scores``.

    Returns
    -------
    prmse_values : list
        One ``prmse_true`` result per rater pair, in input order.
    """
    # one ``prmse_true`` call per pair; each pair is unpacked into exactly
    # two rater IDs that select the human score columns
    return [
        prmse_true(df_scores[system_id], df_scores[[first_rater, second_rater]])
        for first_rater, second_rater in rater_pairs
    ]
예제 #8
0
def test_prmse_single_human_ve_array_as_input():
    """Check ``prmse_true`` with one human score array and a given variance.

    System and human scores are passed as 1-D NumPy arrays and ``0.5``
    is passed as the third positional argument. The result is compared
    to the expected value with a tolerance rather than exact equality,
    since it is a computed floating-point number.
    """
    import math

    system_scores = np.array([1, 2, 5])
    human_scores = np.array([2, 3, 5])
    prmse = prmse_true(system_scores, human_scores, 0.5)

    expected = 0.9090909090909091
    # tolerant comparison: exact float equality can break with a change in
    # the order of floating-point operations in the underlying libraries
    assert math.isclose(prmse, expected, rel_tol=1e-9), \
        '{!r} != {!r}'.format(prmse, expected)
예제 #9
0
def test_prmse_single_human_ve():
    """Check ``prmse_true`` with one human score column and a given variance.

    System and human scores are passed as single data frame columns and
    ``0.5`` is passed as the third positional argument. The result is
    compared to the expected value with a tolerance rather than exact
    equality, since it is a computed floating-point number.
    """
    import math

    df = pd.DataFrame({'system': [1, 2, 5], 'sc1': [2, 3, 5]})
    prmse = prmse_true(df['system'], df['sc1'], 0.5)

    expected = 0.9090909090909091
    # tolerant comparison: exact float equality can break with a change in
    # the order of floating-point operations in the underlying libraries
    assert math.isclose(prmse, expected, rel_tol=1e-9), \
        '{!r} != {!r}'.format(prmse, expected)