예제 #1
0
def test_dawid_skene_overlap(overlap):
    """Fit DawidSkene on a synthetic grid of answers and check its outputs.

    ``overlap`` workers each answer three tasks. A worker answers ``'no'``
    when ``(worker - task)`` is divisible by 3 and ``'yes'`` otherwise.
    After 20 iterations the test expects every task to be labelled
    ``'yes'`` and the priors to be 1/3 for ``'no'`` and 2/3 for ``'yes'``
    (all numeric comparisons use ``atol=0.005``).
    """
    # Rows are generated worker-major, task-minor.
    rows = []
    for worker_id in range(overlap):
        for task in range(3):
            answer = 'yes' if (worker_id - task) % 3 else 'no'
            rows.append({
                'task': task,
                'worker': worker_id,
                'label': answer,
            })
    answers = pd.DataFrame(rows)

    model = DawidSkene(20)
    fitted = model.fit(answers)

    # NOTE(review): column meaning of the two numbers is decided by
    # _make_probas, which is defined outside this block.
    proba_rows = [[task, 1 / 3, 2 / 3] for task in range(3)]
    expected_probas = _make_probas(proba_rows)

    label_rows = [[task, 'yes'] for task in range(3)]
    expected_labels = _make_tasks_labels(label_rows)

    # TODO: check errors_
    assert_frame_equal(
        expected_probas,
        fitted.probas_,
        check_like=True,
        atol=0.005,
    )
    assert_series_equal(expected_labels, fitted.labels_, atol=0.005)

    expected_priors = pd.Series({'no': 1 / 3, 'yes': 2 / 3}, name='prior')
    assert_series_equal(expected_priors, fitted.priors_, atol=0.005)
예제 #2
0
def test_aggregate_ds_on_simple(n_iter, tol, simple_answers_df,
                                simple_ground_truth):
    """Check DawidSkene labels against the ground truth of the simple dataset.

    The NumPy global seed is fixed to 42 before fitting. Both the predicted
    labels and the ground truth are sorted by index before comparison.
    """
    np.random.seed(42)

    aggregator = DawidSkene(n_iter=n_iter, tol=tol)
    fitted = aggregator.fit(simple_answers_df)
    predicted = fitted.labels_.sort_index()
    expected = simple_ground_truth.sort_index()

    assert_series_equal(predicted, expected)
예제 #3
0
def test_aggregate_ds_on_toy_ysda(n_iter, tol, toy_answers_df,
                                  toy_ground_truth_df):
    """Check DawidSkene labels against the ground truth of the toy dataset.

    The NumPy global seed is fixed to 42 before fitting. Both the predicted
    labels and the ground truth are sorted by index before comparison.
    """
    np.random.seed(42)

    aggregator = DawidSkene(n_iter=n_iter, tol=tol)
    fitted = aggregator.fit(toy_answers_df)
    predicted = fitted.labels_.sort_index()
    expected = toy_ground_truth_df.sort_index()

    assert_series_equal(predicted, expected)
예제 #4
0
def test_dawid_skene_on_empty_input(request, data):
    """Fit DawidSkene on an answers frame with no rows and expect empty results.

    ``probas_`` and ``errors_`` must equal an empty DataFrame, while
    ``priors_`` and ``labels_`` must equal empty float Series named
    ``'prior'`` and ``'agg_label'`` respectively.

    ``request`` and ``data`` are not used in the body; they are kept because
    they are part of the test's signature.
    """
    empty_answers = pd.DataFrame([], columns=['task', 'worker', 'label'])
    fitted = DawidSkene(10).fit(empty_answers)

    # Frame-valued attributes.
    for actual_frame in (fitted.probas_, fitted.errors_):
        assert_frame_equal(
            pd.DataFrame(),
            actual_frame,
            check_like=True,
            atol=0.005,
        )

    # Series-valued attributes, each with its expected name.
    empty_priors = pd.Series(dtype=float, name='prior')
    assert_series_equal(empty_priors, fitted.priors_, atol=0.005)

    empty_labels = pd.Series(dtype=float, name='agg_label')
    assert_series_equal(empty_labels, fitted.labels_, atol=0.005)
예제 #5
0
def test_dawid_skene_step_by_step(request, data, n_iter):
    """Compare DawidSkene state after ``n_iter`` iterations with fixtures.

    Expected values are looked up dynamically from fixtures whose names end
    in ``_iter_<n_iter>``. All comparisons use ``atol=0.005``.
    """
    # Fixtures are resolved before fitting, in this exact order.
    expected_probas = request.getfixturevalue(f'probas_iter_{n_iter}')
    expected_labels = request.getfixturevalue(f'tasks_labels_iter_{n_iter}')
    expected_errors = request.getfixturevalue(f'errors_iter_{n_iter}')
    expected_priors = request.getfixturevalue(f'priors_iter_{n_iter}')

    model = DawidSkene(n_iter)
    fitted = model.fit(data)

    assert_frame_equal(
        expected_probas, fitted.probas_, check_like=True, atol=0.005,
    )
    assert_frame_equal(
        expected_errors, fitted.errors_, check_like=True, atol=0.005,
    )
    assert_series_equal(expected_priors, fitted.priors_, atol=0.005)
    assert_series_equal(expected_labels, fitted.labels_, atol=0.005)
예제 #6
0
def test_on_bool_labels(data_with_bool_labels, bool_labels_ground_truth):
    """Check that a 20-iteration DawidSkene fit reproduces the ground truth.

    NOTE(review): the fixture names suggest the labels are booleans; the
    fixtures themselves are defined outside this block.
    """
    model = DawidSkene(20)
    fitted = model.fit(data_with_bool_labels)
    predicted = fitted.labels_
    assert_series_equal(bool_labels_ground_truth, predicted, atol=0.005)
예제 #7
0
 def track_accuracy_dawid_skene(self):
     """Return the accuracy of a 5-iteration DawidSkene aggregation.

     Runs ``fit_predict`` on ``self.crowd_labels`` and passes the result to
     ``self._calc_accuracy``.
     """
     aggregator = DawidSkene(n_iter=5)
     predictions = aggregator.fit_predict(self.crowd_labels)
     return self._calc_accuracy(predictions)
예제 #8
0
 def peakmem_dawid_skene(self):
     """Run a 5-iteration DawidSkene ``fit_predict`` on ``self.crowd_labels``.

     The prediction result is discarded and nothing is returned.
     NOTE(review): the ``peakmem_`` prefix suggests this is a peak-memory
     benchmark picked up by a benchmark runner — confirm.
     """
     aggregator = DawidSkene(n_iter=5)
     aggregator.fit_predict(self.crowd_labels)