def test_cohens_kappa_perc_agreement_no_agreement(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Check Cohen's kappa and percent agreement when two coders never agree."""
    project = test_project_half_irr_data
    labels = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(
        normal_queue,
        "random",
        irr_queue,
        project.percentage_irr,
        project.batch_size,
    )

    # Five IRR items: the second coder always picks the label that follows
    # the first coder's choice (mod 3), so the two never match.
    disagreement_rounds = 5
    for round_no in range(disagreement_rounds):
        irr_datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[round_no % 3], irr_datum, test_profile, 3)
        label_data(labels[(round_no + 1) % 3], irr_datum, test_profile2, 3)

    result = cohens_kappa(project)
    kappa, perc = result
    assert round(kappa, 3) == -0.471
    assert perc == 0.0
예제 #2
0
def test_model_task_redis_no_dupes_data_left_in_queue(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """Run the model task after labeling only the non-IRR share of a batch.

    Verifies that the training set number advances by one and that the redis
    list backing the normal queue contains no duplicate entries.
    """
    project = test_project_labeled_and_tfidf
    starting_set_number = project.get_current_training_set().set_number

    # Both queues are resized to 40 before being filled.
    normal_q = project.queue_set.get(type="normal")
    normal_q.length = 40
    normal_q.save()

    irr_q = project.queue_set.get(type="irr")
    irr_q.length = 40
    irr_q.save()

    # Model pickles are written into a scratch directory for this test.
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(normal_q, 'random', irr_q,
               irr_percent=project.percentage_irr, batch_size=batch_size)

    label_options = project.labels.all()
    non_irr_share = (100 - project.percentage_irr) / 100
    for _ in range(int(batch_size * non_irr_share)):
        datum = assign_datum(project.creator, project)
        label_data(random.choice(label_options), datum, project.creator, 3)

    tasks.send_model_task.delay(project.pk).get()
    new_set_number = project.get_current_training_set().set_number
    assert new_set_number == starting_set_number + 1

    queued = test_redis.lrange(redis_serialize_queue(normal_q), 0, -1)
    assert len(queued) == len(set(queued))
예제 #3
0
def test_queue_refill(setup_celery, test_project_data, test_all_queues,
                      test_profile, test_labels, test_redis, tmpdir, settings):
    """Check how the queues look after one coder labels a whole batch.

    A single profile labels every normal item and every IRR item of the
    batch. Afterwards the normal queue is expected to hold a full non-IRR
    share again, and the IRR queue twice the IRR share.
    """
    project = test_project_data
    normal_queue, _admin_queue, irr_queue = test_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    irr_count = math.ceil((project.percentage_irr / 100) * project.batch_size)
    non_irr_count = math.ceil(
        ((100 - project.percentage_irr) / 100) * project.batch_size)

    # Normal items first, then IRR items; the same steps apply to each datum.
    workload = (("normal", non_irr_count), ("irr", irr_count))
    for queue_type, how_many in workload:
        for _ in range(how_many):
            datum = assign_datum(test_profile, project, queue_type)
            assert datum is not None
            label_data(test_labels[0], datum, test_profile, 3)
            check_and_trigger_model(datum, test_profile)

    normal_rows = DataQueue.objects.filter(queue=normal_queue).count()
    assert normal_rows == non_irr_count
    irr_rows = DataQueue.objects.filter(queue=irr_queue).count()
    assert irr_rows == irr_count * 2
예제 #4
0
def test_unassign_after_fillqueue(db, test_profile, test_project_data,
                                  test_queue, test_labels, test_redis):
    """Follow the redis list and set sizes of a queue through its lifecycle.

    The sizes are checked after the initial fill, after ten items are
    assigned, after five of those are labeled, and after a second fill.
    """
    list_key = 'queue:' + str(test_queue.pk)
    set_key = 'set:' + str(test_queue.pk)

    fill_queue(test_queue, 'random')

    # Right after filling, list and set both hold a full queue.
    assert test_redis.llen(list_key) == test_queue.length
    assert test_redis.scard(set_key) == test_queue.length

    assigned = get_assignments(test_profile, test_project_data, 10)

    # Assigning ten items shortens the list; the set is unchanged.
    assert test_redis.llen(list_key) == (test_queue.length - 10)
    assert test_redis.scard(set_key) == test_queue.length

    chosen_label = test_labels[0]
    for position in range(5):
        label_data(chosen_label, assigned[position], test_profile, 3)

    # Labeling five items shrinks the set by five; the list is unchanged.
    assert test_redis.llen(list_key) == (test_queue.length - 10)
    assert test_redis.scard(set_key) == (test_queue.length - 5)

    fill_queue(test_queue, 'random')

    # After the refill the set is full again and the list has grown by five.
    assert test_redis.llen(list_key) == test_queue.length - 5
    assert test_redis.scard(set_key) == test_queue.length
예제 #5
0
def test_model_task_redis_no_dupes_data_unassign_assigned_data(
        test_project_labeled_and_tfidf, test_queue_labeled,
        test_irr_queue_labeled, test_admin_queue_labeled, test_redis, tmpdir,
        settings):
    """Check the redis normal queue stays duplicate-free across retraining
    and a batch unassign.

    The project creator labels a batch, the model task runs, and the redis
    list is checked for duplicates. This is repeated with a larger assignment
    of which only part is labeled, and once more after ``batch_unassign``.
    """
    project = test_project_labeled_and_tfidf

    # Two extra coders are created first, then each is granted permission.
    extra_coders = [
        create_profile(username, 'password', '*****@*****.**')
        for username in ('test_profilezzz', 'test_profile2')
    ]
    for coder in extra_coders:
        ProjectPermissions.objects.create(profile=coder,
                                          project=project,
                                          permission='CODER')

    starting_set_number = project.get_current_training_set().set_number

    queue = project.queue_set.get(type="normal")
    queue.length = 40
    queue.save()

    irr_queue = project.queue_set.get(type="irr")
    irr_queue.length = 40
    irr_queue.save()

    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(queue, 'random', irr_queue,
               irr_percent=project.percentage_irr, batch_size=batch_size)

    labels = project.labels.all()

    def label_randomly(items):
        # Give every item a randomly chosen project label as the creator.
        for item in items:
            label_data(random.choice(labels), item, project.creator, 3)

    def retrain_and_expect(increment):
        # Run the model task and check how far the training set advanced.
        tasks.send_model_task.delay(project.pk).get()
        current_number = project.get_current_training_set().set_number
        assert current_number == starting_set_number + increment

    def assert_redis_queue_unique():
        # The redis list for the normal queue must not repeat an entry.
        entries = test_redis.lrange(redis_serialize_queue(queue), 0, -1)
        assert len(entries) == len(set(entries))

    first_round = get_assignments(project.creator, project, batch_size)
    label_randomly(first_round)
    retrain_and_expect(1)
    assert_redis_queue_unique()

    # Request 40 assignments but label only the first batch_size of them.
    second_round = get_assignments(project.creator, project, 40)
    label_randomly(second_round[:batch_size])
    retrain_and_expect(2)
    assert_redis_queue_unique()

    batch_unassign(project.creator)
    assert_redis_queue_unique()
예제 #6
0
def test_skip_irr(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_profile3,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Exercise skipping of IRR data and check which tables each datum is in."""
    project = test_project_half_irr_data
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, "random", irr_queue, project.percentage_irr,
               project.batch_size)

    def rows_in(queue, datum):
        # Number of DataQueue rows linking this datum to this queue.
        return DataQueue.objects.filter(data=datum, queue=queue).count()

    # The IRR queue was just filled, so an IRR datum must be available.
    first = assign_datum(test_profile, project, "irr")
    assert first is not None

    # One coder skips: the datum stays in the IRR queue, is absent from the
    # admin queue, gains one IRRLog row and no DataLabel row for that coder.
    skip_data(first, test_profile)
    assert rows_in(admin_queue, first) == 0
    assert rows_in(irr_queue, first) == 1
    assert IRRLog.objects.filter(data=first, profile=test_profile).count() == 1
    assert DataLabel.objects.filter(data=first,
                                    profile=test_profile).count() == 0

    # The second coder skips the same datum: it moves to the admin queue,
    # leaves the IRR queue, and has two IRRLog rows and no DataLabel.
    first_again = assign_datum(test_profile2, project, "irr")
    assert first.pk == first_again.pk
    skip_data(first_again, test_profile2)
    assert rows_in(admin_queue, first) == 1
    assert rows_in(irr_queue, first) == 0
    assert IRRLog.objects.filter(data=first).count() == 2
    assert DataLabel.objects.filter(data=first).count() == 0

    # Two coders label a fresh IRR datum identically and a third skips it:
    # three IRRLog rows, one DataLabel, and no admin or IRR queue entry.
    second_a = assign_datum(test_profile, project, "irr")
    second_b = assign_datum(test_profile2, project, "irr")
    assert second_a.pk != first.pk
    assert second_a.pk == second_b.pk
    second_c = assign_datum(test_profile3, project, "irr")
    assert second_b.pk == second_c.pk

    label_data(test_labels_half_irr[0], second_a, test_profile, 3)
    label_data(test_labels_half_irr[0], second_b, test_profile2, 3)
    skip_data(second_c, test_profile3)
    assert rows_in(admin_queue, second_c) == 0
    assert rows_in(irr_queue, second_c) == 0
    assert IRRLog.objects.filter(data=second_c).count() == 3
    assert DataLabel.objects.filter(data=second_c).count() == 1
예제 #7
0
def test_annotate_irr(setup_celery, test_project_half_irr_data,
                      test_half_irr_all_queues, test_profile, test_profile2,
                      test_profile3, test_labels_half_irr, test_redis, tmpdir,
                      settings):
    """Walk through the IRR labeling workflow and check where the data lands."""
    project = test_project_half_irr_data
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def rows_in(queue, datum):
        # Number of DataQueue rows linking this datum to this queue.
        return DataQueue.objects.filter(data=datum, queue=queue).count()

    # The IRR queue was just filled, so an IRR datum must be available.
    first = assign_datum(test_profile, project, "irr")
    assert first is not None

    # First coder labels: a DataLabel row exists for that coder, no IRRLog
    # row yet, and the datum is still in the IRR queue.
    label_data(test_labels_half_irr[0], first, test_profile, 3)
    assert DataLabel.objects.filter(data=first,
                                    profile=test_profile).count() > 0
    assert IRRLog.objects.filter(data=first, profile=test_profile).count() == 0
    assert rows_in(irr_queue, first) > 0

    first_for_two = assign_datum(test_profile2, project, "irr")
    assert first.pk == first_for_two.pk

    first_for_three = assign_datum(test_profile3, project, "irr")
    assert first.pk == first_for_three.pk

    # Second coder gives the same label: exactly one DataLabel remains and it
    # belongs to the project creator, IRRLog has two rows, and the datum has
    # left the IRR queue.
    label_data(test_labels_half_irr[0], first_for_two, test_profile2, 3)
    assert DataLabel.objects.filter(data=first_for_two).count() == 1
    assert DataLabel.objects.get(
        data=first_for_two).profile.pk == project.creator.pk
    assert IRRLog.objects.filter(data=first_for_two).count() == 2
    assert rows_in(irr_queue, first_for_two) == 0

    # A third coder labels the already-resolved datum (with the same label
    # here): IRRLog grows to three rows while the single creator-owned
    # DataLabel is left in place.
    label_data(test_labels_half_irr[0], first_for_three, test_profile3, 3)
    assert IRRLog.objects.filter(data=first_for_three).count() == 3
    assert DataLabel.objects.filter(data=first_for_three).count() == 1
    assert DataLabel.objects.get(
        data=first_for_three).profile.pk == project.creator.pk

    # Two coders disagree on a new datum: it goes to the admin queue, leaves
    # the IRR queue, has no DataLabel, and has two IRRLog rows.
    second = assign_datum(test_profile, project, "irr")
    assert first.pk != second.pk  # must be a different datum
    second_for_two = assign_datum(test_profile2, project, "irr")
    label_data(test_labels_half_irr[0], second, test_profile, 3)
    label_data(test_labels_half_irr[1], second_for_two, test_profile2, 3)
    assert rows_in(admin_queue, second_for_two) == 1
    assert rows_in(irr_queue, second_for_two) == 0
    assert DataLabel.objects.filter(data=second_for_two).count() == 0
    assert IRRLog.objects.filter(data=second_for_two).count() == 2
def test_g_naivebayes_classifier(
    setup_celery,
    test_project_gnb_data_tfidf,
    test_gnb_labels,
    test_gnb_queue_list,
    test_profile,
    test_redis,
    tmpdir,
    settings,
):
    """Check that a Gaussian Naive Bayes project trains a model and predicts.

    After a full batch is labeled, the model trigger must report that the
    model is running, a pickle must exist at the expected path, and there
    must be one prediction per unlabeled datum per label.
    """
    normal_queue, _admin_queue, _irr_queue = test_gnb_queue_list
    labels = test_gnb_labels
    project = test_project_gnb_data_tfidf

    learning_method = project.learning_method
    batch_size = project.batch_size
    initial_training_set = project.get_current_training_set()
    pickle_dir = tmpdir.listdir()[0].mkdir("model_pickles")
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    assert project.classifier == "gnb"
    assert learning_method == "least confident"

    fill_queue(normal_queue, "random")

    assert DataQueue.objects.filter(queue=normal_queue).count() == batch_size

    # Label the whole batch, cycling through the first three labels.
    for position in range(batch_size):
        datum = assign_datum(test_profile, project)
        label_data(labels[position % 3], datum, test_profile, 3)

    # The trigger is checked with the last datum labeled above.
    trigger_status = check_and_trigger_model(datum)
    assert trigger_status == "model running"

    # A model row exists and its pickle sits at the expected location.
    assert_obj_exists(Model, {"project": project})
    model = Model.objects.get(project=project)
    assert os.path.isfile(model.pickle_path)
    expected_file = "project_{}_training_{}.pkl".format(
        str(project.pk), str(initial_training_set.set_number)
    )
    assert model.pickle_path == os.path.join(str(pickle_dir), expected_file)

    # One prediction per (unlabeled datum, label) pair.
    predictions = DataPrediction.objects.filter(data__project=project)
    prediction_total = len(predictions)
    unlabeled_total = Data.objects.filter(project=project, labelers=None).count()
    assert prediction_total == unlabeled_total * project.labels.count()
예제 #9
0
def label_project(project, profile, num_labels):
    """Label ``num_labels`` assignments for ``profile`` and run the model task.

    The first assignments each receive a distinct project label, in label
    order; any remaining assignments receive a randomly chosen label. Every
    label is recorded with a random labeling time between 0 and 25. The
    result of the model task is then stored on the training set that was
    current before labeling started.
    """
    label_options = project.labels.all()

    training_set = project.get_current_training_set()

    assignments = get_assignments(profile, project, num_labels)

    # Pair labels with assignments one-to-one for as long as both last.
    for label, assignment in zip(label_options, assignments):
        label_data(label, assignment, profile, random.randint(0, 25))

    # Whatever is left over gets a random label.
    leftovers = assignments[len(label_options):]
    for assignment in leftovers:
        label_data(random.choice(label_options), assignment, profile,
                   random.randint(0, 25))

    training_set.celery_task_id = tasks.send_model_task.apply(args=[project.pk])
    training_set.save()
예제 #10
0
def test_randomforest_classifier(setup_celery,
                                 test_project_randomforest_data_tfidf,
                                 test_randomforest_labels,
                                 test_randomforest_queue_list, test_profile,
                                 test_redis, tmpdir, settings):
    """Check that a random forest project trains a model and predicts.

    After a full batch is labeled, the model trigger must report that the
    model is running, a pickle must exist at the expected path, and there
    must be one prediction per unlabeled datum per label.
    """
    normal_queue, _admin_queue, _irr_queue = test_randomforest_queue_list
    labels = test_randomforest_labels
    project = test_project_randomforest_data_tfidf

    learning_method = project.learning_method
    batch_size = project.batch_size
    initial_training_set = project.get_current_training_set()
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    assert project.classifier == "random forest"
    assert learning_method == 'least confident'

    fill_queue(normal_queue, 'random')

    assert DataQueue.objects.filter(queue=normal_queue).count() == batch_size

    # Label the whole batch, cycling through the first three labels.
    for position in range(batch_size):
        datum = assign_datum(test_profile, project)
        label_data(labels[position % 3], datum, test_profile, 3)

    # The trigger is checked with the last datum labeled above.
    trigger_status = check_and_trigger_model(datum)
    assert trigger_status == 'model running'

    # A model row exists and its pickle sits at the expected location.
    assert_obj_exists(Model, {'project': project})
    model = Model.objects.get(project=project)
    assert os.path.isfile(model.pickle_path)
    expected_file = 'project_{}_training_{}.pkl'.format(
        str(project.pk), str(initial_training_set.set_number))
    assert model.pickle_path == os.path.join(str(pickle_dir), expected_file)

    # One prediction per (unlabeled datum, label) pair.
    predictions = DataPrediction.objects.filter(data__project=project)
    prediction_total = len(predictions)
    unlabeled_total = Data.objects.filter(project=project,
                                          labelers=None).count()
    assert prediction_total == unlabeled_total * project.labels.count()
예제 #11
0
def annotate_data(request, data_pk):
    """Record the requesting user's label for a single assigned datum.

    Three cases are handled:

    * the datum is in the recycle bin: the user's assignment is simply
      deleted;
    * the IRR log already holds at least ``project.num_users_irr`` entries
      for the datum: the label is appended to the IRR log and the assignment
      is deleted;
    * otherwise: the datum is labeled via ``label_data`` and, when it is IRR
      data, passed on to ``process_irr_label``.

    In every case ``check_and_trigger_model`` is called afterwards.

    Args:
        request: The POST request; ``request.data`` must provide ``labelID``
            and ``labeling_time``.
        data_pk: Primary key of the data
    Returns:
        An empty response ({}).
    """
    data = Data.objects.get(pk=data_pk)
    project = data.project
    profile = request.user.profile
    label = Label.objects.get(pk=request.data['labelID'])
    labeling_time = request.data['labeling_time']

    irr_entries = IRRLog.objects.filter(data=data).count()

    def release_assignment():
        # Drop this user's assignment of the datum.
        AssignedData.objects.get(data=data, profile=profile).delete()

    if RecycleBin.objects.filter(data=data).count() > 0:
        # The datum has been discarded; nothing to record.
        release_assignment()
    elif irr_entries >= project.num_users_irr:
        # Enough IRR entries exist, so the datum was already processed;
        # only extend the history with this label.
        IRRLog.objects.create(data=data,
                              profile=profile,
                              label=label,
                              timestamp=timezone.now())
        release_assignment()
    else:
        label_data(label, data, profile, labeling_time)
        if data.irr_ind:
            # Reliability data goes through the IRR processing step.
            process_irr_label(data, label)

    # Runs for every datum, whichever branch was taken above.
    check_and_trigger_model(data, profile)

    return Response({})
def test_check_and_trigger_model_first_labeled(
    setup_celery, test_project_data, test_labels, test_queue, test_profile
):
    """Check that labeling a single datum does not trigger a model run.

    The training set must be unchanged, no model, prediction or uncertainty
    rows may exist, and the queue must be one item shorter.
    """
    project = test_project_data
    training_set_before = project.get_current_training_set()

    fill_queue(test_queue, orderby="random")

    datum = assign_datum(test_profile, test_queue.project)
    label_data(test_labels[0], datum, test_profile, 3)

    assert check_and_trigger_model(datum) == "no trigger"

    assert project.get_current_training_set() == training_set_before
    assert project.model_set.count() == 0
    # Neither predictions nor uncertainties were produced for the project.
    for per_datum_model in (DataPrediction, DataUncertainty):
        assert per_datum_model.objects.filter(data__project=project).count() == 0
    remaining = DataQueue.objects.filter(queue=test_queue).count()
    assert remaining == TEST_QUEUE_LEN - 1
예제 #13
0
def test_label_data(db, test_profile, test_queue, test_redis):
    """Label one assigned datum and check the label row and the assignment."""
    fill_queue(test_queue, orderby='random')

    datum = assign_datum(test_profile, test_queue.project)
    new_label = Label.objects.create(name='test', project=test_queue.project)
    label_data(new_label, datum, test_profile, 3)

    # The datum shows up among the profile's labeled data, with a matching
    # DataLabel row.
    assert datum in test_profile.labeled_data.all()
    expected_row = {
        'data': datum,
        'profile': test_profile,
        'label': new_label,
        'time_to_label': 3
    }
    assert_obj_exists(DataLabel, expected_row)

    # No assignment for this profile/datum/queue is left behind.
    leftover = AssignedData.objects.filter(
        profile=test_profile, data=datum, queue=test_queue)
    assert not leftover.exists()
def test_label_data(db, test_profile, test_queue, test_redis):
    """Verify that labeling records a DataLabel and clears the assignment."""
    fill_queue(test_queue, orderby="random")

    project = test_queue.project
    datum = assign_datum(test_profile, project)
    created_label = Label.objects.create(name="test", project=test_queue.project)
    label_data(created_label, datum, test_profile, 3)

    # The label must be recorded against the profile.
    assert datum in test_profile.labeled_data.all()
    recorded_fields = dict(
        data=datum,
        profile=test_profile,
        label=created_label,
        time_to_label=3,
    )
    assert_obj_exists(DataLabel, recorded_fields)

    # The assignment must be gone once the datum is labeled.
    still_assigned = AssignedData.objects.filter(
        profile=test_profile, data=datum, queue=test_queue
    ).exists()
    assert not still_assigned
예제 #15
0
def test_fleiss_kappa_perc_agreement(
        setup_celery, test_project_all_irr_3_coders_data,
        test_all_irr_3_coders_all_queues, test_profile, test_profile2,
        test_profile3, test_labels_all_irr_3_coders, test_redis, tmpdir,
        settings):
    """Check Fleiss's kappa and percent agreement over a series of scenarios
    with three coders."""
    project = test_project_all_irr_3_coders_data
    labels = test_labels_all_irr_3_coders
    normal_queue, _admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def assign_next_to_all():
        # Hand out an IRR datum to each coder; return the first coder's.
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        assign_datum(test_profile3, project, "irr")
        return datum

    def label_as(coder, label_index, datum):
        label_data(labels[label_index], datum, coder, 3)

    def expect_no_irr_data():
        with pytest.raises(ValueError) as excinfo:
            fleiss_kappa(project)
        assert 'No irr data' in str(excinfo.value)

    # With nothing labeled, the computation must raise.
    expect_no_irr_data()

    # It must still raise while only two of the three coders have labeled.
    datum = assign_next_to_all()
    label_as(test_profile, 0, datum)
    label_as(test_profile2, 1, datum)
    expect_no_irr_data()

    # Third coder picks yet another label: every coder disagrees.
    # [1 1 1], kappa = -0.5, pa = 0
    label_as(test_profile3, 2, datum)
    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 1) == -0.5
    assert perc == 0.0

    # A second datum labeled by only two coders must not change the result.
    datum = assign_next_to_all()
    label_as(test_profile, 0, datum)
    label_as(test_profile2, 0, datum)

    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 1) == -0.5
    assert perc == 0.0

    # The third coder agrees with the other two.
    # [[1 1 1],[3 0 0]], kappa = 0.0, pa = 0.5
    label_as(test_profile3, 0, datum)

    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == 0.0
    assert perc == 0.5

    # Two coders agree and one disagrees.
    # [[1 1 1],[3 0 0],[2 1 0]], kappa = -0.13, pa=0.333
    datum = assign_next_to_all()
    label_as(test_profile, 0, datum)
    label_as(test_profile2, 0, datum)
    label_as(test_profile3, 1, datum)

    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == -0.13
    assert round(perc, 2) == 0.33

    # Same idea, with the majority on the other label.
    # [[1 1 1],[3 0 0],[2 1 0],[1 2 0]], kappa = -0.08, pa=0.25
    datum = assign_next_to_all()
    label_as(test_profile, 0, datum)
    label_as(test_profile2, 1, datum)
    label_as(test_profile3, 1, datum)

    kappa, perc = fleiss_kappa(project)
    assert round(kappa, 2) == -0.08
    assert round(perc, 2) == 0.25
예제 #16
0
def test_cohens_kappa_perc_agreement(setup_celery, test_project_half_irr_data,
                                     test_half_irr_all_queues, test_profile,
                                     test_profile2, test_labels_half_irr,
                                     test_redis, tmpdir, settings):
    """Check Cohen's kappa and percent agreement across several situations.

    Covers no labeled IRR data (error), only one label represented (error),
    full agreement, and a mix of agreement and disagreement.
    """
    project = test_project_half_irr_data
    labels = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def both_label(first_index, second_index):
        # Assign the next IRR datum to both coders and label it with the
        # given label indices (first coder, second coder).
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[first_index], datum, test_profile, 3)
        label_data(labels[second_index], datum, test_profile2, 3)

    def expect_value_error(fragment):
        with pytest.raises(ValueError) as excinfo:
            cohens_kappa(project)
        assert fragment in str(excinfo.value)

    # Nothing labeled yet: an error is expected.
    expect_value_error('No irr data')

    # Both coders give two data items the same single label.
    for _ in range(2):
        both_label(0, 0)

    # Kappa needs at least two distinct labels to be represented.
    expect_value_error('Need at least two labels represented')

    both_label(1, 1)

    # Everything agrees and two labels are present: kappa is exactly 1.
    kappa, perc = cohens_kappa(project)
    assert kappa == 1.0
    assert perc == 1.0

    # Add two disagreements and check the resulting values.
    both_label(1, 2)
    both_label(0, 1)

    kappa, perc = cohens_kappa(project)
    assert round(kappa, 3) == 0.333
    assert perc == 0.6
예제 #17
0
def test_percent_agreement_table(setup_celery,
                                 test_project_all_irr_3_coders_data,
                                 test_all_irr_3_coders_all_queues,
                                 test_profile, test_profile2, test_profile3,
                                 test_labels_all_irr_3_coders, test_redis,
                                 tmpdir, settings):
    """Check the "Percent Agreement" column of the pairwise agreement table
    as three coders label and skip IRR data."""
    project = test_project_all_irr_3_coders_data
    for coder in (test_profile2, test_profile3):
        ProjectPermissions.objects.create(profile=coder,
                                          project=project,
                                          permission='CODER')
    labels = test_labels_all_irr_3_coders
    normal_queue, _admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    everyone = (test_profile, test_profile2, test_profile3)

    def percent_column():
        # Current "Percent Agreement" values as a plain list.
        frame = pd.DataFrame(perc_agreement_table_data(project))
        return frame["Percent Agreement"].tolist()

    def first_three(column):
        return (column[0], column[1], column[2])

    def assign_next_to_all():
        # Hand out an IRR datum to each coder; return the first coder's.
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        assign_datum(test_profile3, project, "irr")
        return datum

    # Before any labeling, all three rows read "No samples".
    column = percent_column()
    assert len(column) == 3
    assert first_three(column) == ("No samples", "No samples", "No samples")

    # Everyone gives the same label: 100% for every pair.
    datum = assign_next_to_all()
    for coder in everyone:
        label_data(labels[0], datum, coder, 3)

    column = percent_column()
    assert first_three(column) == ("100.0%", "100.0%", "100.0%")

    # user1 == user2 != user3 on a second datum.
    datum = assign_next_to_all()
    for coder, label_index in zip(everyone, (0, 0, 1)):
        label_data(labels[label_index], datum, coder, 3)

    column = percent_column()
    # rows come in the order [prof2, prof3], [prof2, prof], [prof3, prof]
    assert first_three(column) == ("50.0%", "100.0%", "50.0%")

    # Everyone skips a third datum; skips are expected to count as
    # disagreement.
    datum = assign_next_to_all()
    for coder in everyone:
        skip_data(datum, coder)

    column = percent_column()
    # rows come in the order [prof2, prof3], [prof2, prof], [prof3, prof]
    assert first_three(column) == ("33.3%", "66.7%", "33.3%")

    # Only two coders label a fourth datum; the table must not change.
    datum = assign_next_to_all()
    for coder in (test_profile, test_profile2):
        label_data(labels[0], datum, coder, 3)

    column = percent_column()
    # rows come in the order [prof2, prof3], [prof2, prof], [prof3, prof]
    assert first_three(column) == ("33.3%", "66.7%", "33.3%")
예제 #18
0
def _heatmap_cell_count(frame, label1, label2):
    '''
    Return the "count" value of the heatmap cell for (label1, label2).

    ``frame`` is a DataFrame built from one user-pair entry of the heatmap
    data and must have "label1", "label2" and "count" columns. The first
    matching row is used; an IndexError is raised if no row matches.
    '''
    cell = frame.loc[(frame["label1"] == label1)
                     & (frame["label2"] == label2)]
    return cell["count"].tolist()[0]


def test_heatmap_data(setup_celery, test_project_half_irr_data,
                      test_half_irr_all_queues, test_profile, test_profile2,
                      test_labels_half_irr, test_redis, tmpdir, settings):
    '''
    These tests check that the heatmap accurately reflects the data.

    The heatmap is inspected at four points: before any labeling, after one
    coder labels three items that the other coder skips, after both coders
    agree on five items, and after a single coder labels one more item
    (which is expected to leave the totals unchanged).
    '''
    project = test_project_half_irr_data
    # Both profiles are registered as coders on the project before the
    # heatmap is requested.
    for profile in (test_profile, test_profile2):
        ProjectPermissions.objects.create(profile=profile,
                                          project=project,
                                          permission='CODER')
    labels = test_labels_half_irr
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    # Heatmap entries are keyed by "<pk>_<pk>" for a pair of profiles.
    combo1 = "{}_{}".format(test_profile.pk, test_profile2.pk)
    same1 = "{}_{}".format(test_profile.pk, test_profile.pk)
    same2 = "{}_{}".format(test_profile2.pk, test_profile2.pk)

    # don't label anything. The heatmap should have all zeros for user pair
    heatmap = irr_heatmap_data(project)
    assert combo1 in heatmap
    pair_frame = pd.DataFrame(heatmap[combo1])
    assert (pair_frame["count"] == 0).all()

    # have one user skip 3 things and another label them.
    for i in range(3):
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[i], datum, test_profile, 3)
        skip_data(datum, test_profile2)

    # check that user1-user1 map is I3 (one count on each diagonal cell)
    heatmap = irr_heatmap_data(project)
    same_frame = pd.DataFrame(heatmap[same1])
    for i in range(3):
        assert _heatmap_cell_count(same_frame, labels[i].name,
                                   labels[i].name) == 1
    assert same_frame["count"].sum() == 3

    # check the second user only has 3 in the skip-skip spot.
    # Both sides of the lookup must come from the second user's own frame;
    # the original mask mixed in the first user's "label2" column.
    same_frame2 = pd.DataFrame(heatmap[same2])
    assert _heatmap_cell_count(same_frame2, "Skip", "Skip") == 3
    assert same_frame2["count"].sum() == 3

    # check that the between-user heatmap has skip-label = 1 for each label
    heatmap = irr_heatmap_data(project)
    pair_frame = pd.DataFrame(heatmap[combo1])
    for i in range(3):
        assert _heatmap_cell_count(pair_frame, labels[i].name, "Skip") == 1
    assert pair_frame["count"].sum() == 3

    # have users agree on 5 labels and datums, check heatmap
    for i in range(5):
        datum = assign_datum(test_profile, project, "irr")
        assign_datum(test_profile2, project, "irr")
        label_data(labels[i % 3], datum, test_profile, 3)
        label_data(labels[i % 3], datum, test_profile2, 3)

    heatmap = irr_heatmap_data(project)
    pair_frame = pd.DataFrame(heatmap[combo1])

    # i % 3 over range(5) picks labels 0 and 1 twice and label 2 once
    assert _heatmap_cell_count(pair_frame, labels[0].name,
                               labels[0].name) == 2
    assert _heatmap_cell_count(pair_frame, labels[1].name,
                               labels[1].name) == 2
    assert _heatmap_cell_count(pair_frame, labels[2].name,
                               labels[2].name) == 1
    # 3 label/skip cells from before plus the 5 agreements
    assert pair_frame["count"].sum() == 8

    # have one user label something, show the heatmap hasn't changed
    datum = assign_datum(test_profile, project, "irr")
    label_data(labels[0], datum, test_profile, 3)
    heatmap = irr_heatmap_data(project)
    assert pd.DataFrame(heatmap[same1])["count"].sum() == 8
    assert pd.DataFrame(heatmap[combo1])["count"].sum() == 8
# Example #19
# 0
def test_all_irr(setup_celery, test_project_all_irr_3_coders_data,
                 test_all_irr_3_coders_all_queues, test_profile, test_profile2,
                 test_profile3, test_labels_all_irr_3_coders, test_redis,
                 tmpdir, settings):
    '''
    Exercise a project with 100% IRR where three coders must handle each
    datum: the first coder labels, the second skips, the third labels, and
    queue / log / label table contents are checked after each coder.
    '''
    project = test_project_all_irr_3_coders_data
    labels = test_labels_all_irr_3_coders
    normal_queue, admin_queue, irr_queue = test_all_irr_3_coders_all_queues
    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr,
               project.batch_size)

    def take_three(profile):
        # Pull three IRR items for the given coder, in assignment order.
        return [assign_datum(profile, project, "irr") for _ in range(3)]

    def queued(queue, data):
        # Number of rows of `queue` that hold any of `data`.
        return DataQueue.objects.filter(queue=queue, data__in=data).count()

    # Everything should have landed in the IRR queue, nothing in the normal
    # queue, and every IRR-queue item should be flagged irr_ind.
    irr_total = DataQueue.objects.filter(queue=irr_queue).count()
    assert irr_total == project.batch_size
    normal_total = DataQueue.objects.filter(queue=normal_queue).count()
    assert normal_total == 0
    flagged_total = DataQueue.objects.filter(queue=irr_queue,
                                             data__irr_ind=True).count()
    assert flagged_total == project.batch_size

    # Coder 1 labels three items; they must remain in the IRR queue.
    first_pass = take_three(test_profile)
    first, second, third = first_pass
    assert first.pk != second.pk
    assert third.pk != second.pk

    for item in first_pass:
        label_data(labels[0], item, test_profile, 3)

    assert queued(irr_queue, first_pass) == 3

    # Coder 2 receives the same three items and skips them all. They stay in
    # the IRR queue, appear in IRRLog and DataLabel, and are not yet in the
    # admin queue.
    second_pass = take_three(test_profile2)
    for earlier, current in zip(first_pass, second_pass):
        assert earlier.pk == current.pk

    for item in second_pass:
        skip_data(item, test_profile2)

    assert queued(irr_queue, second_pass) == 3
    assert queued(admin_queue, second_pass) == 0
    assert IRRLog.objects.filter(data__in=second_pass).count() == 3
    assert DataLabel.objects.filter(data__in=second_pass).count() == 3

    # Coder 3 receives the same three items and labels them. They should now
    # be in the admin queue and the log, and gone from the IRR queue and
    # DataLabel.
    third_pass = take_three(test_profile3)
    for earlier, current in zip(first_pass, third_pass):
        assert earlier.pk == current.pk

    final_labels = (labels[0], labels[1], labels[0])
    for label, item in zip(final_labels, third_pass):
        label_data(label, item, test_profile3, 3)

    assert queued(irr_queue, third_pass) == 0
    assert queued(admin_queue, third_pass) == 3
    assert IRRLog.objects.filter(data__in=third_pass).count() == 9
    assert DataLabel.objects.filter(data__in=third_pass).count() == 0