def get_irr_metrics(request, project_pk):
    """Calculate reliability metrics from the project's currently coded IRR data.

    ``fleiss_kappa`` is used when the project has more than two IRR users
    (``project.num_users_irr > 2``); otherwise ``cohens_kappa`` is used.

    Args:
        request: The POST request
        project_pk: Primary key of the project
    Returns:
        A Response whose data has two keys:
            'kappa': the kappa value rounded to 3 decimal places
            'percent agreement': the agreement as a percentage string,
                e.g. "60.0%"
        If the kappa computation raises ValueError, both values are the
        string "No irr data processed" instead.
    """
    # need to take the IRRLog and pull out exactly the max_labelers amount
    # of labels for each datum
    project = Project.objects.get(pk=project_pk)

    try:
        if project.num_users_irr > 2:
            kappa, perc_agreement = fleiss_kappa(project)
        else:
            kappa, perc_agreement = cohens_kappa(project)
    except ValueError:
        # Any ValueError from the kappa helpers is reported to the client
        # with the same placeholder text.
        kappa = "No irr data processed"
        perc_agreement = "No irr data processed"
    else:
        kappa = round(kappa, 3)
        # Scale to a percentage first and round last: rounding before the
        # multiplication lets float noise back in (0.33333 * 100 prints as
        # 33.333000000000006). Precision is unchanged (3 decimals of percent).
        perc_agreement = str(round(perc_agreement * 100, 3)) + "%"

    return Response({'kappa': kappa, 'percent agreement': perc_agreement})
def test_cohens_kappa_perc_agreement_no_agreement(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Check kappa and percent agreement when the two coders never agree."""
    irr_project = test_project_half_irr_data
    label_choices = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues

    fill_queue(
        normal_queue,
        "random",
        irr_queue,
        irr_project.percentage_irr,
        irr_project.batch_size,
    )

    # Five IRR items, each given two different labels: the first coder picks
    # label (n mod 3) and the second picks the next one along.
    for n in range(5):
        shared_datum = assign_datum(test_profile, irr_project, "irr")
        assign_datum(test_profile2, irr_project, "irr")
        label_data(label_choices[n % 3], shared_datum, test_profile, 3)
        label_data(label_choices[(n + 1) % 3], shared_datum, test_profile2, 3)

    kappa_value, agreement = cohens_kappa(irr_project)
    assert round(kappa_value, 3) == -0.471
    assert agreement == 0.0
def test_cohens_kappa_perc_agreement(
    setup_celery,
    test_project_half_irr_data,
    test_half_irr_all_queues,
    test_profile,
    test_profile2,
    test_labels_half_irr,
    test_redis,
    tmpdir,
    settings,
):
    """Walk cohens_kappa through several states of the IRR data.

    Covers: no IRR data yet (error), only one label represented (error),
    full agreement, and partial agreement.
    """
    irr_project = test_project_half_irr_data
    label_choices = test_labels_half_irr
    normal_queue, _admin_queue, irr_queue = test_half_irr_all_queues

    def code_next_datum(first_label, second_label):
        # Hand the next IRR datum to both profiles, then record one label
        # from each of them on the datum returned for the first profile.
        shared_datum = assign_datum(test_profile, irr_project, "irr")
        assign_datum(test_profile2, irr_project, "irr")
        label_data(first_label, shared_datum, test_profile, 3)
        label_data(second_label, shared_datum, test_profile2, 3)

    fill_queue(
        normal_queue,
        'random',
        irr_queue,
        irr_project.percentage_irr,
        irr_project.batch_size,
    )

    # Nothing has been labeled yet, so the calculation must refuse to run.
    with pytest.raises(ValueError) as no_data_error:
        cohens_kappa(irr_project)
    assert 'No irr data' in str(no_data_error.value)

    # Both labelers give the same label to two datums.
    for _ in range(2):
        code_next_datum(label_choices[0], label_choices[0])

    # Only one label is represented so far, which kappa cannot work with.
    with pytest.raises(ValueError) as single_label_error:
        cohens_kappa(irr_project)
    assert 'Need at least two labels represented' in str(
        single_label_error.value
    )

    # A second label, again with full agreement: kappa should now be 1.
    code_next_datum(label_choices[1], label_choices[1])
    kappa_value, agreement = cohens_kappa(irr_project)
    assert kappa_value == 1.0
    assert agreement == 1.0

    # Two disagreements on top of the three agreements above.
    code_next_datum(label_choices[1], label_choices[2])
    code_next_datum(label_choices[0], label_choices[1])
    kappa_value, agreement = cohens_kappa(irr_project)
    assert round(kappa_value, 3) == 0.333
    assert agreement == 0.6