def test_unassign(db, test_profile, test_project_data, test_queue, test_redis):
    """Unassigning a datum restores the redis list and removes the AssignedData row."""
    list_key = 'queue:' + str(test_queue.pk)
    set_key = 'set:' + str(test_queue.pk)

    def redis_sizes():
        # (length of the redis list, cardinality of the redis set) for test_queue
        return test_redis.llen(list_key), test_redis.scard(set_key)

    fill_queue(test_queue, orderby='random')
    assert redis_sizes() == (test_queue.length, test_queue.length)

    # Assigning shortens the list by one while the set keeps its size
    datum = get_assignments(test_profile, test_project_data, 1)[0]
    assert redis_sizes() == (test_queue.length - 1, test_queue.length)
    assert AssignedData.objects.filter(data=datum, profile=test_profile).exists()

    # Unassigning brings the list back to full length and drops the assignment
    unassign_datum(datum, test_profile)
    assert redis_sizes() == (test_queue.length, test_queue.length)
    assert not AssignedData.objects.filter(data=datum, profile=test_profile).exists()

    # The unassigned datum should be the next to be assigned
    next_datum = get_assignments(test_profile, test_project_data, 1)[0]
    assert next_datum == datum
def test_percent_agree_table(seeded_database, client, admin_client, test_project_all_irr_data,
                             test_all_irr_all_queues, test_labels_all_irr):
    '''
    Check that the percent agreement table is admin-only and lists one row
    per pair of coders.

    Note: the exact values of the table are checked in the util tests.
    '''
    labels = test_labels_all_irr
    normal_queue, admin_queue, irr_queue = test_all_irr_all_queues
    project = test_project_all_irr_data
    table_url = '/api/perc_agree_table/{}/'.format(project.pk)
    annotate_url = '/api/annotate_data/{}/'

    # sign in one coder and one admin; a third profile never labels anything
    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    coder_profile = Profile.objects.get(user__username=SEED_USERNAME)
    ProjectPermissions.objects.create(profile=coder_profile, project=project, permission='CODER')
    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)
    ProjectPermissions.objects.create(profile=admin_profile, project=project, permission='ADMIN')
    third_profile = Profile.objects.get(user__username="******")

    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr, project.batch_size)

    # non-admin should not be able to call the table
    denied = client.get(table_url)
    assert denied.status_code == 403
    assert "Invalid permission. Must be an admin" in str(denied.content)

    coder_batch = get_assignments(coder_profile, project, 15)
    admin_batch = get_assignments(admin_profile, project, 15)
    for idx in range(15):
        payload = {"labelID": labels[idx % 3].pk, "labeling_time": 3}
        # the admin posts for the coder's batch entry and vice versa
        reply = admin_client.post(annotate_url.format(coder_batch[idx].pk), payload).json()
        assert 'error' not in reply and 'detail' not in reply
        reply = client.post(annotate_url.format(admin_batch[idx].pk), payload).json()
        assert 'error' not in reply and 'detail' not in reply

    # check that the three user pairs are in the table
    table = admin_client.get(table_url).json()
    assert 'data' in table
    frame = pd.DataFrame(table['data'])
    third_name = str(third_profile)

    # should have combination [adm, cl] [adm, u3], [cl, u3]
    assert frame['First Coder'].tolist() == [SEED_USERNAME, SEED_USERNAME, SEED_USERNAME2]
    assert frame['Second Coder'].tolist() == [SEED_USERNAME2, third_name, third_name]

    # and nothing beyond those three combinations
    assert len(frame) == 3

    # pairs involving the third user have no samples
    rows_with_third = frame.loc[frame['Second Coder'] == third_name]
    assert rows_with_third["Percent Agreement"].tolist() == ["No samples", "No samples"]

    # the first pair's agreement reads n%, with n between 0 and 100
    first_agreement = frame["Percent Agreement"].tolist()[0]
    assert 0 <= float(first_agreement[:-1]) <= 100
def test_restore_data(
    seeded_database,
    client,
    admin_client,
    test_project_data,
    test_queue,
    test_irr_queue,
    test_labels,
    test_admin_queue,
):
    """Discarded data can be restored by an admin, and only by an admin."""
    project = test_project_data
    skip_url = "/api/skip_data/{}/"
    discard_url = "/api/discard_data/{}/"
    restore_url = "/api/restore_data/{}/"

    fill_queue(
        test_queue, "random", test_irr_queue, project.percentage_irr, project.batch_size
    )

    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)
    ProjectPermissions.objects.create(
        profile=admin_profile, project=project, permission="ADMIN"
    )
    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    coder_profile = Profile.objects.get(user__username=SEED_USERNAME)
    ProjectPermissions.objects.create(
        profile=coder_profile, project=project, permission="CODER"
    )

    # The coder, then the admin, each take a batch (should hold IRR and
    # non-IRR data) and skip every item in it.
    for api_client, profile in ((client, coder_profile), (admin_client, admin_profile)):
        batch = get_assignments(profile, project, 30)
        for idx in range(30):
            api_client.post(skip_url.format(batch[idx].pk))

    admin_entries = DataQueue.objects.filter(
        data__project=project, queue=test_admin_queue
    )

    # discard everything that reached the admin queue
    for entry in admin_entries:
        admin_client.post(discard_url.format(entry.data.pk))

    # restoring requires admin privileges
    denied = client.post(restore_url.format(admin_entries[0].data.pk)).json()
    assert "detail" in denied
    assert "Invalid permission. Must be an admin" in denied["detail"]

    # restore everything; nothing should remain in the recycle bin
    for entry in admin_entries:
        admin_client.post(restore_url.format(entry.data.pk))
        assert RecycleBin.objects.filter(data=entry.data).count() == 0
        assert not Data.objects.get(pk=entry.data.pk).irr_ind
def test_model_task_redis_no_dupes_data_unassign_assigned_data(
        test_project_labeled_and_tfidf, test_queue_labeled, test_irr_queue_labeled,
        test_admin_queue_labeled, test_redis, tmpdir, settings):
    """The redis queue holds no duplicates after model runs and a batch unassign."""
    project = test_project_labeled_and_tfidf

    # two extra coders on the project
    extra_coders = [
        create_profile('test_profilezzz', 'password', '*****@*****.**'),
        create_profile('test_profile2', 'password', '*****@*****.**'),
    ]
    for coder in extra_coders:
        ProjectPermissions.objects.create(profile=coder, project=project, permission='CODER')

    starting_set_number = project.get_current_training_set().set_number

    queue = project.queue_set.get(type="normal")
    queue.length = 40
    queue.save()

    irr_queue = project.queue_set.get(type="irr")
    irr_queue.length = 40
    irr_queue.save()

    # point the model pickle path at a temporary directory
    pickle_dir = tmpdir.listdir()[0].mkdir('model_pickles')
    settings.MODEL_PICKLE_PATH = str(pickle_dir)

    batch_size = project.batch_size
    fill_queue(queue, 'random', irr_queue, irr_percent=project.percentage_irr,
               batch_size=batch_size)

    labels = project.labels.all()

    def assert_redis_queue_has_no_duplicates():
        queued = test_redis.lrange(redis_serialize_queue(queue), 0, -1)
        assert len(queued) == len(set(queued))

    # first round: label a whole batch, then run the model task
    batch = get_assignments(project.creator, project, batch_size)
    for datum in batch:
        label_data(random.choice(labels), datum, project.creator, 3)
    tasks.send_model_task.delay(project.pk).get()
    assert project.get_current_training_set().set_number == starting_set_number + 1
    assert_redis_queue_has_no_duplicates()

    # second round: take 40 assignments but label only the first batch_size
    batch = get_assignments(project.creator, project, 40)
    for datum in batch[:batch_size]:
        label_data(random.choice(labels), datum, project.creator, 3)
    tasks.send_model_task.delay(project.pk).get()
    assert project.get_current_training_set().set_number == starting_set_number + 2
    assert_redis_queue_has_no_duplicates()

    # unassigning what is left must not introduce duplicates either
    batch_unassign(project.creator)
    assert_redis_queue_has_no_duplicates()
def test_get_irr_metrics(seeded_database, client, admin_client, test_project_half_irr_data,
                         test_half_irr_all_queues, test_labels_half_irr):
    '''
    Exercise the irr metrics api call before and after irr data is labeled.

    Note: the exact values are checked in the util tests.
    '''
    labels = test_labels_half_irr
    normal_queue, admin_queue, irr_queue = test_half_irr_all_queues
    project = test_project_half_irr_data
    metrics_url = '/api/get_irr_metrics/{}/'.format(project.pk)
    annotate_url = '/api/annotate_data/{}/'
    nothing_processed = "No irr data processed"

    # sign in users
    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    coder_profile = Profile.objects.get(user__username=SEED_USERNAME)
    ProjectPermissions.objects.create(profile=coder_profile, project=project, permission='CODER')
    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)
    ProjectPermissions.objects.create(profile=admin_profile, project=project, permission='ADMIN')

    fill_queue(normal_queue, 'random', irr_queue, project.percentage_irr, project.batch_size)

    # non-admin should not be able to call the endpoint
    denied = client.get(metrics_url)
    assert denied.status_code == 403
    assert "Invalid permission. Must be an admin" in str(denied.content)

    # initially, should have no irr data processed
    before = admin_client.get(metrics_url).json()
    assert 'error' not in before and 'detail' not in before
    assert 'kappa' in before and before['kappa'] == nothing_processed
    assert 'percent agreement' in before and before['percent agreement'] == nothing_processed

    # have each person label three irr data, the admin one label further along
    coder_batch = get_assignments(coder_profile, project, 3)
    admin_batch = get_assignments(admin_profile, project, 3)
    for idx in range(3):
        reply = client.post(
            annotate_url.format(coder_batch[idx].pk),
            {"labelID": labels[idx].pk, "labeling_time": 3},
        ).json()
        assert 'error' not in reply and 'detail' not in reply

        reply = admin_client.post(
            annotate_url.format(admin_batch[idx].pk),
            {"labelID": labels[(idx + 1) % 3].pk, "labeling_time": 3},
        ).json()
        # only the 'error' key is checked for the admin's annotation
        assert 'error' not in reply

    after = admin_client.get(metrics_url).json()

    # the percent agreement should be a number between 0 and 100 with a %
    assert 'percent agreement' in after
    agreement = after['percent agreement']
    percent = float(agreement[:-1])
    assert 0 <= percent <= 100 and agreement[-1] == '%'

    # kappa should be a value between -1 and 1
    assert 'kappa' in after and -1 <= after['kappa'] <= 1
def test_predicted_table(seeded_database, admin_client, client, test_project_unlabeled_and_tfidf,
                         test_queue, test_labels, test_irr_queue, test_admin_queue):
    '''
    This tests that the predicted table contains what it should.

    The table must be admin-only, stay empty until predictions exist, and
    afterwards hold one row per unlabeled datum and none of the labeled data.
    '''
    project = test_project_unlabeled_and_tfidf
    table_url = '/api/data_predicted_table/' + str(project.pk) + '/'
    client_profile, admin_profile = sign_in_and_fill_queue(
        project, test_queue, client, admin_client)

    # escaped text of every datum the coder has labeled so far
    labeled_text = []

    def label_fifteen():
        # Have the coder take 15 assignments and annotate each, cycling labels.
        data = get_assignments(client_profile, project, 15)
        for i in range(15):
            labeled_text.append(escape(data[i].text))
            client.post('/api/annotate_data/' + str(data[i].pk) + '/', {
                "labelID": test_labels[i % 3].pk,
                "labeling_time": 1
            })

    # a non-admin may not see the table
    response = client.get(table_url)
    assert 'detail' in response.json()
    assert "Invalid permission. Must be an admin" in response.json()['detail']

    # first, check that it is empty
    response = admin_client.get(table_url).json()
    assert len(response['data']) == 0

    # label 15 things and check that it is still empty
    label_fifteen()
    response = admin_client.get(table_url).json()
    assert len(response['data']) == 0

    # label 15 more things and let the predictions be created
    # check that the unlabeled items are in the table
    label_fifteen()
    response = admin_client.get(table_url).json()

    training_set = TrainingSet.objects.get(
        set_number=project.get_current_training_set().set_number - 1)
    model = Model.objects.get(training_set=training_set)

    # check that the table holds the predicted data (one prediction per label per datum)
    assert len(response['data']) == (DataPrediction.objects.filter(
        data__project=project, model=model).count()) // len(test_labels)

    # check that the table has the number of unlabeled data
    assert len(response['data']) == (Data.objects.filter(
        project=project).count() - DataLabel.objects.filter(data__project=project).count())

    # check that no labeled datum has a prediction.
    # flat=True is required: without it values_list yields 1-tuples and a string
    # is never "in" the list, so the check could not fail. The predicted text is
    # escaped so both sides are compared in the same form.
    predicted_text = {
        escape(text)
        for text in DataPrediction.objects.filter(
            data__project=project).values_list("data__text", flat=True)
    }
    for text in labeled_text:
        assert text not in predicted_text
def test_modify_label_to_skip(seeded_database, client, test_project_data, test_queue,
                              test_irr_queue, test_labels, test_admin_queue):
    '''Changing a labeled item to "skip" removes its label and queues it for the admin.'''
    first_label_pk = test_labels[0].pk
    annotation = {"labelID": first_label_pk, "labeling_time": 3}
    project = test_project_data
    coder_profile, _ = sign_in_and_fill_queue(project, test_queue, client)

    # have a user annotate one datum
    datum = get_assignments(coder_profile, project, 1)[0]
    assert datum is not None
    client.post('/api/annotate_data/{}/'.format(datum.pk), annotation)
    assert DataLabel.objects.filter(data=datum).count() == 1

    # Change the label to skip: the label goes away and one item sits in
    # the admin queue.
    reply = client.post(
        '/api/modify_label_to_skip/{}/'.format(datum.pk),
        {"dataID": datum.pk, "oldLabelID": first_label_pk},
    ).json()
    assert 'error' not in reply and 'detail' not in reply
    assert DataLabel.objects.filter(data=datum).count() == 0
    assert DataQueue.objects.filter(queue=test_admin_queue).count() == 1

    # the change is recorded in the change log
    assert LabelChangeLog.objects.filter(data=datum, new_label="skip").count() == 1
def test_modify_label(seeded_database, client, test_project_data, test_queue, test_labels,
                      test_irr_queue, test_admin_queue):
    '''
    Modifying a label replaces the existing label and logs the change.
    '''
    old_label_pk = test_labels[0].pk
    annotation = {"labelID": old_label_pk, "labeling_time": 3}
    project = test_project_data
    coder_profile, _ = sign_in_and_fill_queue(project, test_queue, client)

    # have a user annotate one datum
    datum = get_assignments(coder_profile, project, 1)[0]
    assert datum is not None
    client.post('/api/annotate_data/{}/'.format(datum.pk), annotation)
    assert DataLabel.objects.filter(data=datum).count() == 1

    # call modify label to change it to something else
    new_label_pk = test_labels[1].pk
    modification = {
        "dataID": datum.pk,
        "oldLabelID": old_label_pk,
        "labelID": new_label_pk,
    }
    reply = client.post('/api/modify_label/{}/'.format(datum.pk), modification).json()
    assert 'error' not in reply and 'detail' not in reply

    # exactly one label remains, and it is the new one
    assert DataLabel.objects.filter(data=datum).count() == 1
    assert DataLabel.objects.get(data=datum).label.pk == new_label_pk

    # the change is recorded in the change log
    assert LabelChangeLog.objects.filter(data=datum).count() == 1
def test_annotate_data(seeded_database, client, test_project_data, test_queue, test_labels,
                       test_admin_queue, test_irr_queue):
    '''Annotating a datum is refused without project permission and works with it.'''
    project = test_project_data
    fill_queue(test_queue, 'random')
    annotation = {"labelID": test_labels[0].pk, "labeling_time": 3}
    permission_message = 'Account disabled by administrator. Please contact project owner for details'

    # get a datum from the queue for a user who has no permission yet
    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    coder_profile = Profile.objects.get(user__username=SEED_USERNAME)
    assigned = get_assignments(coder_profile, project, 1)
    annotate_url = '/api/annotate_data/{}/'.format(assigned[0].pk)

    # without permission the call reports an error and nothing is labeled
    refused = client.post(annotate_url, annotation).json()
    assert 'detail' in refused
    assert permission_message in refused['detail']
    assert DataLabel.objects.filter(data=assigned[0]).count() == 0

    # with permission the call succeeds: one label, datum out of every queue
    ProjectPermissions.objects.create(profile=coder_profile, project=project, permission='CODER')
    accepted = client.post(annotate_url, annotation).json()
    assert 'error' not in accepted and 'detail' not in accepted
    assert DataLabel.objects.filter(data=assigned[0]).count() == 1
    assert DataQueue.objects.filter(data=assigned[0]).count() == 0
def get_card_deck(request, project_pk):
    """Grab data using get_assignments and send it to the frontend react app.

    The number of items requested is the project's batch size divided by the
    number of project permissions plus one, rounded up.

    Args:
        request: The request to the endpoint
        project_pk: Primary key of project
    Returns:
        A Response whose body holds
        labels: The serialized project labels
        data: The serialized data assigned to the requesting profile
    """
    profile = request.user.profile
    project = Project.objects.get(pk=project_pk)

    # Calculate queue parameters.
    # count() asks the database for the number instead of loading every
    # permission row just to measure the list.
    # NOTE(review): the +1 presumably accounts for the project creator - confirm.
    batch_size = project.batch_size
    num_coders = project.projectpermissions_set.count() + 1
    coder_size = math.ceil(batch_size / num_coders)

    data = get_assignments(profile, project, coder_size)
    # shuffle so the irr is not all at the front
    random.shuffle(data)

    labels = Label.objects.filter(project=project)

    return Response({
        'labels': LabelSerializer(labels, many=True).data,
        'data': DataSerializer(data, many=True).data
    })
def test_coded_table(seeded_database, client, admin_client, test_project_data, test_queue,
                     test_admin_queue, test_irr_queue, test_labels):
    '''
    The coded table is admin-only and lists each labeled item with its label and coder.
    '''
    project = test_project_data
    table_url = '/api/data_coded_table/{}/'.format(project.pk)
    coder_profile, _ = sign_in_and_fill_queue(project, test_queue, client, admin_client)

    def table_rows():
        # rows of the coded table as seen by the admin
        return admin_client.get(table_url).json()['data']

    # a non-admin may not see the table
    refused = client.get(table_url).json()
    assert 'detail' in refused
    assert "Invalid permission. Must be an admin" in refused['detail']

    # first, check that it is empty
    assert len(table_rows()) == 0

    # label a few things, and check that they are in the table
    batch = get_assignments(coder_profile, project, 3)
    expected_text = []
    expected_labels = []
    for idx in range(3):
        expected_text.append(escape(batch[idx].text))
        expected_labels.append(test_labels[idx].name)
        client.post(
            '/api/annotate_data/{}/'.format(batch[idx].pk),
            {"labelID": test_labels[idx].pk, "labeling_time": 1},
        )

    rows = table_rows()
    assert len(rows) == 3
    for row in rows:
        assert row['Text'] in expected_text
        assert row['Label'] in expected_labels
        assert row['Coder'] == str(coder_profile)
def test_unassign_after_fillqueue(db, test_profile, test_project_data, test_queue, test_labels,
                                  test_redis):
    """Track the redis list and set sizes through assign, label and refill."""
    list_key = 'queue:' + str(test_queue.pk)
    set_key = 'set:' + str(test_queue.pk)

    def redis_sizes():
        # (length of the redis list, cardinality of the redis set) for test_queue
        return test_redis.llen(list_key), test_redis.scard(set_key)

    fill_queue(test_queue, 'random')
    assert redis_sizes() == (test_queue.length, test_queue.length)

    # assigning ten shortens the list by ten; the set is unchanged
    assigned = get_assignments(test_profile, test_project_data, 10)
    assert redis_sizes() == (test_queue.length - 10, test_queue.length)

    # labeling five of them shrinks only the set
    label = test_labels[0]
    for idx in range(5):
        label_data(label, assigned[idx], test_profile, 3)
    assert redis_sizes() == (test_queue.length - 10, test_queue.length - 5)

    # refilling tops the set back up; the list ends five short of full
    fill_queue(test_queue, 'random')
    assert redis_sizes() == (test_queue.length - 5, test_queue.length)
def test_get_assignments_no_existing_assignment_one_assignment(
        db, test_profile, test_project_data, test_queue, test_redis):
    """Requesting one assignment with none outstanding assigns exactly one Data."""
    fill_queue(test_queue, orderby='random')

    # nothing is assigned before the call
    assert AssignedData.objects.count() == 0

    assigned = get_assignments(test_profile, test_project_data, 1)

    assert len(assigned) == 1
    datum = assigned[0]
    assert isinstance(datum, Data)
    assert_obj_exists(AssignedData, dict(data=datum, profile=test_profile))
def test_get_assignments_one_existing_assignment(db, test_profile, test_project_data,
                                                 test_queue, test_redis):
    """With one datum already assigned, get_assignments hands back that same datum."""
    fill_queue(test_queue, orderby='random')
    already_assigned = assign_datum(test_profile, test_project_data)

    result = get_assignments(test_profile, test_project_data, 1)
    first = result[0]

    assert isinstance(first, Data)
    # We should just get the datum that was already assigned
    assert first == already_assigned
def test_unlabeled_table(
    seeded_database,
    client,
    admin_client,
    test_project_unlabeled_and_tfidf,
    test_queue,
    test_admin_queue,
    test_irr_queue,
    test_labels,
):
    """The unlabeled table is admin-only and drops items once labeled or skipped."""
    project = test_project_unlabeled_and_tfidf
    table_url = "/api/data_unlabeled_table/{}/".format(project.pk)
    coder_profile, _ = sign_in_and_fill_queue(
        project, test_queue, client, admin_client
    )

    def table_ids():
        # IDs currently listed in the table, as seen by the admin
        return [row["ID"] for row in admin_client.get(table_url).json()["data"]]

    # a non-admin may not see the table
    refused = client.get(table_url).json()
    assert "detail" in refused
    assert "Invalid permission. Must be an admin" in refused["detail"]

    # initially the table holds every datum of the project that is not queued
    table = admin_client.get(table_url).json()
    assert "data" in table
    total = Data.objects.filter(project=project).count()
    queued = DataQueue.objects.filter(data__project=project).count()
    assert len(table["data"]) == total - queued

    # label something. Check it is not in the table.
    batch = get_assignments(coder_profile, project, 2)
    client.post(
        "/api/annotate_data/{}/".format(batch[0].pk),
        {"labelID": test_labels[0].pk, "labeling_time": 1},
    )
    assert batch[0].pk not in table_ids()

    # skip something. Check it is not in the table.
    client.post("/api/skip_data/{}/".format(batch[1].pk))
    assert batch[1].pk not in table_ids()
def label_project(project, profile, num_labels):
    """Label up to ``num_labels`` assignments for ``profile`` and run the model task.

    The leading assignments are paired with the project's labels in order;
    every assignment past the number of labels gets a randomly chosen label.
    Labeling times are random integers from 0 to 25. The result of applying
    the model task is saved as ``celery_task_id`` on the training set that
    was current when the function started.
    """
    labels = project.labels.all()
    training_set = project.get_current_training_set()
    assignments = get_assignments(profile, project, num_labels)

    # pair labels with assignments until the shorter of the two runs out
    for label, datum in zip(labels, assignments):
        label_data(label, datum, profile, random.randint(0, 25))

    # whatever is left over gets a random label
    for datum in assignments[len(labels):]:
        label_data(random.choice(labels), datum, profile, random.randint(0, 25))

    training_set.celery_task_id = tasks.send_model_task.apply(args=[project.pk])
    training_set.save()
def test_get_assignments_no_existing_assignment_max_queue_length(
        db, test_profile, test_project_data, test_queue, test_redis):
    """Requesting TEST_QUEUE_LEN assignments with none outstanding assigns that many."""
    fill_queue(test_queue, orderby='random')

    # nothing is assigned before the call
    assert AssignedData.objects.count() == 0

    wanted = TEST_QUEUE_LEN
    assigned = get_assignments(test_profile, test_project_data, wanted)
    assert len(assigned) == wanted

    # every returned item is a Data with a matching AssignedData row
    for item in assigned:
        assert isinstance(item, Data)
        assert_obj_exists(AssignedData, dict(data=item, profile=test_profile))
def test_admin_table(
    seeded_database,
    admin_client,
    client,
    test_project_data,
    test_queue,
    test_irr_queue,
    test_admin_queue,
    test_labels,
):
    """The admin table is admin-only and holds exactly the skipped, unresolved items."""
    project = test_project_data
    table_url = "/api/data_admin_table/{}/".format(project.pk)
    coder_profile, _ = sign_in_and_fill_queue(
        project, test_queue, client, admin_client
    )

    def admin_rows():
        # rows of the admin table as seen by the admin
        return admin_client.get(table_url).json()["data"]

    # check that a non-admin can't get the table
    refused = client.get(table_url).json()
    assert "detail" in refused
    assert "Invalid permission. Must be an admin" in refused["detail"]

    # first, check that it is empty
    assert len(admin_rows()) == 0

    # label something. Should still be empty.
    batch = get_assignments(coder_profile, project, 2)
    client.post(
        "/api/annotate_data/{}/".format(batch[0].pk),
        {"labelID": test_labels[0].pk, "labeling_time": 1},
    )
    assert len(admin_rows()) == 0

    # skip something. Should be in the table.
    client.post("/api/skip_data/{}/".format(batch[1].pk))
    rows = admin_rows()
    assert len(rows) == 1
    assert rows[0]["ID"] == batch[1].pk

    # admin annotate the data. Admin table should be empty again.
    admin_client.post(
        "/api/label_admin_label/{}/".format(batch[1].pk),
        {"labelID": test_labels[0].pk},
    )
    assert len(admin_rows()) == 0
def test_get_assignments_no_existing_assignment_half_max_queue_length(
        db, test_profile, test_project_data, test_queue, test_redis):
    """Requesting half of TEST_QUEUE_LEN with none outstanding assigns that many."""
    fill_queue(test_queue, orderby="random")

    # nothing is assigned before the call
    assert AssignedData.objects.count() == 0

    half = TEST_QUEUE_LEN // 2
    assigned = get_assignments(test_profile, test_project_data, half)
    assert len(assigned) == half

    # every returned item is a Data with a matching AssignedData row
    for item in assigned:
        assert isinstance(item, Data)
        assert_obj_exists(AssignedData, dict(data=item, profile=test_profile))
def test_get_assignments_multiple_existing_assignments(db, test_profile, test_project_data,
                                                       test_queue, test_redis):
    """With five data already assigned, get_assignments returns those same five."""
    fill_queue(test_queue, orderby='random')

    assigned_data = [assign_datum(test_profile, test_project_data) for _ in range(5)]

    data = get_assignments(test_profile, test_project_data, 5)

    assert len(data) == 5
    assert len(data) == len(assigned_data)
    for datum in data:
        assert isinstance(datum, Data)

    # We should just get the data that was already assigned. The previous
    # version only re-checked the lengths here, so it never verified this.
    # Primary keys are compared sorted because nothing visible in this test
    # guarantees the order in which get_assignments returns them.
    assert sorted(datum.pk for datum in data) == sorted(
        datum.pk for datum in assigned_data)
def test_get_label_history(
    seeded_database,
    admin_client,
    client,
    test_project_data,
    test_queue,
    test_labels,
    test_admin_queue,
    test_irr_queue,
):
    """The label history lists what the calling user labeled, and nothing else."""
    project = test_project_data
    history_url = "/api/get_label_history/{}/".format(project.pk)
    coder_profile, _ = sign_in_and_fill_queue(
        project, test_queue, client, admin_client
    )

    def history_of(api_client):
        # history rows returned to the given client
        return api_client.get(history_url).json()["data"]

    # before anything has been labeled, the history table should be empty
    assert history_of(admin_client) == []

    # skip an item. Should still be empty
    batch = get_assignments(coder_profile, project, 2)
    skipped = batch[0]
    assert skipped is not None
    reply = client.post("/api/skip_data/{}/".format(skipped.pk)).json()
    assert "error" not in reply and "detail" not in reply
    assert history_of(client) == []

    # have one user label something. Call label history on two users.
    annotation = {"labelID": test_labels[0].pk, "labeling_time": 3}
    labeled = batch[1]
    reply = client.post("/api/annotate_data/{}/".format(labeled.pk), annotation).json()
    assert "error" not in reply and "detail" not in reply

    coder_history = history_of(client)
    assert coder_history != []
    assert history_of(admin_client) == []

    # the label should be in the correct person's history
    newest = coder_history[0]
    assert newest["id"] == labeled.pk
    assert newest["labelID"] == test_labels[0].pk
def test_coded_table(
    seeded_database,
    client,
    admin_client,
    test_project_data,
    test_queue,
    test_admin_queue,
    test_irr_queue,
    test_labels,
):
    """The coded table is admin-only and lists labeled items with label and coder."""
    project = test_project_data
    coded_url = "/api/data_coded_table/{}/".format(project.pk)
    annotate_url = "/api/annotate_data/{}/"
    coder_profile, _ = sign_in_and_fill_queue(
        project, test_queue, client, admin_client)

    # a non-admin is turned away
    refused = client.get(coded_url).json()
    assert "detail" in refused
    assert "Invalid permission. Must be an admin" in refused["detail"]

    # nothing has been labeled yet, so the table is empty
    table = admin_client.get(coded_url).json()
    assert len(table["data"]) == 0

    # label three items, remembering the escaped text and label name of each
    batch = get_assignments(coder_profile, project, 3)
    seen_text = []
    seen_labels = []
    for position in range(3):
        datum = batch[position]
        label = test_labels[position]
        seen_text.append(escape(datum.text))
        seen_labels.append(label.name)
        client.post(
            annotate_url.format(datum.pk),
            {"labelID": label.pk, "labeling_time": 1},
        )

    # all three show up, attributed to the coder
    table = admin_client.get(coded_url).json()
    assert len(table["data"]) == 3
    coder_name = str(coder_profile)
    for entry in table["data"]:
        assert entry["Text"] in seen_text
        assert entry["Label"] in seen_labels
        assert entry["Coder"] == coder_name
def test_admin_table(seeded_database, admin_client, client, test_project_data, test_queue,
                     test_irr_queue, test_admin_queue, test_labels):
    '''
    The admin table is admin-only and holds a skipped item until the admin labels it.
    '''
    project = test_project_data
    admin_table_url = '/api/data_admin_table/{}/'.format(project.pk)
    coder_profile, _ = sign_in_and_fill_queue(project, test_queue, client, admin_client)

    # a non-admin is turned away
    refused = client.get(admin_table_url).json()
    assert 'detail' in refused
    assert 'Invalid permission. Must be an admin' in refused['detail']

    # nothing has been skipped yet, so the table is empty
    table = admin_client.get(admin_table_url).json()
    assert len(table['data']) == 0

    # labeling an item leaves the table empty
    batch = get_assignments(coder_profile, project, 2)
    client.post(
        '/api/annotate_data/{}/'.format(batch[0].pk),
        {"labelID": test_labels[0].pk, "labeling_time": 1},
    )
    table = admin_client.get(admin_table_url).json()
    assert len(table['data']) == 0

    # skipping an item puts exactly that item into the table
    client.post('/api/skip_data/{}/'.format(batch[1].pk))
    table = admin_client.get(admin_table_url).json()
    assert len(table['data']) == 1
    assert table['data'][0]['ID'] == batch[1].pk

    # once the admin labels it, the table is empty again
    admin_client.post(
        '/api/label_admin_label/{}/'.format(batch[1].pk),
        {"labelID": test_labels[0].pk},
    )
    table = admin_client.get(admin_table_url).json()
    assert len(table['data']) == 0
def test_admin_label(
    seeded_database,
    admin_client,
    client,
    test_project_data,
    test_queue,
    test_labels,
    test_irr_queue,
    test_admin_queue,
):
    """Only an admin can label skipped items sitting in the admin queue."""
    project = test_project_data

    # fill queue. The admin queue should be empty
    coder_profile, _ = sign_in_and_fill_queue(
        project, test_queue, client, admin_client
    )
    assert DataQueue.objects.filter(queue=test_admin_queue).count() == 0

    # a normal client skips a datum ...
    datum = get_assignments(coder_profile, project, 1)[0]
    admin_label_url = "/api/label_admin_label/{}/".format(datum.pk)
    payload = {"labelID": test_labels[0].pk}

    skipped = client.post("/api/skip_data/{}/".format(datum.pk)).json()
    assert "error" not in skipped and "detail" not in skipped

    # ... but is not allowed to admin-label it
    refused = client.post(admin_label_url, payload).json()
    assert "detail" in refused
    assert "Invalid permission. Must be an admin" in refused["detail"]

    # the datum sits in the admin queue only, still unlabeled
    assert DataQueue.objects.filter(data=datum, queue=test_admin_queue).count() == 1
    assert DataQueue.objects.filter(data=datum, queue=test_queue).count() == 0
    assert DataLabel.objects.filter(data=datum).count() == 0

    # the admin can label it: it leaves the admin queue and gains a label
    accepted = admin_client.post(admin_label_url, payload).json()
    assert "error" not in accepted and "detail" not in accepted
    assert DataQueue.objects.filter(data=datum, queue=test_admin_queue).count() == 0
    assert DataLabel.objects.filter(data=datum).count() == 1
def test_skip_data(
    seeded_database,
    client,
    test_project_data,
    test_queue,
    test_irr_queue,
    test_labels,
    test_admin_queue,
):
    """Skipping is refused without permission and moves the datum to the admin queue with it."""
    project = test_project_data
    fill_queue(test_queue, "random")
    permission_message = (
        "Account disabled by administrator. Please contact project owner for details"
    )

    def queue_count(datum, queue):
        # number of DataQueue rows tying the datum to the given queue
        return DataQueue.objects.filter(data=datum, queue=queue).count()

    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    coder_profile = Profile.objects.get(user__username=SEED_USERNAME)
    assigned = get_assignments(coder_profile, project, 1)
    skip_url = "/api/skip_data/{}/".format(assigned[0].pk)

    # without permission the call reports an error and the datum stays put
    refused = client.post(skip_url).json()
    assert "detail" in refused
    assert permission_message in refused["detail"]
    assert queue_count(assigned[0], test_queue) == 1
    assert queue_count(assigned[0], test_admin_queue) == 0

    ProjectPermissions.objects.create(
        profile=coder_profile, project=project, permission="CODER"
    )

    # with permission the datum moves to the admin queue and gets no label
    accepted = client.post(skip_url).json()
    assert "error" not in accepted and "detail" not in accepted
    assert queue_count(assigned[0], test_queue) == 0
    assert queue_count(assigned[0], test_admin_queue) == 1
    assert DataLabel.objects.filter(data=assigned[0]).count() == 0
def test_admin_counts(seeded_database, client, admin_client, test_project_data,
                      test_queue, test_irr_queue, test_labels, test_admin_queue,
                      test_project_no_irr_data, test_no_irr_all_queues,
                      test_labels_no_irr):
    """Check the admin counts API for an IRR project and a non-IRR project.

    Both users skip a batch of 30 in each project; the admin then reads the
    counts, which are compared against tallies kept while skipping.
    """
    irr_project = test_project_data
    no_irr_project = test_project_no_irr_data
    # (project, normal queue, irr queue) for each of the two projects.
    setups = (
        (irr_project, test_queue, test_irr_queue),
        (no_irr_project, test_no_irr_all_queues[0], test_no_irr_all_queues[2]),
    )

    def counts_url(proj):
        """Build the admin counts URL for a project."""
        return '/api/data_admin_counts/' + str(proj.pk) + '/'

    # Log both users in and give them a role on each project.
    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    client_profile = Profile.objects.get(user__username=SEED_USERNAME)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)

    for proj, normal_queue, irr_queue in setups:
        fill_queue(normal_queue, 'random', irr_queue,
                   proj.percentage_irr, proj.batch_size)
        ProjectPermissions.objects.create(
            profile=admin_profile, project=proj, permission='ADMIN')
        ProjectPermissions.objects.create(
            profile=client_profile, project=proj, permission='CODER')

        # A coder must be refused by the counts endpoint.
        denied = client.get(counts_url(proj)).json()
        assert 'detail' in denied
        assert 'Invalid permission. Must be an admin' in denied['detail']

    # Before anything is skipped every count is zero; the IRR project
    # reports two counts and the non-IRR project reports one.
    initial = admin_client.get(counts_url(irr_project)).json()
    assert 'detail' not in initial
    assert len(initial["data"]) == 2
    assert list(initial['data'].values()) == [0, 0]

    initial = admin_client.get(counts_url(no_irr_project)).json()
    assert 'detail' not in initial
    assert len(initial["data"]) == 1
    assert list(initial['data'].values()) == [0]

    # IRR project: both users skip a batch of 30. IRR items are tallied from
    # the coder's batch only; non-IRR items are tallied from both batches.
    irr_count = 0
    non_irr_count = 0
    coder_batch = get_assignments(client_profile, irr_project, 30)
    for idx in range(30):
        datum = coder_batch[idx]
        if datum.irr_ind:
            irr_count += 1
        else:
            non_irr_count += 1
        client.post('/api/skip_data/' + str(datum.pk) + '/')

    admin_batch = get_assignments(admin_profile, irr_project, 30)
    for idx in range(30):
        datum = admin_batch[idx]
        if not datum.irr_ind:
            non_irr_count += 1
        admin_client.post('/api/skip_data/' + str(datum.pk) + '/')

    after_skip = admin_client.get(counts_url(irr_project)).json()
    assert 'detail' not in after_skip
    assert len(after_skip["data"]) == 2
    assert after_skip['data']['IRR'] == irr_count
    assert after_skip['data']['SKIP'] == non_irr_count

    # Non-IRR project: both users skip 30 items each, so the single SKIP
    # count is expected to be 60.
    coder_batch = get_assignments(client_profile, no_irr_project, 30)
    for idx in range(30):
        client.post('/api/skip_data/' + str(coder_batch[idx].pk) + '/')

    admin_batch = get_assignments(admin_profile, no_irr_project, 30)
    for idx in range(30):
        admin_client.post('/api/skip_data/' + str(admin_batch[idx].pk) + '/')

    after_skip = admin_client.get(counts_url(no_irr_project)).json()
    assert 'detail' not in after_skip
    assert len(after_skip["data"]) == 1
    assert after_skip['data']['SKIP'] == 60
def test_recycle_bin_table(seeded_database, client, admin_client, test_project_data,
                           test_queue, test_irr_queue, test_labels, test_admin_queue):
    """Check that the recycle bin table reflects discarded and restored data.

    Both users skip a batch, the admin discards everything found in the admin
    queue, the table is compared against ``RecycleBin``, and finally every
    item is restored and the table is expected to be empty again.
    """
    project = test_project_data
    table_url = '/api/recycle_bin_table/' + str(project.pk) + '/'

    fill_queue(test_queue, 'random', test_irr_queue,
               project.percentage_irr, project.batch_size)

    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)
    ProjectPermissions.objects.create(
        profile=admin_profile, project=project, permission='ADMIN')

    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    client_profile = Profile.objects.get(user__username=SEED_USERNAME)
    ProjectPermissions.objects.create(
        profile=client_profile, project=project, permission='CODER')

    # A coder must be refused by the table endpoint.
    denied = client.get(table_url).json()
    assert 'detail' in denied
    assert 'Invalid permission. Must be an admin' in denied['detail']

    # Nothing has been discarded yet, so the admin sees an empty table.
    table = admin_client.get(table_url).json()
    assert 'detail' not in table
    assert len(table["data"]) == 0

    # The coder skips a batch of 30, tallying IRR and non-IRR items.
    irr_count = 0
    non_irr_count = 0
    coder_batch = get_assignments(client_profile, project, 30)
    for idx in range(30):
        datum = coder_batch[idx]
        if datum.irr_ind:
            irr_count += 1
        else:
            non_irr_count += 1
        client.post('/api/skip_data/' + str(datum.pk) + '/')

    # The admin skips a batch of 30 too; only non-IRR items are tallied here.
    admin_batch = get_assignments(admin_profile, project, 30)
    for idx in range(30):
        datum = admin_batch[idx]
        if not datum.irr_ind:
            non_irr_count += 1
        admin_client.post('/api/skip_data/' + str(datum.pk) + '/')

    # This queryset is evaluated by the discard loop below and the same
    # object is iterated again by the restore loop.
    admin_queue_rows = DataQueue.objects.filter(
        data__project=project, queue=test_admin_queue)

    # Discard everything found in the admin queue.
    for row in admin_queue_rows:
        admin_client.post('/api/discard_data/' + str(row.data.pk) + '/')

    # The table must list one entry per tallied item, and its IDs must match
    # the RecycleBin rows of the project.
    table = admin_client.get(table_url).json()
    assert 'detail' not in table
    assert len(table["data"]) == non_irr_count + irr_count
    listed_ids = [entry["ID"] for entry in table["data"]]
    binned_ids = RecycleBin.objects.filter(
        data__project=project).values_list("data__pk", flat=True)
    assert_collections_equal(listed_ids, binned_ids)

    # Restore everything that was discarded.
    for row in admin_queue_rows:
        admin_client.post('/api/restore_data/' + str(row.data.pk) + '/')

    # After restoring, the table is empty again.
    table = admin_client.get(table_url).json()
    assert 'detail' not in table
    assert len(table["data"]) == 0
def test_model_metrics(seeded_database, admin_client, client,
                       test_project_unlabeled_and_tfidf, test_queue,
                       test_admin_queue, test_irr_queue, test_labels):
    """Check that the model metrics API reports 0, then 1, then 2 values.

    The admin requests every metric before any labeling (no values expected),
    after the coder labels a first batch of 30 (one value expected), and after
    the coder labels a further 30 and then 10 items (two values expected).
    """
    project = test_project_unlabeled_and_tfidf
    client_profile, admin_profile = sign_in_and_fill_queue(
        project, test_queue, client, admin_client)

    def assert_metric_value_counts(expected):
        """Assert that every series of every metric holds `expected` values."""
        for metric in ['accuracy', 'f1', 'precision', 'recall']:
            series_list = admin_client.get(
                '/api/model_metrics/' + str(project.pk) + '/?metric=' + metric
            ).json()
            # A single-series response must be the 'Accuracy' series; any
            # other response must carry one series per label.
            if len(series_list) == 1:
                assert series_list[0]['key'] == 'Accuracy'
            else:
                assert len(series_list) == len(test_labels)
            for series in series_list:
                assert len(series['values']) == expected

    def label_batch(size):
        """Assign `size` items to the coder and annotate each one.

        Labels are taken from the first three entries of `test_labels` in
        rotation. Indexing by position keeps the test failing if fewer than
        `size` items were assigned. Returns the assigned batch.
        """
        batch = get_assignments(client_profile, project, size)
        for idx in range(size):
            client.post('/api/annotate_data/' + str(batch[idx].pk) + '/', {
                "labelID": test_labels[idx % 3].pk,
                "labeling_time": 1
            })
        return batch

    # Before any labeling there are no metric values.
    assert_metric_value_counts(0)

    # Label 30 items. The model should run.
    first_batch = label_batch(30)
    assert DataLabel.objects.filter(data__in=first_batch).count() == 30

    # Each series now holds the value from the first run.
    assert_metric_value_counts(1)

    # Refill and label 30 more, then refill and label 10 more.
    fill_queue(test_queue, project.learning_method)
    label_batch(30)
    fill_queue(test_queue, project.learning_method)
    label_batch(10)

    # Each series now holds two values.
    assert_metric_value_counts(2)
def test_discard_data(seeded_database, client, admin_client, test_project_data,
                      test_queue, test_irr_queue, test_labels, test_admin_queue):
    """Check that an admin can discard skipped data and a coder cannot.

    Both users skip a mixed IRR / non-IRR batch. The coder is refused by the
    discard endpoint; the admin discards every item in the admin queue and
    the related tables are checked after each discard.
    """
    project = test_project_data
    fill_queue(test_queue, 'random', test_irr_queue,
               project.percentage_irr, project.batch_size)

    admin_client.login(username=SEED_USERNAME2, password=SEED_PASSWORD2)
    admin_profile = Profile.objects.get(user__username=SEED_USERNAME2)
    ProjectPermissions.objects.create(
        profile=admin_profile, project=project, permission='ADMIN')

    client.login(username=SEED_USERNAME, password=SEED_PASSWORD)
    client_profile = Profile.objects.get(user__username=SEED_USERNAME)
    ProjectPermissions.objects.create(
        profile=client_profile, project=project, permission='CODER')

    # The coder's batch must contain both IRR and non-IRR items; skip it all.
    coder_batch = get_assignments(client_profile, project, 30)
    assert any(datum.irr_ind for datum in coder_batch)
    assert any(not datum.irr_ind for datum in coder_batch)
    for idx in range(30):
        client.post('/api/skip_data/' + str(coder_batch[idx].pk) + '/')

    # Same for the admin's batch.
    admin_batch = get_assignments(admin_profile, project, 30)
    assert any(datum.irr_ind for datum in admin_batch)
    assert any(not datum.irr_ind for datum in admin_batch)
    for idx in range(30):
        admin_client.post('/api/skip_data/' + str(admin_batch[idx].pk) + '/')

    # The admin queue must now hold both kinds of data as well.
    admin_data = DataQueue.objects.filter(
        data__project=project, queue=test_admin_queue)
    assert any(row.data.irr_ind for row in admin_data)
    assert any(not row.data.irr_ind for row in admin_data)

    # A coder must be refused by the discard endpoint.
    denied = client.post(
        '/api/discard_data/' + str(admin_data[0].data.pk) + '/').json()
    assert 'detail' in denied
    assert 'Invalid permission. Must be an admin' in denied['detail']

    # Discard the IRR data: afterwards it has no IRRLog, DataQueue or
    # AssignedData rows, has exactly one RecycleBin row, and that row's
    # datum is no longer flagged as IRR.
    irr_rows = admin_data.filter(data__irr_ind=True)
    for row in irr_rows:
        target = row.data
        assert IRRLog.objects.filter(data=target).count() > 0
        admin_client.post('/api/discard_data/' + str(target.pk) + '/')
        assert IRRLog.objects.filter(data=target).count() == 0
        assert DataQueue.objects.filter(data=target).count() == 0
        assert AssignedData.objects.filter(data=target).count() == 0
        assert RecycleBin.objects.filter(data=target).count() == 1
        assert not RecycleBin.objects.get(data=target).data.irr_ind

    # Discard the non-IRR data: afterwards it has no DataQueue or
    # AssignedData rows and exactly one RecycleBin row.
    non_irr_rows = admin_data.filter(data__irr_ind=False)
    for row in non_irr_rows:
        target = row.data
        admin_client.post('/api/discard_data/' + str(target.pk) + '/')
        assert DataQueue.objects.filter(data=target).count() == 0
        assert AssignedData.objects.filter(data=target).count() == 0
        assert RecycleBin.objects.filter(data=target).count() == 1
def test_label_distribution_inverted(seeded_database, admin_client, client,
                                     test_project_data, test_queue, test_irr_queue,
                                     test_labels, test_admin_queue):
    """Check the API that produces the label counts chart for the skew page.

    The response is read as one row per user (``key``) holding a list of
    per-label points (``values`` with ``x`` and ``y``). The coder labels three
    items with three different labels, then the admin labels three items with
    the first label, and the per-user counts are checked after each step.
    """
    project = test_project_data
    client_profile, admin_profile = sign_in_and_fill_queue(
        project, test_queue, client, admin_client)

    coder_name = str(client_profile)
    admin_name = str(admin_profile)
    inverted_url = '/api/label_distribution_inverted/' + str(project.pk) + '/'

    # NOTE(review): these two initial requests hit /api/label_distribution/,
    # not the inverted endpoint exercised below - confirm this is intended.
    plain_url = '/api/label_distribution/' + str(project.pk) + '/'
    denied = client.get(plain_url)
    assert 'detail' in denied.json()
    assert "Invalid permission. Must be an admin" in denied.json()['detail']

    # With nothing labeled the admin gets an empty list.
    empty = admin_client.get(plain_url)
    assert len(empty.json()) == 0

    # The coder labels three items, each with a different label.
    coder_batch = get_assignments(client_profile, project, 3)
    for idx in range(3):
        client.post('/api/annotate_data/' + str(coder_batch[idx].pk) + '/', {
            "labelID": test_labels[idx].pk,
            "labeling_time": 3
        })
    assert DataLabel.objects.filter(data__in=coder_batch).count() == 3

    # The coder has one of each label; any other user has zero of each.
    rows = admin_client.get(inverted_url).json()
    assert len(rows) > 0
    for row in rows:
        user = row['key']
        for point in row['values']:
            if user != coder_name:
                assert user in [admin_name, 'test_profile']
                assert point['y'] == 0
            else:
                assert point['y'] == 1

    # The admin labels three items, all with the first label.
    admin_batch = get_assignments(admin_profile, project, 3)
    for idx in range(3):
        admin_client.post('/api/annotate_data/' + str(admin_batch[idx].pk) + '/', {
            "labelID": test_labels[0].pk,
            "labeling_time": 3
        })

    # The coder is unchanged, the admin has three of the first label and
    # zero of the others, and any other user has zero of each.
    rows = admin_client.get(inverted_url).json()
    assert len(rows) > 0
    for row in rows:
        user = row['key']
        for point in row['values']:
            if user == coder_name:
                expected = 1
            elif user == admin_name and point['x'] == test_labels[0].name:
                expected = 3
            else:
                expected = 0
            assert point['y'] == expected