예제 #1
0
def test_no_irr_all_queues(db, test_project_no_irr_data):
    """Create a normal, an admin and an IRR queue on ``test_project_no_irr_data``.

    The normal and admin queues are created with length ``TEST_QUEUE_LEN``;
    the IRR queue is created with length ``MAX_DATA_LEN``.

    Returns:
        list: ``[normal_queue, admin_queue, irr_queue]``, in that order.
    """
    project = test_project_no_irr_data
    normal_queue = add_queue(project=project, length=TEST_QUEUE_LEN)
    admin_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="admin")
    irr_queue = add_queue(project=project, length=MAX_DATA_LEN, type="irr")
    return [normal_queue, admin_queue, irr_queue]
예제 #2
0
def test_gnb_queue_list(db, test_project_gnb_data_tfidf):
    """Create a normal, an admin and an IRR queue on ``test_project_gnb_data_tfidf``.

    The normal and admin queues are created with length ``TEST_QUEUE_LEN``;
    the IRR queue is created with length ``MAX_DATA_LEN``.

    Returns:
        list: ``[normal_queue, admin_queue, irr_queue]``, in that order.
    """
    project = test_project_gnb_data_tfidf
    normal_queue = add_queue(project=project, length=TEST_QUEUE_LEN)
    admin_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="admin")
    irr_queue = add_queue(project=project, length=MAX_DATA_LEN, type="irr")
    return [normal_queue, admin_queue, irr_queue]
def test_init_redis_one_empty_queue(db, test_project, test_redis):
    """Check ``init_redis`` when the project has a single queue that was never filled."""
    add_queue(project=test_project, length=10)

    # Flush first, so the comparison below only sees what init_redis() wrote
    # after the flush.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
예제 #4
0
def test_add_queue_no_profile(test_project):
    """Add a queue without a profile and assert a matching ``Queue`` exists.

    The expected object has the given project and length and ``profile`` None.
    """
    expected_length = 10
    add_queue(project=test_project, length=expected_length)

    expected_fields = {
        'project': test_project,
        'length': expected_length,
        'profile': None,
    }
    assert_obj_exists(Queue, expected_fields)
예제 #5
0
def test_add_queue_profile(test_project, test_profile):
    """Add a queue tied to ``test_profile`` and assert a matching ``Queue`` exists.

    The expected object has the given project, length and profile.
    """
    expected_length = 10
    add_queue(project=test_project, length=expected_length, profile=test_profile)

    expected_fields = {
        'project': test_project,
        'length': expected_length,
        'profile': test_profile,
    }
    assert_obj_exists(Queue, expected_fields)
예제 #6
0
def test_randomforest_queue_list(db, test_project_randomforest_data_tfidf):
    """Create normal, admin and IRR queues on ``test_project_randomforest_data_tfidf``.

    All three queues are created with length ``TEST_QUEUE_LEN``.

    Returns:
        list: ``[normal_queue, admin_queue, irr_queue]``, in that order.
    """
    project = test_project_randomforest_data_tfidf
    normal_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="normal")
    admin_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="admin")
    # NOTE(review): the IRR queue here uses TEST_QUEUE_LEN, whereas
    # test_gnb_queue_list uses MAX_DATA_LEN -- confirm this is intentional.
    irr_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="irr")
    return [normal_queue, admin_queue, irr_queue]
def test_init_redis_multiple_queues(db, test_project_data, test_redis):
    """Check ``init_redis`` when one project has a filled and an unfilled queue."""
    filled_queue = add_queue(project=test_project_data, length=10)
    fill_queue(filled_queue, orderby="random")

    # Second queue on the same project; it is deliberately left unfilled.
    add_queue(project=test_project_data, length=10)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_get_nonempty_queue_noprofile(db, test_project_data):
    """Exercise ``get_nonempty_queue`` on a project with two profile-less queues.

    As asserted below, the expected result is None while neither queue is
    filled, the second queue once only it has been filled, and the first
    queue once both have been filled.
    """
    length = 10
    first_queue = add_queue(project=test_project_data, length=length)
    second_queue = add_queue(project=test_project_data, length=length)

    # Nothing has been filled yet.
    assert get_nonempty_queue(test_project_data) is None

    fill_queue(second_queue, orderby="random")
    assert get_nonempty_queue(test_project_data) == second_queue

    fill_queue(first_queue, orderby="random")
    assert get_nonempty_queue(test_project_data) == first_queue
def test_get_nonempty_profile_queue(db, test_project_data, test_profile):
    """Exercise ``get_nonempty_queue`` with ``profile=test_profile``.

    The project gets one queue without a profile and two queues for
    ``test_profile``. As asserted below, the expected result is None while
    neither profile queue is filled, the second profile queue once only it
    has been filled, and the first profile queue once both have been filled.
    """
    length = 10
    # Queue without a profile, created before the two profile queues.
    add_queue(project=test_project_data, length=length)
    first_profile_queue = add_queue(
        project=test_project_data, length=length, profile=test_profile)
    second_profile_queue = add_queue(
        project=test_project_data, length=length, profile=test_profile)

    found = get_nonempty_queue(test_project_data, profile=test_profile)
    assert found is None

    fill_queue(second_profile_queue, orderby="random")
    found = get_nonempty_queue(test_project_data, profile=test_profile)
    assert found == second_profile_queue

    fill_queue(first_profile_queue, orderby="random")
    found = get_nonempty_queue(test_project_data, profile=test_profile)
    assert found == first_profile_queue
예제 #10
0
def seed_project(creator, name, description, data_file, label_list, perm_list,
                 classifier):
    """Create a project and populate it with labels, coders, queues and data.

    Args:
        creator: Stored as the project's ``creator``.
        name: Project name.
        description: Project description.
        data_file: Passed to ``read_test_data_backend`` as ``file``.
        label_list: Iterable of label names; one ``Label`` is created per entry.
        perm_list: Iterable of profiles; each one receives a ``"CODER"``
            ``ProjectPermissions`` row on the new project.
        classifier: Stored as the project's ``classifier``.

    Returns:
        The newly created ``Project``.
    """
    project = Project.objects.create(name=name,
                                     description=description,
                                     creator=creator,
                                     classifier=classifier)

    TrainingSet.objects.create(project=project, set_number=0)

    # The loop variable is deliberately not called ``name`` so that the
    # ``name`` parameter is not rebound part-way through the function.
    labels = [
        Label.objects.create(name=label_name, project=project)
        for label_name in label_list
    ]

    permissions = [
        ProjectPermissions.objects.create(profile=profile,
                                          project=project,
                                          permission="CODER")
        for profile in perm_list
    ]

    # Batch size is ten items per label.
    batch_size = 10 * len(labels)
    project.batch_size = batch_size
    project.save()

    # NOTE(review): the +1 presumably accounts for the creator -- confirm.
    num_coders = len(permissions) + 1
    q_length = find_queue_length(batch_size, num_coders)

    queue = add_queue(project=project, length=q_length, type="normal")

    # Data
    f_data = read_test_data_backend(file=data_file)
    data_length = len(f_data)

    add_queue(project=project, length=data_length, type="admin")
    # NOTE(review): 2000000 looks like an "effectively unbounded" length for
    # the IRR queue -- confirm.
    irr_queue = add_queue(project=project, length=2000000, type="irr")
    new_df = add_data(project, f_data)
    fill_queue(queue,
               irr_queue=irr_queue,
               orderby="random",
               batch_size=batch_size)
    save_data_file(new_df, project.pk)

    # NOTE(review): presumably Celery tasks invoked via ``.apply`` -- confirm.
    tasks.send_tfidf_creation_task.apply(args=[project.pk])
    tasks.send_check_and_trigger_model_task.apply(args=[project.pk])

    return project
def test_init_redis_one_nonempty_queue(db, test_project_data, test_redis):
    """Check ``init_redis`` when the project has a single filled queue."""
    filled_queue = add_queue(project=test_project_data, length=10)
    fill_queue(filled_queue, orderby="random")

    # Flush first, so the comparison below only sees what init_redis() wrote
    # after the flush.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
def test_pop_only_affects_one_queue(db, test_project_data, test_redis):
    """Pop from one of two filled queues and check the other is unchanged."""
    length = 10
    popped_queue = add_queue(project=test_project_data, length=length)
    other_queue = add_queue(project=test_project_data, length=length)
    for each_queue in (popped_queue, other_queue):
        fill_queue(each_queue, orderby="random")

    datum = pop_queue(popped_queue)
    assert isinstance(datum, Data)

    # Popped queue: the Redis list is one shorter, while the Redis set and
    # the queue's data count keep the full length.
    popped_list_key = "queue:" + str(popped_queue.pk)
    popped_set_key = "set:" + str(popped_queue.pk)
    assert test_redis.llen(popped_list_key) == length - 1
    assert test_redis.scard(popped_set_key) == length
    assert popped_queue.data.count() == length

    # Other queue: every count is still the full length.
    other_list_key = "queue:" + str(other_queue.pk)
    other_set_key = "set:" + str(other_queue.pk)
    assert test_redis.llen(other_list_key) == length
    assert test_redis.scard(other_set_key) == length
    assert other_queue.data.count() == length
def test_pop_empty_queue(db, test_project, test_redis):
    """Pop from a queue that was never filled and expect nothing back.

    Asserts that the pop returns None, that no Redis key exists for the
    queue's list, and that the queue's data count is zero.
    """
    empty_queue = add_queue(project=test_project, length=10)

    popped = pop_queue(empty_queue)
    assert popped is None

    list_key = "queue:" + str(empty_queue.pk)
    assert not test_redis.exists(list_key)
    assert empty_queue.data.count() == 0
예제 #14
0
def test_pop_nonempty_queue(db, test_project_data, test_redis):
    """Pop once from a filled queue and check the resulting counts.

    Asserts that a ``Data`` instance is returned, that the Redis list is one
    shorter than the queue length, and that the Redis set and the queue's
    data count still equal the queue length.
    """
    length = 10
    filled_queue = add_queue(project=test_project_data, length=length)
    fill_queue(filled_queue, orderby='random')

    popped = pop_queue(filled_queue)
    assert isinstance(popped, Data)

    list_key = 'queue:' + str(filled_queue.pk)
    set_key = 'set:' + str(filled_queue.pk)
    assert test_redis.llen(list_key) == length - 1
    assert test_redis.scard(set_key) == length
    assert filled_queue.data.count() == length
예제 #15
0
def test_pop_first_nonempty_queue_multiple_queues(db, test_project_data,
                                                  test_queue, test_redis):
    """Check which queue ``pop_first_nonempty_queue`` pops from.

    With only the newly added queue filled, that queue is expected. After
    ``test_queue`` is filled as well, ``test_queue`` is expected.
    """
    second_queue = add_queue(project=test_project_data, length=10)
    fill_queue(second_queue, orderby='random')

    # Only the second queue has been filled at this point.
    popped_from, _datum = pop_first_nonempty_queue(test_project_data)
    assert isinstance(popped_from, Queue)
    assert popped_from == second_queue

    fill_queue(test_queue, orderby='random')

    # Both queues are filled now.
    popped_from, _datum = pop_first_nonempty_queue(test_project_data)
    assert isinstance(popped_from, Queue)
    assert popped_from == test_queue
def test_init_redis_multiple_projects(db, test_project_data, test_redis,
                                      test_profile):
    """Check ``init_redis`` across two projects with filled and unfilled queues.

    Each project gets one queue that is filled and one that is left
    unfilled, to see whether everything initializes as expected.
    """
    # Project 1: a filled queue, then an unfilled one.
    first_project_filled = add_queue(project=test_project_data, length=10)
    fill_queue(first_project_filled, orderby="random")
    add_queue(project=test_project_data, length=10)

    # Project 2: created here and loaded with data read from test_no_labels.csv.
    second_project = create_project("test_project2", test_profile)
    second_project_data = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv")
    add_data(second_project, second_project_data)

    # Same queue layout as project 1.
    second_project_filled = add_queue(project=second_project, length=10)
    fill_queue(second_project_filled, orderby="random")
    add_queue(project=second_project, length=10)

    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
예제 #17
0
    def done(self, form_list, form_dict, **kwargs):
        """Persist the submitted forms as a new project and redirect to it.

        Reads the 'project', 'labels', 'permissions', 'advanced', 'data' and
        'codebook' entries of ``form_dict``. Every database write happens
        inside a single ``transaction.atomic()`` block.

        NOTE(review): presumably the ``done`` hook of a multi-step form
        wizard view -- confirm against the enclosing class.

        Args:
            form_list: Not used by this method.
            form_dict: Mapping of step name to the submitted form or formset.
            **kwargs: Not used by this method.

        Returns:
            ``HttpResponseRedirect`` to the new project's absolute URL.
        """
        def count_active(formset):
            # Forms that carry cleaned data and are not flagged for deletion.
            return sum(
                1 for form in formset
                if form.cleaned_data != {} and not form.cleaned_data['DELETE']
            )

        project_form = form_dict['project']
        label_forms = form_dict['labels']
        permission_forms = form_dict['permissions']
        advanced_form = form_dict['advanced']
        data_form = form_dict['data']
        codebook_form = form_dict['codebook']

        with transaction.atomic():
            # Project
            proj_obj = project_form.save(commit=False)
            options = advanced_form.cleaned_data

            proj_obj.creator = self.request.user.profile
            # Save once so that proj_obj.pk is available for the codebook file.
            proj_obj.save()
            proj_pk = proj_obj.pk

            # Codebook file: only saved when codebook data was supplied.
            cb_data = codebook_form.cleaned_data['data']
            cb_filepath = save_codebook_file(cb_data, proj_pk) if cb_data != "" else ""
            proj_obj.codebook_file = cb_filepath

            # Advanced options: a batch size of 0 means "ten per active label".
            batch_size = options["batch_size"]
            if batch_size == 0:
                batch_size = 10 * count_active(label_forms)

            proj_obj.batch_size = batch_size
            proj_obj.learning_method = options["learning_method"]
            proj_obj.percentage_irr = options["percentage_irr"]
            proj_obj.num_users_irr = options["num_users_irr"]
            proj_obj.classifier = options["classifier"]
            proj_obj.save()

            # Training Set
            TrainingSet.objects.create(project=proj_obj, set_number=0)

            # Labels
            label_forms.instance = proj_obj
            label_forms.save()

            # Permissions
            permission_forms.instance = proj_obj
            permission_forms.save()

            # Queue
            # NOTE(review): the +1 presumably accounts for the creator -- confirm.
            num_coders = count_active(permission_forms) + 1
            q_length = find_queue_length(batch_size, num_coders)
            queue = add_queue(project=proj_obj, length=q_length)

            # Data
            f_data = data_form.cleaned_data['data']
            add_queue(project=proj_obj, length=2000000, type="admin")
            irr_queue = add_queue(project=proj_obj, length=2000000, type="irr")
            upload_data(f_data, proj_obj, queue, irr_queue, batch_size)

        redirect_url = proj_obj.get_absolute_url()
        return HttpResponseRedirect(redirect_url)
def test_add_queue_no_profile(test_project):
    """Add a queue without a profile and assert a matching ``Queue`` exists.

    The expected object has the given project and length and ``profile`` None.
    """
    expected_length = 10
    add_queue(project=test_project, length=expected_length)

    expected_fields = {
        "project": test_project,
        "length": expected_length,
        "profile": None,
    }
    assert_obj_exists(Queue, expected_fields)
def test_add_queue_profile(test_project, test_profile):
    """Add a queue tied to ``test_profile`` and assert a matching ``Queue`` exists.

    The expected object has the given project, length and profile.
    """
    expected_length = 10
    add_queue(project=test_project, length=expected_length, profile=test_profile)

    expected_fields = {
        "project": test_project,
        "length": expected_length,
        "profile": test_profile,
    }
    assert_obj_exists(Queue, expected_fields)
예제 #20
0
def test_admin_queue(db, test_project_data):
    """Create an "admin" queue of length ``TEST_QUEUE_LEN`` on ``test_project_data``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    admin_queue = add_queue(project=test_project_data,
                            length=TEST_QUEUE_LEN,
                            type="admin")
    return admin_queue
예제 #21
0
def test_queue_labeled(db, test_project_labeled):
    """Create a "normal" queue of length ``TEST_QUEUE_LEN`` on ``test_project_labeled``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    normal_queue = add_queue(project=test_project_labeled,
                             length=TEST_QUEUE_LEN,
                             type="normal")
    return normal_queue
예제 #22
0
def test_queue(db, test_project_data):
    """Create a queue of length ``TEST_QUEUE_LEN`` on ``test_project_data``.

    No queue type is passed, so ``add_queue`` applies its own default.

    Returns:
        The queue object returned by ``add_queue``.
    """
    queue = add_queue(project=test_project_data, length=TEST_QUEUE_LEN)
    return queue
예제 #23
0
def test_irr_queue(db, test_project_data):
    """Create an "irr" queue of length ``MAX_DATA_LEN`` on ``test_project_data``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    irr_queue = add_queue(project=test_project_data,
                          length=MAX_DATA_LEN,
                          type="irr")
    return irr_queue
예제 #24
0
def test_irr_queue_labeled(db, test_project_labeled):
    """Create an "irr" queue of length ``MAX_DATA_LEN`` on ``test_project_labeled``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    irr_queue = add_queue(project=test_project_labeled,
                          length=MAX_DATA_LEN,
                          type="irr")
    return irr_queue
예제 #25
0
def test_admin_queue_labeled(db, test_project_labeled):
    """Create an "admin" queue of length ``TEST_QUEUE_LEN`` on ``test_project_labeled``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    admin_queue = add_queue(project=test_project_labeled,
                            length=TEST_QUEUE_LEN,
                            type="admin")
    return admin_queue
예제 #26
0
def test_profile_queue(db, test_profile, test_project_data):
    """Create a queue of length ``TEST_QUEUE_LEN`` tied to ``test_profile``.

    The queue is created on ``test_project_data``.

    Returns:
        The queue object returned by ``add_queue``.
    """
    profile_queue = add_queue(project=test_project_data,
                              length=TEST_QUEUE_LEN,
                              profile=test_profile)
    return profile_queue
예제 #27
0
def test_profile_queue2(db, test_profile2, test_project_data):
    """Create a queue of length ``TEST_QUEUE_LEN`` tied to ``test_profile2``.

    The queue is created on ``test_project_data``. Useful for tests that
    need multiple profiles/queues on the same project.

    Returns:
        The queue object returned by ``add_queue``.
    """
    profile_queue = add_queue(project=test_project_data,
                              length=TEST_QUEUE_LEN,
                              profile=test_profile2)
    return profile_queue
예제 #28
0
def test_half_irr_all_queues(db, test_project_half_irr):
    """Create a normal, an admin and an IRR queue on ``test_project_half_irr``.

    The normal and admin queues are created with length ``TEST_QUEUE_LEN``;
    the IRR queue is created with length ``MAX_DATA_LEN``.

    Returns:
        list: ``[normal_queue, admin_queue, irr_queue]``, in that order.
    """
    project = test_project_half_irr
    normal_queue = add_queue(project=project, length=TEST_QUEUE_LEN)
    admin_queue = add_queue(project=project, length=TEST_QUEUE_LEN, type="admin")
    irr_queue = add_queue(project=project, length=MAX_DATA_LEN, type="irr")
    return [normal_queue, admin_queue, irr_queue]