Ejemplo n.º 1
0
def test_project_data(db, test_project):
    '''
    Load the ``test_no_labels.csv`` test file into ``test_project``.

    ``db`` is not referenced in the body; presumably it is requested only to
    enable database access for the test run (confirm against the fixtures).

    Returns the same ``test_project`` object that was passed in.
    '''
    csv_path = './core/data/test_files/test_no_labels.csv'
    add_data(test_project, read_test_data_backend(file=csv_path))
    return test_project
Ejemplo n.º 2
0
def test_project_all_irr_3_coders_data(db, test_project_all_irr_3_coders):
    '''
    Load the ``test_no_labels.csv`` test file into the supplied
    ``test_project_all_irr_3_coders`` project and hand that project back.

    The project itself is built elsewhere; per the original note it is the
    100% irr project.
    '''
    project = test_project_all_irr_3_coders
    rows = read_test_data_backend(
        file='./core/data/test_files/test_no_labels.csv')
    add_data(project, rows)
    return project
Ejemplo n.º 3
0
def test_project_all_irr_data(db, test_profile):
    '''
    Create a project named ``test_project`` for ``test_profile`` and load the
    ``test_no_labels.csv`` test file into it.

    The positional ``100, 2`` arguments are, per the original note, what makes
    this a 100% irr project -- confirm against ``create_project``'s signature.

    Returns the newly created project.
    '''
    csv_path = './core/data/test_files/test_no_labels.csv'
    irr_project = create_project('test_project', test_profile, 100, 2)
    add_data(irr_project, read_test_data_backend(file=csv_path))
    return irr_project
Ejemplo n.º 4
0
def test_project_labeled(test_project):
    """Attach the seed labels to ``test_project`` and load labeled test data.

    One ``Label`` row is created per entry of ``SEED_LABELS`` before the
    ``test_some_labels.csv`` file is added, and the same project is returned.
    """
    for label_name in SEED_LABELS:
        Label.objects.create(name=label_name, project=test_project)

    csv_path = "./core/data/test_files/test_some_labels.csv"
    add_data(test_project, read_test_data_backend(file=csv_path))
    return test_project
Ejemplo n.º 5
0
def test_project_gnb_data_tfidf(db, test_profile, tmpdir, settings):
    """Create a ``gnb`` project with data and a saved TF-IDF matrix.

    Steps, in order:
      1. create ``test_project`` for ``test_profile`` with ``classifier="gnb"``;
      2. load ``test_no_labels.csv`` into it;
      3. build the TF-IDF matrix for the project;
      4. point ``settings.TF_IDF_PATH`` at a fresh ``data/tf_idf`` directory
         under ``tmpdir`` and save the matrix.

    Returns the created project.
    """
    proj = create_project("test_project", test_profile, classifier="gnb")
    test_data = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(proj, test_data)

    # Only the first element of the returned value is used as the matrix.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect the TF-IDF location to a temp directory before saving.
    data_temp = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(data_temp)

    save_tfidf_matrix(matrix, proj.pk)
    return proj
def test_fill_multiple_projects(db, test_queue, test_profile):
    """Check that filling a queue only pulls data from the queue's own project.

    The queue length is set to one more than its project's data count, a
    second project with its own data is created, and the queue is filled.
    The queue must then hold exactly its own project's data.
    """
    own_project = test_queue.project
    own_count = own_project.data_set.count()

    # Leave room for one extra item so a leak from the other project would show.
    test_queue.length = own_count + 1
    test_queue.save()

    other_project = create_project("test_project2", test_profile)
    other_rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    add_data(other_project, other_rows)

    fill_queue(test_queue, orderby="random")

    # Ensure the queue didn't fill any data from the other project
    assert test_queue.data.count() == own_count
    for datum in test_queue.data.all():
        assert datum.project == test_queue.project
Ejemplo n.º 7
0
def test_project_svm_data_tfidf(db, test_profile, tmpdir, settings):
    '''
    Create an ``svm`` project with data and a saved TF-IDF matrix.

    Steps, in order:
      1. create ``test_project`` for ``test_profile`` with ``classifier="svm"``;
      2. load ``test_no_labels.csv`` into it;
      3. build the TF-IDF matrix for the project;
      4. point ``settings.TF_IDF_PATH`` at a fresh ``data/tf_idf`` directory
         under ``tmpdir`` and save the matrix.

    Returns the created project.
    '''
    proj = create_project('test_project', test_profile, classifier="svm")
    test_data = read_test_data_backend(
        file='./core/data/test_files/test_no_labels.csv')
    add_data(proj, test_data)

    # Only the first element of the returned value is used as the matrix.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect the TF-IDF location to a temp directory before saving.
    data_temp = tmpdir.mkdir('data').mkdir('tf_idf')
    settings.TF_IDF_PATH = str(data_temp)

    save_tfidf_matrix(matrix, proj.pk)

    return proj
Ejemplo n.º 8
0
def test_add_data_no_labels(db, test_project):
    '''
    Add the ``test_no_labels.csv`` file to ``test_project`` and check that a
    ``Data`` object exists for every row of the frame ``add_data`` returns.
    '''
    csv_path = './core/data/test_files/test_no_labels.csv'
    added = add_data(test_project, read_test_data_backend(file=csv_path))

    for _, record in added.iterrows():
        expected = {
            'upload_id_hash': record['id_hash'],
            'hash': record['hash'],
            'project': test_project
        }
        assert_obj_exists(Data, expected)
def test_init_redis_multiple_projects(db, test_project_data, test_redis,
                                      test_profile):
    """Check ``init_redis`` against two projects with filled and empty queues.

    Each project gets one queue of length 10 that is filled and a second
    queue of length 10 that is left empty. Redis is then flushed and
    re-initialised, and must match the database.
    """
    # Try a mix of multiple queues in multiple projects with
    # and without data to see if everything initializes as expected.
    first_filled = add_queue(test_project_data, 10)
    fill_queue(first_filled, orderby="random")
    add_queue(test_project_data, 10)

    second_project = create_project("test_project2", test_profile)
    second_rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv")
    add_data(second_project, second_rows)

    second_filled = add_queue(second_project, 10)
    fill_queue(second_filled, orderby="random")
    add_queue(second_project, 10)

    # Start from an empty redis so only init_redis populates it.
    test_redis.flushdb()
    init_redis()

    assert_redis_matches_db(test_redis)
Ejemplo n.º 10
0
def test_add_data_no_labels(db, test_project):
    """Verify every row returned by ``add_data`` has a matching ``Data`` object.

    The rows come from ``test_no_labels.csv`` and are added to ``test_project``.
    """
    source_rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv")
    result_frame = add_data(test_project, source_rows)

    for _, entry in result_frame.iterrows():
        lookup = {
            "upload_id_hash": entry["id_hash"],
            "hash": entry["hash"],
            "project": test_project,
        }
        assert_obj_exists(Data, lookup)
Ejemplo n.º 11
0
def seed_project(creator, name, description, data_file, label_list, perm_list,
                 classifier):
    """Create a project and populate it with labels, coders, queues and data.

    Args:
        creator: stored as the project's ``creator``.
        name: stored as the project's ``name``.
        description: stored as the project's ``description``.
        data_file: path handed to ``read_test_data_backend``.
        label_list: iterable of label names; one ``Label`` is created per name.
        perm_list: iterable of profiles; each receives a ``"CODER"``
            ``ProjectPermissions`` row on the project.
        classifier: stored as the project's ``classifier``.

    Returns:
        The newly created ``Project``.
    """
    project = Project.objects.create(name=name,
                                     description=description,
                                     creator=creator,
                                     classifier=classifier)

    TrainingSet.objects.create(project=project, set_number=0)

    # A distinct loop name keeps the ``name`` parameter from being overwritten.
    labels = [
        Label.objects.create(name=label_name, project=project)
        for label_name in label_list
    ]

    permissions = [
        ProjectPermissions.objects.create(profile=profile,
                                          project=project,
                                          permission="CODER")
        for profile in perm_list
    ]

    # Batch size scales with the number of labels (10 per label).
    batch_size = 10 * len(labels)
    project.batch_size = batch_size
    project.save()

    # NOTE(review): the +1 presumably counts the creator as a coder -- confirm.
    num_coders = len(permissions) + 1
    q_length = find_queue_length(batch_size, num_coders)

    queue = add_queue(project=project, length=q_length, type="normal")

    # Data
    f_data = read_test_data_backend(file=data_file)
    data_length = len(f_data)

    # The admin queue is sized to the number of rows read from the data file.
    add_queue(project=project, length=data_length, type="admin")
    irr_queue = add_queue(project=project, length=2000000, type="irr")
    new_df = add_data(project, f_data)
    fill_queue(queue,
               irr_queue=irr_queue,
               orderby="random",
               batch_size=batch_size)
    save_data_file(new_df, project.pk)

    # NOTE(review): ``.apply`` presumably runs these tasks inline rather than
    # queueing them -- confirm against the ``tasks`` module.
    tasks.send_tfidf_creation_task.apply(args=[project.pk])
    tasks.send_check_and_trigger_model_task.apply(args=[project.pk])

    return project
Ejemplo n.º 12
0
def test_add_data_with_labels(db, test_project_labels):
    '''
    Add ``test_some_labels.csv`` to ``test_project_labels`` and check the
    stored objects.

    Every returned row must have a ``Data`` object. Rows whose ``Label`` is
    not null must also have a ``DataLabel`` attributed to the project creator.
    '''
    csv_path = './core/data/test_files/test_some_labels.csv'
    added = add_data(test_project_labels, read_test_data_backend(file=csv_path))

    for _, record in added.iterrows():
        data_lookup = {
            'upload_id_hash': record['id_hash'],
            'hash': record['hash'],
            'project': test_project_labels
        }
        assert_obj_exists(Data, data_lookup)

        # Unlabeled rows have nothing further to verify.
        if pd.isnull(record['Label']):
            continue

        label_lookup = {
            'data__hash': record['hash'],
            'profile': test_project_labels.creator,
            'label__name': record['Label']
        }
        assert_obj_exists(DataLabel, label_lookup)
Ejemplo n.º 13
0
def test_add_data_with_labels(db, test_project_labels):
    """Verify ``Data`` and ``DataLabel`` objects after adding labeled data.

    Rows come from ``test_some_labels.csv``. Each returned row must have a
    ``Data`` object. A row with a non-null ``Label`` must also have a
    ``DataLabel`` whose profile is the project's creator.
    """
    source_rows = read_test_data_backend(
        file="./core/data/test_files/test_some_labels.csv")
    result_frame = add_data(test_project_labels, source_rows)
    creator = test_project_labels.creator

    for _, entry in result_frame.iterrows():
        assert_obj_exists(
            Data,
            {
                "upload_id_hash": entry["id_hash"],
                "hash": entry["hash"],
                "project": test_project_labels,
            },
        )

        label_value = entry["Label"]
        if pd.isnull(label_value):
            # No label on this row, so no DataLabel is expected.
            continue

        assert_obj_exists(
            DataLabel,
            {
                "data__hash": entry["hash"],
                "profile": creator,
                "label__name": label_value,
            },
        )
Ejemplo n.º 14
0
def test_project_data(db, test_project):
    """Add the ``test_no_labels.csv`` rows to ``test_project`` and return it."""
    rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    add_data(test_project, rows)
    return test_project
Ejemplo n.º 15
0
def test_project_all_irr_data(db, test_profile):
    """Build ``test_project`` for ``test_profile`` and add the unlabeled file.

    Per the original note, the ``100, 2`` arguments configure 100% irr
    (confirm against ``create_project``). Returns the new project.
    """
    rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    new_project = create_project("test_project", test_profile, 100, 2)
    add_data(new_project, rows)
    return new_project