# Ejemplo n.º 1
def test_dep_sample(config):
    """Dependency evaluation should copy a parent answer's highlights onto a
    child answer that has none of its own.

    Q1 (parent) carries highlights on chars 10-30; Q2 (child) has none.
    After ``eval_dependency`` runs, Q2's row must contain the inherited
    highlight indices.
    """
    iaa_files_path = test_utils.make_test_directory(config, 'dep_sample')
    out_path = test_utils.make_test_directory(config, 'out_dep_sample')
    # source_task_id generated by smashing keyboard
    iaa = IAA_task(out_folder=iaa_files_path, source_task_id='kjncsa87nxao21899102j1j2')
    iaa.add_row({"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability',
                 'highlighted_indices': test_utils.make_highlight_indices(10, 30)})
    iaa.add_row({"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'})
    iaa.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'
    dh_path = None  # not used by dependency evaluation, but still a required argument

    eval_dependency(dh_path, iaa_files_path, schema_path, out_path)

    out_df = None
    for root, _dirs, files in os.walk(out_path):
        for file in files:
            # Should be only one output file for this case, so just read the
            # only one; if there's ever more than one, get fancier here.
            out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')

    assert out_df is not None, "dependency evaluation produced no output file"
    assert len(out_df) == 2
    # The child question (Q2) should have inherited the parent's highlights.
    q_two = out_df[out_df['question_Number'] == 2]
    hl = q_two['highlighted_indices'].iloc[0]
    assert len(hl) > 18
    assert '10' in hl
    assert '29' in hl
# Ejemplo n.º 2
def test_dep_parent(config):
    """Across several schemas, verify that when a parent answer has highlights
    and its child answer does not, dependency evaluation copies the parent's
    highlights onto the child.

    Each ``all_schema`` entry is a list of answer-row dicts followed by an
    int: the child's question number whose highlights are asserted on.
    """
    #Test if parent has highlight, children don't, dep_iaa should have parent's highlight
    iaa_files_path=test_utils.make_test_directory(config, 'dep_parent')
    outpath=test_utils.make_test_directory(config, 'out_dep_parent')
    # source_task_id generated by smashing keyboard
    all_schema=[ [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'},2],
                 [{"agreed_Answer": 2, "question_Number": 1, "namespace": 'Covid_Languagev1.1','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 3, "question_Number": 3, "namespace": 'Covid_Languagev1.1'},3],
                 [{"agreed_Answer": 4, "question_Number": 15, "namespace": 'Covid_Holisticv1.2','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1, "question_Number": 16, "namespace": 'Covid_Holisticv1.2'},16],
                 [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Evidence2020_03_21',
                   'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
                  {"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21',
                   'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
                  {"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
                 [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning', 'highlighted_indices': test_utils.make_highlight_indices(80, 120)},{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1,
                                                                                      "question_Number": 7,
                                                                                      "namespace": 'Covid_Reasoning'}, 7]
                 ]

    #The test fails if I add these two to the all_Schema list, which I don't understand why since I have been following the same logic
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1, "question_Number": 7, "namespace": 'Covid_Reasoning'}, 7]
    for i in all_schema:
        print(i)
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        # Only the dict rows are answer data; the trailing int is the
        # question number used by the assertions below.
        for row in i:
            if isinstance(row, dict):
                iaa.add_row(row)
        fin_path = iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None #doesn't get used by dependency but is still an argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        for root, dir, files in os.walk(outpath):
            for file in files:
                #should be only 1 file for this case, so just run it on the only one
                # if there's more than 1 then you can get fancy
                out_df  = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
        #This basically works for my first test, child should have parent's highlights if itself doesn't have any but its parent does, thx eric~.
                #assert len(out_df) == 2
                # NOTE(review): these asserts sit inside the per-file loop, and
                # iaa/output files accumulate in the shared directories across
                # schema iterations — an output file lacking question i[-1]
                # would make .iloc[0] raise. Verify eval_dependency overwrites
                # rather than accumulates output files.
                q_three = out_df[out_df['question_Number']==i[-1]]
                hl = q_three['highlighted_indices'].iloc[0]
                assert len(hl) >18
                assert '10' in hl
                assert '29' in hl
def test_he_low_info_true_low_counts(config):
    """A reasoning answer worth 5 points plus a 'scientific discovery'
    holistic answer tied to an 'argument' TUA should produce two assessed
    rows whose points sum to 3."""
    tua_path = test_utils.make_test_directory(
        config, 'he_tua_input_low_info_true_low_counts')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_input_low_info_true_low_counts')

    # Reasoning answer worth 5 points, highlighting chars 10-30.
    pa = point_assignment(out_folder=scoring_path, article_num='520',
                          source_task_id='practice_makes+[perfect')
    pa.add_row({'namespace': 'Covid2_Reasoning_2020_09_20',
                'Answer_Number': 3,
                'points': 5,
                "Question_Number": 5,
                'agreement_score': 1,
                'highlighted_indices': test_utils.make_highlight_indices(10, 30)})

    # One 'argument' TUA covering the same span.
    new_tua = tua(out_folder=tua_path, article_num='520',
                  source_task_id='tua_task_id')
    new_tua.add_row({'topic_name': 'argument',
                     'start_pos': 10,
                     'end_pos': 30,
                     'tua_uuid': 'test1'})

    # Holistic 'scientific discovery' answer attached to that TUA.
    hol_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    hol_dep.add_row({"namespace": "Covid2_Holistic_2020_09_20",
                     "agreed_Answer": 5,
                     "question_Number": 1,
                     "agreement_score": 1,
                     "tua_uuid": 'test1'})
    hol_dep.export()

    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    assert len(points) == 2
    assert points['points'].sum() == 3
# Ejemplo n.º 4
def test_point_assignment_source_in_weight_highlight(config):
    """Source-answer scaling when each weight highlight strictly contains
    its matching source TUA: assessed points must follow the scaling guide,
    answer by answer."""
    tua_path = test_utils.make_test_directory(config, 'pa_source_wh_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_wh_dep')

    weight = weighted(out_folder=scoring_path, article_num='520',
                      source_task_id='source_scaling')
    for q in range(1, 8):
        start, end = (q - 1) * 10, q * 10 - 1  # (0, 9), (10, 19), ...
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 9,
            "Question_Number": q,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end),
        })
    weight_df = weight.df

    # TUA spans sit strictly inside the corresponding weight highlights.
    new_tua = tua(out_folder=tua_path, article_num='520',
                  source_task_id='tua_task_id')
    for q in range(1, 8):
        start, end = (q - 1) * 10 + 1, q * 10 - 2  # (1, 8), (11, 18), ...
        new_tua.add_row({'topic_name': 'source',
                         'start_pos': start,
                         'end_pos': end,
                         'tua_uuid': str(q)})

    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for q in range(1, 8):
        arg_dep.add_row({"namespace": "Covid_Sources_2002_03_20v2.1",
                         "agreed_Answer": q,
                         "question_Number": 8,
                         "agreement_score": 1,
                         "tua_uuid": str(q)})
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)

    assert len(weights) == 7
    # Per-answer multipliers come from point_assignment_scaling_guide.csv.
    for idx, multiplier in enumerate([2, 1.5, 1, 0.5, 0, -0.5, 0]):
        assert weights['points'].iloc[idx] == 9 * multiplier
# Ejemplo n.º 5
def test_point_assignment_source(config):
    """Source-answer scaling when weight and TUA highlights coincide
    exactly: assessed points must follow the scaling guide, answer by
    answer."""
    tua_path = test_utils.make_test_directory(config, 'pa_source_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_dep')

    weight = weighted(out_folder=scoring_path, article_num='520',
                      source_task_id='source_scaling')
    for q in range(1, 8):
        # Highlights (0, 9) for answer 1, (10, 19) for answer 2, etc.
        start, end = (q - 1) * 10, q * 10 - 1
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 5,
            "Question_Number": q,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end),
        })
    weight_df = weight.df

    # Each TUA gets its own ID and a unique, non-overlapping highlight span.
    new_tua = tua(out_folder=tua_path, article_num='520',
                  source_task_id='tua_task_id')
    for q in range(1, 8):
        start, end = (q - 1) * 10, q * 10 - 1
        new_tua.add_row({'topic_name': 'source',
                         'start_pos': start,
                         'end_pos': end,
                         'tua_uuid': str(q)})

    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for q in range(1, 8):
        arg_dep.add_row({"namespace": "Covid_Sources_2002_03_20v2.1",
                         "agreed_Answer": q,
                         "question_Number": 8,
                         "agreement_score": 1,
                         "tua_uuid": str(q)})
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)
    print("WEIGHTS:", weights)
    assert len(weights) == 7
    # Per-answer multipliers come from point_assignment_scaling_guide.csv.
    for idx, multiplier in enumerate([2, 1.5, 1, 0.5, 0, -0.5, 0]):
        assert weights['points'].iloc[idx] == 5 * multiplier
def test_he_vague_sources_false(config):
    """Vague-source penalty: four vague source answers in a 2900-char
    article exceed the threshold of 4 per 2800 characters (the standard
    article length), so scoring docks points on the offending TUA."""
    tua_path = test_utils.make_test_directory(config,
                                              'he_tua_vague_sources_false')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_vague_sources_false')
    out_path = test_utils.make_test_directory(config,
                                              'out_he_vague_sources_false')

    # Baseline reasoning answer worth 0 points.
    pa = point_assignment(out_folder=scoring_path, article_num='520',
                          source_task_id='practice_makes+[perfect',
                          article_text_length=2900)
    pa.add_row({'namespace': 'Covid2_Reasoning_2020_09_20',
                'Answer_Number': 3,
                'points': 0,
                "Question_Number": 5,
                'agreement_score': 1,
                'highlighted_indices': test_utils.make_highlight_indices(10, 30)})
    pa.export()

    def make_src_dep(task_id):
        # One dep_iaa per source task, all on the same article.
        return dep_iaa(out_folder=scoring_path, source_task_id=task_id,
                       article_num='520', article_text_length=2900)

    def source_row(answer, question, start, end):
        # Vague source answer attached to TUA 'tua3'.
        return {'namespace': 'Covid2_Sources_2002_09_20',
                'agreed_Answer': answer,
                "question_Number": question,
                'agreement_score': 1,
                'highlighted_indices': test_utils.make_highlight_indices(start, end),
                'tua_uuid': 'tua3'}

    dep_one = make_src_dep('qs1')
    dep_one.add_row(source_row(5, 2, 10, 30))
    dep_one.export()

    dep_two = make_src_dep('qs2')
    dep_two.add_row(source_row(6, 2, 15, 38))
    dep_two.add_row(source_row(8, 5, 7, 27))
    dep_two.export()

    dep_three = make_src_dep('qs3')
    dep_three.add_row(source_row(7, 5, 15, 38))
    dep_three.export()

    # Three quoted-source TUAs; only 'tua3' carries the vague answers.
    new_tua = tua(out_folder=tua_path, article_num='520',
                  source_task_id='tua_task_id', article_text_length=2900)
    for uuid, start, end in (('tua1', 10, 30),
                             ('tua2', 50, 120),
                             ('tua3', 900, 1020)):
        new_tua.add_row({'topic_name': 'Quoted Sources',
                         'start_pos': start,
                         'end_pos': end,
                         'tua_uuid': uuid})
    new_tua.export()

    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    points.to_csv(out_path + '/AssessedPoints.csv', encoding='utf-8')

    assert points['points'].sum() == -4
    assert len(points) == 3
    # The -2 penalty row should highlight the whole 'tua3' span.
    hl = points[points['points'] == -2]['highlighted_indices'].iloc[0]
    assert all([str(i) in hl for i in range(900, 1020)])
# Ejemplo n.º 7
def test_dep_parent1(config):
    """A child answer that has its own highlights keeps them even when its
    parent has none — dependency evaluation must not clobber them.

    Each ``all_schema`` entry is ``[child_row (with highlights),
    parent_row (no highlights), child question number to assert on]``.
    """
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent1')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent1')
    all_schema = [
        [{"agreed_Answer": 2, "question_Number": 4, "namespace": 'Covid_Sources_2002_03_20v2.1',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 3, "namespace": 'Covid_Sources_2002_03_20v2.1'},
         4],
        [{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning'},
         2],
    ]

    for schema_case in all_schema:
        # source_task_id generated by smashing keyboard
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        # Parent (no highlights) first, then the highlighted child.
        iaa.add_row(schema_case[1])
        iaa.add_row(schema_case[0])
        iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None  # not used by dependency evaluation, but still a required argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)

        out_df = None
        for root, _dirs, files in os.walk(outpath):
            for file in files:
                # Should be only one output file per case; the last one read
                # is the one asserted on.
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')

        assert out_df is not None, "dependency evaluation produced no output file"
        assert len(out_df) == 2
        # The child question must retain its own (10, 30) highlights.
        child = out_df[out_df['question_Number'] == schema_case[2]]
        hl = child['highlighted_indices'].iloc[0]
        assert len(hl) > 18
        assert '10' in hl
        assert '29' in hl