def test_dep_sample(config):
    """Run eval_dependency on a two-row IAA file and check that the dependent
    answer (question 2) ends up carrying question 1's highlight."""
    iaa_files_path = test_utils.make_test_directory(config, 'dep_sample')
    out_path = test_utils.make_test_directory(config, 'out_dep_sample')
    # source_task_id generated by smashing keyboard
    iaa = IAA_task(out_folder=iaa_files_path, source_task_id='kjncsa87nxao21899102j1j2')
    # Parent row with a highlight; child row (question 2) without one.
    iaa.add_row({"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability',
                 'highlighted_indices': test_utils.make_highlight_indices(10, 30)})
    iaa.add_row({"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'})
    iaa.export()
    schema_path = config['data_dir'] + '/schemas'
    dh_path = None  # not used by eval_dependency but still a required argument
    eval_dependency(dh_path, iaa_files_path, schema_path, out_path)
    # Should be only 1 output file for this case, so just run the checks on
    # whatever the walk finds. (`_dirs` avoids shadowing the builtin `dir`.)
    for _root, _dirs, files in os.walk(out_path):
        for file in files:
            out_df = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')
            assert len(out_df) == 2
            q_two = out_df[out_df['question_Number'] == 2]
            hl = q_two['highlighted_indices'].iloc[0]
            # The inherited highlight string should cover indices 10..29.
            assert len(hl) > 18
            assert '10' in hl
            assert '29' in hl
def test_dep_parent(config):
    """Parent has a highlight, children don't: after eval_dependency the child
    answer row should carry the parent's highlight.

    Each ``all_schema`` entry is a list of answer-row dicts followed by the
    child question number whose highlight is checked at the end.
    """
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent')
    all_schema = [
        [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'}, 2],
        [{"agreed_Answer": 2, "question_Number": 1, "namespace": 'Covid_Languagev1.1',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 3, "namespace": 'Covid_Languagev1.1'}, 3],
        [{"agreed_Answer": 4, "question_Number": 15, "namespace": 'Covid_Holisticv1.2',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 16, "namespace": 'Covid_Holisticv1.2'}, 16],
        [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Evidence2020_03_21',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
        [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(80, 120)},
         {"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 7, "namespace": 'Covid_Reasoning'}, 7]
    ]
    # NOTE(review): adding these two cases to all_schema makes the test fail,
    # reason not yet understood -- they follow the same pattern as the entries
    # above. Kept here so the known failure isn't lost:
    # [{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21', 'highlighted_indices': test_utils.make_highlight_indices(10, 30)}, {"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
    # [{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning', 'highlighted_indices': test_utils.make_highlight_indices(10, 30)}, {"agreed_Answer": 1, "question_Number": 7, "namespace": 'Covid_Reasoning'}, 7]
    for case in all_schema:
        print(case)
        # source_task_id generated by smashing keyboard
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        for row in case:
            if isinstance(row, dict):  # skip the trailing question number
                iaa.add_row(row)
        iaa.export()
        schema_path = config['data_dir'] + '/schemas'
        dh_path = None  # not used by eval_dependency but still a required argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        # Should be only 1 output file per case, so just run the checks on
        # whatever the walk finds. (`_dirs` avoids shadowing the builtin `dir`.)
        for _root, _dirs, files in os.walk(outpath):
            for file in files:
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
                # Child should have the parent's highlight when it has none itself.
                child = out_df[out_df['question_Number'] == case[-1]]
                hl = child['highlighted_indices'].iloc[0]
                assert len(hl) > 18
                assert '10' in hl
                assert '29' in hl
def test_he_low_info_true_low_counts(config):
    """Score one reasoning row against an argument TUA plus a holistic
    dependency row, and check the assessed point totals."""
    tua_path = test_utils.make_test_directory(
        config, 'he_tua_input_low_info_true_low_counts')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_input_low_info_true_low_counts')

    # One reasoning answer worth 5 points, highlighting characters 10-30.
    assessed = point_assignment(out_folder=scoring_path,
                                article_num='520',
                                source_task_id='practice_makes+[perfect')
    assessed.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 5,
        "Question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })

    # A single argument TUA covering the same span.
    argument_tua = tua(out_folder=tua_path,
                       article_num='520',
                       source_task_id='tua_task_id')
    argument_tua.add_row({
        'topic_name': 'argument',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'test1'
    })

    # Holistic dependency row (scientific discovery) tied to the same TUA.
    holistic = dep_iaa(out_folder=scoring_path,
                       source_task_id='doesnt matter',
                       article_num='520')
    holistic.add_row({
        "namespace": "Covid2_Holistic_2020_09_20",
        "agreed_Answer": 5,
        "question_Number": 1,
        "agreement_score": 1,
        "tua_uuid": 'test1'
    })
    holistic.export()

    points = eval_triage_scoring(argument_tua.df, assessed.df, scoring_path)
    assert len(points) == 2
    assert points['points'].sum() == 3
def test_point_assignment_source_in_weight_highlight(config):
    """Source TUAs nested strictly inside the weighted highlights: check the
    scaled point values pointSort assigns to each of the seven questions."""
    tua_path = test_utils.make_test_directory(config, 'pa_source_wh_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_wh_dep')

    # Seven weighted rows, each worth 9 points, with disjoint highlights
    # (0, 9), (10, 19), ...
    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    for q in range(1, 8):
        start, end = (q - 1) * 10, q * 10 - 1
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 9,
            "Question_Number": q,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end)
        })
    weight_df = weight.df

    # TUA spans sit one character inside each weighted span: (1, 8), (11, 18), ...
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    for q in range(1, 8):
        start, end = (q - 1) * 10 + 1, q * 10 - 2
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': start,
            'end_pos': end,
            'tua_uuid': str(q)
        })

    # One dependency row per TUA, all on question 8.
    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for q in range(1, 8):
        arg_dep.add_row({
            "namespace": "Covid_Sources_2002_03_20v2.1",
            "agreed_Answer": q,
            "question_Number": 8,
            "agreement_score": 1,
            "tua_uuid": str(q)
        })
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)

    assert len(weights) == 7
    # Expected scaling factors per answer, in question order.
    for idx, factor in enumerate([2, 1.5, 1, 0.5, 0, -0.5, 0]):
        assert weights['points'].iloc[idx] == 9 * factor
def test_point_assignment_source(config):
    """Source TUAs exactly matching the weighted highlights: verify that
    pointSort applies the factors from point_assignment_scaling_guide.csv."""
    tua_path = test_utils.make_test_directory(config, 'pa_source_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_dep')

    # Seven weighted rows worth 5 points each; highlights are (0, 9) for
    # question 1, (10, 19) for question 2, and so on.
    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    for q in range(1, 8):
        start, end = (q - 1) * 10, q * 10 - 1
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 5,
            "Question_Number": q,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end)
        })
    weight_df = weight.df

    # Each TUA has its own id and a unique, non-overlapping highlight span.
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    for q in range(1, 8):
        start, end = (q - 1) * 10, q * 10 - 1
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': start,
            'end_pos': end,
            'tua_uuid': str(q)
        })

    # One dependency row per TUA, all on question 8.
    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for q in range(1, 8):
        arg_dep.add_row({
            "namespace": "Covid_Sources_2002_03_20v2.1",
            "agreed_Answer": q,
            "question_Number": 8,
            "agreement_score": 1,
            "tua_uuid": str(q)
        })
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)
    print("WEIGHTS:", weights)

    assert len(weights) == 7
    # Scaling factors expected per answer, matching the scaling guide CSV.
    for idx, factor in enumerate([2, 1.5, 1, 0.5, 0, -0.5, 0]):
        assert weights['points'].iloc[idx] == 5 * factor
def test_he_vague_sources_false(config):
    """Score quoted-source TUAs against vague-source dependency rows.

    2800 characters is the standard article length; the scoring threshold is
    4 vague sources per 2800 characters. With an article length of 2900 the
    totals below should come out to -4 across 3 rows.
    """
    tua_path = test_utils.make_test_directory(config, 'he_tua_vague_sources_false')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_vague_sources_false')
    out_path = test_utils.make_test_directory(config, 'out_he_vague_sources_false')

    # A zero-point reasoning row to feed into the scorer.
    pa = point_assignment(out_folder=scoring_path,
                          article_num='520',
                          source_task_id='practice_makes+[perfect',
                          article_text_length=2900)
    pa.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 0,
        "Question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })
    pa.export()

    # Three source-task batches, all attached to tua3; each tuple is
    # (agreed_Answer, question_Number, highlight span).
    source_batches = [
        ('qs1', [(5, 2, (10, 30))]),  # scientific discovery
        ('qs2', [(6, 2, (15, 38)), (8, 5, (7, 27))]),
        ('qs3', [(7, 5, (15, 38))]),
    ]
    for task_id, rows in source_batches:
        src_dep = dep_iaa(out_folder=scoring_path,
                          source_task_id=task_id,
                          article_num='520',
                          article_text_length=2900)
        for answer, question, (start, end) in rows:
            src_dep.add_row({
                'namespace': 'Covid2_Sources_2002_09_20',
                'agreed_Answer': answer,
                "question_Number": question,
                'agreement_score': 1,
                'highlighted_indices': test_utils.make_highlight_indices(start, end),
                'tua_uuid': 'tua3'
            })
        src_dep.export()

    # Three quoted-source TUAs; only tua3 (900-1020) matters for the -2 check.
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id',
                  article_text_length=2900)
    for uuid, start, end in [('tua1', 10, 30), ('tua2', 50, 120), ('tua3', 900, 1020)]:
        new_tua.add_row({
            'topic_name': 'Quoted Sources',
            'start_pos': start,
            'end_pos': end,
            'tua_uuid': uuid
        })
    new_tua.export()

    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    points.to_csv(out_path + '/AssessedPoints.csv', encoding='utf-8')

    assert points['points'].sum() == -4
    assert len(points) == 3
    # The -2 row should highlight the full tua3 span.
    hl = points[points['points'] == -2]['highlighted_indices'].iloc[0]
    assert all(str(i) in hl for i in range(900, 1020))
def test_dep_parent1(config):
    """Parent has no highlight but the child does: after eval_dependency the
    child should still carry its own highlight.

    Each ``all_schema`` entry is [child row with highlight, parent row without,
    child question number to check].
    """
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent1')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent1')
    all_schema = [
        [{"agreed_Answer": 2, "question_Number": 4, "namespace": 'Covid_Sources_2002_03_20v2.1',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 3, "namespace": 'Covid_Sources_2002_03_20v2.1'}, 4],
        [{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning'}, 2]
    ]
    for case in all_schema:
        print(case)
        # source_task_id generated by smashing keyboard
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        iaa.add_row(case[1])  # parent first, then child with its highlight
        iaa.add_row(case[0])
        iaa.export()
        schema_path = config['data_dir'] + '/schemas'
        dh_path = None  # not used by eval_dependency but still a required argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        # Should be only 1 output file per case, so just run the checks on
        # whatever the walk finds. (`_dirs` avoids shadowing the builtin `dir`.)
        for _root, _dirs, files in os.walk(outpath):
            for file in files:
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
                assert len(out_df) == 2
                child = out_df[out_df['question_Number'] == case[2]]
                hl = child['highlighted_indices'].iloc[0]
                # The child's own highlight (indices 10..29) must survive.
                assert len(hl) > 18
                assert '10' in hl
                assert '29' in hl