def test_dep_parent1(config):
    """Dependency eval: when the CHILD has a highlight and the parent doesn't,
    the child keeps its own highlight.

    Each entry of all_schema is [child row (with highlight), parent row
    (without), child question number to check in the output].
    """
    #Test if parent doesn't have highlight, child does, child still have its highlight
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent1')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent1')
    # source_task_id generated by smashing keyboard
    all_schema = [
        [{"agreed_Answer": 2, "question_Number": 4,
          "namespace": 'Covid_Sources_2002_03_20v2.1',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 3,
          "namespace": 'Covid_Sources_2002_03_20v2.1'},
         4],
        [{"agreed_Answer": 1, "question_Number": 2,
          "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 1,
          "namespace": 'Covid_Reasoning'},
         2]
    ]
    for i in all_schema:
        print(i)
        # Parent row is added before the child row.
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        iaa.add_row(i[1])
        iaa.add_row(i[0])
        fin_path = iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None  #doesn't get used by dependency but is still an argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        for root, dir, files in os.walk(outpath):
            for file in files:
                #should be only 1 file for this case, so just run it on the only one
                # if there's more than 1 then you can get fancy
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
                #9 answer choices to a checklist question
                #This basically works for my first test, child should have parent's highlights if itself doesn't have any but its parent does, thx eric~.
                assert len(out_df) == 2
                # The child question's row must keep the 10..29 highlight span.
                q_three = out_df[out_df['question_Number'] == i[2]]
                hl = q_three['highlighted_indices'].iloc[0]
                assert len(hl) > 18
                assert '10' in hl
                assert '29' in hl
def test_dep_sample(config):
    """Dependency eval passes a highlighted parent answer's span to its child row."""
    input_dir = test_utils.make_test_directory(config, 'dep_sample')
    output_dir = test_utils.make_test_directory(config, 'out_dep_sample')
    # source_task_id generated by smashing keyboard
    task = IAA_task(out_folder=input_dir, source_task_id='kjncsa87nxao21899102j1j2')
    task.add_row({"agreed_Answer": 1, "question_Number": 1,
                  "namespace": 'Covid_Probability',
                  'highlighted_indices': test_utils.make_highlight_indices(10, 30)})
    task.add_row({"agreed_Answer": 3, "question_Number": 2,
                  "namespace": 'Covid_Probability'})
    task.export()
    schema_dir = config['data_dir'] + '/schemas'
    # The datahunt path is unused by dependency evaluation but still required.
    eval_dependency(None, input_dir, schema_dir, output_dir)
    for _, _, files in os.walk(output_dir):
        for name in files:
            # Only one output file is expected for this scenario.
            result = pd.read_csv(os.path.join(output_dir, name), encoding='utf-8')
            assert len(result) == 2
            # Question 2 (the child) should have inherited the 10..29 highlight.
            child = result[result['question_Number'] == 2]
            highlights = child['highlighted_indices'].iloc[0]
            assert len(highlights) > 18
            assert '10' in highlights
            assert '29' in highlights
def test_weighting_sample(config):
    """Three hand-picked evidence rows weight to a known total of -1 points."""
    out_path = test_utils.make_test_directory(config, 'weighting_sample_test')
    weight_out_folder = test_utils.make_test_directory(
        config, 'out_weighting_sample_test')
    weight_df = pd.read_csv(weight_path)
    iaa = dep_iaa(out_folder=out_path, source_task_id='weightsampletests')
    namespace = "Covid2_Evidence2020_09_20"
    # (question, answer, agreement) triples; per ./config/weight_key these
    # contribute -.5, -2 and +1.5 points respectively.
    samples = [(4, 2, 1), (8, 1, .5), (12, 1, .75)]
    for q_num, a_num, score in samples:
        iaa.add_row({"namespace": namespace, "agreed_Answer": a_num,
                     "question_Number": q_num, "agreement_score": score})
    iaa.export()
    # launch_Weighting returns the dataframe itself rather than a directory;
    # see Weighting.py for the underlying paths.
    weighting_out = launch_Weighting(out_path, weight_out_folder)
    points = weighting_out['agreement_adjusted_points']
    weighting_out.to_csv(weight_out_folder + "/Point_recs_.csv", encoding='utf-8')
    total = points.sum()
    print(weighting_out)
    assert total == -1
    assert len(weighting_out.index) == 3
    # Each output row must equal Point_Recommendation * agreement_score.
    for _, out_row in weighting_out.iterrows():
        q_num = out_row['Question_Number']
        a_num = out_row['Answer_Number']
        adjusted = out_row['agreement_adjusted_points']
        score = out_row['agreement_score']
        key = weight_df[weight_df['namespace'] == namespace]
        key = key[(key['Question_Number'] == q_num) &
                  (key['Answer_Number'] == a_num)]
        expected = key['Point_Recommendation'].iloc[0] * score
        assert adjusted == expected, "Q" + str(q_num) + "A" + str(a_num) + \
            " points: " + str(adjusted) + ", weight_df: " + str(expected)
def test_random_evidence(config):
    """Three randomly sampled evidence rows (agreement 1) get their exact key weights."""
    weight_df = pd.read_csv(weight_path)
    namespace = "Covid2_Evidence2020_09_20"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    out_path = test_utils.make_test_directory(
        config, 'weighting_evidence_random_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='3random')
    # Pick 3 random rows from the weight key; agreement 1 means the adjusted
    # points should equal the raw recommendation.
    for _, picked in weight_df.sample(3).iterrows():
        iaa.add_row({"namespace": namespace,
                     "agreed_Answer": picked['Answer_Number'],
                     "question_Number": picked['Question_Number'],
                     "agreement_score": 1})
    iaa.export()
    weighting_out = launch_Weighting(out_path)
    assert weighting_out.shape[0] == 3
    for index, out_row in weighting_out.iterrows():
        q_num = out_row['Question_Number']
        a_num = out_row['Answer_Number']
        adjusted = out_row['agreement_adjusted_points']
        score = out_row['agreement_score']
        key = weight_df[weight_df['namespace'] == namespace]
        key = key[(key['Question_Number'] == q_num) &
                  (key['Answer_Number'] == a_num)]
        expected = key['Point_Recommendation'].iloc[0] * score
        assert adjusted == expected, "Q" + str(q_num) + "A" + str(a_num) + \
            " points: " + str(adjusted) + ", weight_df: " + str(expected)
        print("evidence random row", index + 1, "checks out")
def test_iaa_checklist_diff_agree(config, tmpdir):
    """Checklist answers with different vote counts get different agreement scores."""
    test_path = test_utils.make_test_directory(config,
                                               'test_iaa_checklist_diff_agree')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='apply_to_all')
    # Contributor i votes for answers 0..i, so lower-numbered answers
    # accumulate more votes than higher-numbered ones.
    for voter in range(9):
        for answer in range(voter + 1):
            dh.add_row({'answer_label': 'T1.Q2.A' + str(answer),
                        'namespace': 'Covid2_Reasoning_2020_09_20',
                        'contributor_uuid': 'Daniel' + str(voter)})
    dh.export()
    schema_path = config['data_dir'] + '/schemas'
    iaa_out = calc_agreement_directory(test_path, schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for _, _, files in os.walk(iaa_out):
        for file in files:
            # Only one output file is expected for this case.
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            print(out_df['agreed_Answer'])
            answers = out_df['agreed_Answer'].tolist()
            assert int(answers[0]) == 1
            assert int(answers[2]) == 3
            # Differing vote counts must not collapse to one agreement score.
            assert out_df['agreement_score'][0] != out_df['agreement_score'][3]
            print("++++++++++++++")
            print(out_df['agreement_score'].tolist())
def test_iaa_constructor(config, tmpdir):
    """Two identical votes on one checklist answer produce the expected row count."""
    test_path = test_utils.make_test_directory(config, 'test_test_iaa_evi_q5')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='owhdnoicaunhcio32ewda')
    for contributor in ('Michael', 'Dwight'):
        dh.add_row({'answer_label': 'T1.Q2.A2',
                    'namespace': 'Covid2_Reasoning_2020_09_20',
                    'contributor_uuid': contributor})
    dh.export()
    schema_path = config['data_dir'] + '/schemas'
    iaa_out = calc_agreement_directory(test_path, schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for _, _, files in os.walk(iaa_out):
        for file in files:
            # Only one output file is expected for this case.
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            assert len(out_df) == 6
def test_dep_parent(config):
    """Dependency eval: when the PARENT has a highlight and children don't,
    the child rows inherit the parent's highlight.

    Each entry of all_schema lists one or more highlighted parent rows, the
    un-highlighted child row(s), and finally the child question number whose
    output row is checked for the inherited 10..29 (or 80..119) span.
    """
    #Test if parent has highlight, children don't, dep_iaa should have parent's highlight
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent')
    # source_task_id generated by smashing keyboard
    all_schema = [
        [{"agreed_Answer": 1, "question_Number": 1,
          "namespace": 'Covid_Probability',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 2,
          "namespace": 'Covid_Probability'},
         2],
        [{"agreed_Answer": 2, "question_Number": 1,
          "namespace": 'Covid_Languagev1.1',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 3,
          "namespace": 'Covid_Languagev1.1'},
         3],
        [{"agreed_Answer": 4, "question_Number": 15,
          "namespace": 'Covid_Holisticv1.2',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 16,
          "namespace": 'Covid_Holisticv1.2'},
         16],
        [{"agreed_Answer": 1, "question_Number": 1,
          "namespace": 'Covid_Evidence2020_03_21',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 2,
          "namespace": 'Covid_Evidence2020_03_21',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 3, "question_Number": 4,
          "namespace": 'Covid_Evidence2020_03_21'},
         4],
        [{"agreed_Answer": 1, "question_Number": 1,
          "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(80, 120)},
         {"agreed_Answer": 1, "question_Number": 2,
          "namespace": 'Covid_Reasoning',
          'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
         {"agreed_Answer": 1, "question_Number": 7,
          "namespace": 'Covid_Reasoning'},
         7]
    ]
    #The test fails if I add these two to the all_Schema list, which I don't understand why since I have been following the same logic
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1, "question_Number": 7, "namespace": 'Covid_Reasoning'}, 7]
    for i in all_schema:
        print(i)
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        # Only the dict entries are rows; the trailing int is the question to check.
        for row in i:
            if isinstance(row, dict):
                iaa.add_row(row)
        fin_path = iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None  #doesn't get used by dependency but is still an argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        for root, dir, files in os.walk(outpath):
            for file in files:
                #should be only 1 file for this case, so just run it on the only one
                # if there's more than 1 then you can get fancy
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
                #9 answer choices to a checklist question
                #This basically works for my first test, child should have parent's highlights if itself doesn't have any but its parent does, thx eric~.
                #assert len(out_df) == 2
                # The last list element is the child question number to verify.
                q_three = out_df[out_df['question_Number'] == i[-1]]
                hl = q_three['highlighted_indices'].iloc[0]
                assert len(hl) > 18
                assert '10' in hl
                assert '29' in hl
def test_he_low_info_true_low_counts(config):
    """Holistic-evaluation scoring: a 'low info' holistic answer adjusts points.

    Builds one scored reasoning row, an 'argument' TUA covering the same span,
    and a holistic dep-IAA row (answer 5 on question 1) tied to that TUA, then
    checks the rows and point total emitted by eval_triage_scoring.
    """
    tua_path = test_utils.make_test_directory(
        config, 'he_tua_input_low_info_true_low_counts')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_input_low_info_true_low_counts')
    #out_path = test_utils.make_test_directory(config, 'out_he_low_info_true_low_counts')
    pa = point_assignment(out_folder=scoring_path,
                          article_num='520',
                          source_task_id='practice_makes+[perfect')
    pa.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 5,
        "Question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    new_tua.add_row({
        'topic_name': 'argument',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'test1'
    })
    hol_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    #scientific discovery
    hol_dep.add_row({
        "namespace": "Covid2_Holistic_2020_09_20",
        "agreed_Answer": 5,
        "question_Number": 1,
        "agreement_score": 1,
        "tua_uuid": 'test1'
    })
    hol_dep.export()
    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    #points.to_csv(out_path+'/AssessedPoints.csv', encoding = 'utf-8')
    # The original 5-point row should be adjusted down to a total of 3.
    assert len(points) == 2
    assert points['points'].sum() == 3
def test_import_tags_iaa_when_not_adj(config, tmpdir):
    """Import-tags keeps IAA rows whose task was never adjudicated."""
    iaa_path = test_utils.make_test_directory(config,
                                              'imptags_iaa_iaa_when_not_adj')
    adj_path = test_utils.make_test_directory(config,
                                              'imptags_adj_iaa_when_not_adj')
    schema_path = config['data_dir'] + '/schemas'
    schema_namespace = 'Covid_Evidence2020_03_21'
    # source_task_id generated by smashing keyboard
    adjudicated_id = 'adjudicated'
    iaa = IAA_task(out_folder=iaa_path, source_task_id=adjudicated_id)
    iaa.add_row({"question_Number": 1, "agreed_Answer": 3,
                 'namespace': schema_namespace})
    iaa.export()
    adj = adjudicator(out_folder=adj_path, source_task_id=adjudicated_id)
    adj.add_row({'topic_name': '01.02.02', 'namespace': schema_namespace})
    adj.export()
    # A second IAA task with no adjudication record at all.
    iaa = IAA_task(out_folder=iaa_path, source_task_id='not_adjudicated')
    iaa.add_row({"question_Number": 2, "agreed_Answer": 2,
                 'namespace': schema_namespace})
    iaa.export()
    i_tags = import_tags(iaa_path, adj_path, schema_path, tmpdir)
    print('temp dir is:', tmpdir)
    # i_tags is the directory holding all the import tags.
    for _, _, files in os.walk(i_tags):
        for file in files:
            # Produces 2 files with the same answer/questions in them.
            i_df = pd.read_csv(os.path.join(i_tags, file), encoding='utf-8')
            assert len(i_df) == 1
            assert test_utils.count_matching_rows(
                i_df, {'agreed_Answer': 2, 'question_Number': 2}) == 1
            assert test_utils.count_matching_rows(
                i_df, {'agreed_Answer': 1, 'question_Number': 3}) == 0
def test_op_ed_not_op_ed(config):
    """A non-op-ed holistic answer keeps reasoning rows on the standard weight column."""
    out_path = test_utils.make_test_directory(
        config, 'weighting_test_op_ed_not_op_ed')
    weight_out_folder = test_utils.make_test_directory(
        config, 'out_weighting_not_op_ed')
    weight_df = pd.read_csv(weight_path)
    # Holistic IAA: question 5 / answer 3 does NOT flag the article as op-ed.
    holi_iaa = dep_iaa(out_folder=out_path, source_task_id='holisticIAA')
    holi_iaa.add_row({"namespace": "Covid2_Holistic_2020_09_20",
                      "agreed_Answer": 3, "question_Number": 5,
                      "agreement_score": 1})
    holi_iaa.export()
    reasoning_namespace = "Covid2_Reasoning_2020_09_20"
    reas_iaa = dep_iaa(out_folder=out_path, source_task_id='reasoningIAA')
    reas_iaa.add_row({"namespace": reasoning_namespace, "agreed_Answer": 1,
                      "question_Number": 2, "agreement_score": .5})
    reas_iaa.export()
    # launch_Weighting returns the dataframe itself rather than a directory;
    # see Weighting.py for the underlying paths.
    weighting_out = launch_Weighting(out_path)
    weighting_out.to_csv(weight_out_folder + "/Point_recs_.csv", encoding='utf-8')
    key = weight_df[weight_df['namespace'] == reasoning_namespace]
    key = key[(key['Question_Number'] == 2) & (key['Answer_Number'] == 1)]
    # Expect the standard Point_Recommendation column to be used.
    recommendation = key['Point_Recommendation'].iloc[0]
    half_agree = weighting_out[weighting_out['agreement_score'] == .5]
    score = half_agree['agreement_score'].iloc[0]
    expected = recommendation * score
    adjusted = half_agree['agreement_adjusted_points'].iloc[0]
    assert adjusted == expected, "Q" + str(2) + "A" + str(1) + \
        " points: " + str(adjusted) + ", weight_df: " + str(expected)
def test_language_weighting(config):
    """Every language-schema (question, answer) weighted with a random agreement
    score must come out as Point_Recommendation * agreement_score.
    """
    #Import the csv containing the weights for each question
    weight_df = pd.read_csv(weight_path)
    #Set up paths for test data to be stored at
    out_path = test_utils.make_test_directory(
        config, 'weighting_language_calculation_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='languageweights')
    #Generate an IAA with random agreement scores for each question and answer in the schema
    namespace = "Covid_Languagev1.1"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    for index, row in weight_df.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        agree_score = random.random()
        namespace = "Covid_Languagev1.1"
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": answer_num,
            "question_Number": question_num,
            "agreement_score": agree_score
        })
    #Export the data as a dataframe
    fin_path = iaa.export()
    weighting_out = launch_Weighting(out_path)
    #Check that weights (point_recs) * agreement_scores = adjusted_scores
    assert weight_df.shape[0] == weighting_out.shape[0]
    # Vectorized check first: the whole column at once.
    # NOTE(review): assumes weighting_out preserves the key's row order — confirm.
    point_recs = weight_df['Point_Recommendation'].to_numpy()
    agreement_scores = weighting_out['agreement_score'].to_numpy()
    adjusted_points = weighting_out['agreement_adjusted_points'].to_numpy()
    print("scores:", agreement_scores[:5])
    print("weights:", point_recs[:5])
    print("adjusted:", adjusted_points[:5])
    assert np.array_equal(point_recs * agreement_scores, adjusted_points)
    # Then a row-by-row cross-check against the weight key.
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num) &
                (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)
    print("No differences found in language weighting.")
    print()
def test_all_no_parent_pass(config):
    """Every answer of every parentless Evidence question survives dependency eval."""
    in_dir = test_utils.make_test_directory(config, 'dep_all_orphans_pass')
    out_dir = test_utils.make_test_directory(config, 'dep_all_orphans_pass_out')
    # The questions with no parents in the Evidence schema are 1, 9, 12, 13, 14.
    answer_counts = {1: 3, 9: 3, 12: 4, 13: 10, 14: 10}
    iaa = IAA_task(out_folder=in_dir, source_task_id='batman')
    for question, count in answer_counts.items():
        for answer in range(1, count + 1):
            iaa.add_row({"namespace": "Covid_Evidence2020_03_21",
                         "question_Number": question,
                         "agreed_Answer": answer})
    # Question 3 has Question 2 as a parent, so it should never appear
    # in any dependency output.
    iaa.add_row({"namespace": "Covid_Evidence2020_03_21",
                 "question_Number": 3, "agreed_Answer": 1})
    iaa.export()
    schema_path = config['data_dir'] + '/schemas'
    # The datahunt path is unused by dependency evaluation but still required.
    eval_dependency(None, in_dir, schema_path, out_dir)
    for _, _, files in os.walk(out_dir):
        for file in files:
            out_df = pd.read_csv(os.path.join(out_dir, file), encoding='utf-8')
            # 3 + 3 + 4 + 10 + 10 orphan rows pass; the Q3 row is dropped.
            assert len(out_df) == 30
def test_bad_parent(config):
    """Children whose parent answered 'U' are all filtered out by dependency eval."""
    in_dir = test_utils.make_test_directory(config, 'dep_bad_dad')
    out_dir = test_utils.make_test_directory(config, 'dep_bad_dad_out')
    parents = {1: [2], 2: [3, 4, 5], 5: [6], 9: [10, 11]}
    answers_per_child = {2: 9, 3: 1, 4: 6, 5: 5, 6: 3, 7: 1, 8: 5, 10: 5, 11: 5}
    for parent, children in parents.items():
        iaa = IAA_task(out_folder=in_dir,
                       source_task_id='gru' + str(parent))
        # The parent's answer is unresolved ('U'), so none of its children
        # should survive the dependency check.
        iaa.add_row({"namespace": "Covid_Evidence2020_03_21",
                     "question_Number": parent, "agreed_Answer": 'U'})
        for child in children:
            for answer in range(1, answers_per_child[child] + 1):
                iaa.add_row({"namespace": "Covid_Evidence2020_03_21",
                             "question_Number": child,
                             "agreed_Answer": answer})
        iaa.export()
    schema_path = config['data_dir'] + '/schemas'
    # The datahunt path is unused by dependency evaluation but still required.
    eval_dependency(None, in_dir, schema_path, out_dir)
    for _, _, files in os.walk(out_dir):
        for file in files:
            out_df = pd.read_csv(os.path.join(out_dir, file), encoding='utf-8')
            assert len(out_df) == 0, "failing file is " + str(file)
def test_import_tags_adj_1_iaa_1_disagree(config, tmpdir):
    """One IAA row plus one disagreeing adjudication: the adjudicated tag wins."""
    iaa_path = test_utils.make_test_directory(
        config, 'imptags_iaa_1_iaa_1_adj_disagree')
    adj_path = test_utils.make_test_directory(
        config, 'imptags_adj_1_iaa_1_adj_disagree')
    schema_path = config['data_dir'] + '/schemas'
    schema_namespace = 'Covid_Evidence2020_03_21'
    # source_task_id generated by smashing keyboard
    shared_task_id = 'nc87wehcolfg6caanc9w'
    iaa = IAA_task(out_folder=iaa_path, source_task_id=shared_task_id)
    iaa.add_row({"question_Number": 1, "agreed_Answer": 3,
                 'namespace': schema_namespace})
    iaa.export()
    # Adjudication for the same task disagrees with the IAA row.
    adj = adjudicator(out_folder=adj_path, source_task_id=shared_task_id)
    adj.add_row({'topic_name': '01.02.02', 'namespace': schema_namespace})
    adj.export()
    i_tags = import_tags(iaa_path, adj_path, schema_path, tmpdir)
    print('temp dir is:', tmpdir)
    # i_tags is the directory holding all the import tags.
    for _, _, files in os.walk(i_tags):
        for file in files:
            # Only one output file is expected for this case.
            i_df = pd.read_csv(os.path.join(i_tags, file), encoding='utf-8')
            assert len(i_df) == 1
            assert test_utils.count_matching_rows(
                i_df, {'agreed_Answer': 2, 'question_Number': 2}) == 1
            assert test_utils.count_matching_rows(
                i_df, {'agreed_Answer': 1, 'question_Number': 3}) == 0
def test_evidence_weighting(config):
    """Every evidence-schema row weighted by a random agreement score matches the key."""
    weight_df = pd.read_csv(weight_path)
    out_path = test_utils.make_test_directory(
        config, 'weighting_evidence_calculation_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='evidenceweights')
    namespace = "Covid2_Evidence2020_09_20"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    # One IAA row per (question, answer) in the key, each with a random score.
    for _, key_row in weight_df.iterrows():
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": key_row['Answer_Number'],
            "question_Number": key_row['Question_Number'],
            "agreement_score": random.random()
        })
    iaa.export()
    weighting_out = launch_Weighting(out_path)
    assert weight_df.shape[0] == weighting_out.shape[0]
    # Vectorized check: adjusted points column == recommendations * scores.
    point_recs = weight_df['Point_Recommendation'].to_numpy()
    agreement_scores = weighting_out['agreement_score'].to_numpy()
    adjusted_points = weighting_out['agreement_adjusted_points'].to_numpy()
    print("scores:", agreement_scores[:5])
    print("weights:", point_recs[:5])
    print("adjusted:", adjusted_points[:5])
    assert np.array_equal(point_recs * agreement_scores, adjusted_points)
    # Row-by-row cross-check against the weight key.
    for _, out_row in weighting_out.iterrows():
        q_num = out_row['Question_Number']
        a_num = out_row['Answer_Number']
        adjusted = out_row['agreement_adjusted_points']
        score = out_row['agreement_score']
        key = weight_df[(weight_df['Question_Number'] == q_num) &
                        (weight_df['Answer_Number'] == a_num)]
        expected = key['Point_Recommendation'].iloc[0] * score
        assert adjusted == expected, "Q" + str(q_num) + "A" + str(a_num) + \
            " points: " + str(adjusted) + ", weight_df: " + str(expected)
    print("No differences found in evidence weighting.")
    print()
def test_iaa_evi_3q(config):
    """IAA_task rows round-trip through export and read back via read_csv."""
    out_path = test_utils.make_test_directory(config, 'iaa_evi_q5')
    # source_task_id generated by smashing keyboard
    iaa = IAA_task(out_folder=out_path,
                   source_task_id='kjncsa87nxao21899102j1j2')
    for question in (8, 5, 5):
        iaa.add_row({"agreed_Answer": 800, "question_Number": question})
    exported = iaa.export()
    read_iaa = pd.read_csv(exported, encoding='utf-8')
    assert len(read_iaa) == 3
    # Exactly one row for Q8, and all three rows carry answer 800.
    assert test_utils.count_matching_rows(
        read_iaa, {'agreed_Answer': 800, 'question_Number': 8}) == 1
    assert test_utils.count_matching_rows(read_iaa, {'agreed_Answer': 800}) == 3
def test_random_language(config):
    """Three randomly sampled language rows (agreement 1) get their exact key weights."""
    weight_df = pd.read_csv(weight_path)
    namespace = "Covid_Languagev1.1"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    out_path = test_utils.make_test_directory(
        config, 'weighting_language_random_test')
    # Build an IAA file from 3 random key rows, all with agreement score 1.
    iaa = dep_iaa(out_folder=out_path, source_task_id='3random')
    for _, picked in weight_df.sample(3).iterrows():
        iaa.add_row({"namespace": namespace,
                     "agreed_Answer": picked['Answer_Number'],
                     "question_Number": picked['Question_Number'],
                     "agreement_score": 1})
    iaa.export()
    weighting_out = launch_Weighting(out_path)
    assert weighting_out.shape[0] == 3
    # Each weighted row must equal Point_Recommendation * agreement_score.
    for index, out_row in weighting_out.iterrows():
        q_num = out_row['Question_Number']
        a_num = out_row['Answer_Number']
        adjusted = out_row['agreement_adjusted_points']
        score = out_row['agreement_score']
        key = weight_df[(weight_df['Question_Number'] == q_num) &
                        (weight_df['Answer_Number'] == a_num)]
        expected = key['Point_Recommendation'].iloc[0] * score
        assert adjusted == expected, "Q" + str(q_num) + "A" + str(a_num) + \
            " points: " + str(adjusted) + ", weight_df: " + str(expected)
        print("language random row", index + 1, "checks out")
def test_iaa_checklist_maxans(config, tmpdir): test_path = test_utils.make_test_directory(config, 'test_iaa_checklist_max_ans') # source_task_id generated by smashing keyboard dh = datahunt(out_folder=test_path, source_task_id='apply_to_all') for i in range(5): dh.add_row({ 'answer_label': 'T1.Q2.A7', 'namespace': 'Sources2021_02_23', 'contributor_uuid': 'mturk' + str(i) }) for i in range(5): dh.add_row({ 'answer_label': 'T1.Q10.A7', 'namespace': 'Sources2021_02_23', 'contributor_uuid': 'mturk' + str(i) }) fin_path = dh.export() data_path = config['data_dir'] schema_path = data_path + '/schemas' iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory=tmpdir) print(iaa_out) for root, dir, files in os.walk(iaa_out): for file in files: # should be only 1 file for this case, so just run it on the only one # if there's more than 1 then you can get fancy out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8') print(out_df['agreed_Answer']) temp = out_df['agreed_Answer'].tolist() #for i in range(9): #assert temp[i] == 'L' print("++++++++++++++") print(out_df['coding_perc_agreement'].tolist())
def test_highlights(config, tmpdir): test_path = test_utils.make_test_directory(config, 'test_iaa_highlights') # source_task_id generated by smashing keyboard dh = datahunt(out_folder=test_path, source_task_id='highlights') for i in range(10): dh.add_row({'answer_label': 'T1.Q2.A' + str((i % 3) + 1), 'namespace': 'Covid_Evidence2020_03_21', 'contributor_uuid': 'Daniel' + str(i), 'start_pos': 0, 'end_pos': 20}) fin_path = dh.export() data_path = config['data_dir'] schema_path = data_path + '/schemas' # out_path = test_utils.make_test_directory(config, 'out_iaa_hl_everythingpass') iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir, outDirectory=tmpdir) print(iaa_out) for root, dir, files in os.walk(iaa_out): for file in files: # should be only 1 file for this case, so just run it on the only one # if there's more than 1 then you can get fancy out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8') temp = out_df["highlighted_indices"] for i in range(8): counter = 1 listt = list(map(str, temp[i][1:len(temp[i]) - 1].split(','))) for j in range(len(listt)): listt[j] = listt[j].strip() if listt != ['']: for num in listt: assert int(num) == counter counter += 1 print("++++++++++++++") print(out_df['coding_perc_agreement'].tolist())
def test_op_ed_when_op_ed(config):
    """An op-ed holistic answer switches reasoning rows to the Op-Ed weight column."""
    out_path = test_utils.make_test_directory(
        config, 'weighting_test_op_ed_when_op_ed')
    weight_df = pd.read_csv(weight_path)
    # Holistic IAA: question 1 / answer 3 flags the article as an op-ed.
    holi_namespace = "Covid2_Holistic_2020_09_20"
    holi_iaa = dep_iaa(out_folder=out_path, source_task_id='holisticIAA')
    holi_iaa.add_row({"namespace": holi_namespace, "agreed_Answer": 3,
                      "question_Number": 1, "agreement_score": 1})
    holi_iaa.export()
    reasoning_namespace = "Covid2_Reasoning_2020_09_20"
    reas_iaa = dep_iaa(out_folder=out_path, source_task_id='reasoningIAA')
    reas_iaa.add_row({"namespace": reasoning_namespace, "agreed_Answer": 1,
                      "question_Number": 2, "agreement_score": .5})
    reas_iaa.export()
    # launch_Weighting returns the dataframe itself rather than a directory;
    # see Weighting.py for the underlying paths.
    weighting_out = launch_Weighting(out_path)
    key = weight_df[weight_df['namespace'] == reasoning_namespace]
    key = key[(key['Question_Number'] == 2) & (key['Answer_Number'] == 1)]
    # Expect the 'Op-Ed' column, not 'Point_Recommendation', to be applied.
    recommendation = key['Op-Ed'].iloc[0]
    half_agree = weighting_out[weighting_out['agreement_score'] == .5]
    score = half_agree['agreement_score'].iloc[0]
    expected = recommendation * score
    adjusted = half_agree['agreement_adjusted_points'].iloc[0]
    assert adjusted == expected, "Q" + str(2) + "A" + str(1) + \
        " points: " + str(adjusted) + ", weight_df: " + str(expected)
def test_master(config):
    """End-to-end pipeline test: builds thirteen datahunt files plus a TUA set
    for article 520, runs calculate_scores_master, and checks the final
    visualization point total lands in (-31.1, -31).
    """
    dh_path = test_utils.make_test_directory(config, 'mn_dh_')
    iaa_path = test_utils.make_test_directory(config, 'out_mn_iaa')
    scoring_path = test_utils.make_test_directory(config, 'out_mn_scoring')
    tua_path = test_utils.make_test_directory(config, 'mn_tua_')
    viz_path = test_utils.make_test_directory(config, 'out_mn_viz')
    # dh1: triangular vote pattern -- contributor i votes for answers 0..i.
    dh = datahunt(out_folder=dh_path, source_task_id='dh1', article_num='520',
                  article_text_length=2900)
    for i in range(9):
        for j in range(i + 1):
            dh.add_row({
                'answer_label': 'T1.Q2.A' + str(j),
                'namespace': 'Covid2_Reasoning_2020_09_20',
                'contributor_uuid': 'Daniel' + str(i)
            })
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh2', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Probability2020_09_20', 20, 50,
            start_shift=lambda x: x + 5, end_shift=lambda x: x * 2)
    # NOTE(review): dh2 is never exported -- confirm whether this is intentional;
    # exporting it would likely change the point-total assertion below.
    dh = datahunt(out_folder=dh_path, source_task_id='dh3', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Probability2020_09_20', 100, 150,
            start_shift=lambda x: x - 5, end_shift=lambda x: x + 5,
            answer=lambda x: 2)
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh4', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Probability2020_09_20', 100, 150,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: (x + 8) / 5)
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh5', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Holistic_2020_09_20', 0, 0,
            start_shift=lambda x: x, end_shift=lambda x: x,
            answer=lambda x: 1)
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh6', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Evidence2020_09_20', 100, 150,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: (x + 6) / 4)
    dh.export()
    # dh7-dh12: Sources datahunts, each tied to one of the qs1..qs6 TUAs below.
    dh = datahunt(out_folder=dh_path, source_task_id='dh7', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 100, 150,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs1')
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh8', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 500, 760,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs2')
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh9', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 100, 150,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs3')
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh10', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 500, 760,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs4')
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh11', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 100, 150,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs5')
    dh.export()
    dh = datahunt(out_folder=dh_path, source_task_id='dh12', article_num='520',
                  article_text_length=2900)
    make_dh(dh, 'Covid2_Sources_2002_09_20', 500, 760,
            start_shift=lambda x: x + 5, end_shift=lambda x: x + 5,
            answer=lambda x: 6, tua='qs6')
    dh.export()
    # dh13: Language datahunt -- two rounds of manual rows with differing
    # highlight spans plus one make_dh batch.
    dh = datahunt(out_folder=dh_path, source_task_id='dh13', article_num='520',
                  article_text_length=2900)
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 10 * j,
                'end_pos': 12 * j
            })
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 19 * j,
                'end_pos': 20 * j + 10
            })
    make_dh(
        dh,
        'Covid_Languagev1.1',
        1,
        20,
        start_shift=lambda x: x + 5,
        end_shift=lambda x: x + 5,
        answer=lambda x: (x + 2) % 4,
    )
    dh.export()
    # NOTE(review): duplicate export of dh13 -- presumably harmless; confirm.
    dh.export()
    new_tua = tua(out_folder=tua_path, article_num='520',
                  source_task_id='tua_task_id')
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'a1'
    })
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 40,
        'end_pos': 80,
        'tua_uuid': 'a2'
    })
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 40,
        'end_pos': 80,
        'tua_uuid': 'a3'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs1'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs2'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs3'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs4'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs5'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs6'
    })
    new_tua.export()
    # Run the full scoring pipeline over everything built above.
    calculate_scores_master(dh_path,
                            config['test_dir'] + config['texts_dir'],
                            config['IAA_config_dir'],
                            config['schema_dir'],
                            iaa_path,
                            scoring_path,
                            push_aws=False,
                            tua_dir=tua_path,
                            viz_dir=viz_path,
                            reporting=True)
    for root, dir, files in os.walk(viz_path):
        for file in files:
            viz_file = pd.read_csv(viz_path + '/' + file, encoding='utf-8')
            print(len(viz_file))
            points = viz_file['Points'].dropna()
            # Total points must land in the open interval (-31.1, -31).
            assert sum(points) < -31
            assert sum(points) > -31.1
def test_he_vague_sources_false(config):
    """Exercise triage scoring of vague-source answers against TUA highlights.

    Builds one point-assignment row, three source-dependency files (one per
    quoted-source task), and three Quoted Sources TUAs, then checks that
    eval_triage_scoring docks 4 points total across 3 rows and that the row
    docked 2 points carries tua3's (900, 1020) highlight range.
    """
    tua_path = test_utils.make_test_directory(config, 'he_tua_vague_sources_false')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_vague_sources_false')
    out_path = test_utils.make_test_directory(config,
                                              'out_he_vague_sources_false')
    # 2800 is considered a standard article; threshold for scoring is
    # 4 vague sources per 2800 characters.
    pa = point_assignment(out_folder=scoring_path,
                          article_num='520',
                          source_task_id='practice_makes+[perfect',
                          article_text_length=2900)
    pa.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 0,
        'Question_Number': 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })
    pa.export()

    # One dep_iaa export per source task; each tuple is
    # (agreed_Answer, question_Number, highlight span).
    source_answers = {
        'qs1': [(5, 2, (10, 30))],  # scientific discovery
        'qs2': [(6, 2, (15, 38)), (8, 5, (7, 27))],
        'qs3': [(7, 5, (15, 38))],
    }
    for task_id, rows in source_answers.items():
        src_dep = dep_iaa(out_folder=scoring_path,
                          source_task_id=task_id,
                          article_num='520',
                          article_text_length=2900)
        for answer, question, span in rows:
            src_dep.add_row({
                'namespace': 'Covid2_Sources_2002_09_20',
                'agreed_Answer': answer,
                'question_Number': question,
                'agreement_score': 1,
                'highlighted_indices': test_utils.make_highlight_indices(*span),
                'tua_uuid': 'tua3'
            })
        src_dep.export()

    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id',
                  article_text_length=2900)
    for uuid, (start, end) in (('tua1', (10, 30)),
                               ('tua2', (50, 120)),
                               ('tua3', (900, 1020))):
        new_tua.add_row({
            'topic_name': 'Quoted Sources',
            'start_pos': start,
            'end_pos': end,
            'tua_uuid': uuid
        })
    new_tua.export()

    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    points.to_csv(out_path + '/AssessedPoints.csv', encoding='utf-8')
    assert points['points'].sum() == -4
    assert len(points) == 3
    # The row docked 2 points should carry tua3's full highlight range.
    hl = points[points['points'] == -2]['highlighted_indices'].iloc[0]
    assert all(str(i) in hl for i in range(900, 1020))
raise NameError( 'Params', params, ' must include a value for namespace, Question_Number, and Answer_Number, and agreement_adjusted_points' ) new_row['schema'] = self.schema return new_row def set_out_name(self, filetype, source_task_id): return 'SortedPts.csv' if __name__ == '__main__': #this is broken cause it's not a path data with open('test_config.json') as json_file: config = json.load(json_file) dh_path = test_utils.make_test_directory(config, 'mn_dh_') dh = datahunt(out_folder=dh_path, source_task_id='dh13', article_num='520', article_text_length=2900) for i in range(9): for j in range(7): dh.add_row({ 'answer_label': 'T1.Q1.A' + str(j), 'namespace': 'Covid_Languagev1.1', 'contributor_uuid': 'User' + str(i), 'start_pos': 10 * j, 'end_pos': 12 * j }) for i in range(9): for j in range(7):
def test_point_assignment_source(config):
    """Check that pointSort scales source-matched weights per the guide.

    Seven questions each contribute a 5-point weighted row whose highlight
    exactly matches one source TUA; pointSort should multiply each row's
    points by the factor from point_assignment_scaling_guide.csv.
    """
    tua_path = test_utils.make_test_directory(config, 'pa_source_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_dep')

    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    # Note that each TUA should have its own ID and a unique set of
    # highlights (TUA highlights should not overlap!).
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for question in range(1, 8):
        # Creates highlights (0, 9) for question 1, (10, 19) for 2, etc.
        start, end = (question - 1) * 10, question * 10 - 1
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 5,
            'Question_Number': question,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end)
        })
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': start,
            'end_pos': end,
            'tua_uuid': str(question)
        })
        arg_dep.add_row({
            'namespace': 'Covid_Sources_2002_03_20v2.1',
            'agreed_Answer': question,
            'question_Number': 8,
            'agreement_score': 1,
            'tua_uuid': str(question)
        })
    weight_df = weight.df
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)
    print("WEIGHTS:", weights)
    assert len(weights) == 7
    # Ensure all point assignments are accurate with
    # point_assignment_scaling_guide.csv.
    for idx, multiplier in enumerate((2, 1.5, 1, 0.5, 0, -0.5, 0)):
        assert weights['points'].iloc[idx] == 5 * multiplier
def test_point_assignment_source_in_weight_highlight(config):
    """Check pointSort scaling when TUAs sit strictly inside weight highlights.

    Same shape as test_point_assignment_source, but each TUA span is inset by
    one character on each side of the weighted row's highlight, and rows are
    worth 9 points; the scaling-guide multipliers should still apply.
    """
    tua_path = test_utils.make_test_directory(config, 'pa_source_wh_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_wh_dep')

    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for question in range(1, 8):
        # Weight highlights: (0, 9), (10, 19), ...
        start, end = (question - 1) * 10, question * 10 - 1
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 9,
            'Question_Number': question,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(start, end)
        })
        # TUA spans inset inside the weight highlight: (1, 8), (11, 18), ...
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': start + 1,
            'end_pos': end - 1,
            'tua_uuid': str(question)
        })
        arg_dep.add_row({
            'namespace': 'Covid_Sources_2002_03_20v2.1',
            'agreed_Answer': question,
            'question_Number': 8,
            'agreement_score': 1,
            'tua_uuid': str(question)
        })
    weight_df = weight.df
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)
    assert len(weights) == 7
    # Point assignments must match point_assignment_scaling_guide.csv.
    for idx, multiplier in enumerate((2, 1.5, 1, 0.5, 0, -0.5, 0)):
        assert weights['points'].iloc[idx] == 9 * multiplier
def test_checklist_coding_multiple_hl(config):
    """Checklist question where multiple answer choices carry highlights.

    One contributor picks answer 2 of T1.Q2 (over two spans) while four
    contributors pick answer 4 over the same (30, 80) span.  The IAA output
    should agree on answer 4, not answer 2, and the agreed row should carry
    every index of the (30, 80) highlight.
    """
    test_path = test_utils.make_test_directory(
        config, 'test_iaa_checklist_coding_multiple_hl')
    out_path = test_utils.make_test_directory(
        config, 'out_test_iaa_checklist_coding_multiple_hl')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='highlights')
    # C1 alone picks answer 2, over two separate spans.
    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C1',
        'start_pos': 0,
        'end_pos': 20
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C1',
        'start_pos': 30,
        'end_pos': 80
    })
    # Four contributors agree on answer 4 over the same (30, 80) span.
    for contributor in ('C1', 'C2', 'C3', 'C5'):
        dh.add_row({
            'answer_label': 'T1.Q2.A4',
            'namespace': 'Covid2_Reasoning_2020_09_20',
            'contributor_uuid': contributor,
            'start_pos': 30,
            'end_pos': 80
        })
    dh.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'
    # out_path = test_utils.make_test_directory(config, 'out_iaa_hl_everythingpass')
    iaa_out = calc_agreement_directory(test_path,
                                       schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=out_path)
    # `dirs`, not `dir`: don't shadow the builtin.
    for root, dirs, files in os.walk(out_path):
        for file in files:
            # Should be only 1 file for this case, so just run it on the only
            # one; if there's more than 1 then you can get fancy.
            # Join with `root` — the directory the file was actually found
            # in — instead of iaa_out, which may differ from out_path.
            out_df = pd.read_csv(os.path.join(root, file), encoding='utf-8')
            out_df['agreed_Answer'] = out_df['agreed_Answer'].apply(str)
            answers = out_df['agreed_Answer']
            # The column was just cast to str, so compare against '4';
            # isin([4]) with an int was always all-False and misleading.
            print('answers', answers.isin(['4']))
            assert not answers.isin(['2']).any()
            assert len(out_df) == 6
            assert answers.isin(['4']).any()
            row_4 = out_df[out_df['agreed_Answer'] == '4']
            print(row_4.columns)
            print("row 4\n", row_4)
            print(row_4['highlighted_indices'])
            r4_hl = row_4['highlighted_indices'].iloc[0]
            # Every index in the agreed span must appear in the highlight.
            assert all(str(i) in r4_hl for i in range(30, 80))