load_json()

# Raw intervention delivery values, transposed into the single column
# "int_who_raw".
InterventionDelivery = get_data(intervention_delivery_output)
interventiondelivery_df = pd.DataFrame(InterventionDelivery).T
interventiondelivery_df.columns = ["int_who_raw"]

# Highlighted text for the same attribute -> "int_who_ht".
InterventionDelivery_HT = highlighted_text(intervention_delivery_output)
InterventionDelivery_HT_df = pd.DataFrame(InterventionDelivery_HT).T
InterventionDelivery_HT_df.columns = ["int_who_ht"]

# User comments for the same attribute -> "int_who_info".
InterventionDelivery_Comments = comments(intervention_delivery_output)
InterventionDelivery_Comments_df = pd.DataFrame(
    InterventionDelivery_Comments
).T
InterventionDelivery_Comments_df.columns = ["int_who_info"]

# Put the three single-column frames side by side.
intervention_delivery_df = pd.concat(
    [
        interventiondelivery_df,
        InterventionDelivery_HT_df,
        InterventionDelivery_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines and colons (in that order) with a
# single space so they cannot act as escape/separator characters later.
for _token in ("\r", "\n", ":"):
    intervention_delivery_df.replace(_token, " ", regex=True, inplace=True)
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_objectives_output
import pandas as pd

# load json file
load_json()

# Highlighted text for the intervention objectives attribute, transposed
# into the single column "int_objec_ht".
Intervention_ObjectivesHT = highlighted_text(intervention_objectives_output)
Intervention_ObjectivesHT_df = pd.DataFrame(Intervention_ObjectivesHT).T
Intervention_ObjectivesHT_df.columns = ["int_objec_ht"]

# User comments for the intervention objectives attribute
# -> "int_objec_info".
Intervention_Objectives_Comments = comments(intervention_objectives_output)
Intervention_Objectives_Comments_df = pd.DataFrame(
    Intervention_Objectives_Comments
).T
Intervention_Objectives_Comments_df.columns = ["int_objec_info"]

# Put the two single-column frames side by side.
intervention_objectives_df = pd.concat(
    [Intervention_ObjectivesHT_df, Intervention_Objectives_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines, colons and semicolons (in that
# order) with a single space.
for _token in ("\r", "\n", ":", ";"):
    intervention_objectives_df.replace(_token, " ", regex=True, inplace=True)
# Raw intervention organisation type values, transposed into the single
# column "int_prov_raw".
InterventionOrgType = get_data(intervention_organisation_type_output)
InterventionOrgType_df = pd.DataFrame(InterventionOrgType).T
InterventionOrgType_df.columns = ["int_prov_raw"]

# Highlighted text for the same attribute -> "int_prov_ht".
InterventionOrgType_HT = highlighted_text(
    intervention_organisation_type_output
)
InterventionOrgType_HT_df = pd.DataFrame(InterventionOrgType_HT).T
InterventionOrgType_HT_df.columns = ["int_prov_ht"]

# User comments for the same attribute -> "int_prov_info".
InterventionOrgType_Comments = comments(intervention_organisation_type_output)
InterventionOrgType_Comments_df = pd.DataFrame(InterventionOrgType_Comments).T
InterventionOrgType_Comments_df.columns = ["int_prov_info"]

# Put the three single-column frames side by side.
intervention_org_type = pd.concat(
    [
        InterventionOrgType_df,
        InterventionOrgType_HT_df,
        InterventionOrgType_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns and newlines (in that order) with a single space.
for _token in ("\r", "\n"):
    intervention_org_type.replace(_token, " ", regex=True, inplace=True)
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_frequency_output
import pandas as pd

# load json file
load_json()

# Highlighted text for the intervention frequency attribute, transposed
# into the single column "int_freq_ht".
InterventionFrequency_HT = highlighted_text(intervention_frequency_output)
InterventionFrequency_HT_df = pd.DataFrame(InterventionFrequency_HT).T
InterventionFrequency_HT_df.columns = ["int_freq_ht"]

# User comments for the same attribute -> "int_freq_info".
InterventionFrequency_Comments = comments(intervention_frequency_output)
InterventionFrequency_Comments_df = pd.DataFrame(
    InterventionFrequency_Comments
).T
InterventionFrequency_Comments_df.columns = ["int_freq_info"]

# Put the two single-column frames side by side.
intervention_frequency_df = pd.concat(
    [InterventionFrequency_HT_df, InterventionFrequency_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines, colons and semicolons (in that
# order) with a single space so they cannot act as escape/separator
# characters later.
for _token in ("\r", "\n", ":", ";"):
    intervention_frequency_df.replace(_token, " ", regex=True, inplace=True)
from Main import highlighted_text, comments
from AttributeIDList import gender_split_output
import pandas as pd

# User comments for the gender split attribute, transposed into the single
# column "Gender_Split_comments".
gender_split = comments(gender_split_output)
gender_split_df = pd.DataFrame(gender_split).T
gender_split_df.columns = ["Gender_Split_comments"]

# Highlighted text for the gender split attribute -> "Gender_Split_HT".
# NOTE: despite the "_comments" variable names, these hold highlighted text.
gender_split_comments = highlighted_text(gender_split_output)
gender_split_comments_df = pd.DataFrame(gender_split_comments).T
gender_split_comments_df.columns = ["Gender_Split_HT"]

# Put both frames side by side; the result rebinds gender_split_df.
gender_split_df = pd.concat(
    [gender_split_df, gender_split_comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns and newlines (in that order) with a single space.
for _token in ("\r", "\n"):
    gender_split_df.replace(_token, " ", regex=True, inplace=True)

# Missing cells become the literal string "NA".
gender_split_df.fillna("NA", inplace=True)

# Write the result to gender_split.csv without the index column.
gender_split_df.to_csv("gender_split.csv", index=False)
# load json file
load_json()

###########################
# INTERVENTION GROUP NUMBER
###########################

# Highlighted text for the intervention group number, transposed into the
# single column "n_treat_ht".
InterventionGroupNumber_HT = highlighted_text(intervention_group_number)
InterventionGroupNumber_HT_df = pd.DataFrame(InterventionGroupNumber_HT).T
InterventionGroupNumber_HT_df.columns = ["n_treat_ht"]

# User comments for the intervention group number -> "n_treat_info".
InterventionGroupNumber_comments = comments(intervention_group_number)
InterventionGroupNumber_comments_df = pd.DataFrame(
    InterventionGroupNumber_comments
).T
InterventionGroupNumber_comments_df.columns = ["n_treat_info"]

##################################
# INTERVENTION GROUP PRE-TEST MEAN
##################################

# Highlighted text for the intervention group pre-test mean, transposed.
InterventionGroupPretestMean_HT = highlighted_text(
    intervention_group_pretest_mean
)
InterventionGroupPretestMean_HT_df = pd.DataFrame(
    InterventionGroupPretestMean_HT
).T
load_json()

# Raw intervention teaching approach values, transposed into the single
# column "int_approach_raw".
InterventionTeachingApproach = get_data(intervention_teaching_approach)
InterventionTeachingApproach_df = pd.DataFrame(InterventionTeachingApproach).T
InterventionTeachingApproach_df.columns = ["int_approach_raw"]

# Highlighted text for the same attribute -> "int_approach_ht".
InterventionTeachingApproach_HT = highlighted_text(
    intervention_teaching_approach
)
InterventionTeachingApproach_HT_df = pd.DataFrame(
    InterventionTeachingApproach_HT
).T
InterventionTeachingApproach_HT_df.columns = ["int_approach_ht"]

# User comments for the same attribute -> "int_approach_info".
InterventionTeachingApproach_Comments = comments(
    intervention_teaching_approach
)
InterventionTeachingApproach_Comments_df = pd.DataFrame(
    InterventionTeachingApproach_Comments
).T
InterventionTeachingApproach_Comments_df.columns = ["int_approach_info"]

# Put the three single-column frames side by side.
intervention_teaching_approach_df = pd.concat(
    [
        InterventionTeachingApproach_df,
        InterventionTeachingApproach_HT_df,
        InterventionTeachingApproach_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines and colons (in that order) with a
# single space.
for _token in ("\r", "\n", ":"):
    intervention_teaching_approach_df.replace(
        _token, " ", regex=True, inplace=True
    )
load_json()

# Raw randomisation values, transposed into the single column "rand_raw".
randomisation = get_data(randomisation_details)
randomisation_df = pd.DataFrame(randomisation)
randomisation_df = randomisation_df.T
randomisation_df.columns = ["rand_raw"]

# Highlighted text for the randomisation attribute -> "rand_ht".
randomisation_HT = highlighted_text(randomisation_details)
randomisation_details_df = pd.DataFrame(randomisation_HT)
randomisation_details_df = randomisation_details_df.T
randomisation_details_df.columns = ["rand_ht"]

# User comments for the randomisation attribute -> "rand_info".
randomisation_Comments = comments(randomisation_details)
randomisation_Comments_df = pd.DataFrame(randomisation_Comments)
randomisation_Comments_df = randomisation_Comments_df.T
randomisation_Comments_df.columns = ["rand_info"]

# Put the three single-column frames side by side; the result rebinds
# randomisation_df.
randomisation_df = pd.concat(
    [randomisation_df, randomisation_details_df, randomisation_Comments_df],
    axis=1,
    sort=False)

# Remove the square brackets by taking the first element of each
# "rand_raw" cell. This must happen BEFORE blanks are filled: `.str[0]`
# applied to the placeholder string "NA" would truncate it to "N", and
# any NaN that `.str[0]` itself produces (e.g. for an empty cell) would
# otherwise be left unfilled.
randomisation_df['rand_raw'] = randomisation_df['rand_raw'].str[0]

# Missing cells become the literal string "NA".
randomisation_df.fillna("NA", inplace=True)
# load json file
load_json()

###########################
# CONTROL GROUP NUMBER
###########################

# Highlighted text for the second control group's number, transposed into
# the single column "n_cont2_ht".
ControlGroupNumber_HT = highlighted_text(control_group_two_number)
ControlGroupNumber_HT_df = pd.DataFrame(ControlGroupNumber_HT).T
ControlGroupNumber_HT_df.columns = ["n_cont2_ht"]

# User comments for the second control group's number -> "n_cont2_info".
ControlGroupNumber_comments = comments(control_group_two_number)
ControlGroupNumber_comments_df = pd.DataFrame(ControlGroupNumber_comments).T
ControlGroupNumber_comments_df.columns = ["n_cont2_info"]

#################################
# CONTROL GROUP PRE-TEST MEAN
#################################

# Highlighted text for the second control group's pre-test mean
# -> "pre_c2_mean_ht".
ControlGroupPretestMean_HT = highlighted_text(control_group_two_pretest_mean)
ControlGroupPretestMean_HT_df = pd.DataFrame(ControlGroupPretestMean_HT).T
ControlGroupPretestMean_HT_df.columns = ["pre_c2_mean_ht"]

# Get Control Group Pre-test Mean comments
curriculumsubjects_df["test_subject_writing"] = curriculumsubjects_df["test_subject_raw"].map(set(['Writing']).issubset).astype(int) curriculumsubjects_df["test_subject_mathematics"] = curriculumsubjects_df["test_subject_raw"].map(set(['Mathematics']).issubset).astype(int) curriculumsubjects_df["test_subject_science"] = curriculumsubjects_df["test_subject_raw"].map(set(['Science']).issubset).astype(int) curriculumsubjects_df["test_subject_social_studies"] = curriculumsubjects_df["test_subject_raw"].map(set(['Social studies']).issubset).astype(int) curriculumsubjects_df["test_subject_arts"] = curriculumsubjects_df["test_subject_raw"].map(set(['Arts']).issubset).astype(int) curriculumsubjects_df["test_subject_languages"] = curriculumsubjects_df["test_subject_raw"].map(set(['Languages']).issubset).astype(int) curriculumsubjects_df["test_subject_other_curriculum_test"] = curriculumsubjects_df["test_subject_raw"].map(set(['Other curriculum test']).issubset).astype(int) ''' # Get Country highlighted text curriculumsubjects_HT = highlighted_text(curriculum_subjects) curriculumsubjects_HT_df = pd.DataFrame(curriculumsubjects_HT) curriculumsubjects_HT_df = curriculumsubjects_HT_df.T curriculumsubjects_HT_df.columns = ["test_subject_ht"] # Get Country user comments curriculumsubjects_Comments = comments(curriculum_subjects) curriculumsubjects_Comments_df = pd.DataFrame(curriculumsubjects_Comments) curriculumsubjects_Comments_df = curriculumsubjects_Comments_df.T curriculumsubjects_Comments_df.columns = ["test_subject_info"] ########################### # OTHER OUTCOMES REPORTED # ########################### # get other outcomes data other_outcomes = get_data(other_outcomes_output) other_outcomes_df = pd.DataFrame(other_outcomes) other_outcomes_df = other_outcomes_df.T other_outcomes_df.columns = ["out_other_raw"] # get other outcomes highlighted text
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import number_of_classes_intervention_output
from AttributeIDList import number_of_classes_control_output
from AttributeIDList import number_of_classes_total_output
from AttributeIDList import number_of_classes_not_provided_output
import pandas as pd

# load json file
load_json()

##################################
# NUMBER OF CLASSES INTERVENTION #
##################################

# User comments for the number of intervention classes, transposed into
# the single column "class_treat_info".
number_of_classes_intervention_Comments = comments(
    number_of_classes_intervention_output
)
number_of_classes_intervention_Comments_df = pd.DataFrame(
    number_of_classes_intervention_Comments
).T
number_of_classes_intervention_Comments_df.columns = ["class_treat_info"]

# Highlighted text for the number of intervention classes
# -> "class_treat_ht".
number_of_classes_intervention_HT = highlighted_text(
    number_of_classes_intervention_output
)
number_of_classes_intervention_HT_df = pd.DataFrame(
    number_of_classes_intervention_HT
).T
number_of_classes_intervention_HT_df.columns = ["class_treat_ht"]

#############################
# NUMBER OF CLASSES CONTROL #
import pandas as pd

# Raw clustering values, transposed into the single column
# "clust_anal_raw".
clustering = get_data(clustering_output)
clustering_df = pd.DataFrame(clustering).T
clustering_df.columns = ["clust_anal_raw"]

# Highlighted text for the clustering attribute -> "clust_anal_ht".
clustering_HT = highlighted_text(clustering_output)
clustering_HT_df = pd.DataFrame(clustering_HT).T
clustering_HT_df.columns = ["clust_anal_ht"]

# User comments for the clustering attribute -> "clust_anal_info".
clustering_Comments = comments(clustering_output)
clustering_Comments_df = pd.DataFrame(clustering_Comments).T
clustering_Comments_df.columns = ["clust_anal_info"]

# Put the three single-column frames side by side; the result rebinds
# clustering_df.
clustering_df = pd.concat(
    [clustering_df, clustering_HT_df, clustering_Comments_df],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
clustering_df.fillna("NA", inplace=True)

# Write the result to clustering.csv without the index column.
clustering_df.to_csv("clustering.csv", index=False)
load_json()

#############################################
# Analyzed sample size for intervention group
#############################################

# Highlighted text, transposed into the single column "n_treat_ht".
sample_size_analyzed_intervention = highlighted_text(
    sample_size_analyzed_intervention_output
)
sample_size_analyzed_intervention_df = pd.DataFrame(
    sample_size_analyzed_intervention
).T
sample_size_analyzed_intervention_df.columns = ["n_treat_ht"]

# User comments -> "n_treat_info".
sample_size_analyzed_intervention_Comments = comments(
    sample_size_analyzed_intervention_output
)
sample_size_analyzed_intervention_Comments_df = pd.DataFrame(
    sample_size_analyzed_intervention_Comments
).T
sample_size_analyzed_intervention_Comments_df.columns = ["n_treat_info"]

############################################
# Analyzed sample size for the control group
############################################

# Highlighted text -> "n_cont_ht".
sample_size_analyzed_control = highlighted_text(
    sample_size_analyzed_control_output
)
sample_size_analyzed_control_df = pd.DataFrame(sample_size_analyzed_control).T
sample_size_analyzed_control_df.columns = ["n_cont_ht"]
###########################################

# Raw intervention time values, transposed into the single column
# "int_when_raw".
InterventionTime = get_data(intervention_time_output)
InterventionTime_df = pd.DataFrame(InterventionTime).T
InterventionTime_df.columns = ["int_when_raw"]

# Highlighted text for the same attribute -> "int_when_ht".
InterventionTime_HT = highlighted_text(intervention_time_output)
InterventionTime_HT_df = pd.DataFrame(InterventionTime_HT).T
InterventionTime_HT_df.columns = ["int_when_ht"]

# User comments for the same attribute -> "int_when_info".
InterventionTime_Comments = comments(intervention_time_output)
InterventionTime_Comments_df = pd.DataFrame(InterventionTime_Comments).T
InterventionTime_Comments_df.columns = ["int_when_info"]

# Put the three single-column frames side by side.
intervention_time_df = pd.concat(
    [
        InterventionTime_df,
        InterventionTime_HT_df,
        InterventionTime_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines and colons (in that order) with a
# single space so they cannot act as escape/separator characters later.
for _token in ("\r", "\n", ":"):
    intervention_time_df.replace(_token, " ", regex=True, inplace=True)
load_json()

# Raw "intervention costs reported" values, transposed into the single
# column "int_cost_raw".
InterventionCosts = get_data(intervention_costs_reported)
InterventionCosts_df = pd.DataFrame(InterventionCosts).T
InterventionCosts_df.columns = ["int_cost_raw"]

# Highlighted text for the same attribute -> "int_cost_ht".
InterventionCosts_HT = highlighted_text(intervention_costs_reported)
InterventionCosts_HT_df = pd.DataFrame(InterventionCosts_HT).T
InterventionCosts_HT_df.columns = ["int_cost_ht"]

# User comments for the same attribute -> "int_cost_info".
InterventionCosts_Comments = comments(intervention_costs_reported)
InterventionCosts_Comments_df = pd.DataFrame(InterventionCosts_Comments).T
InterventionCosts_Comments_df.columns = ["int_cost_info"]

# Put the three single-column frames side by side.
intervention_costs_df = pd.concat(
    [
        InterventionCosts_df,
        InterventionCosts_HT_df,
        InterventionCosts_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns and newlines (in that order) with a single space.
for _token in ("\r", "\n"):
    intervention_costs_df.replace(_token, " ", regex=True, inplace=True)
load_json()

# Raw treatment group values, transposed into the single column
# "treat_group_raw".
treatmentgroup = get_data(treatment_group)
treatmentgroup_df = pd.DataFrame(treatmentgroup).T
treatmentgroup_df.columns = ["treat_group_raw"]

# Highlighted text for the same attribute -> "treat_group_ht".
treatmentgroup_HT = highlighted_text(treatment_group)
treatmentgroup_HT_df = pd.DataFrame(treatmentgroup_HT).T
treatmentgroup_HT_df.columns = ["treat_group_ht"]

# User comments for the same attribute -> "treat_group_info".
treatmentgroup_Comments = comments(treatment_group)
treatmentgroup_Comments_df = pd.DataFrame(treatmentgroup_Comments).T
treatmentgroup_Comments_df.columns = ["treat_group_info"]

# Put the three single-column frames side by side.
treatment_group_df = pd.concat(
    [treatmentgroup_df, treatmentgroup_HT_df, treatmentgroup_Comments_df],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
treatment_group_df.fillna("NA", inplace=True)

# save to disk
# NOTE: the to_csv call below sits inside a bare string literal, so it is
# not executed.
""" treatment_group_df.to_csv("treatmentgroup.csv", index=False) """
# Transpose the raw educational setting frame and label its single column.
edusetting_df = edusetting_df.T
edusetting_df.columns = ["int_setting_raw"]

# binarize educational setting data
# NOTE: the statements below sit inside a bare string literal, so they are
# not executed.
""" edusetting_df["int_setting_primary/elementary_school"] = edusetting_df["int_setting_raw"].map(set(['Primary/elementary school']).issubset).astype(int) edusetting_df["int_setting_middle_school"] = edusetting_df["int_setting_raw"].map(set(['Middle school']).issubset).astype(int) edusetting_df["int_setting_secondary/high_school"] = edusetting_df["int_setting_raw"].map(set(['Secondary/High school']).issubset).astype(int) """

# Highlighted text for the educational setting attribute
# -> "int_setting_ht".
edusetting_HT = highlighted_text(edu_setting_output)
edusetting_HT_df = pd.DataFrame(edusetting_HT).T
edusetting_HT_df.columns = ["int_setting_ht"]

# User comments for the educational setting attribute
# -> "int_setting_info".
edusetting_Comments = comments(edu_setting_output)
edusetting_Comments_df = pd.DataFrame(edusetting_Comments).T
edusetting_Comments_df.columns = ["int_setting_info"]

# Put the three single-column frames side by side.
educational_setting_df = pd.concat(
    [edusetting_df, edusetting_HT_df, edusetting_Comments_df],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
educational_setting_df.fillna("NA", inplace=True)

# save to disk
# Raw digital technology (inclusion) values, transposed into the single
# column "digit_tech_raw".
DigitalTechnology = get_data(intervention_approach_digital_technology)
DigitalTechnology_df = pd.DataFrame(DigitalTechnology).T
DigitalTechnology_df.columns = ["digit_tech_raw"]

# Highlighted text for the same attribute -> "digit_tech_ht".
DigitalTechnology_HT = highlighted_text(
    intervention_approach_digital_technology
)
DigitalTechnology_HT_df = pd.DataFrame(DigitalTechnology_HT).T
DigitalTechnology_HT_df.columns = ["digit_tech_ht"]

# User comments for the same attribute -> "digit_tech_info".
DigitalTechnology_Comments = comments(intervention_approach_digital_technology)
DigitalTechnology_Comments_df = pd.DataFrame(DigitalTechnology_Comments).T
DigitalTechnology_Comments_df.columns = ["digit_tech_info"]

###########################################
# PARENTS OR COMMUNITY VOLUNTEERS INCLUSION
###########################################

# Raw parents/community volunteers (inclusion) values
# -> "parent_partic_raw".
Parents_or_Community_Volunteers = get_data(
    intervention_approach_parents_or_community_volunteers
)
Parents_or_Community_Volunteers_df = pd.DataFrame(
    Parents_or_Community_Volunteers
).T
Parents_or_Community_Volunteers_df.columns = ["parent_partic_raw"]
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_duration_output
import pandas as pd

# load json file
load_json()

# Highlighted text for the intervention duration attribute, transposed
# into the single column "int_dur_ht".
InterventionDuration_HT = highlighted_text(intervention_duration_output)
InterventionDuration_HT_df = pd.DataFrame(InterventionDuration_HT).T
InterventionDuration_HT_df.columns = ["int_dur_ht"]

# User comments for the same attribute -> "int_dur_info".
InterventionDuration_Comments = comments(intervention_duration_output)
InterventionDuration_Comments_df = pd.DataFrame(
    InterventionDuration_Comments
).T
InterventionDuration_Comments_df.columns = ["int_dur_info"]

# Put the two single-column frames side by side.
intervention_duration_df = pd.concat(
    [InterventionDuration_HT_df, InterventionDuration_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines, colons and semicolons (in that
# order) with a single space so they cannot act as escape/separator
# characters later.
for _token in ("\r", "\n", ":", ";"):
    intervention_duration_df.replace(_token, " ", regex=True, inplace=True)
] ############################################## # get country data country = get_data(countries) country_df = pd.DataFrame(country) country_df = country_df.T country_df.columns = ["loc_country_raw"] # get country highlighted text country_HT = highlighted_text(countries) country_HT_df = pd.DataFrame(country_HT) country_HT_df = country_HT_df.T country_HT_df.columns = ["loc_country_ht"] # get country user comments country_Comments = comments(countries) country_Comments_df = pd.DataFrame(country_Comments) country_Comments_df = country_Comments_df.T country_Comments_df.columns = ["loc_country_info"] # concatenate data frames """ country_df = pd.concat( [country_df, country_HT_df, country_Comments_df], axis=1, sort=False) """ # fill blanks with NA country_df.fillna("NA", inplace=True) # save to disk country_df.to_csv("Country.csv", index=False)
more_location_info_HT_df.columns = ["More_Location_information_HT"] # Get More Location Information comments more_location_info_Comments = comments(more_location_info) more_location_info_Comments_df = pd.DataFrame(more_location_info_Comments) more_location_info_Comments_df = more_location_info_Comments_df.T more_location_info_Comments_df.columns = ["More_Location_Information_comments"] """ # Get Location Specific Information highlighted text location_specific_info_HT = highlighted_text(specific_to_location) location_specific_info_HT_df = pd.DataFrame(location_specific_info_HT) location_specific_info_HT_df = location_specific_info_HT_df.T location_specific_info_HT_df.columns = ["loc_spec_ht"] # Get Location Specific Information comments location_specific_info_Comments = comments(specific_to_location) location_specific_info_Comments_df = pd.DataFrame( location_specific_info_Comments) location_specific_info_Comments_df = location_specific_info_Comments_df.T location_specific_info_Comments_df.columns = ["loc_spec_info"] # Get Type of Location highlighted text type_of_location_info_HT = highlighted_text(type_of_location) type_of_location_info_HT_df = pd.DataFrame(type_of_location_info_HT) type_of_location_info_HT_df = type_of_location_info_HT_df.T type_of_location_info_HT_df.columns = ["loc_type_ht"] # Get Type of Location comments type_of_location_info_Comments = comments(type_of_location) type_of_location_info_Comments_df = pd.DataFrame( type_of_location_info_Comments)
import pandas as pd

# Raw baseline differences values, transposed into the single column
# "base_diff_raw".
baselinedifferences = get_data(baseline_differences_output)
baselinedifferences_df = pd.DataFrame(baselinedifferences).T
baselinedifferences_df.columns = ["base_diff_raw"]

# Highlighted text for the baseline differences attribute
# -> "base_diff_ht".
baselinedifferences_HT = highlighted_text(baseline_differences_output)
baselinedifferences_HT_df = pd.DataFrame(baselinedifferences_HT).T
baselinedifferences_HT_df.columns = ["base_diff_ht"]

# User comments for the baseline differences attribute
# -> "base_diff_info".
baselinedifferences_Comments = comments(baseline_differences_output)
baselinedifferences_Comments_df = pd.DataFrame(baselinedifferences_Comments).T
baselinedifferences_Comments_df.columns = ["base_diff_info"]

# Put the three single-column frames side by side.
baseline_differences_df = pd.concat(
    [
        baselinedifferences_df,
        baselinedifferences_HT_df,
        baselinedifferences_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Missing cells become the literal string "NA".
baseline_differences_df.fillna("NA", inplace=True)

# save to disk
# load json file
load_json()

#############################################
# Initial sample size for intervention group
#############################################

# Highlighted text, transposed into the single column "base_n_treat_ht".
sample_size_intervention_HT = highlighted_text(sample_size_intervention_output)
sample_size_intervention_HT_df = pd.DataFrame(sample_size_intervention_HT).T
sample_size_intervention_HT_df.columns = ["base_n_treat_ht"]

# User comments -> "base_n_treat_info".
sample_size_intervention_Comments = comments(sample_size_intervention_output)
sample_size_intervention_Comments_df = pd.DataFrame(
    sample_size_intervention_Comments
).T
sample_size_intervention_Comments_df.columns = ["base_n_treat_info"]

############################################
# Initial sample size for the control group
############################################

# Highlighted text -> "base_n_cont_ht".
sample_size_control_HT = highlighted_text(sample_size_control_output)
sample_size_control_HT_df = pd.DataFrame(sample_size_control_HT).T
sample_size_control_HT_df.columns = ["base_n_cont_ht"]
# load json file
load_json()

###########################
# CONTROL GROUP NUMBER
###########################

# Highlighted text for the control group number, transposed into the
# single column "n_cont_ht".
ControlGroupNumber_HT = highlighted_text(control_group_number)
ControlGroupNumber_HT_df = pd.DataFrame(ControlGroupNumber_HT).T
ControlGroupNumber_HT_df.columns = ["n_cont_ht"]

# User comments for the control group number -> "n_cont_info".
ControlGroupNumber_comments = comments(control_group_number)
ControlGroupNumber_comments_df = pd.DataFrame(ControlGroupNumber_comments).T
ControlGroupNumber_comments_df.columns = ["n_cont_info"]

#################################
# CONTROL GROUP PRE-TEST MEAN
#################################

# Highlighted text for the control group pre-test mean -> "pre_c_mean_ht".
ControlGroupPretestMean_HT = highlighted_text(control_group_pretest_mean)
ControlGroupPretestMean_HT_df = pd.DataFrame(ControlGroupPretestMean_HT).T
ControlGroupPretestMean_HT_df.columns = ["pre_c_mean_ht"]

# Get Control Group Pre-test Mean comments
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_description_output
import pandas as pd

# load json file
load_json()

# Highlighted text for the intervention description attribute, transposed
# into the single column "int_desc_ht".
Intervention_DescriptionHT = highlighted_text(intervention_description_output)
Intervention_DescriptionHT_df = pd.DataFrame(Intervention_DescriptionHT).T
Intervention_DescriptionHT_df.columns = ["int_desc_ht"]

# User comments for the same attribute -> "int_desc_info".
Intervention_Description_Comments = comments(intervention_description_output)
Intervention_Description_Comments_df = pd.DataFrame(
    Intervention_Description_Comments
).T
Intervention_Description_Comments_df.columns = ["int_desc_info"]

# Put the two single-column frames side by side.
intervention_description_df = pd.concat(
    [Intervention_DescriptionHT_df, Intervention_Description_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, newlines, colons and semicolons (in that
# order) with a single space.
for _token in ("\r", "\n", ":", ";"):
    intervention_description_df.replace(_token, " ", regex=True, inplace=True)
#################

# Raw "other outcomes" values, transposed into the single column
# "out_other_raw".
other_outcomes = get_data(other_outcomes_output)
other_outcomes_df = pd.DataFrame(other_outcomes).T
other_outcomes_df.columns = ["out_other_raw"]

# Highlighted text for the same attribute -> "out_other_ht".
other_outcomes_HT = highlighted_text(other_outcomes_output)
other_outcomes_HT_df = pd.DataFrame(other_outcomes_HT).T
other_outcomes_HT_df.columns = ["out_other_ht"]

# User comments for the same attribute -> "out_other_info".
other_outcomes_info = comments(other_outcomes_output)
other_outcomes_info_df = pd.DataFrame(other_outcomes_info).T
other_outcomes_info_df.columns = ["out_other_info"]

######################
# Additional outcomes
######################

# Raw "additional outcomes" values -> "out_info_raw".
additional_outcomes = get_data(additional_outcomes_output)
additional_outcomes_df = pd.DataFrame(additional_outcomes).T
additional_outcomes_df.columns = ["out_info_raw"]

# get additional outcomes highlighted text
from Main import load_json, comments, highlighted_text
from AttributeIDList import sample_size_output
import pandas as pd

# Builds sample_size_df: the user comments ("sample_analysed_info") side by
# side with the highlighted text ("sample_analysed_ht") for the sample size
# attribute.

# load the json file
load_json()

# user comments, transposed and labelled as a single column
sample_size_Comments = comments(sample_size_output)
sample_size_Comments_df = pd.DataFrame(sample_size_Comments).transpose()
sample_size_Comments_df.columns = ["sample_analysed_info"]

# highlighted text, transposed and labelled as a single column
sample_size_HT = highlighted_text(sample_size_output)
sample_size_HT_df = pd.DataFrame(sample_size_HT).transpose()
sample_size_HT_df.columns = ["sample_analysed_ht"]

# place both columns next to each other
sample_size_df = pd.concat(
    [sample_size_Comments_df, sample_size_HT_df],
    axis=1,
    sort=False,
)

# blank out problematic text: carriage returns and newlines become spaces
for _unwanted in ("\r", "\n"):
    sample_size_df.replace(_unwanted, " ", regex=True, inplace=True)

# missing values are written out as the literal string "NA"
sample_size_df.fillna("NA", inplace=True)
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_name_output
import pandas as pd

# Builds intervention_name_df: the highlighted text ("int_name_ht") side by
# side with the user comments ("int_name_info") for the intervention name
# attribute.

# load the json file
load_json()

# highlighted text, transposed and labelled as a single column
Intervention_NameHT = highlighted_text(intervention_name_output)
Intervention_NameHT_df = pd.DataFrame(Intervention_NameHT).transpose()
Intervention_NameHT_df.columns = ["int_name_ht"]

# user comments, transposed and labelled as a single column
Intervention_Name_Comments = comments(intervention_name_output)
Intervention_Name_Comments_df = pd.DataFrame(
    Intervention_Name_Comments).transpose()
Intervention_Name_Comments_df.columns = ["int_name_info"]

# place both columns next to each other
intervention_name_df = pd.concat(
    [Intervention_NameHT_df, Intervention_Name_Comments_df],
    axis=1,
    sort=False,
)

# blank out problematic text: every match is replaced by a single space,
# one pattern at a time, directly on the data frame
for _unwanted in ("\r", "\n", ":", ";"):
    intervention_name_df.replace(_unwanted, " ", regex=True, inplace=True)
from Main import load_json, get_outcome_lvl2, comments
from AttributeIDList import toolkit_strand_codes
import pandas as pd

# Builds toolkitstrand_df (toolkit strand data, one "out_strand_<n>" column
# per original column) and toolkitstrand_Comments_df (the matching comments).

# load the json file
load_json()

# toolkit strand data; unlike the comments below it is not transposed
toolkitstrand = get_outcome_lvl2(toolkit_strand_codes)
toolkitstrand_df = pd.DataFrame(toolkitstrand)

# toolkit strand comments, transposed and labelled as a single column
toolkitstrand_Comments = comments(toolkit_strand_codes)
toolkitstrand_Comments_df = pd.DataFrame(toolkitstrand_Comments).transpose()
# NOTE(review): "_info" carries no prefix, unlike the "out_strand_" data
# columns - confirm this column name is intended.
toolkitstrand_Comments_df.columns = ["_info"]

# missing values are written out as the literal string "NA"
toolkitstrand_df.fillna("NA", inplace=True)

# rename every column to "out_strand_<label + 1>"; how many there are depends
# on the number of outcomes (assumes integer column labels - TODO confirm)
toolkitstrand_df.columns = [
    "out_strand_{}".format(column + 1)
    for column in toolkitstrand_df.columns
]

# save to disk (disabled: the export below is only a string literal)
""" toolkitstrand_df.to_csv("toolkitstrand.csv", index=False) """
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import number_of_schools_intervention_output
from AttributeIDList import number_of_schools_control_output
from AttributeIDList import number_of_schools_total_output
from AttributeIDList import number_of_schools_not_provided_output
import pandas as pd

# load the json file
load_json()

##################################
# NUMBER OF SCHOOLS INTERVENTION #
##################################

# user comments for the intervention schools count, transposed and labelled
# as a single column
number_of_schools_intervention_Comments = comments(
    number_of_schools_intervention_output)
number_of_schools_intervention_Comments_df = pd.DataFrame(
    number_of_schools_intervention_Comments).transpose()
number_of_schools_intervention_Comments_df.columns = ["school_treat_info"]

# highlighted text for the intervention schools count, transposed and
# labelled as a single column
number_of_schools_intervention_HT = highlighted_text(
    number_of_schools_intervention_output)
number_of_schools_intervention_HT_df = pd.DataFrame(
    number_of_schools_intervention_HT).transpose()
number_of_schools_intervention_HT_df.columns = ["school_treat_ht"]

#############################
# NUMBER OF SCHOOLS CONTROL #