예제 #1
0
# load json file
load_json()

# Intervention delivery: main data. The frame is transposed so the
# extracted values sit in a single column, which is then labelled.
InterventionDelivery = get_data(intervention_delivery_output)
interventiondelivery_df = pd.DataFrame(InterventionDelivery).T
interventiondelivery_df.columns = ["int_who_raw"]

# Intervention delivery: highlighted text
InterventionDelivery_HT = highlighted_text(intervention_delivery_output)
InterventionDelivery_HT_df = pd.DataFrame(InterventionDelivery_HT).T
InterventionDelivery_HT_df.columns = ["int_who_ht"]

# Intervention delivery: user comments
InterventionDelivery_Comments = comments(intervention_delivery_output)
InterventionDelivery_Comments_df = pd.DataFrame(InterventionDelivery_Comments).T
InterventionDelivery_Comments_df.columns = ["int_who_info"]

# Put the three single-column frames side by side (axis=1).
intervention_delivery_df = pd.concat(
    [
        interventiondelivery_df,
        InterventionDelivery_HT_df,
        InterventionDelivery_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds and colons with a space, in place,
# one pattern at a time.
for _char in ("\r", "\n", ":"):
    intervention_delivery_df.replace(_char, " ", regex=True, inplace=True)
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_objectives_output
import pandas as pd

# load json file
load_json()

# Intervention objectives: highlighted text, transposed into one labelled
# column.
Intervention_ObjectivesHT = highlighted_text(intervention_objectives_output)
Intervention_ObjectivesHT_df = pd.DataFrame(Intervention_ObjectivesHT).T
Intervention_ObjectivesHT_df.columns = ["int_objec_ht"]

# Intervention objectives: user comments
Intervention_Objectives_Comments = comments(intervention_objectives_output)
Intervention_Objectives_Comments_df = pd.DataFrame(
    Intervention_Objectives_Comments).T
Intervention_Objectives_Comments_df.columns = ["int_objec_info"]

# Put the highlighted-text and comments columns side by side (axis=1).
intervention_objectives_df = pd.concat(
    [Intervention_ObjectivesHT_df, Intervention_Objectives_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds, colons and semicolons with a space,
# in place, one pattern at a time.
for _char in ("\r", "\n", ":", ";"):
    intervention_objectives_df.replace(_char, " ", regex=True, inplace=True)
예제 #3
0
# Intervention organisation type: main data, transposed into one labelled
# column.
InterventionOrgType = get_data(intervention_organisation_type_output)
InterventionOrgType_df = pd.DataFrame(InterventionOrgType).T
InterventionOrgType_df.columns = ["int_prov_raw"]

# Intervention organisation type: highlighted text
InterventionOrgType_HT = highlighted_text(intervention_organisation_type_output)
InterventionOrgType_HT_df = pd.DataFrame(InterventionOrgType_HT).T
InterventionOrgType_HT_df.columns = ["int_prov_ht"]

# Intervention organisation type: user comments
InterventionOrgType_Comments = comments(intervention_organisation_type_output)
InterventionOrgType_Comments_df = pd.DataFrame(InterventionOrgType_Comments).T
InterventionOrgType_Comments_df.columns = ["int_prov_info"]

# Put the three single-column frames side by side (axis=1).
intervention_org_type = pd.concat(
    [
        InterventionOrgType_df,
        InterventionOrgType_HT_df,
        InterventionOrgType_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns and line feeds with a space, in place.
for _char in ("\r", "\n"):
    intervention_org_type.replace(_char, " ", regex=True, inplace=True)
예제 #4
0
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_frequency_output
import pandas as pd

# load json file
load_json()

# Intervention frequency: highlighted text, transposed into one labelled
# column.
InterventionFrequency_HT = highlighted_text(intervention_frequency_output)
InterventionFrequency_HT_df = pd.DataFrame(InterventionFrequency_HT).T
InterventionFrequency_HT_df.columns = ["int_freq_ht"]

# Intervention frequency: user comments
InterventionFrequency_Comments = comments(intervention_frequency_output)
InterventionFrequency_Comments_df = pd.DataFrame(
    InterventionFrequency_Comments).T
InterventionFrequency_Comments_df.columns = ["int_freq_info"]

# Put the highlighted-text and comments columns side by side (axis=1).
intervention_frequency_df = pd.concat(
    [InterventionFrequency_HT_df, InterventionFrequency_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds, colons and semicolons with a space,
# in place, one pattern at a time.
for _char in ("\r", "\n", ":", ";"):
    intervention_frequency_df.replace(_char, " ", regex=True, inplace=True)
from Main import highlighted_text, comments
from AttributeIDList import gender_split_output
import pandas as pd
 
# Gender split: user comments (result of comments()), transposed into one
# labelled column.
gender_split = comments(gender_split_output)
gender_split_df = pd.DataFrame(gender_split).T
gender_split_df.columns = ["Gender_Split_comments"]

# Gender split: highlighted text. NOTE: despite its name, the
# gender_split_comments variable holds the highlighted_text() result.
gender_split_comments = highlighted_text(gender_split_output)
gender_split_comments_df = pd.DataFrame(gender_split_comments).T
gender_split_comments_df.columns = ["Gender_Split_HT"]

# Put both columns side by side (axis=1); gender_split_df is rebound to
# the combined frame.
gender_split_df = pd.concat(
    [gender_split_df, gender_split_comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns and line feeds with a space, in place.
for _char in ("\r", "\n"):
    gender_split_df.replace(_char, " ", regex=True, inplace=True)

# Fill missing values with the string "NA".
gender_split_df.fillna("NA", inplace=True)

# Write the combined frame to disk without the index column.
gender_split_df.to_csv("gender_split.csv", index=False)
# load json file
load_json()

###########################
# INTERVENTION GROUP NUMBER
###########################

# Intervention group number: highlighted text, transposed into one
# labelled column.
InterventionGroupNumber_HT = highlighted_text(intervention_group_number)
InterventionGroupNumber_HT_df = pd.DataFrame(InterventionGroupNumber_HT).T
InterventionGroupNumber_HT_df.columns = ["n_treat_ht"]

# Intervention group number: user comments
InterventionGroupNumber_comments = comments(intervention_group_number)
InterventionGroupNumber_comments_df = pd.DataFrame(
    InterventionGroupNumber_comments).T
InterventionGroupNumber_comments_df.columns = ["n_treat_info"]

##################################
# INTERVENTION GROUP PRE-TEST MEAN
##################################

# Intervention group pre-test mean: highlighted text (transposed frame)
InterventionGroupPretestMean_HT = highlighted_text(
    intervention_group_pretest_mean)
InterventionGroupPretestMean_HT_df = pd.DataFrame(
    InterventionGroupPretestMean_HT).T
예제 #7
0
# load json file
load_json()

# Intervention teaching approach: main data, transposed into one labelled
# column.
InterventionTeachingApproach = get_data(intervention_teaching_approach)
InterventionTeachingApproach_df = pd.DataFrame(InterventionTeachingApproach).T
InterventionTeachingApproach_df.columns = ["int_approach_raw"]

# Intervention teaching approach: highlighted text
InterventionTeachingApproach_HT = highlighted_text(intervention_teaching_approach)
InterventionTeachingApproach_HT_df = pd.DataFrame(
    InterventionTeachingApproach_HT).T
InterventionTeachingApproach_HT_df.columns = ["int_approach_ht"]

# Intervention teaching approach: user comments
InterventionTeachingApproach_Comments = comments(intervention_teaching_approach)
InterventionTeachingApproach_Comments_df = pd.DataFrame(
    InterventionTeachingApproach_Comments).T
InterventionTeachingApproach_Comments_df.columns = ["int_approach_info"]

# Put the three single-column frames side by side (axis=1).
intervention_teaching_approach_df = pd.concat(
    [
        InterventionTeachingApproach_df,
        InterventionTeachingApproach_HT_df,
        InterventionTeachingApproach_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds and colons with a space, in place,
# one pattern at a time.
for _char in ("\r", "\n", ":"):
    intervention_teaching_approach_df.replace(
        _char, " ", regex=True, inplace=True)
# load json file
load_json()

# Randomisation: main data, transposed into one labelled column.
randomisation = get_data(randomisation_details)
randomisation_df = pd.DataFrame(randomisation)
randomisation_df = randomisation_df.T
randomisation_df.columns = ["rand_raw"]

# Randomisation: highlighted text
randomisation_HT = highlighted_text(randomisation_details)
randomisation_details_df = pd.DataFrame(randomisation_HT)
randomisation_details_df = randomisation_details_df.T
randomisation_details_df.columns = ["rand_ht"]

# Randomisation: user comments
randomisation_Comments = comments(randomisation_details)
randomisation_Comments_df = pd.DataFrame(randomisation_Comments)
randomisation_Comments_df = randomisation_Comments_df.T
randomisation_Comments_df.columns = ["rand_info"]

# Put the three single-column frames side by side (axis=1);
# randomisation_df is rebound to the combined frame.
randomisation_df = pd.concat(
    [randomisation_df, randomisation_details_df, randomisation_Comments_df],
    axis=1,
    sort=False)

# Remove square brackets by keeping element 0 of each rand_raw cell.
# This must run BEFORE the blanks are filled: .str[0] also indexes plain
# strings, so applying it after fillna would cut the "NA" placeholder
# down to "N".
randomisation_df['rand_raw'] = randomisation_df['rand_raw'].str[0]

# Fill missing values with the string "NA" (this now also covers cells
# for which the unwrapping step above produced a missing value).
randomisation_df.fillna("NA", inplace=True)
# load json file
load_json()

###########################
# CONTROL GROUP NUMBER
###########################

# Control group (two) number: highlighted text, transposed into one
# labelled column.
ControlGroupNumber_HT = highlighted_text(control_group_two_number)
ControlGroupNumber_HT_df = pd.DataFrame(ControlGroupNumber_HT).T
ControlGroupNumber_HT_df.columns = ["n_cont2_ht"]

# Control group (two) number: user comments
ControlGroupNumber_comments = comments(control_group_two_number)
ControlGroupNumber_comments_df = pd.DataFrame(ControlGroupNumber_comments).T
ControlGroupNumber_comments_df.columns = ["n_cont2_info"]

#################################
# Control GROUP PRE-TEST MEAN
#################################

# Control group (two) pre-test mean: highlighted text
ControlGroupPretestMean_HT = highlighted_text(control_group_two_pretest_mean)
ControlGroupPretestMean_HT_df = pd.DataFrame(ControlGroupPretestMean_HT).T
ControlGroupPretestMean_HT_df.columns = ["pre_c2_mean_ht"]

# Get Control Group Pre-test Mean comments
예제 #10
0
curriculumsubjects_df["test_subject_writing"] = curriculumsubjects_df["test_subject_raw"].map(set(['Writing']).issubset).astype(int)
curriculumsubjects_df["test_subject_mathematics"] = curriculumsubjects_df["test_subject_raw"].map(set(['Mathematics']).issubset).astype(int)
curriculumsubjects_df["test_subject_science"] = curriculumsubjects_df["test_subject_raw"].map(set(['Science']).issubset).astype(int)
curriculumsubjects_df["test_subject_social_studies"] = curriculumsubjects_df["test_subject_raw"].map(set(['Social studies']).issubset).astype(int)
curriculumsubjects_df["test_subject_arts"] = curriculumsubjects_df["test_subject_raw"].map(set(['Arts']).issubset).astype(int)
curriculumsubjects_df["test_subject_languages"] = curriculumsubjects_df["test_subject_raw"].map(set(['Languages']).issubset).astype(int)
curriculumsubjects_df["test_subject_other_curriculum_test"] = curriculumsubjects_df["test_subject_raw"].map(set(['Other curriculum test']).issubset).astype(int) '''

# Curriculum subjects: highlighted text, transposed into one labelled
# column.
curriculumsubjects_HT = highlighted_text(curriculum_subjects)
curriculumsubjects_HT_df = pd.DataFrame(curriculumsubjects_HT).T
curriculumsubjects_HT_df.columns = ["test_subject_ht"]

# Curriculum subjects: user comments
curriculumsubjects_Comments = comments(curriculum_subjects)
curriculumsubjects_Comments_df = pd.DataFrame(curriculumsubjects_Comments).T
curriculumsubjects_Comments_df.columns = ["test_subject_info"]

###########################
# OTHER OUTCOMES REPORTED #
###########################

# Other outcomes: main data
other_outcomes = get_data(other_outcomes_output)
other_outcomes_df = pd.DataFrame(other_outcomes).T
other_outcomes_df.columns = ["out_other_raw"]

# get other outcomes highlighted text
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import number_of_classes_intervention_output
from AttributeIDList import number_of_classes_control_output
from AttributeIDList import number_of_classes_total_output
from AttributeIDList import number_of_classes_not_provided_output
import pandas as pd

# load json file
load_json()

##################################
# NUMBER OF CLASSES INTERVENTION #
##################################

# Number of classes (intervention): user comments, transposed into one
# labelled column.
number_of_classes_intervention_Comments = comments(
    number_of_classes_intervention_output)
number_of_classes_intervention_Comments_df = pd.DataFrame(
    number_of_classes_intervention_Comments).T
number_of_classes_intervention_Comments_df.columns = ["class_treat_info"]

# Number of classes (intervention): highlighted text
number_of_classes_intervention_HT = highlighted_text(
    number_of_classes_intervention_output)
number_of_classes_intervention_HT_df = pd.DataFrame(
    number_of_classes_intervention_HT).T
number_of_classes_intervention_HT_df.columns = ["class_treat_ht"]

#############################
# NUMBER OF CLASSES CONTROL #
import pandas as pd

# Clustering: main data, transposed into one labelled column.
clustering = get_data(clustering_output)
clustering_df = pd.DataFrame(clustering).T
clustering_df.columns = ["clust_anal_raw"]

# Clustering: highlighted text
clustering_HT = highlighted_text(clustering_output)
clustering_HT_df = pd.DataFrame(clustering_HT).T
clustering_HT_df.columns = ["clust_anal_ht"]

# Clustering: user comments
clustering_Comments = comments(clustering_output)
clustering_Comments_df = pd.DataFrame(clustering_Comments).T
clustering_Comments_df.columns = ["clust_anal_info"]

# Put the three single-column frames side by side (axis=1); clustering_df
# is rebound to the combined frame.
clustering_df = pd.concat(
    [clustering_df, clustering_HT_df, clustering_Comments_df],
    axis=1,
    sort=False,
)

# Fill missing values with the string "NA".
clustering_df.fillna("NA", inplace=True)

# Write the combined frame to disk without the index column.
clustering_df.to_csv("clustering.csv", index=False)
# load json file
load_json()

#############################################
# Analyzed sample size for intervention group
#############################################

# Highlighted text, transposed into one labelled column.
sample_size_analyzed_intervention = highlighted_text(
    sample_size_analyzed_intervention_output)
sample_size_analyzed_intervention_df = pd.DataFrame(
    sample_size_analyzed_intervention).T
sample_size_analyzed_intervention_df.columns = ["n_treat_ht"]

# User comments
sample_size_analyzed_intervention_Comments = comments(
    sample_size_analyzed_intervention_output)
sample_size_analyzed_intervention_Comments_df = pd.DataFrame(
    sample_size_analyzed_intervention_Comments).T
sample_size_analyzed_intervention_Comments_df.columns = ["n_treat_info"]

############################################
# Analyzed sample size for the control group
############################################

# Highlighted text
sample_size_analyzed_control = highlighted_text(
    sample_size_analyzed_control_output)
sample_size_analyzed_control_df = pd.DataFrame(sample_size_analyzed_control).T
sample_size_analyzed_control_df.columns = ["n_cont_ht"]
###########################################

# Intervention time: main data, transposed into one labelled column.
InterventionTime = get_data(intervention_time_output)
InterventionTime_df = pd.DataFrame(InterventionTime).T
InterventionTime_df.columns = ["int_when_raw"]

# Intervention time: highlighted text
InterventionTime_HT = highlighted_text(intervention_time_output)
InterventionTime_HT_df = pd.DataFrame(InterventionTime_HT).T
InterventionTime_HT_df.columns = ["int_when_ht"]

# Intervention time: user comments
InterventionTime_Comments = comments(intervention_time_output)
InterventionTime_Comments_df = pd.DataFrame(InterventionTime_Comments).T
InterventionTime_Comments_df.columns = ["int_when_info"]

# Put the three single-column frames side by side (axis=1).
intervention_time_df = pd.concat(
    [InterventionTime_df, InterventionTime_HT_df, InterventionTime_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds and colons with a space, in place,
# one pattern at a time.
for _char in ("\r", "\n", ":"):
    intervention_time_df.replace(_char, " ", regex=True, inplace=True)
예제 #15
0
# load json file
load_json()

# Intervention costs reported: main data, transposed into one labelled
# column.
InterventionCosts = get_data(intervention_costs_reported)
InterventionCosts_df = pd.DataFrame(InterventionCosts).T
InterventionCosts_df.columns = ["int_cost_raw"]

# Intervention costs reported: highlighted text
InterventionCosts_HT = highlighted_text(intervention_costs_reported)
InterventionCosts_HT_df = pd.DataFrame(InterventionCosts_HT).T
InterventionCosts_HT_df.columns = ["int_cost_ht"]

# Intervention costs reported: user comments
InterventionCosts_Comments = comments(intervention_costs_reported)
InterventionCosts_Comments_df = pd.DataFrame(InterventionCosts_Comments).T
InterventionCosts_Comments_df.columns = ["int_cost_info"]

# Put the three single-column frames side by side (axis=1).
intervention_costs_df = pd.concat(
    [InterventionCosts_df, InterventionCosts_HT_df, InterventionCosts_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns and line feeds with a space, in place.
for _char in ("\r", "\n"):
    intervention_costs_df.replace(_char, " ", regex=True, inplace=True)
예제 #16
0
# load json file
load_json()

# Treatment group: main data, transposed into one labelled column.
treatmentgroup = get_data(treatment_group)
treatmentgroup_df = pd.DataFrame(treatmentgroup).T
treatmentgroup_df.columns = ["treat_group_raw"]

# Treatment group: highlighted text
treatmentgroup_HT = highlighted_text(treatment_group)
treatmentgroup_HT_df = pd.DataFrame(treatmentgroup_HT).T
treatmentgroup_HT_df.columns = ["treat_group_ht"]

# Treatment group: user comments
treatmentgroup_Comments = comments(treatment_group)
treatmentgroup_Comments_df = pd.DataFrame(treatmentgroup_Comments).T
treatmentgroup_Comments_df.columns = ["treat_group_info"]

# Put the three single-column frames side by side (axis=1).
treatment_group_df = pd.concat(
    [treatmentgroup_df, treatmentgroup_HT_df, treatmentgroup_Comments_df],
    axis=1,
    sort=False,
)

# Fill missing values with the string "NA".
treatment_group_df.fillna("NA", inplace=True)

# Save to disk -- currently disabled: the call below is wrapped in a bare
# string literal, so nothing is written.
""" treatment_group_df.to_csv("treatmentgroup.csv", index=False) """
예제 #17
0
# Transpose the educational-setting frame (built above the visible part of
# this script) and label its single column.
edusetting_df = edusetting_df.transpose()
edusetting_df.columns = ["int_setting_raw"]

# Binarisation of the educational setting data -- currently disabled: the
# statements are wrapped in a bare string literal and never execute.
""" edusetting_df["int_setting_primary/elementary_school"] = edusetting_df["int_setting_raw"].map(set(['Primary/elementary school']).issubset).astype(int)
edusetting_df["int_setting_middle_school"] = edusetting_df["int_setting_raw"].map(set(['Middle school']).issubset).astype(int)
edusetting_df["int_setting_secondary/high_school"] = edusetting_df["int_setting_raw"].map(set(['Secondary/High school']).issubset).astype(int) """

# Educational setting: highlighted text
edusetting_HT = highlighted_text(edu_setting_output)
edusetting_HT_df = pd.DataFrame(edusetting_HT).T
edusetting_HT_df.columns = ["int_setting_ht"]

# Educational setting: user comments
edusetting_Comments = comments(edu_setting_output)
edusetting_Comments_df = pd.DataFrame(edusetting_Comments).T
edusetting_Comments_df.columns = ["int_setting_info"]

# Put the three single-column frames side by side (axis=1).
educational_setting_df = pd.concat(
    [edusetting_df, edusetting_HT_df, edusetting_Comments_df],
    axis=1,
    sort=False,
)

# Fill missing values with the string "NA".
educational_setting_df.fillna("NA", inplace=True)

# save to disk
예제 #18
0
# Digital technology (inclusion): main data, transposed into one labelled
# column.
DigitalTechnology = get_data(intervention_approach_digital_technology)
DigitalTechnology_df = pd.DataFrame(DigitalTechnology).T
DigitalTechnology_df.columns = ["digit_tech_raw"]

# Digital technology (inclusion): highlighted text
DigitalTechnology_HT = highlighted_text(intervention_approach_digital_technology)
DigitalTechnology_HT_df = pd.DataFrame(DigitalTechnology_HT).T
DigitalTechnology_HT_df.columns = ["digit_tech_ht"]

# Digital technology (inclusion): user comments
DigitalTechnology_Comments = comments(intervention_approach_digital_technology)
DigitalTechnology_Comments_df = pd.DataFrame(DigitalTechnology_Comments).T
DigitalTechnology_Comments_df.columns = ["digit_tech_info"]

###########################################
# PARENTS OR COMMUNITY VOLUNTEERS INCLUSION
###########################################

# Parents/community volunteers (inclusion): main data
Parents_or_Community_Volunteers = get_data(
    intervention_approach_parents_or_community_volunteers)
Parents_or_Community_Volunteers_df = pd.DataFrame(
    Parents_or_Community_Volunteers).T
Parents_or_Community_Volunteers_df.columns = ["parent_partic_raw"]
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_duration_output
import pandas as pd

# load json file
load_json()

# Intervention duration: highlighted text, transposed into one labelled
# column.
InterventionDuration_HT = highlighted_text(intervention_duration_output)
InterventionDuration_HT_df = pd.DataFrame(InterventionDuration_HT).T
InterventionDuration_HT_df.columns = ["int_dur_ht"]

# Intervention duration: user comments
InterventionDuration_Comments = comments(intervention_duration_output)
InterventionDuration_Comments_df = pd.DataFrame(InterventionDuration_Comments).T
InterventionDuration_Comments_df.columns = ["int_dur_info"]

# Put the highlighted-text and comments columns side by side (axis=1).
intervention_duration_df = pd.concat(
    [InterventionDuration_HT_df, InterventionDuration_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds, colons and semicolons with a space,
# in place, one pattern at a time.
for _char in ("\r", "\n", ":", ";"):
    intervention_duration_df.replace(_char, " ", regex=True, inplace=True)
예제 #20
0
]

##############################################

# Country: main data, transposed into one labelled column.
country = get_data(countries)
country_df = pd.DataFrame(country).T
country_df.columns = ["loc_country_raw"]

# Country: highlighted text
country_HT = highlighted_text(countries)
country_HT_df = pd.DataFrame(country_HT).T
country_HT_df.columns = ["loc_country_ht"]

# Country: user comments
country_Comments = comments(countries)
country_Comments_df = pd.DataFrame(country_Comments).T
country_Comments_df.columns = ["loc_country_info"]

# Concatenation of the three frames -- currently disabled: the call is
# wrapped in a bare string literal, so country_df keeps only the
# "loc_country_raw" column.
""" country_df = pd.concat(
    [country_df, country_HT_df, country_Comments_df], axis=1, sort=False) """

# Fill missing values with the string "NA".
country_df.fillna("NA", inplace=True)

# Write the frame to disk without the index column.
country_df.to_csv("Country.csv", index=False)
more_location_info_HT_df.columns = ["More_Location_information_HT"]

# Get More Location Information comments
more_location_info_Comments = comments(more_location_info)
more_location_info_Comments_df = pd.DataFrame(more_location_info_Comments)
more_location_info_Comments_df = more_location_info_Comments_df.T
more_location_info_Comments_df.columns = ["More_Location_Information_comments"] """

# Location-specific information: highlighted text, transposed into one
# labelled column.
location_specific_info_HT = highlighted_text(specific_to_location)
location_specific_info_HT_df = pd.DataFrame(location_specific_info_HT).T
location_specific_info_HT_df.columns = ["loc_spec_ht"]

# Location-specific information: user comments
location_specific_info_Comments = comments(specific_to_location)
location_specific_info_Comments_df = pd.DataFrame(
    location_specific_info_Comments).T
location_specific_info_Comments_df.columns = ["loc_spec_info"]

# Type of location: highlighted text
type_of_location_info_HT = highlighted_text(type_of_location)
type_of_location_info_HT_df = pd.DataFrame(type_of_location_info_HT).T
type_of_location_info_HT_df.columns = ["loc_type_ht"]

# Type of location: user comments (frame is built here but not yet
# transposed in the visible part of the script)
type_of_location_info_Comments = comments(type_of_location)
type_of_location_info_Comments_df = pd.DataFrame(type_of_location_info_Comments)
import pandas as pd

# Baseline differences: main data, transposed into one labelled column.
baselinedifferences = get_data(baseline_differences_output)
baselinedifferences_df = pd.DataFrame(baselinedifferences).T
baselinedifferences_df.columns = ["base_diff_raw"]

# Baseline differences: highlighted text
baselinedifferences_HT = highlighted_text(baseline_differences_output)
baselinedifferences_HT_df = pd.DataFrame(baselinedifferences_HT).T
baselinedifferences_HT_df.columns = ["base_diff_ht"]

# Baseline differences: user comments
baselinedifferences_Comments = comments(baseline_differences_output)
baselinedifferences_Comments_df = pd.DataFrame(baselinedifferences_Comments).T
baselinedifferences_Comments_df.columns = ["base_diff_info"]

# Put the three single-column frames side by side (axis=1).
baseline_differences_df = pd.concat(
    [
        baselinedifferences_df,
        baselinedifferences_HT_df,
        baselinedifferences_Comments_df,
    ],
    axis=1,
    sort=False,
)

# Fill missing values with the string "NA".
baseline_differences_df.fillna("NA", inplace=True)

# save to disk
예제 #23
0
# load json file
load_json()

#############################################
# Initial sample size for intervention group
#############################################

# Sample size (intervention): highlighted text, transposed into one
# labelled column.
sample_size_intervention_HT = highlighted_text(sample_size_intervention_output)
sample_size_intervention_HT_df = pd.DataFrame(sample_size_intervention_HT).T
sample_size_intervention_HT_df.columns = ["base_n_treat_ht"]

# Sample size (intervention): user comments
sample_size_intervention_Comments = comments(sample_size_intervention_output)
sample_size_intervention_Comments_df = pd.DataFrame(
    sample_size_intervention_Comments).T
sample_size_intervention_Comments_df.columns = ["base_n_treat_info"]

############################################
# Initial sample size for the control group
############################################

# Sample size (control): highlighted text
sample_size_control_HT = highlighted_text(sample_size_control_output)
sample_size_control_HT_df = pd.DataFrame(sample_size_control_HT).T
sample_size_control_HT_df.columns = ["base_n_cont_ht"]
# load json file
load_json()

###########################
# CONTROL GROUP NUMBER
###########################

# Control group number: highlighted text, transposed into one labelled
# column.
ControlGroupNumber_HT = highlighted_text(control_group_number)
ControlGroupNumber_HT_df = pd.DataFrame(ControlGroupNumber_HT).T
ControlGroupNumber_HT_df.columns = ["n_cont_ht"]

# Control group number: user comments
ControlGroupNumber_comments = comments(control_group_number)
ControlGroupNumber_comments_df = pd.DataFrame(ControlGroupNumber_comments).T
ControlGroupNumber_comments_df.columns = ["n_cont_info"]

#################################
# Control GROUP PRE-TEST MEAN
#################################

# Control group pre-test mean: highlighted text
ControlGroupPretestMean_HT = highlighted_text(control_group_pretest_mean)
ControlGroupPretestMean_HT_df = pd.DataFrame(ControlGroupPretestMean_HT).T
ControlGroupPretestMean_HT_df.columns = ["pre_c_mean_ht"]

# Get Control Group Pre-test Mean comments
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_description_output
import pandas as pd

# load json file
load_json()

# Intervention description: highlighted text, transposed into one labelled
# column.
Intervention_DescriptionHT = highlighted_text(intervention_description_output)
Intervention_DescriptionHT_df = pd.DataFrame(Intervention_DescriptionHT).T
Intervention_DescriptionHT_df.columns = ["int_desc_ht"]

# Intervention description: user comments
Intervention_Description_Comments = comments(intervention_description_output)
Intervention_Description_Comments_df = pd.DataFrame(
    Intervention_Description_Comments).T
Intervention_Description_Comments_df.columns = ["int_desc_info"]

# Put the highlighted-text and comments columns side by side (axis=1).
intervention_description_df = pd.concat(
    [Intervention_DescriptionHT_df, Intervention_Description_Comments_df],
    axis=1,
    sort=False,
)

# Replace carriage returns, line feeds, colons and semicolons with a space,
# in place, one pattern at a time.
for _char in ("\r", "\n", ":", ";"):
    intervention_description_df.replace(_char, " ", regex=True, inplace=True)
예제 #26
0
#################

# Other outcomes: main data, transposed into one labelled column.
other_outcomes = get_data(other_outcomes_output)
other_outcomes_df = pd.DataFrame(other_outcomes).T
other_outcomes_df.columns = ["out_other_raw"]

# Other outcomes: highlighted text
other_outcomes_HT = highlighted_text(other_outcomes_output)
other_outcomes_HT_df = pd.DataFrame(other_outcomes_HT).T
other_outcomes_HT_df.columns = ["out_other_ht"]

# Other outcomes: user comments
other_outcomes_info = comments(other_outcomes_output)
other_outcomes_info_df = pd.DataFrame(other_outcomes_info).T
other_outcomes_info_df.columns = ["out_other_info"]

######################
# Additional outcomes
######################

# Additional outcomes: main data
additional_outcomes = get_data(additional_outcomes_output)
additional_outcomes_df = pd.DataFrame(additional_outcomes).T
additional_outcomes_df.columns = ["out_info_raw"]

# get additional outcomes highlighted text
from Main import load_json, comments, highlighted_text
from AttributeIDList import sample_size_output
import pandas as pd

# load json file
load_json()

# sample size user comments, transposed and labelled as a single column
sample_size_Comments = comments(sample_size_output)
sample_size_Comments_df = pd.DataFrame(sample_size_Comments).T
sample_size_Comments_df.columns = ["sample_analysed_info"]

# sample size highlighted text, transposed and labelled as a single column
sample_size_HT = highlighted_text(sample_size_output)
sample_size_HT_df = pd.DataFrame(sample_size_HT).T
sample_size_HT_df.columns = ["sample_analysed_ht"]

# place comments and highlighted text side by side
sample_size_df = pd.concat(
    [sample_size_Comments_df, sample_size_HT_df],
    axis=1,
    sort=False,
)

# blank out carriage returns and newlines in one pass
# (each matched character becomes a single space)
sample_size_df.replace(r'[\r\n]', ' ', regex=True, inplace=True)

# mark missing cells with the literal string "NA"
sample_size_df.fillna("NA", inplace=True)
from Main import load_json, comments, highlighted_text
from AttributeIDList import intervention_name_output
import pandas as pd

# load json file
load_json()

# intervention name highlighted text, transposed and labelled as a
# single column
Intervention_NameHT = highlighted_text(intervention_name_output)
Intervention_NameHT_df = pd.DataFrame(Intervention_NameHT).T
Intervention_NameHT_df.columns = ["int_name_ht"]

# intervention name user comments, transposed and labelled as a
# single column
Intervention_Name_Comments = comments(intervention_name_output)
Intervention_Name_Comments_df = pd.DataFrame(Intervention_Name_Comments).T
Intervention_Name_Comments_df.columns = ["int_name_info"]

# place highlighted text and comments side by side
intervention_name_df = pd.concat(
    [Intervention_NameHT_df, Intervention_Name_Comments_df],
    axis=1,
    sort=False,
)

# blank out carriage returns, newlines, colons and semicolons in one pass
# (each matched character becomes a single space)
intervention_name_df.replace(r'[\r\n:;]', ' ', regex=True, inplace=True)
예제 #29
0
from Main import load_json, get_outcome_lvl2, comments
from AttributeIDList import toolkit_strand_codes
import pandas as pd

# load json file
load_json()

# toolkit strand data (kept in the orientation returned by the helper)
toolkitstrand = get_outcome_lvl2(toolkit_strand_codes)
toolkitstrand_df = pd.DataFrame(toolkitstrand)

# toolkit strand user comments, transposed and labelled as a single column
toolkitstrand_Comments = comments(toolkit_strand_codes)
toolkitstrand_Comments_df = pd.DataFrame(toolkitstrand_Comments).T
toolkitstrand_Comments_df.columns = ["_info"]

# label columns out_strand_1, out_strand_2, ... from the existing
# column labels, each shifted up by one
toolkitstrand_df.columns = [
    "out_strand_{}".format(column + 1)
    for column in toolkitstrand_df.columns
]

# mark missing cells with the literal string "NA"
toolkitstrand_df.fillna("NA", inplace=True)

# save to disk (disabled: the statement below is an inert string literal)
""" toolkitstrand_df.to_csv("toolkitstrand.csv", index=False) """
from Main import load_json, get_data, comments, highlighted_text
from AttributeIDList import number_of_schools_intervention_output
from AttributeIDList import number_of_schools_control_output
from AttributeIDList import number_of_schools_total_output
from AttributeIDList import number_of_schools_not_provided_output
import pandas as pd

# load json file
load_json()

##################################
# NUMBER OF SCHOOLS INTERVENTION #
##################################

# user comments for number of schools (intervention), transposed and
# labelled as a single column
number_of_schools_intervention_Comments = comments(
    number_of_schools_intervention_output
)
number_of_schools_intervention_Comments_df = pd.DataFrame(
    number_of_schools_intervention_Comments
).T
number_of_schools_intervention_Comments_df.columns = ["school_treat_info"]

# highlighted text for number of schools (intervention), transposed and
# labelled as a single column
number_of_schools_intervention_HT = highlighted_text(
    number_of_schools_intervention_output
)
number_of_schools_intervention_HT_df = pd.DataFrame(
    number_of_schools_intervention_HT
).T
number_of_schools_intervention_HT_df.columns = ["school_treat_ht"]

#############################
# NUMBER OF SCHOOLS CONTROL #