def _index_patients_by_id(survey_report):
    """Return the entries of ``survey_report['patient_array']`` keyed by patient ``id``."""
    return dict((patient['id'], patient) for patient in survey_report['patient_array'])


def _last_nonempty_treatment(patient):
    """Return the last non-empty entry of ``patient['treatment'][0]``, or '' if there is none."""
    treatment = ""
    for drugs in patient['treatment'][0]:
        if drugs != "":
            treatment = drugs
    return treatment


def _strip_prior_treatment(treatment_after, treatment_prior):
    """Remove the first occurrence of ``treatment_prior`` from ``treatment_after``.

    When the prior treatment appears as a ' + '-joined suffix element, the
    joining ' + ' is removed together with it.
    """
    if treatment_prior == '':
        return treatment_after
    joined = ' + ' + treatment_prior
    if joined in treatment_after:
        return treatment_after.replace(joined, '', 1)
    return treatment_after.replace(treatment_prior, '', 1)


def _load_survey_patients(sim_dir, survey_day):
    """Load the MalariaSurveyJSONAnalyzer report of ``survey_day`` and index it by patient id."""
    report_path = os.path.join(
        sim_dir, 'output',
        'MalariaSurveyJSONAnalyzer_Day_' + str(survey_day) + '_0.json')
    return _index_patients_by_id(json2dict(report_path))


def process_reinfections_report(sim_dir_map, sim_id, survey_offset=5):
    """Compute the reinfection fraction between consecutive campaign rounds.

    For every day in the module-level ``campaign_days`` two survey reports are
    read from the simulation's ``output`` directory: one ``survey_offset`` days
    before the campaign day and one ``survey_offset`` days after it.  For each
    patient present in both reports, the treatment already recorded before the
    campaign is stripped from the treatment recorded after it, leaving the
    treatment attributed to this round.

    From the second round on, a patient counts as "tested" when the cleaned
    treatment of both the previous and the current round is non-empty, and as
    "reinfected" when both of them contain 'Artemether'.

    NOTE: the 'Artemether' criterion must not be applied to MDA campaigns
    without modification.  The original author's sketch for MDA treats a
    patient as treated when the treatment contains both 'DHA' and 'Vehicle',
    or contains 'Artemether'.

    Args:
        sim_dir_map: mapping from simulation id to its working directory.
        sim_id: id of the simulation to process.
        survey_offset: number of days between a campaign day and the surveys
            taken before and after it (defaults to 5).

    Returns:
        dict mapping ``'round_<i>_<i+1>'`` to the fraction
        ``reinfected / tested`` for rounds ``i`` and ``i + 1``, or to the
        string ``'nan'`` when no patient was tested in both rounds.
    """
    survey_report_output = {}
    reinfections = {}

    for i, day in enumerate(campaign_days):
        survey_day_after = day + survey_offset
        current_round = survey_report_output[survey_day_after] = {}

        sim_dir = sim_dir_map[sim_id]
        patients_prior = _load_survey_patients(sim_dir, day - survey_offset)
        patients_after = _load_survey_patients(sim_dir, survey_day_after)

        # Cleaned post-campaign records of the previous round (none for the first round).
        previous_round = None
        if i > 0:
            previous_round = survey_report_output[campaign_days[i - 1] + survey_offset]

        total_tested_patients = 0
        num_reinfected = 0

        for patient_id, patient in patients_prior.items():
            treatment_prior = _last_nonempty_treatment(patient)

            # Patients missing from the post-campaign survey cannot be compared.
            if patient_id not in patients_after:
                continue

            treatment_after = _strip_prior_treatment(
                _last_nonempty_treatment(patients_after[patient_id]),
                treatment_prior)

            current_round[patient_id] = {
                'initial_age': patient['initial_age'],
                'treatment': treatment_after,
            }

            if previous_round is None or patient_id not in previous_round:
                continue

            treatment_previous = previous_round[patient_id]['treatment']
            if treatment_previous != '' and treatment_after != '':
                total_tested_patients += 1
                # DO NOT APPLY FOR MDA CAMPAIGNS WITHOUT MODIFICATION (see docstring).
                if 'Artemether' in treatment_previous and 'Artemether' in treatment_after:
                    num_reinfected += 1

        if i > 0:
            round_key = 'round_' + str(i) + '_' + str(i + 1)
            if total_tested_patients != 0:
                # "+ 0.0" forces float division on Python 2 as well.
                reinfections[round_key] = num_reinfected / (total_tested_patients + 0.0)
            else:
                reinfections[round_key] = 'nan'

    return reinfections
def get_sweep_results(sim_meta_config_files, calib_file_path, tags_data_file_path):
    """Collect the results of all finished simulations of a sweep and save them as JSON.

    For every experiment metadata file the simulations listed under ``'sims'``
    are visited.  A simulation is skipped when ``json2dict`` returns ``None``
    for its ``output/InsetChart.json`` (i.e. the file does not exist, which is
    the case for simulations that did not finish successfully).  For the
    others the 'New Diagnostic Prevalence' channel, the 'reinfections' report
    data (when reports are configured via the module-level
    ``reports_channels``), the simulation tags and the simulation id are
    stored under ``calib_output[sim_group_key][sim_key]``.

    Args:
        sim_meta_config_files: paths of experiment metadata JSON files; each
            must provide the keys ``'exp_id'`` and ``'sims'``.
        calib_file_path: path the collected simulation results are written to.
        tags_data_file_path: path the accumulated tag data is written to.

    Returns:
        The file object that was used to write ``calib_file_path``.  It is
        already closed when returned; the return value is kept unchanged for
        backward compatibility.
    """
    # Login to COMPs
    comps_login()

    # simulations tag data structure: accumulates sims meta information from sims tags
    tag_data = {
        'ITN trajectory': [],
        'Drug coverage per round': [],
        'Temporary habitat scale': [],
        'Constant habitat scale': [],
    }
    calib_output = {}

    # iterate through experiments
    for sim_meta_config_file in sim_meta_config_files:
        with open(sim_meta_config_file) as metadata_file:
            metadata = json.load(metadata_file)

        sim_dir_map = CompsDTKOutputParser.createSimDirectoryMap(metadata['exp_id'])

        for sim_id in metadata['sims']:
            sim_dir = sim_dir_map[sim_id]

            # json2dict returns None if the timeseries file does not exist,
            # which is the case if the sim has not successfully finished.
            sim_output = json2dict(os.path.join(sim_dir, 'output', 'InsetChart.json'))
            if sim_output is None:
                continue

            # delete all but the specified channels; iterate over a copy of
            # the keys because the dict is modified inside the loop
            sim_channels = sim_output['Channels']
            for channel in list(sim_channels):
                if channel not in channels:
                    del sim_channels[channel]

            # process specified reports
            report_channels_data = {}
            if reports_channels is not None:
                report_channels_data = process_reports(reports_channels, sim_dir_map, sim_id)

            # record sim meta information including sim tags
            with open(os.path.join(sim_dir, 'tags.json'), 'r') as tags_file:
                sim_meta = ast.literal_eval(tags_file.read())
            append_tag_data(sim_meta, tag_data)

            # construct sim group key and sim key
            x_temp_h = sim_meta_2_temp_h(sim_meta)
            const_h = sim_meta_2_const_h(sim_meta)
            itn_level = sim_meta_2_itn_level(sim_meta)
            drug_coverage_level = sim_meta_2_drug_cov(sim_meta)
            sim_key = get_sim_key(x_temp_h, const_h, itn_level, drug_coverage_level)
            sim_group_key = get_sim_group_key(itn_level, drug_coverage_level)

            # store sim channels data; can add/remove data entries depending on needs
            calib_output.setdefault(sim_group_key, {})[sim_key] = {
                'prevalence': sim_channels['New Diagnostic Prevalence']['Data'],
                # None when no reports were processed or the report is absent
                'reinfections': report_channels_data.get('reinfections'),
                'meta': sim_meta,
                'sim_id': sim_id,
            }

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("")
    print("Writing files...")

    with open(calib_file_path, 'w') as calib_f:
        json.dump(calib_output, calib_f)
    # calib_output is keyed by sim group, so count the sims inside the groups.
    num_saved = sum(len(group) for group in calib_output.values())
    print(str(num_saved) + ' simulation results saved to ' + calib_file_path)

    with open(tags_data_file_path, 'w') as tags_f:
        json.dump(tag_data, tags_f)
    print('Meta data tags saved to ' + tags_data_file_path)
    print("")

    return calib_f
import pandas as pd
import json
from nodenames import *
from dtk.utils.parsers.json2dict import json2dict
import operator

# CSV with the columns "cluster.id", "round", "pop" and "UsedLastNight"
# (presumably one row per cluster and survey round -- TODO confirm).
net_info_file = "prevalence_net-use_service-use_by_new_cluster_waves1-6.csv"
nets_calib = json2dict("cluster_tags_net_usage_single_node_updated.json")

df = pd.read_csv(net_info_file)
grouped_rounds = df.groupby("cluster.id")

net_usage = {}
for cluster_id, group in grouped_rounds:
    pop_cluster = group["pop"].to_dict()

    # Fill missing net-usage values with the cluster's mean over the rounds
    # that do have data (Series.mean skips NaN by default).
    used_last_night = group["UsedLastNight"]
    net_usage_cluster = used_last_night.fillna(used_last_night.mean()).to_dict()

    # Use item access for the "round" column: attribute access (group.round)
    # resolves to the DataFrame.round method on current pandas versions.
    rounds_idx = group["round"].to_dict()

    # Re-key both dicts from the DataFrame row index to the round label (as a string).
    for k, v in rounds_idx.items():
        net_usage_cluster[str(v)] = net_usage_cluster.pop(k)
        pop_cluster[str(v)] = pop_cluster.pop(k)

    # Round label with the largest population in this cluster.
    rnd_max_pop = max(pop_cluster.items(), key=operator.itemgetter(1))[0]