def get_prevalence_based_cc(best_fits, all_fits, calib_data):
    """Sample clinical-case traces from each cluster's prevalence optimal region.

    For every HFCA id returned by ``get_hfca_ids()`` and every cluster id
    returned by ``hfca_id_2_cluster_ids(hfca_id)``, up to ``sample_size``
    (group_key, sim_key) pairs are drawn at random from
    ``get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id)``.
    For each drawn pair the clinical-case trace is looked up in
    ``calib_data`` and aggregated twice with ``get_cc_model_ref_traces``:
    once with the cluster weight factor and once without it.

    The collected result is written as JSON to
    ``os.path.join(sim_data_dir, weighted_ccs_by_hfca_id_file)`` and returned.

    Args:
        best_fits: passed through to ``get_prevalence_opt_region_sims``.
        all_fits: passed through to ``get_prevalence_opt_region_sims``.
        calib_data: nested mapping indexed as
            ``calib_data[cluster_category][group_key][sim_key]``.

    Returns:
        dict of the form::

            {hfca_id (str): {cluster_id: {
                'weighted':   [ccs_model_agg, ...],
                'unweighted': [(group_key, sim_key, ccs_model_agg_unweighted), ...],
            }}}

        A cluster only gets an entry once at least one sample was drawn
        for it.
    """
    weighted_ccs_by_hfca_id_file_path = os.path.join(sim_data_dir, weighted_ccs_by_hfca_id_file)

    debug_p('Getting clinical cases samples based on prevalence optimal regions')

    weighted_ccs_model_agg_by_hfca = {}

    for hfca_id in get_hfca_ids():
        hfca_id = str(hfca_id)
        ccs_by_cluster = {}
        weighted_ccs_model_agg_by_hfca[hfca_id] = ccs_by_cluster

        for cluster_id in hfca_id_2_cluster_ids(hfca_id):
            cluster_cat = get_cluster_category(cluster_id)
            sims_opt_region = get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id)

            # random.sample raises ValueError when asked for more items than
            # the population holds, so never request more than is available.
            num_samples = min(sample_size, len(sims_opt_region))
            if num_samples < sample_size:
                debug_p('Prevalence optimal region of cluster ' + str(cluster_id)
                        + ' has only ' + str(num_samples)
                        + ' simulations; requested sample size is ' + str(sample_size))
            sample_sims_opt_region = random.sample(sims_opt_region, num_samples)

            # The weight factor depends only on the cluster, so compute it
            # once per cluster instead of once per sampled simulation.
            # accounting for health seeking behavior data
            cc_cluster_weight_factor = get_cc_cluster_weight_factor(cluster_id)
            # accounting for real cluster population (mean across all rounds)
            cc_cluster_weight_factor = (cluster_2_mean_pop(cluster_id) / (calib_node_pop + 0.0)) * cc_cluster_weight_factor

            for sample_group_key, sample_sim_key in sample_sims_opt_region:
                sample_cc_trace = calib_data[cluster_cat][sample_group_key][sample_sim_key]

                # Only the model aggregate is kept; the reference aggregate
                # returned alongside it is not used here.
                ccs_model_agg, _ = get_cc_model_ref_traces(sample_cc_trace, cluster_id, cc_cluster_weight_factor)
                ccs_model_agg_unweighted, _ = get_cc_model_ref_traces(sample_cc_trace, cluster_id)

                cluster_ccs = ccs_by_cluster.setdefault(cluster_id, {'weighted': [], 'unweighted': []})
                cluster_ccs['weighted'].append(ccs_model_agg)
                cluster_ccs['unweighted'].append((sample_group_key, sample_sim_key, ccs_model_agg_unweighted))

    with open(weighted_ccs_by_hfca_id_file_path, 'w') as w_ccs_f:
        json.dump(weighted_ccs_model_agg_by_hfca, w_ccs_f, indent=3)

    debug_p('DONE getting clinical cases samples based on prevalence optimal regions')
    debug_p('Saved clinical cases samples based on prevalence optimal regions to ' + weighted_ccs_by_hfca_id_file_path)

    return weighted_ccs_model_agg_by_hfca
def plot_weighted_cc_per_hfca(self, weighted_ccs_model_agg_by_hfca, ccs_model_agg_by_hfca_cluster_id):
    """Plot clinical-case time series for every cluster, one PNG per cluster.

    For each HFCA the weighted traces of every combination in
    ``weighted_ccs_model_agg_by_hfca[hfca_id]`` are summed element-wise,
    and the 2.5 / 50 / 97.5 percentiles of those sums are computed per bin
    with ``satp`` and smoothed with ``spline``. The resulting band is the
    same for all clusters of the HFCA.

    Each cluster figure then overlays:
      * the smoothed percentile band and median,
      * the cluster's best-fit traces taken from ``self.best_fits`` /
        ``self.calib_data`` (keys ``'cc_penalty'``, ``'mse'``, ``'fit'``),
      * the reference trace returned by ``get_cc_model_ref_traces``,
      * every unweighted sample trace stored in
        ``ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']``.

    Figures are saved to ``os.path.join(self.root_sweep_dir,
    weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name +
    cluster_id + '.png')``.

    Args:
        weighted_ccs_model_agg_by_hfca: mapping hfca_id -> iterable of
            combinations, each combination being an iterable of traces with
            ``cc_num_fold_bins`` entries.
        ccs_model_agg_by_hfca_cluster_id: mapping indexed as
            ``[hfca_id][cluster_id]['unweighted']``, yielding sequences
            whose element at index 2 is the trace to plot.
    """
    clusters_processed = 0

    # Shared x axes: one point per bin, plus a denser grid for the smoothed
    # percentile curves. Both are independent of HFCA and cluster.
    bin_idxs = list(range(cc_num_fold_bins))
    x_smooth = np.linspace(0, cc_num_fold_bins - 1, 60)

    # .items() works on Python 2 and 3 alike (iteritems() is Python 2 only).
    for hfca_id, weighted_ccs_combos in weighted_ccs_model_agg_by_hfca.items():

        # weighted_ccs_by_bin[b] collects, over all combinations, the summed
        # weighted clinical cases falling in bin b.
        weighted_ccs_by_bin = [[] for _ in bin_idxs]
        for weighted_ccs_combo in weighted_ccs_combos:
            sum_weighted_ccs = cc_num_fold_bins * [0]
            for weighted_ccs in weighted_ccs_combo:
                sum_weighted_ccs = np.add(sum_weighted_ccs, weighted_ccs)
            for bin_idx in bin_idxs:
                weighted_ccs_by_bin[bin_idx].append(sum_weighted_ccs[bin_idx])

        per_bottom = [satp(bin_values, 2.5) for bin_values in weighted_ccs_by_bin]
        per_top = [satp(bin_values, 97.5) for bin_values in weighted_ccs_by_bin]
        per_median = [satp(bin_values, 50) for bin_values in weighted_ccs_by_bin]

        debug_p('length of bin 0 in weighted_ccs_by_bin ' + str(len(weighted_ccs_by_bin[0])))

        # The smoothed percentile curves depend only on the HFCA, so they
        # are computed once here rather than once per cluster.
        per_bottom_smooth = spline(bin_idxs, per_bottom, x_smooth)
        per_top_smooth = spline(bin_idxs, per_top, x_smooth)
        per_median_smooth = spline(bin_idxs, per_median, x_smooth)

        facility = hfca_id_2_facility(hfca_id)

        for cluster_id in hfca_id_2_cluster_ids(hfca_id):

            plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
            gs = gridspec.GridSpec(1, 4)
            ax = plt.subplot(gs[0:4])

            # Percentile band of the weighted sums across combinations.
            ax.plot(x_smooth, per_bottom_smooth, alpha=1, linewidth=0.5, color='black', linestyle=':', label='2.5 percentile HS weighted: prevalence space samples', marker=None)
            ax.plot(x_smooth, per_top_smooth, alpha=1, linewidth=0.5, color='black', linestyle=':', label='97.5 percentile HS weighted: prevalence space samples', marker=None)
            ax.plot(x_smooth, per_median_smooth, alpha=1, linewidth=2.0, color='magenta', linestyle='-', label='median HS weighted: prevalence space samples', marker=None)
            ax.fill_between(x_smooth, per_bottom_smooth, per_top_smooth, facecolor='gray', alpha=0.5, interpolate=True)

            # Best-fit simulations of this cluster under the three criteria
            # stored in self.best_fits; all share the same group key.
            cluster_cat = get_cluster_category(cluster_id)
            cluster_best_fit = self.best_fits[cluster_id]
            opt_group_traces = self.calib_data[cluster_cat][cluster_best_fit['group_key']]

            cc_trace_opt_cc = opt_group_traces[cluster_best_fit['cc_penalty']['sim_key']]
            cc_trace_opt_prev = opt_group_traces[cluster_best_fit['mse']['sim_key']]
            cc_trace_opt_fit = opt_group_traces[cluster_best_fit['fit']['sim_key']]

            # The reference trace plotted below is the one returned by the
            # last of the three calls, as in the original statement order.
            ccs_model_agg_cc, _ = get_cc_model_ref_traces(cc_trace_opt_cc, cluster_id)
            ccs_model_agg_prev, _ = get_cc_model_ref_traces(cc_trace_opt_prev, cluster_id)
            ccs_model_agg_fit, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_fit, cluster_id)

            ax.plot(range(0, len(ccs_model_agg_cc)), ccs_model_agg_cc, alpha=1, linewidth=1, color='blue', label='Best fit: clinical cases', marker='s')
            ax.plot(range(0, len(ccs_model_agg_prev)), ccs_model_agg_prev, alpha=1, linewidth=1, color='magenta', label='Best fit: prevalence', marker='o')
            ax.plot(range(0, len(ccs_model_agg_fit)), ccs_model_agg_fit, alpha=1, linewidth=1, color='black', label='Best fit: prevalence + clinical cases', marker='*')
            ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, linestyle='-', color='red', label='Observed in ' + facility, marker=None)

            # Individual unweighted sample traces; only the first one gets a
            # label so the legend shows a single entry for the whole set.
            sample_traces = ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']
            for sample_idx, sample_ccs in enumerate(sample_traces):
                sample_style = dict(alpha=0.5, linewidth=0.5, color='magenta', marker=None)
                if sample_idx == 0:
                    sample_style['label'] = 'Opt 5-percentile samples for cluster ' + cluster_id
                ax.plot(bin_idxs, sample_ccs[2], **sample_style)

            plt.xlabel('6-week bins', fontsize=8)
            plt.ylabel('Clinical cases', fontsize=8)
            plt.legend(loc=1, fontsize=8)
            plt.xlim(0, 8)
            plt.title('Clinical cases timeseries', fontsize=8, fontweight='bold', color='black')
            plt.gca().tick_params(axis='x', labelsize=8)
            plt.gca().tick_params(axis='y', labelsize=8)
            plt.tight_layout()

            output_plot_file_path = os.path.join(self.root_sweep_dir, weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name + cluster_id + '.png')
            plt.savefig(output_plot_file_path, dpi=300, format='png')
            plt.close()

            clusters_processed = clusters_processed + 1

            # NOTE(review): treated as a per-cluster progress message;
            # confirm it was not meant to be logged once after all HFCAs.
            debug_p('Processed weighting and plotting clinical cases for ' + str(clusters_processed) + ' clusters')