def get_cached_mse(self):
    """Return the pre-calculated MSE fit term for this model.

    The value is read from the cached entry
    ``self.all_fits[self.cluster_id][self.sim_key]``.

    When the module-level ``scale_fit_terms`` flag is truthy, the cached MSE
    is passed through ``val_scale`` together with the cached max/min MSE
    terms, which are extracted with ``unroll_term`` using
    ``fit_terms_types['mse']``. Otherwise the raw cached MSE is returned.

    Returns:
        The (optionally scaled) cached MSE. If ``self.all_fits`` is falsy,
        ``error_loading_fit_terms('mse')`` is called and, provided that
        helper returns normally, ``None`` is returned.
    """
    if not self.all_fits:
        error_loading_fit_terms('mse')
        return None

    cached = self.all_fits[self.cluster_id][self.sim_key]
    if not scale_fit_terms:
        return cached['fit_terms']['mse']

    min_term = unroll_term(cached['min_terms'], fit_terms_types['mse'])
    max_term = unroll_term(cached['max_terms'], fit_terms_types['mse'])
    # Scale the MSE value itself (the same entry the unscaled branch
    # returns), not the whole 'fit_terms' dict.
    return val_scale(cached['fit_terms']['mse'], max_term, min_term)
def __init__(self, model, sim_data, cluster_id,
             reinfection_penalty=0.0, reinfection_penalty_weight=0.0,
             clinical_cases_penalty=0.0, clinical_cases_penalty_weight=0.0,
             all_fits=None):
    """Wrap a model together with its reinfection / clinical-cases penalties.

    The penalties are either computed from ``sim_data`` or loaded from the
    pre-calculated ``all_fits`` cache, depending on the module-level
    settings ``reinf_weight``, ``load_reinf_penalty``, ``load_cc_penalty``,
    ``cc_penalty_model`` and ``scale_fit_terms``.

    Args:
        model: object exposing ``get_meta()``; its meta dict must contain
            ``'sim_key'``.
        sim_data: simulation output; ``sim_data['cc']`` is read when the
            clinical cases penalty is computed rather than loaded.
        cluster_id: cluster identifier, used to index ``all_fits`` and
            passed to the penalty setters.
        reinfection_penalty: initial reinfection penalty (also stored as
            ``reinfection_penalty_term``).
        reinfection_penalty_weight: initial reinfection penalty weight.
        clinical_cases_penalty: initial clinical cases penalty (also stored
            as ``clinical_cases_penalty_term``).
        clinical_cases_penalty_weight: initial clinical cases penalty weight.
        all_fits: optional pre-calculated fits, indexed as
            ``all_fits[cluster_id][sim_key]``. When a penalty must be loaded
            and this is falsy, ``error_loading_fit_terms`` is called.
    """
    self.cluster_id = cluster_id
    self.sim_data = sim_data
    # pre calculated fits
    self.all_fits = all_fits

    # reinfection penalty state
    self.reinfection_penalty = reinfection_penalty
    self.reinfection_penalty_term = reinfection_penalty
    self.reinfection_penalty_weight = reinfection_penalty_weight
    self.ref_reinfection_num_points = 0
    self.ref_avg_reinfection_rate = 0.0
    self.sim_avg_reinfection_rate = 0.0

    # clinical cases penalty state
    self.clinical_cases_penalty = clinical_cases_penalty
    self.clinical_cases_penalty_term = clinical_cases_penalty
    self.clinical_cases_penalty_weight = clinical_cases_penalty_weight
    self.ref_clinical_cases_num_points = 0
    self.rho = None
    self.p_val = None

    self.model = model
    self.sim_key = self.model.get_meta()['sim_key']

    # --- reinfection penalty: skipped entirely when its weight is zero ---
    if reinf_weight != 0:
        model_report_channels = sim_report_channels_model_format(reports_channels, self.sim_data)
        if load_reinf_penalty:
            if self.all_fits:
                cached_fit = self.all_fits[self.cluster_id][self.sim_key]
                self.reinfection_penalty = cached_fit['reinf_penalty']
                self.reinfection_penalty_weight = reinf_weight
            else:
                error_loading_fit_terms('reinfection penalty')
        else:
            self.set_reinfection_penalty(model_report_channels['reinfections'], self.cluster_id)

    # --- clinical cases penalty ---
    if not load_cc_penalty:
        # compute from simulation data; every matching penalty model runs
        if 'ls_folded_norm' in cc_penalty_model:
            self.set_clinical_cases_penalty_by_ls(self.sim_data['cc'], self.cluster_id)
        if 'ls_folded_no_norm' in cc_penalty_model:
            self.set_clinical_cases_penalty_by_ls_no_norm(self.sim_data['cc'], self.cluster_id)
        if 'corr' in cc_penalty_model:
            self.set_clinical_cases_penalty_by_corr(self.sim_data['cc'], self.cluster_id)
    elif self.all_fits:
        # load from the cache; lookups stay lazy so nothing is read from
        # all_fits unless a penalty model actually matches

        def cached_cc_terms():
            return self.all_fits[self.cluster_id][self.sim_key]['fit_terms']['cc_penalty']

        def scaling_bounds(type_key):
            # max is looked up before min, each through fit_terms_types
            upper = unroll_term(self.all_fits[self.cluster_id][self.sim_key]['max_terms'], fit_terms_types[type_key])
            lower = unroll_term(self.all_fits[self.cluster_id][self.sim_key]['min_terms'], fit_terms_types[type_key])
            return upper, lower

        max_term = 0
        min_term = 0

        if 'ls_folded_norm' in cc_penalty_model:
            self.clinical_cases_penalty = cached_cc_terms()['ls_norm']
            if scale_fit_terms:
                max_term, min_term = scaling_bounds('ls_norm')
        elif 'ls_norm_not_folded' in cc_penalty_model:
            self.clinical_cases_penalty = cached_cc_terms()['ls_norm_not_folded']
            if scale_fit_terms:
                # placeholder: reuses the 'ls_norm' path; fit_terms_types
                # needs its own entry for ls_norm_not_folded if that model
                # is used again, otherwise this branch can be removed
                max_term, min_term = scaling_bounds('ls_norm')

        if 'ls_no_norm' in cc_penalty_model:
            self.clinical_cases_penalty = cached_cc_terms()['ls_no_norm']
            if scale_fit_terms:
                # placeholder: reuses the 'ls_norm' path (same caveat as
                # for ls_norm_not_folded)
                max_term, min_term = scaling_bounds('ls_norm')

        # correlation based penalties also carry rho and p-value
        for corr_model in ('corr_folded', 'corr_not_folded'):
            if corr_model in cc_penalty_model:
                self.clinical_cases_penalty = cached_cc_terms()[corr_model]['penalty']
                if scale_fit_terms:
                    max_term, min_term = scaling_bounds(corr_model)
                corr_terms = cached_cc_terms()[corr_model]
                self.rho = corr_terms['rho']
                self.p_val = corr_terms['p_val']

        # keep the unscaled value around before (optionally) scaling it
        self.clinical_cases_penalty_term = self.clinical_cases_penalty
        if scale_fit_terms:
            # should have found proper min_term and max_term if scale_fit_terms is True
            self.clinical_cases_penalty = val_scale(self.clinical_cases_penalty, max_term, min_term)
        self.clinical_cases_penalty_weight = cc_weight
    else:
        error_loading_fit_terms('clinical cases penalty')

    self.set_model_penalties()
def set_reinfection_penalty(self, model_reinfection_rates, cluster_id):
    """Compute the reinfection penalty and its weight for one cluster.

    Reference reinfection data for ``cluster_id`` is obtained from
    ``cluster_2_reinfection_rates``. For every consecutive round pair
    (1,2), (2,3), (4,5), (5,6) -- pair (3,4) is skipped -- that is present
    in ``model_reinfection_rates``, the reference rate (``reinf / total``)
    is compared with the model rate.

    The method works in two passes:

    1. Collect the min/max of the reinfection rates and of the
       linked-people-to-cluster-population ratios (used for feature
       scaling) and accumulate the average reference / model rates.
    2. For round pairs with more than 40 linked people, add the weighted
       squared error between the scaled reference and model rates to the
       penalty.

    Side effects on ``self``:
        * ``reinfection_penalty`` -- set to the weighted squared error, or
          to ``0.0`` when no usable data is available.
        * ``reinfection_penalty_weight`` -- set to the fraction of usable
          round pairs over ``max_ref_reinfection_points`` (only when the
          second pass is reached).
        * ``ref_reinfection_num_points`` -- number of round pairs used.
        * ``sim_avg_reinfection_rate`` / ``ref_avg_reinfection_rate`` --
          accumulated onto their current values, then averaged.

    Args:
        model_reinfection_rates: mapping holding the model reinfection
            rates; membership is tested with ``'reinf_<a>_<b>'`` keys and
            values are read from ``'round_<a>_<b>'`` keys.
        cluster_id: identifier of the cluster whose reference data is used.

    Returns:
        None.
    """
    ref_reinfection_rates = cluster_2_reinfection_rates(cluster_id)
    if not ref_reinfection_rates:
        # no reinfection data found so set penalty to 0
        self.reinfection_penalty = 0.0
        return

    # NOTE(review): the result of this call is not used below; the call is
    # retained in case it has side effects -- confirm and remove if not.
    cluster_2_pops(cluster_id)

    # consecutive round pairs (1,2) ... (5,6), excluding (3,4)
    round_pairs = [(rnd, rnd + 1) for rnd in range(1, 6) if rnd != 3]

    # ---- pass 1: find max and min values of the features ----
    reinfection_feature = []
    pop_feature = []
    total_pop = 0.0
    count_reinf = 0

    for rnd_a, rnd_b in round_pairs:
        suffix = str(rnd_a) + '_' + str(rnd_b)
        ref_key = 'reinf_' + suffix
        if ref_key not in model_reinfection_rates:
            continue

        cluster_pop = get_cluster_pop_per_rnd_pair(rnd_a, rnd_b)
        ref_entry = ref_reinfection_rates[ref_key]
        total_pop = total_pop + ref_entry['total']
        if not cluster_pop:
            continue

        # list.append mutates in place and returns None, so its result must
        # not be assigned back; the ratio is forced to float so it matches
        # the one scaled against these bounds in pass 2
        pop_feature.append(ref_entry['total'] / (cluster_pop + 0.0))

        ref_reinfection_rate = ref_entry['reinf'] / (ref_entry['total'] + 0.0)
        # NOTE(review): membership is tested with the 'reinf_' key but the
        # value is read from the 'round_' key -- confirm the mapping holds
        # both.
        model_reinfection_rate = model_reinfection_rates['round_' + suffix]

        if is_number(ref_reinfection_rate) and is_number(model_reinfection_rate):
            reinfection_feature.append(ref_reinfection_rate)
            reinfection_feature.append(model_reinfection_rate)
            self.sim_avg_reinfection_rate = self.sim_avg_reinfection_rate + model_reinfection_rate
            self.ref_avg_reinfection_rate = self.ref_avg_reinfection_rate + ref_reinfection_rate
            count_reinf = count_reinf + 1

    if count_reinf != 0:
        self.sim_avg_reinfection_rate = self.sim_avg_reinfection_rate / (count_reinf + 0.0)
        self.ref_avg_reinfection_rate = self.ref_avg_reinfection_rate / (count_reinf + 0.0)

    if not reinfection_feature or not pop_feature:
        # no data observed; penalty is set to 0.0
        self.reinfection_penalty = 0.0
        return

    max_reinf_val = max(reinfection_feature)
    min_reinf_val = min(reinfection_feature)
    max_pop_val = max(pop_feature)
    min_pop_val = min(pop_feature)

    # ---- pass 2: weighted square error between scaled features ----
    # only round pairs with more than this many linked people contribute
    num_linked_threshold = 40
    self.reinfection_penalty = 0.0
    self.ref_reinfection_num_points = 0.0

    for rnd_a, rnd_b in round_pairs:
        suffix = str(rnd_a) + '_' + str(rnd_b)
        ref_key = 'reinf_' + suffix
        if ref_key not in model_reinfection_rates:
            continue
        if not ref_reinfection_rates[ref_key]['total'] > num_linked_threshold:
            continue

        cluster_pop = get_cluster_pop_per_rnd_pair(rnd_a, rnd_b)
        if not cluster_pop:
            continue

        ref_entry = ref_reinfection_rates[ref_key]
        ref_reinfection_rate = ref_entry['reinf'] / (ref_entry['total'] + 0.0)
        model_reinfection_rate = model_reinfection_rates['round_' + suffix]
        if not (is_number(ref_reinfection_rate) and is_number(model_reinfection_rate)):
            continue

        self.ref_reinfection_num_points = self.ref_reinfection_num_points + 1

        # weight the square error for this round pair proportionally to
        # the number of linked people for this round pair over the total
        # number of linked people at this cluster for all rounds; also
        # multiply by a weight depending on how close the number of linked
        # people is to the known population of the cluster at these
        # rounds -- the round pair with the closest number of linked
        # people is weighted the most
        linked_ratio = ref_entry['total'] / (cluster_pop + 0.0)
        rnd_pair_weight = val_scale(linked_ratio, max_pop_val, min_pop_val) * ref_entry['total'] / total_pop
        se = pow(val_scale(ref_reinfection_rate, max_reinf_val, min_reinf_val)
                 - val_scale(model_reinfection_rate, max_reinf_val, min_reinf_val), 2)
        self.reinfection_penalty = self.reinfection_penalty + rnd_pair_weight * se

    # weight the reinfection penalty at this cluster based on how much data
    # is available; the number of potentially available reinfection
    # measurements is max_ref_reinfection_points in kariba_settings.py
    self.reinfection_penalty_weight = self.ref_reinfection_num_points / (max_ref_reinfection_points + 0.0)
    return