def get_cc_penalty(fit_entry):
    """Return the clinical-cases penalty of *fit_entry* for the currently
    configured penalty model.

    The module-level ``cc_penalty_model`` selects which penalty variant to
    read out of ``fit_entry['fit_terms']['cc_penalty']``. The checks are
    ordered, so the first matching variant wins. Returns None (after logging)
    when no known variant matches.
    """
    # Ordered (model marker, key path into the cc_penalty sub-dict) pairs.
    _PENALTY_PATHS = (
        ('corr_folded', ('corr_folded', 'penalty')),
        ('corr_not_folded', ('corr_not_folded', 'penalty')),
        ('ls_folded_norm', ('ls_norm',)),
        ('ls_norm_not_folded', ('ls_norm_not_folded',)),
        ('ls_no_norm', ('ls_no_norm',)),
    )

    for marker, key_path in _PENALTY_PATHS:
        if marker in cc_penalty_model:
            # Index lazily so fit_entry is only touched on a match.
            node = fit_entry['fit_terms']['cc_penalty']
            for key in key_path:
                node = node[key]
            return node

    debug_p('No clinical cases penalty found. This should not happen!')
    return None
def weighted_ccs_combos(hfca_id, weighted_ccs_model_agg_by_hfca):
    """Build up to ``min_num_combos_per_hfca`` cross-cluster combinations of
    randomly sampled weighted clinical-case time series for one HFCA.

    For each cluster, ``sample_size`` weighted traces are drawn at random;
    the per-cluster sample lists are shuffled and combined via a cartesian
    product, truncated to the requested number of combos.

    Returns (ccs_combos_hfca, samples_by_cluster).
    """
    debug_p('Generating clinical cases combos')

    # Draw sample_size weighted clinical-case traces per cluster.
    samples_by_cluster = dict(
        (cluster_id, random.sample(ccs['weighted'], sample_size))
        for cluster_id, ccs in weighted_ccs_model_agg_by_hfca.iteritems()
    )

    # Shuffle the order in which clusters contribute to the product.
    shuffled_ccs = samples_by_cluster.values()
    np.random.shuffle(shuffled_ccs)

    # Cartesian product across clusters, truncated to the requested count.
    ccs_combos_hfca = list(
        it.islice(it.product(*shuffled_ccs), min_num_combos_per_hfca)
    )

    debug_p('DONE generating clinical cases combos')

    return ccs_combos_hfca, samples_by_cluster
    def set_clinical_cases_penalty_by_corr(self, model_clinical_cases, cluster_id):
        """Set this model's clinical-cases penalty from the Spearman rank
        correlation between the aggregated model and reference clinical-case
        traces of *cluster_id*.

        Stores the penalty (1 - rho), its weight, and the correlation
        statistics (rho, p-value) on self.
        """
        ccs_model_agg, ccs_ref_agg = get_cc_model_ref_traces(model_clinical_cases, cluster_id)

        # Rank correlation between reference and model aggregated traces.
        rho, p = spearmanr(ccs_ref_agg, ccs_model_agg)

        # Perfect correlation (rho == 1) gives zero penalty; weaker
        # correlation increases it.
        penalty = 1 - rho
        self.clinical_cases_penalty = penalty
        self.clinical_cases_penalty_term = penalty

        # cc_weight is a module-level configuration constant.
        self.clinical_cases_penalty_weight = cc_weight

        self.rho = rho
        self.p_val = p

        # Trace strongly correlated fits for debugging.
        if rho > 0.75:
            debug_p('clinical cases rho ' + str(rho))
            debug_p('clinical cases p-value ' + str(p))
            debug_p('clinical cases penalty ' + str(self.clinical_cases_penalty))
            debug_p('weighted clinical cases penalty ' + str(self.clinical_cases_penalty_weight*self.clinical_cases_penalty))
def get_prevalence_based_cc(best_fits, all_fits, calib_data):
    """Sample clinical-case traces from the prevalence-optimal region of each
    cluster and aggregate them per HFCA.

    For every cluster, ``sample_size`` simulations are drawn from its
    prevalence-optimal region; each sampled trace is aggregated both weighted
    (by health-seeking behavior and mean cluster population) and unweighted.
    The result is persisted as JSON and returned.
    """
    weighted_ccs_by_hfca_id_file_path = os.path.join(sim_data_dir, weighted_ccs_by_hfca_id_file)

    debug_p('Getting clinical cases samples based on prevalence optimal regions')

    weighted_ccs_model_agg_by_hfca = {}
    for hfca_id in get_hfca_ids():
        hfca_id = str(hfca_id)
        weighted_ccs_model_agg_by_hfca[hfca_id] = {}

        for cluster_id in hfca_id_2_cluster_ids(hfca_id):

            cluster_cat = get_cluster_category(cluster_id)
            sims_opt_region = get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id)

            # assume sample size is always less than the size of the population!
            for sample_group_key, sample_sim_key in random.sample(sims_opt_region, sample_size):

                sample_cc_trace = calib_data[cluster_cat][sample_group_key][sample_sim_key]

                # Weight accounts for health-seeking behavior data and the
                # real cluster population (mean across all rounds).
                weight_factor = get_cc_cluster_weight_factor(cluster_id)
                weight_factor = (cluster_2_mean_pop(cluster_id)/(calib_node_pop + 0.0)) * weight_factor

                ccs_model_agg, ccs_ref_agg = get_cc_model_ref_traces(sample_cc_trace, cluster_id, weight_factor)
                ccs_model_agg_unweighted, ccs_ref_agg_unweighted = get_cc_model_ref_traces(sample_cc_trace, cluster_id)

                cluster_entry = weighted_ccs_model_agg_by_hfca[hfca_id].setdefault(
                    cluster_id, {'weighted': [], 'unweighted': []})
                cluster_entry['weighted'].append(ccs_model_agg)
                cluster_entry['unweighted'].append((sample_group_key, sample_sim_key, ccs_model_agg_unweighted))

    # Persist the aggregation for downstream combo generation / plotting.
    with open(weighted_ccs_by_hfca_id_file_path, 'w') as w_ccs_f:
        json.dump(weighted_ccs_model_agg_by_hfca, w_ccs_f, indent = 3)

    debug_p('DONE getting clinical cases samples based on prevalence optimal regions')

    debug_p('Saved clinical cases samples based on prevalence optimal regions to ' + weighted_ccs_by_hfca_id_file_path)

    return weighted_ccs_model_agg_by_hfca
# Beispiel #5
# 0
def refresh_school_info(
    homepage_response='',
    a_task: utils.Atask = object(),
    sess=object,
    m_headers=None,
    m_cookies=None,
    schl_abbr='',
    sql_conn=object,
) -> dict:
    """Refresh a school's configuration: opening time, user/school names and
    the seat map of every classroom, persisting seat maps to sqlite.

    Args:
        homepage_response: pre-fetched homepage HTML; fetched when empty.
        a_task: task object providing BASE_URL entries and platform.
        sess: HTTP session passed through to utils.get_response.
        m_headers: request headers; a fresh dict when omitted.
        m_cookies: request cookies; a fresh dict when omitted.
        schl_abbr: school abbreviation key.
        sql_conn: open sqlite3 connection for the seat-map table.

    Returns:
        dict with open_time, user_name, schl_abbr, school_name and the
        classroom list (only classrooms whose seat map was stored), or {}
        when a seat-map page could not be fetched.
    """
    # BUGFIX: m_headers/m_cookies used to default to {} — a shared mutable
    # default evaluated once at definition time. Normalize None -> fresh dict.
    if m_headers is None:
        m_headers = {}
    if m_cookies is None:
        m_cookies = {}

    user_conf_dict = {}    # aggregated info returned to the caller
    libid_and_name = {}    # libid -> classroom name
    # sql_param rows: (platform, schl_abbr, schl_nm, open_time, libid, clssrm_nm, seatmap_json)
    sql_param = []

    # Fetch the usage-rules page and extract the library opening time.
    usage_rules_url = a_task.BASE_URL['rules']
    html_opentime = utils.get_response(url=usage_rules_url,
                                       sess=sess,
                                       m_headers=m_headers,
                                       m_cookies=m_cookies,
                                       verify_key='使用规则')

    open_time = get_opentime(html_opentime)
    user_conf_dict['open_time'] = open_time

    # Fetch the homepage unless the caller already supplied its HTML.
    homepage_url = a_task.BASE_URL['home_page']
    if not homepage_response:
        homepage_response = utils.get_response(url=homepage_url,
                                               sess=sess,
                                               m_headers=m_headers,
                                               m_cookies=m_cookies,
                                               verify_key='')

    # Parse user and school names from the homepage.
    user_name, school_name = get_name(homepage_response)
    user_conf_dict['user_name'] = user_name
    user_conf_dict['schl_abbr'] = schl_abbr.lower()
    user_conf_dict['school_name'] = school_name

    # clssrm: list of {'classroom_name': ..., 'libid': ..., 'path': ...}
    clssrm = get_classroom(homepage_response)

    # Visit each classroom's seat-map page and collect its seat map.
    for i, cr in enumerate(clssrm):
        try:
            time.sleep(0.2)  # throttle requests to the server
            libid_and_name[cr['libid']] = cr['classroom_name']
            # Fetch the seat-map page for this classroom.
            seat_map_page = utils.get_response(url=a_task.BASE_URL['host'] +
                                               cr['path'],
                                               sess=sess,
                                               m_headers=m_headers,
                                               m_cookies=m_cookies)
            if not seat_map_page:
                utils.debug_p(
                    '[E]: crawldata.py -> refresh_school_info() -> seat_map_page is none'
                )
                return {}
            # seat_map = {seat_num: coordinate, seat_num2: coordinate2, ...}
            seat_map = get_seatmap(seat_map_page)
            if not seat_map:
                # seat map could not be parsed; skip this classroom
                continue
            cr['seat_map'] = seat_map
            sql_param.append(
                (a_task.platform, schl_abbr, school_name, open_time,
                 int(cr['libid']), cr['classroom_name'],
                 json.dumps(cr['seat_map'])))
        except Exception:
            # Best-effort: log the failing classroom and keep going.
            utils.debug_p(
                'refresh_school_info has a seat_map_page error; cr[\'classroom_name\']=',
                clssrm[i].get('classroom_name', 'null-classroom_name'),
                traceback.format_exc())

    # BUGFIX: this was an unqualified debug_p(...) while every other call in
    # this function goes through utils.debug_p — the bare name would raise
    # NameError unless imported separately.
    utils.debug_p('[refresh_school_info]', 'sql_param=', sql_param)
    if len(sql_param) == 0:
        user_conf_dict['classroom'] = []
        return user_conf_dict

    user_conf_dict['classroom'] = clssrm
    # Insert/replace the collected seat maps in one batch.
    insert_many_sql = 'REPLACE  INTO ' + utils.SqlAct.tb_schl_lib_stmp + \
                      '(platform, schl_abbr, schl_nm, open_time, libid, clssrm_nm, seatmap_json) ' + \
                      'VALUES(?, ?, ?, ?, ?, ?, ?);'
    cur = sql_conn.cursor()
    cur.executemany(insert_many_sql, sql_param)
    sql_conn.commit()

    # Keep only classrooms whose seat map made it into sql_param
    # (row index 4 is the libid).
    libid_set = set(str(row[4]) for row in sql_param)
    user_conf_dict['classroom'] = [
        c for c in user_conf_dict['classroom'] if str(c['libid']) in libid_set
    ]
    return user_conf_dict
    def fit(self):
        """Fit calibration models against reference data for every cluster in
        this category.

        Returns a (best_fits, all_fits) pair: best_fits maps cluster_id to the
        best-fit record (habitat params, coverages, sim/group keys, fit value,
        plus 'fit', 'mse' and 'cc_penalty' sub-records); all_fits holds the
        global min/max residuals and the per-cluster model lists.
        """
        models_list_prime = calib_data_2_models_list(self.calib_data)
                
        best_fits = {}
        all_fits = {}
        #all_fits = {'fit':{'min_residual':float('inf')}, }
        
        # global residual bounds, updated across all clusters below
        all_fits['min_residual'] = float('inf')
        all_fits['max_residual'] = 0.0
        
        
        
        all_fits['models'] = {}
        
        debug_p('category ' + self.category)
        
        for idx,cluster_id in enumerate(c2c(self.category)):
        
            # fresh copy per cluster: models are wrapped/mutated below
            models_list = copy.deepcopy(models_list_prime)
            
            print "Processing cluster " + cluster_id + "."
            debug_p('Processing cluster ' + cluster_id + " in " + self.category + ".")
            
            itn_traj = cluster_2_itn_traj(cluster_id)
            drug_cov = cluster_2_drug_cov(cluster_id)
            
            # prune models to the ones matching prior data
            cluster_models = []
            for model in models_list:
                model_meta = model.get_meta()
                if model_meta['group_key'] == get_sim_group_key(itn_traj, drug_cov):
                    #debug_p('model id before kariba conversion ' + str(model.get_model_id()))
                    group_key = model_meta['group_key']
                    sim_key = model_meta['sim_key']

                    # wrap in a KaribaModel to attach calibration data and fit terms
                    model = KaribaModel(model, self.calib_data[group_key][sim_key], cluster_id, all_fits = self.fit_terms)
                    
                    #model = kariba_model
                    #debug_p('model id after kariba conversion ' + str(model.get_model_id()))
                    cluster_models.append(model)
                
            # collect reference (survey) data per objective channel
            surv_data = {}
            all_ref_objs_found = True
            for channel_code in objectives_channel_codes:
                if channel_code == 'prevalence':
                    prev_data = c2p(cluster_id)
                    if prev_data:
                        surv_data[channel_code] = prev_data
                    else:
                        msg = 'Prevalence objective reference data was not found!\n Skipping cluster ' + cluster_id + ' fit!'
                        print msg
                        all_ref_objs_found = False
                else:
                    msg = "Channel objective" + channel_code + " not implemented yet!\nSetting objective reference data to None."
                    warn_p(msg)
                    surv_data[channel_code] = None
            
            # one of the reference objective channels was not found; skipping cluster fit!
            if not all_ref_objs_found:
                continue
                        
            ref = d2f(surv_data)
            
            # adjust highest possible fit to account for RDT+ model in dtk not reflecting reality at the upper end
            obj_prev = ref.get_obj_by_name('prevalence')
            d_points = obj_prev.get_points()
            obj_prev.set_points([min(point, rdt_max) for point in d_points])
            
            
            fitting_set = FittingSet(cluster_id, cluster_models, ref)
            
            # use cached MSE values when prevalence MSEs were preloaded
            if load_prevalence_mse:
                fit = Fit(fitting_set, type = 'mmse_distance_cached')
            else:
                fit = Fit(fitting_set)
            
            best_fit_model = fit.best_fit_mmse_distance()
            
            min_residual = fit.get_min_residual()
            max_residual = fit.get_max_residual()
            
            # fold this cluster's residual bounds into the global ones
            if min_residual  < all_fits['min_residual']:
                all_fits['min_residual'] = min_residual 
                
            if max_residual  > all_fits['max_residual']:
                all_fits['max_residual'] = max_residual
            
            if best_fit_model: 
            
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_model)
                best_fit_meta = best_fit_model.get_meta()
                best_fits[cluster_id] = {}
                best_fits[cluster_id]['habs'] = {}
                best_fits[cluster_id]['habs']['const_h'] = const_h 
                best_fits[cluster_id]['habs']['temp_h'] = temp_h
                best_fits[cluster_id]['ITN_cov'] = itn_level
                best_fits[cluster_id]['category'] = self.category
                best_fits[cluster_id]['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['sim_id'] = best_fit_meta['sim_id']
                best_fits[cluster_id]['sim_key'] = best_fit_meta['sim_key'] 
                best_fits[cluster_id]['group_key'] = best_fit_meta['group_key']
                best_fits[cluster_id]['fit_value'] = best_fit_model.get_fit_val()
                best_fits[cluster_id]['sim_avg_reinfection_rate'] = best_fit_model.get_sim_avg_reinfection_rate()
                best_fits[cluster_id]['ref_avg_reinfection_rate'] = best_fit_model.get_ref_avg_reinfection_rate()
                best_fits[cluster_id]['prevalence'] = best_fit_model.get_objective_by_name('prevalence').get_points()
            
                # redundancy; to be refactored via FitEntry class                
                best_fits[cluster_id]['fit'] = {}
                best_fits[cluster_id]['fit']['value'] = best_fit_model.get_fit_val()
                best_fits[cluster_id]['fit']['temp_h'] = temp_h
                best_fits[cluster_id]['fit']['const_h'] = const_h
                best_fits[cluster_id]['fit']['ITN_cov'] = itn_level
                best_fits[cluster_id]['fit']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['fit']['sim_id'] = best_fit_meta['sim_id']
                best_fits[cluster_id]['fit']['sim_key'] = best_fit_meta['sim_key']
                
                
                # record the model minimizing raw prevalence MSE separately
                best_fits[cluster_id]['mse'] = {}
                best_fits[cluster_id]['mse']['value'] = fit.get_min_mses()['prevalence']['value'] # get mmse for objective prevalence
                best_fit_mse_model = fit.get_min_mses()['prevalence']['model']
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_mse_model)
                model_meta_data = best_fit_mse_model.get_meta()
                best_fits[cluster_id]['mse']['temp_h'] = temp_h
                best_fits[cluster_id]['mse']['const_h'] = const_h
                best_fits[cluster_id]['mse']['ITN_cov'] = itn_level
                best_fits[cluster_id]['mse']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['mse']['sim_id'] = model_meta_data['sim_id']
                best_fits[cluster_id]['mse']['sim_key'] = model_meta_data['sim_key']
                
                # record the model minimizing the clinical-cases penalty separately
                best_fits[cluster_id]['cc_penalty'] = {}
                best_fits[cluster_id]['cc_penalty']['value'] = fit.get_min_penalties()['prevalence']['value'] # get clinical penalty for objective prevalence; at present this is just the clinical cases penalty; if reinfection is considered the code needs to be adjusted
                best_fit_cc_penalty_model = fit.get_min_penalties()['prevalence']['model']
                temp_h, const_h, itn_level, drug_coverage_level = get_model_params(best_fit_cc_penalty_model)
                model_meta_data = best_fit_cc_penalty_model.get_meta()
                best_fits[cluster_id]['cc_penalty']['temp_h'] = temp_h
                best_fits[cluster_id]['cc_penalty']['const_h'] = const_h
                best_fits[cluster_id]['cc_penalty']['ITN_cov'] = itn_level
                best_fits[cluster_id]['cc_penalty']['MSAT_cov'] = drug_coverage_level
                best_fits[cluster_id]['cc_penalty']['sim_id'] = model_meta_data['sim_id']
                best_fits[cluster_id]['cc_penalty']['sim_key'] = model_meta_data['sim_key']
                  
    
                rho = best_fit_model.get_rho()
                p_val = best_fit_model.get_p_val()
                
                if rho and p_val :
                    best_fits[cluster_id]['rho'] = rho
                    best_fits[cluster_id]['p_val'] = p_val
                    
                    debug_p('rho' + str(rho))
                    debug_p('p_val' + str(p_val)) 
                
                
            else:
                msg = "something went wrong and the best fit for " + cluster_id + " could not be found."
                warn_p(msg)
                
            
            all_fits['models'][cluster_id] = cluster_models
            #all_fits['models'][cluster_id] = fit.get_fitting_set_models()
            
            print str(idx+1) + " clusters have been processed."
            debug_p( str(idx+1) + " clusters have been processed in category " + self.category)
            
            '''
            if idx > 0:
                break 
            '''      
        return best_fits, all_fits
# Beispiel #7
# 0
    def best_fit_mmse_distance(self, cached = False):
        """Return the model minimizing the penalty-adjusted, objective-weighted
        MSE distance to the reference data.

        Side effects: records, per objective, the model with the smallest raw
        MSE and the model with the smallest penalty (self.min_mses /
        self.min_penalties), and updates self.min_residual / self.max_residual
        across all models.

        cached: when True, reuse each model's precomputed MSE instead of
        recomputing it against the reference points.
        """
        models_list = self.fitting_set.get_models_list()

        ref = self.fitting_set.get_ref()

        min_distance = float('inf')
        min_mses = {}
        min_penalties = {}

        # assume all models have the same objectives and initialize min obj penalty to inf
        # initialize model minimizer to first model in list
        for obj in models_list[0].get_objectives():
            min_penalties[obj.get_name()] = {}
            min_penalties[obj.get_name()]['value'] = float('inf')
            min_penalties[obj.get_name()]['model'] = models_list[0]

            min_mses[obj.get_name()] = {}
            min_mses[obj.get_name()]['value'] = float('inf')
            min_mses[obj.get_name()]['model'] = models_list[0]

        best_fit = None

        debug_p(len(models_list))
        debug_p('======================================================================================')
        for model in models_list:

            distance = None
            mse = 0.0
            for obj in model.get_objectives():
                m_points = obj.get_points()     # model values for this obj
                d_points = ref.get_obj_by_name(obj.get_name()).get_points()  # reference data values for this obj

                points_weights = obj.get_points_weights()

                if not cached:
                    mse = self.mse(m_points, d_points, points_weights)
                else:
                    mse = model.get_cached_mse()

                # FIX: identity comparison with None (was "not mse == None")
                if mse is not None:

                    if mse <= min_mses[obj.get_name()]['value']:
                        min_mses[obj.get_name()]['value'] = mse
                        min_mses[obj.get_name()]['model'] = model

                    if obj.get_model_penalty() <= min_penalties[obj.get_name()]['value']:
                        min_penalties[obj.get_name()]['value'] = obj.get_model_penalty()
                        min_penalties[obj.get_name()]['model'] = model

                    # accumulate the objective-weighted, penalty-adjusted distance
                    # FIX: identity comparison with None (was "distance == None")
                    if distance is None:
                        distance  = obj.get_weight()*(mse + obj.get_model_penalty())
                    else:
                        distance  = distance + obj.get_weight()*(mse + obj.get_model_penalty())

            # NOTE(review): this stores only the *last* objective's MSE on the
            # model — confirm intended when models have multiple objectives.
            model.set_mse(mse)

            # NOTE(review): a distance of exactly 0.0 is falsy and skipped
            # here — confirm whether a perfect fit should be handled.
            if distance:

                # a bit redundant since we also find min_distance; will need to adjust
                if distance < self.min_residual:
                    self.min_residual = distance

                if distance > self.max_residual:
                    self.max_residual = distance

                if distance <= min_distance:

                    debug_p('current best distance ' + str(min_distance))
                    if best_fit:
                        debug_p('current best fit model mmse ' + str(best_fit.get_fit_val()))
                    debug_p('improving fit ' + str(distance))
                    debug_p('fit difference ' + str(distance - min_distance))

                    min_distance = distance
                    model.set_fit_val(min_distance)
                    best_fit = model
                else:
                    model.set_fit_val(distance)

        # expose the per-objective minimizers to callers
        self.min_mses = min_mses
        self.min_penalties = min_penalties

        return best_fit
    def plot_calib_prev_traces(self):
        """Plot, per cluster in self.best_fits, the simulated prevalence
        timeseries of the best-fit models (combined fit, prevalence-only fit,
        clinical-cases-only fit), the sampled opt-region traces, and the
        observed prevalence points; save one PNG per cluster.
        """
        count = 0
        for cluster_id, cluster_record in self.best_fits.iteritems():
            
            debug_p('Plotting prevalence trace for cluster ' + cluster_id + ' in category ' + self.category)
        
            #fig = plt.figure(cluster, figsize=(11, 4), dpi=100, facecolor='white')
            fig = plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
            
            gs = gridspec.GridSpec(1, 2)
            ymax = 16
        
            # colormap setup for trace coloring
            scale_int = np.array(range(0,ymax+1))
            pal = cm = plt.get_cmap('jet') 
            cNorm  = colors.Normalize(vmin=0, vmax=ymax+1)
            scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pal)
                         
            ax = plt.subplot(gs[0:2])

            # plot window: simulation days 2150-3010
            ax.set_xlim(2150,3010)    
            
            ref = self.get_ref(cluster_id)
            
            if not ref:
                # no reference object could be constructed; no data found; return w/o plotting
                return
            
            
            # load the previously saved opt-region clinical-case samples
            with open(os.path.join(sim_data_dir,weighted_ccs_by_hfca_id_file), 'r') as wccs_f:
                cluster_ccs_samples = json.load(wccs_f)
            
            # cluster ids are of the form '<hfca_id>_<suffix>'
            hfca_id = cluster_id.split('_')[0]
            
            # plot the opt-region sample traces (label only the first one)
            count_labels = 0
            for (group_key, sim_key, ccs) in cluster_ccs_samples[hfca_id][cluster_id]['unweighted']:
                 prev_trace = self.calib_data[group_key][sim_key]['prevalence']
                 if count_labels == 0:
                     ax.plot(range(2150, 3000), prev_trace[2149:2999], alpha=0.35, linewidth=0.5, color = 'magenta', label = 'Opt 5-percentile samples for cluster ' + cluster_id)
                     count_labels += 1
                 else:
                    ax.plot(range(2150, 3000), prev_trace[2149:2999], alpha=0.35, linewidth=0.5, color = 'magenta', marker = None)



            # best combined (prevalence + clinical cases) fit trace, in black
            opt_sim_key = cluster_record['sim_key']
            opt_group_key = cluster_record['group_key']
            
            opt_const_h = cluster_record['habs']['const_h']
            opt_x_temp_h = cluster_record['habs']['temp_h']
            opt_itn = cluster_record['ITN_cov']
            opt_drug = cluster_record['MSAT_cov']
            opt_fit_value = cluster_record['fit_value']
            
            opt_prev_trace = self.calib_data[opt_group_key][opt_sim_key]['prevalence']
            
            ax.plot(range(2150, 3000), opt_prev_trace[2149:2999], alpha=1, linewidth=2.0, c = 'black', label = 'Best fit: prevalence + clinical cases: eff. constant=' + str(opt_const_h*opt_x_temp_h) + ', all='+str(opt_x_temp_h) + ', drug cov.' + str(opt_drug) + ', ITN dist. = '+str(opt_itn))
            
            
            # best prevalence-only (MSE) fit trace, in magenta
            sim_key = cluster_record['mse']['sim_key']
            # avoid doing that and add group_key to corresponding terms in best_fits
            group_key = sim_key_2_group_key(sim_key)

            const_h = cluster_record['mse']['const_h']
            x_temp_h = cluster_record['mse']['temp_h']
            itn = cluster_record['mse']['ITN_cov']
            drug = cluster_record['mse']['MSAT_cov']
            
            prev_trace_by_prev = self.calib_data[group_key][sim_key]['prevalence']
            
            ax.plot(range(2150, 3000), prev_trace_by_prev[2149:2999], alpha=1, linewidth=2.0, c = 'magenta', label = 'Best fit: prevalence: eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h) + ', drug cov.' + str(drug) + ', ITN dist. = '+str(itn))
            
            
            # best clinical-cases-only fit trace, in blue
            sim_key = cluster_record['cc_penalty']['sim_key']
            group_key = sim_key_2_group_key(sim_key)
            
            const_h = cluster_record['cc_penalty']['const_h']
            x_temp_h = cluster_record['cc_penalty']['temp_h']
            itn = cluster_record['cc_penalty']['ITN_cov']
            drug = cluster_record['cc_penalty']['MSAT_cov']
            
            prev_trace_by_cc = self.calib_data[group_key][sim_key]['prevalence']
            
            ax.plot(range(2150, 3000), prev_trace_by_cc[2149:2999], alpha=1, linewidth=2.0, c = 'blue', label = 'Best fit: clinical cases: eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h) + ', drug cov.' + str(drug) + ', ITN dist. = '+str(itn))

            
            # scatter the observed and simulated prevalence at survey rounds
            obs_prevs = ref.to_dict()['prevalence']['d_points']
            
            label_obs_prev_shown = False
            label_sim_prev_shown = False
            max_obs_prev = 0.0


            for i,prev in enumerate(obs_prevs):
                
                if prev != 'nan':
                    if max_obs_prev < prev:
                        max_obs_prev = prev
                        
                    # only label the first observed point to avoid legend spam
                    if not label_obs_prev_shown:
                        label_obs_prev = 'Observed prevalence'
                        label_obs_prev_shown = True
                    else: 
                        label_obs_prev = None
                         
                    ax.scatter(channels_sample_points['prevalence'][i], prev, c = 'red', facecolor = 'red', marker='o', s = 40, label = label_obs_prev, zorder=200)
                    
                if not label_sim_prev_shown:
                    label_sim_prev = 'Best fit simulated prevalence at rnds.'
                    label_sim_prev_shown = True
                else: 
                    label_sim_prev = None
                
                ax.scatter(channels_sample_points['prevalence'][i], opt_prev_trace[channels_sample_points['prevalence'][i]], c = 'black', facecolor = 'none', marker='o', s = 60, label = label_sim_prev, zorder=150)
                
                
            '''
            count_traces = 0 
            for sim_key,fit_entry in self.all_fits[cluster_id].iteritems():

                if sim_key == 'min_terms' or sim_key == 'max_terms':
                    continue
                
                group_key = fit_entry['group_key']
                fit_val = fit_entry['fit_val']
            
                const_h = fit_entry['const_h']  
                x_temp_h = fit_entry['x_temp_h']
            
                #if sim_key == opt_sim_key or fit_val > opt_fit_value + opt_fit_value*subopt_plots_threshold or count_traces > 10:
                #if fit_entry['fit_terms']['mse'] <= satp(z, sample_size_percentile):
                #do not plot optimal traces since we've already plotted it ;also do not plot too many suboptimal traces
                #    continue
                
                prev_trace = self.calib_data[group_key][sim_key]['prevalence']
                marker = self.get_marker(sim_key, count_traces)
                #ax.plot(range(2180, 3000), prev_trace[2179:2999], alpha=0.75, linewidth=0.5,  marker = marker, markersize = 0.5*opt_marker_size, label = 'eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h))
                #ax.plot(range(2180, 3000), prev_trace[2179:2999], alpha=0.75, linewidth=0.5,  marker = marker, markersize = 0.5*opt_marker_size)
                ax.plot(range(2150, 3000), prev_trace[2149:2999], alpha=0.75, linewidth=0.5)
                
                
                for i,prev in enumerate(obs_prevs):                    
                    ax.scatter(channels_sample_points['prevalence'][i], prev_trace[channels_sample_points['prevalence'][i]], marker = marker, c = 'black', facecolor = 'none', s = 30)
                
                count_traces = count_traces + 1 
            '''
            
            
            # finalize axes/labels and save one PNG per cluster
            ax.set_ylim(0,min(max(max_obs_prev, max(opt_prev_trace))+0.1,1))            
            plt.xlabel('Time (days)', fontsize=8)
            plt.ylabel('Prevalence (population fraction)', fontsize=8)
            plt.legend(loc=1, fontsize=8)
            plt.title('Prevalence timeseries', fontsize = 8, fontweight = 'bold', color = 'black')
            plt.gca().tick_params(axis='x', labelsize=8)
            plt.gca().tick_params(axis='y', labelsize=8)
        
            plt.tight_layout()
            output_plot_file_path = os.path.join(self.root_sweep_dir, traces_plots_dir, traces_base_file_name + cluster_id + '.png')
            plt.savefig(output_plot_file_path, dpi = 300, format='png')
            plt.close()
    
            count = count + 1
    def plot_weighted_cc_per_hfca(self, weighted_ccs_model_agg_by_hfca, ccs_model_agg_by_hfca_cluster_id):
        """Plot weighted clinical-case (cc) percentile envelopes per HFCA.

        For each hfca_id, every combo of per-cluster weighted cc timeseries is
        summed element-wise into cc_num_fold_bins bins; the 2.5/50/97.5
        percentiles across combos form a shaded envelope. Each cluster of the
        HFCA then gets its own figure showing that envelope, the best-fit model
        traces under three criteria (cc penalty, prevalence mse, combined fit),
        the observed facility trace, and the cluster's unweighted sample
        traces. One PNG per cluster is written under self.root_sweep_dir;
        nothing is returned.

        Parameters:
            weighted_ccs_model_agg_by_hfca: mapping hfca_id -> iterable of
                combos; each combo is an iterable of per-cluster weighted cc
                arrays of length cc_num_fold_bins (module-level constant).
            ccs_model_agg_by_hfca_cluster_id: mapping hfca_id -> cluster_id ->
                dict with an 'unweighted' list of samples; sample[2] is the
                trace that gets plotted.

        NOTE(review): relies on module-level helpers/constants (cc_num_fold_bins,
        satp, spline, hfca_id_2_cluster_ids, get_cluster_category,
        get_cc_model_ref_traces, hfca_id_2_facility, debug_p, plot dir/file
        constants) not visible in this block — confirm against module scope.
        """
        
        clusters_processed = 0
        for hfca_id, weighted_ccs_combos in weighted_ccs_model_agg_by_hfca.iteritems():
        
            # Per-bin collection of combo sums: bin index -> list of summed values.
            weighted_ccs_by_bin = {}
            for i in range(0, cc_num_fold_bins):
                weighted_ccs_by_bin[i] = []
                
            
            for weighted_ccs_combo in weighted_ccs_combos:
                sum_weighted_ccs = cc_num_fold_bins * [0]

                # Element-wise sum of all per-cluster weighted timeseries in this combo.
                for weighted_ccs in weighted_ccs_combo:
                    sum_weighted_ccs = np.add(sum_weighted_ccs, weighted_ccs)
                    
                for i in range(0, cc_num_fold_bins):
                    weighted_ccs_by_bin[i].append(sum_weighted_ccs[i])
        
        
            # Percentile envelope (2.5% bottom, 97.5% top) and median across combos, per bin.
            per_bottom = []
            per_top = []
            per_median = []
            
            for i in range(0, cc_num_fold_bins):
                weighted_ccs_by_bin_idx = weighted_ccs_by_bin[i]
                per_bottom.append( satp(weighted_ccs_by_bin_idx, 2.5) )
                per_top.append( satp(weighted_ccs_by_bin_idx, 97.5) )
                per_median.append( satp(weighted_ccs_by_bin_idx, 50) )
                
            '''
            debug_p('length of weighted ccs_combos array ' + str(len(weighted_ccs_combos)))
            '''
            debug_p('length of bin 0 in weighted_ccs_by_bin ' + str(len(weighted_ccs_by_bin[0])))
           
            
            # The envelope is shared by all clusters of this HFCA; a separate
            # figure is produced for each cluster.
            for cluster_id in hfca_id_2_cluster_ids(hfca_id):
                
                fig = plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
                gs = gridspec.GridSpec(1, 4)
                     
                ax = plt.subplot(gs[0:4])

                # Smooth the bin-resolution percentile curves onto a 60-point grid for display.
                x_smooth = np.linspace(0, cc_num_fold_bins-1,60)
                
                per_bottom_smooth = spline(range(0, cc_num_fold_bins),per_bottom,x_smooth)
                per_top_smooth = spline(range(0, cc_num_fold_bins),per_top,x_smooth)
                per_median_smooth = spline(range(0, cc_num_fold_bins),per_median,x_smooth)
                
                ax.plot(x_smooth, per_bottom_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '2.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_top_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '97.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_median_smooth, alpha=1, linewidth=2.0, color = 'magenta', linestyle='-', label = 'median HS weighted: prevalence space samples', marker = None)
                ax.fill_between(x_smooth, per_bottom_smooth, per_top_smooth, facecolor='gray', alpha=0.5, interpolate=True)
                
                cluster_cat = get_cluster_category(cluster_id)
                
                opt_group_key = self.best_fits[cluster_id]['group_key']
                
                # Best-fit simulation keys under three criteria: cc penalty,
                # prevalence (mse) and combined fit; each yields its own trace.
                opt_sim_key_cc = self.best_fits[cluster_id]['cc_penalty']['sim_key']
                cc_trace_opt_cc = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_cc]
                
                opt_sim_key_prev = self.best_fits[cluster_id]['mse']['sim_key']
                cc_trace_opt_prev = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_prev]
                
                opt_sim_key_fit = self.best_fits[cluster_id]['fit']['sim_key']
                cc_trace_opt_fit = self.calib_data[cluster_cat][opt_group_key][opt_sim_key_fit]
            
                # Each call also returns the observed reference trace; the last
                # assignment of ccs_ref_agg wins (they should be identical per cluster).
                ccs_model_agg_cc, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_cc, cluster_id)
                ccs_model_agg_prev, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_prev, cluster_id)
                ccs_model_agg_fit, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_fit, cluster_id)
                
                facility = hfca_id_2_facility(hfca_id)
                ax.plot(range(0, len(ccs_model_agg_cc)), ccs_model_agg_cc, alpha=1, linewidth=1, color = 'blue', label = 'Best fit: clinical cases', marker = 's')
                ax.plot(range(0, len(ccs_model_agg_prev)), ccs_model_agg_prev, alpha=1, linewidth=1, color = 'magenta', label = 'Best fit: prevalence', marker = 'o')
                ax.plot(range(0, len(ccs_model_agg_fit)), ccs_model_agg_fit, alpha=1, linewidth=1, color = 'black', label = 'Best fit: prevalence + clinical cases', marker = '*')
                ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, linestyle = '-', color = 'red', label = 'Observed in ' + facility, marker = None)    
                
                # Overlay the cluster's unweighted sample traces; only the first
                # gets a legend label to keep the legend compact.
                for i,sample_ccs in enumerate(ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']):
                      
                    if i == 0:
                        ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', label = 'Opt 5-percentile samples for cluster ' + cluster_id, marker = None)
                        #ax.plot(range(0, cc_num_fold_bins), sample_ccs[2])
                    else:
                        ax.plot(range(0, cc_num_fold_bins), sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', marker = None)
        
                plt.xlabel('6-week bins', fontsize=8)
                plt.ylabel('Clinical cases', fontsize=8)
                legend = plt.legend(loc=1, fontsize=8)
            
                
                plt.xlim(0,8)
                plt.title('Clinical cases timeseries', fontsize = 8, fontweight = 'bold', color = 'black')
                plt.gca().tick_params(axis='x', labelsize=8)
                plt.gca().tick_params(axis='y', labelsize=8)
                plt.tight_layout()
                output_plot_file_path = os.path.join(self.root_sweep_dir, weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name + cluster_id + '.png')
                plt.savefig(output_plot_file_path, dpi = 300, format='png')
                plt.close()
                
                clusters_processed = clusters_processed + 1
                
                debug_p('Processed weighting and plotting clinical cases for ' + str(clusters_processed) + ' clusters') 
    def plot_calib_cc_traces_clusters(self):
        """Plot aggregated clinical-case (cc) timeseries per cluster.

        For every cluster in self.best_fits, plots the best-fit model cc trace
        against the observed facility trace, then overlays up to 10 suboptimal
        traces whose fit value is within subopt_plots_threshold of the optimum.
        Saves one PNG per cluster under self.root_sweep_dir; returns nothing.

        NOTE(review): depends on module-level helpers/constants
        (get_cc_model_ref_traces, hfca_id_2_facility, debug_p, opt_marker,
        opt_marker_size, subopt_plots_threshold, cc_traces_plots_dir,
        cc_traces_base_file_name) not visible in this block.
        """
        
        for cluster_id, cluster_record in self.best_fits.iteritems():
            
            debug_p('Plotting clinical cases trace for cluster ' + cluster_id + ' in category ' + self.category)
            
            fig = plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
            
            opt_sim_key = cluster_record['sim_key']
            opt_group_key = cluster_record['group_key']
            
            opt_cc_trace = self.calib_data[opt_group_key][opt_sim_key]['cc']
            
            # Aggregate the best-fit model trace and fetch the matching observed trace.
            ccs_model_agg, ccs_ref_agg = get_cc_model_ref_traces(opt_cc_trace, cluster_id)
            
            #debug_p('model length ' + str(len(ccs_model_agg)))
            #debug_p('ref length ' + str(len(ccs_ref_agg)))
            
            # Cluster ids are '<hfca_id>_<suffix>'; the facility is keyed by hfca_id.
            hfca_id = cluster_id.split('_')[0]
            
            facility = hfca_id_2_facility(hfca_id)
            
            gs = gridspec.GridSpec(1, 4)
            ymax = 16
        
            # Color-map scaffolding for the trace palette.
            scale_int = np.array(range(0,ymax+1))
            pal = cm = plt.get_cmap('jet') 
            cNorm  = colors.Normalize(vmin=0, vmax=ymax+1)
            scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pal)
                         
            ax = plt.subplot(gs[0:4])
            
            #ax.set_ylim(1000)
            
            opt_const_h = cluster_record['habs']['const_h']
            opt_x_temp_h = cluster_record['habs']['temp_h']
            opt_itn = cluster_record['ITN_cov']
            opt_drug = cluster_record['MSAT_cov']
            opt_fit_value = cluster_record['fit_value']
            
            # the following code only relevant for rank correlation cc penalty fit
            opt_rho = None
            opt_p_val = None
            if 'rho' in cluster_record:
                opt_rho = cluster_record['rho']
            if 'p_val' in cluster_record:
                opt_p_val = cluster_record['p_val']
            
            '''
            mod_dates, mod_cases = zip(*ccs_model_agg)
            ref_dates, ref_cases = zip(*ccs_ref_agg)
            '''
                
            # Best-fit trace label includes rho/p-val only when the rank-correlation
            # penalty terms are present in the record.
            if opt_rho and opt_p_val:
                ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=1, linewidth=2.0, c = 'black', label = 'Best fit: eff. constant=' + str(opt_const_h*opt_x_temp_h) + ', all='+str(opt_x_temp_h) + ', drug cov.' + str(opt_drug) + ', ITN dist. = '+str(opt_itn) + ', rho=' + str(opt_rho) + ', p-val=' + str(opt_p_val), marker = opt_marker, markersize = opt_marker_size)
            else:
                ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=1, linewidth=2.0, c = 'black', label = 'Best fit: eff. constant=' + str(opt_const_h*opt_x_temp_h) + ', all='+str(opt_x_temp_h) + ', drug cov.' + str(opt_drug) + ', ITN dist. = '+str(opt_itn), marker = opt_marker, markersize = opt_marker_size)
            ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, c = 'red', label = 'Observed in ' + facility)    
                
            '''
            if opt_rho and opt_p_val:
                ax.plot(mod_dates, mod_cases, alpha=1, linewidth=2.0, c = 'black', label = 'Best fit: eff. constant=' + str(opt_const_h*opt_x_temp_h) + ', all='+str(opt_x_temp_h) + ', drug cov.' + str(opt_drug) + ', ITN dist. = '+str(opt_itn) + ', rho=' + str(opt_rho) + ', p-val=' + str(opt_p_val), marker = opt_marker, markersize = opt_marker_size)
            else:
                ax.plot(mod_dates, mod_cases, alpha=1, linewidth=2.0, c = 'black', label = 'Best fit: eff. constant=' + str(opt_const_h*opt_x_temp_h) + ', all='+str(opt_x_temp_h) + ', drug cov.' + str(opt_drug) + ', ITN dist. = '+str(opt_itn), marker = opt_marker, markersize = opt_marker_size)
            ax.bar(ref_dates, ref_cases, width=12,color='red',edgecolor='red', linewidth=0, label = 'Observed in ' + facility)
            #ax.plot(dates, ccs_ref_agg, alpha=1, linewidth=2.0, c = 'red', label = 'Observed in ' + facility)
            '''
            # Overlay suboptimal traces: skip the optimum (already drawn), traces
            # beyond the fit threshold, and anything past the first 10.
            count_traces = 0 
            for sim_key, fit_entry in self.all_fits[cluster_id].iteritems():
                
                if sim_key == 'min_terms' or sim_key == 'max_terms':
                    continue
                
                group_key = fit_entry['group_key']
                fit_val = fit_entry['fit_val']
            
                if sim_key == opt_sim_key or fit_val > opt_fit_value + opt_fit_value*subopt_plots_threshold or count_traces > 10: 
                # do not plot optimal traces since we've already plotted it ;also do not plot too suboptimal traces
                    continue
                
                cc_trace = self.calib_data[group_key][sim_key]['cc']
            
                ccs_model_agg, ccs_ref_agg = get_cc_model_ref_traces(cc_trace, cluster_id)
                
                # the following code only relevant for rank correlation cc penalty fit
                rho = None
                p_val = None
                if 'rho' in fit_entry:
                    rho = fit_entry['rho']
                if 'p_val' in fit_entry:
                    p_val = fit_entry['p_val']
                    
                
                const_h = fit_entry['const_h']
                x_temp_h = fit_entry['x_temp_h']
                itn = fit_entry['itn_level']
                drug = fit_entry['drug_cov']
                
                '''
                mod_dates, mod_cases = zip(*ccs_model_agg)
                ref_dates, ref_cases = zip(*ccs_ref_agg)
                '''
                
                marker = self.get_marker(sim_key, count_traces)
                
                # Both branches currently draw the same unlabeled thin line; the
                # labeled/markered variants are kept commented out.
                if rho and p_val:
                    #ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=0.75, linewidth=0.5,  marker = marker, markersize = 0.5*opt_marker_size, label = 'eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h) + 'rho=' + str(rho) + ', p-val=' + str(p_val))
                    #ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=0.75, linewidth=0.5,  marker = marker, markersize = 0.5*opt_marker_size)
                    ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=0.75, linewidth=0.5)
                else:
                    #ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=0.75, linewidth=0.5, marker = marker, markersize = 0.5*opt_marker_size, label = 'eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h))
                    ax.plot(range(0, len(ccs_model_agg)), ccs_model_agg, alpha=0.75, linewidth=0.5)
                #ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=1.0, c = 'red', label = 'Observed in ' + facility)
                
                count_traces = count_traces + 1    
                
                '''    
                if rho and p_val:
                    ax.plot(mod_dates, mod_cases, alpha=0.75, linewidth=2.0, marker = marker, label = 'eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h) + 'rho=' + str(rho) + ', p-val=' + str(p_val))
                else:
                    ax.plot(mod_dates, mod_cases, alpha=0.75, linewidth=2.0, marker = marker, label = 'eff. constant=' + str(const_h*x_temp_h) + ', all='+str(x_temp_h)) 
                ax.bar(ref_dates, ref_cases, width=12,color='red',edgecolor='red', linewidth=0, label = 'Observed in ' + facility)
                '''
            
            plt.xlabel('6-week bins', fontsize=8)
            plt.ylabel('Clinical cases', fontsize=8)
            legend = plt.legend(loc=1, fontsize=8)
            
            '''
            init_font_size = 8
            for i,label in enumerate(legend.get_texts()):
                if i > 2:
                    label.set_fontsize(max(init_font_size - i, 5))
            '''

            plt.title('Clinical cases timeseries', fontsize = 8, fontweight = 'bold', color = 'black')
            plt.gca().tick_params(axis='x', labelsize=8)
            plt.gca().tick_params(axis='y', labelsize=8)
            plt.tight_layout()
            output_plot_file_path = os.path.join(self.root_sweep_dir, cc_traces_plots_dir, cc_traces_base_file_name + cluster_id + '.png')
            plt.savefig(output_plot_file_path, dpi = 300, format='png')
            plt.close()
    def plot_calib_err_surfaces(self, err_surface_types):
        """Plot interpolated error surfaces over habitat-scaling space per cluster.

        For every cluster in self.best_fits, collects per-simulation fit terms
        (fit value, mse, cc penalty) over the (x_temp_h, const_h) parameter
        plane, grids them with griddata, and draws one filled-contour panel per
        entry of err_surface_types. The best-fit point of every surface type is
        overlaid on each panel and the optimal 5-percentile region is filled.
        One PNG per cluster is saved under self.root_sweep_dir; returns nothing.

        Parameters:
            err_surface_types: dict mapping a surface type key (e.g. 'fit',
                'mse', 'cc_penalty') to a style dict with at least 'title' and
                'marker' entries.

        NOTE(review): depends on module-level names (get_cc_penalty, satp,
        sample_size_percentile, cc_weight, b2mpl, griddata, err_surfaces
        dir/file constants, debug_p) not visible in this block.
        """
        
        count = 0
        
        min_residual = self.residuals['min']
        max_residual = self.residuals['max']
        
        for cluster_id, cluster_record in self.best_fits.iteritems():
            
            debug_p('Plotting error surface for cluster ' + cluster_id + ' in category ' + self.category)
        
            # One panel (4.35in wide) per requested error-surface type.
            fig_width = len(err_surface_types)*4.35
            fig = plt.figure(cluster_id, figsize=(fig_width, 4), dpi=300, facecolor='white')
            #debug_p('error surface types length' + str(len(err_surface_types)))
            #debug_p('fig width' + str(fig_width))
            
            gs = gridspec.GridSpec(1, 3)
         
            error_points = {}
            
            title_ITN = 'ITN distribution: '
            title_drug_coverage = 'drug coverage: '
            
            # Per-surface-type optimum (habitat scales and optimal value).
            opt_fit = {}
            
            opt_sim_key = cluster_record['sim_key']
            opt_group_key = cluster_record['group_key']
            opt_const_h = cluster_record['habs']['const_h']
            opt_x_temp_h = cluster_record['habs']['temp_h']
            
            for err_surface_type in err_surface_types:    
                opt_fit[err_surface_type] = {} 
                opt_fit[err_surface_type]['const_h'] = cluster_record[err_surface_type]['const_h']
                opt_fit[err_surface_type]['temp_h'] = cluster_record[err_surface_type]['temp_h']
                opt_fit[err_surface_type]['value'] = cluster_record[err_surface_type]['value']
                
                if err_surface_type == 'cc_penalty':
                    opt_fit[err_surface_type]['value'] = opt_fit[err_surface_type]['value']*(math.pow(cc_weight, -1)) # opt_fit of penalties contains the weighted value; hence we reverse the weighting
                    
                 
            opt_neigh_fits = []
            
            # Bucket every simulation's fit terms by its group key (ITN level x drug coverage).
            for sim_key,fit_entry in self.all_fits[cluster_id].iteritems():
        
                if sim_key == 'min_terms' or sim_key == 'max_terms':
                    continue
        
                x_temp_h = fit_entry['x_temp_h']
                const_h = fit_entry['const_h']
                fit_val = fit_entry['fit_val']
                mse = fit_entry['fit_terms']['mse']
                cc_penalty = get_cc_penalty(fit_entry)

                itn_level = fit_entry['itn_level']
                drug_coverage_level = fit_entry['drug_cov']
                group_key = fit_entry['group_key']    
                
                    
                if group_key not in error_points:
                    error_points[group_key] = {
                                                   'x_temp_h':[],
                                                   'const_h':[],
                                                   'fit':[],
                                                   'cc_penalty':[],
                                                   'mse':[],
                                                   'title': title_ITN + itn_level + "; " + title_drug_coverage + str(drug_coverage_level),
                                                   'itn_level':itn_level,
                                                   'drug_coverage':drug_coverage_level
                                                }
                    
                error_points[group_key]['x_temp_h'].append(x_temp_h)
                error_points[group_key]['const_h'].append(const_h)
                error_points[group_key]['fit'].append(fit_val)
                error_points[group_key]['mse'].append(mse)
                error_points[group_key]['cc_penalty'].append(cc_penalty)
                       
                          
            ymax = 10
            
            # Color-map scaffolding for the panels.
            scale_int = np.array(range(1,10))
            pal = cm = plt.get_cmap('nipy_spectral') 
            cNorm  = colors.Normalize(vmin=0, vmax=ymax)
            #scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=pal)
            scalarMap = b2mpl.get_map('Spectral', 'Diverging', 5).mpl_colors
            lgds = []
            for i,(err_surface_type, err_surface_style) in enumerate(err_surface_types.iteritems()):
            
                for j,group_key in enumerate(error_points.keys()):
                    
                    itn_level = error_points[group_key]['itn_level']
                    drug_coverage = error_points[group_key]['drug_coverage']
                    
                    # currently assume opt_group_key is the same for all err_surface_types
                    if group_key == opt_group_key: 
            
                        #debug_p('plot at position (0, ' + str(i) + ') in grid')
                        plt.subplot(gs[0,i])
                        x = error_points[group_key]['x_temp_h']
                        y = error_points[group_key]['const_h']
                        z = error_points[group_key][err_surface_type]
                        #print len(z)
                        res = 125
                        ttl = err_surface_style['title']
                        
                        min_x = np.min(x)
                        min_y = np.min(y)
                        min_z = np.min(z)
                        
                        max_x = np.max(x)
                        max_y = np.max(y)
                        max_z = np.max(z)
                        
                        
                    
                        #f = interpolate.interp2d(x, y, z)
                    
                        # Interpolate the scattered residuals onto a res x res regular grid.
                        xi = np.linspace(min_x, max_x , res)
                        yi = np.linspace(min_y, max_y , res)
                        
                        zi = griddata(x,y,z,xi,yi)
                        
                        #xig, yig = np.meshgrid(xi, yi)
                        #zig = f(xi,yi)
    
                        #rbf = Rbf(x, y, z, epsilon=2)
                        #zig = rbf(xig, yig)
                    
                        # Discrete colorbar levels derived from the z distribution.
                        blevels = self.get_colorbar_ticks(min_z, max_z, z)
                        num_colors = len(blevels)-1
                        from matplotlib.colors import BoundaryNorm
                        
                        cmap2 = self.custom_cmap(num_colors, mincol='DarkBlue', midcol='CornflowerBlue', maxcol='w')
                        cmap2.set_over('0.7') # light gray
                        
                        bnorm = BoundaryNorm(blevels, ncolors = num_colors, clip = False)
                    
                        #rmse_pl = plt.contourf(xi,yi,zi,15,cmap=plt.cm.hot)
                        #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('RdYlGn_r'), vmin=0.475, vmax=0.8)
                        #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('RdYlGn_r'))
                        #rmse_pl = plt.pcolor(xi, yi, zi, cmap=plt.get_cmap('cool'), vmin = min_residual, vmax = max_residual)
                        #rmse_pl = plt.contourf(xi, yi, zi, cmap=plt.get_cmap('cool'), vmin = min_z, vmax = max_z, levels = blevels, norm = bnorm, extend = 'both')
                        rmse_pl = plt.contourf(xi, yi, zi, cmap=cmap2, vmin = min_z, vmax = max_z, levels = blevels, norm = bnorm, extend = 'both')
                        #rmse_pl = plt.contourf(xi,yi,zi,15,cmap=plt.get_cmap('paired'))
                        #rmse_pl.cmap.set_over('black')
                        #rmse_pl.cmap.set_under('grey')
                        
                        # Find the highest colorbar level still within the
                        # sample_size_percentile of z; it bounds the "optimal" region.
                        max_blevel_in_sample = 0
                        for blevel in blevels:
                            if blevel <= satp(z, sample_size_percentile) and blevel > max_blevel_in_sample:
                               max_blevel_in_sample = blevel 
                                
                        pc = plt.contour(xi,yi,zi, levels=[max_blevel_in_sample], colors='r', linewidth=0, alpha=0.5)
                        
                        # Fill every closed path of that contour; label only the first.
                        b_per_s = pc.collections[0].get_paths()
                        count_labels = 0
                        for per in range(len(b_per_s)):
                            b_per_s_x = b_per_s[per].vertices[:,0]
                            b_per_s_y = b_per_s[per].vertices[:,1]
                            if count_labels == 0:
                                plt.fill(b_per_s_x,b_per_s_y, 'magenta', linestyle='solid', alpha=0.3, label = 'Opt 5-percentile: ' + err_surface_style['title'])
                                count_labels = count_labels + 1
                            else:
                                plt.fill(b_per_s_x,b_per_s_y, 'magenta', linestyle='solid', alpha=0.3)
                            
                        
                        cb = plt.colorbar(rmse_pl, ticks = blevels, spacing='uniform')
    
                        cb.set_label(ttl + ' residual', fontsize=8)
                        cb.ax.tick_params(labelsize=8)    
                        #plt.scatter(x, y, 10, z, cmap=cmap2,  vmin = min_z, vmax = max_z, norm = bnorm)
                        
                        level1_opt_neighs_label = False
                        level2_opt_neighs_label = False
                        level3_opt_neighs_label = False
                        # plot all optimal markers on each surface
                
                        for (opt_err_surface_type, opt_err_surface_style) in err_surface_types.iteritems():
                            plt.scatter(opt_fit[opt_err_surface_type]['temp_h'], opt_fit[opt_err_surface_type]['const_h'], c = 'red', marker = opt_err_surface_style['marker'], s = 60, facecolor='none', edgecolor='black', zorder=100, label= opt_err_surface_style['title'] + ' best fit')
                        
                        '''
                        for k,fit_val in enumerate(z):

                                if fit_val < opt_fit[err_surface_type]['value'] + opt_fit[err_surface_type]['value']*subopt_plots_threshold:
                                    
                                    if not level1_opt_neighs_label:
                                        label = '< opt + 0.1opt'
                                        level1_opt_neighs_label = True
                                    else:
                                        label = None
                                    
                                    #plt.scatter(x[k], y[k], 10, fit_val, marker = 'd',  linewidth = 0.75, color = 'green', label = label)
                                    
                                    
                                elif fit_val < opt_fit[err_surface_type]['value'] + 2*opt_fit[err_surface_type]['value']*subopt_plots_threshold:
                                    
                                    if not level2_opt_neighs_label:
                                        label = '< opt + 0.2opt'
                                        level2_opt_neighs_label = True
                                    else:
                                        label = None
        
                                    #plt.scatter(x[k], y[k], 10, fit_val, marker = 'o', linewidth = 0.75, color = 'blue', label = label)
                                    
                                    
                                elif fit_val < opt_fit[err_surface_type]['value'] + 3*opt_fit[err_surface_type]['value']*subopt_plots_threshold:
                                    
                                    if not level3_opt_neighs_label:
                                        label = '< opt + 0.3opt'
                                        level3_opt_neighs_label = True
                                    else:
                                        label = None                            
                                    
                                    #plt.scatter(x[k], y[k], 10, fit_val, marker = 'x',  linewidth = 0.75, color = 'red',  label = label)
                        '''
                            
                        #plt.title(ttl, fontsize = 8, fontweight = 'bold', color = 'white', backgroundcolor = scalarMap.to_rgba(scale_int[itn_levels_2_sbplts[itn_level]]))
                        plt.title(ttl, fontsize = 8, fontweight = 'bold', color = 'black')
                        plt.xlabel('All habitats scale', fontsize=8)
                        plt.ylabel('Constant habitat scale', fontsize=8)
                        plt.xlim(min_x+0.1, max_x+0.1)
                        plt.ylim(min_y+0.1, max_y+0.1)
                        #plt.ylim(0.01, 14)
                        plt.gca().tick_params(axis='x', labelsize=8)
                        plt.gca().tick_params(axis='y', labelsize=8)
                        
                        '''
                        count_traces = 0
                        
                        # NEED TO update to new FIT_ENTRY DATA STRUCT IF REUSED
                        for fit_entry in opt_neigh_fits:
                            
                            x_temp_h = fit_entry['x_temp_h']
                            const_h = fit_entry['const_h'] 
                            sim_key = fit_entry['sim_key']
                            
                            marker = self.get_marker(sim_key, count_traces)
        
                            plt.scatter(x_temp_h, const_h, c = 'black', marker = marker, s = 20, facecolor='none', zorder=100)
                            
                            count_traces = count_traces + 1
                        '''
                    
                #plt.subplot(gs[itn_levels_2_sbplts[best_fit_itn_level], 0])
                #debug_p('plot optimal at position (0, ' + str(i) + ') in grid')
                plt.subplot(gs[0,i])
                
                cluster_record = self.best_fits[cluster_id]
                opt_itn = cluster_record['ITN_cov']
                opt_drug = cluster_record['MSAT_cov']
                
                #plt.annotate(opt_fit_value, opt_x_temp_h, opt_const_h)
    
                # One legend per panel, placed below the axes; collected so
                # savefig can account for them via bbox_extra_artists.
                #lgds.append(plt.legend(bbox_to_anchor=(0., 1, 1., .1), loc=2, ncol=1, borderaxespad=0., fontsize=8))
                lgds.append(plt.legend(ncol=1,loc='upper center', bbox_to_anchor=(0.,-0.15), borderaxespad=0., fontsize=8, mode='expand'))
                
                    
            plt.tight_layout()
            output_plot_file_path = os.path.join(self.root_sweep_dir, err_surfaces_plots_dir, err_surfaces_base_file_name + cluster_id +'.png')
            plt.savefig(output_plot_file_path, dpi = 300, format='png', bbox_extra_artists=lgds, bbox_inches='tight')
            plt.close()
            
            count = count + 1
def error_loading_fit_terms():
    """Log and raise when the fit-terms file cannot be loaded.

    Raises:
        ValueError: always; the message names the fit terms file path that
            should be checked for accessibility.
    """
    # Build the message once so the debug log and the exception cannot drift apart.
    msg = 'Could not load fit terms! Check whether fit terms file at ' + os.path.join(sim_data_dir, fit_terms_file) + ' is accessible.'

    debug_p(msg)

    raise ValueError(msg)