def get_colorbar_ticks(self, min_z, max_z, z):
     """
     Build the sorted list of boundary levels for the error-surface colorbar.

     The list holds
       * min_z,
       * min_z + blevel*min_z for every blevel found in the list handed to
         self.generate_blevel_sequence, as long as the level is <= max_z,
       * satp(z, sample_size_percentile).

     min_z -- lowest level; also the base the relative levels are scaled by
     max_z -- generated levels above this value are dropped
     z     -- values passed to satp together with sample_size_percentile

     Returns the levels in increasing order; a value that occurs more than
     once is returned only once.
     """

     blevels = []
     # The return value is not used, so generate_blevel_sequence is expected to
     # fill blevels in place (arguments: current = 0.1, previous = 0.0, index = 0).
     self.generate_blevel_sequence(blevels, 0.1, 0.0, 0)

     ticks = [min_z]
     for blevel in blevels:
         tick = min_z + blevel*min_z
         if tick <= max_z:
             ticks.append(tick)

     # not efficient; but there are many parts of this that aren't;
     # need to refactor at some point if speed is an issue
     # avoid premature optimization
     ticks.append(satp(z, sample_size_percentile))

     # The levels end up as contour levels / colormap boundaries, which have to
     # be increasing.  The percentile tick may coincide with min_z or with a
     # generated level (and all generated levels coincide when min_z is 0), so
     # repeated values are dropped.  Neighbours are compared instead of using
     # a set so that the values do not need to be hashable.
     ticks.sort()
     unique_ticks = []
     for tick in ticks:
         if not unique_ticks or tick != unique_ticks[-1]:
             unique_ticks.append(tick)

     return unique_ticks
    def plot_calib_err_surfaces(self, err_surface_types):
        """
        Plot the calibration error surfaces of every cluster, one PNG per cluster.

        For each cluster in self.best_fits a figure with one panel per entry of
        err_surface_types is drawn.  A panel shows, for the cluster's optimal
        group (cluster_record['group_key']):
          * the selected error term of all fits in self.all_fits[cluster_id],
            interpolated with griddata over the (x_temp_h, const_h) plane and
            drawn as filled contours on the levels from get_colorbar_ticks;
          * the region enclosed by the contour at the highest level that does
            not exceed satp(z, sample_size_percentile), filled in magenta;
          * the best-fit marker of every surface type.
        The figure is saved to
        self.root_sweep_dir / err_surfaces_plots_dir /
        (err_surfaces_base_file_name + cluster_id + '.png').

        err_surface_types -- mapping of surface type to style dict.  The type
            is used as key into the best-fit record and into the collected
            error terms ('fit', 'mse', 'cc_penalty'); the style dict has to
            provide 'title' and 'marker'.  Iteration order fixes the panel
            order.

        Returns nothing.
        """

        # function-scope import, as in the original code
        from matplotlib.colors import BoundaryNorm

        num_surfaces = len(err_surface_types)
        res = 125  # number of interpolation points along each axis

        title_ITN = 'ITN distribution: '
        title_drug_coverage = 'drug coverage: '

        for cluster_id, cluster_record in self.best_fits.items():

            debug_p('Plotting error surface for cluster ' + cluster_id + ' in category ' + self.category)

            plt.figure(cluster_id, figsize=(num_surfaces*4.35, 4), dpi=300, facecolor='white')

            # Three columns reproduce the original layout; with more than three
            # surface types the grid grows so that gs[0, i] stays in range.
            gs = gridspec.GridSpec(1, max(3, num_surfaces))

            opt_group_key = cluster_record['group_key']

            # location (and value) of the optimum of every surface type
            opt_fit = {}
            for err_surface_type in err_surface_types:
                opt_record = cluster_record[err_surface_type]
                opt_value = opt_record['value']

                if err_surface_type == 'cc_penalty':
                    # opt_fit of penalties contains the weighted value; hence we reverse the weighting
                    opt_value = opt_value*(math.pow(cc_weight, -1))

                opt_fit[err_surface_type] = {
                                                'const_h': opt_record['const_h'],
                                                'temp_h': opt_record['temp_h'],
                                                'value': opt_value
                                             }

            # error terms of all simulations, grouped by group key
            error_points = {}
            for sim_key, fit_entry in self.all_fits[cluster_id].items():

                # bookkeeping entries, not simulations
                if sim_key in ('min_terms', 'max_terms'):
                    continue

                x_temp_h = fit_entry['x_temp_h']
                const_h = fit_entry['const_h']
                fit_val = fit_entry['fit_val']
                mse = fit_entry['fit_terms']['mse']
                cc_penalty = get_cc_penalty(fit_entry)

                itn_level = fit_entry['itn_level']
                drug_coverage_level = fit_entry['drug_cov']
                group_key = fit_entry['group_key']

                if group_key not in error_points:
                    error_points[group_key] = {
                                                   'x_temp_h':[],
                                                   'const_h':[],
                                                   'fit':[],
                                                   'cc_penalty':[],
                                                   'mse':[],
                                                   'title': title_ITN + itn_level + "; " + title_drug_coverage + str(drug_coverage_level),
                                                   'itn_level':itn_level,
                                                   'drug_coverage':drug_coverage_level
                                                }

                group_points = error_points[group_key]
                group_points['x_temp_h'].append(x_temp_h)
                group_points['const_h'].append(const_h)
                group_points['fit'].append(fit_val)
                group_points['mse'].append(mse)
                group_points['cc_penalty'].append(cc_penalty)

            lgds = []
            for i, (err_surface_type, err_surface_style) in enumerate(err_surface_types.items()):

                # currently assume opt_group_key is the same for all err_surface_types
                if opt_group_key in error_points:

                    opt_points = error_points[opt_group_key]

                    plt.subplot(gs[0,i])
                    x = opt_points['x_temp_h']
                    y = opt_points['const_h']
                    z = opt_points[err_surface_type]
                    ttl = err_surface_style['title']

                    min_x = np.min(x)
                    min_y = np.min(y)
                    min_z = np.min(z)

                    max_x = np.max(x)
                    max_y = np.max(y)
                    max_z = np.max(z)

                    # interpolate the scattered samples on a regular res x res grid
                    xi = np.linspace(min_x, max_x, res)
                    yi = np.linspace(min_y, max_y, res)
                    zi = griddata(x,y,z,xi,yi)

                    # discrete color scale: one color per interval between levels
                    blevels = self.get_colorbar_ticks(min_z, max_z, z)
                    num_colors = len(blevels)-1

                    cmap2 = self.custom_cmap(num_colors, mincol='DarkBlue', midcol='CornflowerBlue', maxcol='w')
                    cmap2.set_over('0.7') # light gray

                    bnorm = BoundaryNorm(blevels, ncolors = num_colors, clip = False)

                    rmse_pl = plt.contourf(xi, yi, zi, cmap=cmap2, vmin = min_z, vmax = max_z, levels = blevels, norm = bnorm, extend = 'both')

                    # highest level that still lies within the percentile sample
                    sample_cutoff = satp(z, sample_size_percentile)
                    max_blevel_in_sample = 0
                    for blevel in blevels:
                        if blevel <= sample_cutoff and blevel > max_blevel_in_sample:
                            max_blevel_in_sample = blevel

                    # NOTE(review): 'linewidth' is kept exactly as in the original call;
                    # the contour keyword is 'linewidths' - confirm the intent before
                    # changing it, since that alters the drawn outline.
                    pc = plt.contour(xi,yi,zi, levels=[max_blevel_in_sample], colors='r', linewidth=0, alpha=0.5)

                    # fill every closed piece of that contour; only the first one
                    # gets a label so that the legend shows a single entry
                    for piece_idx, piece in enumerate(pc.collections[0].get_paths()):
                        piece_x = piece.vertices[:,0]
                        piece_y = piece.vertices[:,1]
                        if piece_idx == 0:
                            plt.fill(piece_x, piece_y, 'magenta', linestyle='solid', alpha=0.3, label = 'Opt 5-percentile: ' + err_surface_style['title'])
                        else:
                            plt.fill(piece_x, piece_y, 'magenta', linestyle='solid', alpha=0.3)

                    cb = plt.colorbar(rmse_pl, ticks = blevels, spacing='uniform')

                    cb.set_label(ttl + ' residual', fontsize=8)
                    cb.ax.tick_params(labelsize=8)

                    # plot all optimal markers on each surface
                    for (opt_err_surface_type, opt_err_surface_style) in err_surface_types.items():
                        plt.scatter(opt_fit[opt_err_surface_type]['temp_h'], opt_fit[opt_err_surface_type]['const_h'], c = 'red', marker = opt_err_surface_style['marker'], s = 60, facecolor='none', edgecolor='black', zorder=100, label= opt_err_surface_style['title'] + ' best fit')

                    plt.title(ttl, fontsize = 8, fontweight = 'bold', color = 'black')
                    plt.xlabel('All habitats scale', fontsize=8)
                    plt.ylabel('Constant habitat scale', fontsize=8)
                    plt.xlim(min_x+0.1, max_x+0.1)
                    plt.ylim(min_y+0.1, max_y+0.1)
                    plt.gca().tick_params(axis='x', labelsize=8)
                    plt.gca().tick_params(axis='y', labelsize=8)

                # the legend is attached to the panel of this surface type
                plt.subplot(gs[0,i])
                lgds.append(plt.legend(ncol=1,loc='upper center', bbox_to_anchor=(0.,-0.15), borderaxespad=0., fontsize=8, mode='expand'))

            plt.tight_layout()
            output_plot_file_path = os.path.join(self.root_sweep_dir, err_surfaces_plots_dir, err_surfaces_base_file_name + cluster_id +'.png')
            # legends sit below the axes; pass them on so the tight bounding box includes them
            plt.savefig(output_plot_file_path, dpi = 300, format='png', bbox_extra_artists=lgds, bbox_inches='tight')
            plt.close()
    def plot_weighted_cc_per_hfca(self, weighted_ccs_model_agg_by_hfca, ccs_model_agg_by_hfca_cluster_id):
        """
        Plot the clinical-case time series of every cluster, one PNG per cluster.

        For each HFCA the weighted traces of every combination are summed
        element-wise, and per bin the 2.5, 50 and 97.5 percentiles (satp) over
        all combinations are taken and smoothed with spline.  For every cluster
        returned by hfca_id_2_cluster_ids(hfca_id) the figure then shows
          * the smoothed percentile band and median,
          * the model traces of the cluster's best fits for 'cc_penalty',
            'mse' and 'fit' together with the reference trace, as returned by
            get_cc_model_ref_traces,
          * element [2] of every entry of
            ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted'].
        The figure is saved to
        self.root_sweep_dir / weighted_cc_traces_plots_dir /
        (weighted_cc_traces_base_file_name + cluster_id + '.png').

        weighted_ccs_model_agg_by_hfca -- mapping hfca_id -> iterable of
            combinations; a combination is an iterable of sequences with
            cc_num_fold_bins entries each
        ccs_model_agg_by_hfca_cluster_id -- nested mapping
            [hfca_id][cluster_id]['unweighted'] -> iterable of samples

        Returns nothing.
        """

        bins = range(0, cc_num_fold_bins)
        clusters_processed = 0

        for hfca_id, weighted_ccs_combos in weighted_ccs_model_agg_by_hfca.items():

            # per bin: the summed weighted value of every combination
            weighted_ccs_by_bin = {}
            for i in bins:
                weighted_ccs_by_bin[i] = []

            for weighted_ccs_combo in weighted_ccs_combos:
                sum_weighted_ccs = cc_num_fold_bins * [0]

                for weighted_ccs in weighted_ccs_combo:
                    sum_weighted_ccs = np.add(sum_weighted_ccs, weighted_ccs)

                for i in bins:
                    weighted_ccs_by_bin[i].append(sum_weighted_ccs[i])

            # percentile band and median across combinations, bin by bin
            per_bottom = [satp(weighted_ccs_by_bin[i], 2.5) for i in bins]
            per_top = [satp(weighted_ccs_by_bin[i], 97.5) for i in bins]
            per_median = [satp(weighted_ccs_by_bin[i], 50) for i in bins]

            debug_p('length of bin 0 in weighted_ccs_by_bin ' + str(len(weighted_ccs_by_bin[0])))

            # The smoothed curves depend on the HFCA only, so they are computed
            # once here instead of once per cluster.
            x_smooth = np.linspace(0, cc_num_fold_bins-1,60)
            per_bottom_smooth = spline(bins, per_bottom, x_smooth)
            per_top_smooth = spline(bins, per_top, x_smooth)
            per_median_smooth = spline(bins, per_median, x_smooth)

            for cluster_id in hfca_id_2_cluster_ids(hfca_id):

                plt.figure(cluster_id, figsize=(9.2, 4), dpi=100, facecolor='white')
                gs = gridspec.GridSpec(1, 4)

                # a single axes spanning all four grid cells
                ax = plt.subplot(gs[0:4])

                ax.plot(x_smooth, per_bottom_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '2.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_top_smooth, alpha=1, linewidth=0.5, color = 'black', linestyle=':', label = '97.5 percentile HS weighted: prevalence space samples', marker = None)
                ax.plot(x_smooth, per_median_smooth, alpha=1, linewidth=2.0, color = 'magenta', linestyle='-', label = 'median HS weighted: prevalence space samples', marker = None)
                ax.fill_between(x_smooth, per_bottom_smooth, per_top_smooth, facecolor='gray', alpha=0.5, interpolate=True)

                cluster_cat = get_cluster_category(cluster_id)

                best_fit = self.best_fits[cluster_id]
                opt_group_key = best_fit['group_key']
                group_calib_data = self.calib_data[cluster_cat][opt_group_key]

                # calibration records of the best simulation per error term
                cc_trace_opt_cc = group_calib_data[best_fit['cc_penalty']['sim_key']]
                cc_trace_opt_prev = group_calib_data[best_fit['mse']['sim_key']]
                cc_trace_opt_fit = group_calib_data[best_fit['fit']['sim_key']]

                # the reference trace of the last call is the one plotted, as before
                ccs_model_agg_cc, _ = get_cc_model_ref_traces(cc_trace_opt_cc, cluster_id)
                ccs_model_agg_prev, _ = get_cc_model_ref_traces(cc_trace_opt_prev, cluster_id)
                ccs_model_agg_fit, ccs_ref_agg = get_cc_model_ref_traces(cc_trace_opt_fit, cluster_id)

                facility = hfca_id_2_facility(hfca_id)
                ax.plot(range(0, len(ccs_model_agg_cc)), ccs_model_agg_cc, alpha=1, linewidth=1, color = 'blue', label = 'Best fit: clinical cases', marker = 's')
                ax.plot(range(0, len(ccs_model_agg_prev)), ccs_model_agg_prev, alpha=1, linewidth=1, color = 'magenta', label = 'Best fit: prevalence', marker = 'o')
                ax.plot(range(0, len(ccs_model_agg_fit)), ccs_model_agg_fit, alpha=1, linewidth=1, color = 'black', label = 'Best fit: prevalence + clinical cases', marker = '*')
                ax.plot(range(0, len(ccs_ref_agg)), ccs_ref_agg, alpha=1, linewidth=2.0, linestyle = '-', color = 'red', label = 'Observed in ' + facility, marker = None)

                # individual sample traces; only the first one is labelled so the
                # legend carries a single entry for all of them
                for i, sample_ccs in enumerate(ccs_model_agg_by_hfca_cluster_id[hfca_id][cluster_id]['unweighted']):

                    if i == 0:
                        ax.plot(bins, sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', label = 'Opt 5-percentile samples for cluster ' + cluster_id, marker = None)
                    else:
                        ax.plot(bins, sample_ccs[2], alpha=0.5, linewidth=0.5, color = 'magenta', marker = None)

                plt.xlabel('6-week bins', fontsize=8)
                plt.ylabel('Clinical cases', fontsize=8)
                plt.legend(loc=1, fontsize=8)

                # NOTE(review): the x range is fixed to 0..8 and does not follow
                # cc_num_fold_bins - confirm this is intended.
                plt.xlim(0,8)
                plt.title('Clinical cases timeseries', fontsize = 8, fontweight = 'bold', color = 'black')
                plt.gca().tick_params(axis='x', labelsize=8)
                plt.gca().tick_params(axis='y', labelsize=8)
                plt.tight_layout()
                output_plot_file_path = os.path.join(self.root_sweep_dir, weighted_cc_traces_plots_dir, weighted_cc_traces_base_file_name + cluster_id + '.png')
                plt.savefig(output_plot_file_path, dpi = 300, format='png')
                plt.close()

                clusters_processed = clusters_processed + 1

                debug_p('Processed weighting and plotting clinical cases for ' + str(clusters_processed) + ' clusters')
def get_prevalence_opt_region_sims(best_fits, all_fits, cluster_id):
    """
    Return the simulations of a cluster's optimal group whose mse lies within
    the sample_size_percentile percentile of that group's mse values.

    best_fits  -- mapping cluster_id -> best-fit record; only its 'group_key'
                  entry is read
    all_fits   -- mapping cluster_id -> mapping sim_key -> fit entry; a fit
                  entry provides ['fit_terms']['mse'] and ['group_key'].  The
                  keys 'min_terms' and 'max_terms' are skipped.
    cluster_id -- cluster to look up in both mappings

    Returns a list of (group_key, sim_key) tuples; the list is empty when no
    fit entry belongs to the optimal group.
    """

    opt_group_key = best_fits[cluster_id]['group_key']

    # sim keys and mse values of the optimal group, in matching order
    sim_keys = []
    mses = []
    for sim_key, fit_entry in all_fits[cluster_id].items():

        # bookkeeping entries, not simulations
        if sim_key in ('min_terms', 'max_terms'):
            continue

        mse = fit_entry['fit_terms']['mse']
        group_key = fit_entry['group_key']

        if group_key == opt_group_key:
            sim_keys.append(sim_key)
            mses.append(mse)

    if not mses:
        return []

    # the threshold is the same for every sample, so it is computed only once
    mse_threshold = satp(mses, sample_size_percentile)

    return [(opt_group_key, sim_key) for (sim_key, mse) in zip(sim_keys, mses) if mse <= mse_threshold]