def make_pdfs(self, typ):
    """
    Make pdfs, i.e. number of events vs density

    The pdf is always self.npoints long; the last bin is an overflow bin.
    """
    if not self.data[typ]["us"].get_n_events():
        return
    transmission = float(self.data[typ]["ds"].get_n_events()) / \
                   self.data[typ]["us"].get_n_events()
    for loc in self.locations:
        # Extract the data as a numpy array
        n_events = self.data[typ][loc].get_n_events()
        id_data = [None]*n_events
        ps_data = np.ndarray((n_events, 4))
        for i, event in enumerate(self.data[typ][loc].retrieve()):
            ps_data[i] = event[-1].tolist()
            id_data[i] = (event[0], event[1], event[2])
        # Use the kNN module to get a density profile; normalise the
        # downstream densities by the transmission
        norm = 1.
        if loc == "ds":
            norm = transmission
        density_estimator = kNNDensityEstimator(ps_data, self.rotate,
                                                self.nthreads, norm)
        density_estimator.set_levels()
        levels = density_estimator.levels
        key_value_pairs = [(id_data[i], levels[i]) for i in range(n_events)]
        density_dict = dict(key_value_pairs)
        self.density_data[typ][loc]["densities"] = density_dict
        # Histogram the densities; use npoints-1 regular bins and append the
        # overflow so that the pdf is always self.npoints long, as the
        # docstring and the "n(overflow)" printout below require
        pdf, bx = np.histogram(levels, self.npoints-1, (0., self.density_max))
        pdf = pdf.tolist()
        pdf.append(len(levels)-sum(pdf))
        cdf = self.make_cdf(pdf)
        self.density_data[typ][loc]["pdf"] = pdf
        self.density_data[typ][loc]["cdf"] = cdf
        print "Found density pdf with max entry", max(density_dict.values()), \
              "max bin", self.density_max, "n(overflow)", pdf[-1], \
              "and", len(density_dict), "=", sum(pdf), "entries"
        # float(...) guards against python 2 integer division
        pdf_stats = [density_estimator.bin_uncertainty(float(pdf_bin)/cdf[0])
                     for pdf_bin in pdf]
        cdf_stats = [density_estimator.bin_uncertainty(float(cdf_bin)/cdf[0])
                     for cdf_bin in cdf]
        if loc == "us":
            pdf_stats = [0. for pdf_bin in pdf]
            cdf_stats = [0. for cdf_bin in cdf]
        self.density_data[typ][loc]["pdf_stat_errors"] = pdf_stats
        self.density_data[typ][loc]["cdf_stat_errors"] = cdf_stats
    self.delta = float(sum(pdf))/self.npoints/100.
    print "Density correction with delta", self.delta
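# A minimal worked illustration (hedged; the values are made up) of the
# overflow binning in make_pdfs above. With self.npoints = 4 and
# density_max = 2., np.histogram fills npoints-1 = 3 regular bins and the
# appended bin picks up the events above density_max:
#
#   levels = [0.1, 0.2, 0.6, 1.5, 2.5]
#   pdf, bx = np.histogram(levels, 3, (0., 2.))  # -> array([3, 0, 1])
#   pdf = pdf.tolist()
#   pdf.append(len(levels)-sum(pdf))             # -> [3, 0, 1, 1]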
def make_profiles(self, typ):
    """
    Produces density profiles.

    Extract a numpy ndarray that contains all of the phase space vectors,
    initialize the kNN density estimator and extract the density profile
    with its uncertainty.

    * typ specifies the type of data (all_mc, reco_mc, reco)
    """
    # Skip if no data
    if not self.data[typ]["us"].get_n_events():
        return
    transmission = float(self.data[typ]["ds"].get_n_events()) / \
                   self.data[typ]["us"].get_n_events()
    for loc in self.locations:
        # Extract the data as a numpy array
        ps_data = np.ndarray((self.data[typ][loc].get_n_events(), 4))
        for i, event in enumerate(self.data[typ][loc].retrieve()):
            ps_data[i] = event[-1].tolist()
        # Use the kNN module to get a density profile; normalise the
        # downstream profile by the transmission
        norm = 1.
        if loc == "ds":
            norm = transmission
        density_estimator = kNNDensityEstimator(ps_data, self.rotate,
                                                self.nthreads, norm)
        levels, errors = density_estimator.profile(self.npoints,
                                                   self.uncertainty,
                                                   scaling=self.graph_scaling)
        # Store the profiles and their statistical uncertainties
        self.density_data[typ][loc]["levels"] = levels
        self.density_data[typ][loc]["levels_stat_errors"] = errors
        # Plot the Poincare sections if requested
        if self.config_anal["density_sections"]:
            plotter = DensityPlotter(self.plot_dir, typ+"_"+loc)
            plotter.plot_phase_space(density_estimator)
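# Hedged usage sketch of the estimator call in make_profiles, assuming the
# four phase space coordinates are (x, px, y, py); only calls that already
# appear in this module are used:
#
#   ps_data = np.ndarray((n_events, 4))     # one (x, px, y, py) row per event
#   de = kNNDensityEstimator(ps_data, rotate, nthreads, norm)
#   levels, errors = de.profile(npoints, uncertainty)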
def corrections_and_uncertainties(self, typ):
    """
    Calculate corrected profiles and uncertainties

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * the corrected profile is given by
      rho(alpha) = c(alpha)*i(alpha)*rho_raw(alpha), where c(alpha) is the
      response function and i(alpha) is the inefficiency function
    * statistical errors are provided by the density estimator
    * total errors are given by the sum in quadrature of the statistical
      and systematic errors
    """
    # Set the upstream statistical uncertainty to 0 as it is the given
    # profile to which the downstream profile is compared. The downstream
    # statistical uncertainties are set in self.make_profiles()
    data = self.density_data[typ]
    data["us"]["levels_stat_errors"] = [0. for bin in range(self.npoints)]

    # Do the correction for each of the tracker locations
    for loc in self.locations:
        print "Doing density level correction for", typ, loc
        source = self.density_data
        levels = self.do_corrections(typ, loc, source)
        data[loc]["corrected_levels"] = levels

    # Evaluate the systematic uncertainties for each of the tracker locations
    for loc in self.locations:
        print "Finding density systematic errors for", typ, loc
        reco_syst_list = self.calculate_detector_systematics(typ, loc)
        perf_syst_list = self.calculate_performance_systematics(typ, loc)
        syst_error_list = [(reco_syst_list[i]**2+perf_syst_list[i]**2)**0.5
                           for i in range(self.npoints)]
        data[loc]["levels_syst_errors"] = syst_error_list

    self.density_data[typ] = data

def do_corrections(self, typ, loc, source, use_capped = True):
    """
    Applies the corrections to the requested density profile

    Only applies the response correction to the reconstructed sample.

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * loc specifies the location of the tracker (us, ds)
    * source specifies the source of the corrections to be used
    * use capped corrections if use_capped is True
    """
    levels = np.array(source[typ][loc]["levels"])
    corr_key = "level_ratio"
    if use_capped:
        corr_key = "level_ratio_capped"
    if typ == "reco":
        response = np.array(source["response"][loc][corr_key])
        levels = levels*response
    if typ == "reco" or typ == "reco_mc":
        inefficiency = np.array(source["inefficiency"][loc][corr_key])
        levels = levels*inefficiency
    return levels.tolist()
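# Hedged numeric illustration of do_corrections for a "reco" profile: each
# level is multiplied by the response (reco_mc/reco) and the inefficiency
# (all_mc/reco_mc) ratios; the numbers below are made up:
#
#   raw          = np.array([10., 8., 5.])
#   response     = np.array([1.1, 1.0, 0.9])
#   inefficiency = np.array([1.2, 1.1, 1.0])
#   corrected    = raw*response*inefficiency     # -> [13.2, 8.8, 4.5]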
def calculate_detector_systematics(self, typ, loc):
    """
    Calculate the systematic errors in the reconstruction of tracks.

    The uncertainty on each level corresponds to the sum in quadrature of
    the residuals between the reference reconstruction set and the data
    sets that are shifted from the reference set.

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * loc specifies the location of the tracker (us, ds)
    """
    # If there is no reference specified, skip
    data = self.density_data[typ]
    syst_error_list = [0. for i in range(self.npoints)]
    if data["detector_reference"] == None:
        return syst_error_list
    print "\nEvaluating density reconstruction systematic errors", loc

    # Correct the density profile with the reference corrections
    source = data["detector_reference"]
    ref_levels = self.do_corrections(typ, loc, source)

    # Loop over the detector systematics list
    systematics_list = data[loc]["detector_systematics"]
    for i, source in enumerate(systematics_list):
        # Evaluate the levels with the corresponding systematic shift
        syst_levels = self.do_corrections(typ, loc, source)

        # Initialize a graph that contains the deviation from the reference
        name = self.get_syst_name(source["source"])
        if self.config_anal["density_systematics_draw"]:
            self.syst_graphs[typ][loc][name] = ROOT.TGraph(self.npoints)

        # Add in quadrature an uncertainty that corresponds to the level
        # shift due to the use of a different set of corrections
        scale = source["scale"]
        for j in range(self.npoints):
            err = (syst_levels[j] - ref_levels[j])*scale
            syst_error_list[j] = (syst_error_list[j]**2+err**2)**0.5

            if self.config_anal["density_systematics_draw"]:
                alpha = float(j+1.)/(self.npoints+1.)
                val = 0.
                if ref_levels[j] > 0:
                    val = err/ref_levels[j]
                self.syst_graphs[typ][loc][name].SetPoint(j, alpha, val)

    return syst_error_list

def get_syst_name(self, path):
    """
    Convert a systematic source path to a systematic name
    """
    suffix = path.split("Systematics_", 1)[1]
    name = suffix.split("/")[0]
    return name

def calculate_performance_systematics(self, typ, loc):
    """
    Calculate the systematic errors in the channel performance.

    The experiment measures ratios of downstream density levels over
    upstream density levels. The uncertainty is evaluated as the shift in
    the downstream density profile for variable deviations from the
    expected cooling channel performance.

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * loc specifies the location of the tracker (us, ds)
    """
    # If there is no reference specified, skip
    data = self.density_data[typ]
    syst_error_list = [0. for i in range(self.npoints)]
    if data["performance_reference"] == None:
        return syst_error_list
    print "\nEvaluating density performance systematic errors", loc

    # Get the reference ratio array
    source = data["performance_reference"]
    ref_ratio = np.array(source[typ]["ds"]["levels"]) / \
                np.array(source[typ]["us"]["levels"])
    ref_ratio = ref_ratio.tolist()

    # Loop over the performance systematics list
    systematics_list = data[loc]["performance_systematics"]
    ratio_error_list = [0. for i in range(self.npoints)]
    for i, source in enumerate(systematics_list):
        # Evaluate the ratio with the corresponding systematic shift
        syst_ratio = np.array(source[typ]["ds"]["levels"]) / \
                     np.array(source[typ]["us"]["levels"])
        syst_ratio = syst_ratio.tolist()

        # Initialize a graph that contains the deviation from the reference
        name = self.get_syst_name(source["source"])
        if self.config_anal["density_systematics_draw"]:
            self.syst_graphs[typ][loc][name] = ROOT.TGraph(self.npoints)

        # Add in quadrature an uncertainty that corresponds to the ratio
        # shift due to the use of a different cooling channel
        scale = source["scale"]
        for j in range(self.npoints):
            err = (syst_ratio[j] - ref_ratio[j])*scale
            ratio_error_list[j] = (ratio_error_list[j]**2+err**2)**0.5

            if self.config_anal["density_systematics_draw"]:
                alpha = float(j+1.)/(self.npoints+1.)
                self.syst_graphs[typ][loc][name].SetPoint(j, alpha, err)

    # Convert the ratio uncertainties to density uncertainties
    ref_levels_us = data["us"]["levels"]
    for i in range(self.npoints):
        syst_error_list[i] = ratio_error_list[i]*ref_levels_us[i]

    return syst_error_list
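# Hedged worked example of the quadrature accumulation used in the two
# methods above: two independent shifted samples with scaled residuals of
# 0.3 and 0.4 on the same bin combine to a total systematic of
#
#   (0.3**2 + 0.4**2)**0.5 = 0.5
#
# i.e. each residual is treated as an independent error contribution.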
def draw_systematics(self):
    """
    Draws the systematic errors.

    The uncertainty on each level corresponds to the residuals between the
    reference reconstruction set and the data sets that are shifted from
    the reference set.
    """
    # Feed the systematics graphs to the drawer
    for typ in self.data_types:
        for loc in self.locations:
            print typ, loc, len(self.syst_graphs[typ][loc])
            if len(self.syst_graphs[typ][loc]):
                plotter = DensityPlotter(self.plot_dir, typ+"_"+loc)
                plotter.plot_systematics(self.syst_graphs[typ][loc])

def draw_profiles(self):
    """
    Produce plots that compare the density profiles upstream and
    downstream of the absorber. Produce one for each category of data.
    """
    for typ in self.data_types:
        # Skip if no data
        if not len(self.density_data[typ]["us"]["levels"]):
            continue

        # Initialize the graphs
        graphs = {}
        graphs_full = {}
        for loc in self.locations:
            graphs[loc] = self.make_graph(typ, loc, True, False)
            graphs_full[loc] = self.make_graph(typ, loc, True, True)

        # Print up/down comparison
        canvas_name = 'density_profile_%s' % typ
        canvas = self.get_plot(canvas_name)["pad"]
        mg = self.make_multigraph(typ, graphs, graphs_full, "density_profile")
        leg = self.make_multigraph_legend(graphs)
        for fmt in ["pdf", "png", "root"]:
            canvas.Print(self.plot_dir+"/"+canvas_name+"."+fmt)

        # Print ratios
        canvas_name = 'density_ratio_%s' % typ
        canvas = self.get_plot(canvas_name)["pad"]
        gratio = self.make_ratio(typ, graphs, graphs_full, "density_ratio")
        for fmt in ["pdf", "png", "root"]:
            canvas.Print(self.plot_dir+"/"+canvas_name+"."+fmt)

def make_graph(self, typ, loc, include_corr, include_syst):
    """
    Builds a TGraphErrors for the requested data type and location

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * loc specifies the location of the tracker (us, ds)
    * include_corr is True if the corrected levels are to be represented
    * include_syst is True if the systematic uncertainty is to be included
    """
    level_type = "levels"
    if include_corr:
        level_type = "corrected_levels"

    graph = ROOT.TGraphErrors(self.npoints)
    for i in range(self.npoints):
        alpha = float(i+1.)/(self.npoints+1.)
        value = self.density_data[typ][loc][level_type][i]
        graph.SetPoint(i, alpha, value)
        all_err = self.density_data[typ][loc]["levels_stat_errors"][i]
        if include_syst and value > 0.:
            syst_err = self.density_data[typ][loc]["levels_syst_errors"][i]
            all_err = (syst_err**2+all_err**2)**0.5
        graph.SetPointError(i, 0., all_err)

    color = {"us":1, "ds":4}[loc]
    graph.SetLineColor(color)
    graph.SetFillColorAlpha(color, .25)
    return graph

def make_multigraph(self, typ, graphs, graphs_full, name):
    """
    Initializes a multigraph, draws it

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * graphs is a dictionary containing the up and downstream graphs with
      statistical errors
    * graphs_full is a dictionary containing the up and downstream graphs
      with full errors
    * name is the type of graph being output
    """
    mg = ROOT.TMultiGraph(name+"_"+typ,
             ";Fraction #alpha;#rho_{#alpha} [mm^{-2}(MeV/c)^{-2}]")
    for loc in self.locations:
        mg.Add(graphs[loc], "LE3")
        mg.Add(graphs_full[loc], "LE3")
        self.plots[name+"_"+typ]["graphs"][loc] = graphs[loc]
        self.plots[name+"_"+typ]["graphs"][loc+"_full"] = graphs_full[loc]
    mg.Draw("A")
    return mg

def make_multigraph_legend(self, graphs):
    """
    Initializes a multigraph legend, draws it

    * graphs is a dictionary containing the up and downstream graphs
    """
    leg = ROOT.TLegend(.6, .65, .8, .85)
    leg.AddEntry(graphs["us"], "Upstream", "LF")
    leg.AddEntry(graphs["ds"], "Downstream", "LF")
    leg.Draw("SAME")
    return leg
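# Hedged note on the abscissa used in make_graph above: the fractions
# alpha = (i+1)/(npoints+1) sample the unit interval uniformly while
# excluding the endpoints, e.g. with npoints = 9:
#
#   alphas = [float(i+1.)/(9+1.) for i in range(9)]
#   # -> [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]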
def make_ratio(self, typ, graphs, graphs_full, name):
    """
    Initializes a graph ratio, draws it

    * typ specifies the type of data (all_mc, reco_mc, reco)
    * graphs is a dictionary containing the up and downstream graphs with
      statistical errors
    * graphs_full is a dictionary containing the up and downstream graphs
      with full errors
    * name is the type of graph being output
    """
    gratio = ROOT.TGraphErrors(self.npoints)
    gratio_full = ROOT.TGraphErrors(self.npoints)
    gratio_full.SetTitle(";Fraction #alpha;#rho_{#alpha}^{d} /#rho_{#alpha}^{u}")
    for i in range(self.npoints):
        ratio = graphs["ds"].GetY()[i]/graphs["us"].GetY()[i]
        gratio.GetX()[i] = graphs["us"].GetX()[i]
        gratio.GetEX()[i] = graphs["us"].GetEX()[i]
        gratio.GetY()[i] = ratio
        gratio.GetEY()[i] = 0.
        if graphs["ds"].GetY()[i] > 0.:
            gratio.GetEY()[i] = ratio*graphs["ds"].GetEY()[i]/graphs["ds"].GetY()[i]

        gratio_full.GetX()[i] = gratio.GetX()[i]
        gratio_full.GetEX()[i] = gratio.GetEX()[i]
        gratio_full.GetY()[i] = ratio
        gratio_full.GetEY()[i] = 0.
        if graphs["ds"].GetY()[i] > 0.:
            us_rel_err = graphs_full["us"].GetEY()[i]/graphs_full["us"].GetY()[i]
            ds_rel_err = graphs_full["ds"].GetEY()[i]/graphs_full["ds"].GetY()[i]
            gratio_full.GetEY()[i] = ratio*(us_rel_err**2 + ds_rel_err**2)**0.5

    self.plots[name+"_"+typ]["graphs"]["ratio"] = gratio
    self.plots[name+"_"+typ]["graphs"]["ratio_full"] = gratio_full

    gratio.SetLineColor(1)
    gratio.SetFillColorAlpha(1, .25)
    gratio_full.SetLineColor(1)
    gratio_full.SetFillColorAlpha(1, .25)
    gratio_full.Draw("ALE3")
    gratio.Draw("LE3 SAME")
    return gratio, gratio_full

def save(self):
    """
    Saves the data dictionary to a json file
    """
    fout = open(self.plot_dir+"/density.json", "w")
    print >> fout, json.dumps(self.density_data, sort_keys=True, indent=4)
    fout.close()

def set_corrections(self):
    """
    Calculate the profile corrections, i.e. the inefficiency and response
    function

    * uses the Monte Carlo to generate corrections
    """
    for loc in self.locations:
        # Initialize the data
        all_mc_levels = self.density_data["all_mc"][loc]["levels"]
        reco_mc_levels = self.density_data["reco_mc"][loc]["levels"]
        reco_levels = self.density_data["reco"][loc]["levels"]

        # Calculate the corrections
        inefficiency, response = [], []
        for i in range(len(all_mc_levels)):
            # Inherent detector inefficiency
            if reco_mc_levels[i] == 0:
                inefficiency.append(1.)
            else:
                inefficiency.append(float(all_mc_levels[i])/reco_mc_levels[i])

            # Detector response function
            if reco_levels[i] == 0:
                response.append(1.)
            else:
                response.append(float(reco_mc_levels[i])/reco_levels[i])

        # Produce a capped version of the corrections
        cutoff = self.config_anal["density_corrections_cutoff"]
        cutoff_index = int(cutoff*(self.npoints+1.))
        ineff_cap = all_mc_levels[cutoff_index]/reco_mc_levels[cutoff_index]
        resp_cap = reco_mc_levels[cutoff_index]/reco_levels[cutoff_index]
        inefficiency_capped = copy.deepcopy(inefficiency)
        response_capped = copy.deepcopy(response)
        for i in range(cutoff_index, len(inefficiency_capped)):
            inefficiency_capped[i] = ineff_cap
            response_capped[i] = resp_cap

        # Store the correction factors
        self.density_data["inefficiency"][loc] = {
            "level_ratio":inefficiency,
            "level_ratio_capped":inefficiency_capped,
        }
        self.density_data["response"][loc] = {
            "level_ratio":response,
            "level_ratio_capped":response_capped,
        }

def draw_corrections(self):
    """
    Draw the correction factors used
    """
    for loc in self.locations:
        inefficiency = self.density_data["inefficiency"][loc]["level_ratio"]
        response = self.density_data["response"][loc]["level_ratio"]
        plotter = DensityPlotter(self.plot_dir, loc)
        plotter.plot_corrections(inefficiency, response)

        inefficiency = self.density_data["inefficiency"][loc]["level_ratio_capped"]
        response = self.density_data["response"][loc]["level_ratio_capped"]
        plotter = DensityPlotter(self.plot_dir, "capped_"+loc)
        plotter.plot_corrections(inefficiency, response)
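# Hedged illustration of the capping in set_corrections: with npoints = 9
# and density_corrections_cutoff = 0.6, cutoff_index = int(0.6*10) = 6, so
# every bin from index 6 onwards is frozen at the bin-6 ratio, protecting
# the sparsely populated tail from noisy corrections (values made up):
#
#   response        = [1.0, 1.0, 1.1, 1.1, 1.2, 1.2, 1.3, 1.5, 2.4]
#   response_capped = [1.0, 1.0, 1.1, 1.1, 1.2, 1.2, 1.3, 1.3, 1.3]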
def clear_density_data(self):
    """
    Initializes the dictionary that is used to store data and make
    corrections down the line
    """
    self.density_data = {
        "inefficiency":{
            "us":{
                "level_ratio":[1. for i in range(self.npoints)],
                "level_ratio_capped":[1. for i in range(self.npoints)],
            },
            "ds":{
                "level_ratio":[1. for i in range(self.npoints)],
                "level_ratio_capped":[1. for i in range(self.npoints)],
            },
        },
        "response":{
            "us":{
                "level_ratio":[1. for i in range(self.npoints)],
                "level_ratio_capped":[1. for i in range(self.npoints)],
            },
            "ds":{
                "level_ratio":[1. for i in range(self.npoints)],
                "level_ratio_capped":[1. for i in range(self.npoints)],
            },
        },
        "source":"",
        "scale":1.,
        "npoints":0,
    }
    level_data = {
        "performance_reference":None,
        "detector_reference":None,
        "us":{
            "levels":[],
            "corrected_levels":[],
            "levels_stat_errors":[],
            "levels_syst_errors":[],
            "detector_systematics":[],
            "performance_systematics":[],
        },
        "ds":{
            "levels":[],
            "corrected_levels":[],
            "levels_stat_errors":[],
            "levels_syst_errors":[],
            "detector_systematics":[],
            "performance_systematics":[],
        },
    }
    for typ in self.data_types:
        self.density_data[typ] = copy.deepcopy(level_data)

def load_errors(self):
    """
    Load the systematic errors.

    Two "classes" of systematic errors:
    * systematic errors on the reconstruction are contained in the
      correction factors. For these we store the correction factors and
      compare to the reference correction factors
    * systematic errors on the performance are contained in the actual
      density profile. For these we store the point-by-point fractional
      difference between the density profile and the reference.
    """
    # If the corrections are to be calculated in this analysis, skip this step
    if self.calculate_corrections:
        return

    # Set base correction factors
    self.load_corrections(self.config_anal["density_corrections"])

    # Load systematic uncertainties
    systematics = self.config_anal["density_systematics"]
    for typ in systematics:
        print "Loading density systematic errors for", typ
        if typ not in self.density_data:
            self.density_data[typ] = {}
        for ref_key in ["detector_reference", "performance_reference"]:
            ref_src = systematics[typ][ref_key]
            if ref_src == None:
                self.density_data[typ][ref_key] = None
            else:
                self.density_data[typ][ref_key] = \
                                           self.load_one_error(ref_src, None)
            print "  Loaded reference", typ, ref_key, ref_src, \
                  type(self.density_data[typ][ref_key])
        for loc in ["us", "ds"]:
            if loc not in self.density_data[typ]:
                self.density_data[typ][loc] = {}
            for key in ["detector_systematics", "performance_systematics"]:
                print "SYSTEMATICS", typ, loc, key
                print "  KEYS", systematics[typ].keys()
                err_src_dict = systematics[typ][loc][key]
                self.density_data[typ][loc][key] = [
                    self.load_one_error(err_src, scale)
                    for err_src, scale in err_src_dict.iteritems()
                ]
                print "  Loaded", len(self.density_data[typ][loc][key]), loc, key

def load_corrections(self, file_name):
    """
    Load the density corrections to be applied during this density
    analysis, i.e. the correction factors.
    """
    fin = open(file_name)
    src_density = json.loads(fin.read())
    fin.close()
    src_density["source"] = file_name
    self.density_data["inefficiency"] = src_density["inefficiency"]
    self.density_data["response"] = src_density["response"]

def load_one_error(self, file_name, scale):
    """
    Load the density analysis output for a given uncertainty source
    """
    fin = open(file_name)
    density = json.loads(fin.read())
    fin.close()
    density["source"] = file_name
    density["scale"] = scale
    return density
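# Hedged usage sketch of load_one_error: each systematic source is the
# density.json written by a shifted analysis, loaded together with the scale
# that multiplies its residual in the quadrature sums (the path below is
# hypothetical, but it must contain "Systematics_" for get_syst_name to
# parse it):
#
#   density = self.load_one_error(
#       "systematics/Systematics_tku_shift/plots/density.json", 1.)
#   print density["source"], density["scale"]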