예제 #1
0
    def make_pdfs(self, typ):
        """
        Make pdfs, i.e. number of events vs density

        For each location in self.locations the per-event kNN density levels
        are histogrammed into self.npoints bins spanning
        (0, self.density_max). The per-event densities, the pdf, its cdf (from
        self.make_cdf) and their statistical errors are stored in
        self.density_data[typ][loc]. Nothing is done if there are no upstream
        events.
        * typ specifies the type of data; it is used as a key into self.data
          and self.density_data

        NOTE(review): this docstring used to say that the last bin is an
        overflow bin, but the line that appended the overflow count is
        commented out below and np.histogram ignores values outside the
        requested range. Confirm which behaviour is intended.
        """
        # Skip if there are no upstream events (also avoids dividing by zero)
        if not self.data[typ]["us"].get_n_events():
            return

        # Fraction of the upstream event count that is also present downstream
        transmission = float(self.data[typ]["ds"].get_n_events()
                             ) / self.data[typ]["us"].get_n_events()
        for loc in self.locations:
            # Extract the data as a numpy array
            n_events = self.data[typ][loc].get_n_events()
            id_data = [None] * n_events
            # np.ndarray allocates without initialising; every row is expected
            # to be overwritten in the loop below
            ps_data = np.ndarray((n_events, 4))
            for i, event in enumerate(self.data[typ][loc].retrieve()):
                # Last element of the event holds the 4 values used for the
                # density estimate; the first three elements identify the event
                ps_data[i] = event[-1].tolist()
                id_data[i] = (event[0], event[1], event[2])

            # Use the kNN module to get a density profile
            # The downstream sample is normalised by the transmission
            norm = 1.
            if loc == "ds":
                norm = transmission
            density_estimator = kNNDensityEstimator(ps_data, self.rotate,
                                                    self.nthreads, norm)
            # set_levels() is called before reading .levels
            density_estimator.set_levels()
            levels = density_estimator.levels
            # Map each event identifier to its density level
            key_value_pairs = [(id_data[i], levels[i])
                               for i in range(n_events)]
            density_dict = dict(key_value_pairs)
            self.density_data[typ][loc]["densities"] = density_dict
            # Previous binning, kept for reference (npoints-1 bins + overflow):
            #            pdf, bx = np.histogram(density_estimator.levels, self.npoints-1, (0., self.density_max))
            pdf, bx = np.histogram(density_estimator.levels, self.npoints,
                                   (0., self.density_max))
            pdf = pdf.tolist()
            # Overflow bin disabled together with the binning change above:
            #pdf.append(len(levels)-sum(pdf))

            cdf = self.make_cdf(pdf)
            self.density_data[typ][loc]["pdf"] = pdf
            self.density_data[typ][loc]["cdf"] = cdf
            # NOTE(review): max(density_dict) is the largest *key* (event id
            # tuple), not the largest density, and it raises ValueError when
            # there are no events at this location. pdf[-1] is labelled
            # "n(overflow)" but is the last regular bin. Confirm intent.
            print "Found density pdf with max entry", max(density_dict), \
                  "max bin", self.density_max, "n(overflow)", pdf[-1], \
                  "and", len(density_dict), "=", sum(pdf), "entries"
            # Statistical errors are evaluated on each bin divided by cdf[0].
            # NOTE(review): pdf holds integer counts; if cdf[0] is also an
            # integer this is floor division under Python 2 - depends on what
            # make_cdf returns, which is not visible here. TODO confirm.
            pdf_stats = [density_estimator.bin_uncertainty(pdf_bin/cdf[0]) \
                                                           for pdf_bin in pdf]
            cdf_stats = [density_estimator.bin_uncertainty(cdf_bin/cdf[0]) \
                                                           for cdf_bin in cdf]
            # The upstream sample is given zero statistical error
            if loc == "us":
                pdf_stats = [0. for pdf_bin in pdf]
                cdf_stats = [0. for cdf_bin in cdf]
            self.density_data[typ][loc]["pdf_stat_errors"] = pdf_stats
            self.density_data[typ][loc]["cdf_stat_errors"] = cdf_stats
            # self.delta is overwritten on every location, so the value left
            # behind is the one from the last entry of self.locations.
            # NOTE(review): sum(pdf) / self.npoints is integer division under
            # Python 2 when npoints is an int - confirm this is intended.
            self.delta = sum(pdf) / self.npoints / 100.
            print "Density correction with delta", self.delta
    def make_profiles(self, typ):
        """
        Produces density profiles. Extract a numpy ndarray that contains all of
        the phase space vectors, initialize the kNN density estimator and extract
        the density profile with its uncertainty
        * typ specifies the type of data (all_mc, reco_mc, reco)
        """
        # Skip if no data
        if not self.data[typ]["us"].get_n_events():
            return

        transmission = float(self.data[typ]["ds"].get_n_events()
                             ) / self.data[typ]["us"].get_n_events()
        for loc in self.locations:
            # Extract the data as a numpy array
            ps_data = np.ndarray((self.data[typ][loc].get_n_events(), 4))
            for i, event in enumerate(self.data[typ][loc].retrieve()):
                ps_data[i] = event[-1].tolist()

            # Use the kNN module to get a density profile
            norm = 1.
            if loc == "ds":
                norm = transmission
            density_estimator = kNNDensityEstimator(ps_data, self.rotate,
                                                    self.nthreads, norm)
            levels, errors = density_estimator.profile(self.npoints,
                                                       self.uncertainty)

            # Store the profiles and their statistical uncertainties
            self.density_data[typ][loc]["levels"] = levels
            self.density_data[typ][loc]["levels_stat_errors"] = errors
            print "DensityAnalysis", loc
            print "   levels", levels
            print "   errors", errors

            # Plot the Poincare sections if requested
            if self.config_anal["density_sections"]:
                plotter = DensityPlotter(self.plot_dir, typ + "_" + loc)
                plotter.plot_phase_space(density_estimator)
예제 #3
0
    def make_profiles(self, typ):
        """
        Produces density profiles. Extract a numpy ndarray that contains all of
        the phase space vectors, initialize the kNN density estimator and extract
        the density profile with its uncertainty
        * typ specifies the type of data (all_mc, reco_mc, reco)

        The levels and their statistical errors are stored in
        self.density_data[typ][loc] for every location in self.locations.
        Returns None; nothing is done if there are no upstream events.
        """
        samples = self.data[typ]

        # Skip if no data (this also protects the division below)
        if not samples["us"].get_n_events():
            return

        transmission = float(samples["ds"].get_n_events())/samples["us"].get_n_events()
        for loc in self.locations:
            # Extract the data as a numpy array, one row of 4 values per event
            ps_data = np.ndarray((samples[loc].get_n_events(), 4))
            for i, event in enumerate(samples[loc].retrieve()):
                ps_data[i] = event[-1].tolist()

            # Use the kNN module to get a density profile; only the downstream
            # sample is normalised by the transmission
            norm = transmission if loc == "ds" else 1.
            density_estimator = kNNDensityEstimator(ps_data, self.rotate,
                                                    self.nthreads, norm)
            levels, errors = density_estimator.profile(
                self.npoints, self.uncertainty, scaling=self.graph_scaling)

            # Store the profiles and their statistical uncertainties
            self.density_data[typ][loc]["levels"] = levels
            self.density_data[typ][loc]["levels_stat_errors"] = errors

            # Plot the Poincare sections if requested
            if self.config_anal["density_sections"]:
                plotter = DensityPlotter(self.plot_dir, typ+"_"+loc)
                plotter.plot_phase_space(density_estimator)

    def corrections_and_uncertainties(self, typ):
        """
        Calculate corrected profiles and uncertainties
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * corrected profiles are given by rho(alpha) = c(alpha)*i(alpha)*rho(alpha)
          where c(alpha) is the response function and i(alpha) is the
          inefficiency function (see self.do_corrections)
        * statistical errors are provided by the density estimator
        * systematic errors are the sum in quadrature of the detector and the
          performance systematic errors

        The results are written to self.density_data[typ][loc] under the keys
        "corrected_levels" and "levels_syst_errors". Returns None.
        """
        # Set the upstream statistical uncertainty to 0 as it is the given
        # profile to which to compare the downstream profile. The downstream
        # statistical uncertainties are set in self.make_profiles()
        data = self.density_data[typ]
        data["us"]["levels_stat_errors"] = [0.] * self.npoints

        # Do the correction for each of the tracker locations, using the
        # corrections stored in this analysis' own density data
        for loc in self.locations:
            print("Doing density level correction for %s %s" % (typ, loc))
            source = self.density_data
            data[loc]["corrected_levels"] = self.do_corrections(typ, loc, source)

        # Evaluate the systematic uncertainties for each of the tracker locations
        for loc in self.locations:
            print("Finding density systematic errors for %s %s" % (typ, loc))
            reco_syst_list = self.calculate_detector_systematics(typ, loc)
            perf_syst_list = self.calculate_performance_systematics(typ, loc)
            data[loc]["levels_syst_errors"] = [
                (reco_syst_list[i]**2 + perf_syst_list[i]**2)**0.5
                for i in range(self.npoints)
            ]

        # data is the same object as self.density_data[typ]; the assignment is
        # kept so the update stays explicit
        self.density_data[typ] = data

    def do_corrections(self, typ, loc, source, use_capped = True):
        """
        Return the density levels of source[typ][loc] multiplied by the
        correction factors that apply to this data type.
        * typ specifies the type of data (all_mc, reco_mc, reco); "reco" gets
          the response and the inefficiency correction, "reco_mc" only the
          inefficiency correction, anything else is returned uncorrected
        * loc specifies the location of the tracker (us, ds)
        * source is the dictionary holding both the levels and the corrections
        * use_capped selects "level_ratio_capped" instead of "level_ratio"
        The result is a plain list.
        """
        ratio_key = "level_ratio_capped" if use_capped else "level_ratio"

        # Corrections to apply, in the order they are multiplied in
        wanted = []
        if typ == "reco":
            wanted.append("response")
        if typ in ("reco", "reco_mc"):
            wanted.append("inefficiency")

        corrected = np.array(source[typ][loc]["levels"])
        for correction in wanted:
            factors = np.array(source[correction][loc][ratio_key])
            corrected = corrected*factors

        return corrected.tolist()

    def calculate_detector_systematics(self, typ, loc):
        """
        Calculate the systematic errors in the reconstruction of tracks. The uncertainty
        on each level corresponds to the sum in quadrature of the residuals between the reference
        reconstruction set and the data sets that are shifted from the reference set
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * loc specifies the location of the tracker (us, ds)

        Returns a list of self.npoints errors; all zeros when no detector
        reference is set. When config_anal["density_systematics_draw"] is set,
        a TGraph of the relative shift is stored in
        self.syst_graphs[typ][loc] for every systematic source.
        """
        data = self.density_data[typ]
        syst_error_list = [0.] * self.npoints

        # If there is no reference specified, skip
        if data["detector_reference"] is None:
            return syst_error_list

        print("\nEvaluating density reconstruction systematic errors %s" % (loc,))

        # Correct the density profile with the reference corrections
        reference = data["detector_reference"]
        ref_levels = self.do_corrections(typ, loc, reference)

        # Loop over the detector systematics list
        for shifted in data[loc]["detector_systematics"]:
            # Evaluate the levels with the corresponding systematic shift
            syst_levels = self.do_corrections(typ, loc, shifted)

            # Initialize a graph that contains the deviation from the reference
            name = self.get_syst_name(shifted["source"])
            draw = self.config_anal["density_systematics_draw"]
            graph = None
            if draw:
                graph = ROOT.TGraph(self.npoints)
                self.syst_graphs[typ][loc][name] = graph

            # Add in quadrature an uncertainty that corresponds to the level shift due
            # to the use of a different set of corrections
            scale = shifted["scale"]
            for j in range(self.npoints):
                err = (syst_levels[j] - ref_levels[j])*scale
                syst_error_list[j] = (syst_error_list[j]**2+err**2)**0.5

                # Fill the graph point by point, inside the loop, so that
                # every level is drawn rather than only the last one
                if draw:
                    alpha = (j+1.)/(self.npoints+1.)
                    val = 0.
                    if ref_levels[j] > 0:
                        val = err/ref_levels[j]
                    graph.SetPoint(j, alpha, val)

        return syst_error_list

    def get_syst_name(self, path):
        """
        Extract the systematic name from a path: the text that follows the
        first "Systematics_" marker, up to the next "/".
        Raises IndexError if the marker is not present in path.
        """
        return path.split("Systematics_", 1)[1].split("/", 1)[0]

    def calculate_performance_systematics(self, typ, loc):
        """
        Calculate the systematic errors in the channel performance. The experiment measures
        ratios of downstream density levels over upstream density levels.
        The uncertainty is evaluated as the shifts in the downstream density profile for
        variable deviations from the expected cooling channel performance.
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * loc specifies the location of the tracker (us, ds)

        Returns a list of self.npoints errors; all zeros when no performance
        reference is set. When config_anal["density_systematics_draw"] is set,
        a TGraph of the ratio shift is stored in self.syst_graphs[typ][loc]
        for every systematic source.
        """
        data = self.density_data[typ]
        syst_error_list = [0.] * self.npoints

        # If there is no reference specified, skip
        if data["performance_reference"] is None:
            return syst_error_list

        print("\nEvaluating density performance systematic errors %s" % (loc,))

        # Get the reference ratio array (downstream over upstream levels)
        reference = data["performance_reference"]
        ref_ratio = (np.array(reference[typ]["ds"]["levels"])
                     / np.array(reference[typ]["us"]["levels"])).tolist()

        # Loop over the performance systematics list
        ratio_error_list = [0.] * self.npoints
        for shifted in data[loc]["performance_systematics"]:
            # Evaluate the ratio with the corresponding systematic shift
            syst_ratio = (np.array(shifted[typ]["ds"]["levels"])
                          / np.array(shifted[typ]["us"]["levels"])).tolist()

            # Initialize a graph that contains the deviation from the reference
            name = self.get_syst_name(shifted["source"])
            draw = self.config_anal["density_systematics_draw"]
            graph = None
            if draw:
                graph = ROOT.TGraph(self.npoints)
                self.syst_graphs[typ][loc][name] = graph

            # Add in quadrature an uncertainty that corresponds to the ratio shift due
            # to the use of a different cooling channel
            scale = shifted["scale"]
            for j in range(self.npoints):
                err = (syst_ratio[j] - ref_ratio[j])*scale
                ratio_error_list[j] = (ratio_error_list[j]**2+err**2)**0.5

                if draw:
                    alpha = (j+1.)/(self.npoints+1.)
                    graph.SetPoint(j, alpha, err)

        # Convert the uncertainties in terms of density by scaling the ratio
        # errors with the upstream levels of this analysis
        ref_levels_us = data["us"]["levels"]
        for i in range(self.npoints):
            syst_error_list[i] = ratio_error_list[i] * ref_levels_us[i]

        return syst_error_list

    def draw_systematics(self):
        """
        Draws the systematic error graphs stored in self.syst_graphs. For
        every data type in self.data_types and every location in
        self.locations that has at least one graph, the graphs are handed to a
        DensityPlotter labelled "<typ>_<loc>". Takes no arguments and returns
        None.
        """
        # Feed the systematics graphs to the drawer
        for typ in self.data_types:
            for loc in self.locations:
                graphs = self.syst_graphs[typ][loc]
                print("%s %s %s" % (typ, loc, len(graphs)))
                # Nothing to plot for this combination
                if not graphs:
                    continue
                plotter = DensityPlotter(self.plot_dir, typ+"_"+loc)
                plotter.plot_systematics(graphs)

    def draw_profiles(self):
        """
        For each category of data, print the upstream/downstream density
        profile comparison and the downstream-over-upstream ratio to pdf, png
        and root files in self.plot_dir. Categories whose upstream levels are
        empty are skipped.
        """
        formats = ("pdf", "png", "root")
        for typ in self.data_types:
            # Skip if no data
            if len(self.density_data[typ]["us"]["levels"]) == 0:
                continue

            # Graphs with statistical errors only, and with the full errors
            stat_graphs, full_graphs = {}, {}
            for loc in self.locations:
                stat_graphs[loc] = self.make_graph(typ, loc, True, False)
                full_graphs[loc] = self.make_graph(typ, loc, True, True)

            # Up/down comparison. The returned objects are bound to names
            # although unused - presumably to keep the drawn ROOT objects
            # alive until the pad is printed; confirm before removing.
            profile_name = 'density_profile_%s' % typ
            pad = self.get_plot(profile_name)["pad"]
            multigraph = self.make_multigraph(typ, stat_graphs, full_graphs,
                                              "density_profile")
            legend = self.make_multigraph_legend(stat_graphs)
            for extension in formats:
                pad.Print(self.plot_dir+"/"+profile_name+"."+extension)

            # Ratios
            ratio_name = 'density_ratio_%s' % typ
            pad = self.get_plot(ratio_name)["pad"]
            ratio_graphs = self.make_ratio(typ, stat_graphs, full_graphs,
                                           "density_ratio")
            for extension in formats:
                pad.Print(self.plot_dir+"/"+ratio_name+"."+extension)

    def make_graph(self, typ, loc, include_corr, include_syst):
        """
        Builds a TGraphErrors for the requested data type and location
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * loc specifies the location of the tracker (us, ds)
        * include_corr is True if the corrected levels are to be represented
        * include_syst is True if the systematic uncertainty is to be included

        Point i is placed at alpha = (i+1)/(npoints+1). Its vertical error is
        the statistical error, combined in quadrature with the systematic
        error when include_syst is set and the level is positive.
        Returns the graph.
        """
        level_type = "corrected_levels" if include_corr else "levels"
        data = self.density_data[typ][loc]

        graph = ROOT.TGraphErrors(self.npoints)
        for i in range(self.npoints):
            alpha = (i+1.)/(self.npoints+1.)
            value = data[level_type][i]
            graph.SetPoint(i, alpha, value)

            all_err = data["levels_stat_errors"][i]
            if include_syst and value > 0.:
                syst_err = data["levels_syst_errors"][i]
                all_err = (syst_err**2+all_err**2)**0.5
            # Set the error for every point; doing this only in the
            # systematics branch left stat-only graphs without error bars
            graph.SetPointError(i, 0., all_err)

        # Upstream is drawn with colour index 1, downstream with 4
        color = {"us":1,"ds":4}[loc]
        graph.SetLineColor(color)
        graph.SetFillColorAlpha(color, .25)

        return graph

    def make_multigraph(self, typ, graphs, graphs_full, name):
        """
        Create a TMultiGraph holding the graphs of every location, register
        the graphs in self.plots and draw the multigraph
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * graphs is a dictionary containing the up and downstream graphs with stat errors
        * graphs_full is a dictionary containing the up and downstream graphs with full errors
        * name of the type of graph being output
        Returns the multigraph.
        """
        plot_key = name+"_"+typ
        axis_titles = ";Fraction #alpha;#rho_{#alpha} [mm^{-2}(MeV/c)^{-2}]"
        multigraph = ROOT.TMultiGraph(plot_key, axis_titles)

        for loc in self.locations:
            multigraph.Add(graphs[loc], "LE3")
            multigraph.Add(graphs_full[loc], "LE3")

            # Register both graphs under the plot they belong to
            registry = self.plots[plot_key]["graphs"]
            registry[loc] = graphs[loc]
            registry[loc+"_full"] = graphs_full[loc]

        multigraph.Draw("A")
        return multigraph

    def make_multigraph_legend(self, graphs):
        """
        Create a legend with one entry for the upstream and one for the
        downstream graph, draw it and return it
        * graphs is a dictionary containing the up and downstream graphs
        """
        legend = ROOT.TLegend(.6, .65, .8, .85)
        for loc, label in (("us", "Upstream"), ("ds", "Downstream")):
            legend.AddEntry(graphs[loc], label, "LF")
        legend.Draw("SAME")
        return legend

    def make_ratio(self, typ, graphs, graphs_full, name):
        """
        Initializes the downstream/upstream ratio graphs, draws them
        * typ specifies the type of data (all_mc, reco_mc, reco)
        * graphs is a dictionary containing the up and downstream graphs with stat errors
        * graphs_full is a dictionary containing the up and downstream graphs with full errors
        * name of the type of graph being output

        Both graphs are stored in self.plots[name+"_"+typ]["graphs"] under
        "ratio" and "ratio_full". Returns the tuple (gratio, gratio_full).

        NOTE: a zero upstream level raises ZeroDivisionError, as before.
        """
        gratio = ROOT.TGraphErrors(self.npoints)
        gratio_full = ROOT.TGraphErrors(self.npoints)
        gratio_full.SetTitle(";Fraction #alpha;#rho_{#alpha}^{d} /#rho_{#alpha}^{u}")
        for i in range(self.npoints):
            ratio = graphs["ds"].GetY()[i]/graphs["us"].GetY()[i]

            # Ratio with the downstream statistical error only
            gratio.GetX()[i] = graphs["us"].GetX()[i]
            gratio.GetEX()[i] = graphs["us"].GetEX()[i]
            gratio.GetY()[i] = ratio
            gratio.GetEY()[i] = 0.
            if graphs["ds"].GetY()[i] > 0.:
                gratio.GetEY()[i] = ratio*graphs["ds"].GetEY()[i]/graphs["ds"].GetY()[i]

            # Ratio with the full errors. The point is always positioned, also
            # when the downstream level is zero; only the relative errors need
            # a positive downstream level
            gratio_full.GetX()[i] = gratio.GetX()[i]
            gratio_full.GetEX()[i] = gratio.GetEX()[i]
            gratio_full.GetY()[i] = ratio
            gratio_full.GetEY()[i] = 0.
            if graphs["ds"].GetY()[i] > 0.:
                us_rel_err = graphs_full["us"].GetEY()[i]/graphs_full["us"].GetY()[i]
                ds_rel_err = graphs_full["ds"].GetEY()[i]/graphs_full["ds"].GetY()[i]
                gratio_full.GetEY()[i] = ratio*(us_rel_err**2 + ds_rel_err**2)**0.5

        self.plots[name+"_"+typ]["graphs"]["ratio"] = gratio
        self.plots[name+"_"+typ]["graphs"]["ratio_full"] = gratio_full

        gratio.SetLineColor(1)
        gratio.SetFillColorAlpha(1, .25)
        gratio_full.SetLineColor(1)
        gratio_full.SetFillColorAlpha(1, .25)

        # The full-error band is drawn first and defines the axes
        gratio_full.Draw("ALE3")
        gratio.Draw("LE3 SAME")
        return gratio, gratio_full

    def save(self):
        """
        Saves the data dictionary to <plot_dir>/density.json as indented JSON
        with sorted keys, followed by a newline. The file is closed on exit,
        also if serialisation fails.
        """
        with open(self.plot_dir+"/density.json", "w") as fout:
            fout.write(json.dumps(self.density_data, sort_keys=True, indent=4))
            fout.write("\n")

    def set_corrections(self):
        """
        Calculate the profile corrections, i.e. the inefficiency and response function
        * uses the Monte Carlo to generate corrections
        * inefficiency is all_mc/reco_mc and response is reco_mc/reco, level
          by level; a ratio with a zero denominator is set to 1
        * the capped version holds the corrections constant, at their value at
          the cutoff index, from that index onwards. The cutoff index is
          int(density_corrections_cutoff*(npoints+1)); if it lies beyond the
          last level nothing is capped
        The results are stored in self.density_data["inefficiency"][loc] and
        self.density_data["response"][loc].
        """
        for loc in self.locations:
            # Initialize the data
            all_mc_levels = self.density_data["all_mc"][loc]["levels"]
            reco_mc_levels = self.density_data["reco_mc"][loc]["levels"]
            reco_levels = self.density_data["reco"][loc]["levels"]

            # Calculate the corrections
            inefficiency, response = [], []
            for i in range(len(all_mc_levels)):
                # Inherent detector inefficiency
                if reco_mc_levels[i] == 0:
                    inefficiency.append(1.)
                else:
                    inefficiency.append(float(all_mc_levels[i])/reco_mc_levels[i])

                # Detector response function
                if reco_levels[i] == 0:
                    response.append(1.)
                else:
                    response.append(float(reco_mc_levels[i])/reco_levels[i])

            # Produce a capped version of the corrections
            cutoff = self.config_anal["density_corrections_cutoff"]
            cutoff_index = int(cutoff*(self.npoints+1.))
            inefficiency_capped = list(inefficiency)
            response_capped = list(response)
            if cutoff_index < len(inefficiency):
                # Reuse the ratios computed above so the cap benefits from the
                # same zero-denominator protection
                ineff_cap = inefficiency[cutoff_index]
                resp_cap = response[cutoff_index]
                for i in range(cutoff_index, len(inefficiency)):
                    inefficiency_capped[i] = ineff_cap
                    response_capped[i] = resp_cap

            # Store the correction factors
            self.density_data["inefficiency"][loc] = {
                "level_ratio":inefficiency,
                "level_ratio_capped":inefficiency_capped,
            }
            self.density_data["response"][loc] = {
                "level_ratio":response,
                "level_ratio_capped":response_capped,
            }

    def draw_corrections(self):
        """
        Plot the inefficiency and response correction factors of every
        location, first the plain ratios and then the capped ones. The capped
        plots are labelled with a "capped_" prefix.
        """
        variants = (("level_ratio", ""), ("level_ratio_capped", "capped_"))
        for loc in self.locations:
            for ratio_key, prefix in variants:
                inefficiency = self.density_data["inefficiency"][loc][ratio_key]
                response = self.density_data["response"][loc][ratio_key]
                DensityPlotter(self.plot_dir, prefix+loc).plot_corrections(
                    inefficiency, response)

    def clear_density_data(self):
        """
        Reset self.density_data to its initial layout: unit correction
        factors of length self.npoints for both locations, bookkeeping
        entries, and one empty set of level data per entry of
        self.data_types.
        """
        def unit_ratios():
            # Fresh lists on every call so that no two entries share storage
            return {
                "level_ratio":[1. for _ in range(self.npoints)],
                "level_ratio_capped":[1. for _ in range(self.npoints)],
            }

        def unit_corrections():
            corrections = {}
            for loc in ("us", "ds"):
                corrections[loc] = unit_ratios()
            return corrections

        def empty_location():
            location = {}
            for key in ("levels", "corrected_levels", "levels_stat_errors",
                        "levels_syst_errors", "detector_systematics",
                        "performance_systematics"):
                location[key] = []
            return location

        fresh = {}
        fresh["inefficiency"] = unit_corrections()
        fresh["response"] = unit_corrections()
        fresh["source"] = ""
        fresh["scale"] = 1.
        fresh["npoints"] = 0

        for typ in self.data_types:
            level_data = {}
            level_data["performance_reference"] = None
            level_data["detector_reference"] = None
            level_data["us"] = empty_location()
            level_data["ds"] = empty_location()
            fresh[typ] = level_data

        self.density_data = fresh

    def load_errors(self):
        """
        Two "classes" of systematic errors;
        * systematic errors on the reconstruction are contained in the
          correction factors. For these we store the correction factors and
          compare to the reference correction factors
        * systematic errors on the performance are contained in the actual
          density profile. For these we store the point-by-point fractional
          difference between the density profile and reference.

        Reads config_anal["density_corrections"] and
        config_anal["density_systematics"]; for every data type listed there
        the two references and, per location, the two lists of systematic
        sources are loaded into self.density_data[typ]. Does nothing when the
        corrections are calculated in this analysis.
        """
        # If the corrections are to be calculated in this analysis, skip this step
        if self.calculate_corrections:
            return

        # Set base correction factors
        self.load_corrections(self.config_anal["density_corrections"])

        # Load systematic uncertainties
        systematics = self.config_anal["density_systematics"]
        for typ in systematics:
            print("Loading density systematic errors for %s" % (typ,))
            type_data = self.density_data.setdefault(typ, {})

            # References are loaded without a scale
            for ref_key in ["detector_reference", "performance_reference"]:
                ref_src = systematics[typ][ref_key]
                if ref_src is None:
                    type_data[ref_key] = None
                else:
                    type_data[ref_key] = self.load_one_error(ref_src, None)
                print("  Loaded reference %s %s %s %s" %
                      (typ, ref_key, ref_src, type(type_data[ref_key])))

            for loc in ["us", "ds"]:
                loc_data = type_data.setdefault(loc, {})
                for key in ["detector_systematics", "performance_systematics"]:
                    print("SYSTEMATICS %s %s %s" % (typ, loc, key))
                    print("       KEYS %s" % (list(systematics[typ].keys()),))
                    # Maps each source file to the scale applied to its shift
                    err_src_dict = systematics[typ][loc][key]
                    loc_data[key] = [
                        self.load_one_error(err_src, scale)
                        for err_src, scale in err_src_dict.items()
                    ]
                    print("  Loaded %s %s %s" % (len(loc_data[key]), loc, key))

    def load_corrections(self, file_name):
        """
        Load the density corrections to be applied during this density
        analysis. Reads the JSON file file_name and copies its "inefficiency"
        and "response" entries into self.density_data; nothing else from the
        file is kept. The file is closed before returning.
        """
        with open(file_name) as fin:
            src_density = json.loads(fin.read())
        self.density_data["inefficiency"] = src_density["inefficiency"]
        self.density_data["response"] = src_density["response"]

    def load_one_error(self, file_name, scale):
        """
        Load the density analysis output for a given uncertainty source
        * file_name is the JSON file to read; it is recorded under "source"
        * scale is recorded under "scale"
        Returns the loaded dictionary. Any "source" or "scale" entry already
        present in the file is overwritten. The file is closed before
        returning.
        """
        with open(file_name) as fin:
            density = json.loads(fin.read())
        density["source"] = file_name
        density["scale"] = scale
        return density