Esempio n. 1
0
    def _analyse_delphes_sample(
        self,
        delete_delphes_files,
        delphes_file,
        generator_truth,
        is_background,
        k_factor,
        lhe_file,
        lhe_file_for_weights,
        parse_lhe_events_as_xml,
        reference_benchmark,
        sampling_benchmark,
        weight_labels,
        sample_syst_names,
    ):
        """
        Extracts the observations and event weights of a single Delphes sample.

        The nuisance parameter definitions of the systematics used by this sample are read from `lhe_file` and
        stored in `self.nuisance_parameters`. The Delphes ROOT file is then parsed with `parse_delphes_root_file`,
        the weights are optionally replaced by the ones found in `lhe_file_for_weights` (filtered with the cut
        filter returned by the Delphes parser), and finally the k factor, the background treatment and the
        rescaling of the nuisance weights to the reference benchmark are applied.

        Parameters
        ----------
        delete_delphes_files : bool
            Forwarded to `parse_delphes_root_file` as `delete_delphes_sample_file`.

        delphes_file : str
            Delphes ROOT file handed to `parse_delphes_root_file`.

        generator_truth : bool
            Forwarded to `parse_delphes_root_file` as `use_generator_truth`.

        is_background : bool
            If True, the first set of weights found is copied to all benchmarks in `self.benchmark_names_phys`.

        k_factor : float or None
            If not None, every set of weights is multiplied with this factor.

        lhe_file : str
            LHE file from which the nuisance parameter definitions are extracted.

        lhe_file_for_weights : str or None
            If not None, the weights are read from this LHE file instead of the Delphes file.

        parse_lhe_events_as_xml : bool
            Forwarded to `parse_lhe_file` as `parse_events_as_xml`.

        reference_benchmark : str
            Key of the weights that the nuisance weights are rescaled to.

        sampling_benchmark : str
            Key of the weights the sample was generated with; also forwarded to `parse_lhe_file`.

        weight_labels : list or None
            Forwarded to `parse_delphes_root_file`.

        sample_syst_names : list of str or None
            Keys of `self.systematics` that apply to this sample. None is treated as an empty list.

        Returns
        -------
        this_observations : dict or None
            Observations returned by `parse_delphes_root_file`. None if the parser returned no observations.

        this_weights : dict or None
            Processed weights, keyed by weight label. None if the parser returned no observations.

        n_events : int or None
            Value returned by `self._check_sample_weights`. None if the parser returned no observations.

        Raises
        ------
        NotImplementedError
            If systematics are requested but `lhe_file_for_weights` is None.

        RuntimeError
            If a nuisance parameter is defined inconsistently with a previously stored definition, or if no
            weights could be extracted from either file.

        """
        # Relevant systematics
        systematics_used = OrderedDict()
        if sample_syst_names is None:
            sample_syst_names = []
        for key in sample_syst_names:
            systematics_used[key] = self.systematics[key]

        if systematics_used and lhe_file_for_weights is None:
            raise NotImplementedError(
                "Systematic uncertainties are currently only supported when the weights"
                " are extracted from the LHE file (instead of the HepMC / Delphes ROOT"
                " file). Please use the keyword lhe_filename when calling add_sample()."
            )

        # Read systematics setup from LHE file
        logger.debug("Extracting nuisance parameter definitions from LHE file")
        systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used)
        logger.debug("systematics_dict: %s", systematics_dict)
        # systematics_dict has structure
        # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}

        # Store nuisance parameters
        for systematics_name, nuisance_info in six.iteritems(systematics_dict):
            for nuisance_parameter_name, definition in six.iteritems(nuisance_info):
                # Only the benchmark names are stored, the weights and the processing are not needed here
                (benchmark0, _), (benchmark1, _), _ = definition
                new_entry = (systematics_name, benchmark0, benchmark1)

                # The attribute may still be None (nothing stored yet). Create the container lazily so that
                # the assignment below cannot fail, and so that the attribute stays None if nothing is stored.
                if self.nuisance_parameters is None:
                    self.nuisance_parameters = OrderedDict()

                if (
                    nuisance_parameter_name in self.nuisance_parameters
                    and new_entry != self.nuisance_parameters[nuisance_parameter_name]
                ):
                    raise RuntimeError(
                        "Inconsistent information for same nuisance parameter {}. Old: {}. New: {}.".format(
                            nuisance_parameter_name,
                            self.nuisance_parameters[nuisance_parameter_name],
                            new_entry,
                        )
                    )
                self.nuisance_parameters[nuisance_parameter_name] = new_entry

        # Calculate observables and weights in Delphes ROOT file
        this_observations, this_weights, cut_filter = parse_delphes_root_file(
            delphes_file,
            self.observables,
            self.observables_required,
            self.observables_defaults,
            self.cuts,
            self.cuts_default_pass,
            weight_labels,
            use_generator_truth=generator_truth,
            delete_delphes_sample_file=delete_delphes_files,
            acceptance_eta_max_a=self.acceptance_eta_max_a,
            acceptance_eta_max_e=self.acceptance_eta_max_e,
            acceptance_eta_max_mu=self.acceptance_eta_max_mu,
            acceptance_eta_max_j=self.acceptance_eta_max_j,
            acceptance_pt_min_a=self.acceptance_pt_min_a,
            acceptance_pt_min_e=self.acceptance_pt_min_e,
            acceptance_pt_min_mu=self.acceptance_pt_min_mu,
            acceptance_pt_min_j=self.acceptance_pt_min_j,
        )

        # No events found?
        if this_observations is None:
            logger.warning("No remaining events in this Delphes file, skipping it")
            return None, None, None

        if this_weights is not None:
            logger.debug("Found weights %s in Delphes file", list(this_weights.keys()))
        else:
            logger.debug("Did not extract weights from Delphes file")

        # Sanity checks
        n_events = self._check_sample_observations(this_observations)

        # Find weights in LHE file
        if lhe_file_for_weights is not None:
            logger.debug("Extracting weights from LHE file")
            _, this_weights = parse_lhe_file(
                filename=lhe_file_for_weights,
                sampling_benchmark=sampling_benchmark,
                benchmark_names=self.benchmark_names_phys,
                observables=OrderedDict(),
                parse_events_as_xml=parse_lhe_events_as_xml,
                systematics_dict=systematics_dict,
                is_background=is_background,
            )

            logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

            # Apply cuts: the LHE weights cover all events, the Delphes observations only those passing the cuts
            logger.debug("Applying Delphes-based cuts to LHE weights")
            for key, weights in six.iteritems(this_weights):
                this_weights[key] = weights[cut_filter]

        if this_weights is None:
            raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

        # Sanity checks
        n_events = self._check_sample_weights(n_events, this_weights)

        # k factors
        if k_factor is not None:
            for key in this_weights:
                this_weights[key] = k_factor * this_weights[key]

        # Background scenario: we only have one set of weights, but these should be true for all benchmarks
        if is_background:
            logger.debug("Sample is background")
            benchmarks_weight = list(six.itervalues(this_weights))[0]

            for benchmark_name in self.benchmark_names_phys:
                this_weights[benchmark_name] = benchmarks_weight

        # Rescale nuisance parameters to reference benchmark
        reference_weights = this_weights[reference_benchmark]
        sampling_weights = this_weights[sampling_benchmark]
        for key in this_weights:
            if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
                this_weights[key] = reference_weights / sampling_weights * this_weights[key]

        return this_observations, this_weights, n_events
    def _analyse_delphes_sample(
        self,
        delete_delphes_files,
        delphes_file,
        generator_truth,
        is_background,
        k_factor,
        lhe_file,
        lhe_file_for_weights,
        parse_lhe_events_as_xml,
        reference_benchmark,
        sampling_benchmark,
        weight_labels,
    ):
        """
        Processes one Delphes sample and returns its observations and weights.

        The nuisance parameter definitions found in `lhe_file` are compared with the ones already stored in
        `self.nuisance_parameters` (or stored there if nothing is set yet). Afterwards the Delphes ROOT file is
        parsed, the weights are optionally replaced by those from `lhe_file_for_weights`, and the k factor, the
        background treatment and the rescaling of nuisance weights to the reference benchmark are applied.

        Parameters
        ----------
        delete_delphes_files : bool
            Forwarded to `parse_delphes_root_file` as `delete_delphes_sample_file`.

        delphes_file : str
            Delphes ROOT file handed to `parse_delphes_root_file`.

        generator_truth : bool
            Forwarded to `parse_delphes_root_file` as `use_generator_truth`.

        is_background : bool
            If True, the first set of weights found is copied to all benchmarks in `self.benchmark_names_phys`.

        k_factor : float or None
            If not None, every set of weights is multiplied with this factor.

        lhe_file : str
            LHE file from which the nuisance parameter definitions are extracted.

        lhe_file_for_weights : str or None
            If not None, the weights are read from this LHE file instead of the Delphes file.

        parse_lhe_events_as_xml : bool
            Forwarded to `parse_lhe_file` as `parse_events_as_xml`.

        reference_benchmark : str
            Key of the weights that the nuisance weights are rescaled to.

        sampling_benchmark : str
            Key of the weights the sample was generated with; also forwarded to `parse_lhe_file`.

        weight_labels : list or None
            Forwarded to `parse_delphes_root_file`.

        Returns
        -------
        observations : dict or None
            Observations returned by `parse_delphes_root_file`. None if the parser returned no observations.

        weights : dict or None
            Processed weights, keyed by weight label. None if the parser returned no observations.

        n_events : int or None
            Value returned by `self._check_sample_weights`. None if the parser returned no observations.

        Raises
        ------
        RuntimeError
            If the nuisance parameter definitions differ from the stored ones, or if no weights could be
            extracted from either file.

        """
        # Nuisance parameter setup as defined in the LHE file
        logger.debug("Extracting nuisance parameter definitions from LHE file")
        lhe_nuisance_parameters = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
        logger.debug("Found %s nuisance parameters with matching benchmarks:", len(lhe_nuisance_parameters))
        for name, definition in six.iteritems(lhe_nuisance_parameters):
            logger.debug("  %s: %s", name, definition)

        # If nothing is stored yet these definitions are kept, otherwise they have to agree with the stored ones
        if self.nuisance_parameters is None:
            self.nuisance_parameters = lhe_nuisance_parameters
        elif dict(self.nuisance_parameters) != dict(lhe_nuisance_parameters):
            raise RuntimeError(
                "Different LHE files have different definitions of nuisance parameters / benchmarks!\n"
                "Previous: {}\nNew:{}".format(self.nuisance_parameters, lhe_nuisance_parameters)
            )

        # Observables, weights and the cut filter from the Delphes ROOT file
        parsed = parse_delphes_root_file(
            delphes_file,
            self.observables,
            self.observables_required,
            self.observables_defaults,
            self.cuts,
            self.cuts_default_pass,
            weight_labels,
            use_generator_truth=generator_truth,
            delete_delphes_sample_file=delete_delphes_files,
            acceptance_eta_max_a=self.acceptance_eta_max_a,
            acceptance_eta_max_e=self.acceptance_eta_max_e,
            acceptance_eta_max_mu=self.acceptance_eta_max_mu,
            acceptance_eta_max_j=self.acceptance_eta_max_j,
            acceptance_pt_min_a=self.acceptance_pt_min_a,
            acceptance_pt_min_e=self.acceptance_pt_min_e,
            acceptance_pt_min_mu=self.acceptance_pt_min_mu,
            acceptance_pt_min_j=self.acceptance_pt_min_j,
        )
        observations, weights, cut_filter = parsed

        # Nothing to do for samples without observations
        if observations is None:
            logger.debug("No observations in this Delphes file, skipping it")
            return None, None, None

        if weights is None:
            logger.debug("Did not extract weights from Delphes file")
        else:
            logger.debug("Found weights %s in Delphes file", list(weights.keys()))

        # Consistency of the observations
        n_events = self._check_sample_observations(observations)

        # Weights from the LHE file take precedence over the Delphes ones
        if lhe_file_for_weights is not None:
            logger.debug("Extracting weights from LHE file")
            _, weights = parse_lhe_file(
                filename=lhe_file_for_weights,
                sampling_benchmark=sampling_benchmark,
                observables=OrderedDict(),
                parse_events_as_xml=parse_lhe_events_as_xml,
            )

            logger.debug("Found weights %s in LHE file", list(weights.keys()))

            # Keep only the events selected by the Delphes-level cut filter
            logger.debug("Applying Delphes-based cuts to LHE weights")
            for label, values in six.iteritems(weights):
                weights[label] = values[cut_filter]

        if weights is None:
            raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

        # Consistency of the weights
        n_events = self._check_sample_weights(n_events, weights)

        # Global rescaling with the k factor
        if k_factor is not None:
            for label, values in six.iteritems(weights):
                weights[label] = k_factor * values

        # Backgrounds come with a single set of weights, which is valid for every physics benchmark
        if is_background:
            logger.debug("Sample is background")
            background_weights = list(six.itervalues(weights))[0]

            for benchmark in self.benchmark_names_phys:
                weights[benchmark] = background_weights

        # Nuisance weights are rescaled from the sampling benchmark to the reference benchmark
        reference_weights = weights[reference_benchmark]
        sampling_weights = weights[sampling_benchmark]
        for label in weights:
            if label in self.benchmark_names_phys:  # Physics benchmarks stay untouched
                continue
            weights[label] = reference_weights / sampling_weights * weights[label]

        return observations, weights, n_events
Esempio n. 3
0
    def analyse_delphes_samples(
        self,
        generator_truth=False,
        delete_delphes_files=False,
        reference_benchmark=None,
        parse_lhe_events_as_xml=True,
    ):
        """
        Parses all Delphes samples (ROOT files) and collects the observables and weights of their events.

        Acceptance settings and cuts are passed on to the Delphes parser. The combined results of all samples
        are stored in `self.observations` and `self.weights`, the nuisance parameter definitions read from the
        LHE files in `self.nuisance_parameters`, and the reference benchmark in `self.reference_benchmark`.
        Samples for which the parser returns no observations are skipped.

        Parameters
        ----------
        generator_truth : bool, optional
            If True, the generator truth information (as given out by Pythia) will be parsed. Detector resolution or
            efficiency effects will not be taken into account. Default value: False.

        delete_delphes_files : bool, optional
            If True, the Delphes ROOT files will be deleted after extracting the information from them. Default value:
            False.

        reference_benchmark : str or None, optional
            Name of the reference theta benchmark that the weights at the nuisance benchmarks are rescaled to:
            `dsigma(x|theta_sampling(x),nu) -> dsigma(x|theta_ref,nu) = dsigma(x|theta_sampling(x),nu)
            * dsigma(x|theta_ref,0) / dsigma(x|theta_sampling(x),0)`. If None, the first benchmark in
            `self.benchmark_names_phys` is used. Default value: None.

        parse_lhe_events_as_xml : bool, optional
            Decides whether the LHE events are parsed with an XML parser (more robust, but slower) or a text parser
            (less robust, faster). Default value: True.

        Returns
        -------
            None

        Raises
        ------
        RuntimeError
            If the nuisance parameter definitions differ between LHE files, if the number of events is
            inconsistent within a sample, or if no weights could be extracted for a sample.

        ValueError
            If the number of weights or observables differs between samples.

        """

        # Without an explicit choice, the first physics benchmark serves as reference
        if reference_benchmark is None:
            reference_benchmark = self.benchmark_names_phys[0]
        self.reference_benchmark = reference_benchmark

        # Start from a clean slate
        self.observations = self.weights = self.nuisance_parameters = None

        sample_settings = zip(
            self.delphes_sample_filenames,
            self.hepmc_sample_weight_labels,
            self.hepmc_is_backgrounds,
            self.hepmc_sampled_from_benchmark,
            self.lhe_sample_filenames,
            self.lhe_sample_filenames_for_weights,
            self.sample_k_factors,
        )

        for (
            delphes_file,
            weight_labels,
            is_background,
            sampling_benchmark,
            lhe_file,
            lhe_file_for_weights,
            k_factor,
        ) in sample_settings:
            logger.info("Analysing Delphes sample %s", delphes_file)

            # Nuisance parameter setup as defined in the LHE file
            logger.debug("Extracting nuisance parameter definitions from LHE file")
            lhe_nuisance_parameters = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
            logger.debug("Found %s nuisance parameters with matching benchmarks:", len(lhe_nuisance_parameters))
            for name, definition in six.iteritems(lhe_nuisance_parameters):
                logger.debug("  %s: %s", name, definition)

            # The first sample fixes the definitions, all later samples have to agree with them
            if self.nuisance_parameters is None:
                self.nuisance_parameters = lhe_nuisance_parameters
            elif dict(self.nuisance_parameters) != dict(lhe_nuisance_parameters):
                raise RuntimeError(
                    "Different LHE files have different definitions of nuisance parameters / benchmarks!\n"
                    "Previous: {}\nNew:{}".format(self.nuisance_parameters, lhe_nuisance_parameters)
                )

            # Observables, weights and the cut filter from the Delphes ROOT file
            parsed = parse_delphes_root_file(
                delphes_file,
                self.observables,
                self.observables_required,
                self.observables_defaults,
                self.cuts,
                self.cuts_default_pass,
                weight_labels,
                use_generator_truth=generator_truth,
                delete_delphes_sample_file=delete_delphes_files,
                acceptance_eta_max_a=self.acceptance_eta_max_a,
                acceptance_eta_max_e=self.acceptance_eta_max_e,
                acceptance_eta_max_mu=self.acceptance_eta_max_mu,
                acceptance_eta_max_j=self.acceptance_eta_max_j,
                acceptance_pt_min_a=self.acceptance_pt_min_a,
                acceptance_pt_min_e=self.acceptance_pt_min_e,
                acceptance_pt_min_mu=self.acceptance_pt_min_mu,
                acceptance_pt_min_j=self.acceptance_pt_min_j,
            )
            sample_observations, sample_weights, cut_filter = parsed

            # Samples without observations do not contribute
            if sample_observations is None:
                logger.debug("No observations in this Delphes file, skipping it")
                continue

            if sample_weights is None:
                logger.debug("Did not extract weights from Delphes file")
            else:
                logger.debug("Found weights %s in Delphes file", list(sample_weights.keys()))

            # All observables have to cover the same number of events
            n_events = None
            for name, values in six.iteritems(sample_observations):
                n_values = len(values)
                if n_events is None:
                    n_events = n_values
                    logger.debug("Found %s events", n_events)
                elif n_values != n_events:
                    raise RuntimeError(
                        "Mismatching number of events in Delphes observations for {}: {} vs {}".format(
                            name, n_events, n_values
                        )
                    )

            # Weights from the LHE file take precedence over the Delphes ones
            if lhe_file_for_weights is not None:
                logger.debug("Extracting weights from LHE file")
                _, sample_weights = parse_lhe_file(
                    filename=lhe_file_for_weights,
                    sampling_benchmark=sampling_benchmark,
                    observables=OrderedDict(),
                    parse_events_as_xml=parse_lhe_events_as_xml,
                )

                logger.debug("Found weights %s in LHE file", list(sample_weights.keys()))

                # Keep only the events selected by the Delphes-level cut filter
                logger.debug("Applying Delphes-based cuts to LHE weights")
                for label, values in six.iteritems(sample_weights):
                    sample_weights[label] = values[cut_filter]

            if sample_weights is None:
                raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

            # All weights have to cover the same number of events as the observables
            for label, values in six.iteritems(sample_weights):
                n_values = len(values)
                if n_events is None:
                    n_events = n_values
                    logger.debug("Found %s events", n_events)
                elif n_values != n_events:
                    raise RuntimeError(
                        "Mismatching number of events in weights {}: {} vs {}".format(label, n_events, n_values)
                    )

            # Global rescaling with the k factor
            if k_factor is not None:
                for label, values in six.iteritems(sample_weights):
                    sample_weights[label] = k_factor * values

            # Backgrounds come with a single set of weights, which is valid for every physics benchmark
            if is_background:
                logger.debug("Sample is background")
                background_weights = list(six.itervalues(sample_weights))[0]

                for benchmark in self.benchmark_names_phys:
                    sample_weights[benchmark] = background_weights

            # Nuisance weights are rescaled from the sampling benchmark to the reference benchmark
            reference_weights = sample_weights[reference_benchmark]
            sampling_weights = sample_weights[sampling_benchmark]

            for label in sample_weights:
                if label in self.benchmark_names_phys:  # Physics benchmarks stay untouched
                    continue
                sample_weights[label] = reference_weights / sampling_weights * sample_weights[label]

            # The first sample with events simply initialises the results
            if self.observations is None and self.weights is None:
                self.observations = sample_observations
                self.weights = sample_weights
                continue

            # Later samples have to match the structure of the previous results
            if len(self.weights) != len(sample_weights):
                raise ValueError(
                    "Number of weights in different files incompatible: {} vs {}".format(
                        len(self.weights), len(sample_weights)
                    )
                )
            if len(self.observations) != len(sample_observations):
                raise ValueError(
                    "Number of observations in different Delphes files incompatible: {} vs {}".format(
                        len(self.observations), len(sample_observations)
                    )
                )

            # Append the new events to the previous results
            for label, previous in six.iteritems(self.weights):
                assert label in sample_weights, "Weight label {} not found in sample!".format(label)
                self.weights[label] = np.hstack((previous, sample_weights[label]))

            for name, previous in six.iteritems(self.observations):
                assert name in sample_observations, "Observable {} not found in Delphes sample!".format(name)
                self.observations[name] = np.hstack((previous, sample_observations[name]))