def _analyse_delphes_sample(
    self,
    delete_delphes_files,
    delphes_file,
    generator_truth,
    is_background,
    k_factor,
    lhe_file,
    lhe_file_for_weights,
    parse_lhe_events_as_xml,
    reference_benchmark,
    sampling_benchmark,
    weight_labels,
):
    """
    Process one Delphes sample and return its observables, weights and event count.

    The nuisance parameter definitions read from `lhe_file` are stored in `self.nuisance_parameters` if that
    attribute is still None, and compared against it otherwise. Observables (and possibly weights) come from
    `parse_delphes_root_file`; if `lhe_file_for_weights` is given, the weights are instead read with
    `parse_lhe_file` and filtered with the cut mask returned by the Delphes parser.

    Parameters
    ----------
    delete_delphes_files : forwarded to `parse_delphes_root_file` as `delete_delphes_sample_file`.
    delphes_file : Delphes ROOT file handed to `parse_delphes_root_file`.
    generator_truth : forwarded to `parse_delphes_root_file` as `use_generator_truth`.
    is_background : if true, the first weight array is assigned to every name in `self.benchmark_names_phys`.
    k_factor : if not None, every weight array is multiplied by this factor.
    lhe_file : LHE file from which the nuisance parameter definitions are extracted.
    lhe_file_for_weights : if not None, LHE file from which the weights are read.
    parse_lhe_events_as_xml : forwarded to `parse_lhe_file` as `parse_events_as_xml`.
    reference_benchmark : key of the weights used as numerator when rescaling non-physics weights.
    sampling_benchmark : key of the weights used as denominator when rescaling non-physics weights.
    weight_labels : forwarded to `parse_delphes_root_file`.

    Returns
    -------
    tuple
        `(observations, weights, n_events)`, or `(None, None, None)` if the Delphes parser returned no
        observations.

    Raises
    ------
    RuntimeError
        If the nuisance parameter definitions differ from the stored ones, or if no weights could be found.
    """
    # --- Nuisance parameter definitions from the LHE file ---
    logger.debug("Extracting nuisance parameter definitions from LHE file")
    found_nuisances = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
    logger.debug("Found %s nuisance parameters with matching benchmarks:", len(found_nuisances))
    for name, definition in six.iteritems(found_nuisances):
        logger.debug(" %s: %s", name, definition)

    # The first sample defines the reference, every later one has to agree with it
    if self.nuisance_parameters is None:
        self.nuisance_parameters = found_nuisances
    elif dict(self.nuisance_parameters) != dict(found_nuisances):
        raise RuntimeError(
            "Different LHE files have different definitions of nuisance parameters / benchmarks!\n"
            "Previous: {}\nNew:{}".format(self.nuisance_parameters, found_nuisances)
        )

    # --- Observables and weights from the Delphes ROOT file ---
    acceptance = {
        "acceptance_eta_max_a": self.acceptance_eta_max_a,
        "acceptance_eta_max_e": self.acceptance_eta_max_e,
        "acceptance_eta_max_mu": self.acceptance_eta_max_mu,
        "acceptance_eta_max_j": self.acceptance_eta_max_j,
        "acceptance_pt_min_a": self.acceptance_pt_min_a,
        "acceptance_pt_min_e": self.acceptance_pt_min_e,
        "acceptance_pt_min_mu": self.acceptance_pt_min_mu,
        "acceptance_pt_min_j": self.acceptance_pt_min_j,
    }
    observations, weights, passed_cuts = parse_delphes_root_file(
        delphes_file,
        self.observables,
        self.observables_required,
        self.observables_defaults,
        self.cuts,
        self.cuts_default_pass,
        weight_labels,
        use_generator_truth=generator_truth,
        delete_delphes_sample_file=delete_delphes_files,
        **acceptance
    )

    # Nothing to do for an empty sample
    if observations is None:
        logger.debug("No observations in this Delphes file, skipping it")
        return None, None, None

    if weights is None:
        logger.debug("Did not extract weights from Delphes file")
    else:
        logger.debug("Found weights %s in Delphes file", list(weights.keys()))

    # Sanity check on the observables
    n_events = self._check_sample_observations(observations)

    # --- Optionally take the weights from an LHE file instead ---
    if lhe_file_for_weights is not None:
        logger.debug("Extracting weights from LHE file")
        _unused_observations, weights = parse_lhe_file(
            filename=lhe_file_for_weights,
            sampling_benchmark=sampling_benchmark,
            observables=OrderedDict(),
            parse_events_as_xml=parse_lhe_events_as_xml,
        )
        logger.debug("Found weights %s in LHE file", list(weights.keys()))

        # The LHE weights have to be filtered with the Delphes cut mask
        logger.debug("Applying Delphes-based cuts to LHE weights")
        for label in list(weights):
            weights[label] = weights[label][passed_cuts]

    if weights is None:
        raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

    # Sanity check on the weights
    n_events = self._check_sample_weights(n_events, weights)

    # --- k factor ---
    if k_factor is not None:
        for label in weights:
            weights[label] = k_factor * weights[label]

    # --- Backgrounds carry a single weight set that is valid for all physics benchmarks ---
    if is_background:
        logger.debug("Sample is background")
        shared_weight = list(weights.values())[0]
        for physics_benchmark in self.benchmark_names_phys:
            weights[physics_benchmark] = shared_weight

    # --- Rescale the non-physics (nuisance) weights to the reference benchmark ---
    at_reference = weights[reference_benchmark]
    at_sampling = weights[sampling_benchmark]
    nuisance_labels = [label for label in weights if label not in self.benchmark_names_phys]
    for label in nuisance_labels:
        weights[label] = at_reference / at_sampling * weights[label]

    return observations, weights, n_events
def _analyse_delphes_sample(
    self,
    delete_delphes_files,
    delphes_file,
    generator_truth,
    is_background,
    k_factor,
    lhe_file,
    lhe_file_for_weights,
    parse_lhe_events_as_xml,
    reference_benchmark,
    sampling_benchmark,
    weight_labels,
    sample_syst_names,
):
    """
    Process one Delphes sample and return its observables, weights and event count.

    The systematics named in `sample_syst_names` are looked up in `self.systematics`, the matching nuisance
    parameter definitions are extracted from `lhe_file` and recorded in `self.nuisance_parameters`.
    Observables (and possibly weights) come from `parse_delphes_root_file`; if `lhe_file_for_weights` is
    given, the weights are instead read with `parse_lhe_file` and filtered with the cut mask returned by the
    Delphes parser.

    Parameters
    ----------
    delete_delphes_files : forwarded to `parse_delphes_root_file` as `delete_delphes_sample_file`.
    delphes_file : Delphes ROOT file handed to `parse_delphes_root_file`.
    generator_truth : forwarded to `parse_delphes_root_file` as `use_generator_truth`.
    is_background : forwarded to `parse_lhe_file`; if true, the first weight array is also assigned to every
        name in `self.benchmark_names_phys`.
    k_factor : if not None, every weight array is multiplied by this factor.
    lhe_file : LHE file from which the nuisance parameter definitions are extracted.
    lhe_file_for_weights : if not None, LHE file from which the weights are read. Required as soon as
        `sample_syst_names` is non-empty.
    parse_lhe_events_as_xml : forwarded to `parse_lhe_file` as `parse_events_as_xml`.
    reference_benchmark : key of the weights used as numerator when rescaling non-physics weights.
    sampling_benchmark : key of the weights used as denominator when rescaling non-physics weights.
    weight_labels : forwarded to `parse_delphes_root_file`.
    sample_syst_names : iterable of keys of `self.systematics` relevant for this sample, or None for none.

    Returns
    -------
    tuple
        `(observations, weights, n_events)`, or `(None, None, None)` if the Delphes parser returned no
        observations.

    Raises
    ------
    NotImplementedError
        If systematics are requested but `lhe_file_for_weights` is None.
    RuntimeError
        If a nuisance parameter is redefined inconsistently, or if no weights could be found.
    """
    # Relevant systematics
    systematics_used = OrderedDict()
    if sample_syst_names is None:
        sample_syst_names = []
    for key in sample_syst_names:
        systematics_used[key] = self.systematics[key]

    if systematics_used and lhe_file_for_weights is None:
        raise NotImplementedError(
            "Systematic uncertainties are currently only supported when the weights"
            " are extracted from the LHE file (instead of the HepMC / Delphes ROOT"
            " file). Please use the keyword lhe_filename when calling add_sample()."
        )

    # Read systematics setup from LHE file
    logger.debug("Extracting nuisance parameter definitions from LHE file")
    systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used)
    logger.debug("systematics_dict: %s", systematics_dict)

    # systematics_dict has structure
    # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}

    # Store nuisance parameters
    for systematics_name, nuisance_info in six.iteritems(systematics_dict):
        for nuisance_parameter_name, ((benchmark0, _), (benchmark1, _), _) in six.iteritems(nuisance_info):
            new_entry = (systematics_name, benchmark0, benchmark1)

            # The attribute may still be None (nothing recorded yet). Create the mapping lazily, only when
            # there is something to store, instead of failing with a TypeError on the item assignment.
            if self.nuisance_parameters is None:
                self.nuisance_parameters = OrderedDict()

            if (
                nuisance_parameter_name in self.nuisance_parameters
                and new_entry != self.nuisance_parameters[nuisance_parameter_name]
            ):
                raise RuntimeError(
                    "Inconsistent information for same nuisance parameter {}. Old: {}. New: {}.".format(
                        nuisance_parameter_name,
                        self.nuisance_parameters[nuisance_parameter_name],
                        new_entry,
                    )
                )
            self.nuisance_parameters[nuisance_parameter_name] = new_entry

    # Calculate observables and weights in Delphes ROOT file
    this_observations, this_weights, cut_filter = parse_delphes_root_file(
        delphes_file,
        self.observables,
        self.observables_required,
        self.observables_defaults,
        self.cuts,
        self.cuts_default_pass,
        weight_labels,
        use_generator_truth=generator_truth,
        delete_delphes_sample_file=delete_delphes_files,
        acceptance_eta_max_a=self.acceptance_eta_max_a,
        acceptance_eta_max_e=self.acceptance_eta_max_e,
        acceptance_eta_max_mu=self.acceptance_eta_max_mu,
        acceptance_eta_max_j=self.acceptance_eta_max_j,
        acceptance_pt_min_a=self.acceptance_pt_min_a,
        acceptance_pt_min_e=self.acceptance_pt_min_e,
        acceptance_pt_min_mu=self.acceptance_pt_min_mu,
        acceptance_pt_min_j=self.acceptance_pt_min_j,
    )

    # No events found?
    if this_observations is None:
        logger.warning("No remaining events in this Delphes file, skipping it")
        return None, None, None

    if this_weights is not None:
        logger.debug("Found weights %s in Delphes file", list(this_weights.keys()))
    else:
        logger.debug("Did not extract weights from Delphes file")

    # Sanity checks
    n_events = self._check_sample_observations(this_observations)

    # Find weights in LHE file
    if lhe_file_for_weights is not None:
        logger.debug("Extracting weights from LHE file")
        _, this_weights = parse_lhe_file(
            filename=lhe_file_for_weights,
            sampling_benchmark=sampling_benchmark,
            benchmark_names=self.benchmark_names_phys,
            observables=OrderedDict(),
            parse_events_as_xml=parse_lhe_events_as_xml,
            systematics_dict=systematics_dict,
            is_background=is_background,
        )

        logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

        # Apply cuts: the LHE weights are filtered with the mask returned by the Delphes parser.
        # Only existing keys are reassigned, so mutating the dict while iterating is safe.
        logger.debug("Applying Delphes-based cuts to LHE weights")
        for key, weights in six.iteritems(this_weights):
            this_weights[key] = weights[cut_filter]

    if this_weights is None:
        raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

    # Sanity checks
    n_events = self._check_sample_weights(n_events, this_weights)

    # k factors
    if k_factor is not None:
        for key in this_weights:
            this_weights[key] = k_factor * this_weights[key]

    # Background scenario: we only have one set of weights, but these should be true for all benchmarks
    if is_background:
        logger.debug("Sample is background")
        benchmarks_weight = list(six.itervalues(this_weights))[0]
        for benchmark_name in self.benchmark_names_phys:
            this_weights[benchmark_name] = benchmarks_weight

    # Rescale nuisance parameters to reference benchmark
    reference_weights = this_weights[reference_benchmark]
    sampling_weights = this_weights[sampling_benchmark]

    for key in this_weights:
        if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
            this_weights[key] = reference_weights / sampling_weights * this_weights[key]

    return this_observations, this_weights, n_events
def _parse_sample(
    self,
    is_background,
    k_factor,
    lhe_file,
    parse_events_as_xml,
    reference_benchmark,
    sampling_benchmark,
    sample_syst_names,
):
    """
    Process one LHE sample and return its observables, weights and event count.

    The systematics named in `sample_syst_names` are looked up in `self.systematics`, the matching nuisance
    parameter definitions are extracted from `lhe_file` and recorded in `self.nuisance_parameters`. The
    observables and weights are then read with `parse_lhe_file`, and all weights whose key is not in
    `self.benchmark_names_phys` are rescaled by `weights[reference_benchmark] / weights[sampling_benchmark]`.

    Parameters
    ----------
    is_background : forwarded to `parse_lhe_file`.
    k_factor : forwarded to `parse_lhe_file`.
    lhe_file : LHE file that is parsed, and from which the nuisance parameter definitions are extracted.
    parse_events_as_xml : forwarded to `parse_lhe_file`.
    reference_benchmark : key of the weights used as numerator when rescaling non-physics weights.
    sampling_benchmark : key of the weights used as denominator when rescaling non-physics weights.
    sample_syst_names : iterable of keys of `self.systematics` relevant for this sample, or None for none.

    Returns
    -------
    tuple
        `(observations, weights, n_events)`. If `parse_lhe_file` returns no observations the result is
        `(None, None, None)`, i.e. the tuple has the same length on both paths so callers can always unpack
        three values.

    Raises
    ------
    RuntimeError
        If a nuisance parameter is redefined inconsistently.
    """
    # Relevant systematics
    systematics_used = OrderedDict()
    if sample_syst_names is None:
        sample_syst_names = []
    for key in sample_syst_names:
        systematics_used[key] = self.systematics[key]

    # Read systematics setup from LHE file
    logger.debug("Extracting nuisance parameter definitions from LHE file")
    systematics_dict = extract_nuisance_parameters_from_lhe_file(lhe_file, systematics_used)
    logger.debug("systematics_dict: %s", systematics_dict)

    # systematics_dict has structure
    # {systematics_name : {nuisance_parameter_name : ((benchmark0, weight0), (benchmark1, weight1), processing)}}

    # Store nuisance parameters
    for systematics_name, nuisance_info in systematics_dict.items():
        for nuisance_parameter_name, ((benchmark0, _), (benchmark1, _), _) in nuisance_info.items():
            new_entry = (systematics_name, benchmark0, benchmark1)

            # The attribute may still be None (nothing recorded yet). Create the mapping lazily, only when
            # there is something to store, instead of failing with a TypeError on the item assignment.
            if self.nuisance_parameters is None:
                self.nuisance_parameters = OrderedDict()

            if (
                nuisance_parameter_name in self.nuisance_parameters
                and new_entry != self.nuisance_parameters[nuisance_parameter_name]
            ):
                raise RuntimeError(
                    f"Inconsistent information for same nuisance parameter {nuisance_parameter_name}. "
                    f"Old: {self.nuisance_parameters[nuisance_parameter_name]}. "
                    f"New: {new_entry}."
                )
            self.nuisance_parameters[nuisance_parameter_name] = new_entry

    # Calculate observables and weights in LHE file
    this_observations, this_weights = parse_lhe_file(
        filename=lhe_file,
        sampling_benchmark=sampling_benchmark,
        benchmark_names=self.benchmark_names_phys,
        is_background=is_background,
        observables=self.observables,
        observables_required=self.observables_required,
        observables_defaults=self.observables_defaults,
        cuts=self.cuts,
        cuts_default_pass=self.cuts_default_pass,
        efficiencies=self.efficiencies,
        efficiencies_default_pass=self.efficiencies_default_pass,
        energy_resolutions=self.energy_resolution,
        pt_resolutions=self.pt_resolution,
        eta_resolutions=self.eta_resolution,
        phi_resolutions=self.phi_resolution,
        k_factor=k_factor,
        parse_events_as_xml=parse_events_as_xml,
        systematics_dict=systematics_dict,
    )

    # No events found? Return as many values as the regular path does.
    if this_observations is None:
        logger.warning("No remaining events in this LHE file, skipping it")
        return None, None, None

    logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

    # Sanity checks. Only the value returned for the weights is handed back to the caller; the value
    # returned for the observations was never used, so it is not bound to a name here.
    # NOTE(review): both calls pass None as second argument, so presumably the observation and weight
    # event counts are not compared against each other -- confirm against _check_sample_elements.
    self._check_sample_elements(this_observations, None)
    n_events = self._check_sample_elements(this_weights, None)

    # Rescale nuisance parameters to reference benchmark
    reference_weights = this_weights[reference_benchmark]
    sampling_weights = this_weights[sampling_benchmark]

    for key in this_weights:
        if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
            this_weights[key] = reference_weights / sampling_weights * this_weights[key]

    return this_observations, this_weights, n_events
def _parse_sample(self, is_background, k_factor, lhe_file, parse_events_as_xml, reference_benchmark, sampling_benchmark):
    """
    Process one LHE sample and return its observables and weights.

    The nuisance parameter definitions read from `lhe_file` are stored in `self.nuisance_parameters` if that
    attribute is still None, and compared against it otherwise. The observables and weights are read with
    `parse_lhe_file`, the number of entries per observable and per weight is checked for consistency, and all
    weights whose key is not in `self.benchmark_names_phys` are rescaled by
    `weights[reference_benchmark] / weights[sampling_benchmark]`.

    Parameters
    ----------
    is_background : forwarded to `parse_lhe_file`.
    k_factor : forwarded to `parse_lhe_file`.
    lhe_file : LHE file that is parsed, and from which the nuisance parameter definitions are extracted.
    parse_events_as_xml : forwarded to `parse_lhe_file`.
    reference_benchmark : key of the weights used as numerator when rescaling non-physics weights.
    sampling_benchmark : key of the weights used as denominator when rescaling non-physics weights.

    Returns
    -------
    tuple
        `(observations, weights)`, or `(None, None)` if `parse_lhe_file` returned no observations.

    Raises
    ------
    RuntimeError
        If the nuisance parameter definitions differ from the stored ones, or if the observables / weights do
        not all contain the same number of events.
    """
    # Read systematics setup from LHE file
    logger.debug("Extracting nuisance parameter definitions from LHE file")
    nuisance_parameters = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
    logger.debug("Found %s nuisance parameters with matching benchmarks:", len(nuisance_parameters))
    for key, value in six.iteritems(nuisance_parameters):
        logger.debug(" %s: %s", key, value)

    # Compare to existing data
    if self.nuisance_parameters is None:
        self.nuisance_parameters = nuisance_parameters
    elif dict(self.nuisance_parameters) != dict(nuisance_parameters):
        raise RuntimeError(
            "Different LHE files have different definitions of nuisance parameters / benchmarks!\nPrevious: {}\nNew:{}".format(
                self.nuisance_parameters, nuisance_parameters
            )
        )

    # Calculate observables and weights in LHE file
    this_observations, this_weights = parse_lhe_file(
        filename=lhe_file,
        sampling_benchmark=sampling_benchmark,
        benchmark_names=self.benchmark_names_phys,
        is_background=is_background,
        observables=self.observables,
        observables_required=self.observables_required,
        observables_defaults=self.observables_defaults,
        cuts=self.cuts,
        cuts_default_pass=self.cuts_default_pass,
        efficiencies=self.efficiencies,
        efficiencies_default_pass=self.efficiencies_default_pass,
        energy_resolutions=self.energy_resolution,
        pt_resolutions=self.pt_resolution,
        eta_resolutions=self.eta_resolution,
        phi_resolutions=self.phi_resolution,
        k_factor=k_factor,
        parse_events_as_xml=parse_events_as_xml,
    )

    # No events found?
    if this_observations is None:
        logger.debug("No observations in this LHE file, skipping it")
        return None, None

    logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

    # Check number of events in observables
    n_events = None
    for key, obs in six.iteritems(this_observations):
        this_n_events = len(obs)
        # Lazy logger arguments: the message is only built if debug output is actually enabled
        logger.debug("Found %s events in Obs %s", this_n_events, key)
        if n_events is None:
            # The first observable defines the expected event count
            n_events = this_n_events
            logger.debug("Found %s events", n_events)
        if this_n_events != n_events:
            raise RuntimeError(
                "Mismatching number of events in LHE observations for {}: {} vs {}".format(
                    key, n_events, this_n_events
                )
            )

    # Check number of events in weights (against the count established by the observables, if any)
    for key, weights in six.iteritems(this_weights):
        this_n_events = len(weights)
        if n_events is None:
            n_events = this_n_events
            logger.debug("Found %s events", n_events)
        if this_n_events != n_events:
            raise RuntimeError(
                "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events)
            )

    # Rescale nuisance parameters to reference benchmark
    reference_weights = this_weights[reference_benchmark]
    sampling_weights = this_weights[sampling_benchmark]

    for key in this_weights:
        if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
            this_weights[key] = reference_weights / sampling_weights * this_weights[key]

    return this_observations, this_weights
def analyse_delphes_samples(
    self, generator_truth=False, delete_delphes_files=False, reference_benchmark=None, parse_lhe_events_as_xml=True
):
    """
    Main function that parses the Delphes samples (ROOT files), checks acceptance and cuts, and extracts
    the observables and weights.

    The results of all samples are concatenated and stored in `self.observations` and `self.weights`; the
    nuisance parameter definitions are stored in `self.nuisance_parameters`. All three attributes are reset
    to None at the beginning of the call.

    Parameters
    ----------
    generator_truth : bool, optional
        If True, the generator truth information (as given out by Pythia) will be parsed. Detector resolution
        or efficiency effects will not be taken into account.

    delete_delphes_files : bool, optional
        If True, the Delphes ROOT files will be deleted after extracting the information from them. Default
        value: False.

    reference_benchmark : str or None, optional
        The weights at the nuisance benchmarks will be rescaled to some reference theta benchmark:
        `dsigma(x|theta_sampling(x),nu) -> dsigma(x|theta_ref,nu) = dsigma(x|theta_sampling(x),nu) * dsigma(x|theta_ref,0) / dsigma(x|theta_sampling(x),0)`.
        This sets the name of the reference benchmark. If None, the first one will be used. Default value:
        None.

    parse_lhe_events_as_xml : bool, optional
        Decides whether the LHE events are parsed with an XML parser (more robust, but slower) or a text
        parser (less robust, faster). Default value: True.

    Returns
    -------
        None

    Raises
    ------
    RuntimeError
        If the samples define different nuisance parameters, if the observables / weights of a sample do not
        all contain the same number of events, or if no weights could be found for a sample.
    ValueError
        If a sample provides a different number of weights or observables than the previous samples.
    AssertionError
        If a weight label or observable of the previous samples is missing in a later sample.
    """
    # Input
    if reference_benchmark is None:
        reference_benchmark = self.benchmark_names_phys[0]
    self.reference_benchmark = reference_benchmark

    # Reset observations
    self.observations = None
    self.weights = None
    self.nuisance_parameters = None

    for (
        delphes_file,
        weight_labels,
        is_background,
        sampling_benchmark,
        lhe_file,
        lhe_file_for_weights,
        k_factor,
    ) in zip(
        self.delphes_sample_filenames,
        self.hepmc_sample_weight_labels,
        self.hepmc_is_backgrounds,
        self.hepmc_sampled_from_benchmark,
        self.lhe_sample_filenames,
        self.lhe_sample_filenames_for_weights,
        self.sample_k_factors,
    ):
        logger.info("Analysing Delphes sample %s", delphes_file)

        # Read systematics setup from LHE file
        logger.debug("Extracting nuisance parameter definitions from LHE file")
        nuisance_parameters = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
        logger.debug("Found %s nuisance parameters with matching benchmarks:", len(nuisance_parameters))
        for key, value in six.iteritems(nuisance_parameters):
            logger.debug(" %s: %s", key, value)

        # Compare to existing data
        if self.nuisance_parameters is None:
            self.nuisance_parameters = nuisance_parameters
        elif dict(self.nuisance_parameters) != dict(nuisance_parameters):
            raise RuntimeError(
                "Different LHE files have different definitions of nuisance parameters / benchmarks!\n"
                "Previous: {}\nNew:{}".format(self.nuisance_parameters, nuisance_parameters)
            )

        # Calculate observables and weights in Delphes ROOT file
        this_observations, this_weights, cut_filter = parse_delphes_root_file(
            delphes_file,
            self.observables,
            self.observables_required,
            self.observables_defaults,
            self.cuts,
            self.cuts_default_pass,
            weight_labels,
            use_generator_truth=generator_truth,
            delete_delphes_sample_file=delete_delphes_files,
            acceptance_eta_max_a=self.acceptance_eta_max_a,
            acceptance_eta_max_e=self.acceptance_eta_max_e,
            acceptance_eta_max_mu=self.acceptance_eta_max_mu,
            acceptance_eta_max_j=self.acceptance_eta_max_j,
            acceptance_pt_min_a=self.acceptance_pt_min_a,
            acceptance_pt_min_e=self.acceptance_pt_min_e,
            acceptance_pt_min_mu=self.acceptance_pt_min_mu,
            acceptance_pt_min_j=self.acceptance_pt_min_j,
        )

        # No events found?
        if this_observations is None:
            logger.debug("No observations in this Delphes file, skipping it")
            continue

        if this_weights is not None:
            logger.debug("Found weights %s in Delphes file", list(this_weights.keys()))
        else:
            logger.debug("Did not extract weights from Delphes file")

        # Check number of events in observables
        n_events = None
        for key, obs in six.iteritems(this_observations):
            this_n_events = len(obs)
            if n_events is None:
                # The first observable defines the expected event count
                n_events = this_n_events
                logger.debug("Found %s events", n_events)
            if this_n_events != n_events:
                raise RuntimeError(
                    "Mismatching number of events in Delphes observations for {}: {} vs {}".format(
                        key, n_events, this_n_events
                    )
                )

        # Find weights in LHE file
        if lhe_file_for_weights is not None:
            logger.debug("Extracting weights from LHE file")
            _, this_weights = parse_lhe_file(
                filename=lhe_file_for_weights,
                sampling_benchmark=sampling_benchmark,
                observables=OrderedDict(),
                parse_events_as_xml=parse_lhe_events_as_xml,
            )

            logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

            # Apply cuts: the LHE weights are filtered with the mask returned by the Delphes parser.
            # Only existing keys are reassigned, so mutating the dict while iterating is safe.
            logger.debug("Applying Delphes-based cuts to LHE weights")
            for key, weights in six.iteritems(this_weights):
                this_weights[key] = weights[cut_filter]

        if this_weights is None:
            raise RuntimeError("Could not extract weights from Delphes ROOT file or LHE file.")

        # Check number of events in weights
        for key, weights in six.iteritems(this_weights):
            this_n_events = len(weights)
            if n_events is None:
                n_events = this_n_events
                logger.debug("Found %s events", n_events)
            if this_n_events != n_events:
                raise RuntimeError(
                    "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events)
                )

        # k factors
        if k_factor is not None:
            for key in this_weights:
                this_weights[key] = k_factor * this_weights[key]

        # Background scenario: we only have one set of weights, but these should be true for all benchmarks
        if is_background:
            logger.debug("Sample is background")
            benchmarks_weight = list(six.itervalues(this_weights))[0]
            for benchmark_name in self.benchmark_names_phys:
                this_weights[benchmark_name] = benchmarks_weight

        # Rescale nuisance parameters to reference benchmark
        reference_weights = this_weights[reference_benchmark]
        sampling_weights = this_weights[sampling_benchmark]

        for key in this_weights:
            if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
                this_weights[key] = reference_weights / sampling_weights * this_weights[key]

        # First results
        if self.observations is None and self.weights is None:
            self.observations = this_observations
            self.weights = this_weights
            continue

        # Following results: check consistency with previous results
        if len(self.weights) != len(this_weights):
            raise ValueError(
                "Number of weights in different files incompatible: {} vs {}".format(
                    len(self.weights), len(this_weights)
                )
            )
        if len(self.observations) != len(this_observations):
            raise ValueError(
                "Number of observations in different Delphes files incompatible: {} vs {}".format(
                    len(self.observations), len(this_observations)
                )
            )

        # Validate all labels before touching the accumulated arrays, so that a failure cannot leave
        # self.weights extended while self.observations is not. The exception is raised explicitly (same
        # type and message as before) so that the check is not stripped when Python runs with -O.
        for key in self.weights:
            if key not in this_weights:
                raise AssertionError("Weight label {} not found in sample!".format(key))
        for key in self.observations:
            if key not in this_observations:
                raise AssertionError("Observable {} not found in Delphes sample!".format(key))

        # Merge results with previous
        for key in self.weights:
            self.weights[key] = np.hstack([self.weights[key], this_weights[key]])
        for key in self.observations:
            self.observations[key] = np.hstack([self.observations[key], this_observations[key]])
def analyse_samples(self, reference_benchmark=None, parse_events_as_xml=True):
    """
    Main function that parses the LHE samples, applies detector effects, checks cuts, and extracts
    the observables and weights.

    The results of all samples are concatenated and stored in `self.observations` and `self.weights`; the
    nuisance parameter definitions are stored in `self.nuisance_parameters`. All three attributes are reset
    to None at the beginning of the call.

    Parameters
    ----------
    reference_benchmark : str or None, optional
        The weights at the nuisance benchmarks will be rescaled to some reference theta benchmark:
        `dsigma(x|theta_sampling(x),nu) -> dsigma(x|theta_ref,nu) = dsigma(x|theta_sampling(x),nu) * dsigma(x|theta_ref,0) / dsigma(x|theta_sampling(x),0)`.
        This sets the name of the reference benchmark. If None, the first one will be used. Default value:
        None.

    parse_events_as_xml : bool, optional
        Decides whether the LHE events are parsed with an XML parser (more robust, but slower) or a text
        parser (less robust, faster). Default value: True.

    Returns
    -------
        None

    Raises
    ------
    RuntimeError
        If the samples define different nuisance parameters, or if the observables / weights of a sample do
        not all contain the same number of events.
    ValueError
        If a sample provides a different number of weights or observables than the previous samples.
    AssertionError
        If a weight label or observable of the previous samples is missing in a later sample.
    """
    # Input
    if reference_benchmark is None:
        reference_benchmark = self.benchmark_names_phys[0]
    self.reference_benchmark = reference_benchmark

    # Reset observations
    self.observations = None
    self.weights = None
    self.nuisance_parameters = None

    for lhe_file, is_background, sampling_benchmark, k_factor in zip(
        self.lhe_sample_filenames, self.sample_is_backgrounds, self.sampling_benchmarks, self.sample_k_factors
    ):
        logger.info("Analysing LHE sample %s", lhe_file)

        # Read systematics setup from LHE file
        logger.debug("Extracting nuisance parameter definitions from LHE file")
        nuisance_parameters = extract_nuisance_parameters_from_lhe_file(lhe_file, self.systematics)
        logger.debug("Found %s nuisance parameters with matching benchmarks:", len(nuisance_parameters))
        for key, value in six.iteritems(nuisance_parameters):
            logger.debug(" %s: %s", key, value)

        # Compare to existing data
        if self.nuisance_parameters is None:
            self.nuisance_parameters = nuisance_parameters
        elif dict(self.nuisance_parameters) != dict(nuisance_parameters):
            raise RuntimeError(
                "Different LHE files have different definitions of nuisance parameters / benchmarks!\nPrevious: {}\nNew:{}".format(
                    self.nuisance_parameters, nuisance_parameters
                )
            )

        # Calculate observables and weights in LHE file
        this_observations, this_weights = parse_lhe_file(
            filename=lhe_file,
            sampling_benchmark=sampling_benchmark,
            benchmark_names=self.benchmark_names_phys,
            is_background=is_background,
            observables=self.observables,
            observables_required=self.observables_required,
            observables_defaults=self.observables_defaults,
            cuts=self.cuts,
            cuts_default_pass=self.cuts_default_pass,
            energy_resolutions=self.energy_resolution,
            pt_resolutions=self.pt_resolution,
            eta_resolutions=self.eta_resolution,
            phi_resolutions=self.phi_resolution,
            k_factor=k_factor,
            parse_events_as_xml=parse_events_as_xml,
        )

        # No events found?
        if this_observations is None:
            logger.debug("No observations in this LHE file, skipping it")
            continue

        logger.debug("Found weights %s in LHE file", list(this_weights.keys()))

        # Check number of events in observables
        n_events = None
        for key, obs in six.iteritems(this_observations):
            this_n_events = len(obs)
            if n_events is None:
                # The first observable defines the expected event count
                n_events = this_n_events
                logger.debug("Found %s events", n_events)
            if this_n_events != n_events:
                raise RuntimeError(
                    "Mismatching number of events in LHE observations for {}: {} vs {}".format(
                        key, n_events, this_n_events
                    )
                )

        # Check number of events in weights (against the count established by the observables, if any)
        for key, weights in six.iteritems(this_weights):
            this_n_events = len(weights)
            if n_events is None:
                n_events = this_n_events
                logger.debug("Found %s events", n_events)
            if this_n_events != n_events:
                raise RuntimeError(
                    "Mismatching number of events in weights {}: {} vs {}".format(key, n_events, this_n_events)
                )

        # Rescale nuisance parameters to reference benchmark
        reference_weights = this_weights[reference_benchmark]
        sampling_weights = this_weights[sampling_benchmark]

        for key in this_weights:
            if key not in self.benchmark_names_phys:  # Only rescale nuisance benchmarks
                this_weights[key] = reference_weights / sampling_weights * this_weights[key]

        # First results
        if self.observations is None and self.weights is None:
            self.observations = this_observations
            self.weights = this_weights
            continue

        # Following results: check consistency with previous results
        if len(self.weights) != len(this_weights):
            raise ValueError(
                "Number of weights in different files incompatible: {} vs {}".format(
                    len(self.weights), len(this_weights)
                )
            )
        if len(self.observations) != len(this_observations):
            # This method reads LHE samples, so the message refers to LHE files (not Delphes files)
            raise ValueError(
                "Number of observations in different LHE files incompatible: {} vs {}".format(
                    len(self.observations), len(this_observations)
                )
            )

        # Validate all labels before touching the accumulated arrays, so that a failure cannot leave
        # self.weights extended while self.observations is not. The exception is raised explicitly (same
        # type as before) so that the check is not stripped when Python runs with -O.
        for key in self.weights:
            if key not in this_weights:
                raise AssertionError("Weight label {} not found in sample!".format(key))
        for key in self.observations:
            if key not in this_observations:
                raise AssertionError("Observable {} not found in LHE sample!".format(key))

        # Merge results with previous
        for key in self.weights:
            self.weights[key] = np.hstack([self.weights[key], this_weights[key]])
        for key in self.observations:
            self.observations[key] = np.hstack([self.observations[key], this_observations[key]])