def setup_function(self):
    '''
    Calculate the bin index into which each event falls.

    Creates, per container, a `bin_indices` array (one index per event)
    and one boolean mask per analysis bin, stored as ``bin_{i}_mask``.

    Raises
    ------
    AssertionError
        If this stage's calc specs are not set to 'events'.
    '''
    # This stage works event-by-event, so it requires the events representation.
    # (Fixed: the original message had an unterminated quote around "events".)
    assert self.calc_specs == 'events', 'ERROR: calc specs must be set to "events" for this module'

    self.data.data_specs = 'events'

    for container in self.data:
        # Pre-allocate the per-event bin-index array on the container
        container['bin_indices'] = np.empty((container.size), dtype=np.int64)

        # One sample array per binning dimension, in the binning's order
        variables_to_bin = [container[bin_name] for bin_name in self.output_specs.names]

        new_array = lookup_indices(sample=variables_to_bin,
                                   binning=self.output_specs)
        new_array = new_array.get('host')
        # Copy the looked-up indices into the pre-allocated host buffer in place
        np.copyto(src=new_array, dst=container["bin_indices"].get('host'))

        # One boolean mask per analysis bin, selecting the events of that bin
        for bin_i in range(self.output_specs.tot_num_bins):
            container.add_array_data(key='bin_{}_mask'.format(bin_i),
                                     data=(new_array == bin_i))
def setup_function(self):
    '''
    Calculate the bin index into which each event falls.

    Creates, per container, a `bin_indices` array (one index per event)
    and one boolean mask per analysis bin, stored as ``bin_{i}_mask``.

    Raises
    ------
    AssertionError
        If this stage's calc mode is not set to 'events'.
    '''
    # This stage works event-by-event, so it requires the events representation.
    # (Fixed: message previously said "calc specs" although the code checks
    # calc_mode, and the quote around "events" was unterminated.)
    assert self.calc_mode == 'events', 'ERROR: calc mode must be set to "events" for this module'

    for container in self.data:
        # Look up indices in the per-event representation
        self.data.representation = self.calc_mode
        variables_to_bin = [container[bin_name] for bin_name in self.apply_mode.names]

        indices = lookup_indices(sample=variables_to_bin,
                                 binning=self.apply_mode)

        container['bin_indices'] = indices

        # NOTE(review): the representation is switched to apply_mode BEFORE
        # the masks are built — preserved as in the original; confirm intended.
        self.data.representation = self.apply_mode
        for bin_i in range(self.apply_mode.tot_num_bins):
            container['bin_{}_mask'.format(bin_i)] = container['bin_indices'] == bin_i
def setup_function(self):
    '''
    Figure out how many events to generate, define their weights
    relative to the data statistics, and initialize the containers
    we will need.

    This function is run once, when the pipeline is instantiated.
    '''
    #
    # Figure out how many signal and background events to create
    #
    n_data_events = int(self.params.n_events_data.value.m)
    self.stats_factor = float(self.params.stats_factor.value.m)
    signal_fraction = float(self.params.signal_fraction.value.m)

    # Number of simulated MC events
    self.n_mc = int(n_data_events * self.stats_factor)
    # Number of signal MC events
    self.nsig = int(self.n_mc * signal_fraction)
    # Number of background MC events
    self.nbkg = self.n_mc - self.nsig

    # Go into events mode
    self.data.data_specs = 'events'

    #
    # Create a signal container, with equal weights
    #
    signal_container = Container('signal')
    signal_container.data_specs = 'events'
    # Populate the signal physics quantity over a uniform range for now;
    # the actual gaussian signal is injected later, in apply_function
    signal_initial = np.random.uniform(low=self.params.bkg_min.value.m,
                                       high=self.params.bkg_max.value.m,
                                       size=self.nsig)
    signal_container.add_array_data('stuff', signal_initial)
    # Populate its MC weight by equal constant factors
    signal_container.add_array_data(
        'weights',
        np.ones(self.nsig, dtype=FTYPE) * 1. / self.stats_factor)
    # Populate the error on those weights
    signal_container.add_array_data(
        'errors',
        (np.ones(self.nsig, dtype=FTYPE) * 1. / self.stats_factor)**2.)

    #
    # Compute the bin indices associated with each event
    #
    sig_indices = lookup_indices(sample=[signal_container['stuff']],
                                 binning=self.output_specs)
    sig_indices = sig_indices.get('host')
    signal_container.add_array_data('bin_indices', sig_indices)

    #
    # Compute an associated bin mask for each output bin
    #
    for bin_i in range(self.output_specs.tot_num_bins):
        sig_bin_mask = sig_indices == bin_i
        signal_container.add_array_data(key='bin_{}_mask'.format(bin_i),
                                        data=sig_bin_mask)

    #
    # Add container to the data
    #
    self.data.add_container(signal_container)

    #
    # Create a background container
    #
    if self.nbkg > 0:
        bkg_container = Container('background')
        bkg_container.data_specs = 'events'

        # Create a set of background events
        initial_bkg_events = np.random.uniform(low=self.params.bkg_min.value.m,
                                               high=self.params.bkg_max.value.m,
                                               size=self.nbkg)
        bkg_container.add_array_data('stuff', initial_bkg_events)

        # Create their associated weights and errors.
        # (Fixed: dtype=FTYPE added so background arrays carry the same
        # dtype as the signal container's weights/errors.)
        bkg_container.add_array_data(
            'weights',
            np.ones(self.nbkg, dtype=FTYPE) * 1. / self.stats_factor)
        bkg_container.add_array_data(
            'errors',
            (np.ones(self.nbkg, dtype=FTYPE) * 1. / self.stats_factor)**2.)

        # Compute their bin indices
        bkg_indices = lookup_indices(sample=[bkg_container['stuff']],
                                     binning=self.output_specs)
        bkg_indices = bkg_indices.get('host')
        bkg_container.add_array_data('bin_indices', bkg_indices)

        # Add bin indices mask (used in generalized poisson llh)
        for bin_i in range(self.output_specs.tot_num_bins):
            bkg_bin_mask = bkg_indices == bin_i
            bkg_container.add_array_data(key='bin_{}_mask'.format(bin_i),
                                         data=bkg_bin_mask)

        self.data.add_container(bkg_container)

    #
    # Add the binned counterpart of each events container
    #
    for container in self.data:
        container.array_to_binned('weights', binning=self.output_specs,
                                  averaged=False)
        container.array_to_binned('errors', binning=self.output_specs,
                                  averaged=False)
def apply_function(self):
    '''
    This is where we actually inject a gaussian signal and a
    flat background according to the parameters

    This function will be called at every iteration of the minimizer
    '''
    #
    # Make sure we are in events mode
    #
    self.data.data_specs = 'events'

    for container in self.data:
        if container.name == 'signal':
            #
            # First, generate the signal: gaussian draws centered on the
            # 'mu' parameter with width 'sigma'
            #
            signal = np.random.normal(loc=self.params['mu'].value.m,
                                      scale=self.params['sigma'].value.m,
                                      size=self.nsig)
            container['stuff'] = signal

        elif container.name == 'background':
            #
            # Then the background: flat between bkg_min and bkg_max
            #
            background = np.random.uniform(low=self.params.bkg_min.value.m,
                                           high=self.params.bkg_max.value.m,
                                           size=self.nbkg)
            container['stuff'] = background

        #
        # Recompute the bin indices associated with each event
        # (the event values just changed, so the old indices are stale)
        #
        new_array = lookup_indices(sample=[container['stuff']],
                                   binning=self.output_specs)
        new_array = new_array.get('host')
        container["bin_indices"] = new_array

        # Refresh the per-bin boolean masks to match the new indices
        for bin_i in range(self.output_specs.tot_num_bins):
            container['bin_{}_mask'.format(bin_i)] = new_array == bin_i

    #
    # Re-bin the data
    #
    for container in self.data:
        container.array_to_binned('weights', binning=self.output_specs,
                                  averaged=False)
        container.array_to_binned('errors', binning=self.output_specs,
                                  averaged=False)

        #
        # Recalculate the number of MC events per bin, if the array already exists
        # (presumably created by a later stage, e.g. the generalized poisson
        # llh setup — TODO confirm where n_mc_events is first added)
        #
        if "n_mc_events" in container.binned_data.keys():
            # Switch back to events mode to read per-event masks/weights
            self.data.data_specs = 'events'
            nevents_sim = np.zeros(self.output_specs.tot_num_bins)

            for index in range(self.output_specs.tot_num_bins):
                index_mask = container['bin_{}_mask'.format(index)].get(
                    'host')
                current_weights = container['weights'].get(
                    'host')[index_mask]
                n_weights = current_weights.shape[0]

                # Number of MC events in each bin
                nevents_sim[index] = n_weights

            # NOTE(review): data_specs is switched to the binned representation
            # here, inside the container loop, before copying the counts into
            # the existing host buffer — statement order matters; kept as-is.
            self.data.data_specs = self.output_specs
            np.copyto(src=nevents_sim,
                      dst=container["n_mc_events"].get('host'))

            #
            # Step 2: Re-calculate the mean adjustment for each container
            # (non-zero only when the average bin occupancy drops below 1;
            # the 1.e-3 term keeps the adjusted value strictly positive)
            #
            mean_number_of_mc_events = np.mean(nevents_sim)
            if mean_number_of_mc_events < 1.0:
                mean_adjustment = -(1.0 - mean_number_of_mc_events) + 1.e-3
            else:
                mean_adjustment = 0.0
            container.scalar_data['mean_adjustment'] = mean_adjustment