Example 1
    def setup_function(self):
        '''
        Calculate the bin index into which each event falls

        Create one mask for each analysis bin.
        '''
        
        assert self.calc_specs == 'events', 'ERROR: calc specs must be set to "events" for this module'

        self.data.data_specs = 'events'

        for container in self.data:
            # Generate a new container called bin_indices
            container['bin_indices'] = np.empty(container.size, dtype=np.int64)
  
            variables_to_bin = []
            for bin_name in self.output_specs.names:
                variables_to_bin.append(container[bin_name])

            new_array = lookup_indices(sample=variables_to_bin,
                                       binning=self.output_specs)

            new_array = new_array.get('host')
            np.copyto(src=new_array, dst=container["bin_indices"].get('host'))


            for bin_i in range(self.output_specs.tot_num_bins):
                container.add_array_data(key='bin_{}_mask'.format(bin_i), 
                                         data=(new_array == bin_i))
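
The setup above boils down to one operation: map every event onto a flat analysis-bin index and keep one boolean mask per bin. As a minimal, self-contained illustration of that pattern, here is a numpy-only sketch for a hypothetical 1-D binning; the PISA-specific pieces (lookup_indices, output_specs, the Container object) are replaced by plain arrays, so this illustrates the technique rather than the module's actual API.

    import numpy as np

    # Hypothetical 1-D binning: 4 equal-width bins between 0 and 1
    bin_edges = np.linspace(0.0, 1.0, 5)
    events = np.random.uniform(0.0, 1.0, size=100)

    # np.digitize returns 1-based indices, so shift to 0-based bin indices
    bin_indices = np.digitize(events, bin_edges) - 1
    # Put events sitting exactly on the upper edge into the last bin
    bin_indices = np.clip(bin_indices, 0, len(bin_edges) - 2)

    # One boolean mask per analysis bin, analogous to 'bin_{}_mask' above
    masks = {'bin_{}_mask'.format(i): bin_indices == i
             for i in range(len(bin_edges) - 1)}
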
Example 2
    def setup_function(self):
        '''
        Calculate the bin index into which each event falls

        Create one mask for each analysis bin.
        '''

        assert self.calc_mode == 'events', 'ERROR: calc mode must be set to "events" for this module'

        for container in self.data:
            self.data.representation = self.calc_mode
            variables_to_bin = []
            for bin_name in self.apply_mode.names:
                variables_to_bin.append(container[bin_name])

            indices = lookup_indices(sample=variables_to_bin,
                                     binning=self.apply_mode)

            container['bin_indices'] = indices

            self.data.representation = self.apply_mode
            for bin_i in range(self.apply_mode.tot_num_bins):
                container['bin_{}_mask'.format(bin_i)] = container['bin_indices'] == bin_i
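
Example 2 is the same stage written against the newer calc_mode/apply_mode API, but the product is identical: per-event bin_indices plus one mask per bin. What downstream stages then typically need is a weighted sum per bin. A numpy-only sketch of that step (the arrays here are made up for illustration; the real stage works on PISA containers):

    import numpy as np

    # Hypothetical per-event data: bin index and MC weight for each event
    bin_indices = np.array([0, 2, 1, 2, 0, 3])
    weights = np.full(bin_indices.size, 0.25)
    n_bins = 4

    # Summing 'weights' per analysis bin, either through the masks ...
    counts_from_masks = np.array([weights[bin_indices == i].sum()
                                  for i in range(n_bins)])
    # ... or in a single call
    counts_from_bincount = np.bincount(bin_indices, weights=weights,
                                       minlength=n_bins)
    assert np.allclose(counts_from_masks, counts_from_bincount)
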
Example 3
    def setup_function(self):
        '''
        This is where we figure out how many events to generate,
        define their weights relative to the data statistics,
        and initialize the containers we will need

        This function is run once when we instantiate the pipeline
        '''

        #
        # figure out how many signal and background events to create
        #
        n_data_events = int(self.params.n_events_data.value.m)
        self.stats_factor = float(self.params.stats_factor.value.m)
        signal_fraction = float(self.params.signal_fraction.value.m)

        # Number of simulated MC events
        self.n_mc = int(n_data_events * self.stats_factor)
        # Number of signal MC events
        self.nsig = int(self.n_mc * signal_fraction)
        self.nbkg = self.n_mc - self.nsig  # Number of bkg MC events

        # Go in events mode
        self.data.data_specs = 'events'

        #
        # Create a signal container, with equal weights
        #
        signal_container = Container('signal')
        signal_container.data_specs = 'events'
        # Initialize the signal physics quantity uniformly over the background
        # range (the actual gaussian signal is injected later in apply_function)
        signal_initial = np.random.uniform(low=self.params.bkg_min.value.m,
                                           high=self.params.bkg_max.value.m,
                                           size=self.nsig)

        signal_container.add_array_data('stuff', signal_initial)
        # Populate its MC weight by equal constant factors
        signal_container.add_array_data(
            'weights',
            np.ones(self.nsig, dtype=FTYPE) * 1. / self.stats_factor)
        # Populate the error on those weights (the squared per-event weight,
        # so the binned sum is the usual sum-of-squared-weights variance)
        signal_container.add_array_data('errors',
                                        (np.ones(self.nsig, dtype=FTYPE) * 1. /
                                         self.stats_factor)**2.)

        #
        # Compute the bin indices associated with each event
        #
        sig_indices = lookup_indices(sample=[signal_container['stuff']],
                                     binning=self.output_specs)
        sig_indices = sig_indices.get('host')
        signal_container.add_array_data('bin_indices', sig_indices)

        #
        # Compute an associated bin mask for each output bin
        #
        for bin_i in range(self.output_specs.tot_num_bins):
            sig_bin_mask = sig_indices == bin_i
            signal_container.add_array_data(key='bin_{}_mask'.format(bin_i),
                                            data=sig_bin_mask)

        #
        # Add container to the data
        #
        self.data.add_container(signal_container)

        #
        # Create a background container
        #
        if self.nbkg > 0:

            bkg_container = Container('background')
            bkg_container.data_specs = 'events'
            # Create a set of background events
            initial_bkg_events = np.random.uniform(
                low=self.params.bkg_min.value.m,
                high=self.params.bkg_max.value.m,
                size=self.nbkg)
            bkg_container.add_array_data('stuff', initial_bkg_events)
            # create their associated weights
            bkg_container.add_array_data(
                'weights',
                np.ones(self.nbkg) * 1. / self.stats_factor)
            bkg_container.add_array_data('errors', (np.ones(self.nbkg) * 1. /
                                                    self.stats_factor)**2.)
            # compute their bin indices
            bkg_indices = lookup_indices(sample=[bkg_container['stuff']],
                                         binning=self.output_specs)
            bkg_indices = bkg_indices.get('host')
            bkg_container.add_array_data('bin_indices', bkg_indices)
            # Add bin indices mask (used in generalized poisson llh)
            for bin_i in range(self.output_specs.tot_num_bins):
                bkg_bin_mask = bkg_indices == bin_i
                bkg_container.add_array_data(key='bin_{}_mask'.format(bin_i),
                                             data=bkg_bin_mask)

            self.data.add_container(bkg_container)

        #
        # Add the binned counterpart of each events container
        #
        for container in self.data:
            container.array_to_binned('weights',
                                      binning=self.output_specs,
                                      averaged=False)
            container.array_to_binned('errors',
                                      binning=self.output_specs,
                                      averaged=False)
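
The weighting scheme of Example 3 is easiest to see with concrete numbers: simulating stats_factor times more MC events than data, each carrying weight 1/stats_factor, keeps the total weighted count at the data normalisation, while storing weight**2 as 'errors' makes the binned sum the standard sum-of-squared-weights variance. A self-contained sketch (the numbers are chosen purely for illustration):

    import numpy as np

    n_data_events = 1000
    stats_factor = 10.0                      # MC oversampling w.r.t. data
    n_mc = int(n_data_events * stats_factor)

    weights = np.full(n_mc, 1.0 / stats_factor)
    errors = weights**2                      # per-event variance contribution

    # The weighted count reproduces the data normalisation ...
    assert np.isclose(weights.sum(), n_data_events)
    # ... and the summed errors give the MC statistical variance
    mc_variance = errors.sum()               # == n_mc / stats_factor**2
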
Example 4
    def apply_function(self):
        '''
        This is where we actually inject a gaussian signal and a
        flat background according to the parameters

        This function will be called at every iteration of the minimizer
        '''

        #
        # Make sure we are in events mode
        #
        self.data.data_specs = 'events'

        for container in self.data:

            if container.name == 'signal':
                #
                # First, generate the signal
                #
                signal = np.random.normal(loc=self.params['mu'].value.m,
                                          scale=self.params['sigma'].value.m,
                                          size=self.nsig)
                container['stuff'] = signal

            elif container.name == 'background':
                #
                # Then the background
                #
                background = np.random.uniform(
                    low=self.params.bkg_min.value.m,
                    high=self.params.bkg_max.value.m,
                    size=self.nbkg)

                container['stuff'] = background

            #
            # Recompute the bin indices associated with each event
            #
            new_array = lookup_indices(sample=[container['stuff']],
                                       binning=self.output_specs)
            new_array = new_array.get('host')
            container["bin_indices"] = new_array

            for bin_i in range(self.output_specs.tot_num_bins):
                container['bin_{}_mask'.format(bin_i)] = new_array == bin_i

        #
        # Re-bin the data
        #
        for container in self.data:
            container.array_to_binned('weights',
                                      binning=self.output_specs,
                                      averaged=False)
            container.array_to_binned('errors',
                                      binning=self.output_specs,
                                      averaged=False)

            #
            #  Recalculate the number of MC events per bin, if the array already exists
            #
            if "n_mc_events" in container.binned_data.keys():

                self.data.data_specs = 'events'
                nevents_sim = np.zeros(self.output_specs.tot_num_bins)

                for index in range(self.output_specs.tot_num_bins):
                    index_mask = container['bin_{}_mask'.format(index)].get('host')
                    current_weights = container['weights'].get('host')[index_mask]
                    n_weights = current_weights.shape[0]

                    # Number of MC events in each bin
                    nevents_sim[index] = n_weights

                self.data.data_specs = self.output_specs
                np.copyto(src=nevents_sim,
                          dst=container["n_mc_events"].get('host'))

                #
                # Step 2: Re-calculate the mean adjustment for each container
                #
                mean_number_of_mc_events = np.mean(nevents_sim)
                if mean_number_of_mc_events < 1.0:
                    mean_adjustment = -(1.0 - mean_number_of_mc_events) + 1.e-3
                else:
                    mean_adjustment = 0.0
                container.scalar_data['mean_adjustment'] = mean_adjustment
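
The closing block of Example 4 is the piece that feeds the generalized Poisson likelihood: it counts the raw (unweighted) MC events in each bin and derives a "mean adjustment" whenever bins hold, on average, fewer than one MC event. A standalone numpy sketch of just that arithmetic (the function name and inputs are invented for illustration):

    import numpy as np

    def mean_adjustment(bin_indices, n_bins):
        '''Toy re-implementation of the per-bin MC count and mean adjustment
        computed at the end of apply_function (illustration only).'''
        nevents_sim = np.bincount(bin_indices, minlength=n_bins).astype(float)
        mean_number_of_mc_events = nevents_sim.mean()
        if mean_number_of_mc_events < 1.0:
            # Same rule as above: shift the mean slightly above zero
            return -(1.0 - mean_number_of_mc_events) + 1.e-3
        return 0.0

    # Sparse toy case: 3 events spread over 10 bins -> mean occupancy 0.3
    print(mean_adjustment(np.array([0, 4, 7]), n_bins=10))   # approx. -0.699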