Example #1
    def get_outputs(self,
                    return_sum=False,
                    sum_map_name='total',
                    sum_map_tex_name='Total',
                    **kwargs):
        """Compute and return the outputs.

        Parameters
        ----------
        return_sum : bool
            If True, add up all Maps in all MapSets returned by all pipelines.
            The result will be a single Map contained in a MapSet.
            If False, return a list where each element is the full MapSet
            returned by each pipeline in the DistributionMaker.

        **kwargs
            Passed on to each pipeline's `get_outputs` method.

        Returns
        -------
        MapSet if `return_sum=True` or list of MapSets if `return_sum=False`

        """

        outputs = [pipeline.get_outputs(**kwargs) for pipeline in self]  # pylint: disable=redefined-outer-name

        if return_sum:

            # Case where the output of a pipeline is a MapSet
            if isinstance(outputs[0], MapSet):
                outputs = sum([sum(x) for x in outputs])  # This produces a Map
                outputs.name = sum_map_name
                outputs.tex = sum_map_tex_name
                outputs = MapSet(outputs)  # final output must be a MapSet

            # Case where the output of a pipeline is a dict of different MapSets
            elif isinstance(outputs[0], OrderedDict):
                output_dict = OrderedDict()
                for key in outputs[0].keys():
                    # Summing twice produces a single Map object
                    output_dict[key] = sum([sum(A[key]) for A in outputs])
                    output_dict[key].name = sum_map_name
                    output_dict[key].tex = sum_map_tex_name
                    output_dict[key] = MapSet(output_dict[key])

                outputs = output_dict

        return outputs
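A minimal usage sketch of the method above (the config path is hypothetical and
a working PISA installation is assumed):

from pisa.core.distribution_maker import DistributionMaker

dm = DistributionMaker(['settings/pipeline/example.cfg'])  # hypothetical path
total = dm.get_outputs(return_sum=True)   # MapSet holding a single summed Map
per_pipeline = dm.get_outputs()           # list of MapSets, one per pipeline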
Example #2
    def _compute_outputs(self, inputs=None):
        # Following is just so that we only produce new maps when params
        # change, but produce the same maps with the same param values
        # (for a more realistic test of caching).
        seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
        np.random.seed(seed)

        # Convert a parameter that the user can specify in any (compatible)
        # units to the units used for computation
        height = self.params['test'].to('meter').magnitude

        output_maps = []
        for output_name in self.output_names:
            # Generate the fake per-bin "fluxes", modified by the parameter
            hist = np.random.random(self.output_binning.shape) * height

            # Put the "fluxes" into a Map object, give it the output_name
            m = Map(name=output_name, hist=hist, binning=self.output_binning)

            # Optionally turn on errors here, that will be propagated through
            # rest of pipeline (slows things down, but essential in some cases)
            #m.set_poisson_errors()
            output_maps.append(m)

        # Combine the output maps into a single MapSet object to return.
        # The MapSet contains the various things that are necessary to make
        # caching work and also provides a nice interface for the user to all
        # of the contained maps
        return MapSet(maps=output_maps, name='flux maps')
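The seeding trick above is worth isolating: the parameter values are hashed to
a deterministic 32-bit seed, so identical parameters always reproduce identical
"random" maps. A standalone sketch, with hash_obj replaced by the standard
library's hashlib:

import hashlib
import numpy as np

def params_seed(param_values):
    # Deterministic 32-bit seed derived from the parameter values
    digest = hashlib.md5(repr(param_values).encode()).hexdigest()
    return int(digest, 16) % (2**32 - 1)

np.random.seed(params_seed({'test': 1.5}))
hist_a = np.random.random((3, 4))
np.random.seed(params_seed({'test': 1.5}))
hist_b = np.random.random((3, 4))
assert np.array_equal(hist_a, hist_b)  # same params -> same maps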
Example #3
    def get_outputs(self,
                    return_sum=False,
                    sum_map_name='total',
                    sum_map_tex_name='Total',
                    **kwargs):
        """Compute and return the outputs.

        Parameters
        ----------
        return_sum : bool
            If True, add up all Maps in all MapSets returned by all pipelines.
            The result will be a single Map contained in a MapSet.
            If False, return a list where each element is the full MapSet
            returned by each pipeline in the DistributionMaker.

        **kwargs
            Passed on to each pipeline's `get_outputs` method.

        Returns
        -------
        MapSet if `return_sum=True` or list of MapSets if `return_sum=False`

        """
        outputs = [pipeline.get_outputs(**kwargs) for pipeline in self]  # pylint: disable=redefined-outer-name
        if return_sum:
            if len(outputs) > 1:
                outputs = reduce(lambda x, y: sum(x) + sum(y), outputs)
            else:
                outputs = sum(sum(outputs))
            outputs.name = sum_map_name
            outputs.tex = sum_map_tex_name
            outputs = MapSet(outputs)
        return outputs
Example #4
File: plotter.py Project: thehrh/pisa-1
 def plot_2d_array(self,
                   map_set,
                   n_rows=None,
                   n_cols=None,
                   fname=None,
                   **kwargs):
     """plot all maps or transforms in a single plot"""
     if fname is None:
         fname = map_set.name
     # if the map is 3-dimensional, still define a split_axis automatically
     new_maps = []
     split_axis = kwargs.pop('split_axis', None)
     for map in map_set:
         if len(map.binning) == 3:
             if split_axis is None:
                 # Find shortest dimension
                 l = map.binning.num_bins
                 idx = l.index(min(l))
                 split_axis_ = map.binning.names[idx]
                 logging.warning(
                     'Plotter automatically splitting map %s along %s axis',
                     map.name, split_axis_)
             else:
                 split_axis_ = split_axis
             new_maps.extend(map.split(split_axis_))
         elif len(map.binning) == 2:
             new_maps.append(map)
         else:
             raise Exception('Cannot plot %i dimensional map in 2d' %
                             len(map.binning))
     map_set = MapSet(new_maps)
     self.plot_array(map_set,
                     'plot_2d_map',
                     n_rows=n_rows,
                     n_cols=n_cols,
                     **kwargs)
     self.dump(fname)
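The automatic split picks the shortest of the three dimensions and slices the
map along it. A standalone numpy sketch of the same idea (np.moveaxis stands in
for Map.split):

import numpy as np

hist = np.random.random((10, 12, 3))       # e.g. energy x coszen x pid
split_axis = int(np.argmin(hist.shape))    # shortest dimension, here axis 2
slices = np.moveaxis(hist, split_axis, 0)  # one 2-D slice per bin of that axis
assert slices.shape == (3, 10, 12)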
Example #5
File: icc.py Project: austinschneider/pisa
    def _compute_outputs(self, inputs=None):
        """Apply scales to histograms, put them into PISA MapSets
        Also asign errors given a method:
            * sumw2 : just sum of weights quared as error (the usual weighte histo error)
            * sumw2+shae : including the shape difference
            * fixed_sumw2+shape : errors estimated from nominal paramter values, i.e. scale-invariant

        """

        scale = self.params.atm_muon_scale.value.m_as('dimensionless')
        fixed_scale = self.params.atm_muon_scale.nominal_value.m_as('dimensionless')
        scale *= self.params.livetime.value.m_as('common_year')
        fixed_scale *= self.params.livetime.value.m_as('common_year')
        fixed_scale *= self.params.fixed_scale_factor.value.m_as('dimensionless')

        if self.error_method == 'sumw2':
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=(np.sqrt(self.icc_bg_hist) * scale),
                        binning=self.output_binning)]
        elif self.error_method == 'sumw2+shape':
            error = scale * np.sqrt(self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=error, binning=self.output_binning)]
        elif self.error_method == 'shape':
            error = scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=error, binning=self.output_binning)]
        elif self.error_method == 'fixed_shape':
            error = fixed_scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=error, binning=self.output_binning)]
        elif self.error_method == 'fixed_sumw2+shape':
            error = fixed_scale * np.sqrt(self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=error, binning=self.output_binning)]
        elif self.error_method == 'fixed_doublesumw2+shape':
            error = fixed_scale * np.sqrt(2*self.icc_bg_hist + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        error_hist=error, binning=self.output_binning)]
        else:
            maps = [Map(name=self.output_names[0], hist=(self.icc_bg_hist * scale),
                        binning=self.output_binning)]

        return MapSet(maps, name='icc')
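The error methods above differ only in the formula applied to the nominal and
alternative background histograms. A standalone numpy sketch with stand-in
arrays (all values illustrative):

import numpy as np

icc_bg_hist = np.array([100., 50., 10.])     # stand-in nominal background
alt_icc_bg_hist = np.array([90., 55., 12.])  # stand-in alternative background
scale = 0.8

err_sumw2 = np.sqrt(icc_bg_hist) * scale
err_shape = scale * np.abs(icc_bg_hist - alt_icc_bg_hist)
err_sumw2_shape = scale * np.sqrt(icc_bg_hist + (icc_bg_hist - alt_icc_bg_hist)**2)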
Example #6
    def _compute_outputs(self, inputs=None):

        """Apply basic cuts and compute histograms for output channels."""

        logging.debug('Entering sample._compute_outputs')

        self.config = from_file(self.params['data_sample_config'].value)

        name = self.config.get('general', 'name')
        logging.trace('{0} sample sample_hash = '
                      '{1}'.format(name, self.sample_hash))
        self.load_sample_events()

        if self.params['keep_criteria'].value is not None:
            # TODO(shivesh)
            raise NotImplementedError(
                'needs check to make sure this works in a DistributionMaker'
            )
            self._data.applyCut(self.params['keep_criteria'].value)
            self._data.update_hash()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.keys():
                outputs.append(trans_nu_data.histogram(
                    kinds       = fig,
                    binning     = self.output_binning,
                    weights_col = 'pisa_weight',
                    errors      = True,
                    name        = str(NuFlavIntGroup(fig)),
                ))

        if self.muons:
            outputs.append(self._data.histogram(
                kinds       = 'muons',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'muons',
                tex         = r'\rm{muons}'
            ))

        if self.noise:
            outputs.append(self._data.histogram(
                kinds       = 'noise',
                binning     = self.output_binning,
                weights_col = 'pisa_weight',
                errors      = True,
                name        = 'noise',
                tex         = r'\rm{noise}'
            ))

        name = self.config.get('general', 'name')
        return MapSet(maps=outputs, name=name)
Example #7
File: fit.py Project: terliuk/pisa
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering fit._compute_outputs')
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self.weight_hash = deepcopy(inputs.metadata['weight_hash'])
        logging.trace('{0} fit weight_hash = '
                      '{1}'.format(inputs.metadata['name'], self.weight_hash))
        logging.trace('{0} fit fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        self._data = inputs
        self.reweight()

        if self.output_events:
            return self._data

        outputs = []
        if self.neutrinos:
            trans_nu_data = self._data.transform_groups(
                self._output_nu_groups
            )
            for fig in trans_nu_data.iterkeys():
                outputs.append(
                    trans_nu_data.histogram(
                        kinds=fig,
                        binning=self.output_binning,
                        weights_col='pisa_weight',
                        errors=True,
                        name=str(NuFlavIntGroup(fig)),
                    )
                )

        if self.muons:
            outputs.append(
                self._data.histogram(
                    kinds='muons',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='muons',
                    tex=text2tex('muons')
                )
            )

        if self.noise:
            outputs.append(
                self._data.histogram(
                    kinds='noise',
                    binning=self.output_binning,
                    weights_col='pisa_weight',
                    errors=True,
                    name='noise',
                    tex=text2tex('noise')
                )
            )

        return MapSet(maps=outputs, name=self._data.metadata['name'])
Example #8
File: spline.py Project: terliuk/pisa
 def return_mapset(self, **kwargs):
     """Return a MapSet of stored spline maps."""
     for signature in self._spline_data.iterkeys():
         if not isinstance(self._spline_data[signature], Map):
             raise ValueError('Error: map {0} has not yet been '
                              'computed'.format(signature))
     maps = [self._spline_data[signature]
             for signature in self._spline_data.iterkeys()]
     return MapSet(maps=maps, **kwargs)
Example #9
def main():
    args = parse_args()
    set_verbosity(args.verbose)

    assert len(args.infiles[0]) == 2
    assert len(args.subtitles) == 2
    logging.info('Loading MapSet from files {0}'.format(args.infiles[0]))
    o_mapset = MapSet.from_json(args.infiles[0][0])
    t_mapset = MapSet.from_json(args.infiles[0][1])
    assert len(o_mapset) == 1 and len(t_mapset) == 1
    o_map = o_mapset.pop()
    t_map = t_mapset.pop()

    outfile = args.outdir + '/' + args.outname
    make_plot(
        maps=(o_map, t_map),
        outfile=outfile,
        logv=args.logv,
        center_zero=args.center_zero,
        vlabel=args.vlabel,
        title=args.title,
        sub_titles=args.subtitles,
    )
Example #10
    def get_mapset(self, key, error=None):
        '''For a given key, get a PISA MapSet.

        Parameters
        ----------
        key : str

        error : None or str
            specify a key that errors are read from
        '''
        maps = []
        for container in self:
            maps.append(container.get_map(key, error=error))
        return MapSet(name=self.name, maps=maps)
Example #11
def serve(config, ref, port=DFLT_PORT):
    """Instantiate PISA objects and run server for processing requests.

    Parameters
    ----------
    config : str or iterable thereof
        Resource path(s) to pipeline config(s)

    ref : str
        Resource path to reference map

    port : int or str, optional

    """
    # Instantiate the objects here to save having to do this repeatedly
    dist_maker = DistributionMaker(config)
    ref = MapSet.from_json(ref)

    # Define server as a closure such that it captures the above-instantiated objects
    class MyTCPHandler(SocketServer.BaseRequestHandler):
        """
        The request handler class for our server.

        It is instantiated once per connection to the server, and must override
        the handle() method to implement communication to the client.

        See SocketServer.BaseRequestHandler for documentation of args.
        """
        def handle(self):
            try:
                param_values = receive_obj(self.request)
            except ConnectionClosed:
                return
            dist_maker._set_rescaled_free_params(param_values)  # pylint: disable=protected-access
            test_map = dist_maker.get_outputs(return_sum=True)[0]
            llh = test_map.llh(
                expected_values=ref,
                binned=False,  # return sum over llh from all bins (not per-bin llh's)
            )
            send_obj(llh, self.request)

    server = SocketServer.TCPServer((DFLT_HOST, int(port)), MyTCPHandler)
    print("llh server started on {}:{}".format(DFLT_HOST, port))
    server.serve_forever()
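Defining the handler class inside serve() is what lets it use dist_maker and
ref without globals. A standalone sketch of that closure pattern (socketserver
is the Python 3 name of SocketServer; the toy "model" is illustrative):

import socketserver

def serve_model(model, host='127.0.0.1', port=0):
    # Handler defined inside the function so it captures `model` by closure
    class Handler(socketserver.BaseRequestHandler):
        def handle(self):
            data = self.request.recv(1024)
            self.request.sendall(repr(model(data)).encode())
    return socketserver.TCPServer((host, port), Handler)

server = serve_model(lambda d: len(d))  # toy model captured by the handler
# server.serve_forever() would start handling requests
server.server_close()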
Example #12
File: plotter.py Project: thehrh/pisa-1
 def plot_array(self, map_set, fun, *args, **kwargs):
     """wrapper funtion to exccute plotting function fun for every map in a
     set distributed over a grid"""
     n_rows = kwargs.pop('n_rows', None)
     n_cols = kwargs.pop('n_cols', None)
     if isinstance(map_set, Map):
         map_set = MapSet([map_set])
     if isinstance(map_set, MapSet):
         n = len(map_set)
     elif isinstance(map_set, TransformSet):
         n = len([x for x in map_set])
     else:
         raise TypeError('Expecting to plot a MapSet or TransformSet but '
                         'got %s' % type(map_set))
     if n_rows is None and n_cols is None:
         # TODO: auto row/cols
         n_rows = int(np.floor(np.sqrt(n)))
         while n % n_rows != 0:
             n_rows -= 1
         n_cols = n // n_rows
     if n > n_cols * n_rows:
         raise ValueError(
             'trying to plot %s subplots on a grid with %s x %s cells' %
             (n, n_cols, n_rows))
     size = (n_cols * self.size[0], n_rows * self.size[1])
     self.init_fig(size)
     plt.tight_layout()
     h_margin = 1. / size[0]
     v_margin = 1. / size[1]
     self.fig.subplots_adjust(hspace=0.2,
                              wspace=0.2,
                              top=1 - v_margin,
                              bottom=v_margin,
                              left=h_margin,
                              right=1 - h_margin)
     for i, map in enumerate(map_set):
         plt.subplot(n_rows, n_cols, i + 1)
         getattr(self, fun)(map, *args, **kwargs)
         self.add_stamp(map.tex)
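The row/column heuristic above finds the largest divisor of n not exceeding
sqrt(n), so the grid is always exactly filled. A standalone sketch:

import numpy as np

def grid_shape(n):
    # Largest divisor of n that is <= sqrt(n) becomes the row count
    n_rows = int(np.floor(np.sqrt(n)))
    while n % n_rows != 0:
        n_rows -= 1
    return n_rows, n // n_rows

assert grid_shape(12) == (3, 4)
assert grid_shape(7) == (1, 7)  # primes degrade to a single row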
Example #13
File: transform.py Project: terliuk/pisa
    def apply(self, inputs):
        """Apply each transform to `inputs`; return computed outputs.

        Parameters
        ----------
        inputs : sequence of objects

        Returns
        -------
        outputs : container with computed outputs (no sideband objects)

        """
        output_names = []
        outputs = []

        # If any outputs have the same name, add them together to form a single
        # output for that name
        for xform in self:
            output = xform.apply(inputs)
            name = output.name
            try:
                idx = output_names.index(name)
                outputs[idx] = outputs[idx] + output
                outputs[idx].name = name
            except ValueError:
                outputs.append(output)
                output_names.append(name)

        # Automatically attach a sensible hash (this may be overwritten, but
        # the below should be a reasonable hash in most cases)
        if inputs.hash is None or self.hash is None:
            hash_ = None
        else:
            hash_ = hash_obj((inputs.hash, self.hash))

        # TODO: what to set for map set's name, tex, etc. ?
        return MapSet(maps=outputs, hash=hash_)
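The merge-by-name loop above can equivalently be expressed with a dict keyed on
output name. A standalone sketch with plain numbers standing in for Map
objects:

from collections import OrderedDict

# (name, output) pairs standing in for per-transform outputs
outputs = [('nue', 1.0), ('numu', 2.0), ('nue', 0.5)]
merged = OrderedDict()
for name, out in outputs:
    merged[name] = merged.get(name, 0) + out
assert merged == {'nue': 1.5, 'numu': 2.0}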
Example #14
def create_pseudo_data(toymc_params, seed=None):
    '''
    Create pseudo data consisting of a gaussian peak
    on top of a uniform background
    '''
    if seed is not None:
        np.random.seed(seed)

    binning = toymc_params.binning
    #
    # Gaussian signal peak
    #
    signal = np.random.normal(loc=toymc_params.mu,
                              scale=toymc_params.sigma,
                              size=toymc_params.nsig)

    #
    # Uniform background
    #
    background = np.random.uniform(high=toymc_params.nbackground_high,
                                   low=toymc_params.nbackground_low,
                                   size=toymc_params.nbkg)
    total_data = np.concatenate([signal, background])
    counts_data, _ = np.histogram(total_data, bins=binning.bin_edges.magnitude)

    # Convert data histogram into a pisa map
    data_map = Map(name='total',
                   binning=MultiDimBinning([binning]),
                   hist=counts_data)

    # Set the errors as the sqrt of the counts
    data_map.set_errors(error_hist=np.sqrt(counts_data))

    data_as_mapset = MapSet([data_map])

    return data_as_mapset
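A standalone numpy-only sketch of the same pseudo-data recipe (Gaussian signal
over a uniform background, histogrammed with sqrt(N) errors; all parameter
values are illustrative):

import numpy as np

rng = np.random.default_rng(0)
signal = rng.normal(loc=1.0, scale=0.1, size=1000)     # gaussian peak
background = rng.uniform(low=0.0, high=2.0, size=500)  # uniform background
counts, edges = np.histogram(np.concatenate([signal, background]), bins=20)
errors = np.sqrt(counts)                               # sqrt(N) errors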
Example #15
    def _compute_nominal_outputs(self):
        """load the evnts from file, perform sanity checks and histogram them
        (into final MapSet)

        """
        # get params
        data_file_name = self.params.data_file.value
        sim_version = self.params.sim_ver.value
        bdt_cut = self.params.bdt_cut.value.m_as('dimensionless')

        self.bin_names = self.output_binning.names

        # TODO: convert units using e.g. `comp_units` in stages/reco/hist.py
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in name:
                bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        # right now only use burn sample with sim_version = '4digit'
        #print "sim_version == ", sim_version
        if sim_version == "4digit":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
            Reco_Track_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Track'
        elif sim_version == "5digit" or sim_version == "dima":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
            Reco_Track_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_Track'
        else:
            raise ValueError(
                'only allow 4digit, 5digit(H2 model for hole ice) or'
                ' dima (dima p1 and p2 for hole ice)!'
            )

        data_file = h5py.File(find_resource(data_file_name), 'r')
        L6_result = np.array(data_file['IC86_Dunkman_L6']['result'])
        dLLH = np.array(data_file['IC86_Dunkman_L6']['delta_LLH'])
        reco_energy_all = np.array(data_file[Reco_Neutrino_Name]['energy'])
        reco_coszen_all = np.array(np.cos(
            data_file[Reco_Neutrino_Name]['zenith']
        ))
        reco_trck_len_all = np.array(data_file[Reco_Track_Name]['length'])
        #print "before L6 cut, no. of burn sample = ", len(reco_coszen_all)

        # sanity check
        santa_doms = data_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = data_file['IC86_Dunkman_L3']['value']
        l4 = data_file['IC86_Dunkman_L4']['result']
        l5 = data_file['IC86_Dunkman_L5']['bdt_score']
        assert(np.all(santa_doms>=3) and np.all(l3 == 1) and np.all(l5 >= 0.1))

        # l4==1 was not applied when i3 files were written to hdf5 files, so do
        # it here
        dLLH = dLLH[l4==1]
        reco_energy_all = reco_energy_all[l4==1]
        reco_coszen_all = reco_coszen_all[l4==1]
        l5 = l5[l4==1]
        L6_result = L6_result[l4==1]
        data_file.close()

        dLLH_L6 = dLLH[L6_result==1]
        l5 = l5[L6_result==1]
        reco_energy_L6 = reco_energy_all[L6_result==1]
        reco_coszen_L6 = reco_coszen_all[L6_result==1]
        #print "after L6 cut, no. of burn sample = ", len(reco_coszen_L6)

        # Cut: Only keep bdt score >= 0.2 (from MSU latest result, make data/MC
        # agree much better); if use no such further cut, use bdt_cut = 0.1
        logging.info(
            "Cut2, removing events with bdt_score < %s, i.e. only keeping bdt >= %s"
            % (bdt_cut, bdt_cut)
        )
        cut_events = {}
        cut = l5>=bdt_cut
        cut_events['reco_energy'] = reco_energy_L6[cut]
        cut_events['reco_coszen'] = reco_coszen_L6[cut]
        cut_events['pid'] = dLLH_L6[cut]

        hist, _ = np.histogramdd(sample=np.array(
            [cut_events[bin_name] for bin_name in self.bin_names]
        ).T, bins=self.bin_edges)

        maps = [Map(name=self.output_names[0], hist=hist,
                    binning=self.output_binning)]
        self.template = MapSet(maps, name='data')
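The final histogramming step stacks one column per binning dimension and hands
the result to np.histogramdd. A standalone sketch with toy events and edges:

import numpy as np

events = {'reco_energy': np.array([5., 20., 40.]),
          'reco_coszen': np.array([-0.9, -0.2, 0.5])}
bin_edges = [np.array([1., 10., 100.]), np.array([-1., 0., 1.])]
sample = np.array([events[k] for k in ('reco_energy', 'reco_coszen')]).T
hist, _ = np.histogramdd(sample=sample, bins=bin_edges)
assert hist.sum() == 3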
Example #16
File: roounfold.py Project: thehrh/pisa-1
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering roounfold._compute_outputs')
        self.fit_hash = deepcopy(inputs.metadata['fit_hash'])
        logging.trace('{0} roounfold fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        if self.random_state is not None:
            logging.trace('{0} roounfold random_state = '
                          '{1}'.format(inputs.metadata['name'],
                                       hash_obj(
                                           self.random_state.get_state())))
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self._data = inputs

        if not self.params['return_eff'].value:
            if len(self.output_names) > 1:
                raise AssertionError(
                    'Specified more than one NuFlavIntGroup as '
                    'signal, {0}'.format(self.output_names))
            self.output_str = str(self.output_names[0])

        real_data = self.params['real_data'].value
        if real_data:
            logging.debug('Using real data')
            if 'nuall' not in self._data:
                raise AssertionError(
                    'When using real data, input Data object must contain '
                    'only one element "nuall" containing the data, instead it '
                    'contains elements {0}'.format(self._data.keys()))
            if self.disk_cache is None:
                raise AssertionError(
                    'No disk_cache specified from which to load - using real '
                    'data requires object such as the response object to be '
                    'cached to disk.')

        if self.params['optimize_reg'].value and real_data:
            raise AssertionError(
                'Cannot optimize the regularisation if using real data.')
        if int(self.params['stat_fluctuations'].m) != 0 and real_data:
            raise AssertionError(
                'Cannot do Poisson fluctuations if using real data.')
        if self.params['return_eff'].value and real_data:
            raise AssertionError(
                'Returning efficiency maps is not implemented when using real data.'
            )

        if self.params['return_eff'].value:
            fin_data = self._data
            # Load generator level data for signal
            unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
            pipeline_cfg = from_file(unfold_pipeline_cfg)
            template_maker = Pipeline(pipeline_cfg)
            gen_data = template_maker.get_outputs()

            fin_data = fin_data.transform_groups(self.output_names)
            gen_data = gen_data.transform_groups(self.output_names)

            efficiencies = []
            assert set(fin_data.keys()) == set(gen_data.keys())
            for fig in fin_data.keys():
                figd_f = fin_data[fig]
                figd_g = gen_data[fig]
                inv_eff = self._get_inv_eff(figd_f, figd_g, self.true_binning,
                                            fig)

                i_mask = ~(inv_eff == 0.)
                eff = unp.uarray(np.zeros(self.true_binning.shape),
                                 np.zeros(self.true_binning.shape))
                eff[i_mask] = 1. / inv_eff[i_mask]
                efficiencies.append(
                    Map(name=fig, hist=eff, binning=self.true_binning))
            return MapSet(efficiencies)

        # TODO(shivesh): [   TRACE] None of the selections ['iron', 'nh'] found in this pipeline.
        # TODO(shivesh): Fix "smearing_matrix" memory leak
        # TODO(shivesh): Fix unweighted unfolding
        # TODO(shivesh): different algorithms
        # TODO(shivesh): implement handling of 0 division inside Map objects
        if real_data:
            unfold_map = self.unfold_real_data()
        else:
            unfold_map = self.unfold_mc()

        return MapSet([unfold_map])
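The masked inversion above avoids dividing by zero-efficiency bins. A
standalone numpy sketch (uncertainty propagation via unp is dropped for
brevity):

import numpy as np

inv_eff = np.array([0.0, 2.0, 4.0])  # stand-in inverse efficiencies
eff = np.zeros_like(inv_eff)
mask = inv_eff != 0.0
eff[mask] = 1.0 / inv_eff[mask]      # zero-efficiency bins stay zero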
Example #17
File: pipeline.py Project: atrettin/pisa
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
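The "section arg=val" parsing above splits only on the first '=' so that
values may themselves contain '='. str.partition gives the same behavior as
the split/join dance; a standalone sketch:

remainder = 'keep_criteria=pid>=0.2'
newarg, _, value = remainder.partition('=')  # split on the first '=' only
assert (newarg.strip(), value.strip()) == ('keep_criteria', 'pid>=0.2')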
Example #18
    def get_outputs(self, inputs=None):
        """Top-level function for computing outputs. Use this method to get
        outputs if you live outside this stage/service.

        Caching is handled here, so if the output hash returned by
        `_derive_outputs_hash` is in `outputs_cache`, it is simply returned.
        Otherwise, the `_compute_outputs` private method is invoked to do the
        actual work of computing outputs.

        Parameters
        ----------
        inputs : None or Mapping
            Any inputs to be transformed, plus any sideband objects that are to
            be passed on (untransformed) to subsequent stages.

        See also
        --------
        Overloadable methods called directly from this:
            _derive_outputs_hash
            _compute_outputs

        """
        # Reset flags
        self.outputs_loaded_from_cache = None
        self.outputs_computed = False

        # TODO: store nominal outputs to the outputs cache as well, but
        # derive the hash value the same way as it is done for outputs,
        # to avoid needing to apply no systematics to the nominal outputs
        # to get the (identical) outputs?
        # Problem: assumes the nominal transform is the same as the outputs
        # that will result, which *might* not be true (though it seems it will
        # usually be so)

        # Keep inputs for internal use and for inspection later
        self.inputs = inputs

        outputs_hash, transforms_hash, nominal_transforms_hash = (
            self._derive_outputs_hash()
        )

        # Compute nominal outputs; if feature is not used, this doesn't
        # actually do much of anything. To do more than this, override the
        # `_compute_nominal_outputs` method.
        self.get_nominal_outputs(nominal_outputs_hash=nominal_transforms_hash)

        logging.trace("outputs_hash: %s" % outputs_hash)

        if (
            self.outputs_cache is not None
            and outputs_hash is not None
            and outputs_hash in self.outputs_cache
            and self.debug_mode is None
        ):
            self.outputs_loaded_from_cache = "memory"
            logging.trace("Loading outputs from cache.")
            outputs = self.outputs_cache[outputs_hash]
        else:
            logging.trace("Need to compute outputs...")

            if self.use_transforms:
                self.get_transforms(
                    transforms_hash=transforms_hash,
                    nominal_transforms_hash=nominal_transforms_hash,
                )

            logging.trace("... now computing outputs.")
            outputs = self._compute_outputs(inputs=self.inputs)
            self.check_outputs(outputs)

            if isinstance(outputs, (Map, MapSet)):
                outputs = outputs.rebin(self.output_binning)

            outputs.hash = outputs_hash
            self.outputs_computed = True

            # Store output to cache
            if self.outputs_cache is not None and outputs_hash is not None:
                self.outputs_cache[outputs_hash] = outputs

        # Keep outputs for inspection later
        self.outputs = outputs

        # Attach sideband objects (i.e., inputs not specified in
        # `self.input_names`) to the "augmented" output object
        if self.inputs is None:
            names_in_inputs = set()
        else:
            names_in_inputs = set(self.inputs.names)
        unused_input_names = names_in_inputs.difference(self.input_names)

        if len(unused_input_names) == 0:
            return outputs

        # TODO: update logic for Data object, generic sideband objects
        # Create a new output container different from `outputs` but copying
        # the contents, for purposes of attaching the sideband objects found.
        if isinstance(outputs, MapSet):
            augmented_outputs = MapSet(outputs)
            for name in unused_input_names:
                augmented_outputs.append(inputs[name])

            return augmented_outputs
        else:
            raise TypeError(
                "Outputs are %s, but must currently be a MapSet in"
                " the case that the input includes sideband"
                " objects." % type(outputs)
            )
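The caching logic above reduces to: if a usable hash is already in the cache,
return the stored outputs, otherwise compute and store. A standalone sketch of
that pattern:

cache = {}

def get_outputs(outputs_hash, compute):
    # Return cached outputs when a usable hash is present
    if outputs_hash is not None and outputs_hash in cache:
        return cache[outputs_hash]
    outputs = compute()
    if outputs_hash is not None:
        cache[outputs_hash] = outputs
    return outputs

assert get_outputs('h1', lambda: [1, 2]) is get_outputs('h1', lambda: [9])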
Example #19
File: fit.py Project: thehrh/pisa-1
    def _calculate_fit_coeffs(data,
                              params,
                              fit_binning,
                              nu_params=None,
                              mu_params=None):
        """
        Calculate the fit coefficients for each systematic, flavint,
        bin for a polynomial.
        """
        logging.debug('Calculating fit coefficients')

        config = from_file(params['discr_sys_sample_config'].value)

        degree = int(params['poly_degree'].value)
        force_through_nominal = params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        template_maker = Pipeline(params['pipeline_config'].value)
        dataset_param = template_maker.params['dataset']

        def parse(string):
            return string.replace(' ', '').split(',')

        sys_fit_coeffs = OrderedDict()
        if nu_params is not None:
            sys_list = parse(config.get('neutrinos', 'sys_list'))
            nu_params = deepcopy(map(lambda x: x[3:], nu_params))

            if set(nu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list listed in the sample config file does '
                    'not match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(nu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'neutrinos|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                mapset_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if flavint_groups is None:
                        flavint_groups = template.flavint_groups
                    else:
                        if set(flavint_groups) != set(template.flavint_groups):
                            raise AssertionError(
                                'Mismatch of flavint_groups - ({0}) does not '
                                'match flavint_groups '
                                '({1})'.format(flavint_groups,
                                               template.flavint_groups))

                    outputs = []
                    for fig in template.keys():
                        outputs.append(
                            template.histogram(kinds=fig,
                                               binning=fit_binning,
                                               weights_col='pisa_weight',
                                               errors=False,
                                               name=str(NuFlavIntGroup(fig))))
                    mapset_dict[run] = MapSet(outputs, name=run)

                nom_mapset = mapset_dict[nominal]
                fracdiff_mapset_dict = OrderedDict()
                for run in runs:
                    mapset = []
                    for flavintg_map in mapset_dict[run]:
                        # TODO(shivesh): error propagation?
                        flavintg = flavintg_map.name
                        mask = ~(nom_mapset[flavintg].hist == 0.)
                        div = np.zeros(flavintg_map.shape)
                        with np.errstate(divide='ignore', invalid='ignore'):
                            div[mask] = \
                                unp.nominal_values(flavintg_map.hist[mask]) /\
                                unp.nominal_values(nom_mapset[flavintg].hist[mask])
                        mapset.append(
                            Map(name=flavintg,
                                binning=flavintg_map.binning,
                                hist=div))
                    fracdiff_mapset_dict[run] = MapSet(mapset)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                params_mapset = []
                for fig in template.keys():
                    # TODO(shivesh): Fix numpy warning on this line
                    pvals_hist = np.empty(map(int, combined_binning.shape),
                                          dtype=object)
                    hists = [
                        fracdiff_mapset_dict[run][fig].hist for run in runs
                    ]
                    zip_hists = np.dstack(hists)
                    for idx in np.ndindex(fit_binning.shape):
                        y_values = []
                        y_sigma = []
                        for run in fracdiff_mapset_dict:
                            y_values.append(
                                unp.nominal_values(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))
                            y_sigma.append(
                                unp.std_devs(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))

                        if np.any(y_sigma):
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   sigma=y_sigma,
                                                   p0=np.ones(degree))
                        else:
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   p0=np.ones(degree))
                        # perr = np.sqrt(np.diag(pcov))
                        # pvals = unp.uarray(popt, perr)
                        pvals_hist[idx] = popt
                    pvals_hist = np.array(pvals_hist.tolist())
                    params_mapset.append(
                        Map(name=fig,
                            binning=combined_binning,
                            hist=pvals_hist))
                params_mapset = MapSet(params_mapset, name=sys)

                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_mapset])
                else:
                    sys_fit_coeffs[sys] = params_mapset

        if mu_params is not None:
            sys_list = parse(config.get('muons', 'sys_list'))
            mu_params = deepcopy(map(lambda x: x[3:], mu_params))

            if set(mu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list listed in the sample config file does '
                    'not match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(mu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'muons|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                map_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if not template.contains_muons:
                        raise AssertionError(
                            'Template output does not contain muons')

                    output = template.histogram(
                        kinds='muons',
                        binning=fit_binning,
                        # NOTE: weights cancel in fraction
                        weights_col=None,
                        errors=False,
                        name='muons')
                    map_dict[run] = output

                nom_map = map_dict[nominal]
                fracdiff_map_dict = OrderedDict()
                for run in runs:
                    mask = ~(nom_map.hist == 0.)
                    div = np.zeros(nom_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(map_dict[run].hist[mask]) /\
                            unp.nominal_values(nom_map.hist[mask])
                    fracdiff_map_dict[run] = Map(name='muons',
                                                 binning=nom_map.binning,
                                                 hist=div)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                pvals_hist = np.empty(map(int, combined_binning.shape),
                                      dtype=object)
                hists = [fracdiff_map_dict[run].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_map_dict:
                        y_values.append(
                            unp.nominal_values(
                                fracdiff_map_dict[run].hist[idx]))
                        y_sigma.append(
                            unp.std_devs(
                                fracdiff_map_dict[run].hist[idx]))
                    if np.any(y_sigma):
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               sigma=y_sigma,
                                               p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_map = Map(name='muons',
                                 binning=combined_binning,
                                 hist=pvals_hist)
                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_map])
                else:
                    sys_fit_coeffs[sys] = params_map

        return sys_fit_coeffs
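Each bin's fractional differences are fit against the run deltas with scipy's
curve_fit. A standalone sketch of one such fit, with force_through_nominal
pinning the polynomial to 1 at the nominal point (the data values are
illustrative):

import numpy as np
from scipy.optimize import curve_fit

def fit_func(vals, *poly_coeffs):
    # force_through_nominal: the curve is pinned to 1 at vals == 0
    return np.polynomial.polynomial.polyval(vals, [1.] + list(poly_coeffs))

delta_runs = np.array([-1.0, 0.0, 1.0])  # run values minus the nominal
y_values = np.array([0.9, 1.0, 1.12])    # one bin's fractional differences
popt, pcov = curve_fit(fit_func, delta_runs, y_values, p0=np.ones(1))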
Example #20
def main():
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        runs = eval(cfg.get(sys, 'runs'))
        #print "runs ", runs
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param != '':
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                #print "p_name,value= ", p_name, " ", value
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                #print "old ", p_name, "value = ", param.value
                param.value = value
                #print "new ", p_name, "value = ", param.value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None: map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma, p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail

                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=( 4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j)+ i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    #plt.snameter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.snameter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #            color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][..., d] = gaussian_filter(outputs[name][..., d], sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning)-1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys,
                                                     args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys, args.tag, d, smooth),
                )
Example #21
File: roounfold.py  Project: thehrh/pisa-1
    def unfold_mc(self):
        logging.debug('Unfolding monte carlo sample')
        regularisation = int(self.params['regularisation'].m)
        unfold_bg = self.params['unfold_bg'].value
        unfold_eff = self.params['unfold_eff'].value
        unfold_unweighted = self.params['unfold_unweighted'].value

        # Split data into signal, bg and all (signal+bg)
        signal_data, bg_data, all_data = self.split_data()

        # Load generator level data for signal
        gen_data = self.load_gen_data()

        # Return the true map if regularisation is set to 0
        if regularisation == 0:
            logging.info('Returning baseline MapSet')
            true = roounfold._histogram(events=gen_data,
                                        binning=self.true_binning,
                                        weights=gen_data['pisa_weight'],
                                        errors=True,
                                        name=self.output_str)
            return MapSet([true])

        # Get the inverse efficiency histogram
        if not unfold_eff:
            inv_eff = self.get_inv_eff(signal_data, gen_data)

        # Set the reco and true data based on cfg file settings
        reco_norm_data = None
        true_norm_data = None
        data = signal_data
        if unfold_bg:
            reco_norm_data = all_data
        if unfold_eff:
            true_norm_data = gen_data
        if reco_norm_data is None:
            reco_norm_data = signal_data
        if true_norm_data is None:
            true_norm_data = signal_data
        if unfold_unweighted:
            ones = np.ones(reco_norm_data['pisa_weight'].shape)
            reco_norm_data['pisa_weight'] = ones
            true_norm_data['pisa_weight'] = ones
            data['pisa_weight'] = ones

        # Create response object
        response = self.create_response(reco_norm_data, true_norm_data, data)

        # Make pseudodata
        all_hist = self._histogram(events=all_data,
                                   binning=self.reco_binning,
                                   weights=all_data['pisa_weight'],
                                   errors=False,
                                   name='all',
                                   tex=r'\rm{all}')
        seed = int(self.params['stat_fluctuations'].m)
        if seed != 0:
            if self.random_state is None or seed != self.seed:
                self.seed = seed
                self.random_state = get_random_state(seed)
            all_hist = all_hist.fluctuate('poisson', self.random_state)
        else:
            self.seed = None
            self.random_state = None
        all_hist.set_poisson_errors()

        # Background Subtraction
        if unfold_bg:
            reco = all_hist
        else:
            bg_hist = self.get_bg_hist(bg_data)
            reco = all_hist - bg_hist
        reco.name = 'reco_signal'
        reco.tex = r'\rm{reco_signal}'

        r_flat = roounfold._flatten_to_1d(reco)
        r_th1d = convert_to_th1d(r_flat, errors=True)

        # Find optimum value for regularisation parameter
        if self.params['optimize_reg'].value:
            chisq = None
            for r_idx in range(regularisation):
                unfold = RooUnfoldBayes(response, r_th1d, r_idx + 1)
                unfold.SetVerbose(0)
                idx_chisq = unfold.Chi2(self.t_th1d, 1)
                if chisq is None:
                    pass
                elif idx_chisq > chisq:
                    regularisation = r_idx
                    break
                chisq = idx_chisq

        # Unfold
        unfold = RooUnfoldBayes(response, r_th1d, regularisation)
        unfold.SetVerbose(0)

        unfolded_flat = unfold.Hreco(1)
        unfold_map = unflatten_thist(in_th1d=unfolded_flat,
                                     binning=self.true_binning,
                                     name=self.output_str,
                                     errors=True)

        # Efficiency correction
        if not unfold_eff:
            unfold_map *= inv_eff

        del r_th1d
        del unfold
        logging.info('Unfolded reco sum {0}'.format(
            np.sum(unp.nominal_values(unfold_map.hist))))
        return unfold_map
Example #22
File: compare.py  Project: thehrh/pisa-1
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetry or pull
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(m):
        newmap = deepcopy(m)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref):')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s___%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
Example #23
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering nusquids._compute_outputs')
        if not isinstance(inputs, MapSet):
            raise AssertionError('inputs is not a MapSet object, instead '
                                 'is type {0}'.format(type(inputs)))
        # TODO(shivesh): oversampling
        # TODO(shivesh): more options
        # TODO(shivesh): static function
        # TODO(shivesh): hashing
        binning = self.input_binning.basename_binning
        binning = binning.reorder_dimensions(('coszen', 'energy'),
                                             use_basenames=True)
        cz_binning = binning['coszen']
        en_binning = binning['energy']

        units = nsq.Const()

        interactions = False
        cz_min = cz_binning.bin_edges.min().m_as('radian')
        cz_max = cz_binning.bin_edges.max().m_as('radian')
        en_min = en_binning.bin_edges.min().m_as('GeV') * units.GeV
        en_max = en_binning.bin_edges.max().m_as('GeV') * units.GeV
        cz_centers = cz_binning.weighted_centers.m_as('radian')
        en_centers = en_binning.weighted_centers.m_as('GeV') * units.GeV
        cz_grid = np.array([cz_min] + cz_centers.tolist() + [cz_max])
        en_grid = np.array([en_min] + en_centers.tolist() + [en_max])
        nu_flavours = 3

        nuSQ = nsq.nuSQUIDSAtm(cz_grid, en_grid, nu_flavours,
                               nsq.NeutrinoType.both, interactions)

        nuSQ.Set_EvalThreads(multiprocessing.cpu_count())

        theta12 = self.params['theta12'].value.m_as('radian')
        theta13 = self.params['theta13'].value.m_as('radian')
        theta23 = self.params['theta23'].value.m_as('radian')

        deltam21 = self.params['deltam21'].value.m_as('eV**2')
        deltam31 = self.params['deltam31'].value.m_as('eV**2')

        # TODO(shivesh): check if deltacp should be in radians
        deltacp = self.params['deltacp'].value.m_as('radian')

        nuSQ.Set_MixingAngle(0, 1, theta12)
        nuSQ.Set_MixingAngle(0, 2, theta13)
        nuSQ.Set_MixingAngle(1, 2, theta23)

        nuSQ.Set_SquareMassDifference(1, deltam21)
        nuSQ.Set_SquareMassDifference(2, deltam31)

        nuSQ.Set_CPPhase(0, 2, deltacp)

        nuSQ.Set_rel_error(1.0e-10)
        nuSQ.Set_abs_error(1.0e-10)

        # Pad the edges of the energy, coszen space to cover the entire grid range
        cz_shape = cz_binning.shape[0] + 2
        en_shape = en_binning.shape[0] + 2
        shape = (cz_shape, en_shape) + (2, 3)
        initial_state = np.full(shape, np.nan)

        def pad_inputs(x):
            return np.pad(unp.nominal_values(x.hist), 1, 'edge')

        # Third index is selecting nu(0), nubar(1)
        # Fourth index is selecting flavour nue(0), numu(1), nutau(2)
        initial_state[:, :, 0, 0] = pad_inputs(inputs['nue'])
        initial_state[:, :, 1, 0] = pad_inputs(inputs['nuebar'])
        initial_state[:, :, 0, 1] = pad_inputs(inputs['numu'])
        initial_state[:, :, 1, 1] = pad_inputs(inputs['numubar'])
        initial_state[:, :, 0, 2] = np.zeros(pad_inputs(inputs['nue']).shape)
        initial_state[:, :, 1, 2] = np.zeros(pad_inputs(inputs['nue']).shape)

        if np.any(np.isnan(initial_state)):
            raise AssertionError('nan entries in initial_state: '
                                 '{0}'.format(initial_state))
        nuSQ.Set_initial_state(initial_state, nsq.Basis.flavor)

        # TODO(shivesh): use verbosity level to set this
        nuSQ.Set_ProgressBar(True)
        nuSQ.EvolveState()

        os_factor = self.params['oversample'].value.m
        os_binning = binning.oversample(os_factor)
        os_cz_binning = os_binning['coszen']
        os_en_binning = os_binning['energy']
        os_cz_centers = os_cz_binning.weighted_centers.m_as('radian')
        os_en_centers = os_en_binning.weighted_centers.m_as('GeV')

        fs = {}
        for nu in self.output_names:
            fs[nu] = np.full(os_binning.shape, np.nan)

        for icz, cz_bin in enumerate(os_cz_centers):
            for ie, en_bin in enumerate(os_en_centers):
                en_bin_u = en_bin * units.GeV
                fs['nue'][icz][ie] = nuSQ.EvalFlavor(0, cz_bin, en_bin_u, 0)
                fs['nuebar'][icz][ie] = nuSQ.EvalFlavor(0, cz_bin, en_bin_u, 1)
                fs['numu'][icz][ie] = nuSQ.EvalFlavor(1, cz_bin, en_bin_u, 0)
                fs['numubar'][icz][ie] = nuSQ.EvalFlavor(
                    1, cz_bin, en_bin_u, 1)
                fs['nutau'][icz][ie] = nuSQ.EvalFlavor(2, cz_bin, en_bin_u, 0)
                fs['nutaubar'][icz][ie] = nuSQ.EvalFlavor(
                    2, cz_bin, en_bin_u, 1)

        out_binning = self.input_binning.reorder_dimensions(
            ('coszen', 'energy'), use_basenames=True)
        os_out_binning = out_binning.oversample(os_factor)

        outputs = []
        for key in fs:
            if np.any(np.isnan(fs[key])):
                raise AssertionError(
                    'Invalid value computed for {0} oscillated output: '
                    '\n{1}'.format(key, fs[key]))
            m = Map(name=key, binning=os_out_binning, hist=fs[key])
            m = m.downsample(os_factor) / float(os_factor)
            m = m.reorder_dimensions(self.input_binning)
            outputs.append(m)

        return MapSet(outputs)
Example #24
File: events.py  Project: terliuk/pisa
    def histogram_set(self,
                      binning,
                      nu_weights_col,
                      mu_weights_col,
                      noise_weights_col,
                      mapset_name,
                      errors=False):
        """Uses the above histogram function but returns the set of all of them
        for everything in the Data object.

        Parameters
        ----------
        binning : OneDimBinning, MultiDimBinning
            The definition of the binning for the histograms.
        nu_weights_col : None or string
            The column in the Data object by which to weight the neutrino
            histograms. Specify None for unweighted histograms.
        mu_weights_col : None or string
            The column in the Data object by which to weight the muon
            histograms. Specify None for unweighted histograms.
        noise_weights_col : None or string
            The column in the Data object by which to weight the noise
            histograms. Specify None for unweighted histograms.
        mapset_name : string
            The name by which the resulting MapSet will be identified.
        errors : boolean
            A flag for whether to calculate errors on the histograms or not.
            This defaults to False.

        Returns
        -------
        MapSet : A MapSet containing all of the Maps for everything in this
                 Data object.

        """
        if not isinstance(binning, MultiDimBinning):
            if not isinstance(binning, OneDimBinning):
                raise TypeError('binning should be either MultiDimBinning or '
                                'OneDimBinning object. Got %s.' %
                                type(binning))
        if nu_weights_col is not None:
            if not isinstance(nu_weights_col, basestring):
                raise TypeError('nu_weights_col should be a string. Got %s' %
                                type(nu_weights_col))
        if mu_weights_col is not None:
            if not isinstance(mu_weights_col, basestring):
                raise TypeError('mu_weights_col should be a string. Got %s' %
                                type(mu_weights_col))
        if not isinstance(errors, bool):
            raise TypeError('flag for whether to calculate errors or not '
                            'should be a boolean. Got %s.' % type(errors))
        outputs = []
        if self.contains_neutrinos:
            for fig in self.iterkeys():
                outputs.append(
                    self.histogram(kinds=fig,
                                   binning=binning,
                                   weights_col=nu_weights_col,
                                   errors=errors,
                                   name=str(NuFlavIntGroup(fig))))
        if self.contains_muons:
            outputs.append(
                self.histogram(kinds='muons',
                               binning=binning,
                               weights_col=mu_weights_col,
                               errors=errors,
                               name='muons',
                               tex=r'\rm{muons}'))
        if self.contains_noise:
            outputs.append(
                self.histogram(kinds='noise',
                               binning=binning,
                               weights_col=noise_weights_col,
                               errors=errors,
                               name='noise',
                               tex=r'\rm{noise}'))
        return MapSet(maps=outputs, name=mapset_name)
Example #25
def test_kde_histogramdd():
    """Unit tests for kde_histogramdd"""
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp
    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    parser = ArgumentParser()
    parser.add_argument("-v",
                        action="count",
                        default=None,
                        help="set verbosity level")
    args = parser.parse_args()
    set_verbosity(args.v)

    temp_dir = mkdtemp()

    try:
        my_plotter = Plotter(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )

        b1 = OneDimBinning(name="coszen",
                           num_bins=20,
                           is_lin=True,
                           domain=[-1, 1],
                           tex=r"\cos(\theta)")
        b2 = OneDimBinning(name="energy",
                           num_bins=10,
                           is_log=True,
                           domain=[1, 80] * ureg.GeV,
                           tex=r"E")
        b3 = OneDimBinning(name="pid",
                           num_bins=2,
                           bin_edges=[0, 1, 2],
                           tex=r"pid")
        binning = b1 * b2 * b3

        # now let's generate some toy data

        N = 100000
        cz = np.random.normal(1, 1.2, N)
        # cut away coszen outside -1, 1
        cz = cz[(cz >= -1) & (cz <= 1)]
        e = np.random.normal(30, 20, len(cz))
        pid = np.random.uniform(0, 2, len(cz))
        data = np.array([cz, e, pid]).T

        # make numpy histogram for validation
        bins = [unp.nominal_values(b.bin_edges) for b in binning]
        raw_hist, _ = np.histogramdd(data, bins=bins)

        # get KDE'ed histo
        hist = kde_histogramdd(
            data,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        # put into mapsets and plot
        m1 = Map(name="KDE", hist=hist, binning=binning)
        m2 = Map(name="raw", hist=raw_hist, binning=binning)
        with np.errstate(divide="ignore", invalid="ignore"):
            m3 = m2 / m1
        m3.name = "hist/KDE"
        m3.tex = m3.name
        m4 = m1 - m2
        m4.name = "KDE - hist"
        m4.tex = m4.name
        ms = MapSet([m1, m2, m3, m4])
        my_plotter.plot_2d_array(ms, fname="test_kde", cmap="summer")
    except:
        rmtree(temp_dir)
        raise
    else:
        logging.warning("Inspect and manually clean up output(s) saved to %s" %
                        temp_dir)
Example #26
                       prop=dict(size=12))
    plt.setp(legend.get_title(), fontsize=18)
    at = AnchoredText(r'$%s$' % map.tex,
                      prop=dict(size=20),
                      frameon=True,
                      loc=2)
    at.patch.set_boxstyle("round,pad=0.,rounding_size=0.5")
    ax.add_artist(at)
    fig.savefig(outfile, bbox_inches='tight', dpi=150)


if __name__ == "__main__":
    args = parse_args()
    set_verbosity(args.verbose)

    logging.info('Loading Map from file {0}'.format(args.infile))
    input_MapSet = MapSet.from_json(args.infile)
    if len(input_MapSet) > 1:
        input_Map = input_MapSet[args.name]
    else:
        input_Map = input_MapSet.pop()

    fileio.mkdir(args.outdir, mode=0o755)
    outfile = args.outdir + '/' + args.outname
    logging.info('outfile {0}'.format(outfile))
    plot_CFX_one(map=input_Map,
                 outfile=outfile,
                 logy=args.logy,
                 ylim=args.ylim,
                 ylabel=args.ylabel)
Example #27
def norm_sys_distributions(input_data):
    """Normalises systematics mapsets to the nominal mapset,
    performing error propagation.

    Parameters
    ----------
    input_data : dict
        The data container returned by `make_discrete_sys_distributions`.
        Note that this is modified by this function to add the normalised
        distributions.

    Notes
    -----
    Nothing is returned; `input_data` is modified in-place

    """
    #
    # Get the input mapsets
    #

    nominal_mapset = [
        dataset["mapset"] for dataset in input_data["datasets"]
        if dataset["nominal"]
    ]
    if len(nominal_mapset) != 1:
        raise ValueError("need 1 but got {} nominal mapsets".format(
            len(nominal_mapset)))
    nominal_mapset = nominal_mapset[0]

    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = []

    #
    # loop over types of event
    #

    for map_name in nominal_mapset.names:
        logging.info('Normalizing "%s" maps.', map_name)
        nominal_map = nominal_mapset[map_name]
        nominal_map_nominal_vals = nominal_map.nominal_values

        # Note that all
        #   `sys_mapset[map_name].nominal_values`
        # and
        #   `nominal_map.nominal_values`
        # are finite (neither infinite nor NaN), so the only issue in dividing
        # the former by the latter is if there are "empty bins" (zeros) in
        #   `nominal_map.nominal_values`
        finite_mask = nominal_map_nominal_vals != 0

        #
        # loop over datasets
        #

        for dataset_dict in input_data["datasets"]:
            #
            # Normalise maps
            #

            sys_mapset_map = dataset_dict["mapset"][map_name]

            # TODO: think about the best way to perform unc. propagation

            # Create a new array with uncertainties, the same shape as the map;
            # values are irrelevant as all will be overwritten
            norm_sys_hist = copy.deepcopy(sys_mapset_map.hist)

            # Note that we divide by nominal_vals to avoid double counting the
            # uncertainty on the nominal template when applying the hyperplane
            # fits
            norm_sys_hist[finite_mask] = (
                sys_mapset_map.hist[finite_mask] /
                nominal_map_nominal_vals[finite_mask])
            norm_sys_hist[~finite_mask] = ufloat(np.NaN, np.NaN)

            # TODO Check for bins that are empty in the nominal hist but not in
            # at least one of the sys sets; currently we do not support this...

            norm_sys_map = Map(
                name=sys_mapset_map.name,
                binning=sys_mapset_map.binning,
                hist=norm_sys_hist,
            )

            # TODO Save the map
            dataset_dict["norm_mapset"].append(norm_sys_map)

    # Re-format
    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = MapSet(maps=dataset_dict["norm_mapset"],
                                             name=dataset_dict["mapset"].name)
Example #28
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering mceq._compute_outputs')

        primary_model = split(self.params['primary_model'].value, ',')
        if len(primary_model) != 2:
            raise ValueError('primary_model is not of length 2, instead is of '
                             'length {0}'.format(len(primary_model)))
        primary_model[0] = getattr(pm, primary_model[0])
        density_model = (self.params['density_model'].value,
                         (self.params['location'].value,
                          self.params['season'].value))

        mceq_run = MCEqRun(
            interaction_model=str(self.params['interaction_model'].value),
            primary_model=primary_model,
            theta_deg=0.0,
            density_model=density_model,
            **mceq_config.mceq_config_without(['density_model']))

        # Power of energy to scale the flux (the results will be returned as E**mag * flux)
        mag = 0

        # Obtain energy grid (fixed) of the solution for the x-axis of the plots
        e_grid = mceq_run.e_grid

        # Dictionary for results
        flux = OrderedDict()
        for nu in self.output_names:
            flux[nu] = []

        binning = self.output_binning
        cz_binning = binning.dims[binning.index('coszen', use_basenames=True)]
        en_binning = binning.dims[binning.index('energy', use_basenames=True)]
        cz_centers = cz_binning.weighted_centers.m
        angles = (np.arccos(cz_centers) * ureg.radian).m_as('degrees')

        for theta in angles:
            mceq_run.set_theta_deg(theta)
            mceq_run.solve()

            flux['nue'].append(mceq_run.get_solution('total_nue', mag))
            flux['nuebar'].append(mceq_run.get_solution('total_antinue', mag))
            flux['numu'].append(mceq_run.get_solution('total_numu', mag))
            flux['numubar'].append(mceq_run.get_solution(
                'total_antinumu', mag))

        for nu in flux:
            flux[nu] = np.array(flux[nu])

        smoothing = self.params['smoothing'].value.m
        en_centers = en_binning.weighted_centers.m_as('GeV')
        spline_flux = self.bivariate_spline(flux,
                                            cz_centers,
                                            e_grid,
                                            smooth=smoothing)
        ev_flux = self.bivariate_evaluate(spline_flux, cz_centers, en_centers)

        for nu in ev_flux:
            ev_flux[nu] = ev_flux[nu] * ureg('cm**-2 s**-1 sr**-1 GeV**-1')

        mapset = []
        for nu in ev_flux:
            mapset.append(Map(name=nu, hist=ev_flux[nu], binning=binning))

        return MapSet(mapset)
Example #29
File: transform.py  Project: terliuk/pisa
def test_BinnedTensorTransform():
    """Unit tests for BinnedTensorTransform class"""
    binning = MultiDimBinning([
        dict(name='energy',
             is_log=True,
             domain=(1, 80) * ureg.GeV,
             num_bins=10),
        dict(name='coszen', is_lin=True, domain=(-1, 0), num_bins=5)
    ])

    nue_map = Map(name='nue',
                  binning=binning,
                  hist=np.random.random(binning.shape))
    nue_map.set_poisson_errors()
    numu_map = Map(name='numu',
                   binning=binning,
                   hist=np.random.random(binning.shape))
    numu_map.set_poisson_errors()
    inputs = MapSet(
        name='inputs',
        maps=[nue_map, numu_map],
    )

    xform0 = BinnedTensorTransform(input_names='nue',
                                   output_name='nue',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=2 * np.ones(binning.shape))

    xform1 = BinnedTensorTransform(input_names=['numu'],
                                   output_name='numu',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=3 * np.ones(binning.shape))

    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=np.stack(
            [2 * np.ones(binning.shape), 3 * np.ones(binning.shape)], axis=0))
    assert np.all((xform2 + 2).xform_array - xform2.xform_array == 2)

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xform0, xform1, xform2]):
            t_file = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_file)
            t_ = BinnedTensorTransform.from_json(t_file)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t, t_)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(name='scaling',
                          transforms=[xform0, xform1, xform2],
                          hash=9)

    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xforms]):
            t_filename = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_filename)
            t_ = TransformSet.from_json(t_filename)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t.transforms, t_.transforms)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
Example #30
def main():
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    contin = True
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        # if 'atm_muon_scale' in f.name:
        #     contin = False
        # if contin:
        #     continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            dllh = np.max(llh) - llh

            mllh_idx = np.argmax(llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([l_out]),
                test_label=r'-sigma',
                **args)
        compare(outdir=u_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([u_out]),
                test_label=r'+sigma',
                **args)

        l_in_mapset = l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
                ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles)