def plot_asymmetry(h0_map,
                   h0_name,
                   h1_map,
                   h1_name,
                   fulltitle,
                   savename,
                   outdir,
                   ftype='pdf'):
    matplotlib.rcParams['font.family'] = 'sans-serif'
    matplotlib.rcParams['mathtext.fontset'] = 'stixsans'

    gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
    fig, axes = plt.subplots(nrows=1,
                             ncols=3,
                             gridspec_kw=gridspec_kw,
                             sharex=False,
                             sharey=False,
                             figsize=(15, 5))

    asymmetry_hist = (h1_map.hist - h0_map.hist) / np.sqrt(h0_map.hist)
    asymmetry_to_plot = Map(name='asymmetry',
                            hist=asymmetry_hist,
                            binning=h0_map.binning)

    asymmetrylabel = (
        r'$\left(N_{%s}-N_{%s}\right)'
        r'/\sqrt{N_{%s}}$' %
        (text2tex(h1_name), text2tex(h0_name), text2tex(h0_name)))

    vmax = max(np.nanmax(h0_map.hist), np.nanmax(h1_map.hist))

    h0_map.plot(fig=fig,
                ax=axes[0],
                title='Hypothesis 0: $%s$' % text2tex(h0_name),
                cmap=plt.cm.afmhot,
                vmax=vmax)

    h1_map.plot(fig=fig,
                ax=axes[1],
                title='Hypothesis 1: $%s$' % text2tex(h1_name),
                cmap=plt.cm.afmhot,
                vmax=vmax)

    asymmetry_to_plot.plot(fig=fig,
                           ax=axes[2],
                           title='Asymmetry',
                           symm=True,
                           cmap=plt.cm.seismic)

    plt.subplots_adjust(bottom=0.12, top=0.8)
    plt.suptitle(fulltitle, size='xx-large')
    if savename != '' and savename[-1] != '_':
        savename += '_'
    fname = '%s%s_%s_asymmetry.%s' % (savename, h0_name, h1_name, ftype)
    fname = fname.replace(' ', '_')
    mkdir(outdir, warn=False)
    fig.savefig(os.path.join(outdir, fname))
    plt.close(fig.number)
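
A minimal usage sketch (not part of the original snippet): assuming PISA's Map and binning classes are importable as below, the helper can be driven with toy histograms. The pisa.core.binning import path and all values here are assumptions.

import numpy as np
from pisa.core.map import Map
from pisa.core.binning import MultiDimBinning, OneDimBinning  # assumed path

binning = MultiDimBinning([
    OneDimBinning(name='energy', num_bins=10, is_lin=True, domain=[1, 80]),
    OneDimBinning(name='coszen', num_bins=5, is_lin=True, domain=[-1, 0]),
])
h0 = Map(name='h0', hist=100 * np.random.random(binning.shape), binning=binning)
h1 = Map(name='h1', hist=100 * np.random.random(binning.shape), binning=binning)
plot_asymmetry(h0, 'NO', h1, 'IO', fulltitle='Toy asymmetry comparison',
               savename='toy', outdir='/tmp/asym_plots')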
Example #2
    def _flatten_to_2d(in_map):
        assert isinstance(in_map, Map)
        shape = in_map.shape
        names = in_map.binning.names
        dims = len(shape)
        assert dims % 2 == 0

        nbins_a = np.prod(shape[:dims // 2])
        nbins_b = np.prod(shape[dims // 2:])
        names_a = ' '.join(names[:dims // 2])
        names_b = ' '.join(names[dims // 2:])

        binning = []
        binning.append(
            OneDimBinning(name=names_a,
                          num_bins=nbins_a,
                          is_lin=True,
                          domain=[0, nbins_a]))
        binning.append(
            OneDimBinning(name=names_b,
                          num_bins=nbins_b,
                          is_lin=True,
                          domain=[0, nbins_b]))
        binning = MultiDimBinning(binning)

        hist = in_map.hist.reshape(nbins_a, nbins_b)
        return Map(name=in_map.name, hist=hist, binning=binning)
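
A hedged round-trip check for _flatten_to_2d on a toy 4-D map (the pisa.core.binning import path and toy values are assumptions):

import numpy as np
from pisa.core.map import Map
from pisa.core.binning import MultiDimBinning, OneDimBinning  # assumed path

dims = [OneDimBinning(name=n, num_bins=3, is_lin=True, domain=[0, 3])
        for n in ('a', 'b', 'c', 'd')]
m4d = Map(name='toy', hist=np.random.random((3, 3, 3, 3)),
          binning=MultiDimBinning(dims))
m2d = _flatten_to_2d(m4d)
assert m2d.hist.shape == (9, 9)                    # 3*3 bins per new axis
assert np.isclose(m2d.hist.sum(), m4d.hist.sum())  # contents preserved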
Example #3
        def eval_spl(spline,
                     binning,
                     out_units=ureg.m**2,
                     x_energy_scale=1,
                     **kwargs):
            init_names = ['true_energy']
            init_units = [ureg.GeV]

            if set(binning.names) != set(init_names):
                raise ValueError('Input binning names {0} do not match '
                                 'instantiation binning names '
                                 '{1}'.format(binning.names, init_names))

            if set(map(str, binning.units)) != set(map(str, init_units)):
                for name in init_names:
                    binning[name].to(init_units)

            bin_centers = [x.m for x in binning.weighted_centers][0]

            nu_O16, nu_H1 = [], []
            for e_val in bin_centers:
                nu_O16.append(spline['O16'].Eval(e_val))
                nu_H1.append(spline['H1'].Eval(e_val))

            nu_O16, nu_H1 = map(np.array, (nu_O16, nu_H1))
            nu_xsec = ((0.8879 * nu_O16) +
                       (0.1121 * nu_H1)) * 1E-38 * ureg.cm**2

            nu_xsec_hist = nu_xsec.to(out_units).magnitude
            return Map(hist=nu_xsec_hist, binning=binning, **kwargs)
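
The spline argument only needs ROOT-style objects exposing .Eval(energy), keyed by target ('O16', 'H1'). A hedged stand-in illustrating the call shape (FlatSpline and all values are illustrative, not part of PISA or ROOT):

class FlatSpline(object):
    """Stand-in for a ROOT spline: returns a constant cross section."""
    def __init__(self, value):
        self.value = value
    def Eval(self, energy):
        return self.value

spline = {'O16': FlatSpline(1.0), 'H1': FlatSpline(0.5)}
binning = MultiDimBinning([
    OneDimBinning(name='true_energy', num_bins=10, is_log=True,
                  domain=[1, 80] * ureg.GeV)
])
xsec_map = eval_spl(spline, binning, name='nu_xsec')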
Example #4
    def _compute_outputs(self, inputs=None):
        # Following is just so that we only produce new maps when params
        # change, but produce the same maps with the same param values
        # (for a more realistic test of caching).
        seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
        np.random.seed(seed)

        # Convert a parameter that the user can specify in any (compatible)
        # units to the units used for computation
        height = self.params['test'].to('meter').magnitude

        output_maps = []
        for output_name in self.output_names:
            # Generate the fake per-bin "fluxes", modified by the parameter
            hist = np.random.random(self.output_binning.shape) * height

            # Put the "fluxes" into a Map object, give it the output_name
            m = Map(name=output_name, hist=hist, binning=self.output_binning)

            # Optionally turn on errors here, that will be propagated through
            # rest of pipeline (slows things down, but essential in some cases)
            #m.set_poisson_errors()
            output_maps.append(m)

        # Combine the output maps into a single MapSet object to return.
        # The MapSet contains the various things that are necessary to make
        # caching work and also provides a nice interface for the user to all
        # of the contained maps
        return MapSet(maps=output_maps, name='flux maps')
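
The seeding trick above makes the stage's output a pure function of its parameter values, which is what result caching requires. A hedged standalone illustration, with hashlib standing in for PISA's hash_obj:

import hashlib
import numpy as np

def seeded_hist(param_values, shape):
    digest = hashlib.sha256(repr(param_values).encode()).digest()
    seed = int.from_bytes(digest[:4], 'big') % (2**32 - 1)
    return np.random.RandomState(seed).random_sample(shape)

# Same parameter values -> identical "random" map
assert np.array_equal(seeded_hist((1.0, 'a'), (3, 4)),
                      seeded_hist((1.0, 'a'), (3, 4)))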
Example #5
def unflatten_thist(in_th1d, binning, name='', errors=False, **kwargs):
    flat_hist = hist2array(in_th1d)
    if errors:
        map_errors = [in_th1d.GetBinError(idx+1)
                      for idx in range(len(flat_hist))]
        flat_hist = unp.uarray(flat_hist, map_errors)
    hist = flat_hist.reshape(binning.shape)
    return Map(hist=hist, binning=binning, name=name, **kwargs)
Example #6
 def get_map(self, key, error=None):
     """Return binned data in the form of a PISA map"""
     hist, binning = self.get_hist(key)
     if error is not None:
         error_hist = np.abs(self.get_hist(error)[0])
     else:
         error_hist = None
     #binning = self.get_binning(key)
     assert hist.ndim == binning.num_dims
     return Map(name=self.name, hist=hist, error_hist=error_hist, binning=binning)
Example #7
    def _compute_outputs(self, inputs=None):
        """Apply scales to histograms, put them into PISA MapSets
        Also asign errors given a method:
            * sumw2 : just sum of weights quared as error (the usual weighte histo error)
            * sumw2+shae : including the shape difference
            * fixed_sumw2+shape : errors estimated from nominal paramter values, i.e. scale-invariant

        """

        scale = self.params.atm_muon_scale.value.m_as('dimensionless')
        fixed_scale = self.params.atm_muon_scale.nominal_value.m_as('dimensionless')
        scale *= self.params.livetime.value.m_as('common_year')
        fixed_scale *= self.params.livetime.value.m_as('common_year')
        fixed_scale *= self.params.fixed_scale_factor.value.m_as('dimensionless')

        if self.error_method == 'sumw2':
            error = scale * np.sqrt(self.icc_bg_hist)
        elif self.error_method == 'sumw2+shape':
            error = scale * np.sqrt(self.icc_bg_hist
                                    + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
        elif self.error_method == 'shape':
            error = scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
        elif self.error_method == 'fixed_shape':
            error = fixed_scale * np.abs(self.icc_bg_hist - self.alt_icc_bg_hist)
        elif self.error_method == 'fixed_sumw2+shape':
            error = fixed_scale * np.sqrt(self.icc_bg_hist
                                          + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
        elif self.error_method == 'fixed_doublesumw2+shape':
            error = fixed_scale * np.sqrt(2*self.icc_bg_hist
                                          + (self.icc_bg_hist - self.alt_icc_bg_hist)**2)
        else:
            # no recognized method: no error histogram attached
            error = None

        maps = [Map(name=self.output_names[0],
                    hist=(self.icc_bg_hist * scale),
                    error_hist=error,
                    binning=self.output_binning)]

        return MapSet(maps, name='icc')
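
A hedged numpy-only sketch of the error recipes above on toy histograms (all values are illustrative):

import numpy as np

icc = np.array([100., 50., 10.])  # nominal background histogram
alt = np.array([90., 55., 12.])   # alternative (shape-shifted) histogram
scale = 2.0

err_sumw2 = scale * np.sqrt(icc)                  # 'sumw2'
err_shape = scale * np.abs(icc - alt)             # 'shape'
err_both = scale * np.sqrt(icc + (icc - alt)**2)  # 'sumw2+shape'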
Example #8
    def _flatten_to_1d(in_map):
        assert isinstance(in_map, Map)

        bin_name = ''.join(in_map.binning.names)
        num_bins = np.prod(in_map.shape)
        binning = MultiDimBinning([
            OneDimBinning(name=bin_name,
                          num_bins=num_bins,
                          is_lin=True,
                          domain=[0, num_bins])
        ])
        hist = in_map.hist.flatten()

        return Map(name=in_map.name, hist=hist, binning=binning)
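
A hedged round-trip check, reusing the toy map m4d built after Example #2: the 1-D result has prod(shape) bins and the same total content.

m1d = _flatten_to_1d(m4d)
assert m1d.hist.shape == (81,)                     # 3**4 bins
assert np.isclose(m1d.hist.sum(), m4d.hist.sum())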
Example #9
def create_pseudo_data(toymc_params, seed=None):
    '''
    Create pseudo data consisting of a gaussian peak
    on top of a uniform background
    '''
    if seed is not None:
        np.random.seed(seed)

    binning = toymc_params.binning
    #
    # Gaussian signal peak
    #
    signal = np.random.normal(loc=toymc_params.mu,
                              scale=toymc_params.sigma,
                              size=toymc_params.nsig)

    #
    # Uniform background
    #
    background = np.random.uniform(high=toymc_params.nbackground_high,
                                   low=toymc_params.nbackground_low,
                                   size=toymc_params.nbkg)
    total_data = np.concatenate([signal, background])
    counts_data, _ = np.histogram(total_data, bins=binning.bin_edges.magnitude)

    # Convert data histogram into a pisa map
    data_map = Map(name='total',
                   binning=MultiDimBinning([binning]),
                   hist=counts_data)

    # Set the errors as the sqrt of the counts
    data_map.set_errors(error_hist=np.sqrt(counts_data))

    data_as_mapset = MapSet([data_map])

    return data_as_mapset
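
toymc_params only needs the attributes read above, so a SimpleNamespace stand-in suffices for a hedged usage sketch (all values and the pisa.core.binning import path are assumptions):

from types import SimpleNamespace
from pisa.core.binning import OneDimBinning  # assumed path

toymc_params = SimpleNamespace(
    binning=OneDimBinning(name='x', num_bins=20, is_lin=True, domain=[0, 40]),
    mu=20.0, sigma=2.0, nsig=1000,
    nbackground_low=0.0, nbackground_high=40.0, nbkg=500,
)
data = create_pseudo_data(toymc_params, seed=0)  # MapSet with one 'total' map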
Example #10
def pisa2_map_to_pisa3_map(pisa2_map,
                           ebins_name='ebins',
                           czbins_name='czbins',
                           is_log=True,
                           is_lin=True):
    expected_keys = ['map', 'ebins', 'czbins']
    if sorted(pisa2_map.keys()) != sorted(expected_keys):
        raise ValueError(
            'PISA 2 map should be a dict containing entries: %s' %
            expected_keys)
    ebins = OneDimBinning(name=ebins_name,
                          bin_edges=pisa2_map['ebins'] * ureg.GeV,
                          is_log=is_log,
                          tex=r'E_{\nu}')
    czbins = OneDimBinning(name=czbins_name,
                           bin_edges=pisa2_map['czbins'],
                           is_lin=is_lin,
                           tex=r'\cos\theta_Z')
    bins = MultiDimBinning([ebins, czbins])
    return Map(name='pisa2equivalent', hist=pisa2_map['map'], binning=bins)
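
A hedged usage sketch: a PISA 2 map is just a dict holding the 2-D histogram plus its energy and cos(zenith) bin edges (toy values below):

import numpy as np

pisa2_map = {
    'map': np.random.random((10, 20)),
    'ebins': np.logspace(0, 2, 11),    # 10 log-spaced energy bins [GeV]
    'czbins': np.linspace(-1, 1, 21),  # 20 linear cos(zenith) bins
}
pisa3_map = pisa2_map_to_pisa3_map(pisa2_map)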
Example #11
    def _histogram(events, binning, weights=None, errors=False, **kwargs):
        """Histogram the events given the input binning."""
        logging.trace('Histogramming')

        bin_names = binning.names
        bin_edges = [edges.m for edges in binning.bin_edges]
        for name in bin_names:
            if name not in events:
                raise AssertionError('Input events object does not have '
                                     'key {0}'.format(name))

        sample = [events[colname] for colname in bin_names]
        hist, edges = np.histogramdd(sample=sample,
                                     weights=weights,
                                     bins=bin_edges)
        if errors:
            hist2, edges = np.histogramdd(sample=sample,
                                          weights=np.square(weights),
                                          bins=bin_edges)
            hist = unp.uarray(hist, np.sqrt(hist2))

        return Map(hist=hist, binning=binning, **kwargs)
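
A hedged usage sketch: events only needs dict-style access by bin name (toy data; the pisa.core.binning import path is an assumption):

import numpy as np
from pisa.core.binning import MultiDimBinning, OneDimBinning  # assumed path

binning = MultiDimBinning([
    OneDimBinning(name='energy', num_bins=10, is_log=True,
                  domain=[1, 80] * ureg.GeV),
    OneDimBinning(name='coszen', num_bins=5, is_lin=True, domain=[-1, 0]),
])
events = {
    'energy': np.random.uniform(1, 80, 1000),
    'coszen': np.random.uniform(-1, 0, 1000),
}
m = _histogram(events, binning, weights=np.random.random(1000),
               errors=True, name='toy')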
Example #12
def main():
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')


    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        runs = eval(cfg.get(sys, 'runs'))
        #print "runs ", runs
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param:
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                #print "p_name,value= ", p_name, " ", value
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                #print "old ", p_name, "value = ", param.value
                param.value = value
                #print "new ", p_name, "value = ", param.value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None: map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)


            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma, p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail

                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=( 4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j)+ i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    plt.scatter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.scatter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #            color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][...,d] = gaussian_filter(outputs[name][...,d],sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning)-1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys,
                                                     args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys, args.tag, d, smooth),
                )
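
A hedged standalone sketch of the fit core used above: per-bin template ratios versus systematic shifts, fit with a polynomial whose constant term is pinned to 1 at the nominal point (toy values; two free coefficients):

import numpy as np
from scipy.optimize import curve_fit

def fit_fun(x, *p):
    # constant term fixed to 1, as with force_through_nominal above
    return np.polynomial.polynomial.polyval(x, [1.] + list(p))

x_values = np.array([-1.0, 0.0, 1.0, 2.0])    # shifts relative to nominal
y_values = np.array([0.92, 1.0, 1.09, 1.21])  # per-bin template ratios
popt, pcov = curve_fit(fit_fun, x_values, y_values, p0=np.ones(2))
perr = np.sqrt(np.diag(pcov))                 # 1-sigma coefficient errors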
Example #13
    def histogram(self,
                  kinds,
                  binning,
                  binning_cols=None,
                  weights_col=None,
                  errors=False,
                  name=None,
                  tex=None,
                  **kwargs):
        """Histogram the events of all `kinds` specified, with `binning` and
        optionally applying `weights`.

        Parameters
        ----------
        kinds : string, sequence of NuFlavInt, or NuFlavIntGroup
        binning : OneDimBinning, MultiDimBinning or sequence of arrays
            (one array per binning dimension)
        binning_cols : string or sequence of strings
            Bin only these dimensions, ignoring other dimensions in `binning`
        weights_col : None or string
            Column to use for weighting the events
        errors : bool
            Whether to attach errors to the resulting Map
        name : None or string
            Name to give to resulting Map. If None, a default is derived from
            `kinds` and `weights_col`.
        tex : None or string
            TeX label to give to the resulting Map. If None, default is
            derived from the `name` specified or the derived default.
        **kwargs : Keyword args passed to Map object

        Returns
        -------
        Map : numpy ndarray with as many dimensions as specified by `binning`
            argument

        """
        # TODO: make able to take integer for `binning` and--in combination
        # with units in the Data columns--generate an appropriate
        # MultiDimBinning object, attach this and return the package as a Map.

        if isinstance(kinds, str):
            kinds = [kinds]
        if 'muons' not in kinds and 'noise' not in kinds:
            kinds = self._parse_flavint_groups(kinds)
        kinds = kinds[0]

        if isinstance(binning_cols, str):
            binning_cols = [binning_cols]
        assert weights_col is None or isinstance(weights_col, str)

        # TODO: units of columns, and convert bin edges if necessary
        if isinstance(binning, OneDimBinning):
            binning = MultiDimBinning([binning])
        elif isinstance(binning, MultiDimBinning):
            pass
        elif (isinstance(binning, Iterable)
              and not isinstance(binning, Sequence)):
            binning = list(binning)
        elif isinstance(binning, Sequence):
            pass
        else:
            raise TypeError('Unhandled type %s for `binning`.' % type(binning))

        if isinstance(binning, Sequence):
            raise NotImplementedError(
                'Simple sequences not handled at this time. Please specify a'
                ' OneDimBinning or MultiDimBinning object for `binning`.')
            # assert len(binning_cols) == len(binning)
            # bin_edges = binning

        # TODO: units support for Data will mean we can do `m_as(...)` here!
        bin_edges = [edges.magnitude for edges in binning.bin_edges]
        if binning_cols is None:
            binning_cols = binning.names
        else:
            assert set(binning_cols).issubset(set(binning.names))

        # Extract the columns' data into a list of array(s) for histogramming
        sample = [self[kinds][colname] for colname in binning_cols]
        err_weights = None
        hist_weights = None
        if weights_col is not None:
            hist_weights = self[kinds][weights_col]
            if errors:
                err_weights = np.square(hist_weights)

        hist, edges = np.histogramdd(sample=sample,
                                     weights=hist_weights,
                                     bins=bin_edges)
        if errors:
            sumw2, edges = np.histogramdd(sample=sample,
                                          weights=err_weights,
                                          bins=bin_edges)
            hist = unp.uarray(hist, np.sqrt(sumw2))

        if name is None:
            if tex is None:
                try:
                    tex = kinds.tex
                # TODO: specify specific exception(s)
                except:
                    tex = r'{0}'.format(kinds)
                if weights_col is not None:
                    tex += r', \; {\rm weights} =' + text2tex(weights_col)

            name = str(kinds)
            if weights_col is not None:
                name += ', weights=' + weights_col

        if tex is None:
            tex = text2tex(name)

        return Map(name=name, hist=hist, binning=binning, tex=tex, **kwargs)
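
A hedged call sketch, mirroring how this method is invoked in Example #20 (assumes data is a PISA Data instance containing a 'numu_cc' group with a 'pisa_weight' column, and binning is a MultiDimBinning):

numu_cc_map = data.histogram(kinds='numu_cc',
                             binning=binning,
                             weights_col='pisa_weight',
                             errors=True)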
Example #14
def test_kde_histogramdd():
    """Unit tests for kde_histogramdd"""
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp
    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    parser = ArgumentParser()
    parser.add_argument("-v",
                        action="count",
                        default=None,
                        help="set verbosity level")
    args = parser.parse_args()
    set_verbosity(args.v)

    temp_dir = mkdtemp()

    try:
        my_plotter = Plotter(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )

        b1 = OneDimBinning(name="coszen",
                           num_bins=20,
                           is_lin=True,
                           domain=[-1, 1],
                           tex=r"\cos(\theta)")
        b2 = OneDimBinning(name="energy",
                           num_bins=10,
                           is_log=True,
                           domain=[1, 80] * ureg.GeV,
                           tex=r"E")
        b3 = OneDimBinning(name="pid",
                           num_bins=2,
                           bin_edges=[0, 1, 2],
                           tex=r"pid")
        binning = b1 * b2 * b3

        # now let's generate some toy data

        N = 100000
        cz = np.random.normal(1, 1.2, N)
        # cut away coszen outside -1, 1
        cz = cz[(cz >= -1) & (cz <= 1)]
        e = np.random.normal(30, 20, len(cz))
        pid = np.random.uniform(0, 2, len(cz))
        data = np.array([cz, e, pid]).T

        # make numpy histogram for validation
        bins = [unp.nominal_values(b.bin_edges) for b in binning]
        raw_hist, _ = np.histogramdd(data, bins=bins)

        # get KDE'ed histo
        hist = kde_histogramdd(
            data,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        # put into mapsets and plot
        m1 = Map(name="KDE", hist=hist, binning=binning)
        m2 = Map(name="raw", hist=raw_hist, binning=binning)
        with np.errstate(divide="ignore", invalid="ignore"):
            m3 = m2 / m1
        m3.name = "hist/KDE"
        m3.tex = m3.name
        m4 = m1 - m2
        m4.name = "KDE - hist"
        m4.tex = m4.name
        ms = MapSet([m1, m2, m3, m4])
        my_plotter.plot_2d_array(ms, fname="test_kde", cmap="summer")
    except:
        rmtree(temp_dir)
        raise
    else:
        logging.warning("Inspect and manually clean up output(s) saved to %s" %
                        temp_dir)
Example #15
def test_BinnedTensorTransform():
    """Unit tests for BinnedTensorTransform class"""
    binning = MultiDimBinning([
        dict(name='energy',
             is_log=True,
             domain=(1, 80) * ureg.GeV,
             num_bins=10),
        dict(name='coszen', is_lin=True, domain=(-1, 0), num_bins=5)
    ])

    nue_map = Map(name='nue',
                  binning=binning,
                  hist=np.random.random(binning.shape))
    nue_map.set_poisson_errors()
    numu_map = Map(name='numu',
                   binning=binning,
                   hist=np.random.random(binning.shape))
    numu_map.set_poisson_errors()
    inputs = MapSet(
        name='inputs',
        maps=[nue_map, numu_map],
    )

    xform0 = BinnedTensorTransform(input_names='nue',
                                   output_name='nue',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=2 * np.ones(binning.shape))

    xform1 = BinnedTensorTransform(input_names=['numu'],
                                   output_name='numu',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=3 * np.ones(binning.shape))

    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=np.stack(
            [2 * np.ones(binning.shape), 3 * np.ones(binning.shape)], axis=0))
    assert np.all((xform2 + 2).xform_array - xform2.xform_array == 2)

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xform0, xform1, xform2]):
            t_file = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_file)
            t_ = BinnedTensorTransform.from_json(t_file)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t, t_)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(name='scaling',
                          transforms=[xform0, xform1, xform2],
                          hash=9)

    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xforms]):
            t_filename = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_filename)
            t_ = TransformSet.from_json(t_filename)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t.transforms, t_.transforms)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
Example #16
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering roounfold._compute_outputs')
        self.fit_hash = deepcopy(inputs.metadata['fit_hash'])
        logging.trace('{0} roounfold fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        if self.random_state is not None:
            logging.trace('{0} roounfold random_state = '
                          '{1}'.format(inputs.metadata['name'],
                                       hash_obj(
                                           self.random_state.get_state())))
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self._data = inputs

        if not self.params['return_eff'].value:
            if len(self.output_names) > 1:
                raise AssertionError(
                    'Specified more than one NuFlavIntGroup as '
                    'signal, {0}'.format(self.output_names))
            self.output_str = str(self.output_names[0])

        real_data = self.params['real_data'].value
        if real_data:
            logging.debug('Using real data')
            if 'nuall' not in self._data:
                raise AssertionError(
                    'When using real data, input Data object must contain '
                    'only one element "nuall" containing the data, instead it '
                    'contains elements {0}'.format(self._data.keys()))
            if self.disk_cache is None:
                raise AssertionError(
                    'No disk_cache specified from which to load - using real '
                    'data requires object such as the response object to be '
                    'cached to disk.')

        if self.params['optimize_reg'].value and real_data:
            raise AssertionError(
                'Cannot optimize the regularization if using real data.')
        if int(self.params['stat_fluctuations'].m) != 0 and real_data:
            raise AssertionError(
                'Cannot do Poisson fluctuations if using real data.')
        if self.params['return_eff'].value and real_data:
            raise AssertionError(
                'Returning efficiency maps is not implemented when using real data.'
            )

        if self.params['return_eff'].value:
            fin_data = self._data
            # Load generator level data for signal
            unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
            pipeline_cfg = from_file(unfold_pipeline_cfg)
            template_maker = Pipeline(pipeline_cfg)
            gen_data = template_maker.get_outputs()

            fin_data = fin_data.transform_groups(self.output_names)
            gen_data = gen_data.transform_groups(self.output_names)

            efficiencies = []
            assert set(fin_data.keys()) == set(gen_data.keys())
            for fig in fin_data.keys():
                figd_f = fin_data[fig]
                figd_g = gen_data[fig]
                inv_eff = self._get_inv_eff(figd_f, figd_g, self.true_binning,
                                            fig)

                i_mask = ~(inv_eff == 0.)
                eff = unp.uarray(np.zeros(self.true_binning.shape),
                                 np.zeros(self.true_binning.shape))
                eff[i_mask] = 1. / inv_eff[i_mask]
                efficiencies.append(
                    Map(name=fig, hist=eff, binning=self.true_binning))
            return MapSet(efficiencies)

        # TODO(shivesh): [   TRACE] None of the selections ['iron', 'nh'] found in this pipeline.
        # TODO(shivesh): Fix "smearing_matrix" memory leak
        # TODO(shivesh): Fix unweighted unfolding
        # TODO(shivesh): different algorithms
        # TODO(shivesh): implement handling of 0 division inside Map objects
        if real_data:
            unfold_map = self.unfold_real_data()
        else:
            unfold_map = self.unfold_mc()

        return MapSet([unfold_map])
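
A hedged numpy sketch of the masked reciprocal used above for the efficiency maps: bins with zero inverse efficiency stay zero instead of triggering a division error.

import numpy as np

inv_eff = np.array([0.0, 2.0, 4.0])
eff = np.zeros_like(inv_eff)
mask = inv_eff != 0.
eff[mask] = 1. / inv_eff[mask]  # -> [0., 0.5, 0.25]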
Example #17
    def _compute_nominal_outputs(self):
        """load the evnts from file, perform sanity checks and histogram them
        (into final MapSet)

        """
        # get params
        data_file_name = self.params.data_file.value
        sim_version = self.params.sim_ver.value
        bdt_cut = self.params.bdt_cut.value.m_as('dimensionless')

        self.bin_names = self.output_binning.names

        # TODO: convert units using e.g. `comp_units` in stages/reco/hist.py
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in name:
                bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        # right now only use burn sample with sim_version = '4digit'
        #print "sim_version == ", sim_version
        if sim_version == "4digit":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
            Reco_Track_Name = 'IC86_Dunkman_L6_MultiNest8D_PDG_Track'
        elif sim_version == "5digit" or sim_version=="dima":
            Reco_Neutrino_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
            Reco_Track_Name = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_Track'
        else:
            raise ValueError(
                'Only 4digit, 5digit (H2 model for hole ice) or'
                ' dima (dima p1 and p2 for hole ice) are allowed!'
            )

        data_file = h5py.File(find_resource(data_file_name), 'r')
        L6_result = np.array(data_file['IC86_Dunkman_L6']['result'])
        dLLH = np.array(data_file['IC86_Dunkman_L6']['delta_LLH'])
        reco_energy_all = np.array(data_file[Reco_Neutrino_Name]['energy'])
        reco_coszen_all = np.array(np.cos(
            data_file[Reco_Neutrino_Name]['zenith']
        ))
        reco_trck_len_all = np.array(data_file[Reco_Track_Name]['length'])
        #print "before L6 cut, no. of burn sample = ", len(reco_coszen_all)

        # sanity check
        santa_doms = data_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = data_file['IC86_Dunkman_L3']['value']
        l4 = data_file['IC86_Dunkman_L4']['result']
        l5 = data_file['IC86_Dunkman_L5']['bdt_score']
        assert(np.all(santa_doms>=3) and np.all(l3 == 1) and np.all(l5 >= 0.1))

        # l4==1 was not applied when i3 files were written to hdf5 files, so do
        # it here
        dLLH = dLLH[l4==1]
        reco_energy_all = reco_energy_all[l4==1]
        reco_coszen_all = reco_coszen_all[l4==1]
        l5 = l5[l4==1]
        L6_result = L6_result[l4==1]
        data_file.close()

        dLLH_L6 = dLLH[L6_result==1]
        l5 = l5[L6_result==1]
        reco_energy_L6 = reco_energy_all[L6_result==1]
        reco_coszen_L6 = reco_coszen_all[L6_result==1]
        #print "after L6 cut, no. of burn sample = ", len(reco_coszen_L6)

        # Cut: Only keep bdt score >= 0.2 (from MSU latest result, make data/MC
        # agree much better); if use no such further cut, use bdt_cut = 0.1
        logging.info(
            "Cut2, removing events with bdt_score < %s i.e. only keep bdt > %s"
            %(bdt_cut, bdt_cut)
        )
        cut_events = {}
        cut = l5>=bdt_cut
        cut_events['reco_energy'] = reco_energy_L6[cut]
        cut_events['reco_coszen'] = reco_coszen_L6[cut]
        cut_events['pid'] = dLLH_L6[cut]

        hist, _ = np.histogramdd(sample = np.array(
            [cut_events[bin_name] for bin_name in self.bin_names]
        ).T, bins=self.bin_edges)

        maps = [Map(name=self.output_names[0], hist=hist,
                    binning=self.output_binning)]
        self.template = MapSet(maps, name='data')
Example #18
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering nusquids._compute_outputs')
        if not isinstance(inputs, MapSet):
            raise AssertionError('inputs is not a MapSet object, instead '
                                 'is type {0}'.format(type(inputs)))
        # TODO(shivesh): oversampling
        # TODO(shivesh): more options
        # TODO(shivesh): static function
        # TODO(shivesh): hashing
        binning = self.input_binning.basename_binning
        binning = binning.reorder_dimensions(('coszen', 'energy'),
                                             use_basenames=True)
        cz_binning = binning['coszen']
        en_binning = binning['energy']

        units = nsq.Const()

        interactions = False
        cz_min = cz_binning.bin_edges.min().m_as('radian')
        cz_max = cz_binning.bin_edges.max().m_as('radian')
        en_min = en_binning.bin_edges.min().m_as('GeV') * units.GeV
        en_max = en_binning.bin_edges.max().m_as('GeV') * units.GeV
        cz_centers = cz_binning.weighted_centers.m_as('radian')
        en_centers = en_binning.weighted_centers.m_as('GeV') * units.GeV
        cz_grid = np.array([cz_min] + cz_centers.tolist() + [cz_max])
        en_grid = np.array([en_min] + en_centers.tolist() + [en_max])
        nu_flavours = 3

        nuSQ = nsq.nuSQUIDSAtm(cz_grid, en_grid, nu_flavours,
                               nsq.NeutrinoType.both, interactions)

        nuSQ.Set_EvalThreads(multiprocessing.cpu_count())

        theta12 = self.params['theta12'].value.m_as('radian')
        theta13 = self.params['theta13'].value.m_as('radian')
        theta23 = self.params['theta23'].value.m_as('radian')

        deltam21 = self.params['deltam21'].value.m_as('eV**2')
        deltam31 = self.params['deltam31'].value.m_as('eV**2')

        # TODO(shivesh): check if deltacp should be in radians
        deltacp = self.params['deltacp'].value.m_as('radian')

        nuSQ.Set_MixingAngle(0, 1, theta12)
        nuSQ.Set_MixingAngle(0, 2, theta13)
        nuSQ.Set_MixingAngle(1, 2, theta23)

        nuSQ.Set_SquareMassDifference(1, deltam21)
        nuSQ.Set_SquareMassDifference(2, deltam31)

        nuSQ.Set_CPPhase(0, 2, deltacp)

        nuSQ.Set_rel_error(1.0e-10)
        nuSQ.Set_abs_error(1.0e-10)

        # Pad the edges of the energy, coszen space to cover the entire grid range
        cz_shape = cz_binning.shape[0] + 2
        en_shape = en_binning.shape[0] + 2
        shape = (cz_shape, en_shape) + (2, 3)
        initial_state = np.full(shape, np.nan)

        def pad_inputs(x):
            return np.pad(unp.nominal_values(x.hist), 1, 'edge')

        # Third index is selecting nu(0), nubar(1)
        # Fourth index is selecting flavour nue(0), numu(1), nutau(2)
        initial_state[:, :, 0, 0] = pad_inputs(inputs['nue'])
        initial_state[:, :, 1, 0] = pad_inputs(inputs['nuebar'])
        initial_state[:, :, 0, 1] = pad_inputs(inputs['numu'])
        initial_state[:, :, 1, 1] = pad_inputs(inputs['numubar'])
        initial_state[:, :, 0, 2] = np.zeros(pad_inputs(inputs['nue']).shape)
        initial_state[:, :, 1, 2] = np.zeros(pad_inputs(inputs['nue']).shape)

        if np.any(np.isnan(initial_state)):
            raise AssertionError('nan entries in initial_state: '
                                 '{0}'.format(initial_state))
        nuSQ.Set_initial_state(initial_state, nsq.Basis.flavor)

        # TODO(shivesh): use verbosity level to set this
        nuSQ.Set_ProgressBar(True)
        nuSQ.EvolveState()

        oversample = self.params['oversample'].value.m
        os_binning = binning.oversample(oversample)
        os_cz_binning = os_binning['coszen']
        os_en_binning = os_binning['energy']
        os_cz_centers = os_cz_binning.weighted_centers.m_as('radian')
        os_en_centers = os_en_binning.weighted_centers.m_as('GeV')

        fs = {}
        for nu in self.output_names:
            fs[nu] = np.full(os_binning.shape, np.nan)

        for icz, cz_bin in enumerate(os_cz_centers):
            for ie, en_bin in enumerate(os_en_centers):
                en_bin_u = en_bin * units.GeV
                fs['nue'][icz][ie] = nuSQ.EvalFlavor(0, cz_bin, en_bin_u, 0)
                fs['nuebar'][icz][ie] = nuSQ.EvalFlavor(0, cz_bin, en_bin_u, 1)
                fs['numu'][icz][ie] = nuSQ.EvalFlavor(1, cz_bin, en_bin_u, 0)
                fs['numubar'][icz][ie] = nuSQ.EvalFlavor(
                    1, cz_bin, en_bin_u, 1)
                fs['nutau'][icz][ie] = nuSQ.EvalFlavor(2, cz_bin, en_bin_u, 0)
                fs['nutaubar'][icz][ie] = nuSQ.EvalFlavor(
                    2, cz_bin, en_bin_u, 1)

        out_binning = self.input_binning.reorder_dimensions(
            ('coszen', 'energy'), use_basenames=True)
        os_out_binning = out_binning.oversample(oversample)

        outputs = []
        for key in fs:
            if np.any(np.isnan(fs[key])):
                raise AssertionError(
                    'Invalid value computed for {0} oscillated output: '
                    '\n{1}'.format(key, fs[key]))
            out_map = Map(name=key, binning=os_out_binning, hist=fs[key])
            out_map = out_map.downsample(oversample) / float(oversample)
            out_map = out_map.reorder_dimensions(self.input_binning)
            outputs.append(out_map)

        return MapSet(outputs)
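
A hedged numpy sketch of the edge padding used above: the input maps are evaluated on bin centers, then padded so the oscillation grid also covers the outermost bin edges.

import numpy as np

hist = np.arange(6.0).reshape(2, 3)
padded = np.pad(hist, 1, 'edge')  # repeats border values outward
assert padded.shape == (4, 5)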
Example #19
def test_pi_resample():
    """Unit test for the resampling stage."""
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.log import set_verbosity, logging
    from pisa.utils.comparisons import ALLCLOSE_KW
    from collections import OrderedDict
    from copy import deepcopy

    example_cfg = parse_pipeline_config('settings/pipeline/example.cfg')
    reco_binning = example_cfg[('utils', 'pi_hist')]['output_specs']
    coarse_binning = reco_binning.downsample(reco_energy=2, reco_coszen=2)
    assert coarse_binning.is_compat(reco_binning)

    # replace binning of output with coarse binning
    example_cfg[('utils', 'pi_hist')]['output_specs'] = coarse_binning
    # make another pipeline with an upsampling stage to the original binning
    upsample_cfg = deepcopy(example_cfg)
    pi_resample_cfg = OrderedDict()
    pi_resample_cfg['input_specs'] = coarse_binning
    pi_resample_cfg['output_specs'] = reco_binning
    pi_resample_cfg['scale_errors'] = True
    upsample_cfg[('utils', 'pi_resample')] = pi_resample_cfg

    example_maker = DistributionMaker([example_cfg])
    upsampled_maker = DistributionMaker([upsample_cfg])

    example_map = example_maker.get_outputs(return_sum=True)[0]
    example_map_upsampled = upsampled_maker.get_outputs(return_sum=True)[0]

    # First check: The upsampled map must have the same total count as the original map
    assert np.isclose(
        np.sum(example_map.nominal_values),
        np.sum(example_map_upsampled.nominal_values),
    )

    # Check consistency of modified chi-square
    # ----------------------------------------
    # When the assumption holds that events are uniformly distributed over the coarse
    # bins, the modified chi-square should not change from upscaling the maps. We test
    # this by making a fluctuated coarse map and then upsampling that map according to
    # the assumption by bin volumes. We should find that the modified chi-square between
    # the coarse map and the coarse fluctuated map is the same as the upsampled map and
    # the upsampled fluctuated map.

    # It doesn't matter precisely how we fluctuate it here, we just want any different
    # map...
    random_map_coarse = example_map.fluctuate(method='scaled_poisson',
                                              random_state=42)
    random_map_coarse.set_errors(None)

    # This bit is an entirely independent implementation of the upsampling. The count
    # in every bin is scaled according to the ratio of weighted bin volumes.
    upsampled_hist = np.zeros_like(example_map_upsampled.nominal_values)
    upsampled_errs = np.zeros_like(example_map_upsampled.nominal_values)
    up_binning = example_map_upsampled.binning

    coarse_hist = np.array(random_map_coarse.nominal_values)
    coarse_errors = np.array(random_map_coarse.std_devs)
    coarse_binning = random_map_coarse.binning

    for bin_idx in np.ndindex(upsampled_hist.shape):
        one_bin = up_binning[bin_idx]
        fine_bin_volume = one_bin.weighted_bin_volumes(
            attach_units=False, ).squeeze().item()
        # the following is basically an independent implementation of translate.lookup
        coarse_index = []  # index where the upsampled bin came from
        for dim in up_binning.names:
            x = one_bin[dim].weighted_centers[
                0].m  # middle point of the one bin
            bins = coarse_binning[
                dim].bin_edges.m  # coarse bin edges in that dim
            coarse_index.append(np.digitize(x, bins) -
                                1)  # np.digitize returns 1 for the first bin
        coarse_index = tuple(coarse_index)
        coarse_bin_volume = coarse_binning.weighted_bin_volumes(
            attach_units=False, )[coarse_index].squeeze().item()

        upsampled_hist[bin_idx] = coarse_hist[coarse_index]
        upsampled_hist[bin_idx] *= fine_bin_volume
        upsampled_hist[bin_idx] /= coarse_bin_volume

    # done, at last!
    random_map_upsampled = Map(name="random_upsampled",
                               hist=upsampled_hist,
                               binning=up_binning)
    random_map_upsampled.set_errors(None)

    # After ALL THIS, we get the same modified chi-square from the coarse and the
    # upsampled pair of maps. Neat, huh?
    assert np.allclose(
        random_map_coarse.mod_chi2(example_map),
        random_map_upsampled.mod_chi2(example_map_upsampled),
        **ALLCLOSE_KW,
    )
    logging.info('<< PASS : pi_resample >>')
Example #20
    def _calculate_fit_coeffs(data,
                              params,
                              fit_binning,
                              nu_params=None,
                              mu_params=None):
        """
        Calculate the fit coefficients for each systematic, flavint,
        bin for a polynomial.
        """
        logging.debug('Calculating fit coefficients')

        config = from_file(params['discr_sys_sample_config'].value)

        degree = int(params['poly_degree'].value)
        force_through_nominal = params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        template_maker = Pipeline(params['pipeline_config'].value)
        dataset_param = template_maker.params['dataset']

        def parse(string):
            return string.replace(' ', '').split(',')

        sys_fit_coeffs = OrderedDict()
        if nu_params is not None:
            sys_list = parse(config.get('neutrinos', 'sys_list'))
            nu_params = [x[3:] for x in nu_params]

            if set(nu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does '
                    'not match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(nu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'neutrinos|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                mapset_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if flavint_groups is None:
                        flavint_groups = template.flavint_groups
                    else:
                        if set(flavint_groups) != set(template.flavint_groups):
                            raise AssertionError(
                                'Mismatch of flavint_groups - ({0}) does not '
                                'match flavint_groups '
                                '({1})'.format(flavint_groups,
                                               template.flavint_groups))

                    outputs = []
                    for fig in template.keys():
                        outputs.append(
                            template.histogram(kinds=fig,
                                               binning=fit_binning,
                                               weights_col='pisa_weight',
                                               errors=False,
                                               name=str(NuFlavIntGroup(fig))))
                    mapset_dict[run] = MapSet(outputs, name=run)

                nom_mapset = mapset_dict[nominal]
                fracdiff_mapset_dict = OrderedDict()
                for run in runs:
                    mapset = []
                    for flavintg_map in mapset_dict[run]:
                        # TODO(shivesh): error propagation?
                        flavintg = flavintg_map.name
                        mask = ~(nom_mapset[flavintg].hist == 0.)
                        div = np.zeros(flavintg_map.shape)
                        with np.errstate(divide='ignore', invalid='ignore'):
                            div[mask] = \
                                unp.nominal_values(flavintg_map.hist[mask]) /\
                                unp.nominal_values(nom_mapset[flavintg].hist[mask])
                        mapset.append(
                            Map(name=flavintg,
                                binning=flavintg_map.binning,
                                hist=div))
                    fracdiff_mapset_dict[run] = MapSet(mapset)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                params_mapset = []
                for fig in template.keys():
                    # TODO(shivesh): Fix numpy warning on this line
                    pvals_hist = np.empty(tuple(map(int, combined_binning.shape)),
                                          dtype=object)
                    hists = [
                        fracdiff_mapset_dict[run][fig].hist for run in runs
                    ]
                    zip_hists = np.dstack(hists)
                    for idx in np.ndindex(fit_binning.shape):
                        y_values = []
                        y_sigma = []
                        for run in fracdiff_mapset_dict:
                            y_values.append(
                                unp.nominal_values(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))
                            y_sigma.append(
                                unp.std_devs(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))

                        if np.any(y_sigma):
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   sigma=y_sigma,
                                                   p0=np.ones(degree))
                        else:
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   p0=np.ones(degree))
                        # perr = np.sqrt(np.diag(pcov))
                        # pvals = unp.uarray(popt, perr)
                        pvals_hist[idx] = popt
                    pvals_hist = np.array(pvals_hist.tolist())
                    params_mapset.append(
                        Map(name=fig,
                            binning=combined_binning,
                            hist=pvals_hist))
                params_mapset = MapSet(params_mapset, name=sys)

                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_mapset])
                else:
                    sys_fit_coeffs[sys] = params_mapset

        if mu_params is not None:
            sys_list = parse(config.get('muons', 'sys_list'))
            mu_params = [x[3:] for x in mu_params]

            if set(mu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does '
                    'not match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(mu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'muons|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                map_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if not template.contains_muons:
                        raise AssertionError(
                            'Template output does not contain muons')

                    output = template.histogram(
                        kinds='muons',
                        binning=fit_binning,
                        # NOTE: weights cancel in fraction
                        weights_col=None,
                        errors=False,
                        name='muons')
                    map_dict[run] = output

                nom_map = map_dict[nominal]
                fracdiff_map_dict = OrderedDict()
                for run in runs:
                    mask = ~(nom_map.hist == 0.)
                    div = np.zeros(nom_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(map_dict[run].hist[mask]) /\
                            unp.nominal_values(nom_map.hist[mask])
                    fracdiff_map_dict[run] = Map(name='muons',
                                                 binning=nom_map.binning,
                                                 hist=div)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                pvals_hist = np.empty(tuple(map(int, combined_binning.shape)),
                                      dtype=object)
                hists = [fracdiff_map_dict[run].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    # Collect per-bin values and errors from the muon
                    # fractional-difference maps
                    for run in runs:
                        y_values.append(
                            unp.nominal_values(
                                fracdiff_map_dict[run].hist[idx]))
                        y_sigma.append(
                            unp.std_devs(
                                fracdiff_map_dict[run].hist[idx]))
                    if np.any(y_sigma):
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               sigma=y_sigma,
                                               p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_map = Map(name='muons',
                                 binning=combined_binning,
                                 hist=pvals_hist)
                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_map])
                else:
                    sys_fit_coeffs[sys] = params_map

        return sys_fit_coeffs
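# A minimal standalone sketch of the per-bin fit performed above: for each
# analysis bin, the fractional deviations of the systematic sets from the
# nominal set are fit as a function of each run's offset from nominal
# (`delta_runs`). The polynomial `fit_func`, `degree`, and the toy numbers
# below are illustrative assumptions, not taken from the code above.
import numpy as np
from scipy.optimize import curve_fit

def fit_func(delta, *coeffs):
    # Polynomial in the systematic offset; coeffs[0] is the constant term
    return sum(c * delta**i for i, c in enumerate(coeffs))

degree = 2
delta_runs = np.array([-1.0, 0.0, 1.0])  # run offsets relative to nominal
y_values = np.array([0.9, 1.0, 1.15])    # fractional deviation in one bin
popt, _pcov = curve_fit(fit_func, delta_runs, y_values, p0=np.ones(degree))
print(popt)  # best-fit coefficients for this single bin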
Example #21
    def _apply(self, inputs):
        """Apply transforms to input maps to compute output maps.

        Parameters
        ----------
        inputs : MapSet
            Container class that must contain (at least) the maps to be
            transformed.

        Returns
        -------
        output : Map
            Result of applying the transform to the input map(s).

        """
        self.validate_input(inputs)

        # TODO: In the multiple inputs / single output case and depending upon
        # the dimensions of the transform, for efficiency purposes we should
        # make sure that an operation is not carried out like
        #
        #   (input0 [*] transform) + (input1 [*] transform) = output
        #
        # but instead is performed more efficiently as
        #
        #   (input0 + input1) [*] transform = output
        #
        # where [*] is some linear operation, like element-by-element
        # multiplication, a dot product, etc.
        #
        # E.g., for a 1D dot product (dimensionality-reducing linear operation)
        # with M_in inputs and N_el elements in each
        # vector, for the first (less efficient) formulation, there are
        #
        #   (N_el-1)*M_in + (M_in-1) = (M_in*N_el - 1) adds
        #
        # and
        #
        #   (N_el*M_in) multiplies
        #
        # while for the second (more efficient) formulation, there are
        #
        #   (M_in-1)*N_el + (N_el-1) = (M_in*N_el - 1) adds
        #
        # and
        #
        #   (N_el) multiplies
        #
        # so the benefit is a reduction of the number of multiplies necessary
        # by a factor of the number of inputs being "combined" (the number of
        # adds stays the same).
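        # As a concrete check of the counts above (illustrative numbers):
        # with M_in = 3 input vectors of N_el = 100 elements each, summing
        # after the dot products costs N_el*M_in = 300 multiplies, while
        # summing the inputs first costs only N_el = 100 multiplies; both
        # formulations need M_in*N_el - 1 = 299 adds.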

        # TODO: make sure all of these operations are compatible with
        # uncertainties module!

        names = self.input_names
        in0 = inputs[names[0]]

        if self.num_inputs == 1:
            input_array = (in0.rebin(self.input_binning)).hist

        # Stack inputs, sum inputs, *then* rebin (if necessary)
        elif self.sum_inputs:
            input_array = np.sum([inputs[n].hist for n in names], axis=0)
            input_array = rebin(input_array,
                                orig_binning=in0.binning,
                                new_binning=self.input_binning)

        # Rebin (if necessary) then stack
        else:
            input_array = [(inputs[n].rebin(self.input_binning)).hist
                           for n in names]
            input_array = np.stack(input_array, axis=0)

        # TODO: is logic kosher here?

        # Transform same shape: element-by-element multiplication
        if self.xform_array.shape == input_array.shape:
            if (isinstance(self.error_method, str)
                    and self.error_method.strip().lower() == 'fixed'):
                # don't scale errors here
                output = unp.uarray(
                    unp.nominal_values(input_array) * self.xform_array,
                    unp.std_devs(input_array))
            else:
                output = input_array * self.xform_array

            # If multiple inputs were concatenated together, and we did not sum
            # these inputs together, we need to sum the results together now.

            # TODO: generalize this and the above operation (and possibly speed
            # this up) by formulating a simple inner product above and avoiding
            # an explicit sum here?
            if self.num_inputs > 1 and not self.sum_inputs:
                output = np.sum(output, axis=0)

        # TODO: Check that
        #   len(xform.shape) == 2*len(input_array.shape)
        # and then check that
        #   xform.shape == (input_array.shape, input_array.shape) (roughly)
        # and then apply tensordot appropriately for this generic case...

        # TODO: why does this fail for different input/output binning, but
        # below tensordot works?
        #elif len(self.xform_array.shape) == 4 and len(input_array.shape) == 2:
        #    output = map2d_kernel4d(input_array, self.xform_array)

        elif len(self.xform_array.shape) == 2 * len(input_array.shape):
            output = np.tensordot(input_array,
                                  self.xform_array,
                                  axes=([0, 1], [0, 1]))

        elif (input_array.shape ==
              self.xform_array.shape[0:len(input_array.shape)]):
            axes = np.arange(len(input_array.shape))
            output = np.tensordot(input_array,
                                  self.xform_array,
                                  axes=(axes, axes))

        elif (input_array.shape ==
              self.xform_array.shape[-len(input_array.shape):]):
            output = input_array * self.xform_array

        else:
            raise ValueError('Unhandled shapes for input(s) "%s": %s and'
                             ' transform: %s.' %
                             (', '.join(self.input_names), input_array.shape,
                              self.xform_array.shape))

        output = Map(name=self.output_name,
                     hist=output,
                     binning=self.output_binning)

        return output
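# A minimal numpy sketch of the shape dispatch used in `_apply` above
# (hypothetical shapes): a transform with the same shape as the input is
# applied element-by-element, while a transform with doubled dimensionality
# is contracted against the input with `tensordot`.
import numpy as np

inp = np.random.rand(10, 20)  # one input histogram

# Same-shape transform: element-by-element multiplication
xform_same = np.random.rand(10, 20)
out_same = inp * xform_same  # shape (10, 20)

# Doubled-dimension transform: contract over the input's axes
xform_4d = np.random.rand(10, 20, 10, 20)
out_4d = np.tensordot(inp, xform_4d, axes=([0, 1], [0, 1]))  # shape (10, 20)

assert out_same.shape == out_4d.shape == (10, 20)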
Example #22
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the
                # map (`fluctuate` returns a new Map rather than modifying
                # it in place)
                input_map = input_map.fluctuate(method="poisson",
                                                random_state=input_num)

                # Reorder dimensions according to the user's original binning
                # spec (`reorder_dimensions` likewise returns a new Map)
                input_map = input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
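# A standalone sketch of the "section arg=val" override logic used in `main`
# above, written against the standard-library ConfigParser; the section and
# option names here are made up for illustration.
from configparser import ConfigParser

def apply_override(cfg, arg_list):
    # arg_list is e.g. ['stage.flux', 'oversample=10']
    section = arg_list[0]
    remainder = " ".join(arg_list[1:])
    name, _, value = remainder.partition("=")
    cfg.set(section, name.strip(), value.strip())

cfg = ConfigParser()
cfg.add_section("stage.flux")
apply_override(cfg, ["stage.flux", "oversample=10"])
assert cfg.get("stage.flux", "oversample") == "10"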
Example #23
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing summary statistics for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetry (pull)
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding the optional
    # absolute-value operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(m):
        """Return a copy of `m` with bins indistinguishable from zero set
        to NaN."""
        newmap = deepcopy(m)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref):')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s___%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
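# The three comparison quantities returned by `compare` reduce to simple
# array arithmetic once zero reference bins are masked to NaN. A minimal
# numpy sketch with toy arrays:
import numpy as np

ref = np.array([[4.0, 0.0], [9.0, 16.0]])
test = np.array([[5.0, 1.0], [8.0, 20.0]])

safe_ref = np.where(ref == 0.0, np.nan, ref)  # zero_to_nan equivalent

diff = test - ref                  # Test - Ref
fract_diff = diff / safe_ref       # (Test - Ref) / Ref
asymm = diff / np.sqrt(safe_ref)   # (Test - Ref) / sqrt(Ref), i.e. the pull

total_asymm = np.sqrt(np.nansum(asymm**2))  # "sum in quadrature"
print(total_asymm)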
Example #24
def norm_sys_distributions(input_data):
    """Normalises systematics mapsets to the nominal mapset,
    performing error propagation.

    Parameters
    ----------
    input_data : dict
        The data container returned by `make_discrete_sys_distributions`.
        Note that this is modified by this function to add the normalised
        distributions.

    Notes
    -----
    Nothing is returned; `input_data` is modified in-place

    """
    #
    # Get the input mapsets
    #

    nominal_mapset = [
        dataset["mapset"] for dataset in input_data["datasets"]
        if dataset["nominal"]
    ]
    if len(nominal_mapset) != 1:
        raise ValueError("need 1 but got {} nominal mapsets".format(
            len(nominal_mapset)))
    nominal_mapset = nominal_mapset[0]

    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = []

    #
    # loop over types of event
    #

    for map_name in nominal_mapset.names:
        logging.info('Normalizing "%s" maps.', map_name)
        nominal_map = nominal_mapset[map_name]
        nominal_map_nominal_vals = nominal_map.nominal_values

        # Note that all
        #   `sys_mapset[map_name].nominal_values`
        # and
        #   `nominal_map.nominal_values`
        # are finite (neither infinite nor NaN), so the only issue in dividing
        # the former by the latter is if there are "empty bins" (zeros) in
        #   `nominal_map.nominal_values`
        finite_mask = nominal_map_nominal_vals != 0

        #
        # loop over datasets
        #

        for dataset_dict in input_data["datasets"]:
            #
            # Normalise maps
            #

            sys_mapset_map = dataset_dict["mapset"][map_name]

            # TODO: think about the best way to perform unc. propagation

            # Create a new array with uncertainties, the same shape as the
            # map; the values are irrelevant, as all will be overwritten
            norm_sys_hist = copy.deepcopy(sys_mapset_map.hist)

            # Note that we divide by nominal_vals to avoid double counting the
            # uncertainty on the nominal template when applying the hyperplane
            # fits
            norm_sys_hist[finite_mask] = (
                sys_mapset_map.hist[finite_mask] /
                nominal_map_nominal_vals[finite_mask])
            norm_sys_hist[~finite_mask] = ufloat(np.nan, np.nan)

            # TODO Check for bins that are empty in the nominal hist but not in
            # at least one of the sys sets; currently we do not support this...

            norm_sys_map = Map(
                name=sys_mapset_map.name,
                binning=sys_mapset_map.binning,
                hist=norm_sys_hist,
            )

            # TODO Save the map
            dataset_dict["norm_mapset"].append(norm_sys_map)

    # Re-format
    for dataset_dict in input_data["datasets"]:
        dataset_dict["norm_mapset"] = MapSet(maps=dataset_dict["norm_mapset"],
                                             name=dataset_dict["mapset"].name)
Example #25
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering mceq._compute_outputs')

        primary_model = split(self.params['primary_model'].value, ',')
        if len(primary_model) != 2:
            raise ValueError('primary_model is not of length 2, instead is of '
                             'length {0}'.format(len(primary_model)))
        # Look up the primary-model class by name on the `pm` module
        # instead of using `eval`
        primary_model[0] = getattr(pm, primary_model[0])
        density_model = (self.params['density_model'].value,
                         (self.params['location'].value,
                          self.params['season'].value))

        mceq_run = MCEqRun(
            interaction_model=str(self.params['interaction_model'].value),
            primary_model=primary_model,
            theta_deg=0.0,
            density_model=density_model,
            **mceq_config.mceq_config_without(['density_model']))

        # Power of energy to scale the flux (the results will be returned as E**mag * flux)
        mag = 0

        # Obtain energy grid (fixed) of the solution for the x-axis of the plots
        e_grid = mceq_run.e_grid

        # Dictionary for results
        flux = OrderedDict()
        for nu in self.output_names:
            flux[nu] = []

        binning = self.output_binning
        cz_binning = binning.dims[binning.index('coszen', use_basenames=True)]
        en_binning = binning.dims[binning.index('energy', use_basenames=True)]
        cz_centers = cz_binning.weighted_centers.m
        angles = (np.arccos(cz_centers) * ureg.radian).m_as('degrees')

        for theta in angles:
            mceq_run.set_theta_deg(theta)
            mceq_run.solve()

            flux['nue'].append(mceq_run.get_solution('total_nue', mag))
            flux['nuebar'].append(mceq_run.get_solution('total_antinue', mag))
            flux['numu'].append(mceq_run.get_solution('total_numu', mag))
            flux['numubar'].append(mceq_run.get_solution(
                'total_antinumu', mag))

        for nu in flux:
            flux[nu] = np.array(flux[nu])

        smoothing = self.params['smoothing'].value.m
        en_centers = en_binning.weighted_centers.m_as('GeV')
        spline_flux = self.bivariate_spline(flux,
                                            cz_centers,
                                            e_grid,
                                            smooth=smoothing)
        ev_flux = self.bivariate_evaluate(spline_flux, cz_centers, en_centers)

        for nu in ev_flux:
            ev_flux[nu] = ev_flux[nu] * ureg('cm**-2 s**-1 sr**-1 GeV**-1')

        mapset = []
        for nu in ev_flux:
            mapset.append(Map(name=nu, hist=ev_flux[nu], binning=binning))

        return MapSet(mapset)
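# The zenith angles fed to `set_theta_deg` above come from the cos(zenith)
# bin centers; a minimal sketch of that conversion with plain numpy (no PISA
# or pint dependency):
import numpy as np

cz_centers = np.array([-0.9, -0.5, 0.0, 0.5, 0.9])  # cos(zenith) bin centers
angles_deg = np.degrees(np.arccos(cz_centers))      # zenith angle in degrees
print(angles_deg)  # approx [154.16 120. 90. 60. 25.84]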