Example #1
    def load_gen_data(self):
        logging.debug('Loading generator level sample')
        unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
        if isinstance(unfold_pipeline_cfg, str):
            pipeline_cfg = from_file(unfold_pipeline_cfg)
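            # the parsed config itself serves as the hash input for hash_obj below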
            pipeline_hash = pipeline_cfg
            sa_cfg = from_file(
                pipeline_cfg.get('stage.data', 'param.data_sample_config'))
            template_maker = Pipeline(pipeline_cfg)
        elif isinstance(unfold_pipeline_cfg, Pipeline):
            pipeline_hash = unfold_pipeline_cfg.state_hash
            sa_cfg = from_file(
                unfold_pipeline_cfg.params['data_sample_config'].value)
            template_maker = unfold_pipeline_cfg
        else:
            raise TypeError(
                'Unhandled type for `unfold_pipeline_cfg`: '
                '{0}'.format(type(unfold_pipeline_cfg)))
        gen_cfg = from_file(sa_cfg.get('neutrinos|gen_lvl', 'gen_cfg_file'))
        this_hash = hash_obj([gen_cfg, pipeline_hash, self.output_str],
                             full_hash=self.full_hash)
        if self.gen_data_hash == this_hash:
            return self._gen_data

        full_gen_data = template_maker.get_outputs()
        if not isinstance(full_gen_data, Data):
            raise AssertionError(
                'Output of pipeline is not a Data object, instead is type '
                '{0}'.format(type(full_gen_data)))
        trans_data = full_gen_data.transform_groups(self.output_str)
        gen_data = trans_data[self.output_str]

        self._gen_data = gen_data
        self.gen_data_hash = this_hash
        return gen_data
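
The hash-based caching in `load_gen_data` follows a simple pattern: hash everything that determines the result, and recompute only when that hash changes. A minimal, self-contained sketch of the same idea (the `hash_obj` and `expensive_computation` helpers here are illustrative stand-ins, not PISA's):

import hashlib
import json


def hash_obj(obj):
    """Stand-in hash helper: hash any JSON-serializable object."""
    return hashlib.sha256(json.dumps(obj, sort_keys=True).encode()).hexdigest()


def expensive_computation(cfg):
    """Placeholder for e.g. running a Pipeline and transforming its output."""
    return sorted(cfg.items())


class CachedLoader(object):
    def __init__(self):
        self._cache_hash = None
        self._cache_value = None

    def load(self, cfg):
        this_hash = hash_obj(cfg)
        if this_hash == self._cache_hash:
            # inputs unchanged since last call: reuse the cached result
            return self._cache_value
        value = expensive_computation(cfg)
        self._cache_hash = this_hash
        self._cache_value = value
        return value
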
Example #2
        lt_param.value = lt
        pipeline.update_params(lt_param)
        gen_pipe.update_params(lt_param)

        u_pipe = Param(name='unfold_pipeline_cfg',
                       value=gen_pipe,
                       is_fixed=True,
                       prior=None,
                       range=None)
        unfold_pipeline_cfg = u_pipe
        pipeline.update_params(unfold_pipeline_cfg)

        # Get nominal
        re_param.value = 0 * ureg.dimensionless
        pipeline.update_params(re_param)
        nom_out = pipeline.get_outputs().pop()

        re_param.value = 2 * ureg.dimensionless
        sf_param.value = 1234 * ureg.dimensionless
        pipeline.update_params(re_param)
        pipeline.update_params(sf_param)

        fe = []
        if 'test' in outname:
            n_trials = 5
        else:
            n_trials = 200
        for x in range(n_trials):
            temp_out = pipeline.get_outputs().pop()
            # mask bins where the nominal histogram is zero to avoid dividing by zero
            zero_mask = (nom_out.hist == 0)
            div = temp_out.hist[~zero_mask] / nom_out.hist[~zero_mask]
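
The zero-masked ratio above is a recurring pattern in this codebase. A standalone numpy sketch of the same bin-wise ratio, skipping zero-count nominal bins:

import numpy as np

nom = np.array([0., 2., 4., 0., 8.])    # nominal bin counts
trial = np.array([1., 3., 2., 5., 8.])  # fluctuated bin counts

ratio = np.zeros_like(nom)
mask = nom != 0  # only divide where the nominal count is non-zero
ratio[mask] = trial[mask] / nom[mask]
print(ratio)  # -> [0.  1.5 0.5 0.  1. ]
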
Example #3
File: fit.py  Project: thehrh/pisa-1
    def _calculate_fit_coeffs(data,
                              params,
                              fit_binning,
                              nu_params=None,
                              mu_params=None):
        """
        Calculate the polynomial fit coefficients for each systematic,
        flavint, and bin.
        """
        logging.debug('Calculating fit coefficients')

        config = from_file(params['discr_sys_sample_config'].value)

        degree = int(params['poly_degree'].value)
        force_through_nominal = params['force_through_nominal'].value

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        template_maker = Pipeline(params['pipeline_config'].value)
        dataset_param = template_maker.params['dataset']

        def parse(string):
            return string.replace(' ', '').split(',')

        sys_fit_coeffs = OrderedDict()
        if nu_params is not None:
            sys_list = parse(config.get('neutrinos', 'sys_list'))
            nu_params = deepcopy([x[3:] for x in nu_params])

            if set(nu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does not '
                    'match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(nu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'neutrinos|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                mapset_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if flavint_groups is None:
                        flavint_groups = template.flavint_groups
                    else:
                        if set(flavint_groups) != set(template.flavint_groups):
                            raise AssertionError(
                                'Mismatch of flavint_groups - ({0}) does not '
                                'match flavint_groups '
                                '({1})'.format(flavint_groups,
                                               template.flavint_groups))

                    outputs = []
                    for fig in template.keys():
                        outputs.append(
                            template.histogram(kinds=fig,
                                               binning=fit_binning,
                                               weights_col='pisa_weight',
                                               errors=False,
                                               name=str(NuFlavIntGroup(fig))))
                    mapset_dict[run] = MapSet(outputs, name=run)

                nom_mapset = mapset_dict[nominal]
                fracdiff_mapset_dict = OrderedDict()
                for run in runs:
                    mapset = []
                    for flavintg_map in mapset_dict[run]:
                        # TODO(shivesh): error propagation?
                        flavintg = flavintg_map.name
                        mask = ~(nom_mapset[flavintg].hist == 0.)
                        div = np.zeros(flavintg_map.shape)
                        with np.errstate(divide='ignore', invalid='ignore'):
                            div[mask] = \
                                unp.nominal_values(flavintg_map.hist[mask]) /\
                                unp.nominal_values(nom_mapset[flavintg].hist[mask])
                        mapset.append(
                            Map(name=flavintg,
                                binning=flavintg_map.binning,
                                hist=div))
                    fracdiff_mapset_dict[run] = MapSet(mapset)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                params_mapset = []
                for fig in template.keys():
                    # TODO(shivesh): Fix numpy warning on this line
                    pvals_hist = np.empty(
                        tuple(map(int, combined_binning.shape)), dtype=object)
                    hists = [
                        fracdiff_mapset_dict[run][fig].hist for run in runs
                    ]
                    zip_hists = np.dstack(hists)
                    for idx in np.ndindex(fit_binning.shape):
                        y_values = []
                        y_sigma = []
                        for run in fracdiff_mapset_dict:
                            y_values.append(
                                unp.nominal_values(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))
                            y_sigma.append(
                                unp.std_devs(
                                    fracdiff_mapset_dict[run][fig].hist[idx]))

                        if np.any(y_sigma):
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   sigma=y_sigma,
                                                   p0=np.ones(degree))
                        else:
                            popt, pcov = curve_fit(fit_func,
                                                   delta_runs,
                                                   y_values,
                                                   p0=np.ones(degree))
                        # perr = np.sqrt(np.diag(pcov))
                        # pvals = unp.uarray(popt, perr)
                        pvals_hist[idx] = popt
                    pvals_hist = np.array(pvals_hist.tolist())
                    params_mapset.append(
                        Map(name=fig,
                            binning=combined_binning,
                            hist=pvals_hist))
                params_mapset = MapSet(params_mapset, name=sys)

                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_mapset])
                else:
                    sys_fit_coeffs[sys] = params_mapset

        if mu_params is not None:
            sys_list = parse(config.get('muons', 'sys_list'))
            mu_params = deepcopy([x[3:] for x in mu_params])

            if set(mu_params) != set(sys_list):
                raise AssertionError(
                    'Systematics list in the sample config file does not '
                    'match the params in the pipeline config file\n {0} '
                    '!= {1}'.format(set(mu_params), set(sys_list)))

            for sys in sys_list:
                ev_sys = 'muons|' + sys
                runs = parse(config.get(ev_sys, 'runs')[1:-1])
                nominal = config.get(ev_sys, 'nominal')

                map_dict = OrderedDict()
                flavint_groups = None
                for run in runs:
                    logging.info('Loading run {0} of systematic '
                                 '{1}'.format(run, sys))
                    dataset_param.value = ev_sys + '|' + run
                    template_maker.update_params(dataset_param)
                    template = template_maker.get_outputs(
                        idx=int(params['stop_after_stage'].m))
                    if not isinstance(template, Data):
                        raise AssertionError(
                            'Template output is not a Data object, instead is '
                            'type {0}'.format(type(template)))
                    if not template.contains_muons:
                        raise AssertionError(
                            'Template output does not contain muons')

                    output = template.histogram(
                        kinds='muons',
                        binning=fit_binning,
                        # NOTE: weights cancel in fraction
                        weights_col=None,
                        errors=False,
                        name='muons')
                    map_dict[run] = output

                nom_map = map_dict[nominal]
                fracdiff_map_dict = OrderedDict()
                for run in runs:
                    mask = ~(nom_map.hist == 0.)
                    div = np.zeros(nom_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(map_dict[run].hist[mask]) /\
                            unp.nominal_values(nom_map.hist[mask])
                    fracdiff_map_dict[run] = Map(name='muons',
                                                 binning=nom_map.binning,
                                                 hist=div)

                delta_runs = np.array([float(x)
                                       for x in runs]) - float(nominal)

                coeff_binning = OneDimBinning(name='coeff',
                                              num_bins=degree,
                                              is_lin=True,
                                              domain=[-1, 1])
                combined_binning = fit_binning + coeff_binning

                pvals_hist = np.empty(
                    tuple(map(int, combined_binning.shape)), dtype=object)
                hists = [fracdiff_map_dict[run].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_map_dict:
                        y_values.append(
                            unp.nominal_values(
                                fracdiff_map_dict[run].hist[idx]))
                        y_sigma.append(
                            unp.std_devs(
                                fracdiff_map_dict[run].hist[idx]))
                    if np.any(y_sigma):
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               sigma=y_sigma,
                                               p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(fit_func,
                                               delta_runs,
                                               y_values,
                                               p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_map = Map(name='muons',
                                 binning=combined_binning,
                                 hist=pvals_hist)
                if sys in sys_fit_coeffs:
                    sys_fit_coeffs[sys] = MapSet(
                        [sys_fit_coeffs[sys], params_map])
                else:
                    sys_fit_coeffs[sys] = params_map

        return sys_fit_coeffs
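
The two fit functions defined at the top of `_calculate_fit_coeffs` differ only in whether the constant term is pinned to 1 (forcing the curve through the nominal point, where the fractional deviation is exactly 1) or left as an extra free parameter. A minimal standalone sketch of the pinned variant with scipy's curve_fit:

import numpy as np
from scipy.optimize import curve_fit

# fractional deviations at systematic offsets, relative to nominal at 0
delta_runs = np.array([-1.0, 0.0, 1.0, 2.0])
y_values = np.array([0.8, 1.0, 1.2, 1.5])


def fit_through_nominal(vals, *poly_coeffs):
    # constant term fixed at 1, so f(0) == 1 by construction
    return np.polynomial.polynomial.polyval(vals, [1.] + list(poly_coeffs))


degree = 2  # number of free coefficients (linear and quadratic terms)
popt, pcov = curve_fit(fit_through_nominal, delta_runs, y_values,
                       p0=np.ones(degree))
print(popt)  # fitted coefficients of the x and x**2 terms
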
Example #4
def main():
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')


    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        runs = eval(cfg.get(sys, 'runs'))
        #print "runs ", runs
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param:
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                param.value = value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)


            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma, p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail

                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=( 4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j)+ i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    #plt.snameter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.snameter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #            color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][..., d] = gaussian_filter(
                        outputs[name][..., d], sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning)-1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys,
                                                     args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys, args.tag, d, smooth),
                )
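
To apply a stored fit later, one would reload the JSON and evaluate the saved polynomial at a shifted systematic value. A minimal sketch, assuming the file layout written by the loop above and PISA's from_file helper; the path and the systematic value are illustrative:

import numpy as np
from pisa.utils.fileio import from_file

fits = from_file('out/dom_eff_sysfits_mytag_none.json')  # illustrative path
fit_fun = eval(fits['function'])  # the lambda string saved above

sys_value = 1.1  # hypothetical value of the systematic
delta = sys_value - fits['nominal']
for name in fits['map_names']:
    coeffs = np.asarray(fits[name])  # shape: binning shape + (degree,)
    # evaluate the polynomial bin-wise at the shifted systematic value
    scale = np.apply_along_axis(lambda p: fit_fun(delta, *p), -1, coeffs)
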
Example #5
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Run the hypersurface fit for a grid point.

    If `skip_successful` is true, do not run if the `fit_successful` flag is already
    True.
    """

    #TODO a lot of this is copied from fit_hypersurfaces in hypersurface.py, would be safer to make more OAOO
    #TODO Copy the param value storage stuff from fit_hypersurfaces across in the meantime

    assert os.path.isdir(fit_directory), "fit directory does not exist"

    gridpoint_json = os.path.join(fit_directory,
                                  f"gridpoint_{job_idx:06d}.json.bz2")
    gridpoint_data = from_json(gridpoint_json)

    if skip_successful and gridpoint_data["fit_successful"]:
        logging.info(
            f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    interpolation_param_spec = metadata["interpolation_param_spec"]

    # this is a pipeline configuration in the form of an OrderedDict
    nominal_dataset = metadata["nominal_dataset"]
    # Why can we still not load PISA objects from JSON that are inside a dict?! Grrr...
    nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
        nominal_dataset["pipeline_cfg"])
    # this is a list of pipeline configurations
    sys_datasets = metadata["sys_datasets"]
    for sys_dataset in sys_datasets:
        sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_dataset["pipeline_cfg"])
    # this is a dict of param_name : value pairs
    param_values = gridpoint_data["param_values"]
    # we do a redundant check to make sure the parameter values at this grid point are
    # correct
    interpolation_param_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    # the grid point index of this job
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for i, n in enumerate(interpolation_param_names):
        ms = "Inconsistent parameter values at grid point!"
        assert interpolation_param_spec[n]["values"][
            grid_idx[i]] == param_values[n], ms

    # now we need to adjust the values of the parameter in all pipelines for this point
    logging.info(f"updating pipelines with parameter values: {param_values}")
    for dataset in [nominal_dataset] + sys_datasets:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" not in stage_cfg.keys(): continue
            for param in interpolation_param_names:
                if param in stage_cfg["params"].names:
                    stage_cfg["params"][param].value = param_values[param]

    # these are the parameters of the hypersurface, NOT the ones we
    # interpolate over!
    hypersurface_params = []
    for param_state in metadata["hypersurface_params"]:
        hypersurface_params.append(HypersurfaceParam.from_state(param_state))

    def find_hist_stage(pipeline):
        """Locate the index of the hist stage in a pipeline."""
        hist_idx_found = False
        for i, s in enumerate(pipeline.stages):
            if s.__class__.__name__ == "hist":
                hist_idx = i
                hist_idx_found = True
                break
        if not hist_idx_found:
            raise RuntimeError(
                "Could not find histogram stage in pipeline, aborting.")
        return hist_idx

    # We create Pipeline objects, get their outputs and then forget about the Pipeline
    # object on purpose! The memory requirement to hold all systematic sets at the same
    # time is just too large, especially on the cluster. The way we do it below we
    # only need enough memory for one dataset at a time.

    for dataset in [nominal_dataset] + sys_datasets:
        pipeline = Pipeline(dataset["pipeline_cfg"])
        dataset["mapset"] = pipeline.get_outputs()
        # get the un-weighted event counts as well so that we can exclude bins
        # with too little statistics
        # First, find out which stage is the hist stage
        hist_idx = find_hist_stage(pipeline)
        pipeline.stages[hist_idx].unweighted = True
        dataset["mapset_unweighted"] = pipeline.get_outputs()
    del pipeline

    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        for dataset in [nominal_dataset] + sys_datasets:
            dataset["mapset"] = dataset["mapset"].combine_re(combine_regex)
            dataset["mapset_unweighted"] = dataset[
                "mapset_unweighted"].combine_re(combine_regex)

    minimum_mc = metadata["minimum_mc"]
    # Remove bins (i.e. set their count to zero) that have too few MC events
    for dataset in sys_datasets + [nominal_dataset]:
        for map_name in dataset["mapset"].names:
            insuff_mc = dataset["mapset_unweighted"][
                map_name].nominal_values < minimum_mc
            # Setting the hist to zero sets both nominal value and std_dev to zero
            dataset["mapset"][map_name].hist[insuff_mc] = 0.

    hypersurface_fit_kw = metadata["hypersurface_fit_kw"]
    hypersurfaces = collections.OrderedDict()
    # flag determining whether the hypersurface fit is run in log-space
    log = metadata["log"]
    for map_name in nominal_dataset["mapset"].names:
        nominal_map = nominal_dataset["mapset"][map_name]
        nominal_param_values = nominal_dataset["sys_params"]

        sys_maps = [
            sys_dataset["mapset"][map_name] for sys_dataset in sys_datasets
        ]
        sys_param_values = [
            sys_dataset["sys_params"] for sys_dataset in sys_datasets
        ]

        hypersurface = Hypersurface(
            # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites happen
            # and all the numbers get jumbled across the hypersurfaces of different maps
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log)

        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **hypersurface_fit_kw)

        logging.debug("\nFitted hypersurface report:\n%s" % hypersurface)
        hypersurfaces[map_name] = hypersurface

    gridpoint_data["hs_fit"] = hypersurfaces
    gridpoint_data["fit_successful"] = True

    to_json(gridpoint_data, gridpoint_json)
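
The mapping from a scalar `job_idx` to a point on the parameter grid above relies on np.ndindex enumerating the grid in row-major order; a standalone sketch:

import numpy as np

grid_shape = (3, 4)  # e.g. two interpolation params with 3 and 4 values
job_idx = 7
grid_idx = list(np.ndindex(grid_shape))[job_idx]
print(grid_idx)  # -> (1, 3)

# the inverse mapping, useful when submitting one job per grid point
assert np.ravel_multi_index(grid_idx, grid_shape) == job_idx
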
Example #6
def test_example_pipelines(ignore_gpu=False,
                           ignore_root=False,
                           ignore_missing_data=False):
    """Run example pipelines.

    Parameters
    ----------
    ignore_gpu : bool
        Do not count errors initializing a GPU as failures

    ignore_root : bool
        Do not count errors importing ROOT as failures

    ignore_missing_data : bool
        Do not count errors due to missing data files as failures

    """
    # Set up the lists of strings needed to search the error messages for
    # things to ignore, e.g. CUDA- and ROOT-related failures
    root_err_strings = ['ROOT', 'Roo', 'root', 'roo']
    cuda_err_strings = ['cuda']
    missing_data_string = ('Could not find resource "(.*)" in'
                           ' filesystem OR in PISA package.')

    example_directory = find_resource('settings/pipeline')
    settings_files = glob.glob(example_directory + '/*example*.cfg')

    num_configs = len(settings_files)
    failure_count = 0
    skip_count = 0

    for settings_file in settings_files:
        allow_error = False
        msg = ''
        try:
            logging.info('Instantiating pipeline from file "%s" ...',
                         settings_file)
            pipeline = Pipeline(settings_file)
            logging.info('    retrieving outputs...')
            _ = pipeline.get_outputs()

        except ImportError as err:
            exc = sys.exc_info()
            if any(errstr in str(err) for errstr in root_err_strings) and \
              ignore_root:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has ROOT dependencies'
                       ' (ROOT cannot be imported)' % settings_file)
            elif any(errstr in str(err) for errstr in cuda_err_strings) and \
              ignore_gpu:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has cuda dependencies'
                       ' (pycuda cannot be imported)' % settings_file)
            else:
                failure_count += 1

        except IOError as err:
            exc = sys.exc_info()
            match = re.match(missing_data_string, str(err), re.M | re.I)
            if match is not None and ignore_missing_data:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has data that cannot'
                       ' be found in the local PISA environment' %
                       settings_file)
            else:
                failure_count += 1

        except:  # pylint: disable=bare-except
            exc = sys.exc_info()
            failure_count += 1

        else:
            exc = None

        finally:
            if exc is not None:
                if allow_error:
                    logging.warning(msg)
                else:
                    logging.error(
                        '    FAILURE! %s failed to run. Please review the'
                        ' error message below and fix the problem. Continuing'
                        ' with any other configs now...', settings_file)
                    for line in format_exception(*exc):
                        for sub_line in line.splitlines():
                            logging.error(' ' * 4 + sub_line)
            else:
                logging.info('    Seems fine!')

    if skip_count > 0:
        logging.warning('%d of %d example pipeline config files were skipped',
                        skip_count, num_configs)

    if failure_count > 0:
        msg = ('<< FAIL : test_example_pipelines : (%d of %d EXAMPLE PIPELINE'
               ' CONFIG FILES FAILED) >>' % (failure_count, num_configs))
        logging.error(msg)
        raise Exception(msg)

    logging.info('<< PASS : test_example_pipelines >>')
Example #7
def test_example_pipelines(path="settings/pipeline", verbosity=Levels.WARN):
    """Run pipelines from any "*.cfg" config files found at `path`."""
    path = find_resource(path)
    settings_files = glob.glob(path + "/*.cfg")

    failures = []
    successes = []

    for settings_file in settings_files:
        try:
            # NOTE: Force output of info on which settings file is being
            # instantiated and run, as warnings emitted by individual stages
            # are not as useful if we don't know which pipeline config is being
            # run

            set_verbosity(Levels.INFO)
            logging.info(f'Instantiating Pipeline with "{settings_file}" ...')

            set_verbosity(Levels.WARN)
            pipeline = Pipeline(settings_file)

            set_verbosity(Levels.INFO)
            logging.info(
                f'Running Pipeline instantiated from "{settings_file}" ...')

            set_verbosity(Levels.WARN)
            pipeline.get_outputs()

        except Exception as err:
            failures.append(settings_file)

            msg = f"<< FAILURE IN PIPELINE : {settings_file} >>"
            set_verbosity(verbosity)
            logging.error("=" * len(msg))
            logging.error(msg)
            logging.error("=" * len(msg))

            # Reproduce the error with full output

            set_verbosity(Levels.TRACE)
            try:
                pipeline = Pipeline(settings_file)
                pipeline.get_outputs()
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error("#" * len(msg))

        else:
            successes.append(settings_file)

        finally:
            set_verbosity(verbosity)

    # Summarize results

    set_verbosity(verbosity)
    logging.info("<< EXAMPLE PIPELINES : "
                 f"{len(successes)} succeeded and {len(failures)} failed >>")

    # Exit with error if any failures

    if failures:
        raise Exception(f"{len(failures)} example pipeline(s) failed:\n  " +
                        ", ".join(f'"{f}"' for f in failures))
Example #8
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute
    # value operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(m):
        newmap = deepcopy(m)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s___%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
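
A minimal usage sketch for compare, assuming two stored MapSet JSON files; the file names are illustrative:

summary_stats, diff, fract_diff, asymm = compare(
    outdir='comparisons',
    ref=['ref_mapset.json.bz2'],    # illustrative stored MapSet
    ref_label='ref',
    test=['test_mapset.json.bz2'],  # illustrative stored MapSet
    test_label='test',
    png=True,   # produce PNG plots
    json=True,  # also store maps and summary stats to disk
)
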
def compare_pisa_self(config1, config2, testname1, testname2, outdir):
    """Compare baseline output of PISA 3 with a different version of itself"""
    logging.debug('>> Comparing %s with %s (both PISA)' %
                  (testname1, testname2))

    pipeline1 = Pipeline(config1)
    outputs1 = pipeline1.get_outputs()
    pipeline2 = Pipeline(config2)
    outputs2 = pipeline2.get_outputs()

    if '5-stage' in testname1:
        cake1_trck_map = outputs1.combine_wildcard('*_trck')
        cake1_cscd_map = outputs1.combine_wildcard('*_cscd')
        cake1_trck_map_to_plot = {}
        cake1_trck_map_to_plot['ebins'] = \
            cake1_trck_map.binning['reco_energy'].bin_edges.magnitude
        cake1_trck_map_to_plot['czbins'] = \
            cake1_trck_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_trck_map_to_plot['map'] = cake1_trck_map.hist
        cake1_trck_events = np.sum(cake1_trck_map_to_plot['map'])
        cake1_cscd_map_to_plot = {}
        cake1_cscd_map_to_plot['ebins'] = \
            cake1_cscd_map.binning['reco_energy'].bin_edges.magnitude
        cake1_cscd_map_to_plot['czbins'] = \
            cake1_cscd_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_cscd_map_to_plot['map'] = cake1_cscd_map.hist
        cake1_cscd_events = np.sum(cake1_cscd_map_to_plot['map'])
    elif '4-stage' in testname1:
        cake1_both_map = outputs1.combine_wildcard('*')
        cake1_trck_map_to_plot = {}
        cake1_trck_map_to_plot['ebins'] = \
            cake1_both_map.binning['reco_energy'].bin_edges.magnitude
        cake1_trck_map_to_plot['czbins'] = \
            cake1_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_trck_map_to_plot['map'] = \
            cake1_both_map.split(
                dim='pid',
                bin='trck'
            ).hist
        cake1_trck_events = np.sum(cake1_trck_map_to_plot['map'])
        cake1_cscd_map_to_plot = {}
        cake1_cscd_map_to_plot['ebins'] = \
            cake1_both_map.binning['reco_energy'].bin_edges.magnitude
        cake1_cscd_map_to_plot['czbins'] = \
            cake1_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_cscd_map_to_plot['map'] = \
            cake1_both_map.split(
                dim='pid',
                bin='cscd'
            ).hist
        cake1_cscd_events = np.sum(cake1_cscd_map_to_plot['map'])
    else:
        raise ValueError("Should be comparing 4-stage or 5-stage PISAs.")

    if '5-stage' in testname2:
        cake2_trck_map = outputs2.combine_wildcard('*_trck')
        cake2_cscd_map = outputs2.combine_wildcard('*_cscd')
        cake2_trck_map_to_plot = {}
        cake2_trck_map_to_plot['ebins'] = \
            cake2_trck_map.binning['reco_energy'].bin_edges.magnitude
        cake2_trck_map_to_plot['czbins'] = \
            cake2_trck_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_trck_map_to_plot['map'] = cake2_trck_map.hist
        cake2_trck_events = np.sum(cake2_trck_map_to_plot['map'])
        cake2_cscd_map_to_plot = {}
        cake2_cscd_map_to_plot['ebins'] = \
            cake2_cscd_map.binning['reco_energy'].bin_edges.magnitude
        cake2_cscd_map_to_plot['czbins'] = \
            cake2_cscd_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_cscd_map_to_plot['map'] = cake2_cscd_map.hist
        cake2_cscd_events = np.sum(cake2_cscd_map_to_plot['map'])
    elif '4-stage' in testname2:
        cake2_both_map = outputs2.combine_wildcard('*')
        cake2_trck_map_to_plot = {}
        cake2_trck_map_to_plot['ebins'] = \
            cake2_both_map.binning['reco_energy'].bin_edges.magnitude
        cake2_trck_map_to_plot['czbins'] = \
            cake2_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_trck_map_to_plot['map'] = \
            cake2_both_map.split(
                dim='pid',
                bin='trck'
            ).hist
        cake2_trck_events = np.sum(cake2_trck_map_to_plot['map'])
        cake2_cscd_map_to_plot = {}
        cake2_cscd_map_to_plot['ebins'] = \
            cake2_both_map.binning['reco_energy'].bin_edges.magnitude
        cake2_cscd_map_to_plot['czbins'] = \
            cake2_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_cscd_map_to_plot['map'] = \
            cake2_both_map.split(
                dim='pid',
                bin='cscd'
            ).hist
        cake2_cscd_events = np.sum(cake2_cscd_map_to_plot['map'])
    else:
        raise ValueError("Should be comparing 4-stage or 5-stage PISAs.")
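
    # The repeated map-to-dict blocks above could be factored with a helper
    # along these lines (a sketch, assuming only the Map API already used
    # here: .binning, .hist and .split(dim=..., bin=...)); it is not part
    # of the original module:
    #
    #     def map_to_plot_dict(pisa_map, pid_bin=None):
    #         """Convert a PISA Map to the plain dict used for plotting."""
    #         if pid_bin is not None:
    #             hist = pisa_map.split(dim='pid', bin=pid_bin).hist
    #         else:
    #             hist = pisa_map.hist
    #         return {
    #             'ebins': pisa_map.binning['reco_energy'].bin_edges.magnitude,
    #             'czbins': pisa_map.binning['reco_coszen'].bin_edges.magnitude,
    #             'map': hist,
    #         }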

    max_diff_ratio, max_diff = plot_comparisons(ref_map=cake1_trck_map_to_plot,
                                                new_map=cake2_trck_map_to_plot,
                                                ref_abv=testname1,
                                                new_abv=testname2,
                                                outdir=outdir,
                                                subdir='recopidcombinedchecks',
                                                stagename=None,
                                                servicename='recopid',
                                                name='trck',
                                                texname=r'\rm{trck}',
                                                shorttitles=True,
                                                ftype=FMT)

    max_diff_ratio, max_diff = plot_comparisons(ref_map=cake1_cscd_map_to_plot,
                                                new_map=cake2_cscd_map_to_plot,
                                                ref_abv=testname1,
                                                new_abv=testname2,
                                                outdir=outdir,
                                                subdir='recopidcombinedchecks',
                                                stagename=None,
                                                servicename='recopid',
                                                name='cscd',
                                                texname=r'\rm{cscd}',
                                                shorttitles=True,
                                                ftype=FMT)

    print_event_rates(testname1=testname1,
                      testname2=testname2,
                      kind='trck',
                      map1_events=cake1_trck_events,
                      map2_events=cake2_trck_events)
    print_event_rates(testname1=testname1,
                      testname2=testname2,
                      kind='cscd',
                      map1_events=cake1_cscd_events,
                      map2_events=cake2_cscd_events)

    print_event_rates(testname1=testname1,
                      testname2=testname2,
                      kind='all',
                      map1_events=cake1_trck_events + cake1_cscd_events,
                      map2_events=cake2_trck_events + cake2_cscd_events)

    return pipeline2
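
# Hypothetical usage of the self-comparison above (config paths and test
# names are placeholders, not from the source); the testname arguments must
# contain '5-stage' or '4-stage', since the branches above key off those
# substrings:
#
#     pipe = compare_pisa_self('pisa_5stage.cfg', 'pisa_4stage.cfg',
#                              '5-stage-full', '4-stage-full',
#                              outdir='comparison_plots')
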
def compare_4stage(config, testname, outdir, oscfitfile):
    """
    Compare 4-stage output of PISA 3 with OscFit.
    """
    logging.debug('>> Working on baseline comparisons between both fitters.')
    logging.debug('>>> Doing %s test.' % testname)
    baseline_comparisons = from_file(oscfitfile)
    ref_abv = 'OscFit'

    pipeline = Pipeline(config)
    outputs = pipeline.get_outputs()

    total_pisa_events = 0.0
    total_oscfit_events = 0.0

    for nukey in baseline_comparisons.keys():

        baseline_map_to_plot = baseline_comparisons[nukey]
        oscfit_events = np.sum(baseline_map_to_plot['map'])

        cake_map = outputs.combine_wildcard('*')
        cake_map_to_plot = {}
        cake_map_to_plot['ebins'] = \
                cake_map.binning['reco_energy'].bin_edges.magnitude
        cake_map_to_plot['czbins'] = \
                cake_map.binning['reco_coszen'].bin_edges.magnitude
        if nukey == 'trck':
            texname = r'\rm{trck}'
            cake_map_to_plot['map'] = \
                cake_map.split(
                    dim='pid',
                    bin='trck'
                ).hist
        elif nukey == 'cscd':
            texname = r'\rm{cscd}'
            cake_map_to_plot['map'] = \
                cake_map.split(
                    dim='pid',
                    bin='cscd'
                ).hist
        else:
            raise ValueError(
                'Unexpected key "{0}" in OscFit baseline file; expected '
                '"trck" or "cscd".'.format(nukey))
        pisa_events = np.sum(cake_map_to_plot['map'])

        max_diff_ratio, max_diff = plot_comparisons(
            ref_map=baseline_map_to_plot,
            new_map=cake_map_to_plot,
            ref_abv=ref_abv,
            new_abv=testname,
            outdir=outdir,
            subdir='recopidcombinedchecks',
            stagename=None,
            servicename='baseline',
            name=nukey,
            texname=texname,
            shorttitles=True,
            ftype=FMT)

        print_event_rates(testname1=testname,
                          testname2='OscFit',
                          kind=nukey,
                          map1_events=pisa_events,
                          map2_events=oscfit_events)

        total_pisa_events += pisa_events
        total_oscfit_events += oscfit_events

    print_event_rates(testname1=testname,
                      testname2='OscFit',
                      kind='all',
                      map1_events=total_pisa_events,
                      map2_events=total_oscfit_events)

    return pipeline
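
# Hypothetical usage (paths are placeholders, not from the source); the
# oscfitfile is assumed to hold a dict keyed by 'trck' and 'cscd', each
# entry carrying at least a 'map' histogram, as read above:
#
#     pipe = compare_4stage('pisa_4stage.cfg', '4-stage-full',
#                           outdir='comparison_plots',
#                           oscfitfile='oscfit_baseline.json')
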
Example #11
    def _compute_outputs(self, inputs=None):
        """Compute histograms for output channels."""
        logging.debug('Entering roounfold._compute_outputs')
        self.fit_hash = deepcopy(inputs.metadata['fit_hash'])
        logging.trace('{0} roounfold fit_hash = '
                      '{1}'.format(inputs.metadata['name'], self.fit_hash))
        if self.random_state is not None:
            logging.trace('{0} roounfold random_state = '
                          '{1}'.format(inputs.metadata['name'],
                                       hash_obj(
                                           self.random_state.get_state())))
        if not isinstance(inputs, Data):
            raise AssertionError('inputs is not a Data object, instead is '
                                 'type {0}'.format(type(inputs)))
        self._data = inputs

        if not self.params['return_eff'].value:
            if len(self.output_names) > 1:
                raise AssertionError(
                    'Specified more than one NuFlavIntGroup as '
                    'signal, {0}'.format(self.output_names))
            self.output_str = str(self.output_names[0])

        real_data = self.params['real_data'].value
        if real_data:
            logging.debug('Using real data')
            if 'nuall' not in self._data:
                raise AssertionError(
                    'When using real data, input Data object must contain '
                    'only one element "nuall" containing the data, instead it '
                    'contains elements {0}'.format(self._data.keys()))
            if self.disk_cache is None:
                raise AssertionError(
                    'No disk_cache specified from which to load - using real '
                    'data requires objects such as the response object to be '
                    'cached to disk.')

        if self.params['optimize_reg'].value and real_data:
            raise AssertionError(
                'Cannot optimize the regularisation if using real data.')
        if int(self.params['stat_fluctuations'].m) != 0 and real_data:
            raise AssertionError(
                'Cannot do Poisson fluctuations if using real data.')
        if self.params['return_eff'].value and real_data:
            raise AssertionError(
                'Returning efficiency maps is not implemented when using '
                'real data.'
            )

        if self.params['return_eff'].value:
            fin_data = self._data
            # Load generator level data for signal
            unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
            pipeline_cfg = from_file(unfold_pipeline_cfg)
            template_maker = Pipeline(pipeline_cfg)
            gen_data = template_maker.get_outputs()

            fin_data = fin_data.transform_groups(self.output_names)
            gen_data = gen_data.transform_groups(self.output_names)

            efficiencies = []
            assert set(fin_data.keys()) == set(gen_data.keys())
            for fig in fin_data.keys():
                figd_f = fin_data[fig]
                figd_g = gen_data[fig]
                inv_eff = self._get_inv_eff(figd_f, figd_g, self.true_binning,
                                            fig)

                i_mask = ~(inv_eff == 0.)
                eff = unp.uarray(np.zeros(self.true_binning.shape),
                                 np.zeros(self.true_binning.shape))
                eff[i_mask] = 1. / inv_eff[i_mask]
                efficiencies.append(
                    Map(name=fig, hist=eff, binning=self.true_binning))
            return MapSet(efficiencies)
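
        # A toy illustration (not from the original module) of the masked
        # inverse above: bins with zero inverse efficiency are left at zero
        # rather than divided by. Requires the `uncertainties` package.
        #
        #     import numpy as np
        #     from uncertainties import unumpy as unp
        #
        #     inv_eff = unp.uarray([2., 0., 4.], [0.1, 0., 0.2])
        #     eff = unp.uarray(np.zeros(3), np.zeros(3))
        #     mask = unp.nominal_values(inv_eff) != 0.
        #     eff[mask] = 1. / inv_eff[mask]
        #     # -> [0.5+/-0.025, 0.0+/-0, 0.25+/-0.0125]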

        # TODO(shivesh): [   TRACE] None of the selections ['iron', 'nh'] found in this pipeline.
        # TODO(shivesh): Fix "smearing_matrix" memory leak
        # TODO(shivesh): Fix unweighted unfolding
        # TODO(shivesh): different algorithms
        # TODO(shivesh): implement handling of 0 division inside Map objects
        if real_data:
            unfold_map = self.unfold_real_data()
        else:
            unfold_map = self.unfold_mc()

        return MapSet([unfold_map])
Example #12
def main():
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform priors
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            # Delta-LLH measured from the prior's maximum-likelihood point
            dllh = np.max(llh) - llh

            mllh_idx = np.argmax(llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]
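
        # Worked toy example (not from the source) of the scan above: for a
        # unit Gaussian prior, llh(x) = -x**2 / 2, so delta-LLH crosses
        # SIGMA = 0.5 at x = -1 and x = +1, i.e. the +/-1 sigma points:
        #
        #     scan = np.linspace(-3., 3., 1000)
        #     llh = -scan**2 / 2.
        #     dllh = np.max(llh) - llh
        #     peak = np.argmax(llh)
        #     lo = scan[np.argmin(np.abs(dllh[:peak] - 0.5))]         # ~ -1.0
        #     hi = scan[np.argmin(np.abs(dllh[peak:] - 0.5)) + peak]  # ~ +1.0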

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([l_out]),
                test_label=r'-sigma',
                **args)
        compare(outdir=u_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([u_out]),
                test_label=r'+sigma',
                **args)

        l_in_mapset = l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
                ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles)