def __init__(self, config, profile=False):
    """Build the pipeline from a configuration.

    Parameters
    ----------
    config : str, PISAConfigParser, or OrderedDict
        A str or PISAConfigParser is first passed through
        `parse_pipeline_config`; an OrderedDict is used as given.
    profile : bool
        Stored on the instance as `_profile`.

    Raises
    ------
    TypeError
        If `config` is not one of the accepted types.

    """
    needs_parsing = isinstance(config, (str, PISAConfigParser))
    if not (needs_parsing or isinstance(config, OrderedDict)):
        raise TypeError(
            "`config` passed is of type %s but must be string,"
            " PISAConfigParser, or OrderedDict" % type(config).__name__
        )
    if needs_parsing:
        config = parse_pipeline_config(config=config)

    self.pisa_version = None

    # General pipeline settings are read from the 'pipeline' entry
    pipeline_settings = config['pipeline']
    self.name = pipeline_settings['name']
    self.data = ContainerSet(self.name)
    self.detector_name = pipeline_settings['detector_name']
    self.output_binning = pipeline_settings['output_binning']
    self.output_key = pipeline_settings['output_key']

    self._profile = profile
    self._stages = []
    self._config = config
    # Stages are instantiated only after all settings above are stored
    self._init_stages()
    self._source_code_hash = None
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the kde stage.

    Builds the example pipeline with its histogram stage swapped for a KDE
    stage, then checks that bootstrapping with different seeds gives different
    maps while re-using a seed reproduces the same map.
    """
    set_verbosity(verbosity)

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # Containers with too few events must be removed, otherwise the KDE fails
    # simply because too few distinct events are in one of the PID channels
    # after bootstrapping.
    loader_cfg = example_cfg[("data", "simple_data_loader")]
    loader_cfg["output_names"] = ["numu_cc", "numubar_cc"]

    kde_stage_cfg = OrderedDict(
        [
            ("apply_mode", example_cfg[("utils", "hist")]["apply_mode"]),
            ("calc_mode", "events"),
            ("bootstrap", False),
            ("bootstrap_seed", 0),
            ("bootstrap_niter", 5),
        ]
    )

    # Swap the histogram stage for the KDE stage in a copy of the config
    kde_pipe_cfg = deepcopy(example_cfg)
    del kde_pipe_cfg[("utils", "hist")]
    kde_pipe_cfg[("utils", "kde")] = kde_stage_cfg

    # No errors in the baseline since bootstrapping is not enabled there
    kde_pipe_cfg["pipeline"]["output_key"] = "weights"

    # Baseline map
    dmaker = DistributionMaker([kde_pipe_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    # Different seeds must give different maps and the same seed the same map.
    # Bootstrapping is switched on in place, without re-loading everything,
    # to save time.
    pipeline = dmaker.pipelines[0]
    pipeline.output_key = ("weights", "errors")
    kde_stage = pipeline.stages[-1]
    kde_stage.bootstrap = True

    map_seed0 = dmaker.get_outputs(return_sum=True)[0]
    kde_stage.bootstrap_seed = 1
    map_seed1 = dmaker.get_outputs(return_sum=True)[0]

    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")

    assert not map_seed0 == map_seed1

    kde_stage.bootstrap_seed = 0
    map_seed0_reprod = dmaker.get_outputs(return_sum=True)[0]

    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
def __init__(self, config):
    """Build the pipeline from a configuration.

    Parameters
    ----------
    config : str, PISAConfigParser, or OrderedDict
        A str or PISAConfigParser is first passed through
        `parse_pipeline_config`; an OrderedDict is used as given.

    Raises
    ------
    TypeError
        If `config` is not one of the accepted types.

    Notes
    -----
    The 'detector_name' entry, if present, is popped from `config` itself, so
    an OrderedDict handed in by the caller is modified in place.

    """
    if isinstance(config, (str, PISAConfigParser)):
        config = parse_pipeline_config(config=config)
    elif not isinstance(config, OrderedDict):
        type_name = type(config).__name__
        raise TypeError("`config` passed is of type %s but must be string,"
                        " PISAConfigParser, or OrderedDict" % type_name)

    self.pisa_version = None
    self._stages = []

    # Falls back to None when the config has no 'detector_name' entry
    self._detector_name = config.pop('detector_name', None)
    self._config = config

    self._init_stages()
    self._source_code_hash = None
def create_mc_template(toymc_params, config_file=None, seed=None):
    '''
    Create MC template out of a pisa pipeline

    The pipeline config is parsed from `config_file`, the params of its
    ('data', 'pi_simple_signal') stage are updated with values taken from
    `toymc_params`, and a DistributionMaker built from that config is returned.
    If `seed` is not None it is used to seed numpy's global random state first.
    '''
    if seed is not None:
        np.random.seed(seed)

    pipeline_cfg = parse_pipeline_config(config_file)

    def _fixed_param(name, value):
        # Fixed parameter without prior or range
        return Param(name=name, value=value, prior=None, range=None,
                     is_fixed=True)

    # These should match the values of the config file, but we override them
    # just in case we need to change these later
    overrides = [
        _fixed_param('n_events_data', toymc_params.n_data),
        _fixed_param('signal_fraction', toymc_params.signal_fraction),
        _fixed_param('stats_factor', toymc_params.stats_factor),
        Param(name='mu', value=toymc_params.mu, prior=None, range=[0, 100],
              is_fixed=False),
        _fixed_param('sigma', toymc_params.sigma),
    ]

    signal_stage_cfg = pipeline_cfg[('data', 'pi_simple_signal')]
    signal_stage_cfg['params'].update(p=ParamSet(overrides))

    return DistributionMaker(pipeline_cfg)
def test_bootstrap():
    """Unit test for the bootstrap stage.

    Checks that different seeds give different maps, that re-using a seed
    reproduces the same map, and that mean and spread over 100 bootstrap
    samples agree with the nominal values and standard deviations of the
    un-bootstrapped baseline map.

    Raises
    ------
    RuntimeError
        If the bootstrapped pipeline contains no stage whose class is named
        "bootstrap".
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.utils.config_parser import parse_pipeline_config

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # We need to insert the bootstrap stage right after the data loading stage
    bootstrap_pipe_cfg = insert_bootstrap_after_data_loader(example_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_pipe_cfg.keys()))

    # get a baseline
    dmaker = DistributionMaker([example_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]

    # Make sure that different seeds produce different maps, and that the same
    # seed will produce the same map.
    dmaker = DistributionMaker([bootstrap_pipe_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # Find the index of the bootstrap stage. Fail loudly when there is none
    # rather than silently re-seeding whichever stage happens to be first.
    bootstrap_idx = None
    for i, stage in enumerate(dmaker.pipelines[0].stages):
        if stage.__class__.__name__ == "bootstrap":
            bootstrap_idx = i
    if bootstrap_idx is None:
        raise RuntimeError(
            "no `bootstrap` stage found in the bootstrapped pipeline"
        )

    def _map_for_seed(seed):
        # Without re-loading the entire pipeline, set the seed and call the
        # setup function to save time for the test.
        bootstrap_stage = dmaker.pipelines[0].stages[bootstrap_idx]
        bootstrap_stage.seed = seed
        bootstrap_stage.setup()
        return dmaker.get_outputs(return_sum=True)[0]

    map_seed1 = _map_for_seed(1)
    assert not map_seed0 == map_seed1

    map_seed0_reprod = _map_for_seed(0)
    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. They should be about the
    # size of the expectation from sum of weights-squared.
    nominal_values = np.stack(
        [_map_for_seed(seed).nominal_values for seed in range(100)]
    )

    # Empty bins give 0/0 or x/0; those entries are skipped by nanmean below
    with np.errstate(divide="ignore", invalid="ignore"):
        # ratio between the bootstrap nominal and the baseline nominal
        bs_nom_ratios = np.mean(nominal_values, axis=0) / map_baseline.nominal_values
        # and the standard deviation ratio as well
        bs_std_ratios = np.std(nominal_values, axis=0) / map_baseline.std_devs

    # assert that both nominal and standard deviation match the expectation
    # from baseline up to a small error
    assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
    # the standard deviations are a little harder to match in 100 samples
    assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
def test_pi_resample():
    """Unit test for the resampling stage.

    Runs the example pipeline with a coarsened output binning, once as is and
    once followed by a `pi_resample` stage that upsamples back to the original
    binning. Checks that the total count is preserved and that the modified
    chi-square against a fluctuated map is the same before and after
    upsampling.
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.log import logging
    from pisa.utils.comparisons import ALLCLOSE_KW
    from collections import OrderedDict
    from copy import deepcopy

    example_cfg = parse_pipeline_config('settings/pipeline/example.cfg')
    reco_binning = example_cfg[('utils', 'pi_hist')]['output_specs']
    coarse_binning = reco_binning.downsample(reco_energy=2, reco_coszen=2)
    assert coarse_binning.is_compat(reco_binning)
    # replace binning of output with coarse binning
    example_cfg[('utils', 'pi_hist')]['output_specs'] = coarse_binning

    # make another pipeline with an upsampling stage to the original binning
    upsample_cfg = deepcopy(example_cfg)
    pi_resample_cfg = OrderedDict()
    pi_resample_cfg['input_specs'] = coarse_binning
    pi_resample_cfg['output_specs'] = reco_binning
    pi_resample_cfg['scale_errors'] = True
    upsample_cfg[('utils', 'pi_resample')] = pi_resample_cfg

    example_maker = DistributionMaker([example_cfg])
    upsampled_maker = DistributionMaker([upsample_cfg])

    example_map = example_maker.get_outputs(return_sum=True)[0]
    example_map_upsampled = upsampled_maker.get_outputs(return_sum=True)[0]

    # First check: The upsampled map must have the same total count as the
    # original map
    assert np.isclose(
        np.sum(example_map.nominal_values),
        np.sum(example_map_upsampled.nominal_values),
    )

    # Check consistency of modified chi-square
    # ----------------------------------------
    # When the assumption holds that events are uniformly distributed over the
    # coarse bins, the modified chi-square should not change from upscaling the
    # maps. We test this by making a fluctuated coarse map and then upsampling
    # that map according to the assumption by bin volumes. We should find that
    # the modified chi-square between the coarse map and the coarse fluctuated
    # map is the same as the upsampled map and the upsampled fluctuated map.

    # It doesn't matter precisely how we fluctuate it here, we just want any
    # different map...
    random_map_coarse = example_map.fluctuate(method='scaled_poisson',
                                              random_state=42)
    random_map_coarse.set_errors(None)

    # This bit is an entirely independent implementation of the upsampling. The
    # count in every bin is scaled according to the ratio of weighted bin
    # volumes.
    upsampled_hist = np.zeros_like(example_map_upsampled.nominal_values)
    up_binning = example_map_upsampled.binning

    coarse_hist = np.array(random_map_coarse.nominal_values)
    coarse_binning = random_map_coarse.binning

    # Neither the coarse bin volumes nor the coarse bin edges depend on the
    # fine bin being processed, so compute them once up front.
    coarse_volumes = coarse_binning.weighted_bin_volumes(attach_units=False)
    coarse_edges = {
        dim: coarse_binning[dim].bin_edges.m for dim in up_binning.names
    }

    for bin_idx in np.ndindex(upsampled_hist.shape):
        one_bin = up_binning[bin_idx]
        fine_bin_volume = one_bin.weighted_bin_volumes(
            attach_units=False,
        ).squeeze().item()

        # the following is basically an independent implementation of
        # translate.lookup: for each dimension, find the coarse bin that
        # contains the middle point of the fine bin
        coarse_index = tuple(
            # digitize returns 1 for the first bin, hence the -1
            np.digitize(one_bin[dim].weighted_centers[0].m, coarse_edges[dim]) - 1
            for dim in up_binning.names
        )
        coarse_bin_volume = coarse_volumes[coarse_index].squeeze().item()

        upsampled_hist[bin_idx] = coarse_hist[coarse_index]
        upsampled_hist[bin_idx] *= fine_bin_volume
        upsampled_hist[bin_idx] /= coarse_bin_volume

    # done, at last!
    random_map_upsampled = Map(name="random_upsampled",
                               hist=upsampled_hist,
                               binning=up_binning)
    random_map_upsampled.set_errors(None)

    # After ALL THIS, we get the same modified chi-square from the coarse and
    # the upsampled pair of maps. Neat, huh?
    assert np.allclose(
        random_map_coarse.mod_chi2(example_map),
        random_map_upsampled.mod_chi2(example_map_upsampled),
        **ALLCLOSE_KW,
    )
    logging.info('<< PASS : pi_resample >>')
def main():
    """Run the 5-stage versus 4-stage reco/pid comparison tests.

    Parses the command line, loads both test pipeline configs, sets the
    requested weighting field on their reco params (and on the pid params of
    the 5-stage config), and passes deep copies of both configs to
    `do_comparisons` together with the OscFit reference file. With
    ``--oversampling`` the comparison is repeated for oversampling factors of
    5, 10, 20 and 50.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--oversampling', action='store_true', default=False,
                        help='Run oversampling tests i.e. use a finer binning'
                             ' through the truth stages in addition to the'
                             ' standard tests. You must flag this if you want'
                             ' it.')
    parser.add_argument('--weighting', type=str, default=None,
                        help='Name of the weighting field to use in the'
                             ' comparisons. This must correspond to a field in'
                             ' the events files being used.')
    parser.add_argument('--outdir', metavar='DIR', type=str, required=True,
                        help="Store all output plots to this directory. If"
                             " they don't exist, the script will make them,"
                             " including all subdirectories.")
    parser.add_argument('-v', action='count', default=None,
                        help='set verbosity level')
    args = parser.parse_args()
    set_verbosity(args.v)

    known_weights = [None, 'weighted_aeff']
    if args.weighting not in known_weights:
        logging.warning('%s weighting field not known to be in events file.'
                        ' Tests may not work in this case!' % args.weighting)

    def _stage_params(config, stage_kind):
        # Params of the first stage whose config key starts with `stage_kind`;
        # raises IndexError when the config has no such stage.
        stage_key = [k for k in config if k[0] == stage_kind][0]
        return config[stage_key]['params'].params

    # Want these for all tests
    pisa_standard_settings = os.path.join(
        'tests', 'settings', 'recopid_full_pipeline_5stage_test.cfg')
    pisa_standard_config = parse_pipeline_config(pisa_standard_settings)
    pisa_recopid_settings = os.path.join(
        'tests', 'settings', 'recopid_full_pipeline_4stage_test.cfg')
    pisa_recopid_config = parse_pipeline_config(pisa_recopid_settings)

    # Add weighting to pipeline according to user input
    # Need to add it to both reco and PID for standard config
    standard_reco_params = _stage_params(pisa_standard_config, 'reco')
    standard_reco_params.reco_weights_name.value = args.weighting
    standard_pid_params = _stage_params(pisa_standard_config, 'pid')
    standard_pid_params.pid_weights_name.value = args.weighting
    # Just needs adding to reco for joined recopid config
    recopid_reco_params = _stage_params(pisa_recopid_config, 'reco')
    recopid_reco_params.reco_weights_name.value = args.weighting

    # Load OscFit file for comparisons
    oscfitfile = os.path.join('tests', 'data', 'oscfit',
                              'OscFit1X600Baseline.json')

    # Use a readable label so it's clearer in logs and filenames; the params
    # above keep the actual (possibly None) weighting value.
    weighting_label = 'unweighted' if args.weighting is None else args.weighting
    logging.info("<<<< %s reco/pid Transformations >>>>" % weighting_label)

    # Perform baseline tests
    logging.info("<< No oversampling >>")
    do_comparisons(config1=deepcopy(pisa_standard_config),
                   config2=deepcopy(pisa_recopid_config),
                   oscfitfile=oscfitfile,
                   testname1='5-stage-%s' % weighting_label,
                   testname2='4-stage-%s' % weighting_label,
                   outdir=args.outdir)

    # Perform oversampled tests
    if args.oversampling:
        for oversample in [5, 10, 20, 50]:
            pisa_standard_oversampled_config = oversample_config(
                base_config=deepcopy(pisa_standard_config),
                oversample=oversample)
            pisa_recopid_oversampled_config = oversample_config(
                base_config=deepcopy(pisa_recopid_config),
                oversample=oversample)
            logging.info("<< Oversampling by %i >>" % (oversample))
            do_comparisons(
                config1=deepcopy(pisa_standard_oversampled_config),
                config2=deepcopy(pisa_recopid_oversampled_config),
                oscfitfile=oscfitfile,
                testname1='5-stage-%s-Oversampled%i' % (weighting_label,
                                                        oversample),
                testname2='4-stage-%s-Oversampled%i' % (weighting_label,
                                                        oversample),
                outdir=args.outdir)
def create_mc_template(toymc_params, config_file=None, seed=None,
                       keep_same_weight=True):
    '''
    Create MC template out of a pisa pipeline

    The pipeline config is parsed from `config_file`; the output binning of the
    ('data', 'pi_simple_signal') and ('likelihood', 'pi_generalized_llh_params')
    stages is replaced by `toymc_params.binning`, and the signal stage params
    are updated with values taken from `toymc_params`. A DistributionMaker
    built from that config is returned. If `seed` is not None it is used to
    seed numpy's global random state first.
    '''
    if seed is not None:
        np.random.seed(seed)

    pipeline_cfg = parse_pipeline_config(config_file)
    signal_key = ('data', 'pi_simple_signal')
    llh_key = ('likelihood', 'pi_generalized_llh_params')

    # Change binning
    pipeline_cfg[signal_key]['output_specs'] = toymc_params.binning
    pipeline_cfg[llh_key]['output_specs'] = toymc_params.binning

    # If keep_same_weight is True, turn off the mean adjust and pseudo weight
    # of pi_generalized_llh; otherwise turn both on
    adjust_weights = not keep_same_weight
    pipeline_cfg[llh_key]['with_mean_adjust'] = adjust_weights
    pipeline_cfg[llh_key]['with_pseudo_weight'] = adjust_weights

    # These should match the values of the config file, but we override them
    # just in case we need to change these later
    param_specs = [
        dict(name='n_events_data', value=toymc_params.n_data,
             range=None, is_fixed=True),
        dict(name='signal_fraction', value=toymc_params.signal_fraction,
             range=None, is_fixed=True),
        dict(name='stats_factor', value=toymc_params.stats_factor,
             range=None, is_fixed=True),
        dict(name='mu', value=toymc_params.mu,
             range=[0, 100], is_fixed=False),
        dict(name='sigma', value=toymc_params.sigma,
             range=None, is_fixed=True),
    ]
    overrides = [Param(prior=None, **spec) for spec in param_specs]

    pipeline_cfg[signal_key]['params'].update(p=ParamSet(overrides))

    return DistributionMaker(pipeline_cfg)