def assert_correct_scaling(pipeline_cfg, fixed_errors=False):
    """Run the pipeline and assert that scaling by a factor of two is correct.

    The pipeline is evaluated once as configured and once more after setting
    its `weight_scale` param to 2.0; the two summed outputs are then compared.

    Parameters
    ----------
    pipeline_cfg
        Pipeline configuration; wrapped in a one-element list and handed to
        `DistributionMaker`.
    fixed_errors : bool, optional
        If True, only the nominal values are expected to double while the
        standard deviations must be unchanged. If False, the entire output is
        expected to equal the original multiplied by 2.

    Raises
    ------
    AssertionError
        If the scaled output does not match the expectation.
    """
    dmaker = DistributionMaker([pipeline_cfg])
    # `return_sum` is a boolean flag; pass a real bool rather than a string
    # that merely happens to be truthy.
    out = dmaker.get_outputs(return_sum=True)[0]
    dmaker.pipelines[0].params.weight_scale = 2.0
    out2 = dmaker.get_outputs(return_sum=True)[0]
    if fixed_errors:
        # this is special: We expect that the nominal counts are multiplied, but
        # that the errors stay fixed (applies to set_variance errors)
        assert_array_equal(out.nominal_values * 2.0, out2.nominal_values)
        assert_array_equal(out.std_devs, out2.std_devs)
    else:
        assert out * 2.0 == out2
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the kde stage.

    Two things are checked on a pipeline assembled from `TEST_CONFIGS`:

    1. Switching `linearize_log_dims` changes the total of the output map,
       but by less than 5%.
    2. With bootstrapping enabled, different seeds give different maps while
       the same seed reproduces the same map.

    Parameters
    ----------
    verbosity
        Forwarded to `set_verbosity`.
    """
    set_verbosity(verbosity)

    # Assemble the pipeline config stage by stage; every stage gets its own
    # deep copy so the shared TEST_CONFIGS entries are never modified.
    test_cfg = deepcopy(TEST_CONFIGS.pipe_cfg)
    stage_sources = (
        (("data", "toy_event_generator"), "event_generator_cfg"),
        (("aeff", "weight"), "aeff_cfg"),
        (("utils", "kde"), "kde_cfg"),
    )
    for stage_key, cfg_attr in stage_sources:
        test_cfg[stage_key] = deepcopy(getattr(TEST_CONFIGS, cfg_attr))
    kde_key = ("utils", "kde")

    def summed_map(maker):
        """Return the first element of the maker's summed outputs."""
        return maker.get_outputs(return_sum=True)[0]

    # First evaluate with linearization switched off ...
    test_cfg[kde_key]["linearize_log_dims"] = False
    dmaker = DistributionMaker([test_cfg])
    map_baseline_no_linearization = summed_map(dmaker)

    # ... then with it switched on; this is the baseline for everything below.
    test_cfg[kde_key]["linearize_log_dims"] = True
    dmaker = DistributionMaker([test_cfg])
    map_baseline = summed_map(dmaker)
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    total_no_lin = np.sum(map_baseline_no_linearization.nominal_values)
    total_with_lin = np.sum(map_baseline.nominal_values)
    # Linearization has to have *some* effect on the result ...
    assert not (total_no_lin == total_with_lin)
    # ... but only a small one (< 5% on the total bin count). This catches,
    # for example, forgetting to *not* take the log when linearization is off:
    # the binning would then be lin while the KDE is log, leaving most bins
    # empty.
    assert np.abs(total_no_lin / total_with_lin - 1.0) < 0.05

    # Seed dependence. Bootstrapping is switched on in the already-loaded
    # pipeline, without re-loading everything, to save time.
    pipeline = dmaker.pipelines[0]
    pipeline.output_key = ("weights", "errors")
    kde_stage = pipeline.stages[-1]
    kde_stage.bootstrap = True
    map_seed0 = summed_map(dmaker)

    kde_stage.bootstrap_seed = 1
    map_seed1 = summed_map(dmaker)
    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")
    assert not map_seed0 == map_seed1

    kde_stage.bootstrap_seed = 0
    map_seed0_reprod = summed_map(dmaker)
    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
def test_kde_bootstrapping(verbosity=Levels.WARN):
    """Unit test for the kde stage.

    Starting from the example pipeline config, the histogram stage is swapped
    for a KDE stage. The test then checks that, with bootstrapping enabled,
    different seeds give different maps and the same seed reproduces the same
    map.

    Parameters
    ----------
    verbosity
        Forwarded to `set_verbosity`.
    """
    set_verbosity(verbosity)

    hist_key = ("utils", "hist")
    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")
    # Containers with too few events must go: after bootstrapping, one of the
    # PID channels could otherwise hold too few distinct events and the KDE
    # would fail for that reason alone.
    example_cfg[("data", "simple_data_loader")]["output_names"] = [
        "numu_cc",
        "numubar_cc",
    ]

    kde_stage_cfg = OrderedDict(
        [
            ("apply_mode", example_cfg[hist_key]["apply_mode"]),
            ("calc_mode", "events"),
            ("bootstrap", False),
            ("bootstrap_seed", 0),
            ("bootstrap_niter", 5),
        ]
    )

    # The KDE stage takes the place of the histogram stage
    kde_pipe_cfg = deepcopy(example_cfg)
    del kde_pipe_cfg[hist_key]
    kde_pipe_cfg[("utils", "kde")] = kde_stage_cfg
    # Bootstrapping is off for the baseline, so there are no errors to output
    kde_pipe_cfg["pipeline"]["output_key"] = "weights"

    dmaker = DistributionMaker([kde_pipe_cfg])

    def summed_map():
        """Return the first element of the maker's summed outputs."""
        return dmaker.get_outputs(return_sum=True)[0]

    map_baseline = summed_map()
    logging.debug(f"Baseline KDE'd map:\n{map_baseline}")

    # Seed dependence. Bootstrapping is switched on in the already-loaded
    # pipeline, without re-loading everything, to save time.
    pipeline = dmaker.pipelines[0]
    pipeline.output_key = ("weights", "errors")
    kde_stage = pipeline.stages[-1]
    kde_stage.bootstrap = True
    map_seed0 = summed_map()

    kde_stage.bootstrap_seed = 1
    map_seed1 = summed_map()
    logging.debug(f"Map with seed 0 is:\n{map_seed0}")
    logging.debug(f"Map with seed 1 is:\n{map_seed1}")
    assert not map_seed0 == map_seed1

    kde_stage.bootstrap_seed = 0
    map_seed0_reprod = summed_map()
    assert map_seed0 == map_seed0_reprod

    logging.info("<< PASS : kde_bootstrapping >>")
def serve(config, ref, port=DFLT_PORT):
    """Instantiate PISA objects and run server for processing requests.

    The server blocks in `serve_forever()`; when that call exits for any
    reason (including an exception such as KeyboardInterrupt) the listening
    socket is closed before the exception propagates.

    Parameters
    ----------
    config : str or iterable thereof
        Resource path(s) to pipeline config(s)

    ref : str
        Resource path to reference map

    port : int or str, optional
        Port to bind to on `DFLT_HOST`; converted with `int()`.

    """
    # Instantiate the objects here to save having to do this repeatedly
    dist_maker = DistributionMaker(config)
    # Keep the loaded reference under its own name instead of rebinding the
    # `ref` argument (a path) to an object of a different type.
    ref_maps = MapSet.from_json(ref)

    # Define server as a closure such that it captures the above-instantiated objects
    class MyTCPHandler(SocketServer.BaseRequestHandler):
        """
        The request handler class for our server.

        It is instantiated once per connection to the server, and must override
        the handle() method to implement communication to the client.

        See SocketServer.BaseRequestHandler for documentation of args.
        """
        def handle(self):
            try:
                param_values = receive_obj(self.request)
            except ConnectionClosed:
                # Nothing was received, so there is nothing to answer
                return
            dist_maker._set_rescaled_free_params(param_values)  # pylint: disable=protected-access
            test_map = dist_maker.get_outputs(return_sum=True)[0]
            llh = test_map.llh(
                expected_values=ref_maps,
                binned=False,  # return sum over llh from all bins (not per-bin llh's)
            )
            send_obj(llh, self.request)

    server = SocketServer.TCPServer((DFLT_HOST, int(port)), MyTCPHandler)
    try:
        print("llh server started on {}:{}".format(DFLT_HOST, port))
        server.serve_forever()
    finally:
        # Release the listening socket however serve_forever() was left
        server.server_close()
def __init__(self, pipelines, label=None, shared_params=None):
    """Group pipelines by detector name and build one DistributionMaker each.

    Parameters
    ----------
    pipelines : str, PISAConfigParser, OrderedDict, Pipeline, or iterable thereof
        A single pipeline specification is wrapped in a list. Every element
        that is not already a `Pipeline` is passed to `Pipeline(...)`.

    label : optional
        Stored as `self.label`.

    shared_params : None or sequence, optional
        Names of params shared between detectors. None is replaced by an
        empty list.

    Raises
    ------
    NameError
        If more than one detector name is present and at least one pipeline
        has no detector name, or if a shared param is a free param in fewer
        than two of the per-detector DistributionMakers.

    """
    self.label = label
    self._source_code_hash = None

    # Identity test: `== None` would dispatch to the argument's own `__eq__`
    # (elementwise for array-likes) instead of just checking for None.
    self.shared_params = [] if shared_params is None else shared_params

    if isinstance(pipelines, (str, PISAConfigParser, OrderedDict, Pipeline)):
        pipelines = [pipelines]

    # Pipelines are first collected into one list per detector name;
    # `det_names[i]` is the name belonging to group `i`.
    self._distribution_makers, self.det_names = [], []
    for pipeline in pipelines:
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)

        name = pipeline._detector_name
        if name in self.det_names:
            self._distribution_makers[self.det_names.index(name)].append(
                pipeline)
        else:
            self._distribution_makers.append([pipeline])
            self.det_names.append(name)

    if None in self.det_names and len(self.det_names) > 1:
        raise NameError(
            'At least one of the used pipelines has no detector_name.')

    # Replace each group of pipelines by a DistributionMaker built from it
    self._distribution_makers = [
        DistributionMaker(pipelines=detector_pipelines)
        for detector_pipelines in self._distribution_makers
    ]

    for sp in self.shared_params:
        n_free = sum(
            sp in distribution_maker.params.free.names
            for distribution_maker in self._distribution_makers
        )
        if n_free < 2:
            raise NameError(
                'Shared param %s only a free param in less than 2 detectors.'
                % sp)
def create_mc_template(toymc_params, config_file=None, seed=None):
    '''
    Create MC template out of a pisa pipeline

    The pipeline config is parsed from `config_file`, the params of its
    ('data', 'pi_simple_signal') stage are updated with values taken from
    `toymc_params`, and a DistributionMaker built from the result is returned.
    If `seed` is not None it is used to seed numpy's global random state first.
    '''
    if seed is not None:
        np.random.seed(seed)

    pipeline_cfg = parse_pipeline_config(config_file)

    def fixed_param(name, value):
        '''Build a fixed Param without prior or range.'''
        return Param(name=name, value=value, prior=None, range=None,
                     is_fixed=True)

    # These should match the values of the config file, but we override them
    # just in case we need to change these later. `mu` is the only param that
    # is left free.
    overrides = [
        fixed_param('n_events_data', toymc_params.n_data),
        fixed_param('signal_fraction', toymc_params.signal_fraction),
        fixed_param('stats_factor', toymc_params.stats_factor),
        Param(name='mu', value=toymc_params.mu, prior=None, range=[0, 100],
              is_fixed=False),
        fixed_param('sigma', toymc_params.sigma),
    ]

    stage_params = pipeline_cfg[('data', 'pi_simple_signal')]['params']
    stage_params.update(p=ParamSet(overrides))

    return DistributionMaker(pipeline_cfg)
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
        Passed to `DistributionMaker` to build the hypothesis maker
    steps
        Passed to `Analysis.scan`; also part of each output file name
    hypo_param_selections
        Param selections applied to the hypothesis maker
    outdir
        Output directory (user and environment variables are expanded)
    minimizer_settings
        Loaded via `from_file` before being passed to `Analysis.scan`
    metric
        Passed to `Analysis.scan`; also part of each output file name
    debug_mode
        Passed to `Analysis.scan`

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    Raises
    ------
    IOError
        If the output file for a param already exists

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)
    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)
    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    # Arguments that are the same for every single-param scan
    common_scan_kwargs = dict(
        data_dist=data_dist,
        hypo_maker=hypo_maker,
        hypo_param_selections=hypo_param_selections,
        metric=metric,
        steps=steps,
        only_points=None,
        outer=True,
        profile=False,
        minimizer_settings=minimizer_settings,
        debug_mode=debug_mode,
    )

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        # Remembered so the param can be put back once its scan is done
        nominal_value = param.value

        fname = '{:s}_{:d}_steps_{:s}_scan.json'.format(
            param.name, steps, metric
        )
        outfile = join(outdir, fname)
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile)
            )

        scan_result = analysis.scan(
            param_names=param.name, outfile=outfile, **common_scan_kwargs
        )
        results[param.name] = scan_result
        to_file(scan_result, outfile)

        param.value = nominal_value
        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
def test_bootstrap():
    """Unit test for the bootstrap stage.

    Checks that (1) different seeds give different maps while the same seed
    reproduces the same map, and (2) over 100 bootstrap samples the mean and
    the standard deviation of the bin contents agree on average with the
    nominal values and standard deviations of the un-bootstrapped baseline.

    Raises
    ------
    AssertionError
        If no bootstrap stage is found in the pipeline or any check fails.
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.utils.config_parser import parse_pipeline_config

    example_cfg = parse_pipeline_config("settings/pipeline/example.cfg")

    # We need to insert the bootstrap stage right after the data loading stage
    bootstrap_pipe_cfg = insert_bootstrap_after_data_loader(example_cfg, seed=0)

    logging.debug("bootstrapped pipeline stage order:")
    logging.debug(list(bootstrap_pipe_cfg.keys()))

    # get a baseline
    dmaker = DistributionMaker([example_cfg])
    map_baseline = dmaker.get_outputs(return_sum=True)[0]

    # Make sure that different seeds produce different maps, and that the same seed will
    # produce the same map.
    dmaker = DistributionMaker([bootstrap_pipe_cfg])
    map_seed0 = dmaker.get_outputs(return_sum=True)[0]

    # Locate the bootstrap stage. Fail loudly if it is missing rather than
    # silently re-seeding whatever stage happens to sit at index 0.
    stages = dmaker.pipelines[0].stages
    bootstrap_indices = [
        i for i, stage in enumerate(stages)
        if stage.__class__.__name__ == "bootstrap"
    ]
    assert bootstrap_indices, "no bootstrap stage found in the pipeline"
    bootstrap_stage = stages[bootstrap_indices[-1]]

    def map_for_seed(seed):
        """Re-seed the bootstrap stage in place and return the summed map.

        Only the stage's setup function is re-run instead of re-loading the
        entire pipeline, to save time for the test.
        """
        bootstrap_stage.seed = seed
        bootstrap_stage.setup()
        return dmaker.get_outputs(return_sum=True)[0]

    map_seed1 = map_for_seed(1)
    assert not map_seed0 == map_seed1

    map_seed0_reprod = map_for_seed(0)
    assert map_seed0 == map_seed0_reprod

    # Quantify the variance of the resulting maps. They should be about the size of the
    # expectation from sum of weights-squared.
    nominal_values = np.stack(
        [map_for_seed(seed).nominal_values for seed in range(100)]
    )
    # Divisions by zero / invalid values yield inf/nan without warnings here
    with np.errstate(divide="ignore", invalid="ignore"):
        # calculate the ratio between the bootstrap nominal and the baseline nominal
        bs_nom_ratios = np.mean(nominal_values, axis=0) / map_baseline.nominal_values
        # and the standard deviation ratio as well
        bs_std_ratios = np.std(nominal_values, axis=0) / map_baseline.std_devs
    # assert that both nominal and standard deviation match the expectation from
    # baseline up to a small error
    assert np.abs(np.nanmean(bs_nom_ratios) - 1.0) < 0.01
    # the standard deviations are a little harder to match in 100 samples
    assert np.abs(np.nanmean(bs_std_ratios) - 1.0) < 0.02

    logging.info("<< PASS : bootstrap >>")
def systematics_tests(return_outputs=False):
    """Load the HypoTesting class and use it to do a systematic study in Asimov.

    This will take some input pipeline configuration and then turn each one of
    the systematics off in turn, doing a new hypothesis test each time. The
    user will have the option to fix this systematic to either the baseline or
    some shifted value (+/- 1 sigma, or appropriate). One also has the ability
    in the case of the latter to still fit with this systematically incorrect
    hypothesis."""
    # The docstring above is handed to `parse_args` as the command
    # description, so its wording is user-facing text.

    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args

    hypo_kwargs = parse_args(description=systematics_tests.__doc__,
                             command=systematics_tests)

    # Options consumed here rather than by the HypoTesting constructor are
    # popped off the dictionary.
    inject_wrong = hypo_kwargs.pop('inject_wrong')
    fit_wrong = hypo_kwargs.pop('fit_wrong')
    only_syst = hypo_kwargs.pop('only_syst')
    do_baseline = not hypo_kwargs.pop('skip_baseline')

    # Fitting the wrong hypothesis only makes sense if one is injected
    if fit_wrong and not inject_wrong:
        raise ValueError('You have specified to fit the systematically'
                         ' wrong hypothesis but have not specified to'
                         ' actually generate a systematically wrong'
                         ' hypothesis. If you want to flag "fit_wrong"'
                         ' please also flag "inject_wrong"')
    if fit_wrong:
        logging.info('Injecting a systematically wrong hypothesis while'
                     ' also allowing the minimiser to attempt to correct'
                     ' for it.')
    elif inject_wrong:
        logging.info('Injecting a systematically wrong hypothesis but'
                     ' NOT allowing the minimiser to attempt to correct'
                     ' for it. Hypothesis maker will be FIXED at the'
                     ' baseline value.')
    else:
        logging.info('A standard N-1 test will be performed where each'
                     ' systematic is fixed to the baseline value'
                     ' one-by-one.')

    # Normalize the `pipeline` filenames. The pipeline is required for this
    # test, so no None-handling is needed for it. The result is stored under
    # `*_maker`, the argument naming that the HypoTesting init accepts.
    pipeline_files = sorted(
        [normcheckpath(fname) for fname in hypo_kwargs.pop('pipeline')]
    )
    hypo_kwargs['h0_maker'] = pipeline_files

    # The param selections, however, can be different (or absent) per maker
    for prefix in ['h0', 'h1', 'data']:
        ps_name = prefix + '_param_selections'
        ps_str = hypo_kwargs[ps_name]
        hypo_kwargs[ps_name] = (
            None if ps_str is None
            else [x.strip().lower() for x in ps_str.split(',')]
        )

    # All three makers are instantiated from the same pipeline files
    h0_maker = DistributionMaker(pipeline_files)
    h1_maker = DistributionMaker(pipeline_files)
    h1_maker.select_params(hypo_kwargs['h1_param_selections'])
    data_maker = DistributionMaker(pipeline_files)
    hypo_kwargs['data_maker'] = data_maker
    hypo_kwargs['h1_maker'] = h1_maker
    hypo_kwargs['h0_maker'] = h0_maker

    # Without explicit data selections, data follows h0
    if hypo_kwargs['data_param_selections'] is None:
        hypo_kwargs['data_param_selections'] = \
            hypo_kwargs['h0_param_selections']
        hypo_kwargs['data_name'] = hypo_kwargs['h0_name']
    data_maker.select_params(hypo_kwargs['data_param_selections'])

    if only_syst is not None:
        for syst in only_syst:
            if syst not in h0_maker.params.free.names:
                raise ValueError(
                    'Systematic test requested to be performed on systematic'
                    ' %s but it does not appear in the free parameters of the'
                    ' pipeline passed to the script - %s.'
                    % (syst, h0_maker.params.free.names)
                )
        logging.info(
            'Performing chosen systematic test on just the following'
            ' systematics - %s.', only_syst
        )

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**hypo_kwargs)

    # Everything is set up so do the tests
    outputs = hypo_testing.asimov_syst_tests(  # pylint: disable=redefined-outer-name
        inject_wrong=inject_wrong,
        fit_wrong=fit_wrong,
        only_syst=only_syst,
        do_baseline=do_baseline,
        h0_name=hypo_kwargs['h0_name'],
        h1_name=hypo_kwargs['h1_name'],
        data_name=hypo_kwargs['data_name']
    )

    return outputs if return_outputs else None
# Script body: prepare the data and template DistributionMakers from the
# parsed command-line `args`, unless the output file is already present.
set_verbosity(args.v)
if os.path.isfile(args.outfile):
    # Never overwrite an existing result; only report it
    print("Output file ", args.outfile, " already existed, delete or remove it.")
else:
    if args.blind:
        # A blind run requires the 'fit' function on real 'data'
        # NOTE(review): these asserts are skipped when Python runs with -O
        assert (args.function == 'fit')
        assert (args.pseudo_data == 'data')
    # Without dedicated data settings, the template settings are used for
    # the data as well
    if args.data_settings is None:
        data_settings = args.template_settings
    else:
        data_settings = args.data_settings
    # NOTE(review): `data_maker` is not used within this excerpt; presumably
    # it is consumed further down in the script
    data_maker = DistributionMaker(data_settings)
    template_maker = DistributionMaker(args.template_settings)
    # An empty string means that no param was requested to be fixed
    if not args.fix_param == '':
        template_maker.params.fix(args.fix_param)
    if args.set_param is not None:
        # Each entry has the form "<param name>=<value>"
        for one_set_param in args.set_param:
            p_name, value = one_set_param.split("=")
            print("set_parm ", p_name, " to ", value)
            value = parse_quantity(value)
            # presumably keeps the nominal value with its units and drops any
            # uncertainty -- TODO confirm against `parse_quantity`
            value = value.n * value.units
            prm = template_maker.params[p_name]
            print("old", p_name, ".value for template= ", prm.value)
            prm.value = value
            # Hand the modified param back to the template maker
            template_maker.update_params(prm)
            print("new ", p_name, ".value for template= ", prm.value)
def main():
    """Produce asymmetry plots between the h0 and h1 hypotheses.

    Command-line arguments are parsed, a DistributionMaker is instantiated for
    each hypothesis (h1 falls back to the h0 pipelines if none are given), and
    the resulting maps are passed pairwise to `plot_asymmetry`. Three layouts
    are handled: PID as a separate stage (map names matching '*_tr*ck*'), PID
    as a binning dimension, and no PID at all.

    Raises
    ------
    ValueError
        If PID is a separate stage while summed output was requested, if the
        h0 and h1 map names differ, or if the h0 and h1 PID bin names differ.
    """
    args = parse_args()
    init_args_d = vars(args)

    # Options used by this script only are popped off, as they must not reach
    # the HypoTesting constructor.
    set_verbosity(init_args_d.pop('v'))
    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')
    atype = init_args_d.pop('atype')
    return_total = not init_args_d.pop('return_bits')

    # `*_pipeline` filenames are normalized and stored as `*_maker` (the
    # argument naming that the HypoTesting init accepts); the param selections
    # are split into lists.
    for hypo in ['h0', 'h1']:
        filenames = init_args_d.pop(hypo + '_pipeline')
        if filenames is not None:
            filenames = sorted(
                [normcheckpath(fname) for fname in filenames]
            )
        init_args_d[hypo + '_maker'] = filenames

        ps_name = hypo + '_param_selections'
        ps_str = init_args_d[ps_name]
        init_args_d[ps_name] = (
            None if ps_str is None
            else [x.strip().lower() for x in ps_str.split(',')]
        )

    # Dummies for the arguments we don't care about for making these plots
    dummy_args = (
        ('minimizer_settings', {}),
        ('data_is_data', None),
        ('fluctuate_data', None),
        ('fluctuate_fid', None),
        ('metric', 'chi2'),
    )
    for arg_name, dummy in dummy_args:
        init_args_d[arg_name] = dummy

    if init_args_d['h1_maker'] is None:
        init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    def hypothesis_maps(maker, param_selections):
        """Apply the selections to `maker` and return its output maps."""
        maker.select_params(param_selections)
        for pipeline in maker.pipelines:
            # Need a special case where PID is a separate stage
            if 'pid' in pipeline.stage_names:
                if return_total:
                    raise ValueError(
                        "PID is a separate stage but you have requested"
                        " return_total in the arguments to this script."
                    )
                return_sum = False
            else:
                return_sum = return_total
        maps = maker.get_outputs(return_sum=return_sum)
        # Assume just a singular pipeline used here.
        # Not sure how else to deal with PID as a separate stage.
        if not return_sum:
            maps = maps[0]
        return maps

    h0_maps = hypothesis_maps(
        hypo_testing.h0_maker, init_args_d['h0_param_selections']
    )
    h1_maps = hypothesis_maps(
        hypo_testing.h1_maker, init_args_d['h1_param_selections']
    )

    if sorted(h0_maps.names) != sorted(h1_maps.names):
        raise ValueError(
            "The output names of your h0 and h1 pipelines "
            "do not agree - %s and %s."%(
                sorted(h0_maps.names), sorted(h1_maps.names)
            )
        )

    # Labels built from the non-empty detector / selection strings
    det_sel = [
        part for part in (detector.strip(), selection.strip()) if part != ''
    ]
    det_sel_label = ' '.join(det_sel)
    if det_sel_label != '':
        det_sel_plot_label = det_sel_label + ', '
        det_sel_file_label = det_sel_label + '_'
    else:
        det_sel_plot_label = det_sel_label
        det_sel_file_label = det_sel_label
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    def plot_pair(h0_map, h1_map, fulltitle, savename):
        """Call `plot_asymmetry` with the arguments shared by every plot."""
        plot_asymmetry(
            h0_map=h0_map,
            h1_map=h1_map,
            h0_name='%s' % args.h0_name,
            h1_name='%s' % args.h1_name,
            fulltitle=fulltitle,
            savename=savename,
            outdir=args.logdir,
            atype=atype
        )

    # Need a special case where PID is a separate stage
    if fnmatch(''.join(h0_maps.names), '*_tr*ck*'):
        h0_trck_map = h0_maps.combine_wildcard('*_tr*ck')
        h1_trck_map = h1_maps.combine_wildcard('*_tr*ck')
        h0_cscd_map = h0_maps.combine_wildcard('*_c*sc*d*')
        h1_cscd_map = h1_maps.combine_wildcard('*_c*sc*d*')

        plot_pair(
            h0_trck_map,
            h1_trck_map,
            fulltitle='%sevents identified as track' % det_sel_plot_label,
            savename='%strck' % det_sel_file_label,
        )
        plot_pair(
            h0_cscd_map,
            h1_cscd_map,
            fulltitle=('%sevents identified as cascade'
                       % det_sel_plot_label),
            savename='%scscd' % det_sel_file_label,
        )

    # Otherwise, PID is assumed to be a binning dimension
    elif 'pid' in h0_maps[h0_maps.names[0]].binning.names:
        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)

            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            pid_names = h0_map.binning['pid'].bin_names
            if pid_names != h1_map.binning['pid'].bin_names:
                raise ValueError(
                    "h0 and h1 maps must have same PID bin names"
                    " in order to make the asymmetry plots"
                )
            if pid_names is None:
                logging.warning(
                    "There are no names given for the PID bins, thus "
                    "they will just be numbered in both the the plot "
                    "save names and titles."
                )
                pid_names = list(range(h0_map.binning['pid'].num_bins))

            for pid_name in pid_names:
                h0_to_plot = h0_map.split(
                    dim='pid',
                    bin=pid_name
                )
                h1_to_plot = h1_map.split(
                    dim='pid',
                    bin=pid_name
                )

                # Unnamed bins are addressed by index above and labelled
                # by number here
                if isinstance(pid_name, int):
                    pid_label = 'PID Bin %i' % (pid_name)
                else:
                    pid_label = pid_name

                plot_pair(
                    h0_to_plot,
                    h1_to_plot,
                    fulltitle=('%sevents identified as %s'
                               % (det_sel_plot_label, pid_label)),
                    savename=('%s_%s%s' % (map_name,
                                           det_sel_file_label,
                                           pid_label)),
                )

    # No PID information at all: one plot per map
    else:
        for map_name in h0_maps.names:
            h0_map = h0_maps[map_name]
            h0_map.set_errors(error_hist=None)

            h1_map = h1_maps[map_name]
            h1_map.set_errors(error_hist=None)

            plot_pair(
                h0_map,
                h1_map,
                fulltitle=('%sevents'%(det_sel_plot_label)),
                savename=('%s_%s' % (map_name, det_sel_file_label)),
            )
def compare(outdir, ref, ref_label, test, test_label, asymm_max=None, asymm_min=None, combine=None, diff_max=None, diff_min=None, fract_diff_max=None, fract_diff_min=None, json=False, pdf=False, png=False, ref_abs=False, ref_param_selections=None, sum=None, test_abs=False, test_param_selections=None): """Compare two entities. The result each entity specification is formatted into a MapSet and stored to disk, so that e.g. re-running a DistributionMaker is unnecessary to reproduce the results. Parameters ---------- outdir : string Store output plots to this directory ref : string or array of strings Pipeline settings config file that generates reference output, or a stored map or map set. Multiple pipelines, maps, or map sets are supported ref_abs : bool Use the absolute value of the reference plot for comparisons ref_label : string Label for reference ref_param-selections : string Param selections to apply to ref pipeline config(s). Not applicable if ref specifies stored map or map sets test : string or array of strings Pipeline settings config file that generates test output, or a stored map or map set. Multiple pipelines, maps, or map sets are supported test_abs : bool Use the absolute value of the test plot for comparisons test_label : string Label for test test_param_selections : None or string Param selections to apply to test pipeline config(s). Not applicable if test specifies stored map or map sets combine : None or string or array of strings Combine by wildcard string, where string globbing (a la command line) uses asterisk for any number of wildcard characters. Use single quotes such that asterisks do not get expanded by the shell. Multiple combine strings supported sum : None or int Sum over (and hence remove) the specified axis or axes. I.e., project the map onto remaining (unspecified) axis or axes json : bool Save output maps in compressed json (json.bz2) format pdf : bool Save plots in PDF format. 
If neither this nor png is specified, no plots are produced png : bool Save plots in PNG format. If neither this nor pdf is specfied, no plots are produced diff_min : None or float Difference plot vmin; if you specify only one of diff_min or diff_max, symmetric limits are automatically used (min = -max) diff_max : None or float Difference plot max; if you specify only one of diff_min or diff_max, symmetric limits are automatically used (min = -max) fract_diff_min : None or float Fractional difference plot vmin; if you specify only one of fract_diff_min or fract_diff_max, symmetric limits are automatically used (min = -max) fract_diff_max : None or float Fractional difference plot max; if you specify only one of fract_diff_min or fract_diff_max, symmetric limits are automatically used (min = -max) asymm_min : None or float Asymmetry plot vmin; if you specify only one of asymm_min or asymm_max, symmetric limits are automatically used (min = -max) asymm_max : None or float Fractional difference plot max; if you specify only one of asymm_min or asymm_max, symmetric limits are automatically used (min = -max) Returns ------- summary_stats : dict Dictionary containing a summary for each h Map processed diff : MapSet MapSet of the difference - (Test - Ref) fract_diff : MapSet MapSet of the fractional difference - (Test - Ref) / Ref asymm : MapSet MapSet of the asymmetric fraction difference or pull - (Test - Ref) / sqrt(Ref) """ ref_plot_label = ref_label if ref_abs and not ref_label.startswith('abs'): ref_plot_label = 'abs(%s)' % ref_plot_label test_plot_label = test_label if test_abs and not test_label.startswith('abs'): test_plot_label = 'abs(%s)' % test_plot_label plot_formats = [] if pdf: plot_formats.append('pdf') if png: plot_formats.append('png') diff_symm = True if diff_min is not None and diff_max is None: diff_max = -diff_min diff_symm = False if diff_max is not None and diff_min is None: diff_min = -diff_max diff_symm = False fract_diff_symm = True if 
fract_diff_min is not None and fract_diff_max is None: fract_diff_max = -fract_diff_min fract_diff_symm = False if fract_diff_max is not None and fract_diff_min is None: fract_diff_min = -fract_diff_max fract_diff_symm = False asymm_symm = True if asymm_max is not None and asymm_min is None: asymm_min = -asymm_max asymm_symm = False if asymm_min is not None and asymm_max is None: asymm_max = -asymm_min asymm_symm = False outdir = os.path.expanduser(os.path.expandvars(outdir)) mkdir(outdir) # Get the reference distribution(s) into the form of a test MapSet p_ref = None ref_source = None if isinstance(ref, Map): p_ref = MapSet(ref) ref_source = MAP_SOURCE_STR elif isinstance(ref, MapSet): p_ref = ref ref_source = MAPSET_SOURCE_STR elif isinstance(ref, Pipeline): if ref_param_selections is not None: ref.select_params(ref_param_selections) p_ref = ref.get_outputs() ref_source = PIPELINE_SOURCE_STR elif isinstance(ref, DistributionMaker): if ref_param_selections is not None: ref.select_params(ref_param_selections) p_ref = ref.get_outputs() ref_source = DISTRIBUTIONMAKER_SOURCE_STR else: if len(ref) == 1: try: ref_pipeline = Pipeline(config=ref[0]) except: pass else: ref_source = PIPELINE_SOURCE_STR if ref_param_selections is not None: ref_pipeline.select_params(ref_param_selections) p_ref = ref_pipeline.get_outputs() else: try: ref_dmaker = DistributionMaker(pipelines=ref) except: pass else: ref_source = DISTRIBUTIONMAKER_SOURCE_STR if ref_param_selections is not None: ref_dmaker.select_params(ref_param_selections) p_ref = ref_dmaker.get_outputs() if p_ref is None: try: p_ref = [Map.from_json(f) for f in ref] except: pass else: ref_source = MAP_SOURCE_STR p_ref = MapSet(p_ref) if p_ref is None: assert ref_param_selections is None assert len(ref) == 1, 'Can only handle one MapSet' try: p_ref = MapSet.from_json(ref[0]) except: pass else: ref_source = MAPSET_SOURCE_STR if p_ref is None: raise ValueError( 'Could not instantiate the reference Pipeline, DistributionMaker,' ' 
Map, or MapSet from ref value(s) %s' % ref) ref = p_ref logging.info('Reference map(s) derived from a ' + ref_source) # Get the test distribution(s) into the form of a test MapSet p_test = None test_source = None if isinstance(test, Map): p_test = MapSet(test) test_source = MAP_SOURCE_STR elif isinstance(test, MapSet): p_test = test test_source = MAPSET_SOURCE_STR elif isinstance(test, Pipeline): if test_param_selections is not None: test.select_params(test_param_selections) p_test = test.get_outputs() test_source = PIPELINE_SOURCE_STR elif isinstance(test, DistributionMaker): if test_param_selections is not None: test.select_params(test_param_selections) p_test = test.get_outputs() test_source = DISTRIBUTIONMAKER_SOURCE_STR else: if len(test) == 1: try: test_pipeline = Pipeline(config=test[0]) except: pass else: test_source = PIPELINE_SOURCE_STR if test_param_selections is not None: test_pipeline.select_params(test_param_selections) p_test = test_pipeline.get_outputs() else: try: test_dmaker = DistributionMaker(pipelines=test) except: pass else: test_source = DISTRIBUTIONMAKER_SOURCE_STR if test_param_selections is not None: test_dmaker.select_params(test_param_selections) p_test = test_dmaker.get_outputs() if p_test is None: try: p_test = [Map.from_json(f) for f in test] except: pass else: test_source = MAP_SOURCE_STR p_test = MapSet(p_test) if p_test is None: assert test_param_selections is None assert len(test) == 1, 'Can only handle one MapSet' try: p_test = MapSet.from_json(test[0]) except: pass else: test_source = MAPSET_SOURCE_STR if p_test is None: raise ValueError( 'Could not instantiate the test Pipeline, DistributionMaker, Map,' ' or MapSet from test value(s) %s' % test) test = p_test logging.info('Test map(s) derived from a ' + test_source) if combine is not None: ref = ref.combine_wildcard(combine) test = test.combine_wildcard(combine) if isinstance(ref, Map): ref = MapSet([ref]) if isinstance(test, Map): test = MapSet([test]) if sum is not None: ref 
= ref.sum(sum) test = test.sum(sum) # Set the MapSet names according to args passed by user ref.name = ref_label test.name = test_label # Save to disk the maps being plotted (excluding optional aboslute value # operations) if json: refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label) to_file(ref, refmaps_path) testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label) to_file(test, testmaps_path) if set(test.names) != set(ref.names): raise ValueError('Test map names %s do not match ref map names %s.' % (sorted(test.names), sorted(ref.names))) # Aliases to save keystrokes def masked(x): return np.ma.masked_invalid(x.nominal_values) def zero_to_nan(map): newmap = deepcopy(map) mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON) newmap.hist[mask] = np.nan return newmap reordered_test = [] new_ref = [] diff_maps = [] fract_diff_maps = [] asymm_maps = [] summary_stats = {} for ref_map in ref: test_map = test[ref_map.name].reorder_dimensions(ref_map.binning) if ref_abs: ref_map = abs(ref_map) if test_abs: test_map = abs(test_map) diff_map = test_map - ref_map fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map) asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5) abs_fract_diff_map = np.abs(fract_diff_map) new_ref.append(ref_map) reordered_test.append(test_map) diff_maps.append(diff_map) fract_diff_maps.append(fract_diff_map) asymm_maps.append(asymm_map) min_ref = np.min(masked(ref_map)) max_ref = np.max(masked(ref_map)) min_test = np.min(masked(test_map)) max_test = np.max(masked(test_map)) total_ref = np.sum(masked(ref_map)) total_test = np.sum(masked(test_map)) mean_ref = np.mean(masked(ref_map)) mean_test = np.mean(masked(test_map)) max_abs_fract_diff = np.max(masked(abs_fract_diff_map)) mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map)) median_abs_fract_diff = np.median(masked(abs_fract_diff_map)) mean_fract_diff = np.mean(masked(fract_diff_map)) min_fract_diff = np.min(masked(fract_diff_map)) 
max_fract_diff = np.max(masked(fract_diff_map)) std_fract_diff = np.std(masked(fract_diff_map)) mean_diff = np.mean(masked(diff_map)) min_diff = np.min(masked(diff_map)) max_diff = np.max(masked(diff_map)) std_diff = np.std(masked(diff_map)) median_diff = np.nanmedian(masked(diff_map)) mad_diff = np.nanmedian(masked(np.abs(diff_map))) median_fract_diff = np.nanmedian(masked(fract_diff_map)) mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map))) min_asymm = np.min(masked(fract_diff_map)) max_asymm = np.max(masked(fract_diff_map)) total_asymm = np.sqrt(np.sum(masked(asymm_map)**2)) summary_stats[test_map.name] = OrderedDict([ ('min_ref', min_ref), ('max_ref', max_ref), ('total_ref', total_ref), ('mean_ref', mean_ref), ('min_test', min_test), ('max_test', max_test), ('total_test', total_test), ('mean_test', mean_test), ('max_abs_fract_diff', max_abs_fract_diff), ('mean_abs_fract_diff', mean_abs_fract_diff), ('median_abs_fract_diff', median_abs_fract_diff), ('min_fract_diff', min_fract_diff), ('max_fract_diff', max_fract_diff), ('mean_fract_diff', mean_fract_diff), ('std_fract_diff', std_fract_diff), ('median_fract_diff', median_fract_diff), ('mad_fract_diff', mad_fract_diff), ('min_diff', min_diff), ('max_diff', max_diff), ('mean_diff', mean_diff), ('std_diff', std_diff), ('median_diff', median_diff), ('mad_diff', mad_diff), ('min_asymm', min_asymm), ('max_asymm', max_asymm), ('total_asymm', total_asymm), ]) logging.info('Map %s...', ref_map.name) logging.info(' Ref map(s):') logging.info(' min :' + ('%.2f' % min_ref).rjust(12)) logging.info(' max :' + ('%.2f' % max_ref).rjust(12)) logging.info(' total :' + ('%.2f' % total_ref).rjust(12)) logging.info(' mean :' + ('%.2f' % mean_ref).rjust(12)) logging.info(' Test map(s):') logging.info(' min :' + ('%.2f' % min_test).rjust(12)) logging.info(' max :' + ('%.2f' % max_test).rjust(12)) logging.info(' total :' + ('%.2f' % total_test).rjust(12)) logging.info(' mean :' + ('%.2f' % mean_test).rjust(12)) logging.info(' 
Absolute fract. diff., abs((Test - Ref) / Ref):') logging.info(' max : %.4e', max_abs_fract_diff) logging.info(' mean : %.4e', mean_abs_fract_diff) logging.info(' median: %.4e', median_abs_fract_diff) logging.info(' Fractional difference, (Test - Ref) / Ref:') logging.info(' min : %.4e', min_fract_diff) logging.info(' max : %.4e', max_fract_diff) logging.info(' mean : %.4e +/- %.4e', mean_fract_diff, std_fract_diff) logging.info(' median: %.4e +/- %.4e', median_fract_diff, mad_fract_diff) logging.info(' Difference, Test - Ref:') logging.info(' min : %.4e', min_diff) logging.info(' max : %.4e', max_diff) logging.info(' mean : %.4e +/- %.4e', mean_diff, std_diff) logging.info(' median: %.4e +/- %.4e', median_diff, mad_diff) logging.info(' Asymmetry, (Test - Ref) / sqrt(Ref)') logging.info(' min : %.4e', min_asymm) logging.info(' max : %.4e', max_asymm) logging.info(' total : %.4e (sum in quadrature)', total_asymm) logging.info('') ref = MapSet(new_ref) test = MapSet(reordered_test) diff = MapSet(diff_maps) fract_diff = MapSet(fract_diff_maps) asymm = MapSet(asymm_maps) if json: diff.to_json( os.path.join( outdir, 'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label))) fract_diff.to_json( os.path.join( outdir, 'fract_diff__%s___%s.json.bz2' % (test_plot_label, ref_plot_label))) asymm.to_json( os.path.join( outdir, 'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label))) to_file( summary_stats, os.path.join( outdir, 'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label))) for plot_format in plot_formats: # Plot the raw distributions plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=False, ratio=False) plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label) plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label) # Plot the difference (test - ref) plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=diff_symm, ratio=False) plotter.label = '%s - 
%s' % (test_plot_label, ref_plot_label) plotter.plot_2d_array( test - ref, fname='diff__%s__%s' % (test_plot_label, ref_plot_label), #vmin=diff_min, vmax=diff_max ) # Plot the fractional difference (test - ref)/ref plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=fract_diff_symm, ratio=True) plotter.label = ('(%s-%s)/%s' % (test_plot_label, ref_plot_label, ref_plot_label)) plotter.plot_2d_array( (test - ref) / MapSet([zero_to_nan(r) for r in ref]), fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label), #vmin=fract_diff_min, vmax=fract_diff_max ) # Plot the asymmetry (test - ref)/sqrt(ref) plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=asymm_symm, ratio=True) plotter.label = (r'$(%s - %s)/\sqrt{%s}$' % (test_plot_label, ref_plot_label, ref_plot_label)) plotter.plot_2d_array( (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]), fname='asymm__%s__%s' % (test_plot_label, ref_plot_label), #vmin=asymm_min, vmax=asymm_max ) return summary_stats, diff, fract_diff, asymm
def profile_scan(data_settings, template_settings, param_names, steps,
                 only_points, no_outer, data_param_selections,
                 hypo_param_selections, profile, outfile, minimizer_settings,
                 metric, debug_mode):
    """Run `Analysis.scan` for the given settings and write the result to disk.

    Parameters
    ----------
    data_settings
        Passed to `DistributionMaker` to build the data maker. If None, the
        hypothesis maker (or a deep copy of it) is used for the data instead.
    template_settings
        Passed to `DistributionMaker` to build the hypothesis maker.
    param_names
    steps
    only_points
        Forwarded unchanged to `Analysis.scan`.
    no_outer
        Negated and forwarded to `Analysis.scan` as `outer`.
    data_param_selections
        Param selections applied to the data maker.
    hypo_param_selections
        Forwarded unchanged to `Analysis.scan`.
    profile
        Forwarded unchanged to `Analysis.scan`.
    outfile
        Destination path (user and environment variables are expanded); it
        must not exist yet.
    minimizer_settings
        Loaded via `from_file`; the loaded object is forwarded to the scan.
    metric
    debug_mode
        Forwarded unchanged to `Analysis.scan`.

    Returns
    -------
    results
    analysis

    Raises
    ------
    IOError
        If the expanded `outfile` path already exists.

    """
    out_path = expanduser(expandvars(outfile))
    # Refuse to clobber an earlier result before doing any expensive work
    if isfile(out_path):
        raise IOError('`outfile` "{}" already exists!'.format(out_path))

    loaded_minimizer_settings = from_file(minimizer_settings)
    hypo_maker = DistributionMaker(template_settings)

    if data_settings is not None:
        # Dedicated data pipeline(s)
        data_maker = DistributionMaker(data_settings)
        data_maker.select_params(data_param_selections)
    elif (data_param_selections is None
          or data_param_selections == hypo_param_selections):
        # Same settings and same selections: share the hypothesis maker
        data_maker = hypo_maker
    else:
        # Same settings but different selections: work on an independent copy
        data_maker = deepcopy(hypo_maker)
        data_maker.select_params(data_param_selections)

    scan_kwargs = dict(
        data_dist=data_maker.get_outputs(return_sum=True),
        hypo_maker=hypo_maker,
        hypo_param_selections=hypo_param_selections,
        metric=metric,
        param_names=param_names,
        steps=steps,
        only_points=only_points,
        outer=not no_outer,
        profile=profile,
        minimizer_settings=loaded_minimizer_settings,
        outfile=out_path,
        debug_mode=debug_mode,
    )

    analysis = Analysis()
    results = analysis.scan(**scan_kwargs)
    to_file(results, out_path)
    logging.info("Done.")

    return results, analysis
def create_hypersurfaces(fit_cfg):
    """Generate mapsets for different discrete systematics sets (with a single
    set characterised by a dedicated pipeline configuration) and fit one
    hypersurface per map to them.

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file

    Returns
    -------
    hypersurfaces : OrderedDict
        Container with the fitted hypersurface for each map type, keyed by
        map name

    Raises
    ------
    ValueError
        If a dataset section heading carries no parameter values or the wrong
        number of them, or if the config contains an unrecognised section.
    AssertionError
        If there is not exactly one nominal dataset, or no systematics dataset.

    """
    #
    # Parse fit config file
    #

    # The units are returned by the parser but are not needed for the fit
    parsed_fit_cfg, sys_list, sys_func_list, _units_list, combine_regex = (
        parse_fit_config(fit_cfg)
    )

    #
    # Create the hypersurface params
    #

    # Loop over the param names and functional forms and create the params
    #TODO Add option to support initial param guesses
    params = [
        HypersurfaceParam(name=param_name, func_name=param_func_name)
        for param_name, param_func_name in zip(sys_list, sys_func_list)
    ]

    #
    # Parse definition of each dataset
    #

    nominal_pipeline_cfg = None
    nominal_param_values = None
    sys_pipeline_cfgs = []
    sys_param_values = []

    # Loop over config
    for section in parsed_fit_cfg.sections():

        no_ws_section = section.strip()

        section_prefix = no_ws_section.split(":")[0].strip()
        is_nominal = section_prefix == NOMINAL_SET_PFX
        is_dataset = is_nominal or section_prefix == SYS_SET_PFX

        if is_dataset:

            # Parse the list of systematics parameter values from the section
            # name. A heading without ":" would otherwise fail with a bare
            # IndexError that does not name the offending section.
            heading_parts = section.split(":")
            if len(heading_parts) < 2:
                raise ValueError(
                    "Section heading [{}] does not specify any systematic"
                    " parameter values".format(section)
                )
            sys_param_point = tuple(
                float(x) for x in heading_parts[1].split(",")
            )

            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)
                    )
                )

            # Parse the config file (the returned path is not needed here)
            parsed_pipeline_cfg, _pipeline_cfg_path = (
                load_and_modify_pipeline_cfg(
                    fit_cfg=parsed_fit_cfg, section=section
                )
            )

            # Store
            if is_nominal:
                assert nominal_pipeline_cfg is None, "Found multiple nominal dataset definitions"
                nominal_pipeline_cfg = parsed_pipeline_cfg
                nominal_param_values = sys_param_point
            else:
                sys_pipeline_cfgs.append(parsed_pipeline_cfg)
                sys_param_values.append(sys_param_point)

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" % section)

    # Check found stuff
    assert nominal_pipeline_cfg is not None, "No nominal dataset definition found"
    assert len(sys_pipeline_cfgs) > 0, "No systematics dataset definitions found"

    # Re-format params into a dict, including the param names
    nominal_param_values = dict(zip(sys_list, nominal_param_values))
    sys_param_values = [dict(zip(sys_list, s)) for s in sys_param_values]

    #
    # Create mapsets
    #

    # Get the nominal mapset (output of the first pipeline only)
    nominal_dist_maker = DistributionMaker(nominal_pipeline_cfg)
    nominal_mapset = nominal_dist_maker.get_outputs(return_sum=False)[0]

    # Get the systematics mapsets
    sys_mapsets = []
    for sys_pipeline_cfg in sys_pipeline_cfgs:
        sys_dist_maker = DistributionMaker(sys_pipeline_cfg)
        sys_mapsets.append(sys_dist_maker.get_outputs(return_sum=False)[0])

    # Combine maps according to the provided regex, if one was provided
    if combine_regex:
        logging.info(
            "Combining maps according to regular expression(s) %s",
            combine_regex
        )
        nominal_mapset = nominal_mapset.combine_re(combine_regex)
        sys_mapsets = [s.combine_re(combine_regex) for s in sys_mapsets]

    #
    # Fit the hypersurface
    #

    hypersurfaces = OrderedDict()

    # Fit one per map, so loop over them
    for map_name in nominal_mapset.names:

        # Create the hypersurface
        hypersurface = Hypersurface(
            params=params,
            initial_intercept=1.,  # Initial value for intercept
        )

        # Get just the requested map
        nominal_map = nominal_mapset[map_name]
        sys_maps = [s[map_name] for s in sys_mapsets]

        # Perform fit
        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
        )

        # Store the result
        hypersurfaces[map_name] = hypersurface

    # Done
    return hypersurfaces
def _shifted_param_value(param, up):
    """Return ``(value, label)`` for `param` shifted up or down from its
    current value.

    A param with a Gaussian prior is shifted by one prior standard deviation.
    Any other param is scaled by +/-10%, or set to +/-1 if its value is zero.
    """
    prior = getattr(param, 'prior', None)
    if prior is not None and prior.kind == 'gaussian':
        # Gaussian priors are easy - just do 1 sigma
        if up:
            return param.value + prior.stddev, r"$1\sigma$"
        return param.value - prior.stddev, r"$1\sigma$"
    # Non-Gaussian or no prior: do 10%, or +/- 1 if the baseline is zero
    if param.value != 0.0:
        return (1.1 if up else 0.9) * param.value, r"10%"
    return (1.0 if up else -1.0), r"1"


def main():
    """Plot, for every free param, the percent change of the 'total' map when
    that param is shifted up and down from its nominal value.

    Command-line arguments come from `parse_args`. For each free param one
    PNG named ``<detector>_<selection>_<param>_variation.png`` is written
    into ``args.logdir``. Each PID bin gets a column; the upper row shows the
    up-shift and the lower row the down-shift.

    Raises
    ------
    ValueError
        If any pipeline contains a separate 'pid' stage (not implemented).
    """
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: `vars(args)` is the namespace's own dict, so popping here also
    # removes the attributes from `args`.
    set_verbosity(init_args_d.pop('v'))

    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')

    # Normalize and sort the pipeline filenames
    filenames = init_args_d.pop('pipeline')
    if filenames is not None:
        filenames = sorted([normcheckpath(fname) for fname in filenames])

    ps_str = init_args_d['param_selections']
    if ps_str is None:
        ps_list = None
    else:
        ps_list = [x.strip().lower() for x in ps_str.split(',')]

    data_maker = DistributionMaker(filenames)
    data_maker.select_params(ps_list)

    for data_pipeline in data_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in data_pipeline.stage_names:
            raise ValueError("Special case for separate PID stage currently "
                             "not implemented.")
    # Bound unconditionally: previously this was only set inside the loop, so
    # it was undefined when there were no pipelines.
    return_sum = True

    baseline_maps = data_maker.get_outputs(return_sum=return_sum)

    # Build the file-name prefix from the non-empty detector/selection labels
    det_sel = [label.strip() for label in (detector, selection)
               if label.strip() != '']
    det_sel_file_label = ' '.join(det_sel)
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    for data_param in data_maker.params.free:
        # Shift up, evaluate, then restore the nominal values
        data_param.value, shift_label = _shifted_param_value(
            data_param, up=True
        )
        up_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        # Shift down, evaluate, then restore the nominal values
        data_param.value, _ = _shifted_param_value(data_param, up=False)
        down_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        baseline_map = baseline_maps['total']
        baseline_map.set_errors(error_hist=None)
        up_map = up_maps['total']
        up_map.set_errors(error_hist=None)
        down_map = down_maps['total']
        down_map.set_errors(error_hist=None)

        pid_names = baseline_map.binning['pid'].bin_names
        if pid_names is None:
            logging.warning('There are no names given for the PID bins, thus '
                            'they will just be numbered in both the the plot '
                            'save names and titles.')
            pid_names = list(range(baseline_map.binning['pid'].num_bins))

        gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
        # squeeze=False keeps `axes` two-dimensional even for a single PID
        # bin, so `axes[row][col]` indexing below always works.
        fig, axes = plt.subplots(
            nrows=2,
            ncols=len(pid_names),
            gridspec_kw=gridspec_kw,
            sharex=False,
            sharey=False,
            figsize=(7 * len(pid_names), 14),
            squeeze=False,
        )

        for i, pid_name in enumerate(pid_names):
            baseline = baseline_map.split(dim='pid', bin=pid_name)
            up_to_plot = up_map.split(dim='pid', bin=pid_name)
            up_to_plot = (up_to_plot - baseline) / baseline * 100.0
            down_to_plot = down_map.split(dim='pid', bin=pid_name)
            down_to_plot = (down_to_plot - baseline) / baseline * 100.0

            if isinstance(pid_name, int):
                pid_label = 'PID Bin %i' % (pid_name)
            else:
                pid_label = pid_name + ' Channel'

            param_label = tex_axis_label(data_param.name)
            up_title = (("%s " % pid_label) + "\n"
                        + (" %s + %s" % (param_label, shift_label)))
            down_title = (("%s " % pid_label) + "\n"
                          + (" %s - %s" % (param_label, shift_label)))

            up_to_plot.plot(
                fig=fig,
                ax=axes[0][i],
                title=up_title,
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True,
            )
            down_to_plot.plot(
                fig=fig,
                ax=axes[1][i],
                title=down_title,
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True,
            )

        fig.subplots_adjust(hspace=0.4)
        # `det_sel_file_label` is either empty or already ends with '_'
        savename = det_sel_file_label + '%s_variation.png' % (data_param.name)
        mkdir(args.logdir, warn=False)
        fig.savefig(os.path.join(args.logdir, savename), bbox_inches='tight')
        plt.close(fig.number)
def inj_param_scan(return_outputs=False):
    """Load the HypoTesting class and use it to do an Asimov test across the
    space of one of the injected parameters.

    The user will define the parameter and pass a numpy-interpretable string to
    set the range of values. For example, one could scan over the space of
    theta23 by using a string such as `"numpy.linspace(0.35, 0.65, 31)"` which
    will then be evaluated to figure out a space of theta23 to inject and run
    Asimov tests.
    """
    # NOTE: the docstring above is handed to `parse_args` as the command-line
    # description, so its wording is user-facing text.
    #
    # If `return_outputs` is true, the outputs of
    # `HypoTesting.asimov_inj_param_scan` are returned; otherwise None.
    # ValueError is raised for 'deltam31', for 'deltam3l' values that are zero
    # or of mixed sign, and when a prior is forced on a param that has none.

    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=inj_param_scan.__doc__,
                             command=inj_param_scan)

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    # For this test, pipeline is required so we don't need the try arguments
    # or the checks on it being None
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    # All three makers are built from the same pipeline filenames
    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    # Remove final parameters that don't want to be passed to HypoTesting
    param_name = init_args_d.pop('param_name')
    # SECURITY NOTE(review): `eval` executes whatever string was given on the
    # command line. Acceptable only because the caller is the local user;
    # never feed this from an untrusted source.
    inj_vals = eval(init_args_d.pop('inj_vals'))
    inj_units = init_args_d.pop('inj_units')
    force_prior = init_args_d.pop('use_inj_prior')

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    logging.info('Scanning over %s between %.4f and %.4f with %i vals',
                 param_name, min(inj_vals), max(inj_vals), len(inj_vals))

    # Modify parameters if necessary
    if param_name == 'sin2theta23':
        requested_vals = inj_vals
        inj_vals = np.arcsin(np.sqrt(inj_vals))
        logging.info(
            'Converting to theta23 values. Equivalent range is %.4f to %.4f'
            ' radians, or %.4f to %.4f degrees',
            min(inj_vals), max(inj_vals),
            min(inj_vals) * 180 / np.pi, max(inj_vals) * 180 / np.pi)
        test_name = 'theta23'
        inj_units = 'radians'

    elif param_name == 'deltam31':
        raise ValueError('Need to implement a test where it ensures the sign '
                         'of the requested values matches those in truth and '
                         'the hypo makers (else it makes no sense). For now, '
                         'please select deltam3l instead.')

    elif param_name == 'deltam3l':
        # Ensure all values are the same sign, else it doesn't make any sense.
        # The previous `np.alltrue(np.sign(...))` test only rejected zeros
        # (+1 and -1 are both truthy), so mixed signs slipped through; it also
        # relied on `np.alltrue`, which NumPy 2.0 removed.
        inj_signs = np.sign(inj_vals)
        if np.any(inj_signs == 0) or np.any(inj_signs != inj_signs[0]):
            raise ValueError("Not all requested values to inject are the same "
                             "sign. This doesn't make any sense given that you"
                             " have requested to inject different values of "
                             "deltam3l.")
        logging.info('Parameter requested was deltam3l - will convert assuming'
                     ' that this is always the largest of the two splittings '
                     'i.e. deltam3l = deltam31 for deltam3l > 0 and deltam3l '
                     '= deltam32 for deltam3l < 0.')
        inj_sign = inj_signs[0]
        requested_vals = inj_vals
        test_name = 'deltam31'
        deltam21_val = hypo_testing.data_maker.params['deltam21'].value.to(
            inj_units).magnitude
        if inj_sign == 1:
            no_inj_vals = requested_vals
            io_inj_vals = (requested_vals - deltam21_val) * -1.0
        else:
            io_inj_vals = requested_vals
            no_inj_vals = (requested_vals * -1.0) + deltam21_val
        # One {'nh': ..., 'ih': ...} dict per scan point
        inj_vals = []
        for no_inj_val, io_inj_val in zip(no_inj_vals, io_inj_vals):
            o_vals = {}
            o_vals['nh'] = no_inj_val
            o_vals['ih'] = io_inj_val
            inj_vals.append(o_vals)

    else:
        test_name = param_name
        requested_vals = inj_vals

    # Attach the injection units to every value
    unit_inj_vals = []
    for inj_val in inj_vals:
        if isinstance(inj_val, dict):
            o_vals = {}
            for ivkey in inj_val.keys():
                o_vals[ivkey] = inj_val[ivkey] * ureg(inj_units)
            unit_inj_vals.append(o_vals)
        else:
            unit_inj_vals.append(inj_val * ureg(inj_units))
    inj_vals = unit_inj_vals

    # Extend the ranges of the distribution makers so that they reflect the
    # range of the scan. This is a pain if there are different values depending
    # on the ordering. Need to extend the ranges of both values in the
    # hypothesis maker since the hypotheses may minimise over the ordering,
    # and could then go out of range.

    # Also, some parameters CANNOT go negative or else things won't work.
    # To account for this, check if parameters lower value was positive and,
    # if so, enforce that it is positive now.
    if isinstance(inj_vals[0], dict):
        # Calculate ranges for both parameters
        norangediff = max(no_inj_vals) - min(no_inj_vals)
        norangediff = norangediff * ureg(inj_units)
        norangetuple = (min(no_inj_vals) * ureg(inj_units) - 0.5 * norangediff,
                        max(no_inj_vals) * ureg(inj_units) + 0.5 * norangediff)
        iorangediff = max(io_inj_vals) - min(io_inj_vals)
        iorangediff = iorangediff * ureg(inj_units)
        iorangetuple = (min(io_inj_vals) * ureg(inj_units) - 0.5 * iorangediff,
                        max(io_inj_vals) * ureg(inj_units) + 0.5 * iorangediff)
        # Do it for both hierarchies
        for hierarchy, rangetuple in zip(['nh', 'ih'],
                                         [norangetuple, iorangetuple]):
            hypo_testing.set_param_ranges(selection=hierarchy,
                                          test_name=test_name,
                                          rangetuple=rangetuple,
                                          inj_units=inj_units)
        # Select the proper params again
        hypo_testing.h0_maker.select_params(init_args_d['h0_param_selections'])
        hypo_testing.h1_maker.select_params(init_args_d['h1_param_selections'])
    # Otherwise it's way simpler...
    else:
        rangediff = max(inj_vals) - min(inj_vals)
        rangetuple = (min(inj_vals) - 0.5 * rangediff,
                      max(inj_vals) + 0.5 * rangediff)
        hypo_testing.set_param_ranges(selection=None,
                                      test_name=test_name,
                                      rangetuple=rangetuple,
                                      inj_units=inj_units)

    if hypo_testing.data_maker.params[test_name].prior is not None:
        if hypo_testing.data_maker.params[test_name].prior.kind != 'uniform':
            if force_prior:
                logging.warning(
                    'Parameter to be scanned, %s, has a %s prior that you have'
                    ' requested to be left on. This will likely make the'
                    ' results wrong.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
            else:
                logging.info(
                    'Parameter to be scanned, %s, has a %s prior.This will be'
                    ' changed to a uniform prior (i.e. no prior) for this'
                    ' test.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
                uniformprior = Prior(kind='uniform')
                hypo_testing.h0_maker.params[test_name].prior = uniformprior
                hypo_testing.h1_maker.params[test_name].prior = uniformprior
    else:
        if force_prior:
            raise ValueError('Parameter to be scanned, %s, does not have a'
                             ' prior but you have requested to force one to be'
                             ' left on. Something is potentially wrong.'
                             % test_name)
        else:
            logging.info(
                'Parameter to be scanned, %s, does not have a prior.'
                ' So nothing needs to be done.', test_name)

    # Everything is set up. Now do the scan.
    outputs = hypo_testing.asimov_inj_param_scan(  # pylint: disable=redefined-outer-name
        param_name=param_name,
        test_name=test_name,
        inj_vals=inj_vals,
        requested_vals=requested_vals,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
def test_pi_resample():
    """Unit test for the resampling stage.

    Builds the example pipeline with a coarsened output binning, and a second
    pipeline that adds a `pi_resample` stage to upsample back to the original
    binning. Checks that (1) the total count is preserved by the upsampling
    and (2) the modified chi-square between a map and a fluctuated copy is the
    same before and after upsampling.
    """
    from pisa.core.distribution_maker import DistributionMaker
    from pisa.core.map import Map
    from pisa.utils.config_parser import parse_pipeline_config
    from pisa.utils.log import set_verbosity, logging
    from pisa.utils.comparisons import ALLCLOSE_KW
    from collections import OrderedDict
    from copy import deepcopy

    example_cfg = parse_pipeline_config('settings/pipeline/example.cfg')
    reco_binning = example_cfg[('utils', 'pi_hist')]['output_specs']
    coarse_binning = reco_binning.downsample(reco_energy=2, reco_coszen=2)
    assert coarse_binning.is_compat(reco_binning)
    # replace binning of output with coarse binning
    example_cfg[('utils', 'pi_hist')]['output_specs'] = coarse_binning
    # make another pipeline with an upsampling stage to the original binning
    upsample_cfg = deepcopy(example_cfg)
    pi_resample_cfg = OrderedDict()
    pi_resample_cfg['input_specs'] = coarse_binning
    pi_resample_cfg['output_specs'] = reco_binning
    pi_resample_cfg['scale_errors'] = True
    upsample_cfg[('utils', 'pi_resample')] = pi_resample_cfg

    example_maker = DistributionMaker([example_cfg])
    upsampled_maker = DistributionMaker([upsample_cfg])

    example_map = example_maker.get_outputs(return_sum=True)[0]
    example_map_upsampled = upsampled_maker.get_outputs(return_sum=True)[0]

    # First check: The upsampled map must have the same total count as the original map
    assert np.isclose(
        np.sum(example_map.nominal_values),
        np.sum(example_map_upsampled.nominal_values),
    )

    # Check consistency of modified chi-square
    # ----------------------------------------
    # When the assumption holds that events are uniformly distributed over the coarse
    # bins, the modified chi-square should not change from upscaling the maps. We test
    # this by making a fluctuated coarse map and then upsampling that map according to
    # the assumption by bin volumes. We should find that the modified chi-square between
    # the coarse map and the coarse fluctuated map is the same as the upsampled map and
    # the upsampled fluctuated map.

    # It doesn't matter precisely how we fluctuate it here, we just want any different
    # map...
    random_map_coarse = example_map.fluctuate(method='scaled_poisson', random_state=42)
    random_map_coarse.set_errors(None)

    # This bit is an entirely independent implementation of the upsampling. The count
    # in every bin is scaled according to the ratio of weighted bin volumes.
    upsampled_hist = np.zeros_like(example_map_upsampled.nominal_values)
    up_binning = example_map_upsampled.binning

    coarse_hist = np.array(random_map_coarse.nominal_values)
    coarse_binning = random_map_coarse.binning
    # The coarse volumes do not depend on the fine bin, so compute them once
    # instead of once per fine bin.
    coarse_bin_volumes = coarse_binning.weighted_bin_volumes(
        attach_units=False,
    )

    for bin_idx in np.ndindex(upsampled_hist.shape):
        one_bin = up_binning[bin_idx]
        fine_bin_volume = one_bin.weighted_bin_volumes(
            attach_units=False,
        ).squeeze().item()
        # the following is basically an independent implementation of translate.lookup
        coarse_index = []  # index where the upsampled bin came from
        for dim in up_binning.names:
            x = one_bin[dim].weighted_centers[0].m  # middle point of the one bin
            bins = coarse_binning[dim].bin_edges.m  # coarse bin edges in that dim
            coarse_index.append(np.digitize(x, bins) - 1)  # index 1 means bin 0
        coarse_index = tuple(coarse_index)
        coarse_bin_volume = coarse_bin_volumes[coarse_index].squeeze().item()

        upsampled_hist[bin_idx] = coarse_hist[coarse_index]
        upsampled_hist[bin_idx] *= fine_bin_volume
        upsampled_hist[bin_idx] /= coarse_bin_volume

    # done, at last!
    random_map_upsampled = Map(name="random_upsampled", hist=upsampled_hist,
                               binning=up_binning)
    random_map_upsampled.set_errors(None)

    # After ALL THIS, we get the same modified chi-square from the coarse and the
    # upsampled pair of maps. Neat, huh?
    assert np.allclose(
        random_map_coarse.mod_chi2(example_map),
        random_map_upsampled.mod_chi2(example_map_upsampled),
        **ALLCLOSE_KW,
    )
    logging.info('<< PASS : pi_resample >>')
def make_discrete_sys_distributions(fit_cfg, set_params=None):
    """Generate and store mapsets for different discrete systematics sets
    (with a single set characterised by a dedicated pipeline configuration)

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file
    set_params : mapping of string to Quantity, optional
        Param values applied to every distribution maker before its maps are
        generated. Each name must be a param of the maker and each value must
        have the same dimensionality as that param.

    Returns
    -------
    input_data : OrderedDict
        Container with the processed input data including MapSets
        resulting from each input pipelines

    Raises
    ------
    TypeError
        If `set_params` is not dict-like, or has non-string keys or
        non-Quantity values.
    ValueError
        If the fit config is malformed (wrong or missing parameter values in a
        section heading, unknown section, missing or repeated nominal set) or
        if `set_params` names an unknown param or uses incompatible units.

    """
    # check optional `set_params`
    if set_params is not None:
        if not isinstance(set_params, Mapping):
            raise TypeError("`set_params` must be dict-like")
        for param_name, param_value in set_params.items():
            # NOTE(review): `basestring` is a Python 2 builtin; confirm the
            # module provides it when running under Python 3.
            if not isinstance(param_name, basestring):
                raise TypeError(
                    "`set_params` keys must be strings (parameter name)")
            if not isinstance(param_value, ureg.Quantity):
                raise TypeError("`set_params` values must be Quantities")

    parsed_fit_cfg, sys_list, units_list, combine_regex = parse_fit_config(
        fit_cfg)
    # Keep the full text of the fit config alongside the results
    fit_cfg_txt_buf = StringIO()
    parsed_fit_cfg.write(fit_cfg_txt_buf)
    fit_cfg_txt = fit_cfg_txt_buf.getvalue()

    # prepare the data container
    input_data = OrderedDict()
    input_data["fit_cfg_path"] = fit_cfg
    input_data["fit_cfg_txt"] = fit_cfg_txt
    input_data["param_names"] = sys_list
    input_data["param_units"] = units_list
    input_data["datasets"] = []

    # -- Load systematics sets -- #

    found_nominal = False
    sys_sets_info = OrderedDict()

    for section in parsed_fit_cfg.sections():
        no_ws_section = section.strip()

        section_pfx = no_ws_section.split(":")[0].strip()
        is_nominal = section_pfx == NOMINAL_SET_PFX
        is_sys_set = is_nominal or section_pfx == SYS_SET_PFX

        if is_nominal:
            if found_nominal:
                raise ValueError(
                    "Found multiple nominal sets in fit cfg! There must be"
                    " exactly one.")
            found_nominal = True

        if is_sys_set:
            # Parse the list of systematics parameter values from the section
            # name. A heading without ":" would otherwise fail with a bare
            # IndexError that does not name the offending section.
            heading_parts = section.split(":")
            if len(heading_parts) < 2:
                raise ValueError(
                    "Section heading [{}] does not specify any systematic"
                    " parameter values".format(section))
            sys_param_point = tuple(
                float(x) for x in heading_parts[1].split(","))

            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)))

            parsed_pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
                fit_cfg=parsed_fit_cfg, section=section)

            pipeline_cfg_txt_buf = StringIO()
            parsed_pipeline_cfg.write(pipeline_cfg_txt_buf)
            pipeline_cfg_txt = pipeline_cfg_txt_buf.getvalue()

            sys_sets_info[sys_param_point] = dict(
                is_nominal=is_nominal,
                parsed_pipeline_cfgs=[parsed_pipeline_cfg],
                pipeline_cfg_paths=[pipeline_cfg_path],
                pipeline_cfg_txts=[pipeline_cfg_txt],
            )

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" % section)

    if not found_nominal:
        raise ValueError(
            "Could not find a nominal discrete systematics set in fit cfg."
            " There must be exactly one.")

    nsets = len(sys_sets_info)
    nsys = len(sys_list)
    if nsets <= nsys:
        # Report the same numbers the condition compares; the message used to
        # print `nsys + 1`, which did not match the `<=` it displayed.
        logging.warning(
            "Fit will either fail or be unreliable since the number of"
            " systematics sets to be fit is small (%d <= %d).",
            nsets,
            nsys,
        )

    for sys_param_point, info in sys_sets_info.items():
        point_str = " | ".join(
            ["%s=%.2f" % (p, v) for p, v in zip(sys_list, sys_param_point)])

        logging.info(
            "Generating maps for discrete systematics point: %s. Using"
            ' pipeline config(s) at "%s"',
            point_str,
            info["pipeline_cfg_paths"],
        )

        # make a dedicated distribution maker for each systematics set
        distribution_maker = DistributionMaker(info["parsed_pipeline_cfgs"])

        # update params if requested
        if set_params is not None:
            for pname, pval in set_params.items():
                if pname not in distribution_maker.params.names:
                    raise ValueError("Unknown param '%s' in `set_params`" % pname)
                if (pval.dimensionality
                        != distribution_maker.params[pname].dimensionality):
                    raise ValueError(
                        'Incorrect units for param "%s" in `set_params`' % pname)
                distribution_maker.params[pname].value = pval
                logging.info("Changed param '%s' to %s", pname, pval)

        # Record every param value actually used, in sorted-name order
        distribution_maker_param_values = OrderedDict()
        for dmpname in sorted(distribution_maker.params.names):
            dmpval = distribution_maker.params[dmpname].value
            distribution_maker_param_values[dmpname] = dmpval

        # run the distribution maker to get the mapset
        # TODO This assumes only one pipeline, either make more general or enforce
        mapset = distribution_maker.get_outputs(return_sum=False)[0]

        if combine_regex:
            logging.info(
                "Combining maps according to regular expression(s) %s",
                combine_regex)
            mapset = mapset.combine_re(combine_regex)

        # Store the info
        dataset = OrderedDict()
        dataset["pipeline_cfg_paths"] = info["pipeline_cfg_paths"]
        dataset["pipeline_cfg_txts"] = info["pipeline_cfg_txts"]
        dataset["distribution_maker_param_values"] = (
            distribution_maker_param_values)
        dataset["param_values"] = sys_param_point
        dataset["mapset"] = mapset
        dataset["nominal"] = info["is_nominal"]
        input_data["datasets"].append(dataset)

    return input_data
def create_mc_template(toymc_params, config_file=None, seed=None,
                       keep_same_weight=True):
    """Build a `DistributionMaker` MC template from a pisa pipeline config.

    Parameters
    ----------
    toymc_params
        Object providing `binning`, `n_data`, `signal_fraction`,
        `stats_factor`, `mu` and `sigma`, which override the corresponding
        settings of the parsed config.
    config_file
        Passed to `parse_pipeline_config`.
    seed
        If not None, used to seed numpy's global random state.
    keep_same_weight
        If true, the mean adjust and pseudo weight of
        `pi_generalized_llh_params` are switched off; otherwise both are
        switched on.

    Returns
    -------
    MCtemplate : DistributionMaker

    """
    if seed is not None:
        np.random.seed(seed)

    Config = parse_pipeline_config(config_file)

    signal_stage = ('data', 'pi_simple_signal')
    llh_stage = ('likelihood', 'pi_generalized_llh_params')

    # Change binning
    for stage_key in (signal_stage, llh_stage):
        Config[stage_key]['output_specs'] = toymc_params.binning

    # Both options are enabled exactly when the weights are NOT kept the same
    adjust_weights = False if keep_same_weight else True
    for option in ('with_mean_adjust', 'with_pseudo_weight'):
        Config[llh_stage][option] = adjust_weights

    # These should match the values of the config file, but we override them
    # just in case we need to change these later.
    # Columns: param name, `toymc_params` attribute, range, is_fixed
    param_specs = (
        ('n_events_data', 'n_data', None, True),
        ('signal_fraction', 'signal_fraction', None, True),
        ('stats_factor', 'stats_factor', None, True),
        ('mu', 'mu', [0, 100], False),
        ('sigma', 'sigma', None, True),
    )
    overrides = []
    for name, attr, value_range, fixed in param_specs:
        overrides.append(
            Param(name=name, value=getattr(toymc_params, attr), prior=None,
                  range=value_range, is_fixed=fixed)
        )

    Config[signal_stage]['params'].update(p=ParamSet(overrides))

    MCtemplate = DistributionMaker(Config)
    return MCtemplate