def load_gen_data(self):
    logging.debug('Loading generator level sample')
    unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
    if isinstance(unfold_pipeline_cfg, str):
        pipeline_cfg = from_file(unfold_pipeline_cfg)
        pipeline_hash = pipeline_cfg
        sa_cfg = from_file(
            pipeline_cfg.get('stage.data', 'param.data_sample_config'))
        template_maker = Pipeline(pipeline_cfg)
    elif isinstance(unfold_pipeline_cfg, Pipeline):
        pipeline_hash = unfold_pipeline_cfg.state_hash
        sa_cfg = from_file(
            unfold_pipeline_cfg.params['data_sample_config'].value)
        template_maker = unfold_pipeline_cfg
    gen_cfg = from_file(sa_cfg.get('neutrinos|gen_lvl', 'gen_cfg_file'))
    this_hash = hash_obj([gen_cfg, pipeline_hash, self.output_str],
                         full_hash=self.full_hash)
    if self.gen_data_hash == this_hash:
        return self._gen_data

    full_gen_data = template_maker.get_outputs()
    if not isinstance(full_gen_data, Data):
        raise AssertionError(
            'Output of pipeline is not a Data object, instead is type '
            '{0}'.format(type(full_gen_data)))
    trans_data = full_gen_data.transform_groups(self.output_str)
    gen_data = trans_data[self.output_str]

    self._gen_data = gen_data
    self.gen_data_hash = this_hash
    return gen_data
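# A minimal, self-contained sketch of the hash-then-cache pattern that
# `load_gen_data` uses above: hash all inputs, and only recompute when the
# hash changes. `_stable_hash` and `CachedLoader` are hypothetical stand-ins
# for pisa's `hash_obj` and the stage's cache attributes, for illustration
# only.
import hashlib
import json


def _stable_hash(obj):
    """Deterministically hash a JSON-serializable object."""
    return hashlib.sha256(
        json.dumps(obj, sort_keys=True).encode()).hexdigest()


class CachedLoader:
    """Recompute an expensive result only when its inputs change."""

    def __init__(self):
        self._cache_hash = None
        self._cache_value = None

    def load(self, cfg):
        this_hash = _stable_hash(cfg)
        if this_hash == self._cache_hash:
            return self._cache_value  # inputs unchanged: reuse cached result
        value = sum(cfg.values())  # stand-in for the expensive pipeline run
        self._cache_hash, self._cache_value = this_hash, value
        return value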
lt_param.value = lt
pipeline.update_params(lt_param)
gen_pipe.update_params(lt_param)

u_pipe = Param(name='unfold_pipeline_cfg', value=gen_pipe, is_fixed=True,
               prior=None, range=None)
unfold_pipeline_cfg = u_pipe
pipeline.update_params(unfold_pipeline_cfg)

# Get nominal
re_param.value = 0 * ureg.dimensionless
pipeline.update_params(re_param)
nom_out = pipeline.get_outputs().pop()

re_param.value = 2 * ureg.dimensionless
sf_param.value = 1234 * ureg.dimensionless
pipeline.update_params(re_param)
pipeline.update_params(sf_param)

fe = []
if 'test' in outname:
    n_trials = 5
else:
    n_trials = 200
for x in range(n_trials):
    temp_out = pipeline.get_outputs().pop()
    nan_mask = (nom_out.hist == 0)
    div = temp_out.hist[~nan_mask] / nom_out.hist[~nan_mask]
def _calculate_fit_coeffs(data, params, fit_binning, nu_params=None,
                          mu_params=None):
    """
    Calculate the fit coefficients for each systematic, flavint, bin
    for a polynomial.
    """
    logging.debug('Calculating fit coefficients')

    config = from_file(params['discr_sys_sample_config'].value)

    degree = int(params['poly_degree'].value)
    force_through_nominal = params['force_through_nominal'].value

    if force_through_nominal:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, [1.] + list(poly_coeffs))
    else:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, list(poly_coeffs))
        # add free param for constant term
        degree += 1

    template_maker = Pipeline(params['pipeline_config'].value)
    dataset_param = template_maker.params['dataset']

    def parse(string):
        return string.replace(' ', '').split(',')

    sys_fit_coeffs = OrderedDict()
    if nu_params is not None:
        sys_list = parse(config.get('neutrinos', 'sys_list'))
        nu_params = deepcopy([x[3:] for x in nu_params])

        if set(nu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(nu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'neutrinos|' + sys
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            mapset_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if flavint_groups is None:
                    flavint_groups = template.flavint_groups
                else:
                    if set(flavint_groups) != set(template.flavint_groups):
                        raise AssertionError(
                            'Mismatch of flavint_groups - ({0}) does not '
                            'match flavint_groups '
                            '({1})'.format(flavint_groups,
                                           template.flavint_groups))

                outputs = []
                for fig in template.keys():
                    outputs.append(
                        template.histogram(kinds=fig, binning=fit_binning,
                                           weights_col='pisa_weight',
                                           errors=False,
                                           name=str(NuFlavIntGroup(fig))))
                mapset_dict[run] = MapSet(outputs, name=run)

            nom_mapset = mapset_dict[nominal]
            fracdiff_mapset_dict = OrderedDict()
            for run in runs:
                mapset = []
                for flavintg_map in mapset_dict[run]:
                    # TODO(shivesh): error propagation?
                    flavintg = flavintg_map.name
                    mask = ~(nom_mapset[flavintg].hist == 0.)
                    div = np.zeros(flavintg_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(flavintg_map.hist[mask]) /\
                            unp.nominal_values(nom_mapset[flavintg].hist[mask])
                    mapset.append(Map(name=flavintg,
                                      binning=flavintg_map.binning,
                                      hist=div))
                fracdiff_mapset_dict[run] = MapSet(mapset)

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            coeff_binning = OneDimBinning(name='coeff', num_bins=degree,
                                          is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            params_mapset = []
            for fig in template.keys():
                # TODO(shivesh): Fix numpy warning on this line
                pvals_hist = np.empty(list(map(int, combined_binning.shape)),
                                      dtype=object)
                hists = [fracdiff_mapset_dict[run][fig].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_mapset_dict:
                        y_values.append(unp.nominal_values(
                            fracdiff_mapset_dict[run][fig].hist[idx]))
                        y_sigma.append(unp.std_devs(
                            fracdiff_mapset_dict[run][fig].hist[idx]))

                    if np.any(y_sigma):
                        popt, pcov = curve_fit(fit_func, delta_runs,
                                               y_values, sigma=y_sigma,
                                               p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(fit_func, delta_runs,
                                               y_values, p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_mapset.append(Map(name=fig, binning=combined_binning,
                                         hist=pvals_hist))
            params_mapset = MapSet(params_mapset, name=sys)

            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet([sys_fit_coeffs[sys],
                                              params_mapset])
            else:
                sys_fit_coeffs[sys] = params_mapset

    if mu_params is not None:
        sys_list = parse(config.get('muons', 'sys_list'))
        mu_params = deepcopy([x[3:] for x in mu_params])

        if set(mu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(mu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'muons|' + sys
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            map_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if not template.contains_muons:
                    raise AssertionError(
                        'Template output does not contain muons')

                output = template.histogram(
                    kinds='muons', binning=fit_binning,
                    # NOTE: weights cancel in fraction
                    weights_col=None, errors=False, name='muons')
                map_dict[run] = output

            nom_map = map_dict[nominal]
            fracdiff_map_dict = OrderedDict()
            for run in runs:
                mask = ~(nom_map.hist == 0.)
                div = np.zeros(nom_map.shape)
                with np.errstate(divide='ignore', invalid='ignore'):
                    div[mask] = \
                        unp.nominal_values(map_dict[run].hist[mask]) /\
                        unp.nominal_values(nom_map.hist[mask])
                fracdiff_map_dict[run] = Map(name='muons',
                                             binning=nom_map.binning,
                                             hist=div)

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            coeff_binning = OneDimBinning(name='coeff', num_bins=degree,
                                          is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            pvals_hist = np.empty(list(map(int, combined_binning.shape)),
                                  dtype=object)
            hists = [fracdiff_map_dict[run].hist for run in runs]
            zip_hists = np.dstack(hists)
            for idx in np.ndindex(fit_binning.shape):
                y_values = []
                y_sigma = []
                for run in fracdiff_map_dict:
                    y_values.append(unp.nominal_values(
                        fracdiff_map_dict[run].hist[idx]))
                    y_sigma.append(unp.std_devs(
                        fracdiff_map_dict[run].hist[idx]))

                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_func, delta_runs, y_values,
                                           sigma=y_sigma,
                                           p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_func, delta_runs, y_values,
                                           p0=np.ones(degree))
                # perr = np.sqrt(np.diag(pcov))
                # pvals = unp.uarray(popt, perr)
                pvals_hist[idx] = popt
            pvals_hist = np.array(pvals_hist.tolist())
            params_map = Map(name='muons', binning=combined_binning,
                             hist=pvals_hist)
            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet([sys_fit_coeffs[sys],
                                              params_map])
            else:
                sys_fit_coeffs[sys] = params_map

    return sys_fit_coeffs
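# A minimal, self-contained sketch of the per-bin polynomial fit performed
# above, on toy data (plain numpy/scipy, no pisa objects; `degree`,
# `delta_runs` and `y_values` are made up for illustration). With
# force_through_nominal the constant term is pinned to 1, so the curve
# passes through the nominal point at delta = 0.
import numpy as np
from scipy.optimize import curve_fit

degree = 2  # number of free polynomial coefficients


def fit_func(vals, *poly_coeffs):
    # constant term fixed to 1., as in the force_through_nominal branch
    return np.polynomial.polynomial.polyval(vals, [1.] + list(poly_coeffs))


delta_runs = np.array([-2., -1., 0., 1., 2.])  # run value minus nominal
y_values = 1. + 0.1 * delta_runs + 0.02 * delta_runs**2  # toy bin ratios
popt, pcov = curve_fit(fit_func, delta_runs, y_values, p0=np.ones(degree))
print(popt)  # ~[0.1, 0.02]: recovered slope and curvature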
def main():
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        runs = eval(cfg.get(sys, 'runs'))
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = ("lambda x, *p: np.polynomial.polynomial.polyval(x, "
                        "[1.] + list(p))")
        else:
            function = ("lambda x, *p: np.polynomial.polynomial.polyval(x, "
                        "list(p))")
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param != '':
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                param.value = value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s' % (sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] /
                    unp.nominal_values(inputs[nominal][name]))
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning
        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma,
                                           p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail
                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=(4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j) + i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    #plt.scatter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.scatter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #                         color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][..., d] = gaussian_filter(
                        outputs[name][..., d], sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning) - 1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][..., p, d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx,
                                        tot)[..., p, d],
                            sigma=1)
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json' % (args.out_dir, sys,
                                                       args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw' % name,
                                    hist=outputs[name][..., d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(stamp='', outdir=args.out_dir,
                                     fmt='pdf', log=False, label='')
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s' % (sys, args.tag, d, smooth),
                )
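# A minimal sketch of the per-coefficient Gaussian smoothing applied in the
# 'gauss' branch above: each polynomial coefficient is smoothed across the
# analysis binning independently, never across coefficients. Plain numpy and
# scipy; the array shape is a made-up toy stand-in for the real binning.
import numpy as np
from scipy.ndimage import gaussian_filter

degree = 3
coeffs = np.random.default_rng(0).normal(size=(10, 8, degree))  # (e, cz, coeff)
for d in range(degree):
    # smooth only over the binning axes
    coeffs[..., d] = gaussian_filter(coeffs[..., d], sigma=1)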
def run_interpolated_fit(fit_directory, job_idx, skip_successful=False):
    """Run the hypersurface fit for a grid point.

    If `skip_successful` is true, do not run if the `fit_successful` flag is
    already True.
    """
    #TODO a lot of this is copied from fit_hypersurfaces in hypersurface.py, would be safer to make more OAOO
    #TODO Copy the param value storage stuff from fit_hypersurfaces across in the meantime

    assert os.path.isdir(fit_directory), "fit directory does not exist"

    gridpoint_json = os.path.join(fit_directory,
                                  f"gridpoint_{job_idx:06d}.json.bz2")
    gridpoint_data = from_json(gridpoint_json)

    if skip_successful and gridpoint_data["fit_successful"]:
        logging.info(
            f"Fit at job index {job_idx} already successful, skipping...")
        return

    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    interpolation_param_spec = metadata["interpolation_param_spec"]

    # this is a pipeline configuration in the form of an OrderedDict
    nominal_dataset = metadata["nominal_dataset"]
    # Why can we still not load PISA objects from JSON that are inside a
    # dict?! Grrr...
    nominal_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
        nominal_dataset["pipeline_cfg"])
    # this is a list of pipeline configurations
    sys_datasets = metadata["sys_datasets"]
    for sys_dataset in sys_datasets:
        sys_dataset["pipeline_cfg"] = pipeline_cfg_from_states(
            sys_dataset["pipeline_cfg"])
    # this is a dict of param_name : value pairs
    param_values = gridpoint_data["param_values"]

    # we do a redundant check to make sure the parameter values at this grid
    # point are correct
    interpolation_param_names = metadata["interpolation_param_names"]
    grid_shape = tuple(metadata["grid_shape"])
    # the grid point index of this job
    grid_idx = list(np.ndindex(grid_shape))[job_idx]
    for i, n in enumerate(interpolation_param_names):
        ms = "Inconsistent parameter values at grid point!"
        assert interpolation_param_spec[n]["values"][grid_idx[i]] == \
            param_values[n], ms

    # now we need to adjust the values of the parameter in all pipelines for
    # this point
    logging.info(f"updating pipelines with parameter values: {param_values}")
    for dataset in [nominal_dataset] + sys_datasets:
        for stage_cfg in dataset["pipeline_cfg"].values():
            if "params" not in stage_cfg.keys():
                continue
            for param in interpolation_param_names:
                if param in stage_cfg["params"].names:
                    stage_cfg["params"][param].value = param_values[param]

    # these are the parameters of the hypersurface, NOT the ones we
    # interpolate them over!
    hypersurface_params = []
    for param_state in metadata["hypersurface_params"]:
        hypersurface_params.append(HypersurfaceParam.from_state(param_state))

    def find_hist_stage(pipeline):
        """Locate the index of the hist stage in a pipeline."""
        hist_idx_found = False
        for i, s in enumerate(pipeline.stages):
            if s.__class__.__name__ == "hist":
                hist_idx = i
                hist_idx_found = True
                break
        if not hist_idx_found:
            raise RuntimeError(
                "Could not find histogram stage in pipeline, aborting.")
        return hist_idx

    # We create Pipeline objects, get their outputs and then forget about
    # the Pipeline object on purpose! The memory requirement to hold all
    # systematic sets at the same time is just too large, especially on the
    # cluster. The way we do it below we only need enough memory for one
    # dataset at a time.
    for dataset in [nominal_dataset] + sys_datasets:
        pipeline = Pipeline(dataset["pipeline_cfg"])
        dataset["mapset"] = pipeline.get_outputs()
        # get the un-weighted event counts as well so that we can exclude
        # bins with too little statistics
        # First, find out which stage is the hist stage
        hist_idx = find_hist_stage(pipeline)
        pipeline.stages[hist_idx].unweighted = True
        dataset["mapset_unweighted"] = pipeline.get_outputs()
    del pipeline

    # Merge maps according to the combine regex, if one was provided
    combine_regex = metadata["combine_regex"]
    if combine_regex is not None:
        for dataset in [nominal_dataset] + sys_datasets:
            dataset["mapset"] = dataset["mapset"].combine_re(combine_regex)
            dataset["mapset_unweighted"] = \
                dataset["mapset_unweighted"].combine_re(combine_regex)

    minimum_mc = metadata["minimum_mc"]
    # Remove bins (i.e. set their count to zero) that have too few MC events
    for dataset in sys_datasets + [nominal_dataset]:
        for map_name in dataset["mapset"].names:
            insuff_mc = dataset["mapset_unweighted"][map_name].nominal_values \
                < minimum_mc
            # Setting the hist to zero sets both nominal value and std_dev
            # to zero
            dataset["mapset"][map_name].hist[insuff_mc] = 0.

    hypersurface_fit_kw = metadata["hypersurface_fit_kw"]
    hypersurfaces = collections.OrderedDict()
    # flag determining whether hs fit is run in log-space or not
    log = metadata["log"]
    for map_name in nominal_dataset["mapset"].names:
        nominal_map = nominal_dataset["mapset"][map_name]
        nominal_param_values = nominal_dataset["sys_params"]

        sys_maps = [sys_dataset["mapset"][map_name]
                    for sys_dataset in sys_datasets]
        sys_param_values = [sys_dataset["sys_params"]
                            for sys_dataset in sys_datasets]

        hypersurface = Hypersurface(
            # Yes, this MUST be a deepcopy! Otherwise weird memory overwrites
            # happen and all the numbers get jumbled across the hypersurfaces
            # of different maps
            params=copy.deepcopy(hypersurface_params),
            initial_intercept=0. if log else 1.,  # Initial value for intercept
            log=log)

        hypersurface.fit(
            nominal_map=nominal_map,
            nominal_param_values=nominal_param_values,
            sys_maps=sys_maps,
            sys_param_values=sys_param_values,
            norm=True,
            # Is the space or loading time really a problem?
            # keep_maps=False,  # it would take a lot more space otherwise
            **hypersurface_fit_kw)

        logging.debug("\nFitted hypersurface report:\n%s" % hypersurface)
        hypersurfaces[map_name] = hypersurface

    gridpoint_data["hs_fit"] = hypersurfaces
    gridpoint_data["fit_successful"] = True

    to_json(gridpoint_data, gridpoint_json)
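# A minimal sketch of how a flat job index is mapped onto a point of the
# interpolation grid above, via np.ndindex (plain numpy; the grid shape and
# job index are toy values for illustration).
import numpy as np

grid_shape = (3, 4)  # e.g. 3 values of param 0 x 4 values of param 1
job_idx = 7
grid_idx = list(np.ndindex(grid_shape))[job_idx]
print(grid_idx)  # (1, 3): second value of param 0, fourth value of param 1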
def test_example_pipelines(ignore_gpu=False, ignore_root=False,
                           ignore_missing_data=False):
    """Run example pipelines.

    Parameters
    ----------
    ignore_gpu : bool
        Do not count errors initializing a GPU as failures

    ignore_root : bool
        Do not count errors importing ROOT as failures

    ignore_missing_data : bool
        Do not count errors due to missing data files as failures

    """
    # Set up the lists of strings needed to search the error messages for
    # things to ignore e.g. cuda stuff and ROOT stuff
    root_err_strings = ['ROOT', 'Roo', 'root', 'roo']
    cuda_err_strings = ['cuda']
    missing_data_string = ('Could not find resource "(.*)" in'
                          ' filesystem OR in PISA package.')

    example_directory = find_resource('settings/pipeline')
    settings_files = glob.glob(example_directory + '/*example*.cfg')

    num_configs = len(settings_files)
    failure_count = 0
    skip_count = 0

    for settings_file in settings_files:
        allow_error = False
        msg = ''
        try:
            logging.info('Instantiating pipeline from file "%s" ...',
                         settings_file)
            pipeline = Pipeline(settings_file)
            logging.info('    retrieving outputs...')
            _ = pipeline.get_outputs()
        except ImportError as err:
            exc = sys.exc_info()
            if any(errstr in str(err) for errstr in root_err_strings) \
                    and ignore_root:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has ROOT'
                       ' dependencies (ROOT cannot be imported)'
                       % settings_file)
            elif any(errstr in str(err) for errstr in cuda_err_strings) \
                    and ignore_gpu:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has cuda'
                       ' dependencies (pycuda cannot be imported)'
                       % settings_file)
            else:
                failure_count += 1
        except IOError as err:
            exc = sys.exc_info()
            match = re.match(missing_data_string, str(err), re.M | re.I)
            if match is not None and ignore_missing_data:
                skip_count += 1
                allow_error = True
                msg = ('    Skipping pipeline, %s, as it has data that'
                       ' cannot be found in the local PISA environment'
                       % settings_file)
            else:
                failure_count += 1
        except:  # pylint: disable=bare-except
            exc = sys.exc_info()
            failure_count += 1
        else:
            exc = None
        finally:
            if exc is not None:
                if allow_error:
                    logging.warning(msg)
                else:
                    logging.error(
                        '    FAILURE! %s failed to run. Please review the'
                        ' error message below and fix the problem. Continuing'
                        ' with any other configs now...', settings_file)
                    for line in format_exception(*exc):
                        for sub_line in line.splitlines():
                            logging.error(' ' * 4 + sub_line)
            else:
                logging.info('    Seems fine!')

    if skip_count > 0:
        logging.warning('%d of %d example pipeline config files were skipped',
                        skip_count, num_configs)

    if failure_count > 0:
        msg = ('<< FAIL : test_example_pipelines : (%d of %d EXAMPLE PIPELINE'
               ' CONFIG FILES FAILED) >>' % (failure_count, num_configs))
        logging.error(msg)
        raise Exception(msg)

    logging.info('<< PASS : test_example_pipelines >>')
def test_example_pipelines(path="settings/pipeline", verbosity=Levels.WARN):
    """Run pipelines from any "*.cfg" config files found at `path`."""
    path = find_resource(path)
    settings_files = glob.glob(path + "/*.cfg")

    failures = []
    successes = []

    for settings_file in settings_files:
        try:
            # NOTE: Force output of info on which settings file is being
            # instantiated and run, as warnings emitted by individual stages
            # are not as useful if we don't know which pipeline config is
            # being run
            set_verbosity(Levels.INFO)
            logging.info(f'Instantiating Pipeline with "{settings_file}" ...')

            set_verbosity(Levels.WARN)
            pipeline = Pipeline(settings_file)

            set_verbosity(Levels.INFO)
            logging.info(
                f'Running Pipeline instantiated from "{settings_file}" ...')

            set_verbosity(Levels.WARN)
            pipeline.get_outputs()

        except Exception as err:
            failures.append(settings_file)

            msg = f"<< FAILURE IN PIPELINE : {settings_file} >>"
            set_verbosity(verbosity)
            logging.error("=" * len(msg))
            logging.error(msg)
            logging.error("=" * len(msg))

            # Reproduce the error with full output
            set_verbosity(Levels.TRACE)
            try:
                pipeline = Pipeline(settings_file)
                pipeline.get_outputs()
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error("#" * len(msg))

        else:
            successes.append(settings_file)

        finally:
            set_verbosity(verbosity)

    # Summarize results
    set_verbosity(verbosity)
    logging.info(
        "<< EXAMPLE PIPELINES : "
        f"{len(successes)} succeeded and {len(failures)} failed >>")

    # Exit with error if any failures
    if failures:
        raise Exception(
            f"{len(failures)} example pipeline(s) failed:\n  "
            + ", ".join(f'"{f}"' for f in failures))
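# A minimal usage sketch for the test runner above; the arguments shown are
# just its own defaults, and running this module as a script is an
# assumption for illustration, not part of the original code.
if __name__ == "__main__":
    test_example_pipelines(path="settings/pipeline", verbosity=Levels.WARN)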
def compare(outdir, ref, ref_label, test, test_label, asymm_max=None,
            asymm_min=None, combine=None, diff_max=None, diff_min=None,
            fract_diff_max=None, fract_diff_min=None, json=False, pdf=False,
            png=False, ref_abs=False, ref_param_selections=None, sum=None,
            test_abs=False, test_param_selections=None):
    """Compare two entities. The result of each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running a
    DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : string
        Param selections to apply to ref pipeline config(s). Not applicable
        if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not applicable
        if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the shell.
        Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is specified, no
        plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified, no
        plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference, (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference, (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull,
        (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)
        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.'
                         % (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(map):
        newmap = deepcopy(map)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))
        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))
        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))
        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(os.path.join(
            outdir,
            'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(os.path.join(
            outdir,
            'fract_diff__%s___%s.json.bz2' % (test_plot_label,
                                              ref_plot_label)))
        asymm.to_json(os.path.join(
            outdir,
            'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(summary_stats, os.path.join(
            outdir,
            'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False, symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False, symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False,
                          symmetric=fract_diff_symm, ratio=True)
        plotter.label = ('(%s-%s)/%s' % (test_plot_label, ref_plot_label,
                                         ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
                          log=False, annotate=False, symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$'
                         % (test_plot_label, ref_plot_label,
                            ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
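# A minimal sketch of the zero_to_nan guard used in `compare` above: zero
# reference bins become NaN before dividing, so ratio-style maps show "no
# data" instead of raising divide-by-zero. Plain numpy on toy histograms;
# the EPSILON value here is an assumed stand-in for pisa's constant.
import numpy as np

EPSILON = 1e-10  # tolerance below which a bin counts as empty (assumption)
ref = np.array([[4., 0.], [9., 1.]])
test = np.array([[5., 2.], [8., 1.]])

safe_ref = ref.copy()
safe_ref[np.isclose(safe_ref, 0, rtol=0, atol=EPSILON)] = np.nan
fract_diff = (test - ref) / safe_ref      # NaN where ref was empty
asymm = (test - ref) / np.sqrt(safe_ref)  # pull, (Test - Ref) / sqrt(Ref)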
def compare_pisa_self(config1, config2, testname1, testname2, outdir):
    """Compare baseline output of PISA 3 with a different version of
    itself."""
    logging.debug('>> Comparing %s with %s (both PISA)'
                  % (testname1, testname2))

    pipeline1 = Pipeline(config1)
    outputs1 = pipeline1.get_outputs()
    pipeline2 = Pipeline(config2)
    outputs2 = pipeline2.get_outputs()

    if '5-stage' in testname1:
        cake1_trck_map = outputs1.combine_wildcard('*_trck')
        cake1_cscd_map = outputs1.combine_wildcard('*_cscd')

        cake1_trck_map_to_plot = {}
        cake1_trck_map_to_plot['ebins'] = \
            cake1_trck_map.binning['reco_energy'].bin_edges.magnitude
        cake1_trck_map_to_plot['czbins'] = \
            cake1_trck_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_trck_map_to_plot['map'] = cake1_trck_map.hist
        cake1_trck_events = np.sum(cake1_trck_map_to_plot['map'])

        cake1_cscd_map_to_plot = {}
        cake1_cscd_map_to_plot['ebins'] = \
            cake1_cscd_map.binning['reco_energy'].bin_edges.magnitude
        cake1_cscd_map_to_plot['czbins'] = \
            cake1_cscd_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_cscd_map_to_plot['map'] = cake1_cscd_map.hist
        cake1_cscd_events = np.sum(cake1_cscd_map_to_plot['map'])

    elif '4-stage' in testname1:
        cake1_both_map = outputs1.combine_wildcard('*')

        cake1_trck_map_to_plot = {}
        cake1_trck_map_to_plot['ebins'] = \
            cake1_both_map.binning['reco_energy'].bin_edges.magnitude
        cake1_trck_map_to_plot['czbins'] = \
            cake1_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_trck_map_to_plot['map'] = \
            cake1_both_map.split(dim='pid', bin='trck').hist
        cake1_trck_events = np.sum(cake1_trck_map_to_plot['map'])

        cake1_cscd_map_to_plot = {}
        cake1_cscd_map_to_plot['ebins'] = \
            cake1_both_map.binning['reco_energy'].bin_edges.magnitude
        cake1_cscd_map_to_plot['czbins'] = \
            cake1_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake1_cscd_map_to_plot['map'] = \
            cake1_both_map.split(dim='pid', bin='cscd').hist
        cake1_cscd_events = np.sum(cake1_cscd_map_to_plot['map'])

    else:
        raise ValueError("Should be comparing 4-stage or 5-stage PISAs.")

    if '5-stage' in testname2:
        cake2_trck_map = outputs2.combine_wildcard('*_trck')
        cake2_cscd_map = outputs2.combine_wildcard('*_cscd')

        cake2_trck_map_to_plot = {}
        cake2_trck_map_to_plot['ebins'] = \
            cake2_trck_map.binning['reco_energy'].bin_edges.magnitude
        cake2_trck_map_to_plot['czbins'] = \
            cake2_trck_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_trck_map_to_plot['map'] = cake2_trck_map.hist
        cake2_trck_events = np.sum(cake2_trck_map_to_plot['map'])

        cake2_cscd_map_to_plot = {}
        cake2_cscd_map_to_plot['ebins'] = \
            cake2_cscd_map.binning['reco_energy'].bin_edges.magnitude
        cake2_cscd_map_to_plot['czbins'] = \
            cake2_cscd_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_cscd_map_to_plot['map'] = cake2_cscd_map.hist
        cake2_cscd_events = np.sum(cake2_cscd_map_to_plot['map'])

    elif '4-stage' in testname2:
        cake2_both_map = outputs2.combine_wildcard('*')

        cake2_trck_map_to_plot = {}
        cake2_trck_map_to_plot['ebins'] = \
            cake2_both_map.binning['reco_energy'].bin_edges.magnitude
        cake2_trck_map_to_plot['czbins'] = \
            cake2_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_trck_map_to_plot['map'] = \
            cake2_both_map.split(dim='pid', bin='trck').hist
        cake2_trck_events = np.sum(cake2_trck_map_to_plot['map'])

        cake2_cscd_map_to_plot = {}
        cake2_cscd_map_to_plot['ebins'] = \
            cake2_both_map.binning['reco_energy'].bin_edges.magnitude
        cake2_cscd_map_to_plot['czbins'] = \
            cake2_both_map.binning['reco_coszen'].bin_edges.magnitude
        cake2_cscd_map_to_plot['map'] = \
            cake2_both_map.split(dim='pid', bin='cscd').hist
        cake2_cscd_events = np.sum(cake2_cscd_map_to_plot['map'])

    else:
        raise ValueError("Should be comparing 4-stage or 5-stage PISAs.")

    max_diff_ratio, max_diff = plot_comparisons(
        ref_map=cake1_trck_map_to_plot,
        new_map=cake2_trck_map_to_plot,
        ref_abv=testname1,
        new_abv=testname2,
        outdir=outdir,
        subdir='recopidcombinedchecks',
        stagename=None,
        servicename='recopid',
        name='trck',
        texname=r'\rm{trck}',
        shorttitles=True,
        ftype=FMT)

    max_diff_ratio, max_diff = plot_comparisons(
        ref_map=cake1_cscd_map_to_plot,
        new_map=cake2_cscd_map_to_plot,
        ref_abv=testname1,
        new_abv=testname2,
        outdir=outdir,
        subdir='recopidcombinedchecks',
        stagename=None,
        servicename='recopid',
        name='cscd',
        texname=r'\rm{cscd}',
        shorttitles=True,
        ftype=FMT)

    print_event_rates(testname1=testname1, testname2=testname2, kind='trck',
                      map1_events=cake1_trck_events,
                      map2_events=cake2_trck_events)
    print_event_rates(testname1=testname1, testname2=testname2, kind='cscd',
                      map1_events=cake1_cscd_events,
                      map2_events=cake2_cscd_events)
    print_event_rates(testname1=testname1, testname2=testname2, kind='all',
                      map1_events=cake1_trck_events + cake1_cscd_events,
                      map2_events=cake2_trck_events + cake2_cscd_events)

    return pipeline2
def compare_4stage(config, testname, outdir, oscfitfile):
    """Compare 4 stage output of PISA 3 with OscFit."""
    logging.debug('>> Working on baseline comparisons between both fitters.')
    logging.debug('>>> Doing %s test.' % testname)
    baseline_comparisons = from_file(oscfitfile)
    ref_abv = 'OscFit'

    pipeline = Pipeline(config)
    outputs = pipeline.get_outputs()

    total_pisa_events = 0.0
    total_oscfit_events = 0.0

    for nukey in baseline_comparisons.keys():
        baseline_map_to_plot = baseline_comparisons[nukey]
        oscfit_events = np.sum(baseline_map_to_plot['map'])

        cake_map = outputs.combine_wildcard('*')
        cake_map_to_plot = {}
        cake_map_to_plot['ebins'] = \
            cake_map.binning['reco_energy'].bin_edges.magnitude
        cake_map_to_plot['czbins'] = \
            cake_map.binning['reco_coszen'].bin_edges.magnitude
        if nukey == 'trck':
            texname = r'\rm{trck}'
            cake_map_to_plot['map'] = \
                cake_map.split(dim='pid', bin='trck').hist
        elif nukey == 'cscd':
            texname = r'\rm{cscd}'
            cake_map_to_plot['map'] = \
                cake_map.split(dim='pid', bin='cscd').hist
        pisa_events = np.sum(cake_map_to_plot['map'])

        max_diff_ratio, max_diff = plot_comparisons(
            ref_map=baseline_map_to_plot,
            new_map=cake_map_to_plot,
            ref_abv=ref_abv,
            new_abv=testname,
            outdir=outdir,
            subdir='recopidcombinedchecks',
            stagename=None,
            servicename='baseline',
            name=nukey,
            texname=texname,
            shorttitles=True,
            ftype=FMT)

        print_event_rates(testname1=testname, testname2='OscFit', kind=nukey,
                          map1_events=pisa_events,
                          map2_events=oscfit_events)

        total_pisa_events += pisa_events
        total_oscfit_events += oscfit_events

    print_event_rates(testname1=testname, testname2='OscFit', kind='all',
                      map1_events=total_pisa_events,
                      map2_events=total_oscfit_events)

    return pipeline
def _compute_outputs(self, inputs=None):
    """Compute histograms for output channels."""
    logging.debug('Entering roounfold._compute_outputs')

    self.fit_hash = deepcopy(inputs.metadata['fit_hash'])
    logging.trace('{0} roounfold fit_hash = '
                  '{1}'.format(inputs.metadata['name'], self.fit_hash))
    if self.random_state is not None:
        logging.trace('{0} roounfold random_state = '
                      '{1}'.format(inputs.metadata['name'],
                                   hash_obj(self.random_state.get_state())))

    if not isinstance(inputs, Data):
        raise AssertionError('inputs is not a Data object, instead is '
                             'type {0}'.format(type(inputs)))
    self._data = inputs

    if not self.params['return_eff'].value:
        if len(self.output_names) > 1:
            raise AssertionError(
                'Specified more than one NuFlavIntGroup as '
                'signal, {0}'.format(self.output_names))
        self.output_str = str(self.output_names[0])

    real_data = self.params['real_data'].value
    if real_data:
        logging.debug('Using real data')
        if 'nuall' not in self._data:
            raise AssertionError(
                'When using real data, input Data object must contain '
                'only one element "nuall" containing the data, instead it '
                'contains elements {0}'.format(self._data.keys()))
        if self.disk_cache is None:
            raise AssertionError(
                'No disk_cache specified from which to load - using real '
                'data requires object such as the response object to be '
                'cached to disk.')

    if self.params['optimize_reg'].value and real_data:
        raise AssertionError(
            'Cannot optimize the regularisation if using real data.')
    if int(self.params['stat_fluctuations'].m) != 0 and real_data:
        raise AssertionError(
            'Cannot do poisson fluctuations if using real data.')
    if self.params['return_eff'].value and real_data:
        raise AssertionError(
            'Not implemented return of efficiency maps if using real data.')

    if self.params['return_eff'].value:
        fin_data = self._data
        # Load generator level data for signal
        unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
        pipeline_cfg = from_file(unfold_pipeline_cfg)
        template_maker = Pipeline(pipeline_cfg)
        gen_data = template_maker.get_outputs()

        fin_data = fin_data.transform_groups(self.output_names)
        gen_data = gen_data.transform_groups(self.output_names)

        efficiencies = []
        assert set(fin_data.keys()) == set(gen_data.keys())
        for fig in fin_data.keys():
            figd_f = fin_data[fig]
            figd_g = gen_data[fig]
            inv_eff = self._get_inv_eff(figd_f, figd_g, self.true_binning,
                                        fig)

            i_mask = ~(inv_eff == 0.)
            eff = unp.uarray(np.zeros(self.true_binning.shape),
                             np.zeros(self.true_binning.shape))
            eff[i_mask] = 1. / inv_eff[i_mask]
            efficiencies.append(
                Map(name=fig, hist=eff, binning=self.true_binning))
        return MapSet(efficiencies)

    # TODO(shivesh): [   TRACE] None of the selections ['iron', 'nh'] found
    # in this pipeline.
    # TODO(shivesh): Fix "smearing_matrix" memory leak
    # TODO(shivesh): Fix unweighted unfolding
    # TODO(shivesh): different algorithms
    # TODO(shivesh): implement handling of 0 division inside Map objects
    if real_data:
        unfold_map = self.unfold_real_data()
    else:
        unfold_map = self.unfold_mc()

    return MapSet([unfold_map])
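# A minimal sketch of the masked efficiency inversion above: bins whose
# inverse efficiency is zero stay at 0 +/- 0 rather than triggering a
# division by zero. Uses the real `uncertainties` package that pisa builds
# on; the `inv_eff` values are made up for illustration.
import numpy as np
from uncertainties import unumpy as unp

inv_eff = unp.uarray([2., 0., 4.], [0.1, 0., 0.2])  # toy 1/efficiency bins
i_mask = ~(unp.nominal_values(inv_eff) == 0.)
eff = unp.uarray(np.zeros(3), np.zeros(3))
eff[i_mask] = 1. / inv_eff[i_mask]  # uncertainties propagate automatically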
def main():
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))

    contin = True
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        # if 'atm_muon_scale' in f.name:
        #     contin = False
        # if contin:
        #     continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            dllh = llh - np.min(-llh)

            mllh_idx = np.argmin(-llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([l_out]), test_label=r'-sigma', **args)
        compare(outdir=u_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([u_out]), test_label=r'+sigma', **args)

        l_in_mapset = l_outdir + '/' + \
            'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + \
            'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'

        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
            ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')

        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles)
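# A minimal sketch of reading off +/- sigma points from a prior LLH curve,
# in the spirit of the non-uniform branch above. Plain numpy on a toy
# Gaussian prior; the deltaLLH convention here (maximum LLH minus LLH, with
# SIGMA = 0.5 corresponding to one sigma of a Gaussian) is one common
# choice, assumed for illustration.
import numpy as np

SIGMA = 0.5
scan_over = np.linspace(-3., 3., 1000)
llh = -0.5 * scan_over**2   # Gaussian prior log-likelihood, peaked at 0
dllh = np.max(llh) - llh    # deltaLLH measured down from the maximum

mllh_idx = np.argmax(llh)
l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx
print(scan_over[l_sig_idx], scan_over[u_sig_idx])  # ~(-1.0, +1.0)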