def sampleHypercube(n_dim, n_samp, rand_set_id=0, crit='m', iterations=5,
                    rdata_dir='~/cowen/data/random'):
    """Load (if file exists) or generate samples from within hypercube using
    Latin hypercube sampling

    Requires pyDOE to generate new samples.
    """
    fname = samplesFilename(n_dim=n_dim,
                            n_samp=n_samp,
                            rand_set_id=rand_set_id,
                            crit=crit,
                            iterations=iterations)
    rdata_dir = os.path.expandvars(os.path.expanduser(rdata_dir))
    fpath = os.path.join(rdata_dir, fname)

    if os.path.exists(fpath):
        samps = fileio.from_file(fpath)
    else:
        logging.info('File not found. Generating new set of samples & saving'
                     ' result to "%s"', fpath)
        import pyDOE
        mkdir(rdata_dir)
        # Set a deterministic random state based upon the critical hypercube
        # sampling parameters specified
        n_bad_seeds(n_dim, n_samp, rand_set_id)
        samps = pyDOE.lhs(n=n_dim, samples=n_samp, criterion=crit,
                          iterations=iterations)
        fileio.to_file(samps, fpath)

    return samps
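# Usage sketch (not part of the original module): draw -- or load from cache,
# if the file already exists -- 100 Latin-hypercube samples in 8 dimensions.
# Assumes `sampleHypercube` is in scope and pyDOE is installed if generation
# is needed; the cache directory defaults as in the signature above.
samps = sampleHypercube(n_dim=8, n_samp=100, rand_set_id=0, crit='m')
# Expected shape: (n_samp, n_dim) == (100, 8), with values in [0, 1]
print(samps.shape)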
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')

    distribution_maker = DistributionMaker(pipelines=args.pipeline)  # pylint: disable=redefined-outer-name
    if args.select is not None:
        distribution_maker.select_params(args.select)

    outputs = distribution_maker.get_outputs(return_sum=args.return_sum)  # pylint: disable=redefined-outer-name

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'distribution_maker_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(outdir=args.outdir, fmt=plot_formats, log=False,
                             annotate=False)
        for num, output in enumerate(outputs):
            my_plotter.plot_2d_array(output, fname='dist_output_%d' % num)

    if return_outputs:
        return distribution_maker, outputs
def save_hyperplane_fits(input_data, fit_results, outdir, tag):
    """Store discrete systematics fits and chi-square values to a specified
    output location, with results identified by a tag.

    Parameters
    ----------
    input_data : mapping
        input data container returned by `hyperplane` function
    fit_results : dict
        fit results data container returned by `hyperplane` function
    outdir : string
        output directory
    tag : string
        identifier for filenames holding fit results
    """
    # Get some strings to use when naming
    dim = len(input_data["param_names"])
    param_str = "_".join(input_data["param_names"])

    # Store as JSON
    mkdir(outdir)
    res_path = join(
        outdir,
        "%s__%dd__%s__hyperplane_fits.json" % (tag, dim, param_str)
    )
    to_file(fit_results, res_path)
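# Usage sketch (hypothetical values): with tag='nominal' and two fitted
# parameter names, the results land in
#   <outdir>/nominal__2d__dom_eff_hole_ice__hyperplane_fits.json
# `input_data` and `fit_results` are assumed to come from the `hyperplane`
# function referenced in the docstring.
save_hyperplane_fits(
    input_data=input_data,    # must contain "param_names", e.g. ['dom_eff', 'hole_ice']
    fit_results=fit_results,
    outdir='/tmp/hyperplane_fits',
    tag='nominal',
)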
def save(self, fpath, ver=None, **kwargs):
    """Save cross sections (and the energy specification) to a file at
    `fpath`."""
    if ver is None:
        if self._ver is None:
            raise ValueError(
                'Either a ver must be specified in call to `save` or it '
                'must have been set prior to the invocation of `save`.'
            )
        ver = self._ver
    else:
        assert ver == self._ver

    try:
        fpath = find_resource(fpath)
    except IOError:
        pass
    fpath = os.path.expandvars(os.path.expanduser(fpath))

    all_xs = {}
    # Get any existing data from file
    if os.path.exists(fpath):
        all_xs = from_file(fpath)

    # Validate existing data by instantiating objects from each
    for v, d in all_xs.items():
        CrossSections(ver=v, energy=d['energy'], xsec=d['xsec'])

    if ver in all_xs:
        logging.warning('Overwriting existing version "' + ver
                        + '" in file ' + fpath)
    all_xs[ver] = {'xsec': self, 'energy': self.energy}
    to_file(all_xs, fpath, **kwargs)
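# Usage sketch (hypothetical path): append this object's cross sections to a
# JSON file, creating the file if needed. Assumes `xs` is a CrossSections
# instance whose version (`_ver`) was set at construction; versions already
# stored in the file are validated and preserved, and a matching version is
# overwritten with a warning.
xs.save('cross_sections/cross_sections.json')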
def saveFile(self, filename):
    """Write Fisher matrix to json file"""
    dict_to_write = {}
    dict_to_write['matrix'] = self.matrix
    dict_to_write['parameters'] = self.parameters
    dict_to_write['best_fits'] = self.best_fits
    dict_to_write['labels'] = self.labels
    dict_to_write['priors'] = self.priors

    to_file(dict_to_write, filename)
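# Usage sketch: write a Fisher matrix to JSON and read it back with the
# generic loader used throughout these modules. Assumes `fisher` is an
# instance of the class defining `saveFile` and that `from_file` is the
# pisa.utils.fileio loader.
fisher.saveFile('fisher_matrix.json')
contents = from_file('fisher_matrix.json')
# keys: 'matrix', 'parameters', 'best_fits', 'labels', 'priors'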
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')

    detectors = Detectors(args.pipeline, shared_params=args.shared_params)
    Names = detectors.det_names
    if args.select is not None:
        detectors.select_params(args.select)

    outputs = detectors.get_outputs(return_sum=args.return_sum)

    #outputs = outputs[0].fluctuate(
    #    method='poisson', random_state=get_random_state([0, 0, 0]))

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'detectors_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(
            outdir=args.outdir, fmt=plot_formats, log=False, annotate=False
        )
        for num, output in enumerate(outputs):
            if args.return_sum:
                my_plotter.plot_2d_array(output, fname=Names[num])
            else:
                for out in output:
                    my_plotter.plot_2d_array(out, fname=Names[num])

    if return_outputs:
        return detectors, outputs
def main():
    """Perform a hypersurface fit to discrete systematics sets."""
    # Get args
    args = parse_args()
    set_verbosity(args.v)

    # Read in data and fit hypersurfaces to it
    hypersurfaces = create_hypersurfaces(fit_cfg=args.fit_cfg)

    # Store as JSON
    mkdir(args.outdir)
    arbitrary_hypersurface = list(hypersurfaces.values())[0]
    output_path = join(
        args.outdir,
        get_hypersurface_file_name(arbitrary_hypersurface, args.tag)
    )
    to_file(hypersurfaces, output_path)
def assemble_interpolated_fits(fit_directory, output_file, drop_fit_maps=False):
    """After all of the fits on the cluster are done, assemble the results
    into one JSON.

    The JSON produced by this function is what
    `load_interpolated_hypersurfaces` expects.
    """
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    combined_data = collections.OrderedDict()
    combined_data["interpolation_param_spec"] = metadata[
        "interpolation_param_spec"]

    # Loop over grid points
    hs_fits = []
    grid_shape = tuple(metadata["grid_shape"])
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
        # Load grid point data
        gridpoint_json = os.path.join(fit_directory,
                                      f"gridpoint_{job_idx:06d}.json.bz2")
        logging.info(f"Reading {gridpoint_json}")
        gridpoint_data = from_json(gridpoint_json)

        # Check the loaded data
        assert job_idx == gridpoint_data["job_idx"]
        assert np.all(grid_idx == gridpoint_data["grid_idx"])
        # TODO: Offer to run incomplete fits locally
        assert gridpoint_data["fit_successful"], f"job no. {job_idx} not finished"

        # Drop fit maps if requested (can significantly reduce file size)
        if drop_fit_maps:
            for key, hs_state in gridpoint_data["hs_fit"].items():
                hs_state["fit_maps_raw"] = None
                hs_state["fit_maps_norm"] = None

        # Add grid point data to output file
        hs_fits.append(collections.OrderedDict(
            param_values=gridpoint_data["param_values"],
            hs_fit=gridpoint_data["hs_fit"]))

    # Write the output file
    combined_data["hs_fits"] = hs_fits
    to_file(combined_data, output_file)
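# Usage sketch (hypothetical paths): once every cluster job has written its
# gridpoint_XXXXXX.json.bz2 file, merge them into the single file that
# `load_interpolated_hypersurfaces` expects.
assemble_interpolated_fits(
    fit_directory='/data/hs_fits/dom_eff_grid',
    output_file='/data/hs_fits/dom_eff_interpolated.json.bz2',
    drop_fit_maps=True,  # omit raw/normalized fit maps to shrink the output
)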
def stability_test(func, func_kw, ref_path, ignore_fails=False,
                   define_as_ref=False):
    """Basic stability test of a Numba CPUDispatcher function (i.e., a
    function compiled via @jit / @njit)"""
    func_name = func.py_func.__name__
    logging.info("stability testing `%s`", func_name)
    ref_path = expand(ref_path)

    test = execute_func(func=func, func_kw=func_kw)

    if define_as_ref:
        to_file(test, ref_path)

    # Even when we define the test case as ref, round-trip to/from file to
    # ensure that doesn't corrupt the values
    ref = from_file(ref_path)

    check(test=test, ref=ref, label=func_name, ignore_fails=ignore_fails)

    return test, ref
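# Usage sketch: exercise `stability_test` on a trivial @njit kernel. The exact
# contract of `execute_func` (how the inputs/outputs in `func_kw` are captured)
# is assumed here, and the reference path is arbitrary. On a first run,
# `define_as_ref=True` writes the reference that later runs compare against.
import numpy as np
from numba import njit

@njit
def scale(x, out):
    # doubles each element of x into the preallocated output array
    for i in range(x.size):
        out[i] = 2.0 * x[i]

test, ref = stability_test(
    func=scale,
    func_kw=dict(x=np.linspace(0.0, 1.0, 5), out=np.empty(5)),
    ref_path='/tmp/scale_ref.json',
    define_as_ref=True,
)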
def store_kernels(self, filename, fmt=None):
    """Store reconstruction kernels to file"""
    fileio.to_file(self.kernels, filename, fmt=fmt)
def main(): args = parse_args() set_verbosity(args.v) if args.plot: import matplotlib as mpl mpl.use('pdf') import matplotlib.pyplot as plt from pisa.utils.plotter import Plotter cfg = from_file(args.fit_settings) sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',') stop_idx = cfg.getint('general', 'stop_after_stage') for sys in sys_list: # Parse info for given systematic nominal = cfg.getfloat(sys, 'nominal') degree = cfg.getint(sys, 'degree') force_through_nominal = cfg.getboolean(sys, 'force_through_nominal') runs = eval(cfg.get(sys, 'runs')) #print "runs ", runs smooth = cfg.get(sys, 'smooth') x_values = np.array(sorted(runs)) # Build fit function if force_through_nominal: function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))" else: function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))" # Add free parameter for constant term degree += 1 fit_fun = eval(function) # Instantiate template maker template_maker = Pipeline(args.template_settings) if not args.set_param == '': for one_set_param in args.set_param: p_name, value = one_set_param.split("=") #print "p_name,value= ", p_name, " ", value value = parse_quantity(value) value = value.n * value.units param = template_maker.params[p_name] #print "old ", p_name, "value = ", param.value param.value = value #print "new ", p_name, "value = ", param.value template_maker.update_params(param) inputs = {} map_names = None # Get sys templates for run in runs: for key, val in cfg.items('%s:%s'%(sys, run)): if key.startswith('param.'): _, pname = key.split('.') param = template_maker.params[pname] try: value = parse_quantity(val) param.value = value.n * value.units except ValueError: value = parse_string_literal(val) param.value = value param.set_nominal_to_current_value() template_maker.update_params(param) # Retreive maps template = template_maker.get_outputs(idx=stop_idx) if map_names is None: map_names = [m.name for m in template] inputs[run] = {} for m in template: inputs[run][m.name] = m.hist # Numpy acrobatics: arrays = {} for name in map_names: arrays[name] = [] for x in x_values: arrays[name].append( inputs[x][name] / unp.nominal_values(inputs[nominal][name]) ) a = np.array(arrays[name]) arrays[name] = np.rollaxis(a, 0, len(a.shape)) # Shift to get deltas x_values -= nominal # Binning object (assuming they're all the same) binning = template.maps[0].binning shape = [d.num_bins for d in binning] + [degree] shape_small = [d.num_bins for d in binning] outputs = {} errors = {} for name in map_names: # Now actualy perform some fits outputs[name] = np.ones(shape) errors[name] = np.ones(shape) for idx in np.ndindex(*shape_small): y_values = unp.nominal_values(arrays[name][idx]) y_sigma = unp.std_devs(arrays[name][idx]) if np.any(y_sigma): popt, pcov = curve_fit(fit_fun, x_values, y_values, sigma=y_sigma, p0=np.ones(degree)) else: popt, pcov = curve_fit(fit_fun, x_values, y_values, p0=np.ones(degree)) perr = np.sqrt(np.diag(pcov)) for k, p in enumerate(popt): outputs[name][idx][k] = p errors[name][idx][k] = perr[k] # TODO(philippeller): the below block of code will fail # Maybe plot #if args.plot: # fig_num = i + nx * j # if fig_num == 0: # fig = plt.figure(num=1, figsize=( 4*nx, 4*ny)) # subplot_idx = nx*(ny-1-j)+ i + 1 # plt.subplot(ny, nx, subplot_idx) # #plt.snameter(x_values, y_values, color=plt_colors[name]) # plt.gca().errorbar(x_values, y_values, yerr=y_sigma, # fmt='o', color=plt_colors[name], # ecolor=plt_colors[name], # mec=plt_colors[name]) # # Plot nominal point again in black # 
plt.snameter([0.0], [1.0], color='k') # f_values = fit_fun(x_values, *popt) # fun_plot, = plt.plot(x_values, f_values, # color=plt_colors[name]) # plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9, # np.max(unp.nominal_values(arrays[name]))*1.1) # if i > 0: # plt.setp(plt.gca().get_yticklabels(), visible=False) # if j > 0: # plt.setp(plt.gca().get_xticklabels(), visible=False) if smooth == 'gauss': for name in map_names: for d in range(degree): outputs[name][...,d] = gaussian_filter(outputs[name][...,d],sigma=1) if smooth == 'gauss_pid': for name in map_names: split_idx = binning.names.index('pid') tot = len(binning)-1 for d in range(degree): for p in range(len(binning['pid'])): outputs[name][...,p,d] = gaussian_filter( np.swapaxes(outputs[name], split_idx, tot)[...,p,d], sigma=1 ) outputs[name] = np.swapaxes(outputs[name], split_idx, tot) # Save the raw ones anyway outputs['pname'] = sys outputs['nominal'] = nominal outputs['function'] = function outputs['map_names'] = map_names outputs['binning_hash'] = binning.hash to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys, args.tag, smooth)) if args.plot: for d in range(degree): maps = [] for name in map_names: maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d], binning=binning)) maps = MapSet(maps) my_plotter = Plotter( stamp='', outdir=args.out_dir, fmt='pdf', log=False, label='' ) my_plotter.plot_2d_array( maps, fname='%s_%s_%s_%s'%(sys, args.tag, d, smooth), )
def _compute_nominal_transforms(self): self.load_events(self.params.aeff_events) self.cut_events(self.params.transform_events_keep_criteria) # Units must be the following for correctly converting a sum-of- # OneWeights-in-bin to an average effective area across the bin. comp_units = dict(true_energy='GeV', true_coszen=None, true_azimuth='rad') # Select only the units in the input/output binning for conversion # (can't pass more than what's actually there) in_units = {dim: unit for dim, unit in comp_units.items() if dim in self.input_binning} # TODO: use out_units for some kind of conversion? #out_units = {dim: unit for dim, unit in comp_units.items() # if dim in self.output_binning} # These will be in the computational units input_binning = self.input_binning.to(**in_units) # Account for "missing" dimension(s) (dimensions OneWeight expects for # computation of bin volume), and accommodate with a factor equal to # the full range. See IceCube wiki/documentation for OneWeight for # more info. missing_dims_vol = 1 if 'true_azimuth' not in input_binning: missing_dims_vol *= 2*np.pi if 'true_coszen' not in input_binning: missing_dims_vol *= 2 if bool(self.debug_mode): outdir = os.path.join(find_resource('debug'), self.stage_name, self.service_name) mkdir(outdir) #hex_hash = hash2hex(kde_hash) bin_volumes = input_binning.bin_volumes(attach_units=False) norm_volumes = bin_volumes * missing_dims_vol nominal_transforms = [] for xform_flavints in self.transform_groups: logging.debug('Working on %s effective areas xform', xform_flavints) aeff_transform = self.events.histogram( kinds=xform_flavints, binning=input_binning, weights_col='weighted_aeff', errors=(self.error_method not in [None, False]) ) aeff_transform = aeff_transform.hist # Divide histogram by # (energy bin width x coszen bin width x azimuth bin width) # volumes to convert from sums-of-OneWeights-in-bins to # effective areas. Note that volume correction factor for # missing dimensions is applied here. aeff_transform /= norm_volumes if self.debug_mode: outfile = os.path.join( outdir, 'aeff_' + str(xform_flavints) + '.pkl' ) to_file(aeff_transform, outfile) nominal_transforms.extend( populate_transforms( service=self, xform_flavints=xform_flavints, xform_array=aeff_transform ) ) return TransformSet(transforms=nominal_transforms)
def make_toy_events(outdir, num_events, energy_range, spectral_index,
                    coszen_range, num_sets, first_set, aeff_energy_param,
                    aeff_coszen_param, reco_param, pid_param, pid_dist):
    """Make toy events and store to a file.

    Parameters
    ----------
    outdir : string
    num_events : iterable of int
    energy_range : 2-tuple of floats
    spectral_index : float
    coszen_range : 2-tuple of floats
    num_sets : int
    first_set : int
    aeff_energy_param : string
    aeff_coszen_param : string
    reco_param : string
    pid_param : string
    pid_dist : string

    Returns
    -------
    events : :class:`pisa.core.events.Events`

    """
    energy_range = sorted(energy_range)
    coszen_range = sorted(coszen_range)

    # Validation of args
    assert energy_range[0] > 0 and energy_range[1] < 1e9
    assert coszen_range[0] >= -1 and coszen_range[1] <= 1
    assert np.diff(energy_range)[0] > 0, str(energy_range)
    assert np.diff(coszen_range)[0] > 0, str(coszen_range)
    assert spectral_index >= 0, str(spectral_index)
    assert first_set >= 0, str(first_set)
    assert num_sets >= 1, str(num_sets)

    # Make sure resources specified actually exist
    for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]:
        find_resource(arg)

    mkdir(outdir, warn=False)

    set_indices = list(range(first_set, first_set + num_sets))

    # The following loop is for validation only
    for num, index in product(num_events, set_indices):
        mcgen_random_state(num_events=num, set_index=index)

    for num, set_index in product(num_events, set_indices):
        mcevts_fname = FNAME_TEMPLATE.format(
            file_type='events',
            detector='vlvnt',
            e_min=format_num(energy_range[0]),
            e_max=format_num(energy_range[1]),
            spectral_index=format_num(spectral_index, sigfigs=2,
                                      trailing_zeros=True),
            cz_min=format_num(coszen_range[0]),
            cz_max=format_num(coszen_range[1]),
            num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)),
            set_index=format_num(set_index, sci_thresh=(10, -10)),
            extension='hdf5'
        )
        mcevts_fpath = os.path.join(outdir, mcevts_fname)
        if os.path.isfile(mcevts_fpath):
            logging.warning('File already exists, skipping: "%s"', mcevts_fpath)
            continue

        logging.info('Working on set "%s"', mcevts_fname)

        # TODO: pass filepaths / resource locations via command line args

        # Create a single random state object to pass from function to function
        random_state = mcgen_random_state(num_events=num, set_index=set_index)

        mc_events = generate_mc_events(
            num_events=num,
            energy_range=energy_range,
            coszen_range=coszen_range,
            spec_ind=spectral_index,
            aeff_energy_param_source=aeff_energy_param,
            aeff_coszen_param_source=aeff_coszen_param,
            random_state=random_state
        )
        populate_reco_observables(mc_events=mc_events,
                                  param_source=reco_param,
                                  random_state=random_state)
        populate_pid(mc_events=mc_events,
                     param_source=pid_param,
                     random_state=random_state,
                     dist=pid_dist)

        to_file(mc_events, mcevts_fpath)

    return mc_events
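# Usage sketch (hypothetical resource paths and values): generate one set of
# 1e5 toy events with an E^-1 spectrum over 1-80 GeV and the full sky. Note
# that `num_events` is iterated over together with the set indices, so a
# sequence is passed; the parameterization files and the `pid_dist` value are
# placeholders, not known-good resource names.
events = make_toy_events(
    outdir='/tmp/toy_events',
    num_events=[int(1e5)],
    energy_range=(1, 80),
    spectral_index=1.0,
    coszen_range=(-1, 1),
    num_sets=1,
    first_set=0,
    aeff_energy_param='events/toy_aeff_energy_param.json',
    aeff_coszen_param='events/toy_aeff_coszen_param.json',
    reco_param='events/toy_reco_param.json',
    pid_param='events/toy_pid_param.json',
    pid_dist='discrete',
)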
def compare(outdir, ref, ref_label, test, test_label, asymm_max=None, asymm_min=None, combine=None, diff_max=None, diff_min=None, fract_diff_max=None, fract_diff_min=None, json=False, pdf=False, png=False, ref_abs=False, ref_param_selections=None, sum=None, test_abs=False, test_param_selections=None): """Compare two entities. The result each entity specification is formatted into a MapSet and stored to disk, so that e.g. re-running a DistributionMaker is unnecessary to reproduce the results. Parameters ---------- outdir : string Store output plots to this directory ref : string or array of strings Pipeline settings config file that generates reference output, or a stored map or map set. Multiple pipelines, maps, or map sets are supported ref_abs : bool Use the absolute value of the reference plot for comparisons ref_label : string Label for reference ref_param-selections : string Param selections to apply to ref pipeline config(s). Not applicable if ref specifies stored map or map sets test : string or array of strings Pipeline settings config file that generates test output, or a stored map or map set. Multiple pipelines, maps, or map sets are supported test_abs : bool Use the absolute value of the test plot for comparisons test_label : string Label for test test_param_selections : None or string Param selections to apply to test pipeline config(s). Not applicable if test specifies stored map or map sets combine : None or string or array of strings Combine by wildcard string, where string globbing (a la command line) uses asterisk for any number of wildcard characters. Use single quotes such that asterisks do not get expanded by the shell. Multiple combine strings supported sum : None or int Sum over (and hence remove) the specified axis or axes. I.e., project the map onto remaining (unspecified) axis or axes json : bool Save output maps in compressed json (json.bz2) format pdf : bool Save plots in PDF format. If neither this nor png is specified, no plots are produced png : bool Save plots in PNG format. 
If neither this nor pdf is specfied, no plots are produced diff_min : None or float Difference plot vmin; if you specify only one of diff_min or diff_max, symmetric limits are automatically used (min = -max) diff_max : None or float Difference plot max; if you specify only one of diff_min or diff_max, symmetric limits are automatically used (min = -max) fract_diff_min : None or float Fractional difference plot vmin; if you specify only one of fract_diff_min or fract_diff_max, symmetric limits are automatically used (min = -max) fract_diff_max : None or float Fractional difference plot max; if you specify only one of fract_diff_min or fract_diff_max, symmetric limits are automatically used (min = -max) asymm_min : None or float Asymmetry plot vmin; if you specify only one of asymm_min or asymm_max, symmetric limits are automatically used (min = -max) asymm_max : None or float Fractional difference plot max; if you specify only one of asymm_min or asymm_max, symmetric limits are automatically used (min = -max) Returns ------- summary_stats : dict Dictionary containing a summary for each h Map processed diff : MapSet MapSet of the difference - (Test - Ref) fract_diff : MapSet MapSet of the fractional difference - (Test - Ref) / Ref asymm : MapSet MapSet of the asymmetric fraction difference or pull - (Test - Ref) / sqrt(Ref) """ ref_plot_label = ref_label if ref_abs and not ref_label.startswith('abs'): ref_plot_label = 'abs(%s)' % ref_plot_label test_plot_label = test_label if test_abs and not test_label.startswith('abs'): test_plot_label = 'abs(%s)' % test_plot_label plot_formats = [] if pdf: plot_formats.append('pdf') if png: plot_formats.append('png') diff_symm = True if diff_min is not None and diff_max is None: diff_max = -diff_min diff_symm = False if diff_max is not None and diff_min is None: diff_min = -diff_max diff_symm = False fract_diff_symm = True if fract_diff_min is not None and fract_diff_max is None: fract_diff_max = -fract_diff_min fract_diff_symm = False if fract_diff_max is not None and fract_diff_min is None: fract_diff_min = -fract_diff_max fract_diff_symm = False asymm_symm = True if asymm_max is not None and asymm_min is None: asymm_min = -asymm_max asymm_symm = False if asymm_min is not None and asymm_max is None: asymm_max = -asymm_min asymm_symm = False outdir = os.path.expanduser(os.path.expandvars(outdir)) mkdir(outdir) # Get the reference distribution(s) into the form of a test MapSet p_ref = None ref_source = None if isinstance(ref, Map): p_ref = MapSet(ref) ref_source = MAP_SOURCE_STR elif isinstance(ref, MapSet): p_ref = ref ref_source = MAPSET_SOURCE_STR elif isinstance(ref, Pipeline): if ref_param_selections is not None: ref.select_params(ref_param_selections) p_ref = ref.get_outputs() ref_source = PIPELINE_SOURCE_STR elif isinstance(ref, DistributionMaker): if ref_param_selections is not None: ref.select_params(ref_param_selections) p_ref = ref.get_outputs() ref_source = DISTRIBUTIONMAKER_SOURCE_STR else: if len(ref) == 1: try: ref_pipeline = Pipeline(config=ref[0]) except: pass else: ref_source = PIPELINE_SOURCE_STR if ref_param_selections is not None: ref_pipeline.select_params(ref_param_selections) p_ref = ref_pipeline.get_outputs() else: try: ref_dmaker = DistributionMaker(pipelines=ref) except: pass else: ref_source = DISTRIBUTIONMAKER_SOURCE_STR if ref_param_selections is not None: ref_dmaker.select_params(ref_param_selections) p_ref = ref_dmaker.get_outputs() if p_ref is None: try: p_ref = [Map.from_json(f) for f in ref] except: pass else: ref_source 
= MAP_SOURCE_STR p_ref = MapSet(p_ref) if p_ref is None: assert ref_param_selections is None assert len(ref) == 1, 'Can only handle one MapSet' try: p_ref = MapSet.from_json(ref[0]) except: pass else: ref_source = MAPSET_SOURCE_STR if p_ref is None: raise ValueError( 'Could not instantiate the reference Pipeline, DistributionMaker,' ' Map, or MapSet from ref value(s) %s' % ref) ref = p_ref logging.info('Reference map(s) derived from a ' + ref_source) # Get the test distribution(s) into the form of a test MapSet p_test = None test_source = None if isinstance(test, Map): p_test = MapSet(test) test_source = MAP_SOURCE_STR elif isinstance(test, MapSet): p_test = test test_source = MAPSET_SOURCE_STR elif isinstance(test, Pipeline): if test_param_selections is not None: test.select_params(test_param_selections) p_test = test.get_outputs() test_source = PIPELINE_SOURCE_STR elif isinstance(test, DistributionMaker): if test_param_selections is not None: test.select_params(test_param_selections) p_test = test.get_outputs() test_source = DISTRIBUTIONMAKER_SOURCE_STR else: if len(test) == 1: try: test_pipeline = Pipeline(config=test[0]) except: pass else: test_source = PIPELINE_SOURCE_STR if test_param_selections is not None: test_pipeline.select_params(test_param_selections) p_test = test_pipeline.get_outputs() else: try: test_dmaker = DistributionMaker(pipelines=test) except: pass else: test_source = DISTRIBUTIONMAKER_SOURCE_STR if test_param_selections is not None: test_dmaker.select_params(test_param_selections) p_test = test_dmaker.get_outputs() if p_test is None: try: p_test = [Map.from_json(f) for f in test] except: pass else: test_source = MAP_SOURCE_STR p_test = MapSet(p_test) if p_test is None: assert test_param_selections is None assert len(test) == 1, 'Can only handle one MapSet' try: p_test = MapSet.from_json(test[0]) except: pass else: test_source = MAPSET_SOURCE_STR if p_test is None: raise ValueError( 'Could not instantiate the test Pipeline, DistributionMaker, Map,' ' or MapSet from test value(s) %s' % test) test = p_test logging.info('Test map(s) derived from a ' + test_source) if combine is not None: ref = ref.combine_wildcard(combine) test = test.combine_wildcard(combine) if isinstance(ref, Map): ref = MapSet([ref]) if isinstance(test, Map): test = MapSet([test]) if sum is not None: ref = ref.sum(sum) test = test.sum(sum) # Set the MapSet names according to args passed by user ref.name = ref_label test.name = test_label # Save to disk the maps being plotted (excluding optional aboslute value # operations) if json: refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label) to_file(ref, refmaps_path) testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label) to_file(test, testmaps_path) if set(test.names) != set(ref.names): raise ValueError('Test map names %s do not match ref map names %s.' 
% (sorted(test.names), sorted(ref.names))) # Aliases to save keystrokes def masked(x): return np.ma.masked_invalid(x.nominal_values) def zero_to_nan(map): newmap = deepcopy(map) mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON) newmap.hist[mask] = np.nan return newmap reordered_test = [] new_ref = [] diff_maps = [] fract_diff_maps = [] asymm_maps = [] summary_stats = {} for ref_map in ref: test_map = test[ref_map.name].reorder_dimensions(ref_map.binning) if ref_abs: ref_map = abs(ref_map) if test_abs: test_map = abs(test_map) diff_map = test_map - ref_map fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map) asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5) abs_fract_diff_map = np.abs(fract_diff_map) new_ref.append(ref_map) reordered_test.append(test_map) diff_maps.append(diff_map) fract_diff_maps.append(fract_diff_map) asymm_maps.append(asymm_map) min_ref = np.min(masked(ref_map)) max_ref = np.max(masked(ref_map)) min_test = np.min(masked(test_map)) max_test = np.max(masked(test_map)) total_ref = np.sum(masked(ref_map)) total_test = np.sum(masked(test_map)) mean_ref = np.mean(masked(ref_map)) mean_test = np.mean(masked(test_map)) max_abs_fract_diff = np.max(masked(abs_fract_diff_map)) mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map)) median_abs_fract_diff = np.median(masked(abs_fract_diff_map)) mean_fract_diff = np.mean(masked(fract_diff_map)) min_fract_diff = np.min(masked(fract_diff_map)) max_fract_diff = np.max(masked(fract_diff_map)) std_fract_diff = np.std(masked(fract_diff_map)) mean_diff = np.mean(masked(diff_map)) min_diff = np.min(masked(diff_map)) max_diff = np.max(masked(diff_map)) std_diff = np.std(masked(diff_map)) median_diff = np.nanmedian(masked(diff_map)) mad_diff = np.nanmedian(masked(np.abs(diff_map))) median_fract_diff = np.nanmedian(masked(fract_diff_map)) mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map))) min_asymm = np.min(masked(fract_diff_map)) max_asymm = np.max(masked(fract_diff_map)) total_asymm = np.sqrt(np.sum(masked(asymm_map)**2)) summary_stats[test_map.name] = OrderedDict([ ('min_ref', min_ref), ('max_ref', max_ref), ('total_ref', total_ref), ('mean_ref', mean_ref), ('min_test', min_test), ('max_test', max_test), ('total_test', total_test), ('mean_test', mean_test), ('max_abs_fract_diff', max_abs_fract_diff), ('mean_abs_fract_diff', mean_abs_fract_diff), ('median_abs_fract_diff', median_abs_fract_diff), ('min_fract_diff', min_fract_diff), ('max_fract_diff', max_fract_diff), ('mean_fract_diff', mean_fract_diff), ('std_fract_diff', std_fract_diff), ('median_fract_diff', median_fract_diff), ('mad_fract_diff', mad_fract_diff), ('min_diff', min_diff), ('max_diff', max_diff), ('mean_diff', mean_diff), ('std_diff', std_diff), ('median_diff', median_diff), ('mad_diff', mad_diff), ('min_asymm', min_asymm), ('max_asymm', max_asymm), ('total_asymm', total_asymm), ]) logging.info('Map %s...', ref_map.name) logging.info(' Ref map(s):') logging.info(' min :' + ('%.2f' % min_ref).rjust(12)) logging.info(' max :' + ('%.2f' % max_ref).rjust(12)) logging.info(' total :' + ('%.2f' % total_ref).rjust(12)) logging.info(' mean :' + ('%.2f' % mean_ref).rjust(12)) logging.info(' Test map(s):') logging.info(' min :' + ('%.2f' % min_test).rjust(12)) logging.info(' max :' + ('%.2f' % max_test).rjust(12)) logging.info(' total :' + ('%.2f' % total_test).rjust(12)) logging.info(' mean :' + ('%.2f' % mean_test).rjust(12)) logging.info(' Absolute fract. 
diff., abs((Test - Ref) / Ref):') logging.info(' max : %.4e', max_abs_fract_diff) logging.info(' mean : %.4e', mean_abs_fract_diff) logging.info(' median: %.4e', median_abs_fract_diff) logging.info(' Fractional difference, (Test - Ref) / Ref:') logging.info(' min : %.4e', min_fract_diff) logging.info(' max : %.4e', max_fract_diff) logging.info(' mean : %.4e +/- %.4e', mean_fract_diff, std_fract_diff) logging.info(' median: %.4e +/- %.4e', median_fract_diff, mad_fract_diff) logging.info(' Difference, Test - Ref:') logging.info(' min : %.4e', min_diff) logging.info(' max : %.4e', max_diff) logging.info(' mean : %.4e +/- %.4e', mean_diff, std_diff) logging.info(' median: %.4e +/- %.4e', median_diff, mad_diff) logging.info(' Asymmetry, (Test - Ref) / sqrt(Ref)') logging.info(' min : %.4e', min_asymm) logging.info(' max : %.4e', max_asymm) logging.info(' total : %.4e (sum in quadrature)', total_asymm) logging.info('') ref = MapSet(new_ref) test = MapSet(reordered_test) diff = MapSet(diff_maps) fract_diff = MapSet(fract_diff_maps) asymm = MapSet(asymm_maps) if json: diff.to_json( os.path.join( outdir, 'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label))) fract_diff.to_json( os.path.join( outdir, 'fract_diff__%s___%s.json.bz2' % (test_plot_label, ref_plot_label))) asymm.to_json( os.path.join( outdir, 'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label))) to_file( summary_stats, os.path.join( outdir, 'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label))) for plot_format in plot_formats: # Plot the raw distributions plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=False, ratio=False) plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label) plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label) # Plot the difference (test - ref) plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=diff_symm, ratio=False) plotter.label = '%s - %s' % (test_plot_label, ref_plot_label) plotter.plot_2d_array( test - ref, fname='diff__%s__%s' % (test_plot_label, ref_plot_label), #vmin=diff_min, vmax=diff_max ) # Plot the fractional difference (test - ref)/ref plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=fract_diff_symm, ratio=True) plotter.label = ('(%s-%s)/%s' % (test_plot_label, ref_plot_label, ref_plot_label)) plotter.plot_2d_array( (test - ref) / MapSet([zero_to_nan(r) for r in ref]), fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label), #vmin=fract_diff_min, vmax=fract_diff_max ) # Plot the asymmetry (test - ref)/sqrt(ref) plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False, annotate=False, symmetric=asymm_symm, ratio=True) plotter.label = (r'$(%s - %s)/\sqrt{%s}$' % (test_plot_label, ref_plot_label, ref_plot_label)) plotter.plot_2d_array( (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]), fname='asymm__%s__%s' % (test_plot_label, ref_plot_label), #vmin=asymm_min, vmax=asymm_max ) return summary_stats, diff, fract_diff, asymm
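# Usage sketch (hypothetical config paths): compare the distributions produced
# by a modified pipeline against a reference pipeline, combining all maps whose
# names match 'nu*', and write JSON maps, summary stats, and PNG plots to outdir.
summary_stats, diff, fract_diff, asymm = compare(
    outdir='/tmp/compare_ref_vs_test',
    ref=['settings/pipeline/example.cfg'],
    ref_label='ref',
    test=['settings/pipeline/example_mod.cfg'],
    test_label='test',
    combine='nu*',
    json=True,
    png=True,
)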
def makeEventsFile(data_files, detector, proc_ver, cut, outdir, run_settings=None, data_proc_params=None, join=None, cust_cuts=None, extract_fields=EXTRACT_FIELDS, output_fields=OUTPUT_FIELDS): r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3 by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages. Parameters ---------- data_files : dict File paths for finding data files for each run, formatted as: { <string run>: <list of file paths>, <string run>: <list of file paths>, ... <string run>: <list of file paths>, } detector : string Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in e.g. mc_sim_run_settings.json and data_proc_params.json files. proc_ver Version of processing applied to the events, as found in e.g. data_proc_params.json. cut Name of a standard cut to use; must be specified in the relevant detector/processing version node of the data processing parameters (file from which the data_proc_params object was instantiated) outdir Directory path in which to store resulting files; will be generated if it does not already exist (including any parent directories that do not exist) run_settings : string or MCSimRunSettings Resource location of mc_sim_run_settings.json or an MCSimRunSettings object instantiated therefrom. data_proc_params : string or DataProcParams Resource location of data_proc_params.json or a DataProcParams object instantiated therefrom. join String specifying any flavor/interaction types (flavInts) to join together. Separate flavInts with commas (',') and separate groups with semicolons (';'). E.g. an acceptable string is: 'numucc+numubarcc; nuall bar NC, nuall NC' cust_cuts dict with a single DataProcParams cut specification or list of same (see help for DataProcParams for detailed description of cut spec) extract_fields : None or iterable of strings Field names to extract from source HDF5 file. If None, extract all fields. output_fields : None or iterable of strings Fields to include in the generated PISA-standard-format events HDF5 file; note that if 'weighted_aeff' is not preent, effective area will not be computed. If None, all fields will be written. Notes ----- Compute "weighted_aeff" field: Within each int type (CC or NC), ngen should be added together; events recorded of that int type then get their one_weight divided by the total *for that int type only* to obtain the "weighted_aeff" for that event (even if int types are being grouped/joined together). This has the effect that within a group, ... ... and within an interaction type, effective area is a weighted average of that of the flavors being combined. E.g. for CC, \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y}) Aeff_CC = ----------------------------------------------------- , \sum_{run x}\sum_{flav y} (ngen_{x,y}) ... and then across interaction types, the results of the above for each int type need to be summed together, i.e.: Aeff_total = Aeff_CC + Aeff_NC Note that each grouping of flavors is calculated with the above math completely independently from other flavor groupings specified. 
See Justin Lanfranchi's presentation on the PINGU Analysis call, 2015-10-21, for more details: https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf """ if isinstance(run_settings, str): run_settings = DetMCSimRunsSettings(find_resource(run_settings), detector=detector) assert isinstance(run_settings, DetMCSimRunsSettings) assert run_settings.detector == detector if isinstance(data_proc_params, str): data_proc_params = DataProcParams( detector=detector, proc_ver=proc_ver, data_proc_params=find_resource(data_proc_params)) assert data_proc_params.detector == detector assert data_proc_params.proc_ver == proc_ver runs = sorted(data_files.keys()) all_flavs = [] flavs_by_run = {} run_norm_factors = {} bin_edges = set() runs_by_flavint = FlavIntData() for flavint in runs_by_flavint.flavints: runs_by_flavint[flavint] = [] #ngen_flavint_by_run = {run:FlavIntData() for run in runs} ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs} #eint_per_flav_by_run = {run:FlavIntData() for run in runs} #for run in runs: # flavints_in_run = run_settings.get_flavints(run=run) # e_range = run_settings.get_energy_range(run) # gamma = run_settings.get_spectral_index(run) # for flavint in flavints_in_run: # runs_by_flavint[flavint].append(run) # ngen_flav = run_settings.get_num_gen( # run=run, flav_or_flavint=flavint, include_physical_fract=True # ) # #runs_by_flavint[flavint].append(run) # #this_flav = flavint. # #xsec_fract_en_wtd_avg[run][flavint] = \ # ngen_flavint_by_run[run][flavint] = \ # xsec.get_xs_ratio_integral( # flavintgrp0=flavint, # flavintgrp1=flavint.flav, # e_range=e_range, # gamma=gamma, # average=True # ) # xsec_ver = run_settings.get_xsec_version(run=run) # if xsec_ver_ref is None: # xsec_ver_ref = xsec_ver # # An assumption of below logic is that all MC is generated using the # # same cross sections version. # # # # TODO / NOTE: # # It would be possible to combine runs with different cross sections so # # long as each (flavor, interaction type) cross sections are # # weighted-averaged together using weights # # N_gen_{n,flav+inttype} * E_x^{-gamma_n} / # # ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE ) # # where E_x are the energy sample points specified in the cross # # sections (and hence these must also be identical across all cross # # sections that get combined, unless interpolation is performed). 
# assert xsec_ver == xsec_ver_ref # #ngen_weighted_energy_integral[str(run)] = powerLawIntegral( # #flavs_by_run[run] = run_settings.flavs(run) ##flavs_present = detector_geom = run_settings[runs[0]]['geom'] # Create Events object to store data evts = Events() evts.metadata.update({ 'detector': run_settings.detector, 'proc_ver': data_proc_params.proc_ver, 'geom': detector_geom, 'runs': runs, }) cuts = [] if isinstance(cust_cuts, dict): cust_cuts = [cust_cuts] if cut is not None: evts.metadata['cuts'].append(cut) cuts.append(cut) if cust_cuts is not None: for ccut in cust_cuts: evts.metadata['cuts'].append('custom: ' + ccut['pass_if']) cuts.append(ccut) orig_outdir = outdir outdir = expand(outdir) logging.info('Output dir spec\'d: %s', orig_outdir) if outdir != orig_outdir: logging.info('Output dir expands to: %s', outdir) mkdir(outdir) detector_label = str(data_proc_params.detector) proc_label = 'proc_' + str(data_proc_params.proc_ver) # What flavints to group together if join is None or join == '': grouped = [] ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS] groups_label = 'unjoined' logging.info('Events in the following groups will be joined together:' ' (none)') else: grouped, ungrouped = xlateGroupsStr(join) evts.metadata['flavints_joined'] = [str(g) for g in grouped] groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped]) logging.info( 'Events in the following groups will be joined together: ' + '; '.join([str(g) for g in grouped])) # Find any flavints not included in the above groupings flavint_groupings = grouped + ungrouped if len(ungrouped) == 0: ungrouped = ['(none)'] logging.info('Events of the following flavints will NOT be joined' 'together: ' + '; '.join([str(k) for k in ungrouped])) # Enforce that flavints composing groups are mutually exclusive for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]): for flavintgrp1 in flavint_groupings[grp_n + 1:]: assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0 flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings] # Instantiate storage for all intermediate destination fields; # The data structure looks like: # extracted_data[group #][interaction type][field name] = list of data if extract_fields is None: extracted_data = [{inttype: {} for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names] else: extracted_data = [{ inttype: {field: [] for field in extract_fields} for inttype in ALL_NUINT_TYPES } for _ in flavintgrp_names] # Instantiate generated-event counts for destination fields; count # CClseparately from NC because aeff's for CC & NC add, whereas # aeffs intra-CC should be weighted-averaged (as for intra-NC) ngen = [{inttype: {} for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names] # Loop through all of the files, retrieving the events, filtering, # and recording the number of generated events pertinent to # calculating aeff filecount = {} detector_geom = None bad_files = [] for run, fnames in data_files.items(): file_count = 0 for fname in fnames: # Retrieve data from all nodes specified in the processing # settings file logging.trace('Trying to get data from file %s', fname) try: data = data_proc_params.get_data(fname, run_settings=run_settings) except (ValueError, KeyError, IOError): logging.warning('Bad file encountered: %s', fname) bad_files.append(fname) continue file_count += 1 # Check to make sure only one run is present in the data runs_in_data = set(data['run']) assert len(runs_in_data) == 1, 'Must be just one run in data' #run = int(data['run'][0]) if not 
run in filecount: filecount[run] = 0 filecount[run] += 1 rs_run = run_settings[run] # Record geom; check that geom is consistent with other runs if detector_geom is None: detector_geom = rs_run['geom'] assert rs_run['geom'] == detector_geom, \ 'All runs\' geometries must match!' # Loop through all flavints spec'd for run for run_flavint in rs_run['flavints']: barnobar = run_flavint.bar_code int_type = run_flavint.intType # Retrieve this-interaction-type- & this-barnobar-only events # that also pass cuts. (note that cut names are strings) intonly_cut_data = data_proc_params.apply_cuts( data, cuts=cuts + [str(int_type), str(barnobar)], return_fields=extract_fields) # Record the generated count and data for this run/flavor for # each group to which it's applicable for grp_n, flavint_group in enumerate(flavint_groupings): if not run_flavint in flavint_group: continue # Instantiate a field for particles and antiparticles, # keyed by the output of the bar_code property for each if not run in ngen[grp_n][int_type]: ngen[grp_n][int_type][run] = { NuFlav(12).bar_code: 0, NuFlav(-12).bar_code: 0, } # Record count only if it hasn't already been recorded if ngen[grp_n][int_type][run][barnobar] == 0: # Note that one_weight includes cc/nc:total fraction, # so DO NOT specify the full flavint here, only flav # (since one_weight does NOT take bar/nobar fraction, # it must be included here in the ngen computation) flav_ngen = run_settings.get_num_gen(run=run, barnobar=barnobar) ngen[grp_n][int_type][run][barnobar] = flav_ngen # Append the data. Note that extracted_data is: # extracted_data[group n][int_type][extract field name] = # list if extract_fields is None: for f in intonly_cut_data.keys(): if f not in extracted_data[grp_n][int_type]: extracted_data[grp_n][int_type][f] = [] extracted_data[grp_n][int_type][f].extend( intonly_cut_data[f]) else: for f in extract_fields: extracted_data[grp_n][int_type][f].extend( intonly_cut_data[f]) logging.info('File count for run %s: %d', run, file_count) to_file(bad_files, '/tmp/bad_files.json') if ((output_fields is None and (extract_fields is None or 'one_weight' in extract_fields)) or 'weighted_aeff' in output_fields): fmtfields = (' ' * 12 + 'flavint_group', 'int type', ' run', 'part/anti', 'part/anti count', 'aggregate count') fmt_n = [len(f) for f in fmtfields] fmt = ' '.join([r'%' + str(n) + r's' for n in fmt_n]) lines = ' '.join(['-' * n for n in fmt_n]) logging.info(fmt, fmtfields) logging.info(lines) for grp_n, flavint_group in enumerate(flavint_groupings): for int_type in set([fi.intType for fi in flavint_group.flavints]): ngen_it_tot = 0 for run, run_counts in ngen[grp_n][int_type].items(): for barnobar, barnobar_counts in run_counts.items(): ngen_it_tot += barnobar_counts logging.info(fmt, flavint_group.simple_str(), int_type, str(run), barnobar, int(barnobar_counts), int(ngen_it_tot)) # Convert data to numpy array if extract_fields is None: for field in extracted_data[grp_n][int_type].keys(): extracted_data[grp_n][int_type][field] = \ np.array(extracted_data[grp_n][int_type][field]) else: for field in extract_fields: extracted_data[grp_n][int_type][field] = \ np.array(extracted_data[grp_n][int_type][field]) # Generate weighted_aeff field for this group / int type's data extracted_data[grp_n][int_type]['weighted_aeff'] = \ extracted_data[grp_n][int_type]['one_weight'] \ / ngen_it_tot * CMSQ_TO_MSQ # Report file count per run for run, count in filecount.items(): logging.info('Files read, run %s: %d', run, count) ref_num_i3_files = 
run_settings[run]['num_i3_files'] if count != ref_num_i3_files: logging.warning( 'Run %s, Number of files read (%d) != number of ' 'source I3 files (%d), which may indicate an error.', run, count, ref_num_i3_files) # Generate output data for flavint in ALL_NUFLAVINTS: int_type = flavint.intType for grp_n, flavint_group in enumerate(flavint_groupings): if not flavint in flavint_group: logging.trace('flavint %s not in flavint_group %s, passing.', flavint, flavint_group) continue else: logging.trace( 'flavint %s **IS** in flavint_group %s, storing.', flavint, flavint_group) if output_fields is None: evts[flavint] = extracted_data[grp_n][int_type] else: evts[flavint] = { f: extracted_data[grp_n][int_type][f] for f in output_fields } # Generate file name numerical_runs = [] alphanumerical_runs = [] for run in runs: try: int(run) numerical_runs.append(int(run)) except ValueError: alphanumerical_runs.append(str(run)) run_labels = [] if len(numerical_runs) > 0: run_labels.append(list2hrlist(numerical_runs)) if len(alphanumerical_runs) > 0: run_labels += sorted(alphanumerical_runs) run_label = 'runs_' + ','.join(run_labels) geom_label = '' + detector_geom fname = 'events__' + '__'.join([ detector_label, geom_label, run_label, proc_label, groups_label, ]) + '.hdf5' outfpath = os.path.join(outdir, fname) logging.info('Writing events to %s', outfpath) # Save data to output file evts.save(outfpath)
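# Usage sketch (all values hypothetical): build a PISA-standard events HDF5
# from I3-converted HDF5 files for two runs, joining numu+numubar CC into one
# group and all NC into another, using the join-string syntax described in the
# docstring above.
makeEventsFile(
    data_files={'12600': ['run12600_part0.hdf5', 'run12600_part1.hdf5'],
                '14600': ['run14600_part0.hdf5']},
    detector='deepcore',
    proc_ver='5digit',
    cut='analysis',
    outdir='/tmp/pisa_events',
    run_settings='events/mc_sim_run_settings.json',
    data_proc_params='events/data_proc_params.json',
    join='numucc+numubarcc; nuall bar NC, nuall NC',
)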
def test_Prior(): """Unit tests for Prior class""" uniform = Prior(kind='uniform', llh_offset=1.5) jeffreys = Prior(kind='jeffreys', A=2 * ureg.s, B=3 * ureg.ns) gaussian = Prior(kind='gaussian', mean=10, stddev=1) x = np.linspace(-10, 10, 100) y = x**2 linterp = Prior(kind='linterp', param_vals=x * ureg.meter / ureg.s, llh_vals=y) param_vals = np.linspace(-10, 10, 100) llh_vals = x**2 knots, coeffs, deg = splrep(param_vals, llh_vals) spline = Prior(kind='spline', knots=knots * ureg.foot, coeffs=coeffs, deg=deg) param_upsamp = np.linspace(-10, 10, 1000) * ureg.foot llh_upsamp = splev(param_upsamp.magnitude, tck=(knots, coeffs, deg), ext=2) assert all(spline.llh(param_upsamp) == llh_upsamp) # Asking for param value outside of range should fail try: linterp.llh(-1000 * ureg.mile / ureg.s) except ValueError: pass else: assert False # Asking for value at quantity with invalid units try: linterp.chi2(-1000 * ureg.km) except pint.DimensionalityError: pass else: assert False try: spline.llh(-1000 * ureg.meter) except ValueError: pass else: assert False try: spline.chi2(+1000 * ureg.meter) except ValueError: pass else: assert False # Asking for param value when units were used should fail try: spline.llh(10) except TypeError: pass else: assert False # ... or vice versa try: gaussian.llh(10 * ureg.meter) except pint.DimensionalityError: pass else: assert False # -- Test writing to and reading from JSON files -- # with tempfile.TemporaryDirectory() as temp_dir: for pri in [uniform, jeffreys, gaussian, linterp, spline]: fpath = join(temp_dir, pri.kind + '.json') try: to_file(pri, fpath) loaded = from_file(fpath, cls=Prior) assert loaded == pri except: logging.error('prior %s failed', pri.kind) if isfile(fpath): logging.error( 'contents of %s:\n%s', fpath, open(fpath, 'r').read(), ) raise logging.info('<< PASS : test_Prior >>')
def profile_scan(data_settings, template_settings, param_names, steps,
                 only_points, no_outer, data_param_selections,
                 hypo_param_selections, profile, outfile, minimizer_settings,
                 metric, debug_mode):
    """Perform a profile scan.

    Parameters
    ----------
    data_settings
    template_settings
    param_names
    steps
    only_points
    no_outer
    data_param_selections
    hypo_param_selections
    profile
    outfile
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results
    analysis

    """
    outfile = expanduser(expandvars(outfile))
    if isfile(outfile):
        raise IOError('`outfile` "{}" already exists!'.format(outfile))

    minimizer_settings = from_file(minimizer_settings)

    hypo_maker = DistributionMaker(template_settings)

    if data_settings is None:
        if (data_param_selections is None
                or data_param_selections == hypo_param_selections):
            data_maker = hypo_maker
        else:
            data_maker = deepcopy(hypo_maker)
            data_maker.select_params(data_param_selections)
    else:
        data_maker = DistributionMaker(data_settings)
        data_maker.select_params(data_param_selections)

    data_dist = data_maker.get_outputs(return_sum=True)

    analysis = Analysis()
    results = analysis.scan(
        data_dist=data_dist,
        hypo_maker=hypo_maker,
        hypo_param_selections=hypo_param_selections,
        metric=metric,
        param_names=param_names,
        steps=steps,
        only_points=only_points,
        outer=not no_outer,
        profile=profile,
        minimizer_settings=minimizer_settings,
        outfile=outfile,
        debug_mode=debug_mode
    )
    to_file(results, outfile)
    logging.info("Done.")

    return results, analysis
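# Usage sketch (hypothetical paths): profile the fit metric over theta23 at 20
# evenly spaced points, using the hypothesis maker itself as pseudodata. Note
# that `outfile` must not already exist, or an IOError is raised.
results, analysis = profile_scan(
    data_settings=None,
    template_settings='settings/pipeline/example.cfg',
    param_names=['theta23'],
    steps=[20],
    only_points=None,
    no_outer=False,
    data_param_selections=None,
    hypo_param_selections=None,
    profile=True,
    outfile='/tmp/theta23_profile_scan.json.bz2',
    minimizer_settings='settings/minimizer/slsqp_settings.json',
    metric='chi2',
    debug_mode=None,
)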
def add_fluxes_to_file(data_file_path, flux_table, flux_name, outdir=None,
                       label=None, overwrite=False):
    """Add fluxes to PISA events file (e.g. for use by an mc stage)

    Parameters
    -----------
    data_file_path : string
    flux_table
    flux_name
    outdir : string or None
        If None, output is to the same directory as `data_file_path`
    overwrite : bool, optional
    """
    data, attrs = from_file(find_resource(data_file_path), return_attrs=True)
    bname, ext = splitext(basename(data_file_path))
    assert ext.lstrip('.') in HDF5_EXTS

    if outdir is None:
        outdir = dirname(data_file_path)

    if label is None:
        label = ''
    else:
        assert isinstance(label, str)
        label = '_' + label

    outpath = join(outdir, '{}__with_fluxes{}{}'.format(bname, label, ext))

    if not overwrite and isfile(outpath):
        logging.warning('Output path "%s" already exists, not regenerating',
                        outpath)
        return

    mkdir(outdir, warn=False)

    # Loop over the top-level keys
    for primary, primary_node in data.items():

        # Only handling neutrino fluxes here; skip past e.g. muon or noise MC
        # events
        if primary.startswith("nu"):

            logging.info('Adding fluxes to "%s" events', primary)

            # Input data may have one layer of hierarchy before the event
            # variables (e.g. [numu_cc]), or for older files there may be a
            # second layer (e.g. [numu][cc]). Handling either case here...
            if "true_energy" in primary_node:
                secondary_nodes = [primary_node]
            else:
                secondary_nodes = primary_node.values()

            for secondary_node in secondary_nodes:

                true_e = secondary_node['true_energy']
                true_cz = secondary_node['true_coszen']

                # Calculate all 4 fluxes (nue, nuebar, numu and numubar)
                for table in ['nue', 'nuebar', 'numu', 'numubar']:
                    flux = calculate_2d_flux_weights(
                        true_energies=true_e,
                        true_coszens=true_cz,
                        en_splines=flux_table[table]
                    )
                    keyname = flux_name + '_' + table + '_flux'
                    secondary_node[keyname] = flux

    to_file(data, outpath, attrs=attrs, overwrite=overwrite)
    logging.info('--> Wrote file including fluxes to "%s"', outpath)
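# Usage sketch: attach atmospheric-flux weights to every neutrino event in a
# PISA events HDF5 file. The flux-table loader and both file paths are
# assumptions for illustration; what matters is that `flux_table` maps the
# keys 'nue', 'nuebar', 'numu', 'numubar' to the 2D splines that
# `calculate_2d_flux_weights` expects.
from pisa.utils.flux_weights import load_2d_table  # assumed helper

flux_table = load_2d_table('flux/honda-2015-spl-solmin-aa.d')
add_fluxes_to_file(
    data_file_path='events/events__deepcore__runs_12600.hdf5',
    flux_table=flux_table,
    flux_name='nominal',
    outdir=None,        # write alongside the input file
    overwrite=False,
)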
                analysis.profile(args.var, eval(args.range),
                                 check_octant=not args.no_check_octant,
                                 pprint=not args.quiet))
        elif args.mode == 'feldman_cousins':
            assert data_fixed_param is not None
            p_name, value = list(data_fixed_param.items())[0]
            print("save the fixed_param_data to output: ", p_name, " ", value)
            return_result = analysis.profile(
                p_name, [value],
                check_octant=not args.no_check_octant,
                pprint=not args.quiet)
            return_result.append({'data_%s' % p_name: value})
            results.append(return_result)
    elif args.function == 'fit':
        best_fit_result = analysis.find_best_fit(
            check_octant=not args.no_check_octant,
            pprint=not args.quiet)
        if data_fixed_param is not None:
            p_name, value = list(data_fixed_param.items())[0]
            best_fit_result['data_' + p_name] = value
            print("save the fixed_param_data to output: ", p_name, " ", value)
        results.append(best_fit_result)

    to_file(results, args.outfile)
    logging.info('Done.')
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)

    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric)
        )
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile))

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode
        )
        to_file(results[param.name], outfile)
        param.value = nominal_value
        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
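# Usage sketch (hypothetical paths): scan every free parameter of the template
# maker one at a time, 10 steps each, writing one JSON per parameter to outdir.
results = scan_allsyst(
    template_settings='settings/pipeline/example.cfg',
    steps=10,
    hypo_param_selections=['nh'],
    outdir='/tmp/syst_scans',
    minimizer_settings='settings/minimizer/slsqp_settings.json',
    metric='chi2',
    debug_mode=None,
)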
def main(): parser = ArgumentParser(description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument('-io', '--io_chi2_file', type=str, required=True, help="Inverted Ordering Chi2 file from NuFit") parser.add_argument('-no', '--no_chi2_file', type=str, required=True, help="Inverted Ordering Chi2 file from NuFit") parser.add_argument('--shifted', action='store_true', default=False, help='''Flag if wanting prior which attempts to remove the ordering prior by subtracting the delta chi2.''') parser.add_argument('--minimised', action='store_true', default=False, help='''Flag if wanting prior which attempts to remove the ordering prior by minimising over both surfaces.''' ) parser.add_argument('--outdir', metavar='DIR', type=str, required=True, help='''Store all output files to this directory. It is recommended you save them in the priors directory in the PISA resources.''') args = parser.parse_args() io_filename, io_fileext = os.path.splitext(args.io_chi2_file) no_filename, no_fileext = os.path.splitext(args.no_chi2_file) if io_fileext != '.gz': raise ValueError('%s file extension not expected. Please use the file ' 'as downloaded from the Nu-Fit website.' % io_fileext) if no_fileext != '.gz': raise ValueError( '%s file extension not expected. Please use the file as ' 'downloaded directly from the Nu-Fit website.' % no_fileext) # Get Nu-Fit version from filenames NuFitVersion = io_filename.split('/')[-1].split('.')[0] if NuFitVersion[0].lower() != 'v': raise ValueError('%s%s input file does not allow for discerning the ' 'Nu-Fit version directly from the filename. Please ' 'use the file as downloaded directly from the Nu-Fit ' 'website.' % (io_filename, io_fileext)) NO_NuFitVersion = no_filename.split('/')[-1].split('.')[0] if NuFitVersion != NO_NuFitVersion: raise ValueError( 'The NuFit version extracted from the NO and IO files ' 'do not match. i.e. %s is not the same as %s. Please ' 'use the same NuFit version for each of the NO and IO ' 'chi2 surfaces.' % (NuFitVersion, NO_NuFitVersion)) # Add special treatment for NuFit 2.1 since it has two releases if NuFitVersion == 'v21': NuFitVersion += io_filename.split('/')[-1].split('-')[1] io_infile = gzip.open(args.io_chi2_file) no_infile = gzip.open(args.no_chi2_file) io_s2th23, io_dchi2 = extract_vals( infile=io_infile, string_of_interest='# T23 projection: sin^2(theta23) Delta_chi^2') no_s2th23, no_dchi2 = extract_vals( infile=no_infile, string_of_interest='# T23 projection: sin^2(theta23) Delta_chi^2') io_th23 = np.arcsin(np.sqrt(np.array(io_s2th23))) no_th23 = np.arcsin(np.sqrt(np.array(no_s2th23))) io_dchi2 = np.array(io_dchi2) no_dchi2 = np.array(no_dchi2) f_io = scipy.interpolate.splrep(io_th23, -io_dchi2 / 2.0, s=0) f_no = scipy.interpolate.splrep(no_th23, -no_dchi2 / 2.0, s=0) priors = make_prior_dict(f_io=f_io, f_no=f_no) to_file( priors, os.path.join(args.outdir, 'nufit%sstandardtheta23splines.json' % NuFitVersion)) if args.shifted: # Make priors where the delta chi2 between the orderings is removed. # The idea is to remove the prior on the ordering. 
io_shifteddchi2 = io_dchi2 - min(io_dchi2) no_shifteddchi2 = no_dchi2 - min(no_dchi2) f_shiftedio = scipy.interpolate.splrep(io_th23, -io_shifteddchi2 / 2.0, s=0) f_shiftedno = scipy.interpolate.splrep(no_th23, -no_shifteddchi2 / 2.0, s=0) shiftedpriors = make_prior_dict(f_io=f_shiftedio, f_no=f_shiftedno) to_file( shiftedpriors, os.path.join(args.outdir, 'nufit%sshiftedtheta23splines.json' % NuFitVersion)) if args.minimised: # Make one prior that is the minimum of both of the original chi2 # surfaces. The idea is to remove the prior on the ordering. minchi2 = np.minimum(io_dchi2, no_dchi2) # Now just one prior. X values should be the same for both. f_minimised = scipy.interpolate.splrep(io_th23, -minchi2 / 2.0, s=0) minimisedprior = make_prior_dict(f=f_minimised) to_file( minimisedprior, os.path.join(args.outdir, 'nufit%sminimisedtheta23spline.json' % NuFitVersion))
    '--infile', type=str, required=True
)
parser.add_argument(
    '--outfile', type=str, required=True
)
args = parser.parse_args()

import sys, os, re, traceback, time, warnings, itertools
import copy

#from pisa.utils import utils as putils
from pisa.utils.fileio import from_file, to_file
from pisa.utils import params as ppars
from pisa.utils import utils as putils

ts0 = from_file(args.infile)
ts1 = copy.deepcopy(ts0)
for paramname, param in sorted(ts0['params'].items()):
    new_prior = ppars.Prior.from_param(param)
    if new_prior is None:
        continue
    print('Converting prior for param `' + paramname + '`')
    new_param = copy.deepcopy(param)
    new_param.update(new_prior.build_dict())
    ts1['params'][paramname] = new_param

to_file(ts1, args.outfile)