def instantiate_disk_cache(self):
    """Set up `self.disk_cache` and `self.disk_cache_path` for the stage.

    What happens depends on the current value of `self.disk_cache`:

    * a `DiskCache` instance is kept, and only its `path` is recorded;
    * `False` or `None` disables disk caching (both attributes become None);
    * a string is used as the cache file path after expanding `~` and
      environment variables; if its directory part is not absolute, the
      path is placed beneath `CACHE_DIR`;
    * `True` selects `<CACHE_DIR>/<stage_name>/<service_name>.sqlite` (or
      `generic.sqlite` if `service_name` is None or empty) and creates that
      directory.

    In the latter two cases a new `DiskCache` is created at the resulting
    path and stored in `self.disk_cache`.

    Raises
    ------
    ValueError
        If `self.disk_cache` is none of the above.

    """
    spec = self.disk_cache

    # Already a cache object: nothing to build, just mirror its location
    if isinstance(spec, DiskCache):
        self.disk_cache_path = spec.path
        return

    # Disk caching switched off
    if spec is None or spec is False:
        self.disk_cache = None
        self.disk_cache_path = None
        return

    if isinstance(spec, basestring):
        # User-supplied location; relative locations live under CACHE_DIR
        expanded = os.path.expandvars(os.path.expanduser(spec))
        dirpath, filename = os.path.split(expanded)
        if os.path.isabs(dirpath):
            path_parts = (dirpath, filename)
        else:
            path_parts = (CACHE_DIR, dirpath, filename)
        self.disk_cache_path = os.path.join(*path_parts)
    elif spec is True:
        # Default location, derived from the stage and service names
        dirpath = os.path.expandvars(
            os.path.expanduser(os.path.join(CACHE_DIR, self.stage_name))
        )
        if self.service_name is None or self.service_name == "":
            filename = "generic.sqlite"
        else:
            filename = self.service_name + ".sqlite"
        mkdir(dirpath, warn=False)
        self.disk_cache_path = os.path.join(dirpath, filename)
    else:
        raise ValueError("Don't know what to do with a %s." % type(spec))

    self.disk_cache = DiskCache(
        self.disk_cache_path, max_depth=10, is_lru=False
    )
def main(return_outputs=False):
    """Produce, optionally store, and optionally plot distributions.

    Command-line arguments are obtained via `parse_args`. A
    `DistributionMaker` is built from the specified pipeline(s) and its
    outputs are computed. If an output directory was given, the outputs are
    written there and, if PDF and/or PNG was requested, plotted there too.

    Parameters
    ----------
    return_outputs : bool
        Use False when calling as a script; use True to get the objects back
        when calling interactively.

    Returns
    -------
    distribution_maker, outputs
        Only if `return_outputs` is True; otherwise None.

    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    requested = (('pdf', args.pdf), ('png', args.png))
    plot_formats = [fmt for fmt, wanted in requested if wanted]

    distribution_maker = DistributionMaker(pipelines=args.pipeline)  # pylint: disable=redefined-outer-name
    if args.select is not None:
        distribution_maker.select_params(args.select)

    outputs = distribution_maker.get_outputs(return_sum=args.return_sum)  # pylint: disable=redefined-outer-name

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        mkdir(args.outdir)
        fpath = expand(
            os.path.join(args.outdir, 'distribution_maker_outputs.json.bz2')
        )
        to_file(outputs, fpath)

        # Plots are only produced when there is a directory to put them in
        if plot_formats:
            my_plotter = Plotter(
                outdir=args.outdir,
                fmt=plot_formats,
                log=False,
                annotate=False,
            )
            for num, output in enumerate(outputs):
                my_plotter.plot_2d_array(
                    output, fname='dist_output_%d' % num
                )

    if not return_outputs:
        return None
    return distribution_maker, outputs
def save_hyperplane_fits(input_data, fit_results, outdir, tag):
    """Write hyperplane fit results to a JSON file inside `outdir`.

    The file is named
    ``<tag>__<N>d__<param names joined by "_">__hyperplane_fits.json``
    where N is the number of entries in ``input_data["param_names"]``.

    Parameters
    ----------
    input_data : mapping
        Input data container returned by `hyperplane` function; only its
        "param_names" entry is used here

    fit_results : dict
        Fit results data container returned by `hyperplane` function; this is
        what gets written to disk

    outdir : string
        Output directory (created via `mkdir`)

    tag : string
        Identifier for filenames holding fit results

    """
    param_names = input_data["param_names"]

    mkdir(outdir)

    # File name encodes the tag, the dimensionality, and the parameter names
    fname = "%s__%dd__%s__hyperplane_fits.json" % (
        tag,
        len(param_names),
        "_".join(param_names),
    )
    to_file(fit_results, join(outdir, fname))
def postproc_profile_scan(return_outputs=False):
    """Process the output files of profile_scan"""
    # NOTE: the docstring above doubles as the `description` handed to
    # `parse_args`, so its text is part of the program's output.
    init_args_d = parse_args(
        description=postproc_profile_scan.__doc__,
        command='profile_scan'
    )

    # Fluctuation flags are only meaningful if pseudo-experiments were given
    if init_args_d['pseudo_experiments'] is None:
        fluctuate_fid, fluctuate_data = None, None
    else:
        fluctuate_fid, fluctuate_data = True, False

    mkdir(init_args_d['outdir'])

    postproc_kwargs = dict(
        analysis_type='profile_scan',
        detector=init_args_d['detector'],
        selection=init_args_d['selection'],
        outdir=init_args_d['outdir'],
        formats=init_args_d['formats'],
        scan_file=init_args_d['infile'],
        best_fit_file=init_args_d['best_fit_infile'],
        projection_files=init_args_d['projection_infile'],
        other_contours=init_args_d['other_contour'],
        pseudo_experiments=init_args_d['pseudo_experiments'],
        fluctuate_fid=fluctuate_fid,
        fluctuate_data=fluctuate_data,
    )
    postprocessor = Postprocessor(**postproc_kwargs)

    num_scan_dims = len(postprocessor.all_bin_cens)
    if num_scan_dims == 1:
        # 1D profile scans
        postprocessor.plot_1d_scans()
    elif num_scan_dims == 2:
        # 2D profile scans
        postprocessor.plot_2d_scans()
        # A (theta23, deltam31) scan is additionally plotted in terms of
        # (sin2theta23, deltam32)
        if postprocessor.all_bin_names[0] == 'theta23':
            if postprocessor.all_bin_names[1] == 'deltam31':
                postprocessor.add_deltam32_sin2theta23()
                postprocessor.plot_2d_scans(
                    xlabel='sin2theta23',
                    xunits='dimensionless',
                    ylabel='deltam32'
                )
    else:
        raise NotImplementedError(
            'Postprocessing of profile scans in anything other than 1D or '
            ' 2D not implemented in this script.'
        )

    if not return_outputs:
        return None
    return postprocessor
def plot_variation(baseline_maps, up_maps, down_maps, h1_name, fulltitle,
                   savename, outdir, ftype='pdf'):
    """Draw a three-panel figure (two maps plus their asymmetry) and save it
    into `outdir`.

    NOTE(review): as written, the body refers to `h0_map`, `h1_map` and
    `h0_name`, none of which are parameters or locals of this function.
    Unless they exist as module-level names, the first use raises
    `NameError`. The body looks like it was copied from a two-hypothesis
    asymmetry plot and never adapted to this signature -- confirm the
    intended behavior before relying on this function.

    Parameters
    ----------
    baseline_maps, up_maps, down_maps
        Not referenced anywhere in the body (see note above).
    h1_name : string
        Used in panel titles, the asymmetry label, and the output file name
    fulltitle : string
        Figure super-title
    savename : string
        Prefix for the output file name; a trailing '_' is appended if it is
        non-empty and does not already end in one
    outdir : string
        Directory the figure is written to (created via `mkdir`)
    ftype : string
        Not referenced in the body; the file name always ends in '.pdf'

    """
    matplotlib.rcParams['font.family'] = 'sans-serif'
    matplotlib.rcParams['mathtext.fontset'] = 'stixsans'

    gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
    fig, axes = plt.subplots(nrows=1, ncols=3, gridspec_kw=gridspec_kw,
                             sharex=False, sharey=False, figsize=(15, 5))

    # NOTE(review): `h0_map` / `h1_map` are not defined in this function
    asymmetry_hist = (h1_map.hist - h0_map.hist) / np.sqrt(h0_map.hist)
    asymmetry_to_plot = Map(name='asymmetry', hist=asymmetry_hist, binning=h0_map.binning)

    # NOTE(review): `asymmetrylabel` is assigned but never used below;
    # `h0_name` is not defined in this function
    asymmetrylabel = (
        r'$\left(N_{%s}-N_{%s}\right)'
        r'/\sqrt{N_{%s}}$'
        % (text2tex(h1_name), text2tex(h0_name), text2tex(h0_name)))

    # Common color scale maximum for the two (non-asymmetry) panels
    vmax = max(np.nanmax(h0_map.hist), np.nanmax(h1_map.hist))

    h0_map.plot(fig=fig, ax=axes[0], title='Hypothesis 0: $%s$' % text2tex(h0_name), cmap=plt.cm.afmhot, vmax=vmax)
    h1_map.plot(fig=fig, ax=axes[1], title='Hypothesis 1: $%s$' % text2tex(h1_name), cmap=plt.cm.afmhot, vmax=vmax)
    asymmetry_to_plot.plot(fig=fig, ax=axes[2], title='Asymmetry', symm=True, cmap=plt.cm.seismic)

    plt.subplots_adjust(bottom=0.12, top=0.8)
    plt.suptitle(fulltitle, size='xx-large')

    # Make sure a non-empty prefix is separated from the rest by '_'
    if savename != '' and savename[-1] != '_':
        savename += '_'
    # NOTE(review): extension is hard-coded; `ftype` is ignored
    fname = '%s%s_%s_asymmetry.pdf' % (savename, h0_name, h1_name)
    fname = fname.replace(' ', '_')
    mkdir(outdir, warn=False)
    fig.savefig(os.path.join(outdir, fname))
    plt.close(fig.number)
def main(return_outputs=False):
    """Produce, optionally store, and optionally plot multi-detector outputs.

    Command-line arguments are obtained via `parse_args`. A `Detectors`
    object is built from the specified pipeline(s) and shared params, and its
    outputs are computed. If an output directory was given, the outputs are
    written there and, if PDF and/or PNG was requested, plotted there too
    (plot file names are taken from the detector names).

    Parameters
    ----------
    return_outputs : bool
        Use False when calling as a script; use True to get the objects back
        when calling interactively.

    Returns
    -------
    detectors, outputs
        Only if `return_outputs` is True; otherwise None.

    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    requested = (('pdf', args.pdf), ('png', args.png))
    plot_formats = [fmt for fmt, wanted in requested if wanted]

    detectors = Detectors(args.pipeline, shared_params=args.shared_params)
    Names = detectors.det_names
    if args.select is not None:
        detectors.select_params(args.select)

    outputs = detectors.get_outputs(return_sum=args.return_sum)

    #outputs = outputs[0].fluctuate(
    #    method='poisson', random_state=get_random_state([0, 0, 0]))

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, 'detectors_outputs.json.bz2'))
        to_file(outputs, fpath)

        if plot_formats:
            my_plotter = Plotter(
                outdir=args.outdir,
                fmt=plot_formats,
                log=False,
                annotate=False,
            )
            for num, output in enumerate(outputs):
                # With `return_sum`, each detector yields one plottable
                # object; otherwise each detector yields several
                to_plot = [output] if args.return_sum else output
                for item in to_plot:
                    my_plotter.plot_2d_array(item, fname=Names[num])

    if not return_outputs:
        return None
    return detectors, outputs
def main():
    """Fit hypersurfaces to discrete systematics sets and save them as JSON.

    Command-line arguments come from `parse_args`: `fit_cfg` configures the
    fit, `outdir` is where the result is written, and `tag` goes into the
    output file name.

    """
    args = parse_args()
    set_verbosity(args.v)

    # Load the data and perform the fits
    hypersurfaces = create_hypersurfaces(fit_cfg=args.fit_cfg)

    mkdir(args.outdir)

    # Any one of the hypersurfaces can be used to derive the file name
    arbitrary_hypersurface = list(hypersurfaces.values())[0]
    fname = get_hypersurface_file_name(arbitrary_hypersurface, args.tag)

    to_file(hypersurfaces, join(args.outdir, fname))
def plot_xsec(self, map_set, ylim=None, logx=True):
    """Plot each map in `map_set` as a step curve versus energy and save it
    as ``<self.outdir>/<map name>.png``.

    The energy dimension is taken to be `true_energy` if the map's binning
    has it, else `reco_energy`, else whichever dimension has basename
    'energy'.

    Parameters
    ----------
    map_set : iterable of maps
        Each map needs `binning`, `name` and `hist` attributes.
        NOTE(review): presumably each `hist` is 1D with one entry per energy
        bin -- confirm against callers.
    ylim : None or 2-sequence
        If truthy, passed to `ax.set_ylim`
    logx : bool
        Whether to use a logarithmic x-axis; the y-axis is logarithmic if
        `self.log` is set

    """
    from pisa.utils import fileio

    zero_np_element = np.array([0])
    # Loop variable deliberately not called `map` to avoid shadowing the
    # builtin
    for xsec_map in map_set:
        binning = xsec_map.binning
        if 'true_energy' in binning.names:
            energy_binning = binning.true_energy
        elif 'reco_energy' in binning.names:
            energy_binning = binning.reco_energy
        else:
            dim_idx = binning.index('energy', use_basenames=True)
            energy_binning = binning.dims[dim_idx]
        bin_edges = energy_binning.bin_edges.m

        fig = plt.figure(figsize=self.size)
        fig.suptitle(xsec_map.name, y=0.95)
        ax = fig.add_subplot(111)
        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` in Matplotlib 3.5, so neither keyword works
        # across all versions.
        ax.grid(True, which='major')
        ax.grid(True, which='minor', linestyle=':')
        plt.xlabel(tex_dollars(energy_binning.label), size=18)
        plt.ylabel(tex_dollars(text2tex(self.label)), size=18)
        if self.log:
            ax.set_yscale('log')
        if logx:
            ax.set_xscale('log')
        if ylim:
            ax.set_ylim(ylim)
        ax.set_xlim(np.min(bin_edges), np.max(bin_edges))

        # Append a trailing zero so there is one y-value per bin edge for
        # the 'post' step plot
        array_element = np.hstack((xsec_map.hist, zero_np_element))
        ax.step(bin_edges, array_element, where='post')

        fileio.mkdir(self.outdir)
        fig.savefig(self.outdir + '/' + xsec_map.name + '.png',
                    bbox_inches='tight', dpi=150)
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    For every non-fixed param of the hypothesis maker, a scan is run over
    that param alone and written to
    ``<outdir>/<param name>_<steps>_steps_<metric>_scan.json``; the param is
    then set back to the value it had before the scan.

    Parameters
    ----------
    template_settings
        Passed to `DistributionMaker` to build the hypothesis maker
    steps : int
        Passed to `Analysis.scan`; also part of each output file name
    hypo_param_selections
        Param selections applied to the hypothesis maker and passed on to
        the scan
    outdir : string
        Output directory; `~` and environment variables are expanded
    minimizer_settings
        Resource loaded with `from_file`; the result is passed to the scan
    metric : string
        Passed to `Analysis.scan`; also part of each output file name
    debug_mode
        Passed to `Analysis.scan`

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    Raises
    ------
    IOError
        If the output file for a param already exists

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)
    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_cfg = from_file(minimizer_settings)
    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if not param.is_fixed:
            logging.info('Scanning %s', param.name)
            nominal_value = param.value

            fname = '{:s}_{:d}_steps_{:s}_scan.json'.format(
                param.name, steps, metric
            )
            outfile = join(outdir, fname)
            if isfile(outfile):
                raise IOError(
                    '`outfile` "{}" exists, not overwriting.'.format(outfile)
                )

            scan_result = analysis.scan(
                data_dist=data_dist,
                hypo_maker=hypo_maker,
                hypo_param_selections=hypo_param_selections,
                metric=metric,
                param_names=param.name,
                steps=steps,
                only_points=None,
                outer=True,
                profile=False,
                minimizer_settings=minimizer_cfg,
                outfile=outfile,
                debug_mode=debug_mode,
            )
            results[param.name] = scan_result
            to_file(scan_result, outfile)

            # Undo whatever the scan did to this param's value
            param.value = nominal_value
            logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')
    return results
def make_toy_events(outdir, num_events, energy_range, spectral_index,
                    coszen_range, num_sets, first_set, aeff_energy_param,
                    aeff_coszen_param, reco_param, pid_param, pid_dist):
    """Make toy events and store to a file.

    One events file is written to `outdir` for each combination of an entry
    of `num_events` and a set index in
    ``range(first_set, first_set + num_sets)``. Combinations whose output
    file already exists are skipped with a warning.

    Parameters
    ----------
    outdir : string
        Output directory (created via `mkdir`)
    num_events : iterable of int
        Event counts to generate sets for (this is iterated over, so a bare
        int is not accepted)
    energy_range : 2-tuple of floats
        Must lie within (0, 1e9) and have non-zero width
    spectral_index : float
        Must be >= 0
    coszen_range : 2-tuple of floats
        Must lie within [-1, 1] and have non-zero width
    num_sets : int
        Must be >= 1
    first_set : int
        Must be >= 0
    aeff_energy_param : string
    aeff_coszen_param : string
    reco_param : string
    pid_param : string
        The four `*_param` arguments are resource locations; each is checked
        with `find_resource` before any events are generated
    pid_dist : string

    Returns
    -------
    events : :class:`pisa.core.events.Events` or None
        The last set of events generated by this call, or None if nothing
        was generated (e.g. all output files already existed)

    """
    energy_range = sorted(energy_range)
    coszen_range = sorted(coszen_range)

    # Validation of args
    assert energy_range[0] > 0 and energy_range[1] < 1e9
    assert coszen_range[0] >= -1 and coszen_range[1] <= 1
    assert np.diff(energy_range)[0] > 0, str(energy_range)
    assert np.diff(coszen_range)[0] > 0, str(coszen_range)
    assert spectral_index >= 0, str(spectral_index)
    assert first_set >= 0, str(first_set)
    assert num_sets >= 1, str(num_sets)

    # Make sure resources specified actually exist
    for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]:
        find_resource(arg)

    mkdir(outdir, warn=False)

    set_indices = list(range(first_set, first_set + num_sets))

    # The following loop is for validation only
    for num, index in product(num_events, set_indices):
        mcgen_random_state(num_events=num, set_index=index)

    # Stays None if every combination is skipped, so that the `return` below
    # cannot hit an unbound local
    mc_events = None

    for num, set_index in product(num_events, set_indices):
        mcevts_fname = FNAME_TEMPLATE.format(
            file_type='events',
            detector='vlvnt',
            e_min=format_num(energy_range[0]),
            e_max=format_num(energy_range[1]),
            spectral_index=format_num(spectral_index, sigfigs=2,
                                      trailing_zeros=True),
            cz_min=format_num(coszen_range[0]),
            cz_max=format_num(coszen_range[1]),
            num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)),
            set_index=format_num(set_index, sci_thresh=(10, -10)),
            extension='hdf5'
        )
        mcevts_fpath = os.path.join(outdir, mcevts_fname)
        if os.path.isfile(mcevts_fpath):
            logging.warning('File already exists, skipping: "%s"',
                            mcevts_fpath)
            continue

        logging.info('Working on set "%s"', mcevts_fname)

        # TODO: pass filepaths / resource locations via command line args

        # Create a single random state object to pass from function to
        # function
        random_state = mcgen_random_state(num_events=num,
                                          set_index=set_index)

        mc_events = generate_mc_events(
            num_events=num,
            energy_range=energy_range,
            coszen_range=coszen_range,
            spec_ind=spectral_index,
            aeff_energy_param_source=aeff_energy_param,
            aeff_coszen_param_source=aeff_coszen_param,
            random_state=random_state
        )
        populate_reco_observables(mc_events=mc_events,
                                  param_source=reco_param,
                                  random_state=random_state)
        populate_pid(mc_events=mc_events, param_source=pid_param,
                     random_state=random_state, dist=pid_dist)

        to_file(mc_events, mcevts_fpath)

    return mc_events
def main(return_outputs=False):
    """Run a pipeline (or a single stage of it) as configured on the command
    line, and optionally store and plot the outputs.

    Steps performed:

    1. Parse the pipeline config and apply any "section arg=val" overrides
       given on the command line.
    2. Instantiate the pipeline and apply param selections.
    3. Either run the pipeline (up to and including `--stop-after-stage`, if
       given), or run only the stage named by `--only-stage`, feeding it
       dummy inputs if it defines an `input_binning`.
    4. If an output directory was given, write stage outputs to JSON and
       produce plots in the requested formats. This is done for the last
       stage run, or for every stage run if intermediate outputs were
       requested.

    Parameters
    ----------
    return_outputs : bool
        If True, return the pipeline and the outputs

    Returns
    -------
    pipeline, outputs
        Only if `return_outputs` is True

    Raises
    ------
    ValueError
        If plots are requested but no output directory is given, or if a
        command-line config override has fewer than two parts

    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            # Everything after the section is re-joined and then split at
            # the first "=": left of it is the arg name, right of it
            # (including any further "=") is the value
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"', section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        # Run the pipeline from the start, optionally stopping early
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        # A non-integer string is looked up as a stage name
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        # `indices` is a slice with exclusive upper bound, hence the +1 so
        # that it includes the stage at `stop_idx`
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        # Run one stage in isolation
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape, fill_value=3.0)
                input_map = Map(name=input_name, binning=alphabetical_binning, hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        # Nothing can be stored or plotted without an output directory
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s", stage)
                    continue
                # NOTE: this rebinds `outputs`, i.e. what is plotted below
                # and what is returned if `return_outputs` is True
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs, fname=stg_svc + "__output", cmap="RdBu")
            except ValueError as exc:
                # A plotting failure is logged but does not abort the run
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                # NOTE(review): the string returned by `format_exc()` is
                # discarded, so this call has no visible effect
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
prop=dict(size=12)) plt.setp(legend.get_title(), fontsize=18) at = AnchoredText(r'$%s$' % map.tex, prop=dict(size=20), frameon=True, loc=2) at.patch.set_boxstyle("round,pad=0.,rounding_size=0.5") ax.add_artist(at) fig.savefig(outfile, bbox_inches='tight', dpi=150) if __name__ == "__main__": args = parse_args() set_verbosity(args.verbose) logging.info('Loading Map from file {0}'.format(args.infile)) input_MapSet = MapSet.from_json(args.infile) if len(input_MapSet) > 1: input_Map = input_MapSet[args.name] else: input_Map = input_MapSet.pop() fileio.mkdir(args.outdir, mode=0755) outfile = args.outdir + '/' + args.outname logging.info('outfile {0}'.format(outfile)) plot_CFX_one(map=input_Map, outfile=outfile, logy=args.logy, ylim=args.ylim, ylabel=args.ylabel)
def _compute_nominal_transforms(self):
    """Compute effective-area transforms from the (cut) events.

    For each group in `self.transform_groups`, the events' 'weighted_aeff'
    weights are histogrammed in the input binning and divided by the bin
    volumes, and the result is handed to `populate_transforms`. If
    `self.debug_mode` is set, each such array is also written to a file
    beneath the 'debug' resource directory.

    Returns
    -------
    TransformSet
        Built from the transforms returned by `populate_transforms` for all
        transform groups

    """
    self.load_events(self.params.aeff_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Units must be the following for correctly converting a sum-of-
    # OneWeights-in-bin to an average effective area across the bin.
    comp_units = dict(true_energy='GeV', true_coszen=None,
                      true_azimuth='rad')

    # Select only the units in the input/output binning for conversion
    # (can't pass more than what's actually there)
    in_units = {}
    for dim, unit in comp_units.items():
        if dim in self.input_binning:
            in_units[dim] = unit

    # TODO: use out_units for some kind of conversion?
    #out_units = {dim: unit for dim, unit in comp_units.items()
    #             if dim in self.output_binning}

    # These will be in the computational units
    input_binning = self.input_binning.to(**in_units)

    # Account for "missing" dimension(s) (dimensions OneWeight expects for
    # computation of bin volume), and accommodate with a factor equal to
    # the full range. See IceCube wiki/documentation for OneWeight for
    # more info.
    missing_dims_vol = 1
    full_ranges = (('true_azimuth', 2*np.pi), ('true_coszen', 2))
    for dim_name, full_range in full_ranges:
        if dim_name not in input_binning:
            missing_dims_vol *= full_range

    if bool(self.debug_mode):
        outdir = os.path.join(
            find_resource('debug'), self.stage_name, self.service_name
        )
        mkdir(outdir)
        #hex_hash = hash2hex(kde_hash)

    norm_volumes = (
        input_binning.bin_volumes(attach_units=False) * missing_dims_vol
    )

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.debug('Working on %s effective areas xform', xform_flavints)

        want_errors = self.error_method not in [None, False]
        aeff_transform = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning,
            weights_col='weighted_aeff',
            errors=want_errors,
        ).hist

        # Divide histogram by
        #   (energy bin width x coszen bin width x azimuth bin width)
        # volumes to convert from sums-of-OneWeights-in-bins to
        # effective areas. Note that volume correction factor for
        # missing dimensions is applied here.
        aeff_transform /= norm_volumes

        if self.debug_mode:
            fname = ''.join(['aeff_', str(xform_flavints), '.pkl'])
            to_file(aeff_transform, os.path.join(outdir, fname))

        new_transforms = populate_transforms(
            service=self,
            xform_flavints=xform_flavints,
            xform_array=aeff_transform,
        )
        nominal_transforms.extend(new_transforms)

    return TransformSet(transforms=nominal_transforms)
def plot_cmp(new, ref, new_label, ref_label, plot_label, file_label, outdir,
             ftype='png'):
    """Plot comparisons between two (identically-binned) maps or map sets.

    NOTE(review): only the case where `ref` and `new` are both `Map` objects
    sets up the lists that are iterated over; passing anything else leaves
    them undefined. Likewise only 2- and 3-dimensional binnings are handled
    when plotting. Confirm whether map sets / other dimensionalities are
    meant to be supported.

    Parameters
    ----------
    new : Map or MapSet
    ref : Map or MapSet
    new_label : str
        Label for `new` in plot titles and the file name
    ref_label : str
        Label for `ref` in plot titles and the file name
    plot_label : str
        Not referenced in the body
    file_label : str
        First component of the output file name
    outdir : str or None
        Directory to save the plot in; if None, no plot is made and only the
        statistics are computed
    ftype : str
        File name extension of the plot

    Returns
    -------
    max_diff_ratio : float
        Largest value of (new - ref) / ref; if that is infinite, the largest
        among the finite values instead
    max_diff : float
        0.0 unless `max_diff_ratio` was infinite, in which case the largest
        value of (new - ref) among the bins with non-finite fractional
        difference

    """
    path = [outdir]

    if isinstance(ref, Map):
        assert isinstance(new, Map)
        ref_maps = [ref]
        new_maps = [new]

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    for ref, new in zip(ref_maps, new_maps):
        assert ref.binning == new.binning
        fname = get_valid_filename('__'.join([
            get_valid_filename(file_label),
            '%s_vs_%s' % (get_valid_filename(new_label.lower()),
                          get_valid_filename(ref_label.lower()))
        ]) + '.' + ftype)
        path.append(fname)

        ratio = new / ref
        diff = new - ref
        fract_diff = diff / ref

        # Summary statistics use finite entries only
        finite_ratio = ratio.hist[np.isfinite(ratio.hist)]
        ratio_mean = np.mean(finite_ratio)
        ratio_median = np.median(finite_ratio)

        finite_diff = diff.hist[np.isfinite(diff.hist)]
        diff_mean = np.mean(finite_diff)
        diff_median = np.median(finite_diff)

        finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)]
        fract_diff_mean = np.mean(finite_fract_diff)
        fract_diff_median = np.median(finite_fract_diff)

        max_diff_ratio = np.nanmax(fract_diff.hist)

        # Handle cases where ratio returns infinite
        # This isn't necessarily a fail, since all it means is the reference
        # was zero. If the new value is sufficiently close to zero then it's
        # still fine.
        if max_diff_ratio == np.inf:
            # `logging.warn` is a deprecated alias of `logging.warning`
            logging.warning(
                'Infinite value found in ratio tests. Difference tests'
                ' now also being calculated'
            )
            # First find all the finite elements
            finite_mask = np.isfinite(fract_diff.hist)
            # Then find the nanmax of this, will be our new test value
            max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask])
            # Also find all the infinite elements; compute a second test
            # value
            max_diff = np.nanmax(diff.hist[~finite_mask])
        else:
            # Without any infinite elements we can ignore this second test
            max_diff = 0.0

        if outdir is not None:
            if new.binning.num_dims == 2:
                n_dims = 2
                n_third_dim_bins = 1
            elif new.binning.num_dims == 3:
                n_dims = 3
                # The dimension with the fewest bins gets one row of plots
                # per bin
                odd_dim_idx = new.binning.shape.index(np.min(
                    new.binning.shape))
                logging.debug('odd_dim_idx: %s', odd_dim_idx)
                n_third_dim_bins = new.binning.shape[odd_dim_idx]

            gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
            fig, axes = plt.subplots(nrows=n_third_dim_bins, ncols=5,
                                     gridspec_kw=gridspec_kw,
                                     squeeze=False, sharex=False,
                                     sharey=False, figsize=(20, 5))

            refslice = ref
            newslice = new
            bin_names = None
            if n_dims == 3:
                if odd_dim_idx != 0:
                    refslice = np.moveaxis(ref, source=odd_dim_idx,
                                           destination=0)
                    newslice = np.moveaxis(new, source=odd_dim_idx,
                                           destination=0)
                bin_names = new.binning.dims[odd_dim_idx].bin_names

            for odd_bin_idx in range(n_third_dim_bins):
                if n_dims == 2:
                    thisbin_ref = refslice
                    thisbin_new = newslice
                    tmp_ref_label = ref_label
                    tmp_new_label = new_label

                elif n_dims == 3:
                    thisbin_ref = refslice[odd_bin_idx, ...].squeeze()
                    thisbin_new = newslice[odd_bin_idx, ...].squeeze()

                    if bin_names is not None:
                        suffix = bin_names[odd_bin_idx]
                    else:
                        suffix = format(odd_bin_idx, 'd')
                    tmp_new_label = new_label + ' ' + suffix
                    tmp_ref_label = ref_label + ' ' + suffix

                # Per-row maps; note that the mean/median annotations below
                # are the whole-map statistics computed above
                ratio = thisbin_new / thisbin_ref
                diff = thisbin_new - thisbin_ref
                fract_diff = diff / thisbin_ref

                refmax = np.nanmax(thisbin_ref.hist)
                newmax = np.nanmax(thisbin_new.hist)
                vmax = refmax if refmax > newmax else newmax

                baseplot2(map=thisbin_new,
                          title=tmp_new_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][0])

                baseplot2(map=thisbin_ref,
                          title=tmp_ref_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][1])

                ax, _, _ = baseplot2(map=ratio,
                                     title='%s/%s' % (tmp_new_label,
                                                      tmp_ref_label),
                                     ax=axes[odd_bin_idx][2])
                ax.text(0.95, 0.95, "Mean: %.6f" % ratio_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes, color=(0, 0.8, 0.8))
                ax.text(0.95, 0.91, "Median: %.6f" % ratio_median,
                        horizontalalignment='right',
                        transform=ax.transAxes, color=(0, 0.8, 0.8))

                ax, _, _ = baseplot2(map=diff,
                                     title='%s-%s' % (tmp_new_label,
                                                      tmp_ref_label),
                                     symm=True, ax=axes[odd_bin_idx][3])
                ax.text(0.95, 0.95, "Mean: %.6f" % diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95, 0.91, "Median: %.6f" % diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

                ax, _, _ = baseplot2(
                    map=fract_diff,
                    title='(%s-%s)/%s' % (tmp_new_label, tmp_ref_label,
                                          tmp_ref_label),
                    symm=True,
                    ax=axes[odd_bin_idx][4])
                ax.text(0.95, 0.95, "Mean: %.6f" % fract_diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95, 0.91, "Median: %.6f" % fract_diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

            logging.debug('>>>> Plot for inspection saved at %s',
                          os.path.join(*path))
            fig.savefig(os.path.join(*path))
            plt.close(fig.number)

    return max_diff_ratio, max_diff
def plot_map_comparisons(ref_map, new_map, ref_abv, new_abv, outdir, subdir,
                         name, texname, stagename, servicename,
                         shorttitles=False, ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps.

    Five panels are drawn: the reference map, the new map, their ratio
    (new / ref), their difference (new - ref), and the fractional difference
    ((new - ref) / ref).

    Parameters
    ----------
    ref_map, new_map
        Reference and new maps; validated against each other with
        `validate_map_objs`
    ref_abv, new_abv : str
        Abbreviations for the two maps, used in titles and the file name
    outdir : str or None
        Base output directory; if None, no plot is made and only the
        statistics are computed
    subdir : str or None
        Subdirectory of `outdir`; if None, the lowercased `stagename` is
        used (and if that is None too, no subdirectory is used)
    name : str or None
        If not None, lowercased and appended to the file name
    texname : str or None
        If not None, appended (in math mode) to the base plot title
    stagename : str or None
        If not None, used in the file name and base plot title
    servicename : str or None
        If not None, used in the file name
    shorttitles : bool
        Label the reference map "(A)" and the new map "(B)", and title the
        derived panels in terms of A and B rather than the abbreviations
    ftype : str
        File name extension of the plot

    Returns
    -------
    max_diff_ratio : float
        Largest absolute fractional difference; if that is infinite, the
        largest among the finite values instead
    max_diff : float
        0.0 unless `max_diff_ratio` was infinite, in which case the largest
        absolute difference among the bins with non-finite fractional
        difference

    """
    path = [outdir]

    # Fall back to the stage name for the subdirectory, but do not fail if
    # neither is available
    if subdir is None and stagename is not None:
        subdir = stagename.lower()
    if subdir is not None:
        path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    fname = ['%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower())]
    if stagename is not None:
        fname.append('stage_' + stagename)
    if servicename is not None:
        fname.append('service_' + servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype
    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    # Bins where the reference is zero yield inf/nan; these are dealt with
    # below rather than being warned about here
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map / ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map / ref_map
    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where ratio returns infinite
    # This isn't necessarily a fail, since all it means is the reference was
    # zero. If the new value is sufficiently close to zero then it's still
    # fine.
    if max_diff_ratio == float('inf'):
        # `logging.warn` is a deprecated alias of `logging.warning`
        logging.warning('Infinite value found in ratio tests. Difference '
                        'tests now also being calculated')
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        if shorttitles:
            # A is the reference and B is the new map; the derived maps are
            # new/ref, new-ref and (new-ref)/ref, i.e. B/A, B-A, (B-A)/A
            ref_title = basetitle + ' ' + ref_abv + ' (A)'
            new_title = basetitle + ' ' + new_abv + ' (B)'
            ratio_title = 'B/A'
            diff_title = 'B-A'
            diff_ratio_title = '(B-A)/A'
        else:
            ref_title = basetitle + ' ' + ref_abv
            new_title = basetitle + ' ' + new_abv
            ratio_title = basetitle + ' %s/%s' % (new_abv, ref_abv)
            diff_title = basetitle + ' %s-%s' % (new_abv, ref_abv)
            diff_ratio_title = (
                basetitle + ' (%s-%s)/%s' % (new_abv, ref_abv, ref_abv)
            )

        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1, ncols=5, gridspec_kw=gridspec_kw,
                                 sharex=False, sharey=False, figsize=(20, 5))
        ref_map.plot(fig=fig, ax=axes[0], title=ref_title,
                     cmap=plt.cm.afmhot)
        new_map.plot(fig=fig, ax=axes[1], title=new_title,
                     cmap=plt.cm.afmhot)
        ratio_map.plot(fig=fig, ax=axes[2], title=ratio_title,
                       cmap=plt.cm.afmhot)
        diff_map.plot(fig=fig, ax=axes[3], title=diff_title, symm=True,
                      cmap=plt.cm.seismic)
        diff_ratio_map.plot(fig=fig, ax=axes[4], title=diff_ratio_title,
                            symm=True, cmap=plt.cm.seismic)

        logging.debug('>>>> Plot for inspection saved at %s',
                      os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
def main():
    """Plot the effect of shifting selected systematics on the pipeline
    output.

    The pipeline given by the 'cfx_pipeline' argument is run once with
    'stat_fluctuations' and 'regularisation' set to zero to obtain a
    baseline. Then, for every free parameter (other than 'dataset') whose
    name contains 'hole_ice' or 'dom_eff', the pipeline is re-run with the
    parameter set to a lower and to an upper bound, each result is compared
    against the baseline via `compare`, and the two fractional-difference
    maps (in percent) are plotted side by side.

    The bounds are the ends of the parameter's range for uniform priors;
    otherwise they are located on a 1000-point scan of the prior's
    log-likelihood across the range, using the module-level `SIGMA`.

    Note that the module-level `SIGMA` is multiplied by the 'sigma'
    argument as a side effect.

    """
    global SIGMA

    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    # The remaining `args` are forwarded to `compare` further down; PDF
    # output is switched off there and the request is remembered so that it
    # only decides the format of the summary plot made by this function
    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    # `0o755` is the octal literal syntax accepted by both Python 2.6+ and
    # Python 3 (a bare `0755` is a syntax error in Python 3)
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map (computed with regularisation set to zero, after which
    # the regularisation param is reset)
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()
    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        logging.info('Working on parameter {0}'.format(f.name))

        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            dllh = llh - np.min(-llh)

            mllh_idx = np.argmin(-llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = (
                np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx
            )

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        # Put the parameter back before moving on to the next one
        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([l_out]), test_label=r'-sigma', **args)
        compare(outdir=u_outdir, ref=MapSet([nom_out]), ref_label='baseline',
                test=MapSet([u_out]), test_label=r'+sigma', **args)

        # The fractional-difference maps are read back from files in the
        # comparison output directories
        l_in_mapset = (
            l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        )
        u_in_mapset = (
            u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        )
        # Factor 100: fractions -> percent
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
                ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles
        )
def _resolve_mapset(spec, param_selections, long_name, arg_name):
    """Turn one `compare` input specification into a MapSet.

    Parameters
    ----------
    spec : Map, MapSet, Pipeline, DistributionMaker, or sequence of strings
        If a sequence of strings, the following interpretations are tried in
        order until one succeeds: a pipeline config (single entry) or a set
        of pipeline configs (otherwise), stored Map file(s), a single stored
        MapSet file.
    param_selections : None or string
        Param selections applied to a Pipeline / DistributionMaker
    long_name : string
        Word used for this input in the error message (e.g. 'reference')
    arg_name : string
        Argument name used in the error message (e.g. 'ref')

    Returns
    -------
    mapset : MapSet
    source : string
        One of the module-level ``*_SOURCE_STR`` constants

    Raises
    ------
    ValueError
        If `spec` cannot be interpreted in any of the supported ways

    """
    mapset = None
    source = None
    if isinstance(spec, Map):
        mapset = MapSet(spec)
        source = MAP_SOURCE_STR
    elif isinstance(spec, MapSet):
        mapset = spec
        source = MAPSET_SOURCE_STR
    elif isinstance(spec, Pipeline):
        if param_selections is not None:
            spec.select_params(param_selections)
        mapset = spec.get_outputs()
        source = PIPELINE_SOURCE_STR
    elif isinstance(spec, DistributionMaker):
        if param_selections is not None:
            spec.select_params(param_selections)
        mapset = spec.get_outputs()
        source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        # Each attempt below is deliberately best-effort: a failure simply
        # means "try the next interpretation". `Exception` (rather than a
        # bare `except`) lets KeyboardInterrupt / SystemExit propagate.
        if len(spec) == 1:
            try:
                pipeline = Pipeline(config=spec[0])
            except Exception:
                pass
            else:
                source = PIPELINE_SOURCE_STR
                if param_selections is not None:
                    pipeline.select_params(param_selections)
                mapset = pipeline.get_outputs()
        else:
            try:
                dmaker = DistributionMaker(pipelines=spec)
            except Exception:
                pass
            else:
                source = DISTRIBUTIONMAKER_SOURCE_STR
                if param_selections is not None:
                    dmaker.select_params(param_selections)
                mapset = dmaker.get_outputs()

    if mapset is None:
        try:
            maps = [Map.from_json(f) for f in spec]
        except Exception:
            pass
        else:
            source = MAP_SOURCE_STR
            mapset = MapSet(maps)

    if mapset is None:
        assert param_selections is None
        assert len(spec) == 1, 'Can only handle one MapSet'
        try:
            mapset = MapSet.from_json(spec[0])
        except Exception:
            pass
        else:
            source = MAPSET_SOURCE_STR

    if mapset is None:
        raise ValueError(
            'Could not instantiate the %s Pipeline, DistributionMaker,'
            ' Map, or MapSet from %s value(s) %s'
            % (long_name, arg_name, spec)
        )

    return mapset, source


def compare(outdir, ref, ref_label, test, test_label, asymm_max=None,
            asymm_min=None, combine=None, diff_max=None, diff_min=None,
            fract_diff_max=None, fract_diff_min=None, json=False, pdf=False,
            png=False, ref_abs=False, ref_param_selections=None, sum=None,
            test_abs=False, test_param_selections=None):
    """Compare two entities. The result of each entity specification is
    formatted into a MapSet and (if `json` is set) stored to disk, so that
    e.g. re-running a DistributionMaker is unnecessary to reproduce the
    results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported. Map, MapSet, Pipeline, and DistributionMaker objects are
        accepted directly as well.

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not applicable
        if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported. Map, MapSet, Pipeline, and DistributionMaker objects are
        accepted directly as well.

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not applicable
        if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the shell.
        Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is specified, no
        plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified, no
        plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot max; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot max; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot max; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull
        - (Test - Ref) / sqrt(Ref)

    Raises
    ------
    ValueError
        If `ref` or `test` cannot be interpreted, or if the names of the
        test maps do not match those of the ref maps.

    Notes
    -----
    The ``*_min`` / ``*_max`` limits currently only influence the
    ``symmetric`` flag given to the plotter; the corresponding
    ``vmin`` / ``vmax`` plot arguments are commented out below.

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a MapSet
    ref, ref_source = _resolve_mapset(
        ref, ref_param_selections, 'reference', 'ref'
    )
    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a MapSet
    test, test_source = _resolve_mapset(
        test, test_param_selections, 'test', 'test'
    )
    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.'
                         % (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(map_obj):
        # Work on a copy so that the caller's map is left untouched; bins
        # that are (numerically) zero become NaN so dividing by the result
        # does not divide by zero.
        newmap = deepcopy(map_obj)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        # Asymmetry extrema must come from the asymmetry map itself (they
        # were previously taken from the fractional-difference map).
        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),

            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),

            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),

            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),

            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),

            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info(' Ref map(s):')
        logging.info(' min :' + ('%.2f' % min_ref).rjust(12))
        logging.info(' max :' + ('%.2f' % max_ref).rjust(12))
        logging.info(' total :' + ('%.2f' % total_ref).rjust(12))
        logging.info(' mean :' + ('%.2f' % mean_ref).rjust(12))
        logging.info(' Test map(s):')
        logging.info(' min :' + ('%.2f' % min_test).rjust(12))
        logging.info(' max :' + ('%.2f' % max_test).rjust(12))
        logging.info(' total :' + ('%.2f' % total_test).rjust(12))
        logging.info(' mean :' + ('%.2f' % mean_test).rjust(12))
        logging.info(' Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info(' max : %.4e', max_abs_fract_diff)
        logging.info(' mean : %.4e', mean_abs_fract_diff)
        logging.info(' median: %.4e', median_abs_fract_diff)
        logging.info(' Fractional difference, (Test - Ref) / Ref:')
        logging.info(' min : %.4e', min_fract_diff)
        logging.info(' max : %.4e', max_fract_diff)
        logging.info(' mean : %.4e +/- %.4e',
                     mean_fract_diff, std_fract_diff)
        logging.info(' median: %.4e +/- %.4e',
                     median_fract_diff, mad_fract_diff)
        logging.info(' Difference, Test - Ref:')
        logging.info(' min : %.4e', min_diff)
        logging.info(' max : %.4e', max_diff)
        logging.info(' mean : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info(' median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info(' Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info(' min : %.4e', min_asymm)
        logging.info(' max : %.4e', max_asymm)
        logging.info(' total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)
            )
        )
        fract_diff.to_json(
            os.path.join(
                outdir,
                'fract_diff__%s___%s.json.bz2' % (test_plot_label,
                                                  ref_plot_label)
            )
        )
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)
            )
        )
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)
            )
        )

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=False, ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=diff_symm, ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=asymm_symm, ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
def makeEventsFile(data_files, detector, proc_ver, cut, outdir,
                   run_settings=None, data_proc_params=None, join=None,
                   cust_cuts=None, extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a
    simplified PISA-standard-format HDF5 file for use in aeff, reco, and/or
    PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found
        in e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir
        Directory path in which to store resulting files; will be generated
        if it does not already exist (including any parent directories that
        do not exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams
        object instantiated therefrom.

    join
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not present, effective area
        will not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by
    the total *for that int type only* to obtain the "weighted_aeff" for
    that event (even if int types are being grouped/joined together).

    This has the effect that within a group, ...
    ... and within an interaction type, effective area is a weighted
    average of that of the flavors being combined. E.g. for CC,

                  \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
        Aeff_CC = ----------------------------------------------------- ,
                       \sum_{run x}\sum_{flav y} (ngen_{x,y})

    ... and then across interaction types, the results of the above for
    each int type need to be summed together, i.e.:

        Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    The list of files that could not be read is always written to
    '/tmp/bad_files.json'.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params)
        )
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    # NOTE: An assumption of the logic below is that all MC is generated
    # using the same cross sections version.
    #
    # TODO: It would be possible to combine runs with different cross
    # sections so long as each (flavor, interaction type) cross sections are
    # weighted-averaged together using weights
    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    # where E_x are the energy sample points specified in the cross
    # sections (and hence these must also be identical across all cross
    # sections that get combined, unless interpolation is performed).

    # Geometry recorded in the metadata is that of the first (sorted) run
    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: '
            + '; '.join([str(g) for g in grouped])
        )

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined '
                 'together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {} for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: [] for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count
    # CC separately from NC because aeff's for CC & NC add, whereas
    # aeffs intra-CC should be weighted-averaged (as for intra-NC)
    ngen = [{inttype: {} for inttype in ALL_NUINT_TYPES}
            for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    # Reset so that the geometry used for the consistency check (and for the
    # output file name) is taken from the runs whose files are actually read
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            if run not in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields
                )

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if run_flavint not in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if run not in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight includes cc/nc:total fraction,
                        # so DO NOT specify the full flavint here, only flav
                        # (since one_weight does NOT take bar/nobar fraction,
                        # it must be included here in the ngen computation)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    to_file(bad_files, '/tmp/bad_files.json')

    # Decide whether to compute effective area. `output_fields` may be None
    # (meaning "write everything"), in which case a membership test on it
    # would raise TypeError, so the two cases are handled separately.
    if output_fields is None:
        compute_aeff = (extract_fields is None
                        or 'one_weight' in extract_fields)
    else:
        compute_aeff = 'weighted_aeff' in output_fields

    if compute_aeff:
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', ' run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = ' '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = ' '.join(['-' * n for n in fmt_n])
        # One logging argument per `%s` placeholder in `fmt`
        logging.info(fmt, *fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in {fi.intType for fi in flavint_group.flavints}:
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(
                            fmt,
                            flavint_group.simple_str(), int_type, str(run),
                            barnobar, int(barnobar_counts), int(ngen_it_tot)
                        )

                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                            np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                            np.array(extracted_data[grp_n][int_type][field])

                # Generate weighted_aeff field for this group / int type's
                # data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                    extracted_data[grp_n][int_type]['one_weight'] \
                    / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.',
                run, count, ref_num_i3_files
            )

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if flavint not in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.',
                    flavint, flavint_group
                )
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = '' + detector_geom
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)
def add_fluxes_to_file(data_file_path, flux_table, flux_name, outdir=None,
                       label=None, overwrite=False):
    """Add fluxes to PISA events file (e.g. for use by an mc stage)

    For every top-level node of the events file whose key starts with "nu",
    the nue, nuebar, numu and numubar fluxes are computed with
    `calculate_2d_flux_weights` from the node's `true_energy` and
    `true_coszen` fields and stored in the node under the keys
    ``<flux_name>_<flavor>_flux``. The result is written to
    ``<outdir>/<input base name>__with_fluxes<_label><ext>``.

    Parameters
    -----------
    data_file_path : string
        Events file to read; its extension must be one of `HDF5_EXTS`

    flux_table
        Indexed by 'nue', 'nuebar', 'numu' and 'numubar'; each entry is
        passed as `en_splines` to `calculate_2d_flux_weights`

    flux_name : string
        Prefix of the keys under which the computed fluxes are stored

    outdir : string or None
        If None, output is to the same directory as `data_file_path`

    label : string or None, optional
        If given, "_" + `label` is inserted into the output file name just
        before the extension

    overwrite : bool, optional
        If False and the output file already exists, a warning is logged
        and nothing is (re)generated

    """
    bname, ext = splitext(basename(data_file_path))
    assert ext.lstrip('.') in HDF5_EXTS

    if outdir is None:
        outdir = dirname(data_file_path)

    if label is None:
        label = ''
    else:
        assert isinstance(label, basestring)
        label = '_' + label

    outpath = join(outdir, '{}__with_fluxes{}{}'.format(bname, label, ext))

    if not overwrite and isfile(outpath):
        logging.warning('Output path "%s" already exists, not regenerating',
                        outpath)
        return

    # Only load the input once it is known that output will be produced
    data, attrs = from_file(find_resource(data_file_path), return_attrs=True)

    mkdir(outdir, warn=False)

    # Loop over the top-level keys
    for primary, primary_node in data.items():
        # Only handling neutrino fluxes here, skip past e.g. muon or noise MC
        # events
        if not primary.startswith("nu"):
            continue

        logging.info('Adding fluxes to "%s" events', primary)

        # Input data may have one layer of hierarchy before the event
        # variables (e.g. [numu_cc]), or for older files there may be a
        # second layer (e.g. [numu][cc]). Handling either case here...
        if "true_energy" in primary_node:
            secondary_nodes = [primary_node]
        else:
            secondary_nodes = primary_node.values()

        for secondary_node in secondary_nodes:
            true_e = secondary_node['true_energy']
            true_cz = secondary_node['true_coszen']

            # calculate all 4 fluxes (nue, nuebar, numu and numubar)
            for table in ['nue', 'nuebar', 'numu', 'numubar']:
                flux = calculate_2d_flux_weights(
                    true_energies=true_e,
                    true_coszens=true_cz,
                    en_splines=flux_table[table]
                )
                keyname = flux_name + '_' + table + '_flux'
                secondary_node[keyname] = flux

    to_file(data, outpath, attrs=attrs, overwrite=overwrite)
    logging.info('--> Wrote file including fluxes to "%s"', outpath)
def _shifted_param_value(param, up):
    """Return the shifted value of `param` and a label for the shift size.

    Parameters
    ----------
    param
        Object with a `value` attribute and, optionally, a `prior` attribute
        exposing `kind` and `stddev`.

    up : bool
        Shift upwards if True, downwards if False.

    Returns
    -------
    value
        `param.value` shifted by one prior standard deviation if the prior is
        Gaussian; otherwise scaled by 1.1 (up) or 0.9 (down), or set to +1.0 /
        -1.0 if the current value is zero.

    label : string
        Text describing the size of the shift, for use in plot titles.

    """
    prior = getattr(param, 'prior', None)

    # Gaussian priors are easy - just do 1 sigma
    if prior is not None and prior.kind == 'gaussian':
        if up:
            return param.value + prior.stddev, r"$1\sigma$"
        return param.value - prior.stddev, r"$1\sigma$"

    # Any other prior, or no prior at all: do 10%...
    if param.value != 0.0:
        return (1.1 if up else 0.9) * param.value, r"10%"

    # ... or +/- 1 if the baseline is zero
    return (1.0 if up else -1.0), r"1"


def main():
    """Plot the effect of shifting each free parameter up and down.

    For every free parameter of the `DistributionMaker` built from the
    command-line arguments, the summed output is recomputed with the parameter
    shifted up and then down (see `_shifted_param_value`), and the percent
    change with respect to the baseline is plotted for each PID bin. One PNG
    per parameter is saved to `args.logdir`.

    Raises
    ------
    ValueError
        If any pipeline contains a separate 'pid' stage (not implemented).

    """
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: `vars` returns the namespace's own dict, so popping here also
    # removes these entries from `args`.
    set_verbosity(init_args_d.pop('v'))
    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')

    # Normalize and check the pipeline config filenames
    filenames = init_args_d.pop('pipeline')
    if filenames is not None:
        filenames = sorted([normcheckpath(fname) for fname in filenames])

    ps_str = init_args_d['param_selections']
    if ps_str is None:
        ps_list = None
    else:
        ps_list = [x.strip().lower() for x in ps_str.split(',')]

    data_maker = DistributionMaker(filenames)
    data_maker.select_params(ps_list)

    # Need a special case where PID is a separate stage
    for data_pipeline in data_maker.pipelines:
        if 'pid' in data_pipeline.stage_names:
            raise ValueError("Special case for separate PID stage currently "
                             "not implemented.")
    # Assigned unconditionally so that it is defined even if the maker holds
    # no pipelines at all.
    return_sum = True

    baseline_maps = data_maker.get_outputs(return_sum=return_sum)

    # Build the file-name prefix from the detector and selection names
    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_file_label = ' '.join(det_sel)
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    for data_param in data_maker.params.free:
        # Shifted up
        data_param.value, shift_label = _shifted_param_value(
            data_param, up=True
        )
        up_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        # Shifted down; computed only after the reset above so that it is
        # based on the value the parameter has at that point.
        data_param.value, _ = _shifted_param_value(data_param, up=False)
        down_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        baseline_map = baseline_maps['total']
        baseline_map.set_errors(error_hist=None)
        up_map = up_maps['total']
        up_map.set_errors(error_hist=None)
        down_map = down_maps['total']
        down_map.set_errors(error_hist=None)

        pid_names = baseline_map.binning['pid'].bin_names
        if pid_names is None:
            logging.warning('There are no names given for the PID bins, thus '
                            'they will just be numbered in both the the plot '
                            'save names and titles.')
            pid_names = list(range(baseline_map.binning['pid'].num_bins))

        # Top row: upward shift; bottom row: downward shift; one column per
        # PID bin
        gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
        fig, axes = plt.subplots(
            nrows=2,
            ncols=len(pid_names),
            gridspec_kw=gridspec_kw,
            sharex=False,
            sharey=False,
            figsize=(7 * len(pid_names), 14)
        )

        for i, pid_name in enumerate(pid_names):
            baseline = baseline_map.split(dim='pid', bin=pid_name)
            up_to_plot = up_map.split(dim='pid', bin=pid_name)
            up_to_plot = (up_to_plot - baseline) / baseline * 100.0
            down_to_plot = down_map.split(dim='pid', bin=pid_name)
            down_to_plot = (down_to_plot - baseline) / baseline * 100.0

            if isinstance(pid_name, int):
                pid_title = 'PID Bin %i' % (pid_name)
            else:
                pid_title = pid_name + ' Channel'

            param_label = tex_axis_label(data_param.name)

            up_to_plot.plot(
                fig=fig,
                ax=axes[0][i],
                title="%s " % (pid_title) + "\n" + " %s + %s"
                % (param_label, shift_label),
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True
            )
            down_to_plot.plot(
                fig=fig,
                ax=axes[1][i],
                title="%s " % (pid_title) + "\n" + " %s - %s"
                % (param_label, shift_label),
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True
            )

        fig.subplots_adjust(hspace=0.4)
        savename = det_sel_file_label
        if savename != '' and savename[-1] != '_':
            savename += '_'
        savename += '%s_variation.png' % (data_param.name)
        mkdir(args.logdir, warn=False)
        fig.savefig(os.path.join(args.logdir, savename), bbox_inches='tight')
        plt.close(fig.number)