def _compute_transforms(self):
    """Build one scaling transform per input from the fitted systematics.

    For every input name the fit function is evaluated, at the current
    values of the systematic parameters, at each index of the leading axes
    of that input's fit results. The values obtained form the transform
    array for that input.

    Returns
    -------
    TransformSet
        One `BinnedTensorTransform` per entry in `self.input_names`; each
        maps the input onto an output carrying the same name.

    """
    # TODO: use iterators to collapse nested loops
    xforms = []
    for input_name in self.input_names:
        # Current magnitudes of the systematics, in `sys_list` order
        sys_values = [
            self.params[sys_name].magnitude for sys_name in self.sys_list
        ]

        # The last axis holds the arguments that are unpacked into
        # `fit_fun`; all preceding axes index the points to evaluate
        coeffs = self.fit_results[input_name]
        point_shape = coeffs.shape[:-1]

        scaling = np.ones(point_shape)
        for point in np.ndindex(*point_shape):
            scaling[point] *= fit_fun(sys_values, *coeffs[point])

        xforms.append(
            BinnedTensorTransform(
                input_names=input_name,
                output_name=input_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=scaling,
                error_method=self.error_method,
            )
        )
    return TransformSet(xforms)
def _compute_transforms(self):
    """Build the transforms that merge inputs into their combine groups.

    Each entry of `self.combine_groups` yields one transform whose array
    has a leading axis with one slice per member input followed by one
    axis per input-binning dimension, initialised to ones. The slice of
    any input whose name contains '_nc' is multiplied by the
    dimensionless value of the `nu_nc_norm` parameter.

    Returns
    -------
    TransformSet

    """
    dim_names = self.input_binning.names

    xforms = []
    for group, member_names in self.combine_groups.items():
        shape = [len(member_names)]
        shape.extend(self.input_binning[dim].num_bins for dim in dim_names)
        weights = np.ones(shape)

        for slot, member in enumerate(member_names):
            # Only neutral-current inputs are rescaled; the nutau
            # normalisations once applied here are disabled.
            if '_nc' not in member:
                continue
            nc_norm = self.params.nu_nc_norm.value.m_as('dimensionless')
            if nc_norm != 1:
                weights[slot] *= nc_norm

        xforms.append(
            BinnedTensorTransform(
                input_names=member_names,
                output_name=group,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=weights,
            )
        )
    return TransformSet(transforms=xforms)
def _compute_transforms(self):
    """Generate dummy oscillation transforms for every output.

    The NumPy global random generator is re-seeded from a hash of the
    current parameter values, so identical parameters reproduce identical
    transforms while different parameters give different ones.

    Returns
    -------
    TransformSet
        One `BinnedTensorTransform` per output name. The first three
        outputs take `self.input_names[0:2]` as inputs, the remaining ones
        take `self.input_names[2:4]`.

    """
    # Derive the seed from the parameter values; keep it inside the range
    # used by the original implementation
    np.random.seed(
        hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
    )

    # Example of reading a parameter in the units used for computation
    theta23 = self.params.theta23.m_as('rad')

    xforms = []
    for position, output_name in enumerate(self.output_names):
        # Outputs 0-2 are fed by the neutrino inputs, the rest by the
        # anti-neutrino inputs
        feeding_inputs = (
            self.input_names[0:2] if position < 3 else self.input_names[2:4]
        )

        # The "oscillation probabilities" for this output
        dummy_probs = self.create_dummy_osc_probs()

        # Input and output binnings have already been checked to be
        # identical
        xforms.append(
            BinnedTensorTransform(
                input_names=feeding_inputs,
                output_name=output_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=dummy_probs,
            )
        )
    return TransformSet(transforms=xforms)
def _compute_transforms(self):  # pylint: disable=no-self-use
    """Return an empty `TransformSet`.

    Stages that apply transforms to their inputs are expected to override
    this method and derive the transform there; stages without inputs
    should keep this default.

    """
    no_transforms = []
    return TransformSet(no_transforms)
def __init__(
    self,
    use_transforms,
    params=None,
    expected_params=None,
    input_names=None,
    output_names=None,
    error_method=None,
    disk_cache=None,
    memcache_deepcopy=True,
    transforms_cache_depth=10,
    outputs_cache_depth=0,
    input_binning=None,
    output_binning=None,
    debug_mode=None,
):
    """Set up binning, caches, hashed attributes and bookkeeping flags.

    Parameters
    ----------
    use_transforms
        Stored as `self.use_transforms`.
    params, expected_params, input_names, output_names, error_method, \
debug_mode
        Forwarded unchanged to the base-class initializer.
    disk_cache
        Stored as `self.disk_cache`; `instantiate_disk_cache()` is called
        as the last step of initialization.
    memcache_deepcopy
        Passed as `deepcopy` to every `MemoryCache` created here.
    transforms_cache_depth
        Converted with `int()`; depth of both the transforms cache and the
        nominal-transforms cache.
    outputs_cache_depth
        Converted with `int()`; an outputs cache is only created when the
        result is greater than zero.
    input_binning, output_binning
        Stored on the instance, after which `validate_binning()` is
        called.

    """
    # Allow for string inputs, but have to populate into lists for
    # consistent interfacing to one or multiple of these things
    logging.warning('This is a cake-style PISA stage, which is DEPRECATED!')

    self.use_transforms = use_transforms
    """Whether or not stage uses transforms"""

    self._events_hash = None

    self.input_binning = input_binning
    self.output_binning = output_binning
    self.validate_binning()

    # init base class!
    super(Stage, self).__init__(
        params=params,
        expected_params=expected_params,
        input_names=input_names,
        output_names=output_names,
        debug_mode=debug_mode,
        error_method=error_method,
    )

    # Storage of latest transforms and outputs; default to empty
    # TransformSet and None, respectively.

    self.transforms = TransformSet([])
    """A stage that takes to-be-transformed inputs and has had these
    transforms computed stores them here. Before computation,
    `transforms` is an empty TransformSet; a stage that does not make use
    of these (such as a no-input stage) has an empty TransformSet."""

    self.memcache_deepcopy = memcache_deepcopy

    self.transforms_cache_depth = int(transforms_cache_depth)

    self.full_hash = True
    """Whether to do full hashing if true, otherwise do fast hashing"""

    self.transforms_cache = MemoryCache(
        max_depth=self.transforms_cache_depth,
        is_lru=True,
        deepcopy=self.memcache_deepcopy,
    )
    """Memory cache object for storing transforms"""

    self.nominal_transforms_cache = MemoryCache(
        max_depth=self.transforms_cache_depth,
        is_lru=True,
        deepcopy=self.memcache_deepcopy,
    )
    """Memory cache object for storing nominal transforms"""

    self.outputs_cache_depth = int(outputs_cache_depth)

    self.outputs_cache = None
    """Memory cache object for storing outputs (excludes sideband
    objects)."""
    if self.outputs_cache_depth > 0:
        self.outputs_cache = MemoryCache(
            max_depth=self.outputs_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )

    self.disk_cache = disk_cache
    """Disk cache object"""

    self.disk_cache_path = None
    """Path to disk cache file for this stage/service (or None)."""

    # Include each attribute here for hashing if it is defined and its
    # value is not None
    default_attrs_to_hash = [
        "input_names",
        "output_names",
        "input_binning",
        "output_binning",
    ]
    self._attrs_to_hash = set()
    for attr in default_attrs_to_hash:
        if getattr(self, attr, None) is None:
            continue
        try:
            self.include_attrs_for_hashes(attr)
        except ValueError:
            # Deliberately best-effort: an attribute that is rejected for
            # hashing is simply left out. (The original clause was written
            # `except ValueError():`, which cannot catch anything and
            # raises TypeError as soon as an exception reaches it.)
            pass

    self.events = None
    self.nominal_transforms = None

    # Define useful flags and values for debugging behavior after running

    self.nominal_transforms_loaded_from_cache = None
    """Records which cache nominal transforms were loaded from, or None."""

    self.nominal_transforms_computed = False
    """Records whether nominal transforms were (re)computed."""

    self.transforms_loaded_from_cache = None
    """Records which cache transforms were loaded from, or None."""

    self.transforms_computed = False
    """Records whether transforms were (re)computed."""

    self.nominal_outputs_computed = False
    """Records whether nominal outputs were (re)computed."""

    self.outputs_loaded_from_cache = None
    """Records which cache outputs were loaded from, or None."""

    self.outputs_computed = False
    """Records whether outputs were (re)computed."""

    self.nominal_transforms_hash = None
    self.transforms_hash = None
    self.nominal_outputs_hash = None
    self.outputs_hash = None
    self.instantiate_disk_cache()
def _compute_nominal_transforms(self):
    """Compute new PID transforms.

    Events are loaded from `params.pid_events`, cut with
    `params.transform_events_keep_criteria` and split into one sample per
    output channel using the cut expressions in `params.pid_spec`. For
    each transform group and channel, the transform is the weighted
    histogram of the channel's events divided by the weighted histogram of
    all events, both summed over the group's members.

    Returns
    -------
    TransformSet
        One `BinnedTensorTransform` per (input name, output channel) pair
        for every input name contained in a transform group.

    Raises
    ------
    ValueError
        If the keys of `pid_spec` differ from `self.output_channels`.

    """
    logging.debug('Updating pid.hist PID histograms...')

    # TODO(shivesh): As of now, events do not have units as far as PISA
    # is concerned

    self.load_events(self.params.pid_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # TODO: in future, the events file will not have these combined
    # already, and it should be done here (or in a nominal transform,
    # etc.). See below about taking this step when we move to directly
    # using the I3-HDF5 files.

    # TODO: take events object as an input instead of as a param that
    # specifies a file? Or handle both cases?

    # NOTE(review): `eval` executes whatever string the `pid_spec`
    # parameter holds. This is only acceptable while that parameter comes
    # from trusted configuration; consider a safe parser otherwise.
    pid_spec = OrderedDict(eval(self.params.pid_spec.value))
    if set(pid_spec.keys()) != set(self.output_channels):
        msg = 'PID criteria from `pid_spec` {0} does not match {1}'
        raise ValueError(msg.format(pid_spec.keys(), self.output_channels))

    # TODO: add importance weights, error computation

    logging.debug("Separating events by PID...")
    separated_events = OrderedDict()
    for sig in self.output_channels:
        separated_events[sig] = self.events.applyCut(pid_spec[sig])

    # The same weights column is used for every histogram below
    weights_col = self.params.pid_weights_name.value

    # Derive transforms by combining flavints that behave similarly, but
    # apply the derived transforms to the input flavints separately
    # (leaving combining these together to later)
    transforms = []
    for flavint_group in self.transform_groups:
        logging.debug("Working on %s PID", flavint_group)

        # TODO(shivesh): errors
        # TODO(shivesh): total histo check?

        # Denominator: all events of the group, regardless of PID
        total_histo = np.zeros(self.output_binning.shape)
        for flavint in flavint_group:
            total_histo += self.events.histogram(
                kinds=flavint,
                binning=self.output_binning,
                weights_col=weights_col,
                errors=None
            ).hist

        # Numerators: events of the group passing each PID criterion
        sig_histograms = {}
        for sig in self.output_channels:
            sig_histo = np.zeros(self.output_binning.shape)
            for flavint in flavint_group:
                sig_histo += separated_events[sig].histogram(
                    kinds=flavint,
                    binning=self.output_binning,
                    weights_col=weights_col,
                    errors=None
                ).hist
            sig_histograms[sig] = sig_histo

        for sig in self.output_channels:
            # Empty bins give 0/0 or x/0; silence the warnings and report
            # the number of affected bins instead
            with np.errstate(divide='ignore', invalid='ignore'):
                xform_array = sig_histograms[sig] / total_histo

            num_invalid = np.sum(~np.isfinite(xform_array))
            if num_invalid > 0:
                logging.warning(
                    'Group "%s", PID signature "%s" has %d bins with no'
                    ' events (and hence the ability to separate events'
                    ' by PID cannot be ascertained). These are being'
                    ' masked off from any further computations.',
                    flavint_group, sig, num_invalid
                )
                # TODO: this caused buggy event propagation for some
                # reason; check and re-introduced the masked array idea
                # when this is fixed. For now, replicating the behavior
                # from PISA 2 (i.e. the non-finite entries are left in
                # the array and are NOT actually masked).
                #xform_array = np.ma.masked_invalid(xform_array)

            # Copy this transform to use for each input in the group
            for input_name in self.input_names:
                if input_name not in flavint_group:
                    continue
                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=self.suffix_channel(input_name, sig),
                    input_binning=self.input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform_array
                )
                transforms.append(xform)

    return TransformSet(transforms=transforms)
def _compute_nominal_transforms(self):
    """Compute smoothed effective-area transforms from an events file.

    Events from `params.aeff_events` are cut, histogrammed per transform
    group using the 'weighted_aeff' column, divided by the bin volumes
    (including a factor for dimensions absent from the binning) and passed
    through `self.smooth`.

    Returns
    -------
    TransformSet

    """
    self.load_events(self.params.aeff_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Units must be the following for correctly converting a sum-of-
    # OneWeights-in-bin to an average effective area across the bin.
    comp_units = dict(true_energy='GeV', true_coszen=None,
                      true_azimuth='rad')

    # Only dimensions that are actually binned can be converted
    in_units = {}
    for dim, unit in comp_units.items():
        if dim in self.input_binning:
            in_units[dim] = unit

    # Binning expressed in the computational units
    input_binning = self.input_binning.to(**in_units)

    # Account for "missing" dimension(s) (dimensions OneWeight expects for
    # computation of bin volume), and accommodate with a factor equal to
    # the full range. See IceCube wiki/documentation for OneWeight for
    # more info.
    missing_dims_vol = 1
    # TODO: currently, azimuth required to *not* be part of input binning
    if 'true_azimuth' not in input_binning:
        missing_dims_vol *= 2 * np.pi
    # TODO: Following is currently never the case, handle?
    if 'true_coszen' not in input_binning:
        missing_dims_vol *= 2

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.info("Working on %s effective areas xform", xform_flavints)

        hist_with_errors = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning,
            weights_col='weighted_aeff',
            errors=True,
        ).hist
        values = unp.nominal_values(hist_with_errors)
        errors = unp.std_devs(hist_with_errors)

        # Dividing by (energy x coszen x azimuth) bin volumes converts
        # sums-of-OneWeights-in-bins into effective areas; the correction
        # for missing dimensions is folded into the same divisor.
        divisor = (
            input_binning.bin_volumes(attach_units=False) * missing_dims_vol
        )
        values /= divisor
        errors /= divisor

        # `smooth` is fed arrays with energy as the first axis
        energy_is_second = input_binning.index('true_energy') == 1
        if energy_is_second:
            values = values.T
            errors = errors.T

        smoothed = self.smooth(values, errors,
                               input_binning['true_energy'],
                               input_binning['true_coszen'])

        if energy_is_second:
            # Restore the axis order of the input binning
            smoothed = smoothed.T

        nominal_transforms.extend(
            populate_transforms(
                service=self,
                xform_flavints=xform_flavints,
                xform_array=smoothed,
            )
        )

    return TransformSet(transforms=nominal_transforms)
def _compute_nominal_transforms(self):
    """Compute histogram-based effective-area transforms.

    Events from `params.aeff_events` are cut and, for each transform
    group, histogrammed with the 'weighted_aeff' column. The histogram is
    divided by the bin volumes, including a factor for dimensions absent
    from the binning. In debug mode each array is also written to a
    pickle file in a per-stage/service debug directory.

    Returns
    -------
    TransformSet

    """
    self.load_events(self.params.aeff_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Units must be the following for correctly converting a sum-of-
    # OneWeights-in-bin to an average effective area across the bin.
    comp_units = dict(true_energy='GeV', true_coszen=None,
                      true_azimuth='rad')

    # Only dimensions that are actually binned can be converted
    in_units = {}
    for dim, unit in comp_units.items():
        if dim in self.input_binning:
            in_units[dim] = unit

    # TODO: use out_units for some kind of conversion?

    # Binning expressed in the computational units
    input_binning = self.input_binning.to(**in_units)

    # Account for "missing" dimension(s) (dimensions OneWeight expects for
    # computation of bin volume), and accommodate with a factor equal to
    # the full range. See IceCube wiki/documentation for OneWeight for
    # more info.
    missing_dims_vol = 1
    if 'true_azimuth' not in input_binning:
        missing_dims_vol *= 2*np.pi
    if 'true_coszen' not in input_binning:
        missing_dims_vol *= 2

    debugging = bool(self.debug_mode)
    if debugging:
        outdir = os.path.join(
            find_resource('debug'), self.stage_name, self.service_name
        )
        mkdir(outdir)

    norm_volumes = (
        input_binning.bin_volumes(attach_units=False) * missing_dims_vol
    )

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.debug('Working on %s effective areas xform', xform_flavints)

        aeff_transform = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning,
            weights_col='weighted_aeff',
            errors=(self.error_method not in [None, False]),
        ).hist

        # Dividing by (energy x coszen x azimuth) bin volumes converts
        # sums-of-OneWeights-in-bins into effective areas; the correction
        # for missing dimensions is already part of `norm_volumes`.
        aeff_transform /= norm_volumes

        if debugging:
            pkl_name = 'aeff_' + str(xform_flavints) + '.pkl'
            to_file(aeff_transform, os.path.join(outdir, pkl_name))

        nominal_transforms.extend(
            populate_transforms(
                service=self,
                xform_flavints=xform_flavints,
                xform_array=aeff_transform,
            )
        )

    return TransformSet(transforms=nominal_transforms)
def compute_transforms(service):
    """Scale a service's nominal effective-area transforms by systematics.

    Every nominal transform is multiplied by `aeff_scale` times the
    livetime in seconds. When `service.particles` is 'neutrinos',
    transforms whose output contains nutau CC or nutaubar CC are also
    multiplied by `nutau_cc_norm`.

    Parameters
    ----------
    service
        Object providing `params`, `particles`, `transform_groups` and
        `nominal_transforms`; `sum_grouped_flavints` is optional and is
        treated as False when absent.

    Returns
    -------
    TransformSet

    Raises
    ------
    ValueError
        If `nutau_cc_norm` != 1 while nutau CC / nutaubar CC share a
        transform group with other flavor/interaction types.

    """
    aeff_scale = service.params.aeff_scale.m_as('dimensionless')
    livetime_s = service.params.livetime.m_as('sec')
    base_scale = aeff_scale * livetime_s

    logging.trace('livetime = %s --> %s sec',
                  service.params.livetime.value, livetime_s)

    is_neutrinos = service.particles == 'neutrinos'
    if is_neutrinos:
        if not hasattr(service, 'nutau_cc_norm_must_be_one'):
            # If any flav/ints besides nutau_cc and nutaubar_cc are
            # grouped with one or both of those for transforms, then a
            # `nutau_cc_norm` != 1 cannot be applied.
            service.nutau_cc_norm_must_be_one = False
            tau_cc = set(NuFlavIntGroup('nutau_cc+nutaubar_cc'))
            for group in service.transform_groups:
                members = set(group)
                # A group mixing nutau(bar) CC with anything else forces
                # the normalisation to be one
                if members & tau_cc and members - tau_cc:
                    service.nutau_cc_norm_must_be_one = True

        nutau_cc_norm = service.params.nutau_cc_norm.m_as('dimensionless')
        if nutau_cc_norm != 1 and service.nutau_cc_norm_must_be_one:
            raise ValueError(
                '`nutau_cc_norm` = %e but can only be != 1 if nutau CC and'
                ' nutaubar CC are separated from other flav/ints.'
                ' Transform groups are: %s'
                % (nutau_cc_norm, service.transform_groups)
            )

    sum_grouped_flavints = getattr(service, 'sum_grouped_flavints', False)

    new_transforms = []
    for nominal in service.nominal_transforms:
        scale = base_scale
        if is_neutrinos:
            out_nfig = NuFlavIntGroup(nominal.output_name)
            if 'nutau_cc' in out_nfig or 'nutaubar_cc' in out_nfig:
                scale *= nutau_cc_norm

        # Skip the multiplication (and the array copy) for a unit scale
        scaled_array = (
            nominal.xform_array * scale if scale != 1
            else nominal.xform_array
        )

        new_transforms.append(
            BinnedTensorTransform(
                input_names=nominal.input_names,
                output_name=nominal.output_name,
                input_binning=nominal.input_binning,
                output_binning=nominal.output_binning,
                xform_array=scaled_array,
                sum_inputs=sum_grouped_flavints,
            )
        )

    return TransformSet(new_transforms)
def _compute_nominal_transforms(self):
    """Compute cross-section transforms.

    For every input flavor and interaction type, a cross-section map over
    the true-energy bins is obtained from `self.xsec`, extended over the
    remaining binning dimensions and scaled by livetime, fiducial volume,
    `ice_p / mr_h20` and Avogadro's number per mole. The arrays of all
    flavor/interaction types in a transform group are summed to form that
    group's transform.

    Returns
    -------
    TransformSet

    """
    logging.info('Updating xsec.genie cross-section histograms...')

    self.load_xsec_splines()
    livetime = self._ev_param(self.params['livetime'].value)
    ice_p = self._ev_param(self.params['ice_p'].value)
    fid_vol = self._ev_param(self.params['fid_vol'].value)
    mr_h20 = self._ev_param(self.params['mr_h20'].value)
    x_energy_scale = self.params['x_energy_scale'].value

    input_binning = self.input_binning
    ebins = input_binning.true_energy

    # Position of the true-energy axis within the binning
    for position, dim_name in enumerate(input_binning.names):
        if 'true_energy' in dim_name:
            e_idx = position

    xsec_transforms = {}
    for flav in self.input_names:
        for int_ in ALL_NUINT_TYPES:
            flavint = flav + '_' + str(int_)
            logging.debug('Obtaining cross-sections for %s', flavint)
            xsec_map = self.xsec.get_map(
                flavint, MultiDimBinning([ebins]),
                x_energy_scale=x_energy_scale
            )

            # The energy axis carries the cross-section values; every
            # other axis only needs the right length so that meshgrid
            # repeats the values along it
            num_dims = input_binning.num_dims
            axes = [
                xsec_map.hist if axis == e_idx
                else tuple(range(input_binning.shape[axis]))
                for axis in range(num_dims)
            ]
            xsec_trns = np.meshgrid(*axes, indexing='ij')[e_idx]
            xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) *
                          (6.022140857e+23 / ureg.mol))
            xsec_transforms[NuFlavInt(flavint)] = xsec_trns

    nominal_transforms = []
    for flavint_group in self.transform_groups:
        flav_names = [str(flav) for flav in flavint_group.flavs]
        for input_name in self.input_names:
            if input_name not in flav_names:
                continue

            # Sum the arrays of all members for which one was computed
            member_arrays = [
                xsec_transforms[member]
                for member in flavint_group.flavints
                if member in xsec_transforms
            ]
            summed = reduce(add, member_arrays)

            nominal_transforms.append(
                BinnedTensorTransform(
                    input_names=input_name,
                    output_name=str(flavint_group),
                    input_binning=input_binning,
                    output_binning=self.output_binning,
                    xform_array=summed,
                )
            )

    return TransformSet(transforms=nominal_transforms)
def _compute_transforms(self):
    """Generate reconstruction "smearing kernels" by histogramming true and
    reconstructed variables from a Monte Carlo events file.

    The resulting transform is a 2N-dimensional histogram, where N is the
    dimensionality of the input binning. It maps truth bin counts to
    reconstructed bin counts: for 1D input binning, element i of the
    kernel is the distribution over reco space of the events in truth
    bin i, normalised to the total number of events in truth bin i.

    Before histogramming, the reconstructed energy and coszen of the first
    member of each transform group are rewritten in `self.events` as
    ``true + (reco - true) * res_scale + bias``.

    Notes
    -----
    In the current implementation these histograms are made
    **UN**weighted. This is probably quite wrong...

    Returns
    -------
    TransformSet

    """
    e_res_scale = self.params.e_res_scale.value.m_as('dimensionless')
    cz_res_scale = self.params.cz_res_scale.value.m_as('dimensionless')
    e_reco_bias = self.params.e_reco_bias.value.m_as('GeV')
    cz_reco_bias = self.params.cz_reco_bias.value.m_as('dimensionless')

    res_scale_ref = self.params.res_scale_ref.value.strip().lower()
    assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

    self.load_events(self.params.reco_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # Computational units must be the following for compatibility with
    # events file
    comp_units = dict(
        true_energy='GeV', true_coszen=None, true_azimuth='rad',
        reco_energy='GeV', reco_coszen=None, reco_azimuth='rad', pid=None
    )

    # Only dimensions that are actually binned can be converted
    in_units = {d: u for d, u in comp_units.items()
                if d in self.input_binning}
    out_units = {d: u for d, u in comp_units.items()
                 if d in self.output_binning}

    # Binnings expressed in the computational units defined above
    input_binning = self.input_binning.to(**in_units)
    output_binning = self.output_binning.to(**out_units)

    xforms = []
    for xform_flavints in self.transform_groups:
        logging.debug("Working on %s reco kernels" % xform_flavints)

        group_events = self.events[xform_flavints[0]]
        true_energy = group_events['true_energy']
        true_coszen = group_events['true_coszen']
        reco_energy = group_events['reco_energy']
        reco_coszen = group_events['reco_coszen']
        e_reco_err = reco_energy - true_energy
        cz_reco_err = reco_coszen - true_coszen

        if res_scale_ref == 'zero':
            # Rescale the reconstruction error about zero and shift it
            group_events['reco_energy'] = (
                true_energy + e_reco_err * e_res_scale + e_reco_bias
            )
            group_events['reco_coszen'] = (
                true_coszen + cz_reco_err * cz_res_scale + cz_reco_bias
            )

        weights_col = self.params.reco_weights_name.value
        with_errors = self.error_method not in [None, False]

        # True (input) + reco {+ PID} (output)-dimensional histogram
        # is the basis for the transformation; only the numpy array is
        # needed from it
        reco_kernel = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning * output_binning,
            weights_col=weights_col,
            errors=with_errors
        ).hist

        # Kernel normalisation: the N events of a truth bin are spread
        # over the whole reco map, so the map for that truth bin is
        # divided by N. This is written for arbitrary dimensionality.
        #
        # The truth-only (N-dimensional) histogram supplies the
        # per-truth-bin event counts.
        true_event_counts = self.events.histogram(
            kinds=xform_flavints,
            binning=input_binning,
            weights_col=weights_col,
            errors=with_errors
        ).hist

        # Truth bins without events must have no effect: marking them NaN
        # and applying `nan_to_num` to the reciprocal gives a zero factor.
        with np.errstate(divide='ignore', invalid='ignore'):
            true_event_counts[true_event_counts == 0] = np.nan
            norm_factors = 1.0 / true_event_counts
            norm_factors = np.nan_to_num(norm_factors)

        # Numpy broadcasts from the last dimension to the first, so the
        # factors need one trailing dummy axis per output dimension to be
        # applied along the __input__ dimensions.
        for _ in self.output_binning:
            norm_factors = np.expand_dims(norm_factors, axis=-1)

        reco_kernel *= norm_factors

        assert np.all(reco_kernel >= 0), \
            'number of elements less than 0 = %d' \
            % np.sum(reco_kernel < 0)
        sum_over_axes = tuple(range(-len(self.output_binning), 0))
        totals = np.sum(reco_kernel, axis=sum_over_axes)
        assert np.all(totals <= 1 + 1e-14), \
            'max = ' + str(np.max(totals) - 1)

        # Attach the kernel to every input it applies to. The `self`
        # binnings are used so maps come back in user-defined units
        # rather than in the computational units.
        if self.sum_grouped_flavints:
            xform_input_names = [
                name for name in self.input_names
                if len(set(xform_flavints).intersection(
                    NuFlavIntGroup(name))) > 0
            ]
            for output_name in self.output_names:
                if output_name not in xform_flavints:
                    continue
                xforms.append(
                    BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints
                    )
                )
        else:
            # Output name is the same as the input name
            for input_name in self.input_names:
                if input_name not in xform_flavints:
                    continue
                xforms.append(
                    BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                    )
                )

    return TransformSet(transforms=xforms)
def _compute_transforms(self):
    """Generate reconstruction "smearing kernels" by reading in a set of
    parameterisation functions from a json file.

    This should have the same dimensionality as the input binning i.e. if
    you have energy and coszenith input binning then the kernels provided
    should have both energy and coszenith resolution functions.

    Any superposition of distributions from scipy.stats is supported.

    The parameterisations are (re)loaded only when the hash of
    `params.reco_paramfile` differs from the one stored on the instance.
    For each transform group a 4D kernel indexed as (true energy, true
    coszen, reco energy, reco coszen) is built from the outer product of
    the per-bin energy and coszen values returned by `make_cdf`.

    Returns
    -------
    TransformSet

    Raises
    ------
    ValueError
        If no parameterisation source is given, if the groups in the
        parameterisation do not match `self.transform_groups`, if the
        kernel has entries below ``-EQUALITY_PREC``, or if the kernel for
        any input bin sums to more than ``1 + EQUALITY_PREC``.

    """
    res_scale_ref = self.params.res_scale_ref.value.strip().lower()
    assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

    reco_param_source = self.params.reco_paramfile.value

    if reco_param_source is None:
        raise ValueError(
            'non-None reco parameterization params.reco_paramfile'
            ' must be provided'
        )

    reco_param_hash = hash_obj(reco_param_source)

    if (self._reco_param_hash is None
            or reco_param_hash != self._reco_param_hash):
        reco_param = load_reco_param(reco_param_source)

        # Transform groups are implicitly defined by the contents of the
        # reco paramfile's keys
        implicit_transform_groups = reco_param.keys()

        # Make sure these match transform groups specified for the stage
        if set(implicit_transform_groups) != set(self.transform_groups):
            raise ValueError(
                'Transform groups (%s) defined implicitly by'
                ' %s reco parameterizations do not match those'
                ' defined as the stage\'s `transform_groups` (%s).'
                % (implicit_transform_groups, reco_param_source,
                   self.transform_groups)
            )

        self.param_dict = reco_param
        self._reco_param_hash = reco_param_hash
        self.eval_dict = self.evaluate_reco_param()

    self.reco_scales_and_biases_applicable()

    # everything seems to be fine, so rescale and shift distributions
    eval_dict = self.scale_and_shift_reco_dists()

    # Computational units must be the following for compatibility with
    # events file
    comp_units = dict(
        true_energy='GeV', true_coszen=None, true_azimuth='rad',
        reco_energy='GeV', reco_coszen=None, reco_azimuth='rad', pid=None
    )

    # Select only the units in the input/output binning for conversion
    # (can't pass more than what's actually there)
    in_units = {dim: unit for dim, unit in comp_units.items()
                if dim in self.input_binning}
    out_units = {dim: unit for dim, unit in comp_units.items()
                 if dim in self.output_binning}

    # These binnings will be in the computational units defined above.
    # NOTE(review): the converted binnings are not used below; centers and
    # edges are read from the unconverted `self.*_binning` objects.
    # Confirm whether that is intended.
    input_binning = self.input_binning.to(**in_units)
    output_binning = self.output_binning.to(**out_units)

    en_centers_in = self.input_binning[
        'true_energy'].weighted_centers.magnitude
    en_edges_in = self.input_binning['true_energy'].bin_edges.magnitude
    cz_centers_in = self.input_binning[
        'true_coszen'].weighted_centers.magnitude
    cz_edges_in = self.input_binning['true_coszen'].bin_edges.magnitude
    en_edges_out = self.output_binning['reco_energy'].bin_edges.magnitude
    cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude

    n_e_in = len(en_centers_in)
    n_cz_in = len(cz_centers_in)
    n_e_out = len(en_edges_out) - 1
    n_cz_out = len(cz_edges_out) - 1

    if self.coszen_flipback:
        cz_edges_out, flipback_mask, keep = \
            self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.)

    xforms = []
    for xform_flavints in self.transform_groups:
        logging.debug("Working on %s reco kernel..." % xform_flavints)

        this_params = eval_dict[xform_flavints]
        reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out))
        for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)):
            e_kern_cdf = self.make_cdf(
                bin_edges=en_edges_out,
                enval=en_centers_in[i],
                enindex=i,
                czval=None,
                czindex=j,
                dist_params=this_params['energy']
            )
            cz_kern_cdf = self.make_cdf(
                bin_edges=cz_edges_out,
                enval=en_centers_in[i],
                enindex=i,
                czval=cz_centers_in[j],
                czindex=j,
                dist_params=this_params['coszen']
            )

            if self.coszen_flipback:
                cz_kern_cdf = perform_coszen_flipback(
                    cz_kern_cdf, flipback_mask, keep
                )

            reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf)

        # Sanity check of reco kernels - intolerable negative values?
        logging.trace(" Ensuring reco kernel sanity...")
        kern_neg_invalid = reco_kernel < -EQUALITY_PREC
        if np.any(kern_neg_invalid):
            raise ValueError("Detected intolerable negative entries in"
                             " reco kernel! Min.: %.15e"
                             % np.min(reco_kernel))

        # Set values numerically compatible with zero to zero. The former
        # `np.where(cond, reco_kernel, 0)` call discarded its result (and
        # had its branches swapped), so nothing was ever zeroed.
        reco_kernel[np.abs(reco_kernel) < EQUALITY_PREC] = 0

        sum_over_axes = tuple(range(-len(self.output_binning), 0))
        totals = np.sum(reco_kernel, axis=sum_over_axes)
        totals_large = totals > (1 + EQUALITY_PREC)
        if np.any(totals_large):
            raise ValueError("Detected overflow in reco kernel! Max.:"
                             " %0.15e" % (np.max(totals)))

        if self.input_binning.basenames[0] == "coszen":
            # The reconstruction kernel has been set up with energy as its
            # first dimension, so swap axes if it is applied to an input
            # binning where 'coszen' is the first
            logging.trace(" Swapping kernel dimensions since 'coszen' has"
                          " been requested as the first.")
            reco_kernel = np.swapaxes(reco_kernel, 0, 1)
            reco_kernel = np.swapaxes(reco_kernel, 2, 3)

        if self.sum_grouped_flavints:
            xform_input_names = []
            for input_name in self.input_names:
                if set(NuFlavIntGroup(input_name)).isdisjoint(
                        xform_flavints):
                    continue
                xform_input_names.append(input_name)

            for output_name in self.output_names:
                if output_name not in xform_flavints:
                    continue
                xform = BinnedTensorTransform(
                    input_names=xform_input_names,
                    output_name=output_name,
                    input_binning=self.input_binning,
                    output_binning=self.output_binning,
                    xform_array=reco_kernel,
                    sum_inputs=self.sum_grouped_flavints
                )
                xforms.append(xform)
        # If *not* combining grouped flavints:
        # Copy the transform for each input flavor, regardless if the
        # transform is computed from a combination of flavors.
        else:
            for input_name in self.input_names:
                if set(NuFlavIntGroup(input_name)).isdisjoint(
                        xform_flavints):
                    continue
                for output_name in self.output_names:
                    if (output_name not in NuFlavIntGroup(input_name)
                            or output_name not in xform_flavints):
                        continue
                    logging.trace(' input: %s, output: %s, xform: %s',
                                  input_name, output_name, xform_flavints)

                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints
                    )
                    xforms.append(xform)

    return TransformSet(transforms=xforms)
def _compute_nominal_transforms(self):
    """Compute new PID transforms.

    For each transform group, every PID signature's parameterisation is
    evaluated at `self.ebin_centers`, broadcast from the 'reco_energy'
    dimension across 'reco_coszen', and written into the slice of the
    transform array selected by that signature's PID bin.

    Returns
    -------
    TransformSet

    """
    logging.debug('Updating pid.param PID histograms...')

    self.load_pid_energy_param(self.params.pid_energy_paramfile.value)

    out_binning = self.transform_output_binning
    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.debug('Working on %s PID', xform_flavints)

        xform_array = np.empty(self.transform_output_binning.shape)

        sig_funcs = self.pid_energy_param_dict[xform_flavints]
        for signature, sig_param_func in sig_funcs.items():
            # PID probabilities vs. energy, evaluated at the energy bin
            # centers and repeated along reco_coszen since they do not
            # depend on it
            pid_vs_energy = sig_param_func(self.ebin_centers)
            pid_slice = out_binning.broadcast(
                pid_vs_energy, from_dim='reco_energy', to_dims='reco_coszen'
            )

            # Fill the PID bin that belongs to this signature
            xform_array[out_binning.indexer(pid=signature)] = pid_slice

        if self.sum_grouped_flavints:
            xform_input_names = [
                name for name in self.input_names
                if set(xform_flavints).intersection(NuFlavIntGroup(name))
            ]
            for output_name in self.output_names:
                if output_name not in xform_flavints:
                    continue
                nominal_transforms.append(
                    BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=str(xform_flavints),
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                        sum_inputs=self.sum_grouped_flavints
                    )
                )
        else:
            for input_name in self.input_names:
                if input_name not in xform_flavints:
                    continue
                nominal_transforms.append(
                    BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                    )
                )

    return TransformSet(transforms=nominal_transforms)
def _compute_nominal_transforms(self):
    """Compute parameterised effective area transforms.

    The energy (and, if the binning has a coszen dimension, the coszen)
    parameterisations are loaded via `load_aeff_param` whenever the hash
    of the corresponding source differs from the one stored in
    ``self._param_hashes`` (or none is stored yet). The transforms are then
    built from the parameterisations held in ``self.aeff_params``.

    Returns
    -------
    TransformSet
        Transforms produced by `populate_transforms` for every group in
        ``self.transform_groups``.

    Raises
    ------
    ValueError
        If ``params.aeff_energy_paramfile`` is None, if a coszen
        parameterisation is given although ``self.has_cz`` is false, or if
        the groups found in a parameterisation do not match
        ``self.transform_groups``.

    """
    energy_param_source = self.params.aeff_energy_paramfile.value
    coszen_param_source = self.params.aeff_coszen_paramfile.value

    energy_param_hash = hash_obj(energy_param_source)
    coszen_param_hash = hash_obj(coszen_param_source)

    load_energy = (
        self._param_hashes['energy'] is None
        or energy_param_hash != self._param_hashes['energy']
    )

    # Compare the *coszen* hash against the stored *coszen* hash. The
    # previous check compared the energy hash against the whole
    # `_param_hashes` dict, which is never equal and therefore forced a
    # reload of the coszen parameterisation on every call.
    load_coszen = bool(
        self.has_cz
        and (self._param_hashes['coszen'] is None
             or coszen_param_hash != self._param_hashes['coszen'])
    )

    if energy_param_source is None:
        raise ValueError(
            'non-None energy parameterization params.aeff_energy_paramfile'
            ' must be provided'
        )
    if not self.has_cz and coszen_param_source is not None:
        raise ValueError(
            'true_coszen dimension was not found in the binning but a'
            ' coszen parameterisation file has been provided by'
            ' `params.aeff_coszen_paramfile`.'
        )

    # NOTE: there is deliberately no early exit when nothing has to be
    # (re)loaded. The loop below already skips up-to-date dimensions, and
    # the transforms are rebuilt from the cached parameterisations so that
    # this method always returns a TransformSet rather than None.
    dims = ['energy', 'coszen']
    loads = [load_energy, load_coszen]
    sources = [energy_param_source, coszen_param_source]
    hashes = [energy_param_hash, coszen_param_hash]

    for dim, load, source, hash_ in zip(dims, loads, sources, hashes):
        if not load:
            continue

        # Invalidate first so that a failure while loading/validating does
        # not leave a stale parameterisation paired with a stale hash.
        self._param_hashes[dim] = None
        self.aeff_params[dim] = None

        params = load_aeff_param(source)

        # Transform groups are implicitly defined by the keys of the
        # loaded aeff parameterisation
        implicit_transform_groups = params.keys()

        # Make sure these match transform groups specified for the stage
        if set(implicit_transform_groups) != set(self.transform_groups):
            raise ValueError(
                'Transform groups (%s) defined implicitly by'
                ' %s aeff parameterizations "%s" do not match those'
                ' defined as the stage\'s `transform_groups` (%s).'
                % (implicit_transform_groups, dim, source,
                   self.transform_groups)
            )

        self.aeff_params[dim] = params
        self._param_hashes[dim] = hash_

    nominal_transforms = []
    for xform_flavints in self.transform_groups:
        logging.debug('Working on %s effective areas xform',
                      xform_flavints)

        energy_param_func = self.aeff_params['energy'][xform_flavints]
        coszen_param_func = None
        if self.aeff_params['coszen'] is not None:
            coszen_param_func = self.aeff_params['coszen'][xform_flavints]

        # Now calculate the 1D aeff along energy
        aeff_vs_e = energy_param_func(self.ecen)

        # NOTE/TODO: Below is taken from the PISA 2 implementation of this.
        # Almost certainly comes from the fact that the highest knot there
        # was 79.5 GeV with the upper energy bin edge being 80 GeV. There's
        # probably something better that could be done here...

        # Correct for final energy bin, since interpolation does not
        # extend to JUST right outside the final bin
        if aeff_vs_e[-1] == 0:
            aeff_vs_e[-1] = aeff_vs_e[-2]

        if self.has_cz:
            aeff_vs_e = self.input_binning.broadcast(
                aeff_vs_e, from_dim='true_energy', to_dims='true_coszen'
            )

            if coszen_param_func is not None:
                aeff_vs_cz = coszen_param_func(self.czcen)
                # Normalize so that the coszen factors average to one
                aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz)
            else:
                # No coszen dependence: unit factor in every coszen bin
                aeff_vs_cz = np.ones(shape=len(self.czcen))

            cz_broadcasted = self.input_binning.broadcast(
                aeff_vs_cz, from_dim='true_coszen', to_dims='true_energy'
            )
            aeff_transform = aeff_vs_e * cz_broadcasted
        else:
            aeff_transform = aeff_vs_e

        nominal_transforms.extend(
            populate_transforms(
                service=self,
                xform_flavints=xform_flavints,
                xform_array=aeff_transform
            )
        )

    return TransformSet(transforms=nominal_transforms)