def select_params(self, selections, error_on_missing=True):
    successes = 0
    if selections is not None:
        for pipeline in self:
            try:
                pipeline.select_params(selections, error_on_missing=True)
            except KeyError:
                pass
            else:
                successes += 1

        if error_on_missing and successes == 0:
            raise KeyError(
                'None of the stages from any pipeline in this distribution'
                ' maker has all of the selections %s available.'
                % (selections,)
            )
    else:
        for pipeline in self:
            possible_selections = pipeline.param_selections
            if possible_selections:
                logging.warn("Although you didn't make a parameter "
                             "selection, the following were available: %s."
                             " This may cause issues.", possible_selections)
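
# A minimal usage sketch (hypothetical object and selection names, not taken
# from the code above), showing the two branches of select_params: an explicit
# selection that raises KeyError if no pipeline has it, and a None selection
# that merely warns about whatever selections were available.
#
#   dist_maker.select_params(['nh'])                          # raise on no match
#   dist_maker.select_params(['nh'], error_on_missing=False)  # silent no-op
#   dist_maker.select_params(None)                            # warn only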
def oversample_binning(coarse_bins, factor):
    """Oversample bin edges (coarse_bins) by the given factor"""

    if is_linear(coarse_bins):
        logging.info('Oversampling linear output binning by factor %i.'
                     % factor)
        fine_bins = np.linspace(coarse_bins[0], coarse_bins[-1],
                                factor*(len(coarse_bins)-1)+1)
    elif is_logarithmic(coarse_bins):
        logging.info('Oversampling logarithmic output binning by factor %i.'
                     % factor)
        fine_bins = np.logspace(np.log10(coarse_bins[0]),
                                np.log10(coarse_bins[-1]),
                                factor*(len(coarse_bins)-1)+1)
    else:
        logging.warn('Irregular binning detected! Evenly oversampling '
                     'by factor %i' % factor)
        fine_bins = np.array([])
        for i, upper_edge in enumerate(coarse_bins[1:]):
            fine_bins = np.append(fine_bins,
                                  np.linspace(coarse_bins[i], upper_edge,
                                              factor, endpoint=False))
        # endpoint=False drops every bin's upper edge, so the final edge must
        # be appended explicitly to match the linear/logarithmic branches
        fine_bins = np.append(fine_bins, coarse_bins[-1])

    return fine_bins
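
# A short usage sketch (assumes numpy imported as np; values illustrative).
# For the linear and logarithmic branches the result has
# factor*(len(coarse_bins)-1) + 1 edges spanning the original range:
#
#   coarse = np.linspace(1.0, 9.0, 5)       # edges [1, 3, 5, 7, 9] -> 4 bins
#   fine = oversample_binning(coarse, 2)    # 9 edges -> 8 bins, same range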
def get_reco_arrays(data, cuts, files_per_run, reco_string=None,
                    mcnu='MCNeutrino'):
    '''
    Forms arrays of reco events for true/reco energy/coszen from the
    data_files
    '''
    logging.warn('Getting reconstructions from: %s' % reco_string)

    nfiles = len(set(data.root.I3EventHeader.col('Run')))*files_per_run
    sim_weight = ((2.0*data.root.I3MCWeightDict.col('OneWeight')[cuts]
                   * CMSQ_TO_MSQ)
                  / (data.root.I3MCWeightDict.col('NEvents')[cuts]*nfiles))

    # getattr() covers both the __getattr__ and __getattribute__ lookup
    # paths, so no bare try/except fallback is needed here
    mcnu_node = getattr(data.root, mcnu)
    reco_node = getattr(data.root, reco_string)
    true_cz = np.cos(mcnu_node.col('zenith'))[cuts]
    true_egy = mcnu_node.col('energy')[cuts]
    reco_cz = np.cos(reco_node.col('zenith'))[cuts]
    reco_egy = reco_node.col('energy')[cuts]

    arrays = [true_egy, true_cz, reco_egy, reco_cz, sim_weight]

    return arrays
def compute_function(self):
    self.data.data_specs = self.calc_specs

    # Link containers
    if self.links is not None:
        for key, val in self.links.items():
            self.data.link_containers(key, val)

    # Format the params dict that will be passed to `Hypersurface.evaluate`
    # TODO checks on param units
    param_values = {
        sys_param_name: self.params[sys_param_name].m
        for sys_param_name in self.hypersurface_param_names
    }
    if self.interpolated:
        osc_params = {
            name: self.params[name] for name in self.inter_params
        }

    # Evaluate the hypersurfaces
    for container in self.data:
        if self.interpolated:
            # in the case of interpolated hypersurfaces, the actual
            # hypersurface must be generated for the given oscillation
            # parameters first
            container_hs = self.hypersurfaces[
                container.name].get_hypersurface(**osc_params)
        else:
            container_hs = self.hypersurfaces[container.name]

        # Get the hypersurface scale factors (reshape to 1D array)
        if self.propagate_uncertainty:
            scales, uncertainties = container_hs.evaluate(
                param_values, return_uncertainty=True)
            scales = scales.reshape(container.size)
            uncertainties = uncertainties.reshape(container.size)
        else:
            scales = container_hs.evaluate(param_values).reshape(
                container.size)

        # Where there are no scales (e.g. empty bins), set scale factor to 1
        empty_bins_mask = ~np.isfinite(scales)
        num_empty_bins = np.sum(empty_bins_mask)
        if num_empty_bins > 0 and not self.warning_issued:
            logging.warn("%i empty bins found in hypersurface"
                         % num_empty_bins)
            self.warning_issued = True
        scales[empty_bins_mask] = 1.
        if self.propagate_uncertainty:
            uncertainties[empty_bins_mask] = 0.

        # Add to container
        np.copyto(src=scales, dst=container["hs_scales"].get('host'))
        container["hs_scales"].mark_changed()
        if self.propagate_uncertainty:
            np.copyto(src=uncertainties,
                      dst=container["hs_scales_uncertainty"].get('host'))
            container["hs_scales_uncertainty"].mark_changed()

    # Unlink the containers again
    self.data.unlink_containers()
def get_reco_arrays(data, cuts, reco_string=None, mcnu='MCNeutrino'):
    '''
    Forms arrays of reco events for true/reco energy/coszen from the
    data_files
    '''
    logging.warn('Getting reconstructions from: %s' % reco_string)

    #true_egy = data.root.MCNeutrino.col('energy')[cuts]
    #true_cz = np.cos(data.root.MCNeutrino.col('zenith'))[cuts]

    # getattr() covers both the __getattr__ and __getattribute__ lookup
    # paths, so no bare try/except fallback is needed here
    mcnu_node = getattr(data.root, mcnu)
    reco_node = getattr(data.root, reco_string)
    true_egy = mcnu_node.col('energy')[cuts]
    true_cz = np.cos(mcnu_node.col('zenith'))[cuts]
    reco_cz = np.cos(reco_node.col('zenith'))[cuts]
    reco_egy = reco_node.col('energy')[cuts]

    arrays = [true_egy, true_cz, reco_egy, reco_cz]

    return arrays
def setup_function(self):
    '''
    Check the range of the axial mass parameters in the analysis. Send a
    warning if these are beyond +/- 2 sigma
    '''
    if (self.params['Genie_Ma_QE'].range[0] < -2.
            or self.params['Genie_Ma_QE'].range[1] > 2.):
        logging.warn('Genie_Ma_QE parameter bounds have been set larger '
                     'than the range used to produce interpolation points '
                     '([-2., 2.]). This will void the warranty...')
    if (self.params['Genie_Ma_RES'].range[0] < -2.
            or self.params['Genie_Ma_RES'].range[1] > 2.):
        logging.warn('Genie_Ma_RES parameter bounds have been set larger '
                     'than the range used to produce interpolation points '
                     '([-2., 2.]). This will void the warranty...')
def check_scipy_version(minimizer_settings):
    # Workaround for old scipy versions. Compare parsed versions rather than
    # raw strings: lexicographic string comparison mis-orders versions such
    # as '0.9.0' vs '0.12.0'.
    import scipy
    from distutils.version import LooseVersion
    if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
        logging.warn('Detected scipy version %s < 0.12.0'
                     % scipy.__version__)
        if 'maxiter' in minimizer_settings:
            logging.warn('Optimizer settings for "maxiter" will be ignored')
            minimizer_settings.pop('maxiter')
    return
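
# Why the parsed comparison above matters (illustrative values): a plain
# string comparison orders versions lexicographically, character by
# character, so
#
#   '0.9.0' < '0.12.0'                                 # False: '9' > '1'
#   LooseVersion('0.9.0') < LooseVersion('0.12.0')     # True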
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization."""
    alpha1, alpha2, deltansi = np.random.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = np.random.rand(3) * 2 * np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = np.random.rand(2) * 2 * eps_max_abs - eps_max_abs
    nsi_params = VacuumLikeNSIParams()
    nsi_params.eps_scale = eps_scale
    nsi_params.eps_prime = eps_prime
    nsi_params.phi12 = phi12
    nsi_params.phi13 = phi13
    nsi_params.phi23 = phi23
    nsi_params.alpha1 = alpha1
    nsi_params.alpha2 = alpha2
    nsi_params.deltansi = deltansi

    logging.trace(
        'Checking agreement between numerical & analytical NSI matrix...')

    eps_mat_numerical = nsi_params.eps_matrix
    eps_mat_analytical = nsi_params.eps_matrix_analytical

    logging.trace("Numerical NSI matrix:\n%s" % eps_mat_numerical)
    logging.trace("Analytical expansion (by hand):\n%s" % eps_mat_analytical)

    try:
        close = np.isclose(eps_mat_numerical, eps_mat_analytical,
                           **ALLCLOSE_KW)
        if not np.all(close):
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s' % close)
    except ValueError as e:
        logging.warn(
            str(e) + "...\nThis is expected."
            " Going ahead with numerical calculation for now.")

    logging.trace('Now checking agreement with sympy calculation...')

    eps_mat_sympy = nsi_sympy_mat_mult(
        eps_scale_val=eps_scale,
        eps_prime_val=eps_prime,
        phi12_val=phi12,
        phi13_val=phi13,
        phi23_val=phi23,
        alpha1_val=alpha1,
        alpha2_val=alpha2,
        deltansi_val=deltansi)

    logging.trace('Numerical NSI matrix:\n%s' % eps_mat_numerical)
    logging.trace('Sympy NSI matrix:\n%s' % eps_mat_sympy)

    close = np.isclose(eps_mat_numerical, eps_mat_sympy, **ALLCLOSE_KW)
    if not np.all(close):
        raise ValueError(
            'Sympy and numerical calculations disagree! Elementwise'
            ' agreement:\n%s' % close)
def get_reco_kernels(self, **kwargs):
    """
    Wrapper around _get_reco_kernels() that is to be used from outside,
    ensures that reco kernels are in correct shape and normalized
    """
    kernels = self._get_reco_kernels(**kwargs)
    if kernels is None:
        logging.warn("No kernels defined yet...")
        return kernels
    if self.check_kernels(kernels):
        return kernels
def store_recursively(fhandle, node, path=None, node_hashes=None):
    # Use None rather than mutable default args ([] and {}), whose contents
    # would persist across top-level calls
    if path is None:
        path = []
    if node_hashes is None:
        node_hashes = {}
    full_path = '/' + '/'.join(path)
    if isinstance(node, dict):
        try:
            fhandle.create_group(full_path)
        except ValueError:
            pass
        for key in sorted(node.iterkeys()):
            key_str = str(key)
            if not isinstance(key, str):
                logging.warn('Stringifying key "' + key_str +
                             '" for use as name in HDF5 file')
            val = node[key]
            new_path = path + [key_str]
            store_recursively(fhandle=fhandle, node=val, path=new_path,
                              node_hashes=node_hashes)
    else:
        # Check for existing node
        node_hash = utils.utils.hash_obj(node)
        if node_hash in node_hashes:
            # Hardlink the matching existing dataset
            fhandle[full_path] = fhandle[node_hashes[node_hash]]
            return
        node_hashes[node_hash] = full_path
        # "Scalar datasets don't support chunk/filter options"; extra
        # checking that a sequence isn't a string, also. Shuffling is
        # a good idea since subsequent compression will generally benefit;
        # shuffling requires chunking. Compression is not done here
        # since it is slow.
        if hasattr(node, '__iter__') and not isinstance(node, basestring):
            shuffle = True
            chunks = True
        else:
            shuffle = False
            chunks = None
        fhandle.create_dataset(name=full_path, data=node, chunks=chunks,
                               compression=None, shuffle=shuffle,
                               fletcher32=False)
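
# A minimal usage sketch (assumes h5py and numpy; file name and dict contents
# are illustrative). Identical leaf arrays are stored only once and hardlinked
# thereafter via the node_hashes bookkeeping:
#
#   import h5py
#   data = {'reco': {'energy': np.arange(10),
#                    'coszen': np.linspace(-1, 0, 10)}}
#   fhandle = h5py.File('out.hdf5', 'w')
#   store_recursively(fhandle, data)
#   fhandle.close()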
def get_earth_model(self, model):
    """
    Check whether the specified Earth density profile has a correct
    NuCraft preface. If not, create a temporary file that does.
    """
    logging.debug('Trying to construct Earth model from "%s"' % model)
    try:
        resource_path = find_resource(model)
        self.earth_model = EarthModel(resource_path)
        logging.info('Loaded Earth model from %s' % model)
    except SyntaxError:
        # Probably the file is lacking the correct preamble
        logging.warn('Failed to construct NuCraft Earth model from '
                     '%s! Adding default preamble...' % resource_path)
        # Generate tempfile with preamble
        with open(resource_path, 'r') as infile:
            profile_lines = infile.readlines()
        preamble = ['# nuCraft Earth model with PREM density '
                    'values for use as template; keep structure '
                    'of the first six lines unmodified!\n',
                    '(0.5, 0.5, 0.5) # tuple of (relative) '
                    'electron numbers for mantle, outer core, '
                    'and inner core\n',
                    '6371. # radius of the Earth\n',
                    '3480. # radius of the outer core\n',
                    '1121.5 # radius of the inner core\n',
                    '# two-columned list of radii and corresponding '
                    'matter density values in km and kg/dm^3; '
                    'add, remove or modify lines as necessary\n']
        tfile = NamedTemporaryFile()
        tfile.writelines(preamble + profile_lines)
        tfile.flush()
        try:
            self.earth_model = EarthModel(tfile.name)
        except Exception:
            logging.error('Could not construct Earth model from %s: %s'
                          % (model, sys.exc_info()[1]))
            sys.exit(1)
        logging.info('Successfully constructed Earth model')
        tfile.close()
    except IOError:
        logging.info('Using NuCraft built-in Earth model "%s"' % model)
        self.earth_model = EarthModel(model)
def mkdir(d, mode=0o2777, group=None, warn=True):
    """Only set mode and group for dirs created by this function"""
    d = expand(d)
    gid = None
    if group is not None:
        gid = get_gid(group)
    if warn and os.path.isdir(d):
        logging.warn('Directory already exists: "%s"', d)
        return
    dirs = path_components(d)
    fullpath = ''
    for d in dirs:
        fullpath = os.path.join(fullpath, d)
        if os.path.isdir(fullpath):
            continue
        os.mkdir(fullpath, mode)
        if gid is not None:
            os.chown(fullpath, -1, gid)
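
# Usage sketch (path and group name are illustrative):
#
#   mkdir('/tmp/pisa_out/fits', group='users', warn=False)
#
# Intermediate directories are created one component at a time; mode and
# group ownership are applied only to components this call actually creates.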
def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco,
                      flav, int_type, make_plots=False, out_dir=None):
    """Construct a 4D kernel set from MC events using VBWKDE.

    Given a set of MC events and each of their {energy{true, reco},
    coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux
    histogram onto the corresponding 2D reco-flux histogram.

    The resulting 4D array can be indexed logically using
      kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l]
    where the 4 indices point from a single MC-true histogram bin (i,j) to
    a single reco histogram bin (k,l).

    Binning of both MC-true and reco histograms is the same and is given by
    the values in self.ebins and self.czbins which define the bin *edges*
    (not the bin centers; hence, len(self.ebins) is one greater than the
    number of bins, etc.).

    NOTE: Actual limits in energy used to group events into a single "true"
    bin may be extended beyond the bin edges defined by self.ebins in order
    to gather enough events to successfully apply VBWKDE.

    Parameters
    ----------
    e_true : sequence
        MC-true neutrino energies, one per event
    cz_true : sequence
        MC-true neutrino coszen, one per event
    e_reco : sequence
        Reconstructed neutrino energies, one per event
    cz_reco : sequence
        Reconstructed neutrino coszen, one per event
    flav : str
    int_type : str
    make_plots : bool
    out_dir : str or None
        path to directory into which to save plots. ``None`` (default)
        saves to PWD.

    Returns
    -------
    kernel4d : 4D array of float
        Mapping from the number of events in each bin of the 2D
        MC-true-events histogram to the number of events reconstructed in
        each bin of the 2D reconstructed-events histogram. Dimensions are
        len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x
        len(self.czbins)-1 since ebins and czbins define the histograms'
        bin edges.
    """
    OVERFIT_FACTOR = 1.0

    if make_plots:
        import matplotlib as mpl
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_pdf import PdfPages
        from matplotlib.patches import Rectangle
        plt.close(1)
        plt.close(2)
        plt.close(3)

        def rugplot(a, y0, dy, ax, **kwargs):
            return ax.plot([a, a], [y0, y0+dy], **kwargs)

        plot_fname = '_'.join(['resolutions', 'vbwkde', flav,
                               int_type]) + '.pdf'
        if out_dir is not None:
            plot_fname = os.path.join(out_dir, plot_fname)
        TOP = 0.925
        BOTTOM = 0.05
        RIGHT = 0.97
        LEFT = 0.07
        HSPACE = 0.12
        LABELPAD = 0.058
        AXISBG = (0.5, 0.5, 0.5)
        DARK_RED = (0.7, 0.0, 0.0)
        HIST_PP = dict(facecolor=(1, 0.5, 0.5), edgecolor=DARK_RED,
                       histtype='stepfilled', alpha=0.7, linewidth=2.0,
                       label=r'$\mathrm{Histogram}$')
        N_HBINS = 25
        DIFFUS_PP = dict(color=(0.0, 0.0, 0.0), linestyle='-', marker=None,
                         alpha=0.6, linewidth=2.0,
                         label=r'$\mathrm{VBWKDE}$')
        RUG_PP = dict(color=(1.0, 1.0, 1.0), linewidth=0.4, alpha=0.5)
        RUG_LAB = r'$\mathrm{Rug\,plot}$'
        LEGFNTCOL = (1, 1, 1)
        LEGFACECOL = (0.2, 0.2, 0.2)
        GRIDCOL = (0.4, 0.4, 0.4)
        pdfpgs = PdfPages(plot_fname)

    assert np.min(np.diff(self.ebins)) > 0, \
        "Energy bin edges not monotonically increasing."
    assert np.min(np.diff(self.czbins)) > 0, \
        "coszen bin edges not monotonically increasing."

    # NOTE: below defines bin centers on linear scale; other logic in this
    # method assumes this to be the case, so **DO NOT USE**
    # utils.utils.get_bin_centers in this method, which may return
    # logarithmically-defined centers instead.
    ebin_edges = np.array(self.ebins)
    left_ebin_edges = ebin_edges[0:-1]
    right_ebin_edges = ebin_edges[1:]
    ebin_centers = (left_ebin_edges + right_ebin_edges) / 2.0
    ebin_range = ebin_edges[-1] - ebin_edges[0]
    n_ebins = len(ebin_centers)

    czbin_edges = np.array(self.czbins)
    left_czbin_edges = czbin_edges[0:-1]
    right_czbin_edges = czbin_edges[1:]
    czbin_centers = (left_czbin_edges + right_czbin_edges) / 2.0
    n_czbins = len(czbin_centers)

    n_events = len(e_true)

    if self.MIN_NUM_EVENTS > n_events:
        self.MIN_NUM_EVENTS = n_events
    if self.TGT_NUM_EVENTS > n_events:
        self.TGT_NUM_EVENTS = n_events

    # Object with which to store the 4D kernels: np 4D array
    kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins))

    # Object with which to store the 2D "aggregate_map": the total number
    # of events reconstructed into a given (E, CZ) bin, used for sanity
    # checks
    aggregate_map = np.zeros((n_ebins, n_czbins))
    for ebin_n in range(n_ebins):
        ebin_min = left_ebin_edges[ebin_n]
        ebin_max = right_ebin_edges[ebin_n]
        ebin_mid = (ebin_min + ebin_max) / 2.0
        ebin_wid = ebin_max - ebin_min

        logging.debug('Processing true-energy bin_n=' + format(ebin_n, 'd')
                      + ' of ' + format(n_ebins - 1, 'd')
                      + ', E_{nu,true} in '
                      + '[' + format(ebin_min, '0.3f') + ', '
                      + format(ebin_max, '0.3f') + '] ...')

        # Absolute distance from these events' re-centered reco energies to
        # the center of this energy bin; sort in ascending-distance order
        abs_enu_dist = np.abs(e_true - ebin_mid)
        sorted_abs_enu_dist = np.sort(abs_enu_dist)

        # Grab the distance the number-"TGT_NUM_EVENTS" event is from the
        # bin center
        tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS - 1]

        # Grab the distance the number-"MIN_NUM_EVENTS" event is from the
        # bin center
        min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS - 1]

        # TODO: revisit the below algorithm with proper testing

        # Make threshold distance (which is half the total width) no more
        # than 4x the true-energy-bin width in order to capture the
        # "target" number of points (TGT_NUM_EVENTS) but no less than half
        # the bin width (i.e., the bin should be at least be as wide as the
        # pre-defined bin width).
        #
        # HOWEVER, allow the threshold distance (bin half-width) to expand
        # to as much as 4x the original bin full-width in order to capture
        # the "minimum" number of points (MIN_NUM_EVENTS).
        thresh_enu_dist = \
            max(min(max(tgt_thresh_enu_dist, ebin_wid/2), 4*ebin_wid),
                min_thresh_enu_dist)

        # Grab all events within the threshold distance
        in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0]
        #print '** IN EBIN FIRST, LAST ENERGY:', e_reco[in_ebin_ind[0]], e_reco[in_ebin_ind[-1]]
        n_in_bin = len(in_ebin_ind)

        # Record lowest/highest energies that are included in the bin
        actual_left_ebin_edge = min(
            ebin_min, min(e_true[in_ebin_ind]))  #max(min(ebins), ebin_mid-thresh_enu_dist)
        actual_right_ebin_edge = max(
            ebin_max, max(e_true[in_ebin_ind]))  #(max(ebins), ebin_mid+thresh_enu_dist)

        # Extract just the neutrino-energy/coszen error columns' values for
        # succinctness
        enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind]
        cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind]

        #==================================================================
        # Neutrino energy resolutions
        #==================================================================
        dmin = min(enu_err)
        dmax = max(enu_err)
        drange = dmax - dmin

        e_lowerlim = min(self.ENERGY_RANGE[0] - ebin_mid*1.5,
                         dmin - drange*0.5)
        e_upperlim = max((np.max(ebin_edges) - ebin_mid)*1.5,
                         dmax + drange*0.5)
        egy_kde_lims = np.array([e_lowerlim, e_upperlim])

        # Use at least min_num_pts points and at most the next-highest
        # integer-power-of-two that allows for at least 10 points in the
        # smallest energy bin
        min_num_pts = 2**12
        min_bin_width = np.min(ebin_edges[1:] - ebin_edges[:-1])
        min_pts_smallest_bin = 5.0
        kde_range = np.diff(egy_kde_lims)
        num_pts0 = kde_range / (min_bin_width / min_pts_smallest_bin)
        kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0))))
        logging.debug('  N_evts=' + str(n_in_bin) + ', taken from ['
                      + format(actual_left_ebin_edge, '0.3f') + ', '
                      + format(actual_right_ebin_edge, '0.3f') + ']'
                      + ', VBWKDE lims=' + str(egy_kde_lims)
                      + ', VBWKDE_N: ' + str(kde_num_pts))

        # Compute variable-bandwidth KDEs
        enu_bw, enu_mesh, enu_pdf = kde.vbw_kde(
            data=enu_err,
            overfit_factor=OVERFIT_FACTOR,
            MIN=egy_kde_lims[0],
            MAX=egy_kde_lims[1],
            N=kde_num_pts)

        if np.min(enu_pdf) < 0:
            # Only issue warning if the most-negative value is negative
            # beyond specified acceptable-numerical-precision threshold
            # (EPSILON)
            if np.min(enu_pdf) <= -self.EPSILON:
                logging.warn("np.min(enu_pdf) < 0: Minimum value is "
                             + str(np.min(enu_pdf))
                             + "; forcing all negative values to 0.")
            # Otherwise, just quietly clip any negative values at 0
            enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf)

        assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf))

        # Re-center distribution at the center of the energy bin for which
        # errors were computed
        offset_enu_mesh = enu_mesh + ebin_mid
        offset_enu_pdf = enu_pdf

        # Get reference area under the PDF, for checking after interpolated
        # values are added.
        #
        # NOTE There should be NO normalization because any events lost due
        # to cutting off tails outside the binned region are actually going
        # to be lost, and so should penalize the total area.
        int_val0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

        # Create linear interpolator for the PDF
        interp = interpolate.interp1d(x=offset_enu_mesh, y=offset_enu_pdf,
                                      kind='linear', copy=True,
                                      bounds_error=True, fill_value=np.nan)

        # Insert all bin edges' exact locations into the mesh (For accurate
        # accounting of area in each bin, must include values out to bin
        # edges)
        edge_locs = [
            be for be in np.concatenate((left_ebin_edges, right_ebin_edges))
            if not (be in offset_enu_mesh)
        ]
        edge_locs.sort()
        edge_pdfs = interp(edge_locs)
        insert_ind = np.searchsorted(offset_enu_mesh, edge_locs)
        offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs)
        offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs)

        int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

        assert np.abs(int_val - int_val0) < self.EPSILON

        # Chop off distribution at extrema of energy bins
        valid_ind = np.where(
            (offset_enu_mesh >= np.min(ebin_edges)) &
            (offset_enu_mesh <= np.max(ebin_edges)))[0]
        offset_enu_mesh = offset_enu_mesh[valid_ind]
        offset_enu_pdf = offset_enu_pdf[valid_ind]

        # Check that there are no negative density values (after inserts)
        assert np.min(offset_enu_pdf) > 0 - self.EPSILON, \
            str(np.min(offset_enu_pdf))

        # Record the integrated area after removing parts outside binned
        # range
        tot_ebin_area0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh)

        # Check that it integrates to <= 1, sanity check
        assert tot_ebin_area0 < 1 + self.EPSILON, str(tot_ebin_area0)

        # Identify indices encapsulating the defined energy bins' ranges,
        # and find the area of each bin
        lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges)
        rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges)
        bininds = zip(lbinds, rbinds)
        ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1],
                               x=offset_enu_mesh[l:r+1])
                      for (l, r) in bininds]

        # Check that no bins have negative areas
        assert np.min(ebin_areas) >= 0

        # Sum the individual bins' areas
        tot_ebin_area = np.sum(ebin_areas)

        # Check that this total of all the bins is equal to the total area
        # under the curve (i.e., make sure there is no overlap or gaps
        # between bins)
        assert np.abs(tot_ebin_area - tot_ebin_area0) < self.EPSILON, \
            'tot_ebin_area=' + str(tot_ebin_area) + \
            ' should equal tot_ebin_area0=' + str(tot_ebin_area0)

        if make_plots:
            fig1 = plt.figure(1, figsize=(8, 10), dpi=90)
            fig1.clf()
            ax1 = fig1.add_subplot(211, axisbg=AXISBG)

            # Retrieve region where VBWKDE lives
            ml_ci = confInterval.MLConfInterval(x=enu_mesh, y=enu_pdf)
            #for conf in np.logspace(np.log10(0.999), np.log10(0.95), 50):
            #    try:
            #        lb, ub, yopt, r = ml_ci.findCI_lin(conf=conf)
            #    except:
            #        pass
            #    else:
            #        break
            #xlims = (min(-ebin_mid*1.5, lb),
            #         max(min(ub, 6*ebin_mid), 2*ebin_mid))
            lb, ub, yopt, r = ml_ci.findCI_lin(conf=0.98)
            xlims = (lb,  # min(-ebin_mid*1.5, lb),
                     max(min(ub, 6*ebin_mid), 2*ebin_wid))
            #xlims = (
            #    -ebin_wid*1.5,
            #    min(ebin_mid*2, ebin_edges[-1]+(ebin_edges[-1]-ebin_edges[0])*0.1)
            #)

            # Histogram of events' reco error
            hbins = np.linspace(
                dmin - 0.02*drange, dmax + 0.02*drange,
                N_HBINS*np.round(drange/ebin_centers[ebin_n]))
            hvals, hbins, hpatches = ax1.hist(enu_err, bins=hbins,
                                              normed=True, **HIST_PP)

            # Plot the VBWKDE
            ax1.plot(enu_mesh, enu_pdf, **DIFFUS_PP)
            axlims = ax1.axis('tight')
            ax1.set_xlim(xlims)
            ymax = axlims[3]*1.05
            ax1.set_ylim(0, ymax)

            # Grey-out regions outside binned region, so it's clear what
            # part of tail(s) will be thrown away
            width = -ebin_mid + ebin_edges[0] - xlims[0]
            unbinned_region_tex = r'$\mathrm{Unbinned}$'
            if width > 0:
                ax1.add_patch(Rectangle(
                    (xlims[0], 0), width, ymax,
                    #zorder=-1,
                    alpha=0.30, facecolor=(0.0, 0.0, 0.0), fill=True,
                    ec='none'))
                ax1.text(xlims[0] + (xlims[1] - xlims[0])/40., ymax/10.,
                         unbinned_region_tex, fontsize=14, ha='left',
                         va='bottom', rotation=90, color='k')

            width = xlims[1] - (ebin_edges[-1] - ebin_mid)
            if width > 0:
                ax1.add_patch(Rectangle(
                    (xlims[1] - width, 0), width, ymax,
                    alpha=0.30, facecolor=(0, 0, 0), fill=True, ec='none'))
                ax1.text(xlims[1] - (xlims[1] - xlims[0])/40., ymax/10.,
                         unbinned_region_tex, fontsize=14, ha='right',
                         va='bottom', rotation=90, color='k')

            # Rug plot of events' reco energy errors
            ylim = ax1.get_ylim()
            dy = ylim[1] - ylim[0]
            ruglines = rugplot(enu_err, y0=ylim[1], dy=-dy/40., ax=ax1,
                               **RUG_PP)
            ruglines[-1].set_label(RUG_LAB)

            # Legend
            leg_title_tex = r'$\mathrm{Normalized}\,E_\nu\mathrm{-err.\,distr.}$'
            x1lab = ax1.set_xlabel(
                r'$E_{\nu,\mathrm{reco}}-E_{\nu,\mathrm{true}}\;'
                r'(\mathrm{GeV})$',
                labelpad=LABELPAD)
            leg = ax1.legend(loc='upper right', title=leg_title_tex,
                             frameon=True, framealpha=0.8, fancybox=True,
                             bbox_to_anchor=[1, 0.975])

            # Other plot details
            ax1.xaxis.set_label_coords(0.9, -LABELPAD)
            ax1.xaxis.grid(color=GRIDCOL)
            ax1.yaxis.grid(color=GRIDCOL)
            leg.get_title().set_fontsize(16)
            leg.get_title().set_color(LEGFNTCOL)
            [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
            frame = leg.get_frame()
            frame.set_facecolor(LEGFACECOL)
            frame.set_edgecolor(None)

        #==================================================================
        # Neutrino coszen resolution for events in this energy bin
        #==================================================================
        dmin = min(cz_err)
        dmax = max(cz_err)
        drange = dmax - dmin

        # NOTE the limits are 1 less than / 1 greater than the limits that
        # the error will actually take on, so as to allow for any smooth
        # roll-off at edges of data. The calculation of areas below
        # captures all of the area, though, by reflecting bins defined in
        # [-1, 1] about the points -1 and 1, thereby capturing any
        # densities in the range [-3, +3]. This is not necessarily
        # accurate, but it's better than throwing that info out entirely.
        #
        # NOTE also that since reco events as of now are only in range -1
        # to 0, though, that there are "gaps" in the capture range, but
        # this is due to densities being in the upper-hemisphere which we
        # are intentionally ignoring, rather than the code here not taking
        # them into account. Normalization is based upon *all* events,
        # whether or not they fall within a bin specified above.

        # Number of points in the mesh used for VBWKDE; must be large
        # enough to capture fast changes in the data but the larger the
        # number, the longer it takes to compute the densities at all the
        # points.
        # Here, just choosing a fixed number regardless of the data or
        # binning
        N_cz_mesh = 2**10

        # Data range for VBWKDE to consider
        cz_kde_min = -3
        cz_kde_max = +2

        cz_kde_failed = False
        previous_fail = False
        for n in xrange(3):
            # TODO: only catch specific exception
            try:
                cz_bw, cz_mesh, cz_pdf = kde.vbw_kde(
                    data=cz_err,
                    overfit_factor=OVERFIT_FACTOR,
                    MIN=cz_kde_min,
                    MAX=cz_kde_max,
                    N=N_cz_mesh)
            except:
                cz_kde_failed = True
                if n == 0:
                    logging.trace('(cz vbwkde ')
                logging.trace('fail, ')
                # If failure occurred in vbw_kde, expand the data range it
                # takes into account; this usually helps
                cz_kde_min -= 1
                cz_kde_max += 1
            else:
                if cz_kde_failed:
                    previous_fail = True
                    logging.trace('success!')
                cz_kde_failed = False
            finally:
                if previous_fail:
                    logging.trace(')')
                previous_fail = False
                if not cz_kde_failed:
                    break

        if cz_kde_failed:
            logging.warn('Failed to fit VBWKDE!')
            continue

        if np.min(cz_pdf) < 0:
            logging.warn("np.min(cz_pdf) < 0: Minimum value is "
                         + str(np.min(cz_pdf))
                         + "; forcing all negative values to 0.")
            # BUGFIX: clip the PDF (not the mesh) and keep the result;
            # np.clip returns a new array rather than operating in place
            cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf)

        assert np.min(cz_pdf) >= -self.EPSILON, \
            str(np.min(cz_pdf))

        # TODO: test and/or visualize the shifting & re-binning process
        for czbin_n in range(n_czbins):
            czbin_mid = czbin_centers[czbin_n]

            # Re-center distribution at the center of the current cz bin
            offset_cz_mesh = cz_mesh + czbin_mid

            # Create interpolation object, used to fill in bin edge values
            interp = interpolate.interp1d(x=offset_cz_mesh, y=cz_pdf,
                                          kind='linear', copy=True,
                                          bounds_error=False, fill_value=0)

            # Figure out where all bin edges lie in this re-centered
            # distribution (some bins may be repeated since bins in [-1,0]
            # and err in [-2,1]:
            #
            # 1. Find limits of mesh values..
            mmin = offset_cz_mesh[0]
            mmax = offset_cz_mesh[-1]

            # 2. Map all bin edges into the full mesh-value range,
            # reflecting about -1 and +1. If the reflected edge is outside
            # the mesh range, use the exceeded limit of the mesh range as
            # the bin edge instead.
            #
            # This maps every bin edge {i} to 3 new edges, indexed
            # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices
            # and same-subindices, so what started as, e.g., bin 3 now is
            # described by (left, right) edges at
            #   (new_edges[3][0], new_edges[4][0]),
            #   (new_edges[3][1], new_edges[4][1]), and
            #   (new_edges[3][2], new_edges[4][2]).

            # NOTE / TODO: It's tempting to dynamically set the number of
            # reflections to minimize computation time, but I think it
            # breaks the code. Just set to a reasonably large number for
            # now and accept the performance penalty. ALSO: if you change
            # the parity of the number of reflections, the code below that
            # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be
            # swapped!!!
            n_left_reflections = 4
            n_right_reflections = 4

            new_czbin_edges = []
            for edge in czbin_edges:
                edges_refl_left = []
                for n in xrange(n_left_reflections):
                    edge_refl_left = reflect1d(edge, -1 - (2*n))
                    if edge_refl_left < mmin:
                        edge_refl_left = mmin
                    edges_refl_left.append(edge_refl_left)
                edges_refl_right = []
                for n in xrange(n_right_reflections):
                    edge_refl_right = reflect1d(edge, +1 + (2*n))
                    if edge_refl_right > mmax:
                        edge_refl_right = mmax
                    edges_refl_right.append(edge_refl_right)
                # Include all left-reflected versions of this bin edge, in
                # increasing-x order + this bin edge + right-reflected
                # versions of this bin edge
                new_czbin_edges.append(edges_refl_left[::-1] + [edge]
                                       + edges_refl_right)

            # Record all unique bin edges
            edge_locs = set()
            [edge_locs.update(edges) for edges in new_czbin_edges]

            # Throw away bin edges that are already in the mesh
            [edge_locs.remove(edge) for edge in list(edge_locs)
             if edge in offset_cz_mesh]

            # Make into sorted list
            edge_locs = sorted(edge_locs)

            # Record the total area under the curve
            int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh)

            # Insert the missing bin edge locations & pdf-values into
            # the mesh & pdf, respectively
            edge_pdfs = interp(edge_locs)
            insert_ind = np.searchsorted(offset_cz_mesh, edge_locs)
            offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind, edge_locs)
            offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs)
            assert np.min(offset_cz_pdf) > -self.EPSILON

            # Check that this total of all the bins is equal to the total
            # area under the curve (i.e., check there is no overlap between
            # or gaps between bins)
            int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh)
            assert np.abs(int_val - 1) < self.EPSILON

            # Renormalize if it's not exactly 1
            if int_val != 1.0:
                offset_cz_pdf = offset_cz_pdf / int_val

            # Add up the area in the bin and areas that are "reflected"
            # into this bin
            new_czbin_edges = np.array(new_czbin_edges)
            czbin_areas = np.zeros(np.shape(new_czbin_edges)[0] - 1)
            for wrap_n in range(np.shape(new_czbin_edges)[1]):
                bin_edge_inds = np.searchsorted(offset_cz_mesh,
                                                new_czbin_edges[:, wrap_n])
                lbinds = bin_edge_inds[0:-1]
                rbinds = bin_edge_inds[1:]
                # Make sure indices that appear first are less than indices
                # that appear second in a pair of bin indices
                if (wrap_n + 1) % 2 == 0:
                    bininds = zip(rbinds, lbinds)
                else:
                    bininds = zip(lbinds, rbinds)
                tmp_areas = []
                for (binind_left_edge, binind_right_edge) in bininds:
                    if binind_left_edge == binind_right_edge:
                        tmp_areas.append(0)
                        continue
                    this_bin_area = np.array(np.trapz(
                        y=offset_cz_pdf[binind_left_edge:
                                        binind_right_edge+1],
                        x=offset_cz_mesh[binind_left_edge:
                                         binind_right_edge+1]))
                    tmp_areas.append(this_bin_area)
                czbin_areas += np.array(tmp_areas)

            assert np.min(czbin_areas) > -self.EPSILON

            tot_czbin_area = np.sum(czbin_areas)
            assert tot_czbin_area < int_val + self.EPSILON

            kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas)
            assert (np.sum(kernel4d[ebin_n, czbin_n]) -
                    tot_ebin_area*tot_czbin_area) < self.EPSILON

        if make_plots:
            ax2 = fig1.add_subplot(212, axisbg=AXISBG)
            hbins = np.linspace(dmin - 0.02*drange, dmax + 0.02*drange,
                                N_HBINS*3)
            hvals, hbins, hpatches = ax2.hist(cz_err, bins=hbins,
                                              normed=True, **HIST_PP)
            ax2.plot(cz_mesh, cz_pdf, **DIFFUS_PP)
            fci = confInterval.MLConfInterval(x=cz_mesh, y=cz_pdf)
            lb, ub, yopt, r = fci.findCI_lin(conf=0.995)
            axlims = ax2.axis('tight')
            ax2.set_xlim(lb, ub)
            ax2.set_ylim(0, axlims[3]*1.05)

            ylim = ax2.get_ylim()
            dy = ylim[1] - ylim[0]
            ruglines = rugplot(cz_err, y0=ylim[1], dy=-dy/40., ax=ax2,
                               **RUG_PP)
            ruglines[-1].set_label(r'$\mathrm{Rug\,plot}$')

            x2lab = ax2.set_xlabel(
                r'$\cos\vartheta_{\mathrm{track,reco}}-\cos\vartheta_{\nu,\mathrm{true}}$',
                labelpad=LABELPAD)
            ax2.xaxis.set_label_coords(0.9, -LABELPAD)
            ax2.xaxis.grid(color=GRIDCOL)
            ax2.yaxis.grid(color=GRIDCOL)
            leg_title_tex = r'$\mathrm{Normalized}\,\cos\vartheta\mathrm{-err.\,distr.}$'
            leg = ax2.legend(loc='upper right', title=leg_title_tex,
                             frameon=True, framealpha=0.8, fancybox=True,
                             bbox_to_anchor=[1, 0.975])
            leg.get_title().set_fontsize(16)
            leg.get_title().set_color(LEGFNTCOL)
            [t.set_color(LEGFNTCOL) for t in leg.get_texts()]
            frame = leg.get_frame()
            frame.set_facecolor(LEGFACECOL)
            frame.set_edgecolor(None)

            actual_bin_tex = ''
            if ((actual_left_ebin_edge != ebin_min)
                    or (actual_right_ebin_edge != ebin_max)):
                actual_bin_tex = r'E_{\nu,\mathrm{true}}\in [' + \
                    format(actual_left_ebin_edge, '0.2f') + r',\,' + \
                    format(actual_right_ebin_edge, '0.2f') + r'] \mapsto '
            stt = r'$\mathrm{Resolutions,\,' + flav_tex(flav) + r'\,' + \
                int_tex(int_type) + r'}$' + '\n' + \
                r'$' + actual_bin_tex + r'\mathrm{Bin}_{' + \
                format(ebin_n, 'd') + r'}\equiv E_{\nu,\mathrm{true}}\in [' + \
                format(ebin_min, '0.2f') + r',\,' + \
                format(ebin_max, '0.2f') + r']\,\mathrm{GeV}' + \
                r',\,N_\mathrm{events}=' + format(n_in_bin, 'd') + r'$'

            fig1.subplots_adjust(top=TOP, bottom=BOTTOM, left=LEFT,
                                 right=RIGHT, hspace=HSPACE)
            suptitle = fig1.suptitle(stt)
            suptitle.set_fontsize(16)
            suptitle.set_position((0.5, 0.98))
            fig1.savefig(pdfpgs, format='pdf')

    check_areas = kernel4d.sum(axis=(2, 3))

    assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas))
    assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas))

    if make_plots:
        fig2 = plt.figure(2, figsize=(8, 10), dpi=90)
        fig2.clf()
        ax = fig2.add_subplot(111)
        X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
        cm = mpl.cm.Paired_r
        cm.set_over((1, 1, 1), 1)
        cm.set_under((0, 0, 0), 1)
        plt.pcolor(X, Y, check_areas, vmin=0 + self.EPSILON, vmax=1.0,
                   shading='faceted', cmap=cm)
        plt.colorbar(ticks=np.arange(0, 1.05, 0.05))
        ax.grid(0)
        ax.axis('tight')
        ax.set_xlabel(r'$\cos\vartheta_\mathrm{true}\mathrm{\,bin\,num.}$')
        ax.set_ylabel(r'$E_{\nu,\mathrm{true}}\mathrm{\,bin\,num.}$')
        ax.set_title(
            r'$\mathrm{Fract\,of\,evts\,starting\,in\,each}\,'
            r'(E_{\nu,\mathrm{true}},\,\cos\vartheta_\mathrm{true})\,'
            r'\mathrm{bin\,that\,reco\,in\,bounds}$' + '\n'
            + r'$\mathrm{None\,should\,be\,>1\,(shown\,white);\,'
            r'no-event\,bins\,are\,black;\,avg.}='
            + format(np.mean(check_areas), '0.3f') + r'$')
        fig2.tight_layout()
        fig2.savefig(pdfpgs, format='pdf')

        check_areas2 = kernel4d.sum(axis=(0, 1))
        # Use a distinct figure number here; the original reused figure 2,
        # which the plt.close(3) at the top suggests was unintended
        fig3 = plt.figure(3, figsize=(8, 10), dpi=90)
        fig3.clf()
        ax = fig3.add_subplot(111)
        X, Y = np.meshgrid(range(n_czbins), range(n_ebins))
        cm = mpl.cm.Paired_r
        cm.set_over((1, 1, 1), 1)
        cm.set_under((0, 0, 0), 1)
        plt.pcolor(X, Y, check_areas2,
                   vmin=0 + self.EPSILON,
                   #vmax=1.0,
                   shading='faceted', cmap=cm)
        plt.colorbar(ticks=np.arange(
            0, 0.1 + np.ceil(10.*np.max(check_areas2))/10., 0.05))
        ax.grid(0)
        ax.axis('tight')
        ax.set_xlabel(r'$\cos\vartheta_\mathrm{reco}\mathrm{\,bin\,num.}$')
        ax.set_ylabel(r'$E_{\nu,\mathrm{reco}}\mathrm{\,bin\,num.}$')
        ax.set_title(
            r'$\mathrm{Normed\,num\,events\,reconstructing\,into\,each}\,'
            r'(E_{\nu,\mathrm{reco}},\,\cos\vartheta_\mathrm{reco})\,'
            r'\mathrm{bin}$' + '\n'
            + r'$\mathrm{No-event\,bins\,are\,black;\,avg.}='
            + format(np.mean(check_areas2), '0.3f') + r'$')
        fig3.tight_layout()
        fig3.savefig(pdfpgs, format='pdf')

        pdfpgs.close()

    return kernel4d
def _compute_nominal_transforms(self):
    """Compute new PID transforms."""
    logging.debug('Updating pid.hist PID histograms...')

    # TODO(shivesh): As of now, events do not have units as far as PISA
    # is concerned
    self.load_events(self.params.pid_events)
    self.cut_events(self.params.transform_events_keep_criteria)

    # TODO: in future, the events file will not have these combined
    # already, and it should be done here (or in a nominal transform,
    # etc.). See below about taking this step when we move to directly
    # using the I3-HDF5 files.
    #events_file_combined_flavints = tuple([
    #    NuFlavIntGroup(s)
    #    for s in self.events.metadata['flavints_joined']
    #])

    # TODO: take events object as an input instead of as a param that
    # specifies a file? Or handle both cases?

    pid_spec = OrderedDict(eval(self.params.pid_spec.value))
    if set(pid_spec.keys()) != set(self.output_channels):
        msg = 'PID criteria from `pid_spec` {0} does not match {1}'
        raise ValueError(msg.format(pid_spec.keys(), self.output_channels))

    # TODO: add importance weights, error computation

    logging.debug("Separating events by PID...")
    separated_events = OrderedDict()
    for sig in self.output_channels:
        this_sig_events = self.events.applyCut(pid_spec[sig])
        separated_events[sig] = this_sig_events

    # Derive transforms by combining flavints that behave similarly, but
    # apply the derived transforms to the input flavints separately
    # (leaving combining these together to later)
    transforms = []
    for flavint_group in self.transform_groups:
        logging.debug("Working on %s PID", flavint_group)

        repr_flavint = flavint_group[0]

        # TODO(shivesh): errors
        # TODO(shivesh): total histo check?
        sig_histograms = {}
        total_histo = np.zeros(self.output_binning.shape)
        for repr_flavint in flavint_group:
            histo = self.events.histogram(
                kinds=repr_flavint,
                binning=self.output_binning,
                weights_col=self.params.pid_weights_name.value,
                errors=None).hist
            total_histo += histo

        for sig in self.output_channels:
            sig_histograms[sig] = np.zeros(self.output_binning.shape)
            for repr_flavint in flavint_group:
                this_sig_histo = separated_events[sig].histogram(
                    kinds=repr_flavint,
                    binning=self.output_binning,
                    weights_col=self.params.pid_weights_name.value,
                    errors=None).hist
                sig_histograms[sig] += this_sig_histo

        for sig in self.output_channels:
            with np.errstate(divide='ignore', invalid='ignore'):
                xform_array = sig_histograms[sig] / total_histo

            num_invalid = np.sum(~np.isfinite(xform_array))
            if num_invalid > 0:
                logging.warn(
                    'Group "%s", PID signature "%s" has %d bins with no'
                    ' events (and hence the ability to separate events'
                    ' by PID cannot be ascertained). These are being'
                    ' masked off from any further computations.',
                    flavint_group, sig, num_invalid)
                # TODO: this caused buggy event propagation for some
                # reason; check and re-introduce the masked array idea
                # when this is fixed. For now, replicating the behavior
                # from PISA 2.
                #xform_array = np.ma.masked_invalid(xform_array)

            # Double check that no NaN remain
            #assert not np.any(np.isnan(xform_array))

            # Copy this transform to use for each input in the group
            for input_name in self.input_names:
                if input_name not in flavint_group:
                    continue
                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=self.suffix_channel(input_name, sig),
                    input_binning=self.input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform_array)
                transforms.append(xform)

    return TransformSet(transforms=transforms)
data_files = {
    'nue': {'filename': args.nue, 'nfiles': args.nfiles_nue},
    'numu': {'filename': args.numu, 'nfiles': args.nfiles_numu},
    'nutau': {'filename': args.nutau, 'nfiles': args.nfiles_nutau}}
logging.info("input files:\n%s" % data_files)

# Ensure overwrite of existing filename...
outfilename = args.outfile
fh = h5py.File(outfilename, 'w')
fh.close()
logging.info("Writing to file: %s", outfilename)

# Define V3, V4, or V5 cuts:
cut_list = []
if args.V3cuts:
    logging.warn("Using cuts V3...")
    cut_list.append(('NewestBgRejCutsStep1', 'value', True))
    cut_list.append(('NewestBgRejCutsStep2', 'value', True))
elif args.V4cuts:
    logging.warn("Using cuts V4...")
    cut_list.append(('Cuts_V4_Step1', 'value', True))
    cut_list.append(('Cuts_V4_Step2', 'value', True))
elif args.V5cuts:
    logging.warn("Using cuts V5...")
    cut_list.append(('Cuts_V5_Step1', 'value', True))
    cut_list.append(('Cuts_V5_Step2', 'value', True))
elif args.nocuts:
    logging.warn("Using NO S1/S2 selection CUTS")
    cut_list = []
elif args.custom:
    logging.warn("Using CUSTOM cuts: %s..." % args.custom_str)
                    help='set verbosity level')
args = parser.parse_args()
set_verbosity(args.verbose)

# Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
pseudo_data_settings = (from_json(args.pseudo_data_settings)
                        if args.pseudo_data_settings is not None
                        else template_settings)

# Workaround for old scipy versions; compare parsed versions, since a plain
# string comparison mis-orders e.g. '0.9.0' and '0.12.0'
import scipy
from distutils.version import LooseVersion
if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')

# Make sure that both pseudo data and template are using the same channel.
# Raise Exception and quit otherwise.
channel = template_settings['params']['channel']['value']
if channel != pseudo_data_settings['params']['channel']['value']:
    error_msg = "Both template and pseudo data must have same channel!\n"
    error_msg += " pseudo_data_settings chan: '%s', template chan: '%s' " % (
        pseudo_data_settings['params']['channel']['value'], channel)
    raise ValueError(error_msg)

if args.gpu_id is not None:
    template_settings['params']['gpu_id'] = {}
def plot_map_comparisons(ref_map, new_map, ref_abv, new_abv, outdir, subdir,
                         name, texname, stagename, servicename,
                         shorttitles=False, ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps"""
    path = [outdir]

    if subdir is None:
        subdir = stagename.lower()
    path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    if stagename is not None:
        fname = ['%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower()),
                 'stage_' + stagename]
    else:
        fname = ['%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower())]
    if servicename is not None:
        fname.append('service_' + servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype
    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map / ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map / ref_map

    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where the ratio returns infinite. This isn't necessarily
    # a fail, since all it means is that the reference was zero. If the new
    # value is sufficiently close to zero then it's still fine.
    if max_diff_ratio == float('inf'):
        logging.warn('Infinite value found in ratio tests. Difference tests '
                     'now also being calculated')
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1, ncols=5, gridspec_kw=gridspec_kw,
                                 sharex=False, sharey=False, figsize=(20, 5))
        if shorttitles:
            ref_map.plot(fig=fig, ax=axes[0],
                         title=basetitle + ' ' + ref_abv + ' (A)',
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig, ax=axes[1],
                         title=basetitle + ' ' + new_abv + ' (B)',
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig, ax=axes[2], title='A/B',
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig, ax=axes[3], title='A-B', symm=True,
                          cmap=plt.cm.seismic)
            diff_ratio_map.plot(fig=fig, ax=axes[4], title='(A-B)/A',
                                symm=True, cmap=plt.cm.seismic)
        else:
            ref_map.plot(fig=fig, ax=axes[0],
                         title=basetitle + ' ' + ref_abv,
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig, ax=axes[1],
                         title=basetitle + ' ' + new_abv,
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig, ax=axes[2],
                           title=basetitle + ' %s/%s' % (new_abv, ref_abv),
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig, ax=axes[3],
                          title=basetitle + ' %s-%s' % (new_abv, ref_abv),
                          symm=True, cmap=plt.cm.seismic)
            diff_ratio_map.plot(
                fig=fig, ax=axes[4],
                title=basetitle + ' (%s-%s)/%s' % (new_abv, ref_abv,
                                                   ref_abv),
                symm=True, cmap=plt.cm.seismic)
        logging.debug('>>>> Plot for inspection saved at %s'
                      % os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
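
# Usage sketch (map objects and label strings are illustrative; assumes two
# identically-binned PISA Map instances `ref` and `new`):
#
#   max_dr, max_d = plot_map_comparisons(
#       ref_map=ref, new_map=new, ref_abv='V2', new_abv='V3', outdir='plots',
#       subdir=None, name='nue', texname=r'\nu_e', stagename='flux',
#       servicename='honda')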
                    help='set verbosity level')
args = parser.parse_args()
set_verbosity(args.verbose)

print "FILE NORMALIZATION: "
print "  >> nue:   ", args.ne
print "  >> numu:  ", args.nmu
print "  >> nutau: ", args.ntau

ebins = (np.linspace(args.emin, args.emax, args.nebins) if args.elin
         else np.logspace(np.log10(args.emin), np.log10(args.emax),
                          args.nebins))

# Cut definitions:
s1_s2_cuts = []
if args.v4cuts:
    logging.warn("Using cuts V4!")
    s1_s2_cuts = [("Cuts_V4_Step1", 'value', True),
                  ("Cuts_V4_Step2", 'value', True)]
elif args.v3cuts:
    logging.warn("Using cuts V3!")
    s1_s2_cuts = [('NewestBgRejCutsStep1', 'value', True),
                  ('NewestBgRejCutsStep2', 'value', True)]
elif args.v5truth:
    logging.warn("USING V5 TRUTH information")
    s1_s2_cuts = [('Cuts_V5_Step2_upgoing_Truth', 'value', True)]
elif args.nocuts:
    logging.warn("Using no selection cuts!")
    s1_s2_cuts = []
else:
    logging.warn("Using cuts V5!")
    s1_s2_cuts = [("Cuts_V5_Step1", 'value', True),
                  ("Cuts_V5_Step2", 'value', True)]
def __init__(
    self,
    earth_model=None,
    detector_depth=None,
    prop_height=None,
    prop_height_min=None,
    YeI=None,
    YeO=None,
    YeM=None,
    rel_err=None,
    abs_err=None,
    prop_lowpass_cutoff=None,
    prop_lowpass_frac=None,
    eval_lowpass_cutoff=None,
    eval_lowpass_frac=None,
    node_mode=None,
    use_decoherence=False,
    num_decoherence_gamma=1,
    use_nsi=False,
    num_neutrinos=3,
    exact_mode=False,
    **std_kwargs,
):
    if use_nsi:
        raise NotImplementedError("NSI not implemented")
    if use_decoherence:
        raise NotImplementedError("Decoherence not implemented")
    if not isinstance(prop_height, ureg.Quantity):
        raise NotImplementedError(
            "Getting propagation heights from containers is "
            "not yet implemented")

    self.num_neutrinos = int(num_neutrinos)
    assert self.num_neutrinos < 5, \
        "currently only supports up to 4 flavor oscillations"
    self.use_nsi = use_nsi
    self.use_decoherence = use_decoherence
    self.num_decoherence_gamma = num_decoherence_gamma
    self.node_mode = node_mode

    self.earth_model = earth_model
    self.YeI = YeI.m_as("dimensionless")
    self.YeO = YeO.m_as("dimensionless")
    self.YeM = YeM.m_as("dimensionless")
    self.detector_depth = detector_depth.m_as("km")
    self.prop_height = prop_height.m_as("km")
    self.avg_height = False
    self.prop_height_min = None
    if prop_height_min is not None:  # this is optional
        self.prop_height_min = prop_height_min.m_as("km")
        self.avg_height = True

    self.layers = None

    self.rel_err = (rel_err.m_as("dimensionless")
                    if rel_err is not None else 1.0e-10)
    self.abs_err = (abs_err.m_as("dimensionless")
                    if abs_err is not None else 1.0e-10)

    self.prop_lowpass_cutoff = (prop_lowpass_cutoff.m_as("1/km")
                                if prop_lowpass_cutoff is not None else 0.)
    self.prop_lowpass_frac = (prop_lowpass_frac.m_as("dimensionless")
                              if prop_lowpass_frac is not None else 0.)
    self.eval_lowpass_cutoff = (eval_lowpass_cutoff.m_as("1/km")
                                if eval_lowpass_cutoff is not None else 0.)
    self.eval_lowpass_frac = (eval_lowpass_frac.m_as("dimensionless")
                              if eval_lowpass_frac is not None else 0.)

    if self.prop_lowpass_frac > 1. or self.eval_lowpass_frac > 1.:
        raise ValueError(
            "lowpass filter fraction cannot be greater than one")
    if self.prop_lowpass_frac < 0. or self.eval_lowpass_frac < 0.:
        raise ValueError(
            "lowpass filter fraction cannot be smaller than zero")

    self.nus_layer = None
    self.nus_layerbar = None

    # Define standard params
    expected_params = [
        "theta12",
        "theta13",
        "theta23",
        "deltam21",
        "deltam31",
        "deltacp",
    ]

    # Add decoherence parameters
    assert self.num_decoherence_gamma in [1, 3], (
        "Must choose either 1 or 3 decoherence gamma parameters")
    if self.use_decoherence:
        if self.num_decoherence_gamma == 1:
            expected_params.extend(["gamma"])
        elif self.num_decoherence_gamma == 3:
            expected_params.extend(["gamma21", "gamma31", "gamma32"])
        expected_params.extend(["n_energy"])

    # We may want to reparametrize this with the difference between
    # deltacp14 and deltacp24, as the absolute value seems to play a small
    # role (see https://arxiv.org/pdf/2010.06321.pdf)
    if self.num_neutrinos == 4:
        expected_params.extend([
            "theta14",
            "theta24",
            "theta34",
            "deltam41",
            "deltacp14",
            "deltacp24",
        ])

    # init base class
    super().__init__(
        expected_params=expected_params,
        **std_kwargs,
    )

    # This is special: We have an additional "binning" to account for. It
    # is in principle possible to work in event mode even for the nodes,
    # which would mean that the full oscillation problem is solved for all
    # events individually. Together with the constant oscillation mode,
    # this can be used to calculate probabilities in exact mode in a time
    # that is reasonable at least for generating pseudodata.
    assert not (self.use_nsi and self.use_decoherence), (
        "NSI and decoherence not supported together, must use one or the"
        " other")

    self.exact_mode = exact_mode

    if exact_mode:
        # No interpolation is happening in exact mode so any passed
        # node_mode will be ignored. Probabilities are calculated at
        # calc_specs.
        if self.node_mode is not None:
            logging.warn(
                "nuSQuIDS is configured in exact mode, the passed "
                f"`node_mode`\n({self.node_mode})\n will be ignored!")
        if self.prop_lowpass_cutoff > 0 or self.eval_lowpass_cutoff > 0:
            logging.warn(
                "nuSQuIDS is configured in exact mode, low-pass filters "
                "will be ignored")
    else:
        if isinstance(self.calc_mode, MultiDimBinning):
            assert isinstance(self.node_mode, MultiDimBinning), (
                "cannot use event-wise nodes with binned calculation")

    self.e_node_mode = None
    self.e_mesh = None
    self.coszen_node_mode = None
    self.cosz_mesh = None
def __init__(
    self,
    fit_results_file,
    data=None,
    params=None,
    input_names=None,
    output_names=None,
    debug_mode=None,
    error_method=None,
    input_specs=None,
    calc_specs=None,
    output_specs=None,
    links=None,
):
    # -- Read fit_results_file and extract necessary info -- #

    fit_results = from_file(fit_results_file)

    # handle backwards compatibility for old style fit results files
    if "hyperplanes" in fit_results:
        using_old_fit_file = False
    elif "sys_list" in fit_results:
        using_old_fit_file = True
    else:
        raise ValueError("Unrecognised format for input fit file")

    # get list of systematic parameter names fitted; need to conserve
    # order here!
    if using_old_fit_file:
        fit_param_names = fit_results["sys_list"]
    else:
        fit_param_names = fit_results["param_names"]

    if "param_units" in fit_results:
        fit_param_units = fit_results["param_units"]
    else:
        fit_param_units = ["dimensionless" for _ in fit_param_names]
    fit_param_units = [ureg.Unit(u) for u in fit_param_units]

    # Prefer to have the actual binning, so we can compare bin edges to
    # "reasonable" precision to make sure the hyperplane fits are
    # applicable to the current binning.
    #
    # If there is no binning in the hyperplane fit results file, look for
    # a hash value; barring that, just ensure that the dimensionality &
    # number of bins match.
    binning_spec = fit_results.get("binning", None)
    if binning_spec is not None:
        fit_binning = MultiDimBinning(**binning_spec)
    else:
        fit_binning = None

    if fit_binning is not None:
        fit_binning_hash = fit_binning.hash
    else:
        fit_binning_hash = fit_results.get("binning_hash", None)

    if fit_binning_hash is None:
        logging.warn("Cannot determine the hash of the binning employed"
                     " for the hyperplane fits. Correct application of"
                     " fits is not guaranteed!")

    # -- Expected input / output names -- #

    input_names = ()
    output_names = ()

    # -- Which keys are added or altered for the outputs during `apply` -- #

    input_calc_keys = ()
    output_calc_keys = ("hyperplane_scalefactors",)

    if error_method == "sumw2":
        output_apply_keys = ("weights", "errors")
        input_apply_keys = output_apply_keys
    else:
        output_apply_keys = ("weights",)
        input_apply_keys = output_apply_keys

    # -- Initialize base class -- #

    super(pi_hyperplanes, self).__init__(
        data=data,
        params=params,
        expected_params=fit_param_names,
        input_names=input_names,
        output_names=output_names,
        debug_mode=debug_mode,
        error_method=error_method,
        input_specs=input_specs,
        calc_specs=calc_specs,
        output_specs=output_specs,
        input_calc_keys=input_calc_keys,
        output_calc_keys=output_calc_keys,
        input_apply_keys=input_apply_keys,
        output_apply_keys=output_apply_keys,
    )

    # -- Only allowed/implemented modes -- #

    assert self.input_mode is not None
    assert self.calc_mode == "binned"
    assert self.output_mode is not None

    self.links = ast.literal_eval(links)

    # -- Add attrs to `self` specific to `pi_hyperplanes` -- #

    self.fit_results_file = fit_results_file
    """str : path to hyperplane fit results file"""

    self.using_old_fit_file = using_old_fit_file
    """bool : whether the hyperplane fit file is in the "old" format"""

    self.fit_results = fit_results
    """OrderedDict : parsed hyperplane fit file"""

    self.fit_param_names = fit_param_names
    """list : param names used in hyperplane fit, in order they appear in
    file"""

    self.fit_param_units = fit_param_units
    """list : param units used in hyperplane fit, in order they appear in
    file"""

    self.fit_binning = fit_binning
    """MultiDimBinning : binning used for hyperplane fits; one hyperplane
    per bin"""

    self.fit_binning_hash = fit_binning_hash
    """str : hash of the binning used for hyperplane fits"""
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='''set verbosity level''')
args = parser.parse_args()
set_verbosity(args.verbose)

# Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)
grid_settings = from_json(args.grid_settings)
channel = template_settings['params']['channel']['value']

# Workaround for old scipy versions; compare parsed versions, since a plain
# string comparison mis-orders e.g. '0.9.0' and '0.12.0'
import scipy
from distutils.version import LooseVersion
if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
    logging.warn('Detected scipy version %s < 0.12.0' % scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')

# Get the parameters
params = template_settings['params']

# Make sure that atmospheric parameters are fixed:
logging.warn("Ensuring that atmospheric parameters are fixed for this "
             "analysis")
params = fix_atm_params(params)
#print "params: ", params.items()

with Timer() as t:
    template_maker = TemplateMaker(get_values(params),
                                   **template_settings['binning'])
profile.info("==> elapsed time to initialize templates: %s sec" % t.secs)
llh_data = from_hdf(args.llh_file) df_true_h, df_false_h = get_llr_data_frames(llh_data) template_params = llh_data['template_settings']['params'] if args.verbose > 1: show_frame(df_true_h) print "\n columns: ",df_true_h[0].columns ################################################################ ### 1) Plot LLR Distributions ################################################################ # df_true_h MUST be filled, but df_false_h is allowed to be empty llr_dict_true_h = get_llh_ratios(df_true_h) if (len(df_false_h) == 0 or args.no_false_h): logging.warn("No false hierarchy best fit llr distributions...") fig = make_llr_only_true_h(llr_dict_true_h, args.nbins, args.xlim) else: logging.warn("Making llr distributions with false hierarchy best fit.") llr_dict_false_h = get_llh_ratios(df_false_h) fig = make_llr_with_false_h(llr_dict_true_h, llr_dict_false_h, args.nbins, args.xlim) ################################################################ ### 2) Plot Posterior Distributions ################################################################ if args.params: df = df_true_h if args.true_h else df_false_h
def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco): """Construct a 4D kernel set from MC events using VBWKDE. Given a set of MC events and each of their {energy{true, reco}, coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux histogram onto the corresponding 2D reco-flux histogram. The resulting 4D array can be indexed logically using kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l] where the 4 indices point from a single MC-true histogram bin (i,j) to a single reco histogram bin (k,l). Binning of both MC-true and reco histograms is the same and is given by the values in self.ebins and self.czbins which define the bin *edges* (not the bin centers; hence, len(self.ebins) is one greater than the number of bins, etc.). NOTE: Actual limits in energy used to group events into a single "true" bin may be extended beyond the bin edges defined by self.ebins in order to gather enough events to successfully apply VBWKDE. Parameters ---------- e_true : sequence MC-true neutrino energies, one per event cz_true : sequence MC-true neutrino coszen, one per event e_reco : sequence Reconstructed neutrino energies, one per event cz_reco : sequence Reconstructed neutrino coszen, one per event Returns ------- kernel4d : 4D array of float Mapping from the number of events in each bin of the 2D MC-true-events histogram to the number of events reconstructed in each bin of the 2D reconstructed-events histogram. Dimensions are len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x len(self.czbins)-1 since ebins and czbins define the histograms' bin edges. """ OVERFIT_FACTOR = 1.0 assert np.min(np.diff(self.ebins)) > 0, \ "Energy bin edges not monotonically increasing." assert np.min(np.diff(self.czbins)) > 0, \ "coszen bin edges not monotonically increasing." # NOTE: below defines bin centers on linear scale; other logic # in this method assumes this to be the case, so # **DO NOT USE** utils.utils.get_bin_centers in this method, which # may return logarithmically-defined centers instead. ebin_edges = np.array(self.ebins) left_ebin_edges = ebin_edges[0:-1] right_ebin_edges = ebin_edges[1:] ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0 n_ebins = len(ebin_centers) czbin_edges = np.array(self.czbins) left_czbin_edges = czbin_edges[0:-1] right_czbin_edges = czbin_edges[1:] czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0 n_czbins = len(czbin_centers) n_events = len(e_true) if self.MIN_NUM_EVENTS > n_events: self.MIN_NUM_EVENTS = n_events if self.TGT_NUM_EVENTS > n_events: self.TGT_NUM_EVENTS = n_events # Object with which to store the 4D kernels: np 4D array kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins)) # Object with which to store the 2D "aggregate_map": the total number # of events reconstructed into a given (E, CZ) bin, used for sanity # checks aggregate_map = np.zeros((n_ebins, n_czbins)) for ebin_n in range(n_ebins): ebin_min = left_ebin_edges[ebin_n] ebin_max = right_ebin_edges[ebin_n] ebin_mid = (ebin_min+ebin_max)/2.0 ebin_wid = ebin_max-ebin_min logging.trace( ' processing true-energy bin_n=' + str(ebin_n) + ' of ' + str(n_ebins-1) + ', E_{nu,true} in ' + '[' + str(ebin_min) + ', ' + str(ebin_max) + '] ...' 
) # Absolute distance from these events' true energies to the center of # this energy bin; also make a copy sorted in ascending-distance order abs_enu_dist = np.abs(e_true - ebin_mid) sorted_abs_enu_dist = np.sort(abs_enu_dist) # Grab the distance the number-"TGT_NUM_EVENTS" event is from the # bin center tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS-1] # Grab the distance the number-"MIN_NUM_EVENTS" event is from the # bin center min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS-1] # TODO: revisit the below algorithm with proper testing # Make threshold distance (which is half the total width) no more # than 4x the true-energy-bin width in order to capture the # "target" number of points (TGT_NUM_EVENTS) but no less than half # the bin width (i.e., the bin should be at least be as wide as the # pre-defined bin width). # # HOWEVER, allow the threshold distance (bin half-width) to expand # to as much as 4x the original bin full-width in order to capture # the "minimum" number of points (MIN_NUM_EVENTS). thresh_enu_dist = \ max(min(max(tgt_thresh_enu_dist, ebin_wid/2), 4*ebin_wid), min_thresh_enu_dist) # Grab all events within the threshold distance in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0] n_in_bin = len(in_ebin_ind) # Extract just the neutrino-energy/coszen error columns' values for # succinctness enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind] cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind] #================================================================== # Neutrino energy resolutions #================================================================== dmin = min(enu_err) dmax = max(enu_err) drange = dmax-dmin e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5) e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5) egy_kde_lims = np.array([e_lowerlim, e_upperlim]) # Use at least min_num_pts points and at most the next-highest # integer-power-of-two that allows for at least 10 points in the # smallest energy bin min_num_pts = 2**12 min_bin_width = np.min(np.diff(ebin_edges)) min_pts_smallest_bin = 10.0 kde_range = np.diff(egy_kde_lims) num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin) kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0)))) logging.debug( ' Nevts=' + str(n_in_bin) + ' taken from [' + str(ebin_mid-thresh_enu_dist) + ', ' + str(ebin_mid+thresh_enu_dist) + ']' + ', KDE lims=' + str(kde_range) + ', KDE_N: ' + str(kde_num_pts) ) # Compute variable-bandwidth KDEs enu_bw, enu_mesh, enu_pdf = kde.vbw_kde( data = enu_err, overfit_factor = OVERFIT_FACTOR, MIN = egy_kde_lims[0], MAX = egy_kde_lims[1], N = kde_num_pts ) if np.min(enu_pdf) < 0: # Only issue warning if the most-negative value is negative # beyond specified acceptable-numerical-precision threshold # (EPSILON) if np.min(enu_pdf) <= -self.EPSILON: logging.warn( "np.min(enu_pdf) < 0: Minimum value is " + str(np.min(enu_pdf)) + "; forcing all negative values to 0." ) # Otherwise, just quietly clip any negative values at 0 enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf) assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf)) # Re-center distribution at the center of the energy bin for which # errors were computed offset_enu_mesh = enu_mesh+ebin_mid offset_enu_pdf = enu_pdf # Get reference area under the PDF, for checking after interpolated # values are added. # # NOTE There should be NO normalization because any events lost due # to cutting off tails outside the binned region are actually going # to be lost, and so should penalize the total area.
int_val0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) # Create linear interpolator for the PDF interp = interpolate.interp1d( x = offset_enu_mesh, y = offset_enu_pdf, kind = 'linear', copy = True, bounds_error = True, fill_value = np.nan ) # Insert all bin edges' exact locations into the mesh (For accurate # accounting of area in each bin, must include values out to bin # edges) edge_locs = [be for be in np.concatenate((left_ebin_edges, right_ebin_edges)) if not(be in offset_enu_mesh)] edge_locs.sort() edge_pdfs = interp(edge_locs) insert_ind = np.searchsorted(offset_enu_mesh, edge_locs) offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs) offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs) int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) assert np.abs(int_val - int_val0) < self.EPSILON # Chop off distribution at extrema of energy bins valid_ind = np.where( (offset_enu_mesh >= np.min(ebin_edges)) & (offset_enu_mesh <= np.max(ebin_edges)) )[0] offset_enu_mesh = offset_enu_mesh[valid_ind] offset_enu_pdf = offset_enu_pdf[valid_ind] # Check that there are no negative density values (after inserts) assert np.min(offset_enu_pdf) > 0-self.EPSILON, \ str(np.min(offset_enu_pdf)) # Record the integrated area after removing parts outside binned # range tot_ebin_area0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) # Check that it integrates to <= 1, sanity check assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0) # Identify indices encapsulating the defined energy bins' ranges, # and find the area of each bin lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges) rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges) bininds = zip(lbinds, rbinds) ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1], x=offset_enu_mesh[l:r+1]) for (l, r) in bininds] # Check that no bins have negative areas assert np.min(ebin_areas) >= 0 # Sum the individual bins' areas tot_ebin_area = np.sum(ebin_areas) # Check that this total of all the bins is equal to the total area # under the curve (i.e., make sure there is no overlap or gaps # between bins) assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \ 'tot_ebin_area=' + str(tot_ebin_area) + \ ' should equal tot_ebin_area0=' + str(tot_ebin_area0) #================================================================== # Neutrino coszen resolutions #================================================================== dmin = min(cz_err) dmax = max(cz_err) drange = dmax-dmin # NOTE the limits are 1 less than / 1 greater than the limits that # the error will actually take on, so as to allow for any smooth # roll-off at edges of data. The calculation of areas below # captures all of the area, though, by reflecting bins defined in # [-1, 1] about the points -1 and 1, thereby capturing any # densities in the range [-3, +3]. This is not necessarily # accurate, but it's better than throwing that info out entirely. # # NOTE also that since reco events as of now are only in range -1 # to 0, though, that there are "gaps" in the capture range, but # this is due to densities being in the upper-hemisphere which we # are intentionally ignoring, rather than the code here not taking # them into account. Normalization is based upon *all* events, # whether or not they fall within a bin specified above. # Number of points in the mesh used for VBWKDE; must be large # enough to capture fast changes in the data but the larger the # number, the longer it takes to compute the densities at all the # points. 
Here, just choosing a fixed number regardless of the data # or binning N_cz_mesh = 2**10 # Data range for VBWKDE to consider cz_gaus_kde_min = -3 cz_gaus_kde_max = +2 cz_gaus_kde_failed = False previous_fail = False for n in xrange(3): # TODO: only catch specific exception try: cz_bw, cz_mesh, cz_pdf = kde.vbw_kde( data = cz_err, overfit_factor = OVERFIT_FACTOR, MIN = cz_gaus_kde_min, MAX = cz_gaus_kde_max, N = N_cz_mesh ) except: cz_gaus_kde_failed = True if n == 0: logging.trace('(cz vbwkde ') logging.trace('fail, ') # If failure occurred in vbw_kde, expand the data range it # takes into account; this usually helps cz_gaus_kde_min -= 1 cz_gaus_kde_max += 1 else: if cz_gaus_kde_failed: previous_fail = True logging.trace('success!') cz_gaus_kde_failed = False finally: if previous_fail: logging.trace(')') previous_fail = False if not cz_gaus_kde_failed: break if cz_gaus_kde_failed: logging.warn('Failed to fit VBWKDE!') continue if np.min(cz_pdf) < 0: logging.warn("np.min(cz_pdf) < 0: Minimum value is " + str(np.min(cz_pdf)) + "; forcing all negative values to 0.") cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf) assert np.min(cz_pdf) >= -self.EPSILON, \ str(np.min(cz_pdf)) for czbin_n in range(n_czbins): czbin_mid = czbin_centers[czbin_n] # Re-center distribution at the center of the current cz bin offset_cz_mesh = cz_mesh + czbin_mid # Create interpolation object, used to fill in bin edge values interp = interpolate.interp1d( x = offset_cz_mesh, y = cz_pdf, kind = 'linear', copy = True, bounds_error = False, fill_value = 0 ) # Figure out where all bin edges lie in this re-centered # distribution (some bins may be repeated since bins in [-1,0] # and err in [-2,1]): # # 1. Find limits of mesh values.. mmin = offset_cz_mesh[0] mmax = offset_cz_mesh[-1] # 2. Map all bin edges into the full mesh-value range, # reflecting about -1 and +1. If the reflected edge is outside # the mesh range, use the exceeded limit of the mesh range as # the bin edge instead. # # This maps every bin edge {i} to 3 new edges, indexed # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices # and same-subindices, so what started as, e.g., bin 3 now is # described by (left, right) edges at # (new_edges[3][0], new_edges[4][0]), # (new_edges[3][1], new_edges[4][1]), and # (new_edges[3][2], new_edges[4][2]). # NOTE / TODO: It's tempting to dynamically set the number of # reflections to minimize computation time, but I think it # breaks the code. Just set to a reasonably large number for # now and accept the performance penalty. ALSO: if you change # the parity of the number of reflections, the code below that # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be # swapped!!!
n_left_reflections = 4 n_right_reflections = 4 new_czbin_edges = [] for edge in czbin_edges: edges_refl_left = [] for n in xrange(n_left_reflections): edge_refl_left = reflect1d(edge, -1-(2*n)) if edge_refl_left < mmin: edge_refl_left = mmin edges_refl_left.append(edge_refl_left) edges_refl_right = [] for n in xrange(n_right_reflections): edge_refl_right = reflect1d(edge, +1+(2*n)) if edge_refl_right > mmax: edge_refl_right = mmax edges_refl_right.append(edge_refl_right) # Include all left-reflected versions of this bin edge, in # increasing-x order + this bin edge + right-reflected # versions of this bin edge new_czbin_edges.append(edges_refl_left[::-1] + [edge] + edges_refl_right) # Record all unique bin edges edge_locs = set() [edge_locs.update(edges) for edges in new_czbin_edges] # Throw away bin edges that are already in the mesh [edge_locs.remove(edge) for edge in list(edge_locs) if edge in offset_cz_mesh] # Make into sorted list edge_locs = sorted(edge_locs) # Record the total area under the curve int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh) # Insert the missing bin edge locations & pdf-values into # the mesh & pdf, respectively edge_pdfs = interp(edge_locs) insert_ind = np.searchsorted(offset_cz_mesh, edge_locs) offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind, edge_locs) offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs) assert np.min(offset_cz_pdf) > -self.EPSILON # Check that this total of all the bins is equal to the total # area under the curve (i.e., check there is no overlap between # or gaps between bins) int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh) assert np.abs(int_val-1) < self.EPSILON # Renormalize if it's not exactly 1 if int_val != 1.0: offset_cz_pdf = offset_cz_pdf / int_val # Add up the area in the bin and areas that are "reflected" # into this bin new_czbin_edges = np.array(new_czbin_edges) czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1) for wrap_n in range(np.shape(new_czbin_edges)[1]): bin_edge_inds = np.searchsorted(offset_cz_mesh, new_czbin_edges[:,wrap_n]) lbinds = bin_edge_inds[0:-1] rbinds = bin_edge_inds[1:] # Make sure indices that appear first are less than indices # that appear second in a pair of bin indices if (wrap_n+1) % 2 == 0: bininds = zip(rbinds, lbinds) else: bininds = zip(lbinds, rbinds) tmp_areas = [] for (binind_left_edge, binind_right_edge) in bininds: if binind_left_edge == binind_right_edge: tmp_areas.append(0) continue this_bin_area = np.array(np.trapz( y=offset_cz_pdf[binind_left_edge:binind_right_edge+1], x=offset_cz_mesh[binind_left_edge:binind_right_edge+1] )) tmp_areas.append(this_bin_area) czbin_areas += np.array(tmp_areas) assert np.min(czbin_areas) > -self.EPSILON tot_czbin_area = np.sum(czbin_areas) assert tot_czbin_area < int_val + self.EPSILON kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas) assert (np.sum(kernel4d[ebin_n, czbin_n]) - tot_ebin_area*tot_czbin_area) < self.EPSILON check_areas = kernel4d.sum(axis=(2,3)) assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas)) assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas)) return kernel4d
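# Illustration (a sketch added for clarity, not part of the original method)
# of how a 4D kernel returned by single_kernel_set maps a 2D true-space
# histogram onto reco space: summing over the two true-bin axes with
# np.tensordot yields the expected reco-space histogram. Shapes below are
# arbitrary stand-ins.
import numpy as np

n_e, n_cz = 10, 8
kernel4d = np.random.uniform(size=(n_e, n_cz, n_e, n_cz))
true_hist = np.random.uniform(size=(n_e, n_cz))
# reco[k, l] = sum_{i, j} true[i, j] * kernel4d[i, j, k, l]
reco_hist = np.tensordot(true_hist, kernel4d, axes=([0, 1], [0, 1]))
assert reco_hist.shape == (n_e, n_cz)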
asimov_data_set = get_asimov_fmap(template_maker, asimov_params, channel=asimov_params['channel']) # Store injected true values in result: for key in free_params.keys(): if 'theta23' in key: continue result['true_' + key].append(asimov_params[key]) result['true_theta23'].append(step) result['asimov_data'].append(asimov_data_set) # now get fitted values of opposite hierarchy: hypo_normal = False if true_normal else True hypo_tag = 'hypo_IMH' if true_normal else 'hypo_NMH' llh_data = find_alt_hierarchy_fit(asimov_data_set, template_maker, params, hypo_normal, minimizer_settings, only_atm_params=False, check_octant=args.check_octant) for key in free_params.keys(): result['fit_' + key].append(llh_data[key][-1]) results[true_tag] = result logging.warn("FINISHED. Saving to file: %s" % args.outfile) to_json(results, args.outfile)
def plot_cmp(new, ref, new_label, ref_label, plot_label, file_label, outdir, ftype='png'): """Plot comparisons between two (identically-binned) maps or map sets. Parameters ---------- new : Map or MapSet ref : Map or MapSet new_label : str ref_label : str plot_label : str file_label : str outdir : str ftype : str """ path = [outdir] if isinstance(ref, Map): assert isinstance(new, Map) ref_maps = [ref] new_maps = [new] if outdir is not None: mkdir(os.path.join(*path), warn=False) for ref, new in zip(ref_maps, new_maps): assert ref.binning == new.binning fname = get_valid_filename('__'.join([ get_valid_filename(file_label), '%s_vs_%s' % (get_valid_filename(new_label.lower()), get_valid_filename(ref_label.lower())) ]) + '.' + ftype) path.append(fname) ratio = new / ref diff = new - ref fract_diff = diff / ref finite_ratio = ratio.hist[np.isfinite(ratio.hist)] ratio_mean = np.mean(finite_ratio) ratio_median = np.median(finite_ratio) finite_diff = diff.hist[np.isfinite(diff.hist)] diff_mean = np.mean(finite_diff) diff_median = np.median(finite_diff) finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)] fract_diff_mean = np.mean(finite_fract_diff) fract_diff_median = np.median(finite_fract_diff) max_diff_ratio = np.nanmax(fract_diff.hist) # Handle cases where ratio returns infinite # This isn't necessarily a fail, since all it means is the reference was # zero. If the new value is sufficiently close to zero then it's still # fine. if max_diff_ratio == np.inf: logging.warn( 'Infinite value found in ratio tests. Difference tests' ' now also being calculated') # First find all the finite elements finite_mask = np.isfinite(fract_diff.hist) # Then find the nanmax of this, will be our new test value max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask]) # Also find all the infinite elements; compute a second test value max_diff = np.nanmax(diff.hist[~finite_mask]) else: # Without any infinite elements we can ignore this second test max_diff = 0.0 if outdir is not None: if new.binning.num_dims == 2: n_dims = 2 n_third_dim_bins = 1 elif new.binning.num_dims == 3: n_dims = 3 odd_dim_idx = new.binning.shape.index(np.min( new.binning.shape)) logging.debug('odd_dim_idx: %s', odd_dim_idx) n_third_dim_bins = new.binning.shape[odd_dim_idx] gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32) fig, axes = plt.subplots(nrows=n_third_dim_bins, ncols=5, gridspec_kw=gridspec_kw, squeeze=False, sharex=False, sharey=False, figsize=(20, 5)) refslice = ref newslice = new bin_names = None if n_dims == 3: if odd_dim_idx != 0: refslice = np.moveaxis(ref, source=odd_dim_idx, destination=0) newslice = np.moveaxis(new, source=odd_dim_idx, destination=0) bin_names = new.binning.dims[odd_dim_idx].bin_names for odd_bin_idx in range(n_third_dim_bins): if n_dims == 2: thisbin_ref = refslice thisbin_new = newslice tmp_ref_label = ref_label tmp_new_label = new_label elif n_dims == 3: thisbin_ref = refslice[odd_bin_idx, ...].squeeze() thisbin_new = newslice[odd_bin_idx, ...].squeeze() if bin_names is not None: suffix = bin_names[odd_bin_idx] else: suffix = format(odd_bin_idx, 'd') tmp_new_label = new_label + ' ' + suffix tmp_ref_label = ref_label + ' ' + suffix ratio = thisbin_new / thisbin_ref diff = thisbin_new - thisbin_ref fract_diff = diff / thisbin_ref refmax = np.nanmax(thisbin_ref.hist) newmax = np.nanmax(thisbin_new.hist) vmax = refmax if refmax > newmax else newmax baseplot2(map=thisbin_new, title=tmp_new_label, vmax=vmax, evtrate=True, ax=axes[odd_bin_idx][0]) baseplot2(map=thisbin_ref, title=tmp_ref_label,
vmax=vmax, evtrate=True, ax=axes[odd_bin_idx][1]) ax, _, _ = baseplot2(map=ratio, title='%s/%s' % (tmp_new_label, tmp_ref_label), ax=axes[odd_bin_idx][2]) ax.text(0.95, 0.95, "Mean: %.6f" % ratio_mean, horizontalalignment='right', transform=ax.transAxes, color=(0, 0.8, 0.8)) ax.text(0.95, 0.91, "Median: %.6f" % ratio_median, horizontalalignment='right', transform=ax.transAxes, color=(0, 0.8, 0.8)) ax, _, _ = baseplot2(map=diff, title='%s-%s' % (tmp_new_label, tmp_ref_label), symm=True, ax=axes[odd_bin_idx][3]) ax.text(0.95, 0.95, "Mean: %.6f" % diff_mean, horizontalalignment='right', transform=ax.transAxes) ax.text(0.95, 0.91, "Median: %.6f" % diff_median, horizontalalignment='right', transform=ax.transAxes) ax, _, _ = baseplot2( map=fract_diff, title='(%s-%s)/%s' % (tmp_new_label, tmp_ref_label, tmp_ref_label), symm=True, ax=axes[odd_bin_idx][4]) ax.text(0.95, 0.95, "Mean: %.6f" % fract_diff_mean, horizontalalignment='right', transform=ax.transAxes) ax.text(0.95, 0.91, "Median: %.6f" % fract_diff_median, horizontalalignment='right', transform=ax.transAxes) logging.debug('>>>> Plot for inspection saved at %s' % os.path.join(*path)) fig.savefig(os.path.join(*path)) plt.close(fig.number) return max_diff_ratio, max_diff
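# A numpy-only sketch of the summary statistics plot_cmp computes for two
# identically-binned histograms (hypothetical arrays; the real function
# operates on pisa Map/MapSet objects, whose .hist attribute holds the
# underlying array).
import numpy as np

new_hist = np.array([[1.0, 2.0], [0.0, 4.0]])
ref_hist = np.array([[1.0, 2.5], [0.0, 5.0]])
with np.errstate(divide='ignore', invalid='ignore'):
    diff = new_hist - ref_hist
    fract_diff = diff / ref_hist  # inf/nan wherever the reference is zero
finite = np.isfinite(fract_diff)
max_diff_ratio = np.nanmax(fract_diff[finite])
# where the ratio blew up, fall back to the absolute difference instead
max_diff = np.nanmax(diff[~finite]) if (~finite).any() else 0.0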
set_verbosity(args.verbose) data_files = {'nue':args.nue,'numu':args.numu,'nutau':args.nutau} logging.info("input files:\n%s"%data_files) # Ensure overwrite of existing filename... outfilename = args.outfile fh = h5py.File(outfilename,'w') fh.close() logging.info("Writing to file: %s",outfilename) # Define V3, V4, or V5 cuts: cut_list = [] if args.cutsV3: logging.warn("Using cuts V3...") cut_list.append(('NewestBgRejCutsStep1','value',True)) cut_list.append(('NewestBgRejCutsStep2','value',True)) elif args.cutsV4: logging.warn("Using cuts V4...") cut_list.append(('Cuts_V4_Step1','value',True)) cut_list.append(('Cuts_V4_Step2','value',True)) elif args.cutsV5: logging.warn("Using cuts V5...") cut_list.append(('Cuts_V5_Step1','value',True)) cut_list.append(('Cuts_V5_Step2','value',True)) elif args.nocuts: logging.warn("Using no selection cuts!") cut_list = [] elif args.custom: logging.warn("Using CUSTOM cuts: %s..."%args.custom_str)
def get_hypersurface(self, **param_kw): """ Get a Hypersurface object with interpolated coefficients. Parameters ---------- **param_kw Parameters are given as keyword arguments, where the names of the arguments must match the names of the parameters over which the hypersurfaces are interpolated. The values are given as :obj:`Quantity` objects with units. """ assert set(param_kw.keys()) == set( self.interp_param_spec.keys()), "invalid parameters" # getting param magnitudes in the same units as the parameter specification x = np.array([ param_kw[p].m_as(self.interp_param_spec[p]["values"][0].u) # we have checked that this is an OrderedDict so that the order of x is not # ambiguous here for p in self.interp_param_spec.keys() ]) assert len(x) == len(self.param_bounds) for i, bounds in enumerate(self.param_bounds): x[i] = np.clip(x[i], *bounds) # if a parameter scales as log, we have to take the log here again for i, param_name in enumerate(self.interpolation_param_names): if self.interp_param_spec[param_name]["scales_log"]: # We must be strict with raising errors here, because otherwise # the Hypersurface will suddenly have NaNs everywhere! This shouldn't # happen because we clip values into the valid parameter range. if x[i] <= 0: raise RuntimeError( "A log-scaling parameter cannot become zero " "or negative!") x[i] = np.log10(x[i]) state = copy.deepcopy(self._reference_state) # fit covariance matrices are stored directly in the state while fit coeffts # must be assigned with the setter method... # need squeeze here because the RegularGridInterpolator always puts another # dimension around the output state["fit_cov_mat"] = np.squeeze(self.covars(x)) assert state["fit_cov_mat"].shape == self.covars_shape for idx in np.ndindex(state['fit_cov_mat'].shape): if self.ignore_nan: continue assert np.isfinite(state['fit_cov_mat'][idx]), ( "invalid cov matrix " f"element encountered at {param_kw} in loc {idx}") # check covariance matrices for symmetry, positive semi-definiteness for bin_idx in np.ndindex(state['fit_cov_mat'].shape[:-2]): m = state['fit_cov_mat'][bin_idx] if self.ignore_nan and np.any(~np.isfinite(m)): state['fit_cov_mat'][bin_idx] = np.identity(m.shape[0]) m = state['fit_cov_mat'][bin_idx] assert np.allclose( m, m.T, rtol=ALLCLOSE_KW['rtol'] * 10.), f'cov matrix not symmetric in bin {bin_idx}' if not matrix.is_psd(m): state['fit_cov_mat'][bin_idx] = matrix.fronebius_nearest_psd(m) if not bin_idx in self.covar_bins_warning_issued: logging.warn( f'Invalid covariance matrix fixed in bin: {bin_idx}') self.covar_bins_warning_issued.append(bin_idx) hypersurface = Hypersurface.from_state(state) coeffts = np.squeeze(self.coefficients(x)) # calls interpolator assert coeffts.shape == self.coeff_shape # check that coefficients exist and if not replace with default values for idx in np.ndindex(self.coeff_shape): if self.ignore_nan and ~np.isfinite(coeffts[idx]): coeffts[idx] = 1 if idx[ -1] == 0 else 0 # set intercept to 1, slopes 0 assert np.isfinite(coeffts[idx]), ("invalid coeff encountered at " f"{param_kw} in loc {idx}") # the setter method defined in the Hypersurface class takes care of # putting the coefficients in the right place in their respective parameters hypersurface.fit_coeffts = coeffts return hypersurface
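# Sketch of the standard Frobenius-nearest-PSD repair (Higham 1988) that the
# matrix.fronebius_nearest_psd call above relies on when a fitted covariance
# matrix fails the positive-semi-definiteness check. This is an illustration
# of the technique, not the actual pisa.utils.matrix implementation:
# symmetrize, eigendecompose, clip negative eigenvalues at zero, recompose.
import numpy as np

def nearest_psd_sketch(a):
    sym = 0.5 * (a + a.T)  # Frobenius-nearest symmetric matrix
    eigval, eigvec = np.linalg.eigh(sym)  # real spectral decomposition
    eigval = np.clip(eigval, 0.0, None)  # drop negative modes
    # eigvec * eigval scales column j by eigval[j], i.e. V @ diag(w)
    return np.dot(eigvec * eigval, eigvec.T)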
def single_kernel_set(self, e_true, cz_true, e_reco, cz_reco, flav, int_type, make_plots=False, out_dir=None): """Construct a 4D kernel set from MC events using VBWKDE. Given a set of MC events and each of their {energy{true, reco}, coszen{true, reco}}, generate a 4D NumPy array that maps a 2D true-flux histogram onto the corresponding 2D reco-flux histogram. The resulting 4D array can be indexed logically using kernel4d[e_true_i, cz_true_j][e_reco_k, cz_reco_l] where the 4 indices point from a single MC-true histogram bin (i,j) to a single reco histogram bin (k,l). Binning of both MC-true and reco histograms is the same and is given by the values in self.ebins and self.czbins which define the bin *edges* (not the bin centers; hence, len(self.ebins) is one greater than the number of bins, etc.). NOTE: Actual limits in energy used to group events into a single "true" bin may be extended beyond the bin edges defined by self.ebins in order to gather enough events to successfully apply VBWKDE. Parameters ---------- e_true : sequence MC-true neutrino energies, one per event cz_true : sequence MC-true neutrino coszen, one per event e_reco : sequence Reconstructed neutrino energies, one per event cz_reco : sequence Reconstructed neutrino coszen, one per event flav : str int_type : str make_plots : bool out_dir : str or None path to directory into which to save plots. ``None`` (default) saves to PWD. Returns ------- kernel4d : 4D array of float Mapping from the number of events in each bin of the 2D MC-true-events histogram to the number of events reconstructed in each bin of the 2D reconstructed-events histogram. Dimensions are len(self.ebins)-1 x len(self.czbins)-1 x len(self.ebins)-1 x len(self.czbins)-1 since ebins and czbins define the histograms' bin edges. """ OVERFIT_FACTOR = 1.0 if make_plots: import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages from matplotlib.patches import Rectangle plt.close(1) plt.close(2) plt.close(3) def rugplot(a, y0, dy, ax, **kwargs): return ax.plot([a,a], [y0, y0+dy], **kwargs) plot_fname = '_'.join(['resolutions', 'vbwkde', flav, int_type]) + '.pdf' if out_dir is not None: plot_fname = os.path.join(out_dir, plot_fname) TOP = 0.925 BOTTOM = 0.05 RIGHT = 0.97 LEFT = 0.07 HSPACE = 0.12 LABELPAD = 0.058 AXISBG = (0.5, 0.5, 0.5) DARK_RED = (0.7, 0.0, 0.0) HIST_PP = dict( facecolor=(1,0.5,0.5), edgecolor=DARK_RED, histtype='stepfilled', alpha=0.7, linewidth=2.0, label=r'$\mathrm{Histogram}$' ) N_HBINS = 25 DIFFUS_PP = dict( color=(0.0, 0.0, 0.0), linestyle='-', marker=None, alpha=0.6, linewidth=2.0, label=r'$\mathrm{VBWKDE}$' ) RUG_PP = dict(color=(1.0, 1.0, 1.0), linewidth=0.4, alpha=0.5) RUG_LAB =r'$\mathrm{Rug\,plot}$' LEGFNTCOL = (1,1,1) LEGFACECOL = (0.2,0.2,0.2) GRIDCOL = (0.4, 0.4, 0.4) pdfpgs = PdfPages(plot_fname) assert np.min(np.diff(self.ebins)) > 0, \ "Energy bin edges not monotonically increasing." assert np.min(np.diff(self.czbins)) > 0, \ "coszen bin edges not monotonically increasing." # NOTE: below defines bin centers on linear scale; other logic # in this method assumes this to be the case, so # **DO NOT USE** utils.utils.get_bin_centers in this method, which # may return logarithmically-defined centers instead. 
ebin_edges = np.array(self.ebins) left_ebin_edges = ebin_edges[0:-1] right_ebin_edges = ebin_edges[1:] ebin_centers = (left_ebin_edges+right_ebin_edges)/2.0 ebin_range = ebin_edges[-1] - ebin_edges[0] n_ebins = len(ebin_centers) czbin_edges = np.array(self.czbins) left_czbin_edges = czbin_edges[0:-1] right_czbin_edges = czbin_edges[1:] czbin_centers = (left_czbin_edges+right_czbin_edges)/2.0 n_czbins = len(czbin_centers) n_events = len(e_true) if self.MIN_NUM_EVENTS > n_events: self.MIN_NUM_EVENTS = n_events if self.TGT_NUM_EVENTS > n_events: self.TGT_NUM_EVENTS = n_events # Object with which to store the 4D kernels: np 4D array kernel4d = np.zeros((n_ebins, n_czbins, n_ebins, n_czbins)) # Object with which to store the 2D "aggregate_map": the total number # of events reconstructed into a given (E, CZ) bin, used for sanity # checks aggregate_map = np.zeros((n_ebins, n_czbins)) for ebin_n in range(n_ebins): ebin_min = left_ebin_edges[ebin_n] ebin_max = right_ebin_edges[ebin_n] ebin_mid = (ebin_min+ebin_max)/2.0 ebin_wid = ebin_max-ebin_min logging.debug( 'Processing true-energy bin_n=' + format(ebin_n, 'd') + ' of ' + format(n_ebins-1, 'd') + ', E_{nu,true} in ' + '[' + format(ebin_min, '0.3f') + ', ' + format(ebin_max, '0.3f') + '] ...' ) # Absolute distance from these events' true energies to the center # of this energy bin; sort a copy in ascending-distance order abs_enu_dist = np.abs(e_true - ebin_mid) sorted_abs_enu_dist = np.sort(abs_enu_dist) # Grab the distance the number-"TGT_NUM_EVENTS" event is from the # bin center tgt_thresh_enu_dist = sorted_abs_enu_dist[self.TGT_NUM_EVENTS-1] # Grab the distance the number-"MIN_NUM_EVENTS" event is from the # bin center min_thresh_enu_dist = sorted_abs_enu_dist[self.MIN_NUM_EVENTS-1] # TODO: revisit the below algorithm with proper testing # Make threshold distance (which is half the total width) no more # than 4x the true-energy-bin width in order to capture the # "target" number of points (TGT_NUM_EVENTS) but no less than half # the bin width (i.e., the bin should be at least be as wide as the # pre-defined bin width). # # HOWEVER, allow the threshold distance (bin half-width) to expand # to as much as 4x the original bin full-width in order to capture # the "minimum" number of points (MIN_NUM_EVENTS).
thresh_enu_dist = \ max(min(max(tgt_thresh_enu_dist, ebin_wid/2), 4*ebin_wid), min_thresh_enu_dist) # Grab all events within the threshold distance in_ebin_ind = np.where(abs_enu_dist <= thresh_enu_dist)[0] #print '** IN EBIN FIRST, LAST ENERGY:', e_reco[in_ebin_ind[0]], e_reco[in_ebin_ind[-1]] n_in_bin = len(in_ebin_ind) # Record lowest/highest energies that are included in the bin actual_left_ebin_edge = min(ebin_min, min(e_true[in_ebin_ind])) #max(min(ebins), ebin_mid-thresh_enu_dist) actual_right_ebin_edge = max(ebin_max, max(e_true[in_ebin_ind])) #(max(ebins), ebin_mid+thresh_enu_dist) # Extract just the neutrino-energy/coszen error columns' values for # succinctness enu_err = e_reco[in_ebin_ind] - e_true[in_ebin_ind] cz_err = cz_reco[in_ebin_ind] - cz_true[in_ebin_ind] #================================================================== # Neutrino energy resolutions #================================================================== dmin = min(enu_err) dmax = max(enu_err) drange = dmax-dmin e_lowerlim = min(self.ENERGY_RANGE[0]-ebin_mid*1.5, dmin-drange*0.5) e_upperlim = max((np.max(ebin_edges)-ebin_mid)*1.5, dmax+drange*0.5) egy_kde_lims = np.array([e_lowerlim, e_upperlim]) # Use at least min_num_pts points and at most the next-highest # integer-power-of-two that allows for at least 5 points # (min_pts_smallest_bin) in the smallest energy bin min_num_pts = 2**12 min_bin_width = np.min(ebin_edges[1:]-ebin_edges[:-1]) min_pts_smallest_bin = 5.0 kde_range = np.diff(egy_kde_lims) num_pts0 = kde_range/(min_bin_width/min_pts_smallest_bin) kde_num_pts = int(max(min_num_pts, 2**np.ceil(np.log2(num_pts0)))) logging.debug( ' N_evts=' + str(n_in_bin) + ', taken from [' + format(actual_left_ebin_edge, '0.3f') + ', ' + format(actual_right_ebin_edge, '0.3f') + ']' + ', VBWKDE lims=' + str(egy_kde_lims) + ', VBWKDE_N: ' + str(kde_num_pts) ) # Compute variable-bandwidth KDEs enu_bw, enu_mesh, enu_pdf = kde.vbw_kde( data = enu_err, overfit_factor = OVERFIT_FACTOR, MIN = egy_kde_lims[0], MAX = egy_kde_lims[1], N = kde_num_pts ) if np.min(enu_pdf) < 0: # Only issue warning if the most-negative value is negative # beyond specified acceptable-numerical-precision threshold # (EPSILON) if np.min(enu_pdf) <= -self.EPSILON: logging.warn( "np.min(enu_pdf) < 0: Minimum value is " + str(np.min(enu_pdf)) + "; forcing all negative values to 0." ) # Otherwise, just quietly clip any negative values at 0 enu_pdf = np.clip(a=enu_pdf, a_min=0, a_max=np.inf) assert np.min(enu_pdf) >= 0, str(np.min(enu_pdf)) # Re-center distribution at the center of the energy bin for which # errors were computed offset_enu_mesh = enu_mesh+ebin_mid offset_enu_pdf = enu_pdf # Get reference area under the PDF, for checking after interpolated # values are added. # # NOTE There should be NO normalization because any events lost due # to cutting off tails outside the binned region are actually going # to be lost, and so should penalize the total area.
int_val0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) # Create linear interpolator for the PDF interp = interpolate.interp1d( x = offset_enu_mesh, y = offset_enu_pdf, kind = 'linear', copy = True, bounds_error = True, fill_value = np.nan ) # Insert all bin edges' exact locations into the mesh (For accurate # accounting of area in each bin, must include values out to bin # edges) edge_locs = [be for be in np.concatenate((left_ebin_edges, right_ebin_edges)) if not(be in offset_enu_mesh)] edge_locs.sort() edge_pdfs = interp(edge_locs) insert_ind = np.searchsorted(offset_enu_mesh, edge_locs) offset_enu_mesh = np.insert(offset_enu_mesh, insert_ind, edge_locs) offset_enu_pdf = np.insert(offset_enu_pdf, insert_ind, edge_pdfs) int_val = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) assert np.abs(int_val - int_val0) < self.EPSILON # Chop off distribution at extrema of energy bins valid_ind = np.where( (offset_enu_mesh >= np.min(ebin_edges)) & (offset_enu_mesh <= np.max(ebin_edges)) )[0] offset_enu_mesh = offset_enu_mesh[valid_ind] offset_enu_pdf = offset_enu_pdf[valid_ind] # Check that there are no negative density values (after inserts) assert np.min(offset_enu_pdf) > 0-self.EPSILON, \ str(np.min(offset_enu_pdf)) # Record the integrated area after removing parts outside binned # range tot_ebin_area0 = np.trapz(y=offset_enu_pdf, x=offset_enu_mesh) # Check that it integrates to <= 1, sanity check assert tot_ebin_area0 < 1+self.EPSILON, str(tot_ebin_area0) # Identify indices encapsulating the defined energy bins' ranges, # and find the area of each bin lbinds = np.searchsorted(offset_enu_mesh, left_ebin_edges) rbinds = np.searchsorted(offset_enu_mesh, right_ebin_edges) bininds = zip(lbinds, rbinds) ebin_areas = [np.trapz(y=offset_enu_pdf[l:r+1], x=offset_enu_mesh[l:r+1]) for (l, r) in bininds] # Check that no bins have negative areas assert np.min(ebin_areas) >= 0 # Sum the individual bins' areas tot_ebin_area = np.sum(ebin_areas) # Check that this total of all the bins is equal to the total area # under the curve (i.e., make sure there is no overlap or gaps # between bins) assert np.abs(tot_ebin_area-tot_ebin_area0) < self.EPSILON, \ 'tot_ebin_area=' + str(tot_ebin_area) + \ ' should equal tot_ebin_area0=' + str(tot_ebin_area0) if make_plots: fig1 = plt.figure(1, figsize=(8,10), dpi=90) fig1.clf() ax1 = fig1.add_subplot(211, axisbg=AXISBG) # Retrieve region where VBWKDE lives ml_ci = confInterval.MLConfInterval(x=enu_mesh, y=enu_pdf) #for conf in np.logspace(np.log10(0.999), np.log10(0.95), 50): # try: # lb, ub, yopt, r = ml_ci.findCI_lin(conf=conf) # except: # pass # else: # break #xlims = (min(-ebin_mid*1.5, lb), # max(min(ub, 6*ebin_mid),2*ebin_mid)) lb, ub, yopt, r = ml_ci.findCI_lin(conf=0.98) xlims = (lb, #min(-ebin_mid*1.5, lb), max(min(ub, 6*ebin_mid),2*ebin_wid)) #xlims = ( # -ebin_wid*1.5, # ebin_wid*1.5 #) # min(ebin_mid*2, ebin_edges[-1]+(ebin_edges[-1]-ebin_edges[0])*0.1) #) # Histogram of events' reco error hbins = np.linspace(dmin-0.02*drange, dmax+0.02*drange, N_HBINS*np.round(drange/ebin_centers[ebin_n])) hvals, hbins, hpatches = ax1.hist(enu_err, bins=hbins, normed=True, **HIST_PP) # Plot the VBWKDE ax1.plot(enu_mesh, enu_pdf, **DIFFUS_PP) axlims = ax1.axis('tight') ax1.set_xlim(xlims) ymax = axlims[3]*1.05 ax1.set_ylim(0, ymax) # Grey-out regions outside binned region, so it's clear what # part of tail(s) will be thrown away width = -ebin_mid+ebin_edges[0]-xlims[0] unbinned_region_tex = r'$\mathrm{Unbinned}$' if width > 0: ax1.add_patch(Rectangle((xlims[0],0), width, ymax, 
#zorder=-1, alpha=0.30, facecolor=(0.0 ,0.0, 0.0), fill=True, ec='none')) ax1.text(xlims[0]+(xlims[1]-xlims[0])/40., ymax/10., unbinned_region_tex, fontsize=14, ha='left', va='bottom', rotation=90, color='k') width = xlims[1] - (ebin_edges[-1]-ebin_mid) if width > 0: ax1.add_patch(Rectangle((xlims[1]-width,0), width, ymax, alpha=0.30, facecolor=(0, 0, 0), fill=True, ec='none')) ax1.text(xlims[1]-(xlims[1]-xlims[0])/40., ymax/10., unbinned_region_tex, fontsize=14, ha='right', va='bottom', rotation=90, color='k') # Rug plot of events' reco energy errors ylim = ax1.get_ylim() dy = ylim[1] - ylim[0] ruglines = rugplot(enu_err, y0=ylim[1], dy=-dy/40., ax=ax1, **RUG_PP) ruglines[-1].set_label(RUG_LAB) # Legend leg_title_tex = r'$\mathrm{Normalized}\,E_\nu\mathrm{-err.\,distr.}$' x1lab = ax1.set_xlabel( r'$E_{\nu,\mathrm{reco}}-E_{\nu,\mathrm{true}}\;' + r'(\mathrm{GeV})$', labelpad=LABELPAD ) leg = ax1.legend(loc='upper right', title=leg_title_tex, frameon=True, framealpha=0.8, fancybox=True, bbox_to_anchor=[1,0.975]) # Other plot details ax1.xaxis.set_label_coords(0.9, -LABELPAD) ax1.xaxis.grid(color=GRIDCOL) ax1.yaxis.grid(color=GRIDCOL) leg.get_title().set_fontsize(16) leg.get_title().set_color(LEGFNTCOL) [t.set_color(LEGFNTCOL) for t in leg.get_texts()] frame = leg.get_frame() frame.set_facecolor(LEGFACECOL) frame.set_edgecolor(None) #================================================================== # Neutrino coszen resolution for events in this energy bin #================================================================== dmin = min(cz_err) dmax = max(cz_err) drange = dmax-dmin # NOTE the limits are 1 less than / 1 greater than the limits that # the error will actually take on, so as to allow for any smooth # roll-off at edges of data. The calculation of areas below # captures all of the area, though, by reflecting bins defined in # [-1, 1] about the points -1 and 1, thereby capturing any # densities in the range [-3, +3]. This is not necessarily # accurate, but it's better than throwing that info out entirely. # # NOTE also that since reco events as of now are only in range -1 # to 0, though, that there are "gaps" in the capture range, but # this is due to densities being in the upper-hemisphere which we # are intentionally ignoring, rather than the code here not taking # them into account. Normalization is based upon *all* events, # whether or not they fall within a bin specified above. # Number of points in the mesh used for VBWKDE; must be large # enough to capture fast changes in the data but the larger the # number, the longer it takes to compute the densities at all the # points. 
Here, just choosing a fixed number regardless of the data # or binning N_cz_mesh = 2**10 # Data range for VBWKDE to consider cz_kde_min = -3 cz_kde_max = +2 cz_kde_failed = False previous_fail = False for n in xrange(3): # TODO: only catch specific exception try: cz_bw, cz_mesh, cz_pdf = kde.vbw_kde( data = cz_err, overfit_factor = OVERFIT_FACTOR, MIN = cz_kde_min, MAX = cz_kde_max, N = N_cz_mesh ) except: cz_kde_failed = True if n == 0: logging.trace('(cz vbwkde ') logging.trace('fail, ') # If failure occurred in vbw_kde, expand the data range it # takes into account; this usually helps cz_kde_min -= 1 cz_kde_max += 1 else: if cz_kde_failed: previous_fail = True logging.trace('success!') cz_kde_failed = False finally: if previous_fail: logging.trace(')') previous_fail = False if not cz_kde_failed: break if cz_kde_failed: logging.warn('Failed to fit VBWKDE!') continue if np.min(cz_pdf) < 0: logging.warn("np.min(cz_pdf) < 0: Minimum value is " + str(np.min(cz_pdf)) + "; forcing all negative values to 0.") cz_pdf = np.clip(a=cz_pdf, a_min=0, a_max=np.inf) assert np.min(cz_pdf) >= -self.EPSILON, \ str(np.min(cz_pdf)) # TODO: test and/or visualize the shifting & re-binning process for czbin_n in range(n_czbins): czbin_mid = czbin_centers[czbin_n] # Re-center distribution at the center of the current cz bin offset_cz_mesh = cz_mesh + czbin_mid # Create interpolation object, used to fill in bin edge values interp = interpolate.interp1d( x = offset_cz_mesh, y = cz_pdf, kind = 'linear', copy = True, bounds_error = False, fill_value = 0 ) # Figure out where all bin edges lie in this re-centered # distribution (some bins may be repeated since bins in [-1,0] # and err in [-2,1]): # # 1. Find limits of mesh values.. mmin = offset_cz_mesh[0] mmax = offset_cz_mesh[-1] # 2. Map all bin edges into the full mesh-value range, # reflecting about -1 and +1. If the reflected edge is outside # the mesh range, use the exceeded limit of the mesh range as # the bin edge instead. # # This maps every bin edge {i} to 3 new edges, indexed # new_edges[i][{0,1,2}]. Bins are formed by adjacent indices # and same-subindices, so what started as, e.g., bin 3 now is # described by (left, right) edges at # (new_edges[3][0], new_edges[4][0]), # (new_edges[3][1], new_edges[4][1]), and # (new_edges[3][2], new_edges[4][2]). # NOTE / TODO: It's tempting to dynamically set the number of # reflections to minimize computation time, but I think it # breaks the code. Just set to a reasonably large number for # now and accept the performance penalty. ALSO: if you change # the parity of the number of reflections, the code below that # has either (wrap_n % 2 == 0) or (wrap_n+1 % 2 == 0) must be # swapped!!!
n_left_reflections = 4 n_right_reflections = 4 new_czbin_edges = [] for edge in czbin_edges: edges_refl_left = [] for n in xrange(n_left_reflections): edge_refl_left = reflect1d(edge, -1-(2*n)) if edge_refl_left < mmin: edge_refl_left = mmin edges_refl_left.append(edge_refl_left) edges_refl_right = [] for n in xrange(n_right_reflections): edge_refl_right = reflect1d(edge, +1+(2*n)) if edge_refl_right > mmax: edge_refl_right = mmax edges_refl_right.append(edge_refl_right) # Include all left-reflected versions of this bin edge, in # increasing-x order + this bin edge + right-reflected # versions of this bin edge new_czbin_edges.append(edges_refl_left[::-1] + [edge] + edges_refl_right) # Record all unique bin edges edge_locs = set() [edge_locs.update(edges) for edges in new_czbin_edges] # Throw away bin edges that are already in the mesh [edge_locs.remove(edge) for edge in list(edge_locs) if edge in offset_cz_mesh] # Make into sorted list edge_locs = sorted(edge_locs) # Record the total area under the curve int_val0 = np.trapz(y=cz_pdf, x=offset_cz_mesh) # Insert the missing bin edge locations & pdf-values into # the mesh & pdf, respectively edge_pdfs = interp(edge_locs) insert_ind = np.searchsorted(offset_cz_mesh, edge_locs) offset_cz_mesh = np.insert(offset_cz_mesh, insert_ind, edge_locs) offset_cz_pdf = np.insert(cz_pdf, insert_ind, edge_pdfs) assert np.min(offset_cz_pdf) > -self.EPSILON # Check that this total of all the bins is equal to the total # area under the curve (i.e., check there is no overlap between # or gaps between bins) int_val = np.trapz(y=offset_cz_pdf, x=offset_cz_mesh) assert np.abs(int_val-1) < self.EPSILON # Renormalize if it's not exactly 1 if int_val != 1.0: offset_cz_pdf = offset_cz_pdf / int_val # Add up the area in the bin and areas that are "reflected" # into this bin new_czbin_edges = np.array(new_czbin_edges) czbin_areas = np.zeros(np.shape(new_czbin_edges)[0]-1) for wrap_n in range(np.shape(new_czbin_edges)[1]): bin_edge_inds = np.searchsorted(offset_cz_mesh, new_czbin_edges[:,wrap_n]) lbinds = bin_edge_inds[0:-1] rbinds = bin_edge_inds[1:] # Make sure indices that appear first are less than indices # that appear second in a pair of bin indices if (wrap_n+1) % 2 == 0: bininds = zip(rbinds, lbinds) else: bininds = zip(lbinds, rbinds) tmp_areas = [] for (binind_left_edge, binind_right_edge) in bininds: if binind_left_edge == binind_right_edge: tmp_areas.append(0) continue this_bin_area = np.array(np.trapz( y=offset_cz_pdf[binind_left_edge:binind_right_edge+1], x=offset_cz_mesh[binind_left_edge:binind_right_edge+1] )) tmp_areas.append(this_bin_area) czbin_areas += np.array(tmp_areas) assert np.min(czbin_areas) > -self.EPSILON tot_czbin_area = np.sum(czbin_areas) assert tot_czbin_area < int_val + self.EPSILON kernel4d[ebin_n, czbin_n] = np.outer(ebin_areas, czbin_areas) assert (np.sum(kernel4d[ebin_n, czbin_n]) - tot_ebin_area*tot_czbin_area) < self.EPSILON if make_plots: ax2 = fig1.add_subplot(212, axisbg=AXISBG) hbins = np.linspace(dmin-0.02*drange, dmax+0.02*drange, N_HBINS*3) hvals, hbins, hpatches = ax2.hist(cz_err, bins=hbins, normed=True, **HIST_PP) ax2.plot(cz_mesh, cz_pdf, **DIFFUS_PP) fci = confInterval.MLConfInterval(x=cz_mesh, y=cz_pdf) lb, ub, yopt, r = fci.findCI_lin(conf=0.995) axlims = ax2.axis('tight') ax2.set_xlim(lb, ub) ax2.set_ylim(0, axlims[3]*1.05) ylim = ax2.get_ylim() dy = ylim[1] - ylim[0] ruglines = rugplot(cz_err, y0=ylim[1], dy=-dy/40., ax=ax2, **RUG_PP) ruglines[-1].set_label(r'$\mathrm{Rug\,plot}$') x2lab = ax2.set_xlabel( 
r'$\cos\vartheta_{\mathrm{track,reco}}-\cos\vartheta_{\nu,\mathrm{true}}$', labelpad=LABELPAD ) ax2.xaxis.set_label_coords(0.9, -LABELPAD) ax2.xaxis.grid(color=GRIDCOL) ax2.yaxis.grid(color=GRIDCOL) leg_title_tex = r'$\mathrm{Normalized}\,\cos\vartheta\mathrm{-err.\,distr.}$' leg = ax2.legend(loc='upper right', title=leg_title_tex, frameon=True, framealpha=0.8, fancybox=True, bbox_to_anchor=[1,0.975]) leg.get_title().set_fontsize(16) leg.get_title().set_color(LEGFNTCOL) [t.set_color(LEGFNTCOL) for t in leg.get_texts()] frame = leg.get_frame() frame.set_facecolor(LEGFACECOL) frame.set_edgecolor(None) actual_bin_tex = '' if (actual_left_ebin_edge != ebin_min) or (actual_right_ebin_edge != ebin_max): actual_bin_tex = r'E_{\nu,\mathrm{true}}\in [' + \ format(actual_left_ebin_edge, '0.2f') + r',\,' + \ format(actual_right_ebin_edge, '0.2f') + r'] \mapsto ' stt = r'$\mathrm{Resolutions,\,' + flav_tex(flav) + r'\,' + \ int_tex(int_type) + r'}$' + '\n' + \ r'$' + actual_bin_tex + r'\mathrm{Bin}_{' + format(ebin_n, 'd') + r'}\equiv E_{\nu,\mathrm{true}}\in [' + format(ebin_min, '0.2f') + \ r',\,' + format(ebin_max, '0.2f') + r']\,\mathrm{GeV}' + \ r',\,N_\mathrm{events}=' + format(n_in_bin, 'd') + r'$' fig1.subplots_adjust(top=TOP, bottom=BOTTOM, left=LEFT, right=RIGHT, hspace=HSPACE) suptitle = fig1.suptitle(stt) suptitle.set_fontsize(16) suptitle.set_position((0.5,0.98)) fig1.savefig(pdfpgs, format='pdf') check_areas = kernel4d.sum(axis=(2,3)) assert np.max(check_areas) < 1 + self.EPSILON, str(np.max(check_areas)) assert np.min(check_areas) > 0 - self.EPSILON, str(np.min(check_areas)) if make_plots: fig2 = plt.figure(2, figsize=(8,10), dpi=90) fig2.clf() ax = fig2.add_subplot(111) X, Y = np.meshgrid(range(n_czbins), range(n_ebins)) cm = mpl.cm.Paired_r cm.set_over((1,1,1), 1) cm.set_under((0,0,0), 1) plt.pcolor(X, Y, check_areas, vmin=0+self.EPSILON, vmax=1.0, shading='faceted', cmap=cm) plt.colorbar(ticks=np.arange(0, 1.05, 0.05)) ax.grid(0) ax.axis('tight') ax.set_xlabel(r'$\cos\vartheta_\mathrm{true}\mathrm{\,bin\,num.}$') ax.set_ylabel(r'$E_{\nu,\mathrm{true}}\mathrm{\,bin\,num.}$') ax.set_title(r'$\mathrm{Fract\,of\,evts\,starting\,in\,each}\,(E_{\nu,\mathrm{true}},\,\cos\vartheta_\mathrm{true})\,\mathrm{bin\,that\,reco\,in\,bounds}$'+ '\n'+r'$\mathrm{None\,should\,be\,>1\,(shown\,white);\,no-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas),'0.3f') + r'$') fig2.tight_layout() fig2.savefig(pdfpgs, format='pdf') check_areas2 = kernel4d.sum(axis=(0,1)) fig3 = plt.figure(3, figsize=(8,10), dpi=90) fig3.clf() ax = fig3.add_subplot(111) X, Y = np.meshgrid(range(n_czbins), range(n_ebins)) cm = mpl.cm.Paired_r cm.set_over((1,1,1), 1) cm.set_under((0,0,0), 1) plt.pcolor(X, Y, check_areas2, vmin=0+self.EPSILON,# vmax=1.0, shading='faceted', cmap=cm) plt.colorbar(ticks=np.arange(0, 0.1+np.ceil(10.*np.max(check_areas2))/10., 0.05)) ax.grid(0) ax.axis('tight') ax.set_xlabel(r'$\cos\vartheta_\mathrm{reco}\mathrm{\,bin\,num.}$') ax.set_ylabel(r'$E_{\nu,\mathrm{reco}}\mathrm{\,bin\,num.}$') ax.set_title(r'$\mathrm{Normed\,num\,events\,reconstructing\,into\,each}\,(E_{\nu,\mathrm{reco}},\,\cos\vartheta_\mathrm{reco})\,\mathrm{bin}$'+ '\n'+r'$\mathrm{No-event\,bins\,are\,black;\,avg.}=' + format(np.mean(check_areas2),'0.3f') + r'$') fig3.tight_layout() fig3.savefig(pdfpgs, format='pdf') pdfpgs.close() return kernel4d
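# The coszen edge-reflection logic above relies on a simple 1D point
# reflection about a location c. A sketch of the semantics assumed for the
# reflect1d helper used in this module (hypothetical reimplementation, shown
# only for clarity):
def _reflect1d_sketch(x, c):
    """Reflect the point x about the point c."""
    return 2*c - x

# e.g. a bin edge at -0.8 reflected about the boundary -1 lands at -1.2:
assert _reflect1d_sketch(-0.8, -1) == -1.2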
# the asimov data set: for step in steplist: print "Running at asimov parameters: %s"%step asimov_params = get_values(getAsimovParams(params,true_normal,step)) asimov_data_set = get_asimov_fmap( template_maker, asimov_params, chan=asimov_params['channel']) # Store injected true values in result: for key in free_params.keys(): if 'theta23' in key: continue result['true_'+key].append(asimov_params[key]) result['true_theta23'].append(step) result['asimov_data'].append(asimov_data_set) # now get fitted values of opposite hierarchy: hypo_normal = False if true_normal else True hypo_tag = 'hypo_IMH' if true_normal else 'hypo_NMH' llh_data = find_alt_hierarchy_fit( asimov_data_set,template_maker, params, hypo_normal, minimizer_settings, only_atm_params=False, check_octant=args.check_octant) for key in free_params.keys(): result['fit_'+key].append(llh_data[key][-1]) results[true_tag] = result logging.warn("FINISHED. Saving to file: %s"%args.outfile) to_json(results,args.outfile)
def make_toy_events(outdir, num_events, energy_range, spectral_index, coszen_range, num_sets, first_set, aeff_energy_param, aeff_coszen_param, reco_param, pid_param, pid_dist): """Make toy events and store to a file. Parameters ---------- outdir : string num_events : sequence of int energy_range : 2-tuple of floats spectral_index : float coszen_range : 2-tuple of floats num_sets : int first_set : int aeff_energy_param : string aeff_coszen_param : string reco_param : string pid_param : string pid_dist : string Returns ------- events : :class:`pisa.core.events.Events` """ energy_range = sorted(energy_range) coszen_range = sorted(coszen_range) # Validation of args assert energy_range[0] > 0 and energy_range[1] < 1e9 assert coszen_range[0] >= -1 and coszen_range[1] <= 1 assert np.diff(energy_range)[0] > 0, str(energy_range) assert np.diff(coszen_range)[0] > 0, str(coszen_range) assert spectral_index >= 0, str(spectral_index) assert first_set >= 0, str(first_set) assert num_sets >= 1, str(num_sets) # Make sure resources specified actually exist for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]: find_resource(arg) mkdir(outdir, warn=False) set_indices = list(range(first_set, first_set + num_sets)) # The following loop is for validation only for num, index in product(num_events, set_indices): mcgen_random_state(num_events=num, set_index=index) for num, set_index in product(num_events, set_indices): mcevts_fname = FNAME_TEMPLATE.format( file_type='events', detector='vlvnt', e_min=format_num(energy_range[0]), e_max=format_num(energy_range[1]), spectral_index=format_num(spectral_index, sigfigs=2, trailing_zeros=True), cz_min=format_num(coszen_range[0]), cz_max=format_num(coszen_range[1]), num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)), set_index=format_num(set_index, sci_thresh=(10, -10)), extension='hdf5') mcevts_fpath = os.path.join(outdir, mcevts_fname) if os.path.isfile(mcevts_fpath): logging.warn('File already exists, skipping: "%s"', mcevts_fpath) continue logging.info('Working on set "%s"', mcevts_fname) # TODO: pass filepaths / resource locations via command line args # Create a single random state object to pass from function to function random_state = mcgen_random_state(num_events=num, set_index=set_index) mc_events = generate_mc_events( num_events=num, energy_range=energy_range, coszen_range=coszen_range, spec_ind=spectral_index, aeff_energy_param_source=aeff_energy_param, aeff_coszen_param_source=aeff_coszen_param, random_state=random_state) populate_reco_observables(mc_events=mc_events, param_source=reco_param, random_state=random_state) populate_pid(mc_events=mc_events, param_source=pid_param, random_state=random_state, dist=pid_dist) to_file(mc_events, mcevts_fpath) return mc_events
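# Hypothetical invocation of make_toy_events, shown (commented out) only to
# make the expected argument types concrete; the paths and parameter values
# are placeholders, not tested resources:
#
#   make_toy_events(
#       outdir='toy_events', num_events=[int(1e5)], energy_range=(1.0, 80.0),
#       spectral_index=2.0, coszen_range=(-1.0, 1.0), num_sets=1, first_set=0,
#       aeff_energy_param='<aeff energy param resource>',
#       aeff_coszen_param='<aeff coszen param resource>',
#       reco_param='<reco param resource>',
#       pid_param='<pid param resource>',
#       pid_dist='<pid dist name>',
#   )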
help="Output filename.") parser.add_argument('-v', '--verbose', action='count', default=None, help='set verbosity level') args = parser.parse_args() set_verbosity(args.verbose) #Read in the settings template_settings = from_json(args.template_settings) minimizer_settings = from_json(args.minimizer_settings) pseudo_data_settings = from_json(args.pseudo_data_settings) if args.pseudo_data_settings is not None else template_settings #Workaround for old scipy versions import scipy if scipy.__version__ < '0.12.0': logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__) if 'maxiter' in minimizer_settings: logging.warn('Optimizer settings for \"maxiter\" will be ignored') minimizer_settings.pop('maxiter') # make sure that both pseudo data and template are using the same # channel. Raise Exception and quit otherwise channel = template_settings['params']['channel']['value'] if channel != pseudo_data_settings['params']['channel']['value']: error_msg = "Both template and pseudo data must have same channel!\n" error_msg += " pseudo_data_settings chan: '%s', template chan: '%s' "%(pseudo_data_settings['params']['channel']['value'],channel) raise ValueError(error_msg) if args.gpu_id is not None: template_settings['params']['gpu_id'] = {}
def parse_fit_config(fit_cfg): """Perform sanity checks on and parse fit configuration file. Parameters ---------- fit_cfg : str path to a fit configuration file Returns ------- fit_cfg : PISAConfigParser parsed fit configuration sys_list : list of str parsed names of systematic parameters units_list : list of str units corresponding to each discrete systematic combine_regex : list of str each string is a regular expression for combining pipeline outputs; see :func:`pisa.core.map.MapSet.combine_regex` for details. """ fit_cfg = from_file(fit_cfg) no_ws_section_map = {s.strip(): s for s in fit_cfg.sections()} if GENERAL_SECTION_NAME not in no_ws_section_map.values(): raise KeyError('Fit config is missing the "%s" section!' % GENERAL_SECTION_NAME) general_section = fit_cfg[GENERAL_SECTION_NAME] if SYS_LIST_OPTION not in general_section: raise KeyError( "Fit config has to specify systematic parameters as" ' "%s" option in "%s" section (comma-separated list of names).' % (SYS_LIST_OPTION, GENERAL_SECTION_NAME)) sys_list = [s.strip() for s in general_section[SYS_LIST_OPTION].split(",")] if UNITS_OPTION in general_section: units_list = [] units_specs = (general_section[UNITS_OPTION].replace( UNITS_SPECIFIER, "").split(",")) for units_spec in units_specs: # Make sure units are interpret-able by Pint try: ureg.Unit(units_spec) except: logging.error( 'Unit "%s" specified by "%s" option in "general" section is not' " interpret-able by Pint", units_spec, UNITS_OPTION, ) raise units_list.append(units_spec) else: units_list = ["dimensionless" for s in sys_list] logging.warn( "No %s option found in %s section; assuming systematic parameters are" " dimensionless", UNITS_OPTION, GENERAL_SECTION_NAME, ) if len(units_list) != len(sys_list): raise ValueError( '{} units specified by "{}" option but {} systematics specified by "{}"' " option; must be same number of each.".format( len(units_list), UNITS_OPTION, len(sys_list), SYS_LIST_OPTION)) logging.info( "Found systematic parameters %s", ["{} ({})".format(s, u) for s, u in zip(sys_list, units_list)], ) combine_regex = general_section.get(COMBINE_REGEX_OPTION, None) if combine_regex: try: combine_regex = literal_eval(combine_regex) except (SyntaxError, ValueError): logging.warn( 'Deprecated syntax for "combine_regex" (make into a Python-evaluatable' " sequence of strings instead) :: combine_regex = %s", combine_regex, ) combine_regex = [r.strip() for r in combine_regex.split(",")] if APPLY_ALL_SECTION_NAME in no_ws_section_map: apply_all_section = fit_cfg[no_ws_section_map[APPLY_ALL_SECTION_NAME]] for no_ws_sname, sname in no_ws_section_map.items(): if not (no_ws_sname.startswith(NOMINAL_SET_PFX) or no_ws_sname.startswith(SYS_SET_PFX)): continue sys_set_section = fit_cfg[sname] for option, val in apply_all_section.items(): sys_set_section[option] = val return fit_cfg, sys_list, units_list, combine_regex
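# For orientation, a sketch of the fit-config layout this parser expects.
# Section and option names are inferred from the constants referenced above
# (GENERAL_SECTION_NAME, SYS_LIST_OPTION, UNITS_OPTION, etc.); the exact
# spellings and the units-specifier prefix are assumptions, and all values
# are placeholders:
#
#   [general]
#   sys_list = dom_eff, hole_ice
#   units = units.dimensionless, units.dimensionless
#   combine_regex = ["nue.*", "numu.*"]
#
#   [apply_to_all_sets]
#   # options here are copied into every nominal_set/sys_set section
#
#   [nominal_set : 1.00, 25.0]
#   pipeline_cfg = <pipeline cfg resource>
#
#   [sys_set : 0.97, 30.0]
#   pipeline_cfg = <pipeline cfg resource>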
def store_recursively(fhandle, node, path=None, node_hashes=None):
    if path is None:
        path = []
    if node_hashes is None:
        node_hashes = {}
    full_path = '/' + '/'.join(path)
    if isinstance(node, dict):
        logging.trace("  creating Group `%s`" % full_path)
        try:
            fhandle.create_group(full_path)
        except ValueError:
            pass
        for key in sorted(node.iterkeys()):
            key_str = str(key)
            if not isinstance(key, str):
                logging.warn('Stringifying key `' + key_str +
                             '` for use as name in HDF5 file')
            val = node[key]
            new_path = path + [key_str]
            store_recursively(fhandle=fhandle, node=val, path=new_path,
                              node_hashes=node_hashes)
    else:
        # Check for existing node
        node_hash = utils.hash_obj(node)
        if node_hash in node_hashes:
            logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                          (full_path, node_hashes[node_hash]))
            # Hardlink the matching existing dataset
            fhandle[full_path] = fhandle[node_hashes[node_hash]]
            return
        # For now, convert None to np.nan since h5py appears to not handle None
        if node is None:
            node = np.nan
            logging.warn("  encountered `None` at node `%s`; converting to"
                         " np.nan" % full_path)
        # "Scalar datasets don't support chunk/filter options". Shuffling
        # is a good idea otherwise since subsequent compression will
        # generally benefit; shuffling requires chunking. Compression is
        # not done here since it is slow.
        if np.isscalar(node):
            shuffle = False
            chunks = None
        else:
            shuffle = True
            chunks = True
            # Store the node_hash for linking to later if this is more than
            # a scalar datatype. Assumed that "None" has already been
            # converted to np.nan above.
            node_hashes[node_hash] = full_path
        # TODO: Treat strings as follows? Would this break compatibility
        # with pytables/Pandas? What are benefits? Leaving out for now.
        # if isinstance(node, basestr):
        #     dtype = h5py.special_dtype(vlen=str)
        #     fh.create_dataset(k,data=v,dtype=dtype)
        logging.trace("  creating dataset at node `%s`" % full_path)
        try:
            fhandle.create_dataset(name=full_path, data=node, chunks=chunks,
                                   compression=None, shuffle=shuffle,
                                   fletcher32=False)
        except TypeError:
            try:
                shuffle = False
                chunks = None
                fhandle.create_dataset(name=full_path, data=node,
                                       chunks=chunks, compression=None,
                                       shuffle=shuffle, fletcher32=False)
            except Exception:
                logging.error('  full_path: ' + full_path)
                logging.error('  chunks   : ' + str(chunks))
                logging.error('  shuffle  : ' + str(shuffle))
                logging.error('  node     : ' + str(node))
                raise
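A minimal usage sketch, assuming `h5py` is available and `store_recursively` is imported; note that identical sub-trees are stored once and hardlinked thereafter:

# Usage sketch (file name hypothetical); identical leaf arrays share storage
# via hardlinks thanks to the node-hash bookkeeping above.
import h5py
import numpy as np

reco = {'energy': np.arange(10.0), 'coszen': np.linspace(-1.0, 1.0, 10)}
data = {'sample_a': reco, 'sample_b': reco, 'livetime': 2.5}

with h5py.File('events.hdf5', 'w') as fhandle:
    store_recursively(fhandle, data)
    # 'sample_b' datasets are hardlinks to the identical 'sample_a' arrays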
def make_discrete_sys_distributions(fit_cfg, set_params=None):
    """Generate and store mapsets for different discrete systematics sets
    (with a single set characterised by a dedicated pipeline configuration)

    Parameters
    ----------
    fit_cfg : string
        Path to a fit config file

    Returns
    -------
    input_data : OrderedDict
        Container with the processed input data including MapSets
        resulting from each input pipeline

    """
    # check optional `set_params`
    if set_params is not None:
        if not isinstance(set_params, Mapping):
            raise TypeError("`set_params` must be dict-like")
        for param_name, param_value in set_params.items():
            if not isinstance(param_name, basestring):
                raise TypeError(
                    "`set_params` keys must be strings (parameter name)")
            if not isinstance(param_value, ureg.Quantity):
                raise TypeError("`set_params` values must be Quantities")

    parsed_fit_cfg, sys_list, units_list, combine_regex = parse_fit_config(fit_cfg)
    fit_cfg_txt_buf = StringIO()
    parsed_fit_cfg.write(fit_cfg_txt_buf)
    fit_cfg_txt = fit_cfg_txt_buf.getvalue()

    # prepare the data container
    input_data = OrderedDict()
    input_data["fit_cfg_path"] = fit_cfg
    input_data["fit_cfg_txt"] = fit_cfg_txt
    input_data["param_names"] = sys_list
    input_data["param_units"] = units_list
    input_data["datasets"] = []

    # -- Load systematics sets -- #

    found_nominal = False
    sys_sets_info = OrderedDict()

    for section in parsed_fit_cfg.sections():
        no_ws_section = section.strip()
        section_pfx = no_ws_section.split(":")[0].strip()
        is_nominal = section_pfx == NOMINAL_SET_PFX
        is_sys_set = is_nominal or section_pfx == SYS_SET_PFX

        if is_nominal:
            if found_nominal:
                raise ValueError(
                    "Found multiple nominal sets in fit cfg! There must be"
                    " exactly one.")
            found_nominal = True

        if is_sys_set:
            # Parse the list of systematics parameter values from the section name
            sys_param_point = tuple(
                float(x) for x in section.split(":")[1].split(","))
            if len(sys_param_point) != len(sys_list):
                raise ValueError(
                    "Section heading [{}] specifies {:d} systematic"
                    " parameter values, but there are {:d} systematics".format(
                        section, len(sys_param_point), len(sys_list)))

            parsed_pipeline_cfg, pipeline_cfg_path = load_and_modify_pipeline_cfg(
                fit_cfg=parsed_fit_cfg, section=section)

            pipeline_cfg_txt_buf = StringIO()
            parsed_pipeline_cfg.write(pipeline_cfg_txt_buf)
            pipeline_cfg_txt = pipeline_cfg_txt_buf.getvalue()

            sys_sets_info[sys_param_point] = dict(
                is_nominal=is_nominal,
                parsed_pipeline_cfgs=[parsed_pipeline_cfg],
                pipeline_cfg_paths=[pipeline_cfg_path],
                pipeline_cfg_txts=[pipeline_cfg_txt],
            )

        # In this loop, nothing to do for general & apply_to_all_sets sections
        elif no_ws_section in (GENERAL_SECTION_NAME, APPLY_ALL_SECTION_NAME):
            pass

        # Do not allow any other sections in the config
        else:
            raise ValueError("Invalid section in fit config file: [%s]" % section)

    if not found_nominal:
        raise ValueError(
            "Could not find a nominal discrete systematics set in fit cfg."
            " There must be exactly one.")

    nsets = len(sys_sets_info)
    nsys = len(sys_list)
    if nsets <= nsys:
        logging.warn(
            "Fit will either fail or be unreliable since the number of"
            " systematics sets to be fit is small (%d <= %d).",
            nsets,
            nsys,
        )

    for sys_param_point, info in sys_sets_info.items():
        point_str = " | ".join(
            ["%s=%.2f" % (p, v) for p, v in zip(sys_list, sys_param_point)])

        logging.info(
            "Generating maps for discrete systematics point: %s. Using"
            ' pipeline config(s) at "%s"',
            point_str,
            info["pipeline_cfg_paths"],
        )

        # make a dedicated distribution maker for each systematics set
        distribution_maker = DistributionMaker(info["parsed_pipeline_cfgs"])

        # update params if requested
        if set_params is not None:
            for pname, pval in set_params.items():
                if pname not in distribution_maker.params.names:
                    raise ValueError("Unknown param '%s' in `set_params`" % pname)
                if pval.dimensionality != distribution_maker.params[pname].dimensionality:
                    raise ValueError(
                        'Incorrect units for param "%s" in `set_params`' % pname)
                distribution_maker.params[pname].value = pval
                logging.info("Changed param '%s' to %s", pname, pval)

        distribution_maker_param_values = OrderedDict()
        for dmpname in sorted(distribution_maker.params.names):
            dmpval = distribution_maker.params[dmpname].value
            distribution_maker_param_values[dmpname] = dmpval

        # run the distribution maker to get the mapset
        # TODO This assumes only one pipeline, either make more general or enforce
        mapset = distribution_maker.get_outputs(return_sum=False)[0]

        if combine_regex:
            logging.info(
                "Combining maps according to regular expression(s) %s",
                combine_regex)
            mapset = mapset.combine_re(combine_regex)

        # Store the info
        dataset = OrderedDict()
        dataset["pipeline_cfg_paths"] = info["pipeline_cfg_paths"]
        dataset["pipeline_cfg_txts"] = info["pipeline_cfg_txts"]
        dataset["distribution_maker_param_values"] = distribution_maker_param_values
        dataset["param_values"] = sys_param_point
        dataset["mapset"] = mapset
        dataset["nominal"] = info["is_nominal"]
        input_data["datasets"].append(dataset)

    return input_data
args = parser.parse_args()
set_verbosity(args.verbose)

llhfiles = glob(os.path.join(args.data_dir, "llh_data*"))
if args.log_dir is not None:
    logfiles = glob(os.path.join(args.log_dir, "log*"))
    # These MUST have the same number initialized if we are using the logging
    # information. Otherwise, perhaps one of the directories is incorrect.
    # Sometimes there are fewer llh files, since jobs crash before writing out.
    assert len(llhfiles) <= len(logfiles), "Data and log directories don't match?"

# Output to save to hdf5 file:
output_data = {"minimizer_settings": {}, "template_settings": {},
               "true_NMH": {}, "true_IMH": {}}

logging.warn("Processing {0:d} files".format(len(llhfiles)))
mod = len(llhfiles) // 20
start = time.time()
for i, filename in enumerate(llhfiles):
    if (mod > 0) and (i % mod == 0):
        logging.info(" >> {0:d} files done...".format(i))
    try:
        data = from_json(filename)
    except Exception as inst:
        # print(inst)
        print("Skipping file: ", filename)
        continue
def __init__( self, earth_model=None, detector_depth=None, prop_height=None, prop_height_range=None, YeI=None, YeO=None, YeM=None, rel_err=None, abs_err=None, prop_lowpass_cutoff=None, prop_lowpass_frac=None, eval_lowpass_cutoff=None, eval_lowpass_frac=None, apply_lowpass_above_hor=True, apply_height_avg_below_hor=True, suppress_interpolation_warning=False, node_mode=None, use_decoherence=False, num_decoherence_gamma=1, use_nsi=False, num_neutrinos=3, use_taus=False, exact_mode=False, vacuum=False, **std_kwargs, ): # Checks if use_nsi: raise NotImplementedError("NSI not implemented") if type(prop_height) is not ureg.Quantity: raise NotImplementedError( "Getting propagation heights from containers is " "not yet implemented, saw {} type".format(type(prop_height)) ) # Store args self.num_neutrinos = int(num_neutrinos) assert ( self.num_neutrinos < 5 ), "currently only supports up to 4 flavor oscillations" self.use_nsi = use_nsi self.use_decoherence = use_decoherence self.num_decoherence_gamma = num_decoherence_gamma self.node_mode = node_mode self.vacuum = vacuum self.use_taus = use_taus self.earth_model = earth_model self.YeI = YeI.m_as("dimensionless") self.YeO = YeO.m_as("dimensionless") self.YeM = YeM.m_as("dimensionless") self.detector_depth = detector_depth.m_as("km") self.prop_height = prop_height.m_as("km") self.avg_height = False self.concurrent_threads = PISA_NUM_THREADS if TARGET == "parallel" else 1 self.prop_height_range = None self.apply_height_avg_below_hor = apply_height_avg_below_hor if prop_height_range is not None: # this is optional self.prop_height_range = prop_height_range.m_as("km") self.avg_height = True self.layers = None self.rel_err = rel_err.m_as("dimensionless") if rel_err is not None else 1.0e-10 self.abs_err = abs_err.m_as("dimensionless") if abs_err is not None else 1.0e-10 self.prop_lowpass_cutoff = ( prop_lowpass_cutoff.m_as("1/km") if prop_lowpass_cutoff is not None else 0.0 ) self.prop_lowpass_frac = ( prop_lowpass_frac.m_as("dimensionless") if prop_lowpass_frac is not None else 0.0 ) self.eval_lowpass_cutoff = ( eval_lowpass_cutoff.m_as("1/km") if eval_lowpass_cutoff is not None else 0.0 ) self.eval_lowpass_frac = ( eval_lowpass_frac.m_as("dimensionless") if eval_lowpass_frac is not None else 0.0 ) if self.prop_lowpass_frac > 1.0 or self.eval_lowpass_frac > 1.0: raise ValueError("lowpass filter fraction cannot be greater than one") if self.prop_lowpass_frac < 0.0 or self.eval_lowpass_frac < 0.0: raise ValueError("lowpass filter fraction cannot be smaller than zero") self.apply_lowpass_above_hor = apply_lowpass_above_hor self.nus_layer = None self.nus_layerbar = None # Define the layers class self.nusquids_layers_class = nsq.nuSQUIDSLayers # Define standard params expected_params = [ "theta12", "theta13", "theta23", "deltam21", "deltam31", "deltacp", ] # Add decoherence parameters if self.use_decoherence: # Use derived nuSQuIDS classes import nuSQUIDSDecohPy self.nusquids_layers_class = nuSQUIDSDecohPy.nuSQUIDSDecohLayers # Checks assert ( self.num_neutrinos == 3 ), "Decoherence only supports 3 neutrinos currently" # Add decoherence params expected_params.extend(["gamma0"]) expected_params.extend(["n"]) expected_params.extend(["E0"]) # We may want to reparametrize this with the difference between deltacp14 and # deltacp24, as the absolute value seems to play a small role (see # https://arxiv.org/pdf/2010.06321.pdf) if self.num_neutrinos == 4: expected_params.extend( [ "theta14", "theta24", "theta34", "deltam41", "deltacp14", "deltacp24", ] ) # init base 
class
        super().__init__(
            expected_params=expected_params,
            **std_kwargs,
        )

        # This is special: We have an additional "binning" to account for. It
        # is in principle possible to work in event mode even for the nodes,
        # which would mean that the full oscillation problem is solved for all
        # events individually. Together with the constant oscillation mode,
        # this can be used to calculate probabilities in exact mode in a time
        # that is reasonable at least for generating pseudodata.

        assert not (self.use_nsi and self.use_decoherence), (
            "NSI and decoherence not supported together, must use one or the"
            " other"
        )

        self.exact_mode = exact_mode

        if exact_mode:
            # No interpolation is happening in exact mode so any passed
            # node_mode will be ignored. Probabilities are calculated at
            # calc_specs.
            if self.node_mode is not None:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, the passed "
                    f"`node_mode`\n({self.node_mode})\n will be ignored!"
                )
            if self.prop_lowpass_cutoff > 0 or self.eval_lowpass_cutoff > 0:
                logging.warn(
                    "nuSQuIDS is configured in exact mode, low-pass filters "
                    "will be ignored"
                )
        else:
            if isinstance(self.calc_mode, MultiDimBinning):
                assert isinstance(self.node_mode, MultiDimBinning), (
                    "cannot use event-wise nodes with binned calculation"
                )

        self.e_node_mode = None
        self.e_mesh = None
        self.coszen_node_mode = None
        self.cosz_mesh = None

        # We don't want to spam the user with repeated warnings about the same issue.
        self.interpolation_warning_issued = suppress_interpolation_warning
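The constructor above repeatedly converts optional Pint quantities into bare magnitudes with a fallback default; a minimal standalone sketch of that pattern (assuming only the `pint` package; the helper name is illustrative):

# Sketch of the optional-Quantity pattern used in the constructor above;
# `as_magnitude` is a hypothetical helper, not part of this module.
import pint

ureg = pint.UnitRegistry()

def as_magnitude(quantity, unit, default):
    """Return `quantity` converted to `unit` as a bare float, else `default`."""
    return quantity.m_as(unit) if quantity is not None else default

rel_err = as_magnitude(None, 'dimensionless', 1.0e-10)   # -> 1e-10
cutoff = as_magnitude(1.2 * ureg('1/km'), '1/km', 0.0)   # -> 1.2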
def apply_function(self):
    '''
    Computes the main inputs to the generalized likelihood function on
    every iteration of the minimizer
    '''
    N_bins = self.output_specs.tot_num_bins

    #
    # Step 4: Apply the empty bin strategy and mean adjustment
    #         Compute the alphas and betas that go into the
    #         poisson-gamma mixture of the llh
    #
    for container in self.data:
        self.data.data_specs = 'events'

        #
        # Step 3: Find the maximum weight across all events
        #         of each MC set. The value of that weight defines
        #         the value of the pseudo-weight that will be included
        #         in empty bins
        #
        # for this part we are in events mode
        #
        # Find the mean weight of an entire MC set
        #
        # We only consider the first 90 percentiles of the weight
        # values, to avoid the high extreme weights that muongun
        # often gives
        #
        all_container_weights = container['weights'].get('host')

        if self.with_pseudo_weight:
            percentile90 = np.percentile(all_container_weights, 90)
            pseudo_weight = np.mean(all_container_weights[
                all_container_weights <= percentile90])
            #pseudo_weight = np.amin(all_container_weights[all_container_weights>0])
            container.add_scalar_data(key='pseudo_weight', data=pseudo_weight)

        old_weight_sum = np.zeros(N_bins)
        new_weight_sum = np.zeros(N_bins)
        alphas_vector = np.zeros(N_bins)
        betas_vector = np.zeros(N_bins)

        #
        # Load the pseudo_weight and mean displacement values
        #
        if self.with_mean_adjust:
            mean_adjustment = container.scalar_data['mean_adjustment']

        for index in range(N_bins):
            index_mask = container['bin_{}_mask'.format(index)].get('host')
            if 'kfold_mask' in container:
                index_mask *= container['kfold_mask'].get('host')
            current_weights = all_container_weights[index_mask]
            old_weight_sum[index] += np.sum(current_weights)

            assert np.all(current_weights >= 0), 'SOME WEIGHTS BELOW ZERO'
            n_weights = current_weights.shape[0]

            # If there are no weights but other datasets have some, include a
            # pseudo-weight. Bins with no MC events in any set will be ignored
            # in the likelihood later.
            #
            # make the whole bin treatment here
            if n_weights <= 0 and self.with_pseudo_weight:
                current_weights = np.array([pseudo_weight])
                n_weights = 1

            # write the new weight distribution down
            new_weight_sum[index] += np.sum(current_weights)

            # Mean of the current weight distribution
            mean_w = np.mean(current_weights)

            # variance of the current weights
            var_of_weights = (
                (current_weights - mean_w)**2).sum() / (float(n_weights))

            # Variance of the poisson-gamma distributed variable
            var_z = (var_of_weights + mean_w**2)

            if var_z < 0:
                logging.warn('var_z is less than zero in container %s: %s',
                             container.name, var_z)
                raise Exception('var_z < 0')

            # if the weights present have a mean of zero, default to alpha
            # values of PSEUDO_WEIGHT and to beta = 1.0, which mimics a
            # narrow PDF close to 0.0
            beta = np.divide(mean_w, var_z, out=np.ones(1), where=var_z != 0)
            trad_alpha = np.divide(mean_w**2, var_z, out=np.ones(1) * np.NaN,
                                   where=var_z != 0)
            if self.with_mean_adjust:
                alpha = (n_weights + mean_adjustment) * trad_alpha
            else:
                alpha = n_weights * trad_alpha

            alphas_vector[index] = alpha
            betas_vector[index] = beta

        # Calculate alphas and betas
        self.data.data_specs = self.output_specs
        np.copyto(src=alphas_vector, dst=container['llh_alphas'].get('host'))
        np.copyto(src=betas_vector, dst=container['llh_betas'].get('host'))

        # only change the weights if they were modified
        if self.with_pseudo_weight or self.with_mean_adjust:
            np.copyto(src=new_weight_sum, dst=container['weights'].get('host'))
            container['weights'].mark_changed()

        np.copyto(src=old_weight_sum, dst=container['old_sum'].get('host'))
        container['llh_alphas'].mark_changed()
        container['llh_betas'].mark_changed()
        container['old_sum'].mark_changed()
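To make the per-bin arithmetic above concrete, here is a small worked example of the alpha/beta computation for a toy set of weights in a single bin (no mean adjustment):

# Worked example of the poisson-gamma alpha/beta arithmetic above
import numpy as np

weights = np.array([0.5, 1.0, 1.5])         # MC weights landing in one bin
n = weights.size
mean_w = weights.mean()                      # 1.0
var_w = ((weights - mean_w)**2).sum() / n    # ~0.1667
var_z = var_w + mean_w**2                    # ~1.1667

beta = mean_w / var_z                        # ~0.857
alpha = n * mean_w**2 / var_z                # ~2.571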
def compute_function_interpolated(self): """ Version of the compute function that does use interpolation between nodes. """ nsq_units = nsq.Const() # We need to make two evolutions, one for numu and the other for nue. # These produce neutrino and antineutrino states at the same time thanks to # the "both" neutrino mode of nuSQuIDS. self.apply_prop_settings(self.nus_layer) self.set_osc_parameters(self.nus_layer) ini_state_nue = np.array([1, 0, 0] + [0] * (self.num_neutrinos - 3)) ini_state_numu = np.array([0, 1, 0] + [0] * (self.num_neutrinos - 3)) ini_state_nutau = np.array([0, 0, 1] + [0] * (self.num_neutrinos - 3)) self.nus_layer.Set_initial_state(ini_state_nue, nsq.Basis.flavor) if not self.vacuum: self.nus_layer.EvolveState() evolved_states_nue = self.nus_layer.GetStates(0) evolved_states_nuebar = self.nus_layer.GetStates(1) self.nus_layer.Set_initial_state(ini_state_numu, nsq.Basis.flavor) if not self.vacuum: self.nus_layer.EvolveState() evolved_states_numu = self.nus_layer.GetStates(0) evolved_states_numubar = self.nus_layer.GetStates(1) if self.use_taus: self.nus_layer.Set_initial_state(ini_state_nutau, nsq.Basis.flavor) if not self.vacuum: self.nus_layer.EvolveState() evolved_states_nutau = self.nus_layer.GetStates(0) evolved_states_nutaubar = self.nus_layer.GetStates(1) # Now comes the step where we interpolate the interaction picture states # and project out oscillation probabilities. This can be done in either events # or binned mode. if isinstance(self.calc_mode, MultiDimBinning): self.data.link_containers( "nu", ["nue_cc", "numu_cc", "nutau_cc", "nue_nc", "numu_nc", "nutau_nc"] ) self.data.link_containers( "nubar", [ "nuebar_cc", "numubar_cc", "nutaubar_cc", "nuebar_nc", "numubar_nc", "nutaubar_nc", ], ) for container in self.data: nubar = container["nubar"] < 0 container["interp_states_e"] = self.calc_interpolated_states( evolved_states_nuebar if nubar else evolved_states_nue, container["true_energy"] * nsq_units.GeV, container["true_coszen"], ) container["interp_states_mu"] = self.calc_interpolated_states( evolved_states_numubar if nubar else evolved_states_numu, container["true_energy"] * nsq_units.GeV, container["true_coszen"], ) if self.use_taus: container["interp_states_tau"] = self.calc_interpolated_states( evolved_states_nutaubar if nubar else evolved_states_nutau, container["true_energy"] * nsq_units.GeV, container["true_coszen"], ) self.data.unlink_containers() if isinstance(self.calc_mode, MultiDimBinning): self.data.link_containers("nue", ["nue_cc", "nue_nc"]) self.data.link_containers("numu", ["numu_cc", "numu_nc"]) self.data.link_containers("nutau", ["nutau_cc", "nutau_nc"]) self.data.link_containers("nuebar", ["nuebar_cc", "nuebar_nc"]) self.data.link_containers("numubar", ["numubar_cc", "numubar_nc"]) self.data.link_containers("nutaubar", ["nutaubar_cc", "nutaubar_nc"]) for container in self.data: nubar = container["nubar"] < 0 flav_out = container["flav"] input_flavs = ["e", "mu", "tau"] if self.use_taus else ["e", "mu"] for flav_in in input_flavs: container["prob_" + flav_in] = self.calc_probs_interp( flav_out=flav_out, nubar=nubar, interp_states=container["interp_states_" + flav_in], out_distances=container["tot_distances"] * nsq_units.km, e_out=container["true_energy"] * nsq_units.GeV, avg_ranges=container["avg_ranges"] * nsq_units.km, lowpass_cutoff=container["lowpass_cutoff"] / nsq_units.km, ) # It is possible to get slightly negative probabilities from imperfect # state interpolation between nodes. 
# It's impractical to avoid any probability dipping below zero in every # conceivable situation because that would require very dense node # spacing. We get around this by flooring the probability at zero. # However, dipping below zero by more than 1% may indicate that nodes # aren't spaced tightly enough to achieve an acceptable accuracy, so we # issue a warning. if ( np.any(container["prob_" + flav_in] < -0.01) and not self.interpolation_warning_issued ): mask = container["prob_" + flav_in] < -0.01 en_med = np.median(container["true_energy"][mask]) cz_med = np.median(container["true_coszen"][mask]) logging.warn( f"Some probabilities in nu_{flav_in} -> {container.name} dip " "below zero by more than 1%! This may indicate too few nodes " f"in the problematic region. Median energy: {en_med}, median " f"coszen: {cz_med}. This warning is only issued once." ) self.interpolation_warning_issued = True container["prob_" + flav_in][container["prob_" + flav_in] < 0] = 0.0 container.mark_changed("prob_e") container.mark_changed("prob_mu") if self.use_taus: container.mark_changed("prob_tau") self.data.unlink_containers()
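In isolation, the floor-and-warn strategy described above amounts to the following sketch (thresholds as in the code):

# Sketch of the flooring applied above: clip small negative interpolation
# artifacts to zero, but flag anything dipping below -1%.
import numpy as np

probs = np.array([0.21, -1e-4, 0.79, -0.02])
if np.any(probs < -0.01):
    # nodes may be too sparse wherever the -0.02 entry lives
    print('warning: probabilities dip below zero by more than 1%')
probs[probs < 0] = 0.0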
help='set verbosity level') args = parser.parse_args() set_verbosity(args.verbose) print "FILE NORMALIZATION: " print " >> nue: ",args.nfiles_nue print " >> numu: ",args.nfiles_numu print " >> nutau: ",args.nfiles_nutau ebins = np.linspace(args.emin,args.emax,args.nebins) if args.elin else np.logspace(np.log10(args.emin), np.log10(args.emax), args.nebins) # Cut definitions: s1_s2_cuts = [] if args.v4cuts: logging.warn("Using cuts V4!") s1_s2_cuts = [("Cuts_V4_Step1",'value',True),("Cuts_V4_Step2",'value',True)] elif args.v3cuts: logging.warn("Using cuts V3!") s1_s2_cuts = [('NewestBgRejCutsStep1','value',True), ('NewestBgRejCutsStep2','value',True)] elif args.v5truth: logging.warn("USING V5 TRUTH information") s1_s2_cuts = [('Cuts_V5_Step2_upgoing_Truth','value',True)] elif args.nocuts: logging.warn("Using no selection cuts!") s1_s2_cuts = [] else: logging.warn("Using cuts V5!") s1_s2_cuts= [("Cuts_V5_Step1",'value',True),("Cuts_V5_Step2",'value',True)]
def vbw_kde(data, N=None, MIN=None, MAX=None, evaluate_dens=True,
            evaluate_at=None, overfit_factor=1.0):
    '''
    Parameters
    ----------
    data
        The data points for which the density estimate is sought

    N
        Number of points with which to form regular mesh, from MIN to MAX;
        this gets DCT'd, so N should be a power of two.
        -> Default: 2**14 (16384)

    MIN
        Minimum of range over which to compute density.
        -> Default: min(data) - range(data)/10

    MAX
        Maximum of range over which to compute density.
        -> Default: max(data) + range(data)/10

    evaluate_dens
        Whether to evaluate the density either at the mesh points defined by
        N, MIN, and MAX, or at the points specified by the argument
        evaluate_at. If False, only the gaussians' bandwidths and the mesh
        locations (no density) are returned. Evaluating the density is a
        large fraction of total execution time, so setting this to False
        saves time if only the bandwidths are desired.
        -> Default: True

    evaluate_at
        Points at which to evaluate the density. If None is specified,
        evaluates at points on the mesh defined by MIN, MAX, and N.
        -> Default: None

    overfit_factor
        EXPERIMENTAL: For the first part of the algorithm, the
        improved-Sheather-Jones fixed-bandwidth (ISJ-FBW) bit, the density
        can be overfit by specifying overfit_factor > 1.0 and underfit using
        a value < 1.0.
        -> Default: 1.0

    Returns
    -------
    kernel_bandwidths
        The gaussian bandwidths, one for each data point

    evaluate_at
        Locations at which the density is evaluated

    vbw_dens_est
        Density estimates at the mesh points, or None if evaluate_dens is
        False

    Notes
    -----
    Specifying the range:

    The specification of MIN and MAX is critical for obtaining a reasonable
    density estimate. If the true underlying density slowly decays to zero on
    one side or the other, like a gaussian, specifying too small a range will
    distort the edge the VBW-KDE finds. On the other hand, an abrupt cut-off
    in the distribution should be accompanied by a similar cut-off in the
    computational range (MIN and/or MAX). The algorithm here will approximate
    such a sharp cut-off with roughly the same performance as the reflection
    method for standard KDEs (as the fixed-BW portion uses a DCT of the
    data), but note that this will not perform as well as polynomial-edges or
    other modifications that have been proposed in the literature.

    Specifying overfit_factor; other tweaks:

    I've seen no improvement by changing this parameter, but it remains for
    experimental purposes.
    Other avenues to explore include changing the "normalization" of the
    variable-bandwidth bit that I use, which forces it to have a bandwidth at
    the peak matching that found by the ISJ-FBW part

    '''
    # Parameters to set up the mesh on which to calculate
    if N is None:
        N = 2**14 #if N is None else int(2**np.ceil(np.log2(N)))
    if MIN is None or MAX is None:
        minimum = min(data)
        maximum = max(data)
        Range = maximum - minimum
        if Range == 0:
            logging.warn('Range of data is 0; there are ' + str(len(data)) +
                         ' data points.')
        MIN = minimum - Range/10 if MIN is None else MIN
        MAX = maximum + Range/10 if MAX is None else MAX

    # Range for computation
    R = MAX-MIN

    # Histogram the data to get a crude first approximation of the density
    M = len(data)
    DataHist, bins = np.histogram(data, bins=N, range=(MIN, MAX))
    DataHist = DataHist/M
    DCTData = fftpack.dct(DataHist, norm=None)

    I = np.arange(1, N, dtype=np.float64)**2
    SqDCTData = np.float64((DCTData[1:]/2.0)**2)

    # The fixed point calculation finds the bandwidth = t_star
    failure = True
    for guess in np.logspace(-1, 2, 20):
        try:
            t_star = optimize.brentq(fixed_point, 0, guess,
                                     args=(np.float64(M), I, SqDCTData))
            failure = False
            break
        except ValueError:
            failure = True
    if failure:
        raise ValueError('Initial root-finding failed.')

    # Smooth the DCTransformed data using t_star divided by an overfitting
    # param that permits a sub-optimal fit in exchange for "sharper" features
    SmDCTData = DCTData*np.exp(-np.arange(N)**2*pisq*t_star/(2*overfit_factor))

    # Inverse DCT to get density
    fbw_dens_on_mesh = fftpack.idct(SmDCTData, norm=None)*N/R

    # Start by defining the mesh as the bins' centers
    mesh = (bins[0:-1]+bins[1:])/2.

    # But add the lower and upper edges in case data points live there
    fbw_dens_on_mesh = fbw_dens_on_mesh/np.trapz(fbw_dens_on_mesh, mesh)
    isj_bandwidth = np.sqrt(t_star)*R

    # Create linear interpolator for this new density then find density est.
    # at the original data points' locations; call this
    # fbw_dens_at_datapoints
    interp = interpolate.interp1d(x=mesh,
                                  y=fbw_dens_on_mesh,
                                  kind='linear',
                                  copy=False,
                                  bounds_error=True,
                                  fill_value=np.nan)
    fbw_dens_at_datapoints = interp(data)

    # Note below diverges from the published Abramson method, by forcing the
    # bandwidth at the max of the density distribution to be exactly the
    # bandwidth found above with the improved Sheather-Jones BW selection
    # technique. Refs:
    #   I.S. Abramson, On bandwidth variation in kernel estimates - A square
    #       root law, Annals of Stat. Vol. 10, No. 4, 1217-1223 1982
    #   P. Hall, T. C. Hu, J. S. Marron, Improved Variable Window Kernel
    #       Estimates of Probability Densities, Annals of Statistics Vol. 23,
    #       No. 1, 1-10, 1995
    root_pknorm_fbw_dens_est = np.sqrt(fbw_dens_at_datapoints /
                                       np.max(fbw_dens_at_datapoints))
    kernel_bandwidths = isj_bandwidth/root_pknorm_fbw_dens_est

    if evaluate_at is None:
        evaluate_at = mesh

    if not evaluate_dens:
        return kernel_bandwidths, evaluate_at, None

    vbw_dens_est = np.zeros_like(evaluate_at, dtype=np.double)
    gaussians(outbuf=vbw_dens_est,
              x=evaluate_at.astype(np.double),
              mu=data.astype(np.double),
              sigma=kernel_bandwidths.astype(np.double),
              threads=int(openmp_num_threads))

    # Normalize distribution to have area of 1
    vbw_dens_est = vbw_dens_est/np.trapz(y=vbw_dens_est, x=evaluate_at)

    return kernel_bandwidths, evaluate_at, vbw_dens_est
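A minimal usage sketch, assuming this module's helpers (`fixed_point`, `gaussians`, `openmp_num_threads`) are importable alongside `vbw_kde`:

# Usage sketch for vbw_kde on a toy gaussian sample
import numpy as np

np.random.seed(0)
data = np.random.normal(loc=0.0, scale=1.0, size=1000)

# bandwidths: one per data point; x: evaluation mesh; dens: density on mesh
kernel_bandwidths, x, dens = vbw_kde(data, N=2**10)
print(np.trapz(dens, x))  # ~1.0, since the estimate is normalised over the mesh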
parser.add_argument('-o','--outfile',type=str,default='llh_data.json',metavar='JSONFILE',
                    help="Output filename.")
parser.add_argument('-v', '--verbose', action='count', default=None,
                    help='set verbosity level')
args = parser.parse_args()
set_verbosity(args.verbose)

# Read in the settings
template_settings = from_json(args.template_settings)
minimizer_settings = from_json(args.minimizer_settings)

# Workaround for old scipy versions; compare parsed versions rather than raw
# strings, since e.g. '0.9.0' > '0.12.0' when compared lexicographically
import scipy
from distutils.version import LooseVersion
if LooseVersion(scipy.__version__) < LooseVersion('0.12.0'):
    logging.warn('Detected scipy version %s < 0.12.0'%scipy.__version__)
    if 'maxiter' in minimizer_settings:
        logging.warn('Optimizer settings for "maxiter" will be ignored')
        minimizer_settings.pop('maxiter')

# Get the parameters
params = template_settings['params']

# store results from all the trials
trials = []

template_maker = TemplateMaker(get_values(params),**template_settings['binning'])

for itrial in xrange(1,args.ntrials+1):
    profile.info("start trial %d"%itrial)
    logging.info(">"*10 + "Running trial: %05d"%itrial + "<"*10)
'nue': {'filename': args.nue,'nfiles': args.nfiles_nue}, 'numu': {'filename': args.numu,'nfiles': args.nfiles_numu}, 'nutau': {'filename': args.nutau,'nfiles': args.nfiles_nutau}} logging.info("input files:\n%s"%data_files) # Ensure overwrite of existing filename... outfilename = args.outfile fh = h5py.File(outfilename,'w') fh.close() logging.info("Writing to file: %s",outfilename) # Define V3, V4, or V5 cuts: cut_list = [] if args.cutsV3: logging.warn("Using cuts V3...") cut_list.append(('NewestBgRejCutsStep1','value',True)) cut_list.append(('NewestBgRejCutsStep2','value',True)) elif args.cutsV4: logging.warn("Using cuts V4...") cut_list.append(('Cuts_V4_Step1','value',True)) cut_list.append(('Cuts_V4_Step2','value',True)) elif args.cutsV5: logging.warn("Using cuts V5...") cut_list.append(('Cuts_V5_Step1','value',True)) cut_list.append(('Cuts_V5_Step2','value',True)) nuDict = {} if args.old_pid: nuDict = {'nue':66,'numu':68,'nutau':133,'nue_bar':67,'numu_bar':69,'nutau_bar':134}
def find_max_llh_bfgs(fmap, template_maker, params, bfgs_settings,
                      save_steps=False, normal_hierarchy=None,
                      check_octant=False):
    """
    Finds the template (and free systematic params) that maximize
    likelihood that the data came from the chosen template of true
    params, using the limited memory BFGS algorithm subject to bounds
    (l_bfgs_b).

    returns a dictionary of llh data and best fit params, in the format:
      {'llh': [...],
       'param1': [...],
       'param2': [...],
       ...}
    where 'param1', 'param2', ... are the free params varied by the
    optimizer, and they hold a list of all the values tested by the
    optimizer algorithm, unless save_steps is False, in which case they
    are one element in length: the best-fit params and best-fit llh.
    """

    # Get params dict which will be optimized (free_params) and which
    # won't be (fixed_params) but are still needed for get_template()
    fixed_params = get_fixed_params(select_hierarchy(params,normal_hierarchy))
    free_params = get_free_params(select_hierarchy(params,normal_hierarchy))

    if len(free_params) == 0:
        logging.warn("NO FREE PARAMS, returning LLH")
        true_template = template_maker.get_template(get_values(fixed_params))
        channel = params['channel']['value']
        true_fmap = flatten_map(true_template,chan=channel)
        return {'llh': [-get_binwise_llh(fmap,true_fmap)]}

    init_vals = get_param_values(free_params)
    scales = get_param_scales(free_params)
    bounds = get_param_bounds(free_params)
    priors = get_param_priors(free_params)
    names = sorted(free_params.keys())

    # Scale init-vals and bounds to work with bfgs opt:
    init_vals = np.array(init_vals)*np.array(scales)
    bounds = [bounds[i]*scales[i] for i in range(len(bounds))]

    opt_steps_dict = {key:[] for key in names}
    opt_steps_dict['llh'] = []

    const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)

    display_optimizer_settings(free_params, names, init_vals, bounds, priors,
                               bfgs_settings)

    best_fit_vals,llh,dict_flags = opt.fmin_l_bfgs_b(
        llh_bfgs, init_vals, args=const_args, approx_grad=True, iprint=0,
        bounds=bounds, **get_values(bfgs_settings))

    # If needed, run optimizer again, checking for second octant solution:
    if check_octant and ('theta23' in free_params.keys()):
        physics.info("Checking alternative octant solution")
        old_th23_val = free_params['theta23']['value']
        # Seed the fit in the other octant by mirroring theta23 about maximal
        # mixing (pi/4), i.e. theta23 -> pi/2 - theta23
        delta = np.pi/4 - old_th23_val
        free_params['theta23']['value'] = np.pi/4 + delta
        init_vals = get_param_values(free_params)

        const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)
        display_optimizer_settings(free_params, names, init_vals, bounds,
                                   priors, bfgs_settings)
        alt_fit_vals,alt_llh,alt_dict_flags = opt.fmin_l_bfgs_b(
            llh_bfgs, init_vals, args=const_args, approx_grad=True, iprint=0,
            bounds=bounds, **get_values(bfgs_settings))

        # Alternative octant solution is optimal:
        if alt_llh < llh:
            best_fit_vals = alt_fit_vals
            llh = alt_llh
            dict_flags = alt_dict_flags

    best_fit_params = { name: value for name, value in zip(names, best_fit_vals) }

    # Report best fit
    physics.info('Found best LLH = %.2f in %d calls at:'
                 %(llh,dict_flags['funcalls']))
    for name, val in best_fit_params.items():
        physics.info(' %20s = %6.4f'%(name,val))

    # Report any warnings if there are any
    lvl = logging.WARN if (dict_flags['warnflag'] != 0) else logging.DEBUG
    for name, val in dict_flags.items():
        physics.log(lvl," %s : %s"%(name,val))

    if not save_steps:
        # Do not store the extra history of opt steps:
        for key in opt_steps_dict.keys():
            opt_steps_dict[key] = [opt_steps_dict[key][-1]]

    return opt_steps_dict
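For reference, the second-octant seed used above is just theta23 mirrored about maximal mixing (pi/4), equivalent to theta23 -> pi/2 - theta23:

# Worked example of the octant mirror used to re-seed the fit above
import numpy as np

theta23 = np.deg2rad(42.0)
mirror = np.pi/4 + (np.pi/4 - theta23)   # == np.pi/2 - theta23
print(np.rad2deg(mirror))                # 48.0 degrees, the second-octant seed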
def find_max_llh_bfgs(fmap,template_maker,params,bfgs_settings,save_steps=False,
                      normal_hierarchy=None):
    '''
    Finds the template (and free systematic params) that maximize
    likelihood that the data came from the chosen template of true
    params, using the limited memory BFGS algorithm subject to bounds
    (l_bfgs_b).

    returns a dictionary of llh data and best fit params, in the format:
      {'llh': [...],
       'param1': [...],
       'param2': [...],
       ...}
    where 'param1', 'param2', ... are the free params varied by the
    optimizer, and they hold a list of all the values tested by the
    optimizer algorithm, unless save_steps is False, in which case they
    are one element in length: the best-fit params and best-fit llh.
    '''

    # Get params dict which will be optimized (free_params) and which
    # won't be (fixed_params) but are still needed for get_template()
    fixed_params = get_fixed_params(select_hierarchy(params,normal_hierarchy))
    free_params = get_free_params(select_hierarchy(params,normal_hierarchy))

    if len(free_params) == 0:
        logging.warn("NO FREE PARAMS, returning LLH")
        true_template = template_maker.get_template(get_values(fixed_params))
        channel = params['channel']['value']
        true_fmap = flatten_map(true_template,chan=channel)
        return {'llh': [-get_binwise_llh(fmap,true_fmap)]}

    init_vals = get_param_values(free_params)
    scales = get_param_scales(free_params)
    bounds = get_param_bounds(free_params)
    priors = get_param_priors(free_params)
    names = sorted(free_params.keys())

    # Scale init-vals and bounds to work with bfgs opt:
    init_vals = np.array(init_vals)*np.array(scales)
    bounds = [bounds[i]*scales[i] for i in range(len(bounds))]

    opt_steps_dict = {key:[] for key in names}
    opt_steps_dict['llh'] = []

    const_args = (names,scales,fmap,fixed_params,template_maker,opt_steps_dict,priors)

    physics.info('%d parameters to be optimized'%len(free_params))
    # Print lower bound before upper bound, matching the unpacking order
    for name,init,(down,up),(prior, best) in zip(names, init_vals, bounds, priors):
        physics.info(('%20s : init = %6.4f, bounds = [%6.4f,%6.4f], '
                      'best = %6.4f, prior = '+
                      ('%6.4f' if prior else "%s"))%
                     (name, init, down, up, best, prior))

    physics.debug("Optimizer settings:")
    for key,item in bfgs_settings.items():
        physics.debug(" %s -> `%s` = %.2e"%(item['desc'],key,item['value']))

    best_fit_vals,llh,dict_flags = opt.fmin_l_bfgs_b(llh_bfgs, init_vals,
                                                     args=const_args,
                                                     approx_grad=True,
                                                     iprint=0, bounds=bounds,
                                                     **get_values(bfgs_settings))

    best_fit_params = { name: value for name, value in zip(names, best_fit_vals) }

    # Report best fit
    physics.info('Found best LLH = %.2f in %d calls at:'
                 %(llh,dict_flags['funcalls']))
    for name, val in best_fit_params.items():
        physics.info(' %20s = %6.4f'%(name,val))

    # Report any warnings if there are any
    lvl = logging.WARN if (dict_flags['warnflag'] != 0) else logging.DEBUG
    for name, val in dict_flags.items():
        physics.log(lvl," %s : %s"%(name,val))

    if not save_steps:
        # Do not store the extra history of opt steps:
        for key in opt_steps_dict.keys():
            opt_steps_dict[key] = [opt_steps_dict[key][-1]]

    return opt_steps_dict
def load_and_modify_pipeline_cfg(fit_cfg, section):
    """Load and modify the pipeline config file as specified in that section
    of the fit config.

    Parameters
    ----------
    fit_cfg : pisa.utils.config_parser.PISAConfigParser
        any subclass of :class:`configparser.RawConfigParser` should work as
        well

    section : str
        name of the section to extract from the `fit_cfg`

    Returns
    -------
    pipeline_cfg : pisa.utils.config_parser.PISAConfigParser
        pipeline config

    pipeline_cfg_path : str
        path to the pipeline config as it is specified in the fit config

    """
    pipeline_cfg_path = fit_cfg.get(section, SYS_SET_OPTION)
    other_options = fit_cfg.options(section)
    other_options.remove(SYS_SET_OPTION)

    pipeline_cfg = from_file(pipeline_cfg_path)

    # Get a no-whitespace version of the section names
    section_map = {s.strip(): s for s in pipeline_cfg.sections()}

    for option in other_options:
        set_match = SET_OPTION_RE.match(option)
        remove_match = REMOVE_OPTION_RE.match(option) if not set_match else None
        if set_match:
            section_spec, set_option = set_match.groups()
            no_ws_section_spec = section_spec.strip()
            set_option = set_option.strip()
            if no_ws_section_spec not in section_map:
                logging.debug(
                    'Adding section [%s] to in-memory copy of pipeline config'
                    ' "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
                pipeline_cfg.add_section(section_spec)
                section_map[no_ws_section_spec] = section_spec
            if set_option:
                set_value = fit_cfg.get(section, option).strip()
                logging.debug(
                    'Setting section [%s] option "%s = %s" in in-memory'
                    ' copy of pipeline config "%s"',
                    section_spec,
                    set_option,
                    set_value,
                    pipeline_cfg_path,
                )
                pipeline_cfg.set(section_map[no_ws_section_spec], set_option,
                                 set_value)
        elif remove_match:
            section_spec, remove_option = remove_match.groups()
            no_ws_section_spec = section_spec.strip()
            remove_option = remove_option.strip()
            if no_ws_section_spec in section_map:
                if remove_option:
                    logging.debug(
                        'Removing section [%s] option "%s" from in-memory'
                        ' copy of pipeline config "%s"',
                        section_spec,
                        remove_option,
                        pipeline_cfg_path,
                    )
                    pipeline_cfg.remove_option(section_map[no_ws_section_spec],
                                               remove_option)
                else:
                    logging.debug(
                        "Removing section [%s] from in-memory copy of pipeline"
                        ' config "%s"',
                        section_spec,
                        pipeline_cfg_path,
                    )
                    pipeline_cfg.remove_section(section_map[no_ws_section_spec])
            else:
                logging.warn(
                    "Told to remove section [%s] but section does not exist in"
                    ' pipeline config "%s"',
                    section_spec,
                    pipeline_cfg_path,
                )
        else:
            raise ValueError("Unhandled option in fit config: {}".format(option))

    return pipeline_cfg, pipeline_cfg_path
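The in-memory edits this function applies reduce to standard `configparser` operations; a standalone sketch with hypothetical section/option names (the actual set/remove option syntax is defined by SET_OPTION_RE and REMOVE_OPTION_RE elsewhere in this module):

# Sketch of the in-memory config edits performed above; section and option
# names here are purely illustrative.
from configparser import ConfigParser

pipeline_cfg = ConfigParser()
pipeline_cfg.read_string("[stage.osc]\ntheta23 = 45.0\n")

# "set": add/overwrite an option, creating the section if needed
if not pipeline_cfg.has_section('stage.flux'):
    pipeline_cfg.add_section('stage.flux')
pipeline_cfg.set('stage.flux', 'nu_nubar_ratio', '1.0')

# "remove": drop an option, or a whole section
pipeline_cfg.remove_option('stage.osc', 'theta23')
pipeline_cfg.remove_section('stage.osc')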
def fit_discrete_sys_distributions(input_data, p0=None, fit_method=None): """Fits a hyperplane to MapSets generated at given systematics parameters values. Parameters ---------- input_data : OrderedDict The data container returned by `make_discrete_sys_distributions` and modified by `norm_sys_distributions`. p0 : list or dict Initial guess list (same initial guess for all maps) or dictionary (keys have to correspond to event groups/channels in maps) with one offset and len(sys_list) slopes. Default is list of ones. fit_method : None or string `method` arg to pass to `curve_fit` (see curve_fit docs). If None, will default to `trf` (this method supports covariance matrix calculation in the dimensionality we're dealing with). Returns ------- fit_results : OrderedDict Container of the hyerplane fit results + supporting data """ # # Prepare a few things before fitting # # Set a default fit method for curve_fit if fit_method is None: fit_method = "trf" # lm, trf, dogbox # TODO Store in output data # prepare an output data container fit_results = OrderedDict() fit_results["hyperplanes"] = OrderedDict() # store info from the input data in the fit results fit_results["datasets"] = input_data["datasets"] fit_results["param_names"] = input_data["param_names"] fit_results["fit_cfg_path"] = input_data["fit_cfg_path"] fit_results["fit_cfg_txt"] = input_data["fit_cfg_txt"] # get number of systematic parameters and datasets n_sys_params = len(fit_results["param_names"]) n_datasets = len(fit_results["datasets"]) # get number of params in hyperplane fit # this is one slope per systematic, plus a single intercept n_fit_params = 1 + len(fit_results["param_names"]) # get binning info binning = fit_results["datasets"][0]["mapset"][0].binning binning_shape = list(binning.shape) # normalise the systematics variations to the nominal distribution # with error propagation norm_sys_distributions(input_data) # re-organise normalised maps to be stored per event type (a list for each dataset) norm_sys_maps = OrderedDict() for map_name in input_data["datasets"][0]["norm_mapset"].names: norm_sys_maps[map_name] = [ dataset_dict["norm_mapset"][map_name] for dataset_dict in input_data["datasets"] ] # get an array of the systematic parameter points sampled across all datasets # transpose to get format compatible with scipy.optimize.curve_fit sys_param_points = np.asarray([ dataset_dict["param_values"] for dataset_dict in fit_results["datasets"] ]) # [datasets, params] sys_param_points_T = sys_param_points.T assert sys_param_points_T.shape[0] == n_sys_params assert sys_param_points_T.shape[1] == n_datasets # store some of this stuff fit_results["sys_param_points"] = sys_param_points fit_results["binning"] = binning fit_results["binning_hash"] = binning.hash # # Prepare initial parameter guesses # if p0: if isinstance(p0, Mapping): p0_keys = sorted(p0.keys()) map_keys = sorted(norm_sys_maps.keys()) if not p0_keys == map_keys: raise KeyError( "Initial guess mapping contains keys %s which are not the" " same as %s in maps." % (p0_keys, map_keys)) for ini_guess in p0.values(): assert len(ini_guess) == n_fit_params elif isinstance(p0, Sequence): assert len(p0) == n_fit_params p0 = {map_name: p0 for map_name in norm_sys_maps.keys()} else: raise TypeError( "Initial guess must be a mapping or a sequence. Found %s." 
% type(p0)) else: p0 = { map_name: np.ones(n_fit_params) for map_name in norm_sys_maps.keys() } fit_results["p0"] = p0 # # Loop over event types # for map_name, chan_norm_sys_maps in norm_sys_maps.items(): logging.info('Fitting "%s" maps with initial guess %s.', map_name, p0[map_name]) # create a container for fit results for this event type fit_results["hyperplanes"][map_name] = OrderedDict() # initialise data arrays with NaNs fit_results["hyperplanes"][map_name]["fit_params"] = np.full( shape=binning_shape + [n_fit_params], # [bins..., hyperplane params] fill_value=np.nan, ) fit_results["hyperplanes"][map_name]["chi2s"] = np.full( shape=binning_shape + [n_datasets], fill_value=np.nan # [bins..., datasets] ) fit_results["hyperplanes"][map_name]["cov_matrices"] = np.full( shape=binning_shape + [ n_fit_params, n_fit_params, ], # [bins..., hyperplane params, hyperplane params] fill_value=np.nan, ) fit_results["hyperplanes"][map_name]["finite_mask"] = np.full( shape=binning_shape + [n_datasets], fill_value=np.nan # [bins..., datasets] ) # # loop over bins # for idx in np.ndindex(*binning_shape): # get the bin content, including uncertainty and mask indicating if # the bin is finite treat the bin content as y values in the fit, # e.g. y(x0,...,xN) where N is the number of parameters each of # these 1D arrays has one element per input dataset y = np.asarray([m.hist[idx] for m in chan_norm_sys_maps]) y_values = unp.nominal_values(y) y_sigma = unp.std_devs(y) finite_mask = np.isfinite(y_values) & np.isfinite(y_sigma) # empty bins have sigma=0 which causes the hyperplane fit to fail (silently) # replace with sigma=inf (e.g. we know nothing in this bin) empty_bin_mask = np.isclose(y_values, 0.0) if np.any(empty_bin_mask): empty_bin_zero_sigma_mask = empty_bin_mask & np.isclose( y_sigma, 0.0) if np.any(empty_bin_zero_sigma_mask): y_sigma[empty_bin_zero_sigma_mask] = np.inf # check no zero sigma values remaining if np.any(np.isclose(y_sigma, 0.0)): raise ValueError( "Found histogram sigma values that are 0., which is unphysical" ) # # Perform hyperplane fit in this bin # # case 1: uncertainties are available in the bins (ideal case) if np.any(y_sigma[finite_mask]): # fit popt, pcov = curve_fit( hyperplane_fun, sys_param_points_T[:, finite_mask], y_values[finite_mask], sigma=y_sigma[finite_mask], p0=p0[map_name], absolute_sigma=True, # TODO Should we use this? method=fit_method, ) # Calculate chi-square values comparing the input data and the # fit results at each data point (e.g. per dataset, and of # course in each bin) for point_idx in range(n_datasets): # Loop over datasets # Get param values for this dataset point = sys_param_points[point_idx, :] # Predict counts in this bin accoridng to hyperplane for # this dataset predicted = hyperplane_fun(point, *popt) observed = y_values[point_idx] sigma = y_sigma[point_idx] # TODO Is chi2 computation correct? chi2 = ((predicted - observed) / sigma)**2 chi2_idx = tuple(list(idx) + [point_idx]) fit_results["hyperplanes"][map_name]["chi2s"][ chi2_idx] = chi2 else: # if here, no uncertainties are available for this bin # note that cannot calculate chi2 without uncertainties # case 2: there are at least central values in the bins if np.any(y_values[finite_mask]): # without error estimates each point has the same weight # and we cannot get chi-square values (but can still fit) logging.warn( "No uncertainties for any of the normalised counts in bin" ' %s ("%s") found. 
Fit is performed unweighted and no'
                    " chisquare values will be available.",
                    idx,
                    map_name,
                )
                # fit; mask `y_values` consistently with the masked parameter
                # points, as in the weighted fit above
                popt, pcov = curve_fit(
                    hyperplane_fun,
                    sys_param_points_T[:, finite_mask],
                    y_values[finite_mask],
                    p0=p0[map_name],
                    method=fit_method,
                )
            # case 3: no data in this bin
            # this is the worst case, where there are no central values or
            # errors. Most likely this came about because this bin is
            # empty, which is not necessarily an error.
            else:
                # Store NaN for fit params and chi2
                popt = np.full_like(p0[map_name], np.NaN)
                pcov = np.NaN  # TODO Shape?

            # store the results for this bin
            # note that chi2 is already stored above
            fit_results["hyperplanes"][map_name]["fit_params"][idx] = popt
            fit_results["hyperplanes"][map_name]["cov_matrices"][idx] = pcov
            fit_results["hyperplanes"][map_name]["finite_mask"][idx] = finite_mask

    return fit_results
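As a sketch of the parameterisation implied above (one intercept plus one slope per systematic); the real `hyperplane_fun` is defined elsewhere in this module and may differ in detail:

# Hypothetical stand-in for hyperplane_fun: intercept + sum of slope*param
import numpy as np

def hyperplane(points, intercept, *slopes):
    """points: array of shape [n_sys_params, n_datasets]."""
    return intercept + np.dot(np.asarray(slopes), points)

points = np.array([[0.9, 1.0, 1.1]])   # one systematic, three datasets
print(hyperplane(points, 1.0, 0.5))    # [1.45 1.5  1.55]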
) parser.add_argument( 'infiles', nargs='*', help="input llh files to combine into one output hdf5 file." ) parser.add_argument( '-v', '--verbose', action='count', default=None, help="set verbosity level" ) args = parser.parse_args() set_verbosity(args.verbose) logging.warn("processing " + str(len(args.infiles)) + " files...") logging.warn("Saving to file: %s"%args.outfile) mod_num = len(args.infiles)/20 start_time = datetime.now() minimizer_settings = {} template_settings = {} pseudo_data_settings = {} trials = {} for i,filename in enumerate(args.infiles): if mod_num > 0: if i%mod_num == 0: print " >> %d files done..."%i try: data = from_json(filename)