def setup_function(self):
    assert isinstance(self.apply_mode, MultiDimBinning), (
        f"KDE stage needs a binning as `apply_mode`, but is {self.apply_mode}"
    )

    # For dimensions that are logarithmic, we add a linear binning in
    # the logarithm.
    dimensions = []
    for dim in self.apply_mode:
        if dim.is_lin:
            new_dim = deepcopy(dim)
        # We don't compute the log of the variable just yet, this
        # will be done later during `apply_function` using the
        # representation mechanism.
        # We replace the logarithmic binning with a linear binning in log-space
        elif dim.is_irregular:
            new_dim = OneDimBinning(
                dim.name,
                bin_edges=np.log(dim.bin_edges.m),
            )
        else:
            new_dim = OneDimBinning(
                dim.name, domain=np.log(dim.domain.m), num_bins=dim.num_bins
            )
        dimensions.append(new_dim)
    self.regularized_apply_mode = MultiDimBinning(dimensions)
    logging.debug(
        "Using regularized binning:\n" + repr(self.regularized_apply_mode)
    )
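# A minimal standalone check (plain NumPy, no PISA objects) of the
# regularization idea used in `setup_function` above: the natural log of
# log-spaced bin edges is linearly spaced, so a logarithmic dimension can
# be treated as a *linear* binning in log-space.
import numpy as np

log_edges = np.log(np.logspace(0, 2, 11))  # 10 log bins spanning 1..100
assert np.allclose(np.diff(log_edges), log_edges[1] - log_edges[0])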
def _flatten_to_2d(in_map):
    assert isinstance(in_map, Map)

    shape = in_map.shape
    names = in_map.binning.names
    dims = len(shape)
    assert dims % 2 == 0

    nbins_a = np.product(shape[:dims // 2])
    nbins_b = np.product(shape[dims // 2:])
    names_a = reduce(lambda x, y: x + ' ' + y, names[:dims // 2])
    names_b = reduce(lambda x, y: x + ' ' + y, names[dims // 2:])

    binning = []
    binning.append(OneDimBinning(name=names_a, num_bins=nbins_a, is_lin=True,
                                 domain=[0, nbins_a]))
    binning.append(OneDimBinning(name=names_b, num_bins=nbins_b, is_lin=True,
                                 domain=[0, nbins_b]))
    binning = MultiDimBinning(binning)

    hist = in_map.hist.reshape(nbins_a, nbins_b)
    return Map(name=in_map.name, hist=hist, binning=binning)
def test_container():
    n_evts = 10000
    x = np.arange(n_evts, dtype=FTYPE)
    y = np.arange(n_evts, dtype=FTYPE)
    w = np.ones(n_evts, dtype=FTYPE)
    w *= np.random.rand(n_evts)

    container = Container('test')
    container.add_array_data('x', x)
    container.add_array_data('y', y)
    container.add_array_data('w', w)

    binning_x = OneDimBinning(name='x', num_bins=10, is_lin=True,
                              domain=[0, 100])
    binning_y = OneDimBinning(name='y', num_bins=10, is_lin=True,
                              domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])

    print(container.get_binned_data('x', binning).get('host'))
    print(Container.unroll_binning('x', binning).get('host'))

    # array
    print('original array')
    print(container.get_array_data('w').get('host'))
    container.array_to_binned('w', binning)

    # binned
    print('binned')
    print(container.get_binned_data('w').get('host'))
    print(container.get_hist('w'))

    print('augmented again')
    # augment
    container.binned_to_array('w')
    print(container.get_array_data('w').get('host'))
def test_container(): """Unit tests for Container class.""" # NOTE: Right now the numbers are tuned so that the weights are identical # per bin. If you change binning that's likely not the case anymore and you # inevitably end up with averaged values over bins, which are then not # equal to the individual weights anymore when those are not identical per # bin n_evts = 10000 x = np.linspace(0, 100, n_evts, dtype=FTYPE) y = np.linspace(0, 100, n_evts, dtype=FTYPE) w = np.tile(np.arange(100, dtype=FTYPE) + 0.5, (100, 1)).T.ravel() container = Container('test', 'events') container['x'] = x container['y'] = y container['w'] = w binning_x = OneDimBinning(name='x', num_bins=100, is_lin=True, domain=[0, 100]) binning_y = OneDimBinning(name='y', num_bins=100, is_lin=True, domain=[0, 100]) binning = MultiDimBinning([binning_x, binning_y]) logging.trace('Testing container and translation methods') container.representation = binning bx = container['x'] m = np.meshgrid(binning.midpoints[0].m, binning.midpoints[1].m)[1].ravel() assert np.allclose(bx, m, **ALLCLOSE_KW), f'test:\n{bx}\n!= ref:\n{m}' # array repr container.representation = 'events' array_weights = container['w'] assert np.allclose(array_weights, w, **ALLCLOSE_KW), f'test:\n{array_weights}\n!= ref:\n{w}' # binned repr container.representation = binning diag = np.diag(np.arange(100) + 0.5) bd = container['w'] h = container.get_hist('w') assert np.allclose(bd, diag.ravel(), **ALLCLOSE_KW), f'test:\n{bd}\n!= ref:\n{diag.ravel()}' assert np.allclose(h[0], diag, **ALLCLOSE_KW), f'test:\n{h[0]}\n!= ref:\n{diag}' assert h[1] == binning, f'test:\n{h[1]}\n!= ref:\n{binning}' # augment to array repr again container.representation = 'events' a = container['w'] assert np.allclose(a, w, **ALLCLOSE_KW), f'test:\n{a}\n!= ref:\n{w}'
def keep_inbounds(self, binning):
    """Cut out any events that fall outside `binning`. Note that events
    that fall exactly on an outer edge are kept.

    Parameters
    ----------
    binning : OneDimBinning or MultiDimBinning

    Returns
    -------
    cut_data : EventsPi

    """
    # Get the binning instance
    try:
        binning = OneDimBinning(binning)
    except:  # pylint: disable=bare-except
        pass
    if isinstance(binning, OneDimBinning):
        binning = [binning]
    binning = MultiDimBinning(binning)

    # Define a cut to remove events outside of the binned region
    bin_edge_cuts = [dim.inbounds_criteria for dim in binning]
    bin_edge_cuts = " & ".join([str(x) for x in bin_edge_cuts])

    # Apply the cut
    return self.apply_cut(bin_edge_cuts)
def binned_to_array(self, key, src_representation, dest_representation):
    """Augment binned data to array data."""
    logging.trace('Transforming %s binned to array data' % (key))

    self.representation = src_representation
    weights = self[key]

    if not src_representation.is_irregular:
        logging.trace(
            f"Container `{self.name}`: regularized lookup for {key}")
        sample = []
        dimensions = []
        for d in src_representation:
            if d.is_log:
                self.representation = "log_events"
                sample.append(self[d.name])
                dimensions.append(
                    OneDimBinning(d.name, domain=np.log(d.domain.m),
                                  num_bins=d.num_bins)
                )
            else:
                self.representation = "events"
                sample.append(self[d.name])
                dimensions.append(d)
        hist_binning = MultiDimBinning(dimensions)
    else:
        logging.trace(
            f"Container `{self.name}`: irregular lookup for {key}")
        self.representation = dest_representation
        sample = [self[name] for name in src_representation.names]
        hist_binning = src_representation

    return lookup(sample, weights, hist_binning)
def pisa2_map_to_pisa3_map(pisa2_map, ebins_name='ebins',
                           czbins_name='czbins', is_log=True, is_lin=True):
    expected_keys = ['map', 'ebins', 'czbins']
    if sorted(pisa2_map.keys()) != sorted(expected_keys):
        raise ValueError(
            'PISA 2 map should be a dict containing entries: %s'
            % expected_keys)
    ebins = OneDimBinning(name=ebins_name,
                          bin_edges=pisa2_map['ebins'] * ureg.GeV,
                          is_log=is_log, tex=r'E_{\nu}')
    czbins = OneDimBinning(name=czbins_name,
                           bin_edges=pisa2_map['czbins'],
                           is_lin=is_lin, tex=r'\cos\theta_Z')
    bins = MultiDimBinning([ebins, czbins])
    return Map(name='pisa2equivalent', hist=pisa2_map['map'], binning=bins)
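# Hypothetical usage sketch for `pisa2_map_to_pisa3_map` with a minimal,
# invented PISA 2 style dict: 3 energy bins and 4 coszen bins, so `map`
# must have shape (3, 4).
import numpy as np

toy_pisa2_map = {
    'map': np.ones((3, 4)),
    'ebins': np.logspace(0, 2, 4),     # 4 edges -> 3 energy bins
    'czbins': np.linspace(-1, 1, 5),   # 5 edges -> 4 coszen bins
}
pisa3_map = pisa2_map_to_pisa3_map(toy_pisa2_map)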
def test_histogram():
    n_evts = 100
    x = np.arange(n_evts, dtype=FTYPE)
    y = np.arange(n_evts, dtype=FTYPE)
    w = np.ones(n_evts, dtype=FTYPE)
    #w *= np.random.rand(n_evts)

    x = SmartArray(x)
    y = SmartArray(y)
    w = SmartArray(w)

    binning_x = OneDimBinning(name='x', num_bins=10, is_lin=True,
                              domain=[0, 100])
    binning_y = OneDimBinning(name='y', num_bins=10, is_lin=True,
                              domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])

    sample = [x, y]
    weights = w
    averaged = True

    histo = histogram(sample, weights, binning, averaged)
    assert np.array_equal(histo.reshape(10, 10), np.zeros(shape=(10, 10)))
def _flatten_to_1d(in_map):
    assert isinstance(in_map, Map)

    bin_name = reduce(add, in_map.binning.names)
    num_bins = np.product(in_map.shape)
    binning = MultiDimBinning([
        OneDimBinning(name=bin_name, num_bins=num_bins, is_lin=True,
                      domain=[0, num_bins])
    ])
    hist = in_map.hist.flatten()

    return Map(name=in_map.name, hist=hist, binning=binning)
def array_to_binned(self, key, src_representation, dest_representation):
    """Histogram data array into binned data.

    Parameters
    ----------
    key : str
    src_representation : str
    dest_representation : MultiDimBinning

    Notes
    -----
    The histogram entries are averages of the numbers that end up in a
    given bin. This, for example, must be used when oscillation
    probabilities are translated; otherwise we would end up with
    probability*count per bin.

    Right now this is CPU-only.

    """
    # TODO: make work for n-dim
    logging.trace('Transforming %s array to binned data' % (key))

    assert src_representation in self.array_representations
    assert isinstance(dest_representation, MultiDimBinning)

    if not dest_representation.is_irregular:
        sample = []
        dimensions = []
        for d in dest_representation:
            if d.is_log:
                self.representation = "log_events"
                sample.append(self[d.name])
                dimensions.append(
                    OneDimBinning(d.name, domain=np.log(d.domain.m),
                                  num_bins=d.num_bins)
                )
            else:
                self.representation = "events"
                sample.append(self[d.name])
                dimensions.append(d)
        hist_binning = MultiDimBinning(dimensions)
    else:
        self.representation = src_representation
        sample = [self[name] for name in dest_representation.names]
        hist_binning = dest_representation

    self.representation = src_representation
    weights = self[key]

    hist = histogram(sample, weights, hist_binning, averaged=True)

    return hist
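# A hedged, NumPy-only sketch of what `averaged=True` means above
# (assuming `histogram` follows numpy.histogramdd semantics, as the
# test_histogram unit test below checks): the per-bin *mean* of the
# weights rather than their sum, so that quantities like oscillation
# probabilities stay probabilities instead of becoming probability*count.
import numpy as np

rng = np.random.default_rng(0)
pts = [rng.uniform(0, 1, 1000), rng.uniform(0, 1, 1000)]
probs = rng.uniform(0, 1, 1000)          # toy per-event probabilities
edges = [np.linspace(0, 1, 11)] * 2
sums, _ = np.histogramdd(pts, bins=edges, weights=probs)
counts, _ = np.histogramdd(pts, bins=edges)
with np.errstate(invalid='ignore'):
    averaged = sums / counts             # NaN only in empty bins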
def test_standard_plots(xsec_file, outdir='./'):
    from pisa.utils.plotter import Plotter
    xsec = genie.get_combined_xsec(xsec_file)

    e_bins = MultiDimBinning([
        OneDimBinning(name='true_energy', tex=r'E_\nu', num_bins=150,
                      domain=(1E-1, 1E3) * ureg.GeV, is_log=True)
    ])
    xsec.compute_maps(e_bins)

    logging.info('Making plots for genie xsec_maps')
    plot_obj = Plotter(outdir=outdir, stamp='Cross-Section', fmt='png',
                       log=True, size=(12, 8),
                       label=r'Cross-Section ($m^{2}$)')
    maps = xsec.return_mapset()
    plot_obj.plot_xsec(maps, ylim=(1E-43, 1E-37))
def test_histogram(): """Unit tests for `histogram` function. Correctness is defined as matching the histogram produced by numpy.histogramdd. """ all_num_bins = [2, 3, 4] n_evts = 10000 rand = np.random.RandomState(seed=0) weights = rand.rand(n_evts).astype(FTYPE) binning = [] sample = [] for num_dims, num_bins in enumerate(all_num_bins, start=1): binning.append( OneDimBinning( name=f'dim{num_dims - 1}', num_bins=num_bins, is_lin=True, domain=[0, num_bins], )) s = rand.rand(n_evts).astype(FTYPE) * num_bins sample.append(s) bin_edges = [b.edge_magnitudes for b in binning] test = histogram(sample, weights, MultiDimBinning(binning), averaged=False) ref, _ = np.histogramdd(sample=sample, bins=bin_edges, weights=weights) ref = ref.astype(FTYPE).ravel() assert recursiveEquality(test, ref), f'\ntest:\n{test}\n\nref:\n{ref}' test_avg = histogram(sample, weights, MultiDimBinning(binning), averaged=True) ref_counts, _ = np.histogramdd(sample=sample, bins=bin_edges, weights=None) ref_counts = ref_counts.astype(FTYPE).ravel() ref_avg = (ref / ref_counts).astype(FTYPE) assert recursiveEquality(test_avg, ref_avg), \ f'\ntest_avg:\n{test_avg}\n\nref_avg:\n{ref_avg}' logging.info('<< PASS : test_histogram >>')
def test_per_e_plot(xsec_file, outdir='./'):
    from pisa.utils.plotter import Plotter
    xsec = genie.get_combined_xsec(xsec_file)

    e_bins = MultiDimBinning([
        OneDimBinning(name='true_energy', tex=r'E_\nu', num_bins=200,
                      domain=(1E-1, 1E3) * ureg.GeV, is_log=True)
    ])
    xsec.compute_maps(e_bins)
    xsec.scale_maps(1 / e_bins.true_energy.bin_widths.magnitude)

    logging.info('Making plots for genie xsec_maps per energy')
    plot_obj = Plotter(outdir=outdir, stamp='Cross-Section / Energy',
                       fmt='png', log=False, size=(12, 8),
                       label=r'Cross-Section / Energy ($m^{2} GeV^{-1}$)')
    maps = xsec.return_mapset()
    plot_obj.plot_xsec(maps, ylim=(3.5E-41, 3E-40))
def keepInbounds(self, binning):
    """Cut out any events that fall outside `binning`. Note that events
    that fall exactly on an outer edge are kept.

    Parameters
    ----------
    binning : OneDimBinning or MultiDimBinning

    Returns
    -------
    remaining_events : Events

    """
    try:
        binning = OneDimBinning(binning)
    except Exception:
        pass

    if isinstance(binning, OneDimBinning):
        binning = [binning]
    binning = MultiDimBinning(binning)

    current_cuts = self.metadata['cuts']
    new_cuts = [dim.inbounds_criteria for dim in binning]
    unapplied_cuts = [c for c in new_cuts if c not in current_cuts]
    if not unapplied_cuts:
        logging.debug(
            "All inbounds criteria '%s' have already been applied."
            " Returning events unmodified.", new_cuts)
        return self
    all_cuts = deepcopy(current_cuts) + unapplied_cuts

    # Create a single cut from all unapplied cuts
    keep_criteria = ' & '.join(['(%s)' % c for c in unapplied_cuts])

    # Do the cutting
    remaining_events = self.applyCut(keep_criteria=keep_criteria)

    # Replace the combined 'cuts' string with individual cut strings
    remaining_events.metadata['cuts'] = all_cuts

    return remaining_events
def test_lookup_indices():
    """Unit tests for `lookup_indices` function"""

    #
    # Test a variety of points. Points falling exactly on an inner bin
    # edge are assigned to the higher bin, and points falling exactly on
    # the outermost upper edge are included in the last bin.
    #
    n_evts = 100

    x = np.array([-5, 0.5, 1.5, 7.0, 6.5, 8.0, 6.5], dtype=FTYPE)
    y = np.array([-5, 0.5, 1.5, 1.5, 3.0, 1.5, 2.5], dtype=FTYPE)
    z = np.array([-5, 0.5, 1.5, 1.5, 0.5, 6.0, 0.5], dtype=FTYPE)

    w = np.ones(n_evts, dtype=FTYPE)

    x = SmartArray(x)
    y = SmartArray(y)
    z = SmartArray(z)
    w = SmartArray(w)

    binning_x = OneDimBinning(name="x", num_bins=7, is_lin=True, domain=[0, 7])
    binning_y = OneDimBinning(name="y", num_bins=4, is_lin=True, domain=[0, 4])
    binning_z = OneDimBinning(name="z", num_bins=2, is_lin=True, domain=[0, 2])

    binning_1d = binning_x
    binning_2d = binning_x * binning_y
    binning_3d = binning_x * binning_y * binning_z

    #
    # 1D case: check that each event falls into its predicted bin.
    #
    # Values below the binning range are assigned an index of -1, and
    # values above the last bin edge are assigned an index equal to the
    # total number of bins.
    #
    logging.trace("TEST 1D:")
    logging.trace("Total number of bins: {}".format(7))
    logging.trace("array in 1D: {}".format(x.get()))
    logging.trace("Binning: {}".format(binning_1d.bin_edges[0]))

    indices = lookup_indices([x], binning_1d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")

    test = indices.get()
    ref = np.array([-1, 0, 1, 6, 6, 7, 6])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    #
    # 2D case:
    #
    # The binning edges are flattened as follows:
    #   [(x=0, y=0), (x=0, y=1), (x=1, y=0), ...]
    #
    logging.trace("TEST 2D:")
    logging.trace("Total number of bins: {}".format(7 * 4))
    logging.trace("array in 2D: {}".format(list(zip(x.get(), y.get()))))
    logging.trace("Binning: {}".format(binning_2d.bin_edges))

    indices = lookup_indices([x, y], binning_2d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")

    test = indices.get()
    ref = np.array([-1, 0, 5, 25, 27, 28, 26])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    #
    # 3D case:
    #
    # The binning edges are flattened as follows:
    #   [(x=0, y=0, z=0), (x=0, y=0, z=1), (x=0, y=1, z=0), ...]
    #
    logging.trace("TEST 3D:")
    logging.trace("Total number of bins: {}".format(7 * 4 * 2))
    logging.trace("array in 3D: {}".format(
        list(zip(x.get(), y.get(), z.get()))))
    logging.trace("Binning: {}".format(binning_3d.bin_edges))

    indices = lookup_indices([x, y, z], binning_3d)
    logging.trace("indices of each array element: {}".format(indices.get()))
    logging.trace("*********************************")

    test = indices.get()
    ref = np.array([-1, 0, 11, 51, 54, 56, 52])
    assert np.array_equal(test, ref), "test={} != ref={}".format(test, ref)

    logging.info("<< PASS : test_lookup_indices >>")
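# A pure-NumPy sketch (hypothetical helper, *not* the PISA implementation)
# of the flattening convention the reference values above encode: per-dim
# bin indices are raveled in row-major (C) order, underflow maps to -1 and
# overflow to the total number of bins.
import numpy as np

def flat_indices(sample, edges):
    per_dim = [np.searchsorted(e, s, side='right') - 1
               for e, s in zip(edges, sample)]
    # points exactly on the outermost upper edge stay in the last bin
    per_dim = [np.where(s == e[-1], i - 1, i)
               for i, e, s in zip(per_dim, edges, sample)]
    shape = [len(e) - 1 for e in edges]
    flat = np.ravel_multi_index(
        [np.clip(i, 0, n - 1) for i, n in zip(per_dim, shape)], shape)
    under = np.any([i < 0 for i in per_dim], axis=0)
    over = np.any([i >= n for i, n in zip(per_dim, shape)], axis=0)
    return np.where(under, -1, np.where(over, np.prod(shape), flat))

# reproduces the 2D reference above: [-1, 0, 5, 25, 27, 28, 26]
xs = np.array([-5, 0.5, 1.5, 7.0, 6.5, 8.0, 6.5])
ys = np.array([-5, 0.5, 1.5, 1.5, 3.0, 1.5, 2.5])
print(flat_indices([xs, ys], [np.arange(8.0), np.arange(5.0)]))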
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or ConfigParser

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are
        OrderedDicts with keys the argnames and values the arguments'
        values. Some known arg values are parsed out fully into Python
        objects, while the rest remain as strings that must be used or
        parsed elsewhere.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    if isinstance(config, basestring):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s"
            % config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except:
                    dims_defined = [
                        split(dim, sep='.')[1]
                        for dim in config['binning'].keys()
                        if dim.startswith(binning + '.')
                        and not dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'",
                        bin_name, binning, def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement
    # them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"'
                % (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"'
                % (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".'
                    % (fullname, section))
                raise

            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(
                    subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just
                # a link to the param object that is already instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if not kw.has_key('params'):
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(
                            name=infodict['pname'],
                            selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section)
                    # for the param defined in a previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section,
                                             '%s.%s' % (fullname, a)):
                            raise ValueError(
                                "Parameter spec. '%s' of '%s' found in"
                                " section '%s', but parameter exists in"
                                " previous stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(
                        config=config,
                        section=section,
                        selector=infodict['selector'],
                        fullname=fullname,
                        pname=infodict['pname'],
                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's
            # a binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # Otherwise, for a PI stage it's a "specs" spec
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it events?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it's got to be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # It's a list of in/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value

            # Otherwise it's some other stage instantiation argument;
            # identify this by its full name and try to interpret and
            # instantiate a Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from
        # the service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name
    return stage_dicts
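# Illustrative (hypothetical) config snippet of the kind the binning loop
# above consumes: `<name>.order` lists the dimension names, and each
# `<name>.<dim>` entry holds the kwargs that are eval'd and passed to
# OneDimBinning. Names and values here are invented for illustration.
example_binning_section = """
[binning]
calc.order = true_energy, true_coszen
calc.true_energy = dict(num_bins=40, is_log=True, domain=[1, 80] * units.GeV)
calc.true_coszen = dict(num_bins=20, is_lin=True, domain=[-1, 0])
"""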
def smooth(self, xform, errors, e_binning, cz_binning):
    """Smooth a 2d array with energy as the first dimension and CZ as the
    second, first performing a gaussian smoothing to get rid of zeros and
    then spline smoothing.

    Parameters
    ----------
    xform : 2d-array
    errors : 2d-array
    e_binning : OneDimBinning
    cz_binning : OneDimBinning

    """
    sumw2 = np.square(errors)

    # First, let's check if we have entire coszen rows of zeros; this is
    # for example the case for all nutau CC below 3.5 GeV
    non_zero_idx = 0
    while np.sum(xform[non_zero_idx, :]) == 0:
        non_zero_idx += 1

    # cut away these all-zero regions for now
    truncated_xform = xform[non_zero_idx:, :]
    truncated_sumw2 = sumw2[non_zero_idx:, :]
    truncated_e_binning = e_binning[non_zero_idx:]

    # Now let's extend that array at both energy ends by about 10% of the
    # number of bins
    num_extension_bins_lower = int(np.floor(0.1 * len(e_binning)))
    num_extension_bins_upper = 0
    assert e_binning.is_lin or e_binning.is_log, \
        'Do not know how to extend arbitrary binning'

    # what will the new bin edges be?
    bin_edges = truncated_e_binning.bin_edges.m
    if e_binning.is_log:
        bin_edges = np.log10(bin_edges)
    bin_edges = list(bin_edges)
    lower_edges = []
    upper_edges = []
    delta = bin_edges[1] - bin_edges[0]
    for i in range(num_extension_bins_lower):
        lower_edges.append(bin_edges[0] - (i + 1) * delta)
    for i in range(num_extension_bins_upper):
        upper_edges.append(bin_edges[-1] + (i + 1) * delta)
    new_edges = np.array(lower_edges[::-1] + bin_edges + upper_edges)
    if e_binning.is_log:
        new_edges = np.power(10, new_edges)
    extended_binning = OneDimBinning('true_energy',
                                     bin_edges=new_edges,
                                     is_lin=e_binning.is_lin,
                                     is_log=e_binning.is_log)

    # also extend the arrays; we do that by point-reflecting the values,
    # so an array like [0 1 2 3 4 ...] will become [-3 -2 -1 0 1 2 3 4 ...]
    #if non_zero_idx == 0:
    lower_bit = 2 * truncated_xform[0, :] - np.flipud(
        truncated_xform[1:num_extension_bins_lower + 1, :])
    #else:
    #    lower_bit = - np.flipud(truncated_xform[1:num_extension_bins+1,:])
    upper_bit = 2 * truncated_xform[-1, :] - np.flipud(
        truncated_xform[-num_extension_bins_upper - 1:-1, :])
    extended_xform = np.concatenate((lower_bit, truncated_xform, upper_bit))

    # also handle the errors (which simply add up in quadrature)
    #if non_zero_idx == 0:
    lower_bit = truncated_sumw2[0, :] + np.flipud(
        truncated_sumw2[1:num_extension_bins_lower + 1, :])
    #else:
    #    lower_bit = np.flipud(truncated_sumw2[1:num_extension_bins+1,:])
    upper_bit = truncated_sumw2[-1, :] + np.flipud(
        truncated_sumw2[-num_extension_bins_upper - 1:-1, :])
    extended_sumw2 = np.concatenate((lower_bit, truncated_sumw2, upper_bit))

    # what's the statistical situation here?
    rel_error = np.square(errors) / xform
    rel_error = np.median(rel_error[xform != 0])
    logging.debug('Relative errors are ~ %.5f', rel_error)

    # how many zero bins?
    zero_fraction = np.count_nonzero(truncated_xform == 0) / float(
        truncated_xform.size)
    logging.debug('zero fraction is %.3f', zero_fraction)

    # now use gaussian smoothing on those, with some black-magic sigma
    # values
    sigma_e = xform.shape[0] * np.sqrt(zero_fraction) * 0.02
    sigma_cz = xform.shape[1] * np.sqrt(zero_fraction) * 0.05
    sigma1 = (0, sigma_cz)
    sigma2 = (sigma_e, 0)

    smooth_extended_xform = ndimage.filters.gaussian_filter(
        extended_xform, sigma1, mode='reflect')
    smooth_extended_sumw2 = ndimage.filters.gaussian_filter(
        extended_sumw2, sigma1, mode='reflect')

    smooth_extended_xform = ndimage.filters.gaussian_filter(
        smooth_extended_xform, sigma2, mode='nearest')
    smooth_extended_sumw2 = ndimage.filters.gaussian_filter(
        smooth_extended_sumw2, sigma2, mode='nearest')

    smooth_extended_errors = np.sqrt(smooth_extended_sumw2)

    # for low stats, smooth less; otherwise leave the factor alone
    smooth_corr_factor = max(1., (rel_error * 10000))

    # now spline smooth
    new_xform, _ = spline_smooth(
        array=smooth_extended_xform,
        spline_binning=extended_binning,
        eval_binning=e_binning,
        axis=0,
        smooth_factor=self.params.aeff_e_smooth_factor.value
        / smooth_corr_factor,
        k=3,
        errors=smooth_extended_errors)

    final_xform, _ = spline_smooth(
        array=new_xform,
        spline_binning=cz_binning,
        eval_binning=cz_binning,
        axis=1,
        smooth_factor=self.params.aeff_cz_smooth_factor.value
        / smooth_corr_factor,
        k=3,
        errors=None)

    # the final array has the right shape again, because we evaluated the
    # splines only on the real binning

    # don't forget to zero out the zero bins again
    final_xform[:non_zero_idx, :] *= 0

    # clip unphysical (negative) values
    return final_xform.clip(0)
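# Quick standalone check of the point-reflection extension used in
# `smooth` above (shown in 1D; the real code applies it along the energy
# axis of a 2D array): [0 1 2 3 4] extended by 3 bins at the lower end
# becomes [-3 -2 -1 0 1 2 3 4].
import numpy as np

a = np.arange(5, dtype=float)
k = 3
lower_bit = 2 * a[0] - np.flipud(a[1:k + 1])
assert np.array_equal(np.concatenate((lower_bit, a)),
                      np.arange(-3, 5, dtype=float))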
def _calculate_fit_coeffs(data, params, fit_binning, nu_params=None,
                          mu_params=None):
    """Calculate the fit coefficients for each systematic, flavint, and
    bin for a polynomial."""
    logging.debug('Calculating fit coefficients')

    config = from_file(params['discr_sys_sample_config'].value)

    degree = int(params['poly_degree'].value)
    force_through_nominal = params['force_through_nominal'].value

    if force_through_nominal:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, [1.] + list(poly_coeffs))
    else:
        def fit_func(vals, *poly_coeffs):
            return np.polynomial.polynomial.polyval(
                vals, list(poly_coeffs))
        # add free param for constant term
        degree += 1

    template_maker = Pipeline(params['pipeline_config'].value)
    dataset_param = template_maker.params['dataset']

    def parse(string):
        return string.replace(' ', '').split(',')

    sys_fit_coeffs = OrderedDict()
    if nu_params is not None:
        sys_list = parse(config.get('neutrinos', 'sys_list'))
        nu_params = deepcopy(map(lambda x: x[3:], nu_params))

        if set(nu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(nu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'neutrinos|' + sys
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            mapset_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if flavint_groups is None:
                    flavint_groups = template.flavint_groups
                else:
                    if set(flavint_groups) != set(template.flavint_groups):
                        raise AssertionError(
                            'Mismatch of flavint_groups - ({0}) does not '
                            'match flavint_groups '
                            '({1})'.format(flavint_groups,
                                           template.flavint_groups))

                outputs = []
                for fig in template.keys():
                    outputs.append(template.histogram(
                        kinds=fig,
                        binning=fit_binning,
                        weights_col='pisa_weight',
                        errors=False,
                        name=str(NuFlavIntGroup(fig))))
                mapset_dict[run] = MapSet(outputs, name=run)

            nom_mapset = mapset_dict[nominal]
            fracdiff_mapset_dict = OrderedDict()
            for run in runs:
                mapset = []
                for flavintg_map in mapset_dict[run]:
                    # TODO(shivesh): error propagation?
                    flavintg = flavintg_map.name
                    mask = ~(nom_mapset[flavintg].hist == 0.)
                    div = np.zeros(flavintg_map.shape)
                    with np.errstate(divide='ignore', invalid='ignore'):
                        div[mask] = \
                            unp.nominal_values(flavintg_map.hist[mask]) /\
                            unp.nominal_values(nom_mapset[flavintg].hist[mask])
                    mapset.append(Map(
                        name=flavintg, binning=flavintg_map.binning,
                        hist=div))
                fracdiff_mapset_dict[run] = MapSet(mapset)

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            coeff_binning = OneDimBinning(
                name='coeff', num_bins=degree, is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            params_mapset = []
            for fig in template.keys():
                # TODO(shivesh): Fix numpy warning on this line
                pvals_hist = np.empty(map(int, combined_binning.shape),
                                      dtype=object)
                hists = [fracdiff_mapset_dict[run][fig].hist for run in runs]
                zip_hists = np.dstack(hists)
                for idx in np.ndindex(fit_binning.shape):
                    y_values = []
                    y_sigma = []
                    for run in fracdiff_mapset_dict:
                        y_values.append(unp.nominal_values(
                            fracdiff_mapset_dict[run][fig].hist[idx]))
                        y_sigma.append(unp.std_devs(
                            fracdiff_mapset_dict[run][fig].hist[idx]))

                    if np.any(y_sigma):
                        popt, pcov = curve_fit(
                            fit_func, delta_runs, y_values, sigma=y_sigma,
                            p0=np.ones(degree))
                    else:
                        popt, pcov = curve_fit(
                            fit_func, delta_runs, y_values,
                            p0=np.ones(degree))
                    # perr = np.sqrt(np.diag(pcov))
                    # pvals = unp.uarray(popt, perr)
                    pvals_hist[idx] = popt
                pvals_hist = np.array(pvals_hist.tolist())
                params_mapset.append(Map(
                    name=fig, binning=combined_binning, hist=pvals_hist))
            params_mapset = MapSet(params_mapset, name=sys)

            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet(
                    [sys_fit_coeffs[sys], params_mapset])
            else:
                sys_fit_coeffs[sys] = params_mapset

    if mu_params is not None:
        sys_list = parse(config.get('muons', 'sys_list'))
        mu_params = deepcopy(map(lambda x: x[3:], mu_params))

        if set(mu_params) != set(sys_list):
            raise AssertionError(
                'Systematics list listed in the sample config file does '
                'not match the params in the pipeline config file\n {0} '
                '!= {1}'.format(set(mu_params), set(sys_list)))

        for sys in sys_list:
            ev_sys = 'muons|' + sys
            runs = parse(config.get(ev_sys, 'runs')[1:-1])
            nominal = config.get(ev_sys, 'nominal')

            map_dict = OrderedDict()
            flavint_groups = None
            for run in runs:
                logging.info('Loading run {0} of systematic '
                             '{1}'.format(run, sys))
                dataset_param.value = ev_sys + '|' + run
                template_maker.update_params(dataset_param)
                template = template_maker.get_outputs(
                    idx=int(params['stop_after_stage'].m))
                if not isinstance(template, Data):
                    raise AssertionError(
                        'Template output is not a Data object, instead is '
                        'type {0}'.format(type(template)))
                if not template.contains_muons:
                    raise AssertionError(
                        'Template output does not contain muons')

                output = template.histogram(
                    kinds='muons',
                    binning=fit_binning,
                    # NOTE: weights cancel in fraction
                    weights_col=None,
                    errors=False,
                    name='muons')
                map_dict[run] = output

            nom_map = map_dict[nominal]
            fracdiff_map_dict = OrderedDict()
            for run in runs:
                mask = ~(nom_map.hist == 0.)
                div = np.zeros(nom_map.shape)
                with np.errstate(divide='ignore', invalid='ignore'):
                    div[mask] = \
                        unp.nominal_values(map_dict[run].hist[mask]) /\
                        unp.nominal_values(nom_map.hist[mask])
                fracdiff_map_dict[run] = Map(
                    name='muons', binning=nom_map.binning, hist=div)

            delta_runs = np.array([float(x) for x in runs]) - float(nominal)

            coeff_binning = OneDimBinning(
                name='coeff', num_bins=degree, is_lin=True, domain=[-1, 1])
            combined_binning = fit_binning + coeff_binning

            pvals_hist = np.empty(map(int, combined_binning.shape),
                                  dtype=object)
            hists = [fracdiff_map_dict[run].hist for run in runs]
            zip_hists = np.dstack(hists)
            for idx in np.ndindex(fit_binning.shape):
                y_values = []
                y_sigma = []
                for run in fracdiff_map_dict:
                    y_values.append(unp.nominal_values(
                        fracdiff_map_dict[run].hist[idx]))
                    y_sigma.append(unp.std_devs(
                        fracdiff_map_dict[run].hist[idx]))

                if np.any(y_sigma):
                    popt, pcov = curve_fit(
                        fit_func, delta_runs, y_values, sigma=y_sigma,
                        p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(
                        fit_func, delta_runs, y_values, p0=np.ones(degree))
                # perr = np.sqrt(np.diag(pcov))
                # pvals = unp.uarray(popt, perr)
                pvals_hist[idx] = popt
            pvals_hist = np.array(pvals_hist.tolist())
            params_map = Map(
                name='muons', binning=combined_binning, hist=pvals_hist)
            if sys in sys_fit_coeffs:
                sys_fit_coeffs[sys] = MapSet(
                    [sys_fit_coeffs[sys], params_map])
            else:
                sys_fit_coeffs[sys] = params_map

    return sys_fit_coeffs
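# A self-contained sketch of the per-bin fit performed above: fractional
# deviations y at a few systematic offsets are fit with a polynomial whose
# constant term is pinned to 1 (the force-through-nominal case). The toy
# numbers are invented; in the real code one such fit runs per analysis
# bin, per flavint group, per systematic.
import numpy as np
from scipy.optimize import curve_fit

def fit_func(vals, *poly_coeffs):
    return np.polynomial.polynomial.polyval(vals, [1.] + list(poly_coeffs))

delta_runs = np.array([-1.0, 0.0, 1.0])   # offsets from the nominal run
y_values = np.array([0.91, 1.0, 1.12])    # toy fractional deviations
popt, pcov = curve_fit(fit_func, delta_runs, y_values, p0=np.ones(2))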
# ================================================================
#
# Parameters of the data
#
toymc_params = ToyMCllhParam()
# Number of data points to bin
toymc_params.n_data = args.ndata
# fraction of those points that will constitute the signal
toymc_params.signal_fraction = args.signal_fraction
toymc_params.mu = TRUE_MU            # True mean of the signal
toymc_params.sigma = TRUE_SIGMA      # True width of the signal
toymc_params.nbackground_low = 0.    # lowest value the background can take
toymc_params.nbackground_high = 40.  # highest value the background can take
toymc_params.binning = OneDimBinning(
    name='stuff',
    bin_edges=np.linspace(toymc_params.nbackground_low,
                          toymc_params.nbackground_high, NBINS))
# Statistical factor for the MC
toymc_params.stats_factor = args.stats_factor
toymc_params.infinite_stats = 10000

metrics_to_test = [
    'llh', 'mcllh_eff', 'mod_chi2', 'mcllh_mean', 'generalized_poisson_llh'
]

results = collections.OrderedDict()
results['toymc_params'] = toymc_params
for metric in metrics_to_test:
    results[metric] = collections.OrderedDict()
# ==============================================================
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License."""

PARAM_DEFAULTS = {"prior": None, "range": None, "is_fixed": True}
"""Defaults for stage parameters."""

TEST_BINNING = MultiDimBinning(
    [
        OneDimBinning(
            name="true_energy",
            is_log=True,
            num_bins=15,
            domain=[10, 100] * ureg.GeV,
            tex=r"E_{\rm true}",
        ),
        OneDimBinning(
            name="true_coszen",
            is_log=False,
            num_bins=16,
            domain=[-1, 0] * ureg.dimensionless,
            tex=r"\cos(\theta_z)",
        ),
    ]
)
"""Binning to use for test pipeline."""
def test_Events():
    """Unit tests for Events class"""
    from pisa.utils.flavInt import NuFlavInt
    # Instantiate empty object
    events = Events()

    # Instantiate from PISA events HDF5 file
    events = Events(
        'events/events__vlvnt__toy_1_to_80GeV_spidx1.0_cz-1_to_1_1e2evts_set0__unjoined__with_fluxes_honda-2015-spl-solmin-aa.hdf5'
    )

    # Apply a simple cut
    events = events.applyCut('(true_coszen <= 0.5) & (true_energy <= 70)')
    for fi in events.flavints:
        assert np.max(events[fi]['true_coszen']) <= 0.5
        assert np.max(events[fi]['true_energy']) <= 70

    # Apply an "inbounds" cut via a OneDimBinning
    true_e_binning = OneDimBinning(name='true_energy', num_bins=80,
                                   is_log=True, domain=[10, 60] * ureg.GeV)
    events = events.keepInbounds(true_e_binning)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 10
        assert np.max(events[fi]['true_energy']) <= 60

    # Apply an "inbounds" cut via a MultiDimBinning
    true_e_binning = OneDimBinning(name='true_energy', num_bins=80,
                                   is_log=True, domain=[20, 50] * ureg.GeV)
    true_cz_binning = OneDimBinning(name='true_coszen', num_bins=40,
                                    is_lin=True, domain=[-0.8, 0])
    mdb = MultiDimBinning([true_e_binning, true_cz_binning])
    events = events.keepInbounds(mdb)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 20
        assert np.max(events[fi]['true_energy']) <= 50
        assert np.min(events[fi]['true_coszen']) >= -0.8
        assert np.max(events[fi]['true_coszen']) <= 0

    # Now try to apply a cut that fails on one flav/int (since the field
    # will be missing) and make sure that the cut did not get applied
    # anywhere in the end (i.e., it is rolled back)
    sub_evts = events['nutaunc']
    sub_evts.pop('true_energy')
    events['nutaunc'] = sub_evts
    try:
        events = events.applyCut('(true_energy >= 30) & (true_energy <= 40)')
    except Exception:
        pass
    else:
        raise Exception('Should not have been able to apply the cut!')
    for fi in events.flavints:
        if fi == NuFlavInt('nutaunc'):
            continue
        assert np.min(events[fi]['true_energy']) < 30

    logging.info(
        '<< PASS : test_Events >> (note:'
        ' "[ ERROR] Events object is in an inconsistent state. Reverting cut'
        ' for all flavInts." message above **is expected**.)'
    )
def test_kde_histogramdd():
    """Unit tests for kde_histogramdd"""
    from argparse import ArgumentParser
    from shutil import rmtree
    from tempfile import mkdtemp

    from pisa import ureg
    from pisa.core.map import Map, MapSet
    from pisa.utils.log import logging, set_verbosity
    from pisa.utils.plotter import Plotter

    parser = ArgumentParser()
    parser.add_argument("-v", action="count", default=None,
                        help="set verbosity level")
    args = parser.parse_args()
    set_verbosity(args.v)

    temp_dir = mkdtemp()

    try:
        my_plotter = Plotter(
            stamp="",
            outdir=temp_dir,
            fmt="pdf",
            log=False,
            annotate=False,
            symmetric=False,
            ratio=True,
        )

        b1 = OneDimBinning(name="coszen", num_bins=20, is_lin=True,
                           domain=[-1, 1], tex=r"\cos(\theta)")
        b2 = OneDimBinning(name="energy", num_bins=10, is_log=True,
                           domain=[1, 80] * ureg.GeV, tex=r"E")
        b3 = OneDimBinning(name="pid", num_bins=2, bin_edges=[0, 1, 2],
                           tex=r"pid")
        binning = b1 * b2 * b3

        # now let's generate some toy data
        N = 100000
        cz = np.random.normal(1, 1.2, N)
        # cut away coszen outside -1, 1
        cz = cz[(cz >= -1) & (cz <= 1)]
        e = np.random.normal(30, 20, len(cz))
        pid = np.random.uniform(0, 2, len(cz))
        data = np.array([cz, e, pid]).T

        # make numpy histogram for validation
        bins = [unp.nominal_values(b.bin_edges) for b in binning]
        raw_hist, _ = np.histogramdd(data, bins=bins)

        # get KDE'ed histo
        hist = kde_histogramdd(
            data,
            binning,
            bw_method="silverman",
            coszen_name="coszen",
            oversample=10,
            use_cuda=True,
            stack_pid=True,
        )

        # put into mapsets and plot
        m1 = Map(name="KDE", hist=hist, binning=binning)
        m2 = Map(name="raw", hist=raw_hist, binning=binning)
        with np.errstate(divide="ignore", invalid="ignore"):
            m3 = m2 / m1
        m3.name = "hist/KDE"
        m3.tex = m3.name
        m4 = m1 - m2
        m4.name = "KDE - hist"
        m4.tex = m4.name
        ms = MapSet([m1, m2, m3, m4])
        my_plotter.plot_2d_array(ms, fname="test_kde", cmap="summer")
    except:
        rmtree(temp_dir)
        raise
    else:
        logging.warning(
            "Inspect and manually clean up output(s) saved to %s" % temp_dir)
def test_survival_prob():
    """Run forward simulation over a few bins"""
    t_start = time()

    from pisa.core.binning import OneDimBinning

    absorption_length = 1  # m
    n_photons = int(1e6)
    speed_of_light = SPEED_OF_LIGHT_IN_VACUUM  #/ 1.5  # m/s
    dom_radius = DOM_RADIUS
    dom_efficiency = 1
    seed = 0

    r_binning = OneDimBinning(name='r', domain=(0, 25), is_lin=True,
                              num_bins=25, units='m')
    phi_binning = OneDimBinning(name='phi', tex=r'\phi',
                                domain=(0, 2 * np.pi), is_lin=True,
                                num_bins=32, units='rad')
    t_binning = OneDimBinning(name='time', domain=(0, 100), is_lin=True,
                              num_bins=10, units='ns')
    phidir_binning = OneDimBinning(name='phidir', tex=r'\phi_{\rm dir}',
                                   domain=(0, 2 * np.pi), is_lin=True,
                                   num_bins=96, units='rad')

    binning = r_binning * phi_binning * t_binning * phidir_binning

    for r_idx in range(binning.r.num_bins):
        # Pick a bin that will have non-zero entries
        out_binning, _ = pick_nonzero_bin(
            binning=binning,
            r_idx=r_idx,
            phi_idx=0,
            dom_radius=dom_radius,
            speed_of_light=speed_of_light)

        #print('indices of bin0 in table:', indices0)
        #print('bin0:\n{}'.format(bin0))
        #indices0 = np.array(indices0)
        #r0_idx, phi0_idx, t0_idx, phidir0_idx = indices0

        # Get probs for nominal r and t bins +/- 2 bins away
        #r_slice = slice(r0_idx - 2, r0_idx + 3)
        #t_slice = slice(t0_idx - 2, t0_idx + 3)
        #r_slice = slice(None)
        #t_slice = slice(None)
        #out_binning = binning[r_slice, phi0_idx, t_slice, phidir0_idx + 2]

        survival_probs = out_binning.empty(name='survival_prob')
        geom_norms = out_binning.empty(name='geom_norm')
        for bin_flat_idx, this_bin in enumerate(out_binning.iterbins()):
            prob, num_hits = forward_survival_prob(
                absorption_length=absorption_length,
                binning=this_bin,
                n_photons=n_photons,
                dom_efficiency=dom_efficiency,
                dom_radius=dom_radius,
                speed_of_light=speed_of_light,
                seed=seed)
            #print('')
            survival_probs[out_binning.index2coord(bin_flat_idx)] = prob
            geom_norms[out_binning.index2coord(bin_flat_idx)] = \
                num_hits / n_photons

    print('Total time to run:', time() - t_start)

    return survival_probs, geom_norms, binning
def setup_function(self):
    assert isinstance(self.apply_mode, MultiDimBinning), (
        "Hist stage needs a binning as `apply_mode`, but is %s"
        % self.apply_mode
    )

    if isinstance(self.calc_mode, MultiDimBinning):
        # The two binnings must be exclusive
        assert len(set(self.calc_mode.names)
                   & set(self.apply_mode.names)) == 0

        transform_binning = self.calc_mode + self.apply_mode

        # go to "events" mode to create the transforms
        for container in self.data:
            self.data.representation = "events"
            sample = [container[name] for name in transform_binning.names]
            hist = histogram(sample, None, transform_binning, averaged=False)
            transform = hist.reshape(self.calc_mode.shape + (-1,))
            self.data.representation = self.calc_mode
            container['hist_transform'] = transform

    elif self.calc_mode == "events":
        # For dimensions where the binning is irregular, we pre-compute
        # the index that each sample falls into and then bin regularly in
        # the index. For dimensions that are logarithmic, we add a linear
        # binning in the logarithm.
        dimensions = []
        for dim in self.apply_mode:
            if dim.is_irregular:
                # create a new axis with the digitized variable
                varname = dim.name + "__" + self.apply_mode.name + "_idx"
                new_dim = OneDimBinning(
                    varname, domain=[0, dim.num_bins], num_bins=dim.num_bins)
                dimensions.append(new_dim)
                for container in self.data:
                    container.representation = "events"
                    x = container[dim.name] * dim.units
                    # Compute the bin index each sample would fall into,
                    # and shift by -1 such that samples below the binning
                    # range get assigned the index -1.
                    x_idx = np.searchsorted(dim.bin_edges, x,
                                            side="right") - 1
                    # To be consistent with numpy histogramming, we need
                    # to shift those values that are exactly at the
                    # uppermost edge down one index such that they are
                    # included in the highest bin instead of being treated
                    # as an outlier.
                    on_edge = (x == dim.bin_edges[-1])
                    x_idx[on_edge] -= 1
                    container[varname] = x_idx
            elif dim.is_log:
                # We don't compute the log of the variable just yet, this
                # will be done later during `apply_function` using the
                # representation mechanism.
                new_dim = OneDimBinning(
                    dim.name, domain=np.log(dim.domain.m),
                    num_bins=dim.num_bins)
                dimensions.append(new_dim)
            else:
                dimensions.append(dim)
        self.regularized_apply_mode = MultiDimBinning(dimensions)
        logging.debug("Using regularized binning:\n"
                      + str(self.regularized_apply_mode))
    else:
        raise ValueError(f"unknown calc mode: {self.calc_mode}")
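# Tiny standalone check of the digitization convention used for irregular
# dimensions above: `searchsorted(..., side="right") - 1` puts underflow
# at -1, and samples landing exactly on the uppermost edge are shifted
# down so they stay in the last bin, matching numpy histogramming.
import numpy as np

edges = np.array([0.0, 1.0, 2.5, 7.0])    # 3 irregular bins
x = np.array([-0.5, 0.0, 2.5, 7.0, 8.0])
x_idx = np.searchsorted(edges, x, side="right") - 1
x_idx[x == edges[-1]] -= 1
assert np.array_equal(x_idx, [-1, 0, 2, 2, 3])  # 3 == num_bins -> overflow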
def test_nusquids_osc():
    """Test nuSQuIDS functions."""
    from pisa.core.binning import OneDimBinning

    # define binning for nuSQuIDS nodes (where master eqn. is solved)
    en_calc_binning = OneDimBinning(
        name='true_energy',
        bin_edges=np.logspace(0.99, 2.01, 40) * ureg.GeV,
    )
    cz_calc_binning = OneDimBinning(
        name='true_coszen',
        domain=[-1, 1] * ureg.dimensionless,
        is_lin=True,
        num_bins=21
    )

    # make 2D binning
    binning_2d_calc = en_calc_binning * cz_calc_binning

    # check it has the necessary entries
    validate_calc_grid(binning_2d_calc)

    # pad the grid to make sure we can later on evaluate osc. probs.
    # *anywhere* in between the outermost bin edges
    en_calc_grid, cz_calc_grid = compute_binning_constants(binning_2d_calc)

    # set up initial states, get the nuSQuIDS "propagator" instances
    ini_states, props = init_nusquids_prop(
        cz_nodes=cz_calc_grid,
        en_nodes=en_calc_grid,
        nu_flav_no=3,
        rel_err=1.0e-5,
        abs_err=1.0e-5,
        progress_bar=True
    )

    # make an Earth model
    YeI, YeM, YeO = 0.4656, 0.4957, 0.4656
    earth_atm = earth_model(YeI=YeI, YeM=YeM, YeO=YeO)

    # define some oscillation parameter values
    osc_params = OscParams()
    osc_params.theta23 = np.deg2rad(48.7)
    osc_params.theta12 = np.deg2rad(33.63)
    osc_params.theta13 = np.deg2rad(8.52)
    osc_params.theta14 = np.deg2rad(0.0)
    osc_params.dm21 = 7.40e-5
    osc_params.dm31 = 2.515e-3
    osc_params.dm41 = 0.
    osc_params.eps_ee = 0.
    osc_params.eps_emu = 0.
    osc_params.eps_etau = 0.
    osc_params.eps_mumu = 0.
    osc_params.eps_mutau = 0.005
    osc_params.eps_tautau = 0.

    # evolve the states starting from the initial ones
    evolve_states(
        cz_shape=cz_calc_grid.shape[0],
        propagators=props,
        ini_states=ini_states,
        nsq_earth_atm=earth_atm,
        osc_params=osc_params
    )

    # define some points where osc. probs. are to be evaluated
    en_eval = np.logspace(1, 2, 500) * NSQ_CONST.GeV
    cz_eval = np.linspace(-0.95, 0.95, 500)

    # look them up for appearing tau neutrinos
    nuflav = 'nutau'

    # collect the transition probabilities from muon and electron
    # neutrinos
    prob_e, prob_mu = osc_probs(  # pylint: disable=unused-variable
        nuflav=nuflav,
        propagators=props,
        true_energies=en_eval,
        true_coszens=cz_eval,
    )