def __init_spline(self, knots, coeffs, deg, units=None):
    """Configure this prior as a spline-defined log-likelihood.

    Parameters
    ----------
    knots : sequence or pint Quantity
        Spline knots. If not already a Quantity, they are made dimensionless
        when `units` is None, or given `units` otherwise.
    coeffs : sequence
        Spline coefficients, passed through to `splev` as the "c" of the
        (t, c, k) tuple.
    deg : int
        Spline degree, passed through to `splev` as "k".
    units : None or units specifier
        Only consulted when `knots` is not already a Quantity.

    Notes
    -----
    `splev` is called with ``ext=2``, so evaluating the prior outside of the
    knot range raises ValueError.
    """
    if not isinstance(knots, ureg.Quantity):
        if units is None:
            knots = knots * ureg.dimensionless
        else:
            knots = ureg.Quantity(np.asarray(knots), units)

    self._state_attrs.extend(['knots', 'coeffs', 'deg', 'units'])
    self.kind = 'spline'
    # `knots` is guaranteed to be a Quantity by the conversion above
    self.units = str(knots.units)
    self.knots = knots
    self.coeffs = coeffs
    self.deg = deg

    def llh(x):
        x = self.__strip(self.__convert(x))
        return splev(x, tck=(self.__strip(self.knots), coeffs, deg), ext=2)

    self.llh = llh

    # Locate the extremum numerically within the knot range; fminbound
    # minimizes `self.chi2` (defined elsewhere; presumably -2*llh -- confirm)
    self.max_at = fminbound(
        func=self.__attach_units_to_args(self.chi2),
        x1=np.min(self.__strip(self.knots)),
        x2=np.max(self.__strip(self.knots)),
    )
    # fminbound returns a bare number; re-attach the prior's units
    self.max_at = self.max_at * ureg(self.units)
    self.max_at_str = self.__stringify(self.max_at)

    # Build the range from bare magnitudes: multiplying the (already
    # unit-carrying) knots by `ureg(self.units)` would square the units.
    knot_magnitudes = self.knots.magnitude
    self.valid_range = (np.min(knot_magnitudes) * ureg(self.units),
                        np.max(knot_magnitudes) * ureg(self.units))

    self._str = lambda s: 'spline prior: deg=%d, valid in [%s, %s]%s; max at %s%s' \
        %(self.deg, self.__stringify(np.min(self.knots)),
          self.__stringify(np.max(self.knots)), self.units_str,
          self.max_at_str, self.units_str)
def __init_gaussian(self, mean, stddev):
    """Configure this prior as a Gaussian log-likelihood.

    Parameters
    ----------
    mean : Number or pint Quantity
        Location of the maximum. Bare numbers are treated as dimensionless.
    stddev : Number or pint Quantity
        Width of the Gaussian; must have the same dimensionality as `mean`
        and is converted to the units of `mean`.
    """
    # Promote bare numbers to dimensionless quantities
    mean = mean * ureg.dimensionless if isinstance(mean, Number) else mean
    stddev = (stddev * ureg.dimensionless if isinstance(stddev, Number)
              else stddev)
    assert mean.dimensionality == stddev.dimensionality

    self._state_attrs.extend(['mean', 'stddev'])
    self.kind = 'gaussian'

    if isinstance(mean, ureg.Quantity):
        # The prior's units are those of the mean; express stddev in them too
        self.units = str(mean.units)
        assert isinstance(stddev, ureg.Quantity), \
            str(type(stddev))
        stddev = stddev.to(self.units)

    self.mean = mean
    self.stddev = stddev

    def llh(x):
        val = self.__strip(self.__convert(x))
        mu = self.__strip(self.mean)
        sigma = self.__strip(self.stddev)
        return -(val - mu)**2 / (2 * sigma**2)

    self.llh = llh
    self.max_at = self.mean
    self.max_at_str = self.__stringify(self.max_at)

    # A Gaussian is defined everywhere
    lower_bound = -np.inf * ureg(self.units)
    upper_bound = np.inf * ureg(self.units)
    self.valid_range = (lower_bound, upper_bound)

    self._str = lambda s: 'gaussian prior: stddev=%s%s, maximum at %s%s' \
        %(self.__stringify(self.stddev), self.units_str,
          self.__stringify(self.mean), self.units_str)
def __init_jeffreys(self, A, B):
    """Calculate jeffreys prior as defined in Sivia p.125

    The normalized prior on [A, B] is p(x) = 1 / (x * ln(B/A)), hence
    llh(x) = -ln(x) - ln(ln(B) - ln(A)).

    Parameters
    ----------
    A, B : values understood by `interpret_quantity`
        Lower and upper end of the range. They must have the same
        dimensionality; `B` is converted to the units of `A`.
    """
    self.kind = 'jeffreys'
    A = interpret_quantity(A, expect_sequence=False)
    B = interpret_quantity(B, expect_sequence=False)
    assert A.dimensionality == B.dimensionality
    self._state_attrs.extend(['A', 'B'])
    self.units = str(A.units)
    B = B.to(A.units)
    self.A = A
    self.B = B

    def llh(x):
        x = self.__strip(self.__convert(x))
        A = self.__strip(self.A)
        B = self.__strip(self.B)
        # The normalization ln(ln(B/A)) enters with a minus sign
        return -np.log(x) - np.log(np.log(B) - np.log(A))

    self.llh = llh
    # 1/x is monotonically decreasing, so the maximum sits at the lower edge
    self.max_at = self.A
    self.max_at_str = self.__stringify(self.max_at)
    # A and B already carry `self.units`; multiplying by the units again
    # would square them.
    self.valid_range = (self.A, self.B)
    self._str = lambda s: "jeffreys' prior, range [%s,%s]" % (self.A, self.B)
def __init_jeffreys(self, A, B):
    """Calculate jeffreys prior as defined in Sivia p.125

    The normalized prior on [A, B] is p(x) = 1 / (x * ln(B/A)), hence
    llh(x) = -ln(x) - ln(ln(B) - ln(A)).

    Parameters
    ----------
    A, B : Number or pint Quantity
        Lower and upper end of the range. Bare numbers are treated as
        dimensionless. Both must have the same dimensionality; `B` is
        converted to the units of `A`.
    """
    self.kind = 'jeffreys'
    if isinstance(A, Number):
        A = A * ureg.dimensionless
    if isinstance(B, Number):
        B = B * ureg.dimensionless
    assert A.dimensionality == B.dimensionality
    self._state_attrs.extend(['A', 'B'])
    if isinstance(A, ureg.Quantity):
        self.units = str(A.units)
        assert isinstance(B, ureg.Quantity), '%s' % type(B)
        B = B.to(self.units)
    self.A = A
    self.B = B

    def llh(x):
        x = self.__strip(self.__convert(x))
        A = self.__strip(self.A)
        B = self.__strip(self.B)
        # The normalization ln(ln(B/A)) enters with a minus sign
        return -np.log(x) - np.log(np.log(B) - np.log(A))

    self.llh = llh
    # 1/x is monotonically decreasing, so the maximum sits at the lower edge
    self.max_at = self.A
    self.max_at_str = self.__stringify(self.max_at)
    # A and B already carry `self.units`; multiplying by the units again
    # would square them.
    self.valid_range = (self.A, self.B)
    self._str = lambda s: "jeffreys' prior, range [%s,%s]" % (self.A, self.B)
def __init_spline(self, knots, coeffs, deg, units=None):
    """Configure this prior as a spline-defined log-likelihood.

    Parameters
    ----------
    knots : value understood by `interpret_quantity` (sequence expected)
        Spline knots. If they come out unitless, `units` is attached;
        otherwise, if `units` is given, the knots must be dimensionally
        compatible and are converted to `units`.
    coeffs : sequence
        Spline coefficients, passed through to `splev` as the "c" of the
        (t, c, k) tuple.
    deg : int
        Spline degree, passed through to `splev` as "k".
    units : None or units specifier

    Notes
    -----
    `splev` is called with ``ext=2``, so evaluating the prior outside of the
    knot range raises ValueError.
    """
    knots = interpret_quantity(knots, expect_sequence=True)
    self._state_attrs.extend(['knots', 'coeffs', 'deg'])
    self.kind = 'spline'

    if isunitless(knots):
        knots = ureg.Quantity(knots, units)
    elif units is not None:
        units = ureg.Unit(units)
        assert knots.dimensionality == units.dimensionality
        knots = knots.to(units)

    self.units = str(knots.units)
    self.knots = knots
    self.coeffs = coeffs
    self.deg = deg

    def llh(x):
        x = self.__strip(self.__convert(x))
        return splev(x, tck=(self.__strip(self.knots), coeffs, deg), ext=2)

    self.llh = llh

    # Locate the extremum numerically within the knot range; fminbound
    # minimizes `self.chi2` (defined elsewhere; presumably -2*llh -- confirm)
    self.max_at = fminbound(
        func=self.__attach_units_to_args(self.chi2),
        x1=np.min(self.__strip(self.knots)),
        x2=np.max(self.__strip(self.knots)),
    )
    # `self.units` is always a string here; re-attach units to the bare
    # number returned by fminbound
    self.max_at = self.max_at * ureg(self.units)
    self.max_at_str = self.__stringify(self.max_at)

    # Build the range from bare magnitudes: multiplying the (already
    # unit-carrying) knots by `ureg(self.units)` would square the units.
    knot_magnitudes = self.knots.magnitude
    self.valid_range = (np.min(knot_magnitudes) * ureg(self.units),
                        np.max(knot_magnitudes) * ureg(self.units))

    self._str = lambda s: 'spline prior: deg=%d, valid in [%s, %s]%s; max at %s%s' \
        %(self.deg, self.__stringify(np.min(self.knots)),
          self.__stringify(np.max(self.knots)), self.units_str,
          self.max_at_str, self.units_str)
def __init_linterp(self, param_vals, llh_vals):
    """Configure this prior as a linearly-interpolated log-likelihood.

    Parameters
    ----------
    param_vals : sequence or pint Quantity
        Parameter values at which the log-likelihood is tabulated. If not a
        Quantity, they are treated as dimensionless.
    llh_vals : sequence
        Log-likelihood values corresponding to `param_vals`.

    Notes
    -----
    The interpolant is built with ``bounds_error=True``, so evaluating the
    prior outside of the tabulated range raises ValueError.
    """
    if not isinstance(param_vals, ureg.Quantity):
        param_vals = param_vals * ureg.dimensionless
    self._state_attrs.extend(['param_vals', 'llh_vals'])
    self.kind = 'linterp'
    # `param_vals` is guaranteed to be a Quantity by the conversion above
    self.units = str(param_vals.units)
    self.interp = interp1d(param_vals, llh_vals, kind='linear', copy=True,
                           bounds_error=True, assume_sorted=False)
    self.param_vals = param_vals
    self.llh_vals = llh_vals

    def llh(x):
        x = self.__strip(self.__convert(x))
        return self.interp(x)

    self.llh = llh

    # All tabulated points attaining the maximum llh. The mask is built from
    # an array view so that a plain list of llh values also works (a list
    # compared to a scalar yields a single bool rather than a mask).
    llh_array = np.asarray(self.llh_vals)
    self.max_at = self.param_vals[llh_array == np.max(llh_array)]
    self.max_at_str = ', '.join([self.__stringify(v) for v in self.max_at])

    # Build the range from bare magnitudes: multiplying the (already
    # unit-carrying) values by `ureg(self.units)` would square the units.
    param_magnitudes = self.param_vals.magnitude
    self.valid_range = (np.min(param_magnitudes) * ureg(self.units),
                        np.max(param_magnitudes) * ureg(self.units))

    self._str = lambda s: 'linearly-interpolated prior: valid in [%s, %s]%s, maxima at (%s)%s' \
        %(self.__stringify(np.min(self.param_vals)),
          self.__stringify(np.max(self.param_vals)), self.units_str,
          self.max_at_str, self.units_str)
def __init_uniform(self, llh_offset=0):
    """Configure this prior as uniform (flat) in the parameter.

    Parameters
    ----------
    llh_offset : number
        Constant returned as the log-likelihood for every parameter value.
    """
    self.kind = 'uniform'
    self.llh_offset = llh_offset
    self._state_attrs.append('llh_offset')

    def llh(x):
        # Multiplying the stripped input by zero keeps the shape of `x` in
        # the result
        return 0.*self.__strip(x) + self.llh_offset

    self.llh = llh

    # A flat prior has no preferred point
    self.max_at = np.nan
    self.max_at_str = 'no maximum'

    lower_bound = -np.inf * ureg(self.units)
    upper_bound = np.inf * ureg(self.units)
    self.valid_range = (lower_bound, upper_bound)

    self._str = lambda s: 'uniform prior, llh_offset=%s' %self.llh_offset
def load_noise_events(config, dataset):
    """Load the noise events of a sample and wrap them in a `Data` object.

    Parameters
    ----------
    config : config-parser-like object
        Must provide `get(section, option)` and `items(section)`. The
        sections 'general', 'noise' and 'noise<SEP>aliases' are read, plus
        either the per-systematic sections (for the nominal dataset) or the
        section named `dataset`.
    dataset : string
        'nominal', or the name of the config section holding `file_path`.

    Returns
    -------
    Data
        Contains a single 'noise' entry; metadata records the sample name
        and `dataset`.

    Raises
    ------
    AssertionError
        If the nominal systematics point to more than one distinct file.
    """
    name = config.get('general', 'name')
    weight = config.get('noise', 'weight')
    weight_units = config.get('noise', 'weight_units')
    sys_list = split(config.get('noise', 'sys_list'))
    base_prefix = config.get('noise', 'baseprefix')
    keep_keys = split(config.get('noise', 'keep_keys'))
    aliases = config.items('noise%saliases' % SEP)

    # NOTE(review): `base_prefix` is normalized here but not used again in
    # this function.
    if base_prefix == 'None':
        base_prefix = ''

    if dataset == 'nominal':
        # Collect the file configured as nominal for each systematic; they
        # must all agree on one file.
        paths = []
        for sys in sys_list:
            ev_sys = 'noise%s%s' % (SEP, sys)
            nominal = config.get(ev_sys, 'nominal')
            ev_sys_nom = ev_sys + SEP + nominal
            paths.append(config.get(ev_sys_nom, 'file_path'))
        if len(set(paths)) > 1:
            raise AssertionError(
                'Choice of nominal file is ambigous. Nominal '
                'choice of systematic parameters must coincide '
                'with one and only one file. Options found are: '
                '{0}'.format(paths))
        file_path = paths[0]
    else:
        file_path = config.get(dataset, 'file_path')

    logging.info('Extracting noise dataset "{0}" from sample '
                 '"{1}"'.format(dataset, name))

    noise = from_file(file_path)
    sample.strip_keys(keep_keys, noise)

    # `weight` is either a literal flag ('None', '1', '0') or the name of
    # the key holding per-event weights.
    if weight == 'None' or weight == '1':
        noise['sample_weight'] = np.ones(noise['weights'].shape)
    elif weight == '0':
        noise['sample_weight'] = np.zeros(noise['weights'].shape)
    else:
        noise['sample_weight'] = noise[weight] * ureg(weight_units)
    noise['pisa_weight'] = deepcopy(noise['sample_weight'])

    for alias, expr in aliases:
        if alias in noise:
            logging.warning(
                'Overwriting Data key {0} with aliased expression '
                '{1}'.format(alias, expr))
        # Each `<key>` in the expression is rewritten to `noise['key']`
        # before evaluation.
        # SECURITY NOTE: `eval` executes arbitrary code taken from the
        # config file and sees this function's local names; only use with
        # trusted configs.
        noise[alias] = eval(re.sub(r'\<(.*?)\>', r"noise['\1']", expr))

    noise_dict = {'noise': noise}
    return Data(noise_dict, metadata={
        'name': name,
        'noise_sample': dataset
    })
def __check_units(self, param_val):
    """Verify that `param_val` is dimensionally compatible with this prior.

    Parameters
    ----------
    param_val : number or pint Quantity

    Raises
    ------
    TypeError
        If the prior has no units but `param_val` carries non-dimensionless
        units; if the prior has units but `param_val` is not a Quantity; or
        if the dimensionalities of the two differ.
    """
    is_quantity = isinstance(param_val, ureg.Quantity)

    # Unit-less prior: only dimensionless quantities (or bare values) pass
    if self.units is None:
        if is_quantity and (param_val.dimensionality
                            != ureg.dimensionless.dimensionality):
            raise TypeError('Passed a value with units (%s), but this'
                            ' prior has no units.' % param_val.units)
        return

    # Prior with units: the value must be a compatible Quantity
    if not is_quantity:
        raise TypeError('Passed a value without units, but this prior'
                        ' has units (%s).' % self.units)

    prior_dimensionality = ureg(self.units).dimensionality
    if param_val.dimensionality != prior_dimensionality:
        raise TypeError('Passed a value with units (%s);'
                        ' incompatible with prior units (%s)'
                        % (param_val.units, self.units))
def parse_quantity(string):
    """Parse a string into a pint/uncertainty quantity.

    The expected layout is ``<value>[+/-<error>][*][units.<unit>]``; all
    spaces are ignored. A value given without "+/-" gets an uncertainty of 0.

    Parameters
    ----------
    string : string

    Returns
    -------
    value : pint.quantity of uncertainties.core.AffineScalarFunc

    Examples
    --------
    >>> quant = parse_quantity('1.2 +/- 0.7 * units.meter')
    >>> print str(quant)
    1.2+/-0.7 meter
    >>> print '{:~}'.format(quant)
    1.2+/-0.7 m
    >>> print quant.magnitude
    1.2+/-0.7
    >>> print quant.units
    meter
    >>> print quant.nominal_value
    1.2
    >>> print quant.std_dev
    0.7

    Also note that spaces and the "*" are optional:

    >>> print parse_quantity('1+/-1units.GeV')
    1.0+/-1.0 gigaelectron_volt

    """
    compact = string.replace(' ', '')

    # Separate the numeric part from the (optional) unit specification
    unit = None
    if 'units.' in compact:
        compact, unit = compact.split('units.')

    # Drop the optional multiplication sign left over before "units."
    number_str = compact.rstrip('*')

    if '+/-' in number_str:
        value = ufloat_fromstr(number_str)
    else:
        value = ufloat(float(number_str), 0)

    value *= ureg(unit)
    return value
def load_from_nu_file(events_file, all_flavints, weight, weight_units,
                      keep_keys, aliases):
    """Load neutrino events from a file and split them by flavor/interaction.

    Parameters
    ----------
    events_file : string
        Passed to `from_file`.
    all_flavints : sequence
        Flavor/interaction objects; each must expose the boolean-like
        attributes `cc` and `particle`.
    weight : string
        'None' or '1' for unit weights, '0' for zero weights, otherwise the
        name of the event key holding the per-event weights.
    weight_units : string
        Units attached to the weights when `weight` names an event key.
    keep_keys : sequence
        Passed to `sample.strip_keys` to prune the loaded events.
    aliases : iterable of (alias, expression) pairs
        Expressions in which each `<key>` stands for `events['key']`.

    Returns
    -------
    FlavIntDataGroup
        For each entry of `all_flavints`, a dict of every event variable
        masked down to the matching events.
    """
    flav_fidg = FlavIntDataGroup(flavint_groups=all_flavints)
    events = from_file(events_file)
    sample.strip_keys(keep_keys, events)

    # Sign of `ptype` separates nu from nubar; `interaction` is 1 for the
    # events treated as CC and 2 for those treated as NC.
    nu_mask = events['ptype'] > 0
    nubar_mask = events['ptype'] < 0
    cc_mask = events['interaction'] == 1
    nc_mask = events['interaction'] == 2

    if weight == 'None' or weight == '1':
        events['sample_weight'] = \
            np.ones(events['ptype'].shape) * ureg.dimensionless
    elif weight == '0':
        events['sample_weight'] = \
            np.zeros(events['ptype'].shape) * ureg.dimensionless
    else:
        events['sample_weight'] = events[weight] * \
            ureg(weight_units)
    events['pisa_weight'] = deepcopy(events['sample_weight'])

    for alias, expr in aliases:
        if alias in events:
            logging.warning(
                'Overwriting Data key {0} with aliased expression '
                '{1}'.format(alias, expr))
        # SECURITY NOTE: `eval` executes arbitrary code taken from the
        # aliases and sees this function's local names; only use with
        # trusted configs.
        events[alias] = eval(re.sub(r'\<(.*?)\>', r"events['\1']", expr))

    for flavint in all_flavints:
        i_mask = cc_mask if flavint.cc else nc_mask
        t_mask = nu_mask if flavint.particle else nubar_mask
        # Combine the masks once per flavint rather than once per variable
        selection = i_mask & t_mask
        # Iterate the mapping directly (`iterkeys` exists only on Python 2)
        flav_fidg[flavint] = {var: events[var][selection] for var in events}

    return flav_fidg
def _compute_outputs(self, inputs=None):
    """Compute histograms for output channels.

    Runs MCEq once per coszen bin center, splines the resulting fluxes in
    (coszen, energy) and evaluates the splines at the output bin centers.

    Parameters
    ----------
    inputs : None
        Not used by this method.

    Returns
    -------
    MapSet
        One `Map` per key returned by `self.bivariate_evaluate`.

    Raises
    ------
    ValueError
        If the `primary_model` param does not split into exactly two parts.
    """
    logging.debug('Entering mceq._compute_outputs')

    primary_model = split(self.params['primary_model'].value, ',')
    if len(primary_model) != 2:
        raise ValueError('primary_model is not of length 2, instead is of '
                         'length {0}'.format(len(primary_model)))
    # SECURITY NOTE: `eval` executes text taken from the `primary_model`
    # param; only use with trusted configs.
    primary_model[0] = eval('pm.' + primary_model[0])

    density_model = (self.params['density_model'].value,
                     (self.params['location'].value,
                      self.params['season'].value))

    mceq_run = MCEqRun(
        interaction_model=str(self.params['interaction_model'].value),
        primary_model=primary_model,
        theta_deg=0.0,
        density_model=density_model,
        **mceq_config.mceq_config_without(['density_model']))

    # Power of energy to scale the flux (the results will be returned as
    # E**mag * flux)
    mag = 0

    # Obtain energy grid (fixed) of the solution for the x-axis of the plots
    e_grid = mceq_run.e_grid

    # Dictionary for results
    flux = OrderedDict()
    for nu in self.output_names:
        flux[nu] = []

    binning = self.output_binning
    cz_binning = binning.dims[binning.index('coszen', use_basenames=True)]
    en_binning = binning.dims[binning.index('energy', use_basenames=True)]
    cz_centers = cz_binning.weighted_centers.m
    angles = (np.arccos(cz_centers) * ureg.radian).m_as('degrees')

    # Solve the cascade equations once per zenith angle
    for theta in angles:
        mceq_run.set_theta_deg(theta)
        mceq_run.solve()
        flux['nue'].append(mceq_run.get_solution('total_nue', mag))
        flux['nuebar'].append(mceq_run.get_solution('total_antinue', mag))
        flux['numu'].append(mceq_run.get_solution('total_numu', mag))
        flux['numubar'].append(mceq_run.get_solution(
            'total_antinumu', mag))

    # Iterate the mappings directly (`iterkeys` exists only on Python 2);
    # re-assigning existing keys during iteration does not resize the dict.
    for nu in flux:
        flux[nu] = np.array(flux[nu])

    smoothing = self.params['smoothing'].value.m
    en_centers = en_binning.weighted_centers.m_as('GeV')
    spline_flux = self.bivariate_spline(flux, cz_centers, e_grid,
                                        smooth=smoothing)
    ev_flux = self.bivariate_evaluate(spline_flux, cz_centers, en_centers)

    for nu in ev_flux:
        ev_flux[nu] = ev_flux[nu] * ureg('cm**-2 s**-1 sr**-1 GeV**-1')

    mapset = [Map(name=nu, hist=ev_flux[nu], binning=binning)
              for nu in ev_flux]
    return MapSet(mapset)
def newfunc(*args):
    """Call `func`, attaching the prior's units to every positional arg.

    When the prior has no units (`self.units` is None) the arguments are
    forwarded untouched.
    """
    if self.units is not None:
        unit_factor = ureg(self.units)
        args = tuple(unit_factor * value for value in args)
    return func(*args)
def units_str(self):
    """Return the prior's units in abbreviated form, prefixed by a space.

    An empty string is returned when the prior has no units.
    """
    if self.units is not None:
        abbreviated = format(ureg(self.units).units, '~')
        return ' ' + abbreviated.strip()
    return ''
def inj_param_scan(return_outputs=False):
    """Load the HypoTesting class and use it to do an Asimov test across the
    space of one of the injected parameters.

    The user will define the parameter and pass a numpy-interpretable string to
    set the range of values. For example, one could scan over the space of
    theta23 by using a string such as `"numpy.linspace(0.35, 0.65, 31)"` which
    will then be evaluated to figure out a space of theta23 to inject and run
    Asimov tests.
    """
    # NOTE: the docstring above doubles as the command-line description (it
    # is handed to `parse_args` below), so developer-facing notes live in
    # comments instead.
    #
    # `return_outputs` : if True, the result of
    # `HypoTesting.asimov_inj_param_scan` is returned; otherwise None.

    # NOTE: import here to avoid circular refs
    from pisa.scripts.analysis import parse_args
    init_args_d = parse_args(description=inj_param_scan.__doc__,
                             command=inj_param_scan)

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    # For this test, pipeline is required so we don't need the try arguments
    # or the checks on it being None
    filenames = init_args_d.pop('pipeline')
    filenames = sorted([normcheckpath(fname) for fname in filenames])
    init_args_d['h0_maker'] = filenames
    # However, we do need them for the selections, since they can be different
    for maker in ['h0', 'h1', 'data']:
        ps_name = maker + '_param_selections'
        ps_str = init_args_d[ps_name]
        if ps_str is None:
            ps_list = None
        else:
            ps_list = [x.strip().lower() for x in ps_str.split(',')]
        init_args_d[ps_name] = ps_list

    init_args_d['data_maker'] = init_args_d['h0_maker']
    init_args_d['h1_maker'] = init_args_d['h0_maker']
    init_args_d['h0_maker'] = DistributionMaker(init_args_d['h0_maker'])
    init_args_d['h1_maker'] = DistributionMaker(init_args_d['h1_maker'])
    init_args_d['h1_maker'].select_params(init_args_d['h1_param_selections'])
    init_args_d['data_maker'] = DistributionMaker(init_args_d['data_maker'])
    if init_args_d['data_param_selections'] is None:
        init_args_d['data_param_selections'] = \
            init_args_d['h0_param_selections']
        init_args_d['data_name'] = init_args_d['h0_name']
    init_args_d['data_maker'].select_params(
        init_args_d['data_param_selections'])

    # Remove final parameters that don't want to be passed to HypoTesting
    param_name = init_args_d.pop('param_name')
    # SECURITY NOTE: `eval` executes the user-supplied command-line string as
    # Python code; only run this script with trusted arguments.
    inj_vals = eval(init_args_d.pop('inj_vals'))
    inj_units = init_args_d.pop('inj_units')
    force_prior = init_args_d.pop('use_inj_prior')

    # Instantiate the analysis object
    hypo_testing = HypoTesting(**init_args_d)

    logging.info('Scanning over %s between %.4f and %.4f with %i vals',
                 param_name, min(inj_vals), max(inj_vals), len(inj_vals))
    # Modify parameters if necessary
    if param_name == 'sin2theta23':
        requested_vals = inj_vals
        inj_vals = np.arcsin(np.sqrt(inj_vals))
        logging.info(
            'Converting to theta23 values. Equivalent range is %.4f to %.4f'
            ' radians, or %.4f to %.4f degrees',
            min(inj_vals), max(inj_vals),
            min(inj_vals) * 180 / np.pi, max(inj_vals) * 180 / np.pi)
        test_name = 'theta23'
        inj_units = 'radians'

    elif param_name == 'deltam31':
        raise ValueError('Need to implement a test where it ensures the sign '
                         'of the requested values matches those in truth and '
                         'the hypo makers (else it makes no sense). For now, '
                         'please select deltam3l instead.')

    elif param_name == 'deltam3l':
        # Ensure all values are the same sign, else it doesn't make any sense.
        # Zero has no sign and is rejected as well. (Testing only the
        # truthiness of the signs would accept a mix of -1 and +1.)
        inj_signs = np.sign(inj_vals)
        if np.any(inj_signs == 0) or np.any(inj_signs != inj_signs[0]):
            raise ValueError("Not all requested values to inject are the same "
                             "sign. This doesn't make any sense given that you"
                             " have requested to inject different values of "
                             "deltam3l.")
        logging.info('Parameter requested was deltam3l - will convert assuming'
                     ' that this is always the largest of the two splittings '
                     'i.e. deltam3l = deltam31 for deltam3l > 0 and deltam3l '
                     '= deltam32 for deltam3l < 0.')
        inj_sign = inj_signs[0]
        requested_vals = inj_vals
        test_name = 'deltam31'
        deltam21_val = hypo_testing.data_maker.params['deltam21'].value.to(
            inj_units).magnitude
        if inj_sign == 1:
            no_inj_vals = requested_vals
            io_inj_vals = (requested_vals - deltam21_val) * -1.0
        else:
            io_inj_vals = requested_vals
            no_inj_vals = (requested_vals * -1.0) + deltam21_val
        # One dict per scan point holding the value for each ordering
        inj_vals = []
        for no_inj_val, io_inj_val in zip(no_inj_vals, io_inj_vals):
            o_vals = {}
            o_vals['nh'] = no_inj_val
            o_vals['ih'] = io_inj_val
            inj_vals.append(o_vals)

    else:
        test_name = param_name
        requested_vals = inj_vals

    # Attach units to every injected value
    unit_inj_vals = []
    for inj_val in inj_vals:
        if isinstance(inj_val, dict):
            o_vals = {}
            for ivkey in inj_val.keys():
                o_vals[ivkey] = inj_val[ivkey] * ureg(inj_units)
            unit_inj_vals.append(o_vals)
        else:
            unit_inj_vals.append(inj_val * ureg(inj_units))
    inj_vals = unit_inj_vals

    # Extend the ranges of the distribution makers so that they reflect the
    # range of the scan. This is a pain if there are different values depending
    # on the ordering. Need to extend the ranges of both values in the
    # hypothesis maker since the hypotheses may minimise over the ordering,
    # and could then go out of range.

    # Also, some parameters CANNOT go negative or else things won't work.
    # To account for this, check if parameters lower value was positive and,
    # if so, enforce that it is positive now.
    if isinstance(inj_vals[0], dict):
        # Calculate ranges for both parameters
        norangediff = max(no_inj_vals) - min(no_inj_vals)
        norangediff = norangediff * ureg(inj_units)
        norangetuple = (min(no_inj_vals) * ureg(inj_units) - 0.5 * norangediff,
                        max(no_inj_vals) * ureg(inj_units) + 0.5 * norangediff)
        iorangediff = max(io_inj_vals) - min(io_inj_vals)
        iorangediff = iorangediff * ureg(inj_units)
        iorangetuple = (min(io_inj_vals) * ureg(inj_units) - 0.5 * iorangediff,
                        max(io_inj_vals) * ureg(inj_units) + 0.5 * iorangediff)
        # Do it for both hierarchies
        for hierarchy, rangetuple in zip(['nh', 'ih'],
                                         [norangetuple, iorangetuple]):
            hypo_testing.set_param_ranges(selection=hierarchy,
                                          test_name=test_name,
                                          rangetuple=rangetuple,
                                          inj_units=inj_units)
        # Select the proper params again
        hypo_testing.h0_maker.select_params(init_args_d['h0_param_selections'])
        hypo_testing.h1_maker.select_params(init_args_d['h1_param_selections'])
    # Otherwise it's way simpler...
    else:
        rangediff = max(inj_vals) - min(inj_vals)
        rangetuple = (min(inj_vals) - 0.5 * rangediff,
                      max(inj_vals) + 0.5 * rangediff)
        hypo_testing.set_param_ranges(selection=None,
                                      test_name=test_name,
                                      rangetuple=rangetuple,
                                      inj_units=inj_units)

    if hypo_testing.data_maker.params[test_name].prior is not None:
        if hypo_testing.data_maker.params[test_name].prior.kind != 'uniform':
            if force_prior:
                logging.warning(
                    'Parameter to be scanned, %s, has a %s prior that you have'
                    ' requested to be left on. This will likely make the'
                    ' results wrong.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
            else:
                logging.info(
                    'Parameter to be scanned, %s, has a %s prior.This will be'
                    ' changed to a uniform prior (i.e. no prior) for this'
                    ' test.', test_name,
                    hypo_testing.data_maker.params[test_name].prior.kind)
                uniformprior = Prior(kind='uniform')
                hypo_testing.h0_maker.params[test_name].prior = uniformprior
                hypo_testing.h1_maker.params[test_name].prior = uniformprior
    else:
        if force_prior:
            raise ValueError('Parameter to be scanned, %s, does not have a'
                             ' prior but you have requested to force one to be'
                             ' left on. Something is potentially wrong.'
                             % test_name)
        else:
            logging.info(
                'Parameter to be scanned, %s, does not have a prior.'
                ' So nothing needs to be done.', test_name)

    # Everything is set up. Now do the scan.
    outputs = hypo_testing.asimov_inj_param_scan( # pylint: disable=redefined-outer-name
        param_name=param_name,
        test_name=test_name,
        inj_vals=inj_vals,
        requested_vals=requested_vals,
        h0_name=init_args_d['h0_name'],
        h1_name=init_args_d['h1_name'],
        data_name=init_args_d['data_name'])

    if return_outputs:
        return outputs
def _compute_nominal_outputs(self):
    """Load events, perform sanity checks and put them into histograms.

    If the error method contains 'shape', the events of `alt_icc_bg_file`
    are histogrammed as well and normalized to the nominal histogram (we
    are only interested in the shape difference).

    Sets `self.bin_names`, `self.bin_edges`, `self.icc_bg_hist` and, if
    applicable, `self.alt_icc_bg_hist`. The process exits with status 1 if
    an input file cannot be opened.

    Raises
    ------
    AssertionError
        If the events in the nominal file fail the sanity checks.
    ValueError
        If `sim_ver` is not one of '4digit', '5digit' or 'dima'.
    """
    # get params
    icc_bg_file = self.params.icc_bg_file.value
    # Guard against an unset (None) error method, for which the `in` test
    # would raise TypeError
    if self.error_method is not None and 'shape' in self.error_method:
        alt_icc_bg_file = self.params.alt_icc_bg_file.value
    else:
        alt_icc_bg_file = None
    sim_ver = self.params.sim_ver.value
    use_def1 = self.params.use_def1.value
    bdt_cut = self.params.bdt_cut.m_as('dimensionless')

    self.bin_names = self.output_binning.names
    self.bin_edges = []
    for name in self.bin_names:
        if 'energy' in name:
            bin_edges = self.output_binning[name].bin_edges.to(
                'GeV').magnitude
        else:
            bin_edges = self.output_binning[name].bin_edges.magnitude
        self.bin_edges.append(bin_edges)

    # the rest of this function is PISA v2 legacy code...
    logging.info('Initializing BackgroundServiceICC...')
    logging.info('Opening file: %s', icc_bg_file)

    alt_bg_file = None
    try:
        bg_file = h5py.File(find_resource(icc_bg_file), 'r')
        if alt_icc_bg_file is not None:
            alt_bg_file = h5py.File(find_resource(alt_icc_bg_file), 'r')
    except IOError as e:
        logging.error("Unable to open icc_bg_file %s", icc_bg_file)
        logging.error(e)
        sys.exit(1)

    # Everything read from the files is copied into numpy arrays below, so
    # the handles can be released once this block finishes (or fails).
    try:
        # sanity check
        santa_doms = bg_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = bg_file['IC86_Dunkman_L3']['value']
        l4 = bg_file['IC86_Dunkman_L4']['result']
        l5 = bg_file['IC86_Dunkman_L5']['bdt_score']
        l6 = bg_file['IC86_Dunkman_L6']
        if use_def1:
            l4_pass = np.all(l4 == 1)
        else:
            if sim_ver in ['5digit', 'dima']:
                l4_invVICH = bg_file['IC86_Dunkman_L4']['result_invertedVICH']
                l4_pass = np.all(np.logical_or(l4 == 1, l4_invVICH == 1))
            else:
                logging.info(
                    'For the old simulation, def.2 background not done yet,'
                    ' so still use def1 for it.')
                l4_pass = np.all(l4 == 1)
        assert (np.all(santa_doms >= 3) and np.all(l3 == 1) and l4_pass
                and np.all(l5 >= 0.1))
        corridor_doms_over_threshold = l6['corridor_doms_over_threshold']
        inverted_corridor_cut = corridor_doms_over_threshold > 1
        assert (np.all(inverted_corridor_cut)
                and np.all(l6['santa_direct_doms'] >= 3)
                and np.all(l6['mn_start_contained'] == 1.)
                and np.all(l6['mn_stop_contained'] == 1.))

        # load events
        if sim_ver == '4digit':
            variable = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
        elif sim_ver in ['5digit', 'dima']:
            variable = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
        else:
            raise ValueError('Only allow sim_ver 4digit, 5 digit or dima!')
        reco_energy_all = np.array(bg_file[variable]['energy'])
        reco_coszen_all = np.array(np.cos(bg_file[variable]['zenith']))
        pid_all = np.array(bg_file['IC86_Dunkman_L6']['delta_LLH'])
        if alt_icc_bg_file is not None:
            alt_reco_energy_all = np.array(alt_bg_file[variable]['energy'])
            alt_reco_coszen_all = np.array(
                np.cos(alt_bg_file[variable]['zenith']))
            alt_pid_all = np.array(alt_bg_file['IC86_Dunkman_L6']['delta_LLH'])
            alt_l5 = alt_bg_file['IC86_Dunkman_L5']['bdt_score']

        # Cut: only keep events with bdt score >= the `bdt_cut` param
        cut_events = {}
        cut = l5 >= bdt_cut
        cut_events['reco_energy'] = reco_energy_all[cut]
        cut_events['reco_coszen'] = reco_coszen_all[cut]
        cut_events['pid'] = pid_all[cut]

        if alt_icc_bg_file is not None:
            # Same cut for the alternative sample
            alt_cut_events = {}
            alt_cut = alt_l5 >= bdt_cut
            alt_cut_events['reco_energy'] = alt_reco_energy_all[alt_cut]
            alt_cut_events['reco_coszen'] = alt_reco_coszen_all[alt_cut]
            alt_cut_events['pid'] = alt_pid_all[alt_cut]

        logging.info("Creating a ICC background hists...")
        # make histo
        if self.params.kde_hist.value:
            self.icc_bg_hist = self.kde_histogramdd(
                np.array([cut_events[bin_name]
                          for bin_name in self.bin_names]).T,
                binning=self.output_binning,
                coszen_name='reco_coszen',
                use_cuda=True,
                bw_method='silverman',
                alpha=0.3,
                oversample=10,
                coszen_reflection=0.5,
                adaptive=True)
        else:
            self.icc_bg_hist, _ = np.histogramdd(
                sample=np.array([cut_events[bin_name]
                                 for bin_name in self.bin_names]).T,
                bins=self.bin_edges)

        conversion = self.params.atm_muon_scale.value.m_as(
            'dimensionless') / ureg('common_year').to('seconds').m
        logging.info('nominal ICC rate at %.6E Hz',
                     self.icc_bg_hist.sum() * conversion)

        if alt_icc_bg_file is not None:
            if self.params.kde_hist.value:
                self.alt_icc_bg_hist = self.kde_histogramdd(
                    np.array([
                        alt_cut_events[bin_name] for bin_name in self.bin_names
                    ]).T,
                    binning=self.output_binning,
                    coszen_name='reco_coszen',
                    use_cuda=True,
                    bw_method='silverman',
                    alpha=0.3,
                    oversample=10,
                    coszen_reflection=0.5,
                    adaptive=True)
            else:
                self.alt_icc_bg_hist, _ = np.histogramdd(
                    sample=np.array([
                        alt_cut_events[bin_name] for bin_name in self.bin_names
                    ]).T,
                    bins=self.bin_edges)
            # only interested in shape difference, not rate
            scale = self.icc_bg_hist.sum() / self.alt_icc_bg_hist.sum()
            self.alt_icc_bg_hist *= scale
    finally:
        bg_file.close()
        if alt_bg_file is not None:
            alt_bg_file.close()
class icc(Stage):
    """
    Data loader stage

    Parameters
    ----------
    params : ParamSet
        atm_muon_scale: float
            scale factor to be applied to outputs
        icc_bg_file : string
            path pointing to the hdf5 file containing the events
        use_def1 : bool
            whether ICC definition 1 is used
        sim_ver: string
            indicating the sim version, either 4digit, 5digit or dima
        livetime : time quantity
            livetime scale factor
        bdt_cut : float
            further cut applied to events for the atm. muon rejections BDT
        alt_icc_bg_file : string
            path pointing to an hdf5 file containing the events for an
            alternative selection (only read when the error method contains
            'shape'; used for a shape comparison with the nominal events)
        kde_hist : bool
            whether the histograms are made with `kde_histogramdd` instead
            of `numpy.histogramdd`
        fixed_scale_factor : float
            scale fixed errors

    Notes
    -----
    The current version of this code is a port from pisa v2 nutau branch.
    It clearly needs to be cleaned up properly at some point.

    """
    def __init__(self, params, output_binning, disk_cache=None,
                 memcache_deepcopy=True, error_method=None,
                 outputs_cache_depth=20, debug_mode=None):

        expected_params = (
            'atm_muon_scale',
            'icc_bg_file',
            'use_def1',
            'sim_ver',
            'livetime',
            'bdt_cut',
            'alt_icc_bg_file',
            'kde_hist',
            'fixed_scale_factor'
        )

        # NOTE(review): without a trailing comma this is the string 'total',
        # not a 1-tuple; left as is since the base class is outside this view.
        output_names = ('total')

        # Name the class explicitly: `super(self.__class__, self)` recurses
        # infinitely as soon as this class is subclassed.
        super(icc, self).__init__(
            use_transforms=False,
            params=params,
            expected_params=expected_params,
            output_names=output_names,
            error_method=error_method,
            disk_cache=disk_cache,
            memcache_deepcopy=memcache_deepcopy,
            outputs_cache_depth=outputs_cache_depth,
            output_binning=output_binning,
            debug_mode=debug_mode
        )

        if self.params.kde_hist.value:
            # Imported only when needed
            from pisa.utils.kde_hist import kde_histogramdd
            self.kde_histogramdd = kde_histogramdd

    def _compute_nominal_outputs(self):
        """Load events, perform sanity checks and put them into histograms.

        If the error method contains 'shape', the events of
        `alt_icc_bg_file` are histogrammed as well and normalized to the
        nominal histogram (we are only interested in the shape difference).

        Sets `self.bin_names`, `self.bin_edges`, `self.icc_bg_hist` and, if
        applicable, `self.alt_icc_bg_hist`. The process exits with status 1
        if an input file cannot be opened.

        Raises
        ------
        AssertionError
            If the events in the nominal file fail the sanity checks.
        ValueError
            If `sim_ver` is not one of '4digit', '5digit' or 'dima'.
        """
        # get params
        icc_bg_file = self.params.icc_bg_file.value
        # Guard against an unset (None) error method, for which the `in`
        # test would raise TypeError
        if self.error_method is not None and 'shape' in self.error_method:
            alt_icc_bg_file = self.params.alt_icc_bg_file.value
        else:
            alt_icc_bg_file = None
        sim_ver = self.params.sim_ver.value
        use_def1 = self.params.use_def1.value
        bdt_cut = self.params.bdt_cut.m_as('dimensionless')

        self.bin_names = self.output_binning.names
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in name:
                bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        logging.info('Initializing BackgroundServiceICC...')
        logging.info('Opening file: %s', icc_bg_file)

        alt_bg_file = None
        try:
            bg_file = h5py.File(find_resource(icc_bg_file), 'r')
            if alt_icc_bg_file is not None:
                alt_bg_file = h5py.File(find_resource(alt_icc_bg_file), 'r')
        except IOError as e:
            logging.error("Unable to open icc_bg_file %s", icc_bg_file)
            logging.error(e)
            sys.exit(1)

        # Everything read from the files is copied into numpy arrays below,
        # so the handles can be released once this block finishes (or fails).
        try:
            # sanity check
            santa_doms = bg_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
            l3 = bg_file['IC86_Dunkman_L3']['value']
            l4 = bg_file['IC86_Dunkman_L4']['result']
            l5 = bg_file['IC86_Dunkman_L5']['bdt_score']
            l6 = bg_file['IC86_Dunkman_L6']
            if use_def1:
                l4_pass = np.all(l4 == 1)
            else:
                if sim_ver in ['5digit', 'dima']:
                    l4_invVICH = bg_file['IC86_Dunkman_L4']['result_invertedVICH']
                    l4_pass = np.all(np.logical_or(l4 == 1, l4_invVICH == 1))
                else:
                    logging.info(
                        'For the old simulation, def.2 background not done yet,'
                        ' so still use def1 for it.'
                    )
                    l4_pass = np.all(l4 == 1)
            assert (np.all(santa_doms >= 3) and np.all(l3 == 1) and l4_pass
                    and np.all(l5 >= 0.1))
            corridor_doms_over_threshold = l6['corridor_doms_over_threshold']
            inverted_corridor_cut = corridor_doms_over_threshold > 1
            assert (np.all(inverted_corridor_cut)
                    and np.all(l6['santa_direct_doms'] >= 3)
                    and np.all(l6['mn_start_contained'] == 1.)
                    and np.all(l6['mn_stop_contained'] == 1.))

            # load events
            if sim_ver == '4digit':
                variable = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
            elif sim_ver in ['5digit', 'dima']:
                variable = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
            else:
                raise ValueError('Only allow sim_ver 4digit, 5 digit or dima!')
            reco_energy_all = np.array(bg_file[variable]['energy'])
            reco_coszen_all = np.array(np.cos(bg_file[variable]['zenith']))
            pid_all = np.array(bg_file['IC86_Dunkman_L6']['delta_LLH'])
            if alt_icc_bg_file is not None:
                alt_reco_energy_all = np.array(alt_bg_file[variable]['energy'])
                alt_reco_coszen_all = np.array(np.cos(alt_bg_file[variable]['zenith']))
                alt_pid_all = np.array(alt_bg_file['IC86_Dunkman_L6']['delta_LLH'])
                alt_l5 = alt_bg_file['IC86_Dunkman_L5']['bdt_score']

            # Cut: only keep events with bdt score >= the `bdt_cut` param
            cut_events = {}
            cut = l5 >= bdt_cut
            cut_events['reco_energy'] = reco_energy_all[cut]
            cut_events['reco_coszen'] = reco_coszen_all[cut]
            cut_events['pid'] = pid_all[cut]

            if alt_icc_bg_file is not None:
                # Same cut for the alternative sample
                alt_cut_events = {}
                alt_cut = alt_l5 >= bdt_cut
                alt_cut_events['reco_energy'] = alt_reco_energy_all[alt_cut]
                alt_cut_events['reco_coszen'] = alt_reco_coszen_all[alt_cut]
                alt_cut_events['pid'] = alt_pid_all[alt_cut]

            logging.info("Creating a ICC background hists...")
            # make histo
            if self.params.kde_hist.value:
                self.icc_bg_hist = self.kde_histogramdd(
                    np.array([cut_events[bin_name]
                              for bin_name in self.bin_names]).T,
                    binning=self.output_binning,
                    coszen_name='reco_coszen',
                    use_cuda=True,
                    bw_method='silverman',
                    alpha=0.3,
                    oversample=10,
                    coszen_reflection=0.5,
                    adaptive=True
                )
            else:
                self.icc_bg_hist, _ = np.histogramdd(
                    sample=np.array([cut_events[bin_name]
                                     for bin_name in self.bin_names]).T,
                    bins=self.bin_edges
                )

            conversion = (self.params.atm_muon_scale.value.m_as('dimensionless')
                          / ureg('common_year').to('seconds').m)
            logging.info('nominal ICC rate at %.6E Hz',
                         self.icc_bg_hist.sum() * conversion)

            if alt_icc_bg_file is not None:
                if self.params.kde_hist.value:
                    self.alt_icc_bg_hist = self.kde_histogramdd(
                        np.array([alt_cut_events[bin_name]
                                  for bin_name in self.bin_names]).T,
                        binning=self.output_binning,
                        coszen_name='reco_coszen',
                        use_cuda=True,
                        bw_method='silverman',
                        alpha=0.3,
                        oversample=10,
                        coszen_reflection=0.5,
                        adaptive=True
                    )
                else:
                    self.alt_icc_bg_hist, _ = np.histogramdd(
                        sample=np.array([alt_cut_events[bin_name]
                                         for bin_name in self.bin_names]).T,
                        bins=self.bin_edges
                    )
                # only interested in shape difference, not rate
                scale = self.icc_bg_hist.sum() / self.alt_icc_bg_hist.sum()
                self.alt_icc_bg_hist *= scale
        finally:
            bg_file.close()
            if alt_bg_file is not None:
                alt_bg_file.close()