def setup_function(self):
    """Load the CSV file into a single ``'icc'`` container and bin it.

    ``self.events_file`` is read with pandas.  The ``count``,
    ``abs_uncert`` (stored under ``errors``), ``reco_energy``,
    ``reco_coszen`` and ``pid`` columns are copied into the container as
    ``FTYPE`` arrays and a unit ``weights`` array is added.  The container
    is registered on ``self.data`` and ``weights``, ``count`` and ``errors``
    are then handed to ``array_to_binned`` together with
    ``self.output_specs``.

    Raises
    ------
    ValueError
        If ``self.data`` holds no containers after loading.
    """
    table = pd.read_csv(self.events_file)

    icc = Container('icc')
    icc.data_specs = 'events'

    # 'count' is stored first; the unit weights take their length from the
    # container afterwards.
    icc['count'] = table['count'].values.astype(FTYPE)
    icc['weights'] = np.ones(icc.array_length, dtype=FTYPE)
    icc['errors'] = table['abs_uncert'].values.astype(FTYPE)
    for column in ('reco_energy', 'reco_coszen', 'pid'):
        icc[column] = table[column].values.astype(FTYPE)

    self.data.add_container(icc)

    # sanity check: at least one container must exist by now
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')

    # convert the per-row arrays into the output binning
    for key in ('weights', 'count', 'errors'):
        icc.array_to_binned(key, self.output_specs)
def setup_function(self):
    """Create one grid-based container per entry of ``self.output_names``.

    The grid is obtained from ``self.calc_mode.meshgrid`` and every
    dimension listed in ``self.calc_mode.names`` is stored flattened as an
    ``FTYPE`` array.  Each container also receives the auxiliary values
    ``nubar`` (-1 if the name contains ``'bar'``, else 1) and ``flav``
    (0, 1 or 2 for names containing ``'e'``, ``'mu'`` or ``'tau'``), plus
    unit ``initial_weights`` and ``weights`` arrays.

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name.
    """
    # The grid does not depend on the output name, so build it only once.
    # ``flatten()`` below copies, so containers never share array memory.
    mesh = self.calc_mode.meshgrid(entity=self.entity, attach_units=False)
    size = mesh[0].size

    for name in self.output_names:
        # Create the container
        container = Container(name, self.calc_mode)

        # Determine flavor.  The checks run from 'tau' down to 'e' so that a
        # name matching several substrings resolves exactly as before (the
        # last of e/mu/tau wins); an unmatched name is now an explicit error
        # instead of leaving ``flav`` unbound or stale from the previous
        # iteration.
        nubar = -1 if 'bar' in name else 1
        if 'tau' in name:
            flav = 2
        elif 'mu' in name:
            flav = 1
        elif 'e' in name:
            flav = 0
        else:
            raise ValueError('Cannot determine flavour of %s' % name)

        # Create arrays
        for var_name, var_vals in zip(self.calc_mode.names, mesh):
            container[var_name] = var_vals.flatten().astype(FTYPE)

        # Add useful info
        container.set_aux_data('nubar', nubar)
        container.set_aux_data('flav', flav)

        # Make some initial weights
        container['initial_weights'] = np.ones(size, dtype=FTYPE)
        container['weights'] = np.ones(size, dtype=FTYPE)

        self.data.add_container(container)
def setup_function(self):
    """Build one events container per output name from the CSV events file.

    ``self.events_file`` is read once with pandas.  For every entry of
    ``self.output_names`` the rows are selected whose ``pdg`` column equals
    ``nubar * (12 + 2 * flav)`` and whose ``type`` column is ``> 0`` (names
    containing ``'cc'``) or ``== 0`` (all other names).  The selected
    ``weight``, ``true_energy``, ``true_coszen``, ``reco_energy``,
    ``reco_coszen`` and ``pid`` columns are stored as ``FTYPE`` arrays
    (``weight`` under the key ``weighted_aeff``), together with unit
    ``weights`` / ``initial_weights`` and the scalars ``nubar`` and ``flav``.

    If ``self.output_mode`` is ``'binned'`` the ``weights`` of every
    container are additionally passed to ``array_to_binned``.

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name, or if
        ``self.data`` holds no containers after loading.
    """
    raw_data = pd.read_csv(self.events_file)

    # create containers from the events
    for name in self.output_names:
        container = Container(name)
        container.data_specs = self.input_specs

        nubar = -1 if 'bar' in name else 1
        # Checked from 'tau' down to 'e' so multi-match names resolve as
        # before (last of e/mu/tau wins).  An unmatched name used to leave
        # ``flav`` unbound, or silently reuse the previous iteration's value.
        if 'tau' in name:
            flav = 2
        elif 'mu' in name:
            flav = 1
        elif 'e' in name:
            flav = 0
        else:
            raise ValueError('Cannot determine flavour of %s' % name)

        # cut out right part
        pdg = nubar * (12 + 2 * flav)
        mask = raw_data['pdg'] == pdg
        if 'cc' in name:
            mask = np.logical_and(mask, raw_data['type'] > 0)
        else:
            mask = np.logical_and(mask, raw_data['type'] == 0)
        events = raw_data[mask]

        # 'weighted_aeff' goes in first; the unit weight arrays take their
        # length from the container afterwards.
        container['weighted_aeff'] = events['weight'].values.astype(FTYPE)
        container['weights'] = np.ones(container.array_length, dtype=FTYPE)
        container['initial_weights'] = np.ones(container.array_length,
                                               dtype=FTYPE)
        for column in ('true_energy', 'true_coszen',
                       'reco_energy', 'reco_coszen', 'pid'):
            container[column] = events[column].values.astype(FTYPE)

        container.add_scalar_data('nubar', nubar)
        container.add_scalar_data('flav', flav)

        self.data.add_container(container)

    # check created at least one container
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')

    # test
    if self.output_mode == 'binned':
        for container in self.data:
            container.array_to_binned('weights', self.output_specs)
def setup_function(self):
    """Generate dummy containers, one per entry of ``self.output_names``.

    A ``numpy.random.RandomState`` seeded with ``params.seed`` is stored on
    ``self.random_state``.  Every container gets ``params.n_events`` random
    ``true_energy`` (``10 ** (3 * u)``) and ``true_coszen`` (``2 * u - 1``)
    values, where ``u`` is drawn uniformly from ``[0, 1)``.  All remaining
    arrays have length ``n_events`` when ``self.input_mode`` is ``'events'``
    and ``self.input_specs.size`` when it is ``'binned'``:

    * ``initial_weights`` - random if ``params.random`` is truthy, else ones
    * ``weights`` and ``weighted_aeff`` - ones
    * ``nominal_nu_flux`` / ``nominal_nubar_flux`` - an array of shape
      ``(size, 2)`` with column 0 all zeros and column 1 all ones

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name, or if
        ``self.input_mode`` is neither ``'events'`` nor ``'binned'``.
    """
    n_events = int(self.params.n_events.value.m)
    seed = int(self.params.seed.value.m)
    self.random_state = np.random.RandomState(seed)

    for name in self.output_names:
        container = Container(name)
        container.data_specs = self.input_specs

        nubar = -1 if 'bar' in name else 1
        # Checked from 'tau' down to 'e' so multi-match names resolve as
        # before (last of e/mu/tau wins).  An unmatched name used to leave
        # ``flav`` unbound, or silently reuse the previous iteration's value.
        if 'tau' in name:
            flav = 2
        elif 'mu' in name:
            flav = 1
        elif 'e' in name:
            flav = 0
        else:
            raise ValueError('Cannot determine flavour of %s' % name)

        # Generate some events in the array representation just to have them
        # here we add those explicitly in the array representation
        true_energy = np.power(
            10, self.random_state.rand(n_events).astype(FTYPE) * 3)
        true_coszen = self.random_state.rand(n_events).astype(FTYPE) * 2 - 1
        container.add_array_data('true_energy', true_energy)
        container.add_array_data('true_coszen', true_coszen)

        if self.input_mode == 'events':
            size = n_events
        elif self.input_mode == 'binned':
            size = self.input_specs.size
        else:
            # Previously ``size`` stayed unbound here and the failure only
            # surfaced later as an UnboundLocalError.
            raise ValueError(
                'Unsupported input_mode %r; expected "events" or "binned".'
                % (self.input_mode,))

        # make some initial weights
        if self.params.random.value:
            container['initial_weights'] = self.random_state.rand(
                size).astype(FTYPE)
        else:
            container['initial_weights'] = np.ones(size, dtype=FTYPE)

        # other necessary info
        container.add_scalar_data('nubar', nubar)
        container.add_scalar_data('flav', flav)

        container['weights'] = np.ones(size, dtype=FTYPE)
        container['weighted_aeff'] = np.ones(size, dtype=FTYPE)

        flux_nue = np.zeros(size, dtype=FTYPE)
        flux_numu = np.ones(size, dtype=FTYPE)
        flux = np.stack([flux_nue, flux_numu], axis=1)

        # the same array object is stored under both keys, as before
        container['nominal_nu_flux'] = flux
        container['nominal_nubar_flux'] = flux

        self.data.add_container(container)
def setup_function(self):
    """Load the first input file and fill one container per output name.

    ``self.in_files[0]`` is opened with ``h5.File``.  For every entry of
    ``self.output_names`` a selection mask is built from the ``true_pid``
    and ``type`` entries (``type > 0`` for names containing ``"cc"``,
    ``type == 0`` otherwise) and the selected ``weight``,
    ``total_column_depth``, ``true_bjorkenx``, ``true_bjorkeny``,
    ``true_energy``, ``true_zenith``, ``reco_energy``, ``reco_zenith`` and
    ``pid`` entries are copied into the container as ``FTYPE`` arrays.  Unit
    ``weights``, ``initial_weights``, ``astro_weights`` and
    ``astro_initial_weights`` arrays and the auxiliary values ``nubar`` and
    ``flav`` are added as well.

    The file handle is closed once all containers are filled, including
    when loading fails part-way.

    NOTE(review): ``true_coszen`` / ``reco_coszen`` are filled directly from
    the ``true_zenith`` / ``reco_zenith`` entries without taking a cosine -
    confirm that the file already stores cosines.

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name.
    """
    raw_data = h5.File(self.in_files[0])
    try:
        # create containers from the events
        for name in self.output_names:
            container = Container(name)

            nubar = -1 if "bar" in name else 1
            # Checked from "tau" down to "e" so multi-match names resolve as
            # before (last of e/mu/tau wins).  An unmatched name used to
            # leave ``flav`` unbound, or silently reuse the previous
            # iteration's value.
            if "tau" in name:
                flav = 2
            elif "mu" in name:
                flav = 1
            elif "e" in name:
                flav = 0
            else:
                raise ValueError("Cannot determine flavour of %s" % name)

            # cut out right part
            pdg = nubar * (12 + 2 * flav)
            mask = raw_data["true_pid"] == pdg
            if "cc" in name:
                mask = np.logical_and(mask, raw_data["type"] > 0)
            else:
                mask = np.logical_and(mask, raw_data["type"] == 0)

            # NOTE(review): the original author flagged this selection as
            # only working for one particular file format; it is kept
            # unchanged here - verify against the actual input layout.
            events = raw_data[mask]

            # "weighted_aeff" goes in first; the unit weight arrays take
            # their length from the container afterwards.
            container["weighted_aeff"] = events["weight"][:].astype(FTYPE)
            for key in ("weights", "initial_weights",
                        "astro_weights", "astro_initial_weights"):
                container[key] = np.ones(container.size, dtype=FTYPE)

            container["total_column_depth"] = (
                events["total_column_depth"][:].astype(FTYPE))
            container["true_bjorkenx"] = events["true_bjorkenx"][:].astype(FTYPE)
            container["true_bjorkeny"] = events["true_bjorkeny"][:].astype(FTYPE)
            container["true_energy"] = events["true_energy"][:].astype(FTYPE)
            container["true_coszen"] = events["true_zenith"][:].astype(FTYPE)
            container["reco_energy"] = events["reco_energy"][:].astype(FTYPE)
            container["reco_coszen"] = events["reco_zenith"][:].astype(FTYPE)
            container["pid"] = events["pid"][:].astype(FTYPE)

            container.set_aux_data("nubar", nubar)
            container.set_aux_data("flav", flav)

            self.data.add_container(container)
    finally:
        # Every array stored above is an ``astype`` copy, so nothing refers
        # to the file any more and the handle can be released.
        raw_data.close()
def setup_function(self):
    """Generate dummy containers, one per entry of ``self.output_names``.

    A ``numpy.random.RandomState`` seeded with ``params.seed`` is stored on
    ``self.random_state``.  Unless ``self.calc_mode`` is a
    ``MultiDimBinning``, every container gets ``params.n_events`` random
    ``true_energy`` (``10 ** (3 * u)``) and ``true_coszen`` (``2 * u - 1``)
    values, where ``u`` is drawn uniformly from ``[0, 1)``.  All remaining
    arrays are sized by ``container.size``:

    * ``initial_weights`` - random if ``params.random`` is truthy, else ones
    * ``weights`` and ``weighted_aeff`` - ones
    * ``nu_flux_nominal`` / ``nubar_flux_nominal`` - an array of shape
      ``(size, 2)`` with column 0 all zeros and column 1 all ones

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name.
    """
    n_events = int(self.params.n_events.value.m)
    seed = int(self.params.seed.value.m)
    self.random_state = np.random.RandomState(seed)

    for name in self.output_names:
        container = Container(name, representation=self.calc_mode)

        nubar = -1 if 'bar' in name else 1
        # Checked from 'tau' down to 'e' so multi-match names resolve as
        # before (last of e/mu/tau wins).  An unmatched name used to leave
        # ``flav`` unbound, or silently reuse the previous iteration's value.
        if 'tau' in name:
            flav = 2
        elif 'mu' in name:
            flav = 1
        elif 'e' in name:
            flav = 0
        else:
            raise ValueError('Cannot determine flavour of %s' % name)

        if not isinstance(self.calc_mode, MultiDimBinning):
            # Generate some events in the array representation just to have
            # them; here we add those explicitly in the array representation
            container['true_energy'] = np.power(
                10, self.random_state.rand(n_events).astype(FTYPE) * 3)
            container['true_coszen'] = self.random_state.rand(
                n_events).astype(FTYPE) * 2 - 1

        size = container.size

        # make some initial weights
        if self.params.random.value:
            container['initial_weights'] = self.random_state.rand(
                size).astype(FTYPE)
        else:
            container['initial_weights'] = np.ones(size, dtype=FTYPE)

        # other necessary info
        container.set_aux_data('nubar', nubar)
        container.set_aux_data('flav', flav)

        container['weights'] = np.ones(size, dtype=FTYPE)
        container['weighted_aeff'] = np.ones(size, dtype=FTYPE)

        flux_nue = np.zeros(size, dtype=FTYPE)
        flux_numu = np.ones(size, dtype=FTYPE)
        flux = np.stack([flux_nue, flux_numu], axis=1)

        # the same array object is stored under both keys, as before
        container['nu_flux_nominal'] = flux
        container['nubar_flux_nominal'] = flux

        self.data.add_container(container)
def setup_function(self):
    """Load the CSV file into a single container named ``'total'``.

    ``self.events_file`` is read with pandas.  The container's
    representation is set to ``self.calc_mode`` and the ``count`` column is
    stored as ``weights``; ``reco_energy``, ``reco_coszen`` and ``pid`` are
    copied under their own names.  All arrays are cast to ``FTYPE``.

    Raises
    ------
    ValueError
        If ``self.data`` holds no containers after loading.
    """
    table = pd.read_csv(self.events_file)

    total = Container('total')
    total.representation = self.calc_mode

    # (container key, CSV column) pairs, in the order they are stored
    key_to_column = (
        ('weights', 'count'),
        ('reco_energy', 'reco_energy'),
        ('reco_coszen', 'reco_coszen'),
        ('pid', 'pid'),
    )
    for key, column in key_to_column:
        total[key] = table[column].values.astype(FTYPE)

    self.data.add_container(total)

    # sanity check: at least one container must exist by now
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')
def setup_function(self):
    """Load the CSV file into a single ``'icc'`` events container.

    ``self.events_file`` is read with pandas.  The ``count``,
    ``abs_uncert`` (stored under ``errors``), ``reco_energy``,
    ``reco_coszen`` and ``pid`` columns are copied into the container as
    ``FTYPE`` arrays and a unit ``weights`` array of length
    ``container.size`` is added.

    Raises
    ------
    ValueError
        If ``self.data`` holds no containers after loading.
    """
    table = pd.read_csv(self.events_file)

    icc = Container('icc')
    icc.data_specs = 'events'

    # 'count' is stored first; the unit weights take their length from the
    # container afterwards.
    icc['count'] = table['count'].values.astype(FTYPE)
    icc['weights'] = np.ones(icc.size, dtype=FTYPE)
    icc['errors'] = table['abs_uncert'].values.astype(FTYPE)
    for column in ('reco_energy', 'reco_coszen', 'pid'):
        icc[column] = table[column].values.astype(FTYPE)

    self.data.add_container(icc)

    # sanity check: at least one container must exist by now
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')
def setup_function(self):
    """Load the CSV file into a single ``'data'`` container and bin it.

    ``self.events_file`` is read with pandas.  The ``count`` column is
    stored as ``weights``; ``reco_energy``, ``reco_coszen`` and ``pid`` are
    copied under their own names (all cast to ``FTYPE``).  After the
    container is registered on ``self.data`` its ``weights`` are handed to
    ``array_to_binned`` together with ``self.output_specs``.

    Raises
    ------
    ValueError
        If ``self.data`` holds no containers after loading.
    """
    table = pd.read_csv(self.events_file)

    observed = Container('data')
    observed.data_specs = 'events'

    # (container key, CSV column) pairs, in the order they are stored
    key_to_column = (
        ('weights', 'count'),
        ('reco_energy', 'reco_energy'),
        ('reco_coszen', 'reco_coszen'),
        ('pid', 'pid'),
    )
    for key, column in key_to_column:
        observed[key] = table[column].values.astype(FTYPE)

    self.data.add_container(observed)

    # sanity check: at least one container must exist by now
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')

    observed.array_to_binned('weights', self.output_specs)
def setup_function(self):
    """Create and fill one container per entry of ``self.output_names``.

    For each name the work is delegated to helper methods of this class, in
    this order: ``get_pid_and_interaction_type``, ``query_database``,
    ``add_truth``, ``add_reco``, ``initialize_weights`` and
    ``add_aeff_weight``.  The container returned by the last helper is
    registered on ``self.data``.

    Raises
    ------
    ValueError
        If ``self.data`` holds no containers after loading.
    """
    for output_name in self.output_names:
        filled = Container(output_name)

        (pid, interaction_type,
         nubar, flavor) = self.get_pid_and_interaction_type(output_name)
        truth, reco, n_files = self.query_database(interaction_type, pid)

        # every helper returns the container it was handed
        filled = self.add_truth(filled, truth, nubar, flavor)
        filled = self.add_reco(filled, reco)
        filled = self.initialize_weights(filled)
        filled = self.add_aeff_weight(filled, truth, n_files)

        self.data.add_container(filled)

    # sanity check: at least one container must exist by now
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.')
def setup_function(self):
    """Load the HDF5 events file, apply cuts and fill one container per name.

    The data returned by ``self.load_hdf5_file`` is passed through
    ``self.calc_rho36``, through ``self.calc_uncertainties`` (only if a key
    of ``self.cuts`` starts with ``"unc_est"``) and through
    ``self.apply_cuts``.  For every entry of ``self.output_names`` the events
    are selected whose ``pdg_id`` equals ``nubar * (12 + 2 * flav)`` and
    whose ``interaction_type`` is 1 (names containing ``"cc"``) or 2 (all
    other names).

    Stored per container (all as ``FTYPE``):

    * ``true_energy``, ``true_coszen``, ``pdg_code`` from ``MCInIcePrimary``
    * ``interaction`` from ``I3MCWeightDict``
    * ``weighted_aeff`` = ``1e-4 * OneWeight / n_files / gen_ratio / NEvents``
      where ``n_files`` is ``self.files_per_flavor[flav]``
    * ``reco_energy`` (cascade + track energy), ``reco_coszen``, ``reco_z``
      for the reconstruction named by ``self.reco``, and ``reco_rho``
    * ``pid`` - ``L7_PIDClassifier_ProbTrack`` if ``self.track_E_cut`` is
      ``None``, otherwise the boolean ``track_energy > track_E_cut``
    * unit ``weights`` and ``initial_weights``

    plus the auxiliary values ``nubar`` and ``flav``.

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name, or if
        ``self.data`` holds no containers after loading.
    """
    # factor applied to OneWeight (loop-invariant, so defined once)
    CM2_TO_M2 = 1e-4

    data = self.load_hdf5_file(self.events_file)
    data = self.calc_rho36(data)
    if any(key.startswith("unc_est") for key in self.cuts):
        data = self.calc_uncertainties(data)
    data = self.apply_cuts(data, self.cuts)

    for name in self.output_names:
        container = Container(name)

        nubar = -1 if "bar" in name else 1
        # Checked from "tau" down to "e" so multi-match names resolve as
        # before (last of e/mu/tau wins).  An unmatched name used to leave
        # ``flav`` and ``n_files`` unbound, or silently reuse the values of
        # the previous iteration.
        if "tau" in name:
            flav = 2
        elif "mu" in name:
            flav = 1
        elif "e" in name:
            flav = 0
        else:
            raise ValueError("Cannot determine flavour of %s" % name)
        n_files = int(self.files_per_flavor[flav])

        pdg = nubar * (12 + 2 * flav)
        mask = data["pdg_id"] == pdg
        if "cc" in name:
            mask = np.logical_and(mask, data["interaction_type"] == 1)
        else:
            mask = np.logical_and(mask, data["interaction_type"] == 2)

        events = {key: value[mask] for key, value in data.items()}
        weight_dict = events["I3MCWeightDict"]
        primary = events["MCInIcePrimary"]

        container["true_energy"] = primary["energy"].astype(FTYPE)
        container["true_coszen"] = np.cos(primary["zenith"]).astype(FTYPE)
        container["pdg_code"] = primary["pdg_encoding"].astype(FTYPE)
        container["interaction"] = weight_dict["InteractionType"].astype(
            FTYPE)

        derived_weight = (CM2_TO_M2 * weight_dict["OneWeight"] / n_files /
                          weight_dict["gen_ratio"] / weight_dict["NEvents"])
        container["weighted_aeff"] = derived_weight.astype(FTYPE)

        reco = self.reco
        reco_total_energy = (events[f"{reco}_cascade_energy"] +
                             events[f"{reco}_track_energy"])
        container["reco_energy"] = reco_total_energy.astype(FTYPE)
        container["reco_coszen"] = np.cos(
            events[f"{reco}_zenith"]).astype(FTYPE)
        container["reco_z"] = events[f"{reco}_z"].astype(FTYPE)
        container["reco_rho"] = events["rho_36"].astype(FTYPE)

        if self.track_E_cut is None:
            container["pid"] = events["L7_PIDClassifier_ProbTrack"].astype(
                FTYPE)
        else:
            # boolean selection cast to FTYPE (0.0 / 1.0)
            pid = events[f"{reco}_track_energy"] > float(self.track_E_cut)
            container["pid"] = pid.astype(FTYPE)

        container["weights"] = np.ones(container.size, dtype=FTYPE)
        container["initial_weights"] = np.ones(container.size, dtype=FTYPE)

        container.set_aux_data("nubar", nubar)
        container.set_aux_data("flav", flav)

        self.data.add_container(container)

    if len(self.data.names) == 0:
        raise ValueError(
            "No containers created during data loading for some reason.")
def record_event_properties(self):
    '''Copy the loaded events into one container per requested category.

    The categories are ``self.output_names`` or, if that is empty, every key
    of ``self.evts``.  Each container is put in the ``'events'``
    representation and receives all arrays stored under its name in
    ``self.evts``, plus:

    * ``weights`` - ones; an input array with that name is rejected
    * ``initial_weights`` - only if not supplied by the input: ones, or
      ``1 / self.fraction_events_to_keep`` when down-sampling is active
    * ``nubar`` / ``flav`` auxiliary values when ``self.neutrinos`` is set,
      derived from the category name (``nue``/``numu``/``nutau`` prefix and
      the substring ``bar``)

    Raises
    ------
    ValueError
        If a requested name is missing from ``self.evts``, if the flavour of
        a neutrino category cannot be determined, or if no container exists
        on ``self.data`` afterwards.
    KeyError
        If the input already contains a ``weights`` array.
    '''
    # user can manually specify what they want using `output_names`,
    # or else just use everything
    if len(self.output_names) > 0:
        requested = self.output_names
    else:
        requested = self.evts.keys()

    # prefixes are tested in this order, matching the original if/elif chain
    flavour_by_prefix = (('nutau', 2), ('numu', 1), ('nue', 0))

    for name in requested:
        container = Container(name)
        container.representation = 'events'

        event_groups = self.evts.keys()
        if name not in event_groups:
            raise ValueError(
                'Output name "%s" not found in events. Only found %s.'
                % (name, event_groups)
            )

        # add the events data to the container
        for key, val in self.evts[name].items():
            container[key] = val

        # `weights` is (re)computed by later stages from `initial_weights`,
        # so an input array with that name would be overwritten: refuse it
        # with a helpful message instead.
        #TODO Maybe add this directly into EventsPi
        if 'weights' in container.keys:
            raise KeyError(
                'Found an existing `weights` array in "%s"'
                ' which would be overwritten. Consider renaming it'
                ' to `initial_weights`.' % name
            )
        container['weights'] = np.ones(container.size, dtype=FTYPE)

        # `initial_weights` is the never-modified starting point; the user
        # may provide it in the input file
        if 'initial_weights' not in container.keys:
            if self.fraction_events_to_keep is not None:
                # Need to scale weights if using down-sampling
                starting_weights = np.full(
                    container.size,
                    1. / float(self.fraction_events_to_keep),
                    dtype=FTYPE)
            else:
                starting_weights = np.ones(container.size, dtype=FTYPE)
            container['initial_weights'] = starting_weights

        # add neutrino flavor information for neutrino events
        #TODO Maybe add this directly into EventsPi
        if self.neutrinos:
            nubar = -1 if 'bar' in name else 1
            for prefix, code in flavour_by_prefix:
                if name.startswith(prefix):
                    flav = code
                    break
            else:
                raise ValueError('Cannot determine flavour of %s'%name)
            container.set_aux_data('nubar', nubar)
            container.set_aux_data('flav', flav)

        self.data.add_container(container)

    # check created at least one container
    if len(self.data.names) == 0:
        raise ValueError(
            'No containers created during data loading for some reason.'
        )
def setup_function(self):
    '''Generate the toy signal and background events and bin them.

    The number of simulated events is
    ``int(n_events_data * stats_factor)``; ``int(n_mc * signal_fraction)``
    of them are signal and the rest background.  A ``'signal'`` container is
    always created; a ``'background'`` container only if there is at least
    one background event.  Each container holds

    * ``stuff`` - values drawn with ``np.random.uniform`` between
      ``params.bkg_min`` and ``params.bkg_max``
    * ``weights`` - ``1 / stats_factor`` for every event
    * ``errors`` - the squared weights
    * ``bin_indices`` - result of ``lookup_indices`` on ``stuff`` for
      ``self.output_specs``
    * ``bin_<i>_mask`` - boolean mask of the events falling in bin ``i``

    Signal weights/errors are created with dtype ``FTYPE``; the background
    ones use NumPy's default dtype.  Finally ``weights`` and ``errors`` of
    every container are passed to ``array_to_binned`` (``averaged=False``).

    This function is run once when we instantiate the pipeline
    '''
    #
    # figure out how many signal and background events to create
    #
    n_data_events = int(self.params.n_events_data.value.m)
    self.stats_factor = float(self.params.stats_factor.value.m)
    signal_fraction = float(self.params.signal_fraction.value.m)

    self.n_mc = int(n_data_events * self.stats_factor)  # simulated MC events
    self.nsig = int(self.n_mc * signal_fraction)  # signal MC events
    self.nbkg = self.n_mc - self.nsig  # bkg MC events

    # Go in events mode
    self.data.data_specs = 'events'

    def _build(label, n_evts, **ones_kwargs):
        """Create, fill and register one events container."""
        target = Container(label)
        target.data_specs = 'events'

        # physics quantity, uniform over the configured range
        draws = np.random.uniform(low=self.params.bkg_min.value.m,
                                  high=self.params.bkg_max.value.m,
                                  size=n_evts)
        target.add_array_data('stuff', draws)

        # equal MC weight per event and the associated error
        per_event = np.ones(n_evts, **ones_kwargs) * 1. / self.stats_factor
        target.add_array_data('weights', per_event)
        target.add_array_data('errors', per_event**2.)

        # bin index of every event
        indices = lookup_indices(sample=[target['stuff']],
                                 binning=self.output_specs)
        indices = indices.get('host')
        target.add_array_data('bin_indices', indices)

        # one mask per output bin (used in generalized poisson llh)
        for bin_i in range(self.output_specs.tot_num_bins):
            target.add_array_data(key='bin_{}_mask'.format(bin_i),
                                  data=indices == bin_i)

        self.data.add_container(target)

    _build('signal', self.nsig, dtype=FTYPE)
    if self.nbkg > 0:
        _build('background', self.nbkg)

    #
    # Add the binned counterpart of each events container
    #
    for container in self.data:
        for key in ('weights', 'errors'):
            container.array_to_binned(key,
                                      binning=self.output_specs,
                                      averaged=False)
def setup_function(self):
    '''Create placeholder signal and background containers and bin them.

    The number of simulated events is
    ``int(n_events_data * stats_factor)``; ``int(n_mc * signal_fraction)``
    of them are signal and the rest background.  A ``'signal'`` container is
    always created; a ``'background'`` container only if there is at least
    one background event.  Each container holds

    * ``stuff`` - zeros
    * ``weights`` - ``1 / stats_factor`` for every event
    * ``errors`` - the squared weights
    * ``bin_indices`` - all ``-1``
    * ``bin_<i>_mask`` - an all-``False`` mask for every output bin

    Finally ``weights`` and ``errors`` of every container are passed to
    ``array_to_binned`` (``averaged=False``) with ``self.output_specs``.

    This function is run once when we instantiate the pipeline
    '''
    #
    # figure out how many signal and background events to create
    #
    n_data_events = int(self.params.n_events_data.value.m)
    stats_factor = float(self.params.stats_factor.value.m)
    signal_fraction = float(self.params.signal_fraction.value.m)

    self.n_mc = int(n_data_events * stats_factor)  # simulated MC events
    self.nsig = int(self.n_mc * signal_fraction)  # signal MC events
    self.nbkg = self.n_mc - self.nsig  # bkg MC events

    # Go in events mode
    self.data.data_specs = 'events'

    # signal is always created, background only when it has events
    populations = [('signal', self.nsig)]
    if self.nbkg > 0:
        populations.append(('background', self.nbkg))

    for label, n_evts in populations:
        target = Container(label)
        target.data_specs = 'events'

        # physics quantity
        target.add_array_data('stuff', np.zeros(n_evts))

        # MC weight and the error on it
        per_event = np.ones(n_evts) * 1. / stats_factor
        target.add_array_data('weights', per_event)
        target.add_array_data('errors', per_event**2.)

        # empty bin_indices array (used in generalized poisson llh)
        target.add_array_data('bin_indices', np.ones(n_evts) * -1)

        # bin indices mask (used in generalized poisson llh)
        for bin_i in range(self.output_specs.tot_num_bins):
            target.add_array_data(key='bin_{}_mask'.format(bin_i),
                                  data=np.zeros(n_evts, dtype=bool))

        self.data.add_container(target)

    #
    # Bin the weights according to the output specs binning
    #
    for container in self.data:
        for key in ('weights', 'errors'):
            container.array_to_binned(key,
                                      binning=self.output_specs,
                                      averaged=False)
def setup_function(self):
    """Load the HDF5 events file and fill one container per output name.

    The file located via ``find_resource(self.events_file)`` is opened
    read-only.  For every entry of ``self.output_names`` the events are
    selected whose ``PrimaryType`` equals ``nubar * (12 + 2 * flav)``.
    There is no interaction key in this MC, so names containing ``"cc"``
    keep all of those events and every other name ends up with an empty
    selection.

    Stored per container (``FTYPE``):

    * ``weighted_aeff`` = ``oneweight * 1e-4 / (98000 / 5.0)``
    * ``total_column_depth``, ``true_bjorkenx``, ``true_bjorkeny``,
      ``true_energy``, ``reco_energy``, ``pid``
    * ``true_coszen`` / ``reco_coszen`` - cosine of ``NuZenith`` /
      ``MuExZenith``
    * unit ``weights`` and ``initial_weights``

    plus the auxiliary values ``nubar`` and ``flav``.  Progress and the
    elapsed time are printed to stdout.  The file is closed even if loading
    fails part-way.

    NOTE(review): the constants ``1e-4`` and ``98000 / 5.0`` are taken over
    unchanged; presumably a unit conversion and a file-count normalisation -
    confirm.

    Raises
    ------
    ValueError
        If no flavour can be determined from an output name.
    """
    print("Loading data...", end="")
    st = time()

    raw_data = h5.File(find_resource(self.events_file), "r")
    try:
        # Read the primary types once; every mask is built from this
        # in-memory array, never from the dataset handle.
        primary_type = raw_data["PrimaryType"][:]

        for name in self.output_names:
            container = Container(name)

            nubar = -1 if "bar" in name else 1
            # Checked from "tau" down to "e" so multi-match names resolve as
            # before (last of e/mu/tau wins).  An unmatched name used to
            # leave ``flav`` unbound, or silently reuse the previous
            # iteration's value.
            if "tau" in name:
                flav = 2
            elif "mu" in name:
                flav = 1
            elif "e" in name:
                flav = 0
            else:
                raise ValueError("Cannot determine flavour of %s" % name)

            # cut out right part
            pdg = nubar * (12 + 2 * flav)
            mask = primary_type == pdg

            # there's no interaction key in this MC, so we put this in so
            # only the CC are used
            if "cc" in name:
                mask = np.logical_and(mask, primary_type != 0)
            else:
                mask = np.logical_and(mask, primary_type == 0)

            # "weighted_aeff" goes in first; the unit weight arrays take
            # their length from the container afterwards.
            container["weighted_aeff"] = (
                raw_data["oneweight"][mask][:].astype(FTYPE) * (1e-4) /
                (98000 / 5.0))
            container["weights"] = np.ones(container.size, dtype=FTYPE)
            container["initial_weights"] = np.ones(container.size,
                                                   dtype=FTYPE)

            container["total_column_depth"] = raw_data["TotalColumnDepth"][
                mask][:].astype(FTYPE)
            container["true_bjorkenx"] = raw_data["FinalStateX"][mask][:].astype(
                FTYPE)
            container["true_bjorkeny"] = raw_data["FinalStateY"][mask][:].astype(
                FTYPE)
            container["true_energy"] = raw_data["NuEnergy"][mask][:].astype(
                FTYPE)
            container["true_coszen"] = np.cos(
                raw_data["NuZenith"][mask][:].astype(FTYPE))
            container["reco_energy"] = raw_data["MuExEnergy"][mask][:].astype(
                FTYPE)
            container["reco_coszen"] = np.cos(
                raw_data["MuExZenith"][mask][:].astype(FTYPE))
            container["pid"] = raw_data["pid"][mask][:].astype(FTYPE)

            container.set_aux_data("nubar", nubar)
            container.set_aux_data("flav", flav)

            self.data.add_container(container)
    finally:
        # Release the handle on success and on failure alike.
        raw_data.close()

    ed = time()
    print(" done! Took {} minutes".format((ed - st) / 60))