def split_data(self):
    """Split the stage's data into signal, background and combined sets.

    The result is cached: a hash is built from ``self.fit_hash``,
    ``self.output_str`` and the muon/noise content flags of ``self._data``
    and, when it matches ``self.split_data_hash``, the previously stored
    triple is returned unchanged. When the ``real_data`` parameter is set,
    nothing is split and nothing is cached.

    Returns
    -------
    tuple
        ``(signal_data, bg_data, all_data)``; for real data this is
        ``(self._data, None, self._data)``.

    """
    current_hash = hash_obj(
        [
            self.fit_hash,
            self.output_str,
            self._data.contains_muons,
            self._data.contains_noise,
        ],
        full_hash=self.full_hash
    )

    # Nothing relevant changed since the last call: reuse the cached split.
    if self.split_data_hash == current_hash:
        return self._signal_data, self._bg_data, self._all_data

    # Real data cannot be separated into signal and background.
    if self.params['real_data'].value:
        return self._data, None, self._data

    grouped = self._data.transform_groups(self.output_str)

    # Strip the sample weights from every group yielded by iteration.
    for group_name in grouped:
        grouped[group_name].pop('sample_weight')

    # Everything that is not the signal group counts as background.
    background_names = [
        group_name for group_name in grouped
        if group_name != self.output_str
    ]

    # Muons and noise are handled explicitly, one after the other
    # (presumably because iterating the container does not yield them --
    # confirm against the Data class).
    for extra in ('muons', 'noise'):
        if getattr(grouped, 'contains_' + extra):
            grouped[extra].pop('sample_weight')
            background_names.append(extra)

    signal = grouped[self.output_str]
    background_parts = [grouped[bg_name] for bg_name in background_names]
    background = reduce(Data._merge, background_parts)
    # Merge into a copy so that the background set itself stays untouched.
    combined = Data._merge(deepcopy(background), signal)

    # Store the results together with the hash they correspond to.
    self._signal_data = signal
    self._bg_data = background
    self._all_data = combined
    self.split_data_hash = current_hash
    return signal, background, combined
def _compute_outputs(self, inputs=None):
    """Load events from file into a `Data` container and apply basic cuts.

    The events are read from ``self.events_file``, copied per flavour /
    interaction type into a `FlavIntDataGroup`, wrapped in a `Data`
    instance (carrying a copy of the events' metadata) and, if the
    ``keep_criteria`` parameter is not None, cut accordingly.

    Parameters
    ----------
    inputs : optional
        Not used by this method.

    Returns
    -------
    data : Data
        The loaded (and possibly cut) events, with ``sample_hash`` stored
        in its metadata.

    """
    logging.debug('Entering events_to_data._compute_outputs')

    # Hashing
    # TODO What should I hash??
    hash_property = [
        self.events_file,
        self.params['dataset'].value,
        self.output_names
    ]
    this_hash = hash_obj(hash_property, full_hash=self.full_hash)
    # if this_hash == self.sample_hash: #TODO Fix this and replace...
    #     return

    # TODO Check there are no inputs

    # Fill an events instance from a file
    events = Events(self.events_file)

    # TODO Handle nominal, etc, etc datasets?

    # Extract the neutrino data from the 'Events' instance
    # NOTE(review): the same `flav_fidg` object is appended once per present
    # flavint, so `reduce(add, ...)` below adds the group to itself; this is
    # kept as in the original -- confirm it is intended.
    nu_data = []
    flav_fidg = FlavIntDataGroup(flavint_groups=events.flavints)
    for flavint in events.present_flavints:
        flav_fidg[flavint] = {
            var: events[flavint][var] for var in events[flavint].keys()
        }
        nu_data.append(flav_fidg)

    # Create the data instance, including the metadata
    # Note that there is no muon or noise data in the 'Events'
    data = Data(reduce(add, nu_data), metadata=deepcopy(events.metadata))

    # Make cuts. These must act on the freshly built `data` that is
    # returned below, not on `self._data`, which this method never sets.
    keep_criteria = self.params['keep_criteria'].value
    if keep_criteria is not None:
        # TODO Shivesh says this needs testing
        data.applyCut(keep_criteria)
        data.update_hash()

    # Update hashes
    self.sample_hash = this_hash
    data.metadata['sample_hash'] = this_hash
    data.update_hash()

    return data
def load_noise_events(config, dataset):
    """Load the noise events of a sample as described by `config`.

    Parameters
    ----------
    config
        Parsed sample configuration providing ``get(section, option)`` and
        ``items(section)``.
    dataset : str
        Either ``'nominal'``, in which case the file is resolved from the
        nominal choice of every systematic listed in the config, or the
        name of the config section holding the ``file_path`` to load.

    Returns
    -------
    Data
        Container holding the events under the key ``'noise'``, with
        metadata ``name`` and ``noise_sample``.

    Raises
    ------
    AssertionError
        If the nominal choices of the systematics point to more than one
        file.

    """
    name = config.get('general', 'name')
    weight = config.get('noise', 'weight')
    weight_units = config.get('noise', 'weight_units')
    sys_list = split(config.get('noise', 'sys_list'))
    base_prefix = config.get('noise', 'baseprefix')
    keep_keys = split(config.get('noise', 'keep_keys'))
    aliases = config.items('noise%saliases' % SEP)

    base_prefix = '' if base_prefix == 'None' else base_prefix

    if dataset != 'nominal':
        file_path = config.get(dataset, 'file_path')
    else:
        # Collect the file that each systematic considers nominal; all of
        # them have to agree on a single file.
        paths = []
        for syst in sys_list:
            syst_section = 'noise%s%s' % (SEP, syst)
            nominal_value = config.get(syst_section, 'nominal')
            nominal_section = syst_section + SEP + nominal_value
            paths.append(config.get(nominal_section, 'file_path'))
        if len(set(paths)) > 1:
            raise AssertionError(
                'Choice of nominal file is ambigous. Nominal '
                'choice of systematic parameters must coincide '
                'with one and only one file. Options found are: '
                '{0}'.format(paths)
            )
        file_path = paths[0]

    logging.info('Extracting noise dataset "{0}" from sample '
                 '"{1}"'.format(dataset, name))
    noise = from_file(file_path)
    sample.strip_keys(keep_keys, noise)

    # The weight option is either a literal '0' / '1' / 'None', or the name
    # of the key that holds the weights.
    if weight in ('None', '1'):
        sample_weight = np.ones(noise['weights'].shape)
    elif weight == '0':
        sample_weight = np.zeros(noise['weights'].shape)
    else:
        sample_weight = noise[weight] * ureg(weight_units)
    noise['sample_weight'] = sample_weight
    noise['pisa_weight'] = deepcopy(noise['sample_weight'])

    # NOTE: alias expressions come from the config and are run through
    # `eval`, so only trusted configuration files should be used. The
    # substituted expressions refer to the local variable `noise` by name,
    # so it must not be renamed.
    for alias, expr in aliases:
        if alias in noise:
            logging.warning(
                'Overwriting Data key {0} with aliased expression '
                '{1}'.format(alias, expr)
            )
        substituted = re.sub(r'\<(.*?)\>', r"noise['\1']", expr)
        noise[alias] = eval(substituted)

    return Data(
        {'noise': noise},
        metadata={'name': name, 'noise_sample': dataset}
    )
def load_neutrino_events(config, dataset):
    """Load the neutrino events of a sample as described by `config`.

    Parameters
    ----------
    config
        Parsed sample configuration providing ``get(section, option)`` and
        ``items(section)``.
    dataset : str
        One of: the generator-level section name
        (``'neutrinos' + SEP + 'gen_lvl'``), in which case a separate
        generator config is read; ``'nominal'``, in which case the file
        prefix is resolved from the nominal choice of every listed
        systematic; or the name of a config section holding a
        ``file_prefix``.

    Returns
    -------
    Data
        Sum of the per-flavour groups, with metadata ``name`` and
        ``sample``.

    Raises
    ------
    AssertionError
        If the nominal choices of the systematics point to more than one
        file prefix.

    """
    flav_groups = []

    if dataset == 'neutrinos%sgen_lvl' % SEP:
        # Generator-level samples are described by their own config file.
        gen_cfg = from_file(config.get(dataset, 'gen_cfg_file'))
        name = gen_cfg.get('general', 'name')
        datadir = gen_cfg.get('general', 'datadir')
        event_types = split(gen_cfg.get('general', 'event_type'))
        weights = split(gen_cfg.get('general', 'weights'))
        weight_units = gen_cfg.get('general', 'weight_units')
        keep_keys = split(gen_cfg.get('general', 'keep_keys'))
        aliases = gen_cfg.items('aliases')

        logging.info('Extracting neutrino dataset "{0}" from generator '
                     'level sample "{1}"'.format(dataset, name))

        for pos, event_type in enumerate(event_types):
            flavints = NuFlavIntGroup(event_type).flavints
            events_file = datadir + gen_cfg.get(event_type, 'filename')
            flav_groups.append(
                sample.load_from_nu_file(
                    events_file, flavints, weights[pos], weight_units,
                    keep_keys, aliases
                )
            )
    else:
        name = config.get('general', 'name')
        flavours = split(config.get('neutrinos', 'flavours'))
        weights = split(config.get('neutrinos', 'weights'))
        weight_units = config.get('neutrinos', 'weight_units')
        sys_list = split(config.get('neutrinos', 'sys_list'))
        base_prefix = config.get('neutrinos', 'baseprefix')
        keep_keys = split(config.get('neutrinos', 'keep_keys'))
        aliases = config.items('neutrinos%saliases' % SEP)

        logging.info('Extracting neutrino dataset "{0}" from sample '
                     '"{1}"'.format(dataset, name))

        base_prefix = '' if base_prefix == 'None' else base_prefix

        for pos, flavour in enumerate(flavours):
            # Each flavour code covers both the particle and antiparticle.
            code = int(flavour)
            flavints = NuFlavIntGroup(code, -code).flavints

            if dataset != 'nominal':
                file_prefix = flavour + config.get(dataset, 'file_prefix')
            else:
                # Collect the prefix that each systematic considers nominal;
                # all of them have to agree on a single one.
                prefixes = []
                for syst in sys_list:
                    syst_section = 'neutrinos%s%s' % (SEP, syst)
                    nominal_value = config.get(syst_section, 'nominal')
                    nominal_section = syst_section + SEP + nominal_value
                    prefixes.append(
                        config.get(nominal_section, 'file_prefix')
                    )
                if len(set(prefixes)) > 1:
                    raise AssertionError(
                        'Choice of nominal file is ambigous. Nominal '
                        'choice of systematic parameters must coincide '
                        'with one and only one file. Options found are: '
                        '{0}'.format(prefixes)
                    )
                file_prefix = flavour + prefixes[0]

            datadir = config.get('general', 'datadir')
            events_file = path.join(datadir, base_prefix + file_prefix)
            flav_groups.append(
                sample.load_from_nu_file(
                    events_file, flavints, weights[pos], weight_units,
                    keep_keys, aliases
                )
            )

    return Data(
        reduce(add, flav_groups),
        metadata={'name': name, 'sample': dataset}
    )