def test_conversion_of_hdf5header(self):
    hdict = {
        "param_a": {"field_a_1": "1", "field_a_2": "2"},
        "param_b": {"field_b_1": "a"},
        "param_c": {"field_c_1": 1},
    }
    header = HDF5Header(hdict)
    tab = header2table(header)
    for p in [b"param_a", b"param_b", b"param_c"]:
        assert p in tab.parameter
    # the round trip back to a header should not raise
    hdf5header_from_table = HDF5Header.from_table(tab)
def test_header_from_km3io(self):
    head = {
        "a": "1 2 3",
        "b+c": "4 5 6",
        "c": "foo",
        "d": "7",
        "e+f": "bar",
    }
    header = HDF5Header.from_km3io(km3io.offline.Header(head))
    assert 1 == header["a"][0]
    assert 2 == header["a"][1]
    assert 3 == header["a"][2]
    assert 1 == header.a[0]
    assert 2 == header.a[1]
    assert 3 == header.a[2]
    assert 4 == header["b+c"][0]
    assert 5 == header["b+c"][1]
    assert 6 == header["b+c"][2]
    assert "foo" == header.c
    assert "foo" == header["c"]
    assert 7 == header.d
    assert 7 == header["d"]
    assert "bar" == header["e+f"]
def test_header_getitem(self):
    header = HDF5Header(self.hdict)
    print(header["param_a"])
    assert "1" == header["param_a"].field_a_1
    assert "2" == header["param_a"].field_a_2
    assert "a" == header["param_b"].field_b_1
    assert 23 == header["param_c"].field_c_1
def test_header_from_table(self):
    table = header2table(self.hdict)
    header = HDF5Header.from_table(table)
    print(header)
    assert 1.0 == header.param_a.field_a_1
    assert 2.0 == header.param_a.field_a_2
    assert "a" == header.param_b.field_b_1
    assert 23 == header.param_c.field_c_1
    self.assertTupleEqual((1, 2, 3), header.param_d)
def test_header_from_hdf5_file(self):
    header = HDF5Header.from_hdf5(data_path("hdf5/raw_header.h5"))
    assert "MUSIC" == header.propag[0]
    assert "seawater" == header.propag[1]
    assert 3450 == header.seabottom[0]
    self.assertAlmostEqual(12.1, header.livetime.numberOfSeconds, places=3)
    self.assertAlmostEqual(0.09, header.livetime.errorOfSeconds, places=3)
    assert 0 == header.coord_origin.x
    assert 0 == header.coord_origin.y
    assert 0 == header.coord_origin.z
    self.assertTupleEqual((0, 0, 0), header.coord_origin)
def test_header_from_table_with_bytes(self):
    table = Table(
        {
            "dtype": [b"f4 a2", b"f4"],
            "field_names": [b"a b", b"c"],
            "field_values": [b"1.2 ab", b"3.4"],
            "parameter": [b"foo", b"bar"],
        }
    )
    header = HDF5Header.from_aanet(table)
    self.assertAlmostEqual(1.2, header.foo.a, places=2)
    assert "ab" == header.foo.b
    self.assertAlmostEqual(3.4, header.bar.c, places=2)
def test_conversion_of_km3io_header(self):
    header = km3io.OfflineReader(data_path("offline/numucc.root")).header
    tab = header2table(header)
    print(tab)
    for p in [
        b"DAQ",
        b"PDF",
        b"can",
        b"can_user",
        b"coord_origin",
        b"cut_in",
        b"cut_nu",
        b"cut_primary",
        b"cut_seamuon",
        b"decay",
        b"detector",
        b"drawing",
        b"genhencut",
        b"genvol",
        b"kcut",
        b"livetime",
        b"model",
        b"ngen",
        b"norma",
        b"nuflux",
        b"physics",
        b"seed",
        b"simul",
        b"sourcemode",
        b"spectrum",
        b"start_run",
        b"target",
        b"usedetfile",
        b"xlat_user",
        b"xparam",
        b"zed_user",
    ]:
        assert p in tab.parameter
    h5header = HDF5Header.from_table(tab)
    assert h5header.can.zmin == header.can.zmin
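# Orientation note (a sketch inferred from the tests above, not the
# library's documented contract): header2table() flattens a header into a
# Table with one row per parameter, with whitespace-separated field names,
# values and dtypes stored as bytes. Illustrative values only:
#
#     Table({
#         "parameter": [b"can"],
#         "field_names": [b"zmin zmax r"],
#         "field_values": [b"0.0 1027.0 888.4"],
#         "dtype": [b"f4 f4 f4"],
#     })
#
# HDF5Header.from_table() parses such a table back into attribute access
# (e.g. header.can.zmin), which is what the round-trip assertions check.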
def get_neutrino_mc_info_extr(input_file):
    """
    Wrapper function that includes the actual mc_info_extr
    for neutrino simulations. The n_gen parameter, needed for
    neutrino weighting, is extracted from the header of the file.

    Parameters
    ----------
    input_file : km3net data file
        Can be online or offline format.

    Returns
    -------
    mc_info_extr : function
        The actual mc_info_extr function that holds the extractions.

    """
    # check if std reco is present
    f = File(input_file, "r")
    has_std_reco = "reco" in f.keys()

    if has_std_reco:
        # also check which rec types are present
        rec_types, rec_parameters_names = get_rec_types_in_file(f)

    # get the n_gen
    header = HDF5Header.from_hdf5(input_file)
    n_gen = header.genvol.numberOfEvents

    # an identifier for which part of the mc simulation this was;
    # this way, events can later be unambiguously identified
    input_filename_string = os.path.basename(input_file)
    try:
        # second to last because of .h5 - works only for officially named files
        part_number = re.findall(r"\d+", input_filename_string)[-2]
    except IndexError:
        part_number = 0

    def mc_info_extr(blob):
        """
        Processes a blob and creates the y with mc_info and, if
        existing, std reco.

        For this neutrino case it is the full mc info for the primary
        neutrino; there are several "McTracks": check in the simulation
        which index "p" the neutrino has.

        Parameters
        ----------
        blob : dict
            The blob from the pipeline.

        Returns
        -------
        track : dict
            Containing all the specified info the y should have.

        """
        # get general info about the event
        event_info = blob["EventInfo"]
        event_id = event_info.event_id[0]
        run_id = event_info.run_id[0]
        # weights for neutrino analysis
        weight_w1 = event_info.weight_w1[0]
        weight_w2 = event_info.weight_w2[0]
        weight_w3 = event_info.weight_w3[0]

        is_cc = event_info.W2LIST_GSEAGEN_CC[0]
        bjorkeny = event_info.W2LIST_GSEAGEN_BY[0]

        # first, look for the particular neutrino index of the production
        p = 0  # for ORCA4 (and probably subsequent productions)
        primary_mc_track = blob["McTracks"][p]

        # some track mc truth info
        particle_type = primary_mc_track.pdgid  # sometimes type, sometimes pdgid
        energy = primary_mc_track.energy
        dir_x, dir_y, dir_z = (
            primary_mc_track.dir_x,
            primary_mc_track.dir_y,
            primary_mc_track.dir_z,
        )
        # actually always 0 for primary neutrino, measured in MC time
        time_interaction = primary_mc_track.time
        vertex_pos_x, vertex_pos_y, vertex_pos_z = (
            primary_mc_track.pos_x,
            primary_mc_track.pos_y,
            primary_mc_track.pos_z,
        )

        # for (muon) NC interactions, the visible energy is different
        if np.abs(particle_type) == 14 and is_cc == 3:
            visible_energy = energy * bjorkeny
        else:
            visible_energy = energy

        # for tau CC it is not clear what the second interaction is;
        # 1 for shower, 2 for track, 3 for nothing
        tau_topology = 3
        if np.abs(particle_type) == 16:
            if 13 in np.abs(blob["McTracks"].pdgid):
                tau_topology = 2
            else:
                tau_topology = 1

        # also add the nhits info
        n_hits = len(blob["Hits"])
        n_trig_hits = np.count_nonzero(blob["Hits"]["triggered"])

        track = {
            "event_id": event_id,
            "particle_type": particle_type,
            "energy": energy,
            "visible_energy": visible_energy,
            "is_cc": is_cc,
            "bjorkeny": bjorkeny,
            "dir_x": dir_x,
            "dir_y": dir_y,
            "dir_z": dir_z,
            "time_interaction": time_interaction,
            "run_id": run_id,
            "vertex_pos_x": vertex_pos_x,
            "vertex_pos_y": vertex_pos_y,
            "vertex_pos_z": vertex_pos_z,
            "n_hits": n_hits,
            "n_trig_hits": n_trig_hits,
            "weight_w1": weight_w1,
            "weight_w2": weight_w2,
            "weight_w3": weight_w3,
            "n_gen": n_gen,
            "part_number": part_number,
            "tau_topology": tau_topology,
        }

        # get all the std reco info
        if has_std_reco:
            std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names)
            track.update(std_reco_info)

        return track

    return mc_info_extr
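# Hypothetical usage sketch (an assumption, not from the source): the wrapper
# builds a closure that is applied to each pipeline blob; the names
# "some_neutrino_file.h5" and `pipeline_blobs` are placeholders.
#
#     mc_info_extr = get_neutrino_mc_info_extr("some_neutrino_file.h5")
#     for blob in pipeline_blobs:
#         y = mc_info_extr(blob)  # mc truth, weights and, if present, std reco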
def test_header_fails_when_no_info_in_file(self):
    with self.assertRaises(tb.NoSuchNodeError):
        HDF5Header.from_hdf5(data_path("hdf5/basic_analysis_sample.h5"))
def test_init(self):
    HDF5Header({})
def test_header_from_hdf5_file_with_invalid_identifier_names_in_header(self):
    header = HDF5Header.from_hdf5(data_path("hdf5/geamon.h5"))
    assert 1.0 == header["drays+z"][0]
    assert 68.5 == header["drays+z"][1]
def test_header_with_scrambled_vectors(self):
    header = HDF5Header(self.hdict)
    self.assertTupleEqual((1, 2, 3), header.param_e)
def test_header_behaves_like_a_dict(self):
    h = HDF5Header(self.hdict)
    self.assertListEqual(list(h.keys()), list(self.hdict.keys()))
    assert 5 == len(h.items())
    assert 5 == len(h.values())
def test_header(self):
    header = HDF5Header(self.hdict)
    assert "1" == header.param_a.field_a_1
    assert "2" == header.param_a.field_a_2
    assert "a" == header.param_b.field_b_1
    assert 23 == header.param_c.field_c_1
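# A minimal sketch of the fixture the tests above rely on (the original
# setUp is not part of this excerpt). Values are inferred from the
# assertions: five parameters, with param_d a vector and param_e a
# "scrambled" vector whose fields are stored out of order; the exact
# field key names are an assumption.
#
#     def setUp(self):
#         self.hdict = {
#             "param_a": {"field_a_1": "1", "field_a_2": "2"},
#             "param_b": {"field_b_1": "a"},
#             "param_c": {"field_c_1": 23},
#             "param_d": {"field_d_0": 1, "field_d_1": 2, "field_d_2": 3},
#             "param_e": {"field_e_1": 2, "field_e_0": 1, "field_e_2": 3},
#         }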