def test_generate_digitizer():
    """Test the functions that `generate_digitizer` produces."""
    # TODO: use local file for this test
    meta = load_pickle(
        '/home/icecube/retro/tables/'
        'large_5d_notilt_combined/stacked/stacked_ckv_template_map_meta.pkl')
    binning = meta['binning']
    for dim, edges in binning.items():
        assert np.all(np.diff(edges) > 0)
        num_bins = len(edges) - 1
        digitize = generate_digitizer(edges)
        digitize_overflow = generate_digitizer(edges, clip=False)
        rand = np.random.RandomState(0)

        # Check lots of values within the valid range of the binning
        vals = rand.uniform(low=edges[0], high=edges[-1], size=int(1e5))
        test = np.array([digitize(v) for v in vals])
        ref = np.digitize(vals, bins=edges, right=False) - 1
        assert np.all(test == ref), dim

        # Check edge cases
        assert digitize(edges[0]) == 0, dim
        assert digitize(edges[0] - 1e-8) == 0, dim
        assert digitize_overflow(edges[0] - 1e-8) < 0, dim
        assert digitize(edges[-1]) == num_bins - 1, dim
        assert digitize(edges[-1] + 1e-8) == num_bins - 1, dim
        assert digitize_overflow(edges[-1] + 1e-8) == num_bins, dim

    print('<< PASS : test_generate_digitizer >>')


def setup_tdi_tables(tdi=None, mmap=False):
    """Load and instantiate (Cherenkov) TDI tables.

    Parameters
    ----------
    tdi : sequence of strings, optional
        Path to TDI tables' `ckv_tdi_table.npy` files, or paths to
        directories containing those files; one entry per TDI table

    mmap : bool
        Whether to memory-map the TDI table arrays rather than read them
        fully into memory

    Returns
    -------
    tdi_tables : tuple of 0 or more numpy arrays
    tdi_metas : tuple of 0 or more OrderedDicts

    """
    if tdi is None:
        return (), ()
    mmap_mode = 'r' if mmap else None

    tdi_tables = []
    tdi_metas = []
    for tdi_ in tdi:
        if tdi_ is None:
            continue
        tdi_ = expand(tdi_)
        if isdir(tdi_):
            tdi_ = join(tdi_, 'ckv_tdi_table.npy')
        print('Loading and instantiating TDI table at "{}"'.format(tdi_))
        be = load_pickle(join(dirname(tdi_), 'tdi_bin_edges.pkl'))
        meta = load_pickle(join(dirname(tdi_), 'tdi_metadata.pkl'))
        meta['bin_edges'] = be
        tdi_table = np.load(tdi_, mmap_mode=mmap_mode)

        tdi_metas.append(meta)
        tdi_tables.append(tdi_table)

    return tuple(tdi_tables), tuple(tdi_metas)


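# --- Usage sketch (not part of the original module) -------------------------
# A minimal illustration of calling `setup_tdi_tables`; the paths below are
# hypothetical and assume `tdi_bin_edges.pkl` and `tdi_metadata.pkl` sit next
# to each `ckv_tdi_table.npy`.
def _example_setup_tdi_tables():
    """Load two (hypothetical) TDI tables, memory-mapped."""
    tdi_tables, tdi_metas = setup_tdi_tables(
        tdi=[
            '/path/to/tdi_table_A',                    # directory form
            '/path/to/tdi_table_B/ckv_tdi_table.npy',  # explicit-file form
        ],
        mmap=True,  # memory-map rather than read the large arrays into RAM
    )
    for table, meta in zip(tdi_tables, tdi_metas):
        print(table.shape, sorted(meta['bin_edges'].keys()))

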
def find_problematic_pulses(indir, pulse_series):
    """Find missing, bad, or old extracted pulse series and print the paths
    of the corresponding events directories.

    Parameters
    ----------
    indir : str

    pulse_series : str or iterable thereof

    """
    if isinstance(pulse_series, str):
        pulse_series = [pulse_series]
    indir = expand(indir)

    for dirpath, dirs_, files in walk(indir, followlinks=True):
        if "events.npy" in files:
            dirs_.clear()
        else:
            dirs_.sort(key=nsort_key_func)
            files.sort(key=nsort_key_func)
            for fname in files:
                match = OSCNEXT_FNAME_RE.match(fname)
                if not match:
                    continue
                i3f_dname = join(dirpath, match.groupdict()["basename"])
                if isdir(i3f_dname):
                    if not isfile(join(i3f_dname, "events.npy")):
                        print(i3f_dname)
                else:
                    print(i3f_dname)
            continue

        sys.stderr.write(".")
        sys.stderr.flush()

        # If any one of the named pulse series are missing or bad, record
        # the path and move on without checking the other pulse series
        for ps_name in pulse_series:
            pulses_fpath = join(dirpath, "pulses", ps_name + ".pkl")
            if not isfile(pulses_fpath):
                print(dirpath)
                break
            try:
                pulses = load_pickle(pulses_fpath)
                if len(pulses) > 0 and "flags" not in pulses[0][0][1].dtype.names:
                    print(dirpath)
                    break
            except Exception:
                print(dirpath)
                break


def load_stacked_tables(
    self,
    stacked_tables_meta_fpath,
    stacked_tables_fpath,
    stacked_t_indep_tables_fpath,
    mmap_tables=False,
    mmap_t_indep=False,
):
    """Load stacked Retro tables (and the time-independent versions thereof)
    from disk and attach them to `self`."""
    if self.is_stacked is not None:
        assert self.is_stacked

    stacked_tables_meta_fpath = expand(stacked_tables_meta_fpath)
    stacked_tables_fpath = expand(stacked_tables_fpath)
    stacked_t_indep_tables_fpath = expand(stacked_t_indep_tables_fpath)

    tables_mmap_mode = 'r' if mmap_tables else None
    t_indep_mmap_mode = 'r' if mmap_t_indep else None

    self.table_meta = load_pickle(stacked_tables_meta_fpath)

    self.tables = np.load(stacked_tables_fpath, mmap_mode=tables_mmap_mode)
    self.tables.setflags(write=False, align=True, uic=False)
    num_tables = self.tables.shape[0]

    self.t_is_residual_time = bool(
        self.table_meta.get('t_is_residual_time', False))

    self.t_indep_tables = np.load(stacked_t_indep_tables_fpath,
                                  mmap_mode=t_indep_mmap_mode)
    self.t_indep_tables.setflags(write=False, align=True, uic=False)
    assert self.t_indep_tables.shape[0] == num_tables

    self.sd_idx_table_indexer = deepcopy(
        self.table_meta['sd_idx_table_indexer'])
    self.sd_idx_table_indexer.setflags(write=False, align=True, uic=False)
    self.loaded_sd_indices = np.where(self.sd_idx_table_indexer >= 0)[0]

    self.n_photons_per_table = self.table_meta['n_photons_per_table']

    # Note that in creating the stacked tables, each individual table is
    # scaled such that the effective number of photons used to generate the
    # table is one (to avoid different norms across the tables if different
    # numbers of photons were used originally to create each).
    self.table_norm, self.t_indep_table_norm = get_table_norm(
        avg_angsens=self.avg_angsens,
        quantum_efficiency=1,
        norm_version=self.norm_version,
        **{k: self.table_meta[k] for k in TABLE_NORM_KEYS}
    )
    self.table_norms = [self.table_norm] * num_tables
    self.t_indep_table_norms = [self.t_indep_table_norm] * num_tables

    self.is_stacked = True


def iterate_file(fpath, start=None, stop=None, step=None, mmap_mode=None):
    """Iterate through the elements in a pickle (.pkl) or numpy (.npy) file.

    If a pickle file, the structure must be a sequence of objects, one object
    per event. If a numpy file, it must be a one-dimensional structured array
    where each "entry" in the array contains the information from one event.

    Parameters
    ----------
    fpath : string

    start, stop, step : optional
        Arguments passed to `slice` for extracting select events from the
        file.

    mmap_mode : None or string in {"r", "r+", "w+", "c"}
        Only applicable if `fpath` is a numpy .npy file; see help for
        `numpy.memmap` for more information on each mode. Note that memory
        mapping a file is useful for not consuming too much memory and for
        being able to simultaneously write to the same reco output file from
        multiple processes (presumably each process working on different
        events), BUT too many open file handles can result in an exception.
        Default is `None` (the file is not memory mapped; instead the entire
        file is read into memory).

    Returns
    -------
    num_events_in_file : int

    indices : range
        Indices (within the file) of the selected events

    sliced_events : sequence
        The selected events

    """
    slicer = slice(start, stop, step)
    _, ext = splitext(fpath)
    if ext == '.pkl':
        events = load_pickle(fpath)
    elif ext == '.npy':
        try:
            events = np.load(fpath, mmap_mode=mmap_mode)
        except:
            sys.stderr.write('failed to load "{}"\n'.format(fpath))
            raise
    else:
        raise ValueError(fpath)
    num_events_in_file = len(events)
    indices = range(num_events_in_file)[slicer]  # pylint: disable=range-builtin-not-iterating
    sliced_events = events[slicer]
    return num_events_in_file, indices, sliced_events


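# --- Usage sketch (not part of the original module) -------------------------
# `iterate_file` returns the total event count, the selected indices, and the
# selected events; the file path below is hypothetical.
def _example_iterate_file():
    """Read every 10th event from the first 1000 in a (hypothetical) file."""
    num_events, indices, events = iterate_file(
        fpath='/path/to/events.npy',
        start=0,
        stop=1000,
        step=10,
        mmap_mode='r',  # only used for .npy files; keeps memory usage low
    )
    print('{} events in file; selected {}'.format(num_events, len(indices)))
    for idx, event in zip(indices, events):
        print(idx, event)

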
def generate_binmap(r_max, r_power, n_rbins, n_costhetabins, n_phibins,
                    cart_binwidth, oversample, antialias, tables_dir,
                    recompute):
    """Generate mapping from polar binning (assumed to be symmetric about
    Z-axis) to Cartesian 3D binning.

    The heart of the functionality is implemented in
    `retro.sphbin2cartbin.sphbin2cartbin`, while this function implements
    loading already-computed mappings and storing the results to disk.

    Parameters
    ----------
    r_max : float > 0
    r_power : float != 0
    n_rbins, n_costhetabins, n_phibins : int >= 1
    cart_binwidth : float > 0
    oversample : int >= 1
    antialias : int between 1 and 50
    tables_dir : string
    recompute : bool

    Returns
    -------
    ind_arrays
    vol_arrays
    meta
        Output of `generate_binmap_meta`

    """
    assert isdir(tables_dir)
    r_edges = powerspace(0, r_max, n_rbins + 1, r_power)
    theta_edges = np.arccos(np.linspace(1, -1, n_costhetabins + 1))

    r_mesh, theta_mesh = np.meshgrid(r_edges, theta_edges, indexing='ij')
    exact_vols = []
    for ri in range(n_rbins):
        sub_exact_vols = []
        for ti in range(int(np.ceil(n_costhetabins / 2.0))):
            rs = r_mesh[ri:ri+2, ti:ti+2]
            ts = theta_mesh[ri:ri+2, ti:ti+2]
            dcostheta = np.abs(np.diff(np.cos([ts.max(), ts.min()])))
            exact_vol = spherical_volume(rmin=rs.max(), rmax=rs.min(),
                                         dcostheta=dcostheta, dphi=np.pi/2)
            sub_exact_vols.append(exact_vol)
        exact_vols.append(sub_exact_vols)
    exact_vols = np.array(exact_vols)

    meta = generate_binmap_meta(
        r_max=r_max, r_power=r_power,
        n_rbins=n_rbins, n_costhetabins=n_costhetabins, n_phibins=n_phibins,
        cart_binwidth=cart_binwidth, oversample=oversample,
        antialias=antialias
    )
    fpath = join(tables_dir, meta['fname'])

    print('Binmap kwargs:', meta['kwargs'])

    if not recompute and isfile(fpath):
        sys.stdout.write('Loading binmap from file\n  "%s"\n' % fpath)
        sys.stdout.flush()
        t0 = time.time()
        data = load_pickle(fpath)
        ind_arrays = data['ind_arrays']
        vol_arrays = data['vol_arrays']
        t1 = time.time()
        print('  Time to load bin mapping from pickle: {} ms'
              .format(np.round((t1 - t0)*1000, 3)))
    else:
        sys.stdout.write('  Computing bin mapping...\n')
        sys.stdout.flush()
        t0 = time.time()
        ind_arrays, vol_arrays = sphbin2cartbin(**meta['kwargs'])
        t1 = time.time()
        print('  Time to compute bin mapping: {} ms'
              .format(np.round((t1 - t0)*1000, 3)))

        print('  Writing bin mapping to pickle file\n  "%s"' % fpath)
        data = OrderedDict([
            ('kwargs', meta['kwargs']),
            ('ind_arrays', ind_arrays),
            ('vol_arrays', vol_arrays),
        ])
        # `file()` is Python-2-only; use `open()` instead
        pickle.dump(data, open(fpath, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
        t2 = time.time()
        print('  Time to pickle bin mapping: {} ms'
              .format(np.round((t2 - t1)*1000, 3)))
    print('')

    binned_vol = np.sum([va.sum() for va in vol_arrays])
    exact_vol = spherical_volume(rmin=0, rmax=r_max, dcostheta=-1,
                                 dphi=np.pi/2)
    print('  Exact vol = %f, binned vol = %f (%e fract error)'
          % (exact_vol, binned_vol, (binned_vol - exact_vol)/exact_vol))

    ind_bin_vols = np.array([va.sum() for va in vol_arrays])
    fract_err = ind_bin_vols/exact_vols.flat - 1
    abs_fract_err = np.abs(fract_err)
    worst_abs_fract_err = np.max(abs_fract_err)
    flat_idx = np.where(abs_fract_err == worst_abs_fract_err)[0][0]
    r_idx, costheta_idx = divmod(flat_idx, int(np.ceil(n_costhetabins/2)))
    print('  Worst single-bin fract err: %e; r_idx=%d, costheta_idx=%d;'
          ' binned vol=%e, exact vol=%e'
          % (worst_abs_fract_err, r_idx, costheta_idx,
             ind_bin_vols[flat_idx], exact_vols[r_idx, costheta_idx]))

    return ind_arrays, vol_arrays, meta


def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into
    a single TDI table.

    Parameters
    ----------
    source_dir : str
    dest_dir : str
    table_hash : str
    gcd : str
        Path to the GCD file (or its extracted .pkl) used to produce the
        tiles
    bin_edges_file : str
    tile_spec_file : str

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()

    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [l.strip() for l in f.readlines()]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir),
                     dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1),) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()

            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(
                join(
                    source_dir,
                    'clsim_table_set'
                    '_{table_hash}'
                    '_tile_{tbl_idx}'
                    '_string_{string}'
                    '_dom_{dom}'
                    '_seed_{seed}'
                    '_n_{n_events}'
                    '.fits'.format(table_hash=table_hash, **info)
                )
            )[0]
            try:
                fits_table = fits.open(tile_fpath, mode='readonly', memmap=True)
            except:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]
            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                'this: {}, ref: {}'.format(this_gcd_i3_md5,
                                           gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)

            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1,
                                                   info['dom'] - 1]
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons * bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('  quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop, z_start:z_stop, :, :] += (
                    norm * primary.data[central_slice]  # pylint: disable=no-member
                )
        except:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir,
        'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        )
    )
    mkdir(outdir)

    name = 'tdi_table.npy'
    outfpath = join(outdir, name)
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    #outfpath = join(outdir, 'tdi_bin_edges.json')
    #wstdout('saving bin edges to "{}"\n'.format(outfpath))
    #json.dump(
    #    bin_edges,
    #    open(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)

    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    pickle.dump(
        bin_edges,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )

    #outfpath = join(outdir, 'tdi_metadata.json')
    #wstdout('saving metadata to "{}"\n'.format(outfpath))
    #json.dump(
    #    metadata,
    #    open(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)

    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    pickle.dump(
        metadata,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )


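# --- Usage sketch (not part of the original module) -------------------------
# `combine_tdi_tiles` writes three files into its output directory:
# `tdi_table.npy`, `tdi_bin_edges.pkl`, and `tdi_metadata.pkl`. A sketch of
# reading them back (the output directory name is hypothetical):
def _example_load_combined_tdi():
    """Read back the three outputs written by `combine_tdi_tiles`."""
    outdir = '/path/to/dest_dir/tdi_table_<hash>_tilt_on_anisotropy_on'
    table = np.load(join(outdir, 'tdi_table.npy'), mmap_mode='r')
    bin_edges = load_pickle(join(outdir, 'tdi_bin_edges.pkl'))
    metadata = load_pickle(join(outdir, 'tdi_metadata.pkl'))
    # Table axes are (x, y, z, costhetadir, phidir), matching the bin edges
    assert table.shape == tuple(
        len(bin_edges[k]) - 1
        for k in ('x', 'y', 'z', 'costhetadir', 'phidir')
    )
    return table, bin_edges, metadata

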
def process_dir(
    dirpath,
    n_files,
    min_pulses_per_event,
    pulses_filter,
    emax,
    verbosity=0,
):
    """
    Parameters
    ----------
    dirpath : string
    n_files : int > 0
    min_pulses_per_event : int >= 0
    pulses_filter : None or callable, optional
    emax : 0 <= scalar <= np.inf
    verbosity : int >= 0

    Returns
    -------
    stats : OrderedDict
        Keys are taken from STATS_PROTO, values are numpy arrays

    """
    stats = deepcopy(STATS_PROTO)

    events = np.load(join(dirpath, "events.npy"), mmap_mode="r")
    if len(events) == 0:
        return stats

    mask_vals = deepcopy(events["L5_oscNext_bool"])
    if np.count_nonzero(mask_vals) == 0:
        return stats

    if verbosity >= 2:
        wstderr(".")

    if isfile(join(dirpath, "truth.npy")):
        truth = np.load(join(dirpath, "truth.npy"), mmap_mode="r")
        weights = truth["weight"]
        use_weights = True
    else:
        weights = np.ones(shape=len(events))
        use_weights = False

    if np.isfinite(emax) and emax > 0:
        recos = np.load(
            join(dirpath, "recos", "retro_crs_prefit.npy"),
            mmap_mode="r",
        )
        with np.errstate(invalid='ignore'):
            mask_vals &= recos["energy"]["median"] <= emax
        if np.count_nonzero(mask_vals) == 0:
            return stats

    pulses = load_pickle(
        join(dirpath, "pulses", "{}.pkl".format(PULSE_SERIES_NAME)))

    for mask_val, event_pulses, weight in zip(mask_vals, pulses, weights):
        if not mask_val:
            continue

        if callable(pulses_filter):
            event_pulses = pulses_filter(event_pulses)

        if len(event_pulses) == 0:
            continue

        if use_weights:
            normed_weight = weight / n_files

        # qtot is sum of charge of all hits on all DOMs
        event_pulses_ = []
        tmp_hits_per_dom = []
        tmp_charge_per_dom = []
        tmp_time_diffs_within_dom = []
        tmp_weight_per_dom = []
        for omkey, dom_pulses in event_pulses:
            event_pulses_.append(dom_pulses)
            tmp_hits_per_dom.append(len(dom_pulses))
            tmp_charge_per_dom.append(dom_pulses["charge"].sum())
            #stats["time_diffs_between_hits"].append(
            #    np.concatenate([[0.], np.diff(np.sort(dom_pulses["time"]))])
            #)
            tmp_time_diffs_within_dom.append(
                dom_pulses["time"] - dom_pulses["time"].min())
            if use_weights:
                tmp_weight_per_dom.append(normed_weight)

        event_pulses = np.concatenate(event_pulses_)

        # TODO: move min_pulses_per_event before qmin processing
        # TODO: small-pulse agglomeration filter
        if len(event_pulses) < min_pulses_per_event:
            continue

        # Number of hit DOMs (one entry per (omkey, dom_pulses) pair)
        stats["doms_per_event"].append(len(event_pulses_))
        stats["hits_per_dom"].extend(tmp_hits_per_dom)
        stats["charge_per_dom"].extend(tmp_charge_per_dom)
        stats["time_diffs_within_dom"].extend(tmp_time_diffs_within_dom)
        if use_weights:
            stats["weight_per_dom"].extend(tmp_weight_per_dom)

        charge = event_pulses["charge"]
        stats["charge_per_hit"].append(charge)
        stats["charge_per_event"].append(charge.sum())
        stats["hits_per_event"].append(len(event_pulses))
        stats["time_diffs_within_event"].append(
            event_pulses["time"] - event_pulses["time"].min())
        if use_weights:
            stats["weight_per_event"].append(normed_weight)
            stats["weight_per_hit"].append(
                np.full(shape=len(event_pulses), fill_value=normed_weight))

    return stats


def process_events_dir(events_dirpath, pulse_series):
    """
    Parameters
    ----------
    events_dirpath : string
    pulse_series : string

    Returns
    -------
    events_array : numpy ndarray
        ndarray dtype is `DATA_DOMS_IDX_T` if is data, otherwise
        `MC_DOMS_IDX_T`

    doms_array : numpy ndarray of dtype `DOM_PULSES_IDX_T`

    pulses_array : numpy ndarray of dtype `PULSE_T`

    """
    try:
        events_dirpath = expand(events_dirpath)
        basedir = basename(events_dirpath)
        events = np.load(join(events_dirpath, "events.npy"), mmap_mode="r")
        if len(events) == 0:
            return None

        mask_vals = events["L5_oscNext_bool"]
        valid_event_indices = np.argwhere(mask_vals).flatten()
        num_valid_events = len(valid_event_indices)
        if num_valid_events == 0:
            return None

        truth = None
        weights = None
        is_noise = False
        if isfile(join(events_dirpath, "truth.npy")):
            # is Monte Carlo simulation
            is_data = False
            truth = np.load(join(events_dirpath, "truth.npy"), mmap_mode="r")
            weights = truth["weight"]
            events_dtype = MC_DOMS_IDX_T
            is_noise = "pdg_encoding" not in truth.dtype.names
            match = MC_DIRPATH_META_RE.match(basedir)
            if not match:
                raise ValueError(events_dirpath)
            finfo_d = match.groupdict()
            finfo_d["dataset"] = int(finfo_d["dataset"])
            finfo_d["file_id"] = int(finfo_d["file_id"])
        else:
            # is actual detector data
            is_data = True
            events_dtype = DATA_DOMS_IDX_T
            match = DATA_DIRPATH_META_RE.match(basename(events_dirpath))
            if not match:
                raise ValueError(events_dirpath)
            finfo_d = match.groupdict()
            finfo_d["season"] = int(finfo_d["season"])
            finfo_d["sub_run_id"] = int(finfo_d["sub_run_id"])

        events_array = np.empty(shape=num_valid_events, dtype=events_dtype)

        doms_arrays = []
        pulses_arrays = []

        dom_idx0 = 0
        pulses_idx0 = 0

        pulses = load_pickle(
            join(events_dirpath, "pulses", "{}.pkl".format(pulse_series)))
        linefit_dc = np.load(join(events_dirpath, "recos", "LineFit_DC.npy"))

        for rel_idx, valid_idx in enumerate(valid_event_indices):
            events_array[rel_idx:rel_idx + 1][COPY_ID_FIELDS] = (
                events[valid_idx][COPY_ID_FIELDS])
            events_array[rel_idx]["dom_idx0"] = dom_idx0

            if is_data:
                events_array[rel_idx:rel_idx + 1][COPY_TIME_FIELDS] = (
                    events[valid_idx]["start_time"][COPY_TIME_FIELDS])
                events_array[rel_idx]["season"] = finfo_d["season"]
                events_array[rel_idx]["actual_sub_run_id"] = finfo_d["sub_run_id"]
            else:
                events_array[rel_idx]["dataset"] = finfo_d["dataset"]
                events_array[rel_idx]["file_id"] = finfo_d["file_id"]
                events_array[rel_idx]["weight"] = weights[valid_idx]
                if is_noise:
                    true_pdg = 0
                    true_energy = np.nan
                    true_time = np.nan
                else:
                    true_pdg = truth[valid_idx]["pdg_encoding"]
                    true_energy = truth[valid_idx]["energy"]
                    true_time = truth[valid_idx]["time"]
                events_array[rel_idx]["true_pdg"] = true_pdg
                #if abs(true_pdg) >= 128:
                #    print("true_pdg =", true_pdg)
                #    raise ValueError("true_pdg = {}".format(true_pdg))
                events_array[rel_idx]["true_energy"] = true_energy
                events_array[rel_idx]["true_time"] = true_time
                if true_pdg in NEUTRINOS:
                    events_array[rel_idx]["true_int"] = truth[valid_idx]["InteractionType"]
                else:
                    events_array[rel_idx]["true_int"] = 0

            event_pulses = pulses[valid_idx]
            events_array[rel_idx]["num_hit_doms"] = num_hit_doms = len(event_pulses)

            doms_array = np.empty(shape=num_hit_doms, dtype=DOM_PULSES_IDX_T)

            event_num_pulses = 0
            event_charge = 0.
            for dom_rel_idx, (omkey, dom_pulses) in enumerate(event_pulses):
                dom_num_pulses = len(dom_pulses)
                if dom_num_pulses >= 2**8:
                    print("dom_num_pulses =", dom_num_pulses)
                    raise ValueError(
                        "dom_num_pulses = {}".format(dom_num_pulses))
                dom_charge = np.sum(dom_pulses["charge"])

                event_num_pulses += dom_num_pulses
                event_charge += dom_charge

                doms_array[dom_rel_idx]["string"] = omkey[0]
                doms_array[dom_rel_idx]["om"] = omkey[1]
                doms_array[dom_rel_idx]["pulses_idx0"] = pulses_idx0
                doms_array[dom_rel_idx]["num_pulses"] = dom_num_pulses
                doms_array[dom_rel_idx]["charge"] = dom_charge

                simple_dom_pulses = np.empty(shape=dom_num_pulses, dtype=PULSE_T)
                simple_dom_pulses["time"] = dom_pulses["time"]
                simple_dom_pulses["charge"] = dom_pulses["charge"]
                simple_dom_pulses["width"] = dom_pulses["width"]
                simple_dom_pulses["flags"] = dom_pulses["flags"]

                pulses_arrays.append(simple_dom_pulses)
                pulses_idx0 += dom_num_pulses

            if event_num_pulses >= 2**32:
                print("event_num_pulses =", event_num_pulses)
                raise ValueError(
                    "event_num_pulses = {}".format(event_num_pulses))

            events_array[rel_idx]["num_pulses"] = event_num_pulses
            events_array[rel_idx]["charge"] = event_charge

            doms_arrays.append(doms_array)
            dom_idx0 += num_hit_doms

        events_array[COPY_LINEFIT_DC_DST_FIELDS] = (
            linefit_dc[valid_event_indices][COPY_LINEFIT_DC_SRC_FIELDS])

        doms_array = np.concatenate(doms_arrays)
        pulses_array = np.concatenate(pulses_arrays)

    except Exception:
        print('Failed on events_dirpath = "{}", pulse_series = "{}"'.format(
            events_dirpath, pulse_series))
        raise

    return events_array, doms_array, pulses_array


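# --- Usage sketch (not part of the original module) -------------------------
# The three arrays returned by `process_events_dir` form a flattened
# hierarchy: each event row stores `dom_idx0`/`num_hit_doms` pointing into
# `doms_array`, and each DOM row stores `pulses_idx0`/`num_pulses` pointing
# into `pulses_array`. A sketch of walking one event:
def _example_walk_event(events_array, doms_array, pulses_array, event_idx=0):
    """Print the pulses recorded on each hit DOM of one event."""
    event = events_array[event_idx]
    dom_start = event["dom_idx0"]
    dom_stop = dom_start + event["num_hit_doms"]
    for dom in doms_array[dom_start:dom_stop]:
        p_start = dom["pulses_idx0"]
        p_stop = p_start + dom["num_pulses"]
        dom_pulses = pulses_array[p_start:p_stop]
        print(dom["string"], dom["om"],
              dom_pulses["time"], dom_pulses["charge"])

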
def generate_histos(
    photons,
    hole_ice_model,
    t_max,
    num_bins,
    gcd=None,
    include_rde=True,
    include_noise=True,
    outfile=None,
):
    """Generate time histograms from photons extracted from CLSim (repeated)
    forward event simulations.

    Parameters
    ----------
    photons : string or mapping

    hole_ice_model : string
        Raw CLSim does not (currently) incorporate a hole ice model; this is
        a modification to the angular acceptance of the photons that CLSim
        returns, so must be specified (and applied) post-hoc (e.g., in this
        function).

    t_max : float
        Last edge in time binning (first edge is at 0), in units of ns.

    num_bins : int
        Number of time bins, which span from 0 to t_max.

    gcd : str or None, optional
        Path to GCD i3 or pkl file to get DOM coordinates, rde, and noise
        (where the latter two only have an effect if `include_rde` and/or
        `include_noise` are True). Regardless if this is specified, the code
        will attempt to automatically figure out the GCD file used to produce
        the table. If this succeeds and `gcd` is specified by the user, the
        user's value is checked against that found in the data. If the user
        does not specify `gcd`, the value found in the data is used. If
        neither `gcd` is provided nor one can be found in the data, an error
        is raised.

    include_rde : bool, optional
        Whether to use relative DOM efficiencies (RDE) to scale the results
        per DOM. RDE is included by default.

    include_noise : bool, optional
        Whether to add the noise floor for each DOM to the results. Noise is
        included by default.

    outfile : str or None, optional
        If a string is specified, save the histos to a pickle file by the
        name `outfile`. If not specified (or `None`), `histos` will not be
        written to a file.

    Returns
    -------
    histos : OrderedDict

    dom_info : mapping
        The per-DOM photon info from `photons` (with weights attached)

    Raises
    ------
    ValueError
        If `gcd` is specified but does not match a GCD file found in the data

    ValueError
        If `gcd` is not specified and no GCD can be found in the data

    See also
    --------
    i3processing.sim
        Perform the repeated simulation to get photons at DOMs. Generates an
        i3 file.

    i3processing.extract_photon_info
        Extract photon info (and pertinent metadata) from the i3 file
        produced from the above.

    retro_dom_pdfs
        Produce distributions corresponding to the histograms made here, but
        using Retro reco.

    """
    photons_file_name = None
    if isinstance(photons, string_types):
        photons_file_name = photons
        photons = load_pickle(photons_file_name)
    dom_info = photons['doms']

    bin_edges = np.linspace(0, t_max, num_bins + 1)
    bin_widths = np.diff(bin_edges)

    gcd_info = None
    if isinstance(gcd, string_types):
        exp_gcd = expanduser(expandvars(gcd))
        if exp_gcd.endswith('.pkl'):
            gcd_info = load_pickle(exp_gcd)
        elif '.i3' in exp_gcd:
            gcd_info = extract_gcd(exp_gcd)
        else:
            raise ValueError('No idea how to handle GCD file "{}"'.format(gcd))

    if photons['gcd']:
        try:
            gcd_from_data = expanduser(expandvars(photons['gcd']))
            if gcd_from_data.endswith('.pkl'):
                gcd_info_from_data = load_pickle(gcd_from_data)
            else:
                gcd_info_from_data = extract_gcd(gcd_from_data)
        except (AttributeError, KeyError, ValueError):
            raise
            #assert gcd_info is not None
        else:
            if gcd_info is None:
                gcd_info = gcd_info_from_data
            else:
                pass
                #if not np.all(gcd_info == gcd_info_from_data):
                #    print('WARNING: Using different GCD from the one used'
                #          ' during simulation!')

    if gcd_info is None:
        photons_err = ''
        if photons_file_name is not None:
            photons_err = ' filename "{}"'.format(photons_file_name)
        raise ValueError(
            'No GCD info could be found from arg `gcd`={} or in `photons`'
            '{}'.format(gcd, photons_err)
        )

    rde = gcd_info['rde']
    noise_rate_hz = gcd_info['noise']
    mask = (rde == 0) | np.isnan(rde) | np.isinf(rde)
    operational_doms = ~mask
    rde = np.ma.masked_where(mask, rde)
    quantum_efficiency = rde

    histos = OrderedDict()
    keep_gcd_keys = ['source_gcd_name', 'source_gcd_md5', 'source_gcd_i3_md5']
    histos['gcd_info'] = OrderedDict([(k, gcd_info[k]) for k in keep_gcd_keys])
    histos['include_rde'] = include_rde
    histos['include_noise'] = include_noise
    histos['bin_edges'] = bin_edges
    histos['binning_spec'] = OrderedDict([
        ('domain', (0, t_max)),
        ('num_bins', num_bins),
        ('spacing', 'linear'),
        ('units', 'ns'),
    ])

    # Note the first number in the file is a number approximately equal (but
    # greater than) the peak in the distribution, so is useless for us.
    possible_paths = [
        hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens/' + hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens/as.' + hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens_flasher/' + hole_ice_model,
        '$I3_SRC/ice-models/resources/models/angsens_flasher/as.' + hole_ice_model,
    ]
    coeffs_loaded = False
    for path in possible_paths:
        path = expanduser(expandvars(path))
        if not isfile(path):
            continue
        try:
            poly_coeffs = np.loadtxt(path)[1:]
        except:
            pass
        else:
            coeffs_loaded = True
            break

    if not coeffs_loaded:
        raise ValueError('Could not load hole ice model at any of\n{}'
                         .format(possible_paths))

    # We want coszen = -1 to correspond to upgoing particles, but angular
    # sensitivity is given w.r.t. the DOM axis (which points "down" towards
    # earth, and therefore is rotated 180 deg). So rotate the coszen
    # polynomial about cz=0 by negating the odd coefficients (coeffs are in
    # ascending powers of "x").
    flipped_coeffs = np.empty_like(poly_coeffs)
    flipped_coeffs[0::2] = poly_coeffs[0::2]
    flipped_coeffs[1::2] = -poly_coeffs[1::2]
    angsens_poly = np.polynomial.Polynomial(flipped_coeffs, domain=(-1, 1))

    # Attach the weights to the data
    num_sims = photons['num_sims']
    for data_dict in photons['doms'].values():
        cz = data_dict['coszen']
        try:
            # Note that angular sensitivity will modify the total number of
            # photons detected, and the poly is normalized as such already,
            # so no normalization should be applied here.
            angsens_wt = angsens_poly(cz)
        except:
            print(np.min(cz), np.max(cz))
            raise

        data_dict['weight'] = angsens_wt / num_sims

        for k, array in data_dict.items():
            data_dict[k] = array.astype(np.float32)

    histos['results'] = results = OrderedDict()
    for (string, dom), data in dom_info.items():
        string_idx, dom_idx = string - 1, dom - 1
        if not operational_doms[string_idx, dom_idx]:
            continue

        hist, _ = np.histogram(
            data['time'],
            bins=bin_edges,
            weights=data['weight'],
            normed=False
        )
        if include_rde:
            hist *= quantum_efficiency[string_idx, dom_idx]
        if include_noise:
            hist += (noise_rate_hz[string_idx, dom_idx] / 1e9) * bin_widths
        results[(string, dom)] = hist

    if outfile is not None:
        outfile = expanduser(expandvars(outfile))
        print('Writing histos to\n"{}"'.format(outfile))
        pickle.dump(histos, open(outfile, 'wb'),
                    protocol=pickle.HIGHEST_PROTOCOL)

    return histos, dom_info


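# --- Worked check (not part of the original module) -------------------------
# Negating the odd-order coefficients of a polynomial is the same as
# reflecting it about x = 0, i.e. p_flipped(x) == p(-x); that is what lets the
# coefficient flip above rotate the angular-sensitivity curve from the
# DOM-axis convention to the coszen convention used here.
def _example_angsens_flip(poly_coeffs):
    """Verify p_flipped(cz) == p(-cz) for a sample of coszen values."""
    flipped = np.copy(poly_coeffs)
    flipped[1::2] *= -1
    p = np.polynomial.Polynomial(poly_coeffs)
    p_flipped = np.polynomial.Polynomial(flipped)
    cz = np.linspace(-1, 1, 11)
    assert np.allclose(p_flipped(cz), p(-cz))

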
def plot_run_info(
    files,
    labels,
    outdir,
    fwd_hists=None,
    data_or_sim_label=None,
    paired=False,
    gradient=False,
    plot=True,
):
    """Plot `files` using `labels` (one for each file).

    Parameters
    ----------
    files : string or iterable thereof
    labels : string or iterable thereof
    outdir : string
    fwd_hists : string, optional
    data_or_sim_label : string, optional
    paired : bool, optional
    gradient : bool, optional
    plot : bool, optional

    """
    if isinstance(files, string_types):
        files = [files]
    if isinstance(labels, string_types):
        labels = [labels]

    outdir = expand(outdir)

    if fwd_hists is not None:
        fwd_hists = load_pickle(fwd_hists)
        if 'binning' in fwd_hists:
            t_min = fwd_hists['binning']['t_min']
            t_max = fwd_hists['binning']['t_max']
            t_window = t_max - t_min
            num_bins = fwd_hists['binning']['num_bins']
            spacing = fwd_hists['binning']['spacing']
            assert spacing == 'linear', spacing
            fwd_hists_binning = np.linspace(t_min, t_max, num_bins + 1)
        elif 'bin_edges' in fwd_hists:
            fwd_hists_binning = fwd_hists['bin_edges']
            t_window = np.max(fwd_hists_binning) - np.min(fwd_hists_binning)
        else:
            raise ValueError(
                'Need "binning" or "bin_edges" in fwd_hists; keys are {}'
                .format(fwd_hists.keys())
            )
        hist_bin_widths = np.diff(fwd_hists_binning)
        if 'results' in fwd_hists:
            fwd_hists = fwd_hists['results']
        else:
            raise ValueError('Could not find key "results" in fwd hists!')
    else:
        raise NotImplementedError('Need fwd hists for now.')

    if not isdir(outdir):
        makedirs(outdir)

    run_infos = []
    all_string_dom_pairs = set()
    mc_true_params = None
    for filepath in files:
        filepath = expand(filepath)
        if isdir(filepath):
            filepath = join(filepath, 'run_info.pkl')
        run_info = load_pickle(filepath)
        run_infos.append(run_info)
        pairs = []
        for sd_idx in run_info['sd_indices']:
            pairs.append(get_string_om_pair(sd_idx))
        all_string_dom_pairs.update(pairs)
        if data_or_sim_label is None:
            data_or_sim_label = (
                'Simulation: '
                + run_info['sim_to_test'].replace('_', ' ').capitalize()
            )
        if mc_true_params is None:
            if 'sim' in run_info:
                mc_true_params = run_info['sim']['mc_true_params']
            else:
                print('mc_true_params not in run_info', filepath)

    params_label = None
    if mc_true_params is not None:
        params_label = []
        for plab, pval in mc_true_params.items():
            units = ''

            if plab == 't':
                pval = format(int(pval), 'd')
                #plab = r'{}'.format(plab)
                units = r'\, \rm{ ns}'

            elif plab in 'x y z'.split():
                pval = format(pval, '0.1f')
                #plab = r'${}$'.format(plab)
                units = r'\, \rm{ m}'

            elif plab in 'track_energy cascade_energy'.split():
                pval = format(int(pval), 'd')
                plab = r'E_{\rm %s}' % plab.split('_')[0]
                units = r'\, \rm{ GeV}'

            elif plab in 'track_azimuth track_zenith cascade_azimuth cascade_zenith'.split():
                pval = format(pval / np.pi, '.2f')
                if 'azimuth' in plab:
                    ltr = r'\phi'
                elif 'zenith' in plab:
                    ltr = r'\theta'
                plab = ltr + r'_{\rm %s}' % plab.split('_')[0]
                units = r'\, \pi'

            params_label.append('{}={}{}'.format(plab, pval, units))
        params_label = '$' + r',\;'.join(params_label) + '$'

    if plot:
        fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=72)

    t_indep_tots = []
    tots_incl_noise = []
    tots_excl_noise = []
    kss = []
    ref_tots_incl_noise = []
    ref_tots_excl_noise = []
    ref_areas_incl_noise = []
    for string, dom in reversed(sorted(all_string_dom_pairs)):
        if plot:
            ax.clear()
        all_zeros = True
        xmin = np.inf
        xmax = -np.inf
        ref_y = None
        if fwd_hists:
            if (string, dom) in fwd_hists:
                # Hit rate per nanosecond in each bin (includes noise hit
                # rate)
                ref_y = fwd_hists[(string, dom)] / hist_bin_widths
                # Duplicate first element for plotting via `plt.step`
                ref_y = np.array([ref_y[0]] + ref_y.tolist())

                # Figure out "meaningful" range
                nonzero_mask = ref_y != 0  #~np.isclose(ref_y, 0)
                if np.any(nonzero_mask):
                    all_zeros = False
                    #ref_y_all_zeros = False
                    min_mask = ((ref_y - ref_y.min())
                                >= 0.01 * (ref_y.max() - ref_y.min()))
                    xmin = min(xmin, fwd_hists_binning[min_mask].min())
                    xmax = max(xmax, fwd_hists_binning[min_mask].max())
            else:
                ref_y = np.zeros_like(fwd_hists_binning)

            ref_y_areas = ref_y[1:] * hist_bin_widths
            ref_y_area = np.sum(ref_y_areas)

            ref_tots_incl_noise.append(ref_y_area)

            # Following only works if our time window is large enough s.t.
            # exp hits from event is zero somewhere, and then it'll only be
            # noise contributing at that time...
            ref_tots_excl_noise.append(np.sum(ref_y_areas - ref_y_areas.min()))
            ref_areas_incl_noise.append(ref_y_area)

            if plot:
                ax.step(
                    fwd_hists_binning,
                    ref_y,
                    lw=1,
                    label=(r'Fwd: $\Sigma \lambda_q \Delta t$={}'
                           .format(num_fmt(ref_y_area))),
                    clip_on=True,
                    #color='C0'
                )

        colors = ['C%d' % i for i in range(1, 10)]
        linestyles = ['-', '--']
        linewidths = [5, 3, 2, 2, 2, 2, 2]

        for plt_i, (label, run_info) in enumerate(zip(labels, run_infos)):
            sample_hit_times = run_info['hit_times']
            if len(tots_incl_noise) <= plt_i:
                tots_incl_noise.append([])
                tots_excl_noise.append([])
                t_indep_tots.append([])
                kss.append([])

            results = run_info['results']
            if (string, dom) in pairs:
                rslt = results[get_sd_idx(string, dom)]
                if 'exp_p_at_hit_times' in rslt:
                    y = rslt['exp_p_at_hit_times']
                    y_ti = rslt['exp_p_at_all_times']
                    t_indep_tots[plt_i].append(y_ti)
                else:
                    y = rslt['pexp_at_hit_times']

                nonzero_mask = y != y[0]  #~np.isclose(y, 0)
                if np.any(nonzero_mask):
                    all_zeros = False
                    min_mask = y >= 0.01 * max(y)
                    xmin = min(xmin, sample_hit_times[min_mask].min())
                    xmax = max(xmax, sample_hit_times[min_mask].max())
            else:
                y = np.zeros_like(sample_hit_times)

            #y_area = np.sum(
            masked_y = np.ma.masked_invalid(y * hist_bin_widths)
            tot_excl_noise = np.sum(masked_y - masked_y.min())
            tot_incl_noise = masked_y.sum()
            if tot_excl_noise != 0:
                tots_excl_noise[plt_i].append(tot_excl_noise)
                tots_incl_noise[plt_i].append(tot_incl_noise)
            else:
                tots_excl_noise[plt_i].append(0)
                tots_incl_noise[plt_i].append(0)
            kss[plt_i].append(ks_test(y, ref_y[1:]))

            #kl_div = None
            custom_label = r'{:3s}: $\Sigma \lambda_q \Delta t$={}, ti={}'.format(
                label, num_fmt(tots_incl_noise[plt_i][-1]), num_fmt(y_ti))
            #if ref_y is not None:  # and not ref_y_all_zeros:
            #    abs_mean_diff = np.abs(np.mean(y - ref_y[1:]))
            #    #rel_abs_mean_diff = abs_mean_diff / np.sum(ref_y[1:])
            #    mask = ref_y[1:] > 0
            #    kl_ref_vals = ref_y[1:][mask]
            #    kl_ref_vals /= np.sum(kl_ref_vals)
            #    y_prob_vals = y[mask]
            #    y_prob_vals /= np.sum(y_prob_vals)
            #    with np.errstate(divide='ignore'):
            #        kl_div = -np.sum(kl_ref_vals * np.log(y_prob_vals / kl_ref_vals))
            #    custom_label = format(rel_abs_mean_diff, '9.6f') + ' ' + label

            if paired:
                c_idx, ls_idx = divmod(plt_i, 2)
                color = colors[c_idx]
                linestyle = linestyles[ls_idx]
            else:
                color = None
                linestyle = None

            if plot:
                ax.plot(
                    sample_hit_times,
                    y,
                    label=custom_label,
                    color=color,
                    linestyle=linestyle,
                    linewidth=linewidths[plt_i],
                    clip_on=True
                )

        if all_zeros:
            continue

        if xmin == xmax:
            xmin = np.min(fwd_hists_binning)
            xmax = np.max(fwd_hists_binning)

        if plot:
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(0, ax.get_ylim()[1])

            for pos in 'bottom left top right'.split():
                ax.spines[pos].set_visible(False)

            ax.xaxis.set_ticks_position('none')
            ax.yaxis.set_ticks_position('none')
            ax.xaxis.tick_bottom()
            ax.yaxis.tick_left()

            #if kl_div is not None:
            #    title = ' '*6 + 'Abs diff'.ljust(8) + ' ' + 'Simulation'
            #else:
            title = 'Code'

            leg = ax.legend(
                #title=title,
                #loc='best',
                loc='upper right',
                #frameon=False,
                framealpha=0.7,
                prop=dict(family='monospace', size=12)
            )
            plt.setp(leg.get_title(), family='monospace', fontsize=12)
            #if kl_div is not None:
            #    leg._legend_box.align = "left"
            leg.get_frame().set_linewidth(0)
            ax.set_xlabel('Time from event vertex (ns)', fontsize=14)

            if data_or_sim_label is not None:
                plt.text(
                    0.5, 1.1,
                    data_or_sim_label,
                    ha='center', va='bottom',
                    transform=ax.transAxes,
                    fontsize=16
                )
            if params_label is not None:
                plt.text(
                    0.5, 1.05,
                    params_label,
                    ha='center', va='bottom',
                    transform=ax.transAxes,
                    fontsize=12
                )

            ax.text(
                0.5, 1.0,
                'String {}, DOM {}'.format(string, dom),
                ha='center', va='bottom',
                transform=ax.transAxes,
                fontsize=14
            )

            fbasename = 'string_{}_dom_{}'.format(string, dom)
            fig.savefig(join(outdir, fbasename + '.png'))

        sys.stdout.write('({}, {}) '.format(string, dom))
        sys.stdout.flush()

    sys.stdout.write('\n\n')
    sys.stdout.flush()

    ref_tots_incl_noise = np.array(ref_tots_incl_noise)
    ref_tots_excl_noise = np.array(ref_tots_excl_noise)
    ref_areas_incl_noise = np.array(ref_areas_incl_noise)

    ref_tot_incl_noise = np.sum(ref_tots_incl_noise)
    ref_tot_excl_noise = np.sum(ref_tots_excl_noise)
    ref_area_incl_noise = np.sum(ref_areas_incl_noise)

    print('{:9s} {:9s} {:16s} {:16s} {:16s} {}'.format(
        'wtd KS'.rjust(9),
        'avg KS'.rjust(9),
        'Ratio incl noise'.rjust(16),
        'Ratio excl noise'.rjust(16),
        't-indep ratio'.rjust(16),
        'Label'
    ))
    for label, ks, tot_incl_noise, tot_excl_noise, ti_tot in zip(
            labels, kss, tots_incl_noise, tots_excl_noise, t_indep_tots):
        ks = np.array(ks)
        mask = ~np.isnan(ks)
        ks_avg = np.mean(ks[mask])
        ks_wtd_avg = (np.sum(ks[mask] * ref_tots_excl_noise[mask])
                      / np.sum(ref_tots_excl_noise[mask]))
        print('{:9s} {:9s} {:16s} {:16s} {:16s} {}'.format(
            format(ks_wtd_avg, '.7f').rjust(9),
            format(ks_avg, '.7f').rjust(9),
            format(np.sum(tot_excl_noise) / ref_tot_excl_noise, '.12f').rjust(16),
            format(np.sum(tot_incl_noise) / ref_tot_incl_noise, '.12f').rjust(16),
            format(np.sum(ti_tot) / ref_area_incl_noise, '.12f').rjust(16),
            label
        ))


def plot_run_info2(
    fpath,
    only_string,
    subtract_noisefloor=True,
    plot_ref=True,
    normalize=False,
    scalefact=None,
    axes=None,
):
    """Plot information from `run_info.pkl` file as produced by
    `retro_dom_pdfs.py` script.

    Parameters
    ----------
    fpath : str
        Full path to `run_info.pkl` file

    only_string : int in [1, 86]
        String to plot

    subtract_noisefloor : bool, optional
        Whether to subtract the minimum value from each distribution, which
        (usually but not always) is the noise floor

    plot_ref : bool, optional
        Plot the forward-simulation distribution

    normalize : bool, optional
        Whether to normalize each Retro distribution to the corresponding
        forward-simulation distribution instead of applying `scalefact`

    scalefact : float, optional
        If not specified, a scale factor will be derived from the ratio
        between the forward-simulation and Retro distributions

    axes : length-3 sequence of matplotlib.axis, optional
        Provide the axes on which to plot the distributions; otherwise, a new
        figure with 3 axes will be created

    Returns
    -------
    fig : matplotlib.figure

    axes : length-3 list of matplotlib.axis

    """
    if axes is None:
        fig, axes = plt.subplots(3, 1, figsize=(16, 24), dpi=120)
    else:
        assert len(axes) == 3
        fig = axes[0].get_figure()

    subtract_noisefloor = 1 if subtract_noisefloor else 0

    # -- Extract info from files -- #

    fpath = expand(fpath)
    if isdir(fpath):
        fpath = join(fpath, 'run_info.pkl')
    info = load_pickle(fpath)

    sd_indices = info['sd_indices']
    hit_times = info['hit_times']
    dom_exp = info['dom_exp']
    hit_exp = info['hit_exp']

    #dt = np.diff(hit_times)

    fwd = load_pickle(info['sim']['fwd_sim_histo_file'])
    bin_edges = fwd['bin_edges']
    fwd_results = fwd['results']

    # why?
    dt = np.diff(bin_edges)
    dt = np.ones_like(dt)

    # -- Figure out how many lines are to be plotted -- #

    total_num_lines = 0
    for idx, sd_idx in enumerate(sd_indices):
        he = hit_exp[idx, :]
        string, dom = get_string_om_pair(sd_idx)
        if string != only_string or np.sum(he) == 0:
            continue
        total_num_lines += 1

    # -- Get info from all distributions -- #

    weights = []
    rats = []
    xmin = np.inf
    ymax = -np.inf
    ymin_at_3k = np.inf
    absdiff3k = np.abs(hit_times - 3000)
    idx_at_3k = np.where(absdiff3k == np.min(absdiff3k))[0][0]

    for idx, sd_idx in enumerate(sd_indices):
        he = hit_exp[idx, :]
        de = dom_exp[idx]
        he -= np.min(he)
        string, dom = get_string_om_pair(sd_idx)
        if np.sum(he) == 0 or (string, dom) not in fwd_results:
            continue

        ref = fwd_results[(string, dom)]
        ref_tot = np.sum(ref)
        he_tot = np.sum(he)
        #print('ratio clsim vs. retro %.2f for (%s, %s)'
        #      % (ref_tot/de, string, dom))
        ref -= np.min(ref)
        mask = (he > 1e-12) & (ref >= 1e-12)
        rats.append(np.sum((ref[mask] / he[mask]) * ref[mask]))
        weights.append(np.sum(ref[mask]))

        if string != only_string:
            continue

        xmin_idx = np.where(ref > 0)[0][0]
        xmin = min(xmin, hit_times[xmin_idx])
        ymax = max(ymax, np.max(ref))
        ymin_at_3k = min(ymin_at_3k, ref[idx_at_3k])

    wtdavg_rat = np.sum(rats) / np.sum(weights)
    xmin -= 50
    if ymin_at_3k == 0:
        ymin_at_3k = ymax / 1e6

    if scalefact is None:
        print('wtdavg_rat:', wtdavg_rat, '(using as scalefact)')
        scalefact = wtdavg_rat
    else:
        print('wtdavg_rat:', wtdavg_rat,
              '(but using {} as scalefact)'.format(scalefact))

    def innerplot(ax):  # pylint: disable=missing-docstring
        for idx, sd_idx in enumerate(sd_indices):
            string, dom = get_string_om_pair(sd_idx)
            he = hit_exp[idx, :]
            de = dom_exp[idx]
            if np.sum(he) > 0:
                norm = de / np.sum(he)
            else:
                norm = 1
            if (string, dom) in fwd_results:
                ref = fwd_results[(string, dom)]
            else:
                ref = he

            if normalize:
                mask = (he > 1e-12) & (ref >= 1e-12)
                tot_ref = np.sum(ref[mask] / dt[mask])
                tot_he = np.sum(he[mask])
                if tot_he == 0.:
                    scale = 1.
                else:
                    scale = tot_ref / tot_he
            else:
                scale = scalefact

            if string != only_string or np.sum(he) == 0:
                continue

            line, = ax.plot(
                hit_times,
                scale * (he * norm - subtract_noisefloor * np.min(he * norm)),
                '-',
                lw=1,
                label='({}, {})'.format(string, dom)
            )

            if not plot_ref or (string, dom) not in fwd_results:
                continue

            ax.plot(
                hit_times,
                ref - subtract_noisefloor * np.min(ref),
                linestyle='--',
                lw=0.5,
                color=line.get_color()
            )

    # -- Plot overview of distributions -- #

    ax = axes[0]
    num_lines = total_num_lines
    cm = plt.cm.gist_rainbow
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_ylim(ymin_at_3k, ymax * 2)
    ax.set_xlim(xmin, min(xmin + 2000, 3000))
    ax.legend(loc='best', fontsize=8, ncol=4, frameon=False)

    # -- Zoom on peaks -- #

    ax = axes[1]
    num_lines = 20
    cm = plt.cm.tab20
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_ylim(ymax / 5e3, ymax * 3)
    ax.set_xlim(xmin + 25, xmin + 750)
    ax.legend(loc='best', fontsize=7, ncol=14, frameon=False)

    # -- Zoom on tails -- #

    ax = axes[2]
    num_lines = 20
    cm = plt.cm.tab20
    ax.set_prop_cycle('color',
                      [cm(1. * i / num_lines) for i in range(num_lines)])
    innerplot(ax)
    ax.set_xlim(xmin + 750, 3000)
    ax.set_ylim(ymin_at_3k / 2, ymin_at_3k * 1e3)
    ax.legend(loc='best', fontsize=7, ncol=6, frameon=False)

    # -- Set common plot things -- #

    axes[0].set_title(info['sim_to_test'])
    axes[-1].set_xlabel('Time (ns)')
    for ax in axes:
        ax.set_ylabel('Charge (PE)')
        ax.set_yscale('log')

    fig.tight_layout()

    return fig, axes


def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str

    outdir : str, optional
        If provided, the gcd info is saved to a .pkl file with same name as
        `gcd_file` just with extension replaced.

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coords in m rel to IceCube
            coord system
        'rde' : (86, 60) array with relative DOM efficiencies
        'noise' : (86, 60) array with noise rate, in Hz, for each DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir, src_gcd_basename = split(gcd_file)

    # Strip all recognized extensions to find base file name's "stem," then
    # attach ".pkl" extension to that
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        if ext.lower().lstrip('.') not in ['i3', 'pkl', 'bz2', 'gz', 'zst']:
            # reattach unknown "extension"; presumably it's actually part of
            # the filename and not an extension at all (or an extension we
            # don't care about, or an empty string in the case that there is
            # no dot remaining in the name)
            src_gcd_stripped += ext
            break
    pkl_outfname = src_gcd_stripped + '.pkl'

    pkl_outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        pkl_outfpath = join(outdir, pkl_outfname)
        if isfile(pkl_outfpath):
            return load_pickle(pkl_outfpath)

    def save_pickle_if_appropriate(gcd_info):
        if pkl_outfpath is not None:
            with open(pkl_outfpath, 'wb') as fobj:
                pickle.dump(gcd_info, fobj, protocol=pickle.HIGHEST_PROTOCOL)

    # Look for existing extracted (pkl) version in choice directories
    look_in_dirs = []
    if src_gcd_dir:
        look_in_dirs.append(src_gcd_dir)
    look_in_dirs += ['.', DATA_DIR]
    if 'I3_DATA' in os.environ:
        look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    for look_in_dir in look_in_dirs:
        uncompr_pkl_fpath = join(look_in_dir, pkl_outfname)
        if isfile(uncompr_pkl_fpath):
            gcd_info = load_pickle(uncompr_pkl_fpath)
            save_pickle_if_appropriate(gcd_info)
            return gcd_info

    # If we couldn't find the already-extracted file, find the source file
    # (if user doesn't specify a full path to the file, try in several
    # possible directories)
    if src_gcd_dir:
        look_in_dirs = [src_gcd_dir]
    else:
        look_in_dirs = ['.', DATA_DIR]
        if 'I3_DATA' in os.environ:
            look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    src_fpath = None
    for look_in_dir in look_in_dirs:
        fpath = join(look_in_dir, src_gcd_basename)
        if isfile(fpath):
            src_fpath = fpath
            break

    if src_fpath is None:
        raise IOError('Cannot find file "{}" in dir(s) {}'.format(
            src_gcd_basename, look_in_dirs))

    # Figure out what compression algorithms are used on the file; final
    # state will have `ext_lower` containing either "i3" or "pkl" indicating
    # the basic type of file we have
    compression = []
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        ext_lower = ext.lower().lstrip('.')
        if ext_lower in ['gz', 'bz2', 'zst']:
            compression.append(ext_lower)
        elif ext_lower in ['i3', 'pkl']:
            break
        else:
            if ext:
                raise IOError(
                    'Unhandled extension "{}" found in GCD file "{}"'.format(
                        ext, gcd_file))
            raise IOError(
                'Illegal filename "{}"; must have either ".i3" or ".pkl"'
                ' extension, optionally followed by compression extension(s)'
                .format(gcd_file))

    with open(src_fpath, 'rb') as fobj:
        decompressed = fobj.read()

    # Don't hash a pickle file; all we care about is the hash of the original
    # i3 file, which is a value already stored in the pickle file
    if ext_lower == 'i3':
        source_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    for comp_alg in compression:
        if comp_alg == 'gz':
            decompressed = gzip.GzipFile(fileobj=BytesIO(decompressed)).read()
        elif comp_alg == 'bz2':
            decompressed = bz2.decompress(decompressed)
        elif comp_alg == 'zst':
            decompressor = zstandard.ZstdDecompressor()
            decompressed = decompressor.decompress(decompressed,
                                                   max_output_size=100000000)

    if ext_lower == 'pkl':
        if PY2:
            gcd_info = pickle.loads(decompressed)
        else:
            gcd_info = pickle.loads(decompressed, encoding='latin1')
        save_pickle_if_appropriate(gcd_info)
        return gcd_info

    # -- If we get here, we have an i3 file -- #

    decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    from I3Tray import I3Units, OMKey  # pylint: disable=import-error
    from icecube import dataclasses, dataio  # pylint: disable=import-error, unused-variable, unused-import

    gcd = dataio.I3File(gcd_file)  # pylint: disable=no-member
    frame = gcd.pop_frame()

    omgeo, dom_cal = None, None
    while gcd.more() and (omgeo is None or dom_cal is None):
        frame = gcd.pop_frame()
        keys = list(frame.keys())
        if 'I3Geometry' in keys:
            omgeo = frame['I3Geometry'].omgeo
        if 'I3Calibration' in keys:
            dom_cal = frame['I3Calibration'].dom_cal

    assert omgeo is not None
    assert dom_cal is not None

    # create output dict
    gcd_info = OrderedDict()
    gcd_info['source_gcd_name'] = src_gcd_basename
    gcd_info['source_gcd_md5'] = source_gcd_md5
    gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5
    gcd_info['geo'] = np.full(shape=(N_STRINGS, N_DOMS, 3), fill_value=np.nan)
    gcd_info['noise'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)
    gcd_info['rde'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)

    for string_idx in range(N_STRINGS):
        for dom_idx in range(N_DOMS):
            omkey = OMKey(string_idx + 1, dom_idx + 1)
            om = omgeo.get(omkey)
            gcd_info['geo'][string_idx, dom_idx, 0] = om.position.x
            gcd_info['geo'][string_idx, dom_idx, 1] = om.position.y
            gcd_info['geo'][string_idx, dom_idx, 2] = om.position.z
            try:
                gcd_info['noise'][string_idx, dom_idx] = (
                    dom_cal[omkey].dom_noise_rate / I3Units.hertz)
            except KeyError:
                gcd_info['noise'][string_idx, dom_idx] = 0.0
            try:
                gcd_info['rde'][string_idx, dom_idx] = (
                    dom_cal[omkey].relative_dom_eff)
            except KeyError:
                gcd_info['rde'][string_idx, dom_idx] = 0.0

    save_pickle_if_appropriate(gcd_info)

    return gcd_info
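

# --- Usage sketch (not part of the original module) -------------------------
# `extract_gcd` returns plain numpy arrays indexed by (string - 1, om - 1);
# the GCD path below is hypothetical.
def _example_extract_gcd():
    """Look up position, RDE, and noise rate for one DOM."""
    gcd_info = extract_gcd('/path/to/GeoCalibDetectorStatus.i3.gz')
    string, om = 26, 40
    x, y, z = gcd_info['geo'][string - 1, om - 1]
    rde = gcd_info['rde'][string - 1, om - 1]
    noise_hz = gcd_info['noise'][string - 1, om - 1]
    print(x, y, z, rde, noise_hz)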