def remove_dimension(input_file, output_file, dim_name):
    """
    Parameters
    ----------
    input_file : str
        Path to input file (table)

    output_file : str
        Path to output file (table)

    dim_name : str
        Dimension to remove from the input table

    """
    input_file = expand(input_file)
    output_file = expand(output_file)
    input_dir = dirname(input_file)
    output_dir = dirname(output_file)
    if abspath(output_dir) == abspath(input_dir):
        raise ValueError("Will not allow output dir to be same as input dir")
    if not isdir(output_dir):
        mkdir(output_dir)

    input_table = np.load(input_file, mmap_mode="r")
    input_binning = np.load(join(input_dir, "binning.npy"))

    dim_num = [i for i, n in enumerate(input_binning.dtype.names) if n == dim_name][0]
    output_binning = input_binning[
        [n for n in input_binning.dtype.names if n != dim_name]
    ]

    # Save the binning to the output directory
    np.save(join(output_dir, "binning.npy"), output_binning)

    # Legacy way of storing bin edges: store each dim individually
    for d_name in output_binning.dtype.names:
        bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(d_name))
        np.save(bin_edges_fpath, output_binning[d_name])

    # If we find the removed dimension's bin edges in output dir, remove that file
    bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(dim_name))
    if isfile(bin_edges_fpath):
        remove(bin_edges_fpath)

    output_shape = tuple(n for i, n in enumerate(input_table.shape) if i != dim_num)
    output_table = np.empty(shape=output_shape, dtype=input_table.dtype)
    #output_table = np.memmap(
    #    output_file, dtype=input_table.dtype, mode="w+", shape=output_shape
    #)

    # Perform the summation over the dimension to be removed. Note that
    # setting dtype to float64 causes the accumulator to be double precision,
    # even if the output table is not.
    input_table.sum(axis=dim_num, dtype=np.float64, out=output_table)

    np.save(output_file, output_table)
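
# A hedged usage sketch of `remove_dimension` (the paths and dimension name
# below are hypothetical; the function assumes a structured "binning.npy"
# array sits in the same directory as the input table).
def _demo_remove_dimension():
    remove_dimension(
        input_file="~/tables/tdi/table.npy",  # hypothetical path
        output_file="~/tables/tdi_t_summed/table.npy",  # hypothetical path
        dim_name="t",  # marginalize (sum) out the time dimension
    )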
def centralize_gcds(root_infos, gcd_dir=GCD_DIR):
    """Move GCD files to a single directory, if they don't already exist
    there. Compression extensions should be ignored, so only one version of
    each GCD exists.

    Parameters
    ----------
    root_infos : mapping

    gcd_dir : str, optional

    """
    gcd_dir = expand(gcd_dir)
    mkdir(gcd_dir)

    existing_fnames = os.listdir(gcd_dir)
    existing_roots = set()
    for fname in existing_fnames:
        match = GENERIC_I3_FNAME_RE.match(fname)
        if not match:
            continue
        groupdict = match.groupdict()
        existing_roots.add(groupdict["base"])

    for root, infos in root_infos.items():
        for info in infos:
            is_link = islink(info["fpath"])
            is_file = isfile(info["fpath"])
            if is_link:
                if is_file:  # link to an existing file
                    if root not in existing_roots:
                        shutil.copy2(info["fpath"], gcd_dir, follow_symlinks=True)
                        existing_roots.add(root)
                else:  # bad link (to nothing, or to a directory)
                    if not isdir(info["fpath"]):
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
            else:
                if root in existing_roots:
                    if is_file:
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
                else:
                    print(f'shutil.move({info["fpath"]}, {gcd_dir})')
                    shutil.move(info["fpath"], gcd_dir)
                    existing_roots.add(root)
def concatenate_recos_and_save(outfile, **kwargs):
    """Concatenate recos and save to a file.

    Parameters
    ----------
    outfile : str

    **kwargs
        Arguments passed to `concatenate_recos`

    """
    outfile = expand(outfile)
    out_array = concatenate_recos(**kwargs)

    outdir = dirname(outfile)
    if not isdir(outdir):
        mkdir(outdir)
    np.save(outfile, out_array)
    sys.stdout.write('Saved concatenated array to "{}"\n'.format(outfile))
def extract_gcd_frames(g_frame, c_frame, d_frame, retro_gcd_dir, metadata=None):
    """Extract GCD info to Python/Numpy-readable objects stored to a central
    GCD directory, subdirs of which are named by the hex md5sum of each
    extracted GCD file.

    Parameters
    ----------
    g_frame : icecube.icetray.I3Frame with stop I3Frame.Geometry
    c_frame : icecube.icetray.I3Frame with stop I3Frame.Calibration
    d_frame : icecube.icetray.I3Frame with stop I3Frame.DetectorStatus
    retro_gcd_dir : string
    metadata : None or mapping, optional
        If a non-empty mapping (e.g., OrderedDict) is provided, the contents
        are written to the GCD file's subdirectory inside `retro_gcd_dir` as
        "metadata.json"

    Returns
    -------
    gcd_md5_hex : len-32 string of chars 0-9 and/or a-f
        MD5 sum of _only_ the G, C, and D frames (in that order) dumped to an
        uncompressed i3 file. Note that this can result in a hash value
        different from hashing the original GCD file if other frames were
        present besides the GCD frames (such as an I frame, or Q/P/etc. if the
        GCD is embedded in a data i3 file)

    """
    from icecube.dataio import I3File  # pylint: disable=import-outside-toplevel

    retro_gcd_dir = expand(retro_gcd_dir)

    # Create root dir for gcd subdirs if necessary
    if not isdir(retro_gcd_dir):
        mkdir(retro_gcd_dir)

    # Add a vaguely useful README to gcd root dir
    readme_fpath = join(retro_gcd_dir, "README")
    if not isfile(readme_fpath):
        with io.open(readme_fpath, "w", encoding="utf-8") as fhandle:
            fhandle.write(GCD_README.strip() + "\n")

    # Find md5sum of an uncompressed GCD file created by these G, C, & D frames
    tempdir_path = mkdtemp(suffix="gcd")
    try:
        gcd_i3file_path = join(tempdir_path, "gcd.i3")
        gcd_i3file = I3File(gcd_i3file_path, "w")
        gcd_i3file.push(g_frame)
        gcd_i3file.push(c_frame)
        gcd_i3file.push(d_frame)
        gcd_i3file.close()
        gcd_md5_hex = get_file_md5(gcd_i3file_path)
    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    this_gcd_dir_path = join(retro_gcd_dir, gcd_md5_hex)
    if isdir(this_gcd_dir_path):  # already extracted this GCD
        sys.stderr.write(
            "Already extracted GCD with md5sum {}\n".format(gcd_md5_hex)
        )
        return gcd_md5_hex

    tempdir_path = mkdtemp(suffix="." + gcd_md5_hex)
    try:
        # Extract GCD info into Python/Numpy-readable things
        gcd_info = OrderedDict()
        gcd_info["I3Geometry"] = extract_i3_geometry(g_frame)
        gcd_info["I3Calibration"] = extract_i3_calibration(c_frame)
        gcd_info["I3DetectorStatus"] = extract_i3_detector_status(d_frame)
        gcd_info.update(extract_bad_doms_lists(d_frame))

        # Write info to files. Preferable to write a single array to a .npy
        # file; second most preferable is to write multiple arrays to a
        # (compressed) .npz file (faster to load than pkl files); finally,
        # I3DetectorStatus _has_ to be stored as pickle to preserve
        # varying-length items.
        for key, val in gcd_info.items():
            if isinstance(val, Mapping):
                if key == "I3DetectorStatus":
                    key_fpath = join(tempdir_path, key + ".pkl")
                    with io.open(key_fpath, "wb") as fhandle:
                        pickle.dump(val, fhandle, protocol=pickle.HIGHEST_PROTOCOL)
                else:
                    np.savez_compressed(join(tempdir_path, key + ".npz"), **val)
            else:
                assert isinstance(val, np.ndarray)
                np.save(join(tempdir_path, key + ".npy"), val)

        if metadata:
            metadata_fpath = join(tempdir_path, "metadata.json")
            with open(metadata_fpath, "w") as fhandle:
                json.dump(metadata, fhandle, sort_keys=False, indent=4)

        try:
            copytree(tempdir_path, this_gcd_dir_path)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    return gcd_md5_hex
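
# A hedged sketch of driving `extract_gcd_frames` (assumes the IceCube
# software framework is importable; the GCD filename here is hypothetical and
# the file is assumed to contain one G, one C, and one D frame).
def _demo_extract_gcd_frames():
    from icecube import dataio, icetray  # pylint: disable=import-error

    i3f = dataio.I3File("GeoCalibDetectorStatus_hypothetical.i3.gz")
    frames = {}
    while i3f.more():
        frame = i3f.pop_frame()
        frames[frame.Stop] = frame

    return extract_gcd_frames(
        g_frame=frames[icetray.I3Frame.Geometry],
        c_frame=frames[icetray.I3Frame.Calibration],
        d_frame=frames[icetray.I3Frame.DetectorStatus],
        retro_gcd_dir="~/retro_gcds",  # hypothetical central GCD dir
    )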
def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into
    a single TDI table.

    Parameters
    ----------
    source_dir : str
    dest_dir : str
    table_hash : str
    gcd : str
    bin_edges_file : str
    tile_spec_file : str

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()
    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [l.strip() for l in f.readlines()]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir), dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1),) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()
            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(join(
                source_dir,
                'clsim_table_set'
                '_{table_hash}'
                '_tile_{tbl_idx}'
                '_string_{string}'
                '_dom_{dom}'
                '_seed_{seed}'
                '_n_{n_events}'
                '.fits'.format(table_hash=table_hash, **info)
            ))[0]
            try:
                fits_table = fits.open(tile_fpath, mode='readonly', memmap=True)
            except:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]
            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                'this: {}, ref: {}'.format(this_gcd_i3_md5, gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)
            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1, info['dom'] - 1]
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons * bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('  quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop, z_start:z_stop, :, :] += (
                    norm * primary.data[central_slice]  # pylint: disable=no-member
                )
        except:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir,
        'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        )
    )
    mkdir(outdir)

    outfpath = join(outdir, 'tdi_table.npy')
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    with open(outfpath, 'wb') as fobj:
        pickle.dump(bin_edges, fobj, protocol=pickle.HIGHEST_PROTOCOL)

    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    with open(outfpath, 'wb') as fobj:
        pickle.dump(metadata, fobj, protocol=pickle.HIGHEST_PROTOCOL)
def combine_clsim_tables(table_fpaths, outdir=None, overwrite=False, step_length=1.0):
    """Combine multiple CLSim-produced tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds; if corresponding metadata
    files can be found in the same directories as the CLSim tables, this will
    be enforced prior to loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path
        and `overwrite` is False, the function simply returns.

    step_length : float > 0 in units of meters
        Needed for computing the normalization to apply to the `table` in
        order to generate the `t_indep_table` (if the latter doesn't already
        exist). Note that normalization constants due to `n_photons`,
        `quantum_efficiency`, and `angular_acceptance_fract` as well as
        normalization depending (only) upon radial bin (i.e. 1/r^2 geometric
        factor) are _not_ applied to the tables. The _only_ normalization
        applied (and _only_ to `t_indep_table`) is the multiple-counting
        factor that is a function of `step_length` and whichever of the time
        or radial bin dimensions is smaller.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion
    if isinstance(table_fpaths, basestring):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp)

    wstderr('Found {} tables to combine:\n  {}\n'.format(
        len(table_fpaths), '\n  '.join(table_fpaths)))

    # Formulate output filenames and check if they exist
    output_fpaths = None
    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)
        output_fpaths = OrderedDict(
            ((k, join(outdir, k + '.npy')) for k in ALL_KEYS))
        output_fpaths['source_tables'] = join(outdir, 'source_tables.txt')
        if not overwrite:
            for fpath in output_fpaths.values():
                if isfile(fpath):
                    raise IOError('File {} exists'.format(fpath))
        wstderr('Output files will be written to:\n  {}\n'.format(
            '\n  '.join(output_fpaths.values())))

    # Combine the tables
    combined_table = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath, step_length=step_length, mmap=True)

        if combined_table is None:
            combined_table = table
            continue

        if set(table.keys()) != set(SUM_KEYS + VALIDATE_KEYS):
            raise ValueError(
                'Table keys {} do not match expected keys {}'.format(
                    sorted(table.keys()), sorted(ALL_KEYS)))

        for key in VALIDATE_KEYS:
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal {} in file {}'.format(key, fpath))

        for key in SUM_KEYS:
            combined_table[key] += table[key]

        del table

    # Force quantum_efficiency and angular_acceptance_fract to 1 (these should
    # be handled by the user at the time the table is used to represent a
    # particular DOM or subgroup of DOMs)
    t_indep_table, _ = generate_time_indep_table(
        table=combined_table,
        quantum_efficiency=1,
        angular_acceptance_fract=1,
    )
    combined_table['t_indep_table'] = t_indep_table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        basenames = []
        for fpath in table_fpaths:
            base = basename(fpath)
            rootname, ext = splitext(base)
            if ext.lstrip('.') in COMPR_EXTENSIONS:
                base = rootname
            basenames.append(base)

        wstderr('Writing files:\n')

        for key in ALL_KEYS:
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

        fpath = output_fpaths['source_tables']
        wstderr('  {} ...'.format(fpath))
        t0 = time()
        with open(fpath, 'w') as fobj:
            fobj.write('\n'.join(sorted(basenames)))
        wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    wstderr('Total time to combine tables: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return combined_table
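
# Example invocation of `combine_clsim_tables` (the glob pattern is
# hypothetical; all matched tables must share binning and should differ only
# by random seed).
def _demo_combine_clsim_tables():
    return combine_clsim_tables(
        table_fpaths="~/tables/clsim_table_set_*_seed_*.fits",
        outdir="~/tables/combined",  # per-key .npy files are written here
        overwrite=False,
    )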
def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str

    outdir : str, optional
        If provided, the gcd info is saved to a .pkl file with same name as
        `gcd_file` just with extension replaced.

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coords in m rel to IceCube
            coord system
        'rde': (86, 60) array with relative DOM efficiencies
        'noise': (86, 60) array with noise rate, in Hz, for each DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir = dirname(gcd_file)
    src_gcd_basename = basename(gcd_file)

    # Strip all recognized extensions to get the "root" name. (Note that
    # chained `str.rstrip` calls are NOT usable here: rstrip removes
    # characters, not suffixes.)
    src_gcd_stripped = src_gcd_basename
    for _ in range(4):
        root, ext = splitext(src_gcd_stripped)
        if ext in ('.bz2', '.gz', '.i3', '.pkl'):
            src_gcd_stripped = root
        else:
            break

    outfname = src_gcd_stripped + '.pkl'
    data_dir_fpath = abspath(join(DATA_DIR, outfname))

    outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        outfpath = join(outdir, outfname)
        if isfile(data_dir_fpath) and data_dir_fpath != abspath(outfpath):
            copyfile(data_dir_fpath, outfpath)

    if isfile(data_dir_fpath):
        return pickle.load(open(data_dir_fpath, 'rb'))

    if outfpath is not None and isfile(outfpath):
        return pickle.load(open(outfpath, 'rb'))

    if src_gcd_dir:
        dirs = [src_gcd_dir]
    else:
        dirs = ['.']
        if 'I3_DATA' in os.environ:
            dirs.append(expanduser(expandvars('$I3_DATA/GCD')))

    compression = []
    parsed = False
    src_gcd_stripped = src_gcd_basename
    for _ in range(10):
        root, ext = splitext(src_gcd_stripped)
        if ext == '.gz':
            compression.append('gz')
            src_gcd_stripped = root
        elif ext == '.bz2':
            compression.append('bz2')
            src_gcd_stripped = root
        elif ext == '.i3':
            parsed = True
            src_gcd_stripped = root
            break
        elif ext == '.pkl':
            for src_dir in dirs:
                fpath = join(src_dir, src_gcd_stripped)
                if isfile(fpath):
                    gcd_info = pickle.load(open(fpath, 'rb'))
                    if outdir is not None and outdir != src_gcd_dir:
                        copyfile(fpath, outfpath)
                    return gcd_info

    if not parsed:
        raise ValueError(
            'Could not parse compression suffixes for GCD file "{}"'
            .format(gcd_file)
        )

    decompressed = open(gcd_file, 'rb').read()
    source_gcd_md5 = hashlib.md5(decompressed).hexdigest()
    for comp_alg in compression:
        if comp_alg == 'gz':
            decompressed = gzip.GzipFile(fileobj=StringIO(decompressed)).read()
        elif comp_alg == 'bz2':
            decompressed = bz2.decompress(decompressed)
    decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    from I3Tray import I3Units, OMKey  # pylint: disable=import-error
    from icecube import dataclasses, dataio  # pylint: disable=import-error, unused-variable

    gcd = dataio.I3File(gcd_file)  # pylint: disable=no-member
    frame = gcd.pop_frame()

    # get detector geometry
    key = 'I3Geometry'
    while key not in frame.keys():
        frame = gcd.pop_frame()
    omgeo = frame[key].omgeo

    # get calibration
    key = 'I3Calibration'
    while key not in frame.keys():
        frame = gcd.pop_frame()
    dom_cal = frame[key].dom_cal

    # create output dict
    gcd_info = OrderedDict()
    gcd_info['source_gcd_name'] = src_gcd_basename
    gcd_info['source_gcd_md5'] = source_gcd_md5
    gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5
    gcd_info['geo'] = geo = np.zeros((N_STRINGS, N_DOMS, 3))
    gcd_info['noise'] = noise = np.zeros((N_STRINGS, N_DOMS))
    gcd_info['rde'] = rde = np.zeros((N_STRINGS, N_DOMS))

    for string_idx in range(N_STRINGS):
        for dom_idx in range(N_DOMS):
            omkey = OMKey(string_idx + 1, dom_idx + 1)
            geo[string_idx, dom_idx, 0] = omgeo.get(omkey).position.x
            geo[string_idx, dom_idx, 1] = omgeo.get(omkey).position.y
            geo[string_idx, dom_idx, 2] = omgeo.get(omkey).position.z
            try:
                noise[string_idx, dom_idx] = (
                    dom_cal[omkey].dom_noise_rate / I3Units.hertz
                )
            except KeyError:
                noise[string_idx, dom_idx] = 0.0
            try:
                rde[string_idx, dom_idx] = dom_cal[omkey].relative_dom_eff
            except KeyError:
                rde[string_idx, dom_idx] = 0.0

    if outfpath is not None:
        with open(outfpath, 'wb') as outfile:
            pickle.dump(gcd_info, outfile, protocol=pickle.HIGHEST_PROTOCOL)

    return gcd_info
def generate_time_indep_tables(table, outdir=None, kinds=('clsim', 'ckv'),
                               overwrite=False):
    """Generate and save to disk time independent table(s) from the original
    CLSim table and/or a Cherenkov table.

    Parameters
    ----------
    table : string
    outdir : string, optional
    kinds : string or iterable thereof, optional
    overwrite : bool, optional

    """
    if isinstance(kinds, basestring):
        kinds = [kinds]
    kinds = [k.strip().lower() for k in kinds]

    clsim_table_path = None
    ckv_table_path = None

    table = expand(table)
    if outdir is None:
        if isdir(table):
            outdir = table
        elif table.endswith('.npy'):
            outdir = dirname(table)
        elif table.endswith('.fits'):
            outdir = table[:-len('.fits')]

    if isfile(table):
        table_basename = basename(table)
        if table_basename == 'table.npy' or table_basename.endswith('.fits'):
            clsim_table_path = table
        elif table_basename == 'ckv_table.npy':
            ckv_table_path = table
    elif isdir(table):
        if 'clsim' in kinds and isfile(join(table, 'table.npy')):
            clsim_table_path = table
        if 'ckv' in kinds and isfile(join(table, 'ckv_table.npy')):
            ckv_table_path = table

    t_indep_table_exists = False
    if 'clsim' in kinds and isfile(join(outdir, 't_indep_table.npy')):
        t_indep_table_exists = True

    t_indep_ckv_table_exists = False
    if 'ckv' in kinds and isfile(join(outdir, 't_indep_ckv_table.npy')):
        t_indep_ckv_table_exists = True

    if 'clsim' in kinds and (overwrite or not t_indep_table_exists):
        if clsim_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from CLSim table but CLSim'
                ' table does not exist.')
        print('generating t_indep_table')
        mkdir(outdir)
        t0 = time.time()
        clsim_table = load_clsim_table_minimal(clsim_table_path, mmap=True)
        t1 = time.time()
        if retro.DEBUG:
            print('loaded clsim table in {:.3f} s'.format(t1 - t0))
        t_indep_table = clsim_table['table'][1:-1, 1:-1, 1:-1, 1:-1, 1:-1].sum(axis=2)
        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))
        np.save(join(outdir, 't_indep_table.npy'), t_indep_table)
        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_table.npy to disk in {:.3f} s'.format(t3 - t2))
        del clsim_table, t_indep_table

    if 'ckv' in kinds and (overwrite or not t_indep_ckv_table_exists):
        if ckv_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from ckv table but ckv'
                ' table does not exist.')
        print('generating t_indep_ckv_table')
        mkdir(outdir)
        t0 = time.time()
        ckv_table = load_ckv_table(ckv_table_path, mmap=True)
        t1 = time.time()
        if retro.DEBUG:
            print('loaded ckv table in {:.3f} s'.format(t1 - t0))
        t_indep_ckv_table = ckv_table['ckv_table'].sum(axis=2)
        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))
        np.save(join(outdir, 't_indep_ckv_table.npy'), t_indep_ckv_table)
        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_ckv_table.npy to disk in {:.3f} s'.format(t3 - t2))
        del ckv_table, t_indep_ckv_table
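
# The core of both branches above is marginalizing time (axis 2) out of a 5D
# (r, costheta, t, costhetadir, deltaphidir) histogram; a minimal numpy sketch
# with made-up shapes (the CLSim branch additionally trims one under/overflow
# bin from each edge of every axis first).
def _demo_time_marginalization():
    table = np.random.random((5, 4, 10, 3, 3)).astype(np.float32)
    t_indep = table.sum(axis=2)  # -> shape (5, 4, 3, 3)
    assert t_indep.shape == (5, 4, 3, 3)
    return t_indep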
def generate_stacked_tables(outdir, dom_tables_kw):
    """Stack a set of tables into a single numpy array for use of all tables
    in Numba.

    Currently, only ckv_templ_compr tables are supported.

    Parameters
    ----------
    outdir : string
        Path to directory into which the three resulting files will be
        stored.

    dom_tables_kw : mapping
        As returned by retro.init_obj.parse_args

    """
    if dom_tables_kw['dom_tables_kind'] != 'ckv_templ_compr':
        raise NotImplementedError(
            '"{}" tables not supported; only "ckv_templ_compr"'
            .format(dom_tables_kw['dom_tables_kind'])
        )

    # Use the convenience function to load the single-DOM tables into a
    # retro_5d_tables.Retro5DTables object, and then we can use the loaded
    # tables from there.
    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)

    assert np.all(dom_tables.sd_idx_table_indexer >= 0)

    table_meta = OrderedDict()
    table_meta['table_kind'] = dom_tables.table_kind
    table_meta['sd_idx_table_indexer'] = dom_tables.sd_idx_table_indexer
    table_meta.update(dom_tables.table_meta)
    table_meta['n_photons'] = 1.0
    table_meta['n_photons_per_table'] = np.array(dom_tables.n_photons_per_table)

    outdir = expand(outdir)
    mkdir(outdir)

    fpath = join(outdir, 'stacked_{}_meta.pkl'.format(dom_tables.table_name))
    sys.stdout.write('Writing metadata to "{}" ...'.format(fpath))
    sys.stdout.flush()
    with open(fpath, 'wb') as fobj:
        pickle.dump(table_meta, fobj, protocol=pickle.HIGHEST_PROTOCOL)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()

    if dom_tables.compute_t_indep_exp:
        # Renormalize to 1 photon
        stacked_t_indep_tables = np.stack(
            [tbl / n for tbl, n in zip(dom_tables.t_indep_tables,
                                       dom_tables.n_photons_per_table)]
        )
        fpath = join(
            outdir, 'stacked_{}.npy'.format(dom_tables.t_indep_table_name)
        )
        sys.stdout.write('Writing stacked t_indep tables to "{}" ...'.format(fpath))
        sys.stdout.flush()
        np.save(fpath, stacked_t_indep_tables)
        sys.stdout.write(' done.\n')
        sys.stdout.flush()

    # Renormalize to 1 photon
    for template_map, n_photons in zip(dom_tables.tables,
                                       dom_tables.n_photons_per_table):
        template_map['weight'] /= n_photons

    stacked_tables = np.stack(dom_tables.tables)
    fpath = join(outdir, 'stacked_{}.npy'.format(dom_tables.table_name))
    sys.stdout.write('Writing stacked tables to "{}" ...'.format(fpath))
    sys.stdout.flush()
    np.save(fpath, stacked_tables)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()
def extract_dom_coordinates(gcd, outdir):
    """Extract the DOM coordinates from a gcd file.

    Parameters
    ----------
    gcd : string
        Path to GCD file

    outdir : string
        Path to directory into which to store the resulting .npy file
        containing the coordinates array

    """
    gcd = expanduser(expandvars(gcd))
    outdir = expanduser(expandvars(outdir))

    gcd_md5 = get_file_md5(gcd)

    print('Extracting geometry from\n  "{}"'.format(abspath(gcd)))
    print('File MD5 sum is\n  {}'.format(gcd_md5))
    print('Will output geom file and metadata file to directory\n'
          '  "{}"'.format(abspath(outdir)))

    if not isfile(gcd):
        raise IOError('`gcd` file does not exist at "{}"'.format(gcd))

    mkdir(outdir)

    geofile = dataio.I3File(gcd)  # pylint: disable=no-member
    geometry = None
    while geofile.more():
        frame = geofile.pop_frame()
        if 'I3Geometry' in frame.keys():
            geometry = frame['I3Geometry']
            break

    if geometry is None:
        raise ValueError('Could not find geometry in file "{}"'.format(gcd))

    omgeo = geometry.omgeo

    geom = np.full(shape=(N_STRINGS, N_OMS, 3), fill_value=np.nan)
    for string in range(N_STRINGS):
        for om in range(N_OMS):
            position = omgeo.get(OMKey(string + 1, om + 1)).position
            geom[string, om, :] = (position.x, position.y, position.z)

    assert np.sum(np.isnan(geom)) == 0

    geom_meta = generate_geom_meta(geom)
    geom_meta['sourcefile_path'] = gcd
    geom_meta['sourcefile_md5'] = gcd_md5

    outpath = join(outdir, GEOM_FILE_PROTO.format(**geom_meta))
    metapath = join(outdir, GEOM_META_PROTO.format(**geom_meta))

    with open(metapath, 'w') as fobj:
        json.dump(geom_meta, fobj, indent=2)
    print('Saved metadata to\n  "{}"'.format(abspath(metapath)))

    np.save(outpath, geom)
    print('Saved geom to\n  "{}"'.format(abspath(outpath)))
def plot_clsim_table_summary(summaries, formats=None, outdir=None,
                             no_legend=False):
    """Plot the table summary produced by `summarize_clsim_table`.

    Plots are made of marginalized 1D distributions, where mean, median,
    and/or max are used to marginalize out the remaining dimensions (where
    those are present in the summaries).

    Parameters
    ----------
    summaries : string, summary, or iterable thereof
        If string(s) are provided, each is glob-expanded. See
        :method:`glob.glob` for valid syntax.

    formats : None, string, or iterable of strings in {'pdf', 'png'}
        If no formats are provided, the plot will not be saved.

    outdir : None or string
        If `formats` is specified and `outdir` is None, the plots are saved
        to the present working directory.

    no_legend : bool, optional
        Do not display legend on plots (default is to display a legend)

    Returns
    -------
    all_figs : list of three :class:`matplotlib.figure.Figure`

    all_axes : list of three lists of :class:`matplotlib.axes.Axes`

    summaries : list of :class:`collections.OrderedDict`
        List of all summaries loaded

    """
    orig_summaries = deepcopy(summaries)

    if isinstance(summaries, (basestring, Mapping)):
        summaries = [summaries]

    tmp_summaries = []
    for summary in summaries:
        if isinstance(summary, Mapping):
            tmp_summaries.append(summary)
        elif isinstance(summary, basestring):
            tmp_summaries.extend(glob(expand(summary)))
    summaries = tmp_summaries

    for summary_n, summary in enumerate(summaries):
        if isinstance(summary, basestring):
            summary = from_json(summary)
            summaries[summary_n] = summary

    if formats is None:
        formats = []
    elif isinstance(formats, basestring):
        formats = [formats]

    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    n_summaries = len(summaries)

    if n_summaries == 0:
        raise ValueError(
            'No summaries found based on argument `summaries`={}'
            .format(orig_summaries)
        )

    for n, fmt in enumerate(formats):
        fmt = fmt.strip().lower()
        assert fmt in ('pdf', 'png'), fmt
        formats[n] = fmt

    all_items = OrderedDict()
    for summary in summaries:
        for key, value in summary.items():
            if key == 'dimensions':
                continue
            if key not in all_items:
                all_items[key] = []
            all_items[key].append(value)

    same_items = OrderedDict()
    different_items = OrderedDict()
    for key, values in all_items.items():
        all_same = True
        ref_value = values[0]
        for value in values[1:]:
            if np.any(value != ref_value):
                all_same = False
        if all_same:
            same_items[key] = values[0]
        else:
            different_items[key] = values

    if n_summaries > 1:
        if same_items:
            print('Same for all:\n{}'.format(same_items.keys()))
        if different_items:
            print('Different for some or all:\n{}'
                  .format(different_items.keys()))

    same_label = formatter(same_items)

    summary_has_detail = False
    if set(['string', 'depth_idx', 'seed']).issubset(all_items.keys()):
        summary_has_detail = True
        strings = sorted(set(all_items['string']))
        depths = sorted(set(all_items['depth_idx']))
        seeds = sorted(set(all_items['seed']))

    plot_kinds = ('mean', 'median', 'max')
    plot_kinds_with_data = set()
    dim_names = summaries[0]['dimensions'].keys()
    n_dims = len(dim_names)

    fig_x = 10  # inches
    fig_header_y = 0.35  # inches
    fig_one_axis_y = 5  # inches
    fig_all_axes_y = n_dims * fig_one_axis_y
    fig_y = fig_header_y + fig_all_axes_y  # inches

    all_figs = []
    all_axes = []
    for plot_kind in plot_kinds:
        fig, f_axes = plt.subplots(nrows=n_dims, ncols=1, squeeze=False,
                                   figsize=(fig_x, fig_y))
        all_figs.append(fig)
        f_axes = list(f_axes.flat)
        for ax in f_axes:
            ax.set_prop_cycle('color', COLOR_CYCLE_ORTHOG)
        all_axes.append(f_axes)

    n_lines = 0
    xlims = [[np.inf, -np.inf]] * n_dims

    summaries_order = []
    if summary_has_detail:
        for string, depth_idx, seed in product(strings, depths, seeds):
            for summary_n, summary in enumerate(summaries):
                if (summary['string'] != string
                        or summary['depth_idx'] != depth_idx
                        or summary['seed'] != seed):
                    continue
                summaries_order.append((summary_n, summary))
    else:
        for summary_n, summary in enumerate(summaries):
            summaries_order.append((summary_n, summary))

    labels_assigned = set()
    for summary_n, summary in summaries_order:
        different_label = formatter({k: v[summary_n]
                                     for k, v in different_items.items()})

        if different_label:
            label = different_label
            if label in labels_assigned:
                label = None
            else:
                labels_assigned.add(label)
        else:
            label = None

        for dim_num, dim_name in enumerate(dim_names):
            dim_info = summary['dimensions'][dim_name]
            dim_axes = [f_axes[dim_num] for f_axes in all_axes]
            bin_edges = summary[dim_name + '_bin_edges']
            if dim_name == 'deltaphidir':
                bin_edges /= np.pi
            xlims[dim_num] = [
                min(xlims[dim_num][0], np.min(bin_edges)),
                max(xlims[dim_num][1], np.max(bin_edges))
            ]
            for ax, plot_kind in zip(dim_axes, plot_kinds):
                if plot_kind not in dim_info:
                    continue
                plot_kinds_with_data.add(plot_kind)
                vals = dim_info[plot_kind]
                ax.step(bin_edges, [vals[0]] + list(vals), linewidth=1,
                        clip_on=True, label=label)
                n_lines += 1

    dim_labels = dict(
        r=r'$r$',
        costheta=r'$\cos\theta$',
        t=r'$t$',
        costhetadir=r'$\cos\theta_{\rm dir}$',
        deltaphidir=r'$\Delta\phi_{\rm dir}$'
    )
    units = dict(r='m', t='ns', deltaphidir=r'rad/$\pi$')

    logx_dims = []
    logy_dims = ['r', 'time', 'deltaphidir']

    flabel = ''
    same_flabel = formatter(same_items, fname=True)
    different_flabel = formatter(different_items, key_only=True, fname=True)
    if same_flabel:
        flabel += '__same__' + same_flabel
    if different_flabel:
        flabel += '__differ__' + different_flabel

    for kind_idx, (plot_kind, fig) in enumerate(zip(plot_kinds, all_figs)):
        if plot_kind not in plot_kinds_with_data:
            continue
        for dim_num, (dim_name, ax) in enumerate(zip(dim_names,
                                                     all_axes[kind_idx])):
            #if dim_num == 0 and different_items:
            if different_items and not no_legend:
                ax.legend(loc='best', frameon=False,
                          prop=dict(size=7, family='monospace'))

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.yaxis.set_ticks_position('none')
            ax.xaxis.set_ticks_position('none')
            ax.xaxis.tick_bottom()
            ax.yaxis.tick_left()

            ax.set_xlim(xlims[dim_num])

            xlabel = dim_labels[dim_name]
            if dim_name in units:
                xlabel += ' ({})'.format(units[dim_name])
            ax.set_xlabel(xlabel)
            if dim_name in logx_dims:
                ax.set_xscale('log')
            if dim_name in logy_dims:
                ax.set_yscale('log')

        fig.tight_layout(rect=(0, 0, 1, fig_all_axes_y / fig_y))
        suptitle = (
            'Marginalized distributions (taking {} over all other axes)'
            .format(plot_kind)
        )
        if same_label:
            suptitle += '\n' + same_label
        fig.suptitle(suptitle,
                     y=(fig_all_axes_y + fig_header_y * 0.8) / fig_y,
                     fontsize=9)

        for fmt in formats:
            outfpath = ('clsim_table_summaries{}__{}.{}'
                        .format(flabel, plot_kind, fmt))
            if outdir:
                outfpath = join(outdir, outfpath)
            fig.savefig(outfpath, dpi=300)
            print('Saved image to "{}"'.format(outfpath))

    return all_figs, all_axes, summaries
def combine_tables(table_fpaths, outdir=None, overwrite=False):
    """Combine multiple tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds (if all else besides
    n_photons is equal); if corresponding metadata files can be found in the
    same directories as the CLSim tables, this will be enforced prior to
    loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path
        and `overwrite` is False, the function simply returns without raising
        an exception.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion
    orig_table_fpaths = deepcopy(table_fpaths)
    if isinstance(table_fpaths, string_types):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp, key=nsort_key_func)

    if not table_fpaths:
        raise ValueError(
            "Found no tables given `table_fpaths` = {}".format(orig_table_fpaths)
        )

    wstderr(
        'Found {} tables to combine:\n  {}\n'.format(
            len(table_fpaths), '\n  '.join(table_fpaths)
        )
    )

    # Create the output directory
    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    # Combine the tables
    combined_table = None
    table_keys = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath, mmap=True)

        base = basename(fpath)
        rootname, ext = splitext(base)
        if ext.lstrip('.') in COMPR_EXTENSIONS:
            base = rootname
        if 'source_tables' not in table:
            table['source_tables'] = np.array([base], dtype=np.string0)

        if combined_table is None:
            combined_table = table
            table_keys = set(table.keys())

            # Formulate output file paths and check if they exist (do this on
            # the first table so we don't find out we would overwrite a file
            # only after loading all the source tables)
            if outdir is not None:
                output_fpaths = OrderedDict(
                    (
                        (k, join(outdir, k + '.npy'))
                        for k in sorted(table_keys.difference(NO_WRITE_KEYS))
                    )
                )
                if not overwrite:
                    for fp in output_fpaths.values():
                        if isfile(fp):
                            raise IOError(
                                'File at {} already exists, NOT overwriting'.format(fp)
                            )
                wstderr(
                    'Output files will be written to:\n  {}\n'.format(
                        '\n  '.join(output_fpaths.values())
                    )
                )

            continue

        # Make sure keys are the same
        new_table_keys = set(table.keys())
        missing_keys = sorted(
            table_keys.difference(new_table_keys).difference(NO_VALIDATE_KEYS)
        )
        additional_keys = sorted(
            new_table_keys.difference(table_keys).difference(NO_VALIDATE_KEYS)
        )
        if missing_keys or additional_keys:
            raise ValueError(
                'Table is missing keys {} and/or has additional keys {}'.format(
                    missing_keys, additional_keys
                )
            )

        # Validate keys that should be equal
        for key in sorted(table_keys.difference(NO_VALIDATE_KEYS)):
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal "{}" in file {}'.format(key, fpath))

        # Add values from keys that should be summed
        for key in SUM_KEYS:
            if key not in table:
                continue
            combined_table[key] += table[key]

        # Concatenate and sort new source table(s) in source_tables array
        combined_table['source_tables'] = np.sort(
            np.concatenate([combined_table['source_tables'], table['source_tables']])
        )

        # Make sure to clear table from memory since these can be quite large
        del table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        wstderr('Writing files:\n')
        len_longest_fpath = np.max([len(p) for p in output_fpaths.values()])
        for key in sorted(table_keys.difference(NO_WRITE_KEYS)):
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath.ljust(len_longest_fpath)))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({:12.3f} s)\n'.format(time() - t0))

    wstderr(
        'Total time to combine tables: {} s\n'.format(np.round(time() - t_start, 3))
    )

    return combined_table
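
# The validate-then-sum merge in the loop above is a generic pattern; a toy
# sketch with invented keys and shapes (binning must match exactly across
# tables, while per-bin counts are summed).
def _demo_validate_then_sum():
    validate_keys, sum_keys = ('bin_edges',), ('table',)
    t_a = {'bin_edges': np.linspace(0, 1, 5), 'table': np.ones(4)}
    t_b = {'bin_edges': np.linspace(0, 1, 5), 'table': 2 * np.ones(4)}
    combined = dict(t_a)
    for key in validate_keys:
        assert np.array_equal(t_b[key], combined[key]), key
    for key in sum_keys:
        combined[key] = combined[key] + t_b[key]
    return combined  # combined['table'] == [3., 3., 3., 3.]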
def generate_ckv_table(
    table,
    beta,
    oversample,
    num_cone_samples,
    outdir=None,
    mmap_src=True,
    mmap_dst=False,
):
    """
    Parameters
    ----------
    table : string or mapping
        If string, path to table file (or directory in the case of npy
        table). A mapping is assumed to be a table loaded as by
        `retro.table_readers.load_clsim_table_minimal`.

    beta : float in [0, 1]
        Beta factor, i.e. velocity of the charged particle divided by the
        speed of light in vacuum: `v/c`.

    oversample : int > 0
        Sample from each directional bin (costhetadir and deltaphidir) this
        many times. Increase to obtain a more accurate average over the range
        of directions that the resulting ckv-emitter-direction can take
        within the same output (directional) bin. Note that there is no
        unique information given by sampling (more than once) in the spatial
        dimensions, so these dimensions ignore `oversample`. Therefore, the
        computational cost is `oversample**2`.

    num_cone_samples : int > 0
        Number of samples around the circumference of the Cherenkov cone.

    outdir : string or None
        If a string, use this directory to place the .npy file containing the
        ckv table. If `outdir` is None and `table` is a .npy-file-directory,
        that directory is used for `outdir`. If `outdir` is None and `table`
        is the path to a .fits file, `outdir` is the same name but with the
        .fits extension stripped. If `outdir` is None and `table` is a
        mapping, a ValueError is raised.

    mmap_src : bool, optional
        Whether to (attempt to) memory map the source `table` (if `table` is
        a string pointing to the file/directory). Default is `True`, as
        tables can easily exceed the memory capacity of a machine.

    mmap_dst : bool, optional
        Whether to memory map the destination `ckv_table`.

    """
    input_filename = None
    if isinstance(table, string_types):
        input_filename = expand(table)
        table = load_clsim_table_minimal(input_filename, mmap=mmap_src)

    if input_filename is None and outdir is None:
        raise ValueError('You must provide an `outdir` if `table` is a python'
                         ' object (i.e. not a file or directory path).')

    # Store original table to keep binning info, etc.
    full_table = table

    if "binning" in full_table:
        costhetadir_bin_edges = full_table["binning"]["costhetadir"]
        deltaphidir_bin_edges = full_table["binning"]["deltaphidir"]
    else:
        costhetadir_bin_edges = full_table['costhetadir_bin_edges']
        deltaphidir_bin_edges = full_table['deltaphidir_bin_edges']

    n_phase = full_table['phase_refractive_index']
    cos_ckv = 1 / (n_phase * beta)
    if cos_ckv > 1:
        raise ValueError(
            'Particle moving at beta={} in medium with n_phase={} does not'
            ' produce Cherenkov light!'.format(beta, n_phase)
        )

    table = full_table["table"]

    if outdir is None:
        if isdir(input_filename):
            outdir = input_filename
        elif isfile(input_filename):
            if input_filename.endswith('.fits'):
                outdir = input_filename[:-len('.fits')]
            else:
                outdir = input_filename
            assert outdir != input_filename, str(input_filename)
    else:
        outdir = expand(outdir)

    mkdir(outdir)
    ckv_table_fpath = join(outdir, 'ckv_table.npy')

    if mmap_dst:
        # Allocate memory-mapped file
        ckv_table = np.lib.format.open_memmap(
            filename=ckv_table_fpath,
            mode='w+',
            dtype=np.float32,
            shape=table.shape,
        )
    else:
        ckv_table = np.empty(shape=table.shape, dtype=np.float32)

    try:
        convolve_table(
            src=table,
            dst=ckv_table,
            cos_ckv=cos_ckv,
            num_cone_samples=num_cone_samples,
            oversample=oversample,
            costhetadir_min=costhetadir_bin_edges.min(),
            costhetadir_max=costhetadir_bin_edges.max(),
            phidir_min=deltaphidir_bin_edges.min(),
            phidir_max=deltaphidir_bin_edges.max(),
        )
    except:
        del ckv_table
        if mmap_dst:
            remove(ckv_table_fpath)
        raise

    if not mmap_dst:
        np.save(ckv_table_fpath, ckv_table)

    return ckv_table
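
# For orientation: the Cherenkov opening angle used above follows from
# cos(theta_c) = 1 / (n_phase * beta). A quick standalone check; the n_phase
# default is a rough figure for deep glacial ice, used here for illustration.
def _demo_cherenkov_angle(beta=1.0, n_phase=1.32):
    cos_ckv = 1 / (n_phase * beta)
    return np.degrees(np.arccos(cos_ckv))  # ~40.7 deg for the defaults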
def get_retro_results(
    outdir,
    recos_root,
    events_root,
    point_estimator="median",
    recompute_estimate=False,
    overwrite=False,
    procs=None,
):
    """Extract all retro reco results from a reco directory tree, merging
    with original event information from the corresponding source events
    directory tree. Results are populated to a Pandas DataFrame, saved to
    disk, and returned to the user.

    Parameters
    ----------
    outdir : string

    recos_root : string

    events_root : string

    point_estimator : string, optional
        Must be one of `VALID_POINT_ESTIMATORS`. Set to "median" by default.

    recompute_estimate : bool, optional

    overwrite : bool, optional

    procs : int > 0 or None
        Passing None uses `multiprocessing.cpu_count()`.

    Returns
    -------
    all_events : pandas.DataFrame

    """
    t0 = time.time()

    outdir = abspath(expand(outdir))
    if not isdir(outdir):
        mkdir(outdir)
    outfile_path = join(outdir, 'reconstructed_events.pkl')
    if not overwrite and isfile(outfile_path):
        raise IOError(
            'Output file path already exists at "{}"'.format(outfile_path))

    if point_estimator not in VALID_POINT_ESTIMATORS:
        raise ValueError(
            "Point estimator must be one of {}".format(VALID_POINT_ESTIMATORS))

    assert procs is None or procs >= 1
    pool = None
    if procs is None or procs > 1:
        pool = Pool(procs)  # Pool(None) uses multiprocessing.cpu_count()

    # Walk directory hierarchy
    results = []
    for reco_dirpath, _, files in walk(recos_root, followlinks=True):
        is_leafdir = False
        for f in files:
            if f[-3:] == 'pkl' and f[:3] in ('slc', 'evt'):
                is_leafdir = True
                break
        if not is_leafdir:
            continue

        rel_dirpath = relpath(path=reco_dirpath, start=recos_root)
        event_dirpath = None
        if events_root is not None:
            event_dirpath = join(events_root, rel_dirpath)
            if not isdir(event_dirpath):
                raise IOError('Event directory does not exist: "{}"'.format(
                    event_dirpath))

        abs_reco_dirpath = abspath(reco_dirpath)
        filenum = basename(abs_reco_dirpath)
        flavdir = basename(dirname(abs_reco_dirpath))
        kwargs = dict(
            recodir=reco_dirpath,
            eventdir=event_dirpath,
            flavdir=flavdir,
            filenum=filenum,
            recompute_estimate=recompute_estimate,
            point_estimator=point_estimator,
        )
        if pool is not None:
            results.append(pool.apply_async(extract_from_leaf_dir, (), kwargs))
        else:
            results.append(extract_from_leaf_dir(**kwargs))

    if pool is not None:
        print(len(results))
        results = [r.get() for r in results]

    all_events = reduce(add, results, [])

    # Convert to pandas DataFrame
    all_events = pd.DataFrame(all_events)

    # Save to disk
    all_events.to_pickle(outfile_path)
    print('\nAll events saved to "{}"\n'.format(outfile_path))

    nevents = len(all_events)
    dt = time.time() - t0
    print('\nTook {:.3f} s to extract {} events'.format(dt, nevents))

    return all_events
def generate_ckv_tdi_table(
    tdi_table,
    beta,
    oversample,
    num_cone_samples,
    n_phase=None,
    outdir=None,
    mmap_src=True,
    mmap_dst=False,
):
    """
    Parameters
    ----------
    tdi_table : string or mapping
        If string, path to TDI table file (or directory containing a
        `tdi_table.npy` file).

    beta : float in [0, 1]
        Beta factor, i.e. velocity of the charged particle divided by the
        speed of light in vacuum: `v/c`.

    oversample : int > 0
        Sample from each directional bin (costhetadir and deltaphidir) this
        many times. Increase to obtain a more accurate average over the range
        of directions that the resulting ckv-emitter-direction can take
        within the same output (directional) bin. Note that there is no
        unique information given by sampling (more than once) in the spatial
        dimensions, so these dimensions ignore `oversample`. Therefore, the
        computational cost is `oversample**2`.

    num_cone_samples : int > 0
        Number of samples around the circumference of the Cherenkov cone.

    n_phase : float or None
        Required if `tdi_table` is an array; if `tdi_table` specifies a table
        location, then `n_phase` will be read from the `tdi_metadata.pkl`
        file.

    outdir : string or None
        If a string, use this directory to place the resulting
        `ckv_tdi_table.npy` file. This is optional if `tdi_table` specifies a
        file or directory (in which case `outdir` will be inferred from this
        path).

    mmap_src : bool, optional
        Whether to (attempt to) memory map the source `tdi_table` (if
        `tdi_table` is a string pointing to the file/directory). Default is
        `True`, as tables can easily exceed the memory capacity of a machine.

    mmap_dst : bool, optional
        Whether to memory map the destination `ckv_tdi_table.npy` file.

    """
    input_filename = None
    input_dirname = None
    if isinstance(tdi_table, string_types):
        tdi_table = expand(tdi_table)
        if isdir(tdi_table):
            input_filename = join(tdi_table, 'tdi_table.npy')
        elif isfile(tdi_table):
            input_filename = tdi_table
        else:
            raise IOError(
                '`tdi_table` is not a directory or file: "{}"'.format(tdi_table)
            )
        input_dirname = dirname(input_filename)

    if input_filename is None and outdir is None:
        raise ValueError(
            'You must provide an `outdir` if `tdi_table` is a python object'
            ' (i.e., not a file or directory path).'
        )

    if input_filename is None and n_phase is None:
        raise ValueError(
            'You must provide `n_phase` if `tdi_table` is a python object'
            ' (i.e., not a file or directory path).'
        )

    if n_phase is None:
        with open(join(input_dirname, 'tdi_metadata.pkl'), 'rb') as fobj:
            meta = pickle.load(fobj)
        n_phase = meta['n_phase']

    if outdir is None:
        outdir = input_dirname
    mkdir(outdir)

    if input_filename is not None:
        tdi_table = np.load(
            input_filename,
            mmap_mode='r' if mmap_src else None,
        )

    cos_ckv = 1 / (n_phase * beta)
    if cos_ckv > 1:
        raise ValueError(
            'Particle moving at beta={} in medium with n_phase={} does not'
            ' produce Cherenkov light!'.format(beta, n_phase)
        )

    ckv_tdi_table_fpath = join(outdir, 'ckv_tdi_table.npy')
    if isfile(ckv_tdi_table_fpath):
        print('WARNING! Destination file exists "{}"'.format(ckv_tdi_table_fpath))

    if mmap_dst:
        # Allocate memory-mapped file
        ckv_tdi_table = np.lib.format.open_memmap(
            filename=ckv_tdi_table_fpath,
            mode='w+',
            dtype=np.float32,
            shape=tdi_table.shape,
        )
    else:
        ckv_tdi_table = np.empty(shape=tdi_table.shape, dtype=np.float32)

    try:
        convolve_table(
            src=tdi_table,
            dst=ckv_tdi_table,
            cos_ckv=cos_ckv,
            num_cone_samples=num_cone_samples,
            oversample=oversample,
            costhetadir_min=-1,
            costhetadir_max=+1,
            phidir_min=-np.pi,
            phidir_max=+np.pi,
        )
    except:
        del ckv_tdi_table
        if mmap_dst:
            remove(ckv_tdi_table_fpath)
        raise

    if not mmap_dst:
        np.save(ckv_tdi_table_fpath, ckv_tdi_table)

    return ckv_tdi_table
def run_multinest( outdir, event_idx, event, dom_tables, hypo_handler, priors, importance_sampling, max_modes, const_eff, n_live, evidence_tol, sampling_eff, max_iter, seed, ): """Setup and run MultiNest on an event. See the README file from MultiNest for greater detail on parameters specific to to MultiNest (parameters from `importance_sampling` on). Parameters ---------- outdir event_idx event dom_tables, hypo_handler, priors : mapping importance_sampling max_modes const_eff n_live evidence_tol sampling_eff max_iter Note that this limit is the maximum number of sample replacements and _not_ max number of likelihoods evaluated. A replacement only occurs when a likelihood is found that exceeds the minimum likelihood among the live points. seed Returns ------- llhp : shape (num_llh,) structured array of dtype retro.LLHP_T LLH and the corresponding parameter values. mn_meta : OrderedDict Metadata used for running MultiNest, including priors, parameters, and the keyword args used to invoke the `pymultinest.run` function. """ # pylint: disable=missing-docstring # Import pymultinest here; it's a less common dependency, so other # functions / constants in this module will still be import-able w/o it. import pymultinest hits = event['hits'] hits_indexer = event['hits_indexer'] hits_summary = event['hits_summary'] priors_used = OrderedDict() prior_funcs = [] for dim_num, dim_name in enumerate(CUBE_DIMS): prior_kind, prior_params = priors[dim_name] if prior_kind is PRI_UNIFORM: # Time is special since prior is relative to hits in the event if dim_name == T: prior_params = (hits_summary['earliest_hit_time'] + prior_params[0], hits_summary['latest_hit_time'] + prior_params[1]) priors_used[dim_name] = (prior_kind, prior_params) if prior_params == (0, 1): continue #def prior_func(cube): # pylint: disable=unused-argument # pass elif np.min(prior_params[0]) == 0: maxval = np.max(prior_params) def prior_func(cube, n=dim_num, maxval=maxval): cube[n] = cube[n] * maxval else: minval = np.min(prior_params) width = np.max(prior_params) - minval def prior_func(cube, n=dim_num, width=width, minval=minval): cube[n] = cube[n] * width + minval elif prior_kind == PRI_LOG_UNIFORM: priors_used[dim_name] = (prior_kind, prior_params) log_min = np.log(np.min(prior_params)) log_width = np.log(np.max(prior_params) / np.min(prior_params)) def prior_func(cube, n=dim_num, log_width=log_width, log_min=log_min): cube[n] = exp(cube[n] * log_width + log_min) elif prior_kind == PRI_COSINE: priors_used[dim_name] = (prior_kind, prior_params) cos_min = np.min(prior_params) cos_width = np.max(prior_params) - cos_min def prior_func(cube, n=dim_num, cos_width=cos_width, cos_min=cos_min): cube[n] = acos(cube[n] * cos_width + cos_min) elif prior_kind == PRI_GAUSSIAN: priors_used[dim_name] = (prior_kind, prior_params) mean, stddev = prior_params norm = 1 / (stddev * np.sqrt(TWO_PI)) def prior_func(cube, n=dim_num, norm=norm, mean=mean, stddev=stddev): cube[n] = norm * exp(-((cube[n] - mean) / stddev)**2) elif prior_kind == PRI_LOG_NORMAL: priors_used[dim_name] = (prior_kind, prior_params) shape, loc, scale, low, high = prior_params lognorm = stats.lognorm(shape, loc, scale) def prior_func(cube, lognorm=lognorm, n=dim_num, low=low, high=high): cube[n] = np.clip(lognorm.isf(cube[n]), a_min=low, a_max=high) elif prior_kind == PRI_SPEFIT2: spe_fit_val = event['recos']['SPEFit2'][dim_name] rel_loc, scale, low, high = prior_params loc = spe_fit_val + rel_loc cauchy = stats.cauchy(loc=loc, scale=scale) if dim_name == T: low += 
hits_summary['time_window_start'] high += hits_summary['time_window_stop'] priors_used[dim_name] = (PRI_CAUCHY, (loc, scale, low, high)) def prior_func(cube, cauchy=cauchy, n=dim_num, low=low, high=high): cube[n] = np.clip(cauchy.isf(cube[n]), a_min=low, a_max=high) else: raise NotImplementedError( 'Prior "{}" not implemented.'.format(prior_kind)) prior_funcs.append(prior_func) param_values = [] log_likelihoods = [] t_start = [] report_after = 1000 def prior(cube, ndim, nparams): # pylint: disable=unused-argument """Function for pymultinest to translate the hypercube MultiNest uses (each value is in [0, 1]) into the dimensions of the parameter space. Note that the cube dimension names are defined in module variable `CUBE_DIMS` for reference elsewhere. """ for prior_func in prior_funcs: prior_func(cube) get_llh = dom_tables._get_llh # pylint: disable=protected-access dom_info = dom_tables.dom_info tables = dom_tables.tables table_norm = dom_tables.table_norm t_indep_tables = dom_tables.t_indep_tables t_indep_table_norm = dom_tables.t_indep_table_norm sd_idx_table_indexer = dom_tables.sd_idx_table_indexer time_window = np.float32(hits_summary['time_window_stop'] - hits_summary['time_window_start']) # TODO: implement logic allowing for not all DOMs to be used #hit_sd_indices = np.array( # sorted(dom_tables.use_sd_indices_set.union(hits_indexer['sd_idx'])), # dtype=np.uint32 #) hit_sd_indices = hits_indexer['sd_idx'] unhit_sd_indices = np.array(sorted( ALL_STRS_DOMS_SET.difference(hit_sd_indices)), dtype=np.uint32) # DEBUG #table_indices = [] #t_indep_indices = [] def loglike(cube, ndim, nparams): # pylint: disable=unused-argument """Function pymultinest calls to get llh values. Note that this is called _after_ `prior` has been called, so `cube` alsready contains the parameter values scaled to be in their physical ranges. 
""" if not t_start: t_start.append(time.time()) t0 = time.time() total_energy = cube[CUBE_ENERGY_IDX] track_fraction = cube[CUBE_TRACK_FRAC_IDX] if HYPO_PARAMS_T is HypoParams8D: hypo = HYPO_PARAMS_T(time=cube[CUBE_T_IDX], x=cube[CUBE_X_IDX], y=cube[CUBE_Y_IDX], z=cube[CUBE_Z_IDX], track_zenith=cube[CUBE_TRACK_ZEN_IDX], track_azimuth=cube[CUBE_TRACK_AZ_IDX], cascade_energy=total_energy * (1 - track_fraction), track_energy=total_energy * track_fraction) else: hypo = HYPO_PARAMS_T(time=cube[CUBE_T_IDX], x=cube[CUBE_X_IDX], y=cube[CUBE_Y_IDX], z=cube[CUBE_Z_IDX], track_zenith=cube[CUBE_TRACK_ZEN_IDX], track_azimuth=cube[CUBE_TRACK_AZ_IDX], cascade_energy=total_energy * (1 - track_fraction), track_energy=total_energy * track_fraction, cascade_zenith=cube[CUBE_CSCD_ZEN_IDX], cascade_azimuth=cube[CUBE_CSCD_AZ_IDX]) sources = hypo_handler.get_sources(hypo) llh = get_llh( sources=sources, hits=hits, hits_indexer=hits_indexer, unhit_sd_indices=unhit_sd_indices, sd_idx_table_indexer=sd_idx_table_indexer, time_window=time_window, dom_info=dom_info, tables=tables, table_norm=table_norm, t_indep_tables=t_indep_tables, t_indep_table_norm=t_indep_table_norm, # DEBUG #table_indices=table_indices, #t_indep_indices=t_indep_indices ) # DEBUG #print('') #with open('/tmp/get_llh.asm', 'w') as f: #print(get_llh.inspect_asm(get_llh.signatures[0])) #print('number of signatures:', len(get_llh.signatures)) #print('') #raise Exception() t1 = time.time() param_values.append(hypo) log_likelihoods.append(llh) n_calls = len(log_likelihoods) if n_calls % report_after == 0: t_now = time.time() best_idx = np.argmax(log_likelihoods) best_llh = log_likelihoods[best_idx] best_p = param_values[best_idx] print('') if HYPO_PARAMS_T is HypoParams8D: print(( 'best llh = {:.3f} @ ' '(t={:+.1f}, x={:+.1f}, y={:+.1f}, z={:+.1f},' ' zen={:.1f} deg, az={:.1f} deg, Etrk={:.1f}, Ecscd={:.1f})' ).format(best_llh, best_p.time, best_p.x, best_p.y, best_p.z, np.rad2deg(best_p.track_zenith), np.rad2deg(best_p.track_azimuth), best_p.track_energy, best_p.cascade_energy)) else: print(('best llh = {:.3f} @' ' (t={:+.1f}, x={:+.1f}, y={:+.1f}, z={:+.1f},' ' zen_trk={:.1f} deg, zen_csc={:.1f} deg,' ' az_trk={:.1f}, az_csc={:.1f},' ' Etrk={:.1f}, Ecscd={:.1f})').format( best_llh, best_p.time, best_p.x, best_p.y, best_p.z, np.rad2deg(best_p.track_zenith), np.rad2deg(best_p.cascade_zenith), np.rad2deg(best_p.track_azimuth), np.rad2deg(best_p.cascade_azimuth), best_p.track_energy, best_p.cascade_energy)) print('{} LLH computed'.format(n_calls)) print('avg time per llh: {:.3f} ms'.format( (t_now - t_start[0]) / n_calls * 1000)) print('this llh took: {:.3f} ms'.format((t1 - t0) * 1000)) print('') return llh n_dims = len(HYPO_PARAMS_T._fields) mn_kw = OrderedDict([ ('n_dims', n_dims), ('n_params', n_dims), ('n_clustering_params', n_dims), ('wrapped_params', [int('azimuth' in p.lower()) for p in CUBE_DIMS]), ('importance_nested_sampling', importance_sampling), ('multimodal', max_modes > 1), ('const_efficiency_mode', const_eff), ('n_live_points', n_live), ('evidence_tolerance', evidence_tol), ('sampling_efficiency', sampling_eff), ('null_log_evidence', -1e90), ('max_modes', max_modes), ('mode_tolerance', -1e90), ('seed', seed), ('log_zero', -1e100), ('max_iter', max_iter), ]) mn_meta = OrderedDict([ ('params', CUBE_DIMS), ('original_prior_specs', priors), ('priors_used', priors_used), ('time_window', time_window), ('kwargs', sort_dict(mn_kw)), ]) outdir = expand(outdir) mkdir(outdir) out_prefix = join(outdir, 'evt{}-'.format(event_idx)) print('Output files 
prefix: "{}"\n'.format(out_prefix)) print('Runing MultiNest...') t0 = time.time() pymultinest.run(LogLikelihood=loglike, Prior=prior, verbose=True, outputfiles_basename=out_prefix, resume=False, write_output=True, n_iter_before_update=5000, **mn_kw) t1 = time.time() llhp = np.empty(shape=len(param_values), dtype=LLHP_T) llhp['llh'] = log_likelihoods llhp[list(HYPO_PARAMS_T._fields)] = param_values llhp_outf = out_prefix + 'llhp.npy' print('Saving llhp to "{}"...'.format(llhp_outf)) np.save(llhp_outf, llhp) mn_meta['num_llhp'] = len(param_values) mn_meta['run_time'] = t1 - t0 mn_meta_outf = out_prefix + 'multinest_meta.pkl' print('Saving MultiNest metadata to "{}"'.format(mn_meta_outf)) pickle.dump(mn_meta, open(mn_meta_outf, 'wb'), protocol=pickle.HIGHEST_PROTOCOL) # DEBUG #table_indices_outf = out_prefix + 'table_indices.pkl' #pickle.dump(table_indices, open(table_indices_outf, 'wb'), # protocol=pickle.HIGHEST_PROTOCOL) #t_indep_table_indices_outf = out_prefix + 't_indep_table_indices.pkl' #pickle.dump(t_indep_indices, open(t_indep_table_indices_outf, 'wb'), # protocol=pickle.HIGHEST_PROTOCOL) return llhp, mn_meta
def generate_clsim_table(
    outdir,
    gcd,
    ice_model,
    angular_sensitivity,
    disable_tilt,
    disable_anisotropy,
    string,
    dom,
    n_events,
    seed,
    coordinate_system,
    binning,
    tableset_hash=None,
    tile=None,
    overwrite=False,
    compress=False,
):
    """Generate a CLSim table.

    See wiki.icecube.wisc.edu/index.php/Ice for information about ice models.

    Parameters
    ----------
    outdir : string

    gcd : string

    ice_model : str
        E.g. "spice_mie", "spice_lea", ...

    angular_sensitivity : str
        E.g. "h2-50cm", "9" (which is equivalent to "new25")

    disable_tilt : bool
        Whether to force no layer tilt in simulation (if tilt is present in
        bulk ice model; otherwise, this has no effect)

    disable_anisotropy : bool
        Whether to force no bulk ice anisotropy (if anisotropy is present in
        bulk ice model; otherwise, this has no effect)

    string : int in [1, 86]

    dom : int in [1, 60]

    n_events : int > 0
        Note that the number of photons is much larger than the number of
        events (related to the "brightness" of the defined source).

    seed : int in [0, 2**32)
        Seed for CLSim's random number generator

    coordinate_system : string in {"spherical", "cartesian"}
        If spherical, base coordinate system is

        .. ::

            (r, theta, phi, t, costhetadir, (optionally abs)deltaphidir)

        If Cartesian, base coordinate system is

        .. ::

            (x, y, z, costhetadir, phidir)

        but if any of the coordinate axes are specified to have 0 bins, they
        will be omitted (the overall order is maintained).

    binning : mapping
        If `coordinate_system` is "spherical", keys should be:
            "n_r_bins"
            "n_t_bins"
            "n_costheta_bins"
            "n_phi_bins"
            "n_costhetadir_bins"
            "n_deltaphidir_bins"
            "r_max"
            "r_power"
            "t_max"
            "t_power"
            "deltaphidir_power"
        If `coordinate_system` is "cartesian", keys should be:
            "n_x_bins"
            "n_y_bins"
            "n_z_bins"
            "n_costhetadir_bins"
            "n_phidir_bins"
            "x_min"
            "x_max"
            "y_min"
            "y_max"
            "z_min"
            "z_max"

    tableset_hash : str, optional
        Specify if the table is a tile used to generate a larger table

    tile : int >= 0, optional
        Specify if the table is a tile used to generate a larger table

    overwrite : bool, optional
        Whether to overwrite an existing table (default: False)

    compress : bool, optional
        Whether to pass the resulting table through zstandard compression
        (default: False)

    Raises
    ------
    ValueError
        If `compress` is True but the `zstd` command-line utility cannot be
        found

    AssertionError, ValueError
        If illegal argument values are passed

    ValueError
        If `overwrite` is False and a table already exists at the target path

    Notes
    -----
    Binnings are as follows:
        * Radial binning is regular in the space of r**(1/r_power), with
          `n_r_bins` spanning from 0 to `r_max` meters.
        * Time binning is regular in the space of t**(1/t_power), with
          `n_t_bins` spanning from 0 to `t_max` nanoseconds.
        * Position zenith angle is binned regularly in the cosine of the
          zenith angle, with `n_costheta_bins` spanning from -1 to +1.
        * Position azimuth angle is binned regularly, with `n_phi_bins`
          spanning from -pi to pi radians.
        * Photon directionality zenith angle (relative to the IceCube
          coordinate system) is binned regularly in cosine-zenith space, with
          `n_costhetadir_bins` spanning from `costhetadir_min` to
          `costhetadir_max`.
        * Photon directionality azimuth angle is sometimes assumed to be
          symmetric about the line from the DOM to the center of the bin, in
          which case it is binned as an absolute value, i.e., from 0 to pi
          radians. Otherwise, it is binned from -pi to +pi.

    The following are forced upon the above binning specifications (and
    remaining parameters are specified as arguments to the function):
        * t_min = 0 (ns)
        * r_min = 0 (m)
        * costheta_min = -1
        * costheta_max = 1
        * phi_min = -pi (rad)
        * phi_max = pi (rad)
        * costhetadir_min = -1
        * costhetadir_max = 1
        * deltaphidir_min = 0 (rad)
        * deltaphidir_max = pi (rad)

    """
    assert isinstance(n_events, Integral) and n_events > 0
    assert isinstance(seed, Integral) and 0 <= seed < 2**32
    assert ((tableset_hash is not None and tile is not None)
            or (tableset_hash is None and tile is None))

    n_bins_per_dim = []
    for key, val in binning.items():
        if not key.startswith('n_'):
            continue
        assert isinstance(val, Integral), '{} not an integer'.format(key)
        assert val >= 0, '{} must be >= 0'.format(key)
        n_bins_per_dim.append(val)

    # Note: + 2 accounts for under & overflow bins in each dimension
    n_bins = np.prod([n + 2 for n in n_bins_per_dim if n > 0])
    assert n_bins > 0

    #if n_bins > 2**32:
    #    raise ValueError(
    #        'The flattened bin index in CLSim is represented by uint32 which'
    #        ' has a max of 4 294 967 296, but the binning specified comes to'
    #        ' {} bins ({} times too many).'
    #        .format(n_bins, n_bins / 2**32)
    #    )

    ice_model = ice_model.strip()
    angular_sensitivity = angular_sensitivity.strip()
    # For now, the hole ice model is hard-coded in our CLSim branch; see
    # clsim/private/clsim/I3CLSimLightSourceToStepConverterFlasher.cxx
    # in the branch you're using to check that this is correct
    assert angular_sensitivity == 'flasher_p1_0.30_p2_-1'

    gcd_info = extract_gcd(gcd)

    if compress and not any(
            access(join(path, 'zstd'), X_OK)
            for path in environ['PATH'].split(pathsep)):
        raise ValueError('`zstd` command not found in path')

    outdir = expand(outdir)
    mkdir(outdir)

    axes = OrderedDict()
    binning_kw = OrderedDict()

    # Note that the actual binning in CLSim is performed using float32, so we
    # first "truncate" all values to that precision. However, the `LinearAxis`
    # function requires Python floats (which are 64 bits), so we have to
    # convert all values to `float` when passing them as kwargs to
    # `LinearAxis` (and presumably the values will be re-truncated to float32
    # within the CLSim code somewhere). Hopefully, following this procedure,
    # the values actually used within CLSim are the ones we intend.
ftype = np.float32 if coordinate_system == 'spherical': binning['t_min'] = ftype(0) # ns binning['r_min'] = ftype(0) # meters costheta_min = ftype(-1.0) costheta_max = ftype(1.0) # See # clsim/resources/kernels/spherical_coordinates.c.cl # in the branch you're using to check that the following are correct phi_min = ftype(3.0543261766433716e-01) phi_max = ftype(6.5886182785034180e+00) binning['costhetadir_min'] = ftype(-1.0) binning['costhetadir_max'] = ftype(1.0) binning['deltaphidir_min'] = ftype(-3.1808626651763916e+00) binning['deltaphidir_max'] = ftype(3.1023228168487549e+00) if binning['n_r_bins'] > 0: assert isinstance(binning['r_power'], Integral) and binning['r_power'] > 0 r_binning_kw = OrderedDict([ ('min', float(binning['r_min'])), ('max', float(binning['r_max'])), ('n_bins', int(binning['n_r_bins'])), ]) if binning['r_power'] == 1: axes['r'] = LinearAxis(**r_binning_kw) else: r_binning_kw['power'] = int(binning['r_power']) axes['r'] = PowerAxis(**r_binning_kw) binning_kw['r'] = r_binning_kw if binning['n_costheta_bins'] > 0: costheta_binning_kw = OrderedDict([ ('min', float(costheta_min)), ('max', float(costheta_max)), ('n_bins', int(binning['n_costheta_bins'])), ]) axes['costheta'] = LinearAxis(**costheta_binning_kw) binning_kw['costheta'] = costheta_binning_kw if binning['n_phi_bins'] > 0: phi_binning_kw = OrderedDict([ ('min', float(phi_min)), ('max', float(phi_max)), ('n_bins', int(binning['n_phi_bins'])), ]) axes['phi'] = LinearAxis(**phi_binning_kw) binning_kw['phi'] = phi_binning_kw if binning['n_t_bins'] > 0: assert isinstance(binning['t_power'], Integral) and binning['t_power'] > 0 t_binning_kw = OrderedDict([ ('min', float(binning['t_min'])), ('max', float(binning['t_max'])), ('n_bins', int(binning['n_t_bins'])), ]) if binning['t_power'] == 1: axes['t'] = LinearAxis(**t_binning_kw) else: t_binning_kw['power'] = int(binning['t_power']) axes['t'] = PowerAxis(**t_binning_kw) binning_kw['t'] = t_binning_kw if binning['n_costhetadir_bins'] > 0: costhetadir_binning_kw = OrderedDict([ ('min', float(binning['costhetadir_min'])), ('max', float(binning['costhetadir_max'])), ('n_bins', int(binning['n_costhetadir_bins'])), ]) axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw) binning_kw['costhetadir'] = costhetadir_binning_kw if binning['n_deltaphidir_bins'] > 0: assert (isinstance(binning['deltaphidir_power'], Integral) and binning['deltaphidir_power'] > 0) deltaphidir_binning_kw = OrderedDict([ ('min', float(binning['deltaphidir_min'])), ('max', float(binning['deltaphidir_max'])), ('n_bins', int(binning['n_deltaphidir_bins'])), ]) if binning['deltaphidir_power'] == 1: axes['deltaphidir'] = LinearAxis(**deltaphidir_binning_kw) else: deltaphidir_binning_kw['power'] = int( binning['deltaphidir_power']) axes['deltaphidir'] = PowerAxis(**deltaphidir_binning_kw) binning_kw['deltaphidir'] = deltaphidir_binning_kw elif coordinate_system == 'cartesian': binning['t_min'] = ftype(0) # ns binning['costhetadir_min'], binning['costhetadir_max'] = ftype( -1.0), ftype(1.0) binning['phidir_min'], binning['phidir_max'] = ftype(-np.pi), ftype( np.pi) # rad if binning['n_x_bins'] > 0: x_binning_kw = OrderedDict([ ('min', float(binning['x_min'])), ('max', float(binning['x_max'])), ('n_bins', int(binning['n_x_bins'])), ]) axes['x'] = LinearAxis(**x_binning_kw) binning_kw['x'] = x_binning_kw if binning['n_y_bins'] > 0: y_binning_kw = OrderedDict([ ('min', float(binning['y_min'])), ('max', float(binning['y_max'])), ('n_bins', int(binning['n_y_bins'])), ]) axes['y'] = LinearAxis(**y_binning_kw) 
binning_kw['y'] = y_binning_kw if binning['n_z_bins'] > 0: z_binning_kw = OrderedDict([ ('min', float(binning['z_min'])), ('max', float(binning['z_max'])), ('n_bins', int(binning['n_z_bins'])), ]) axes['z'] = LinearAxis(**z_binning_kw) binning_kw['z'] = z_binning_kw if binning['n_t_bins'] > 0: assert isinstance(binning['t_power'], Integral) and binning['t_power'] > 0 t_binning_kw = OrderedDict([ ('min', float(binning['t_min'])), ('max', float(binning['t_max'])), ('n_bins', int(binning['n_t_bins'])), ]) if binning['t_power'] == 1: axes['t'] = LinearAxis(**t_binning_kw) else: t_binning_kw['power'] = int(binning['t_power']) axes['t'] = PowerAxis(**t_binning_kw) binning_kw['t'] = t_binning_kw if binning['n_costhetadir_bins'] > 0: costhetadir_binning_kw = OrderedDict([ ('min', float(binning['costhetadir_min'])), ('max', float(binning['costhetadir_max'])), ('n_bins', int(binning['n_costhetadir_bins'])), ]) axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw) binning_kw['costhetadir'] = costhetadir_binning_kw if binning['n_phidir_bins'] > 0: phidir_binning_kw = OrderedDict([ ('min', float(binning['phidir_min'])), ('max', float(binning['phidir_max'])), ('n_bins', int(binning['n_phidir_bins'])), ]) axes['phidir'] = LinearAxis(**phidir_binning_kw) binning_kw['phidir'] = phidir_binning_kw binning_order = BINNING_ORDER[coordinate_system] missing_dims = set(axes.keys()).difference(binning_order) if missing_dims: raise ValueError( '`binning_order` specified is {} but is missing dimension(s) {}'. format(binning_order, missing_dims)) axes_ = OrderedDict() binning_kw_ = OrderedDict() for dim in binning_order: if dim in axes: axes_[dim] = axes[dim] binning_kw_[dim] = binning_kw[dim] axes = axes_ binning_kw = binning_kw_ # NOTE: use SphericalAxes even if we're actually binning Cartesian since we # don't care how it handles e.g. volumes, and Cartesian isn't implemented # in CLSim yet axes = SphericalAxes(axes.values()) # Construct metadata initially with items that will be hashed metadata = OrderedDict([ ('source_gcd_i3_md5', gcd_info['source_gcd_i3_md5']), ('coordinate_system', coordinate_system), ('binning_kw', binning_kw), ('ice_model', ice_model), ('angular_sensitivity', angular_sensitivity), ('disable_tilt', disable_tilt), ('disable_anisotropy', disable_anisotropy) ]) # TODO: this is hard-coded in our branch of CLSim; make parameter & fix here! 
if 't' in binning:
        metadata['t_is_residual_time'] = True

    if tableset_hash is None:
        hash_val = hash_obj(metadata, fmt='hex')[:8]
        print('derived hash:', hash_val)
    else:
        hash_val = tableset_hash
        print('tableset_hash:', hash_val)
    metadata['hash_val'] = hash_val
    if tile is not None:
        metadata['tile'] = tile

    dom_spec = OrderedDict([('string', string), ('dom', dom)])

    if 'depth_idx' in dom_spec and ('subdet' in dom_spec or 'string' in dom_spec):
        if 'subdet' in dom_spec:
            dom_spec['string'] = dom_spec.pop('subdet')
        string = dom_spec['string']
        depth_idx = dom_spec['depth_idx']

        if isinstance(string, str):
            # In this case `string` holds a subdetector name ("ic" or "dc")
            subdet = string.lower()
            dom_x, dom_y = 0, 0
            ic_avg_z, dc_avg_z = get_average_dom_z_coords(gcd_info['geo'])
            if subdet == 'ic':
                dom_z = ic_avg_z[depth_idx]
            elif subdet == 'dc':
                dom_z = dc_avg_z[depth_idx]
            else:
                raise ValueError('Unrecognized subdetector {}'.format(subdet))
        else:
            dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, depth_idx]

        metadata['string'] = string
        metadata['depth_idx'] = depth_idx

        if tile is not None:
            raise ValueError(
                'Cannot produce tiled tables using "depth_idx"-style table'
                ' groupings; use "string"/"dom"-style tables instead.')

        clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[1]
        clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[0]

        print('Subdetector {}, depth index {} (z_avg = {} m)'.format(
            subdet, depth_idx, dom_z))

    elif 'string' in dom_spec and 'dom' in dom_spec:
        string = dom_spec['string']
        dom = dom_spec['dom']
        dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, dom - 1]
        metadata['string'] = string
        metadata['dom'] = dom
        if tile is None:
            clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[2]
            clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[1]
        else:
            clsim_table_fname_proto = CLSIM_TABLE_TILE_FNAME_PROTO[-1]
            clsim_table_metaname_proto = CLSIM_TABLE_TILE_METANAME_PROTO[-1]
        print(
            'GCD = "{}"\nString {}, dom {}: (x, y, z) = ({}, {}, {}) m'.format(
                gcd, string, dom, dom_x, dom_y, dom_z))
    else:
        raise ValueError('Cannot understand `dom_spec` {}'.format(dom_spec))

    # Until someone figures out DOM tilt and ice column / bubble column /
    # cable orientations for sure, we'll just set DOM orientation to
    # zenith=pi, azimuth=0.
    dom_zenith = np.pi
    dom_azimuth = 0.0

    # Now add other metadata items that are useful but not used for hashing
    metadata['dom_x'] = dom_x
    metadata['dom_y'] = dom_y
    metadata['dom_z'] = dom_z
    metadata['dom_zenith'] = dom_zenith
    metadata['dom_azimuth'] = dom_azimuth
    metadata['seed'] = seed
    metadata['n_events'] = n_events

    metapath = join(outdir, clsim_table_metaname_proto.format(**metadata))
    tablepath = join(outdir, clsim_table_fname_proto.format(**metadata))

    # Save metadata as a JSON file (so it's human-readable by any tool, not
    # just Python--in contrast to e.g. pickle files)
    with open(metapath, 'w') as fhandle:
        json.dump(metadata, fhandle, sort_keys=False, indent=4)

    print('=' * 80)
    print('Metadata for the table set was written to\n "{}"'.format(metapath))
    print('Table will be written to\n "{}"'.format(tablepath))
    print('=' * 80)

    exists_at = []
    for fpath in [tablepath, tablepath + '.zst']:
        if isfile(fpath):
            exists_at.append(fpath)

    if exists_at:
        names = ', '.join('"{}"'.format(fp) for fp in exists_at)
        if overwrite:
            print('WARNING! 
Deleting existing table(s) at ' + names) for fpath in exists_at: remove(fpath) else: raise ValueError('Table(s) already exist at {}; not' ' overwriting.'.format(names)) print('') tray = I3Tray() tray.AddSegment( TabulateRetroSources, 'TabulateRetroSources', source_gcd_i3_md5=gcd_info['source_gcd_i3_md5'], binning_kw=binning_kw, axes=axes, ice_model=ice_model, angular_sensitivity=angular_sensitivity, disable_tilt=disable_tilt, disable_anisotropy=disable_anisotropy, hash_val=hash_val, dom_spec=dom_spec, dom_x=dom_x, dom_y=dom_y, dom_z=dom_z, dom_zenith=dom_zenith, dom_azimuth=dom_azimuth, seed=seed, n_events=n_events, tablepath=tablepath, tile=tile, record_errors=False, ) logging.set_level_for_unit('I3CLSimStepToTableConverter', 'TRACE') logging.set_level_for_unit('I3CLSimTabulatorModule', 'DEBUG') logging.set_level_for_unit('I3CLSimLightSourceToStepConverterGeant4', 'TRACE') logging.set_level_for_unit('I3CLSimLightSourceToStepConverterFlasher', 'TRACE') tray.Execute() tray.Finish() if compress: print('Compressing table with zstandard via command line') print(' zstd -1 --rm "{}"'.format(tablepath)) subprocess.check_call(['zstd', '-1', '--rm', tablepath]) print('done.')
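
# Hedged illustration of the power-law binning used above: a PowerAxis with
# power `p` spaces its bin edges regularly in r**(1/p), clustering edges near
# the DOM where the photon field varies fastest. This is a pure-numpy sketch
# of the resulting edges, independent of CLSim's own PowerAxis class:
def power_axis_edges(lower, upper, n_bins, power):
    """Return n_bins + 1 edges regular in x**(1/power) over [lower, upper]."""
    import numpy as np
    return np.linspace(lower**(1.0 / power), upper**(1.0 / power),
                       n_bins + 1)**power

# e.g. power_axis_edges(0, 400, 200, 2) starts [0., 0.01, 0.04, 0.09, ...]:
# sub-centimeter bins near the DOM, widening to ~4 m bins out at r_max.
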
def deltaphidir_to_absdeltaphidir(input_file, output_file):
    """
    Parameters
    ----------
    input_file : str
        Path to input file (table)

    output_file : str
        Path to output file (table)

    """
    dim_name = "deltaphidir"

    input_file = expand(input_file)
    output_file = expand(output_file)
    input_dir = dirname(input_file)
    output_dir = dirname(output_file)
    if abspath(output_dir) == abspath(input_dir):
        raise ValueError("Will not allow output dir to be same as input dir")
    if not isdir(output_dir):
        mkdir(output_dir)

    input_table = np.load(input_file, mmap_mode="r")
    input_binning = np.load(join(input_dir, "binning.npy"))

    dim_num = list(input_binning.dtype.names).index(dim_name)

    output_dtype_spec = []
    output_bin_edges = []
    for dim_descr in input_binning.dtype.descr:
        dname, dt, shape = dim_descr
        orig_dim_be = input_binning[dname]
        if dname == dim_name:
            be_in_pi = ((orig_dim_be + np.pi) % (2 * np.pi)) - np.pi
            closest_be_to_zero = np.min(np.abs(be_in_pi))
            if np.isclose(closest_be_to_zero, 0):
                raise NotImplementedError()
            else:
                output_dim_shape = (int((shape[0] - 1) / 2 + 1), )
                output_dim_be = (np.abs(orig_dim_be[orig_dim_be < 0])[::-1]
                                 - np.mean(np.diff(orig_dim_be)) / 2)
                output_dim_be -= output_dim_be[0]
                output_dim_be /= output_dim_be[-1] / np.pi
                output_bin_edges.append(tuple(output_dim_be.tolist()))
                output_dtype_spec.append((dname, dt, output_dim_shape))
        else:
            output_dtype_spec.append(dim_descr)
            output_bin_edges.append(orig_dim_be.tolist())

    output_binning = np.array(tuple(output_bin_edges), dtype=output_dtype_spec)

    output_shape = tuple(dim_spec[2][0] - 1
                         for dim_spec in output_binning.dtype.descr)
    output_table = np.zeros(shape=output_shape, dtype=np.float64)

    mapping = []
    for input_bin_idx, (input_le, input_ue) in enumerate(
            zip(input_binning[dim_name][:-1], input_binning[dim_name][1:])):
        input_wid = input_ue - input_le
        for output_bin_idx, (output_le, output_ue) in enumerate(
                zip(output_binning[dim_name][:-1],
                    output_binning[dim_name][1:])):
            overlap_fract = 0.
            for sign in [-1, +1]:
                if sign > 0:
                    actual_output_le = output_le
                else:
                    actual_output_le = -output_ue

                # Compute input bin edges relative to the lower output bin
                # edge
                input_rel_le = ((input_le - actual_output_le)
                                + np.pi) % (2 * np.pi) - np.pi
                input_rel_ue = ((input_ue - actual_output_le)
                                + np.pi) % (2 * np.pi) - np.pi

                output_wid = abs(output_ue - output_le)

                input_clipped_rel_edges = np.clip(
                    [input_rel_le, input_rel_ue],
                    a_min=0,
                    a_max=output_wid,
                )

                overlap_fract = np.diff(input_clipped_rel_edges)[0] / input_wid
                if overlap_fract > 0:
                    dupe_idx = None
                    for idx, (obi, ibi, ofr) in enumerate(mapping):
                        if obi == output_bin_idx and ibi == input_bin_idx:
                            dupe_idx = idx
                            overlap_fract += ofr
                    entry = (output_bin_idx, input_bin_idx, overlap_fract)
                    if dupe_idx is None:
                        mapping.append(entry)
                    else:
                        mapping[dupe_idx] = entry

    output_slicer = [slice(None) for _ in output_binning.dtype.names]
    input_slicer = [slice(None) for _ in input_binning.dtype.names]
    for output_bin_idx, input_bin_idx, overlap_fract in mapping:
        output_slicer[dim_num] = output_bin_idx
        input_slicer[dim_num] = input_bin_idx
        # Index with tuples: indexing ndarrays with lists of slices is
        # deprecated in numpy
        output_table[tuple(output_slicer)] += (
            overlap_fract * input_table[tuple(input_slicer)])

    # Save the binning to the output directory
    np.save(join(output_dir, "binning.npy"), output_binning)

    # Legacy way of storing bin edges: store each dim individually
    for d_name in output_binning.dtype.names:
        bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(d_name))
        np.save(bin_edges_fpath, output_binning[d_name])

    # Save the table
    np.save(output_file, output_table)
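
# Hedged sanity check for the deltaphidir folding above: every input bin's
# content is redistributed with overlap fractions that sum to one, so the
# folded table should conserve the total of the original. The helper name and
# the reuse of the same file layout are assumptions, not an established API:
def check_folding_conserves_total(input_file, output_file, rtol=1e-6):
    """Assert the folded table's total matches the input table's total."""
    import numpy as np
    in_total = np.load(input_file, mmap_mode="r").sum(dtype=np.float64)
    out_total = np.load(output_file, mmap_mode="r").sum(dtype=np.float64)
    assert np.isclose(in_total, out_total, rtol=rtol), (in_total, out_total)
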
def scan_llh(dom_tables_kw, hypo_kw, events_kw, scan_kw):
    """Script "main" function"""
    t00 = time.time()

    scan_values = []
    for dim in HYPO_PARAMS_T._fields:
        val_str = ''.join(scan_kw.pop(dim))
        val_str = val_str.lower().replace('pi', format(np.pi, '.17e'))
        scan_values.append(hrlist2list(val_str))

    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)
    hypo_handler = init_obj.setup_discrete_hypo(**hypo_kw)
    events_generator = init_obj.get_events(**events_kw)

    # Pop 'outdir' from `scan_kw` since we don't want to store this info in
    # the metadata dict.
    outdir = expand(scan_kw.pop('outdir'))
    mkdir(outdir)

    print('Scanning parameters')
    t0 = time.time()

    fast_llh = True

    if fast_llh:
        get_llh = dom_tables._get_llh
        dom_info = dom_tables.dom_info
        tables = dom_tables.tables
        table_norm = dom_tables.table_norm
        t_indep_tables = dom_tables.t_indep_tables
        t_indep_table_norm = dom_tables.t_indep_table_norm
        sd_idx_table_indexer = dom_tables.sd_idx_table_indexer
        metric_kw = {}

        def metric_wrapper(hypo, hits, hits_indexer, unhit_sd_indices,
                           time_window):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources,
                           hits=hits,
                           hits_indexer=hits_indexer,
                           unhit_sd_indices=unhit_sd_indices,
                           sd_idx_table_indexer=sd_idx_table_indexer,
                           time_window=time_window,
                           dom_info=dom_info,
                           tables=tables,
                           table_norm=table_norm,
                           t_indep_tables=t_indep_tables,
                           t_indep_table_norm=t_indep_table_norm)
    else:
        metric_kw = dict(dom_tables=dom_tables, tdi_table=None)
        get_llh = likelihood.get_llh

        def metric_wrapper(hypo, **metric_kw):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources, **metric_kw)

    n_points_total = 0
    metric_vals = []
    for _, event in events_generator:
        hits = event['hits']
        hits_indexer = event['hits_indexer']
        hits_summary = event['hits_summary']
        metric_kw['hits'] = hits
        metric_kw['hits_indexer'] = hits_indexer
        hit_sd_indices = hits_indexer['sd_idx']
        unhit_sd_indices = np.array(sorted(
            ALL_STRS_DOMS_SET.difference(hit_sd_indices)),
                                    dtype=np.uint32)
        metric_kw['unhit_sd_indices'] = unhit_sd_indices
        metric_kw['time_window'] = np.float32(
            hits_summary['time_window_stop'] -
            hits_summary['time_window_start'])

        t1 = time.time()
        metric_vals.append(scan(scan_values, metric_wrapper, metric_kw))

        dt = time.time() - t1
        n_points = metric_vals[-1].size
        n_points_total += n_points
        print(' ---> {:.3f} s, {:d} points ({:.3f} ms per LLH)'.format(
            dt, n_points, dt / n_points * 1e3))

    dt = time.time() - t0

    info = OrderedDict([
        ('hypo_params', HYPO_PARAMS_T._fields),
        ('scan_values', scan_values),
        ('metric_name', 'llh'),
        ('metric_vals', metric_vals),
        ('scan_kw', sort_dict(scan_kw)),
        ('dom_tables_kw', sort_dict(dom_tables_kw)),
        ('hypo_kw', sort_dict(hypo_kw)),
        ('events_kw', sort_dict(events_kw)),
    ])

    outfpath = join(outdir, 'scan.pkl')
    print('Saving results in pickle file, path "{}"'.format(outfpath))
    pickle.dump(info, open(outfpath, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

    print('Total time to scan: {:.3f} s; {:.3f} ms avg per LLH'.format(
        time.time() - t00, dt / n_points_total * 1e3))

    return metric_vals, info
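
# Hedged sketch of the contract assumed for `scan` above: evaluate
# `metric_wrapper` on the Cartesian product of the per-dimension value lists
# and return an array shaped like the grid. This stand-in is ours and is not
# retro's actual `scan` implementation:
def scan_sketch(scan_values, metric, metric_kw):
    """Evaluate `metric` over the grid defined by `scan_values`."""
    import itertools
    import numpy as np
    out = np.empty(tuple(len(v) for v in scan_values), dtype=np.float64)
    # itertools.product varies the last dimension fastest, matching the
    # C-order flat indexing of `out`
    for flat_idx, point in enumerate(itertools.product(*scan_values)):
        out.flat[flat_idx] = metric(HYPO_PARAMS_T(*point), **metric_kw)
    return out
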
def produce_arrays( indir, outdir, pulse_series, processes=None, ): """ Parameters ---------- indir outdir pulse_series processes : None or int > 0, optional """ if outdir is not None: outdir = expand(outdir) mkdir(outdir) if processes is None: processes = cpu_count() assert processes >= 1 serial = processes == 1 if not serial: pool = Pool(processes=processes) # -- Define a closure as callback function -- # # Capture the following (must be non-scalar to be persistent between calls # of function) events_arrays = [] doms_arrays = [] pulses_arrays = [] dom_idx0 = [0] pulses_idx0 = [0] def concatenate_results(result): """Closure""" if result is None: return events_array, doms_array, pulses_array = result if len(events_arrays) > 0: events_array["dom_idx0"] += dom_idx0[0] doms_array["pulses_idx0"] += pulses_idx0[0] events_arrays.append(events_array) doms_arrays.append(doms_array) pulses_arrays.append(pulses_array) dom_idx0[0] = events_array[-1]["dom_idx0"] + events_array[-1][ "num_hit_doms"] pulses_idx0[ 0] = doms_array[-1]["pulses_idx0"] + doms_array[-1]["num_pulses"] # -- Find leaf directories to process -- # args = tuple() for dirpath, dirs_, files in walk(indir, followlinks=True): if "events.npy" in files: dirs_.clear() else: dirs_.sort(key=nsort_key_func) continue kwargs = dict(events_dirpath=dirpath, pulse_series=pulse_series) if serial: result = process_events_dir(*args, **kwargs) concatenate_results(result) else: pool.apply_async( process_events_dir, args, kwargs, concatenate_results, ) if not serial: pool.close() pool.join() if len(events_arrays) == 0: assert len(doms_arrays) == 0 assert len(pulses_arrays) == 0 print("no events found in `indir`:", indir) return None events_array = np.concatenate(events_arrays) doms_array = np.concatenate(doms_arrays) pulses_array = np.concatenate(pulses_arrays) if outdir is not None: np.save(join(outdir, "{}__events_array.npy".format(pulse_series)), events_array) np.save(join(outdir, "{}__doms_array.npy".format(pulse_series)), doms_array) np.save(join(outdir, "{}__pulses_array.npy".format(pulse_series)), pulses_array) return events_array, doms_array, pulses_array
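
# Hedged toy version of the offset bookkeeping done in `concatenate_results`
# above: when chunks carry "pointer" columns (dom_idx0, pulses_idx0) that
# index into companion arrays, each chunk's pointers must be rebased by the
# lengths accumulated so far before concatenation. Names are illustrative
# only:
def rebase_and_concat(chunks):
    """`chunks` is an iterable of (ptrs, payload) pairs, where `ptrs` indexes
    into that chunk's own `payload`; returns globally valid pointers."""
    import numpy as np
    rebased, offset = [], 0
    for ptrs, payload in chunks:
        rebased.append(np.asarray(ptrs) + offset)
        offset += len(payload)
    return np.concatenate(rebased)
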
def generate_clsim_table(subdet, depth_idx, nevts, seed, tilt, r_max, r_power,
                         n_r_bins, t_max, n_t_bins, n_costheta_bins,
                         n_costhetadir_bins, n_deltaphidir_bins, outdir,
                         overwrite=False, compress=True):
    """Generate a CLSim table.

    Parameters
    ----------
    subdet : string, {'ic', 'dc'}

    depth_idx : int in [0, 59]

    nevts : int > 0
        Note that the number of photons is much larger than the number of
        events (related to the "brightness" of the defined source)

    seed : int in [0, 2**32)
        Seed for CLSim's random number generator

    tilt : bool
        Whether to enable ice layer tilt in simulation

    r_max : float > 0

    r_power : int > 0

    n_r_bins : int > 0

    t_max : float > 0

    n_t_bins : int > 0

    n_costheta_bins : int > 0

    n_costhetadir_bins : int > 0

    n_deltaphidir_bins : int > 0

    outdir : string

    overwrite : bool, optional
        Whether to overwrite an existing table (default: False)

    compress : bool, optional
        Whether to pass the resulting table through zstandard compression
        (default: True)

    Raises
    ------
    ValueError
        If `compress` is True but the `zstd` command-line utility cannot be
        found

    AssertionError, ValueError
        If illegal argument values are passed

    ValueError
        If `overwrite` is False and a table already exists at the target path

    Notes
    -----
    Binnings are as follows:
        * Radial binning is regular in the space of r**(1/r_power), with
          `n_r_bins` spanning from `r_min` to `r_max`.
        * Time binning is linearly spaced, with `n_t_bins` spanning from
          `t_min` to `t_max`
        * Position zenith angle is binned regularly in the cosine of the
          zenith angle, with `n_costheta_bins` spanning from `costheta_min`
          to `costheta_max`.
        * Position azimuth angle is _not_ binned
        * Photon directionality zenith angle is binned regularly in
          cosine-zenith space, with `n_costhetadir_bins` spanning from
          `costhetadir_min` to `costhetadir_max`
        * Photon directionality azimuth angle, since position azimuth angle
          is not binned, is translated into the absolute value of the azimuth
          angle relative to the azimuth position of the photon; this is
          called `deltaphidir`. There are `n_deltaphidir_bins` from
          `deltaphidir_min` to `deltaphidir_max`.

    The following are forced upon the above binning specifications (and
    remaining parameters are specified as arguments to the function)
        * t_min = 0
        * r_min = 0
        * costheta_min = -1
        * costheta_max = 1
        * costhetadir_min = -1
        * costhetadir_max = 1
        * deltaphidir_min = 0
        * deltaphidir_max = pi (rad)

    """
    assert isinstance(nevts, Integral) and nevts > 0
    assert isinstance(seed, Integral) and 0 <= seed < 2**32
    assert isinstance(r_power, Integral) and r_power > 0
    assert isinstance(n_r_bins, Integral) and n_r_bins > 0
    assert isinstance(n_t_bins, Integral) and n_t_bins > 0
    assert isinstance(n_costheta_bins, Integral) and n_costheta_bins > 0
    assert isinstance(n_costhetadir_bins, Integral) and n_costhetadir_bins > 0
    assert isinstance(n_deltaphidir_bins, Integral) and n_deltaphidir_bins > 0

    if compress and not any(access(join(path, 'zstd'), X_OK)
                            for path in environ['PATH'].split(pathsep)):
        raise ValueError('`zstd` command not found in path')

    outdir = expand(outdir)
    mkdir(outdir)

    # Note: + 2 accounts for under/overflow bins in each dimension
    n_bins = np.prod([n_bins + 2 for n_bins in (n_r_bins,
                                                n_costheta_bins,
                                                n_t_bins,
                                                n_costhetadir_bins,
                                                n_deltaphidir_bins)])

    if n_bins > 2**32:
        raise ValueError(
            'The flattened bin index in CLSim is represented by uint32 which'
            ' has a max of 4 294 967 296, but the binning specified comes to'
            ' {} bins ({} times too many).'
.format(n_bins, n_bins / 2**32)
        )

    # Average Z coordinate (depth) for each layer of DOMs (see
    # `average_z_position.py`)

    # TODO: make these command-line arguments
    t_min = 0 # ns
    r_min = 0 # meters
    costheta_min, costheta_max = -1.0, 1.0
    costhetadir_min, costhetadir_max = -1.0, 1.0
    deltaphidir_min, deltaphidir_max = 0.0, np.pi # rad

    r_binning_kw = dict(
        min=float(r_min),
        max=float(r_max),
        n_bins=int(n_r_bins),
        power=int(r_power)
    )
    costheta_binning_kw = dict(
        min=float(costheta_min),
        max=float(costheta_max),
        n_bins=int(n_costheta_bins)
    )
    t_binning_kw = dict(
        min=float(t_min),
        max=float(t_max),
        n_bins=int(n_t_bins)
    )
    costhetadir_binning_kw = dict(
        min=float(costhetadir_min),
        max=float(costhetadir_max),
        n_bins=int(n_costhetadir_bins)
    )
    deltaphidir_binning_kw = dict(
        min=float(deltaphidir_min),
        max=float(deltaphidir_max),
        n_bins=int(n_deltaphidir_bins)
    )

    axes = SphericalAxes([
        # r: photon location, radius (m)
        PowerAxis(**r_binning_kw),
        # costheta: photon location, coszenith
        LinearAxis(**costheta_binning_kw),
        # t: photon location, time (ns)
        LinearAxis(**t_binning_kw),
        # costhetadir: photon direction, coszenith
        LinearAxis(**costhetadir_binning_kw),
        # deltaphidir: photon direction, (impact) azimuth angle (rad)
        LinearAxis(**deltaphidir_binning_kw)
    ]) # yapf: disable

    if subdet.lower() == 'ic':
        z_pos = IC_AVG_Z[depth_idx]
    elif subdet.lower() == 'dc':
        z_pos = DC_AVG_Z[depth_idx]

    print('Subdetector {}, depth index {} (z_avg = {} m)'
          .format(subdet, depth_idx, z_pos))

    # Parameters that will (or can be foreseen to) cause the tables to vary
    # depending on their values. These define what we will call a "set" of
    # tables.
    tray_kw_to_hash = dict(
        PhotonSource='retro',
        Zenith=180 * I3Units.degree, # orientation of source
        Azimuth=0 * I3Units.degree, # orientation of source
        # Number of events will affect the tables, but n=999 and n=1000 will
        # be very similar (and not statistically independent if the seed is
        # the same). But a user is likely to want to test out same settings
        # but different statistics, so these sets need different hashes
        # (unless we want the user to also specify the nevts when identifying
        # a set...). Therefore, this is included in the hash to indicate a
        # common set of tables
        NEvents=nevts,
        IceModel='spice_mie',
        DisableTilt=not tilt,
        PhotonPrescale=1,
        Sensor='none'
    )

    hashable_params = dict(
        r_binning_kw=r_binning_kw,
        t_binning_kw=t_binning_kw,
        costheta_binning_kw=costheta_binning_kw,
        costhetadir_binning_kw=costhetadir_binning_kw,
        deltaphidir_binning_kw=deltaphidir_binning_kw,
        tray_kw_to_hash=tray_kw_to_hash
    )

    hash_val, metaname = generate_clsim_table_meta(**hashable_params)
    metapath = join(outdir, metaname)

    filename = CLSIM_TABLE_FNAME_PROTO[-1].format(
        hash_val=hash_val,
        string=subdet,
        depth_idx=depth_idx,
        seed=seed
    )
    filepath = abspath(join(outdir, filename))

    #if isfile(metapath):
    #    if overwrite:
    #        print('WARNING! Overwriting table metadata file at "{}"'
    #              .format(metapath))
    #    else:
    #        raise ValueError(
    #            'Table metadata file already exists at "{}",'
    #            ' assuming table already generated or in process; not'
    #            ' overwriting.'.format(metapath)
    #        )

    with open(metapath, 'w') as fhandle:
        json.dump(hashable_params, fhandle, sort_keys=True, indent=4)

    print('='*80)
    print('Metadata for the table set was written to\n "{}"'.format(metapath))
    print('Table will be written to\n "{}"'.format(filepath))
    print('='*80)

    exists_at = []
    for fpath in [filepath, filepath + '.zst']:
        if isfile(fpath):
            exists_at.append(fpath)

    if exists_at:
        names = ', '.join('"{}"'.format(fp) for fp in exists_at)
        if overwrite:
            print('WARNING! 
Deleting existing table(s) at ' + names) for fpath in exists_at: remove(fpath) else: raise ValueError('Table(s) already exist at {}; not' ' overwriting.'.format(names)) print('') tray_kw_other = dict( # Note that hash includes the parameters used to construct the axes Axes=axes, # Parameters that indicate some "index" into the set defined above. # I.e., you will want to associate all seeds and all z positions # simulated together in the same set, but of course these parameters # will also change the tables produced. ZCoordinate=z_pos, # location of source Seed=seed, # Parameters that should have no bearing on the contents of the tables Energy=1 * I3Units.GeV, TabulateImpactAngle=True, Directions=None, Filename=filepath, FlasherWidth=127, FlasherBrightness=127, RecordErrors=False, ) all_tray_kw = {} all_tray_kw.update(tray_kw_to_hash) all_tray_kw.update(tray_kw_other) icetray.logging.set_level_for_unit( 'I3CLSimStepToTableConverter', 'TRACE' ) icetray.logging.set_level_for_unit( 'I3CLSimTabulatorModule', 'DEBUG' ) icetray.logging.set_level_for_unit( 'I3CLSimLightSourceToStepConverterGeant4', 'TRACE' ) icetray.logging.set_level_for_unit( 'I3CLSimLightSourceToStepConverterFlasher', 'TRACE' ) tray = I3Tray() tray.AddSegment(TabulatePhotonsFromSource, 'generator', **all_tray_kw) tray.Execute() tray.Finish() if compress: print('Compressing table with zstandard via command line') print(' zstd -1 --rm "{}"'.format(filepath)) check_call(['zstd', '-1', '--rm', filepath]) print('done.')
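
# Hedged worked check of the uint32 flat-index limit enforced above, using
# the legacy binning from elsewhere in this codebase (200 r, 40 costheta,
# 300 t, 20 costhetadir, 20 deltaphidir bins; + 2 under/overflow each):
# (200+2) * (40+2) * (300+2) * (20+2) * (20+2) = 1240089312 bins, safely
# below the 2**32 = 4294967296 ceiling. The helper name is ours:
def check_flat_bin_count(*n_bins_per_dim):
    """Return total bin count incl. under/overflow; assert it fits uint32."""
    import numpy as np
    n_bins = int(np.prod([n + 2 for n in n_bins_per_dim], dtype=np.int64))
    assert n_bins <= 2**32, '{} bins exceed uint32 flat index'.format(n_bins)
    return n_bins

# check_flat_bin_count(200, 40, 300, 20, 20) -> 1240089312 (OK)
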
def get_all_stats(
    outdir,
    min_pulses_per_event,
    overwrite=False,
    only_sets=None,
    processes=None,
    verbosity=0,
):
    """Get stats for all data and MC sets.

    Parameters
    ----------
    outdir : string

    min_pulses_per_event : int >= 0

    overwrite : bool, optional
        Whether to overwrite any existing stats files

    only_sets : string, iterable thereof, or None, optional
        If specified, string(s) must be keys of `MC_NAME_DIRINFOS` and/or
        `DATA_NAME_DIRINFOS` dicts.

    processes : None or int > 0, optional

    verbosity : int >= 0, optional

    Returns
    -------
    stats : OrderedDict
        Keys are dataset names and values are OrderedDicts containing the
        stats for the corresponding datasets.

    """
    outdir = expand(outdir)
    if isinstance(only_sets, string_types):
        only_sets = [only_sets]

    to_process = chain.from_iterable(
        [MC_NAME_DIRINFOS.items(), DATA_NAME_DIRINFOS.items()])
    if only_sets is not None:
        only_sets = [s.split("/") for s in only_sets]
        new_to_process = []
        for set_name, subsets_list in to_process:
            new_subsets_list = []
            for only_set in only_sets:
                if set_name != only_set[0]:
                    continue
                if len(only_set) == 1:
                    new_subsets_list = subsets_list
                    break
                else:
                    for subset in subsets_list:
                        if subset["id"] == only_set[1]:
                            new_subsets_list.append(subset)
            if len(new_subsets_list) > 0:
                new_to_process.append((set_name, new_subsets_list))
        to_process = new_to_process
        #((key, val) for key, val in to_process if key in only_sets)

    mkdir(outdir)

    stats = OrderedDict()
    for name, dirinfos in to_process:
        t0 = time.time()
        this_stats = OrderedDict()
        for dirinfo in dirinfos:
            augmented_name = "{}.{}".format(name, dirinfo["id"])
            outfile = join(outdir, "stats_{}.npz".format(augmented_name))
            if isfile(outfile) and not overwrite:
                contents = OrderedDict([(k, v)
                                        for k, v in np.load(outfile).items()])
                if verbosity >= 1:
                    wstderr(
                        'loaded stats for set "{}" from file "{}" ({} sec)\n'.
                        format(augmented_name, outfile, time.time() - t0))
            else:
                contents = get_stats(
                    min_pulses_per_event=min_pulses_per_event,
                    dirinfo=dirinfo,
                    processes=processes,
                    verbosity=verbosity,
                )
                #np.savez_compressed(outfile, **contents)
                np.savez(outfile, **contents)
                if verbosity >= 1:
                    wstderr('saved stats for set "{}" to file "{}" ({} sec)\n'.
                            format(augmented_name, outfile, time.time() - t0))

            if name == "data":
                stats[dirinfo["id"]] = contents
            else:
                for key, vals in contents.items():
                    if key not in this_stats:
                        this_stats[key] = []
                    this_stats[key].append(vals)
                del contents

        if name != "data":
            stats[name] = OrderedDict([(k, np.concatenate(v))
                                       for k, v in this_stats.items()])

    return stats
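
# Hedged usage note for `only_sets` above: entries may be either a bare set
# name (keeping all of that set's subsets) or "name/id" (keeping one subset).
# Set names and ids below are hypothetical:
#
#     get_all_stats(outdir, min_pulses_per_event=8, only_sets="mc")
#     get_all_stats(outdir, min_pulses_per_event=8,
#                   only_sets=["data/2013", "mc/140000"])
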
def summarize_clsim_table(table_fpath,
                          table=None,
                          save_summary=True,
                          outdir=None):
    """
    Parameters
    ----------
    table_fpath : string
        Path to table (or just the table's filename if `outdir` is specified)

    table : mapping, optional
        If the table has already been loaded, it can be passed here to avoid
        re-loading the table.

    save_summary : bool
        Whether to save the table summary to disk.

    outdir : string, optional
        If `save_summary` is True, write the summary to this directory. If
        `outdir` is not specified and `save_summary` is True, the summary
        will be written to the same directory that contains `table_fpath`.

    Returns
    -------
    table
        See `load_clsim_table` for details of the data structure

    summary : OrderedDict

    """
    t_start = time()

    # `from_json` is also needed below to read existing metadata, so import
    # regardless of `save_summary`
    from pisa.utils.jsons import from_json, to_json

    table_fpath = expand(table_fpath)
    srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath)
    invalid_fname = False
    try:
        fname_info = interpret_clsim_table_fname(clsim_fname)
    except ValueError:
        invalid_fname = True
        fname_info = {}

    if outdir is None:
        outdir = srcdir
    outdir = expand(outdir)
    mkdir(outdir)

    if invalid_fname:
        metapath = None
    else:
        metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format(
            hash_val=fname_info['hash_val']))
        metapath = join(outdir, metaname)
    if metapath and isfile(metapath):
        meta = from_json(metapath)
    else:
        meta = dict()

    if table is None:
        table = load_clsim_table(table_fpath)

    summary = OrderedDict()
    for key in table.keys():
        if key == 'table':
            continue
        summary[key] = table[key]
    if fname_info:
        for key in ('hash_val', 'string', 'depth_idx', 'seed'):
            summary[key] = fname_info[key]
    # TODO: Add hole ice info when added to tray_kw_to_hash
    if meta:
        summary['n_events'] = meta['tray_kw_to_hash']['NEvents']
        summary['ice_model'] = meta['tray_kw_to_hash']['IceModel']
        summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt']
        for key, val in meta.items():
            if key.endswith('_binning_kw'):
                summary[key] = val
    elif 'fname_version' in fname_info and fname_info['fname_version'] == 1:
        summary['n_events'] = fname_info['n_events']
        summary['ice_model'] = 'spice_mie'
        summary['tilt'] = False
        summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2)
        summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40)
        summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300)
        summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20)
        summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20)

    # Save marginal distributions and info to file
    norm = (
        1 / table['n_photons']
        / (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index']
           * np.mean(np.diff(table['t_bin_edges'])))
        #* table['angular_acceptance_fract']
        * (len(table['costheta_bin_edges']) - 1))
    summary['norm'] = norm

    dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir')
    n_dims = len(table['table_shape'])
    assert n_dims == len(dim_names)

    # Apply norm to underflow and overflow so magnitudes can be compared
    # relative to plotted marginal distributions
    for flow, idx in product(('underflow', 'overflow'), iter(range(n_dims))):
        summary[flow][idx] = summary[flow][idx] * norm

    wstderr('Finding marginal distributions...\n')
    wstderr(' masking off zeros in table...')
    t0 = time()
    nonzero_table = np.ma.masked_equal(table['table'], 0)
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    t0_marg = time()
    summary['dimensions'] = OrderedDict()
    for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names):
        remove_axes = list(range(n_dims))
        remove_axes.pop(keep_axis)
        remove_axes = tuple(remove_axes)
        axis = OrderedDict()
wstderr(' mean across non-{} axes...'.format(ax_name)) t0 = time() axis['mean'] = norm * np.asarray( np.mean(table['table'], axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) wstderr(' median across non-{} axes...'.format(ax_name)) t0 = time() axis['median'] = norm * np.asarray( np.ma.median(nonzero_table, axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) wstderr(' max across non-{} axes...'.format(ax_name)) t0 = time() axis['max'] = norm * np.asarray( np.max(table['table'], axis=remove_axes)) wstderr(' ({} s)\n'.format(np.round(time() - t0, 3))) summary['dimensions'][ax_name] = axis wstderr(' Total time to find marginal distributions: {} s\n'.format( np.round(time() - t0_marg, 3))) if save_summary: ext = None base_fname = clsim_fname while ext not in ('', '.fits'): base_fname, ext = splitext(base_fname) ext = ext.lower() outfpath = join(outdir, base_fname + '_summary.json.bz2') to_json(summary, outfpath) print('saved summary to "{}"'.format(outfpath)) wstderr('Time to summarize table: {} s\n'.format( np.round(time() - t_start, 3))) return table, summary
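
# Hedged restatement of the normalization computed inline above, factored
# into a standalone helper (the helper name is ours; the angular-acceptance
# factor is omitted exactly as in the commented-out line above):
def clsim_table_norm(n_photons, phase_refractive_index, t_bin_edges,
                     n_costheta_bins):
    """Per-bin normalization: 1 / (photons simulated * path length traversed
    per time bin), scaled up by the number of costheta bins."""
    import numpy as np
    speed_of_light_m_per_ns = 0.299792458  # vacuum value, assumed constant
    return (
        1 / n_photons
        / (speed_of_light_m_per_ns / phase_refractive_index
           * np.mean(np.diff(t_bin_edges)))
        * n_costheta_bins
    )
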
def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str

    outdir : str, optional
        If provided, the gcd info is saved to a .pkl file with same name as
        `gcd_file` just with extension replaced.

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coords in m rel to IceCube
            coord system
        'rde' : (86, 60) array with relative DOM efficiencies
        'noise' : (86, 60) array with noise rate, in Hz, for each DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir, src_gcd_basename = split(gcd_file)

    # Strip all recognized extensions to find base file name's "stem," then
    # attach ".pkl" extension to that
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        if ext.lower().lstrip('.') not in ['i3', 'pkl', 'bz2', 'gz', 'zst']:
            # reattach unknown "extension"; presumably it's actually part of
            # the filename and not an extension at all (or an extension we
            # don't care about, or an empty string in the case that there is
            # no dot remaining in the name)
            src_gcd_stripped += ext
            break
    pkl_outfname = src_gcd_stripped + '.pkl'

    pkl_outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        pkl_outfpath = join(outdir, pkl_outfname)
        if isfile(pkl_outfpath):
            return load_pickle(pkl_outfpath)

    def save_pickle_if_appropriate(gcd_info):
        if pkl_outfpath is not None:
            with open(pkl_outfpath, 'wb') as fobj:
                pickle.dump(gcd_info, fobj, protocol=pickle.HIGHEST_PROTOCOL)

    # Look for existing extracted (pkl) version in choice directories
    look_in_dirs = []
    if src_gcd_dir:
        look_in_dirs.append(src_gcd_dir)
    look_in_dirs += ['.', DATA_DIR]
    if 'I3_DATA' in os.environ:
        look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    for look_in_dir in look_in_dirs:
        uncompr_pkl_fpath = join(look_in_dir, pkl_outfname)
        if isfile(uncompr_pkl_fpath):
            gcd_info = load_pickle(uncompr_pkl_fpath)
            save_pickle_if_appropriate(gcd_info)
            return gcd_info

    # If we couldn't find the already-extracted file, find the source file
    # (if user doesn't specify a full path to the file, try in several
    # possible directories)
    if src_gcd_dir:
        look_in_dirs = [src_gcd_dir]
    else:
        look_in_dirs = ['.', DATA_DIR]
        if 'I3_DATA' in os.environ:
            look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    src_fpath = None
    for look_in_dir in look_in_dirs:
        fpath = join(look_in_dir, src_gcd_basename)
        if isfile(fpath):
            src_fpath = fpath
            break

    if src_fpath is None:
        raise IOError('Cannot find file "{}" in dir(s) {}'.format(
            src_gcd_basename, look_in_dirs))

    # Figure out what compression algorithms are used on the file; final
    # state will have `ext_lower` containing either "i3" or "pkl" indicating
    # the basic type of file we have
    compression = []
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        ext_lower = ext.lower().lstrip('.')
        if ext_lower in ['gz', 'bz2', 'zst']:
            compression.append(ext_lower)
        elif ext_lower in ['i3', 'pkl']:
            break
        else:
            if ext:
                raise IOError(
                    'Unhandled extension "{}" found in GCD file "{}"'.format(
                        ext, gcd_file))
            raise IOError(
                'Illegal filename "{}"; must have either ".i3" or ".pkl"'
                ' extension," optionally followed by compression 
extension(s)".format( gcd_file)) with open(src_fpath, 'rb') as fobj: decompressed = fobj.read() # Don't hash a pickle file; all we care about is the hash of the original # i3 file, which is a value already stored in the pickle file if ext_lower == 'i3': source_gcd_md5 = hashlib.md5(decompressed).hexdigest() for comp_alg in compression: if comp_alg == 'gz': decompressed = gzip.GzipFile(fileobj=BytesIO(decompressed)).read() elif comp_alg == 'bz2': decompressed = bz2.decompress(decompressed) elif comp_alg == 'zst': decompressor = zstandard.ZstdDecompressor() decompressed = decompressor.decompress(decompressed, max_output_size=100000000) if ext_lower == 'pkl': if PY2: gcd_info = pickle.loads(decompressed) else: gcd_info = pickle.loads(decompressed, encoding='latin1') save_pickle_if_appropriate(gcd_info) return gcd_info # -- If we get here, we have an i3 file -- # decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest() from I3Tray import I3Units, OMKey # pylint: disable=import-error from icecube import dataclasses, dataio # pylint: disable=import-error, unused-variable, unused-import gcd = dataio.I3File(gcd_file) # pylint: disable=no-member frame = gcd.pop_frame() omgeo, dom_cal = None, None while gcd.more() and (omgeo is None or dom_cal is None): frame = gcd.pop_frame() keys = list(frame.keys()) if 'I3Geometry' in keys: omgeo = frame['I3Geometry'].omgeo if 'I3Calibration' in keys: dom_cal = frame['I3Calibration'].dom_cal assert omgeo is not None assert dom_cal is not None # create output dict gcd_info = OrderedDict() gcd_info['source_gcd_name'] = src_gcd_basename gcd_info['source_gcd_md5'] = source_gcd_md5 gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5 gcd_info['geo'] = np.full(shape=(N_STRINGS, N_DOMS, 3), fill_value=np.nan) gcd_info['noise'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan) gcd_info['rde'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan) for string_idx in range(N_STRINGS): for dom_idx in range(N_DOMS): omkey = OMKey(string_idx + 1, dom_idx + 1) om = omgeo.get(omkey) gcd_info['geo'][string_idx, dom_idx, 0] = om.position.x gcd_info['geo'][string_idx, dom_idx, 1] = om.position.y gcd_info['geo'][string_idx, dom_idx, 2] = om.position.z try: gcd_info['noise'][string_idx, dom_idx] = (dom_cal[omkey].dom_noise_rate / I3Units.hertz) except KeyError: gcd_info['noise'][string_idx, dom_idx] = 0.0 try: gcd_info['rde'][string_idx, dom_idx] = dom_cal[omkey].relative_dom_eff except KeyError: gcd_info['rde'][string_idx, dom_idx] = 0.0 save_pickle_if_appropriate(gcd_info) return gcd_info