Code example #1
File: remove_dimension.py  Project: icecube/retro
def remove_dimension(input_file, output_file, dim_name):
    """
    Parameters
    ----------
    input_file : str
        Path to input file (table)

    output_file : str
        Path to output file (table)

    dim_name : str
        Dimension to remove from the input table

    """
    input_file = expand(input_file)
    output_file = expand(output_file)

    input_dir = dirname(input_file)
    output_dir = dirname(output_file)

    if abspath(output_dir) == abspath(input_dir):
        raise ValueError("Will not allow output dir to be same as input dir")

    if not isdir(output_dir):
        mkdir(output_dir)

    input_table = np.load(input_file, mmap_mode="r")
    input_binning = np.load(join(input_dir, "binning.npy"))

    dim_num = [i for i, n in enumerate(input_binning.dtype.names) if n == dim_name][0]
    output_binning = input_binning[
        [n for n in input_binning.dtype.names if n != dim_name]
    ]

    # Save the binning to the output directory
    np.save(join(output_dir, "binning.npy"), output_binning)

    # Legacy way of storing bin edges: store each dim individually
    for d_name in output_binning.dtype.names:
        bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(d_name))
        np.save(bin_edges_fpath, output_binning[d_name])

    # If we find the removed dimension's bin edges in output dir, remove that file
    bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(dim_name))
    if isfile(bin_edges_fpath):
        remove(bin_edges_fpath)

    output_shape = tuple(n for i, n in enumerate(input_table.shape) if i != dim_num)
    output_table = np.empty(shape=output_shape, dtype=input_table.dtype)
    #output_table = np.memmap(
    #    output_file, dtype=input_table.dtype, mode="w+", shape=output_shape
    #)

    # Perform the summation over the dimension to be removed

    # Note that setting dtype to float64 causes accumulator to be double
    # precision, even if output table is not
    input_table.sum(axis=dim_num, dtype=np.float64, out=output_table)

    np.save(output_file, output_table)
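As a self-contained illustration of the core operation above (not part of the project), the sketch below sums a toy table over one axis while dropping the matching field from a structured "binning" array; the field names and shapes are invented for this sketch.

import numpy as np

# Toy binning: one structured record with a field per table dimension
binning = np.zeros(1, dtype=[("x", "f8", (4,)), ("t", "f8", (5,))])
table = np.arange(3 * 4, dtype=np.float32).reshape(3, 4)

dim_name = "t"
dim_num = binning.dtype.names.index(dim_name)  # axis to sum out
reduced_binning = binning[[n for n in binning.dtype.names if n != dim_name]]

# Sum over the removed axis with a float64 accumulator, as remove_dimension does
reduced = table.sum(axis=dim_num, dtype=np.float64)

print(reduced_binning.dtype.names)  # ('x',)
print(reduced.shape)                # (3,)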
Code example #2
File: find_unique_gcds.py  Project: icecube/retro
def centralize_gcds(root_infos, gcd_dir=GCD_DIR):
    """Move GCD files to a single directory, if they don't already exist there.

    Compression extensions should be ignored, so only one version of each GCD
    exists.

    Parameters
    ----------
    root_infos : mapping
    gcd_dir : str, optional

    """
    gcd_dir = expand(gcd_dir)
    mkdir(gcd_dir)

    existing_fnames = os.listdir(gcd_dir)
    existing_roots = set()
    for fname in existing_fnames:
        match = GENERIC_I3_FNAME_RE.match(fname)
        if not match:
            continue
        groupdict = match.groupdict()
        existing_roots.add(groupdict["base"])

    for root, infos in root_infos.items():
        for info in infos:
            is_link = islink(info["fpath"])
            is_file = isfile(info["fpath"])

            if is_link:
                if is_file:  # link to an existing file
                    if root not in existing_roots:
                        shutil.copy2(info["fpath"],
                                     gcd_dir,
                                     follow_symlinks=True)
                        existing_roots.add(root)
                else:  # bad link (to nothing, or to a directory)
                    if not isdir(info["fpath"]):
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
            else:
                if root in existing_roots:
                    if is_file:
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
                else:
                    print(f'shutil.move({info["fpath"]}, {gcd_dir})')
                    shutil.move(info["fpath"], gcd_dir)
                    existing_roots.add(root)
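A hypothetical call to the function above; the structure of `root_infos` is inferred from how the function reads it (a mapping from a GCD base name to a list of dicts that each carry an "fpath" key), and all paths are placeholders.

root_infos = {
    "GeoCalibDetectorStatus_example": [
        {"fpath": "/data/sim/set_a/GeoCalibDetectorStatus_example.i3.gz"},
        {"fpath": "/data/sim/set_b/GeoCalibDetectorStatus_example.i3.bz2"},
    ],
}
centralize_gcds(root_infos, gcd_dir="~/retro_gcds")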
Code example #3
File: concatenate_recos.py  Project: icecube/retro
def concatenate_recos_and_save(outfile, **kwargs):
    """Concatenate recos and save to a file.

    Parameters
    ----------
    outfile : str
    **kwargs
        Arguments passed to `concatenate_recos`

    """
    outfile = expand(outfile)
    out_array = concatenate_recos(**kwargs)
    outdir = dirname(outfile)
    if not isdir(outdir):
        mkdir(outdir)
    np.save(outfile, out_array)
    sys.stdout.write('Saved concatenated array to "{}"\n'.format(outfile))
Code example #4
def extract_gcd_frames(g_frame,
                       c_frame,
                       d_frame,
                       retro_gcd_dir,
                       metadata=None):
    """Extract GCD info to Python/Numpy-readable objects stored to a central
    GCD directory, subdirs of which are named by the hex md5sum of each
    extracted GCD file.

    Parameters
    ----------
    g_frame : icecube.icetray.I3Frame with stop I3Frame.Geometry
    c_frame : icecube.icetray.I3Frame with stop I3Frame.Calibration
    d_frame : icecube.icetray.I3Frame with stop I3Frame.DetectorStatus
    retro_gcd_dir : string
    metadata : None or mapping, optional
        If non-empty mapping (e.g., OrderedDict) is provided, the contents are
        written to the gcd file's subdirectory inside retro_gcd_dir as
        "metadata.json"

    Returns
    -------
    gcd_md5_hex : len-32 string of chars 0-9 and/or a-f
        MD5 sum of _only_ the G, C, and D frames (in that order) dumped to an
        uncompressed i3 file. Note that this can result in a hash value
        different from hashing the original GCD file if other frames were
        present besides the GCD frames (such as an I frame, or Q/P/etc. if the
        GCD is embedded in a data i3 file)

    """
    from icecube.dataio import I3File  # pylint: disable=import-outside-toplevel

    retro_gcd_dir = expand(retro_gcd_dir)

    # Create root dir for gcd subdirs if necessary
    if not isdir(retro_gcd_dir):
        mkdir(retro_gcd_dir)

    # Add a vaguely useful README to gcd root dir
    readme_fpath = join(retro_gcd_dir, "README")
    if not isfile(readme_fpath):
        with io.open(readme_fpath, "w", encoding="utf-8") as fhandle:
            fhandle.write(GCD_README.strip() + "\n")

    # Find md5sum of an uncompressed GCD file created by these G, C, & D frames
    tempdir_path = mkdtemp(suffix="gcd")
    try:
        gcd_i3file_path = join(tempdir_path, "gcd.i3")
        gcd_i3file = I3File(gcd_i3file_path, "w")
        gcd_i3file.push(g_frame)
        gcd_i3file.push(c_frame)
        gcd_i3file.push(d_frame)
        gcd_i3file.close()
        gcd_md5_hex = get_file_md5(gcd_i3file_path)
    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    this_gcd_dir_path = join(retro_gcd_dir, gcd_md5_hex)
    if isdir(this_gcd_dir_path):
        # already extracted this GCD
        sys.stderr.write(
            "Already extracted GCD with md5sum {}\n".format(gcd_md5_hex))
        return gcd_md5_hex

    tempdir_path = mkdtemp(suffix="." + gcd_md5_hex)
    try:
        # Extract GCD info into Python/Numpy-readable things
        gcd_info = OrderedDict()
        gcd_info["I3Geometry"] = extract_i3_geometry(g_frame)
        gcd_info["I3Calibration"] = extract_i3_calibration(c_frame)
        gcd_info["I3DetectorStatus"] = extract_i3_detector_status(d_frame)
        gcd_info.update(extract_bad_doms_lists(d_frame))

        # Write info to files. Preferable to write a single array to a .npy file;
        # second most preferable is to write multiple arrays to (compressed) .npz
        # file (faster to load than pkl files); finally, I3DetectorStatus _has_ to
        # be stored as pickle to preserve varying-length items.
        for key, val in gcd_info.items():
            if isinstance(val, Mapping):
                if key == "I3DetectorStatus":
                    key_fpath = join(tempdir_path, key + ".pkl")
                    with io.open(key_fpath, "wb") as fhandle:
                        pickle.dump(val,
                                    fhandle,
                                    protocol=pickle.HIGHEST_PROTOCOL)
                else:
                    np.savez_compressed(join(tempdir_path, key + ".npz"),
                                        **val)
            else:
                assert isinstance(val, np.ndarray)
                np.save(join(tempdir_path, key + ".npy"), val)

        if metadata:
            metadata_fpath = join(tempdir_path, "metadata.json")
            with open(metadata_fpath, "w") as fhandle:
                json.dump(metadata, fhandle, sort_keys=False, indent=4)

        try:
            copytree(tempdir_path, this_gcd_dir_path)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise

    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    return gcd_md5_hex
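A hypothetical driver for the function above, assuming the IceCube software environment is available; frames are selected by their stop, matching the frame types named in the docstring, and the paths are placeholders.

from collections import OrderedDict

from icecube import dataio, icetray  # pylint: disable=import-error

gcd_path = "/path/to/GeoCalibDetectorStatus.i3"
i3file = dataio.I3File(gcd_path)
g_frame = c_frame = d_frame = None
while i3file.more():
    frame = i3file.pop_frame()
    if frame.Stop == icetray.I3Frame.Geometry:
        g_frame = frame
    elif frame.Stop == icetray.I3Frame.Calibration:
        c_frame = frame
    elif frame.Stop == icetray.I3Frame.DetectorStatus:
        d_frame = frame

gcd_md5_hex = extract_gcd_frames(
    g_frame,
    c_frame,
    d_frame,
    retro_gcd_dir="~/retro_gcd",
    metadata=OrderedDict(source_file=gcd_path),
)
print(gcd_md5_hex)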
Code example #5
def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into a single
    TDI table.

    Parameters
    ----------
    source_dir : str
    dest_dir : str
    table_hash : str
    gcd : str
        Path to a GCD file (passed through `extract_gcd`)
    bin_edges_file : str
    tile_spec_file : str

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()

    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [l.strip() for l in f.readlines()]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir),
                     dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1), ) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()

            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(
                join(
                    source_dir, 'clsim_table_set'
                    '_{table_hash}'
                    '_tile_{tbl_idx}'
                    '_string_{string}'
                    '_dom_{dom}'
                    '_seed_{seed}'
                    '_n_{n_events}'
                    '.fits'.format(table_hash=table_hash, **info)))[0]
            try:
                fits_table = fits.open(tile_fpath,
                                       mode='readonly',
                                       memmap=True)
            except:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]

            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                    'this: {}, ref: {}'.format(this_gcd_i3_md5, gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)

            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1,
                                                   info['dom'] - 1]
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons *
                                                                    bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('    quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop,
                      z_start:z_stop, :, :] += (
                          norm * primary.data[central_slice]  # pylint: disable=no-member
                      )
        except:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir, 'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        ))
    mkdir(outdir)

    name = 'tdi_table.npy'
    outfpath = join(outdir, name)
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    #outfpath = join(outdir, 'tdi_bin_edges.json')
    #wstdout('saving bin edges to "{}"\n'.format(outfpath))
    #json.dump(
    #    bin_edges,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    with open(outfpath, 'wb') as fhandle:
        pickle.dump(bin_edges, fhandle, protocol=pickle.HIGHEST_PROTOCOL)

    #outfpath = join(outdir, 'tdi_metadata.json')
    #wstdout('saving metadata to "{}"\n'.format(outfpath))
    #json.dump(
    #    metadata,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    with open(outfpath, 'wb') as fhandle:
        pickle.dump(metadata, fhandle, protocol=pickle.HIGHEST_PROTOCOL)
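Illustrative only (not part of the project): the sketch below shows how a per-DOM tile is placed into the global grid using np.digitize on the tile's extents, the same indexing pattern combine_tdi_tiles uses; all numbers are invented and only two dimensions are shown.

import numpy as np

x_edges = np.linspace(-500, 500, 11)  # 10 global x bins, 100 m wide
y_edges = np.linspace(-500, 500, 11)  # 10 global y bins, 100 m wide
x_bw = (x_edges.max() - x_edges.min()) / (len(x_edges) - 1)
y_bw = (y_edges.max() - y_edges.min()) / (len(y_edges) - 1)

table = np.zeros((10, 10), dtype=np.float32)
tile = np.ones((3, 2), dtype=np.float32)  # covers 3 x bins and 2 y bins

x_min, x_max = -200.0, 100.0  # tile extents; must lie on the global edges
y_min, y_max = 0.0, 200.0
norm = 1.0                    # stands in for the per-tile normalization

x_start = np.digitize(x_min + x_bw / 2, x_edges) - 1
x_stop = np.digitize(x_max - x_bw / 2, x_edges)
y_start = np.digitize(y_min + y_bw / 2, y_edges) - 1
y_stop = np.digitize(y_max - y_bw / 2, y_edges)

table[x_start:x_stop, y_start:y_stop] += norm * tile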
Code example #6
def combine_clsim_tables(table_fpaths,
                         outdir=None,
                         overwrite=False,
                         step_length=1.0):
    """Combine multiple CLSim-produced tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds; if corresponding metadata
    files can be found in the same directories as the CLSim tables, this will
    be enforced prior to loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path and
        `overwrite` is False, the function simply returns.

    step_length : float > 0 in units of meters
        Needed for computing the normalization to apply to the `table` in order
        to generate the `t_indep_table` (if the latter doesn't already exist).
        Note that normalization constants due to `n_photons`,
        `quantum_efficiency`, and `angular_acceptance_fract` as well as
        normalization depending (only) upon radial bin (i.e. 1/r^2 geometric
        factor) are _not_ applied to the tables. The _only_ normalization
        applied (and _only_ to `t_indep_table`) is the multiple-counting factor
        that is a function of `step_length` and whichever of the time or radial
        bin dimensions is smaller.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion

    if isinstance(table_fpaths, basestring):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp)

    wstderr('Found {} tables to combine:\n  {}\n'.format(
        len(table_fpaths), '\n  '.join(table_fpaths)))

    # Formulate output filenames and check if they exist

    output_fpaths = None
    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)
        output_fpaths = OrderedDict(
            ((k, join(outdir, k + '.npy')) for k in ALL_KEYS))
        output_fpaths['source_tables'] = join(outdir, 'source_tables.txt')
        if not overwrite:
            for fpath in output_fpaths.values():
                if isfile(fpath):
                    raise IOError('File {} exists'.format(fpath))
        wstderr('Output files will be written to:\n  {}\n'.format('\n  '.join(
            output_fpaths.values())))

    # Combine the tables

    combined_table = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath,
                                         step_length=step_length,
                                         mmap=True)

        if combined_table is None:
            combined_table = table
            continue

        if set(table.keys()) != set(SUM_KEYS + VALIDATE_KEYS):
            raise ValueError(
                'Table keys {} do not match expected keys {}'.format(
                    sorted(table.keys()), sorted(ALL_KEYS)))

        for key in VALIDATE_KEYS:
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal {} in file {}'.format(key, fpath))

        for key in SUM_KEYS:
            combined_table[key] += table[key]

        del table

    # Force quantum_efficiency and angular_acceptance_fract to 1 (these should
    # be handled by the user at the time the table is used to represent a
    # particular DOM or subgroup of DOMs)
    t_indep_table, _ = generate_time_indep_table(
        table=combined_table,
        quantum_efficiency=1,
        angular_acceptance_fract=1,
    )
    combined_table['t_indep_table'] = t_indep_table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        basenames = []
        for fpath in table_fpaths:
            base = basename(fpath)
            rootname, ext = splitext(base)
            if ext.lstrip('.') in COMPR_EXTENSIONS:
                base = rootname
            basenames.append(base)

        wstderr('Writing files:\n')

        for key in ALL_KEYS:
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

        fpath = output_fpaths['source_tables']
        wstderr('  {} ...'.format(fpath))
        t0 = time()
        with open(fpath, 'w') as fobj:
            fobj.write('\n'.join(sorted(basenames)))
        wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    wstderr('Total time to combine tables: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return combined_table
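A hypothetical invocation of the function above; the glob pattern and output directory are placeholders.

combined = combine_clsim_tables(
    table_fpaths="~/tables/clsim_table_*_seed_*.fits",
    outdir="~/tables/combined",
    overwrite=False,
    step_length=1.0,
)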
Code example #7
File: extract_gcd.py  Project: mhieronymus/retro
def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str
    outdir : str, optional
        If provided, the gcd info is saved to a .pkl file with same name as
        `gcd_file` just with extension replaced.

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coords in m rel to IceCube coord system
        'rde' : (86, 60) array with relative DOM efficiencies
        'noise' : (86, 60) array with noise rate, in Hz, for each DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir = dirname(gcd_file)
    src_gcd_basename = basename(gcd_file)
    # Strip known suffixes (str.rstrip removes characters, not suffixes, so it
    # cannot be used here)
    src_gcd_stripped = src_gcd_basename
    for suffix in ('.bz2', '.gz', '.i3', '.pkl'):
        if src_gcd_stripped.endswith(suffix):
            src_gcd_stripped = src_gcd_stripped[:-len(suffix)]

    outfname = src_gcd_stripped + '.pkl'
    data_dir_fpath = abspath(join(DATA_DIR, outfname))

    outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        outfpath = join(outdir, outfname)

        if isfile(data_dir_fpath) and data_dir_fpath != abspath(outfpath):
            copyfile(data_dir_fpath, outfpath)

    if isfile(data_dir_fpath):
        return pickle.load(open(data_dir_fpath, 'rb'))

    if outfpath is not None and isfile(outfpath):
        return pickle.load(open(outfpath, 'rb'))

    if src_gcd_dir:
        dirs = [src_gcd_dir]
    else:
        dirs = ['.']
        if 'I3_DATA' in os.environ:
            dirs.append(expanduser(expandvars('$I3_DATA/GCD')))

    compression = []
    parsed = False
    src_gcd_stripped = src_gcd_basename
    for _ in range(10):
        root, ext = splitext(src_gcd_stripped)
        if ext == '.gz':
            compression.append('gz')
            src_gcd_stripped = root
        elif src_gcd_stripped.endswith('.bz2'):
            compression.append('bz2')
            src_gcd_stripped = root
        elif src_gcd_stripped.endswith('.i3'):
            parsed = True
            src_gcd_stripped = root
            break
        elif src_gcd_stripped.endswith('.pkl'):
            for src_dir in dirs:
                fpath = join(src_dir, src_gcd_stripped)
                if isfile(fpath):
                    gcd_info = pickle.load(open(fpath, 'rb'))
                    if outdir is not None and outdir != src_gcd_dir:
                        copyfile(fpath, outfpath)
                    return gcd_info

    if not parsed:
        raise ValueError(
            'Could not parse compression suffixes for GCD file "{}"'
            .format(gcd_file)
        )

    decompressed = open(gcd_file, 'rb').read()
    source_gcd_md5 = hashlib.md5(decompressed).hexdigest()
    for comp_alg in compression:
        if comp_alg == 'gz':
            decompressed = gzip.GzipFile(fileobj=StringIO(decompressed)).read()
        elif comp_alg == 'bz2':
            decompressed = bz2.decompress(decompressed)
    decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    from I3Tray import I3Units, OMKey # pylint: disable=import-error
    from icecube import dataclasses, dataio # pylint: disable=import-error, unused-variable

    gcd = dataio.I3File(gcd_file) # pylint: disable=no-member
    frame = gcd.pop_frame()

    # get detector geometry
    key = 'I3Geometry'
    while key not in frame.keys():
        frame = gcd.pop_frame()
    omgeo = frame[key].omgeo

    # get calibration
    key = 'I3Calibration'
    while key not in frame.keys():
        frame = gcd.pop_frame()
    dom_cal = frame[key].dom_cal

    # create output dict
    gcd_info = OrderedDict()
    gcd_info['source_gcd_name'] = src_gcd_basename
    gcd_info['source_gcd_md5'] = source_gcd_md5
    gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5
    gcd_info['geo'] = geo = np.zeros((N_STRINGS, N_DOMS, 3))
    gcd_info['noise'] = noise = np.zeros((N_STRINGS, N_DOMS))
    gcd_info['rde'] = rde = np.zeros((N_STRINGS, N_DOMS))

    for string_idx in range(N_STRINGS):
        for dom_idx in range(N_DOMS):
            omkey = OMKey(string_idx + 1, dom_idx + 1)
            geo[string_idx, dom_idx, 0] = omgeo.get(omkey).position.x
            geo[string_idx, dom_idx, 1] = omgeo.get(omkey).position.y
            geo[string_idx, dom_idx, 2] = omgeo.get(omkey).position.z
            try:
                noise[string_idx, dom_idx] = (
                    dom_cal[omkey].dom_noise_rate / I3Units.hertz
                )
            except KeyError:
                noise[string_idx, dom_idx] = 0.0

            try:
                rde[string_idx, dom_idx] = dom_cal[omkey].relative_dom_eff
            except KeyError:
                gcd_info['rde'][string_idx, dom_idx] = 0.

    #print(np.mean(gcd_info['rde'][:80]))
    #print(np.mean(gcd_info['rde'][79:]))

    if outfpath is not None:
        with open(outfpath, 'wb') as outfile:
            pickle.dump(gcd_info, outfile, protocol=pickle.HIGHEST_PROTOCOL)

    return gcd_info
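A hypothetical invocation of the function above; the paths are placeholders and the expected array shapes follow the docstring.

gcd_info = extract_gcd(
    gcd_file="/path/to/GeoCalibDetectorStatus.i3.gz",
    outdir="~/retro_gcd_pkl",
)
print(gcd_info["geo"].shape)  # (86, 60, 3)
print(gcd_info["rde"].shape)  # (86, 60)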
Code example #8
def generate_time_indep_tables(table,
                               outdir=None,
                               kinds=('clsim', 'ckv'),
                               overwrite=False):
    """Generate and save to disk time independent table(s) from the original
    CLSim table and/or a Cherenkov table.

    Parameters
    ----------
    table : string
    outdir : string, optional
    kinds : string or iterable of strings, optional
    overwrite : bool, optional

    Returns
    -------
    t_indep_table : numpy.ndarray of size (n_r, n_costheta, n_costhetadir, n_deltaphidir)

    """
    if isinstance(kinds, basestring):
        kinds = [kinds]
    kinds = [k.strip().lower() for k in kinds]

    clsim_table_path = None
    ckv_table_path = None

    table = expand(table)
    if outdir is None:
        if isdir(table):
            outdir = table
        elif table.endswith('.npy'):
            outdir = dirname(table)
        elif table.endswith('.fits'):
            outdir = table[:-len('.fits')]

    if isfile(table):
        table_basename = basename(table)
        if table_basename == 'table.npy' or table_basename.endswith('.fits'):
            clsim_table_path = table
        elif table_basename == 'ckv_table.npy':
            ckv_table_path = table

    elif isdir(table):
        if 'clsim' in kinds and isfile(join(table, 'table.npy')):
            clsim_table_path = table

        if 'ckv' in kinds and isfile(join(table, 'ckv_table.npy')):
            ckv_table_path = table

    t_indep_table_exists = False
    if 'clsim' in kinds and isfile(join(outdir, 't_indep_table.npy')):
        t_indep_table_exists = True

    t_indep_ckv_table_exists = False
    if 'ckv' in kinds and isfile(join(outdir, 't_indep_ckv_table.npy')):
        t_indep_ckv_table_exists = True

    if 'clsim' in kinds and (overwrite or not t_indep_table_exists):
        if clsim_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from CLSim table but CLSim'
                ' table does not exist.')
        print('generating t_indep_table')
        mkdir(outdir)
        t0 = time.time()

        clsim_table = load_clsim_table_minimal(clsim_table_path, mmap=True)

        t1 = time.time()
        if retro.DEBUG:
            print('loaded clsim table in {:.3f} s'.format(t1 - t0))

        t_indep_table = clsim_table['table'][1:-1, 1:-1, 1:-1, 1:-1,
                                             1:-1].sum(axis=2)

        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))

        np.save(join(outdir, 't_indep_table.npy'), t_indep_table)

        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_table.npy to disk in {:.3f} s'.format(t3 -
                                                                       t2))

        del clsim_table, t_indep_table

    if 'ckv' in kinds and (overwrite or not t_indep_ckv_table_exists):
        if ckv_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from ckv table but ckv'
                ' table does not exist.')
        print('generating t_indep_ckv_table')
        mkdir(outdir)
        t0 = time.time()

        ckv_table = load_ckv_table(ckv_table_path, mmap=True)

        t1 = time.time()
        if retro.DEBUG:
            print('loaded ckv table in {:.3f} s'.format(t1 - t0))

        t_indep_ckv_table = ckv_table['ckv_table'].sum(axis=2)

        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))

        np.save(join(outdir, 't_indep_ckv_table.npy'), t_indep_ckv_table)

        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_ckv_table.npy to disk in {:.3f} s'.format(t3 - t2))

        del ckv_table, t_indep_ckv_table
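A hypothetical invocation of the function above; the path is a placeholder. The generated arrays are written as t_indep_table.npy and/or t_indep_ckv_table.npy in `outdir`.

generate_time_indep_tables(
    table="~/tables/my_table_dir",  # directory containing table.npy / ckv_table.npy
    kinds=("clsim", "ckv"),
    overwrite=False,
)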
Code example #9
def generate_stacked_tables(outdir, dom_tables_kw):
    """Stack a set of tables into a single numpy array for use of all tables in
    Numba.

    Currently, only ckv_templ_compr tables are supported.

    Parameters
    ----------
    outdir : string
        Path to directory into which the three resulting files will be stored.

    dom_tables_kw : mapping
        As returned by retro.init_obj.parse_args

    """
    if dom_tables_kw['dom_tables_kind'] != 'ckv_templ_compr':
        raise NotImplementedError(
            '"{}" tables not supported; only "ckv_templ_compr"'
            .format(dom_tables_kw['dom_tables_kind'])
        )

    # Use the convenience function to load the single-DOM tables into a
    # retro_5d_tables.Retro5DTables object, and then we can use the loaded
    # tables from there.
    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)

    assert np.all(dom_tables.sd_idx_table_indexer >= 0)

    table_meta = OrderedDict()
    table_meta['table_kind'] = dom_tables.table_kind
    table_meta['sd_idx_table_indexer'] = dom_tables.sd_idx_table_indexer
    table_meta.update(dom_tables.table_meta)
    table_meta['n_photons'] = 1.0
    table_meta['n_photons_per_table'] = np.array(dom_tables.n_photons_per_table)

    outdir = expand(outdir)
    mkdir(outdir)

    fpath = join(outdir, 'stacked_{}_meta.pkl'.format(dom_tables.table_name))
    sys.stdout.write('Writing metadata to "{}" ...'.format(fpath))
    sys.stdout.flush()
    with open(fpath, 'wb') as fhandle:
        pickle.dump(table_meta, fhandle, protocol=pickle.HIGHEST_PROTOCOL)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()

    if dom_tables.compute_t_indep_exp:
        # Renormalize to 1 photon
        stacked_t_indep_tables = np.stack(
            [tbl/n for tbl, n in zip(dom_tables.t_indep_tables, dom_tables.n_photons_per_table)]
        )
        fpath = join(
            outdir,
            'stacked_{}.npy'.format(dom_tables.t_indep_table_name)
        )
        sys.stdout.write('Writing stacked t_indep tables to "{}" ...'
                         .format(fpath))
        sys.stdout.flush()
        np.save(fpath, stacked_t_indep_tables)
        sys.stdout.write(' done.\n')
        sys.stdout.flush()

    # Renormalize to 1 photon
    for template_map, n_photons in zip(dom_tables.tables, dom_tables.n_photons_per_table):
        template_map['weight'] /= n_photons

    stacked_tables = np.stack(dom_tables.tables)
    fpath = join(outdir, 'stacked_{}.npy'.format(dom_tables.table_name))
    sys.stdout.write('Writing stacked tables to "{}" ...'.format(fpath))
    sys.stdout.flush()
    np.save(fpath, stacked_tables)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()
Code example #10
def extract_dom_coordinates(gcd, outdir):
    """Extract the DOM coordinates from a gcd file.

    Parameters
    ----------
    gcd : string
        Path to GCD file

    outdir : string
        Path to directory into which to store the resulting .npy file
        containing the coordinates array

    """
    gcd = expanduser(expandvars(gcd))
    outdir = expanduser(expandvars(outdir))

    gcd_md5 = get_file_md5(gcd)

    print('Extracting geometry from\n  "{}"'.format(abspath(gcd)))
    print('File MD5 sum is\n  {}'.format(gcd_md5))
    print('Will output geom file and metadata file to directory\n'
          '  "{}"'.format(abspath(outdir)))

    if not isfile(gcd):
        raise IOError('`gcd` file does not exist at "{}"'.format(gcd))

    mkdir(outdir)

    geofile = dataio.I3File(gcd)  # pylint: disable=no-member
    geometry = None
    while geofile.more():
        frame = geofile.pop_frame()
        if 'I3Geometry' in frame.keys():
            geometry = frame['I3Geometry']
            break
    if geometry is None:
        raise ValueError('Could not find geometry in file "{}"'.format(gcd))

    omgeo = geometry.omgeo

    geom = np.full(shape=(N_STRINGS, N_OMS, 3), fill_value=np.nan)
    for string in range(N_STRINGS):
        for om in range(N_OMS):
            geom[string,
                 om, :] = (omgeo.get(OMKey(string + 1, om + 1)).position.x,
                           omgeo.get(OMKey(string + 1, om + 1)).position.y,
                           omgeo.get(OMKey(string + 1, om + 1)).position.z)

    assert np.sum(np.isnan(geom)) == 0

    geom_meta = generate_geom_meta(geom)
    geom_meta['sourcefile_path'] = gcd
    geom_meta['sourcefile_md5'] = gcd_md5

    outpath = join(outdir, GEOM_FILE_PROTO.format(**geom_meta))
    metapath = join(outdir, GEOM_META_PROTO.format(**geom_meta))

    json.dump(geom_meta, open(metapath, 'w'), indent=2)
    print('Saved metadata to\n  "{}"'.format(abspath(metapath)))
    np.save(outpath, geom)
    print('Saved geom to\n  "{}"'.format(abspath(outpath)))
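A hypothetical invocation of the function above; both paths are placeholders.

extract_dom_coordinates(
    gcd="/path/to/GeoCalibDetectorStatus.i3.gz",
    outdir="~/retro_geom",
)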
Code example #11
def plot_clsim_table_summary(
        summaries, formats=None, outdir=None, no_legend=False
    ):
    """Plot the table summary produced by `summarize_clsim_table`.

    Plots are made of marginalized 1D distributions, where the mean, median,
    and/or max are used to marginalize out the remaining dimensions (where
    those are present in the summaries).

    Parameters
    ----------
    summaries : string, summary, or iterable thereof
        If string(s) are provided, each is glob-expanded. See
        :method:`glob.glob` for valid syntax.

    formats : None, string, or iterable of strings in {'pdf', 'png'}
        If no formats are provided, the plot will not be saved.

    outdir : None or string
        If `formats` is specified and `outdir` is None, the plots are
        saved to the present working directory.

    no_legend : bool, optional
        Do not display legend on plots (default is to display a legend)

    Returns
    -------
    all_figs : list of three :class:`matplotlib.figure.Figure`

    all_axes : list of three lists of :class:`matplotlib.axes.Axes`

    summaries : list of :class:`collections.OrderedDict`
        List of all summaries loaded

    """
    orig_summaries = deepcopy(summaries)

    if isinstance(summaries, (basestring, Mapping)):
        summaries = [summaries]

    tmp_summaries = []
    for summary in summaries:
        if isinstance(summary, Mapping):
            tmp_summaries.append(summary)
        elif isinstance(summary, basestring):
            tmp_summaries.extend(glob(expand(summary)))
    summaries = tmp_summaries

    for summary_n, summary in enumerate(summaries):
        if isinstance(summary, basestring):
            summary = from_json(summary)
            summaries[summary_n] = summary

    if formats is None:
        formats = []
    elif isinstance(formats, basestring):
        formats = [formats]

    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    n_summaries = len(summaries)

    if n_summaries == 0:
        raise ValueError(
            'No summaries found based on argument `summaries`={}'
            .format(orig_summaries)
        )

    for n, fmt in enumerate(formats):
        fmt = fmt.strip().lower()
        assert fmt in ('pdf', 'png'), fmt
        formats[n] = fmt

    all_items = OrderedDict()
    for summary in summaries:
        for key, value in summary.items():
            if key == 'dimensions':
                continue
            if key not in all_items:
                all_items[key] = []
            all_items[key].append(value)

    same_items = OrderedDict()
    different_items = OrderedDict()
    for key, values in all_items.items():
        all_same = True
        ref_value = values[0]
        for value in values[1:]:
            if np.any(value != ref_value):
                all_same = False

        if all_same:
            same_items[key] = values[0]
        else:
            different_items[key] = values

    if n_summaries > 1:
        if same_items:
            print('Same for all:\n{}'.format(same_items.keys()))
        if different_items:
            print('Different for some or all:\n{}'
                  .format(different_items.keys()))

    same_label = formatter(same_items)

    summary_has_detail = False
    if set(['string', 'depth_idx', 'seed']).issubset(all_items.keys()):
        summary_has_detail = True
        strings = sorted(set(all_items['string']))
        depths = sorted(set(all_items['depth_idx']))
        seeds = sorted(set(all_items['seed']))

    plot_kinds = ('mean', 'median', 'max')
    plot_kinds_with_data = set()
    dim_names = summaries[0]['dimensions'].keys()
    n_dims = len(dim_names)

    fig_x = 10 # inches
    fig_header_y = 0.35 # inches
    fig_one_axis_y = 5 # inches
    fig_all_axes_y = n_dims * fig_one_axis_y
    fig_y = fig_header_y + fig_all_axes_y # inches

    all_figs = []
    all_axes = []

    for plot_kind in plot_kinds:
        fig, f_axes = plt.subplots(
            nrows=n_dims, ncols=1, squeeze=False, figsize=(fig_x, fig_y)
        )
        all_figs.append(fig)
        f_axes = list(f_axes.flat)
        for ax in f_axes:
            ax.set_prop_cycle('color', COLOR_CYCLE_ORTHOG)
        all_axes.append(f_axes)

    n_lines = 0
    xlims = [[np.inf, -np.inf]] * n_dims

    summaries_order = []
    if summary_has_detail:
        for string, depth_idx, seed in product(strings, depths, seeds):
            for summary_n, summary in enumerate(summaries):
                if (summary['string'] != string
                        or summary['depth_idx'] != depth_idx
                        or summary['seed'] != seed):
                    continue
                summaries_order.append((summary_n, summary))
    else:
        for summary_n, summary in enumerate(summaries):
            summaries_order.append((summary_n, summary))

    labels_assigned = set()
    for summary_n, summary in summaries_order:
        different_label = formatter({k: v[summary_n] for k, v in different_items.items()})

        if different_label:
            label = different_label
            if label in labels_assigned:
                label = None
            else:
                labels_assigned.add(label)
        else:
            label = None

        for dim_num, dim_name in enumerate(dim_names):
            dim_info = summary['dimensions'][dim_name]
            dim_axes = [f_axes[dim_num] for f_axes in all_axes]
            bin_edges = summary[dim_name + '_bin_edges']
            if dim_name == 'deltaphidir':
                bin_edges /= np.pi
            xlims[dim_num] = [
                min(xlims[dim_num][0], np.min(bin_edges)),
                max(xlims[dim_num][1], np.max(bin_edges))
            ]
            for ax, plot_kind in zip(dim_axes, plot_kinds):
                if plot_kind not in dim_info:
                    continue
                plot_kinds_with_data.add(plot_kind)
                vals = dim_info[plot_kind]
                ax.step(bin_edges, [vals[0]] + list(vals),
                        linewidth=1, clip_on=True,
                        label=label)
                n_lines += 1

    dim_labels = dict(
        r=r'$r$',
        costheta=r'$\cos\theta$',
        t=r'$t$',
        costhetadir=r'$\cos\theta_{\rm dir}$',
        deltaphidir=r'$\Delta\phi_{\rm dir}$'
    )
    units = dict(r='m', t='ns', deltaphidir=r'rad/$\pi$')

    logx_dims = []
    logy_dims = ['r', 'time', 'deltaphidir']

    flabel = ''
    same_flabel = formatter(same_items, fname=True)
    different_flabel = formatter(different_items, key_only=True, fname=True)
    if same_flabel:
        flabel += '__same__' + same_flabel
    if different_flabel:
        flabel += '__differ__' + different_flabel

    for kind_idx, (plot_kind, fig) in enumerate(zip(plot_kinds, all_figs)):
        if plot_kind not in plot_kinds_with_data:
            continue
        for dim_num, (dim_name, ax) in enumerate(zip(dim_names, all_axes[kind_idx])):
            #if dim_num == 0 and different_items:
            if different_items and not no_legend:
                ax.legend(loc='best', frameon=False,
                          prop=dict(size=7, family='monospace'))

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.yaxis.set_ticks_position('none')
            ax.xaxis.set_ticks_position('none')
            ax.xaxis.tick_bottom()
            ax.yaxis.tick_left()

            ax.set_xlim(xlims[dim_num])

            xlabel = dim_labels[dim_name]
            if dim_name in units:
                xlabel += ' ({})'.format(units[dim_name])
            ax.set_xlabel(xlabel)
            if dim_name in logx_dims:
                ax.set_xscale('log')
            if dim_name in logy_dims:
                ax.set_yscale('log')

        fig.tight_layout(rect=(0, 0, 1, fig_all_axes_y/fig_y))
        suptitle = (
            'Marginalized distributions (taking {} over all other axes)'
            .format(plot_kind)
        )
        if same_label:
            suptitle += '\n' + same_label
        fig.suptitle(suptitle, y=(fig_all_axes_y + fig_header_y*0.8) / fig_y,
                     fontsize=9)

        for fmt in formats:
            outfpath = ('clsim_table_summaries{}__{}.{}'
                        .format(flabel, plot_kind, fmt))
            if outdir:
                outfpath = join(outdir, outfpath)
            fig.savefig(outfpath, dpi=300)
            print('Saved image to "{}"'.format(outfpath))

    return all_figs, all_axes, summaries
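A hypothetical invocation of the function above; the glob pattern and output directory are placeholders.

figs, axes, summaries = plot_clsim_table_summary(
    summaries="~/table_summaries/*summary*.json",
    formats="png",
    outdir="~/plots",
)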
Code example #12
def combine_tables(table_fpaths, outdir=None, overwrite=False):
    """Combine multiple tables together into a single table.

    All tables specified must have the same binnings defined. Tables should
    also be produced using different random seeds (if all else besides
    n_photons is equal); if corresponding metadata files can be found in the
    same directories as the CLSim tables, this will be enforced prior to
    loading and combining the actual tables together.

    Parameters
    ----------
    table_fpaths : string or iterable thereof
        Each string is glob-expanded

    outdir : string, optional
        Directory to which to save the combined table; if not specified, the
        resulting table will be returned but not saved to disk.

    overwrite : bool
        Overwrite an existing table. If a table is found at the output path and
        `overwrite` is False, the function simply returns without raising an
        exception.

    Returns
    -------
    combined_table

    """
    t_start = time()

    # Get all input table filepaths, including glob expansion

    orig_table_fpaths = deepcopy(table_fpaths)
    if isinstance(table_fpaths, string_types):
        table_fpaths = [table_fpaths]
    table_fpaths_tmp = []
    for fpath in table_fpaths:
        table_fpaths_tmp.extend(glob(expand(fpath)))
    table_fpaths = sorted(table_fpaths_tmp, key=nsort_key_func)

    if not table_fpaths:
        raise ValueError(
            "Found no tables given `table_fpaths` = {}".format(orig_table_fpaths)
        )

    wstderr(
        'Found {} tables to combine:\n  {}\n'.format(
            len(table_fpaths), '\n  '.join(table_fpaths)
        )
    )

    # Create the output directory

    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    # Combine the tables

    combined_table = None
    table_keys = None
    for fpath in table_fpaths:
        table = load_clsim_table_minimal(fpath, mmap=True)

        base = basename(fpath)
        rootname, ext = splitext(base)
        if ext.lstrip('.') in COMPR_EXTENSIONS:
            base = rootname
        if 'source_tables' not in table:
            table['source_tables'] = np.array([base], dtype=np.string0)

        if combined_table is None:
            combined_table = table
            table_keys = set(table.keys())

            # Formulate output file paths and check if they exist (do on first
            # table to avoid finding out we are going to overwrite a file
            # before loading all the source tables)
            if outdir is not None:
                output_fpaths = OrderedDict(
                    (
                        (k, join(outdir, k + '.npy'))
                        for k in sorted(table_keys.difference(NO_WRITE_KEYS))
                    )
                )
                if not overwrite:
                    for fp in output_fpaths.values():
                        if isfile(fp):
                            raise IOError(
                                'File at {} already exists, NOT overwriting'.format(fp)
                            )
                wstderr(
                    'Output files will be written to:\n  {}\n'.format(
                        '\n  '.join(output_fpaths.values())
                    )
                )

            continue

        # Make sure keys are the same

        new_table_keys = set(table.keys())
        missing_keys = sorted(
            table_keys
            .difference(new_table_keys)
            .difference(NO_VALIDATE_KEYS)
        )
        additional_keys = sorted(
            new_table_keys
            .difference(table_keys)
            .difference(NO_VALIDATE_KEYS)
        )
        if missing_keys or additional_keys:
            raise ValueError(
                'Table is missing keys {} and/or has additional keys {}'.format(
                    missing_keys, additional_keys
                )
            )

        # Validate keys that should be equal

        for key in sorted(table_keys.difference(NO_VALIDATE_KEYS)):
            if not np.array_equal(table[key], combined_table[key]):
                raise ValueError('Unequal "{}" in file {}'.format(key, fpath))

        # Add values from keys that should be summed

        for key in SUM_KEYS:
            if key not in table:
                continue
            combined_table[key] += table[key]

        # Concatenate and sort new source table(s) in source_tables array

        combined_table['source_tables'] = np.sort(
            np.concatenate([combined_table['source_tables'], table['source_tables']])
        )

        # Make sure to clear table from memory since these can be quite large

        del table

    # Save the data to npy files on disk (in a sub-directory for all of this
    # table's files)
    if outdir is not None:
        wstderr('Writing files:\n')

        len_longest_fpath = np.max([len(p) for p in output_fpaths.values()])
        for key in sorted(table_keys.difference(NO_WRITE_KEYS)):
            fpath = output_fpaths[key]
            wstderr('  {} ...'.format(fpath.ljust(len_longest_fpath)))
            t0 = time()
            np.save(fpath, combined_table[key])
            wstderr(' ({:12.3f} s)\n'.format(time() - t0))

    wstderr(
        'Total time to combine tables: {} s\n'.format(np.round(time() - t_start, 3))
    )

    return combined_table
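Illustrative only (not part of the project): the validate-then-sum merge performed above, shown on two toy "tables" (plain dicts of arrays); the key names below are invented stand-ins for the project's validate/sum key sets.

import numpy as np

VALIDATE_KEYS_TOY = {'bin_edges'}
SUM_KEYS_TOY = {'table'}

t1 = {'bin_edges': np.arange(5), 'table': np.ones(4)}
t2 = {'bin_edges': np.arange(5), 'table': 2 * np.ones(4)}

combined = t1
for key in VALIDATE_KEYS_TOY:
    # Keys that describe binning etc. must match exactly across tables
    assert np.array_equal(t2[key], combined[key]), key
for key in SUM_KEYS_TOY:
    # Keys that hold photon counts are accumulated
    combined[key] += t2[key]

print(combined['table'])  # [3. 3. 3. 3.]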
Code example #13
def generate_ckv_table(
    table,
    beta,
    oversample,
    num_cone_samples,
    outdir=None,
    mmap_src=True,
    mmap_dst=False,
):
    """
    Parameters
    ----------
    table : string or mapping
        If string, path to table file (or directory in the case of npy table).
        A mapping is assumed to be a table loaded as by
        `retro.table_readers.load_clsim_table_minimal`.

    beta : float in [0, 1]
        Beta factor, i.e. velocity of the charged particle divided by the speed
        of light in vacuum: `v/c`.

    oversample : int > 0
        Sample from each directional bin (costhetadir and deltaphidir) this
        many times. Increase to obtain a more accurate average over the range
        of directions that the resulting ckv-emitter-direction can take within
        the same output (directional) bin. Note that there is no unique
        information given by sampling (more than once) in the spatial
        dimensions, so these dimensions ignore `oversample`. Therefore,
        the computational cost is `oversample**2`.

    num_cone_samples : int > 0
        Number of samples around the circumference of the Cherenkov cone.

    outdir : string or None
        If a string, use this directory to place the .npy file containing the
        ckv table. If `outdir` is None and `table` is a .npy-file-directory,
        this directory is used for `outdir`. If `outdir` is None and `table` is
        the path to a .fits file, `outdir` is the same name but with the .fits
        extension stripped. If `outdir` is None and `table` is a mapping, a
        ValueError is raised.

    mmap_src : bool, optional
        Whether to (attempt to) memory map the source `table` (if `table` is a
        string pointing to the file/directory). Default is `True`, as tables
        can easily exceed the memory capacity of a machine.

    mmap_dst : bool, optional
        Whether to memory map the destination `ckv_table`.

    """
    input_filename = None
    if isinstance(table, string_types):
        input_filename = expand(table)
        table = load_clsim_table_minimal(input_filename, mmap=mmap_src)

    if input_filename is None and outdir is None:
        raise ValueError('You must provide an `outdir` if `table` is a python'
                         ' object (i.e. not a file or directory path).')

    # Store original table to keep binning info, etc.
    full_table = table

    if "binning" in full_table:
        costhetadir_bin_edges = full_table["binning"]["costhetadir"]
        deltaphidir_bin_edges = full_table["binning"]["deltaphidir"]
    else:
        costhetadir_bin_edges = full_table['costhetadir_bin_edges']
        deltaphidir_bin_edges = full_table['deltaphidir_bin_edges']

    n_phase = full_table['phase_refractive_index']
    cos_ckv = 1 / (n_phase * beta)
    if cos_ckv > 1:
        raise ValueError(
            'Particle moving at beta={} in medium with n_phase={} does not'
            ' produce Cherenkov light!'.format(beta, n_phase)
        )

    table = full_table["table"]

    if outdir is None:
        if isdir(input_filename):
            outdir = input_filename
        elif isfile(input_filename):
            # Note: str.rstrip strips characters, not a suffix
            outdir = input_filename
            if outdir.endswith('.fits'):
                outdir = outdir[:-len('.fits')]
            assert outdir != input_filename, str(input_filename)
    else:
        outdir = expand(outdir)
    ckv_table_fpath = join(outdir, 'ckv_table.npy')
    mkdir(outdir)

    if mmap_dst:
        # Allocate memory-mapped file
        ckv_table = np.lib.format.open_memmap(
            filename=ckv_table_fpath,
            mode='w+',
            dtype=np.float32,
            shape=table.shape,
        )
    else:
        ckv_table = np.empty(shape=table.shape, dtype=np.float32)

    try:
        convolve_table(
            src=table,
            dst=ckv_table,
            cos_ckv=cos_ckv,
            num_cone_samples=num_cone_samples,
            oversample=oversample,
            costhetadir_min=costhetadir_bin_edges.min(),
            costhetadir_max=costhetadir_bin_edges.max(),
            phidir_min=deltaphidir_bin_edges.min(),
            phidir_max=deltaphidir_bin_edges.max(),
        )
    except:
        del ckv_table
        if mmap_dst:
            remove(ckv_table_fpath)
        raise

    if not mmap_dst:
        np.save(ckv_table_fpath, ckv_table)

    return ckv_table
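A hypothetical invocation of the function above; the table path is a placeholder and the sampling parameters are example values, not recommendations.

ckv_table = generate_ckv_table(
    table="~/tables/my_clsim_table_dir",  # .npy-file directory
    beta=1.0,
    oversample=32,
    num_cone_samples=100,
    outdir=None,      # ckv_table.npy is written next to the source table
    mmap_src=True,
    mmap_dst=False,
)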
Code example #14
def get_retro_results(
    outdir,
    recos_root,
    events_root,
    point_estimator="median",
    recompute_estimate=False,
    overwrite=False,
    procs=None,
):
    """Extract all rectro reco results from a reco directory tree, merging with original
    event information from correspoding source events directory tree. Results are
    populated to a Pandas DataFrame, saved to disk, and this is returned to the user.

    Parameters
    ----------
    outdir : string
    recos_root : string
    events_root : string
    point_estimator : string, optional
        Must be one of `VALID_POINT_ESTIMATORS`. Set to "median" by default.
    recompute_estimate : bool, optional
    overwrite : bool, optional
    procs : int > 0 or None, optional
        Passing None uses `multiprocessing.cpu_count()` processes.

    Returns
    -------
    all_events : pandas.DataFrame

    """
    t0 = time.time()
    outdir = abspath(expand(outdir))
    if not isdir(outdir):
        mkdir(outdir)
    outfile_path = join(outdir, 'reconstructed_events.pkl')
    if not overwrite and isfile(outfile_path):
        raise IOError(
            'Output file path already exists at "{}"'.format(outfile_path))

    if point_estimator not in VALID_POINT_ESTIMATORS:
        raise ValueError(
            "Point estimator must be one of {}".format(VALID_POINT_ESTIMATORS))

    assert procs is None or procs >= 1

    use_pool = procs is None or procs > 1
    if use_pool:
        pool = Pool(procs)

    # Walk directory hierarchy
    results = []
    for reco_dirpath, _, files in walk(recos_root, followlinks=True):
        is_leafdir = False
        for f in files:
            if f[-3:] == 'pkl' and f[:3] in ('slc', 'evt'):
                is_leafdir = True
                break
        if not is_leafdir:
            continue

        rel_dirpath = relpath(path=reco_dirpath, start=recos_root)
        event_dirpath = None
        if events_root is not None:
            event_dirpath = join(events_root, rel_dirpath)
            if not isdir(event_dirpath):
                raise IOError('Event directory does not exist: "{}"'.format(
                    event_dirpath))

        abs_reco_dirpath = abspath(reco_dirpath)
        filenum = basename(abs_reco_dirpath)
        flavdir = basename(dirname(abs_reco_dirpath))

        kwargs = dict(
            recodir=reco_dirpath,
            eventdir=event_dirpath,
            flavdir=flavdir,
            filenum=filenum,
            recompute_estimate=recompute_estimate,
            point_estimator=point_estimator,
        )
        if use_pool:
            results.append(pool.apply_async(extract_from_leaf_dir, (), kwargs))
        else:
            results.append(extract_from_leaf_dir(**kwargs))

    if use_pool:
        print('Gathering {} async result(s)...'.format(len(results)))
        results = [r.get() for r in results]

    all_events = reduce(add, results, [])

    # Convert to pandas DataFrame
    all_events = pd.DataFrame(all_events)

    # Save to disk
    all_events.to_pickle(outfile_path)
    print('\nAll_events saved to "{}"\n'.format(outfile_path))

    nevents = len(all_events)
    dt = time.time() - t0
    print('\nTook {:.3f} s to extract {} events'.format(dt, nevents))

    return all_events
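
A hedged usage sketch of `get_retro_results`; all paths below are hypothetical and the keyword values simply mirror the defaults documented above.

df = get_retro_results(
    outdir="/path/to/summary",          # hypothetical
    recos_root="/path/to/recos",        # hypothetical
    events_root="/path/to/events",      # hypothetical
    point_estimator="median",
    recompute_estimate=False,
    overwrite=True,
    procs=None,                         # None -> use all available cores
)
print("extracted {} events".format(len(df)))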
Code example #15
def generate_ckv_tdi_table(
    tdi_table,
    beta,
    oversample,
    num_cone_samples,
    n_phase=None,
    outdir=None,
    mmap_src=True,
    mmap_dst=False,
):
    """
    Parameters
    ----------
    tdi_table : string or mapping
        If string, path to the TDI table file (or a directory containing a
        `tdi_table.npy` file).

    beta : float in [0, 1]
        Beta factor, i.e. velocity of the charged particle divided by the speed
        of light in vacuum: `v/c`.

    oversample : int > 0
        Sample from each directional bin (costhetadir and deltaphidir) this
        many times. Increase to obtain a more accurate average over the range
        of directions that the resulting ckv-emitter-direction can take within
        the same output (directional) bin. Note that there is no unique
        information given by sampling (more than once) in the spatial
        dimensions, so these dimensions ignore `oversample`. Therefore,
        the computational cost is `oversample**2`.

    num_cone_samples : int > 0
        Number of samples around the circumference of the Cherenkov cone.

    n_phase : float or None
        Required if `tdi_table` is an array; if `tdi_table` specifies a table
        location, then `n_phase` will be read from the `tdi_metadata.pkl`
        file.

    outdir : string or None
        If a string, use this directory to place the resulting
        `ckv_tdi_table.npy` file. This is optional if `tdi_table` specifies a
        file or directory (in which case the `outdir` will be inferred from
        this path).

    mmap_src : bool, optional
        Whether to (attempt to) memory map the source `tdi_table` (if `table`
        is a string pointing to the file/directory). Default is `True`, as
        tables can easily exceed the memory capacity of a machine.

    mmap_dst : bool, optional
        Whether to memory map the destination `ckv_tdi_table.npy` file.

    """
    input_filename = None
    input_dirname = None
    if isinstance(tdi_table, string_types):
        tdi_table = expand(tdi_table)
        if isdir(tdi_table):
            input_filename = join(tdi_table, 'tdi_table.npy')
        elif isfile(tdi_table):
            input_filename = tdi_table
        else:
            raise IOError(
                '`tdi_table` is not a directory or file: "{}"'.format(
                    tdi_table))
        input_dirname = dirname(input_filename)

    if input_filename is None and outdir is None:
        raise ValueError(
            'You must provide an `outdir` if `tdi_table` is a python object'
            ' (i.e., not a file or directory path).')

    if input_filename is None and n_phase is None:
        raise ValueError(
            'You must provide `n_phase` if `tdi_table` is a python object'
            ' (i.e., not a file or directory path).')

    if n_phase is None:
        with open(join(input_dirname, 'tdi_metadata.pkl'), 'rb') as fobj:
            meta = pickle.load(fobj)
        n_phase = meta['n_phase']

    if outdir is None:
        outdir = input_dirname
    mkdir(outdir)

    if input_filename is not None:
        tdi_table = np.load(
            input_filename,
            mmap_mode='r' if mmap_src else None,
        )

    cos_ckv = 1 / (n_phase * beta)
    if cos_ckv > 1:
        raise ValueError(
            'Particle moving at beta={} in medium with n_phase={} does not'
            ' produce Cherenkov light!'.format(beta, n_phase))

    ckv_tdi_table_fpath = join(outdir, 'ckv_tdi_table.npy')
    if isfile(ckv_tdi_table_fpath):
        print('WARNING! Destination file exists "{}"'.format(
            ckv_tdi_table_fpath))

    if mmap_dst:
        # Allocate memory-mapped file
        ckv_tdi_table = np.lib.format.open_memmap(
            filename=ckv_tdi_table_fpath,
            mode='w+',
            dtype=np.float32,
            shape=tdi_table.shape,
        )
    else:
        ckv_tdi_table = np.empty(shape=tdi_table.shape, dtype=np.float32)

    try:
        convolve_table(
            src=tdi_table,
            dst=ckv_tdi_table,
            cos_ckv=cos_ckv,
            num_cone_samples=num_cone_samples,
            oversample=oversample,
            costhetadir_min=-1,
            costhetadir_max=+1,
            phidir_min=-np.pi,
            phidir_max=+np.pi,
        )
    except:
        del ckv_tdi_table
        if mmap_dst:
            remove(ckv_tdi_table_fpath)
        raise

    if not mmap_dst:
        np.save(ckv_tdi_table_fpath, ckv_tdi_table)

    return ckv_tdi_table
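
A hedged usage sketch of `generate_ckv_tdi_table`, assuming the TDI table was written to a directory containing `tdi_table.npy` and `tdi_metadata.pkl` (the path is hypothetical):

ckv_tdi_table = generate_ckv_tdi_table(
    tdi_table="/path/to/tdi_table_dir",   # hypothetical directory
    beta=1.0,                             # fully relativistic particle
    oversample=1,
    num_cone_samples=100,
    n_phase=None,        # read from tdi_metadata.pkl in the input directory
    outdir=None,         # defaults to the input directory
    mmap_src=True,
    mmap_dst=False,
)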
Code example #16
File: reco.py  Project: mhieronymus/retro
def run_multinest(
    outdir,
    event_idx,
    event,
    dom_tables,
    hypo_handler,
    priors,
    importance_sampling,
    max_modes,
    const_eff,
    n_live,
    evidence_tol,
    sampling_eff,
    max_iter,
    seed,
):
    """Setup and run MultiNest on an event.

    See the README file from MultiNest for greater detail on parameters
    specific to MultiNest (parameters from `importance_sampling` on).

    Parameters
    ----------
    outdir
    event_idx
    event
    dom_tables
    hypo_handler
    priors : mapping
    importance_sampling
    max_modes
    const_eff
    n_live
    evidence_tol
    sampling_eff
    max_iter
        Note that this limit is the maximum number of sample replacements and
        _not_ max number of likelihoods evaluated. A replacement only occurs
        when a likelihood is found that exceeds the minimum likelihood among
        the live points.
    seed

    Returns
    -------
    llhp : shape (num_llh,) structured array of dtype retro.LLHP_T
        LLH and the corresponding parameter values.

    mn_meta : OrderedDict
        Metadata used for running MultiNest, including priors, parameters, and
        the keyword args used to invoke the `pymultinest.run` function.

    """
    # pylint: disable=missing-docstring
    # Import pymultinest here; it's a less common dependency, so other
    # functions / constants in this module will still be import-able w/o it.
    import pymultinest

    hits = event['hits']
    hits_indexer = event['hits_indexer']
    hits_summary = event['hits_summary']

    priors_used = OrderedDict()

    prior_funcs = []
    for dim_num, dim_name in enumerate(CUBE_DIMS):
        prior_kind, prior_params = priors[dim_name]
        if prior_kind == PRI_UNIFORM:
            # Time is special since prior is relative to hits in the event
            if dim_name == T:
                prior_params = (hits_summary['earliest_hit_time'] +
                                prior_params[0],
                                hits_summary['latest_hit_time'] +
                                prior_params[1])
            priors_used[dim_name] = (prior_kind, prior_params)

            if prior_params == (0, 1):
                continue
                #def prior_func(cube): # pylint: disable=unused-argument
                #    pass
            elif np.min(prior_params[0]) == 0:
                maxval = np.max(prior_params)

                def prior_func(cube, n=dim_num, maxval=maxval):
                    cube[n] = cube[n] * maxval
            else:
                minval = np.min(prior_params)
                width = np.max(prior_params) - minval

                def prior_func(cube, n=dim_num, width=width, minval=minval):
                    cube[n] = cube[n] * width + minval

        elif prior_kind == PRI_LOG_UNIFORM:
            priors_used[dim_name] = (prior_kind, prior_params)
            log_min = np.log(np.min(prior_params))
            log_width = np.log(np.max(prior_params) / np.min(prior_params))

            def prior_func(cube,
                           n=dim_num,
                           log_width=log_width,
                           log_min=log_min):
                cube[n] = exp(cube[n] * log_width + log_min)

        elif prior_kind == PRI_COSINE:
            priors_used[dim_name] = (prior_kind, prior_params)
            cos_min = np.min(prior_params)
            cos_width = np.max(prior_params) - cos_min

            def prior_func(cube,
                           n=dim_num,
                           cos_width=cos_width,
                           cos_min=cos_min):
                cube[n] = acos(cube[n] * cos_width + cos_min)

        elif prior_kind == PRI_GAUSSIAN:
            priors_used[dim_name] = (prior_kind, prior_params)
            mean, stddev = prior_params
            norm = 1 / (stddev * np.sqrt(TWO_PI))

            def prior_func(cube,
                           n=dim_num,
                           norm=norm,
                           mean=mean,
                           stddev=stddev):
                cube[n] = norm * exp(-((cube[n] - mean) / stddev)**2)

        elif prior_kind == PRI_LOG_NORMAL:
            priors_used[dim_name] = (prior_kind, prior_params)
            shape, loc, scale, low, high = prior_params
            lognorm = stats.lognorm(shape, loc, scale)

            def prior_func(cube,
                           lognorm=lognorm,
                           n=dim_num,
                           low=low,
                           high=high):
                cube[n] = np.clip(lognorm.isf(cube[n]), a_min=low, a_max=high)

        elif prior_kind == PRI_SPEFIT2:
            spe_fit_val = event['recos']['SPEFit2'][dim_name]
            rel_loc, scale, low, high = prior_params
            loc = spe_fit_val + rel_loc
            cauchy = stats.cauchy(loc=loc, scale=scale)
            if dim_name == T:
                low += hits_summary['time_window_start']
                high += hits_summary['time_window_stop']
            priors_used[dim_name] = (PRI_CAUCHY, (loc, scale, low, high))

            def prior_func(cube, cauchy=cauchy, n=dim_num, low=low, high=high):
                cube[n] = np.clip(cauchy.isf(cube[n]), a_min=low, a_max=high)

        else:
            raise NotImplementedError(
                'Prior "{}" not implemented.'.format(prior_kind))

        prior_funcs.append(prior_func)

    param_values = []
    log_likelihoods = []
    t_start = []

    report_after = 1000

    def prior(cube, ndim, nparams):  # pylint: disable=unused-argument
        """Function for pymultinest to translate the hypercube MultiNest uses
        (each value is in [0, 1]) into the dimensions of the parameter space.

        Note that the cube dimension names are defined in module variable
        `CUBE_DIMS` for reference elsewhere.

        """
        for prior_func in prior_funcs:
            prior_func(cube)

    get_llh = dom_tables._get_llh  # pylint: disable=protected-access
    dom_info = dom_tables.dom_info
    tables = dom_tables.tables
    table_norm = dom_tables.table_norm
    t_indep_tables = dom_tables.t_indep_tables
    t_indep_table_norm = dom_tables.t_indep_table_norm
    sd_idx_table_indexer = dom_tables.sd_idx_table_indexer
    time_window = np.float32(hits_summary['time_window_stop'] -
                             hits_summary['time_window_start'])
    # TODO: implement logic allowing for not all DOMs to be used
    #hit_sd_indices = np.array(
    #    sorted(dom_tables.use_sd_indices_set.union(hits_indexer['sd_idx'])),
    #    dtype=np.uint32
    #)
    hit_sd_indices = hits_indexer['sd_idx']
    unhit_sd_indices = np.array(sorted(
        ALL_STRS_DOMS_SET.difference(hit_sd_indices)),
                                dtype=np.uint32)

    # DEBUG
    #table_indices = []
    #t_indep_indices = []

    def loglike(cube, ndim, nparams):  # pylint: disable=unused-argument
        """Function pymultinest calls to get llh values.

        Note that this is called _after_ `prior` has been called, so `cube`
        already contains the parameter values scaled to be in their physical
        ranges.

        """
        if not t_start:
            t_start.append(time.time())

        t0 = time.time()

        total_energy = cube[CUBE_ENERGY_IDX]
        track_fraction = cube[CUBE_TRACK_FRAC_IDX]

        if HYPO_PARAMS_T is HypoParams8D:
            hypo = HYPO_PARAMS_T(time=cube[CUBE_T_IDX],
                                 x=cube[CUBE_X_IDX],
                                 y=cube[CUBE_Y_IDX],
                                 z=cube[CUBE_Z_IDX],
                                 track_zenith=cube[CUBE_TRACK_ZEN_IDX],
                                 track_azimuth=cube[CUBE_TRACK_AZ_IDX],
                                 cascade_energy=total_energy *
                                 (1 - track_fraction),
                                 track_energy=total_energy * track_fraction)
        else:
            hypo = HYPO_PARAMS_T(time=cube[CUBE_T_IDX],
                                 x=cube[CUBE_X_IDX],
                                 y=cube[CUBE_Y_IDX],
                                 z=cube[CUBE_Z_IDX],
                                 track_zenith=cube[CUBE_TRACK_ZEN_IDX],
                                 track_azimuth=cube[CUBE_TRACK_AZ_IDX],
                                 cascade_energy=total_energy *
                                 (1 - track_fraction),
                                 track_energy=total_energy * track_fraction,
                                 cascade_zenith=cube[CUBE_CSCD_ZEN_IDX],
                                 cascade_azimuth=cube[CUBE_CSCD_AZ_IDX])
        sources = hypo_handler.get_sources(hypo)
        llh = get_llh(
            sources=sources,
            hits=hits,
            hits_indexer=hits_indexer,
            unhit_sd_indices=unhit_sd_indices,
            sd_idx_table_indexer=sd_idx_table_indexer,
            time_window=time_window,
            dom_info=dom_info,
            tables=tables,
            table_norm=table_norm,
            t_indep_tables=t_indep_tables,
            t_indep_table_norm=t_indep_table_norm,
            # DEBUG
            #table_indices=table_indices,
            #t_indep_indices=t_indep_indices
        )
        # DEBUG
        #print('')
        #with open('/tmp/get_llh.asm', 'w') as f:
        #print(get_llh.inspect_asm(get_llh.signatures[0]))
        #print('number of signatures:', len(get_llh.signatures))
        #print('')
        #raise Exception()

        t1 = time.time()

        param_values.append(hypo)
        log_likelihoods.append(llh)

        n_calls = len(log_likelihoods)

        if n_calls % report_after == 0:
            t_now = time.time()
            best_idx = np.argmax(log_likelihoods)
            best_llh = log_likelihoods[best_idx]
            best_p = param_values[best_idx]
            print('')
            if HYPO_PARAMS_T is HypoParams8D:
                print((
                    'best llh = {:.3f} @ '
                    '(t={:+.1f}, x={:+.1f}, y={:+.1f}, z={:+.1f},'
                    ' zen={:.1f} deg, az={:.1f} deg, Etrk={:.1f}, Ecscd={:.1f})'
                ).format(best_llh, best_p.time, best_p.x, best_p.y, best_p.z,
                         np.rad2deg(best_p.track_zenith),
                         np.rad2deg(best_p.track_azimuth), best_p.track_energy,
                         best_p.cascade_energy))
            else:
                print(('best llh = {:.3f} @'
                       ' (t={:+.1f}, x={:+.1f}, y={:+.1f}, z={:+.1f},'
                       ' zen_trk={:.1f} deg, zen_csc={:.1f} deg,'
                       ' az_trk={:.1f}, az_csc={:.1f},'
                       ' Etrk={:.1f}, Ecscd={:.1f})').format(
                           best_llh, best_p.time, best_p.x, best_p.y, best_p.z,
                           np.rad2deg(best_p.track_zenith),
                           np.rad2deg(best_p.cascade_zenith),
                           np.rad2deg(best_p.track_azimuth),
                           np.rad2deg(best_p.cascade_azimuth),
                           best_p.track_energy, best_p.cascade_energy))
            print('{} LLH computed'.format(n_calls))
            print('avg time per llh: {:.3f} ms'.format(
                (t_now - t_start[0]) / n_calls * 1000))
            print('this llh took:    {:.3f} ms'.format((t1 - t0) * 1000))
            print('')

        return llh

    n_dims = len(HYPO_PARAMS_T._fields)
    mn_kw = OrderedDict([
        ('n_dims', n_dims),
        ('n_params', n_dims),
        ('n_clustering_params', n_dims),
        ('wrapped_params', [int('azimuth' in p.lower()) for p in CUBE_DIMS]),
        ('importance_nested_sampling', importance_sampling),
        ('multimodal', max_modes > 1),
        ('const_efficiency_mode', const_eff),
        ('n_live_points', n_live),
        ('evidence_tolerance', evidence_tol),
        ('sampling_efficiency', sampling_eff),
        ('null_log_evidence', -1e90),
        ('max_modes', max_modes),
        ('mode_tolerance', -1e90),
        ('seed', seed),
        ('log_zero', -1e100),
        ('max_iter', max_iter),
    ])

    mn_meta = OrderedDict([
        ('params', CUBE_DIMS),
        ('original_prior_specs', priors),
        ('priors_used', priors_used),
        ('time_window', time_window),
        ('kwargs', sort_dict(mn_kw)),
    ])

    outdir = expand(outdir)
    mkdir(outdir)

    out_prefix = join(outdir, 'evt{}-'.format(event_idx))
    print('Output files prefix: "{}"\n'.format(out_prefix))

    print('Running MultiNest...')
    t0 = time.time()
    pymultinest.run(LogLikelihood=loglike,
                    Prior=prior,
                    verbose=True,
                    outputfiles_basename=out_prefix,
                    resume=False,
                    write_output=True,
                    n_iter_before_update=5000,
                    **mn_kw)
    t1 = time.time()

    llhp = np.empty(shape=len(param_values), dtype=LLHP_T)
    llhp['llh'] = log_likelihoods
    llhp[list(HYPO_PARAMS_T._fields)] = param_values

    llhp_outf = out_prefix + 'llhp.npy'
    print('Saving llhp to "{}"...'.format(llhp_outf))
    np.save(llhp_outf, llhp)

    mn_meta['num_llhp'] = len(param_values)
    mn_meta['run_time'] = t1 - t0
    mn_meta_outf = out_prefix + 'multinest_meta.pkl'
    print('Saving MultiNest metadata to "{}"'.format(mn_meta_outf))
    pickle.dump(mn_meta,
                open(mn_meta_outf, 'wb'),
                protocol=pickle.HIGHEST_PROTOCOL)

    # DEBUG
    #table_indices_outf = out_prefix + 'table_indices.pkl'
    #pickle.dump(table_indices, open(table_indices_outf, 'wb'),
    #            protocol=pickle.HIGHEST_PROTOCOL)
    #t_indep_table_indices_outf = out_prefix + 't_indep_table_indices.pkl'
    #pickle.dump(t_indep_indices, open(t_indep_table_indices_outf, 'wb'),
    #            protocol=pickle.HIGHEST_PROTOCOL)

    return llhp, mn_meta
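
The prior functions built above all follow the same pattern: MultiNest hands `prior` a unit hypercube, and each `prior_func` rescales one coordinate in place to its physical range. A minimal sketch of the uniform case, assuming nothing beyond plain Python:

def uniform_prior(cube, n, width, minval):
    # cube[n] arrives in [0, 1] from MultiNest and is rescaled in place
    cube[n] = cube[n] * width + minval

cube = [0.25]
uniform_prior(cube, n=0, width=200.0, minval=-100.0)
assert cube[0] == -50.0   # 0.25 of the way through [-100, +100]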
Code example #17
def generate_clsim_table(
    outdir,
    gcd,
    ice_model,
    angular_sensitivity,
    disable_tilt,
    disable_anisotropy,
    string,
    dom,
    n_events,
    seed,
    coordinate_system,
    binning,
    tableset_hash=None,
    tile=None,
    overwrite=False,
    compress=False,
):
    """Generate a CLSim table.

    See wiki.icecube.wisc.edu/index.php/Ice for information about ice models.

    Parameters
    ----------
    outdir : string

    gcd : string

    ice_model : str
        E.g. "spice_mie", "spice_lea", ...

    angular_sensitivity : str
        E.g. "h2-50cm", "9" (which is equivalent to "new25" because, like, duh)

    disable_tilt : bool
        Whether to force no layer tilt in simulation (if tilt is present in
        bulk ice model; otherwise, this has no effect)

    disable_anisotropy : bool
        Whether to force no bulk ice anisotropy (if anisotropy is present in
        bulk ice model; otherwise, this has no effect)

    string : int in [1, 86]

    dom : int in [1, 60]

    n_events : int > 0
        Note that the number of photons is much larger than the number of
        events (related to the "brightness" of the defined source).

    seed : int in [0, 2**32)
        Seed for CLSim's random number generator

    coordinate_system : string in {"spherical", "cartesian"}
        If spherical, base coordinate system is .. ::

            (r, theta, phi, t, costhetadir, (optionally abs)deltaphidir)

        If Cartesian, base coordinate system is .. ::

            (x, y, z, costhetadir, phidir)

        but if any of the coordinate axes are specified to have 0 bins, they
        will be omitted (but the overall order is maintained).

    binning : mapping
        If `coordinate_system` is "spherical", keys should be:
            "n_r_bins"
            "n_t_bins"
            "n_costheta_bins"
            "n_phi_bins"
            "n_costhetadir_bins"
            "n_deltaphidir_bins"
            "r_max"
            "r_power"
            "t_max"
            "t_power"
            "deltaphidir_power"
        If `coordinate_system` is "cartesian", keys should be:
            "n_x_bins"
            "n_y_bins"
            "n_z_bins"
            "n_costhetadir_bins"
            "n_phidir_bins"
            "x_min"
            "x_max"
            "y_min"
            "y_max"
            "z_min"
            "z_max"

    tableset_hash : str, optional
        Specify if the table is a tile used to generate a larger table

    tile : int >= 0, optional
        Specify if the table is a tile used to generate a larger table

    overwrite : bool, optional
        Whether to overwrite an existing table (default: False)

    compress : bool, optional
        Whether to pass the resulting table through zstandard compression
        (default: False)

    Raises
    ------
    ValueError
        If `compress` is True but `zstd` command-line utility cannot be found

    AssertionError, ValueError
        If illegal argument values are passed

    ValueError
        If `overwrite` is False and a table already exists at the target path

    Notes
    -----
    Binnings are as follows:
        * Radial binning is regular in the space of r**(1/r_power), with
          `n_r_bins` spanning from 0 to `r_max` meters.
        * Time binning is regular in the space of t**(1/t_power), with
          `n_t_bins` spanning from 0 to `t_max` nanoseconds.
        * Position zenith angle is binned regularly in the cosine of the zenith
          angle with `n_costheta_bins` spanning from -1 to +1.
        * Position azimuth angle is binned regularly, with `n_phi_bins`
          spanning from -pi to pi radians.
        * Photon directionality zenith angle (relative to the IceCube coordinate
          system) is binned regularly in cosine-zenith space, with
          `n_costhetadir_bins` spanning from `costhetadir_min` to
          `costhetadir_max`
        * Photon directionality azimuth angle; sometimes assumed to be
          symmetric about line from DOM to the center of the bin, so is binned
          as an absolute value, i.e., from 0 to pi radians. Otherwise, binned
          from -np.pi to +np.pi

    The following are forced upon the above binning specifications (and
    remaining parameters are specified as arguments to the function)
        * t_min = 0 (ns)
        * r_min = 0 (m)
        * costheta_min = -1
        * costheta_max = 1
        * phi_min = -pi (rad)
        * phi_max = pi (rad)
        * costhetadir_min = -1
        * costhetadir_max = 1
        * deltaphidir_min = 0 (rad)
        * deltaphidir_max = pi (rad)

    """
    assert isinstance(n_events, Integral) and n_events > 0
    assert isinstance(seed, Integral) and 0 <= seed < 2**32
    assert ((tableset_hash is not None and tile is not None)
            or (tableset_hash is None and tile is None))

    n_bins_per_dim = []
    for key, val in binning.items():
        if not key.startswith('n_'):
            continue
        assert isinstance(val, Integral), '{} not an integer'.format(key)
        assert val >= 0, '{} must be >= 0'.format(key)
        n_bins_per_dim.append(val)

    # Note: + 2 accounts for under & overflow bins in each dimension
    n_bins = np.product([n + 2 for n in n_bins_per_dim if n > 0])

    assert n_bins > 0

    #if n_bins > 2**32:
    #    raise ValueError(
    #        'The flattened bin index in CLSim is represented by uint32 which'
    #        ' has a max of 4 294 967 296, but the binning specified comes to'
    #        ' {} bins ({} times too many).'
    #        .format(n_bins, n_bins / 2**32)
    #    )

    ice_model = ice_model.strip()
    angular_sensitivity = angular_sensitivity.strip()
    # For now, hole ice model is hard-coded in our CLSim branch; see
    #   clsim/private/clsim/I3CLSimLightSourceToStepConverterFlasher.cxx
    # in the branch you're using to check that this is correct
    assert angular_sensitivity == 'flasher_p1_0.30_p2_-1'

    gcd_info = extract_gcd(gcd)

    if compress and not any(
            access(join(path, 'zstd'), X_OK)
            for path in environ['PATH'].split(pathsep)):
        raise ValueError('`zstd` command not found in path')

    outdir = expand(outdir)
    mkdir(outdir)

    axes = OrderedDict()
    binning_kw = OrderedDict()

    # Note that the actual binning in CLSim is performed using float32, so we
    # first "truncate" all values to that precision. However, the `LinearAxis`
    # function requires Python floats (which are 64 bits), so we have to
    # convert all values to `float` when passing them as kwargs to `LinearAxis`
    # (the values are presumably re-truncated to float32 within the CLSim code),
    # so the values actually used within CLSim should match those specified here.
    ftype = np.float32

    if coordinate_system == 'spherical':
        binning['t_min'] = ftype(0)  # ns
        binning['r_min'] = ftype(0)  # meters
        costheta_min = ftype(-1.0)
        costheta_max = ftype(1.0)
        # See
        #   clsim/resources/kernels/spherical_coordinates.c.cl
        # in the branch you're using to check that the following are correct
        phi_min = ftype(3.0543261766433716e-01)
        phi_max = ftype(6.5886182785034180e+00)
        binning['costhetadir_min'] = ftype(-1.0)
        binning['costhetadir_max'] = ftype(1.0)
        binning['deltaphidir_min'] = ftype(-3.1808626651763916e+00)
        binning['deltaphidir_max'] = ftype(3.1023228168487549e+00)

        if binning['n_r_bins'] > 0:
            assert isinstance(binning['r_power'],
                              Integral) and binning['r_power'] > 0
            r_binning_kw = OrderedDict([
                ('min', float(binning['r_min'])),
                ('max', float(binning['r_max'])),
                ('n_bins', int(binning['n_r_bins'])),
            ])
            if binning['r_power'] == 1:
                axes['r'] = LinearAxis(**r_binning_kw)
            else:
                r_binning_kw['power'] = int(binning['r_power'])
                axes['r'] = PowerAxis(**r_binning_kw)
            binning_kw['r'] = r_binning_kw

        if binning['n_costheta_bins'] > 0:
            costheta_binning_kw = OrderedDict([
                ('min', float(costheta_min)),
                ('max', float(costheta_max)),
                ('n_bins', int(binning['n_costheta_bins'])),
            ])
            axes['costheta'] = LinearAxis(**costheta_binning_kw)
            binning_kw['costheta'] = costheta_binning_kw

        if binning['n_phi_bins'] > 0:
            phi_binning_kw = OrderedDict([
                ('min', float(phi_min)),
                ('max', float(phi_max)),
                ('n_bins', int(binning['n_phi_bins'])),
            ])
            axes['phi'] = LinearAxis(**phi_binning_kw)
            binning_kw['phi'] = phi_binning_kw

        if binning['n_t_bins'] > 0:
            assert isinstance(binning['t_power'],
                              Integral) and binning['t_power'] > 0
            t_binning_kw = OrderedDict([
                ('min', float(binning['t_min'])),
                ('max', float(binning['t_max'])),
                ('n_bins', int(binning['n_t_bins'])),
            ])
            if binning['t_power'] == 1:
                axes['t'] = LinearAxis(**t_binning_kw)
            else:
                t_binning_kw['power'] = int(binning['t_power'])
                axes['t'] = PowerAxis(**t_binning_kw)
            binning_kw['t'] = t_binning_kw

        if binning['n_costhetadir_bins'] > 0:
            costhetadir_binning_kw = OrderedDict([
                ('min', float(binning['costhetadir_min'])),
                ('max', float(binning['costhetadir_max'])),
                ('n_bins', int(binning['n_costhetadir_bins'])),
            ])
            axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw)
            binning_kw['costhetadir'] = costhetadir_binning_kw

        if binning['n_deltaphidir_bins'] > 0:
            assert (isinstance(binning['deltaphidir_power'], Integral)
                    and binning['deltaphidir_power'] > 0)
            deltaphidir_binning_kw = OrderedDict([
                ('min', float(binning['deltaphidir_min'])),
                ('max', float(binning['deltaphidir_max'])),
                ('n_bins', int(binning['n_deltaphidir_bins'])),
            ])
            if binning['deltaphidir_power'] == 1:
                axes['deltaphidir'] = LinearAxis(**deltaphidir_binning_kw)
            else:
                deltaphidir_binning_kw['power'] = int(
                    binning['deltaphidir_power'])
                axes['deltaphidir'] = PowerAxis(**deltaphidir_binning_kw)
            binning_kw['deltaphidir'] = deltaphidir_binning_kw

    elif coordinate_system == 'cartesian':
        binning['t_min'] = ftype(0)  # ns
        binning['costhetadir_min'], binning['costhetadir_max'] = ftype(
            -1.0), ftype(1.0)
        binning['phidir_min'], binning['phidir_max'] = ftype(-np.pi), ftype(
            np.pi)  # rad

        if binning['n_x_bins'] > 0:
            x_binning_kw = OrderedDict([
                ('min', float(binning['x_min'])),
                ('max', float(binning['x_max'])),
                ('n_bins', int(binning['n_x_bins'])),
            ])
            axes['x'] = LinearAxis(**x_binning_kw)
            binning_kw['x'] = x_binning_kw

        if binning['n_y_bins'] > 0:
            y_binning_kw = OrderedDict([
                ('min', float(binning['y_min'])),
                ('max', float(binning['y_max'])),
                ('n_bins', int(binning['n_y_bins'])),
            ])
            axes['y'] = LinearAxis(**y_binning_kw)
            binning_kw['y'] = y_binning_kw

        if binning['n_z_bins'] > 0:
            z_binning_kw = OrderedDict([
                ('min', float(binning['z_min'])),
                ('max', float(binning['z_max'])),
                ('n_bins', int(binning['n_z_bins'])),
            ])
            axes['z'] = LinearAxis(**z_binning_kw)
            binning_kw['z'] = z_binning_kw

        if binning['n_t_bins'] > 0:
            assert isinstance(binning['t_power'],
                              Integral) and binning['t_power'] > 0
            t_binning_kw = OrderedDict([
                ('min', float(binning['t_min'])),
                ('max', float(binning['t_max'])),
                ('n_bins', int(binning['n_t_bins'])),
            ])
            if binning['t_power'] == 1:
                axes['t'] = LinearAxis(**t_binning_kw)
            else:
                t_binning_kw['power'] = int(binning['t_power'])
                axes['t'] = PowerAxis(**t_binning_kw)
            binning_kw['t'] = t_binning_kw

        if binning['n_costhetadir_bins'] > 0:
            costhetadir_binning_kw = OrderedDict([
                ('min', float(binning['costhetadir_min'])),
                ('max', float(binning['costhetadir_max'])),
                ('n_bins', int(binning['n_costhetadir_bins'])),
            ])
            axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw)
            binning_kw['costhetadir'] = costhetadir_binning_kw

        if binning['n_phidir_bins'] > 0:
            phidir_binning_kw = OrderedDict([
                ('min', float(binning['phidir_min'])),
                ('max', float(binning['phidir_max'])),
                ('n_bins', int(binning['n_phidir_bins'])),
            ])
            axes['phidir'] = LinearAxis(**phidir_binning_kw)
            binning_kw['phidir'] = phidir_binning_kw

    binning_order = BINNING_ORDER[coordinate_system]

    missing_dims = set(axes.keys()).difference(binning_order)
    if missing_dims:
        raise ValueError(
            '`binning_order` specified is {} but is missing dimension(s) {}'.
            format(binning_order, missing_dims))

    axes_ = OrderedDict()
    binning_kw_ = OrderedDict()
    for dim in binning_order:
        if dim in axes:
            axes_[dim] = axes[dim]
            binning_kw_[dim] = binning_kw[dim]
    axes = axes_
    binning_kw = binning_kw_

    # NOTE: use SphericalAxes even if we're actually binning Cartesian since we
    # don't care how it handles e.g. volumes, and Cartesian isn't implemented
    # in CLSim yet
    axes = SphericalAxes(list(axes.values()))

    # Construct metadata initially with items that will be hashed
    metadata = OrderedDict([
        ('source_gcd_i3_md5', gcd_info['source_gcd_i3_md5']),
        ('coordinate_system', coordinate_system), ('binning_kw', binning_kw),
        ('ice_model', ice_model), ('angular_sensitivity', angular_sensitivity),
        ('disable_tilt', disable_tilt),
        ('disable_anisotropy', disable_anisotropy)
    ])
    # TODO: this is hard-coded in our branch of CLSim; make parameter & fix here!
    if 't' in binning_kw:
        metadata['t_is_residual_time'] = True

    if tableset_hash is None:
        hash_val = hash_obj(metadata, fmt='hex')[:8]
        print('derived hash:', hash_val)
    else:
        hash_val = tableset_hash
        print('tableset_hash:', hash_val)
    metadata['hash_val'] = hash_val
    if tile is not None:
        metadata['tile'] = tile

    dom_spec = OrderedDict([('string', string), ('dom', dom)])

    if 'depth_idx' in dom_spec and ('subdet' in dom_spec
                                    or 'string' in dom_spec):
        if 'subdet' in dom_spec:
            dom_spec['string'] = dom_spec.pop('subdet')

        string = dom_spec['string']
        depth_idx = dom_spec['depth_idx']

        if isinstance(string, str):
            subdet = dom_spec['subdet'].lower()
            dom_x, dom_y = 0, 0

            ic_avg_z, dc_avg_z = get_average_dom_z_coords(gcd_info['geo'])
            if string == 'ic':
                dom_z = ic_avg_z[depth_idx]
            elif string == 'dc':
                dom_z = dc_avg_z[depth_idx]
            else:
                raise ValueError('Unrecognized subdetector {}'.format(subdet))
        else:
            dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, depth_idx]

        metadata['string'] = string
        metadata['depth_idx'] = depth_idx

        if tile is not None:
            raise ValueError(
                'Cannot produce tiled tables using "depth_idx"-style table groupings;'
                ' use "string"/"dom"-style tables instead.')

        clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[1]
        clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[0]

        print('Subdetector {}, depth index {} (z_avg = {} m)'.format(
            subdet, depth_idx, dom_z))

    elif 'string' in dom_spec and 'dom' in dom_spec:
        string = dom_spec['string']
        dom = dom_spec['dom']
        dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, dom - 1]

        metadata['string'] = string
        metadata['dom'] = dom

        if tile is None:
            clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[2]
            clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[1]
        else:
            clsim_table_fname_proto = CLSIM_TABLE_TILE_FNAME_PROTO[-1]
            clsim_table_metaname_proto = CLSIM_TABLE_TILE_METANAME_PROTO[-1]

        print(
            'GCD = "{}"\nString {}, dom {}: (x, y, z) = ({}, {}, {}) m'.format(
                gcd, string, dom, dom_x, dom_y, dom_z))

    else:
        raise ValueError('Cannot understand `dom_spec` {}'.format(dom_spec))

    # Until someone figures out DOM tilt and ice column / bubble column / cable
    # orientations for sure, we'll just set DOM orientation to zenith=pi,
    # azimuth=0.
    dom_zenith = np.pi
    dom_azimuth = 0.0

    # Now add other metadata items that are useful but not used for hashing
    metadata['dom_x'] = dom_x
    metadata['dom_y'] = dom_y
    metadata['dom_z'] = dom_z
    metadata['dom_zenith'] = dom_zenith
    metadata['dom_azimuth'] = dom_azimuth
    metadata['seed'] = seed
    metadata['n_events'] = n_events

    metapath = join(outdir, clsim_table_metaname_proto.format(**metadata))
    tablepath = join(outdir, clsim_table_fname_proto.format(**metadata))

    # Save metadata as a JSON file (so it's human-readable by any tool, not
    # just Python--in contrast to e.g. pickle files)
    with open(metapath, 'w') as fobj:
        json.dump(metadata, fobj, sort_keys=False, indent=4)

    print('=' * 80)
    print('Metadata for the table set was written to\n  "{}"'.format(metapath))
    print('Table will be written to\n  "{}"'.format(tablepath))
    print('=' * 80)

    exists_at = []
    for fpath in [tablepath, tablepath + '.zst']:
        if isfile(fpath):
            exists_at.append(fpath)

    if exists_at:
        names = ', '.join('"{}"'.format(fp) for fp in exists_at)
        if overwrite:
            print('WARNING! Deleting existing table(s) at ' + names)
            for fpath in exists_at:
                remove(fpath)
        else:
            raise ValueError('Table(s) already exist at {}; not'
                             ' overwriting.'.format(names))
    print('')

    tray = I3Tray()
    tray.AddSegment(
        TabulateRetroSources,
        'TabulateRetroSources',
        source_gcd_i3_md5=gcd_info['source_gcd_i3_md5'],
        binning_kw=binning_kw,
        axes=axes,
        ice_model=ice_model,
        angular_sensitivity=angular_sensitivity,
        disable_tilt=disable_tilt,
        disable_anisotropy=disable_anisotropy,
        hash_val=hash_val,
        dom_spec=dom_spec,
        dom_x=dom_x,
        dom_y=dom_y,
        dom_z=dom_z,
        dom_zenith=dom_zenith,
        dom_azimuth=dom_azimuth,
        seed=seed,
        n_events=n_events,
        tablepath=tablepath,
        tile=tile,
        record_errors=False,
    )

    logging.set_level_for_unit('I3CLSimStepToTableConverter', 'TRACE')
    logging.set_level_for_unit('I3CLSimTabulatorModule', 'DEBUG')
    logging.set_level_for_unit('I3CLSimLightSourceToStepConverterGeant4',
                               'TRACE')
    logging.set_level_for_unit('I3CLSimLightSourceToStepConverterFlasher',
                               'TRACE')

    tray.Execute()
    tray.Finish()

    if compress:
        print('Compressing table with zstandard via command line')
        print('  zstd -1 --rm "{}"'.format(tablepath))
        subprocess.check_call(['zstd', '-1', '--rm', tablepath])
        print('done.')
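
A hedged sketch of reading back the JSON metadata written alongside the table; the file name below is hypothetical (the real name comes from `clsim_table_metaname_proto.format(**metadata)`), but the keys follow the `metadata` dict assembled above:

import json

with open("/path/to/outdir/clsim_table_metadata.json") as fobj:   # hypothetical path
    meta = json.load(fobj)
print(meta["coordinate_system"], meta["hash_val"], meta["n_events"])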
Code example #18
File: phidir_to_absphidir.py  Project: icecube/retro
def deltaphidir_to_absdeltaphidir(input_file, output_file):
    """
    Parameters
    ----------
    input_file : str
        Path to input file (table)

    output_file : str
        Path to output file (table)

    """
    dim_name = "deltaphidir"

    input_file = expand(input_file)
    output_file = expand(output_file)

    input_dir = dirname(input_file)
    output_dir = dirname(output_file)

    if abspath(output_dir) == abspath(input_dir):
        raise ValueError("Will not allow output dir to be same as input dir")

    if not isdir(output_dir):
        mkdir(output_dir)

    input_table = np.load(input_file, mmap_mode="r")
    input_binning = np.load(join(input_dir, "binning.npy"))

    dim_num = list(input_binning.dtype.names).index(dim_name)

    output_dtype_spec = []
    output_bin_edges = []
    for dim_descr in input_binning.dtype.descr:
        dname, dt, shape = dim_descr
        orig_dim_be = input_binning[dname]
        if dname == dim_name:
            be_in_pi = ((orig_dim_be + np.pi) % (2 * np.pi)) - np.pi
            closest_be_to_zero = np.min(np.abs(be_in_pi))

            if np.isclose(closest_be_to_zero, 0):
                raise NotImplementedError()
            else:
                output_dim_shape = (int((shape[0] - 1) / 2 + 1), )
                output_dim_be = (np.abs(orig_dim_be[orig_dim_be < 0])[::-1] -
                                 np.mean(np.diff(orig_dim_be)) / 2)
            output_dim_be -= output_dim_be[0]
            output_dim_be /= output_dim_be[-1] / np.pi
            output_bin_edges.append(tuple(output_dim_be.tolist()))
            output_dtype_spec.append((dname, dt, output_dim_shape))
        else:
            output_dtype_spec.append(dim_descr)
            output_bin_edges.append(orig_dim_be.tolist())

    output_binning = np.array(tuple(output_bin_edges), dtype=output_dtype_spec)

    output_shape = tuple(dim_spec[2][0] - 1
                         for dim_spec in output_binning.dtype.descr)
    output_table = np.zeros(shape=output_shape, dtype=np.float64)

    mapping = []

    for input_bin_idx, (input_le, input_ue) in enumerate(
            zip(input_binning[dim_name][:-1], input_binning[dim_name][1:])):
        input_wid = input_ue - input_le

        for output_bin_idx, (output_le, output_ue) in enumerate(
                zip(output_binning[dim_name][:-1],
                    output_binning[dim_name][1:])):
            overlap_fract = 0.

            for sign in [-1, +1]:
                if sign > 0:
                    actual_output_le = output_le
                else:
                    actual_output_le = -output_ue

                # Compute input bin edges relative to the lower output bin edge
                input_rel_le = ((input_le - actual_output_le) +
                                np.pi) % (2 * np.pi) - np.pi
                input_rel_ue = ((input_ue - actual_output_le) +
                                np.pi) % (2 * np.pi) - np.pi

                output_wid = abs(output_ue - output_le)

                input_clipped_rel_edges = np.clip(
                    [input_rel_le, input_rel_ue],
                    a_min=0,
                    a_max=output_wid,
                )

                overlap_fract = np.diff(input_clipped_rel_edges)[0] / input_wid
                if overlap_fract > 0:
                    dupe_idx = None
                    for idx, (obi, ibi, ofr) in enumerate(mapping):
                        if obi == output_bin_idx and ibi == input_bin_idx:
                            dupe_idx = idx
                            overlap_fract += ofr
                    entry = (output_bin_idx, input_bin_idx, overlap_fract)
                    if dupe_idx is None:
                        mapping.append(entry)
                    else:
                        mapping[dupe_idx] = entry

    output_slicer = [slice(None) for _ in output_binning.dtype.names]
    input_slicer = [slice(None) for _ in input_binning.dtype.names]

    for output_bin_idx, input_bin_idx, overlap_fract in mapping:
        output_slicer[dim_num] = output_bin_idx
        input_slicer[dim_num] = input_bin_idx
        output_table[tuple(output_slicer)] += (
            overlap_fract * input_table[tuple(input_slicer)])

    # Save the binning to the output directory
    np.save(join(output_dir, "binning.npy"), output_binning)

    # Legacy way of storing bin edges: store each dim individually
    for d_name in output_binning.dtype.names:
        bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(d_name))
        np.save(bin_edges_fpath, output_binning[d_name])

    # Save the table
    np.save(output_file, output_table)
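
The remapping above handles arbitrary (possibly misaligned) bin edges via overlap fractions. In the simplest case, where the signed `deltaphidir` edges are symmetric about zero and line up exactly with the folded edges, the operation reduces to adding each negative bin to its mirror-image positive bin, e.g.:

import numpy as np

signed_edges = np.linspace(-np.pi, np.pi, 9)   # 8 signed bins
counts = np.arange(8, dtype=float)             # illustrative bin contents
# bins 0..3 cover [-pi, 0), bins 4..7 cover [0, pi); fold onto |deltaphidir|
folded = counts[4:] + counts[:4][::-1]         # 4 bins spanning [0, pi]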
Code example #19
File: scan_llh.py  Project: mhieronymus/retro
def scan_llh(dom_tables_kw, hypo_kw, events_kw, scan_kw):
    """Script "main" function"""
    t00 = time.time()

    scan_values = []
    for dim in HYPO_PARAMS_T._fields:
        val_str = ''.join(scan_kw.pop(dim))
        val_str = val_str.lower().replace('pi', format(np.pi, '.17e'))
        scan_values.append(hrlist2list(val_str))

    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)
    hypo_handler = init_obj.setup_discrete_hypo(**hypo_kw)
    events_generator = init_obj.get_events(**events_kw)

    # Pop 'outdir' from `scan_kw` since we don't want to store this info in
    # the metadata dict.
    outdir = expand(scan_kw.pop('outdir'))
    mkdir(outdir)

    print('Scanning parameters')
    t0 = time.time()

    fast_llh = True

    if fast_llh:
        get_llh = dom_tables._get_llh
        dom_info = dom_tables.dom_info
        tables = dom_tables.tables
        table_norm = dom_tables.table_norm
        t_indep_tables = dom_tables.t_indep_tables
        t_indep_table_norm = dom_tables.t_indep_table_norm
        sd_idx_table_indexer = dom_tables.sd_idx_table_indexer
        metric_kw = {}

        def metric_wrapper(hypo, hits, hits_indexer, unhit_sd_indices,
                           time_window):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources,
                           hits=hits,
                           hits_indexer=hits_indexer,
                           unhit_sd_indices=unhit_sd_indices,
                           sd_idx_table_indexer=sd_idx_table_indexer,
                           time_window=time_window,
                           dom_info=dom_info,
                           tables=tables,
                           table_norm=table_norm,
                           t_indep_tables=t_indep_tables,
                           t_indep_table_norm=t_indep_table_norm)
    else:
        metric_kw = dict(dom_tables=dom_tables, tdi_table=None)
        get_llh = likelihood.get_llh

        def metric_wrapper(hypo, **metric_kw):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources, **metric_kw)

    n_points_total = 0
    metric_vals = []
    for _, event in events_generator:
        hits = event['hits']
        hits_indexer = event['hits_indexer']
        hits_summary = event['hits_summary']
        metric_kw['hits'] = hits
        metric_kw['hits_indexer'] = hits_indexer
        hit_sd_indices = hits_indexer['sd_idx']
        unhit_sd_indices = np.array(sorted(
            ALL_STRS_DOMS_SET.difference(hit_sd_indices)),
                                    dtype=np.uint32)
        metric_kw['unhit_sd_indices'] = unhit_sd_indices
        metric_kw['time_window'] = np.float32(
            hits_summary['time_window_stop'] -
            hits_summary['time_window_start'])

        t1 = time.time()
        metric_vals.append(scan(scan_values, metric_wrapper, metric_kw))
        dt = time.time() - t1

        n_points = metric_vals[-1].size
        n_points_total += n_points
        print('  ---> {:.3f} s, {:d} points ({:.3f} ms per LLH)'.format(
            dt, n_points, dt / n_points * 1e3))
    dt = time.time() - t0

    info = OrderedDict([
        ('hypo_params', HYPO_PARAMS_T._fields),
        ('scan_values', scan_values),
        ('metric_name', 'llh'),
        ('metric_vals', metric_vals),
        ('scan_kw', sort_dict(scan_kw)),
        ('dom_tables_kw', sort_dict(dom_tables_kw)),
        ('hypo_kw', sort_dict(hypo_kw)),
        ('events_kw', sort_dict(events_kw)),
    ])

    outfpath = join(outdir, 'scan.pkl')
    print('Saving results in pickle file, path "{}"'.format(outfpath))
    pickle.dump(info, open(outfpath, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

    print('Total time to scan: {:.3f} s; {:.3f} ms avg per LLH'.format(
        time.time() - t00, dt / n_points_total * 1e3))

    return metric_vals, info
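
A hedged sketch of loading the scan results written above; the output path is hypothetical, and the keys follow the `info` dict assembled in the function:

import pickle

with open("/path/to/outdir/scan.pkl", "rb") as fobj:
    info = pickle.load(fobj)
print(info["metric_name"], "scanned for", len(info["metric_vals"]), "event(s)")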
Code example #20
def produce_arrays(
    indir,
    outdir,
    pulse_series,
    processes=None,
):
    """
    Parameters
    ----------
    indir
    outdir
    pulse_series
    processes : None or int > 0, optional

    """
    if outdir is not None:
        outdir = expand(outdir)
        mkdir(outdir)

    if processes is None:
        processes = cpu_count()
    assert processes >= 1
    serial = processes == 1

    if not serial:
        pool = Pool(processes=processes)

    # -- Define a closure as callback function -- #

    # Capture the following (must be non-scalar to be persistent between calls
    # of function)
    events_arrays = []
    doms_arrays = []
    pulses_arrays = []

    dom_idx0 = [0]
    pulses_idx0 = [0]

    def concatenate_results(result):
        """Closure"""
        if result is None:
            return

        events_array, doms_array, pulses_array = result

        if len(events_arrays) > 0:
            events_array["dom_idx0"] += dom_idx0[0]
            doms_array["pulses_idx0"] += pulses_idx0[0]

        events_arrays.append(events_array)
        doms_arrays.append(doms_array)
        pulses_arrays.append(pulses_array)

        dom_idx0[0] = (
            events_array[-1]["dom_idx0"] + events_array[-1]["num_hit_doms"]
        )
        pulses_idx0[0] = (
            doms_array[-1]["pulses_idx0"] + doms_array[-1]["num_pulses"]
        )

    # -- Find leaf directories to process -- #

    args = tuple()
    for dirpath, dirs_, files in walk(indir, followlinks=True):
        if "events.npy" in files:
            dirs_.clear()
        else:
            dirs_.sort(key=nsort_key_func)
            continue

        kwargs = dict(events_dirpath=dirpath, pulse_series=pulse_series)
        if serial:
            result = process_events_dir(*args, **kwargs)
            concatenate_results(result)
        else:
            pool.apply_async(
                process_events_dir,
                args,
                kwargs,
                concatenate_results,
            )

    if not serial:
        pool.close()
        pool.join()

    if len(events_arrays) == 0:
        assert len(doms_arrays) == 0
        assert len(pulses_arrays) == 0
        print("no events found in `indir`:", indir)
        return None

    events_array = np.concatenate(events_arrays)
    doms_array = np.concatenate(doms_arrays)
    pulses_array = np.concatenate(pulses_arrays)

    if outdir is not None:
        np.save(join(outdir, "{}__events_array.npy".format(pulse_series)),
                events_array)
        np.save(join(outdir, "{}__doms_array.npy".format(pulse_series)),
                doms_array)
        np.save(join(outdir, "{}__pulses_array.npy".format(pulse_series)),
                pulses_array)

    return events_array, doms_array, pulses_array
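
A hedged usage sketch of `produce_arrays`; the directory paths and the pulse-series name below are hypothetical:

arrays = produce_arrays(
    indir="/path/to/extracted/events",    # hypothetical
    outdir="/path/to/arrays",             # hypothetical
    pulse_series="SRTTWOfflinePulsesDC",  # hypothetical pulse-series name
    processes=None,                       # None -> multiprocessing.cpu_count()
)
if arrays is not None:
    events_array, doms_array, pulses_array = arrays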
Code example #21
def generate_clsim_table(subdet, depth_idx, nevts, seed, tilt,
                         r_max, r_power, n_r_bins,
                         t_max, n_t_bins,
                         n_costheta_bins, n_costhetadir_bins,
                         n_deltaphidir_bins,
                         outdir, overwrite=False, compress=True):
    """Generate a CLSim table.

    Parameters
    ----------
    subdet : string, {'ic', 'dc'}

    depth_idx : int in [0, 59]

    nevts : int > 0
        Note that the number of photons is much larger than the number of
        events (related to the "brightness" of the defined source)

    seed : int in [0, 2**32)
        Seed for CLSim's random number generator

    tilt : bool
        Whether to enable ice layer tilt in simulation

    r_max : float > 0

    r_power : int > 0

    n_r_bins : int > 0

    t_max : float > 0

    n_t_bins : int > 0

    n_costheta_bins : int > 0

    n_costhetadir_bins : int > 0

    n_deltaphidir_bins : int > 0

    outdir : string

    overwrite : bool, optional
        Whether to overwrite an existing table (default: False)

    compress : bool, optional
        Whether to pass the resulting table through zstandard compression
        (default: True)

    Raises
    ------
    ValueError
        If `compress` is True but the `zstd` command-line utility cannot be found

    AssertionError, ValueError
        If illegal argument values are passed

    ValueError
        If `overwrite` is False and a table already exists at the target path

    Notes
    -----
    Binnings are as follows:
        * Radial binning is regular in the space of r**(1/r_power), with
          `n_r_bins` spanning from `r_min` to `r_max`.
        * Time binning is linearly spaced with `n_t_bins` spanning from `t_min`
          to `t_max`.
        * Position zenith angle is binned regularly in the cosine of the zenith
          angle with `n_costheta_bins` spanning from `costheta_min` to
          `costheta_max`.
        * Position azimuth angle is _not_ binned
        * Photon directionality zenith angle is binned regularly in
          cosine-zenith space, with `n_costhetadir_bins` spanning from
          `costhetadir_min` to `costhetadir_max`
        * Photon directionality azimuth angle, since position azimuth angle is
          not binned, is translated into the absolute value of the azimuth
          angle relative to the azimuth position of the photon; this is called
          `deltaphidir`. There are `n_deltaphidir_bins` from `deltaphidir_min`
          to `deltaphidir_max`.

    The following are forced upon the above binning specifications (and
    remaining parameters are specified as arguments to the function)
        * t_min = 0
        * r_min = 0
        * costheta_min = -1
        * costheta_max = 1
        * costhetadir_min = -1
        * costhetadir_max = 1
        * deltaphidir_min = 0
        * deltaphidir_max = pi (rad)

    """
    assert isinstance(nevts, Integral) and nevts > 0
    assert isinstance(seed, Integral) and 0 <= seed < 2**32
    assert isinstance(r_power, Integral) and r_power > 0
    assert isinstance(n_r_bins, Integral) and n_r_bins > 0
    assert isinstance(n_t_bins, Integral) and n_t_bins > 0
    assert isinstance(n_costheta_bins, Integral) and n_costheta_bins > 0
    assert isinstance(n_costhetadir_bins, Integral) and n_costhetadir_bins > 0
    assert isinstance(n_deltaphidir_bins, Integral) and n_deltaphidir_bins > 0

    if compress and not any(access(join(path, 'zstd'), X_OK)
                            for path in environ['PATH'].split(pathsep)):
        raise ValueError('`zstd` command not found in path')

    outdir = expand(outdir)
    mkdir(outdir)

    # Note: + 2 accounts for under/overflow bins in each dimension
    n_bins = np.product([n_bins + 2 for n_bins in (n_r_bins,
                                                   n_costheta_bins,
                                                   n_t_bins,
                                                   n_costhetadir_bins,
                                                   n_deltaphidir_bins)])

    if n_bins > 2**32:
        raise ValueError(
            'The flattened bin index in CLSim is represented by a uint32,'
            ' which can address at most 4 294 967 296 bins, but the binning'
            ' specified comes to {} bins ({} times too many).'
            .format(n_bins, n_bins / 2**32)
        )

    # Average Z coordinate (depth) for each layer of DOMs (see
    # `average_z_position.py`)
    # TODO: make these command-line arguments

    t_min = 0 # ns
    r_min = 0 # meters
    costheta_min, costheta_max = -1.0, 1.0
    costhetadir_min, costhetadir_max = -1.0, 1.0
    deltaphidir_min, deltaphidir_max = 0.0, np.pi # rad

    r_binning_kw = dict(
        min=float(r_min),
        max=float(r_max),
        n_bins=int(n_r_bins),
        power=int(r_power)
    )
    costheta_binning_kw = dict(
        min=float(costheta_min),
        max=float(costheta_max),
        n_bins=int(n_costheta_bins)
    )
    t_binning_kw = dict(
        min=float(t_min),
        max=float(t_max),
        n_bins=int(n_t_bins)
    )
    costhetadir_binning_kw = dict(
        min=float(costhetadir_min),
        max=float(costhetadir_max),
        n_bins=int(n_costhetadir_bins)
    )
    deltaphidir_binning_kw = dict(
        min=float(deltaphidir_min),
        max=float(deltaphidir_max),
        n_bins=int(n_deltaphidir_bins)
    )

    axes = SphericalAxes([
        # r: photon location, radius (m)
        PowerAxis(**r_binning_kw),
        # costheta: photon location, coszenith
        LinearAxis(**costheta_binning_kw),
        # t: photon location, time (ns)
        LinearAxis(**t_binning_kw),
        # costhetadir: photon direction, coszenith
        LinearAxis(**costhetadir_binning_kw),
        # deltaphidir: photon direction, (impact) azimuth angle (rad)
        LinearAxis(**deltaphidir_binning_kw)
    ]) # yapf: disable

    if subdet.lower() == 'ic':
        z_pos = IC_AVG_Z[depth_idx]
    elif subdet.lower() == 'dc':
        z_pos = DC_AVG_Z[depth_idx]
    else:
        raise ValueError('`subdet` must be "IC" or "DC"; got "{}"'.format(subdet))

    print('Subdetector {}, depth index {} (z_avg = {} m)'
          .format(subdet, depth_idx, z_pos))

    # Parameters that will (or can be foreseen to) cause the tables to vary
    # depending on their values. These define what we will call a "set" of
    # tables.
    tray_kw_to_hash = dict(
        PhotonSource='retro',
        Zenith=180 * I3Units.degree, # orientation of source
        Azimuth=0 * I3Units.degree, # orientation of source
        # The number of events affects the tables, but n=999 and n=1000 will be
        # very similar (and not statistically independent if the seed is the
        # same). However, a user is likely to want to test the same settings
        # with different statistics, so such sets need different hashes (unless
        # we want the user to also specify `nevts` when identifying a set).
        # Therefore, NEvents is included in the hash that identifies a common
        # set of tables.
        NEvents=nevts,
        IceModel='spice_mie',
        DisableTilt=not tilt,
        PhotonPrescale=1,
        Sensor='none'
    )

    hashable_params = dict(
        r_binning_kw=r_binning_kw,
        t_binning_kw=t_binning_kw,
        costheta_binning_kw=costheta_binning_kw,
        costhetadir_binning_kw=costhetadir_binning_kw,
        deltaphidir_binning_kw=deltaphidir_binning_kw,
        tray_kw_to_hash=tray_kw_to_hash
    )

    hash_val, metaname = generate_clsim_table_meta(**hashable_params)
    metapath = join(outdir, metaname)

    filename = CLSIM_TABLE_FNAME_PROTO[-1].format(
        hash_val=hash_val, string=subdet, depth_idx=depth_idx, seed=seed
    )
    filepath = abspath(join(outdir, filename))

    #if isfile(metapath):
    #    if overwrite:
    #        print('WARNING! Overwriting table metadata file at "{}"'
    #              .format(metapath))
    #    else:
    #        raise ValueError(
    #            'Table metadata file already exists at "{}",'
    #            ' assuming table already generated or in process; not'
    #            ' overwriting.'.format(metapath)
    #        )
    with open(metapath, 'w') as metafile:
        json.dump(hashable_params, metafile, sort_keys=True, indent=4)

    print('='*80)
    print('Metadata for the table set was written to\n  "{}"'.format(metapath))
    print('Table will be written to\n  "{}"'.format(filepath))
    print('='*80)

    exists_at = []
    for fpath in [filepath, filepath + '.zst']:
        if isfile(fpath):
            exists_at.append(fpath)

    if exists_at:
        names = ', '.join('"{}"'.format(fp) for fp in exists_at)
        if overwrite:
            print('WARNING! Deleting existing table(s) at ' + names)
            for fpath in exists_at:
                remove(fpath)
        else:
            raise ValueError('Table(s) already exist at {}; not'
                             ' overwriting.'.format(names))
    print('')

    tray_kw_other = dict(
        # Note that the hash includes the parameters used to construct the axes
        Axes=axes,

        # Parameters that indicate some "index" into the set defined above.
        # I.e., you will want to associate all seeds and all z positions
        # simulated together in the same set, but of course these parameters
        # will also change the tables produced.
        ZCoordinate=z_pos, # location of source
        Seed=seed,

        # Parameters that should have no bearing on the contents of the tables
        Energy=1 * I3Units.GeV,
        TabulateImpactAngle=True,
        Directions=None,
        Filename=filepath,
        FlasherWidth=127,
        FlasherBrightness=127,
        RecordErrors=False,
    )

    all_tray_kw = {}
    all_tray_kw.update(tray_kw_to_hash)
    all_tray_kw.update(tray_kw_other)

    icetray.logging.set_level_for_unit(
        'I3CLSimStepToTableConverter', 'TRACE'
    )
    icetray.logging.set_level_for_unit(
        'I3CLSimTabulatorModule', 'DEBUG'
    )
    icetray.logging.set_level_for_unit(
        'I3CLSimLightSourceToStepConverterGeant4', 'TRACE'
    )
    icetray.logging.set_level_for_unit(
        'I3CLSimLightSourceToStepConverterFlasher', 'TRACE'
    )

    tray = I3Tray()
    tray.AddSegment(TabulatePhotonsFromSource, 'generator', **all_tray_kw)
    tray.Execute()
    tray.Finish()

    if compress:
        print('Compressing table with zstandard via command line')
        print('  zstd -1 --rm "{}"'.format(filepath))
        check_call(['zstd', '-1', '--rm', filepath])
        print('done.')
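
# The binning described in the docstring above (power-law in r, linear in the
# other dimensions) can be reproduced outside of CLSim. This is a minimal
# numpy-only sketch, not part of the original module; the argument names mirror
# the function's arguments and the helper name `sketch_bin_edges` is invented
# here purely for illustration:
import numpy as np  # likely redundant with the module-level import

def sketch_bin_edges(r_max, n_r_bins, r_power, t_max, n_t_bins,
                     n_costheta_bins, n_costhetadir_bins, n_deltaphidir_bins):
    """Reconstruct the bin edges implied by the binning keywords."""
    # Radial edges are regular in r**(1/r_power) and then mapped back to r
    r_edges = np.linspace(0, r_max**(1.0 / r_power), n_r_bins + 1)**r_power
    t_edges = np.linspace(0, t_max, n_t_bins + 1)
    costheta_edges = np.linspace(-1, 1, n_costheta_bins + 1)
    costhetadir_edges = np.linspace(-1, 1, n_costhetadir_bins + 1)
    deltaphidir_edges = np.linspace(0, np.pi, n_deltaphidir_bins + 1)

    # CLSim flattens the (under/overflow-padded) binning into a uint32 index,
    # hence the 2**32 limit checked in the function above
    n_flat_bins = np.prod([n + 2 for n in (n_r_bins, n_costheta_bins, n_t_bins,
                                           n_costhetadir_bins,
                                           n_deltaphidir_bins)])
    assert n_flat_bins <= 2**32, 'binning too fine for a uint32 flat index'

    return (r_edges, costheta_edges, t_edges, costhetadir_edges,
            deltaphidir_edges)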
Code example #22
File: data_mc_agreement.py Project: icecube/retro
def get_all_stats(
    outdir,
    min_pulses_per_event,
    overwrite=False,
    only_sets=None,
    processes=None,
    verbosity=0,
):
    """Get stats for all data and MC sets.

    Parameters
    ----------
    outdir : string

    min_pulses_per_event : int >= 0

    overwrite : bool, optional
        Whether to overwrite any existing stats files

    only_sets : string, iterable thereof, or None, optional
        If specified, each string must be a key of `MC_NAME_DIRINFOS` and/or
        `DATA_NAME_DIRINFOS`, optionally followed by "/<subset id>" to select
        only that subset of the named set.

    processes : None or int > 0, optional

    verbosity : int >= 0, optional

    Returns
    -------
    stats : OrderedDict
        Keys are dataset names and values are OrderedDicts containing the stats
        for the corresponding datasets.

    """
    outdir = expand(outdir)

    if isinstance(only_sets, string_types):
        only_sets = [only_sets]

    to_process = chain.from_iterable(
        [MC_NAME_DIRINFOS.items(),
         DATA_NAME_DIRINFOS.items()])
    if only_sets is not None:
        only_sets = [s.split("/") for s in only_sets]
        new_to_process = []
        for set_name, subsets_list in to_process:
            new_subsets_list = []
            for only_set in only_sets:
                if set_name != only_set[0]:
                    continue
                if len(only_set) == 1:
                    new_subsets_list = subsets_list
                    break
                else:
                    for subset in subsets_list:
                        if subset["id"] == only_set[1]:
                            new_subsets_list.append(subset)
            if len(new_subsets_list) > 0:
                new_to_process.append((set_name, new_subsets_list))
        to_process = new_to_process  #((key, val) for key, val in to_process if key in only_sets)
        print(to_process)

    mkdir(outdir)
    stats = OrderedDict()
    for name, dirinfos in to_process:
        t0 = time.time()

        this_stats = OrderedDict()
        for dirinfo in dirinfos:
            augmented_name = "{}.{}".format(name, dirinfo["id"])
            outfile = join(outdir, "stats_{}.npz".format(augmented_name))
            if isfile(outfile) and not overwrite:
                contents = OrderedDict([(k, v)
                                        for k, v in np.load(outfile).items()])
                if verbosity >= 1:
                    wstderr(
                        'loaded stats for set "{}" from file "{}" ({} sec)\n'.
                        format(augmented_name, outfile,
                               time.time() - t0))
            else:
                contents = get_stats(
                    min_pulses_per_event=min_pulses_per_event,
                    dirinfo=dirinfo,
                    processes=processes,
                    verbosity=verbosity,
                )
                #np.savez_compressed(outfile, **contents)
                np.savez(outfile, **contents)
                if verbosity >= 1:
                    wstderr('saved stats for set "{}" to file "{}" ({} sec)\n'.
                            format(augmented_name, outfile,
                                   time.time() - t0))

            if name == "data":
                stats[dirinfo["id"]] = contents
            else:
                for key, vals in contents.items():
                    if key not in this_stats:
                        this_stats[key] = []
                    this_stats[key].append(vals)

            del contents

        if name != "data":
            stats[name] = OrderedDict([(k, np.concatenate(v))
                                       for k, v in this_stats.items()])

    return stats
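
# The `only_sets` argument above accepts either a set name ("some_set") or a
# "set/subset-id" pair ("some_set/1234"). Below is a standalone sketch of that
# selection logic over a toy mapping; the helper name and the dict contents are
# hypothetical and do not reflect the real MC_NAME_DIRINFOS / DATA_NAME_DIRINFOS:
def filter_sets(name_dirinfos, only_sets):
    """Return [(set_name, kept_subsets)] for the requested sets/subsets."""
    specs = [s.split("/") for s in only_sets]
    selected = []
    for set_name, subsets in name_dirinfos.items():
        kept = []
        for spec in specs:
            if spec[0] != set_name:
                continue
            if len(spec) == 1:  # whole set requested
                kept = list(subsets)
                break
            kept.extend(ss for ss in subsets if ss["id"] == spec[1])
        if kept:
            selected.append((set_name, kept))
    return selected

# e.g. filter_sets({"mc": [{"id": "a"}, {"id": "b"}]}, ["mc/b"])
# returns [("mc", [{"id": "b"}])]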
Code example #23
def summarize_clsim_table(table_fpath,
                          table=None,
                          save_summary=True,
                          outdir=None):
    """
    Parameters
    ----------
    table_fpath : string
        Path to table (or just the table's filename if `outdir` is specified)

    table : mapping, optional
        If the table has already been loaded, it can be passed here to avoid
        re-loading the table.

    save_summary : bool, optional
        Whether to save the table summary to disk.

    outdir : string, optional
        If `save_summary` is True, write the summary to this directory. If
        `outdir` is not specified and `save_summary` is True, the summary will
        be written to the same directory that contains `table_fpath`.

    Returns
    -------
    table
        See `load_clsim_table` for details of the data structure

    summary : OrderedDict

    """
    t_start = time()
    if save_summary:
        from pisa.utils.jsons import from_json, to_json

    table_fpath = expand(table_fpath)
    srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath)
    invalid_fname = False
    try:
        fname_info = interpret_clsim_table_fname(clsim_fname)
    except ValueError:
        invalid_fname = True
        fname_info = {}

    if outdir is None:
        outdir = srcdir
    outdir = expand(outdir)
    mkdir(outdir)

    if invalid_fname:
        metapath = None
    else:
        metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format(
            hash_val=fname_info['hash_val']))
        metapath = join(outdir, metaname)
    if metapath and isfile(metapath):
        meta = from_json(metapath)
    else:
        meta = dict()

    if table is None:
        table = load_clsim_table(table_fpath)

    summary = OrderedDict()
    for key in table.keys():
        if key == 'table':
            continue
        summary[key] = table[key]
    if fname_info:
        for key in ('hash_val', 'string', 'depth_idx', 'seed'):
            summary[key] = fname_info[key]
    # TODO: Add hole ice info when added to tray_kw_to_hash
    if meta:
        summary['n_events'] = meta['tray_kw_to_hash']['NEvents']
        summary['ice_model'] = meta['tray_kw_to_hash']['IceModel']
        summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt']
        for key, val in meta.items():
            if key.endswith('_binning_kw'):
                summary[key] = val
    elif 'fname_version' in fname_info and fname_info['fname_version'] == 1:
        summary['n_events'] = fname_info['n_events']
        summary['ice_model'] = 'spice_mie'
        summary['tilt'] = False
        summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2)
        summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40)
        summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300)
        summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20)
        summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20)

    # Save marginal distributions and info to file
    norm = (
        1 / table['n_photons'] /
        (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index'] *
         np.mean(np.diff(table['t_bin_edges'])))
        #* table['angular_acceptance_fract']
        * (len(table['costheta_bin_edges']) - 1))
    summary['norm'] = norm

    dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir')
    n_dims = len(table['table_shape'])
    assert n_dims == len(dim_names)

    # Apply norm to underflow and overflow so magnitudes can be compared
    # relative to plotted marginal distributions
    for flow, idx in product(('underflow', 'overflow'), iter(range(n_dims))):
        summary[flow][idx] = summary[flow][idx] * norm

    wstderr('Finding marginal distributions...\n')
    wstderr('    masking off zeros in table...')
    t0 = time()
    nonzero_table = np.ma.masked_equal(table['table'], 0)
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    t0_marg = time()
    summary['dimensions'] = OrderedDict()
    for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names):
        remove_axes = list(range(n_dims))
        remove_axes.pop(keep_axis)
        remove_axes = tuple(remove_axes)
        axis = OrderedDict()

        wstderr('    mean across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['mean'] = norm * np.asarray(
            np.mean(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    median across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['median'] = norm * np.asarray(
            np.ma.median(nonzero_table, axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    max across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['max'] = norm * np.asarray(
            np.max(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))
        summary['dimensions'][ax_name] = axis
    wstderr('  Total time to find marginal distributions: {} s\n'.format(
        np.round(time() - t0_marg, 3)))

    if save_summary:
        ext = None
        base_fname = clsim_fname
        while ext not in ('', '.fits'):
            base_fname, ext = splitext(base_fname)
            ext = ext.lower()
        outfpath = join(outdir, base_fname + '_summary.json.bz2')
        to_json(summary, outfpath)
        print('saved summary to "{}"'.format(outfpath))

    wstderr('Time to summarize table: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return table, summary
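
# The marginalization loop above reduces the N-dimensional table along every
# axis except the one being summarized. A toy, numpy-only sketch of that
# reduction; `marginalize` is an illustrative helper and `norm` is just a scale
# factor standing in for the normalization computed above:
import numpy as np  # likely redundant with the module-level import

def marginalize(table, keep_axis, norm=1.0):
    """Mean, median, and max of `table` over all axes except `keep_axis`."""
    remove_axes = tuple(i for i in range(table.ndim) if i != keep_axis)
    nonzero = np.ma.masked_equal(table, 0)  # exclude empty bins from the median
    return {
        'mean': norm * np.mean(table, axis=remove_axes),
        'median': norm * np.asarray(np.ma.median(nonzero, axis=remove_axes)),
        'max': norm * np.max(table, axis=remove_axes),
    }

# e.g. marginalize(np.arange(24.0).reshape(2, 3, 4), keep_axis=1)['mean'].shape
# is (3,)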
Code example #24
File: extract_gcd.py Project: icecube/retro
def extract_gcd(gcd_file, outdir=None):
    """Extract info from a GCD in i3 format, optionally saving to a simple
    Python pickle file.

    Parameters
    ----------
    gcd_file : str
    outdir : str, optional
        If provided, the gcd info is saved to a .pkl file with the same name as
        `gcd_file` but with its extension(s) replaced by ".pkl".

    Returns
    -------
    gcd_info : OrderedDict
        'source_gcd_name': basename of the `gcd_file` provided
        'source_gcd_md5': direct md5sum of `gcd_file` (possibly compressed)
        'source_gcd_i3_md5': md5sum of `gcd_file` after decompressing to .i3
        'geo': (86, 60, 3) array of DOM x, y, z coords in m rel to IceCube coord system
        'rde' : (86, 60) array with relative DOM efficiencies
        'noise' : (86, 60) array with noise rate, in Hz, for each DOM

    """
    gcd_file = expanduser(expandvars(gcd_file))
    src_gcd_dir, src_gcd_basename = split(gcd_file)

    # Strip all recognized extensions to find base file name's "stem," then
    # attach ".pkl" extension to that
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        if ext.lower().lstrip('.') not in ['i3', 'pkl', 'bz2', 'gz', 'zst']:
            # reattach unknown "extension"; presumably it's actually part of
            # the filename and not an extension at all (or an extension we don't
            # care about, or an empty string in the case that there is no dot
            # remaining in the name)
            src_gcd_stripped += ext
            break
    pkl_outfname = src_gcd_stripped + '.pkl'

    pkl_outfpath = None
    if outdir is not None:
        outdir = expanduser(expandvars(outdir))
        mkdir(outdir)
        pkl_outfpath = join(outdir, pkl_outfname)
        if isfile(pkl_outfpath):
            return load_pickle(pkl_outfpath)

    def save_pickle_if_appropriate(gcd_info):
        if pkl_outfpath is not None:
            with open(pkl_outfpath, 'wb') as fobj:
                pickle.dump(gcd_info, fobj, protocol=pickle.HIGHEST_PROTOCOL)

    # Look for existing extracted (pkl) version in choice directories
    look_in_dirs = []
    if src_gcd_dir:
        look_in_dirs.append(src_gcd_dir)
    look_in_dirs += ['.', DATA_DIR]
    if 'I3_DATA' in os.environ:
        look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    for look_in_dir in look_in_dirs:
        uncompr_pkl_fpath = join(look_in_dir, pkl_outfname)
        if isfile(uncompr_pkl_fpath):
            gcd_info = load_pickle(uncompr_pkl_fpath)
            save_pickle_if_appropriate(gcd_info)
            return gcd_info

    # If we couldn't find the already-extracted file, find the source file
    # (if user doesn't specify a full path to the file, try in several possible
    # directories)
    if src_gcd_dir:
        look_in_dirs = [src_gcd_dir]
    else:
        look_in_dirs = ['.', DATA_DIR]
        if 'I3_DATA' in os.environ:
            look_in_dirs.append('$I3_DATA/GCD')
    look_in_dirs = [expanduser(expandvars(d)) for d in look_in_dirs]

    src_fpath = None
    for look_in_dir in look_in_dirs:
        fpath = join(look_in_dir, src_gcd_basename)
        if isfile(fpath):
            src_fpath = fpath
            break

    if src_fpath is None:
        raise IOError('Cannot find file "{}" in dir(s) {}'.format(
            src_gcd_basename, look_in_dirs))

    # Figure out what compression algorithms are used on the file; final state
    # will have `ext_lower` containing either "i3" or "pkl" indicating the
    # basic type of file we have
    compression = []
    src_gcd_stripped = src_gcd_basename
    while True:
        src_gcd_stripped, ext = splitext(src_gcd_stripped)
        ext_lower = ext.lower().lstrip('.')
        if ext_lower in ['gz', 'bz2', 'zst']:
            compression.append(ext_lower)
        elif ext_lower in ['i3', 'pkl']:
            break
        else:
            if ext:
                raise IOError(
                    'Unhandled extension "{}" found in GCD file "{}"'.format(
                        ext, gcd_file))
            raise IOError(
                'Illegal filename "{}"; must have either ".i3" or ".pkl" extension,'
                " optionally followed by compression extension(s)".format(
                    gcd_file))

    with open(src_fpath, 'rb') as fobj:
        decompressed = fobj.read()

    # Don't hash a pickle file; all we care about is the hash of the original
    # i3 file, which is a value already stored in the pickle file
    if ext_lower == 'i3':
        source_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    for comp_alg in compression:
        if comp_alg == 'gz':
            decompressed = gzip.GzipFile(fileobj=BytesIO(decompressed)).read()
        elif comp_alg == 'bz2':
            decompressed = bz2.decompress(decompressed)
        elif comp_alg == 'zst':
            decompressor = zstandard.ZstdDecompressor()
            decompressed = decompressor.decompress(decompressed,
                                                   max_output_size=100000000)

    if ext_lower == 'pkl':
        if PY2:
            gcd_info = pickle.loads(decompressed)
        else:
            gcd_info = pickle.loads(decompressed, encoding='latin1')
        save_pickle_if_appropriate(gcd_info)
        return gcd_info

    # -- If we get here, we have an i3 file -- #

    decompressed_gcd_md5 = hashlib.md5(decompressed).hexdigest()

    from I3Tray import I3Units, OMKey  # pylint: disable=import-error
    from icecube import dataclasses, dataio  # pylint: disable=import-error, unused-variable, unused-import

    gcd = dataio.I3File(gcd_file)  # pylint: disable=no-member
    frame = gcd.pop_frame()

    omgeo, dom_cal = None, None
    while gcd.more() and (omgeo is None or dom_cal is None):
        frame = gcd.pop_frame()
        keys = list(frame.keys())
        if 'I3Geometry' in keys:
            omgeo = frame['I3Geometry'].omgeo
        if 'I3Calibration' in keys:
            dom_cal = frame['I3Calibration'].dom_cal

    assert omgeo is not None
    assert dom_cal is not None

    # create output dict
    gcd_info = OrderedDict()
    gcd_info['source_gcd_name'] = src_gcd_basename
    gcd_info['source_gcd_md5'] = source_gcd_md5
    gcd_info['source_gcd_i3_md5'] = decompressed_gcd_md5
    gcd_info['geo'] = np.full(shape=(N_STRINGS, N_DOMS, 3), fill_value=np.nan)
    gcd_info['noise'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)
    gcd_info['rde'] = np.full(shape=(N_STRINGS, N_DOMS), fill_value=np.nan)

    for string_idx in range(N_STRINGS):
        for dom_idx in range(N_DOMS):
            omkey = OMKey(string_idx + 1, dom_idx + 1)
            om = omgeo.get(omkey)
            gcd_info['geo'][string_idx, dom_idx, 0] = om.position.x
            gcd_info['geo'][string_idx, dom_idx, 1] = om.position.y
            gcd_info['geo'][string_idx, dom_idx, 2] = om.position.z
            try:
                gcd_info['noise'][string_idx,
                                  dom_idx] = (dom_cal[omkey].dom_noise_rate /
                                              I3Units.hertz)
            except KeyError:
                gcd_info['noise'][string_idx, dom_idx] = 0.0

            try:
                gcd_info['rde'][string_idx,
                                dom_idx] = dom_cal[omkey].relative_dom_eff
            except KeyError:
                gcd_info['rde'][string_idx, dom_idx] = 0.0

    save_pickle_if_appropriate(gcd_info)

    return gcd_info
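
# The extension handling above (strip compression suffixes until an ".i3" or
# ".pkl" extension is found) can be factored into a small standard-library
# helper. This is an illustrative sketch; `split_gcd_name` is not part of the
# original module:
from os.path import splitext

def split_gcd_name(fname):
    """Return (stem, base_ext, compression_exts) for a GCD-style filename."""
    compression = []
    stem = fname
    while True:
        stem, ext = splitext(stem)
        ext = ext.lower().lstrip('.')
        if ext in ('gz', 'bz2', 'zst'):
            compression.append(ext)
        elif ext in ('i3', 'pkl'):
            return stem, ext, compression
        else:
            raise IOError(
                'cannot interpret "{}": need ".i3" or ".pkl", optionally'
                ' followed by compression extension(s)'.format(fname)
            )

# e.g. split_gcd_name('example_gcd.i3.bz2') returns ('example_gcd', 'i3', ['bz2'])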