def __init__(
        self,
        energy_bins_file=join(RETRO_DIR, "data", "muon_secondaries_light_output/energy_bins.npy"),
        histograms_file=join(RETRO_DIR, "data", "muon_secondaries_light_output/histograms.npy"),
    ):
        self.interpolators = []
        """linear interpolation function for each curve in histarray"""

        self.average_track_lengths = []
        """the "native" muon length for each curve in histarray"""

        # Load files for interpolating light output vs. track length for energy
        # ranges.
        self.energy_bins = np.load(expand(energy_bins_file))
        self.histarray = np.load(expand(histograms_file))

        for curve in self.histarray:
            curvex = []
            curveE = []
            for tup in curve:
                curvex.append(tup[0])
                curveE.append(tup[1])
            curvex = np.array(curvex)
            curveE = np.array(curveE)
            interpolator = interpolate.interp1d(
                curvex,
                curveE,
                kind="linear",
                bounds_error=False,
                fill_value=(curveE[0], curveE[-1]),
            )
            self.interpolators.append(interpolator)
            self.average_track_lengths.append(
                float(curve[-1][0] + (curve[-1][0] - curve[-2][0]) / 2)
            )
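
# A self-contained sketch (not part of the original class) of the interpolation
# scheme built above, assuming only numpy and scipy; the sample values are made
# up for illustration: out-of-range track lengths are clamped to the end values.
import numpy as np
from scipy import interpolate

curvex = np.array([0.0, 10.0, 20.0, 30.0])     # track lengths
curveE = np.array([0.0, 1.2e4, 2.5e4, 3.9e4])  # light output per length
interp = interpolate.interp1d(
    curvex,
    curveE,
    kind="linear",
    bounds_error=False,
    fill_value=(curveE[0], curveE[-1]),
)
print(interp(15.0), interp(-5.0), interp(100.0))  # interpolated, clamped, clamped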
Example 2
def remove_dimension(input_file, output_file, dim_name):
    """
    Parameters
    ----------
    input_file : str
        Path to input file (table)

    output_file : str
        Path to output file (table)

    dim_name : str
        Dimension to remove from the input table

    """
    input_file = expand(input_file)
    output_file = expand(output_file)

    input_dir = dirname(input_file)
    output_dir = dirname(output_file)

    if abspath(output_dir) == abspath(input_dir):
        raise ValueError("Will not allow output dir to be same as input dir")

    if not isdir(output_dir):
        mkdir(output_dir)

    input_table = np.load(input_file, mmap_mode="r")
    input_binning = np.load(join(input_dir, "binning.npy"))

    dim_num = [i for i, n in enumerate(input_binning.dtype.names) if n == dim_name][0]
    output_binning = input_binning[
        [n for n in input_binning.dtype.names if n != dim_name]
    ]

    # Save the binning to the output directory
    np.save(join(output_dir, "binning.npy"), output_binning)

    # Legacy way of storing bin edges: store each dim individually
    for d_name in output_binning.dtype.names:
        bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(d_name))
        np.save(bin_edges_fpath, output_binning[d_name])

    # If we find the removed dimension's bin edges in output dir, remove that file
    bin_edges_fpath = join(output_dir, "{}_bin_edges.npy".format(dim_name))
    if isfile(bin_edges_fpath):
        remove(bin_edges_fpath)

    output_shape = tuple(n for i, n in enumerate(input_table.shape) if i != dim_num)
    output_table = np.empty(shape=output_shape, dtype=input_table.dtype)
    #output_table = np.memmap(
    #    output_file, dtype=input_table.dtype, mode="w+", shape=output_shape
    #)

    # Perform the summation over the dimension to be removed

    # Note that setting dtype to float64 causes accumulator to be double
    # precision, even if output table is not
    input_table.sum(axis=dim_num, dtype=np.float64, out=output_table)

    np.save(output_file, output_table)
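
# A self-contained sketch of the axis-removal core used above (array shapes and
# field names are made up): locate the axis by name, then sum over it with a
# float64 accumulator while keeping the output in the table's own dtype.
import numpy as np

names = ("r", "costheta", "t")
table = np.random.random((3, 4, 5)).astype(np.float32)
dim_name = "t"
dim_num = names.index(dim_name)
out_shape = tuple(n for i, n in enumerate(table.shape) if i != dim_num)
out = np.empty(shape=out_shape, dtype=table.dtype)
table.sum(axis=dim_num, dtype=np.float64, out=out)
print(out.shape)  # (3, 4)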
Example 3
def combine_table_clusters(
    cluster_file,
    source_path_proto,
    basedir,
    cluster_idx,
    t_is_residual_time=None,
    overwrite=False,
):
    """Combine clustered tables together.

    `cluster_file` should be in Numpy .npy format and contain a structured
    Numpy array with field 'label'; the remaining fields are used to format the
    source table filename prototype (e.g. fields 'string' and 'dom' can be used
    if the prototype is "table_{string}_{dom}.fits").

    Output table files are stored to a `cl{cluster_idx}` subdirectory within
    the specified directory.

    Parameters
    ----------
    cluster_file : str
    source_path_proto : str
    basedir : str
    cluster_idx : int
    t_is_residual_time : bool, optional
    overwrite : bool, optional

    """
    cluster_file = expand(cluster_file)
    outdir = join(expand(basedir), 'cl{}'.format(cluster_idx))

    clusters = np.load(cluster_file)
    labels = clusters['label']
    members = clusters[labels == cluster_idx]
    assert len(members) > 0

    omkeys = np.empty(len(members), dtype=OMKEY_T)
    if 'dom' in members.dtype.names:
        omkeys[['string', 'om']] = members[['string', 'dom']]
        omkeys['pmt'] = 0
    else:
        omkeys[['string', 'om', 'pmt']] = members[['string', 'om', 'pmt']]

    table_fpaths = []
    names = members.dtype.names
    for member in members:
        pathspec = expand(source_path_proto.format(**dict(zip(names, member))))
        fpaths = glob(pathspec)
        if len(fpaths) == 0:
            raise ValueError('Cannot find file(s) "{}"'.format(pathspec))
        table_fpaths.extend(fpaths)

    combine_clsim_tables(
        table_fpaths=table_fpaths,
        t_is_residual_time=t_is_residual_time,
        outdir=outdir,
        overwrite=overwrite,
    )
    np.save(join(outdir, 'omkeys.npy'), omkeys)
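
# A self-contained sketch of how the source-path prototype is expanded per
# cluster member (the dtype, values, and prototype below are illustrative only):
import numpy as np

members = np.array(
    [(3, 86, 1), (3, 86, 2)],
    dtype=[("label", "i4"), ("string", "i4"), ("dom", "i4")],
)
proto = "table_{string}_{dom}.fits"
names = members.dtype.names
for member in members:
    print(proto.format(**dict(zip(names, member))))
# -> table_86_1.fits, table_86_2.fits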
Example 4
def find_unique_gcds():
    """Find unique GCD files in data"""
    with open(expand("~/all_data_gcd_files.txt"), "r") as f:
        fpaths = [expand(l.strip()) for l in f.readlines()]

    original_num_files = len(fpaths)
    root_infos = {}
    original_size = 0
    final_size = 0
    for fpath in fpaths:
        base = basename(fpath)
        size = getsize(fpath)
        fname_info = GENERIC_I3_FNAME_RE.match(base).groupdict()
        root = fname_info["base"]
        compext = fname_info.get("compext", None)
        original_size += size
        if root not in root_infos:
            root_infos[root] = []
            final_size += size

        root_infos[root].append(
            dict(
                fpath=fpath,
                base=base,
                root=root,
                compext=compext,
                size=size,
            ))

    root_infos = OrderedDict([
        (rn, root_infos[rn])
        for rn in sorted(root_infos.keys(), key=nsort_key_func)
    ])
    final_num_files = len(root_infos)

    #unequal_sizes = False
    #for rn, finfos in root_infos.items():
    #    file_paths = []
    #    file_sizes = []
    #    for fpath, fsize in finfos:
    #        file_paths.append(fsize)
    #        file_sizes.append(fsize)
    #    file_sizes = np.array(file_sizes)
    #    if not np.all(file_sizes == file_sizes[0]):
    #        unequal_sizes = True
    #        for file_path, file_size in finfos:
    #            print("{:14d} b : {}".format(file_size, file_path))

    print("original number of files = {}, final = {}".format(
        original_num_files, final_num_files))

    print("original size = {:.0f} GiB, final size = {:.0f} GiB".format(
        original_size / (1024**3), final_size / (1024**3)))

    return root_infos
Example 5
    def load_stacked_tables(
        self,
        stacked_tables_meta_fpath,
        stacked_tables_fpath,
        stacked_t_indep_tables_fpath,
        mmap_tables=False,
        mmap_t_indep=False,
    ):
        if self.is_stacked is not None:
            assert self.is_stacked

        stacked_tables_meta_fpath = expand(stacked_tables_meta_fpath)
        stacked_tables_fpath = expand(stacked_tables_fpath)
        stacked_t_indep_tables_fpath = expand(stacked_t_indep_tables_fpath)

        tables_mmap_mode = 'r' if mmap_tables else None
        t_indep_mmap_mode = 'r' if mmap_t_indep else None

        self.table_meta = load_pickle(stacked_tables_meta_fpath)
        self.tables = np.load(stacked_tables_fpath, mmap_mode=tables_mmap_mode)
        self.tables.setflags(write=False, align=True, uic=False)
        num_tables = self.tables.shape[0]

        self.t_is_residual_time = bool(
            self.table_meta.get('t_is_residual_time', False))

        self.t_indep_tables = np.load(stacked_t_indep_tables_fpath,
                                      mmap_mode=t_indep_mmap_mode)
        self.t_indep_tables.setflags(write=False, align=True, uic=False)
        assert self.t_indep_tables.shape[0] == num_tables

        self.sd_idx_table_indexer = deepcopy(
            self.table_meta['sd_idx_table_indexer'])
        self.sd_idx_table_indexer.setflags(write=False, align=True, uic=False)

        self.loaded_sd_indices = np.where(self.sd_idx_table_indexer >= 0)[0]
        self.n_photons_per_table = self.table_meta['n_photons_per_table']

        # Note that in creating the stacked tables, each individual table is
        # scaled such that the effective number of photons used to generate the
        # table is one (to avoid different norms across the tables if different
        # numbers of photons were used originally to create each).
        self.table_norm, self.t_indep_table_norm = get_table_norm(
            avg_angsens=self.avg_angsens,
            quantum_efficiency=1,
            norm_version=self.norm_version,
            **{k: self.table_meta[k]
               for k in TABLE_NORM_KEYS})

        self.table_norms = [self.table_norm] * num_tables
        self.t_indep_table_norms = [self.t_indep_table_norm] * num_tables

        self.is_stacked = True
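
# A hypothetical call (assuming `tables` is a Retro5DTables-like object exposing
# this method; the file names below are illustrative only):
#
# tables.load_stacked_tables(
#     stacked_tables_meta_fpath="stacked_ckv_templ_compr_meta.pkl",
#     stacked_tables_fpath="stacked_ckv_templ_compr.npy",
#     stacked_t_indep_tables_fpath="stacked_t_indep_ckv_tables.npy",
#     mmap_tables=True,
#     mmap_t_indep=True,
# )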
Example 6
    def __init__(self,
                 tables_dir,
                 hash_val,
                 geom,
                 use_directionality,
                 ic_exponent=1,
                 dc_exponent=1,
                 naming_version=None):
        # Translation and validation of args
        tables_dir = expand(tables_dir)
        assert isdir(tables_dir)
        assert len(geom.shape) == 3
        assert isinstance(use_directionality, bool)
        assert ic_exponent >= 0
        assert dc_exponent >= 0
        if naming_version is None:
            naming_version = len(RETRO_DOM_TABLE_FNAME_PROTO) - 1
        self.naming_version = naming_version
        self.dom_table_fname_proto = RETRO_DOM_TABLE_FNAME_PROTO[
            naming_version]

        self.tables_dir = tables_dir
        self.hash_val = hash_val
        self.geom = geom
        self.use_directionality = use_directionality
        self.ic_exponent = ic_exponent
        self.dc_exponent = dc_exponent
        self.tables = {'ic': {}, 'dc': {}}
        self.bin_edges = {'ic': {}, 'dc': {}}
Example 7
def load_angsens_model(model):
    """Load an angular sensitivity model.

    Note that this looks for the model in the current directory, the Retro data
    directory, and the $I3_SRC directory (if it is defined), trying both the
    name `model` and, if not already present, `model` prefixed by "as.".

    Returns
    -------
    angsens_poly : numpy.polynomial.Polynomial
    avg_angsens : float

    """
    models = [model]
    if not basename(model).startswith('as.'):
        models += [join(dirname(model), 'as.' + basename(model))]

    possible_dirs = [
        '.',
        join(RETRO_DIR, 'data'),
    ]
    if 'I3_SRC' in os.environ:
        possible_dirs.append('$I3_SRC/ice-models/resources/models/angsens')

    possible_paths = []
    for model_name in models:
        possible_paths += [join(d, model_name) for d in possible_dirs]

    coeffs_loaded = False
    for path in possible_paths:
        path = expand(path)
        if not isfile(path):
            continue
        # The first number in the file is approximately equal to (but greater
        # than) the peak of the distribution; it is used for scaling before
        # rejection sampling, so it is useless for us (and makes the simulation
        # less efficient), and we skip it here.
        poly_coeffs = np.loadtxt(path)[1:]
        coeffs_loaded = True
        break

    if not coeffs_loaded:
        raise ValueError('Could not load hole ice model at any of\n{}'.format(
            possible_paths))

    # We want coszen = -1 to correspond to upgoing particles, but angular
    # sensitivity is given w.r.t. the DOM axis (which points "down" towards
    # earth, and therefore is rotated 180-deg). So rotate the coszen
    # polynomial about cz=0 by negating the odd coefficients (coeffs are in
    # ascending powers of "x").
    flipped_coeffs = np.empty_like(poly_coeffs)
    flipped_coeffs[0::2] = poly_coeffs[0::2]
    flipped_coeffs[1::2] = -poly_coeffs[1::2]
    angsens_poly = np.polynomial.Polynomial(flipped_coeffs, domain=(-1, 1))

    integral_poly = angsens_poly.integ(m=1)
    avg_angsens = (integral_poly(1) - integral_poly(-1)) / 2

    return angsens_poly, avg_angsens
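
# A self-contained check of the coefficient flip performed above (coefficients
# are made up): negating the odd-power coefficients mirrors the polynomial
# about x = 0, i.e. q(x) == p(-x).
import numpy as np

poly_coeffs = np.array([0.3, 0.2, 0.1])  # ascending powers of x
flipped_coeffs = poly_coeffs.copy()
flipped_coeffs[1::2] *= -1
p = np.polynomial.Polynomial(poly_coeffs, domain=(-1, 1))
q = np.polynomial.Polynomial(flipped_coeffs, domain=(-1, 1))
print(np.isclose(p(0.5), q(-0.5)))  # True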
Example 8
def find_problematic_pulses(indir, pulse_series):
    """Find missing, bad, or old extracted pulse series and print the paths of
    the corresponding events directories.

    Parameters
    ----------
    indir : str
    pulse_series : str or iterable thereof

    """
    if isinstance(pulse_series, str):
        pulse_series = [pulse_series]
    indir = expand(indir)

    for dirpath, dirs_, files in walk(indir, followlinks=True):
        if "events.npy" in files:
            dirs_.clear()
        else:
            dirs_.sort(key=nsort_key_func)
            files.sort(key=nsort_key_func)

            for fname in files:
                match = OSCNEXT_FNAME_RE.match(fname)
                if not match:
                    continue

                i3f_dname = join(dirpath, match.groupdict()["basename"])
                if isdir(i3f_dname):
                    if not isfile(join(i3f_dname, "events.npy")):
                        print(i3f_dname)
                else:
                    print(i3f_dname)

            continue

        sys.stderr.write(".")
        sys.stderr.flush()

        # If any one of the named pulse series are missing or bad, record
        # the path and move on without checking the other pulse series
        for ps_name in pulse_series:
            pulses_fpath = join(dirpath, "pulses", ps_name + ".pkl")
            if not isfile(pulses_fpath):
                print(dirpath)
                break
            try:
                pulses = load_pickle(pulses_fpath)
                if len(pulses) > 0 and "flags" not in pulses[0][0][1].dtype.names:
                    print(dirpath)
                    break
            except Exception:
                print(dirpath)
                break
Example 9
def get_estimate(fpath, verbose=True):
    """Get estimate from llhp.npy file"""
    fpath = expand(fpath)
    llhp = np.load(fpath)
    estimate = estimate_from_llhp(llhp)
    if verbose:
        for dim, est in estimate.items():
            mean, low, high = est['mean'], est['low'], est['high']
            print(
                '{:s} : mean = {:9.3f} ; 95% interval = [{:9.3f}, {:9.3f}] {}'.
                format(dim.rjust(20), mean, low, high, UNITS[dim]))
    return llhp, estimate
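
# A hypothetical invocation (the path is illustrative; `estimate` is keyed by
# dimension name, each value holding 'mean', 'low', and 'high'):
#
# llhp, estimate = get_estimate("retro_crs_prefit.llhp.npy", verbose=False)
# zenith_mean = estimate["track_zenith"]["mean"]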
Example 10
def centralize_gcds(root_infos, gcd_dir=GCD_DIR):
    """Move GCD files to a single directory, if they don't already exist there.

    Compression extensions are ignored, so only one version of each GCD file is
    kept.

    Parameters
    ----------
    root_infos : mapping
    gcd_dir : str, optional

    """
    gcd_dir = expand(gcd_dir)
    mkdir(gcd_dir)

    existing_fnames = os.listdir(gcd_dir)
    existing_roots = set()
    for fname in existing_fnames:
        match = GENERIC_I3_FNAME_RE.match(fname)
        if not match:
            continue
        groupdict = match.groupdict()
        existing_roots.add(groupdict["base"])

    for root, infos in root_infos.items():
        for info in infos:
            is_link = islink(info["fpath"])
            is_file = isfile(info["fpath"])

            if is_link:
                if is_file:  # link to an existing file
                    if root not in existing_roots:
                        shutil.copy2(info["fpath"],
                                     gcd_dir,
                                     follow_symlinks=True)
                        existing_roots.add(root)
                else:  # bad link (to nothing, or to a directory)
                    if not isdir(info["fpath"]):
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
            else:
                if root in existing_roots:
                    if is_file:
                        print(f'os.remove({info["fpath"]})')
                        os.remove(info["fpath"])
                else:
                    print(f'shutil.move({info["fpath"]}, {gcd_dir})')
                    shutil.move(info["fpath"], gcd_dir)
                    existing_roots.add(root)
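
# A sketch of how this pairs with `find_unique_gcds` above (assuming both
# functions and GCD_DIR are importable from the same module):
#
# root_infos = find_unique_gcds()
# centralize_gcds(root_infos, gcd_dir=GCD_DIR)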
Example 11
def main():
    """Main function for calling summarize_clsim_table as a script"""
    t0 = time()
    args = parse_args()
    kwargs = vars(args)
    table_fpaths = []
    for fpath in kwargs.pop('table-fpaths'):
        table_fpaths.extend(glob(expand(fpath)))
    for fpath in table_fpaths:
        kwargs['table_fpath'] = fpath
        summarize_clsim_table(**kwargs)
    total_time = time() - t0
    if len(table_fpaths) > 1:
        avg = np.round(total_time / len(table_fpaths), 3)
        wstderr('Average time to summarize tables: {} s/table\n'.format(avg))
Example 12
def concatenate_recos_and_save(outfile, **kwargs):
    """Concatenate recos and save to a file.

    Parameters
    ----------
    outfile : str
    **kwargs
        Arguments passed to `concatenate_recos`

    """
    outfile = expand(outfile)
    out_array = concatenate_recos(**kwargs)
    outdir = dirname(outfile)
    if not isdir(outdir):
        mkdir(outdir)
    np.save(outfile, out_array)
    sys.stdout.write('Saved concatenated array to "{}"\n'.format(outfile))
Example 13
def main():
    """Load file specified on command line and print results of parsing it."""
    with open(expand(sys.argv[1]), 'r') as f:
        contents = f.readlines()

    try:
        points, errors = parse(contents)
    except ValueError:
        print('Failed to parse file "{}"'.format(sys.argv[1]))
        raise

    for dim in 't x y z track_zenith track_azimuth track_energy cascade_energy'.split():
        pt = points[dim]
        sd = errors[dim]
        if dim in ['track_zenith', 'track_azimuth']:
            pt = np.rad2deg(pt)
            sd = np.rad2deg(sd)

        print('{:14s} = {:8.3f} +/- {:5.1f} {}'.format(dim, pt, sd, UNITS[dim]))
Example 14
def setup_tdi_tables(tdi=None, mmap=False):
    """Load and instantiate (Cherenkov) TDI tables.

    Parameters
    ----------
    tdi : sequence of strings, optional
        Path to TDI tables' `ckv_tdi_table.npy` files, or paths to
        directories containing those files; one entry per TDI table

    mmap : bool

    Returns
    -------
    tdi_tables : tuple of 0 or more numpy arrays
    tdi_metas : tuple of 0 or more OrderedDicts

    """
    if tdi is None:
        return (), ()

    mmap_mode = 'r' if mmap else None

    tdi_tables = []
    tdi_metas = []
    for tdi_ in tdi:
        if tdi_ is None:
            continue
        tdi_ = expand(tdi_)
        if isdir(tdi_):
            tdi_ = join(tdi_, 'ckv_tdi_table.npy')

        print('Loading and instantiating TDI table at "{}"'.format(tdi_))

        be = load_pickle(join(dirname(tdi_), 'tdi_bin_edges.pkl'))
        meta = load_pickle(join(dirname(tdi_), 'tdi_metadata.pkl'))
        meta['bin_edges'] = be
        tdi_table = np.load(tdi_, mmap_mode=mmap_mode)

        tdi_metas.append(meta)
        tdi_tables.append(tdi_table)

    return tuple(tdi_tables), tuple(tdi_metas)
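
# A hypothetical call (paths are illustrative): each entry may point either at a
# ckv_tdi_table.npy file or at the directory containing it.
#
# tdi_tables, tdi_metas = setup_tdi_tables(
#     tdi=["/data/tdi/upper", "/data/tdi/lower/ckv_tdi_table.npy"],
#     mmap=True,
# )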
Example 15
def find_gcds_in_dirs(dirs, gcd_fname_re, recurse=True):
    """Find data run GCD files in directories.

    Parameters
    ----------
    dirs : str or iterable thereof
    gcd_fname_re : compiled regex
        Must define named groups "year", "run", and "pass"
    recurse : bool

    Returns
    -------
    data_run_gcds : dict
        Keys are <tuple>(<str>2-digit season, <str>run number) and values are
        <str> path to corresponding GCD file

    """
    if isinstance(dirs, str):
        dirs = [dirs]
    dirs = [expand(rootdir) for rootdir in dirs]

    data_run_gcds = {}
    for rootdir in dirs:
        for dirpath, subdirs, files in walk(rootdir):
            if recurse:
                subdirs.sort(key=nsort_key_func)
            else:
                del subdirs[:]
            files.sort(key=nsort_key_func)

            for fname in files:
                gcd_match = gcd_fname_re.match(fname)
                if gcd_match:
                    gcd_groupdict = gcd_match.groupdict()
                    # get 2 digit year
                    year = "{:02d}".format(int(gcd_groupdict["year"]) % 2000)
                    key = (year, gcd_groupdict["run"])
                    # prefer "levelXpassY_*" GCD files
                    if key in data_run_gcds and gcd_groupdict["pass"] is None:
                        continue
                    data_run_gcds[key] = join(dirpath, fname)

    return data_run_gcds
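
# A self-contained sketch of the kind of regex this expects (the pattern and the
# file name are illustrative only): it must define named groups "year", "run",
# and "pass" (the latter may match nothing), which the loop above uses to build
# keys and to prefer levelXpassY GCD files.
import re

gcd_fname_re = re.compile(
    r"(?:(?P<pass>level\d+pass\d+)_)?"
    r".*?(?P<year>20\d{2}).*?"
    r"Run(?P<run>\d{8}).*"
    r"GCD.*\.i3(\..*)?$"
)
m = gcd_fname_re.match("level2pass2_IC86.2017_Run00129635_GCD.i3.zst")
print(m.groupdict())  # {'pass': 'level2pass2', 'year': '2017', 'run': '00129635'}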
Example 16
def generate_stacked_tables(outdir, dom_tables_kw):
    """Stack a set of tables into a single numpy array for use of all tables in
    Numba.

    Currently, only ckv_templ_compr tables are supported.

    Parameters
    ----------
    outdir : string
        Path to directory into which the three resulting files will be stored.

    dom_tables_kw : mapping
        As returned by retro.init_obj.parse_args

    """
    if dom_tables_kw['dom_tables_kind'] != 'ckv_templ_compr':
        raise NotImplementedError(
            '"{}" tables not supported; only "ckv_templ_compr"'
            .format(dom_tables_kw['dom_tables_kind'])
        )

    # Use the convenience function to load the single-DOM tables into a
    # retro_5d_tables.Retro5DTables object, and then we can use the loaded
    # tables from there.
    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)

    assert np.all(dom_tables.sd_idx_table_indexer >= 0)

    table_meta = OrderedDict()
    table_meta['table_kind'] = dom_tables.table_kind
    table_meta['sd_idx_table_indexer'] = dom_tables.sd_idx_table_indexer
    table_meta.update(dom_tables.table_meta)
    table_meta['n_photons'] = 1.0
    table_meta['n_photons_per_table'] = np.array(dom_tables.n_photons_per_table)

    outdir = expand(outdir)
    mkdir(outdir)

    fpath = join(outdir, 'stacked_{}_meta.pkl'.format(dom_tables.table_name))
    sys.stdout.write('Writing metadata to "{}" ...'.format(fpath))
    sys.stdout.flush()
    with open(fpath, 'wb') as pkl_file:
        pickle.dump(table_meta, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()

    if dom_tables.compute_t_indep_exp:
        # Renormalize to 1 photon
        stacked_t_indep_tables = np.stack(
            [tbl/n for tbl, n in zip(dom_tables.t_indep_tables, dom_tables.n_photons_per_table)]
        )
        fpath = join(
            outdir,
            'stacked_{}.npy'.format(dom_tables.t_indep_table_name)
        )
        sys.stdout.write('Writing stacked t_indep tables to "{}" ...'
                         .format(fpath))
        sys.stdout.flush()
        np.save(fpath, stacked_t_indep_tables)
        sys.stdout.write(' done.\n')
        sys.stdout.flush()

    # Renormalize to 1 photon
    for template_map, n_photons in zip(dom_tables.tables, dom_tables.n_photons_per_table):
        template_map['weight'] /= n_photons

    stacked_tables = np.stack(dom_tables.tables)
    fpath = join(outdir, 'stacked_{}.npy'.format(dom_tables.table_name))
    sys.stdout.write('Writing stacked tables to "{}" ...'.format(fpath))
    sys.stdout.flush()
    np.save(fpath, stacked_tables)
    sys.stdout.write(' done.\n')
    sys.stdout.flush()
Example 17
def load_clsim_table_minimal(fpath, mmap=False, include_overflow=False):
    """Load a CLSim table from disk (optionally compressed with zstd).

    Similar to the `load_clsim_table` function but the full table, including
    under/overflow bins, is kept and no normalization or further processing is
    performed on the table data besides populating the output OrderedDict.

    Parameters
    ----------
    fpath : string
        Path to file to be loaded. If the file has extension 'zst', 'zstd', or
        'zstandard', the file will be decompressed using the `python-zstandard`
        Python library before passing to `fits` for interpreting.

    mmap : bool, optional
        Whether to memory map the table

    include_overflow : bool, optional
        By default, overflow bins (if present) are removed

    Returns
    -------
    table : OrderedDict

    """
    t0 = time()

    table = OrderedDict()
    fpath = expand(fpath)

    if DEBUG:
        wstderr('Loading table from {} ...\n'.format(fpath))

    if isdir(fpath):
        indir = fpath
        if mmap:
            mmap_mode = 'r'
        else:
            mmap_mode = None

        for rel_fpath in listdir(indir):
            key, ext = splitext(rel_fpath)
            abs_fpath = join(indir, rel_fpath)

            if not (isfile(abs_fpath) and ext == '.npy'):
                continue

            if DEBUG:
                wstderr('    loading {} from "{}" ...'.format(key, abs_fpath))

            t1 = time()
            val = np.load(abs_fpath, mmap_mode=mmap_mode)

            # Pull "small" things (less than 10 MiB) into memory so we don't
            # have too many file handles open due to memory mapping
            if mmap and val.nbytes < 10 * 1024**2:
                val = np.copy(val)

            table[key] = val

            if DEBUG:
                wstderr(' ({} ms)\n'.format(np.round((time() - t1) * 1e3, 3)))

    elif isfile(fpath):
        from astropy.io import fits
        fobj = get_decompressd_fobj(fpath)
        pf_table = None
        try:
            pf_table = fits.open(fobj, mode='readonly', memmap=mmap)

            header = pf_table[0].header  # pylint: disable=no-member
            table['table_shape'] = np.array(pf_table[0].data.shape, dtype=int)  # pylint: disable=no-member
            table['group_refractive_index'] = set_explicit_dtype(
                force_little_endian(header['_i3_n_group']))
            table['phase_refractive_index'] = set_explicit_dtype(
                force_little_endian(header['_i3_n_phase']))

            n_dims = len(table['table_shape'])

            new_style = False
            axnames = [None] * n_dims
            binning = [None] * n_dims
            for key in header.keys():
                if not key.startswith('_i3_ax_'):
                    continue
                new_style = True
                axnum = header[key]
                axname = key[len('_i3_ax_'):]
                be0 = header['_i3_{}_min'.format(axname)]
                be1 = header['_i3_{}_max'.format(axname)]
                n_bins = header['_i3_{}_n_bins'.format(axname)]
                power = header.get('_i3_{}_power'.format(axname), 1)
                bin_edges = force_little_endian(pf_table[axnum + 1].data)  # pylint: disable=no-member
                assert np.isclose(bin_edges[0],
                                  be0), '%f .. %f' % (be0, bin_edges[0])
                assert np.isclose(bin_edges[-1],
                                  be1), '%f .. %f' % (be1, bin_edges[-1])
                assert len(bin_edges) == n_bins + 1, '%d vs. %d' % (
                    len(bin_edges), n_bins + 1)
                assert np.allclose(
                    bin_edges,
                    powerspace(start=be0,
                               stop=be1,
                               num=n_bins + 1,
                               power=power),
                )
                axnames[axnum] = axname
                binning[axnum] = bin_edges

            if not new_style:
                if n_dims == 5:
                    axnames = [
                        'r', 'costheta', 't', 'costhetadir', 'deltaphidir'
                    ]
                elif n_dims == 6:
                    axnames = [
                        'r', 'costheta', 'phi', 't', 'costhetadir',
                        'deltaphidir'
                    ]
                else:
                    raise NotImplementedError(
                        '{}-dimensional table not handled for old-style CLSim'
                        ' tables'.format(n_dims))
                binning = [
                    force_little_endian(pf_table[i + 1].data).flat
                    for i in range(len(axnames))
                ]  # pylint: disable=no-member

            for axnum, (axname, bin_edges) in enumerate(zip(axnames, binning)):
                assert axname is not None, 'missing axis %d name' % axnum
                assert bin_edges is not None, 'missing axis %d binning' % axnum

            dtype = np.dtype([(axname, np.float64, dim.size)
                              for axname, dim in zip(axnames, binning)])
            table['binning'] = np.array(tuple(binning), dtype=dtype)

            for keyroot in GENERIC_KEYS:
                keyname = '_i3_' + keyroot
                if keyname in header:
                    val = force_little_endian(header[keyname])
                    if keyroot in (
                            't_is_residual_time',
                            'disable_tilt',
                            'disable_anisotropy',
                    ):
                        val = np.bool8(val)
                    else:
                        val = set_explicit_dtype(val)
                    table[keyroot] = val

            # Get string values that were encoded entirely within the key name
            # (a known prefix followed by the value). I3 software had issues
            # saving strings as values in the header "dict", so the workaround
            # was to store the string value within the key itself.
            for infix in INFIX_KEYS:
                keyroot = '_i3_' + infix + '_'
                for keyname in header.keys():
                    if not keyname.startswith(keyroot):
                        continue
                    val = keyname[len(keyroot):]
                    table[infix] = np.string0(val)

            if include_overflow:
                slicer = (slice(None), ) * n_dims
            else:
                slicer = (slice(1, -1), ) * n_dims
            table['table'] = force_little_endian(pf_table[0].data[slicer])  # pylint: disable=no-member

            wstderr('    (load took {} s)\n'.format(np.round(time() - t0, 3)))

        except:
            wstderr('ERROR: Failed to load "{}"\n'.format(fpath))
            raise

        finally:
            del pf_table
            if hasattr(fobj, 'close'):
                fobj.close()
            del fobj

    else:  # fpath is neither dir nor file
        raise ValueError('Table does not exist at path "{}"'.format(fpath))

    if 'step_length' not in table:
        table['step_length'] = 1

    if 't_is_residual_time' not in table:
        table['t_is_residual_time'] = True

    if DEBUG:
        wstderr('  Total time to load: {} s\n'.format(np.round(time() - t0,
                                                               3)))

    return table
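
# A hypothetical invocation (the path is illustrative). For a directory of .npy
# files the returned OrderedDict holds one entry per file; for a FITS file it
# holds the binning, header-derived metadata, and the table itself.
#
# table = load_clsim_table_minimal("/data/tables/clsim_table_set_abc123", mmap=True)
# print(list(table.keys()))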
Example 18
def generate_time_indep_tables(table,
                               outdir=None,
                               kinds=('clsim', 'ckv'),
                               overwrite=False):
    """Generate and save to disk time independent table(s) from the original
    CLSim table and/or a Cherenkov table.

    Parameters
    ----------
    table : string
    outdir : string, optional
    kinds : string or iterable thereof, optional
    overwrite : bool, optional

    Returns
    -------
    t_indep_table : numpy.ndarray of size (n_r, n_costheta, n_costhetadir, n_deltaphidir)

    """
    if isinstance(kinds, basestring):
        kinds = [kinds]
    kinds = [k.strip().lower() for k in kinds]

    clsim_table_path = None
    ckv_table_path = None

    table = expand(table)
    if outdir is None:
        if isdir(table):
            outdir = table
        elif table.endswith('.npy'):
            outdir = dirname(table)
        elif table.endswith('.fits'):
            outdir = table[:-len('.fits')]

    if isfile(table):
        table_basename = basename(table)
        if table_basename == 'table.npy' or table_basename.endswith('.fits'):
            clsim_table_path = table
        elif table_basename == 'ckv_table.npy':
            ckv_table_path = table

    elif isdir(table):
        if 'clsim' in kinds and isfile(join(table, 'table.npy')):
            clsim_table_path = table

        if 'ckv' in kinds and isfile(join(table, 'ckv_table.npy')):
            ckv_table_path = table

    t_indep_table_exists = False
    if 'clsim' in kinds and isfile(join(outdir, 't_indep_table.npy')):
        t_indep_table_exists = True

    t_indep_ckv_table_exists = False
    if 'ckv' in kinds and isfile(join(outdir, 't_indep_ckv_table.npy')):
        t_indep_ckv_table_exists = True

    if 'clsim' in kinds and (overwrite or not t_indep_table_exists):
        if clsim_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from CLSim table but CLSim'
                ' table does not exist.')
        print('generating t_indep_table')
        mkdir(outdir)
        t0 = time.time()

        clsim_table = load_clsim_table_minimal(clsim_table_path, mmap=True)

        t1 = time.time()
        if retro.DEBUG:
            print('loaded clsim table in {:.3f} s'.format(t1 - t0))

        t_indep_table = clsim_table['table'][1:-1, 1:-1, 1:-1, 1:-1,
                                             1:-1].sum(axis=2)

        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))

        np.save(join(outdir, 't_indep_table.npy'), t_indep_table)

        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_table.npy to disk in {:.3f} s'.format(t3 -
                                                                       t2))

        del clsim_table, t_indep_table

    if 'ckv' in kinds and (overwrite or not t_indep_ckv_table_exists):
        if ckv_table_path is None:
            raise ValueError(
                'Told to generate t-indep table from ckv table but ckv'
                ' table does not exist.')
        print('generating t_indep_ckv_table')
        mkdir(outdir)
        t0 = time.time()

        ckv_table = load_ckv_table(ckv_table_path, mmap=True)

        t1 = time.time()
        if retro.DEBUG:
            print('loaded ckv table in {:.3f} s'.format(t1 - t0))

        t_indep_ckv_table = ckv_table['ckv_table'].sum(axis=2)

        t2 = time.time()
        if retro.DEBUG:
            print('summed over t-axis in {:.3f} s'.format(t2 - t1))

        np.save(join(outdir, 't_indep_ckv_table.npy'), t_indep_ckv_table)

        t3 = time.time()
        if retro.DEBUG:
            print('saved t_indep_ckv_table.npy to disk in {:.3f} s'.format(t3 - t2))

        del ckv_table, t_indep_ckv_table
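
# A hypothetical invocation (the path is illustrative): given a directory that
# contains ckv_table.npy, this writes t_indep_ckv_table.npy alongside it.
#
# generate_time_indep_tables(table="/data/tables/cl123", kinds="ckv")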
Example 19
def load_t_r_theta_table(fpath,
                         depth_idx,
                         scale=1,
                         exponent=1,
                         photon_info=None):
    """Extract info from a file containing a (t, r, theta)-binned Retro table.

    Parameters
    ----------
    fpath : string
        Path to FITS file corresponding to the passed ``depth_idx``.

    depth_idx : int
        Depth index (e.g. from 0 to 59)

    scale : float
        Scaling factor to apply to the photon survival probability from the
        table, e.g. for quantum efficiency. This is applied _before_
        `exponent`. See `Notes` for more info.

    exponent : float >= 0, optional
        Modify probabilities in the table by ``prob = 1 - (1 - prob)**exponent``
        to allow for up- and down-scaling the efficiency of the DOMs. This is
        applied to each DOM's table _after_ `scale`. See `Notes` for more
        info.

    photon_info : None or RetroPhotonInfo namedtuple of dicts
        If None, creates a new RetroPhotonInfo namedtuple with empty dicts to
        fill. If one is provided, the existing component dictionaries are
        updated.

    Returns
    -------
    photon_info : RetroPhotonInfo namedtuple of dicts
        Fields filled are 'survival_prob', 'time_indep_survival_prob', 'theta',
        'deltaphi', and 'length'. Each dict is keyed by `depth_idx` and values
        are the arrays loaded from the FITS file.

    bin_edges : TimeSphCoord namedtuple
        Each element of the tuple is an array of bin edges.

    Notes
    -----
    The parameters `scale` and `exponent` modify a table's probability `P` by::

        P = 1 - (1 - P*scale)**exponent

    This allows for `scale` (which must be from 0 to 1) to be used for e.g.
    quantum efficiency--which always reduces the detection probability--and
    `exponent` (which must be 0 or greater) to be used as a systematic that
    modifies the post-`scale` probabilities up and down while keeping them
    valid (i.e., between 0 and 1). Larger values of `scale` (i.e., closer to 1)
    indicate a more efficient DOM. Likewise, values of `exponent` greater than
    one scale up the DOM efficiency, while values of `exponent` between 0 and 1
    scale the efficiency down.

    """
    # pylint: disable=no-member
    from astropy.io import fits

    assert 0 <= scale <= 1
    assert exponent >= 0

    if photon_info is None:
        empty_dicts = []
        for _ in RetroPhotonInfo._fields:
            empty_dicts.append({})
        photon_info = RetroPhotonInfo(*empty_dicts)

    with fits.open(expand(fpath)) as table:
        data = force_little_endian(table[0].data)

        if scale == exponent == 1:
            photon_info.survival_prob[depth_idx] = data
        else:
            photon_info.survival_prob[depth_idx] = (
                1 - (1 - data * scale)**exponent)

        photon_info.theta[depth_idx] = force_little_endian(table[1].data)

        photon_info.deltaphi[depth_idx] = force_little_endian(table[2].data)

        photon_info.length[depth_idx] = force_little_endian(table[3].data)

        # Note that we invert (reverse and multiply by -1) time edges; also,
        # no phi edges are defined in these tables.
        data = force_little_endian(table[4].data)
        t = -data[::-1]

        r = force_little_endian(table[5].data)

        # Previously used the following to get "agreement" w/ raw photon sim
        #r_volumes = np.square(0.5 * (r[1:] + r[:-1]))
        #r_volumes = (0.5 * (r[1:] + r[:-1]))**2 * (r[1:] - r[:-1])
        r_volumes = 0.25 * (r[1:]**3 - r[:-1]**3)

        photon_info.survival_prob[depth_idx] /= r_volumes[np.newaxis, :,
                                                          np.newaxis]

        photon_info.time_indep_survival_prob[depth_idx] = np.sum(
            photon_info.survival_prob[depth_idx], axis=0)

        theta = force_little_endian(table[6].data)

        bin_edges = TimeSphCoord(t=t,
                                 r=r,
                                 theta=theta,
                                 phi=np.array([], dtype=t.dtype))

    return photon_info, bin_edges
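
# A quick numeric check of the Notes above (input values are made up): `scale`
# lowers the detection probability, and `exponent` then moves it up or down
# while keeping it within [0, 1].
import numpy as np

prob = np.array([0.0, 0.2, 0.9])
scale, exponent = 0.8, 1.5
modified = 1 - (1 - prob * scale) ** exponent
print(modified)  # all values remain within [0, 1]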
Example 20
def plotit(toplot, outdir=None):
    """
    Parameters
    ----------
    toplot : mapping
        Keys are reco names; each value is a dict with "recos" and "truth"
        structured arrays

    outdir : str, optional
        Directory in which to save the figure, if provided
    """
    plot_recos = [
        #"LineFit_DC",
        #"L5_SPEFit11",
        "retro_crs_prefit",
        "retro_mn8d",
        "Pegleg_Fit_MN",
    ][::-1]
    plot_params = [
        "x",
        "y",
        "z",
        "time",
        "azimuth",
        "zenith",
        "coszen",
        "energy",
        "track_energy",
        "track_zenith",
        "track_coszen",
        "track_azimuth",
    ]
    use_weights = False
    n_bins = 71
    n_ebins = 10
    ebin_edges = np.logspace(0, 3, n_ebins + 1)

    lower_disp_pctile, upper_disp_pctile = 2.5, 97.5
    lower_pct, upper_pct = 25, 75
    iq_pct = upper_pct - lower_pct

    longest_recolen = 0
    for reco in plot_recos:
        longest_recolen = max(longest_recolen, len(reco))
        recos = toplot[reco]["recos"]
        truth = toplot[reco]["truth"]
        if recos.dtype.names and "run_time" in recos.dtype.names:
            print("Mean run time, {}: {:.1f} s; mean energy = {:.1f}".format(
                reco, recos["run_time"].mean(), truth["energy"].mean()))

    #nx = int(np.ceil(np.round(np.sqrt(len(plot_params)*2)/2)*2))
    nx = 4
    ny = int(np.ceil(len(plot_params) * 4 / nx))
    f = 1.5

    fig, axes = plt.subplots(ny,
                             nx,
                             figsize=(4 * f * nx, 3 * f * ny),
                             dpi=120,
                             squeeze=False)
    axit = iter(axes.flat)

    for param in plot_params:
        err_lower, err_upper = np.inf, -np.inf
        lower, upper = np.inf, -np.inf
        stuff = OrderedDict()

        plabel = LABELS[param] if param in LABELS else param
        ulabel = " ({})".format(UNITS[param]) if param in UNITS else ""
        bare_ulabel = " {}".format(UNITS[param]) if param in UNITS else ""

        for reco in plot_recos:
            rinfo = toplot[reco]
            try:
                if param == "energy":
                    track_energy = get_pval(rinfo["recos"], "track_energy")
                    cascade_energy = get_pval(rinfo["recos"], "cascade_energy")
                    recos = track_energy + 2. * cascade_energy
                else:
                    recos = get_pval(rinfo["recos"], param)
                truth = get_pval(rinfo["truth"], param)
            except:
                print('exception', param, reco)
                continue
            recos = get_point_estimate(recos,
                                       estimator="median",
                                       expect_scalar=False)
            if not np.all(np.isfinite(recos)):
                n_nonfinite = np.count_nonzero(
                    np.logical_not(np.isfinite(recos)))
                print('not all finite: {}, {}: {} / {} not finite'.format(
                    param, reco, n_nonfinite, recos.size))
                continue
            weight = rinfo['truth']['weight']
            if "azimuth" in param:
                error = (recos - truth + np.pi) % (2 * np.pi) - np.pi
            elif "energy" in param:
                error = recos / truth - 1
            else:
                error = recos - truth

            stuff[reco] = (recos, truth, error, weight)
            if use_weights:
                lower_, upper_ = weighted_percentile(
                    error[np.isfinite(error)],
                    [lower_disp_pctile, upper_disp_pctile],
                    weight,
                )
            else:
                lower_, upper_ = np.percentile(
                    error[np.isfinite(error)],
                    [lower_disp_pctile, upper_disp_pctile],
                )
            err_lower = np.nanmin([lower_, err_lower])
            err_upper = np.nanmax([upper_, err_upper])

            for array in (truth, recos):
                mask = np.isfinite(array)
                if "energy" in param:
                    mask &= array > 0
                if use_weights:
                    lower_, upper_ = weighted_percentile(
                        array[mask],
                        [lower_disp_pctile, upper_disp_pctile],
                        weight,
                    )
                else:
                    lower_, upper_ = np.percentile(
                        array[mask],
                        [lower_disp_pctile, upper_disp_pctile],
                    )
                lower = np.nanmin([lower_, lower])
                upper = np.nanmax([upper_, upper])

        # -- Plot raw distributions -- #

        ax = next(axit)
        if "energy" in param:
            bins = np.logspace(np.log10(lower), np.log10(upper), n_bins)
            xscale = "log"
        else:
            bins = np.linspace(lower, upper, n_bins)
            xscale = "linear"
        mask = np.isfinite(truth)
        nf = np.count_nonzero(mask)
        if nf != mask.size:
            print(param, "truth", nf, mask.size)

        for reco, (recos, truth, error, weight) in stuff.items():
            mask = np.isfinite(recos)
            nf = np.count_nonzero(mask)
            if nf != mask.size:
                print(reco, recos, nf, mask.size)
            if use_weights:
                pc_lower, median, pc_upper = weighted_percentile(
                    recos[mask],
                    [lower_pct, 50, upper_pct],
                    weight,
                )
            else:
                pc_lower, median, pc_upper = np.percentile(
                    recos[mask], [lower_pct, 50, upper_pct])
            iq = pc_upper - pc_lower
            try:
                _, _, out = ax.hist(
                    recos[mask],
                    weights=weight[mask] if use_weights else None,
                    bins=bins,
                    #label="{}".format(reco.rjust(longest_recolen)),
                    histtype='step',
                    lw=1,
                )
            except:
                print(
                    reco,
                    param,
                    np.all(np.isfinite(recos)),
                    np.nanmin(recos),
                    np.nanmax(recos),
                    lower,
                    upper,
                )
                raise

        recos, truth, error, weight = list(stuff.values())[0]
        pc_lower, median, pc_upper = np.percentile(truth[mask],
                                                   [lower_pct, 50, upper_pct])
        iq = pc_upper - pc_lower
        _, _, out = ax.hist(
            truth[mask],
            weights=weight[mask] if use_weights else None,
            bins=bins,
            label="MC truth",  #"{}".format("truth".rjust(longest_recolen)),
            histtype='step',
            lw=1.5,
            edgecolor='k',
            zorder=-10,
        )

        ax.set_xlim(lower, upper)
        ax.set_xscale(xscale)
        leg = ax.legend(loc="lower center", frameon=False)
        leg._legend_box.align = "left"
        plt.setp(leg.get_title(), family='monospace')
        ax.set_yticks([])
        ax.set_title("{} distribution, all E".format(param))

        # -- Plot errors across all events -- #

        ax = next(axit)
        bins = np.linspace(err_lower, err_upper, n_bins)
        for reco, (recos, truth, error, weight) in stuff.items():
            mask = np.isfinite(error)
            nf = np.count_nonzero(mask)
            if nf != mask.size:
                print(param, reco, "error", nf, mask.size)

            if use_weights:
                pc_lower, median, pc_upper = weighted_percentile(
                    error[mask],
                    [lower_pct, 50, upper_pct],
                    weight,
                )
            else:
                pc_lower, median, pc_upper = np.percentile(
                    error[mask], [lower_pct, 50, upper_pct])

            iq = pc_upper - pc_lower
            try:
                _, _, out = ax.hist(
                    error[mask],
                    weights=weight[mask] if use_weights else None,
                    bins=bins,
                    label="{} {:6.2f} {:6.2f}".format(
                        reco.rjust(longest_recolen), median, iq),
                    histtype='step',
                    lw=1,
                )
            except:
                print(
                    reco,
                    param,
                    np.all(np.isfinite(error)),
                    np.nanmin(error),
                    np.nanmax(error),
                    lower,
                    upper,
                )
                raise
        ax.set_xlim(err_lower, err_upper)
        ax.set_ylim(0, ax.get_ylim()[1] * 1.3)
        leg = ax.legend(
            loc="upper right",
            title=("{} {} {}".format(" " * longest_recolen, "median",
                                     "IQ {:2d}%".format(iq_pct))),
            markerfirst=False,
            frameon=False,
            framealpha=0.2,
            prop=dict(family='monospace'),
        )
        leg._legend_box.align = "left"
        plt.setp(leg.get_title(), family='monospace')
        ax.set_yticks([])
        if "energy" in param:
            title = "fract {} error, all E".format(param)
        else:
            title = "{} error, all E".format(param)
        ax.set_title(title)

        # -- Plot errors vs. true energy -- #

        ax = next(axit)
        colors = ['C{}'.format(i) for i in range(8)]
        colors_iter = iter(colors)
        for reco, (recos, truth, error, weight) in stuff.items():
            mask = np.isfinite(error)
            true_en = get_pval(toplot[reco]['truth'], "energy")
            idxs = np.digitize(true_en, ebin_edges) - 1
            pc_l = []
            pc_u = []
            for idx in range(n_ebins):
                bin_error = error[(idxs == idx) & mask]
                if use_weights:
                    pc_l_, med_, pc_u_ = weighted_percentile(
                        bin_error, [lower_pct, 50, upper_pct], weight)
                else:
                    pc_l_, med_, pc_u_ = np.percentile(
                        bin_error, [lower_pct, 50, upper_pct])
                pc_l.append(pc_l_)
                pc_u.append(pc_u_)

            color = next(colors_iter)
            #ax.fill_between(
            #    x=ebin_edges,
            #    y1=[pc_l[0]] + pc_l,
            #    y2=[pc_u[0]] + pc_u,
            #    interpolate=False,
            #    step="post",
            #    facecolor='none',
            #    edgecolor=color,
            #)
            ax.step(
                x=ebin_edges,
                y=[pc_l[0]] + pc_l,
                #facecolor='none',
                color=color,
            )
            ax.step(
                x=ebin_edges,
                y=[pc_u[0]] + pc_u,
                #facecolor='none',
                color=color,
            )

            ax.set_xscale('log')
            ax.set_xlim(ebin_edges[0], ebin_edges[-1])
            #if "energy" in param:
            #    title = "fractional {} error [{}, {}]% vs. true E".format(
            #        param, lower_pct, upper_pct
            #    )
            #else:
            #    title = "{} error [{}, {}]% vs true E".format(
            #        param, lower_pct, upper_pct
            #    )
            if "energy" in param:
                title = "Fractional error [{}, {}]% vs. true E".format(
                    lower_pct, upper_pct)
                ax.set_ylabel("Fractional {} error".format(plabel))
            else:
                title = "Error [{}, {}]% vs true E".format(
                    lower_pct, upper_pct)
                ax.set_ylabel("{} error{}".format(plabel, ulabel))

            ax.set_title(title)

        # -- Plot error WIDTHS vs. true energy -- #

        ax = next(axit)
        colors_iter = iter(colors)
        for reco, (recos, truth, error, weight) in stuff.items():
            mask = np.isfinite(error)

            if use_weights:
                pc_l_, pc_u_ = weighted_percentile(error[mask],
                                                   [lower_pct, upper_pct],
                                                   weights=weight[mask])
            else:
                pc_l_, pc_u_ = np.percentile(error[mask],
                                             [lower_pct, upper_pct])
            overall_iq_width = pc_u_ - pc_l_

            true_en = get_pval(toplot[reco]['truth'], "energy")
            idxs = np.digitize(true_en, ebin_edges) - 1
            widths = []
            for idx in range(n_ebins):
                bin_error = error[(idxs == idx) & mask]
                if use_weights:
                    pc_l_, med_, pc_u_ = weighted_percentile(
                        bin_error, [lower_pct, 50, upper_pct], weight)
                else:
                    pc_l_, med_, pc_u_ = np.percentile(
                        bin_error, [lower_pct, 50, upper_pct])
                widths.append(pc_u_ - pc_l_)
            color = next(colors_iter)
            ax.step(
                x=ebin_edges,
                y=[widths[0]] + widths,
                #facecolor='none',
                color=color,
            )
            ax.plot(
                ebin_edges[[0, -1]],
                [overall_iq_width] * 2,
                linestyle='--',
                color=color,
                label=r"IQ{}% width $\forall$ E = {:.2f}{}".format(
                    iq_pct, overall_iq_width, bare_ulabel),
            )

            ax.set_xscale('log')
            ax.set_xlim(ebin_edges[0], ebin_edges[-1])
            if "energy" in param:
                title = "Fractional error IQ{}% width vs true E".format(iq_pct)
                ax.set_ylabel("Fractional {} error width".format(plabel))
            else:
                title = "Error IQ{}% width vs true E".format(iq_pct)
                ax.set_ylabel("{} error width{}".format(plabel, ulabel))
            ax.set_title(title)
            ax.legend(loc="best", frameon=False)

    for ax in axit:
        ax.remove()

    fig.tight_layout(h_pad=1, w_pad=0.01)
    #fig.subplots_adjust(wspace=0.1, hspace=0.25)
    plt.draw()
    plt.show()

    if outdir is not None:
        fbasename = join(expand(outdir), "distributions")
        fig.savefig(fbasename + ".pdf")
        fig.savefig(fbasename + ".png", dpi=120)
Example 21
def combine_tdi_tiles(
    source_dir,
    dest_dir,
    table_hash,
    gcd,
    bin_edges_file,
    tile_spec_file,
):
    """Combine individual time-independent tiles (one produced per DOM) into a single
    TDI table.

    Parameters
    ----------
    source_dir : str
    dest_dir : str
    table_hash : str
    gcd : str
    bin_edges_file : str
    tile_spec_file : str

    """
    source_dir = expand(source_dir)
    dest_dir = expand(dest_dir)
    gcd = expand(gcd)
    bin_edges_file = expand(bin_edges_file)
    tile_spec_file = expand(tile_spec_file)
    mkdir(dest_dir)
    assert isdir(source_dir)
    assert isfile(bin_edges_file)
    assert isfile(tile_spec_file)

    gcd = extract_gcd(gcd)

    bin_edges = load_pickle(bin_edges_file)
    x_edges = bin_edges['x']
    y_edges = bin_edges['y']
    z_edges = bin_edges['z']
    ctdir_edges = bin_edges['costhetadir']
    phidir_edges = bin_edges['phidir']

    n_x = len(x_edges) - 1
    n_y = len(y_edges) - 1
    n_z = len(z_edges) - 1
    n_ctdir = len(ctdir_edges) - 1
    n_phidir = len(phidir_edges) - 1

    n_dir_bins = n_ctdir * n_phidir

    x_bw = (x_edges.max() - x_edges.min()) / n_x
    y_bw = (y_edges.max() - y_edges.min()) / n_y
    z_bw = (z_edges.max() - z_edges.min()) / n_z
    bin_vol = x_bw * y_bw * z_bw

    ctdir_min = ctdir_edges.min()
    ctdir_max = ctdir_edges.max()

    phidir_min = phidir_edges.min()
    phidir_max = phidir_edges.max()

    with open(tile_spec_file, 'r') as f:
        tile_specs = [l.strip() for l in f.readlines()]

    table = np.zeros(shape=(n_x, n_y, n_z, n_ctdir, n_phidir),
                     dtype=np.float32)

    # Slice all table dimensions to exclude {under,over}flow bins
    central_slice = (slice(1, -1), ) * 5

    angsens_model = None
    ice_model = None
    disable_tilt = None
    disable_anisotropy = None
    n_phase = None
    n_group = None

    tiles_info = []

    for tile_spec in tile_specs:
        info = None
        try:
            fields = tile_spec.split()

            info = OrderedDict()

            info['tbl_idx'] = int(fields[0])
            info['string'] = int(fields[1])
            info['dom'] = int(fields[2])
            info['seed'] = int(fields[3])
            info['n_events'] = int(fields[4])

            info['x_min'] = float(fields[5])
            info['x_max'] = float(fields[6])
            info['n_x'] = int(fields[7])

            info['y_min'] = float(fields[8])
            info['y_max'] = float(fields[9])
            info['n_y'] = int(fields[10])

            info['z_min'] = float(fields[11])
            info['z_max'] = float(fields[12])
            info['n_z'] = int(fields[13])

            info['n_ctdir'] = int(fields[14])
            info['n_phidir'] = int(fields[15])

            tiles_info.append(info)

            tile_fpath = glob(
                join(
                    source_dir, 'clsim_table_set'
                    '_{table_hash}'
                    '_tile_{tbl_idx}'
                    '_string_{string}'
                    '_dom_{dom}'
                    '_seed_{seed}'
                    '_n_{n_events}'
                    '.fits'.format(table_hash=table_hash, **info)))[0]
            try:
                fits_table = fits.open(tile_fpath,
                                       mode='readonly',
                                       memmap=True)
            except:
                wstderr('Failed on tile_fpath "{}"'.format(tile_fpath))
                raise

            primary = fits_table[0]

            header = primary.header  # pylint: disable=no-member
            keys = header.keys()

            this_gcd_i3_md5 = extract_meta_from_keys(keys, '_i3_gcd_i3_md5_')
            assert this_gcd_i3_md5 == gcd['source_gcd_i3_md5'], \
                    'this: {}, ref: {}'.format(this_gcd_i3_md5, gcd['source_gcd_i3_md5'])

            this_angsens_model = extract_meta_from_keys(keys, '_i3_angsens_')
            if angsens_model is None:
                angsens_model = this_angsens_model
                _, avg_angsens = load_angsens_model(angsens_model)
            else:
                assert this_angsens_model == angsens_model

            this_table_hash = extract_meta_from_keys(keys, '_i3_hash_')
            assert this_table_hash == table_hash

            this_ice_model = extract_meta_from_keys(keys, '_i3_ice_')
            if ice_model is None:
                ice_model = this_ice_model
            else:
                assert this_ice_model == ice_model

            this_disable_anisotropy = header['_i3_disable_anisotropy']
            if disable_anisotropy is None:
                disable_anisotropy = this_disable_anisotropy
            else:
                assert this_disable_anisotropy == disable_anisotropy

            this_disable_tilt = header['_i3_disable_tilt']
            if disable_tilt is None:
                disable_tilt = this_disable_tilt
            else:
                assert this_disable_tilt == disable_tilt

            this_n_phase = header['_i3_n_phase']
            if n_phase is None:
                n_phase = this_n_phase
            else:
                assert this_n_phase == n_phase

            this_n_group = header['_i3_n_group']
            if n_group is None:
                n_group = this_n_group
            else:
                assert this_n_group == n_group

            assert info['n_ctdir'] == n_ctdir
            assert info['n_phidir'] == n_phidir

            assert np.isclose(header['_i3_costhetadir_min'], ctdir_min)
            assert np.isclose(header['_i3_costhetadir_max'], ctdir_max)

            assert np.isclose(header['_i3_phidir_min'], phidir_min)
            assert np.isclose(header['_i3_phidir_max'], phidir_max)

            n_photons = header['_i3_n_photons']
            n_dir_bins = info['n_ctdir'] * info['n_phidir']

            this_x_bw = (info['x_max'] - info['x_min']) / info['n_x']
            this_y_bw = (info['y_max'] - info['y_min']) / info['n_y']
            this_z_bw = (info['z_max'] - info['z_min']) / info['n_z']

            assert this_x_bw == x_bw
            assert this_y_bw == y_bw
            assert this_z_bw == z_bw

            assert np.any(np.isclose(info['x_min'], x_edges))
            assert np.any(np.isclose(info['x_max'], x_edges))

            assert np.any(np.isclose(info['y_min'], y_edges))
            assert np.any(np.isclose(info['y_max'], y_edges))

            assert np.any(np.isclose(info['z_min'], z_edges))
            assert np.any(np.isclose(info['z_max'], z_edges))

            quantum_efficiency = 0.25 * gcd['rde'][info['string'] - 1,
                                                   info['dom'] - 1]
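            # Per-tile normalization: number of direction bins times DOM
            # quantum efficiency times average angular sensitivity, divided by
            # the number of simulated photons and the spatial bin volume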
            norm = n_dir_bins * quantum_efficiency * avg_angsens / (n_photons *
                                                                    bin_vol)
            if np.isnan(norm):
                print('\nTile {} norm is nan!'.format(info['tbl_idx']))
                print('    quantum_efficiency = {}, n_photons = {}'.format(
                    quantum_efficiency, n_photons))
            elif norm == 0:
                print('\nTile {} norm is 0'.format(info['tbl_idx']))

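            # Locate this tile within the global (x, y, z) grid; digitize bin
            # centers (min + bw/2, max - bw/2) rather than edges so that
            # floating-point jitter at shared tile boundaries cannot shift the
            # start/stop indices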
            x_start = np.digitize(info['x_min'] + x_bw / 2, x_edges) - 1
            x_stop = np.digitize(info['x_max'] - x_bw / 2, x_edges)

            y_start = np.digitize(info['y_min'] + y_bw / 2, y_edges) - 1
            y_stop = np.digitize(info['y_max'] - y_bw / 2, y_edges)

            z_start = np.digitize(info['z_min'] + z_bw / 2, z_edges) - 1
            z_stop = np.digitize(info['z_max'] - z_bw / 2, z_edges)

            # NOTE: comparison excludes norm = 0 _and_ norm = NaN
            if norm > 0:
                assert not np.isnan(norm)
                table[x_start:x_stop, y_start:y_stop,
                      z_start:z_stop, :, :] += (
                          norm * primary.data[central_slice]  # pylint: disable=no-member
                      )
        except Exception:
            wstderr('Failed on tile_spec {}'.format(tile_spec))
            if info is not None:
                wstderr('Info:\n{}'.format(info))
            raise
        wstderr('.')

    wstderr('\n')

    metadata = OrderedDict()
    metadata['table_hash'] = table_hash
    metadata['disable_tilt'] = disable_tilt
    metadata['disable_anisotropy'] = disable_anisotropy
    metadata['gcd'] = gcd
    metadata['angsens_model'] = angsens_model
    metadata['ice_model'] = ice_model
    metadata['n_phase'] = n_phase
    metadata['n_group'] = n_group
    metadata['tiles_info'] = tiles_info

    outdir = join(
        dest_dir, 'tdi_table_{}_tilt_{}_anisotropy_{}'.format(
            table_hash,
            'off' if disable_tilt else 'on',
            'off' if disable_anisotropy else 'on',
        ))
    mkdir(outdir)

    name = 'tdi_table.npy'
    outfpath = join(outdir, name)
    wstdout('saving table to "{}"\n'.format(outfpath))
    np.save(outfpath, table)

    #outfpath = join(outdir, 'tdi_bin_edges.json')
    #wstdout('saving bin edges to "{}"\n'.format(outfpath))
    #json.dump(
    #    bin_edges,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_bin_edges.pkl')
    wstdout('saving bin edges to "{}"\n'.format(outfpath))
    pickle.dump(
        bin_edges,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )

    #outfpath = join(outdir, 'tdi_metadata.json')
    #wstdout('saving metadata to "{}"\n'.format(outfpath))
    #json.dump(
    #    metadata,
    #    file(outfpath, 'w'),
    #    sort_keys=False,
    #    indent=2,
    #)
    outfpath = join(outdir, 'tdi_metadata.pkl')
    wstdout('saving metadata to "{}"\n'.format(outfpath))
    pickle.dump(
        metadata,
        open(outfpath, 'wb'),
        protocol=pickle.HIGHEST_PROTOCOL,
    )
Example 22
def extract_gcd_frames(g_frame,
                       c_frame,
                       d_frame,
                       retro_gcd_dir,
                       metadata=None):
    """Extract GCD info to Python/Numpy-readable objects stored to a central
    GCD directory, subdirs of which are named by the hex md5sum of each
    extracted GCD file.

    Parameters
    ----------
    g_frame : icecube.icetray.I3Frame with stop I3Frame.Geometry
    c_frame : icecube.icetray.I3Frame with stop I3Frame.Calibration
    d_frame : icecube.icetray.I3Frame with stop I3Frame.DetectorStatus
    retro_gcd_dir : string
    metadata : None or mapping, optional
        If non-empty mapping (e.g., OrderedDict) is provided, the contents are
        written to the gcd file's subdirectory inside retro_gcd_dir as
        "metadata.json"

    Returns
    -------
    gcd_md5_hex : len-32 string of chars 0-9 and/or a-f
        MD5 sum of _only_ the G, C, and D frames (in that order) dumped to an
        uncompressed i3 file. Note that this can result in a hash value
        different from hashing the original GCD file if other frames were
        present besides the GCD frames (such as an I frame, or Q/P/etc. if the
        GCD is embedded in a data i3 file)

    """
    from icecube.dataio import I3File  # pylint: disable=import-outside-toplevel

    retro_gcd_dir = expand(retro_gcd_dir)

    # Create root dir for gcd subdirs if necessary
    if not isdir(retro_gcd_dir):
        mkdir(retro_gcd_dir)

    # Add a vaguely useful README to gcd root dir
    readme_fpath = join(retro_gcd_dir, "README")
    if not isfile(readme_fpath):
        with io.open(readme_fpath, "w", encoding="utf-8") as fhandle:
            fhandle.write(GCD_README.strip() + "\n")

    # Find md5sum of an uncompressed GCD file created by these G, C, & D frames
    tempdir_path = mkdtemp(suffix="gcd")
    try:
        gcd_i3file_path = join(tempdir_path, "gcd.i3")
        gcd_i3file = I3File(gcd_i3file_path, "w")
        gcd_i3file.push(g_frame)
        gcd_i3file.push(c_frame)
        gcd_i3file.push(d_frame)
        gcd_i3file.close()
        gcd_md5_hex = get_file_md5(gcd_i3file_path)
    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    this_gcd_dir_path = join(retro_gcd_dir, gcd_md5_hex)
    if isdir(this_gcd_dir_path):
        # already extracted this GCD
        sys.stderr.write(
            "Already extracted GCD with md5sum {}\n".format(gcd_md5_hex))
        return gcd_md5_hex

    tempdir_path = mkdtemp(suffix="." + gcd_md5_hex)
    try:
        # Extract GCD info into Python/Numpy-readable things
        gcd_info = OrderedDict()
        gcd_info["I3Geometry"] = extract_i3_geometry(g_frame)
        gcd_info["I3Calibration"] = extract_i3_calibration(c_frame)
        gcd_info["I3DetectorStatus"] = extract_i3_detector_status(d_frame)
        gcd_info.update(extract_bad_doms_lists(d_frame))

        # Write info to files. Preferable to write a single array to a .npy file;
        # second most preferable is to write multiple arrays to (compressed) .npz
        # file (faster to load than pkl files); finally, I3DetectorStatus _has_ to
        # be stored as pickle to preserve varying-length items.
        for key, val in gcd_info.items():
            if isinstance(val, Mapping):
                if key == "I3DetectorStatus":
                    key_fpath = join(tempdir_path, key + ".pkl")
                    with io.open(key_fpath, "wb") as fhandle:
                        pickle.dump(val,
                                    fhandle,
                                    protocol=pickle.HIGHEST_PROTOCOL)
                else:
                    np.savez_compressed(join(tempdir_path, key + ".npz"),
                                        **val)
            else:
                assert isinstance(val, np.ndarray)
                np.save(join(tempdir_path, key + ".npy"), val)

        if metadata:
            metadata_fpath = join(tempdir_path, "metadata.json")
            with open(metadata_fpath, "w") as fhandle:
                json.dump(metadata, fhandle, sort_keys=False, indent=4)

        try:
            copytree(tempdir_path, this_gcd_dir_path)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise

    finally:
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    return gcd_md5_hex
Example 23
def extract_gcd_files(gcd_files, retro_gcd_dir, verbosity=0):
    """
    Parameters
    ----------
    gcd_files : string or iterable thereof

    retro_gcd_dir : string
        Path to communal Retro-extracted GCD dir

    verbosity : int in [0, 1]

    Returns
    -------
    gcd_md5_hexs : len(gcd_files)-list of strings

    """
    # Import here so module can be read without access to IceCube software
    from icecube.dataio import I3File  # pylint: disable=no-name-in-module, import-outside-toplevel
    from icecube.icetray import I3Frame  # pylint: disable=no-name-in-module, import-outside-toplevel

    if isinstance(gcd_files, string_types):
        gcd_files = [gcd_files]

    gcd_md5_hexs = []
    for gcd_fpath in gcd_files:
        gcd_fpath = expand(gcd_fpath)
        i3f = I3File(gcd_fpath)
        gcd_frames = OrderedDict()
        while i3f.more():
            frame = i3f.pop_frame()
            if frame.Stop == I3Frame.Geometry:
                if "g_frame" in gcd_frames:
                    raise ValueError(
                        'GCD file "{}" contains multiple G frames'.format(
                            gcd_fpath))
                gcd_frames["g_frame"] = frame
            elif frame.Stop == I3Frame.Calibration:
                if "c_frame" in gcd_frames:
                    raise ValueError(
                        'GCD file "{}" contains multiple C frames'.format(
                            gcd_fpath))
                gcd_frames["c_frame"] = frame
            elif frame.Stop == I3Frame.DetectorStatus:
                if "d_frame" in gcd_frames:
                    raise ValueError(
                        'GCD file "{}" contains multiple D frames'.format(
                            gcd_fpath))
                gcd_frames["d_frame"] = frame
        for frame_type in ("g", "c", "d"):
            if "{}_frame".format(frame_type) not in gcd_frames:
                raise ValueError('No {} frame found in GCD file "{}"'.format(
                    frame_type, gcd_fpath))
        metadata = OrderedDict()
        metadata["extracted_on_fqdn"] = socket.getfqdn()
        metadata["path_to_sourcefile"] = abspath(gcd_fpath)
        metadata["sourcefile_md5sum"] = get_file_md5(gcd_fpath)
        try:
            gcd_md5_hex = extract_gcd_frames(retro_gcd_dir=retro_gcd_dir,
                                             metadata=metadata,
                                             **gcd_frames)
        except Exception:
            sys.stderr.write(
                'failed to extract GCD file "{}"\n'.format(gcd_fpath))
            raise
        gcd_md5_hexs.append(gcd_md5_hex)
        if verbosity:
            sys.stdout.write("{}  {}\n".format(gcd_md5_hex, gcd_fpath))

    return gcd_md5_hexs
Example 24
def find_files_to_extract(roots,
                          overwrite,
                          find_gcd_in_dir=False,
                          data_run_gcds=None):
    """Find missing, bad, or old extracted pulse series and print the paths of
    the corresponding events directories.

    Parameters
    ----------
    roots : str or iterable thereof

    overwrite : bool

    find_gcd_in_dir : bool or str
        If True, search for a data run's GCD file in the same directory as each
        i3 data file that is found. If False, do not search for a data run's
        GCD file.

        If `find_gcd_in_dir` is a string, interpret as a directory path; search
        for a data run's GCD file in that directory.

        Note that Monte Carlo i3 files will not return a `gcd_fpath`, as it is
        difficult to ascertain which GCD was used for an MC run and where that
        file is located.

    data_run_gcds : dict or None, optional
        Keys must be <tuple>(<str>2-digit IC86 season, <str>Run number). Each
        value is a string full path to the corresponding GCD file.

    Yields
    ------
    fpath : str
        Full path to data/MC i3 file

    gcd_fpath : str
        Full path to GCD file corresponding to the data i3 file at fpath

    fname_groupdict : dict
        As returned by OSCNEXT_I3_FNAME_RE.match(...).groupdict()

    """
    if isinstance(roots, str):
        roots = [roots]
    roots = [expand(root) for root in roots]

    # If `find_gcd_in_dir` is a string, interpret as a directory and search for
    # GCD's in that directory (recursively)
    found_data_run_gcds = None
    # Also initialize this here so the closure below can reference it even when
    # a root is a single file (in which case the directory-walking loop that
    # normally assigns it never runs)
    thisdir_data_run_gcds = None
    if isinstance(find_gcd_in_dir, str):
        find_gcd_in_dir = expand(find_gcd_in_dir)
        assert isdir(find_gcd_in_dir), str(find_gcd_in_dir)
        found_data_run_gcds = find_gcds_in_dirs(find_gcd_in_dir,
                                                gcd_fname_re=DATA_GCD_FNAME_RE,
                                                recurse=True)

    def get_i3_events_file_info(dirpath, fname):
        """Closure to find only i3 events file names and, in that case, grab a
        relevant GCD file (if file contains data events and such a GCD can be
        found in `dirpath`), and return the info extracted from the file name.

        Parameters
        ----------
        dirpath : str
            Fully qualified path to file's directory

        fname : str
            (basename) of the file (i.e., excluding any directories)

        Returns
        -------
        retval : None or 3-tuple
            Returns `None` if the file is determined to not be an i3 events
            file (based on filename alone). Otherwise, returns .. ::

                fpath : str
                    fully qualified (including directories) path to the i3
                    events file

                gcd_fpath : str or None
                    fully qualified (including directories) path to a relevant
                    GCD file found in the same dir, or None if none is found

                fname_groupdict : mapping
                    Filename info as returned by regex

        """
        fname_match = OSCNEXT_I3_FNAME_RE.match(fname)
        if not fname_match:
            return None

        fname_groupdict = fname_match.groupdict()

        i3_retro_dir = join(dirpath, fname_groupdict["basename"])
        if (not overwrite and isdir(i3_retro_dir)
                and isfile(join(i3_retro_dir, "events.npy"))):
            return None

        fpath = join(dirpath, fname)

        gcd_fpath = None
        if fname_groupdict["kind"] == "data":
            key = (fname_groupdict["season"], fname_groupdict["run"])

            if data_run_gcds:
                gcd_fpath = data_run_gcds.get(key, None)

            if gcd_fpath is None and found_data_run_gcds:
                gcd_fpath = found_data_run_gcds.get(key, None)

            if gcd_fpath is None and thisdir_data_run_gcds:
                gcd_fpath = thisdir_data_run_gcds.get(key, None)

        return fpath, gcd_fpath, fname_groupdict

    for root in roots:
        if isfile(root):
            retval = get_i3_events_file_info(dirpath=dirname(root),
                                             fname=basename(root))
            if retval is not None:
                yield retval
            continue

        for dirpath, dirs, files in walk(root, followlinks=True):
            if "events.npy" in files:
                # No need to recurse into an existing retro events directory,
                # so clear out remaining directories
                del dirs[:]
                continue

            dirs.sort(key=nsort_key_func)
            files.sort(key=nsort_key_func)

            # If `find_gcd_in_dir` is True (i.e., not a string and not False),
            # look in current directory for all data-run GCD files
            thisdir_data_run_gcds = None
            if find_gcd_in_dir is True:
                thisdir_data_run_gcds = find_gcds_in_dirs(
                    dirpath, gcd_fname_re=DATA_GCD_FNAME_RE, recurse=False)

            for fname in files:
                retval = get_i3_events_file_info(dirpath=dirpath, fname=fname)
                if retval is not None:
                    yield retval
Example 25
def main(description=__doc__):
    """Script interface to `extract_events` function: Parse command line args
    and call function."""

    hostname = gethostname()
    dflt = {}
    if hostname in ["schwyz", "luzern", "uri", "unterwalden"]:
        sim_gcd_dir = "/data/icecube/gcd"
        dflt["retro_gcd_dir"] = "/data/icecube/retro_gcd"
        dflt["data_gcd_dir"] = "/data/icecube/gcd"
        dflt["procs"] = cpu_count()
    elif hostname.endswith(".aci.ics.psu.edu"):
        sim_gcd_dir = "/gpfs/group/dfc13/default/gcd/mc"
        dflt["retro_gcd_dir"] = "/gpfs/group/dfc13/default/retro_gcd"
        dflt["data_gcd_dir"] = None
        dflt["procs"] = 1
    else:  # wisconsin?
        sim_gcd_dir = "/data/sim/DeepCore/2018/pass2/gcd"
        dflt["retro_gcd_dir"] = "~/retro_gcd"
        dflt["data_gcd_dir"] = None
        dflt["procs"] = 1
        raise ValueError("Unknown host: {}".format(hostname))

    dflt["sim_gcd"] = join(
        expand(sim_gcd_dir),
        "GeoCalibDetectorStatus_AVG_55697-57531_PASS2_SPE_withScaledNoise.i3.gz",
    )

    parser = ArgumentParser(
        description=description,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--roots",
        nargs="+",
        required=True,
        help=
        """i3 file(s) and/or directories to search for i3 files to extract""",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="""If event was already extracted, overwrite existing object(s)
        (existing files will not be deleted, so excess objects not specified
        here will not be removed)""",
    )
    parser.add_argument(
        "--retro-gcd-dir",
        required=False,
        default=dflt["retro_gcd_dir"],  #dflt.get("retro_gcd_dir", None),
        help="""Directory into which to store any extracted GCD info""",
    )
    parser.add_argument(
        "--sim-gcd",
        required=False,
        default=dflt.get("sim_gcd", None),
        help="""Specify an external GCD file or md5sum (as returned by
        `retro.i3processing.extract_gcd_frames`, i.e., the md5sum of an
        uncompressed i3 file containing _only_ the G, C, and D frames). It is
        not required to specify --gcd if the G, C, and D frames are embedded in
        all files specified by --i3-files. Any GCD frames within said files
        will also take precedent if --gcd _is_ specified.""",
    )
    parser.add_argument(
        "--data-gcd-dir",
        required=False,
        default=dflt.get("data_gcd_dir", None),
        help="""If data GCDs all live in one directory, specify it here.""",
    )
    parser.add_argument(
        "--outdir",
        required=False,
        help="""Directory into which to store the extracted directories and
        files. If not specified, the directory where each i3 file is stored is
        used (a leaf directory is created with the same name as each i3 file
        but with .i3 and any compression extensions removed).""")
    #parser.add_argument(
    #    "--photons",
    #    nargs="+",
    #    default=[],
    #    help="""Photon series names to extract from each event""",
    #)
    parser.add_argument(
        "--pulses",
        required=False,
        nargs="+",
        default=["SRTTWOfflinePulsesDC", "SplitInIcePulses"],
        help="""Pulse series names to extract from each event""",
    )
    parser.add_argument(
        "--recos",
        required=False,
        nargs="+",
        help="""Reco names to extract from each event. If not specified,
        "L5_SPEFit11", "LineFit_DC", and "retro_crs_prefit" (if the file name
        matches L6 processing or above) are extracted.""",
    )
    parser.add_argument(
        "--triggers",
        nargs="+",
        default=["I3TriggerHierarchy"],
        help="""Trigger hierarchy names to extract from each event""",
    )
    parser.add_argument(
        "--no-truth",
        action="store_true",
        help="""Do not extract truth information from Monte Carlo events""",
    )
    parser.add_argument(
        "--additional-keys",
        default=None,
        nargs="+",
        help="""Additional keys to extract from event I3 frame""",
    )
    parser.add_argument(
        "--procs",
        default=dflt["procs"],
        type=int,
        help="""Number of (sub)processes to use for converting files""",
    )
    args = parser.parse_args()
    kwargs = vars(args)

    find_func_kwargs = {
        k: kwargs.pop(k)
        for k in getargspec(find_files_to_extract).args if k in kwargs
    }

    no_truth = kwargs.pop("no_truth")
    data_gcd_dir = kwargs.pop("data_gcd_dir", None)
    sim_gcd = kwargs.pop("sim_gcd", None)
    procs = kwargs.pop("procs", None)

    if data_gcd_dir:
        data_run_gcds = find_gcds_in_dirs(data_gcd_dir,
                                          gcd_fname_re=DATA_GCD_FNAME_RE)
    else:
        data_run_gcds = None

    kwargs["additional_keys"] = kwargs.pop("additional_keys", None)
    if not kwargs["additional_keys"]:
        from processing.samples.oscNext.verification.general_mc_data_harvest_and_plot import (
            L5_VARS, L6_VARS, L7_VARS)

    pool = Pool(procs)
    requests = []
    for fpath, gcd_fpath, fname_groupdict in find_files_to_extract(
            find_gcd_in_dir=True, data_run_gcds=data_run_gcds,
            **find_func_kwargs):
        print(fpath)
        extract_events_kwargs = deepcopy(kwargs)
        extract_events_kwargs["i3_files"] = [fpath]

        is_data = fname_groupdict["kind"].lower() == "data"
        if is_data:
            assert gcd_fpath is not None
            extract_events_kwargs["gcd"] = gcd_fpath
        else:
            extract_events_kwargs["truth"] = not is_data and not no_truth
            extract_events_kwargs["gcd"] = sim_gcd

        level = int(fname_groupdict["level"])

        if "recos" not in extract_events_kwargs or not extract_events_kwargs[
                "recos"]:
            recos = []
            if level >= 5:
                recos.extend(["LineFit_DC", "L5_SPEFit11"])
            if level >= 6:
                recos.append("retro_crs_prefit")
            extract_events_kwargs["recos"] = recos

        if not extract_events_kwargs["additional_keys"]:
            additional_keys = []
            if level >= 5:
                additional_keys.extend(L5_VARS.keys())
            if level >= 6:
                additional_keys.extend(L6_VARS.keys())
            if level >= 7:
                additional_keys.extend(L7_VARS.keys())
            extract_events_kwargs["additional_keys"] = sorted(additional_keys)

        requests.append((
            extract_events_kwargs,
            pool.apply_async(wrapped_extract_events, tuple(),
                             extract_events_kwargs),
        ))

    failed_i3_files = []
    for extract_events_kwargs, async_result in requests:
        retval = async_result.get()
        if not retval:
            failed_i3_files.append(extract_events_kwargs["i3_files"])

    pool.close()
    pool.join()

    if failed_i3_files:
        for failure in chain(*failed_i3_files):
            print('"{}"'.format(failure))

    print("\n{} failures out of {} i3 files found that needed to be extracted".
          format(len(failed_i3_files), len(requests)))
Example 26
def generate_clsim_table(
    outdir,
    gcd,
    ice_model,
    angular_sensitivity,
    disable_tilt,
    disable_anisotropy,
    string,
    dom,
    n_events,
    seed,
    coordinate_system,
    binning,
    tableset_hash=None,
    tile=None,
    overwrite=False,
    compress=False,
):
    """Generate a CLSim table.

    See wiki.icecube.wisc.edu/index.php/Ice for information about ice models.

    Parameters
    ----------
    outdir : string

    gcd : string

    ice_model : str
        E.g. "spice_mie", "spice_lea", ...

    angular_sensitivity : str
        E.g. "h2-50cm", "9" (which is equivalent to "new25" because, like, duh)

    disable_tilt : bool
        Whether to force no layer tilt in simulation (if tilt is present in
        bulk ice model; otherwise, this has no effect)

    disable_anisotropy : bool
        Whether to force no bulk ice anisotropy (if anisotropy is present in
        bulk ice model; otherwise, this has no effect)

    string : int in [1, 86]

    dom : int in [1, 60]

    n_events : int > 0
        Note that the number of photons is much larger than the number of
        events (related to the "brightness" of the defined source).

    seed : int in [0, 2**32)
        Seed for CLSim's random number generator

    coordinate_system : string in {"spherical", "cartesian"}
        If spherical, base coordinate system is .. ::

            (r, theta, phi, t, costhetadir, (optionally abs)deltaphidir)

        If Cartesian, base coordinate system is .. ::

            (x, y, z, costhetadir, phidir)

        but if any of the coordinate axes are specified to have 0 bins, they
        will be omitted (but the overall order is maintained).

    binning : mapping
        If `coordinate_system` is "spherical", keys should be:
            "n_r_bins"
            "n_t_bins"
            "n_costheta_bins"
            "n_phi_bins"
            "n_costhetadir_bins"
            "n_deltaphidir_bins"
            "r_max"
            "r_power"
            "t_max"
            "t_power"
            "deltaphidir_power"
        If `coordinate_system` is "cartesian", keys should be:
            "n_x_bins"
            "n_y_bins"
            "n_z_bins"
            "n_costhetadir_bins"
            "n_phidir_bins"
            "x_min"
            "x_max"
            "y_min"
            "y_max"
            "z_min"
            "z_max"

    tableset_hash : str, optional
        Specify if the table is a tile used to generate a larger table

    tile : int >= 0, optional
        Specify if the table is a tile used to generate a larger table

    overwrite : bool, optional
        Whether to overwrite an existing table (default: False)

    compress : bool, optional
        Whether to pass the resulting table through zstandard compression
        (default: False)

    Raises
    ------
    ValueError
        If `compress` is True but `zstd` command-line utility cannot be found

    AssertionError, ValueError
        If illegal argument values are passed

    ValueError
        If `overwrite` is False and a table already exists at the target path

    Notes
    -----
    Binnings are as follows:
        * Radial binning is regular in the space of r**(1/r_power), with
          `n_r_bins` spanning from 0 to `r_max` meters.
        * Time binning is regular in the space of t**(1/t_power), with
          `n_t_bins` spanning from 0 to `t_max` nanoseconds.
        * Position zenith angle is binned regularly in the cosine of the zenith
          angle, with `n_costheta_bins` spanning from -1 to +1.
        * Position azimuth angle is binned regularly, with `n_phi_bins`
          spanning from -pi to pi radians.
        * Photon directionality zenith angle (relative to IceCube coordinate
          system) is binned regularly in cosine-zenith space, with
          `n_costhetadir_bins` spanning from `costhetadir_min` to
          `costhetadir_max`
        * Photon directionality azimuth angle; sometimes assumed to be
          symmetric about line from DOM to the center of the bin, so is binned
          as an absolute value, i.e., from 0 to pi radians. Otherwise, binned
          from -np.pi to +np.pi

    The following are forced upon the above binning specifications (and
    remaining parameters are specified as arguments to the function)
        * t_min = 0 (ns)
        * r_min = 0 (m)
        * costheta_min = -1
        * costheta_max = 1
        * phi_min = -pi (rad)
        * phi_max = pi (rad)
        * costhetadir_min = -1
        * costhetadir_max = 1
        * deltaphidir_min = 0 (rad)
        * deltaphidir_max = pi (rad)

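    Examples
    --------
    The power-law binnings described above are regular in the `power`-th root
    of the coordinate. As an illustrative sketch (not code used by this
    function), the radial bin edges implied by `r_max=400`, `r_power=2`, and
    `n_r_bins=200` could be reproduced with::

        import numpy as np
        r_edges = np.linspace(0.0, 400.0 ** 0.5, 200 + 1) ** 2
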
    """
    assert isinstance(n_events, Integral) and n_events > 0
    assert isinstance(seed, Integral) and 0 <= seed < 2**32
    assert ((tableset_hash is not None and tile is not None)
            or (tableset_hash is None and tile is None))

    n_bins_per_dim = []
    for key, val in binning.items():
        if not key.startswith('n_'):
            continue
        assert isinstance(val, Integral), '{} not an integer'.format(key)
        assert val >= 0, '{} must be >= 0'.format(key)
        n_bins_per_dim.append(val)

    # Note: + 2 accounts for under & overflow bins in each dimension
    n_bins = np.product([n + 2 for n in n_bins_per_dim if n > 0])

    assert n_bins > 0

    #if n_bins > 2**32:
    #    raise ValueError(
    #        'The flattened bin index in CLSim is represented by uint32 which'
    #        ' has a max of 4 294 967 296, but the binning specified comes to'
    #        ' {} bins ({} times too many).'
    #        .format(n_bins, n_bins / 2**32)
    #    )

    ice_model = ice_model.strip()
    angular_sensitivity = angular_sensitivity.strip()
    # For now, hole ice model is hard-coded in our CLSim branch; see
    #   clsim/private/clsim/I3CLSimLightSourceToStepConverterFlasher.cxx
    # in the branch you're using to check that this is correct
    assert angular_sensitivity == 'flasher_p1_0.30_p2_-1'

    gcd_info = extract_gcd(gcd)

    if compress and not any(
            access(join(path, 'zstd'), X_OK)
            for path in environ['PATH'].split(pathsep)):
        raise ValueError('`zstd` command not found in path')

    outdir = expand(outdir)
    mkdir(outdir)

    axes = OrderedDict()
    binning_kw = OrderedDict()

    # Note that the actual binning in CLSim is performed using float32, so we
    # first "truncate" all values to that precision. However, the `LinearAxis`
    # function requires Python floats (which are 64 bits), so we have to
    # convert all values to to `float` when passing as kwargs to `LinearAxis`
    # (and presumably the values will be re-truncated to float32 within the
    # CLsim code somewhere). Hopefully following this procedure, the values
    # actually used within CLSim are what we want...? CLSim is stupid.
    ftype = np.float32

    if coordinate_system == 'spherical':
        binning['t_min'] = ftype(0)  # ns
        binning['r_min'] = ftype(0)  # meters
        costheta_min = ftype(-1.0)
        costheta_max = ftype(1.0)
        # See
        #   clsim/resources/kernels/spherical_coordinates.c.cl
        # in the branch you're using to check that the following are correct
        phi_min = ftype(3.0543261766433716e-01)
        phi_max = ftype(6.5886182785034180e+00)
        binning['costhetadir_min'] = ftype(-1.0)
        binning['costhetadir_max'] = ftype(1.0)
        binning['deltaphidir_min'] = ftype(-3.1808626651763916e+00)
        binning['deltaphidir_max'] = ftype(3.1023228168487549e+00)

        if binning['n_r_bins'] > 0:
            assert isinstance(binning['r_power'],
                              Integral) and binning['r_power'] > 0
            r_binning_kw = OrderedDict([
                ('min', float(binning['r_min'])),
                ('max', float(binning['r_max'])),
                ('n_bins', int(binning['n_r_bins'])),
            ])
            if binning['r_power'] == 1:
                axes['r'] = LinearAxis(**r_binning_kw)
            else:
                r_binning_kw['power'] = int(binning['r_power'])
                axes['r'] = PowerAxis(**r_binning_kw)
            binning_kw['r'] = r_binning_kw

        if binning['n_costheta_bins'] > 0:
            costheta_binning_kw = OrderedDict([
                ('min', float(costheta_min)),
                ('max', float(costheta_max)),
                ('n_bins', int(binning['n_costheta_bins'])),
            ])
            axes['costheta'] = LinearAxis(**costheta_binning_kw)
            binning_kw['costheta'] = costheta_binning_kw

        if binning['n_phi_bins'] > 0:
            phi_binning_kw = OrderedDict([
                ('min', float(phi_min)),
                ('max', float(phi_max)),
                ('n_bins', int(binning['n_phi_bins'])),
            ])
            axes['phi'] = LinearAxis(**phi_binning_kw)
            binning_kw['phi'] = phi_binning_kw

        if binning['n_t_bins'] > 0:
            assert isinstance(binning['t_power'],
                              Integral) and binning['t_power'] > 0
            t_binning_kw = OrderedDict([
                ('min', float(binning['t_min'])),
                ('max', float(binning['t_max'])),
                ('n_bins', int(binning['n_t_bins'])),
            ])
            if binning['t_power'] == 1:
                axes['t'] = LinearAxis(**t_binning_kw)
            else:
                t_binning_kw['power'] = int(binning['t_power'])
                axes['t'] = PowerAxis(**t_binning_kw)
            binning_kw['t'] = t_binning_kw

        if binning['n_costhetadir_bins'] > 0:
            costhetadir_binning_kw = OrderedDict([
                ('min', float(binning['costhetadir_min'])),
                ('max', float(binning['costhetadir_max'])),
                ('n_bins', int(binning['n_costhetadir_bins'])),
            ])
            axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw)
            binning_kw['costhetadir'] = costhetadir_binning_kw

        if binning['n_deltaphidir_bins'] > 0:
            assert (isinstance(binning['deltaphidir_power'], Integral)
                    and binning['deltaphidir_power'] > 0)
            deltaphidir_binning_kw = OrderedDict([
                ('min', float(binning['deltaphidir_min'])),
                ('max', float(binning['deltaphidir_max'])),
                ('n_bins', int(binning['n_deltaphidir_bins'])),
            ])
            if binning['deltaphidir_power'] == 1:
                axes['deltaphidir'] = LinearAxis(**deltaphidir_binning_kw)
            else:
                deltaphidir_binning_kw['power'] = int(
                    binning['deltaphidir_power'])
                axes['deltaphidir'] = PowerAxis(**deltaphidir_binning_kw)
            binning_kw['deltaphidir'] = deltaphidir_binning_kw

    elif coordinate_system == 'cartesian':
        binning['t_min'] = ftype(0)  # ns
        binning['costhetadir_min'], binning['costhetadir_max'] = ftype(
            -1.0), ftype(1.0)
        binning['phidir_min'], binning['phidir_max'] = ftype(-np.pi), ftype(
            np.pi)  # rad

        if binning['n_x_bins'] > 0:
            x_binning_kw = OrderedDict([
                ('min', float(binning['x_min'])),
                ('max', float(binning['x_max'])),
                ('n_bins', int(binning['n_x_bins'])),
            ])
            axes['x'] = LinearAxis(**x_binning_kw)
            binning_kw['x'] = x_binning_kw

        if binning['n_y_bins'] > 0:
            y_binning_kw = OrderedDict([
                ('min', float(binning['y_min'])),
                ('max', float(binning['y_max'])),
                ('n_bins', int(binning['n_y_bins'])),
            ])
            axes['y'] = LinearAxis(**y_binning_kw)
            binning_kw['y'] = y_binning_kw

        if binning['n_z_bins'] > 0:
            z_binning_kw = OrderedDict([
                ('min', float(binning['z_min'])),
                ('max', float(binning['z_max'])),
                ('n_bins', int(binning['n_z_bins'])),
            ])
            axes['z'] = LinearAxis(**z_binning_kw)
            binning_kw['z'] = z_binning_kw

        if binning['n_t_bins'] > 0:
            assert isinstance(binning['t_power'],
                              Integral) and binning['t_power'] > 0
            t_binning_kw = OrderedDict([
                ('min', float(binning['t_min'])),
                ('max', float(binning['t_max'])),
                ('n_bins', int(binning['n_t_bins'])),
            ])
            if binning['t_power'] == 1:
                axes['t'] = LinearAxis(**t_binning_kw)
            else:
                t_binning_kw['power'] = int(binning['t_power'])
                axes['t'] = PowerAxis(**t_binning_kw)
            binning_kw['t'] = t_binning_kw

        if binning['n_costhetadir_bins'] > 0:
            costhetadir_binning_kw = OrderedDict([
                ('min', float(binning['costhetadir_min'])),
                ('max', float(binning['costhetadir_max'])),
                ('n_bins', int(binning['n_costhetadir_bins'])),
            ])
            axes['costhetadir'] = LinearAxis(**costhetadir_binning_kw)
            binning_kw['costhetadir'] = costhetadir_binning_kw

        if binning['n_phidir_bins'] > 0:
            phidir_binning_kw = OrderedDict([
                ('min', float(binning['phidir_min'])),
                ('max', float(binning['phidir_max'])),
                ('n_bins', int(binning['n_phidir_bins'])),
            ])
            axes['phidir'] = LinearAxis(**phidir_binning_kw)
            binning_kw['phidir'] = phidir_binning_kw

    binning_order = BINNING_ORDER[coordinate_system]

    missing_dims = set(axes.keys()).difference(binning_order)
    if missing_dims:
        raise ValueError(
            '`binning_order` specified is {} but is missing dimension(s) {}'.
            format(binning_order, missing_dims))

    axes_ = OrderedDict()
    binning_kw_ = OrderedDict()
    for dim in binning_order:
        if dim in axes:
            axes_[dim] = axes[dim]
            binning_kw_[dim] = binning_kw[dim]
    axes = axes_
    binning_kw = binning_kw_

    # NOTE: use SphericalAxes even if we're actually binning Cartesian since we
    # don't care how it handles e.g. volumes, and Cartesian isn't implemented
    # in CLSim yet
    axes = SphericalAxes(axes.values())

    # Construct metadata initially with items that will be hashed
    metadata = OrderedDict([
        ('source_gcd_i3_md5', gcd_info['source_gcd_i3_md5']),
        ('coordinate_system', coordinate_system), ('binning_kw', binning_kw),
        ('ice_model', ice_model), ('angular_sensitivity', angular_sensitivity),
        ('disable_tilt', disable_tilt),
        ('disable_anisotropy', disable_anisotropy)
    ])
    # TODO: this is hard-coded in our branch of CLSim; make parameter & fix here!
    if 't' in binning_kw:
        metadata['t_is_residual_time'] = True

    if tableset_hash is None:
        hash_val = hash_obj(metadata, fmt='hex')[:8]
        print('derived hash:', hash_val)
    else:
        hash_val = tableset_hash
        print('tableset_hash:', hash_val)
    metadata['hash_val'] = hash_val
    if tile is not None:
        metadata['tile'] = tile

    dom_spec = OrderedDict([('string', string), ('dom', dom)])

    if 'depth_idx' in dom_spec and ('subdet' in dom_spec
                                    or 'string' in dom_spec):
        if 'subdet' in dom_spec:
            dom_spec['string'] = dom_spec.pop('subdet')

        string = dom_spec['string']
        depth_idx = dom_spec['depth_idx']

        if isinstance(string, str):
            # In this case `string` holds the subdetector name (it was moved
            # here from the 'subdet' key above), so derive `subdet` from it
            subdet = string.lower()
            dom_x, dom_y = 0, 0

            ic_avg_z, dc_avg_z = get_average_dom_z_coords(gcd_info['geo'])
            if subdet == 'ic':
                dom_z = ic_avg_z[depth_idx]
            elif subdet == 'dc':
                dom_z = dc_avg_z[depth_idx]
            else:
                raise ValueError('Unrecognized subdetector {}'.format(subdet))
        else:
            # Define `subdet` so the summary print below also works in this case
            subdet = 'string {}'.format(string)
            dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, depth_idx]

        metadata['string'] = string
        metadata['depth_idx'] = depth_idx

        if tile is not None:
            raise ValueError(
                'Cannot produce tiled tables using "depth_idx"-style table groupings;'
                ' use "string"/"dom"-style tables instead.')

        clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[1]
        clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[0]

        print('Subdetector {}, depth index {} (z_avg = {} m)'.format(
            subdet, depth_idx, dom_z))

    elif 'string' in dom_spec and 'dom' in dom_spec:
        string = dom_spec['string']
        dom = dom_spec['dom']
        dom_x, dom_y, dom_z = gcd_info['geo'][string - 1, dom - 1]

        metadata['string'] = string
        metadata['dom'] = dom

        if tile is None:
            clsim_table_fname_proto = CLSIM_TABLE_FNAME_PROTO[2]
            clsim_table_metaname_proto = CLSIM_TABLE_METANAME_PROTO[1]
        else:
            clsim_table_fname_proto = CLSIM_TABLE_TILE_FNAME_PROTO[-1]
            clsim_table_metaname_proto = CLSIM_TABLE_TILE_METANAME_PROTO[-1]

        print(
            'GCD = "{}"\nString {}, dom {}: (x, y, z) = ({}, {}, {}) m'.format(
                gcd, string, dom, dom_x, dom_y, dom_z))

    else:
        raise ValueError('Cannot understand `dom_spec` {}'.format(dom_spec))

    # Until someone figures out DOM tilt and ice column / bubble column / cable
    # orientations for sure, we'll just set DOM orientation to zenith=pi,
    # azimuth=0.
    dom_zenith = np.pi
    dom_azimuth = 0.0

    # Now add other metadata items that are useful but not used for hashing
    metadata['dom_x'] = dom_x
    metadata['dom_y'] = dom_y
    metadata['dom_z'] = dom_z
    metadata['dom_zenith'] = dom_zenith
    metadata['dom_azimuth'] = dom_azimuth
    metadata['seed'] = seed
    metadata['n_events'] = n_events

    metapath = join(outdir, clsim_table_metaname_proto.format(**metadata))
    tablepath = join(outdir, clsim_table_fname_proto.format(**metadata))

    # Save metadata as a JSON file (so it's human-readable by any tool, not
    # just Python--in contrast to e.g. pickle files)
    with open(metapath, 'w') as meta_file:
        json.dump(metadata, meta_file, sort_keys=False, indent=4)

    print('=' * 80)
    print('Metadata for the table set was written to\n  "{}"'.format(metapath))
    print('Table will be written to\n  "{}"'.format(tablepath))
    print('=' * 80)

    exists_at = []
    for fpath in [tablepath, tablepath + '.zst']:
        if isfile(fpath):
            exists_at.append(fpath)

    if exists_at:
        names = ', '.join('"{}"'.format(fp) for fp in exists_at)
        if overwrite:
            print('WARNING! Deleting existing table(s) at ' + names)
            for fpath in exists_at:
                remove(fpath)
        else:
            raise ValueError('Table(s) already exist at {}; not'
                             ' overwriting.'.format(names))
    print('')

    tray = I3Tray()
    tray.AddSegment(
        TabulateRetroSources,
        'TabulateRetroSources',
        source_gcd_i3_md5=gcd_info['source_gcd_i3_md5'],
        binning_kw=binning_kw,
        axes=axes,
        ice_model=ice_model,
        angular_sensitivity=angular_sensitivity,
        disable_tilt=disable_tilt,
        disable_anisotropy=disable_anisotropy,
        hash_val=hash_val,
        dom_spec=dom_spec,
        dom_x=dom_x,
        dom_y=dom_y,
        dom_z=dom_z,
        dom_zenith=dom_zenith,
        dom_azimuth=dom_azimuth,
        seed=seed,
        n_events=n_events,
        tablepath=tablepath,
        tile=tile,
        record_errors=False,
    )

    logging.set_level_for_unit('I3CLSimStepToTableConverter', 'TRACE')
    logging.set_level_for_unit('I3CLSimTabulatorModule', 'DEBUG')
    logging.set_level_for_unit('I3CLSimLightSourceToStepConverterGeant4',
                               'TRACE')
    logging.set_level_for_unit('I3CLSimLightSourceToStepConverterFlasher',
                               'TRACE')

    tray.Execute()
    tray.Finish()

    if compress:
        print('Compressing table with zstandard via command line')
        print('  zstd -1 --rm "{}"'.format(tablepath))
        subprocess.check_call(['zstd', '-1', '--rm', tablepath])
        print('done.')
Example 27
def load_ckv_table_compr(fpath, mmap):
    """Load a Cherenkov table from disk.

    Parameters
    ----------
    fpath : string
        Path to directory containing the table's .npy files.

    mmap : bool
        Whether to memory map the table (if it's stored in a directory
        containing .npy files).

    Returns
    -------
    table : OrderedDict
        Items are
        - 'n_photons' :
        - 'group_refractive_index' :
        - 'phase_refractive_index' :
        - 'r_bin_edges' :
        - 'costheta_bin_edges' :
        - 't_bin_edges' :
        - 'costhetadir_bin_edges' :
        - 'deltaphidir_bin_edges' :
        - 'ckv_template_map' : np.ndarray
        - 't_indep_ckv_table' : np.ndarray (if available)

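    A minimal usage sketch (the table directory path is hypothetical)::

        table = load_ckv_table_compr('~/retro_tables/my_ckv_table', mmap=True)
        r_bin_edges = table['r_bin_edges']
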
    """
    fpath = expand(fpath)
    table = OrderedDict()

    if mmap:
        mmap_mode = 'r'
    else:
        mmap_mode = None

    if DEBUG:
        wstderr('Loading ckv table from {} ...\n'.format(fpath))

    if isfile(fpath):
        assert basename(fpath) == 'ckv_table.npy'
        fpath = dirname(fpath)

    t0 = time()
    indir = fpath

    for key in CKV_TABLE_KEYS + ['t_indep_ckv_table']:
        fpath = join(indir, key + '.npy')
        if DEBUG:
            wstderr('    loading {} from "{}" ...'.format(key, fpath))

        t1 = time()
        if isfile(fpath):
            table[key] = np.load(fpath)

        elif key != 't_indep_ckv_table':
            raise ValueError(
                'Could not find file "{}" for loading table key "{}"'
                .format(fpath, key)
            )

        if DEBUG:
            wstderr(' ({} ms)\n'.format(np.round((time() - t1)*1e3, 3)))

    if DEBUG:
        wstderr('  Total time to load: {} s\n'.format(np.round(time() - t0, 3)))

    return table
Example 28
def setup_dom_tables(
        dom_tables_kind,
        dom_tables_fname_proto,
        gcd,
        angsens_model,
        norm_version,
        use_sd_indices=const.ALL_STRS_DOMS,
        step_length=1.0,
        num_phi_samples=None,
        ckv_sigma_deg=None,
        template_library=None,
        compute_t_indep_exp=True,
        use_directionality=True,
        no_noise=False,
        force_no_mmap=False,
    ):
    """Instantiate and load single-DOM tables

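    A minimal call sketch; the table kind shown is one this loader recognizes,
    but all paths and the model/version names below are hypothetical
    placeholders rather than defaults::

        dom_tables = setup_dom_tables(
            dom_tables_kind='ckv_templ_compr',
            dom_tables_fname_proto='/path/to/stacked_tables_dir',
            gcd='/path/to/gcd_file_or_extracted_dir',
            angsens_model='h2-50cm',
            norm_version='binvol',
            template_library='/path/to/ckv_dir_templates.npy',
        )
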
    """
    print('Instantiating and loading single-DOM tables')
    t0 = time.time()

    # TODO: set mmap based on memory?
    if force_no_mmap:
        mmap = False
    else:
        mmap = 'uncompr' in dom_tables_kind

    if dom_tables_kind in ['raw_templ_compr', 'ckv_templ_compr']:
        template_library = np.load(expand(template_library))
    else:
        template_library = None

    gcd = extract_gcd(gcd)

    if no_noise:
        gcd['noise'] = np.zeros_like(gcd['noise'])

    # Instantiate single-DOM tables class
    dom_tables = Retro5DTables(
        table_kind=dom_tables_kind,
        geom=gcd['geo'],
        rde=gcd['rde'],
        noise_rate_hz=gcd['noise'],
        angsens_model=angsens_model,
        compute_t_indep_exp=compute_t_indep_exp,
        use_directionality=use_directionality,
        norm_version=norm_version,
        num_phi_samples=num_phi_samples,
        ckv_sigma_deg=ckv_sigma_deg,
        template_library=template_library,
        use_sd_indices=use_sd_indices
    )

    if '{subdet' in dom_tables_fname_proto:
        doms = const.ALL_DOMS
        for subdet in ['ic', 'dc']:
            if subdet == 'ic':
                strings = const.IC_STRS
            else:
                strings = const.DC_STRS

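            # One table per (subdetector, depth/DOM number) is shared by all
            # strings in that subdetector: gather the string-DOM indices that
            # will use it, then load the table once for all of them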
            for dom in doms:
                fpath = dom_tables_fname_proto.format(
                    subdet=subdet, dom=dom, depth_idx=dom-1
                )
                shared_table_sd_indices = []
                for string in strings:
                    sd_idx = const.get_sd_idx(string=string, dom=dom)
                    if sd_idx not in use_sd_indices:
                        continue
                    shared_table_sd_indices.append(sd_idx)

                if not shared_table_sd_indices:
                    continue

                dom_tables.load_table(
                    fpath=fpath,
                    sd_indices=shared_table_sd_indices,
                    step_length=step_length,
                    mmap=mmap
                )
    elif '{string}' in dom_tables_fname_proto:
        raise NotImplementedError('dom_tables_fname_proto with {string} not'
                                  ' implemented')
    elif '{string_idx}' in dom_tables_fname_proto:
        raise NotImplementedError('dom_tables_fname_proto with {string_idx}'
                                  ' not implemented')
    else:
        stacked_tables_fpath = expand(join(
            dom_tables_fname_proto,
            'stacked_{}.npy'.format(dom_tables.table_name)
        ))
        stacked_tables_meta_fpath = expand(join(
            dom_tables_fname_proto,
            'stacked_{}_meta.pkl'.format(dom_tables.table_name)
        ))
        stacked_t_indep_tables_fpath = expand(join(
            dom_tables_fname_proto,
            'stacked_{}.npy'.format(dom_tables.t_indep_table_name)
        ))
        dom_tables.load_stacked_tables(
            stacked_tables_meta_fpath=stacked_tables_meta_fpath,
            stacked_tables_fpath=stacked_tables_fpath,
            stacked_t_indep_tables_fpath=stacked_t_indep_tables_fpath,
            mmap_t_indep=mmap
        )

    print('  -> {:.3f} s\n'.format(time.time() - t0))

    return dom_tables
Example 29
def summarize_clsim_table(table_fpath,
                          table=None,
                          save_summary=True,
                          outdir=None):
    """
    Parameters
    ----------
    table_fpath : string
        Path to table (or just the table's filename if `outdir` is specified)

    table : mapping, optional
        If the table has already been loaded, it can be passed here to avoid
        re-loading the table.

    save_summary : bool
        Whether to save the table summary to disk.

    outdir : string, optional
        If `save_summary` is True, write the summary to this directory. If
        `outdir` is not specified and `save_summary` is True, the summary will
        be written to the same directory that contains `table_fpath`.

    Returns
    -------
    table
        See `load_clsim_table` for details of the data structure

    summary : OrderedDict

    """
    t_start = time()
    if save_summary:
        from pisa.utils.jsons import from_json, to_json

    table_fpath = expand(table_fpath)
    srcdir, clsim_fname = dirname(table_fpath), basename(table_fpath)
    invalid_fname = False
    try:
        fname_info = interpret_clsim_table_fname(clsim_fname)
    except ValueError:
        invalid_fname = True
        fname_info = {}

    if outdir is None:
        outdir = srcdir
    outdir = expand(outdir)
    mkdir(outdir)

    if invalid_fname:
        metapath = None
    else:
        metaname = (CLSIM_TABLE_METANAME_PROTO[-1].format(
            hash_val=fname_info['hash_val']))
        metapath = join(outdir, metaname)
    if metapath and isfile(metapath):
        meta = from_json(metapath)
    else:
        meta = dict()

    if table is None:
        table = load_clsim_table(table_fpath)

    summary = OrderedDict()
    for key in table.keys():
        if key == 'table':
            continue
        summary[key] = table[key]
    if fname_info:
        for key in ('hash_val', 'string', 'depth_idx', 'seed'):
            summary[key] = fname_info[key]
    # TODO: Add hole ice info when added to tray_kw_to_hash
    if meta:
        summary['n_events'] = meta['tray_kw_to_hash']['NEvents']
        summary['ice_model'] = meta['tray_kw_to_hash']['IceModel']
        summary['tilt'] = not meta['tray_kw_to_hash']['DisableTilt']
        for key, val in meta.items():
            if key.endswith('_binning_kw'):
                summary[key] = val
    elif 'fname_version' in fname_info and fname_info['fname_version'] == 1:
        summary['n_events'] = fname_info['n_events']
        summary['ice_model'] = 'spice_mie'
        summary['tilt'] = False
        summary['r_binning_kw'] = dict(min=0.0, max=400.0, n_bins=200, power=2)
        summary['costheta_binning_kw'] = dict(min=-1, max=1, n_bins=40)
        summary['t_binning_kw'] = dict(min=0.0, max=3000.0, n_bins=300)
        summary['costhetadir_binning_kw'] = dict(min=-1, max=1, n_bins=20)
        summary['deltaphidir_binning_kw'] = dict(min=0.0, max=np.pi, n_bins=20)

    # Save marginal distributions and info to file
    norm = (
        1 / table['n_photons'] /
        (SPEED_OF_LIGHT_M_PER_NS / table['phase_refractive_index'] *
         np.mean(np.diff(table['t_bin_edges'])))
        #* table['angular_acceptance_fract']
        * (len(table['costheta_bin_edges']) - 1))
    summary['norm'] = norm

    dim_names = ('r', 'costheta', 't', 'costhetadir', 'deltaphidir')
    n_dims = len(table['table_shape'])
    assert n_dims == len(dim_names)

    # Apply norm to underflow and overflow so magnitudes can be compared
    # relative to plotted marginal distributions
    for flow, idx in product(('underflow', 'overflow'), iter(range(n_dims))):
        summary[flow][idx] = summary[flow][idx] * norm

    wstderr('Finding marginal distributions...\n')
    wstderr('    masking off zeros in table...')
    t0 = time()
    nonzero_table = np.ma.masked_equal(table['table'], 0)
    wstderr(' ({} ms)\n'.format(np.round((time() - t0) * 1e3, 3)))

    t0_marg = time()
    summary['dimensions'] = OrderedDict()
    for keep_axis, ax_name in zip(tuple(range(n_dims)), dim_names):
        remove_axes = list(range(n_dims))
        remove_axes.pop(keep_axis)
        remove_axes = tuple(remove_axes)
        axis = OrderedDict()

        wstderr('    mean across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['mean'] = norm * np.asarray(
            np.mean(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    median across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['median'] = norm * np.asarray(
            np.ma.median(nonzero_table, axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))

        wstderr('    max across non-{} axes...'.format(ax_name))
        t0 = time()
        axis['max'] = norm * np.asarray(
            np.max(table['table'], axis=remove_axes))
        wstderr(' ({} s)\n'.format(np.round(time() - t0, 3)))
        summary['dimensions'][ax_name] = axis
    wstderr('  Total time to find marginal distributions: {} s\n'.format(
        np.round(time() - t0_marg, 3)))

    if save_summary:
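        # Strip trailing extensions (e.g. a compression suffix) from the table
        # filename until the bare name or the '.fits' extension has been
        # removed; the remainder becomes the basename for the summary file.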
        ext = None
        base_fname = clsim_fname
        while ext not in ('', '.fits'):
            base_fname, ext = splitext(base_fname)
            ext = ext.lower()
        outfpath = join(outdir, base_fname + '_summary.json.bz2')
        to_json(summary, outfpath)
        print('saved summary to "{}"'.format(outfpath))

    wstderr('Time to summarize table: {} s\n'.format(
        np.round(time() - t_start, 3)))

    return table, summary
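# Hypothetical usage sketch for the summarizing function above. The enclosing
# function's name is not visible in this excerpt, so `summarize_clsim_table`
# and the path below are illustrative assumptions, not taken from the source:
#
#     table, summary = summarize_clsim_table(
#         table_fpath='/path/to/clsim_table.fits',
#         save_summary=True,  # also writes <basename>_summary.json.bz2 to outdir
#     )
#     print(summary['norm'], list(summary['dimensions'].keys()))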
Example 30
def scan_llh(dom_tables_kw, hypo_kw, events_kw, scan_kw):
    """Script "main" function"""
    t00 = time.time()

    scan_values = []
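    # Each hypothesis parameter's scan spec arrives as a sequence of string
    # tokens; join them, lowercase, substitute the numeric value of pi for any
    # literal 'pi', and expand the human-readable range spec into an explicit
    # list of values via hrlist2list.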
    for dim in HYPO_PARAMS_T._fields:
        val_str = ''.join(scan_kw.pop(dim))
        val_str = val_str.lower().replace('pi', format(np.pi, '.17e'))
        scan_values.append(hrlist2list(val_str))

    dom_tables = init_obj.setup_dom_tables(**dom_tables_kw)
    hypo_handler = init_obj.setup_discrete_hypo(**hypo_kw)
    events_generator = init_obj.get_events(**events_kw)

    # Pop 'outdir' from `scan_kw` since we don't want to store this info in
    # the metadata dict.
    outdir = expand(scan_kw.pop('outdir'))
    mkdir(outdir)

    print('Scanning parameters')
    t0 = time.time()

    fast_llh = True
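    # When fast_llh is set, look up the DOM-tables attributes once and call the
    # low-level _get_llh directly, so the per-hypothesis wrapper avoids
    # repeated attribute access; otherwise fall back to the more general
    # likelihood.get_llh interface.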

    if fast_llh:
        get_llh = dom_tables._get_llh
        dom_info = dom_tables.dom_info
        tables = dom_tables.tables
        table_norm = dom_tables.table_norm
        t_indep_tables = dom_tables.t_indep_tables
        t_indep_table_norm = dom_tables.t_indep_table_norm
        sd_idx_table_indexer = dom_tables.sd_idx_table_indexer
        metric_kw = {}

        def metric_wrapper(hypo, hits, hits_indexer, unhit_sd_indices,
                           time_window):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources,
                           hits=hits,
                           hits_indexer=hits_indexer,
                           unhit_sd_indices=unhit_sd_indices,
                           sd_idx_table_indexer=sd_idx_table_indexer,
                           time_window=time_window,
                           dom_info=dom_info,
                           tables=tables,
                           table_norm=table_norm,
                           t_indep_tables=t_indep_tables,
                           t_indep_table_norm=t_indep_table_norm)
    else:
        metric_kw = dict(dom_tables=dom_tables, tdi_table=None)
        get_llh = likelihood.get_llh

        def metric_wrapper(hypo, **metric_kw):
            sources = hypo_handler.get_sources(hypo)
            return get_llh(sources=sources, **metric_kw)

    n_points_total = 0
    metric_vals = []
    for _, event in events_generator:
        hits = event['hits']
        hits_indexer = event['hits_indexer']
        hits_summary = event['hits_summary']
        metric_kw['hits'] = hits
        metric_kw['hits_indexer'] = hits_indexer
        hit_sd_indices = hits_indexer['sd_idx']
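        # Collect the (string, DOM) indices that registered no hits as the
        # complement of the hit indices within ALL_STRS_DOMS_SET; these are
        # passed to the metric so it can evaluate the unhit DOMs as well.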
        unhit_sd_indices = np.array(sorted(
            ALL_STRS_DOMS_SET.difference(hit_sd_indices)),
                                    dtype=np.uint32)
        metric_kw['unhit_sd_indices'] = unhit_sd_indices
        metric_kw['time_window'] = np.float32(
            hits_summary['time_window_stop'] -
            hits_summary['time_window_start'])

        t1 = time.time()
        metric_vals.append(scan(scan_values, metric_wrapper, metric_kw))
        dt = time.time() - t1

        n_points = metric_vals[-1].size
        n_points_total += n_points
        print('  ---> {:.3f} s, {:d} points ({:.3f} ms per LLH)'.format(
            dt, n_points, dt / n_points * 1e3))
    dt = time.time() - t0

    info = OrderedDict([
        ('hypo_params', HYPO_PARAMS_T._fields),
        ('scan_values', scan_values),
        ('metric_name', 'llh'),
        ('metric_vals', metric_vals),
        ('scan_kw', sort_dict(scan_kw)),
        ('dom_tables_kw', sort_dict(dom_tables_kw)),
        ('hypo_kw', sort_dict(hypo_kw)),
        ('events_kw', sort_dict(events_kw)),
    ])

    outfpath = join(outdir, 'scan.pkl')
    print('Saving results in pickle file, path "{}"'.format(outfpath))
    with open(outfpath, 'wb') as outfile:
        pickle.dump(info, outfile, protocol=pickle.HIGHEST_PROTOCOL)

    print('Total time to scan: {:.3f} s; {:.3f} ms avg per LLH'.format(
        time.time() - t00, dt / n_points_total * 1e3))

    return metric_vals, info
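# Hypothetical follow-up sketch (not part of the original source): the pickle
# written above could be reloaded later for plotting, assuming the 'scan.pkl'
# layout produced by scan_llh.
#
#     import pickle
#     with open('/path/to/outdir/scan.pkl', 'rb') as f:  # illustrative path
#         info = pickle.load(f)
#     llh_maps = info['metric_vals']  # one array per scanned event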