Exemple #1
0
def collect_events(dtype, icc=False):
    """Collect the selected physics-frame events of one data type.

    Every file listed in ``folder[dtype]`` is opened, its physics frames are
    popped one by one, and each frame accepted by ``passed`` is handed to
    ``append``, which accumulates the per-event values.

    Parameters
    ----------
    dtype : key of ``folder``
        Data type to process (presumably a string). If it contains 'genie',
        a ``ProbMap`` is built and forwarded to ``append``; otherwise
        ``append`` receives ``probmap=None``.
    icc : bool, optional
        Forwarded unchanged to ``passed`` and ``append``.

    Returns
    -------
    events : dict
        One numpy array per key of ``variables``.

    """
    ### files to be looped through
    filenames = folder[dtype]
    # kept as a float because it is forwarded to `append` as-is
    nfiles = float(len(filenames))
    print('nfiles: {0}'.format(nfiles))

    ### initialize an event holder
    events = {key: [] for key in variables.keys()}

    ### get prob map if genie
    ### also oscillate NC by default
    probmap = None
    if 'genie' in dtype:
        probmap = ProbMap(matter=True, params=params)

    ### loop through files
    for filename in filenames:
        print('+-- {0}'.format(filename))
        i3file = I3File(filename, 'r')
        # try/finally so the file handle is released even when a frame
        # fails to be read or processed
        try:
            while i3file.more():
                frame = i3file.pop_physics()
                if not frame:
                    print('| this frame is empty ...')
                    continue
                # only frames passing the selection are accumulated
                if passed(frame, icc=icc):
                    events = append(frame,
                                    dtype,
                                    events,
                                    nfiles,
                                    probmap=probmap,
                                    icc=icc)
            # running totals after this file
            print('| unweighted nevents: {0}'.format(len(events['weight'])))
            print('| weighted   nevents: {0}'.format(np.sum(events['weight'])))
        finally:
            i3file.close()

    ### numpify
    events = {key: np.array(value) for key, value in events.items()}

    ### final summary
    weight = events['weight']
    print('+-------------------------------------')
    print('| unweighted before L6cuts : {0}'.format(len(weight)))
    print('| weighted   before L6cuts : {0:4f} mHZ'.format(
        np.sum(weight) * 1000.))

    ### return output
    return events
Exemple #2
0
def CmpI3File(rep,
              fileNameCurrent,
              fileNameReference,
              restrictFrames=None,
              restrictKeys=None,
              ignoreKeys=None,
              strictOnLayout=False,
              keyBasedCmp=None):
    """
    Compare contents of two I3Files.

    Files are compared frame by frame. The set of keys to compare can be
    restricted (restrictKeys) or selected frame keys can be ignored (ignoreKeys).
    If strictOnLayout is False, curFrame may contain more keys than refFrame;
    otherwise the key sets have to be identical. Key-based comparison functions
    can be passed (keyBasedCmp) and will overwrite type-based comparison.

    If restrictFrames is non-empty, only frames whose zero-based index is
    contained in it are compared. The comparison stops at the end of the
    reference file: a frame missing from the current file is reported as a
    failure on ``rep``, while extra frames at the end of the current file are
    not compared. Both files are closed before returning, also on error.
    """

    from contextlib import closing

    from icecube.dataio import I3File

    with rep.section("[File] %s <-> %s" %
                     (fileNameCurrent, fileNameReference)):
        # closing() guarantees both file handles are released, even when a
        # comparison raises
        with closing(I3File(fileNameCurrent)) as fileCur, \
                closing(I3File(fileNameReference)) as fileRef:
            iFrame = -1
            while True:
                iFrame += 1
                # pop from both files every iteration so they stay in step,
                # including for frames skipped through restrictFrames
                frameCur = fileCur.pop_frame()
                frameRef = fileRef.pop_frame()

                # the reference file defines how many frames are compared
                if frameRef is None:
                    break

                if restrictFrames and iFrame not in restrictFrames:
                    continue

                with rep.section("[Frame] %i" % iFrame):
                    if frameCur is None:
                        rep.failure("Frame missing LEFT")
                    else:
                        CmpI3Frame(rep, frameCur, frameRef, restrictKeys,
                                   ignoreKeys, strictOnLayout, keyBasedCmp)
    def setUp(self):
        """Record the stop and key set of every frame in ``seekable_file``.

        Fills ``self.ref_frame_info`` with one dict per frame, holding the
        frame's ``Stop`` under "stop" and the set of its keys under "keys",
        then prints one "<stop> <number of keys>" line per frame.
        """
        ref = I3File(seekable_file)
        try:
            self.ref_frame_info = []
            while ref.more():
                frame = ref.pop_frame()
                self.ref_frame_info.append({
                    "stop": frame.Stop,
                    "keys": set(frame.keys())
                })
        finally:
            # release the file handle even if reading a frame fails
            ref.close()

        for x in self.ref_frame_info:
            # print() call with an explicit format string: valid syntax on
            # Python 3 and the same output as the old Python 2 statement
            print("{0} {1}".format(x["stop"], len(x["keys"])))
Exemple #4
0
def extract_gcd_files(gcd_files, retro_gcd_dir, verbosity=0):
    """Extract the G, C, and D frames of one or more GCD files via
    `extract_gcd_frames`.

    Parameters
    ----------
    gcd_files : string or iterable thereof

    retro_gcd_dir : string
        Path to communal Retro-extracted GCD dir

    verbosity : int in [0, 1]
        If non-zero, "<md5 hex>  <path>" is written to stdout for each file

    Returns
    -------
    gcd_md5_hexs : len(gcd_files)-list of strings

    Raises
    ------
    ValueError
        If a file contains more than one G, C, or D frame, or lacks any one
        of them

    """
    # Import here so module can be read without access to IceCube software
    from icecube.dataio import I3File  # pylint: disable=no-name-in-module, import-outside-toplevel
    from icecube.icetray import I3Frame  # pylint: disable=no-name-in-module, import-outside-toplevel

    if isinstance(gcd_files, string_types):
        gcd_files = [gcd_files]

    # (frame stop, keyword name expected by `extract_gcd_frames`, label used
    # in the "multiple frames" error message)
    gcd_stops = (
        (I3Frame.Geometry, "g_frame", "G"),
        (I3Frame.Calibration, "c_frame", "C"),
        (I3Frame.DetectorStatus, "d_frame", "D"),
    )

    gcd_md5_hexs = []
    for gcd_fpath in gcd_files:
        gcd_fpath = expand(gcd_fpath)
        gcd_frames = OrderedDict()

        i3f = I3File(gcd_fpath)
        # try/finally so the input file is closed whether or not scanning
        # its frames succeeds
        try:
            while i3f.more():
                frame = i3f.pop_frame()
                for stop, frame_key, label in gcd_stops:
                    if frame.Stop == stop:
                        if frame_key in gcd_frames:
                            raise ValueError(
                                'GCD file "{}" contains multiple {} frames'
                                .format(gcd_fpath, label))
                        gcd_frames[frame_key] = frame
                        break
        finally:
            i3f.close()

        for frame_type in ("g", "c", "d"):
            if "{}_frame".format(frame_type) not in gcd_frames:
                raise ValueError('No {} frame found in GCD file "{}"'.format(
                    frame_type, gcd_fpath))

        # Provenance info handed through to `extract_gcd_frames`
        metadata = OrderedDict()
        metadata["extracted_on_fqdn"] = socket.getfqdn()
        metadata["path_to_sourcefile"] = abspath(gcd_fpath)
        metadata["sourcefile_md5sum"] = get_file_md5(gcd_fpath)

        try:
            gcd_md5_hex = extract_gcd_frames(retro_gcd_dir=retro_gcd_dir,
                                             metadata=metadata,
                                             **gcd_frames)
        except Exception:
            # Name the offending file, then let the original error propagate
            sys.stderr.write(
                'failed to extract GCD file "{}"\n'.format(gcd_fpath))
            raise

        gcd_md5_hexs.append(gcd_md5_hex)
        if verbosity:
            sys.stdout.write("{}  {}\n".format(gcd_md5_hex, gcd_fpath))

    return gcd_md5_hexs
Exemple #5
0
def extract_gcd_frames(g_frame,
                       c_frame,
                       d_frame,
                       retro_gcd_dir,
                       metadata=None):
    """Extract GCD info to Python/Numpy-readable objects stored to a central
    GCD directory, subdirs of which are named by the hex md5sum of each
    extracted GCD file.

    If a subdirectory for the computed md5sum already exists, nothing is
    extracted; a note is written to stderr and the md5sum is returned.

    Parameters
    ----------
    g_frame : icecube.icetray.I3Frame with stop I3Frame.Geometry
    c_frame : icecube.icetray.I3Frame with stop I3Frame.Calibration
    d_frame : icecube.icetray.I3Frame with stop I3Frame.DetectorStatus
    retro_gcd_dir : string
    metadata : None or mapping, optional
        If non-empty mapping (e.g., OrderedDict) is provided, the contents are
        written to the gcd file's subdirectory inside retro_gcd_dir as
        "metadata.json"

    Returns
    -------
    gcd_md5_hex : len-32 string of chars 0-9 and/or a-f
        MD5 sum of _only_ the G, C, and D frames (in that order) dumped to an
        uncompressed i3 file. Note that this can result in a hash value
        different from hashing the original GCD file if other frames were
        present besides the GCD frames (such as an I frame, or Q/P/etc. if the
        GCD is embedded in a data i3 file)

    """
    from icecube.dataio import I3File  # pylint: disable=import-outside-toplevel

    retro_gcd_dir = expand(retro_gcd_dir)

    # Create root dir for gcd subdirs if necessary. Another process may create
    # it between the check and the mkdir, so an "already exists" error is
    # tolerated (same policy as for the copytree call further down).
    if not isdir(retro_gcd_dir):
        try:
            mkdir(retro_gcd_dir)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise

    # Add a vaguely useful README to gcd root dir
    readme_fpath = join(retro_gcd_dir, "README")
    if not isfile(readme_fpath):
        with io.open(readme_fpath, "w", encoding="utf-8") as fhandle:
            fhandle.write(GCD_README.strip() + "\n")

    # Find md5sum of an uncompressed GCD file created by these G, C, & D frames
    tempdir_path = mkdtemp(suffix="gcd")
    try:
        gcd_i3file_path = join(tempdir_path, "gcd.i3")
        gcd_i3file = I3File(gcd_i3file_path, "w")
        # Close the file even if pushing a frame fails, so the handle does not
        # outlive the temp dir that is removed below
        try:
            gcd_i3file.push(g_frame)
            gcd_i3file.push(c_frame)
            gcd_i3file.push(d_frame)
        finally:
            gcd_i3file.close()
        gcd_md5_hex = get_file_md5(gcd_i3file_path)
    finally:
        # Best-effort cleanup: a leftover temp dir must not mask the result
        # (or the real error)
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    this_gcd_dir_path = join(retro_gcd_dir, gcd_md5_hex)
    if isdir(this_gcd_dir_path):
        # already extracted this GCD
        sys.stderr.write(
            "Already extracted GCD with md5sum {}\n".format(gcd_md5_hex))
        return gcd_md5_hex

    # Build the output in a scratch dir first and copy it into place at the
    # end, so the final directory only appears once all files are written
    tempdir_path = mkdtemp(suffix="." + gcd_md5_hex)
    try:
        # Extract GCD info into Python/Numpy-readable things
        gcd_info = OrderedDict()
        gcd_info["I3Geometry"] = extract_i3_geometry(g_frame)
        gcd_info["I3Calibration"] = extract_i3_calibration(c_frame)
        gcd_info["I3DetectorStatus"] = extract_i3_detector_status(d_frame)
        gcd_info.update(extract_bad_doms_lists(d_frame))

        # Write info to files. Preferable to write a single array to a .npy file;
        # second most preferable is to write multiple arrays to (compressed) .npz
        # file (faster to load than pkl files); finally, I3DetectorStatus _has_ to
        # be stored as pickle to preserve varying-length items.
        for key, val in gcd_info.items():
            if isinstance(val, Mapping):
                if key == "I3DetectorStatus":
                    key_fpath = join(tempdir_path, key + ".pkl")
                    with io.open(key_fpath, "wb") as fhandle:
                        pickle.dump(val,
                                    fhandle,
                                    protocol=pickle.HIGHEST_PROTOCOL)
                else:
                    np.savez_compressed(join(tempdir_path, key + ".npz"),
                                        **val)
            else:
                assert isinstance(val, np.ndarray)
                np.save(join(tempdir_path, key + ".npy"), val)

        if metadata:
            metadata_fpath = join(tempdir_path, "metadata.json")
            with open(metadata_fpath, "w") as fhandle:
                json.dump(metadata, fhandle, sort_keys=False, indent=4)

        try:
            copytree(tempdir_path, this_gcd_dir_path)
        except OSError as err:
            # The destination already existing (e.g. created by a concurrent
            # extraction of the same GCD) is not an error
            if err.errno != errno.EEXIST:
                raise

    finally:
        # Best-effort cleanup of the scratch dir
        try:
            rmtree(tempdir_path)
        except Exception:
            pass

    return gcd_md5_hex
Exemple #6
0
def retro_recos_to_i3files(
    eventsdir,
    point_estimator,
    recos=None,
    reco_suffix=None,
    i3dir=None,
    overwrite=False,
    replace_existing_frame_items=False,
):
    """Take retro recos found in .npy files / retro directory structure and
    corresponding i3 files and generate new i3 files like the original but
    populated with the retro reco information.

    For every directory below `eventsdir` that contains "events.npy", the
    matching input file "<dirname>.i3*" is looked up next to the corresponding
    directory under `i3dir`, and the output is written alongside it as
    "<dirname>__<reco names + reco_suffix, joined by '__'>.i3.zst".

    Parameters
    ----------
    eventsdir : str
    point_estimator : str in {"mean", "median", "max"}
    recos : str or iterable thereof, optional
        If not specified, all "retro_*" recos found will be populated
    reco_suffix : str or None, optional
    i3dir : str, optional
        If None or not specified, defaults to `eventsdir`
    overwrite : bool, optional
        If False, an output path that already exists is skipped with a warning
    replace_existing_frame_items : bool, optional
        Forwarded to `populate_pframe`

    Raises
    ------
    ValueError
        If a reco name does not start with "retro_", if a reco array's length
        differs from that of the events array, or if the id fields of an
        event do not match those of the physics frame's I3EventHeader
    IOError
        If no i3 file matching an events directory is found

    """
    eventsdir = abspath(expanduser(expandvars(eventsdir)))
    # If the leaf reco/events/truth dir "recos" was specified, must go one up
    # to find events/truth
    if basename(eventsdir) == "recos":
        eventsdir = dirname(eventsdir)

    if recos is None:
        recos = [
            splitext(basename(n))[0]
            for n in glob(join(eventsdir, "recos", "retro_*.npy"))
        ]

    if isinstance(recos, string_types):
        recos = [recos]
    else:
        recos = sorted(recos)
    for reco in recos:
        if not reco.startswith("retro_"):
            raise ValueError(
                'Can only populate "retro_*" recos; "{}" is invalid'.format(
                    reco))

    reco_suffix = reco_suffix if reco_suffix else ""

    if i3dir is None:
        i3dir = eventsdir
    else:
        i3dir = abspath(expanduser(expandvars(i3dir)))

    # -- Walk directories and match (events, recos) to i3 paths -- #

    for events_dirpath, dirs, filenames in walk(eventsdir):
        # In-place sort controls the order in which `walk` descends
        dirs.sort(key=nsort_key_func)
        if "events.npy" not in filenames:
            continue

        missing_recos = []
        reco_filepaths = {}
        for reco in recos:
            reco_filepath = join(events_dirpath, "recos",
                                 "{}.npy".format(reco))
            if isfile(reco_filepath):
                reco_filepaths[reco] = reco_filepath
            else:
                missing_recos.append(reco)

        if missing_recos:
            print('WARNING: Missing recos {} in dir "{}"'.format(
                missing_recos, events_dirpath))
            # Nothing at all to populate for this directory
            if set(missing_recos) == set(recos):
                continue

        eventsdir_basename = basename(events_dirpath)
        i3filedir = join(i3dir,
                         relpath(dirname(events_dirpath), start=eventsdir))
        i3filepaths = sorted(
            glob(join(i3filedir, "{}.i3*".format(eventsdir_basename))),
            key=nsort_key_func,
        )
        if not i3filepaths:
            raise IOError(
                'No matching i3 file "{}.i3*" in directory "{}"'.format(
                    eventsdir_basename, i3filedir))
        input_i3filepath = i3filepaths[0]
        if len(i3filepaths) > 1:
            print(
                'WARNING: found multiple i3 files in dir, picking first one "{}"'
                .format(i3filepaths))
        print("input_i3filepath:", input_i3filepath)

        suffix = "__" + "__".join(
            sorted((r + reco_suffix) for r in reco_filepaths.keys()))
        output_i3filepath = join(
            i3filedir,
            "{base}{suffix}{extensions}".format(
                base=basename(input_i3filepath)[:len(eventsdir_basename)],
                suffix=suffix,
                extensions=".i3.zst",
            ),
        )
        if not overwrite and isfile(output_i3filepath):
            print(
                'WARNING: skipping writing output path that already exists: "{}"'
                .format(output_i3filepath))
            continue
        print("output_i3filepath:", output_i3filepath)

        print("events_dirpath:", events_dirpath)
        events = np.load(join(events_dirpath, "events.npy"))
        recos_d = OrderedDict()
        for reco, reco_filepath in reco_filepaths.items():
            recos_d[reco] = np.load(reco_filepath)
            if len(recos_d[reco]) != len(events):
                raise ValueError("{} has len {}, events has len {}".format(
                    reco, len(recos_d[reco]), len(events)))

        # Collect frames into an event chain until we hit a DAQ frame, a
        # second physics frame, or the end of the file.
        #
        # * If the chain contains a Physics frame, it counts as the next
        #   event: its header is checked against the events array and the
        #   recos are populated to the chain.
        #
        # * A chain without a Physics frame is not counted as an event (see
        #   the TODO below).
        #
        # When done with a chain, regardless of why it ended or whether it
        # held an event, all of its frames are pushed to the output file.

        input_i3file = I3File(input_i3filepath, "r")
        output_i3file = I3File(output_i3filepath, "w")

        id_fields = [
            "run_id", "sub_run_id", "event_id", "sub_event_id",
            "sub_event_stream"
        ]

        frame_buffer = []
        chain_has_daq_frame = False
        chain_has_physics_frame = False
        frame_counter = 0
        event_index = -1

        try:
            while True:
                if input_i3file.more():
                    try:
                        next_frame = input_i3file.pop_frame()
                    except BaseException:
                        sys.stderr.write(
                            "Failed to pop frame #{}\n".format(frame_counter +
                                                               1))
                        raise
                    frame_counter += 1
                else:
                    next_frame = None

                # Current chain has ended and a new one will have to be started
                # (or we're at the end of the file).

                # Populate the reco to the current chain, push all of the
                # current chain's frames to the output file, and start a new
                # chain with the next frame (or quit if we're at the end of the
                # file).
                if (next_frame is None or next_frame.Stop == I3Frame.DAQ
                        or (chain_has_physics_frame
                            and next_frame.Stop == I3Frame.Physics)):
                    if frame_buffer:
                        # Events are identified as a chain with daq frame being
                        # present with no physics frame, physics frame present
                        # with no daq frame, or both being present (existence
                        # of other frames is considered to be irrelevant)

                        # TODO: oscNext v01.01 by L5, i3 file processing was
                        # messed up, there were Q frames followed by I frames
                        # and no associated P frame. Therefore we have to only
                        # count chains with P frames in them as events, or else
                        # the recos won't be put back in the right place /
                        # indices run out.
                        #if chain_has_daq_frame or chain_has_physics_frame:

                        if chain_has_physics_frame:
                            event_index += 1

                            # Make sure event headers match
                            pframe = None
                            for frame in frame_buffer[::-1]:
                                if frame.Stop == I3Frame.Physics:
                                    pframe = frame
                            assert pframe is not None
                            i3hdr = pframe["I3EventHeader"]
                            i3hdr_id = tuple(
                                getattr(i3hdr, field) for field in id_fields)
                            retro_event_id = tuple(
                                events[event_index][id_fields])
                            if retro_event_id != i3hdr_id:
                                raise ValueError(
                                    "retro event {} != frame event {}".format(
                                        retro_event_id, i3hdr_id))

                            populate_pframe(
                                event_index=event_index,
                                frame_buffer=frame_buffer,
                                recos_d=recos_d,
                                reco_suffix=reco_suffix,
                                point_estimator=point_estimator,
                                replace_existing_frame_items=
                                replace_existing_frame_items,
                            )

                        # Regardless if there was an event identified in the
                        # chain, push all frames to the output file
                        for frame in frame_buffer:
                            output_i3file.push(frame)

                    # No next frame indicates we hit the end of the file; quit
                    if next_frame is None:
                        break

                    # Create a new chain, starting with the next frame
                    frame_buffer = [next_frame]
                    chain_has_daq_frame = next_frame.Stop == I3Frame.DAQ
                    chain_has_physics_frame = next_frame.Stop == I3Frame.Physics

                # Otherwise, we have just another frame in the current chain;
                # append it and move on.
                else:
                    frame_buffer.append(next_frame)
                    chain_has_daq_frame |= next_frame.Stop == I3Frame.DAQ
                    chain_has_physics_frame |= next_frame.Stop == I3Frame.Physics

        except BaseException:
            # Any failure (including KeyboardInterrupt) leaves a partial
            # output file behind: close and delete it, report where we were,
            # and re-raise
            output_i3file.close()
            del output_i3file
            remove(output_i3filepath)

            sys.stderr.write('ERROR! file "{}", frame #{}\n'.format(
                input_i3filepath, frame_counter + 1))
            raise

        else:
            output_i3file.close()
            del output_i3file

        finally:
            # The input file is released on both the success and the error
            # path
            input_i3file.close()
Exemple #7
0
import numpy as np
from numpy.linalg import norm
import glob
import os

from icecube.dataio import I3File
from icecube import icetray, dataclasses, recclasses, simclasses

# Script setup: input/output locations, detector geometry, and the list of
# input files to process.

# data files location (globbed below for names starting with 'Level2')
data_location = '/cr/data01/hagne/John_project/Donghwa/'
# where to save the frames that passed the quality cuts
cut_location = "./data/i3files"

# get geometry info
# NOTE(review): only the first frame of the GCD file is popped and it is
# assumed to hold the 'I3Geometry' key -- confirm for other GCD files.
geom_file = I3File("GeoCalibDetectorStatus_2012.56063_V1_OctSnow.i3.gz")
geom_frame = geom_file.pop_frame()
geometry = geom_frame['I3Geometry']
geom_file.close()
# accumulators, filled further down the script (not visible here);
# presumably one energy value per accepted iron / proton shower -- verify
iron_energy = []
proton_energy = []

# --------------------------------------------------------------------------
# Now get the data from the files ------------------------------------------

# list of appropriate files in folder
files = glob.glob(data_location + 'Level2*')

total_frames = 0  # want to print out how many frames passed

# loop through files
Exemple #8
0
# ------------------------------------------------------------------------------
# MAIN -------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Script setup: load the saved proton showers, read the detector geometry,
# and build the list of (station, tank) pairs.

# load the showers
data_location = './data/'
protondata = np.load(data_location + 'proton_showers.npy')
#irondata = np.load(data_location + 'iron_showers.npy')


# corsika file location
corsika_location = '/cr/data01/hagne/John_project/CORSIKA/muonsPROPER/'


# get geometry info
# NOTE(review): only the first frame of the GCD file is popped and it is
# assumed to hold the 'I3Geometry' key -- confirm for other GCD files.
geom_file  = I3File("./data/GeoCalibDetectorStatus_2012.56063_V1_OctSnow.i3.gz")
geom_frame = geom_file.pop_frame()
geometry   = geom_frame['I3Geometry']
geom_file.close()

# list of tanks: two entries (tank 1 and tank 2) for each station 1..81
tanks = []
for i in range(1,82):
    # each entry appended here is [station #, tank #]
    # NOTE(review): the original comment also listed a third element,
    # "number of muons detected"; it is not appended here -- presumably
    # added later in the script, verify
    # tank 1 contains DOMs 61,62; tank 2 contains DOMs 63,64
    tanks.append([i,1])
    tanks.append([i,2])
# tank coordinates -------------------
for tank in tanks:
    station = tank[0]
    if tank[1] == 1: