Example 1
    def __init__(self, val=None):
        self.metadata = OrderedDict([
            ('detector', ''),
            ('geom', ''),
            ('runs', []),
            ('proc_ver', ''),
            ('cuts', []),
            ('flavints_joined', []),
        ])

        meta = OrderedDict()
        data = FlavIntData()
        if isinstance(val, (str, h5py.Group)):
            data = hdf.from_hdf(val)
            meta = getattr(data, 'attrs', OrderedDict())
        elif isinstance(val, Events):
            meta = deepcopy(val.metadata)
            data = deepcopy(val)
        elif isinstance(val, Mapping):
            data = deepcopy(val)
            if hasattr(val, 'metadata'):
                meta = deepcopy(val.metadata)
            elif hasattr(val, 'attrs'):
                meta = deepcopy(val.attrs)

        for key, val_ in meta.items():
            if hasattr(val_, 'tolist') and callable(val_.tolist):
                meta[key] = val_.tolist()

        self.metadata.update(meta)
        self.validate(data)
        self.update(data)
        self.update_hash()
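
A minimal usage sketch for this constructor. The module path and file name are assumptions (PISA must be installed and the HDF5 file must follow the PISA events layout); the three branches exercised match the `isinstance` checks above:

# Assumed import location; file name is purely illustrative
from pisa.core.events import Events

# From an HDF5 resource: data and metadata are read from the file
events = Events('events.hdf5')
print(events.metadata['detector'], events.metadata['runs'])

# From another Events instance: data and metadata are deep-copied,
# so mutating the copy leaves the original untouched
copy = Events(events)
assert copy.metadata == events.metadata
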
Example 2
    def apply_cuts(self, data, cuts, boolean_op='&', return_fields=None):
        """Perform `cuts` on `data` and return a dict containing
        `return_fields` from events that pass the cuts.

        Parameters
        ----------
        data : single-level dict or FlavIntData object
        cuts : string, dict, or sequence thereof
            Name(s) of cut(s) defined in self['cuts'], and/or cut-spec dict(s)
        boolean_op : string
            Operator used to combine multiple cuts, e.g. '&' or '|'
        return_fields : None, string, or sequence thereof
            Fields to return from passing events; if None, return all fields
        """
        if isinstance(data, FlavIntData):
            outdata = FlavIntData()
            for flavint in data.flavints:
                outdata[flavint] = self.apply_cuts(data[flavint],
                                                   cuts=cuts,
                                                   boolean_op=boolean_op,
                                                   return_fields=return_fields)
            return outdata

        if isinstance(cuts, (str, dict)):
            cuts = [cuts]

        # Default is to return all fields
        if return_fields is None:
            return_fields = data.keys()

        # If no cuts specified, return all data from specified fields
        if len(cuts) == 0:
            return self.subselect(data, return_fields)

        cut_strings = set()
        cut_fields = set()
        for cut in cuts:
            if isinstance(cut, dict):
                self.validate_cut_spec(cut)
            elif cut.lower() in self['cuts']:
                cut = self['cuts'][cut.lower()]
            else:
                raise ValueError('Unrecognized or invalid cut: "%s"'
                                 % str(cut))
            cut_strings.add(cut['pass_if'])
            cut_fields.update(cut['fields'])

        # Combine cut criteria strings together with boolean operation
        cut_string = boolean_op.join(['(' + cs + ')' for cs in cut_strings])

        # Load the fields necessary for the cut into the global namespace
        for field in set(cut_fields):
            globals()[field] = data[field]

        # Evaluate cuts, returning a boolean array
        try:
            bool_idx = eval(cut_string)  # pylint: disable=eval-used
        except Exception:
            logging.error('Failed to evaluate `cut_string` "%s"', cut_string)
            raise

        # Return specified (or all) fields, indexed by boolean array
        return {f: np.array(data[f])[bool_idx] for f in return_fields}
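
A sketch of how a cut-spec dict interacts with this method, per the `pass_if`/`fields` keys read above. Here `dpp` stands in for an already-instantiated DataProcParams, and the field names are illustrative:

import numpy as np

data = {
    'reco_energy': np.array([0.5, 5.0, 50.0, 500.0]),
    'reco_coszen': np.array([-0.9, -0.5, 0.1, 0.8]),
}

# `pass_if` is eval'd with each name in `fields` bound to a data array
energy_cut = {
    'fields': ['reco_energy'],
    'pass_if': '(reco_energy >= 1) & (reco_energy <= 80)',
}

selected = dpp.apply_cuts(data, cuts=energy_cut,
                          return_fields=['reco_energy'])
# selected -> {'reco_energy': array([ 5., 50.])}
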
Example 3
    def subselect(data, fields, indices=None):
        """Return a copy of `data` containing only `fields`, optionally
        subselecting events via `indices`."""
        if isinstance(data, FlavIntData):
            outdata = FlavIntData()
            for flavint in data.flavints:
                outdata[flavint] = DataProcParams.subselect(data[flavint],
                                                            fields=fields,
                                                            indices=indices)
            return outdata
        if isinstance(data, Mapping):
            if indices is None:
                return {k: v for k, v in data.items() if k in fields}
            return {k: v[indices] for k, v in data.items() if k in fields}
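
For instance, on a plain dict of arrays (field names illustrative), `subselect` filters to the requested fields and, if given, applies `indices` to each array:

import numpy as np

data = {
    'energy': np.array([1.0, 2.0, 3.0, 4.0]),
    'coszen': np.array([-1.0, -0.5, 0.0, 0.5]),
    'weight': np.array([0.1, 0.2, 0.3, 0.4]),
}

# Keep two of the three fields, restricted to the first two events
out = DataProcParams.subselect(data, fields=['energy', 'weight'],
                               indices=slice(0, 2))
# out -> {'energy': array([1., 2.]), 'weight': array([0.1, 0.2])}
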
Example 4
    def __init__(self, val=None):
        self.metadata = OrderedDict([
            ('detector', ''),
            ('geom', ''),
            ('runs', []),
            ('proc_ver', ''),
            ('cuts', []),
            ('flavints_joined', []),
        ])
        meta = {}
        data = FlavIntData()
        if isinstance(val, (basestring, h5py.Group)):
            data, meta = self.__load(val)
        elif isinstance(val, Events):
            meta = deepcopy(val.metadata)
            data = deepcopy(val)
        elif isinstance(val, dict):
            data = deepcopy(val)
        self.metadata.update(meta)
        self.validate(data)
        self.update(data)
        self.update_hash()
Example 5
    def validate_xsec(energy, xsec):
        """Validate cross sections"""
        # TODO: different validation based on cross sections version string

        # Make sure the basics are present
        xsec = FlavIntData(xsec)

        # No NaNs
        assert not np.any(np.isnan(energy))
        # Energy spans at least 1-100 GeV
        assert np.min(energy) <= 1
        assert np.max(energy) >= 100

        # All event flavints need to be present
        for k in ALL_NUFLAVINTS:
            # Uses "standard" PISA indexing scheme
            x = xsec[k]
            # Arrays are same lengths
            assert len(x) == len(energy)
            # No NaNs
            assert np.sum(np.isnan(x)) == 0
            # Max xsec/energy value is in range for units of [m^2/GeV]
            assert np.max(x/energy) < 40e-42, np.max(x/energy)
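
A quick self-check sketch with synthetic values, assuming `FlavIntData`, `ALL_NUFLAVINTS`, and the enclosing `CrossSections` class are importable as used above; it mirrors the `xsec[flavint] = ...` assignment pattern from `load_root_file` below, and the constant 1e-42 keeps `max(x/energy)` under the 40e-42 m^2/GeV bound asserted here:

import numpy as np

energy = np.logspace(0, 3, 200)       # covers the required 1-100 GeV span
xsec = FlavIntData()
for flavint in ALL_NUFLAVINTS:
    xsec[flavint] = 1e-42 * energy    # max(x/energy) = 1e-42 < 40e-42

CrossSections.validate_xsec(energy, xsec)  # raises AssertionError if invalid
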
Example 6
    def load_root_file(fpath, ver, tot_sfx='_tot', o_sfx='_o16', h_sfx='_h1',
                       plt_sfx='_plot'):
        """Load cross sections from root file, where graphs are first-level in
        hierarchy. This is yet crude and not very flexible, but at least it's
        recorded here for posterity.

        Requires ROOT and ROOT python module be installed

        Parameters
        ----------
        fpath : string
            Path to ROOT file
        ver : string
            Necessary to differentiate among different file formats that Ken
            has sent out
        tot_sfx : string (default = '_tot')
            Suffix for finding total cross sections in ROOT file (if these
            fields are found, the oxygen/hydrogen fields are skipped)
        o_sfx : string (default = '_o16')
            Suffix for finding oxygen-16 cross sections in ROOT file
        h_sfx : string (default = '_h1')
            Suffix for finding hydrogen-1 cross sections in ROOT file
        plt_sfx : string (default = '_plot')
            Suffix for plots containing cross sections per GeV in ROOT file

        Returns
        -------
        xsec : :class:`pisa.utils.flavInt.FlavIntData`
            Object containing the loaded cross sections
        """
        import ROOT

        def extractData(f, key):
            """Extract x and y info from (already-opened) ROOT TFile."""
            try:
                g = ROOT.gDirectory.Get(key)
                x = np.array(g.GetX())
                y = np.array(g.GetY())
            except AttributeError:
                raise ValueError('Possibly missing file "%s" or missing key'
                                 ' "%s" within that file?' % (f, key))
            return x, y

        rfile = ROOT.TFile(fpath) # pylint: disable=no-member
        try:
            energy = None
            xsec = FlavIntData()
            for flavint in ALL_NUFLAVINTS:
                if ver == 'genie_2.6.4':
                    # Expected to contain xsect per atom; summing 2*Hydrogen
                    # and 1*Oxygen yields total cross section for water
                    # molecule.
                    # Format as found in, e.g., "genie_2.6.4_simplified.root"
                    key = str(flavint) + o_sfx
                    o16_e, o16_xs = extractData(rfile, key)

                    key = str(flavint) + h_sfx
                    h1_e, h1_xs = extractData(rfile, key)

                    tot_xs = h1_xs*2 + o16_xs*1
                    assert np.all(h1_e == o16_e)
                    ext_e = o16_e

                elif ver == 'genie_2.8.6':
                    # Expected to contain xsect-per-nucleon-per-energy, so
                    # multiplying by energy and by # of nucleons (18) yields
                    # cross sections per molecule.
                    # Format as found in, e.g., "genie_2.8.6_simplified.root"
                    key = str(flavint) + plt_sfx
                    ext_e, fract_xs = extractData(rfile, key)
                    tot_xs = fract_xs * ext_e * 18

                else:
                    raise ValueError('Invalid or not implemented `ver`: "%s"'
                                     % ver)
                if energy is None:
                    energy = ext_e

                assert np.all(ext_e == energy)

                # Note that units in the ROOT files are [1e-38 cm^2] but PISA
                # requires units of [m^2], so this conversion is made here.
                xsec[flavint] = tot_xs * 1e-38 * 1e-4
        finally:
            rfile.Close()

        CrossSections.validate_xsec(energy, xsec)

        return energy, xsec
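
A hypothetical invocation; the file name echoes the one cited in the comments above, and ROOT must be installed:

energy, xsec = CrossSections.load_root_file('genie_2.8.6_simplified.root',
                                            ver='genie_2.8.6')
# energy is in GeV; each xsec entry is a per-water-molecule cross section
# in m^2, per the unit conversion applied above
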
Example 7
    def get_combined_xsec(fpath, ver=None):
        """Load the cross-section values from a ROOT file and instantiate a
        CombinedSpline object."""
        # NOTE: ROOT import here as it is optional but still want to import
        # module for e.g. building docs
        import ROOT

        fpath = find_resource(fpath)
        logging.info('Loading GENIE ROOT cross-section file %s', fpath)

        # Name of neutrino flavours in the ROOT file.
        flavs = ('nu_e', 'nu_mu', 'nu_tau', 'nu_e_bar', 'nu_mu_bar',
                 'nu_tau_bar')

        rfile = ROOT.TFile.Open(fpath, 'read')  # pylint: disable=no-member
        xsec_splines = FlavIntData()
        for flav in flavs:
            for int_ in ALL_NUINT_TYPES:
                xsec_splines[flav, int_] = {}
                for part in ('O16', 'H1'):
                    str_repr = flav + '_' + part + '/' + 'tot_' + str(int_)
                    xsec_splines[flav + str(int_)][part] = \
                        ROOT.gDirectory.Get(str_repr) # pylint: disable=no-member
        rfile.Close()

        def eval_spl(spline,
                     binning,
                     out_units=ureg.m**2,
                     x_energy_scale=1,
                     **kwargs):
            init_names = ['true_energy']
            init_units = [ureg.GeV]

            if set(binning.names) != set(init_names):
                raise ValueError('Input binning names {0} do not match '
                                 'instantiation binning names '
                                 '{1}'.format(binning.names, init_names))

            if set(map(str, binning.units)) != set(map(str, init_units)):
                for name in init_names:
                    binning[name].to(init_units)

            bin_centers = [x.m for x in binning.weighted_centers][0]

            nu_O16, nu_H1 = [], []
            for e_val in bin_centers:
                nu_O16.append(spline['O16'].Eval(e_val))
                nu_H1.append(spline['H1'].Eval(e_val))

            nu_O16, nu_H1 = map(np.array, (nu_O16, nu_H1))
            nu_xsec = ((0.8879 * nu_O16) +
                       (0.1121 * nu_H1)) * 1E-38 * ureg.cm**2

            nu_xsec_hist = nu_xsec.to(out_units).magnitude
            return Map(hist=nu_xsec_hist, binning=binning, **kwargs)

        def validate_spl(binning):
            if np.all(binning.true_energy.midpoints.m > 1E3):
                raise ValueError('Energy values out of range in binning '
                                 '{0}'.format(binning.true_energy))

        inXSec = []
        for flav in flavs:
            for int_ in ALL_NUINT_TYPES:
                flavint = NuFlavInt(flav + str(int_))
                xsec = Spline(name=str(flavint),
                              spline=xsec_splines[flavint],
                              eval_spl=eval_spl,
                              validate_spl=validate_spl)
                inXSec.append(xsec)

        return CombinedSpline(inXSec, interactions=True, ver=ver)
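
Hypothetical usage; the file path is illustrative, ROOT and PISA's `find_resource` machinery must be available, and how the function is exposed (module-level versus a static method of its class) is assumed here:

xsec = get_combined_xsec('cross_sections/genie_2.8.6_splines.root',
                         ver='genie_2.8.6')
# xsec is a CombinedSpline covering all 12 flavor/interaction combinations
# (6 flavors from `flavs` times the interaction types in ALL_NUINT_TYPES)
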
Example 8
def makeEventsFile(data_files,
                   detector,
                   proc_ver,
                   cut,
                   outdir,
                   run_settings=None,
                   data_proc_params=None,
                   join=None,
                   cust_cuts=None,
                   extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified
    PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in
        e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver : string
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut : string
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir : string
        Directory path in which to store resulting files; will be generated if
        it does not already exist (including any parent directories that do not
        exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams object
        instantiated therefrom.

    join : string
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts : dict or sequence of dicts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not present, effective area will
        not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by the
    total *for that int type only* to obtain the "weighted_aeff" for that
    event (even if int types are being grouped/joined together).

    This has the effect that within a group, ...
      ... and within an interaction type, effective area is a weighted
      average of that of the flavors being combined. E.g. for CC,

                     \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
          Aeff_CC = ----------------------------------------------------- ,
                          \sum_{run x}\sum_{flav y} (ngen_{x,y})

      ... and then across interaction types, the results of the above for
      each int type need to be summed together, i.e.:

          Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params))
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    all_flavs = []
    flavs_by_run = {}
    run_norm_factors = {}
    bin_edges = set()

    runs_by_flavint = FlavIntData()
    for flavint in runs_by_flavint.flavints:
        runs_by_flavint[flavint] = []

    #ngen_flavint_by_run = {run:FlavIntData() for run in runs}
    ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs}
    #eint_per_flav_by_run = {run:FlavIntData() for run in runs}
    #for run in runs:
    #    flavints_in_run = run_settings.get_flavints(run=run)
    #    e_range = run_settings.get_energy_range(run)
    #    gamma = run_settings.get_spectral_index(run)
    #    for flavint in flavints_in_run:
    #        runs_by_flavint[flavint].append(run)
    #        ngen_flav = run_settings.get_num_gen(
    #            run=run, flav_or_flavint=flavint, include_physical_fract=True
    #        )
    #        #runs_by_flavint[flavint].append(run)
    #        #this_flav = flavint.
    #        #xsec_fract_en_wtd_avg[run][flavint] = \
    #        ngen_flavint_by_run[run][flavint] = \
    #                xsec.get_xs_ratio_integral(
    #                    flavintgrp0=flavint,
    #                    flavintgrp1=flavint.flav,
    #                    e_range=e_range,
    #                    gamma=gamma,
    #                    average=True
    #                )
    #    xsec_ver = run_settings.get_xsec_version(run=run)
    #    if xsec_ver_ref is None:
    #        xsec_ver_ref = xsec_ver
    #    # An assumption of below logic is that all MC is generated using the
    #    # same cross sections version.
    #    #
    #    # TODO / NOTE:
    #    # It would be possible to combine runs with different cross sections so
    #    # long as each (flavor, interaction type) cross sections are
    #    # weighted-averaged together using weights
    #    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    #    # where E_x are the energy sample points specified in the cross
    #    # sections (and hence these must also be identical across all cross
    #    # sections that get combined, unless interpolation is performed).
    #    assert xsec_ver == xsec_ver_ref
    #    #ngen_weighted_energy_integral[str(run)] = powerLawIntegral(
    #    #flavs_by_run[run] = run_settings.flavs(run)
    ##flavs_present =

    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: ' +
            '; '.join([str(g) for g in grouped]))

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined '
                 'together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {}
                           for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: []
                      for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count
    # CC separately from NC because aeffs for CC & NC add, whereas
    # aeffs intra-CC should be weighted-averaged (as for intra-NC)
    ngen = [{inttype: {}
             for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            #run = int(data['run'][0])
            if run not in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                    'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields)

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if run_flavint not in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if run not in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight includes cc/nc:total fraction,
                        # so DO NOT specify the full flavint here, only flav
                        # (since one_weight does NOT take bar/nobar fraction,
                        # it must be included here in the ngen computation)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    to_file(bad_files, '/tmp/bad_files.json')

    if ((output_fields is None and
         (extract_fields is None or 'one_weight' in extract_fields))
            or (output_fields is not None
                and 'weighted_aeff' in output_fields)):
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', '     run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = '  '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = '  '.join(['-' * n for n in fmt_n])
        logging.info(fmt, *fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in set([fi.intType for fi in flavint_group.flavints]):
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(fmt, flavint_group.simple_str(), int_type,
                                     str(run), barnobar, int(barnobar_counts),
                                     int(ngen_it_tot))
                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                # Generate weighted_aeff field for this group / int type's data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                        extracted_data[grp_n][int_type]['one_weight'] \
                        / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.', run,
                count, ref_num_i3_files)

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if flavint not in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.', flavint,
                    flavint_group)
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            int(run)
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = str(detector_geom)
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)
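
A hypothetical end-to-end invocation; every path, run number, and name below is illustrative, the resource files are those named in the docstring, and the `join` string reuses the docstring's own example format:

makeEventsFile(
    data_files={
        '126001': ['/data/sim/126001/part0.hd5', '/data/sim/126001/part1.hd5'],
        '126002': ['/data/sim/126002/part0.hd5'],
    },
    detector='deepcore',
    proc_ver='5digit',
    cut='analysis',
    outdir='/tmp/pisa_events',
    run_settings='events/mc_sim_run_settings.json',
    data_proc_params='events/data_proc_params.json',
    join='numucc+numubarcc; nuall bar NC, nuall NC',
)
# Writes a PISA-standard events__<detector>__<geom>__runs_...__proc_...hdf5
# file into /tmp/pisa_events
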