Python expand Examples, pisa.utils.fileio.expand Python Examples

Example #1

0

Show file

def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')

    distribution_maker = DistributionMaker(pipelines=args.pipeline)  # pylint: disable=redefined-outer-name
    if args.select is not None:
        distribution_maker.select_params(args.select)

    outputs = distribution_maker.get_outputs(return_sum=args.return_sum)  # pylint: disable=redefined-outer-name
    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'distribution_maker_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(outdir=args.outdir,
                             fmt=plot_formats,
                             log=False,
                             annotate=False)
        for num, output in enumerate(outputs):
            my_plotter.plot_2d_array(output, fname='dist_output_%d' % num)

    if return_outputs:
        return distribution_maker, outputs

Example #2

0

Show file

File: detectors.py Project: terliuk/pisa

def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')
        
    detectors = Detectors(args.pipeline,shared_params=args.shared_params)
    Names = detectors.det_names
    if args.select is not None:
        detectors.select_params(args.select)

    outputs = detectors.get_outputs(return_sum=args.return_sum)

    #outputs = outputs[0].fluctuate(
     #               method='poisson', random_state=get_random_state([0, 0, 0]))

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'detectors_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(
            outdir=args.outdir,
            fmt=plot_formats, log=False,
            annotate=False
        )
        for num, output in enumerate(outputs):
            if args.return_sum:
                my_plotter.plot_2d_array(
                    output,
                    fname=Names[num]
                )
            else:
                for out in output:
                    my_plotter.plot_2d_array(
                        out,
                        fname=Names[num]
                    )

    if return_outputs:
        return detectors, outputs

Example #3

0

Show file

def stability_test(func, func_kw, ref_path, ignore_fails=False, define_as_ref=False):
    """basic stability test of a Numba CPUDispatcher function (i.e., function
    compiled via @jit / @njit)"""
    func_name = func.py_func.__name__
    logging.info("stability testing `%s`", func_name)
    ref_path = expand(ref_path)

    test = execute_func(func=func, func_kw=func_kw)

    if define_as_ref:
        to_file(test, ref_path)

    # Even when we define the test case as ref, round-trip to/from file to
    # ensure that doesn't corrupt the values
    ref = from_file(ref_path)

    check(test=test, ref=ref, label=func_name, ignore_fails=ignore_fails)

    return test, ref

Example #4

0

Show file

File: run_unit_tests.py Project: LeanderFischer/pisa

def find_unit_tests(path):
    """Find .py file(s) and any tests to run within them, starting at `path`
    (which can be a single file or a directory, which is recursively searched
    for .py files)

    Parameters
    ----------
    path : str
        Path to a file or directory

    Returns
    -------
    tests : dict
        Each key is the path to the .py file relative to PISA_PATH and each
        value is a list of the "test_*" function names within that file (empty
        if no such functions are found)

    """
    path = expand(path, absolute=True, resolve_symlinks=True)

    tests = {}
    if isfile(path):
        filerelpath = relpath(path, start=PISA_PATH)
        tests[filerelpath] = find_unit_tests_in_file(path)
        return tests

    for dirpath, dirs, files in walk(path, followlinks=True):
        files.sort(key=nsort_key_func)
        dirs.sort(key=nsort_key_func)

        for filename in files:
            if not filename.endswith(".py"):
                continue
            filepath = join(dirpath, filename)
            filerelpath = relpath(filepath, start=PISA_PATH)
            tests[filerelpath] = find_unit_tests_in_file(filepath)

    return tests

Example #5

0

Show file

File: run_unit_tests.py Project: LeanderFischer/pisa

def find_unit_tests_in_file(filepath):
    """Find test functions defined by "def test_*" within a file at `filepath`

    Parameters
    ----------
    filepath : str
         Path to python file

    Returns
    -------
    tests : list of str

    """
    filepath = expand(filepath, absolute=True, resolve_symlinks=True)
    assert isfile(filepath), str(filepath)
    tests = []
    with open(filepath, "r") as f:
        for line in f.readlines():
            tokens = line.split()
            if tokens and tokens[0] == "def" and tokens[1].startswith("test_"):
                funcname = tokens[1].split("(")[0].strip()
                tests.append(funcname)
    return tests

Example #6

0

Show file

def test_CrossSections(outdir=None):
    """Unit tests for CrossSections class"""
    from shutil import rmtree
    from tempfile import mkdtemp

    remove_dir = False
    if outdir is None:
        remove_dir = True
        outdir = mkdtemp()

    try:
        # "Standard" location of cross sections file in PISA; retrieve 2.6.4 for
        # testing purposes
        pisa_xs_file = 'cross_sections/cross_sections.json'
        xs = CrossSections(ver='genie_2.6.4', xsec=pisa_xs_file)

        # Location of the root file to use (not included in PISA at the moment)
        test_dir = expand(os.path.join('/tmp', 'pisa_tests', 'cross_sections'))
        #root_xs_file = os.path.join(test_dir, 'genie_2.6.4_simplified.root')
        root_xs_file = find_resource(os.path.join(
            #'tests', 'data', 'xsec', 'genie_2.6.4_simplified.root'
            'cross_sections', 'genie_xsec_H2O.root'
        ))

        # Make sure that the XS newly-imported from ROOT match those stored in
        # PISA
        if os.path.isfile(root_xs_file):
            xs_from_root = CrossSections.new_from_root(root_xs_file,
                                                       ver='genie_2.6.4')
            logging.info('Found and loaded ROOT source cross sections file %s',
                         root_xs_file)
            #assert xs_from_root.allclose(xs, rtol=1e-7)

        # Check XS ratio for numu_cc to numu_cc + numu_nc (user must inspect)
        kg0 = NuFlavIntGroup('numu_cc')
        kg1 = NuFlavIntGroup('numu_nc')
        logging.info(
            r'\int_1^80 xs(numu_cc) E^{-1} dE = %e',
            xs.get_xs_ratio_integral(kg0, None, e_range=[1, 80], gamma=1)
        )
        logging.info(
            '(int E^{-gamma} * (sigma_numu_cc)/int(sigma_(numu_cc+numu_nc)) dE)'
            ' / (int E^{-gamma} dE) = %e',
            xs.get_xs_ratio_integral(kg0, kg0+kg1, e_range=[1, 80], gamma=1,
                                     average=True)
        )
        # Check that XS ratio for numu_cc+numu_nc to the same is 1.0
        int_val = xs.get_xs_ratio_integral(kg0+kg1, kg0+kg1, e_range=[1, 80],
                                           gamma=1, average=True)
        if not recursiveEquality(int_val, 1):
            raise ValueError('Integral of nc + cc should be 1.0; get %e'
                             ' instead.' % int_val)

        # Check via plot that the

        # Plot all cross sections stored in PISA xs file
        try:
            alldata = from_file(pisa_xs_file)
            xs_versions = alldata.keys()
            for ver in xs_versions:
                xs = CrossSections(ver=ver, xsec=pisa_xs_file)
                xs.plot(save=os.path.join(
                    outdir, 'pisa_' + ver + '_nuxCCNC_H2O_cross_sections.pdf'
                ))
        except ImportError as exc:
            logging.debug('Could not plot; possible that matplotlib not'
                          'installed. ImportError: %s', exc)

    finally:
        if remove_dir:
            rmtree(outdir)

Example #7

0

Show file

File: run_unit_tests.py Project: LeanderFischer/pisa

def run_unit_tests(path=PISA_PATH,
                   allow_missing=OPTIONAL_MODULES,
                   verbosity=Levels.WARN):
    """Run all tests found at `path` (or recursively below if `path` is a
    directory).

    Each module is imported and each test function is run initially with
    `set_verbosity(verbosity)`, but if an exception is caught, the module is
    re-imported or the test function is re-run with
    `set_verbosity(Levels.TRACE)`, then the traceback from the (original)
    exception emitted is displayed.

    Parameters
    ----------
    path : str
        Path to file or directory

    allow_missing : None or sequence of str

    verbosity : int in pisa.utils.log.Levels

    Raises
    ------
    Exception
        If any import or test fails not in `allow_missing`

    """
    set_verbosity(verbosity)
    logging.info("%sPlatform information:", PFX)
    logging.info("%s  HOSTNAME = %s", PFX, socket.gethostname())
    logging.info("%s  FQDN = %s", PFX, socket.getfqdn())
    logging.info("%s  OS = %s %s", PFX, platform.system(), platform.release())
    for key, val in cpuinfo.get_cpu_info().items():
        logging.info("%s  %s = %s", PFX, key, val)
    logging.info(PFX)
    logging.info("%sModule versions:", PFX)
    for module_name in REQUIRED_MODULES + OPTIONAL_MODULES:
        try:
            module = import_module(module_name)
        except ImportError:
            if module_name in REQUIRED_MODULES:
                raise
            ver = "optional module not installed or not import-able"
        else:
            if hasattr(module, "__version__"):
                ver = module.__version__
            else:
                ver = "?"
        logging.info("%s  %s : %s", PFX, module_name, ver)
    logging.info(PFX)

    path = expand(path, absolute=True, resolve_symlinks=True)
    if allow_missing is None:
        allow_missing = []
    elif isinstance(allow_missing, str):
        allow_missing = [allow_missing]

    tests = find_unit_tests(path)

    module_pypaths_succeeded = []
    module_pypaths_failed = []
    module_pypaths_failed_ignored = []
    test_pypaths_succeeded = []
    test_pypaths_failed = []
    test_pypaths_failed_ignored = []

    for rel_file_path, test_func_names in tests.items():
        pypath = ["pisa"] + rel_file_path[:-3].split("/")
        parent_pypath = ".".join(pypath[:-1])
        module_name = pypath[-1].replace(".", "_")
        module_pypath = f"{parent_pypath}.{module_name}"

        try:
            set_verbosity(verbosity)
            logging.info(PFX + f"importing {module_pypath}")

            set_verbosity(Levels.WARN)
            module = import_module(module_pypath, package=parent_pypath)

        except Exception as err:
            if (isinstance(err, ImportError) and hasattr(err, "name")
                    and err.name in allow_missing  # pylint: disable=no-member
                ):
                err_name = err.name  # pylint: disable=no-member
                module_pypaths_failed_ignored.append(module_pypath)
                logging.warning(
                    f"{PFX}module {err_name} failed to import wile importing"
                    f" {module_pypath}, but ok to ignore")
                continue

            module_pypaths_failed.append(module_pypath)

            set_verbosity(verbosity)
            msg = f"<< FAILURE IMPORTING : {module_pypath} >>"
            logging.error(PFX + "=" * len(msg))
            logging.error(PFX + msg)
            logging.error(PFX + "=" * len(msg))

            # Reproduce the failure with full output
            set_verbosity(Levels.TRACE)
            try:
                import_module(module_name, package=parent_pypath)
            except Exception:
                pass

            set_verbosity(Levels.TRACE)
            logging.exception(err)

            set_verbosity(verbosity)
            logging.error(PFX + "#" * len(msg))

            continue

        else:
            module_pypaths_succeeded.append(module_pypath)

        for test_func_name in test_func_names:
            test_pypath = f"{module_pypath}.{test_func_name}"
            try:
                set_verbosity(verbosity)
                logging.debug(PFX + f"getattr({module}, {test_func_name})")

                set_verbosity(Levels.WARN)
                test_func = getattr(module, test_func_name)

                # Run the test function
                set_verbosity(verbosity)
                logging.info(PFX + f"{test_pypath}()")

                set_verbosity(Levels.WARN)
                test_func()

            except Exception as err:
                if (isinstance(err, ImportError) and hasattr(err, "name")
                        and err.name in allow_missing  # pylint: disable=no-member
                    ):
                    err_name = err.name  # pylint: disable=no-member
                    test_pypaths_failed_ignored.append(module_pypath)
                    logging.warning(
                        PFX +
                        f"{test_pypath} failed because module {err_name} failed to"
                        + f" load, but ok to ignore")

                    continue

                test_pypaths_failed.append(test_pypath)
                set_verbosity(verbosity)
                msg = f"<< FAILURE RUNNING : {test_pypath} >>"
                logging.error(PFX + "=" * len(msg))
                logging.error(PFX + msg)
                logging.error(PFX + "=" * len(msg))

                # Reproduce the error with full output

                set_verbosity(Levels.TRACE)
                try:
                    test_func = getattr(module, test_func_name)
                    with np.printoptions(
                            precision=np.finfo(pisa.FTYPE).precision + 2,
                            floatmode="fixed",
                            sign=" ",
                            linewidth=200,
                    ):
                        test_func()
                except Exception:
                    pass

                set_verbosity(Levels.TRACE)
                logging.exception(err)

                set_verbosity(verbosity)
                logging.error(PFX + "#" * len(msg))

            else:
                test_pypaths_succeeded.append(test_pypath)

            finally:
                # remove references to the test function, e.g. to remove refs
                # to pycuda / numba.cuda contexts so these can be closed
                try:
                    del test_func
                except NameError:
                    pass

        # NOTE: Until we get all GPU code into Numba, need to unload pycuda
        # and/or numba.cuda contexts before a module requiring the other one is
        # to be imported.
        # NOTE: the following causes a traceback to be emitted at the very end
        # of the script, regardless of the exception catching here.
        if (pisa.TARGET == "cuda" and pycuda is not None
                and hasattr(pycuda, "autoinit")
                and hasattr(pycuda.autoinit, "context")):
            try:
                pycuda.autoinit.context.detach()
            except Exception:
                pass

        # Attempt to unload the imported module
        # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False
        #if module_pypath in sys.modules and module_pypath != "pisa":
        #    del sys.modules[module_pypath]
        #del module

        # TODO: crashes program; subseqeunt calls in same shell crash(!?!?)
        # if pisa.TARGET == 'cuda' and nbcuda is not None:
        #    try:
        #        nbcuda.close()
        #    except Exception:
        #        pass

    # Summarize results

    n_import_successes = len(module_pypaths_succeeded)
    n_import_failures = len(module_pypaths_failed)
    n_import_failures_ignored = len(module_pypaths_failed_ignored)
    n_test_successes = len(test_pypaths_succeeded)
    n_test_failures = len(test_pypaths_failed)
    n_test_failures_ignored = len(test_pypaths_failed_ignored)

    set_verbosity(verbosity)
    logging.info(
        PFX + f"<< IMPORT TESTS : {n_import_successes} imported,"
        f" {n_import_failures} failed,"
        f" {n_import_failures_ignored} failed to import but ok to ignore >>")
    logging.info(PFX + f"<< UNIT TESTS : {n_test_successes} succeeded,"
                 f" {n_test_failures} failed,"
                 f" {n_test_failures_ignored} failed but ok to ignore >>")

    # Exit with error if any failures (import or unit test)

    if module_pypaths_failed or test_pypaths_failed:
        msgs = []
        if module_pypaths_failed:
            msgs.append(
                f"{n_import_failures} module(s) failed to import:\n  " +
                ", ".join(module_pypaths_failed))
        if test_pypaths_failed:
            msgs.append(f"{n_test_failures} unit test(s) failed:\n  " +
                        ", ".join(test_pypaths_failed))

        # Note the extra newlines before the exception to make it stand out;
        # and newlines after the exception are due to the pycuda error message
        # that is emitted when we call pycuda.autoinit.context.detach()
        sys.stdout.flush()
        sys.stderr.write("\n\n\n")
        raise Exception("\n".join(msgs) + "\n\n\n")

Example #8

0

Show file

File: run_unit_tests.py Project: LeanderFischer/pisa

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License."""

# TODO: add timing for imports & unit test; faster => more used, more useful

PISA_PATH = expand(dirname(pisa.__file__),
                   absolute=True,
                   resolve_symlinks=True)

# TODO: get optional & required automatically (e.g., from setup.py?)
OPTIONAL_MODULES = (
    "pandas",
    "emcee",
    "pycuda",
    "pycuda.driver",
    "ROOT",
    "libPyROOT",
    "MCEq",
    "nuSQUIDSpy",
)
"""Okay if imports or test_* functions fail due to these not being import-able"""

Example #9

0

Show file

File: make_events_file.py Project: thehrh/pisa-1

def makeEventsFile(data_files,
                   detector,
                   proc_ver,
                   cut,
                   outdir,
                   run_settings=None,
                   data_proc_params=None,
                   join=None,
                   cust_cuts=None,
                   extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified
    PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in
        e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir
        Directory path in which to store resulting files; will be generated if
        it does not already exist (including any parent directories that do not
        exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams object
        instantiated therefrom.

    join
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not preent, effective area will
        not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by the
    total *for that int type only* to obtain the "weighted_aeff" for that
    event (even if int types are being grouped/joined together).

    This has the effect that within a group, ...
      ... and within an interaction type, effective area is a weighted
      average of that of the flavors being combined. E.g. for CC,

                     \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
          Aeff_CC = ----------------------------------------------------- ,
                          \sum_{run x}\sum_{flav y} (ngen_{x,y})

      ... and then across interaction types, the results of the above for
      each int type need to be summed together, i.e.:

          Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params))
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    all_flavs = []
    flavs_by_run = {}
    run_norm_factors = {}
    bin_edges = set()

    runs_by_flavint = FlavIntData()
    for flavint in runs_by_flavint.flavints:
        runs_by_flavint[flavint] = []

    #ngen_flavint_by_run = {run:FlavIntData() for run in runs}
    ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs}
    #eint_per_flav_by_run = {run:FlavIntData() for run in runs}
    #for run in runs:
    #    flavints_in_run = run_settings.get_flavints(run=run)
    #    e_range = run_settings.get_energy_range(run)
    #    gamma = run_settings.get_spectral_index(run)
    #    for flavint in flavints_in_run:
    #        runs_by_flavint[flavint].append(run)
    #        ngen_flav = run_settings.get_num_gen(
    #            run=run, flav_or_flavint=flavint, include_physical_fract=True
    #        )
    #        #runs_by_flavint[flavint].append(run)
    #        #this_flav = flavint.
    #        #xsec_fract_en_wtd_avg[run][flavint] = \
    #        ngen_flavint_by_run[run][flavint] = \
    #                xsec.get_xs_ratio_integral(
    #                    flavintgrp0=flavint,
    #                    flavintgrp1=flavint.flav,
    #                    e_range=e_range,
    #                    gamma=gamma,
    #                    average=True
    #                )
    #    xsec_ver = run_settings.get_xsec_version(run=run)
    #    if xsec_ver_ref is None:
    #        xsec_ver_ref = xsec_ver
    #    # An assumption of below logic is that all MC is generated using the
    #    # same cross sections version.
    #    #
    #    # TODO / NOTE:
    #    # It would be possible to combine runs with different cross sections so
    #    # long as each (flavor, interaction type) cross sections are
    #    # weighted-averaged together using weights
    #    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    #    # where E_x are the energy sample points specified in the cross
    #    # sections (and hence these must also be identical across all cross
    #    # sections that get combined, unless interpolation is performed).
    #    assert xsec_ver == xsec_ver_ref
    #    #ngen_weighted_energy_integral[str(run)] = powerLawIntegral(
    #    #flavs_by_run[run] = run_settings.flavs(run)
    ##flavs_present =

    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: ' +
            '; '.join([str(g) for g in grouped]))

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined'
                 'together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {}
                           for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: []
                      for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count
    # CClseparately from NC because aeff's for CC & NC add, whereas
    # aeffs intra-CC should be weighted-averaged (as for intra-NC)
    ngen = [{inttype: {}
             for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            #run = int(data['run'][0])
            if not run in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                    'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields)

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if not run_flavint in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if not run in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight includes cc/nc:total fraction,
                        # so DO NOT specify the full flavint here, only flav
                        # (since one_weight does NOT take bar/nobar fraction,
                        # it must be included here in the ngen computation)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    to_file(bad_files, '/tmp/bad_files.json')

    if ((output_fields is None and
         (extract_fields is None or 'one_weight' in extract_fields))
            or 'weighted_aeff' in output_fields):
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', '     run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = '  '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = '  '.join(['-' * n for n in fmt_n])
        logging.info(fmt, fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in set([fi.intType for fi in flavint_group.flavints]):
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(fmt, flavint_group.simple_str(), int_type,
                                     str(run), barnobar, int(barnobar_counts),
                                     int(ngen_it_tot))
                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                # Generate weighted_aeff field for this group / int type's data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                        extracted_data[grp_n][int_type]['one_weight'] \
                        / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.', run,
                count, ref_num_i3_files)

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if not flavint in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.', flavint,
                    flavint_group)
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            int(run)
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = '' + detector_geom
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)