def main(return_outputs=False): """Main; call as script with `return_outputs=False` or interactively with `return_outputs=True`""" from pisa.utils.plotter import Plotter args = parse_args() set_verbosity(args.v) plot_formats = [] if args.pdf: plot_formats.append('pdf') if args.png: plot_formats.append('png') distribution_maker = DistributionMaker(pipelines=args.pipeline) # pylint: disable=redefined-outer-name if args.select is not None: distribution_maker.select_params(args.select) outputs = distribution_maker.get_outputs(return_sum=args.return_sum) # pylint: disable=redefined-outer-name if args.outdir: # TODO: unique filename: append hash (or hash per pipeline config) fname = 'distribution_maker_outputs.json.bz2' mkdir(args.outdir) fpath = expand(os.path.join(args.outdir, fname)) to_file(outputs, fpath) if args.outdir and plot_formats: my_plotter = Plotter(outdir=args.outdir, fmt=plot_formats, log=False, annotate=False) for num, output in enumerate(outputs): my_plotter.plot_2d_array(output, fname='dist_output_%d' % num) if return_outputs: return distribution_maker, outputs
def main(return_outputs=False): """Main; call as script with `return_outputs=False` or interactively with `return_outputs=True`""" from pisa.utils.plotter import Plotter args = parse_args() set_verbosity(args.v) plot_formats = [] if args.pdf: plot_formats.append('pdf') if args.png: plot_formats.append('png') detectors = Detectors(args.pipeline,shared_params=args.shared_params) Names = detectors.det_names if args.select is not None: detectors.select_params(args.select) outputs = detectors.get_outputs(return_sum=args.return_sum) #outputs = outputs[0].fluctuate( # method='poisson', random_state=get_random_state([0, 0, 0])) if args.outdir: # TODO: unique filename: append hash (or hash per pipeline config) fname = 'detectors_outputs.json.bz2' mkdir(args.outdir) fpath = expand(os.path.join(args.outdir, fname)) to_file(outputs, fpath) if args.outdir and plot_formats: my_plotter = Plotter( outdir=args.outdir, fmt=plot_formats, log=False, annotate=False ) for num, output in enumerate(outputs): if args.return_sum: my_plotter.plot_2d_array( output, fname=Names[num] ) else: for out in output: my_plotter.plot_2d_array( out, fname=Names[num] ) if return_outputs: return detectors, outputs
def stability_test(func, func_kw, ref_path, ignore_fails=False, define_as_ref=False): """basic stability test of a Numba CPUDispatcher function (i.e., function compiled via @jit / @njit)""" func_name = func.py_func.__name__ logging.info("stability testing `%s`", func_name) ref_path = expand(ref_path) test = execute_func(func=func, func_kw=func_kw) if define_as_ref: to_file(test, ref_path) # Even when we define the test case as ref, round-trip to/from file to # ensure that doesn't corrupt the values ref = from_file(ref_path) check(test=test, ref=ref, label=func_name, ignore_fails=ignore_fails) return test, ref
def find_unit_tests(path): """Find .py file(s) and any tests to run within them, starting at `path` (which can be a single file or a directory, which is recursively searched for .py files) Parameters ---------- path : str Path to a file or directory Returns ------- tests : dict Each key is the path to the .py file relative to PISA_PATH and each value is a list of the "test_*" function names within that file (empty if no such functions are found) """ path = expand(path, absolute=True, resolve_symlinks=True) tests = {} if isfile(path): filerelpath = relpath(path, start=PISA_PATH) tests[filerelpath] = find_unit_tests_in_file(path) return tests for dirpath, dirs, files in walk(path, followlinks=True): files.sort(key=nsort_key_func) dirs.sort(key=nsort_key_func) for filename in files: if not filename.endswith(".py"): continue filepath = join(dirpath, filename) filerelpath = relpath(filepath, start=PISA_PATH) tests[filerelpath] = find_unit_tests_in_file(filepath) return tests
def find_unit_tests_in_file(filepath): """Find test functions defined by "def test_*" within a file at `filepath` Parameters ---------- filepath : str Path to python file Returns ------- tests : list of str """ filepath = expand(filepath, absolute=True, resolve_symlinks=True) assert isfile(filepath), str(filepath) tests = [] with open(filepath, "r") as f: for line in f.readlines(): tokens = line.split() if tokens and tokens[0] == "def" and tokens[1].startswith("test_"): funcname = tokens[1].split("(")[0].strip() tests.append(funcname) return tests
def test_CrossSections(outdir=None): """Unit tests for CrossSections class""" from shutil import rmtree from tempfile import mkdtemp remove_dir = False if outdir is None: remove_dir = True outdir = mkdtemp() try: # "Standard" location of cross sections file in PISA; retrieve 2.6.4 for # testing purposes pisa_xs_file = 'cross_sections/cross_sections.json' xs = CrossSections(ver='genie_2.6.4', xsec=pisa_xs_file) # Location of the root file to use (not included in PISA at the moment) test_dir = expand(os.path.join('/tmp', 'pisa_tests', 'cross_sections')) #root_xs_file = os.path.join(test_dir, 'genie_2.6.4_simplified.root') root_xs_file = find_resource(os.path.join( #'tests', 'data', 'xsec', 'genie_2.6.4_simplified.root' 'cross_sections', 'genie_xsec_H2O.root' )) # Make sure that the XS newly-imported from ROOT match those stored in # PISA if os.path.isfile(root_xs_file): xs_from_root = CrossSections.new_from_root(root_xs_file, ver='genie_2.6.4') logging.info('Found and loaded ROOT source cross sections file %s', root_xs_file) #assert xs_from_root.allclose(xs, rtol=1e-7) # Check XS ratio for numu_cc to numu_cc + numu_nc (user must inspect) kg0 = NuFlavIntGroup('numu_cc') kg1 = NuFlavIntGroup('numu_nc') logging.info( r'\int_1^80 xs(numu_cc) E^{-1} dE = %e', xs.get_xs_ratio_integral(kg0, None, e_range=[1, 80], gamma=1) ) logging.info( '(int E^{-gamma} * (sigma_numu_cc)/int(sigma_(numu_cc+numu_nc)) dE)' ' / (int E^{-gamma} dE) = %e', xs.get_xs_ratio_integral(kg0, kg0+kg1, e_range=[1, 80], gamma=1, average=True) ) # Check that XS ratio for numu_cc+numu_nc to the same is 1.0 int_val = xs.get_xs_ratio_integral(kg0+kg1, kg0+kg1, e_range=[1, 80], gamma=1, average=True) if not recursiveEquality(int_val, 1): raise ValueError('Integral of nc + cc should be 1.0; get %e' ' instead.' % int_val) # Check via plot that the # Plot all cross sections stored in PISA xs file try: alldata = from_file(pisa_xs_file) xs_versions = alldata.keys() for ver in xs_versions: xs = CrossSections(ver=ver, xsec=pisa_xs_file) xs.plot(save=os.path.join( outdir, 'pisa_' + ver + '_nuxCCNC_H2O_cross_sections.pdf' )) except ImportError as exc: logging.debug('Could not plot; possible that matplotlib not' 'installed. ImportError: %s', exc) finally: if remove_dir: rmtree(outdir)
def run_unit_tests(path=PISA_PATH, allow_missing=OPTIONAL_MODULES, verbosity=Levels.WARN): """Run all tests found at `path` (or recursively below if `path` is a directory). Each module is imported and each test function is run initially with `set_verbosity(verbosity)`, but if an exception is caught, the module is re-imported or the test function is re-run with `set_verbosity(Levels.TRACE)`, then the traceback from the (original) exception emitted is displayed. Parameters ---------- path : str Path to file or directory allow_missing : None or sequence of str verbosity : int in pisa.utils.log.Levels Raises ------ Exception If any import or test fails not in `allow_missing` """ set_verbosity(verbosity) logging.info("%sPlatform information:", PFX) logging.info("%s HOSTNAME = %s", PFX, socket.gethostname()) logging.info("%s FQDN = %s", PFX, socket.getfqdn()) logging.info("%s OS = %s %s", PFX, platform.system(), platform.release()) for key, val in cpuinfo.get_cpu_info().items(): logging.info("%s %s = %s", PFX, key, val) logging.info(PFX) logging.info("%sModule versions:", PFX) for module_name in REQUIRED_MODULES + OPTIONAL_MODULES: try: module = import_module(module_name) except ImportError: if module_name in REQUIRED_MODULES: raise ver = "optional module not installed or not import-able" else: if hasattr(module, "__version__"): ver = module.__version__ else: ver = "?" logging.info("%s %s : %s", PFX, module_name, ver) logging.info(PFX) path = expand(path, absolute=True, resolve_symlinks=True) if allow_missing is None: allow_missing = [] elif isinstance(allow_missing, str): allow_missing = [allow_missing] tests = find_unit_tests(path) module_pypaths_succeeded = [] module_pypaths_failed = [] module_pypaths_failed_ignored = [] test_pypaths_succeeded = [] test_pypaths_failed = [] test_pypaths_failed_ignored = [] for rel_file_path, test_func_names in tests.items(): pypath = ["pisa"] + rel_file_path[:-3].split("/") parent_pypath = ".".join(pypath[:-1]) module_name = pypath[-1].replace(".", "_") module_pypath = f"{parent_pypath}.{module_name}" try: set_verbosity(verbosity) logging.info(PFX + f"importing {module_pypath}") set_verbosity(Levels.WARN) module = import_module(module_pypath, package=parent_pypath) except Exception as err: if (isinstance(err, ImportError) and hasattr(err, "name") and err.name in allow_missing # pylint: disable=no-member ): err_name = err.name # pylint: disable=no-member module_pypaths_failed_ignored.append(module_pypath) logging.warning( f"{PFX}module {err_name} failed to import wile importing" f" {module_pypath}, but ok to ignore") continue module_pypaths_failed.append(module_pypath) set_verbosity(verbosity) msg = f"<< FAILURE IMPORTING : {module_pypath} >>" logging.error(PFX + "=" * len(msg)) logging.error(PFX + msg) logging.error(PFX + "=" * len(msg)) # Reproduce the failure with full output set_verbosity(Levels.TRACE) try: import_module(module_name, package=parent_pypath) except Exception: pass set_verbosity(Levels.TRACE) logging.exception(err) set_verbosity(verbosity) logging.error(PFX + "#" * len(msg)) continue else: module_pypaths_succeeded.append(module_pypath) for test_func_name in test_func_names: test_pypath = f"{module_pypath}.{test_func_name}" try: set_verbosity(verbosity) logging.debug(PFX + f"getattr({module}, {test_func_name})") set_verbosity(Levels.WARN) test_func = getattr(module, test_func_name) # Run the test function set_verbosity(verbosity) logging.info(PFX + f"{test_pypath}()") set_verbosity(Levels.WARN) test_func() except Exception as err: if (isinstance(err, ImportError) and hasattr(err, "name") and err.name in allow_missing # pylint: disable=no-member ): err_name = err.name # pylint: disable=no-member test_pypaths_failed_ignored.append(module_pypath) logging.warning( PFX + f"{test_pypath} failed because module {err_name} failed to" + f" load, but ok to ignore") continue test_pypaths_failed.append(test_pypath) set_verbosity(verbosity) msg = f"<< FAILURE RUNNING : {test_pypath} >>" logging.error(PFX + "=" * len(msg)) logging.error(PFX + msg) logging.error(PFX + "=" * len(msg)) # Reproduce the error with full output set_verbosity(Levels.TRACE) try: test_func = getattr(module, test_func_name) with np.printoptions( precision=np.finfo(pisa.FTYPE).precision + 2, floatmode="fixed", sign=" ", linewidth=200, ): test_func() except Exception: pass set_verbosity(Levels.TRACE) logging.exception(err) set_verbosity(verbosity) logging.error(PFX + "#" * len(msg)) else: test_pypaths_succeeded.append(test_pypath) finally: # remove references to the test function, e.g. to remove refs # to pycuda / numba.cuda contexts so these can be closed try: del test_func except NameError: pass # NOTE: Until we get all GPU code into Numba, need to unload pycuda # and/or numba.cuda contexts before a module requiring the other one is # to be imported. # NOTE: the following causes a traceback to be emitted at the very end # of the script, regardless of the exception catching here. if (pisa.TARGET == "cuda" and pycuda is not None and hasattr(pycuda, "autoinit") and hasattr(pycuda.autoinit, "context")): try: pycuda.autoinit.context.detach() except Exception: pass # Attempt to unload the imported module # TODO: pipeline, etc. fail as isinstance(service, (Stage, PiStage)) is False #if module_pypath in sys.modules and module_pypath != "pisa": # del sys.modules[module_pypath] #del module # TODO: crashes program; subseqeunt calls in same shell crash(!?!?) # if pisa.TARGET == 'cuda' and nbcuda is not None: # try: # nbcuda.close() # except Exception: # pass # Summarize results n_import_successes = len(module_pypaths_succeeded) n_import_failures = len(module_pypaths_failed) n_import_failures_ignored = len(module_pypaths_failed_ignored) n_test_successes = len(test_pypaths_succeeded) n_test_failures = len(test_pypaths_failed) n_test_failures_ignored = len(test_pypaths_failed_ignored) set_verbosity(verbosity) logging.info( PFX + f"<< IMPORT TESTS : {n_import_successes} imported," f" {n_import_failures} failed," f" {n_import_failures_ignored} failed to import but ok to ignore >>") logging.info(PFX + f"<< UNIT TESTS : {n_test_successes} succeeded," f" {n_test_failures} failed," f" {n_test_failures_ignored} failed but ok to ignore >>") # Exit with error if any failures (import or unit test) if module_pypaths_failed or test_pypaths_failed: msgs = [] if module_pypaths_failed: msgs.append( f"{n_import_failures} module(s) failed to import:\n " + ", ".join(module_pypaths_failed)) if test_pypaths_failed: msgs.append(f"{n_test_failures} unit test(s) failed:\n " + ", ".join(test_pypaths_failed)) # Note the extra newlines before the exception to make it stand out; # and newlines after the exception are due to the pycuda error message # that is emitted when we call pycuda.autoinit.context.detach() sys.stdout.flush() sys.stderr.write("\n\n\n") raise Exception("\n".join(msgs) + "\n\n\n")
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.""" # TODO: add timing for imports & unit test; faster => more used, more useful PISA_PATH = expand(dirname(pisa.__file__), absolute=True, resolve_symlinks=True) # TODO: get optional & required automatically (e.g., from setup.py?) OPTIONAL_MODULES = ( "pandas", "emcee", "pycuda", "pycuda.driver", "ROOT", "libPyROOT", "MCEq", "nuSQUIDSpy", ) """Okay if imports or test_* functions fail due to these not being import-able"""
def makeEventsFile(data_files, detector, proc_ver, cut, outdir, run_settings=None, data_proc_params=None, join=None, cust_cuts=None, extract_fields=EXTRACT_FIELDS, output_fields=OUTPUT_FIELDS): r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3 by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages. Parameters ---------- data_files : dict File paths for finding data files for each run, formatted as: { <string run>: <list of file paths>, <string run>: <list of file paths>, ... <string run>: <list of file paths>, } detector : string Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in e.g. mc_sim_run_settings.json and data_proc_params.json files. proc_ver Version of processing applied to the events, as found in e.g. data_proc_params.json. cut Name of a standard cut to use; must be specified in the relevant detector/processing version node of the data processing parameters (file from which the data_proc_params object was instantiated) outdir Directory path in which to store resulting files; will be generated if it does not already exist (including any parent directories that do not exist) run_settings : string or MCSimRunSettings Resource location of mc_sim_run_settings.json or an MCSimRunSettings object instantiated therefrom. data_proc_params : string or DataProcParams Resource location of data_proc_params.json or a DataProcParams object instantiated therefrom. join String specifying any flavor/interaction types (flavInts) to join together. Separate flavInts with commas (',') and separate groups with semicolons (';'). E.g. an acceptable string is: 'numucc+numubarcc; nuall bar NC, nuall NC' cust_cuts dict with a single DataProcParams cut specification or list of same (see help for DataProcParams for detailed description of cut spec) extract_fields : None or iterable of strings Field names to extract from source HDF5 file. If None, extract all fields. output_fields : None or iterable of strings Fields to include in the generated PISA-standard-format events HDF5 file; note that if 'weighted_aeff' is not preent, effective area will not be computed. If None, all fields will be written. Notes ----- Compute "weighted_aeff" field: Within each int type (CC or NC), ngen should be added together; events recorded of that int type then get their one_weight divided by the total *for that int type only* to obtain the "weighted_aeff" for that event (even if int types are being grouped/joined together). This has the effect that within a group, ... ... and within an interaction type, effective area is a weighted average of that of the flavors being combined. E.g. for CC, \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y}) Aeff_CC = ----------------------------------------------------- , \sum_{run x}\sum_{flav y} (ngen_{x,y}) ... and then across interaction types, the results of the above for each int type need to be summed together, i.e.: Aeff_total = Aeff_CC + Aeff_NC Note that each grouping of flavors is calculated with the above math completely independently from other flavor groupings specified. See Justin Lanfranchi's presentation on the PINGU Analysis call, 2015-10-21, for more details: https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf """ if isinstance(run_settings, str): run_settings = DetMCSimRunsSettings(find_resource(run_settings), detector=detector) assert isinstance(run_settings, DetMCSimRunsSettings) assert run_settings.detector == detector if isinstance(data_proc_params, str): data_proc_params = DataProcParams( detector=detector, proc_ver=proc_ver, data_proc_params=find_resource(data_proc_params)) assert data_proc_params.detector == detector assert data_proc_params.proc_ver == proc_ver runs = sorted(data_files.keys()) all_flavs = [] flavs_by_run = {} run_norm_factors = {} bin_edges = set() runs_by_flavint = FlavIntData() for flavint in runs_by_flavint.flavints: runs_by_flavint[flavint] = [] #ngen_flavint_by_run = {run:FlavIntData() for run in runs} ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs} #eint_per_flav_by_run = {run:FlavIntData() for run in runs} #for run in runs: # flavints_in_run = run_settings.get_flavints(run=run) # e_range = run_settings.get_energy_range(run) # gamma = run_settings.get_spectral_index(run) # for flavint in flavints_in_run: # runs_by_flavint[flavint].append(run) # ngen_flav = run_settings.get_num_gen( # run=run, flav_or_flavint=flavint, include_physical_fract=True # ) # #runs_by_flavint[flavint].append(run) # #this_flav = flavint. # #xsec_fract_en_wtd_avg[run][flavint] = \ # ngen_flavint_by_run[run][flavint] = \ # xsec.get_xs_ratio_integral( # flavintgrp0=flavint, # flavintgrp1=flavint.flav, # e_range=e_range, # gamma=gamma, # average=True # ) # xsec_ver = run_settings.get_xsec_version(run=run) # if xsec_ver_ref is None: # xsec_ver_ref = xsec_ver # # An assumption of below logic is that all MC is generated using the # # same cross sections version. # # # # TODO / NOTE: # # It would be possible to combine runs with different cross sections so # # long as each (flavor, interaction type) cross sections are # # weighted-averaged together using weights # # N_gen_{n,flav+inttype} * E_x^{-gamma_n} / # # ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE ) # # where E_x are the energy sample points specified in the cross # # sections (and hence these must also be identical across all cross # # sections that get combined, unless interpolation is performed). # assert xsec_ver == xsec_ver_ref # #ngen_weighted_energy_integral[str(run)] = powerLawIntegral( # #flavs_by_run[run] = run_settings.flavs(run) ##flavs_present = detector_geom = run_settings[runs[0]]['geom'] # Create Events object to store data evts = Events() evts.metadata.update({ 'detector': run_settings.detector, 'proc_ver': data_proc_params.proc_ver, 'geom': detector_geom, 'runs': runs, }) cuts = [] if isinstance(cust_cuts, dict): cust_cuts = [cust_cuts] if cut is not None: evts.metadata['cuts'].append(cut) cuts.append(cut) if cust_cuts is not None: for ccut in cust_cuts: evts.metadata['cuts'].append('custom: ' + ccut['pass_if']) cuts.append(ccut) orig_outdir = outdir outdir = expand(outdir) logging.info('Output dir spec\'d: %s', orig_outdir) if outdir != orig_outdir: logging.info('Output dir expands to: %s', outdir) mkdir(outdir) detector_label = str(data_proc_params.detector) proc_label = 'proc_' + str(data_proc_params.proc_ver) # What flavints to group together if join is None or join == '': grouped = [] ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS] groups_label = 'unjoined' logging.info('Events in the following groups will be joined together:' ' (none)') else: grouped, ungrouped = xlateGroupsStr(join) evts.metadata['flavints_joined'] = [str(g) for g in grouped] groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped]) logging.info( 'Events in the following groups will be joined together: ' + '; '.join([str(g) for g in grouped])) # Find any flavints not included in the above groupings flavint_groupings = grouped + ungrouped if len(ungrouped) == 0: ungrouped = ['(none)'] logging.info('Events of the following flavints will NOT be joined' 'together: ' + '; '.join([str(k) for k in ungrouped])) # Enforce that flavints composing groups are mutually exclusive for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]): for flavintgrp1 in flavint_groupings[grp_n + 1:]: assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0 flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings] # Instantiate storage for all intermediate destination fields; # The data structure looks like: # extracted_data[group #][interaction type][field name] = list of data if extract_fields is None: extracted_data = [{inttype: {} for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names] else: extracted_data = [{ inttype: {field: [] for field in extract_fields} for inttype in ALL_NUINT_TYPES } for _ in flavintgrp_names] # Instantiate generated-event counts for destination fields; count # CClseparately from NC because aeff's for CC & NC add, whereas # aeffs intra-CC should be weighted-averaged (as for intra-NC) ngen = [{inttype: {} for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names] # Loop through all of the files, retrieving the events, filtering, # and recording the number of generated events pertinent to # calculating aeff filecount = {} detector_geom = None bad_files = [] for run, fnames in data_files.items(): file_count = 0 for fname in fnames: # Retrieve data from all nodes specified in the processing # settings file logging.trace('Trying to get data from file %s', fname) try: data = data_proc_params.get_data(fname, run_settings=run_settings) except (ValueError, KeyError, IOError): logging.warning('Bad file encountered: %s', fname) bad_files.append(fname) continue file_count += 1 # Check to make sure only one run is present in the data runs_in_data = set(data['run']) assert len(runs_in_data) == 1, 'Must be just one run in data' #run = int(data['run'][0]) if not run in filecount: filecount[run] = 0 filecount[run] += 1 rs_run = run_settings[run] # Record geom; check that geom is consistent with other runs if detector_geom is None: detector_geom = rs_run['geom'] assert rs_run['geom'] == detector_geom, \ 'All runs\' geometries must match!' # Loop through all flavints spec'd for run for run_flavint in rs_run['flavints']: barnobar = run_flavint.bar_code int_type = run_flavint.intType # Retrieve this-interaction-type- & this-barnobar-only events # that also pass cuts. (note that cut names are strings) intonly_cut_data = data_proc_params.apply_cuts( data, cuts=cuts + [str(int_type), str(barnobar)], return_fields=extract_fields) # Record the generated count and data for this run/flavor for # each group to which it's applicable for grp_n, flavint_group in enumerate(flavint_groupings): if not run_flavint in flavint_group: continue # Instantiate a field for particles and antiparticles, # keyed by the output of the bar_code property for each if not run in ngen[grp_n][int_type]: ngen[grp_n][int_type][run] = { NuFlav(12).bar_code: 0, NuFlav(-12).bar_code: 0, } # Record count only if it hasn't already been recorded if ngen[grp_n][int_type][run][barnobar] == 0: # Note that one_weight includes cc/nc:total fraction, # so DO NOT specify the full flavint here, only flav # (since one_weight does NOT take bar/nobar fraction, # it must be included here in the ngen computation) flav_ngen = run_settings.get_num_gen(run=run, barnobar=barnobar) ngen[grp_n][int_type][run][barnobar] = flav_ngen # Append the data. Note that extracted_data is: # extracted_data[group n][int_type][extract field name] = # list if extract_fields is None: for f in intonly_cut_data.keys(): if f not in extracted_data[grp_n][int_type]: extracted_data[grp_n][int_type][f] = [] extracted_data[grp_n][int_type][f].extend( intonly_cut_data[f]) else: for f in extract_fields: extracted_data[grp_n][int_type][f].extend( intonly_cut_data[f]) logging.info('File count for run %s: %d', run, file_count) to_file(bad_files, '/tmp/bad_files.json') if ((output_fields is None and (extract_fields is None or 'one_weight' in extract_fields)) or 'weighted_aeff' in output_fields): fmtfields = (' ' * 12 + 'flavint_group', 'int type', ' run', 'part/anti', 'part/anti count', 'aggregate count') fmt_n = [len(f) for f in fmtfields] fmt = ' '.join([r'%' + str(n) + r's' for n in fmt_n]) lines = ' '.join(['-' * n for n in fmt_n]) logging.info(fmt, fmtfields) logging.info(lines) for grp_n, flavint_group in enumerate(flavint_groupings): for int_type in set([fi.intType for fi in flavint_group.flavints]): ngen_it_tot = 0 for run, run_counts in ngen[grp_n][int_type].items(): for barnobar, barnobar_counts in run_counts.items(): ngen_it_tot += barnobar_counts logging.info(fmt, flavint_group.simple_str(), int_type, str(run), barnobar, int(barnobar_counts), int(ngen_it_tot)) # Convert data to numpy array if extract_fields is None: for field in extracted_data[grp_n][int_type].keys(): extracted_data[grp_n][int_type][field] = \ np.array(extracted_data[grp_n][int_type][field]) else: for field in extract_fields: extracted_data[grp_n][int_type][field] = \ np.array(extracted_data[grp_n][int_type][field]) # Generate weighted_aeff field for this group / int type's data extracted_data[grp_n][int_type]['weighted_aeff'] = \ extracted_data[grp_n][int_type]['one_weight'] \ / ngen_it_tot * CMSQ_TO_MSQ # Report file count per run for run, count in filecount.items(): logging.info('Files read, run %s: %d', run, count) ref_num_i3_files = run_settings[run]['num_i3_files'] if count != ref_num_i3_files: logging.warning( 'Run %s, Number of files read (%d) != number of ' 'source I3 files (%d), which may indicate an error.', run, count, ref_num_i3_files) # Generate output data for flavint in ALL_NUFLAVINTS: int_type = flavint.intType for grp_n, flavint_group in enumerate(flavint_groupings): if not flavint in flavint_group: logging.trace('flavint %s not in flavint_group %s, passing.', flavint, flavint_group) continue else: logging.trace( 'flavint %s **IS** in flavint_group %s, storing.', flavint, flavint_group) if output_fields is None: evts[flavint] = extracted_data[grp_n][int_type] else: evts[flavint] = { f: extracted_data[grp_n][int_type][f] for f in output_fields } # Generate file name numerical_runs = [] alphanumerical_runs = [] for run in runs: try: int(run) numerical_runs.append(int(run)) except ValueError: alphanumerical_runs.append(str(run)) run_labels = [] if len(numerical_runs) > 0: run_labels.append(list2hrlist(numerical_runs)) if len(alphanumerical_runs) > 0: run_labels += sorted(alphanumerical_runs) run_label = 'runs_' + ','.join(run_labels) geom_label = '' + detector_geom fname = 'events__' + '__'.join([ detector_label, geom_label, run_label, proc_label, groups_label, ]) + '.hdf5' outfpath = os.path.join(outdir, fname) logging.info('Writing events to %s', outfpath) # Save data to output file evts.save(outfpath)