def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return it as a
    (possibly nested) dictionary with numpy arrays.

    Parameters
    ----------
    filename : str
        Path to the HDF5 file; environment variables in it are expanded.

    Returns
    -------
    data : dict
        One entry per object found in the file, keyed by the last component
        of the object's HDF5 path. Datasets are stored as their contents,
        groups as nested dictionaries.

    Raises
    ------
    IOError
        If the file cannot be read (the failure is logged first).
    """
    def visit_group(obj, sdict):
        """Recursively copy `obj` (a dataset or a group) into `sdict`."""
        name = obj.name.split('/')[-1]
        if isinstance(obj, h5py.Dataset):
            # `[()]` reads the whole dataset; the `.value` property it
            # replaces is no longer available in h5py 3
            sdict[name] = obj[()]
        elif isinstance(obj, h5py.Group):
            # h5py.File derives from h5py.Group, so files are covered too
            sdict[name] = {}
            for sobj in obj.values():
                visit_group(sobj, sdict[name])

    data = {}
    try:
        # The context manager closes the file even if parsing fails
        with h5py.File(os.path.expandvars(filename), 'r') as h5file:
            # Run over the whole dataset
            for obj in h5file.values():
                visit_group(obj, data)
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'", filename)
        logging.error(e)
        raise
    return data
def apply_cuts(self, data, cuts, boolean_op='&', return_fields=None):
    """Perform `cuts` on `data` and return a dict containing
    `return_fields` from events that pass the cuts.

    Parameters
    ----------
    data : single-level dict or FlavIntData object
        A FlavIntData object is processed one `flavint` at a time and a new
        FlavIntData object is returned.
    cuts : string or dict, or sequence thereof
        A string is looked up (lower-cased) in ``self['cuts']``; a dict is
        validated with `validate_cut_spec` and must provide the keys
        ``'pass_if'`` and ``'fields'``.
    boolean_op : string
        Operator used to join the individual ``pass_if`` expressions.
    return_fields : string or sequence thereof
        Fields to return; all fields in `data` if None.

    Returns
    -------
    dict or FlavIntData
        The requested fields, indexed by the boolean result of the cuts.

    Raises
    ------
    Exception
        If a cut is neither a dict nor a name known to ``self['cuts']``.
    """
    if isinstance(data, FlavIntData):
        outdata = FlavIntData()
        for flavint in data.flavints:
            outdata[flavint] = self.apply_cuts(
                data[flavint], cuts=cuts, boolean_op=boolean_op,
                return_fields=return_fields
            )
        return outdata

    if isinstance(cuts, (str, dict)):
        cuts = [cuts]

    # Default is to return all fields
    if return_fields is None:
        return_fields = data.keys()

    # If no cuts specified, return all data from specified fields
    if len(cuts) == 0:
        return self.subselect(data, return_fields)

    cut_strings = set()
    cut_fields = set()
    for cut in cuts:
        if isinstance(cut, dict):
            self.validate_cut_spec(cut)
        elif cut.lower() in self['cuts']:
            cut = self['cuts'][cut.lower()]
        else:
            raise Exception('Unrecognized or invalid cut: "' + str(cut) + '"')
        cut_strings.add(cut['pass_if'])
        cut_fields.update(cut['fields'])

    # Combine cut criteria strings together with boolean operation
    cut_string = boolean_op.join(['(' + cs + ')' for cs in cut_strings])

    # Evaluate in a private copy of the module namespace: the cut expression
    # still sees module-level names, but the data arrays neither leak into
    # nor overwrite the module's real globals.
    namespace = dict(globals())
    for field in cut_fields:
        namespace[field] = data[field]

    # Evaluate cuts, returning a boolean array.
    # NOTE(review): `eval` executes arbitrary code; cut strings must come
    # from a trusted source.
    try:
        bool_idx = eval(cut_string, namespace)  # pylint: disable=eval-used
    except Exception:
        logging.error('Failed to evaluate `cut_string` "%s"', cut_string)
        raise

    # A bare string names a single field; do not iterate over its characters
    if isinstance(return_fields, str):
        fields = [return_fields]
    else:
        fields = return_fields

    # Return specified (or all) fields, indexed by boolean array
    return {f: np.array(data[f])[bool_idx] for f in fields}
def load_discr_sys(self, sys_list):
    """Read the fit results file and check it against this object's inputs.

    The file named by ``self.params['fit_results_file']`` is loaded into
    ``self.fit_results``. For every input name that is missing from the
    fitted map names, a substitute is used if exactly one fitted map name
    is a substring of the input name; otherwise an error is logged.

    Parameters
    ----------
    sys_list : iterable
        Must contain the same elements as the file's ``'sys_list'`` entry.
    """
    self.fit_results = from_file(self.params['fit_results_file'].value)

    if set(self.input_names) != set(self.fit_results['map_names']):
        for name in self.input_names:
            if name in self.fit_results['map_names']:
                continue

            # One flag per fitted map: is its name contained in `name`?
            matches = [
                mapname in name for mapname in self.fit_results['map_names']
            ]
            if sum(matches) != 1:
                logging.error('No compatible map for %s found!' % name)
                continue

            # Exactly one candidate, so it is an unambiguous stand-in
            substitute = self.fit_results['map_names'][matches.index(True)]
            self.fit_results[name] = self.fit_results[substitute]
            logging.warning(
                'Substituting hyperplane parameterization %s for %s'
                % (substitute, name)
            )

    assert set(sys_list) == set(self.fit_results['sys_list'])
    self.sys_list = self.fit_results['sys_list']
def default(self, obj):
    """Return a JSON-serializable stand-in for `obj`.

    Handled, in this order: numpy arrays (converted to float64 lists),
    `ureg.Quantity` (via `to_tuple`), numpy booleans, objects exposing a
    `serializable_state` attribute, and numpy floating-point / integer
    scalars. Anything else is passed to `json.JSONEncoder.default`.

    Raises
    ------
    TypeError
        Re-raised from the base encoder (after logging) when `obj` cannot
        be serialized.
    """
    # Import here to avoid circular imports
    from pisa.utils.log import logging

    if isinstance(obj, np.ndarray):
        return obj.astype(np.float64).tolist()

    # TODO: poor form to have a way to get this into a JSON file but no way
    # to get it out of a JSON file... so either write a deserializer, or
    # remove this and leave it to other objects to do the following.
    if isinstance(obj, ureg.Quantity):
        return obj.to_tuple()

    # np.bool_ is the NumPy boolean scalar type, which the base encoder does
    # not accept, hence this conversion
    if isinstance(obj, np.bool_):
        return bool(obj)

    if hasattr(obj, 'serializable_state'):
        return obj.serializable_state

    # Cover every NumPy float width (not just float32) and NumPy integers,
    # none of which the base encoder is guaranteed to accept
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)

    try:
        return json.JSONEncoder.default(self, obj)
    except TypeError:
        logging.error('JSON serialization for %s, type %s not implemented',
                      obj, type(obj))
        raise
def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return it as a
    (possibly nested) dictionary with numpy arrays.

    Parameters
    ----------
    filename : str
        Path to the HDF5 file; environment variables in it are expanded.

    Returns
    -------
    data : dict
        One entry per object found in the file, keyed by the last component
        of the object's HDF5 path. Datasets are stored as their contents,
        groups as nested dictionaries.

    Raises
    ------
    IOError
        If the file cannot be read (the failure is logged first).
    """
    def visit_group(obj, sdict):
        """Recursively copy `obj` (a dataset or a group) into `sdict`."""
        name = obj.name.split('/')[-1]
        if isinstance(obj, h5py.Dataset):
            # `[()]` reads the whole dataset; the `.value` property it
            # replaces is no longer available in h5py 3
            sdict[name] = obj[()]
        elif isinstance(obj, h5py.Group):
            # h5py.File derives from h5py.Group, so files are covered too
            sdict[name] = {}
            for sobj in obj.values():
                visit_group(sobj, sdict[name])

    data = {}
    try:
        # The context manager closes the file even if parsing fails
        with h5py.File(os.path.expandvars(filename), 'r') as h5file:
            # Run over the whole dataset
            for obj in h5file.values():
                visit_group(obj, data)
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'", filename)
        logging.error(e)
        raise
    return data
def to_hdf(d, filename):
    """Store a (possibly nested) dictionary to HDF5 file.

    Parameters
    ----------
    d : dict
        Dictionary to store. Nested dicts become HDF5 groups, every other
        value becomes a dataset. Keys are written in sorted order.
    filename : str
        Target path; environment variables in it are expanded. The file is
        opened in ``'w'`` mode.

    Raises
    ------
    IOError
        If the file cannot be opened for writing (logged first).
    """
    def store_recursively(fh, node, path=None):
        """Write `node` into `fh` at the HDF5 location given by `path`."""
        path = [] if path is None else path
        if isinstance(node, dict):
            try:
                fh.create_group('/' + '/'.join(path))
            except ValueError:
                # Presumably the group already exists (e.g. the root group);
                # nothing to create in that case
                pass
            for key in sorted(node):
                store_recursively(fh=fh, node=node[key], path=path + [key])
        else:
            fh.create_dataset(name='/' + '/'.join(path), data=node,
                              chunks=True, maxshape=np.shape(node),
                              compression=None, shuffle=True,
                              fletcher32=False)

    try:
        hdf5_data = h5py.File(os.path.expandvars(filename), 'w')
    except IOError as e:
        logging.error("Unable to write to HDF5 file \'%s\'", filename)
        logging.error(e)
        raise

    # Actually write the dictionary, and always release the file handle
    try:
        store_recursively(fh=hdf5_data, node=d)
    finally:
        hdf5_data.close()
def main(description=__doc__):
    """Command-line entry point wrapping `run_unit_tests`.

    Parses the command line, forwards the parsed options to
    `run_unit_tests` as keyword arguments (with ``-v`` passed on as
    ``verbosity``) and exits with status 1 if that call raises.
    """
    parser = ArgumentParser(
        description=description,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=PISA_PATH,
        help="""Specify a specific path to a file or directory in which to find and run unit tests""",
    )
    parser.add_argument(
        "--allow-missing",
        nargs="+",
        default=list(OPTIONAL_MODULES),
        help="""Allow ImportError (or subclasses) for these modules""",
    )
    parser.add_argument(
        "-v",
        action="count",
        default=Levels.WARN,
        help="set verbosity level",
    )

    options = vars(parser.parse_args())
    # `run_unit_tests` expects the count under the name `verbosity`
    options["verbosity"] = options.pop("v")

    try:
        run_unit_tests(**options)
    except Exception as e:
        # Prefer the exception's own `message` attribute when it has one
        msg = e.message if hasattr(e, 'message') else str(e)
        logging.error("\n" + msg)
        sys.exit(1)
def kernel_from_simfile(self, simfile=None, **kwargs):
    """Open the simulation event file `simfile` read-only via h5py.

    The path is resolved with `find_resource`. If the file cannot be
    opened, the error is logged and the process exits with status 1.

    NOTE(review): the handle `fh` is neither used nor closed within this
    block -- confirm against the remainder of the method.
    """
    logging.info('Opening file: %s', simfile)
    try:
        fh = h5py.File(find_resource(simfile), 'r')
    except IOError as e:
        logging.error("Unable to open event data file %s", simfile)
        logging.error(e)
        sys.exit(1)
def kernel_from_simfile(self, simfile=None, **kwargs):
    """Open the simulation event file `simfile` read-only via h5py.

    The path is resolved with `find_resource`. If the file cannot be
    opened, the error is logged and the process exits with status 1.

    NOTE(review): the handle `fh` is neither used nor closed within this
    block -- confirm against the remainder of the method.
    """
    logging.info("Opening file: %s", simfile)
    try:
        fh = h5py.File(find_resource(simfile), "r")
    except IOError as e:
        logging.error("Unable to open event data file %s", simfile)
        logging.error(e)
        sys.exit(1)
def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return as
    dictionary with numpy arrays

    Parameters
    ----------
    filename : str
        Path to the HDF5 file; environment variables in it are expanded.

    Raises
    ------
    IOError
        If the file cannot be opened for reading (logged first).

    NOTE(review): only the opening step is present in this block; the
    handle `hdf5_data` is not parsed, returned or closed here -- confirm
    against the remainder of the function.
    """
    try:
        hdf5_data = h5py.File(os.path.expandvars(filename), 'r')
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'", filename)
        logging.error(e)
        # Bare `raise` keeps the original traceback intact
        raise
def from_json(filename):
    """Open a file in JSON format and parse the content.

    Parameters
    ----------
    filename : str
        Path to the JSON file; environment variables in it are expanded.

    Returns
    -------
    content
        The decoded JSON content, using `NumpyDecoder`.

    Notes
    -----
    If the file cannot be read or decoded, the error is logged and the
    process exits with status 1.
    """
    try:
        # `with` closes the file handle, which was previously left open
        with open(os.path.expandvars(filename)) as json_file:
            return json.load(json_file, cls=NumpyDecoder)
    except (IOError, JSONDecodeError) as e:
        logging.error("Unable to read JSON file '%s'", filename)
        logging.error(e)
        sys.exit(1)
def get_osc_probLT_dict_hdf5(filename):
    '''
    Returns a dictionary of osc_prob_maps from the lookup table .hdf5 files.

    The file is located with `find_resource` and opened read-only. If it
    cannot be opened, the error is logged and the process exits with
    status 1.

    NOTE(review): only the opening step is present in this block; the
    handle `fh` is not read or returned here -- confirm against the
    remainder of the function.
    '''
    try:
        fh = h5py.File(find_resource(filename), 'r')
    except IOError as e:
        logging.error("Unable to open oscillation map file %s", filename)
        logging.error(e)
        sys.exit(1)
def get_osc_probLT_dict_hdf5(filename):
    '''
    Returns a dictionary of osc_prob_maps from the lookup table .hdf5 files.

    The file is located with `find_resource` and opened read-only. If it
    cannot be opened, the error is logged and the process exits with
    status 1.

    NOTE(review): only the opening step is present in this block; the
    handle `fh` is not read or returned here -- confirm against the
    remainder of the function.
    '''
    try:
        fh = h5py.File(find_resource(filename), 'r')
    except IOError as e:
        logging.error("Unable to open oscillation map file %s", filename)
        logging.error(e)
        sys.exit(1)
def from_json(filename):
    """Open a file in JSON format and parse the content.

    Parameters
    ----------
    filename : str
        Path to the JSON file; environment variables in it are expanded.

    Returns
    -------
    content
        The decoded JSON content, using `NumpyDecoder`.

    Raises
    ------
    IOError, JSONDecodeError
        Re-raised (after logging) if the file cannot be read or decoded.
    """
    try:
        # `with` closes the file handle, which was previously left open
        with open(os.path.expandvars(filename)) as json_file:
            return json.load(json_file, cls=NumpyDecoder)
    except (IOError, JSONDecodeError) as e:
        logging.error("Unable to read JSON file \'%s\'", filename)
        logging.error(e)
        # Bare `raise` keeps the original traceback intact
        raise
def compare_numeric(test, ref, label=None, ac_kw=deepcopy(AC_KW), ignore_fails=False):
    """Compare scalars or numpy ndarrays.

    Parameters
    ----------
    test : scalar or numpy.ndarray
    ref : scalar or numpy.ndarray
    label : str or None, optional
        If given, prefixes the logged message.
    ac_kw : mapping, optional
        Keyword args to pass via **ac_kw to `numpy.isclose` / `numpy.allclose`
    ignore_fails : bool, optional
        If True, a mismatch is logged as a warning instead of an error.

    Returns
    -------
    rslt : bool
        True if `test` and `ref` agree within tolerance, False otherwise.

    """
    pfx = f"{label} :: " if label else ""
    with np.printoptions(**PRINTOPTS):
        if np.isscalar(test):
            if np.isclose(test, ref, **ac_kw):
                return True

            msg = f"{pfx}test: {test} != ref: {ref}"

            if ignore_fails:
                logging.warning(msg)
            else:
                logging.error(msg)

            return False

        # Arrays
        if np.allclose(test, ref, **ac_kw):
            return True

        diff = test - ref
        msg = f"{pfx}test:" f"\n{(test)}\n!= ref:\n{(ref)}" f"\ndiff:\n{(diff)}"

        if not np.all(ref == 1):
            nzmask = ref != 0
            zmask = ref == 0
            # The fractional difference must live in an inexact dtype: with
            # an integer `ref`, an array of ref's dtype cannot hold NaN (the
            # assignment raises) and would truncate the ratios.
            ref_dtype = np.asarray(ref).dtype
            if np.issubdtype(ref_dtype, np.inexact):
                fdiff_dtype = ref_dtype
            else:
                fdiff_dtype = np.float64
            fdiff = np.empty_like(ref, dtype=fdiff_dtype)
            fdiff[nzmask] = diff[nzmask] / ref[nzmask]
            fdiff[zmask] = np.nan
            msg += f"\nfractdiff:\n{(fdiff)}"

        if ignore_fails:
            logging.warning(msg)
        else:
            logging.error(msg)

        return False
def from_json(filename):
    """Open a file in JSON format (optionally compressed with bz2 or
    xor-scrambled) and parse the content into Python objects.

    Note that this currently only recognizes a bz2-compressed or
    xor-scrambled file by its extension (i.e., the file must be
    <base>.json.bz2 if it is compressed or <base>.json.xor if it is
    scrambled).

    Parameters
    ----------
    filename : str

    Returns
    -------
    content: OrderedDict with contents of JSON file

    Raises
    ------
    AssertionError
        If the file extension is not a recognized one.
    Exception
        Any error raised while reading or decoding is logged and re-raised.

    """
    # Import here to avoid circular imports
    from pisa.utils.log import logging
    from pisa.utils.resources import open_resource

    _, ext = os.path.splitext(filename)
    ext = ext.replace('.', '').lower()
    assert ext in JSON_EXTS or ext in ZIP_EXTS + XOR_EXTS

    try:
        if ext == 'bz2':
            # Close the resource as soon as its content has been read
            with open_resource(filename) as bz2_file:
                bz2_content = bz2_file.read()
            decompressed = bz2.decompress(bz2_content)
            del bz2_content
            content = json.loads(decompressed, cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
            del decompressed
        elif ext == 'xor':
            with open(filename, 'rb') as infile:
                # bytearray yields integers on both Python 2 and Python 3
                scrambled = bytearray(infile.read())
            # Decrypt with key 42, in memory (no temporary file required)
            unscrambled = bytearray(byte ^ 42 for byte in scrambled)
            content = json.loads(unscrambled.decode('utf-8'),
                                 cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
        else:
            with open_resource(filename) as json_file:
                content = json.load(json_file, cls=NumpyDecoder,
                                    object_pairs_hook=OrderedDict)
    except Exception:
        logging.error('Failed to load JSON, `filename`="%s"', filename)
        raise
    return content
def from_json(filename):
    """Open a file in JSON format (optionally compressed with bz2 or
    xor-scrambled) and parse the content into Python objects.

    Note that this currently only recognizes a bz2-compressed or
    xor-scrambled file by its extension (i.e., the file must be
    <base>.json.bz2 if it is compressed or <base>.json.xor if it is
    scrambled).

    Parameters
    ----------
    filename : str

    Returns
    -------
    content: OrderedDict with contents of JSON file

    Raises
    ------
    AssertionError
        If the file extension is not a recognized one.
    Exception
        Any error raised while reading or decoding is logged and re-raised.

    """
    # Import here to avoid circular imports
    from pisa.utils.log import logging
    from pisa.utils.resources import open_resource

    _, ext = os.path.splitext(filename)
    ext = ext.replace('.', '').lower()
    assert ext in JSON_EXTS or ext in ZIP_EXTS + XOR_EXTS

    try:
        if ext == 'bz2':
            # Close the resource as soon as its content has been read
            with open_resource(filename, 'rb') as bz2_file:
                bz2_content = bz2_file.read()
            decompressed = bz2.decompress(bz2_content).decode()
            del bz2_content
            content = json.loads(decompressed, cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
            del decompressed
        elif ext == 'xor':
            with open(filename, 'rb') as infile:
                encrypted_bytes = infile.read()
            # Decrypt with key 42; iterating over bytes yields integers
            decrypted_bytes = bytes(byte ^ 42 for byte in encrypted_bytes)
            content = json.loads(decrypted_bytes.decode(), cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
        else:
            with open_resource(filename) as json_file:
                content = json.load(json_file, cls=NumpyDecoder,
                                    object_pairs_hook=OrderedDict)
    except Exception:
        logging.error('Failed to load JSON, `filename`="%s"', filename)
        raise
    return content
def open_resource(filename):
    '''
    Find the resource file (see find_resource), open it and return a file
    handle.

    If the resource cannot be opened, the error is logged and the process
    exits with status 1.
    '''
    try:
        return open(find_resource(filename))
    except (IOError, OSError) as e:
        logging.error('Unable to open resource "%s"', filename)
        logging.error(e)
        sys.exit(1)
def __init__(self, ebins, czbins, aeff_weight_file=None, **kwargs):
    """Store the binning and open the effective-area weight file.

    Parameters
    ----------
    ebins, czbins
        Stored unchanged as `self.ebins` and `self.czbins`.
    aeff_weight_file : str
        Resource name resolved with `find_resource` and opened read-only
        with h5py. If it cannot be opened, the error is logged and the
        process exits with status 1.

    NOTE(review): the handle `fh` is not used within this block -- confirm
    against the remainder of the method.
    """
    self.ebins = ebins
    self.czbins = czbins
    logging.info('Initializing AeffServiceMC...')
    logging.info('Opening file: %s', aeff_weight_file)
    try:
        fh = h5py.File(find_resource(aeff_weight_file), 'r')
    except IOError as e:
        logging.error("Unable to open aeff_weight_file %s", aeff_weight_file)
        logging.error(e)
        sys.exit(1)
def __init__(self, ebins, czbins, reco_weight_file=None, **kwargs):
    """Store the binning and open the reconstruction weight file.

    Parameters
    ----------
    ebins, czbins
        Stored unchanged as `self.ebins` and `self.czbins`.
    reco_weight_file : str
        Resource name resolved with `find_resource` and opened read-only
        with h5py. If it cannot be opened, the error is logged and the
        process exits with status 1.

    NOTE(review): the handle `fh` is not used within this block -- confirm
    against the remainder of the method.
    """
    self.ebins = ebins
    self.czbins = czbins
    logging.info("Initializing RecoService...")
    logging.info('Opening file: %s', reco_weight_file)
    try:
        fh = h5py.File(find_resource(reco_weight_file), 'r')
    except IOError as e:
        logging.error("Unable to open event data file %s", reco_weight_file)
        logging.error(e)
        sys.exit(1)
def __init__(self, ebins, czbins, aeff_weight_file=None, **kwargs):
    """Store the binning and open the effective-area weight file.

    Parameters
    ----------
    ebins, czbins
        Stored unchanged as `self.ebins` and `self.czbins`.
    aeff_weight_file : str
        Resource name resolved with `find_resource` and opened read-only
        with h5py. If it cannot be opened, the error is logged and the
        process exits with status 1.

    NOTE(review): the handle `fh` is not used within this block -- confirm
    against the remainder of the method.
    """
    self.ebins = ebins
    self.czbins = czbins
    logging.info('Initializing AeffServiceMC...')
    logging.info('Opening file: %s', aeff_weight_file)
    try:
        fh = h5py.File(find_resource(aeff_weight_file), 'r')
    except IOError as e:
        logging.error("Unable to open aeff_weight_file %s", aeff_weight_file)
        logging.error(e)
        sys.exit(1)
def get_param_priors(params):
    """
    Build one Prior object per entry of `params`.

    Entries are visited in the sorted order of ``params.items()``; each
    entry's ``"prior"`` mapping is expanded into the Prior constructor. A
    TypeError raised by the constructor is logged with a hint and re-raised.
    """
    def build_prior(spec):
        """Instantiate the Prior described by a single parameter entry."""
        try:
            return Prior(**spec["prior"])
        except TypeError:
            logging.error(" Check template settings format, "
                          "may have old-style priors")
            raise

    return [build_prior(entry) for _, entry in sorted(params.items())]
def add_params(setA, setB):
    '''
    Join the parameters in setA and setB, making sure that no parameters
    are overwritten.

    Parameters
    ----------
    setA, setB : dict

    Returns
    -------
    dict
        New dictionary holding the entries of both inputs; the inputs are
        left untouched.

    Raises
    ------
    KeyError
        If any key is present in both inputs (logged first).
    '''
    # Check for overlap
    pnames = set(setA) & set(setB)
    if pnames:
        logging.error('Trying to store parameter(s) %s twice', pnames)
        raise KeyError('Trying to store parameter(s) %s twice' % pnames)

    # Otherwise append. Item views cannot be added together on Python 3, so
    # merge through a copy instead.
    merged = dict(setA)
    merged.update(setB)
    return merged
def check_fine_binning(fine_bins, coarse_bins):
    """
    Report whether a usable fine (oversampled) binning was requested.

    Returns False when `fine_bins` is None and True when
    `is_coarser_binning(coarse_bins, fine_bins)` holds. Raises ValueError
    (after logging) when a fine binning is given but that check fails.
    """
    # Nothing requested: there is nothing to validate
    if fine_bins is None:
        return False

    if not is_coarser_binning(coarse_bins, fine_bins):
        errmsg = 'Requested oversampled binning is coarser ' + \
                 'than output binning. Aborting.'
        logging.error(errmsg)
        raise ValueError(errmsg)

    # Everything is fine
    logging.info('Using requested binning for oversampling.')
    return True
def check_agreement(testname, thresh_ratio, ratio, thresh_diff, diff):
    """Check a fractional and an absolute difference against thresholds.

    Each check passes when the magnitude of the value does not exceed the
    magnitude of its threshold. Passing checks are logged at info level.
    If either check fails, the messages of the failing checks are logged
    as errors and a ValueError carrying them is raised.
    """
    ratio_pass = np.abs(ratio) <= np.abs(thresh_ratio)
    diff_pass = np.abs(diff) <= np.abs(thresh_diff)

    thresh_ratio_str = order_str(thresh_ratio)
    ratio_ord_str = order_str(ratio)
    ratio_pass_str = 'PASS' if ratio_pass else 'FAIL'

    thresh_diff_str = order_str(thresh_diff)
    diff_ord_str = order_str(diff)
    diff_pass_str = 'PASS' if diff_pass else 'FAIL'

    headline = '<< {pass_str:s} : {testname:s}, {kind:s} >>'
    detail_str = ('... agree to (( 10^{level:s} )) ; '
                  'thresh = (( 10^{thresh:s} ))')

    # (passed?, headline, detail) for the fractional and absolute check
    reports = (
        (
            ratio_pass,
            headline.format(testname=testname, kind='fract diff',
                            pass_str=ratio_pass_str),
            detail_str.format(level=ratio_ord_str, thresh=thresh_ratio_str),
        ),
        (
            diff_pass,
            headline.format(testname=testname, kind='diff',
                            pass_str=diff_pass_str),
            detail_str.format(level=diff_ord_str, thresh=thresh_diff_str),
        ),
    )

    err_messages = []
    for passed, head, detail in reports:
        if passed:
            logging.info(head)
            logging.info(detail)
        else:
            err_messages += [head, detail]

    if not ratio_pass or not diff_pass:
        for message in err_messages:
            logging.error(message)
        raise ValueError('\n '.join(err_messages))
def get_earth_model(self, model):
    """
    Check whether the specified Earth density profile has a correct
    NuCraft preface. If not, create a temporary file that does.

    The result is stored in `self.earth_model`:

    * if `model` resolves to a resource that `EarthModel` accepts, that
      resource is used directly;
    * if `EarthModel` raises SyntaxError, a default preamble is prepended
      to the profile in a temporary file and the model is built from that
      (on failure the error is logged and the process exits with status 1);
    * if an IOError occurs, `model` is passed to `EarthModel` as-is.
    """
    logging.debug('Trying to construct Earth model from "%s"', model)
    try:
        resource_path = find_resource(model)
        self.earth_model = EarthModel(resource_path)
        logging.info("Loaded Earth model from %s", model)
    except SyntaxError:
        # Probably the file is lacking the correct preamble
        logging.info(
            "Failed to construct NuCraft Earth model directly from"
            " %s! Adding default preamble...", resource_path
        )
        # Generate tempfile with preamble
        with open(resource_path, "r") as infile:
            profile_lines = infile.readlines()
        preamble = [
            "# nuCraft Earth model with PREM density "
            "values for use as template; keep structure "
            "of the first six lines unmodified!\n",
            # alternative electron numbers: (0.5, 0.5, 0.5)
            "(0.4656,0.4656,0.4957) # tuple of (relative) "
            "electron numbers for mantle, outer core, "
            "and inner core\n",
            "6371. # radius of the Earth\n",
            "3480. # radius of the outer core\n",
            "1121.5 # radius of the inner core\n",
            "# two-columned list of radii and corresponding "
            "matter density values in km and kg/dm^3; "
            "add, remove or modify lines as necessary\n",
        ]
        # Text mode: the default binary mode rejects `str` lines on
        # Python 3. The context manager removes the file on every exit path.
        with NamedTemporaryFile(mode="w+") as tfile:
            tfile.writelines(preamble + profile_lines)
            tfile.flush()
            try:
                self.earth_model = EarthModel(tfile.name)
            except Exception:
                logging.error("Could not construct Earth model from %s: %s",
                              model, sys.exc_info()[1])
                sys.exit(1)
            logging.info("Successfully constructed Earth model")
    except IOError:
        logging.info('Using NuCraft built-in Earth model "%s"', model)
        self.earth_model = EarthModel(model)
def to_file(obj, fname, fmt=None):
    """Dispatch correct file writer based on fmt (if specified) or guess
    based on file name's extension

    Parameters
    ----------
    obj
        Object to write.
    fname : str
        Target file name.
    fmt : str or None
        Format name; if None, the lower-cased extension of `fname` is used.

    Returns
    -------
    Whatever the selected writer returns.

    Raises
    ------
    TypeError
        If the format/extension is not recognized (logged first).
    """
    if fmt is None:
        _, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()

    if ext in JSON_EXTS:
        return jsons.to_json(obj, fname)
    if ext in HDF5_EXTS:
        return hdf.to_hdf(obj, fname)
    if ext in PKL_EXTS:
        # `open` instead of the Python 2-only `file` builtin; `with` makes
        # sure the pickle is flushed and the handle closed
        with open(fname, 'wb') as pkl_file:
            return cPickle.dump(obj, pkl_file)

    errmsg = 'Unrecognized file type/extension: ' + ext
    logging.error(errmsg)
    raise TypeError(errmsg)
def get_earth_model(self, model):
    """
    Check whether the specified Earth density profile has a correct
    NuCraft preface. If not, create a temporary file that does.

    The result is stored in `self.earth_model`:

    * if `model` resolves to a resource that `EarthModel` accepts, that
      resource is used directly;
    * if `EarthModel` raises SyntaxError, a default preamble is prepended
      to the profile in a temporary file and the model is built from that
      (on failure the error is logged and the process exits with status 1);
    * if an IOError occurs, `model` is passed to `EarthModel` as-is.
    """
    logging.debug('Trying to construct Earth model from "%s"', model)
    try:
        resource_path = find_resource(model)
        self.earth_model = EarthModel(resource_path)
        logging.info('Loaded Earth model from %s', model)
    except SyntaxError:
        # Probably the file is lacking the correct preamble
        logging.info('Failed to construct NuCraft Earth model directly from'
                     ' %s! Adding default preamble...', resource_path)
        # Generate tempfile with preamble
        with open(resource_path, 'r') as infile:
            profile_lines = infile.readlines()
        preamble = ['# nuCraft Earth model with PREM density '
                    'values for use as template; keep structure '
                    'of the first six lines unmodified!\n',
                    # alternative electron numbers: (0.5, 0.5, 0.5)
                    '(0.4656,0.4656,0.4957) # tuple of (relative) '
                    'electron numbers for mantle, outer core, '
                    'and inner core\n',
                    '6371. # radius of the Earth\n',
                    '3480. # radius of the outer core\n',
                    '1121.5 # radius of the inner core\n',
                    '# two-columned list of radii and corresponding '
                    'matter density values in km and kg/dm^3; '
                    'add, remove or modify lines as necessary\n']
        # Text mode: the default binary mode rejects `str` lines on
        # Python 3. The context manager removes the file on every exit path.
        with NamedTemporaryFile(mode='w+') as tfile:
            tfile.writelines(preamble + profile_lines)
            tfile.flush()
            try:
                self.earth_model = EarthModel(tfile.name)
            except Exception:
                logging.error('Could not construct Earth model from %s: %s',
                              model, sys.exc_info()[1])
                sys.exit(1)
            logging.info('Successfully constructed Earth model')
    except IOError:
        logging.info('Using NuCraft built-in Earth model "%s"', model)
        self.earth_model = EarthModel(model)
def to_hdf(d, filename):
    """Store a (possibly nested) dictionary to HDF5 file, creating hardlinks
    for repeated leaf nodes (datasets).

    NOTE: Branch nodes are sorted before storing for consistency in the
    generated file despite Python dictionaries having no defined ordering
    among keys.

    Parameters
    ----------
    d : dict
        Dictionary to store; nested dicts become groups, other values
        become datasets.
    filename : str
        Target path; environment variables in it are expanded. The file is
        opened in ``'w'`` mode.

    Raises
    ------
    TypeError
        If `d` is not a dictionary.
    IOError
        Re-raised (after logging) if writing the file fails.
    """
    if not isinstance(d, dict):
        errmsg = 'Only dictionaries may be written to HDF5 files.'
        logging.error(errmsg)
        raise TypeError(errmsg)

    # Define a function for iteratively doing the work
    def store_recursively(fhandle, node, path=None, node_hashes=None):
        """Write `node` at `path`; `node_hashes` maps hashes of the leaves
        already written to their location so repeats can be hardlinked."""
        path = [] if path is None else path
        node_hashes = {} if node_hashes is None else node_hashes
        full_path = '/' + '/'.join(path)
        if isinstance(node, dict):
            try:
                fhandle.create_group(full_path)
            except ValueError:
                # Presumably the group already exists (e.g. the root group)
                pass
            for key in sorted(node):
                key_str = str(key)
                if not isinstance(key, str):
                    logging.warning('Stringifying key "' + key_str +
                                    '"for use as name in HDF5 file')
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = utils.utils.hash_obj(node)
            if node_hash in node_hashes:
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return
            node_hashes[node_hash] = full_path

            # "Scalar datasets don't support chunk/filter options"; extra
            # checking that a sequence isn't a string, also. Shuffling is
            # a good idea since subsequent compression will generally
            # benefit; shuffling requires chunking. Compression is not done
            # here since it is slow.
            if hasattr(node, '__iter__') and not isinstance(node, (str, bytes)):
                shuffle = True
                chunks = True
            else:
                shuffle = False
                chunks = None
            fhandle.create_dataset(name=full_path, data=node, chunks=chunks,
                                   compression=None, shuffle=shuffle,
                                   fletcher32=False)

    # Perform the actual operation using the dict passed in by user; the
    # context manager closes the file on success and on failure
    try:
        with h5py.File(os.path.expandvars(filename), 'w') as h5file:
            store_recursively(fhandle=h5file, node=d)
    except IOError as e:
        logging.error("Unable to write to HDF5 file \'%s\'", filename)
        logging.error(e)
        raise
def add_params(setA, setB):
    '''
    Join the parameters in setA and setB, making sure that no parameters
    are overwritten.

    The key 'energy_scale' may appear in both inputs as long as its value
    is the same in both.

    Parameters
    ----------
    setA, setB : dict

    Returns
    -------
    dict
        New dictionary holding the entries of both inputs; the inputs are
        left untouched.

    Raises
    ------
    KeyError
        If 'energy_scale' differs between the inputs, or if any other key
        is present in both.
    '''
    # Check for overlap
    pnames = set(setA) & set(setB)

    # Since energy scale is implemented in two stages, need to allow this.
    # So ensure equality then continue.
    if 'energy_scale' in pnames:
        if setA['energy_scale'] != setB['energy_scale']:
            logging.error('setA energy scale: %f, setB energy scale: %f',
                          setA['energy_scale'], setB['energy_scale'])
            raise KeyError('SetA and SetB have different energy scale values!')
        pnames = pnames.difference(['energy_scale'])

    if pnames:
        logging.error('Trying to store parameter(s) %s twice', pnames)
        raise KeyError('Trying to store parameter(s) %s twice' % pnames)

    # Otherwise append. Item views cannot be added together on Python 3, so
    # merge through a copy instead.
    merged = dict(setA)
    merged.update(setB)
    return merged
def add_params(setA, setB):
    """
    Join the parameters in setA and setB, making sure that no parameters
    are overwritten.

    The key "energy_scale" may appear in both inputs as long as its value
    is the same in both.

    Parameters
    ----------
    setA, setB : dict

    Returns
    -------
    dict
        New dictionary holding the entries of both inputs; the inputs are
        left untouched.

    Raises
    ------
    KeyError
        If "energy_scale" differs between the inputs, or if any other key
        is present in both.
    """
    # check for overlap
    pnames = set(setA) & set(setB)

    # Since energy scale is implemented in two stages, need to allow this.
    # So ensure equality then continue.
    if "energy_scale" in pnames:
        if setA["energy_scale"] != setB["energy_scale"]:
            logging.error(
                "setA energy scale: %f, setB energy scale: %f",
                setA["energy_scale"],
                setB["energy_scale"],
            )
            raise KeyError("SetA and SetB have different energy scale values!")
        pnames = pnames.difference(["energy_scale"])

    if pnames:
        logging.error("Trying to store parameter(s) %s twice", pnames)
        raise KeyError("Trying to store parameter(s) %s twice" % pnames)

    # Otherwise append. Item views cannot be added together on Python 3, so
    # merge through a copy instead.
    merged = dict(setA)
    merged.update(setB)
    return merged
def _compute_nominal_outputs(self):
    '''
    load events, perform sanity check and put them into histograms,
    if alt_bg file is specified, also put these events into separate
    histograms, that are normalized to the nominal ones (we are only
    interested in the shape difference)

    Within this block the parameters are read, the output bin edges are
    collected into `self.bin_edges` (dimensions whose name contains
    'energy' are converted to GeV) and the background file(s) are opened.
    If a file cannot be opened, the error is logged and the process exits
    with status 1.

    NOTE(review): `sim_ver`, `use_def1`, `bdt_cut` and the opened file
    handles are not used within this block -- confirm against the
    remainder of the method.
    '''
    # get params
    icc_bg_file = self.params.icc_bg_file.value
    if 'shape' in self.error_method:
        alt_icc_bg_file = self.params.alt_icc_bg_file.value
    else:
        alt_icc_bg_file = None
    sim_ver = self.params.sim_ver.value
    use_def1 = self.params.use_def1.value
    bdt_cut = self.params.bdt_cut.m_as('dimensionless')

    self.bin_names = self.output_binning.names
    self.bin_edges = []
    for name in self.bin_names:
        if 'energy' in name:
            bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
        else:
            bin_edges = self.output_binning[name].bin_edges.magnitude
        self.bin_edges.append(bin_edges)

    # the rest of this function is PISA v2 legacy code...
    logging.info('Initializing BackgroundServiceICC...')
    logging.info('Opening file: %s', icc_bg_file)

    try:
        bg_file = h5py.File(find_resource(icc_bg_file), 'r')
        if alt_icc_bg_file is not None:
            alt_bg_file = h5py.File(find_resource(alt_icc_bg_file), 'r')
    except IOError as e:
        logging.error("Unable to open icc_bg_file %s", icc_bg_file)
        logging.error(e)
        sys.exit(1)
fhandle.create_dataset(name=full_path, data=node, chunks=chunks, compression=None, shuffle=shuffle, fletcher32=False) except: logging.error(' full_path: ' + full_path) logging.error(' chunks : ' + str(chunks)) logging.error(' shuffle : ' + str(shuffle)) logging.error(' node : ' + str(node)) raise # Perform the actual operation using the dict passed in by user if isinstance(tgt, basestring): try: h5file = h5py.File(os.path.expandvars(tgt), 'w') store_recursively(fhandle=h5file, node=data_dict) except IOError, e: logging.error(e) logging.error("to_hdf: Unable to open `%s` for writing" % tgt) raise finally: h5file.close() elif isinstance(tgt, h5py.Group): store_recursively(fhandle=tgt, node=data_dict) else: errmsg = "to_hdf: Invalid `tgt` type: `" + type(target_entity) + "`" logging.error(errmsg) raise TypeError(errmsg)
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization.

    Draws reproducible random parameter values, then compares the NSI
    matrix from `eps_matrix` with (1) the `eps_matrix_analytical` property,
    where a mismatch is only logged as a warning, and (2) the result of
    `nsi_sympy_mat_mult`, where a mismatch raises ValueError.
    """
    rng = np.random.RandomState(0)
    alpha1, alpha2, deltansi = rng.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = rng.rand(3) * 2*np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = rng.rand(2) * 2 * eps_max_abs - eps_max_abs

    nsi_params = VacuumLikeNSIParams()
    # Assign in a fixed order, one attribute at a time
    for attr, val in (
            ('eps_scale', eps_scale),
            ('eps_prime', eps_prime),
            ('phi12', phi12),
            ('phi13', phi13),
            ('phi23', phi23),
            ('alpha1', alpha1),
            ('alpha2', alpha2),
            ('deltansi', deltansi),
    ):
        setattr(nsi_params, attr, val)

    logging.trace('Checking agreement between numerical & analytical NSI matrix...')

    numerical = nsi_params.eps_matrix
    analytical = nsi_params.eps_matrix_analytical

    try:
        close = np.isclose(numerical, analytical, **ALLCLOSE_KW)
        if not np.all(close):
            logging.debug(
                "Numerical NSI matrix:\n%s",
                np.array2string(numerical, **ARY2STR_KW)
            )
            logging.debug(
                "Analytical expansion (by hand):\n%s",
                np.array2string(analytical, **ARY2STR_KW)
            )
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s' % close
            )
    except ValueError as err:
        # A disagreement here is tolerated and only reported
        logging.warning(
            "%s\nThis is expected."
            " Going ahead with numerical calculation for now.", err
        )

    logging.trace('Now checking agreement with sympy calculation...')

    from_sympy = nsi_sympy_mat_mult(
        eps_scale_val=eps_scale,
        eps_prime_val=eps_prime,
        phi12_val=phi12,
        phi13_val=phi13,
        phi23_val=phi23,
        alpha1_val=alpha1,
        alpha2_val=alpha2,
        deltansi_val=deltansi
    )

    logging.trace('ALLCLOSE_KW = {}'.format(ALLCLOSE_KW))
    close = np.isclose(numerical, from_sympy, **ALLCLOSE_KW)
    if np.all(close):
        return

    logging.error(
        'Numerical NSI matrix:\n%s',
        np.array2string(numerical, **ARY2STR_KW)
    )
    logging.error(
        'Sympy NSI matrix:\n%s',
        np.array2string(from_sympy, **ARY2STR_KW)
    )
    raise ValueError(
        'Sympy and numerical calculations disagree! Elementwise agreement:\n'
        '%s' % close
    )
def _init_stages(self):
    """Stage factory: Instantiate stages specified by self.config.

    Conventions required for this to work:
        * Stage and service names must be lower-case
        * Service implementations must be found at Python path
          `pisa.stages.<stage_name>.<service_name>`
        * `service` cannot be an instantiation argument for a service

    Every entry of `self.config` other than ``'pipeline'`` is expected to
    be keyed by a ``(stage_name, service_name)`` pair. The instantiated
    services are stored in `self._stages`, after which `self.setup()` is
    called.

    Raises
    ------
    TypeError
        If an instantiated service is not a `Stage`.
    Exception
        Any other failure while initializing a stage is logged and
        re-raised.
    """
    stages = []
    for stage_num, item in enumerate(self.config.items()):
        # Pre-bind so the error handler below can always report them, even
        # if the failure happens before the names are unpacked
        stage_name = service_name = None
        try:
            name, settings = item

            if isinstance(name, str):
                if name == 'pipeline':
                    continue

            stage_name, service_name = name

            # old cfgs compatibility
            if service_name.startswith('pi_'):
                # Strip only the leading prefix, not every occurrence
                new_service_name = service_name[len('pi_'):]
                logging.warning(
                    f"Old stage name `{service_name}` is automatically renamed to `{new_service_name}`. " +
                    "Please change your config in the future!")
                service_name = new_service_name

            logging.debug("instantiating stage %s / service %s",
                          stage_name, service_name)

            # Import service's module
            logging.trace(
                f"Importing service module: {stage_name}.{service_name}")
            try:
                module_path = f"pisa.stages.{stage_name}.{service_name}"
                module = import_module(module_path)
            except ImportError:
                # Only a failed import triggers the fallback; other errors
                # raised while loading the module are left to propagate
                logging.debug(
                    f"Module {stage_name}.{service_name} not found in PISA, trying "
                    "to import from external definition.")
                module_path = f"{stage_name}.{service_name}"
                module = import_module(module_path)

            # Get service class from module
            service_cls = getattr(module, service_name)

            # Instantiate service
            logging.trace(
                "initializing stage.service %s.%s with settings %s"
                % (stage_name, service_name, settings))
            try:
                service = service_cls(**settings, profile=self._profile)
            except Exception:
                logging.error(
                    "Failed to instantiate stage.service %s.%s with settings %s",
                    stage_name,
                    service_name,
                    settings.keys(),
                )
                raise

            if not isinstance(service, Stage):
                raise TypeError(
                    'Trying to create service "%s" for stage #%d (%s),'
                    " but object %s instantiated from class %s is not a"
                    " PISA Stage type but instead is of type %s."
                    % (
                        service_name,
                        stage_num,
                        stage_name,
                        service,
                        service_cls,
                        type(service),
                    ))

            stages.append(service)

        except Exception:
            logging.error(
                "Failed to initialize stage #%d (stage=%s, service=%s).",
                stage_num,
                stage_name,
                service_name,
            )
            raise

    # set parameters with an identical name to the same object
    # otherwise we get inconsistent behaviour when setting repeated params
    # See Issues #566 and #648
    all_params = self.params
    self.update_params(all_params, existing_must_match=True, extend=False)

    # Apply the union of all services' parameter selections to every stage
    param_selections = set()
    for service in stages:
        param_selections.update(service.param_selections)
    param_selections = sorted(param_selections)

    for stage in stages:
        stage.select_params(param_selections, error_on_missing=False)

    self._stages = stages

    self.setup()
def main(return_outputs=False):
    """Run a pipeline from the command line; optionally save and plot outputs.

    Parses command-line arguments, builds a `Pipeline` from the given config
    (after applying any "section arg=val" overrides), runs either the whole
    pipeline (optionally stopping after a given stage) or a single stage,
    and, if an output directory was given, writes the stage outputs to
    JSON and produces plots in the requested formats.

    Parameters
    ----------
    return_outputs : bool
        If True, return `(pipeline, outputs)`; otherwise return None.

    Raises
    ------
    ValueError
        If plots are requested without an output directory, or if a
        command-line config override is malformed.

    """
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    elif args.pdf or args.png:
        raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            # Only the first "=" separates name from value; any further "="
            # characters belong to the value
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        # Slices are end-exclusive, so step past the last stage that ran
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape, fill_value=3.0)
                input_map = Map(name=input_name, binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                # NOTE(review): the return values of `fluctuate` and
                # `reorder_dimensions` are discarded here; confirm that both
                # operate in place on `input_map`
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    selected_stages = pipeline[indices]
    for stage in selected_stages:
        # Nothing can be saved or plotted without an output directory
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)

        # Only save and plot intermediate stages if explicitly requested;
        # the last stage is always handled
        if not (args.intermediate or stage == selected_stages[-1]):
            continue

        stage.outputs.to_json(fbase + "__output.json.bz2")

        formats = OrderedDict(png=args.png, pdf=args.pdf)
        if isinstance(stage.outputs, Data):
            # TODO(shivesh): plots made here will use the most recent
            # "pisa_weight" column and so all stages will have identical plots
            # (one workaround is to turn on "memcache_deepcopy")
            # TODO(shivesh): intermediate stages have no output binning
            if stage.output_binning is None:
                logging.debug("Skipping plot of intermediate stage %s", stage)
                continue
            outputs = stage.outputs.histogram_set(
                binning=stage.output_binning,
                nu_weights_col="pisa_weight",
                mu_weights_col="pisa_weight",
                noise_weights_col="pisa_weight",
                mapset_name=stg_svc,
                errors=True,
            )

        try:
            for fmt, enabled in formats.items():
                if not enabled:
                    continue
                my_plotter = Plotter(
                    stamp="Event rate",
                    outdir=args.outdir,
                    fmt=fmt,
                    log=False,
                    annotate=args.annotate,
                )
                my_plotter.ratio = True
                my_plotter.plot_2d_array(outputs, fname=stg_svc + "__output",
                                         cmap="RdBu")
        except ValueError as exc:
            # Plotting failures are reported but deliberately not fatal;
            # `logging.exception` already records the traceback
            logging.error(
                "Failed to save plot to format %s. See exception"
                " message below",
                fmt,
            )
            logging.exception(exc)
            logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
def to_hdf(data_dict, tgt):
    """
    Store a (possibly nested) dictionary to an HDF5 file or branch node within
    an HDF5 file (an h5py Group).

    This creates hardlinks for duplicate non-trivial leaf nodes (h5py Datasets)
    to minimize storage space required for redundant datasets. Duplication is
    detected via object hashing.

    NOTE: Branch nodes are sorted before storing for consistency in the
    generated file despite Python dictionaries having no defined ordering among
    keys.

    Arguments
    ---------
    data_dict : dict
        Dictionary to be stored

    tgt : str or h5py.Group
        Target for storing data. If `tgt` is a str, it is interpreted as a
        filename; a file is created with that name (overwriting an existing
        file, if present). After writing, the file is closed. If `tgt` is an
        h5py.Group, the data is simply written to that Group and it is left
        open at function return.

    Raises
    ------
    TypeError
        If `data_dict` is not a dict or `tgt` is neither a str nor an
        h5py.Group.
    IOError
        Re-raised (after logging) if it occurs while opening or writing the
        file named by `tgt`.

    """
    if not isinstance(data_dict, dict):
        errmsg = 'to_hdf: `data_dict` only accepts top-level dict.'
        logging.error(errmsg)
        raise TypeError(errmsg)

    # Define a function for recursively doing the work
    def store_recursively(fhandle, node, path=None, node_hashes=None):
        """Write `node` (a dict or a leaf value) at `path` below `fhandle`.

        `node_hashes` maps the hash of each stored non-scalar leaf to its
        HDF5 path, so that later duplicates become hardlinks.
        """
        if path is None:
            path = []
        if node_hashes is None:
            node_hashes = {}
        full_path = '/' + '/'.join(path)

        if isinstance(node, dict):
            logging.trace(" creating Group `%s`" % full_path)
            try:
                fhandle.create_group(full_path)
            except ValueError:
                # Presumably the group already exists (e.g. the root "/")
                pass

            for key in sorted(node):
                key_str = str(key)
                if not isinstance(key, str):
                    logging.warn('Stringifying key `' + key_str +
                                 '` for use as name in HDF5 file')
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
            return

        # Check for existing node
        node_hash = utils.hash_obj(node)
        if node_hash in node_hashes:
            logging.trace(" creating hardlink for Dataset: `%s` -> `%s`"
                          % (full_path, node_hashes[node_hash]))
            # Hardlink the matching existing dataset
            fhandle[full_path] = fhandle[node_hashes[node_hash]]
            return

        # For now, convert None to np.nan since h5py appears to not handle
        # None
        if node is None:
            node = np.nan
            logging.warn(" encountered `None` at node `%s`; converting to"
                         " np.nan" % full_path)

        # "Scalar datasets don't support chunk/filter options". Shuffling
        # is a good idea otherwise since subsequent compression will
        # generally benefit; shuffling requires chunking. Compression is
        # not done here since it is slow.
        if np.isscalar(node):
            shuffle = False
            chunks = None
        else:
            shuffle = True
            chunks = True
            # Store the node_hash for linking to later, but only if this is
            # more than a scalar datatype
            node_hashes[node_hash] = full_path

        # TODO: Treat strings as follows? Would this break compatibility
        #       with pytables/Pandas? What are benefits? Leaving out for now.
        # if isinstance(node, basestr):
        #     dtype = h5py.special_dtype(vlen=str)
        #     fh.create_dataset(k,data=v,dtype=dtype)

        logging.trace(" creating dataset at node `%s`" % full_path)
        try:
            fhandle.create_dataset(name=full_path, data=node,
                                   chunks=chunks, compression=None,
                                   shuffle=shuffle, fletcher32=False)
        except TypeError:
            # Retry without chunking/shuffling, which not all data supports
            try:
                shuffle = False
                chunks = None
                fhandle.create_dataset(name=full_path, data=node,
                                       chunks=chunks, compression=None,
                                       shuffle=shuffle, fletcher32=False)
            except Exception:
                logging.error(' full_path: ' + full_path)
                logging.error(' chunks : ' + str(chunks))
                logging.error(' shuffle : ' + str(shuffle))
                logging.error(' node : ' + str(node))
                raise

    # Perform the actual operation using the dict passed in by user
    if isinstance(tgt, str):
        # Pre-bind so `finally` is safe even if opening the file fails
        h5file = None
        try:
            h5file = h5py.File(os.path.expandvars(tgt), 'w')
            store_recursively(fhandle=h5file, node=data_dict)
        except IOError as e:
            logging.error(e)
            logging.error("to_hdf: Unable to open `%s` for writing" % tgt)
            raise
        finally:
            if h5file is not None:
                h5file.close()
    elif isinstance(tgt, h5py.Group):
        store_recursively(fhandle=tgt, node=data_dict)
    else:
        errmsg = "to_hdf: Invalid `tgt` type: `" + str(type(tgt)) + "`"
        logging.error(errmsg)
        raise TypeError(errmsg)
def store_recursively(fhandle, node, path=None, node_hashes=None):
    """Recursively write `node` into the open HDF5 handle `fhandle`.

    Parameters
    ----------
    fhandle : h5py File or Group (anything offering `create_group`,
        `create_dataset` and item get/set by path)
    node : dict or leaf value
        A dict becomes a Group whose (sorted) keys are stored recursively;
        anything else is stored as a Dataset.
    path : list of str or None
        Path components of `node` below the root; None means the root.
    node_hashes : dict or None
        Maps the hash of each stored non-scalar leaf to its HDF5 path.
        A leaf whose hash is already present is stored as a hardlink to
        that path instead of being written again.

    """
    if path is None:
        path = []
    if node_hashes is None:
        node_hashes = {}
    full_path = '/' + '/'.join(path)

    if isinstance(node, dict):
        logging.trace(" creating Group `%s`" % full_path)
        try:
            fhandle.create_group(full_path)
        except ValueError:
            # Presumably the group already exists (e.g. the root "/")
            pass

        # Sort for a reproducible layout; `sorted(node)` iterates the keys
        # on both Python 2 and 3 (`dict.iterkeys` exists only on Python 2)
        for key in sorted(node):
            key_str = str(key)
            if not isinstance(key, str):
                logging.warn('Stringifying key `' + key_str +
                             '` for use as name in HDF5 file')
            val = node[key]
            new_path = path + [key_str]
            store_recursively(fhandle=fhandle, node=val, path=new_path,
                              node_hashes=node_hashes)
        return

    # Check for existing node
    node_hash = utils.hash_obj(node)
    if node_hash in node_hashes:
        logging.trace(" creating hardlink for Dataset: `%s` -> `%s`"
                      % (full_path, node_hashes[node_hash]))
        # Hardlink the matching existing dataset
        fhandle[full_path] = fhandle[node_hashes[node_hash]]
        return

    # For now, convert None to np.nan since h5py appears to not handle None
    if node is None:
        node = np.nan
        logging.warn(" encountered `None` at node `%s`; converting to"
                     " np.nan" % full_path)

    # "Scalar datasets don't support chunk/filter options". Shuffling
    # is a good idea otherwise since subsequent compression will
    # generally benefit; shuffling requires chunking. Compression is
    # not done here since it is slow.
    if np.isscalar(node):
        shuffle = False
        chunks = None
    else:
        shuffle = True
        chunks = True
        # Store the node_hash for linking to later, but only if this is
        # more than a scalar datatype
        node_hashes[node_hash] = full_path

    # TODO: Treat strings as follows? Would this break compatibility
    #       with pytables/Pandas? What are benefits? Leaving out for now.
    # if isinstance(node, basestr):
    #     dtype = h5py.special_dtype(vlen=str)
    #     fh.create_dataset(k,data=v,dtype=dtype)

    logging.trace(" creating dataset at node `%s`" % full_path)
    try:
        fhandle.create_dataset(name=full_path, data=node,
                               chunks=chunks, compression=None,
                               shuffle=shuffle, fletcher32=False)
    except TypeError:
        # Retry without chunking/shuffling, which not all data supports
        try:
            shuffle = False
            chunks = None
            fhandle.create_dataset(name=full_path, data=node,
                                   chunks=chunks, compression=None,
                                   shuffle=shuffle, fletcher32=False)
        except Exception:
            logging.error(' full_path: ' + full_path)
            logging.error(' chunks : ' + str(chunks))
            logging.error(' shuffle : ' + str(shuffle))
            logging.error(' node : ' + str(node))
            raise
def parse_param(config, section, selector, fullname, pname, value):
    """Build a :class:`pisa.core.param.Param` from one PISA config entry.

    Unless the config says otherwise, the param is created with
    ``fixed = True``, ``prior = None`` and ``range = None``.

    An explicit ``<fullname>.prior`` option always wins. Without one, a
    value that parses with a non-zero ``std_dev`` (i.e. an
    :class:`uncertainties.AffineScalarFunc` magnitude) yields a Gaussian
    prior built from that uncertainty, while the param's own value keeps
    only the nominal part (a :class:`~pint.quantity.Quantity`).

    Parameters
    ----------
    config : pisa.utils.config_parser.PISAConfigParser
    section : string
    selector : string or None
    fullname : string
    pname : string
    value : string

    Returns
    -------
    param : pisa.core.param.Param

    Raises
    ------
    Exception
        If the ``.prior`` option names a Gaussian prior (old-style syntax)
        or an unknown prior type.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.param import Param
    from pisa.core.prior import Prior

    kwargs = {'name': pname, 'is_fixed': True, 'prior': None, 'range': None}

    # A value is either a quantity (possibly with uncertainty) or, failing
    # that, a plain Python literal
    try:
        value = parse_quantity(value)
        kwargs['value'] = value.nominal_value * value.units
    except ValueError:
        value = parse_string_literal(value)
        kwargs['value'] = value

    # Search for explicit attr specifications
    for suffix, kwarg_name, read_option in (
            ('.fixed', 'is_fixed', config.getboolean),
            ('.unique_id', 'unique_id', config.get),
    ):
        if config.has_option(section, fullname + suffix):
            kwargs[kwarg_name] = read_option(section, fullname + suffix)

    if config.has_option(section, fullname + '.range'):
        range_ = config.get(section, fullname + '.range')
        # Note: `nominal` and `sigma` are called out in the `range_` string,
        # so they must exist as locals under exactly these names for `eval`
        if 'nominal' in range_:
            nominal = value.n * value.units  # pylint: disable=unused-variable
        if 'sigma' in range_:
            sigma = value.s * value.units  # pylint: disable=unused-variable
        # Turn bracketed lists into numpy arrays so arithmetic works on them
        range_ = range_.replace('[', 'np.array([').replace(']', '])')
        kwargs['range'] = eval(range_).to(value.units)  # pylint: disable=eval-used

    if not config.has_option(section, fullname + '.prior'):
        # No explicit prior: derive a Gaussian one from the value's own
        # uncertainty, if it has any
        if hasattr(value, 'std_dev') and value.std_dev != 0:
            kwargs['prior'] = Prior(kind='gaussian',
                                    mean=value.nominal_value * value.units,
                                    stddev=value.std_dev * value.units)
    else:
        prior_kind = str(config.get(section, fullname + '.prior'))
        prior_kind = prior_kind.strip().lower()
        if prior_kind == 'none':
            kwargs['prior'] = None
        elif prior_kind == 'uniform':
            kwargs['prior'] = Prior(kind='uniform')
        elif prior_kind == 'jeffreys':
            kwargs['prior'] = Prior(kind='jeffreys',
                                    A=kwargs['range'][0],
                                    B=kwargs['range'][1])
        elif prior_kind == 'spline':
            # Spline definitions are stored per param (and per selector)
            priorname = pname if selector is None else pname + '_' + selector
            spline_file = config.get(section, fullname + '.prior.data')
            spline_spec = from_file(spline_file)[priorname]
            knots = ureg.Quantity(np.asarray(spline_spec['knots']),
                                  spline_spec['units']).to(value.units)
            kwargs['prior'] = Prior(kind='spline',
                                    knots=knots,
                                    coeffs=np.asarray(spline_spec['coeffs']),
                                    deg=spline_spec['deg'])
        elif 'gauss' in prior_kind:
            raise Exception('Please use new style +/- notation for gaussian'
                            ' priors in config')
        else:
            raise Exception('Prior type unknown')

    # Strip out any uncertainties from value itself (an explicit ``.prior``
    # specification takes precedence over this)
    if hasattr(value, 'std_dev'):
        value = value.nominal_value * value.units

    try:
        param = Param(**kwargs)
    except BaseException:
        # Log the offending kwargs for any failure, then let it propagate
        logging.error('Failed to instantiate new Param object with kwargs %s',
                      kwargs)
        raise

    return param
def retrieve_expression(h5group, expression):
    """Retrieve data from an HDF5 group `h5group` according to
    `expression`. This can apply expressions with simple mathematical
    operators and numpy functions to multiple fields within the HDF5 file
    to derive the output. Python keywords are _not_ allowed, since they
    may alias with a name.

    Refer to any numpy functions by prefixing with either "np.<func>" or
    "numpy.<func>". In order to specify division, spaces must surround the
    forward slash, such that it isn't interpreted as a path.

    Nodes in the HDF5 hierarchy are separated by forward slashes ("/") in a
    path spec. We restrict valid HDF5 node names to contain the characters
    a-z, A-Z, 0-9, periods ("."), and underscores ("_"), with the
    additional restriction that the node name must not start with a period
    or a number, and a path cannot start with a slash.

    Parameters
    ----------
    h5group : h5py Group
    expression : string
        Expression to evaluate.

    Returns
    -------
    result : result of evaluating `expression`

    Examples
    --------
    Combine two fields with a numpy function::

        retrieve_expression(h5group,
                            'np.sqrt(MCneutrino/x**2 + MCneutrino/y**2)')

    Indexing into the data arrays can also be performed, and numpy masks
    used as usual::

        expr = 'I3MCTree/energy[I3MCTree/event == I3EventHeader[0]]'
        retrieve_expression(h5group, expr)

    Notes
    -----
    The translated expression is passed to `eval`; only use this with
    trusted expressions.

    """
    h5path_re = re.compile(
        r'''
        ([a-z_]          # First character must be letter or underscore
         [a-z0-9_.]*     # 0 or more legal chars: letters, numbers, _, .
         (?:             # (Do not return the following group separately)
            [/]{0,1}     # Next character CAN be no or 1 front-slash
            [a-z0-9_.]+  # But a slash *must* be followed by legal chars
         )*              # Slash+chars pattern might not occur, or repeat
        )''', re.VERBOSE | re.IGNORECASE
    )
    numpy_re = re.compile(r'^(np|numpy)\.[a-z_.]+', re.IGNORECASE)

    # NOTE: `intermediate_data` must keep this exact name, since the
    # translated expression refers to it when evaluated below
    intermediate_data = {}

    # Rebuild the expression in a single left-to-right pass, substituting
    # each matched path exactly where it was found. (Repeated
    # `str.replace` calls would also rewrite text inside other names and
    # inside previously inserted substitutions.)
    pieces = []
    pos = 0
    for match in h5path_re.finditer(expression):
        h5path = match.group(0)
        # References to numpy functions are left untouched
        if numpy_re.match(h5path):
            continue
        if h5path not in intermediate_data:
            intermediate_data[h5path] = DataProcParams.retrieve_node_data(
                h5group, h5path
            )
        pieces.append(expression[pos:match.start()])
        pieces.append("intermediate_data['%s']" % h5path)
        pos = match.end()
    pieces.append(expression[pos:])
    eval_str = ''.join(pieces)

    try:
        result = eval(eval_str)  # pylint: disable=eval-used
    except Exception:
        logging.error('`expression` "%s" was translated into `eval_str`'
                      ' "%s" and failed to evaluate.',
                      expression, eval_str)
        raise

    return result
def test_find_index():
    """Unit tests for `find_index` function.

    Correctness is defined as producing the same histogram as numpy.histogramdd
    by using the output of `find_index` (ignoring underflow and overflow values).
    Additionally, -1 should be returned if a value is below the range
    (underflow) or is nan, and num_bins should be returned for a value above the
    range (overflow).

    Every mismatch is logged as an error; the test fails at the end if any
    mismatch occurred.
    """
    failures = 0
    for basic_bin_edges in [
            # Negative, positive, integer, non-integer, binary-unrepresentable
            # (0.1) edges
            [-1, -0.5, -0.1, 0, 0.1, 0.5, 1, 2, 3, 4],

            # A single infinite bin: [-np.inf, np.inf]
            [],

            # Half-infinite bins (lower or upper edge) & [-inf, .1, +inf]
            [0.1],

            # Single bin with finite edges & +/-inf-edge(s)-added variants
            [-0.1, 0.1],
    ]:
        # Bin edges from above, w/ and w/o +/-inf as left and/or right edges
        for left_edge, right_edge in [
                (None, None),
                (-np.inf, None),
                (None, np.inf),
                (-np.inf, np.inf),
        ]:
            bin_edges = deepcopy(basic_bin_edges)
            if left_edge is not None:
                bin_edges = [left_edge] + bin_edges
            if right_edge is not None:
                bin_edges = bin_edges + [right_edge]
            # At least two edges are needed to define a bin
            if len(bin_edges) < 2:
                continue
            logging.debug('bin_edges being tested: %s', bin_edges)
            bin_edges = np.array(bin_edges, dtype=FTYPE)

            num_bins = len(bin_edges) - 1
            underflow_idx = -1
            overflow_idx = num_bins

            #
            # Construct test values to try out
            #

            non_finite_vals = [-np.inf, +np.inf, np.nan]

            # Values within bins (i.e., not on edges)
            inbin_vals = []
            for idx in range(len(bin_edges) - 1):
                lower_be = bin_edges[idx]
                upper_be = bin_edges[idx + 1]
                if np.isfinite(lower_be):
                    if np.isfinite(upper_be):
                        inbin_val = (lower_be + upper_be) / 2
                    else:
                        inbin_val = lower_be + 10.5
                else:
                    if np.isfinite(upper_be):
                        inbin_val = upper_be - 10.5
                    else:
                        inbin_val = 10.5
                inbin_vals.append(inbin_val)

            # Values one unit of floating point accuracy away from each bin
            # edge, obtained by scaling the edge by (1 -/+ eps)
            eps = np.finfo(FTYPE).eps  # pylint: disable=no-member
            below_edges_vals = [FTYPE((1 - eps) * be) for be in bin_edges]
            above_edges_vals = [FTYPE((1 + eps) * be) for be in bin_edges]

            test_vals = np.concatenate([
                non_finite_vals,
                bin_edges,
                inbin_vals,
                below_edges_vals,
                above_edges_vals,
            ])
            logging.trace('test_vals = %s', test_vals)

            #
            # Run tests
            #
            for val in test_vals:
                val = FTYPE(val)

                # Reference answer: the bin numpy puts the value in, if any
                np_histvals, _ = np.histogramdd([val], np.atleast_2d(bin_edges))
                nonzero_indices = np.nonzero(np_histvals)[0]  # select first & only dim
                if np.isnan(val):
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val < bin_edges[0]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val > bin_edges[-1]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = overflow_idx
                else:
                    assert len(nonzero_indices) == 1, str(len(nonzero_indices))
                    expected_idx = nonzero_indices[0]

                found_idx = find_index(val, bin_edges)

                if found_idx != expected_idx:
                    failures += 1
                    msg = 'val={}, edges={}: Expected idx={}, found idx={}'.format(
                        val, bin_edges, expected_idx, found_idx)
                    logging.error(msg)

    assert failures == 0, f"{failures} failures, inspect ERROR messages above for info"

    logging.info('<< PASS : test_find_index >>')
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or PISAConfigParser
        A string is loaded via `from_file`.

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known arg
        values are parsed out fully into Python objects, while the rest remain
        as strings that must be used or parsed elsewhere. One additional
        entry, keyed by the string 'detector_name', holds the detector name
        from the "pipeline" section (or None if not specified).

    Raises
    ------
    TypeError
        If `config` is neither a string nor a PISAConfigParser.
    NoSectionError
        If the config has no "binning" section.
    IOError
        If no section is found for a stage/service listed in the order.
    ValueError
        If a param already defined in a previous stage is given further
        specs in a later section.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    if isinstance(config, str):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s"
            % config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        # Each binning is declared by its `<binning>.order` option, which
        # lists the dimensions; every dimension has its own option
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except Exception:
                    dims_defined = [
                        split(dim, sep='.')[1] for dim in
                        config['binning'].keys() if
                        dim.startswith(binning + '.') and not
                        dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                # NOTE: the definition is evaluated as Python code; config
                # files must therefore come from a trusted source
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except Exception:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'",
                        bin_name, binning, def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except Exception:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"'
                % (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"'
                % (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except Exception:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".'
                    % (fullname, section))
                raise

            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to previous the param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(
                            name=infodict['pname'],
                            selector=infodict['selector']
                        )
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined defined in previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(
                        config=config,
                        section=section,
                        selector=infodict['selector'],
                        fullname=fullname,
                        pname=infodict['pname'],
                        value=value
                    )

                # Register the (new or linked) param with this stage
                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # it's gonna be a PI stage
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it evts?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it gotta be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list on in/output names list
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value

            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name

    return stage_dicts
try: shuffle = False chunks = None fhandle.create_dataset(name=full_path, data=node, chunks=chunks, compression=None, shuffle=shuffle, fletcher32=False) except: logging.error(' full_path: ' + full_path) logging.error(' chunks : ' + str(chunks)) logging.error(' shuffle : ' + str(shuffle)) logging.error(' node : ' + str(node)) raise # Perform the actual operation using the dict passed in by user if isinstance(tgt, basestring): try: h5file = h5py.File(os.path.expandvars(tgt), 'w') store_recursively(fhandle=h5file, node=data_dict) except IOError, e: logging.error(e) logging.error("to_hdf: Unable to open `%s` for writing" % tgt) raise finally: h5file.close() elif isinstance(tgt, h5py.Group): store_recursively(fhandle=tgt, node=data_dict) else: errmsg = "to_hdf: Invalid `tgt` type: `"+ type(target_entity)+"`" logging.error(errmsg) raise TypeError(errmsg)
def parse_fit_config(fit_cfg):
    """Perform sanity checks on and parse fit configuration file.

    Parameters
    ----------
    fit_cfg : str
        path to a fit configuration file

    Returns
    -------
    fit_cfg : PISAConfigParser
        parsed fit configuration; options from the "apply to all sets"
        section (if present) are copied into every nominal/systematic set
        section
    sys_list : list of str
        parsed names of systematic parameters
    units_list : list of str
        units corresponding to each discrete systematic
    combine_regex : list of str
        each string is a regular expression for combining pipeline outputs; see
        :func:`pisa.core.map.MapSet.combine_regex` for details.

    Raises
    ------
    KeyError
        If the general section or its systematics-list option is missing.
    ValueError
        If the numbers of units and of systematics differ.

    """
    fit_cfg = from_file(fit_cfg)
    # Map whitespace-stripped section names to the names as written
    no_ws_section_map = {s.strip(): s for s in fit_cfg.sections()}

    if GENERAL_SECTION_NAME not in no_ws_section_map.values():
        raise KeyError('Fit config is missing the "%s" section!' %
                       GENERAL_SECTION_NAME)

    general_section = fit_cfg[GENERAL_SECTION_NAME]

    if SYS_LIST_OPTION not in general_section:
        raise KeyError(
            "Fit config has to specify systematic parameters as"
            ' "%s" option in "%s" section (comma-separated list of names).' %
            (SYS_LIST_OPTION, GENERAL_SECTION_NAME))

    sys_list = [s.strip() for s in general_section[SYS_LIST_OPTION].split(",")]

    if UNITS_OPTION in general_section:
        units_list = []
        units_specs = (general_section[UNITS_OPTION].replace(
            UNITS_SPECIFIER, "").split(","))
        for units_spec in units_specs:
            # Strip surrounding whitespace, as is done for the systematics
            units_spec = units_spec.strip()
            # Make sure units are interpret-able by Pint
            try:
                ureg.Unit(units_spec)
            except Exception:
                logging.error(
                    'Unit "%s" specified by "%s" option in "%s" section is not'
                    " interpret-able by Pint",
                    units_spec,
                    UNITS_OPTION,
                    GENERAL_SECTION_NAME,
                )
                raise
            units_list.append(units_spec)
    else:
        units_list = ["dimensionless"] * len(sys_list)
        logging.warn(
            "No %s option found in %s section; assuming systematic parameters are"
            " dimensionless",
            UNITS_OPTION,
            GENERAL_SECTION_NAME,
        )

    if len(units_list) != len(sys_list):
        raise ValueError(
            '{} units specified by "{}" option but {} systematics specified by "{}"'
            " option; must be same number of each.".format(
                len(units_list), UNITS_OPTION, len(sys_list), SYS_LIST_OPTION))

    logging.info(
        "Found systematic parameters %s",
        ["{} ({})".format(s, u) for s, u in zip(sys_list, units_list)],
    )

    combine_regex = general_section.get(COMBINE_REGEX_OPTION, None)
    if combine_regex:
        try:
            combine_regex = literal_eval(combine_regex)
        except (SyntaxError, ValueError):
            # Fall back to the old comma-separated syntax
            logging.warn(
                'Deprecated syntax for "combine_re" (make into a Python-evaluatable'
                " sequence of strings instead) :: combine_regex = %s",
                combine_regex,
            )
            combine_regex = [r.strip() for r in combine_regex.split(",")]

    if APPLY_ALL_SECTION_NAME in no_ws_section_map:
        apply_all_section = fit_cfg[no_ws_section_map[APPLY_ALL_SECTION_NAME]]
        for no_ws_sname, sname in no_ws_section_map.items():
            # Only nominal-set and systematic-set sections receive the options
            if not no_ws_sname.startswith((NOMINAL_SET_PFX, SYS_SET_PFX)):
                continue
            sys_set_section = fit_cfg[sname]
            for option, val in apply_all_section.items():
                sys_set_section[option] = val

    return fit_cfg, sys_list, units_list, combine_regex
def _compute_nominal_outputs(self):
    """Load ICC background events, sanity-check them and histogram them.

    Events are read from the HDF5 file named by the `icc_bg_file` param,
    checked against the expected selection flags, cut on the BDT score
    (`bdt_cut` param) and filled into `self.icc_bg_hist` using the output
    binning (either via KDE or a plain histogram, depending on the `kde_hist`
    param).

    If 'shape' is in `self.error_method`, the events from `alt_icc_bg_file`
    are histogrammed the same way into `self.alt_icc_bg_hist`, which is then
    normalized to the nominal histogram (only the shape difference is of
    interest).

    Also sets `self.bin_names` and `self.bin_edges`.

    Raises
    ------
    AssertionError
        if the nominal events fail one of the sanity checks
    ValueError
        if `sim_ver` is not one of '4digit', '5digit' or 'dima'
    SystemExit
        if one of the input files cannot be opened

    """
    # get params
    icc_bg_file = self.params.icc_bg_file.value
    if 'shape' in self.error_method:
        alt_icc_bg_file = self.params.alt_icc_bg_file.value
    else:
        alt_icc_bg_file = None
    sim_ver = self.params.sim_ver.value
    use_def1 = self.params.use_def1.value
    bdt_cut = self.params.bdt_cut.m_as('dimensionless')

    self.bin_names = self.output_binning.names
    self.bin_edges = []
    for name in self.bin_names:
        if 'energy' in name:
            bin_edges = self.output_binning[name].bin_edges.to(
                'GeV').magnitude
        else:
            bin_edges = self.output_binning[name].bin_edges.magnitude
        self.bin_edges.append(bin_edges)

    # the rest of this function is PISA v2 legacy code...
    logging.info('Initializing BackgroundServiceICC...')
    logging.info('Opening file: %s', icc_bg_file)

    bg_file = None
    alt_bg_file = None
    try:
        try:
            bg_file = h5py.File(find_resource(icc_bg_file), 'r')
            if alt_icc_bg_file is not None:
                alt_bg_file = h5py.File(find_resource(alt_icc_bg_file), 'r')
        except IOError as e:
            # `bg_file` is only set once the nominal file opened, so this
            # tells which of the two files is the one that failed
            if bg_file is None:
                logging.error("Unable to open icc_bg_file %s", icc_bg_file)
            else:
                logging.error("Unable to open alt_icc_bg_file %s",
                              alt_icc_bg_file)
            logging.error(e)
            sys.exit(1)

        # sanity check
        santa_doms = bg_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = bg_file['IC86_Dunkman_L3']['value']
        l4 = bg_file['IC86_Dunkman_L4']['result']
        l5 = bg_file['IC86_Dunkman_L5']['bdt_score']
        l6 = bg_file['IC86_Dunkman_L6']
        if use_def1:
            l4_pass = np.all(l4 == 1)
        else:
            if sim_ver in ['5digit', 'dima']:
                l4_invVICH = bg_file['IC86_Dunkman_L4']['result_invertedVICH']
                l4_pass = np.all(np.logical_or(l4 == 1, l4_invVICH == 1))
            else:
                logging.info(
                    'For the old simulation, def.2 background not done yet,'
                    ' so still use def1 for it.')
                l4_pass = np.all(l4 == 1)

        # Raise explicitly (rather than `assert`) so that the checks are not
        # silently skipped when Python runs with optimizations (-O)
        if not (np.all(santa_doms >= 3) and np.all(l3 == 1) and l4_pass
                and np.all(l5 >= 0.1)):
            raise AssertionError(
                'ICC background events in %s fail the SANTA direct DOMs / L3'
                ' / L4 / L5 sanity check' % icc_bg_file)

        corridor_doms_over_threshold = l6['corridor_doms_over_threshold']
        inverted_corridor_cut = corridor_doms_over_threshold > 1
        if not (np.all(inverted_corridor_cut)
                and np.all(l6['santa_direct_doms'] >= 3)
                and np.all(l6['mn_start_contained'] == 1.)
                and np.all(l6['mn_stop_contained'] == 1.)):
            raise AssertionError(
                'ICC background events in %s fail the L6 sanity check'
                % icc_bg_file)

        # load events
        if sim_ver == '4digit':
            variable = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
        elif sim_ver in ['5digit', 'dima']:
            variable = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
        else:
            raise ValueError('Only allow sim_ver 4digit, 5digit or dima!')
        reco_energy_all = np.array(bg_file[variable]['energy'])
        reco_coszen_all = np.array(np.cos(bg_file[variable]['zenith']))
        pid_all = np.array(bg_file['IC86_Dunkman_L6']['delta_LLH'])
        if alt_icc_bg_file is not None:
            alt_reco_energy_all = np.array(alt_bg_file[variable]['energy'])
            alt_reco_coszen_all = np.array(
                np.cos(alt_bg_file[variable]['zenith']))
            alt_pid_all = np.array(
                alt_bg_file['IC86_Dunkman_L6']['delta_LLH'])
            alt_l5 = alt_bg_file['IC86_Dunkman_L5']['bdt_score']

        # Cut: only keep events with BDT score >= `bdt_cut`
        cut_events = {}
        cut = l5 >= bdt_cut
        cut_events['reco_energy'] = reco_energy_all[cut]
        cut_events['reco_coszen'] = reco_coszen_all[cut]
        cut_events['pid'] = pid_all[cut]

        if alt_icc_bg_file is not None:
            # Same BDT score cut for the alternative background sample
            alt_cut_events = {}
            alt_cut = alt_l5 >= bdt_cut
            alt_cut_events['reco_energy'] = alt_reco_energy_all[alt_cut]
            alt_cut_events['reco_coszen'] = alt_reco_coszen_all[alt_cut]
            alt_cut_events['pid'] = alt_pid_all[alt_cut]
    finally:
        # Everything needed has been copied into arrays by now (or an error
        # is propagating); either way release the file handles
        for h5_file in (bg_file, alt_bg_file):
            if h5_file is not None:
                h5_file.close()

    def fill_hist(events):
        """Histogram `events` (dict of arrays) in the output binning."""
        sample = np.array(
            [events[bin_name] for bin_name in self.bin_names]).T
        if self.params.kde_hist.value:
            return self.kde_histogramdd(
                sample,
                binning=self.output_binning,
                coszen_name='reco_coszen',
                use_cuda=True,
                bw_method='silverman',
                alpha=0.3,
                oversample=10,
                coszen_reflection=0.5,
                adaptive=True)
        hist, _ = np.histogramdd(sample=sample, bins=self.bin_edges)
        return hist

    logging.info("Creating a ICC background hists...")
    # make histo
    self.icc_bg_hist = fill_hist(cut_events)

    conversion = self.params.atm_muon_scale.value.m_as(
        'dimensionless') / ureg('common_year').to('seconds').m
    logging.info('nominal ICC rate at %.6E Hz',
                 self.icc_bg_hist.sum() * conversion)

    if alt_icc_bg_file is not None:
        self.alt_icc_bg_hist = fill_hist(alt_cut_events)
        # only interested in shape difference, not rate
        # NOTE(review): there is no guard against an empty alternative
        # histogram (sum of zero) in this normalization
        scale = self.icc_bg_hist.sum() / self.alt_icc_bg_hist.sum()
        self.alt_icc_bg_hist *= scale