Example 1
File: hdf.py Project: mamday/pisa
def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return as dictionary
    with numpy arrays"""

    # Function for iteratively parsing the file to create the dictionary
    def visit_group(obj, sdict):
        name = obj.name.split('/')[-1]
        #indent = len(obj.name.split('/'))-1
        #print "  "*indent,name, obj.value if (type(obj) == h5py.Dataset) else ":"
        if type(obj) in [h5py.Dataset]:
            sdict[name] = obj.value
        if type(obj) in [h5py.Group, h5py.File]:
            sdict[name] = {}
            for sobj in obj.values():
                visit_group(sobj, sdict[name])

    data = {}
    try:
        h5file = h5py.File(os.path.expandvars(filename), 'r')
        # Run over the whole dataset
        for obj in h5file.values():
            visit_group(obj, data)
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'" % filename)
        logging.error(e)
        raise e
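
A minimal usage sketch (the file name and key names below are hypothetical, and it assumes the truncated body above ends by returning `data`): the returned value mirrors the HDF5 group hierarchy as nested dicts, with each dataset exposed as a numpy array.

# Hypothetical usage of from_hdf; 'events.hdf5' and the 'reco'/'energy' keys are made up.
data = from_hdf('events.hdf5')
print(data.keys())                     # top-level HDF5 groups
reco_energy = data['reco']['energy']   # numpy array parsed from /reco/energy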
Example 2
    def apply_cuts(self, data, cuts, boolean_op='&', return_fields=None):
        """Perform `cuts` on `data` and return a dict containing
        `return_fields` from events that pass the cuts.

        Parameters
        ----------
        data : single-level dict or FlavIntData object
        cuts : string or dict, or sequence thereof
        boolean_op : string
        return_fields : string or sequence thereof
        """
        if isinstance(data, FlavIntData):
            outdata = FlavIntData()
            for flavint in data.flavints:
                outdata[flavint] = self.apply_cuts(data[flavint],
                                                   cuts=cuts,
                                                   boolean_op=boolean_op,
                                                   return_fields=return_fields)
            return outdata

        if isinstance(cuts, (str, dict)):
            cuts = [cuts]

        # Default is to return all fields
        if return_fields is None:
            return_fields = data.keys()

        # If no cuts specified, return all data from specified fields
        if len(cuts) == 0:
            return self.subselect(data, return_fields)

        cut_strings = set()
        cut_fields = set()
        for cut in cuts:
            if isinstance(cut, dict):
                self.validate_cut_spec(cut)
            elif cut.lower() in self['cuts']:
                cut = self['cuts'][cut.lower()]
            else:
                raise Exception('Unrecognized or invalid cut: "' + str(cut) +
                                '"')
            cut_strings.add(cut['pass_if'])
            cut_fields.update(cut['fields'])

        # Combine cut criteria strings together with boolean operation
        cut_string = boolean_op.join(['(' + cs + ')' for cs in cut_strings])

        # Load the fields necessary for the cut into the global namespace
        for field in set(cut_fields):
            globals()[field] = data[field]

        # Evaluate cuts, returning a boolean array
        try:
            bool_idx = eval(cut_string)  # pylint: disable=eval-used
        except:
            logging.error('Failed to evaluate `cut_string` "%s"', cut_string)
            raise

        # Return specified (or all) fields, indexed by boolean array
        return {f: np.array(data[f])[bool_idx] for f in return_fields}
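
A sketch of what a cut specification could look like, based only on the 'pass_if' and 'fields' keys this method reads (the field names, the thresholds, and any extra keys required by validate_cut_spec are hypothetical):

# Hypothetical cut dict: 'pass_if' is an expression evaluated with each listed
# field loaded into the namespace; 'fields' names the event columns it needs.
energy_cut = {
    'pass_if': '(energy > 1.0) & (energy < 80.0)',
    'fields': ['energy'],
}
# selected = events.apply_cuts(data, cuts=[energy_cut],
#                              return_fields=['energy', 'coszen'])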
Example 3
 def load_discr_sys(self, sys_list):
     """Load the fit results from the file and make some check
     compatibility"""
     self.fit_results = from_file(self.params['fit_results_file'].value)
     if not set(self.input_names) == set(self.fit_results['map_names']):
         for name in self.input_names:
             if name not in self.fit_results['map_names']:
                 # check if there is something uniquely compatible
                 compatible_names = [
                     mapname in name
                     for mapname in self.fit_results['map_names']
                 ]
                 if sum(compatible_names) == 1:
                     # compatible
                     compatible_name = self.fit_results['map_names'][
                         compatible_names.index(True)]
                     self.fit_results[name] = self.fit_results[
                         compatible_name]
                     logging.warning(
                         'Substituting hyperplane parameterization %s for %s'
                         % (compatible_name, name))
                 else:
                     logging.error('No compatible map for %s found!' % name)
     assert set(sys_list) == set(self.fit_results['sys_list'])
     self.sys_list = self.fit_results['sys_list']
Example 4
    def default(self, obj):
        # Import here to avoid circular imports
        from pisa.utils.log import logging

        if isinstance(obj, np.ndarray):
            return obj.astype(np.float64).tolist()

        # TODO: poor form to have a way to get this into a JSON file but no way
        # to get it out of a JSON file... so either write a deserializer, or
        # remove this and leave it to other objects to do the following.
        if isinstance(obj, ureg.Quantity):
            return obj.to_tuple()

        # NOTE: np.bool_ is the *Numpy* bool type, while np.bool is an alias for
        # the Python bool type, hence this conversion
        if isinstance(obj, np.bool_):
            return bool(obj)

        if hasattr(obj, 'serializable_state'):
            return obj.serializable_state

        if isinstance(obj, np.float32):
            return float(obj)

        try:
            return json.JSONEncoder.default(self, obj)
        except:
            logging.error('JSON serialization for %s, type %s not implemented',
                          obj, type(obj))
            raise
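
The snippet above is the `default` hook of a json.JSONEncoder subclass; a minimal sketch of how such an encoder is wired up (the class name here is made up, and only the ndarray branch is reproduced):

import json
import numpy as np

class NumpyAwareEncoder(json.JSONEncoder):  # hypothetical name
    def default(self, obj):
        # Convert numpy arrays to plain lists so the stock encoder can handle them
        if isinstance(obj, np.ndarray):
            return obj.astype(np.float64).tolist()
        return json.JSONEncoder.default(self, obj)

json.dumps({'x': np.arange(3)}, cls=NumpyAwareEncoder)  # '{"x": [0.0, 1.0, 2.0]}'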
Example 5
File: hdf.py Project: lkijmj/pisa
def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return as dictionary
    with numpy arrays"""
    # Function for iteratively parsing the file to create the dictionary
    def visit_group(obj, sdict):
        name = obj.name.split('/')[-1]
        #indent = len(obj.name.split('/'))-1
        #print "  "*indent,name, obj.value if (type(obj) == h5py.Dataset) else ":"
        if type(obj) in [ h5py.Dataset ]:
            sdict[name] = obj.value
        if type(obj) in [ h5py.Group, h5py.File ]:
            sdict[name] = {}
            for sobj in obj.values():
                visit_group(sobj, sdict[name])

    data = {}
    try:
        h5file = h5py.File(os.path.expandvars(filename), 'r')
        # Run over the whole dataset
        for obj in h5file.values():
            visit_group(obj, data)
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'" % filename)
        logging.error(e)
        raise e
Example 6
File: hdf.py Project: hignight/pisa
def to_hdf(d, filename):
    """Store a (possibly nested) dictionary to HDF5 file"""

    def store_recursively(fh, node, path=[]):
        if isinstance(node, dict):
            try:
                fh.create_group('/' + '/'.join(path))
            except ValueError:
                pass
            for key in sorted(node.iterkeys()):
                val = node[key]
                new_path = path + [key]
                store_recursively(fh=fh, node=val, path=new_path)
        else:
            fh.create_dataset(name = '/' + '/'.join(path),
                              data = node,
                              chunks = True,
                              maxshape = np.shape(node),
                              compression = None,
                              shuffle = True,
                              fletcher32 = False)

    try:
        hdf5_data = h5py.File(os.path.expandvars(filename), 'w')
    except IOError as e:
        logging.error("Unable to write to HDF5 file \'%s\'" % filename)
        logging.error(e)
        raise e
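
A usage sketch, assuming the truncated body above goes on to call store_recursively on the opened file and that all leaf values are array-like (the file name is hypothetical):

import numpy as np

d = {'reco': {'energy': np.linspace(1.0, 80.0, 10),
              'coszen': np.linspace(-1.0, 1.0, 10)}}
to_hdf(d, 'events.hdf5')  # would create group /reco with datasets energy and coszen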
Example 7
def main(description=__doc__):
    """Script interface to `run_unit_tests` function"""
    parser = ArgumentParser(description=description,
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "path",
        nargs="?",
        default=PISA_PATH,
        help="""Specify a specific path to a file or directory in which to find
        and run unit tests""",
    )
    parser.add_argument(
        "--allow-missing",
        nargs="+",
        default=list(OPTIONAL_MODULES),
        help="""Allow ImportError (or subclasses) for these modules""",
    )
    parser.add_argument("-v",
                        action="count",
                        default=Levels.WARN,
                        help="set verbosity level")
    kwargs = vars(parser.parse_args())
    kwargs["verbosity"] = kwargs.pop("v")
    try:
        run_unit_tests(**kwargs)
    except Exception as e:
        if hasattr(e, 'message'):
            msg = e.message
        else:
            msg = str(e)
        logging.error("\n" + msg)
        sys.exit(1)
Example 8
 def kernel_from_simfile(self, simfile=None, **kwargs):
     logging.info('Opening file: %s'%(simfile))
     try:
         fh = h5py.File(find_resource(simfile),'r')
     except IOError as e:
         logging.error("Unable to open event data file %s"%simfile)
         logging.error(e)
         sys.exit(1)
Example 9
 def kernel_from_simfile(self, simfile=None, **kwargs):
     logging.info("Opening file: %s" % (simfile))
     try:
         fh = h5py.File(find_resource(simfile), "r")
     except IOError as e:
         logging.error("Unable to open event data file %s" % simfile)
         logging.error(e)
         sys.exit(1)
Example 10
File: hdf.py Project: hignight/pisa
def from_hdf(filename):
    """Open a file in HDF5 format, parse the content and return as dictionary
    with numpy arrays"""
    try:
        hdf5_data = h5py.File(os.path.expandvars(filename), 'r')
    except IOError as e:
        logging.error("Unable to read HDF5 file \'%s\'" % filename)
        logging.error(e)
        raise e
Example 11
File: jsons.py Project: olivas/pisa
def from_json(filename):
    """Open a file in JSON format an parse the content"""
    try:
        content = json.load(open(os.path.expandvars(filename)), cls=NumpyDecoder)
        return content
    except (IOError, JSONDecodeError) as e:
        logging.error("Unable to read JSON file '%s'" % filename)
        logging.error(e)
        sys.exit(1)
Example 12
def get_osc_probLT_dict_hdf5(filename):
    '''
    Returns a dictionary of osc_prob_maps from the lookup table .hdf5 files. 
    '''
    try:
      fh = h5py.File(find_resource(filename),'r')
    except IOError as e:
      logging.error("Unable to open oscillation map file %s"%filename)
      logging.error(e)
      sys.exit(1)
Example 13
def get_osc_probLT_dict_hdf5(filename):
    '''
    Returns a dictionary of osc_prob_maps from the lookup table .hdf5 files. 
    '''
    try:
        fh = h5py.File(find_resource(filename), 'r')
    except IOError as e:
        logging.error("Unable to open oscillation map file %s" % filename)
        logging.error(e)
        sys.exit(1)
Example 14
File: jsons.py Project: mamday/pisa
def from_json(filename):
    """Open a file in JSON format an parse the content"""
    try:
        content = json.load(open(os.path.expandvars(filename)),
                            cls=NumpyDecoder)
        return content
    except (IOError, JSONDecodeError) as e:
        logging.error("Unable to read JSON file \'%s\'"%filename)
        logging.error(e)
        raise e
Example 15
def compare_numeric(test,
                    ref,
                    label=None,
                    ac_kw=deepcopy(AC_KW),
                    ignore_fails=False):
    """Compare scalars or numpy ndarrays.

    Parameters
    ----------
    test : scalar or numpy.ndarray
    ref : scalar or numpy.ndarray
    label : str or None, optional
    ac_kw : mapping, optional
        Keyword args to pass via **ac_kw to `numpy.isclose` / `numpy.allclose`
    ignore_fails : bool, optional

    Returns
    -------
    rslt : bool

    """
    pfx = f"{label} :: " if label else ""
    with np.printoptions(**PRINTOPTS):
        if np.isscalar(test):
            if np.isclose(test, ref, **ac_kw):
                return True

            msg = f"{pfx}test: {test} != ref: {ref}"
            if ignore_fails:
                logging.warning(msg)
            else:
                logging.error(msg)
            return False

        # Arrays
        if np.allclose(test, ref, **ac_kw):
            return True

        diff = test - ref
        msg = f"{pfx}test:" f"\n{(test)}\n!= ref:\n{(ref)}" f"\ndiff:\n{(diff)}"

        if not np.all(ref == 1):
            nzmask = ref != 0
            zmask = ref == 0
            fdiff = np.empty_like(ref)
            fdiff[nzmask] = diff[nzmask] / ref[nzmask]
            fdiff[zmask] = np.nan
            msg += f"\nfractdiff:\n{(fdiff)}"

        if ignore_fails:
            logging.warning(msg)
        else:
            logging.error(msg)

        return False
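
A quick illustration of the scalar and array branches (the values are made up; the second call assumes the module-level AC_KW tolerances are tighter than the 0.5 difference shown):

import numpy as np

compare_numeric(1.0, 1.0, label='scalar check')              # -> True
compare_numeric(np.array([1.0, 2.0]), np.array([1.0, 2.5]),
                label='array check', ignore_fails=True)      # -> False, logged as warning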
Example 16
def from_json(filename):
    """Open a file in JSON format (optionally compressed with bz2 or
    xor-scrambled) and parse the content into Python objects.

    Note that this currently only recognizes a bz2-compressed or xor-scrambled
    file by its extension (i.e., the file must be <base>.json.bz2 if it is
    compressed or <base>.json.xor if it is scrambled).

    Parameters
    ----------
    filename : str

    Returns
    -------
    content: OrderedDict with contents of JSON file

    """
    # Import here to avoid circular imports
    from pisa.utils.log import logging
    from pisa.utils.resources import open_resource

    _, ext = os.path.splitext(filename)
    ext = ext.replace('.', '').lower()
    assert ext in JSON_EXTS or ext in ZIP_EXTS + XOR_EXTS
    try:
        if ext == 'bz2':
            bz2_content = open_resource(filename).read()
            decompressed = bz2.decompress(bz2_content)
            del bz2_content
            content = json.loads(decompressed,
                                 cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
            del decompressed
        elif ext == 'xor':
            # Create tempfile
            temp = tempfile.TemporaryFile(mode='w+b')
            with open(filename, 'rb') as infile:
                for line in infile:
                    # Decrypt with key 42
                    line = ''.join([chr(ord(c) ^ 42) for c in line])
                    temp.write(line)
            # Rewind
            temp.seek(0)
            content = json.load(temp,
                                cls=NumpyDecoder,
                                object_pairs_hook=OrderedDict)
        else:
            content = json.load(open_resource(filename),
                                cls=NumpyDecoder,
                                object_pairs_hook=OrderedDict)
    except:
        logging.error('Failed to load JSON, `filename`="%s"', filename)
        raise
    return content
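
Usage is the same regardless of compression; the file extension selects the branch taken (the file names below are hypothetical):

settings = from_json('pipeline_settings.json')        # plain JSON
settings = from_json('pipeline_settings.json.bz2')    # decompressed transparently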
Example 17
def from_json(filename):
    """Open a file in JSON format (optionally compressed with bz2 or
    xor-scrambled) and parse the content into Python objects.

    Note that this currently only recognizes a bz2-compressed or xor-scrambled
    file by its extension (i.e., the file must be <base>.json.bz2 if it is
    compressed or <base>.json.xor if it is scrambled).

    Parameters
    ----------
    filename : str

    Returns
    -------
    content: OrderedDict with contents of JSON file

    """
    # Import here to avoid circular imports
    from pisa.utils.log import logging
    from pisa.utils.resources import open_resource

    _, ext = os.path.splitext(filename)
    ext = ext.replace('.', '').lower()
    assert ext in JSON_EXTS or ext in ZIP_EXTS + XOR_EXTS
    try:
        if ext == 'bz2':
            bz2_content = open_resource(filename, 'rb').read()
            decompressed = bz2.decompress(bz2_content).decode()
            del bz2_content
            content = json.loads(decompressed,
                                 cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
            del decompressed
        elif ext == 'xor':

            with open(filename, 'rb') as infile:
                encrypted_bytes = infile.read()

            # decrypt with key 42
            decrypted_bytes = bytearray()
            for byte in encrypted_bytes:
                decrypted_bytes.append(byte ^ 42)

            content = json.loads(decrypted_bytes.decode(),
                                 cls=NumpyDecoder,
                                 object_pairs_hook=OrderedDict)
        else:
            content = json.load(open_resource(filename),
                                cls=NumpyDecoder,
                                object_pairs_hook=OrderedDict)
    except:
        logging.error('Failed to load JSON, `filename`="%s"', filename)
        raise
    return content
Example 18
def open_resource(filename):
    '''
    Find the resource file (see find_resource), open it and return a file
    handle.
    '''
    try:
        return open(find_resource(filename))
    except (IOError, OSError) as e:
        logging.error('Unable to open resource "%s"'%filename)
        logging.error(e)
        sys.exit(1)
Example 19
    def __init__(self,ebins,czbins,aeff_weight_file=None,**kwargs):
        self.ebins = ebins
        self.czbins = czbins
        logging.info('Initializing AeffServiceMC...')

        logging.info('Opening file: %s'%(aeff_weight_file))
        try:
            fh = h5py.File(find_resource(aeff_weight_file),'r')
        except IOError as e:
            logging.error("Unable to open aeff_weight_file %s"%aeff_weight_file)
            logging.error(e)
            sys.exit(1)
Example 20
    def __init__(self,ebins,czbins,reco_weight_file=None,**kwargs):
        self.ebins = ebins
        self.czbins = czbins

        logging.info("Initializing RecoService...")

        logging.info('Opening file: %s'%(reco_weight_file))
        try:
            fh = h5py.File(find_resource(reco_weight_file),'r')
        except IOError as e:
            logging.error("Unable to open event data file %s"%reco_weight_file)
            logging.error(e)
            sys.exit(1)
Example 21
    def __init__(self, ebins, czbins, aeff_weight_file=None, **kwargs):
        self.ebins = ebins
        self.czbins = czbins
        logging.info('Initializing AeffServiceMC...')

        logging.info('Opening file: %s' % (aeff_weight_file))
        try:
            fh = h5py.File(find_resource(aeff_weight_file), 'r')
        except IOError as e:
            logging.error("Unable to open aeff_weight_file %s" %
                          aeff_weight_file)
            logging.error(e)
            sys.exit(1)
Example 22
def get_param_priors(params):
    """
    Returns a list of Prior objects, one for each param.
    """
    priors = []
    for pname, param in sorted(params.items()):
        try:
            prior = Prior(**param["prior"])
        except TypeError:
            logging.error("  Check template settings format, " "may have old-style priors")
            raise
        priors.append(prior)
    return priors
Example 23
File: proc.py Project: olivas/pisa
def add_params(setA,setB):
    '''
    Join the parameters in setA and setB,
    making sure that no parameters are overwritten
    '''
    #check for overlap
    if any(p in setA for p in setB):
        pnames = set(setA.keys()) & set(setB.keys())
        logging.error('Trying to store parameter(s) %s twice'%pnames)
        raise KeyError('Trying to store parameter(s) %s twice'%pnames)

    #Otherwise append
    return dict(setA.items() + setB.items())
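
A small illustration of the overlap check in this version (parameter names are made up; the final merge relies on the snippet's Python 2 dict.items() semantics):

setA = {'theta23': 0.78}
setB = {'deltam31': 2.4e-3}
merged = add_params(setA, setB)        # ok: no shared keys
# add_params(setA, {'theta23': 0.80})  # would raise KeyError: parameter stored twice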
Example 24
def check_fine_binning(fine_bins, coarse_bins):
    """
    This function checks whether the specified fine binning exists and
    is actually finer than the coarse one.
    """
    if fine_bins is not None:
        if is_coarser_binning(coarse_bins, fine_bins):
            logging.info('Using requested binning for oversampling.')
            #everything is fine
            return True
        else:
            errmsg = 'Requested oversampled binning is coarser ' + \
                    'than output binning. Aborting.'
            logging.error(errmsg)
            raise ValueError(errmsg)
    return False
Example 25
def check_fine_binning(fine_bins, coarse_bins):
    """
    This function checks whether the specified fine binning exists and
    is actually finer than the coarse one.
    """
    if fine_bins is not None:
        if is_coarser_binning(coarse_bins, fine_bins):
            logging.info('Using requested binning for oversampling.')
            #everything is fine
            return True
        else:
            errmsg = 'Requested oversampled binning is coarser ' + \
                    'than output binning. Aborting.'
            logging.error(errmsg)
            raise ValueError(errmsg)
    return False
Example 26
def check_agreement(testname, thresh_ratio, ratio, thresh_diff, diff):
    ratio_pass = np.abs(ratio) <= np.abs(thresh_ratio)
    diff_pass = np.abs(diff) <= np.abs(thresh_diff)

    thresh_ratio_str = order_str(thresh_ratio)
    ratio_ord_str = order_str(ratio)
    ratio_pass_str = 'PASS' if ratio_pass else 'FAIL'

    thresh_diff_str = order_str(thresh_diff)
    diff_ord_str = order_str(diff)
    diff_pass_str = 'PASS' if diff_pass else 'FAIL'

    headline = '<< {pass_str:s} : {testname:s}, {kind:s} >>'
    detail_str = ('... agree to (( 10^{level:s} )) ; '
                  'thresh = (( 10^{thresh:s} ))')

    ratio_headline = headline.format(
        testname=testname, kind='fract diff', pass_str=ratio_pass_str
    )
    ratio_detail = detail_str.format(
        level=ratio_ord_str, thresh=thresh_ratio_str
    )

    diff_headline = headline.format(
        testname=testname, kind='diff', pass_str=diff_pass_str,
    )
    diff_detail = detail_str.format(
        level=diff_ord_str, thresh=thresh_diff_str
    )

    err_messages = []
    if ratio_pass:
        logging.info(ratio_headline)
        logging.info(ratio_detail)
    else:
        err_messages += [ratio_headline, ratio_detail]

    if diff_pass:
        logging.info(diff_headline)
        logging.info(diff_detail)
    else:
        err_messages += [diff_headline, diff_detail]

    if not (ratio_pass and diff_pass):
        for m in err_messages:
            logging.error(m)
        raise ValueError('\n    '.join(err_messages))
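
A sketch of the pass/fail semantics (the numbers are made up; order_str is the module helper already used above for formatting):

check_agreement('map comparison', thresh_ratio=1e-6, ratio=1e-8,
                thresh_diff=1e-6, diff=1e-9)  # both within threshold: logged as PASS
# If either |ratio| or |diff| exceeded its threshold, a ValueError would be raised.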
Example 27
 def get_earth_model(self, model):
     """
     Check whether the specified Earth density profile has a correct
     NuCraft preface. If not, create a temporary file that does.
     """
     logging.debug('Trying to construct Earth model from "%s"' % model)
     try:
         resource_path = find_resource(model)
         self.earth_model = EarthModel(resource_path)
         logging.info("Loaded Earth model from %s" % model)
     except SyntaxError:
         # Probably the file is lacking the correct preamble
         logging.info(
             "Failed to construct NuCraft Earth model directly from"
             " %s! Adding default preamble..." % resource_path
         )
         # Generate tempfile with preamble
         with open(resource_path, "r") as infile:
             profile_lines = infile.readlines()
         preamble = [
             "# nuCraft Earth model with PREM density "
             "values for use as template; keep structure "
             "of the first six lines unmodified!\n",
             "(0.4656,0.4656,0.4957)   # tuple of (relative) "
             #'(0.5, 0.5, 0.5)   # tuple of (relative) '
             "electron numbers for mantle, outer core, " "and inner core\n",
             "6371.    # radius of the Earth\n",
             "3480.    # radius of the outer core\n",
             "1121.5   # radius of the inner core\n",
             "# two-columned list of radii and corresponding "
             "matter density values in km and kg/dm^3; "
             "add, remove or modify lines as necessary\n",
         ]
         tfile = NamedTemporaryFile()
         tfile.writelines(preamble + profile_lines)
         tfile.flush()
         try:
             self.earth_model = EarthModel(tfile.name)
         except:
             logging.error("Could not construct Earth model from %s: %s" % (model, sys.exc_info()[1]))
             sys.exit(1)
         logging.info("Successfully constructed Earth model")
         tfile.close()
     except IOError:
         logging.info('Using NuCraft built-in Earth model "%s"' % model)
         self.earth_model = EarthModel(model)
Example 28
def to_file(obj, fname, fmt=None):
    """Dispatch correct file writer based on fmt (if specified) or guess
    based on file name's extension"""
    if fmt is None:
        base, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()
    if ext in JSON_EXTS:
        return jsons.to_json(obj, fname)
    elif ext in HDF5_EXTS:
        return hdf.to_hdf(obj, fname)
    elif ext in PKL_EXTS:
        return cPickle.dump(obj, file(fname, 'wb'))
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
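
A dispatch sketch, assuming 'json', 'hdf5' and 'pkl' are among JSON_EXTS, HDF5_EXTS and PKL_EXTS respectively (file names are hypothetical):

to_file({'a': [1, 2, 3]}, 'out.json')            # handled by jsons.to_json
to_file({'a': [1, 2, 3]}, 'out.hdf5')            # handled by hdf.to_hdf
to_file({'a': [1, 2, 3]}, 'out.dat', fmt='pkl')  # explicit fmt overrides the extension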
Example 29
File: utils.py Project: lkijmj/pisa
def to_file(obj, fname, fmt=None):
    """Dispatch correct file writer based on fmt (if specified) or guess
    based on file name's extension"""
    if fmt is None:
        base, ext = os.path.splitext(fname)
        ext = ext.replace('.', '').lower()
    else:
        ext = fmt.lower()
    if ext in JSON_EXTS:
        return jsons.to_json(obj, fname)
    elif ext in HDF5_EXTS:
        return hdf.to_hdf(obj, fname)
    elif ext in PKL_EXTS:
        return cPickle.dump(obj, file(fname, 'wb'))
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
Example 30
 def get_earth_model(self, model):
     """
     Check whether the specified Earth density profile has a correct
     NuCraft preface. If not, create a temporary file that does.
     """
     logging.debug('Trying to construct Earth model from "%s"'%model)
     try:
         resource_path = find_resource(model)
         self.earth_model = EarthModel(resource_path)
         logging.info('Loaded Earth model from %s'%model)
     except SyntaxError:
         #Probably the file is lacking the correct preamble
         logging.info('Failed to construct NuCraft Earth model directly from'
                      ' %s! Adding default preamble...'%resource_path)
         #Generate tempfile with preamble
         with open(resource_path, 'r') as infile:
             profile_lines = infile.readlines()
         preamble = ['# nuCraft Earth model with PREM density '
                      'values for use as template; keep structure '
                      'of the first six lines unmodified!\n',
                     '(0.4656,0.4656,0.4957)   # tuple of (relative) '
                     #'(0.5, 0.5, 0.5)   # tuple of (relative) '
                      'electron numbers for mantle, outer core, '
                      'and inner core\n',
                     '6371.    # radius of the Earth\n',
                     '3480.    # radius of the outer core\n',
                     '1121.5   # radius of the inner core\n',
                     '# two-columned list of radii and corresponding '
                      'matter density values in km and kg/dm^3; '
                      'add, remove or modify lines as necessary\n']
         tfile = NamedTemporaryFile()
         tfile.writelines(preamble+profile_lines)
         tfile.flush()
         try:
             self.earth_model = EarthModel(tfile.name)
         except:
             logging.error('Could not construct Earth model from %s: %s'
                           %(model, sys.exc_info()[1]))
             sys.exit(1)
         logging.info('Successfully constructed Earth model')
         tfile.close()
     except IOError:
         logging.info('Using NuCraft built-in Earth model "%s"'%model)
         self.earth_model = EarthModel(model)
Example 31
File: hdf.py Project: lkijmj/pisa
def to_hdf(d, filename):
    """Store a (possibly nested) dictionary to HDF5 file, creating hardlinks
    for repeated leaf nodes (datasets).
    
    NOTE: Branch nodes are sorted before storing for consistency in the
    generated file despite Python dictionaries having no defined ordering among
    keys."""
    if not isinstance(d, dict):
        errmsg = 'Only dictionaries may be written to HDF5 files.'
        logging.error(errmsg)
        raise TypeError(errmsg)

    # Define a function for iteratively doing the work
    def store_recursively(fhandle, node, path=[], node_hashes={}):
        full_path = '/' + '/'.join(path)
        if isinstance(node, dict):
            try:
                fhandle.create_group(full_path)
            except ValueError:
                pass
            for key in sorted(node.iterkeys()):
                key_str = str(key)
                if not isinstance(key, str):
                    logging.warn('Stringifying key "' + key_str +
                                 '" for use as name in HDF5 file')
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = utils.utils.hash_obj(node)
            if node_hash in node_hashes:
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return
            node_hashes[node_hash] = full_path
            # "Scalar datasets don't support chunk/filter options"; extra
            # checking that a sequence isn't a string, also. Shuffling is
            # a good idea since subsequent compression will generally benefit;
            # shuffling requires chunking. Compression is not done here
            # since it is slow.
            if hasattr(node, '__iter__') and not isinstance(node, basestring):
                shuffle = True
                chunks = True
            else:
                shuffle = False
                chunks = None
            fhandle.create_dataset(name=full_path, data=node, chunks=chunks,
                              compression=None, shuffle=shuffle,
                              fletcher32=False)
    
    # Perform the actual operation using the dict passed in by user
    try:
        h5file = h5py.File(os.path.expandvars(filename), 'w')
        store_recursively(fhandle=h5file, node=d)
    except IOError as e:
        logging.error("Unable to write to HDF5 file \'%s\'" % filename)
        logging.error(e)
        raise e
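
A sketch of the hardlinking behaviour described in the docstring: storing the same array under two keys should yield one dataset plus a hardlink rather than two copies (the names and file are made up):

import numpy as np

arr = np.arange(10)
to_hdf({'raw': {'x': arr}, 'copy': {'x': arr}}, 'linked.hdf5')
# /raw/x and /copy/x hash identically, so the second path is written as a
# hardlink to the first dataset instead of duplicating the data on disk.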
Example 32
File: proc.py Project: mamday/pisa
def add_params(setA,setB):
    '''
    Join the parameters in setA and setB,
    making sure that no parameters are overwritten
    '''
    #check for overlap
    if any(p in setA for p in setB):
        pnames = set(setA.keys()) & set(setB.keys())
        # Since energy scale is implemented in two stages, need to allow this.
        # So ensure equality then continue.
        if 'energy_scale' in pnames:
            if (setA['energy_scale'] != setB['energy_scale']):
                logging.error(
                    'setA energy scale: %f, setB energy scale: %f'
                    %(setA['energy_scale'],setB['energy_scale']))
                raise KeyError('SetA and SetB have different energy scale values!')
            pnames = pnames.difference(['energy_scale'])
            if len(pnames) > 0:
                logging.error('Trying to store parameter(s) %s twice'%pnames)
                raise KeyError('Trying to store parameter(s) %s twice'%pnames)

    #Otherwise append
    return dict(setA.items() + setB.items())
Example 33
File: proc.py Project: mamday/pisa
def add_params(setA, setB):
    """
    Join the parameters in setA and setB,
    making sure that no parameters are overwritten
    """
    # check for overlap
    if any(p in setA for p in setB):
        pnames = set(setA.keys()) & set(setB.keys())
        # Since energy scale is implemented in two stages, need to allow this.
        # So ensure equality then continue.
        if "energy_scale" in pnames:
            if setA["energy_scale"] != setB["energy_scale"]:
                logging.error(
                    "setA energy scale: %f, setB energy scale: %f" % (setA["energy_scale"], setB["energy_scale"])
                )
                raise KeyError("SetA and SetB have different energy scale values!")
            pnames = pnames.difference(["energy_scale"])
            if len(pnames) > 0:
                logging.error("Trying to store parameter(s) %s twice" % pnames)
                raise KeyError("Trying to store parameter(s) %s twice" % pnames)

    # Otherwise append
    return dict(setA.items() + setB.items())
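
This variant tolerates a shared 'energy_scale' key as long as both values agree (values are made up; the merge again relies on the snippet's Python 2 dict.items() semantics):

setA = {'aeff_scale': 1.0, 'energy_scale': 1.0}
setB = {'reco_scale': 1.0, 'energy_scale': 1.0}
merged = add_params(setA, setB)  # allowed because both 'energy_scale' values are equal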
Example 34
File: icc.py Project: terliuk/pisa
    def _compute_nominal_outputs(self):
        '''
        load events, perform sanity check and put them into histograms,
        if alt_bg file is specified, also put these events into separate histograms,
        that are normalized to the nominal ones (we are only interested in the shape difference)
        '''
        # get params
        icc_bg_file = self.params.icc_bg_file.value
        if 'shape' in self.error_method:
            alt_icc_bg_file = self.params.alt_icc_bg_file.value
        else:
            alt_icc_bg_file = None
        sim_ver = self.params.sim_ver.value
        use_def1 = self.params.use_def1.value
        bdt_cut = self.params.bdt_cut.m_as('dimensionless')

        self.bin_names = self.output_binning.names
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in  name:
                bin_edges = self.output_binning[name].bin_edges.to('GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        logging.info('Initializing BackgroundServiceICC...')
        logging.info('Opening file: %s'%(icc_bg_file))

        try:
            bg_file = h5py.File(find_resource(icc_bg_file),'r')
            if alt_icc_bg_file is not None:
                alt_bg_file = h5py.File(find_resource(alt_icc_bg_file),'r')
        except IOError as e:
            logging.error("Unable to open icc_bg_file %s"%icc_bg_file)
            logging.error(e)
            sys.exit(1)
Example 35
                    fhandle.create_dataset(name=full_path,
                                           data=node,
                                           chunks=chunks,
                                           compression=None,
                                           shuffle=shuffle,
                                           fletcher32=False)
                except:
                    logging.error('  full_path: ' + full_path)
                    logging.error('  chunks   : ' + str(chunks))
                    logging.error('  shuffle  : ' + str(shuffle))
                    logging.error('  node     : ' + str(node))
                    raise

    # Perform the actual operation using the dict passed in by user
    if isinstance(tgt, basestring):
        try:
            h5file = h5py.File(os.path.expandvars(tgt), 'w')
            store_recursively(fhandle=h5file, node=data_dict)
        except IOError as e:
            logging.error(e)
            logging.error("to_hdf: Unable to open `%s` for writing" % tgt)
            raise
        finally:
            h5file.close()
    elif isinstance(tgt, h5py.Group):
        store_recursively(fhandle=tgt, node=data_dict)
    else:
        errmsg = "to_hdf: Invalid `tgt` type: `" + str(type(tgt)) + "`"
        logging.error(errmsg)
        raise TypeError(errmsg)
Example 36
def test_nsi_parameterization():
    """Unit test for Hvac-like NSI parameterization."""
    rand = np.random.RandomState(0)
    alpha1, alpha2, deltansi = rand.rand(3) * 2. * np.pi
    phi12, phi13, phi23 = rand.rand(3) * 2*np.pi - np.pi
    eps_max_abs = 10.0
    eps_scale, eps_prime = rand.rand(2) * 2 * eps_max_abs - eps_max_abs
    nsi_params = VacuumLikeNSIParams()
    nsi_params.eps_scale = eps_scale
    nsi_params.eps_prime = eps_prime
    nsi_params.phi12 = phi12
    nsi_params.phi13 = phi13
    nsi_params.phi23 = phi23
    nsi_params.alpha1 = alpha1
    nsi_params.alpha2 = alpha2
    nsi_params.deltansi = deltansi

    logging.trace('Checking agreement between numerical & analytical NSI matrix...')

    eps_mat_numerical = nsi_params.eps_matrix
    eps_mat_analytical = nsi_params.eps_matrix_analytical

    try:
        close = np.isclose(eps_mat_numerical, eps_mat_analytical, **ALLCLOSE_KW)
        if not np.all(close):
            logging.debug(
                "Numerical NSI matrix:\n%s",
                np.array2string(eps_mat_numerical, **ARY2STR_KW)
            )
            logging.debug(
                "Analytical expansion (by hand):\n%s",
                np.array2string(eps_mat_analytical, **ARY2STR_KW)
            )
            raise ValueError(
                'Evaluating analytical expressions for NSI matrix elements'
                ' does not give agreement with numerical calculation!'
                ' Elementwise agreement:\n%s'
                % close
            )
    except ValueError as err:
        logging.warning(
            "%s\nThis is expected."
            " Going ahead with numerical calculation for now.", err
        )

    logging.trace('Now checking agreement with sympy calculation...')

    eps_mat_sympy = nsi_sympy_mat_mult(
        eps_scale_val=eps_scale,
        eps_prime_val=eps_prime,
        phi12_val=phi12,
        phi13_val=phi13,
        phi23_val=phi23,
        alpha1_val=alpha1,
        alpha2_val=alpha2,
        deltansi_val=deltansi
    )

    logging.trace('ALLCLOSE_KW = {}'.format(ALLCLOSE_KW))
    close = np.isclose(eps_mat_numerical, eps_mat_sympy, **ALLCLOSE_KW)
    if not np.all(close):
        logging.error(
            'Numerical NSI matrix:\n%s',
            np.array2string(eps_mat_numerical, **ARY2STR_KW)
        )
        logging.error(
            'Sympy NSI matrix:\n%s', np.array2string(eps_mat_sympy, **ARY2STR_KW)
        )
        raise ValueError(
            'Sympy and numerical calculations disagree! Elementwise agreement:\n'
            '%s' % close
        )
Example 37
    def _init_stages(self):
        """Stage factory: Instantiate stages specified by self.config.

        Conventions required for this to work:
            * Stage and service names must be lower-case
            * Service implementations must be found at Python path
              `pisa.stages.<stage_name>.<service_name>`
            * `service` cannot be an instantiation argument for a service

        """
        stages = []
        for stage_num, item in enumerate(self.config.items()):
            try:
                name, settings = item

                if isinstance(name, str):
                    if name == 'pipeline':
                        continue

                stage_name, service_name = name

                # old cfgs compatibility
                if service_name.startswith('pi_'):
                    logging.warning(
                        f"Old stage name `{service_name}` is automatically renamed to `{service_name.replace('pi_', '')}`. "
                        + "Please change your config in the future!")
                service_name = service_name.replace('pi_', '')

                logging.debug("instantiating stage %s / service %s",
                              stage_name, service_name)

                # Import service's module
                logging.trace(
                    f"Importing service module: {stage_name}.{service_name}")
                try:
                    module_path = f"pisa.stages.{stage_name}.{service_name}"
                    module = import_module(module_path)
                except:
                    logging.debug(
                        f"Module {stage_name}.{service_name} not found in PISA, trying "
                        "to import from external definition.")
                    module_path = f"{stage_name}.{service_name}"
                    module = import_module(module_path)

                # Get service class from module
                service_cls = getattr(module, service_name)

                # Instantiate service
                logging.trace(
                    "initializing stage.service %s.%s with settings %s" %
                    (stage_name, service_name, settings))
                try:
                    service = service_cls(**settings, profile=self._profile)
                except Exception:
                    logging.error(
                        "Failed to instantiate stage.service %s.%s with settings %s",
                        stage_name,
                        service_name,
                        settings.keys(),
                    )
                    raise

                if not isinstance(service, Stage):
                    raise TypeError(
                        'Trying to create service "%s" for stage #%d (%s),'
                        " but object %s instantiated from class %s is not a"
                        " PISA Stage type but instead is of type %s." % (
                            service_name,
                            stage_num,
                            stage_name,
                            service,
                            service_cls,
                            type(service),
                        ))

                stages.append(service)

            except:
                logging.error(
                    "Failed to initialize stage #%d (stage=%s, service=%s).",
                    stage_num,
                    stage_name,
                    service_name,
                )
                raise

        # set parameters with an identical name to the same object
        # otherwise we get inconsistent behaviour when setting repeated params
        # See Issues #566 and #648
        all_params = self.params
        self.update_params(all_params, existing_must_match=True, extend=False)

        param_selections = set()
        for service in stages:
            param_selections.update(service.param_selections)
        param_selections = sorted(param_selections)

        for stage in stages:
            stage.select_params(param_selections, error_on_missing=False)

        self._stages = stages

        self.setup()
Example 38
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                traceback.format_exc()
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
Example 39
def to_hdf(data_dict, tgt):
    """
    Store a (possibly nested) dictionary to an HDF5 file or branch node within
    an HDF5 file (an h5py Group).
    
    This creates hardlinks for duplicate non-trivial leaf nodes (h5py Datasets)
    to minimize storage space required for redundant datasets. Duplication is
    detected via object hashing.
    
    NOTE: Branch nodes are sorted before storing for consistency in the
    generated file despite Python dictionaries having no defined ordering among
    keys.
    
    Arguments
    ---------
    data_dict : dict
        Dictionary to be stored
    tgt : str or h5py.Group
        Target for storing data. If `tgt` is a str, it is interpreted as a
        filename; a file is created with that name (overwriting an existing
        file, if present). After writing, the file is closed. If `tgt` is an
        h5py.Group, the data is simply written to that Group and it is left
        open at function return.
    """
    if not isinstance(data_dict, dict):
        errmsg = 'to_hdf: `data_dict` only accepts top-level dict.'
        logging.error(errmsg)
        raise TypeError(errmsg)

    # Define a function for iteratively doing the work
    def store_recursively(fhandle, node, path=None, node_hashes=None):
        if path is None:
            path = []
        if node_hashes is None:
            node_hashes = {}
        full_path = '/' + '/'.join(path)
        if isinstance(node, dict):
            logging.trace("  creating Group `%s`" % full_path)
            try:
                fhandle.create_group(full_path)
            except ValueError:
                pass
            for key in sorted(node.iterkeys()):
                key_str = str(key)
                if not isinstance(key, str):
                    logging.warn('Stringifying key `' + key_str +
                                 '` for use as name in HDF5 file')
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle,
                                  node=val,
                                  path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = utils.hash_obj(node)
            if node_hash in node_hashes:
                logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                              (full_path, node_hashes[node_hash]))
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return
            # For now, convert None to np.nan since h5py appears to not handle None
            if node is None:
                node = np.nan
                logging.warn("  encountered `None` at node `%s`; converting to"
                             " np.nan" % full_path)
            # "Scalar datasets don't support chunk/filter options". Shuffling
            # is a good idea otherwise since subsequent compression will
            # generally benefit; shuffling requires chunking. Compression is
            # not done here since it is slow.
            if np.isscalar(node):
                shuffle = False
                chunks = None
            else:
                shuffle = True
                chunks = True
                # Store the node_hash for linking to later if this is more than
                # a scalar datatype. Assumed that "None" has
                node_hashes[node_hash] = full_path
            # TODO: Treat strings as follows? Would this break compatibility
            # with pytables/Pandas? What are benefits? Leaving out for now.
            # if isinstance(node, basestr):
            #     dtype = h5py.special_dtype(vlen=str)
            #     fh.create_dataset(k,data=v,dtype=dtype)
            logging.trace("  creating dataset at node `%s`" % full_path)
            try:
                fhandle.create_dataset(name=full_path,
                                       data=node,
                                       chunks=chunks,
                                       compression=None,
                                       shuffle=shuffle,
                                       fletcher32=False)
            except TypeError:
                try:
                    shuffle = False
                    chunks = None
                    fhandle.create_dataset(name=full_path,
                                           data=node,
                                           chunks=chunks,
                                           compression=None,
                                           shuffle=shuffle,
                                           fletcher32=False)
                except:
                    logging.error('  full_path: ' + full_path)
                    logging.error('  chunks   : ' + str(chunks))
                    logging.error('  shuffle  : ' + str(shuffle))
                    logging.error('  node     : ' + str(node))
                    raise

    # Perform the actual operation using the dict passed in by user
    if isinstance(tgt, basestring):
        try:
            h5file = h5py.File(os.path.expandvars(tgt), 'w')
            store_recursively(fhandle=h5file, node=data_dict)
        except IOError as e:
            logging.error(e)
            logging.error("to_hdf: Unable to open `%s` for writing" % tgt)
            raise
        finally:
            h5file.close()
Example 40
 def store_recursively(fhandle, node, path=None, node_hashes=None):
     if path is None:
         path = []
     if node_hashes is None:
         node_hashes = {}
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         logging.trace("  creating Group `%s`" % full_path)
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key `' + key_str +
                              '` for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle,
                               node=val,
                               path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.hash_obj(node)
         if node_hash in node_hashes:
             logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                           (full_path, node_hashes[node_hash]))
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         # For now, convert None to np.nan since h5py appears to not handle None
         if node is None:
             node = np.nan
             logging.warn("  encountered `None` at node `%s`; converting to"
                          " np.nan" % full_path)
         # "Scalar datasets don't support chunk/filter options". Shuffling
         # is a good idea otherwise since subsequent compression will
         # generally benefit; shuffling requires chunking. Compression is
         # not done here since it is slow.
         if np.isscalar(node):
             shuffle = False
             chunks = None
         else:
             shuffle = True
             chunks = True
             # Store the node_hash for linking to later if this is more than
             # a scalar datatype
             node_hashes[node_hash] = full_path
         # TODO: Treat strings as follows? Would this break compatibility
         # with pytables/Pandas? What are benefits? Leaving out for now.
         # if isinstance(node, basestr):
         #     dtype = h5py.special_dtype(vlen=str)
         #     fh.create_dataset(k,data=v,dtype=dtype)
         logging.trace("  creating dataset at node `%s`" % full_path)
         try:
             fhandle.create_dataset(name=full_path,
                                    data=node,
                                    chunks=chunks,
                                    compression=None,
                                    shuffle=shuffle,
                                    fletcher32=False)
         except TypeError:
             try:
                 shuffle = False
                 chunks = None
                 fhandle.create_dataset(name=full_path,
                                        data=node,
                                        chunks=chunks,
                                        compression=None,
                                        shuffle=shuffle,
                                        fletcher32=False)
             except:
                 logging.error('  full_path: ' + full_path)
                 logging.error('  chunks   : ' + str(chunks))
                 logging.error('  shuffle  : ' + str(shuffle))
                 logging.error('  node     : ' + str(node))
                 raise
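
A side note on the scalar special-case above: h5py refuses chunk and shuffle filter options on scalar datasets (the quoted "Scalar datasets don't support chunk/filter options" error), which is why the code falls back to chunks=None and shuffle=False for scalars. A minimal, assumption-based sketch; the file path is illustrative:

import h5py
import numpy as np

with h5py.File('/tmp/scalar_demo.hdf5', 'w') as f:
    # Array data: chunking + shuffle are allowed (and help later compression)
    f.create_dataset('vec', data=np.arange(10), chunks=True, shuffle=True)
    # Scalar data: must be written without chunk/filter options
    f.create_dataset('scalar', data=3.14)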
Esempio n. 41
0
def parse_param(config, section, selector, fullname, pname, value):
    """Parse a param specification from a PISA config file.

    Note that if the param specification does not include ``fixed``,
    ``prior``, and/or ``range``, the defaults for these are:
    ``fixed = True``, ``prior = None``, and ``range = None``.

    If a prior is specified explicitly via ``.prior``, this takes precedence,
    but if no ``.prior`` is specified and the param's value is parsed to be a
    :class:`uncertainties.AffineScalarFunc` (i.e., it has a `std_dev`
    attribute), a Gaussian prior is constructed from that and then the
    AffineScalarFunc is stripped out of the param's value (such that it is
    just a :class:`~pint.quantity.Quantity`).

    Parameters
    ----------
    config : pisa.utils.config_parser.PISAConfigParser
    section : string
    selector : string or None
    fullname : string
    pname : string
    value : string

    Returns
    -------
    param : pisa.core.param.Param

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.param import Param
    from pisa.core.prior import Prior
    kwargs = dict(name=pname, is_fixed=True, prior=None, range=None)
    try:
        value = parse_quantity(value)
        kwargs['value'] = value.nominal_value * value.units
    except ValueError:
        value = parse_string_literal(value)
        kwargs['value'] = value

    # Search for explicit attr specifications
    if config.has_option(section, fullname + '.fixed'):
        kwargs['is_fixed'] = config.getboolean(section, fullname + '.fixed')

    if config.has_option(section, fullname + '.unique_id'):
        kwargs['unique_id'] = config.get(section, fullname + '.unique_id')

    if config.has_option(section, fullname + '.range'):
        range_ = config.get(section, fullname + '.range')
        # Note: `nominal` and `sigma` are called out in the `range_` string
        if 'nominal' in range_:
            nominal = value.n * value.units  # pylint: disable=unused-variable
        if 'sigma' in range_:
            sigma = value.s * value.units  # pylint: disable=unused-variable
        range_ = range_.replace('[', 'np.array([')
        range_ = range_.replace(']', '])')
        # Evaluate the range expression (it may reference `nominal`, `sigma`,
        # and np, defined above)
        kwargs['range'] = eval(range_).to(value.units)  # pylint: disable=eval-used

    if config.has_option(section, fullname + '.prior'):
        prior = str(config.get(section, fullname + '.prior')).strip().lower()
        if prior == 'uniform':
            kwargs['prior'] = Prior(kind='uniform')
        elif prior == 'jeffreys':
            kwargs['prior'] = Prior(kind='jeffreys',
                                    A=kwargs['range'][0],
                                    B=kwargs['range'][1])
        elif prior == 'spline':
            priorname = pname
            if selector is not None:
                priorname += '_' + selector
            data = config.get(section, fullname + '.prior.data')
            data = from_file(data)
            data = data[priorname]
            knots = ureg.Quantity(np.asarray(data['knots']), data['units'])
            knots = knots.to(value.units)
            coeffs = np.asarray(data['coeffs'])
            deg = data['deg']
            kwargs['prior'] = Prior(kind='spline',
                                    knots=knots,
                                    coeffs=coeffs,
                                    deg=deg)
        elif prior == 'none':
            kwargs['prior'] = None
        elif 'gauss' in prior:
            raise Exception('Please use new style +/- notation for gaussian'
                            ' priors in config')
        else:
            raise Exception('Prior type unknown')

    elif hasattr(value, 'std_dev') and value.std_dev != 0:
        kwargs['prior'] = Prior(kind='gaussian',
                                mean=value.nominal_value * value.units,
                                stddev=value.std_dev * value.units)

    # Strip out any uncertainties from value itself (an explicit ``.prior``
    # specification takes precedence over this)
    if hasattr(value, 'std_dev'):
        value = value.nominal_value * value.units
    try:
        param = Param(**kwargs)
    except:
        logging.error('Failed to instantiate new Param object with kwargs %s',
                      kwargs)
        raise

    return param
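
To make the defaults and prior handling above concrete, here is a small illustrative sketch; the parameter name, value, and option spellings are invented for illustration and are not taken from a real PISA config.

# Defaults applied when a spec omits the optional attributes (mirrors the
# `kwargs` initialization at the top of parse_param):
defaults = dict(name='theta23', is_fixed=True, prior=None, range=None)

# Hypothetical spec lines:
#   theta23       = 45.0 +/- 3.0 units.degree
#   theta23.range = [35.0, 55.0] * units.degree
#   theta23.fixed = False
#
# With the logic above, the "+/- 3.0" uncertainty (and no explicit `.prior`)
# yields a Gaussian prior with mean 45 deg and stddev 3 deg, `.range` is
# evaluated into a Quantity array, `.fixed` overrides the default of True,
# and the uncertainty is stripped from the stored value itself.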
Esempio n. 42
0
    def retrieve_expression(h5group, expression):
        """Retrieve data from an HDF5 group `h5group` according to
        `expresssion`. This can apply expressions with simple mathematical
        operators and numpy functions to multiple fields within the HDF5 file
        to derive the output. Python keywords are _not_ allowed, since they
        may alias with a name.

        Refer to any numpy functions by prefixing with either "np.<func>" or
        "numpy.<func>". In order to specify division, spaces must surround the
        forward slash, such that it isn't interpreted as a path.

        Nodes in the HDF5 hierarchy are separated by forward slashes ("/") in a
        path spec. We restrict valid HDF5 node names to contain the characters
        a-z, A-Z, 0-9, peroids ("."), and underscores ("_"). with the
        additional restriction that the node name must not start with a period
        or a number, and a path cannot start with a slash.


        Parameters
        ----------
        h5group : h5py Group
        expression : string
            Expression to evaluate.

        Returns
        -------
        result : result of evaluating `expression`

        Examples
        --------
        >>> retrieve_expression(h5group, 'np.sqrt(MCneutrino/x**2 + MCneutrino/y**2)')

        Indexing into the data arrays can also be performed, and numpy masks
        used as usual:

        >>> expr = 'I3MCTree/energy[I3MCTree/event == I3EventHeader[0]]'

        """
        h5path_re = re.compile(
            r'''
            ([a-z_]          # First character must be letter or underscore
             [a-z0-9_.]*     # 0 or more legal chars: letters, numbers, _, .
             (?:             # (Do not return the following group separately)
                [/]{0,1}     # Next character CAN be no or 1 front-slash
                [a-z0-9_.]+  # But a slash *must* be followed by legal chars
             )*              # Slash+chars pattern might not occur, or repeat
            )''', re.VERBOSE | re.IGNORECASE
        )
        numpy_re = re.compile(r'^(np|numpy)\.[a-z_.]+', re.IGNORECASE)

        eval_str = expression
        intermediate_data = {}
        for h5path in h5path_re.findall(expression):
            if numpy_re.match(h5path):
                continue
            intermediate_data[h5path] = DataProcParams.retrieve_node_data(
                h5group, h5path
            )
            eval_str = eval_str.replace(h5path,
                                        "intermediate_data['%s']"%h5path)

        try:
            result = eval(eval_str) # pylint: disable=eval-used
        except:
            logging.error('`expression` "%s" was translated into `eval_str`'
                          ' "%s" and failed to evaluate.',
                          expression, eval_str)
            raise

        return result
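
The translation step above can be illustrated in isolation. In the sketch below a plain dict stands in for the h5py Group and a compact form of the verbose regex is used; this is only a demonstration of the substitution logic, not the module's API.

import re
import numpy as np

h5path_re = re.compile(r'([a-z_][a-z0-9_.]*(?:/[a-z0-9_.]+)*)', re.IGNORECASE)
numpy_re = re.compile(r'^(np|numpy)\.[a-z_.]+', re.IGNORECASE)

# Stand-in for DataProcParams.retrieve_node_data(h5group, h5path)
fake_group = {'MCneutrino/x': np.array([3.0]), 'MCneutrino/y': np.array([4.0])}

expression = 'np.sqrt(MCneutrino/x**2 + MCneutrino/y**2)'
eval_str = expression
intermediate_data = {}
for h5path in h5path_re.findall(expression):
    if numpy_re.match(h5path):
        continue  # numpy calls are left untouched
    intermediate_data[h5path] = fake_group[h5path]
    eval_str = eval_str.replace(h5path, "intermediate_data['%s']" % h5path)

print(eval(eval_str))  # -> [5.]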
Esempio n. 43
0
def test_find_index():
    """Unit tests for `find_index` function.

    Correctness is defined as producing the same histogram as numpy.histogramdd
    by using the output of `find_index` (ignoring underflow and overflow values).
    Additionally, -1 should be returned if a value is below the range
    (underflow) or is nan, and num_bins should be returned for a value above
    the range (overflow).
    """
    # Negative, positive, integer, non-integer, binary-unrepresentable (0.1) edges
    basic_bin_edges = [-1, -0.5, -0.1, 0, 0.1, 0.5, 1, 2, 3, 4]

    failures = 0
    for basic_bin_edges in [
        # Negative, positive, integer, non-integer, binary-unrepresentable (0.1) edges
        [-1, -0.5, -0.1, 0, 0.1, 0.5, 1, 2, 3, 4],

        # A single infinite bin: [-np.inf, np.inf]
        [],

        # Half-infinite bins (lower or upper edge) & [-inf, .1, +inf]
        [0.1],

        # Single bin with finite edges & +/-inf-edge(s)-added variants
        [-0.1, 0.1],
    ]:
        # Bin edges from above, w/ and w/o +/-inf as left and/or right edges
        for le, re in [(None, None), (-np.inf, None), (None, np.inf),
                       (-np.inf, np.inf)]:
            bin_edges = deepcopy(basic_bin_edges)
            if le is not None:
                bin_edges = [le] + bin_edges
            if re is not None:
                bin_edges = bin_edges + [re]
            if len(bin_edges) < 2:
                continue
            logging.debug('bin_edges being tested: %s', bin_edges)
            bin_edges = np.array(bin_edges, dtype=FTYPE)

            num_bins = len(bin_edges) - 1
            underflow_idx = -1
            overflow_idx = num_bins

            #
            # Construct test values to try out
            #

            non_finite_vals = [-np.inf, +np.inf, np.nan]

            # Values within bins (i.e., not on edges)
            inbin_vals = []
            for idx in range(len(bin_edges) - 1):
                lower_be = bin_edges[idx]
                upper_be = bin_edges[idx + 1]
                if np.isfinite(lower_be):
                    if np.isfinite(upper_be):
                        inbin_val = (lower_be + upper_be) / 2
                    else:
                        inbin_val = lower_be + 10.5
                else:
                    if np.isfinite(upper_be):
                        inbin_val = upper_be - 10.5
                    else:
                        inbin_val = 10.5
                inbin_vals.append(inbin_val)

            # Values above/below bin edges by one unit of floating point
            # accuracy
            eps = np.finfo(FTYPE).eps  # pylint: disable=no-member
            below_edges_vals = [FTYPE((1 - eps) * be) for be in bin_edges]
            above_edges_vals = [FTYPE((1 + eps) * be) for be in bin_edges]

            test_vals = np.concatenate([
                non_finite_vals,
                bin_edges,
                inbin_vals,
                below_edges_vals,
                above_edges_vals,
            ])
            logging.trace('test_vals = %s', test_vals)

            #
            # Run tests
            #
            for val in test_vals:
                val = FTYPE(val)

                np_histvals, _ = np.histogramdd([val],
                                                np.atleast_2d(bin_edges))
                # select first & only dim
                nonzero_indices = np.nonzero(np_histvals)[0]
                if np.isnan(val):
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val < bin_edges[0]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = underflow_idx
                elif val > bin_edges[-1]:
                    assert len(nonzero_indices) == 0, str(len(nonzero_indices))
                    expected_idx = overflow_idx
                else:
                    assert len(nonzero_indices) == 1, str(len(nonzero_indices))
                    expected_idx = nonzero_indices[0]

                found_idx = find_index(val, bin_edges)

                if found_idx != expected_idx:
                    failures += 1
                    msg = 'val={}, edges={}: Expected idx={}, found idx={}'.format(
                        val, bin_edges, expected_idx, found_idx)
                    logging.error(msg)

    assert failures == 0, f"{failures} failures, inspect ERROR messages above for info"

    logging.info('<< PASS : test_find_index >>')
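
For reference, the contract this test exercises can be summarized with a short, assumption-based sketch (find_index itself is defined elsewhere in PISA; the edges below are arbitrary):

# bin_edges = np.array([0.0, 1.0, 2.0], dtype=FTYPE)   # 2 bins
# find_index(0.5, bin_edges)     -> 0   (inside the first bin)
# find_index(-3.0, bin_edges)    -> -1  (underflow)
# find_index(np.nan, bin_edges)  -> -1  (nan treated as underflow)
# find_index(5.0, bin_edges)     -> 2   (overflow == num_bins)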
Esempio n. 44
0
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or ConfigParser

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known
        arg values are parsed out fully into Python objects, while the rest
        remain as strings that must be used or parsed elsewhere. An additional
        'detector_name' key stores the detector name (or None if not given).

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    if isinstance(config, basestring):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s" %
            config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except:
                    dims_defined = [
                        split(dim, sep='.')[1]
                        for dim in config['binning'].keys()
                        if dim.startswith(binning +
                                          '.') and not dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'", bin_name, binning,
                        def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"' %
                (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"' %
                (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".' %
                    (fullname, section))
                raise
            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param here, and
                # just link to the param object that was already instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if not kw.has_key('params'):
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(name=infodict['pname'],
                                                 selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined in a previous stage
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(config=config,
                                        section=section,
                                        selector=infodict['selector'],
                                        fullname=fullname,
                                        pname=infodict['pname'],
                                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # Otherwise, if the name contains '_specs', it's a PI stage spec
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it 'events'?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # otherwise it must be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list of input/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value
            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name
    return stage_dicts
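
A brief, assumption-based sketch of consuming the return value (the config path below is hypothetical):

# stage_dicts = parse_pipeline_config('settings/pipeline/example.cfg')
# detector_name = stage_dicts.pop('detector_name')  # stored alongside the tuples
# for (stage_name, service_name), service_kwargs in stage_dicts.items():
#     print(stage_name, service_name, sorted(service_kwargs.keys()))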
Esempio n. 45
0
File: hdf.py Progetto: gkrueckl/pisa
                try:
                    shuffle = False
                    chunks = None
                    fhandle.create_dataset(name=full_path, data=node,
                                           chunks=chunks, compression=None,
                                           shuffle=shuffle, fletcher32=False)
                except:
                    logging.error('  full_path: ' + full_path)
                    logging.error('  chunks   : ' + str(chunks))
                    logging.error('  shuffle  : ' + str(shuffle))
                    logging.error('  node     : ' + str(node))
                    raise

    # Perform the actual operation using the dict passed in by user
    if isinstance(tgt, basestring):
        try:
            h5file = h5py.File(os.path.expandvars(tgt), 'w')
            store_recursively(fhandle=h5file, node=data_dict)
        except IOError, e:
            logging.error(e)
            logging.error("to_hdf: Unable to open `%s` for writing" % tgt)
            raise
        finally:
            h5file.close()
    elif isinstance(tgt, h5py.Group):
        store_recursively(fhandle=tgt, node=data_dict)
    else:
        errmsg = "to_hdf: Invalid `tgt` type: `%s`" % type(tgt)
        logging.error(errmsg)
        raise TypeError(errmsg)
Esempio n. 46
0
def parse_fit_config(fit_cfg):
    """Perform sanity checks on and parse fit configuration file.

    Parameters
    ----------
    fit_cfg : str
        path to a fit configuration file

    Returns
    -------
    fit_cfg : PISAConfigParser
        parsed fit configuration
    sys_list : list of str
        parsed names of systematic parameters
    units_list : list of str
        units corresponding to each discrete systematic
    combine_regex : list of str
        each string is a regular expression for combining pipeline outputs; see
        :func:`pisa.core.map.MapSet.combine_regex` for details.

    """
    fit_cfg = from_file(fit_cfg)
    no_ws_section_map = {s.strip(): s for s in fit_cfg.sections()}

    if GENERAL_SECTION_NAME not in no_ws_section_map.values():
        raise KeyError('Fit config is missing the "%s" section!' %
                       GENERAL_SECTION_NAME)

    general_section = fit_cfg[GENERAL_SECTION_NAME]
    if SYS_LIST_OPTION not in general_section:
        raise KeyError(
            "Fit config has to specify systematic parameters as"
            ' "%s" option in "%s" section (comma-separated list of names).' %
            (SYS_LIST_OPTION, GENERAL_SECTION_NAME))

    sys_list = [s.strip() for s in general_section[SYS_LIST_OPTION].split(",")]

    if UNITS_OPTION in general_section:
        units_list = []
        units_specs = (general_section[UNITS_OPTION].replace(
            UNITS_SPECIFIER, "").split(","))
        for units_spec in units_specs:
            # Make sure units are interpretable by Pint
            try:
                ureg.Unit(units_spec)
            except:
                logging.error(
                    'Unit "%s" specified by "%s" option in "general" section'
                    ' is not interpretable by Pint',
                    units_spec,
                    UNITS_OPTION,
                )
                raise
            units_list.append(units_spec)
    else:
        units_list = ["dimensionless" for s in sys_list]
        logging.warn(
            "No %s option found in %s section; assuming systematic parameters are"
            " dimensionless",
            UNITS_OPTION,
            GENERAL_SECTION_NAME,
        )

    if len(units_list) != len(sys_list):
        raise ValueError(
            '{} units specified by "{}" option but {} systematics specified'
            ' by "{}" option; must be same number of each.'.format(
                len(units_list), UNITS_OPTION, len(sys_list), SYS_LIST_OPTION))

    logging.info(
        "Found systematic parameters %s",
        ["{} ({})".format(s, u) for s, u in zip(sys_list, units_list)],
    )

    combine_regex = general_section.get(COMBINE_REGEX_OPTION, None)
    if combine_regex:
        try:
            combine_regex = literal_eval(combine_regex)
        except (SyntaxError, ValueError):
            logging.warn(
                'Deprecated syntax for "combine_re" (make into a Python-'
                'evaluatable sequence of strings instead)'
                ' :: combine_regex = %s',
                combine_regex,
            )
            combine_regex = [r.strip() for r in combine_regex.split(",")]

    if APPLY_ALL_SECTION_NAME in no_ws_section_map:
        apply_all_section = fit_cfg[no_ws_section_map[APPLY_ALL_SECTION_NAME]]
        for no_ws_sname, sname in no_ws_section_map.items():
            if not (no_ws_sname.startswith(NOMINAL_SET_PFX)
                    or no_ws_sname.startswith(SYS_SET_PFX)):
                continue
            sys_set_section = fit_cfg[sname]
            for option, val in apply_all_section.items():
                sys_set_section[option] = val

    return fit_cfg, sys_list, units_list, combine_regex
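
For orientation, a hypothetical fit-config excerpt consistent with the checks above; the actual section/option constants (GENERAL_SECTION_NAME, SYS_LIST_OPTION, UNITS_OPTION, COMBINE_REGEX_OPTION, UNITS_SPECIFIER) are defined elsewhere in the module, so the spellings below are assumptions:

#   [general]
#   sys_list      = dom_eff, hole_ice
#   units         = units.dimensionless,units.dimensionless
#   combine_regex = ["nue.*", "numu.*"]
#
# With such a file, parse_fit_config would return the parsed config plus
# sys_list=['dom_eff', 'hole_ice'], a units_list with one entry per
# systematic, and combine_regex=['nue.*', 'numu.*'].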
Esempio n. 47
0
File: hdf.py Progetto: gkrueckl/pisa
def to_hdf(data_dict, tgt):
    """
    Store a (possibly nested) dictionary to an HDF5 file or branch node within
    an HDF5 file (an h5py Group).
    
    This creates hardlinks for duplicate non-trivial leaf nodes (h5py Datasets)
    to minimize storage space required for redundant datasets. Duplication is
    detected via object hashing.
    
    NOTE: Branch nodes are sorted before storing for consistency in the
    generated file despite Python dictionaries having no defined ordering among
    keys.
    
    Arguments
    ---------
    data_dict : dict
        Dictionary to be stored
    tgt : str or h5py.Group
        Target for storing data. If `tgt` is a str, it is interpreted as a
        filename; a file is created with that name (overwriting an existing
        file, if present). After writing, the file is closed. If `tgt` is an
        h5py.Group, the data is simply written to that Group and it is left
        open at function return.
    """
    if not isinstance(data_dict, dict):
        errmsg = 'to_hdf: `data_dict` only accepts top-level dict.'
        logging.error(errmsg)
        raise TypeError(errmsg)

    # Define a function for iteratively doing the work
    def store_recursively(fhandle, node, path=None, node_hashes=None):
        if path is None:
            path = []
        if node_hashes is None:
            node_hashes = {}
        full_path = '/' + '/'.join(path)
        if isinstance(node, dict):
            logging.trace("  creating Group `%s`" % full_path)
            try:
                fhandle.create_group(full_path)
            except ValueError:
                pass
            for key in sorted(node.iterkeys()):
                key_str = str(key)
                if not isinstance(key, str):
                    logging.warn('Stringifying key `' + key_str +
                                 '` for use as name in HDF5 file')
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = utils.hash_obj(node)
            if node_hash in node_hashes:
                logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                              (full_path, node_hashes[node_hash]))
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return
            # For now, convert None to np.nan since h5py appears to not handle None
            if node is None:
                node = np.nan
                logging.warn("  encountered `None` at node `%s`; converting to"
                             " np.nan" % full_path)
            # "Scalar datasets don't support chunk/filter options". Shuffling
            # is a good idea otherwise since subsequent compression will
            # generally benefit; shuffling requires chunking. Compression is
            # not done here since it is slow.
            if np.isscalar(node):
                shuffle = False
                chunks = None
            else:
                shuffle = True
                chunks = True
                # Store the node_hash for linking to later if this is more
                # than a scalar datatype
                node_hashes[node_hash] = full_path
            # TODO: Treat strings as follows? Would this break compatibility
            # with pytables/Pandas? What are benefits? Leaving out for now.
            # if isinstance(node, basestr):
            #     dtype = h5py.special_dtype(vlen=str)
            #     fh.create_dataset(k,data=v,dtype=dtype)
            logging.trace("  creating dataset at node `%s`" % full_path)
            try:
                fhandle.create_dataset(name=full_path, data=node,
                                       chunks=chunks, compression=None,
                                       shuffle=shuffle, fletcher32=False)
            except TypeError:
                try:
                    shuffle = False
                    chunks = None
                    fhandle.create_dataset(name=full_path, data=node,
                                           chunks=chunks, compression=None,
                                           shuffle=shuffle, fletcher32=False)
                except:
                    logging.error('  full_path: ' + full_path)
                    logging.error('  chunks   : ' + str(chunks))
                    logging.error('  shuffle  : ' + str(shuffle))
                    logging.error('  node     : ' + str(node))
                    raise

    # Perform the actual operation using the dict passed in by user
    if isinstance(tgt, basestring):
        try:
            h5file = h5py.File(os.path.expandvars(tgt), 'w')
            store_recursively(fhandle=h5file, node=data_dict)
        except IOError, e:
            logging.error(e)
            logging.error("to_hdf: Unable to open `%s` for writing" % tgt)
            raise
        finally:
            h5file.close()
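
A brief usage sketch for to_hdf; the file path and data are illustrative, and the hardlinking claim follows from the hashing logic shown above:

# import numpy as np
# data = {
#     'reco': {'energy': np.arange(10.0), 'energy_alias': np.arange(10.0)},
#     'meta': {'n_events': 10},
# }
# to_hdf(data, '/tmp/example.hdf5')
# Since 'energy' and 'energy_alias' hold identical arrays, they hash equal and
# the second dataset should be written as an HDF5 hardlink to the first.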
Esempio n. 48
0
File: icc.py Progetto: thehrh/pisa-1
    def _compute_nominal_outputs(self):
        '''
        Load events, perform sanity checks, and fill them into histograms.
        If an alt_bg file is specified, also fill those events into separate
        histograms that are normalized to the nominal ones (we are only
        interested in the shape difference).
        '''
        # get params
        icc_bg_file = self.params.icc_bg_file.value
        if 'shape' in self.error_method:
            alt_icc_bg_file = self.params.alt_icc_bg_file.value
        else:
            alt_icc_bg_file = None
        sim_ver = self.params.sim_ver.value
        use_def1 = self.params.use_def1.value
        bdt_cut = self.params.bdt_cut.m_as('dimensionless')

        self.bin_names = self.output_binning.names
        self.bin_edges = []
        for name in self.bin_names:
            if 'energy' in name:
                bin_edges = self.output_binning[name].bin_edges.to(
                    'GeV').magnitude
            else:
                bin_edges = self.output_binning[name].bin_edges.magnitude
            self.bin_edges.append(bin_edges)

        # the rest of this function is PISA v2 legacy code...
        logging.info('Initializing BackgroundServiceICC...')
        logging.info('Opening file: %s', icc_bg_file)

        try:
            bg_file = h5py.File(find_resource(icc_bg_file), 'r')
            if alt_icc_bg_file is not None:
                alt_bg_file = h5py.File(find_resource(alt_icc_bg_file), 'r')
        except IOError as e:
            logging.error("Unable to open icc_bg_file %s", icc_bg_file)
            logging.error(e)
            sys.exit(1)

        # sanity check
        santa_doms = bg_file['IC86_Dunkman_L6_SANTA_DirectDOMs']['value']
        l3 = bg_file['IC86_Dunkman_L3']['value']
        l4 = bg_file['IC86_Dunkman_L4']['result']
        l5 = bg_file['IC86_Dunkman_L5']['bdt_score']
        l6 = bg_file['IC86_Dunkman_L6']
        if use_def1:
            l4_pass = np.all(l4 == 1)
        else:
            if sim_ver in ['5digit', 'dima']:
                l4_invVICH = bg_file['IC86_Dunkman_L4']['result_invertedVICH']
                l4_pass = np.all(np.logical_or(l4 == 1, l4_invVICH == 1))
            else:
                logging.info(
                    'def. 2 background is not yet available for the old'
                    ' simulation, so def. 1 is used instead.')
                l4_pass = np.all(l4 == 1)
        assert (np.all(santa_doms >= 3) and np.all(l3 == 1) and l4_pass
                and np.all(l5 >= 0.1))
        corridor_doms_over_threshold = l6['corridor_doms_over_threshold']

        inverted_corridor_cut = corridor_doms_over_threshold > 1
        assert (np.all(inverted_corridor_cut)
                and np.all(l6['santa_direct_doms'] >= 3)
                and np.all(l6['mn_start_contained'] == 1.)
                and np.all(l6['mn_stop_contained'] == 1.))

        #load events
        if sim_ver == '4digit':
            variable = 'IC86_Dunkman_L6_MultiNest8D_PDG_Neutrino'
        elif sim_ver in ['5digit', 'dima']:
            variable = 'IC86_Dunkman_L6_PegLeg_MultiNest8D_NumuCC'
        else:
            raise ValueError(
                "sim_ver must be one of '4digit', '5digit', or 'dima'!")
        reco_energy_all = np.array(bg_file[variable]['energy'])
        reco_coszen_all = np.array(np.cos(bg_file[variable]['zenith']))
        pid_all = np.array(bg_file['IC86_Dunkman_L6']['delta_LLH'])
        if alt_icc_bg_file is not None:
            alt_reco_energy_all = np.array(alt_bg_file[variable]['energy'])
            alt_reco_coszen_all = np.array(
                np.cos(alt_bg_file[variable]['zenith']))
            alt_pid_all = np.array(alt_bg_file['IC86_Dunkman_L6']['delta_LLH'])
            alt_l5 = alt_bg_file['IC86_Dunkman_L5']['bdt_score']

        # Cut: only keep events with BDT score >= bdt_cut (0.2 in the latest
        # MSU result, which makes data/MC agree much better)
        cut_events = {}
        cut = l5 >= bdt_cut
        cut_events['reco_energy'] = reco_energy_all[cut]
        cut_events['reco_coszen'] = reco_coszen_all[cut]
        cut_events['pid'] = pid_all[cut]

        if alt_icc_bg_file is not None:
            # Cut: only keep events with BDT score >= bdt_cut (0.2 in the
            # latest MSU result, which makes data/MC agree much better)
            alt_cut_events = {}
            alt_cut = alt_l5 >= bdt_cut
            alt_cut_events['reco_energy'] = alt_reco_energy_all[alt_cut]
            alt_cut_events['reco_coszen'] = alt_reco_coszen_all[alt_cut]
            alt_cut_events['pid'] = alt_pid_all[alt_cut]

        logging.info("Creating a ICC background hists...")
        # make histo
        if self.params.kde_hist.value:
            self.icc_bg_hist = self.kde_histogramdd(
                np.array([cut_events[bin_name]
                          for bin_name in self.bin_names]).T,
                binning=self.output_binning,
                coszen_name='reco_coszen',
                use_cuda=True,
                bw_method='silverman',
                alpha=0.3,
                oversample=10,
                coszen_reflection=0.5,
                adaptive=True)
        else:
            self.icc_bg_hist, _ = np.histogramdd(sample=np.array(
                [cut_events[bin_name] for bin_name in self.bin_names]).T,
                                                 bins=self.bin_edges)

        conversion = self.params.atm_muon_scale.value.m_as(
            'dimensionless') / ureg('common_year').to('seconds').m
        logging.info('nominal ICC rate at %.6E Hz',
                     self.icc_bg_hist.sum() * conversion)

        if alt_icc_bg_file is not None:
            if self.params.kde_hist.value:
                self.alt_icc_bg_hist = self.kde_histogramdd(
                    np.array([
                        alt_cut_events[bin_name] for bin_name in self.bin_names
                    ]).T,
                    binning=self.output_binning,
                    coszen_name='reco_coszen',
                    use_cuda=True,
                    bw_method='silverman',
                    alpha=0.3,
                    oversample=10,
                    coszen_reflection=0.5,
                    adaptive=True)
            else:
                self.alt_icc_bg_hist, _ = np.histogramdd(sample=np.array([
                    alt_cut_events[bin_name] for bin_name in self.bin_names
                ]).T,
                                                         bins=self.bin_edges)
            # only interested in shape difference, not rate
            scale = self.icc_bg_hist.sum() / self.alt_icc_bg_hist.sum()
            self.alt_icc_bg_hist *= scale
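
The shape-only rescaling in the last two lines can be shown in isolation; the numbers below are made up:

import numpy as np

nominal_hist = np.array([10., 20., 30.])
alt_hist = np.array([2., 5., 3.])

# Rescale the alternative histogram so its total matches the nominal one;
# only the shape difference between the two is retained.
alt_hist *= nominal_hist.sum() / alt_hist.sum()
assert np.isclose(alt_hist.sum(), nominal_hist.sum())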
Esempio n. 49
0
File: hdf.py Progetto: gkrueckl/pisa
 def store_recursively(fhandle, node, path=None, node_hashes=None):
     if path is None:
         path = []
     if node_hashes is None:
         node_hashes = {}
     full_path = '/' + '/'.join(path)
     if isinstance(node, dict):
         logging.trace("  creating Group `%s`" % full_path)
         try:
             fhandle.create_group(full_path)
         except ValueError:
             pass
         for key in sorted(node.iterkeys()):
             key_str = str(key)
             if not isinstance(key, str):
                 logging.warn('Stringifying key `' + key_str +
                              '` for use as name in HDF5 file')
             val = node[key]
             new_path = path + [key_str]
             store_recursively(fhandle=fhandle, node=val, path=new_path,
                               node_hashes=node_hashes)
     else:
         # Check for existing node
         node_hash = utils.hash_obj(node)
         if node_hash in node_hashes:
             logging.trace("  creating hardlink for Dataset: `%s` -> `%s`" %
                           (full_path, node_hashes[node_hash]))
             # Hardlink the matching existing dataset
             fhandle[full_path] = fhandle[node_hashes[node_hash]]
             return
         # For now, convert None to np.nan since h5py appears to not handle None
         if node is None:
             node = np.nan
             logging.warn("  encountered `None` at node `%s`; converting to"
                          " np.nan" % full_path)
         # "Scalar datasets don't support chunk/filter options". Shuffling
         # is a good idea otherwise since subsequent compression will
         # generally benefit; shuffling requires chunking. Compression is
         # not done here since it is slow.
         if np.isscalar(node):
             shuffle = False
             chunks = None
         else:
             shuffle = True
             chunks = True
             # Store the node_hash for linking to later if this is more than
             # a scalar datatype
             node_hashes[node_hash] = full_path
         # TODO: Treat strings as follows? Would this break compatibility
         # with pytables/Pandas? What are benefits? Leaving out for now.
         # if isinstance(node, basestr):
         #     dtype = h5py.special_dtype(vlen=str)
         #     fh.create_dataset(k,data=v,dtype=dtype)
         logging.trace("  creating dataset at node `%s`" % full_path)
         try:
             fhandle.create_dataset(name=full_path, data=node,
                                    chunks=chunks, compression=None,
                                    shuffle=shuffle, fletcher32=False)
         except TypeError:
             try:
                 shuffle = False
                 chunks = None
                 fhandle.create_dataset(name=full_path, data=node,
                                        chunks=chunks, compression=None,
                                        shuffle=shuffle, fletcher32=False)
             except:
                 logging.error('  full_path: ' + full_path)
                 logging.error('  chunks   : ' + str(chunks))
                 logging.error('  shuffle  : ' + str(shuffle))
                 logging.error('  node     : ' + str(node))
                 raise