Exemple #1
0
    def __init__(self, val=None):
        """Set up default metadata, then load data and metadata from `val`.

        Parameters
        ----------
        val : None, str, h5py.Group, Events, or Mapping
            * str or h5py.Group : read via `hdf.from_hdf`; metadata is taken
              from the loaded object's `attrs` attribute when it has one.
            * Events : data and `metadata` are deep-copied from `val`.
            * Mapping : data is deep-copied; metadata is deep-copied from
              `val.metadata` if present, otherwise from `val.attrs` if present.
            * anything else (including None) : an empty `FlavIntData` with no
              extra metadata is used.

        """
        default_fields = (
            ('detector', ''),
            ('geom', ''),
            ('runs', []),
            ('proc_ver', ''),
            ('cuts', []),
            ('flavints_joined', []),
        )
        self.metadata = OrderedDict(default_fields)

        # Fallbacks used when `val` matches none of the recognized types
        loaded_meta = OrderedDict()
        loaded_data = FlavIntData()

        if isinstance(val, (str, h5py.Group)):
            loaded_data = hdf.from_hdf(val)
            loaded_meta = getattr(loaded_data, 'attrs', OrderedDict())
        elif isinstance(val, Events):
            loaded_meta = deepcopy(val.metadata)
            loaded_data = deepcopy(val)
        elif isinstance(val, Mapping):
            loaded_data = deepcopy(val)
            # `metadata` takes precedence over `attrs` when both exist
            for attr_name in ('metadata', 'attrs'):
                if hasattr(val, attr_name):
                    loaded_meta = deepcopy(getattr(val, attr_name))
                    break

        # Anything exposing a callable `tolist` is replaced by its list form
        for field, entry in loaded_meta.items():
            to_list = getattr(entry, 'tolist', None)
            if callable(to_list):
                loaded_meta[field] = to_list()

        self.metadata.update(loaded_meta)
        self.validate(loaded_data)
        self.update(loaded_data)
        self.update_hash()
Exemple #2
0
 def __load(self, fname):
     """Read data and top-level attributes from the HDF5 resource `fname`.

     The file is located with `resources.find_resource`, its attributes are
     copied into a plain dict (values exposing `tolist` are converted with
     it), and its contents are loaded with `hdf.from_hdf` and passed to
     `self.validate`.

     Returns
     -------
     (data, meta) : tuple
         The loaded data and the dict of converted file attributes.

     """
     resolved_path = resources.find_resource(fname)
     with h5py.File(resolved_path, 'r') as h5_file:
         raw_attrs = dict(h5_file.attrs)
         meta = {
             name: (value.tolist() if hasattr(value, 'tolist') else value)
             for name, value in raw_attrs.items()
         }
         contents = hdf.from_hdf(h5_file)
     self.validate(contents)
     return contents, meta
Exemple #3
0
def from_file(fname, fmt=None, **kwargs):
    """Load `fname` with the reader selected by `fmt` or, if `fmt` is None,
    by the extension found in `fname`.

    Parameters
    ----------
    fname : string
        File path / name from which to load data.

    fmt : None or string
        Format to assume for the file. If None, the (lower-cased) extension
        of `fname` is used instead.

    **kwargs
        Forwarded unchanged to whichever reader is dispatched.

    Returns
    -------
    Whatever the dispatched reader returns; this differs per format.

    Raises
    ------
    ValueError
        If the extension / format matches none of the known readers.

    """
    if fmt is not None:
        stem, ext = fname, fmt.lower()
    else:
        stem, dotted_ext = os.path.splitext(fname)
        ext = dotted_ext.replace('.', '').lower()

    # An extension listed in ZIP_EXTS or XOR_EXTS is treated as an outer
    # wrapper: dispatch on the extension that precedes it in the name.
    if ext in ZIP_EXTS or ext in XOR_EXTS:
        inner_ext = os.path.splitext(stem)[1]
        ext = inner_ext.replace('.', '').lower()

    fname = resources.find_resource(fname)

    # Ordered (extensions, reader) pairs; the first match wins.
    readers = (
        (jsons.JSON_EXTS, jsons.from_json),
        (hdf.HDF5_EXTS, hdf.from_hdf),
        (PKL_EXTS, from_pickle),
        (CFG_EXTS, from_cfg),
        (TXT_EXTS, from_txt),
    )
    for known_exts, reader in readers:
        if ext in known_exts:
            return reader(fname, **kwargs)

    errmsg = 'File "%s": unrecognized extension "%s"' % (fname, ext)
    log.logging.error(errmsg)
    raise ValueError(errmsg)
Exemple #4
0
def from_file(fname, fmt=None):
    """Dispatch correct file reader based on fmt (if specified) or guess
    based on file name's extension.

    Parameters
    ----------
    fname : string
        Path of the file to load.

    fmt : None or string
        Format name to use in place of the file extension. If None, the
        lower-cased extension of `fname` (without the dot) is used.

    Returns
    -------
    The object produced by the JSON, HDF5 or pickle reader.

    Raises
    ------
    TypeError
        If the extension / format is not one of the recognized ones.

    """
    if fmt is None:
        ext = os.path.splitext(fname)[1].replace('.', '').lower()
    else:
        ext = fmt.lower()

    if ext in JSON_EXTS:
        return jsons.from_json(fname)
    if ext in HDF5_EXTS:
        return hdf.from_hdf(fname)
    if ext in PKL_EXTS:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # The context manager guarantees the handle is closed even if loading
        # fails (the previous `file(...)` call was never closed).
        with open(fname, 'rb') as pkl_file:
            return cPickle.load(pkl_file)

    errmsg = 'Unrecognized file type/extension: ' + ext
    logging.error(errmsg)
    raise TypeError(errmsg)
Exemple #5
0
def from_file(fname, fmt=None):
    """Dispatch correct file reader based on fmt (if specified) or guess
    based on file name's extension.

    Parameters
    ----------
    fname : string
        Path of the file to load.

    fmt : None or string
        Format name to use in place of the file extension. If None, the
        lower-cased extension of `fname` (without the dot) is used.

    Returns
    -------
    The object produced by the JSON, HDF5 or pickle reader.

    Raises
    ------
    TypeError
        If the extension / format is not one of the recognized ones.

    """
    if fmt is None:
        _, dotted_ext = os.path.splitext(fname)
        ext = dotted_ext.replace('.', '').lower()
    else:
        ext = fmt.lower()

    if ext in JSON_EXTS:
        return jsons.from_json(fname)
    elif ext in HDF5_EXTS:
        return hdf.from_hdf(fname)
    elif ext in PKL_EXTS:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # Open through a context manager so the handle is always closed; the
        # earlier `file(fname,'rb')` call leaked it.
        with open(fname, 'rb') as pkl_file:
            return cPickle.load(pkl_file)
    else:
        errmsg = 'Unrecognized file type/extension: ' + ext
        logging.error(errmsg)
        raise TypeError(errmsg)
Exemple #6
0
    def _get_reco_kernels(self, reco_vbwkde_evts_file=None, evts_dict=None,
                          reco_vbwkde_make_plots=False, **kwargs):
        """Given a reco events resource (resource file name or dictionary),
        retrieve data from it then serialize and hash the data. If the object
        attribute kernels were computed from the same source data, simply
        return those. Otherwise, compute the kernels anew and return them.

        Arguments
        ---------
        NOTE: One--and only one--of the two arguments must be specified.

        reco_vbwkde_evts_file : str (or dict)
            Name or path to file containing event reco info. See doc for
            __init__ method for details about contents. If a dict is passed
            in, it is automatically populated to evts_dict (see below).

        evts_dict : dict
            Dictionary containing event reco info. Allows user to pass in a
            non-string-object to avoid re-loading a file to check whether the
            contents have changed each time. See doc for __init__ method for
            details about the dictionary's format.

        reco_vbwkde_make_plots : bool
            Forwarded as `make_plots` to `all_kernels_from_events`.

        **kwargs
            Only 'e_reco_scale' and 'cz_reco_scale' are inspected; if present
            they must equal 1.

        Returns
        -------
        The kernels stored in `self.kernels` (cached or freshly computed).

        Raises
        ------
        ValueError
            If `reco_vbwkde_make_plots` is not a bool or a reco scale != 1.
        TypeError
            If both or neither of the event sources are given, or they are of
            the wrong type.
        """
        if not isinstance(reco_vbwkde_make_plots, bool):
            raise ValueError("Option reco_vbwkde_make_plots must be specified and of bool type")

        for reco_scale in ['e_reco_scale', 'cz_reco_scale']:
            if reco_scale in kwargs and kwargs[reco_scale] != 1:
                raise ValueError('%s = %.2f, must be 1.0 for RecoServiceVBWKDE!'
                                  %(reco_scale, kwargs[reco_scale]))

        REMOVE_SIM_DOWNGOING = True

        if (reco_vbwkde_evts_file is not None) and (evts_dict is not None):
            raise TypeError(
                'One--and only one--of {reco_vbwkde_evts_file|evts_dict} ' +
                'may be specified'
            )

        # A dict handed in through the file argument is treated as evts_dict.
        # (Previously evts_dict was reset to None right after being assigned,
        # so this documented usage always ended in the TypeError below.)
        if isinstance(reco_vbwkde_evts_file, dict):
            evts_dict = reco_vbwkde_evts_file
            reco_vbwkde_evts_file = None

        if isinstance(reco_vbwkde_evts_file, str):
            logging.info('Constructing VBWKDEs from event true & reco '
                         'info in file: %s' % reco_vbwkde_evts_file)
            fpath = find_resource(reco_vbwkde_evts_file)
            eventsdict = hdf.from_hdf(fpath)
            new_hash = utils.hash_file(fpath)
        elif isinstance(evts_dict, dict):
            eventsdict = evts_dict
            new_hash = utils.hash_obj(eventsdict)
        else:
            raise TypeError('A {reco_vbwkde_evts_file|evts_dict} must be ' +
                            'provided, where the former must be a str ' +
                            'and the latter must be a dict.')

        # Same source data as last time: reuse the cached kernels
        if (self.kernels is not None) and (new_hash == self.reco_events_hash):
            return self.kernels

        self.kernels = self.all_kernels_from_events(
            eventsdict=eventsdict, remove_sim_downgoing=REMOVE_SIM_DOWNGOING,
            make_plots=reco_vbwkde_make_plots
        )
        self.reco_events_hash = new_hash

        return self.kernels
Exemple #7
0
    def _get_reco_kernels(self,
                          reco_vbwkde_evts_file=None,
                          evts_dict=None,
                          reco_vbwkde_make_plots=False,
                          **kwargs):
        """Given a reco events resource (resource file name or dictionary),
        retrieve data from it then serialize and hash the data. If the object
        attribute kernels were computed from the same source data, simply
        return those. Otherwise, compute the kernels anew and return them.

        Arguments
        ---------
        NOTE: One--and only one--of the two arguments must be specified.

        reco_vbwkde_evts_file : str (or dict)
            Name or path to file containing event reco info. See doc for
            __init__ method for details about contents. If a dict is passed
            in, it is automatically populated to evts_dict (see below).

        evts_dict : dict
            Dictionary containing event reco info. Allows user to pass in a
            non-string-object to avoid re-loading a file to check whether the
            contents have changed each time. See doc for __init__ method for
            details about the dictionary's format.

        reco_vbwkde_make_plots : bool
            Forwarded as `make_plots` to `all_kernels_from_events`.

        **kwargs
            Only 'e_reco_scale' and 'cz_reco_scale' are inspected; if present
            they must equal 1.

        Returns
        -------
        The kernels stored in `self.kernels` (cached or freshly computed).

        Raises
        ------
        ValueError
            If `reco_vbwkde_make_plots` is not a bool or a reco scale != 1.
        TypeError
            If both or neither of the event sources are given, or they are of
            the wrong type.
        """
        if not isinstance(reco_vbwkde_make_plots, bool):
            raise ValueError(
                "Option reco_vbwkde_make_plots must be specified and of bool type"
            )

        for reco_scale in ['e_reco_scale', 'cz_reco_scale']:
            if reco_scale in kwargs and kwargs[reco_scale] != 1:
                raise ValueError(
                    '%s = %.2f, must be 1.0 for RecoServiceVBWKDE!' %
                    (reco_scale, kwargs[reco_scale]))

        REMOVE_SIM_DOWNGOING = True

        if (reco_vbwkde_evts_file is not None) and (evts_dict is not None):
            raise TypeError(
                'One--and only one--of {reco_vbwkde_evts_file|evts_dict} ' +
                'may be specified')

        # Route a dict given via the file argument to evts_dict. The original
        # code cleared evts_dict instead of the file argument, which made this
        # documented usage always fail with the TypeError further down.
        if isinstance(reco_vbwkde_evts_file, dict):
            evts_dict = reco_vbwkde_evts_file
            reco_vbwkde_evts_file = None

        if isinstance(reco_vbwkde_evts_file, str):
            logging.info('Constructing VBWKDEs from event true & reco '
                         'info in file: %s' % reco_vbwkde_evts_file)
            fpath = find_resource(reco_vbwkde_evts_file)
            eventsdict = hdf.from_hdf(fpath)
            new_hash = utils.hash_file(fpath)
        elif isinstance(evts_dict, dict):
            eventsdict = evts_dict
            new_hash = utils.hash_obj(eventsdict)
        else:
            raise TypeError('A {reco_vbwkde_evts_file|evts_dict} must be ' +
                            'provided, where the former must be a str ' +
                            'and the latter must be a dict.')

        # Unchanged source data: the kernels computed earlier are still valid
        if (self.kernels is not None) and (new_hash == self.reco_events_hash):
            return self.kernels

        self.kernels = self.all_kernels_from_events(
            eventsdict=eventsdict,
            remove_sim_downgoing=REMOVE_SIM_DOWNGOING,
            make_plots=reco_vbwkde_make_plots)
        self.reco_events_hash = new_hash

        return self.kernels
def get_template_settings(llh_file):
    """Load `llh_file` with `from_hdf` and return the "params" entry found
    under its "template_settings" key."""
    contents = from_hdf(llh_file)
    template_settings = contents["template_settings"]
    return template_settings["params"]
Exemple #9
0
    def __init__(self, val=None, flavint_groups=None, metadata=None):
        """Populate the container from `val` and reconcile its metadata.

        Parameters
        ----------
        val : str, h5py.Group, Data, Mapping or FlavIntDataGroup
            Source of the data. A str or h5py.Group is read with
            `hdf.from_hdf`; any other accepted object is used as-is (it is not
            copied, and 'muons' / 'noise' entries are popped out of it).

        flavint_groups : optional
            Forwarded to the parent class initializer when `val` still holds
            data after 'muons' / 'noise' have been removed.

        metadata : None or mapping
            Metadata supplied by the caller. If `val` carries metadata of its
            own the two must be equal; if `val` carries none, `metadata` is
            used on its own.

        Raises
        ------
        TypeError
            If `val` is of an unrecognized type (this includes the default,
            None).
        AssertionError
            If `metadata` disagrees with the metadata found in `val`, or if
            the 'flavints_joined' metadata is inconsistent with the data.

        """
        # TODO(shivesh): add noise implementation
        self.metadata = OrderedDict([
            ('name', ''),
            ('detector', ''),
            ('geom', ''),
            ('runs', []),
            ('proc_ver', ''),
            ('cuts', []),
            ('flavints_joined', []),
        ])
        # NOTE(review): contains_muons / contains_noise are never set to True
        # in this method; presumably the `muons` / `noise` attribute setters
        # do that -- confirm against the class definition.
        self.contains_neutrinos = False
        self.contains_muons = False
        self.contains_noise = False

        # Get data and metadata from val
        meta = OrderedDict()
        if isinstance(val, (str, h5py.Group)):
            data = hdf.from_hdf(val)
            meta = getattr(data, 'attrs', OrderedDict())
        elif isinstance(val, Data):
            data = val
            meta = getattr(val, 'metadata', OrderedDict())
        elif isinstance(val, (Mapping, FlavIntDataGroup)):
            data = val
            if hasattr(data, 'metadata'):
                meta = data.metadata
            elif hasattr(data, 'attrs'):
                meta = data.attrs
        else:
            raise TypeError('Unrecognized `val` type %s' % type(val))

        # Anything exposing a callable `tolist` is replaced by its list form
        for key, val_ in meta.items():
            if hasattr(val_, 'tolist') and callable(val_.tolist):
                meta[key] = val_.tolist()

        # Check consistency of metadata from val and from input. `meta` is
        # always a mapping at this point, so test for emptiness: the former
        # `meta is not None` test was always true, which made the fallback to
        # the caller-supplied `metadata` unreachable and rejected any
        # non-empty `metadata` whenever `val` carried none.
        if meta:
            if metadata is not None and meta != metadata:
                raise AssertionError('Input `metadata` does not match '
                                     'metadata inside `val`')
            self.metadata.update(meta)
        elif metadata is not None:
            self.metadata.update(metadata)

        # Find and deal with any muon / noise data if it exists. Each kind is
        # moved out of `data` and assigned to the attribute of the same name
        # (self.muons, self.noise).
        for kind in ('muons', 'noise'):
            joined = self.metadata['flavints_joined']
            if joined == []:
                if kind in data:
                    setattr(self, kind, data.pop(kind))
            elif kind in joined:
                if kind not in data:
                    raise AssertionError('Metadata has %s specified but '
                                         'they are not found in the data'
                                         % kind)
                setattr(self, kind, data.pop(kind))
            elif kind in data:
                raise AssertionError('Found %s in data but not found in '
                                     'metadata key `flavints_joined`' % kind)

        # Instantiate a FlavIntDataGroup
        if data == dict():
            self._flavint_groups = []
        else:
            super().__init__(val=data, flavint_groups=flavint_groups)
            self.contains_neutrinos = True

        # Check consistency of flavints_joined
        if self.metadata['flavints_joined']:
            combined_types = []
            if self.contains_neutrinos:
                combined_types += [str(f) for f in self.flavint_groups]
            if self.contains_muons:
                combined_types += ['muons']
            if self.contains_noise:
                combined_types += ['noise']
            if set(self.metadata['flavints_joined']) != \
               set(combined_types):
                raise AssertionError(
                    '`flavint_groups` metadata does not match the '
                    'flavint_groups in the data\n{0} != '
                    '{1}'.format(set(self.metadata['flavints_joined']),
                                 set(combined_types)))
        else:
            # Nothing recorded yet: derive the entry from what was loaded
            self.metadata['flavints_joined'] = [
                str(f) for f in self.flavint_groups
            ]
            if self.contains_muons:
                self.metadata['flavints_joined'] += ['muons']
            if self.contains_noise:
                self.metadata['flavints_joined'] += ['noise']

        self._hash = None
        self.update_hash()
def get_template_settings(llh_file):
    """Return the 'params' entry stored under 'template_settings' in the
    object that `from_hdf` loads from `llh_file`."""
    loaded = from_hdf(llh_file)
    settings = loaded['template_settings']
    return settings['params']
Exemple #11
0
                    help='''Plot the true_h_fiducial posteriors rather than the
                    false_h_best_fit by default.''')

# Boolean switch: args.save_fig is True only when -s/--save_fig is given.
parser.add_argument('-s','--save_fig',action='store_true',default=False,
                    help='Save all figures')
# Counting flag: each -v increments args.verbose, starting from 0.
parser.add_argument('-v', '--verbose', action='count', default=0,
                    help='set verbosity level')

# Parse the command line and apply the requested verbosity.
args = parser.parse_args()
set_verbosity(args.verbose)

# Configure plot settings
sns.set_context("poster") #if args.present else sns.set_context("talk")
sns.set_style("white")

# Load the LLH file, derive the two data-frame collections from it and keep
# the template parameters stored under 'template_settings' -> 'params'.
llh_data = from_hdf(args.llh_file)
df_true_h, df_false_h = get_llr_data_frames(llh_data)
template_params = llh_data['template_settings']['params']

# Only dump the frames when -v was given at least twice.
if args.verbose > 1: show_frame(df_true_h)

# NOTE(review): Python 2 print statement. df_true_h is indexed with [0], so
# it is presumably a sequence of data frames -- confirm.
print "\n  columns: ",df_true_h[0].columns

################################################################
### 1) Plot LLR Distributions
################################################################

# df_true_h MUST be filled, but df_false_h is allowed to be empty
llr_dict_true_h = get_llh_ratios(df_true_h)
if (len(df_false_h) == 0 or args.no_false_h):
    logging.warn("No false hierarchy best fit llr distributions...")