Example #1
    def get_weights(self,model,model_kwargs = None):
        """
        Calculate weights for the variables in this category

        Args:
            model (callable): A model to be evaluated

        Keyword Args:
            model_kwargs (dict): Will be passed to model
        """
        if not self.mc_p_readout:
            self.read_mc_primary()

        if model_kwargs is None:
            model_kwargs = dict()
        func_kwargs = {MC_P_EN: self.get(MC_P_EN),
                       MC_P_TY: self.get(MC_P_TY),
                       MC_P_WE: self.get(MC_P_WE)}

        for key in (MC_P_ZE, MC_P_GW, MC_P_TS, DATASETS):
            reg = key
            if key == DATASETS:
                reg = 'mc_datasets'
            try:
                func_kwargs[reg] = self.get(key)
            except KeyError:
                Logger.warning("No MCPrimary {0} informatiion! Trying to omit..".format(key))

        func_kwargs.update(model_kwargs)
        Logger.info("Getting weights for datasets {}".format(self.datasets.__repr__()))
        self._weights = pd.Series(self._weightfunction(model,self.datasets,\
                                 **func_kwargs))
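
A hedged usage sketch: the module path, category name, dataset id, and the toy flux below are assumptions for illustration, not taken from the source.

import pyevsel.variables.categories as c   # assumed module path

def toy_flux(energy):
    """A toy E^-2 power-law flux standing in for a real model."""
    return 1e-8 * energy ** -2

sim = c.Simulation("numu")
sim.get_files("/path/to/simfiles", ending=".h5", datasets={11069: 3000})
sim.get_weights(toy_flux)   # triggers read_mc_primary() on demand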
Example #2
    def set_run_start_stop(self, runstart_var=variables.Variable(None),
                           runstop_var=variables.Variable(None)):
        """
        Let the category know which variables
        describe the start and stop times of a run

        Keyword Args:
            runstart_var (pyevsel.variables.variables.Variable): beginning of a run
            runstop_var (pyevsel.variables.variables.Variable): end of a run

        """
        #FIXME
        for var,name in [(runstart_var,RUN_START),(runstop_var,RUN_STOP)]:
            if var.name is None:
                Logger.warning("No {0} available".format(name))
            elif name in self.vardict:
                Logger.info("..{0} already defined, skipping...".format(name))
                continue
            else:
                if var.name != name:
                    Logger.info("..renaming {0} to {1}..".format(var.name,name))
                    var.name = name
                newvar = deepcopy(var)
                self.vardict[name] = newvar

        self._runstartstop_set = True
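
A minimal sketch, assuming `data` is an existing Data category and that the Variable constructor takes a name plus hdf table definitions (the definition tuples are invented):

from pyevsel.variables import variables   # assumed module path

start = variables.Variable("time_start", definitions=[("I3EventHeader", "time_start_mjd")])
stop = variables.Variable("time_end", definitions=[("I3EventHeader", "time_end_mjd")])
data.set_run_start_stop(runstart_var=start, runstop_var=stop)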
Example #3
    def read_mc_primary(self, energy_var=MC_P_EN,
                        type_var=MC_P_TY,
                        zenith_var=MC_P_ZE,
                        weight_var=MC_P_WE):
        """
        Trigger the readout of MC Primary information
        Rename variables to magic keywords if necessary

        Keyword Args:
            energy_var (str): simulated primary energy
            type_var (str): simulated primary type
            zenith_var (str): simulated primary zenith
            weight_var (str): a weight, e.g. interaction probability
        """

        self.read_variables([energy_var,type_var,zenith_var,weight_var])
        for varname, defaultname in [(energy_var, MC_P_EN),
                                     (type_var, MC_P_TY),
                                     (zenith_var, MC_P_ZE),
                                     (weight_var, MC_P_WE)]:
            if varname != defaultname:
                Logger.warning("..renaming {} to {}..".format(varname,defaultname))
                self.vardict[varname].name = defaultname

        self._mc_p_readout = True
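
A short sketch of the renaming behavior; the custom variable names are invented, and `sim` is the category from the sketch under Example #1:

sim.read_mc_primary(energy_var="PrimaryEnergy",
                    type_var="PrimaryType",
                    zenith_var="PrimaryZenith",
                    weight_var="InteractionWeight")
# afterwards the variables are accessible under the magic keywords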
Example #4
    def estimate_livetime(self,force=False):
        """
        Calculate the livetime from run start/stop times, account for gaps
        
        Keyword Args:
            force (bool): override existing livetime
        """
        if self.livetime and self.livetime != "guess":
            Logger.warning("There is already a livetime of {:4.2f} ".format(self.livetime))
            if force:
                Logger.warning("Applying force...")
            else:
                Logger.warning("If you really want to do this, use force = True")
                return
        
        if not self._runstartstop_set:
            if (RUN_STOP in self.vardict) and (RUN_START in self.vardict):
                self._runstartstop_set = True
            else:
                Logger.warning("Need to set run start and stop times first! use object.set_run_start_stop")
                return

        Logger.warning("This is a crude estimate! Rather use a good run list or something!")
        lengths = self.get(RUN_STOP) - self.get(RUN_START)
        gaps    = self.get(RUN_START)[1:] - self.get(RUN_STOP)[:-1] #trust me!
        #h = self.nodes["header"].read()
        #h0 = h[:-1]
        #h1 = h[1:]
        ##FIXME
        #lengths = ((h["time_end_mjd_day"] - h["time_start_mjd_day"]) * 24. * 3600. +
        #           (h["time_end_mjd_sec"] - h["time_start_mjd_sec"]) +
        #           (h["time_end_mjd_ns"] - h["time_start_mjd_ns"])*1e-9 )
 
        #gaps = ((h1["time_start_mjd_day"] - h0["time_end_mjd_day"]) * 24.  * 3600. +
        #        (h1["time_start_mjd_sec"] - h0["time_end_mjd_sec"]) +
        #        (h1["time_start_mjd_ns"] - h0["time_end_mjd_ns"])*1e-9)
 

        # detector livetime is the duration of all events + the length of all
        # gaps between events that are short enough to be not downtime (guess: 30s)
        est_ltime = lengths.sum() + gaps[(0 < gaps) & (gaps < 30)].sum()
        self.set_livetime(est_ltime)
        return
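
To make the gap logic concrete, a minimal numeric sketch with invented run boundaries:

import numpy as np

run_start = np.array([0.0, 110.0, 500.0])
run_stop = np.array([100.0, 400.0, 600.0])
lengths = run_stop - run_start          # [100., 290., 100.]
gaps = run_start[1:] - run_stop[:-1]    # [10., 100.]
# only short gaps (< 30 s) count as uptime, so the 100 s gap is dropped
livetime = lengths.sum() + gaps[(0 < gaps) & (gaps < 30)].sum()
print(livetime)                         # 500.0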
Example #5
    def get_files(self,*args,**kwargs):
        """
        Load files for this category
        using pyevsel.utils.files.harvest_files

        Args:
            *args (list of strings): Path to possible files

        Keyword Args:
            datasets (dict(dataset_id : nfiles)): if given, load only files from dataset dataset_id; set nfiles to the number of L2 files the loaded files will represent
            force (bool): forcibly reload filelist (pre-readout vars will be lost)
            all other kwargs will be passed to utils.files.harvest_files
        """
        force = kwargs.pop("force", False)
        if self.is_harvested:
            Logger.info("Variables have already been harvested!\
                         if you really want to reload the filelist,\
                         use 'force=True'.\
                         If you do so, all your harvested variables will be deleted!")
            if not force:
                return
            else:
                Logger.warning("..using force..")

        if "datasets" in kwargs:
            filtered_files = []
            self.datasets = kwargs.pop("datasets")
            files = harvest_files(*args,**kwargs)
            datasets = [self._ds_regexp(x) for x in files]
            assert len(datasets) == len(files)

            ds_files = list(zip(datasets, files))  # materialize: zip is lazy in python 3
            for k in self.datasets.keys():
                filtered_files.extend([x[1] for x in ds_files if x[0] == k])
            files = filtered_files
        else:
            files = harvest_files(*args,**kwargs)

        self.files = files
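
A hypothetical call (path, prefix, and dataset numbers invented); prefix and ending are forwarded to harvest_files:

sim.get_files("/data/sim/level2", prefix="L2_", ending=".h5",
              datasets={11069: 3000, 11070: 2500}, force=True)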
Example #6
def GetCategoryConfig(name):
    """
    Get the relevant section of the configuration
    file for a given category

    Args:
        name (string): Name of a category to search for
    """

    configs = yaml.safe_load(open(CONFIGFILE, "r"))
    for cfg in configs["categories"]:
        if cfg["name"] == name:
            # FIXME little hack for bad latex parsing
            # by yaml
            # cleanlabel = cfg["label"]
            cleanlabel = SLASHES.sub(r"\\", cfg["label"])
            cfg["label"] = cleanlabel
            return cfg
    Logger.warning("No config for {0} found!".format(name))
    return cfg
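
For reference, a sketch of the structure the parsed YAML is expected to have; the field names come from the code above, the values are invented:

configs = {
    "categories": [
        {"name": "numu", "label": r"$\nu_\mu$"},
        {"name": "exp", "label": "data"},
    ]
}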
Example #7
    def __getitem__(self, item):
        # fall back to returning the key itself for unknown entries
        if item in self:
            return self.get(item)
        else:
            return item


seaborn_loaded = False
try:
    import seaborn.apionly as sb

    seaborn_loaded = True
    Logger.debug("Seaborn found!")
except ImportError:
    Logger.warning("Seaborn not found! Using predefined color palette")

    
def get_color_palette(name="dark"):
    """
    Load a color palette, use seaborn if available
    """
    if not seaborn_loaded:
        color_palette = ColorDict()  # fallback values stolen from the seaborn color palette
        color_palette[0] = (0.2980392156862745, 0.4470588235294118, 0.6901960784313725)
        color_palette[1] = (0.3333333333333333, 0.6588235294117647, 0.40784313725490196)
        color_palette[2] = (0.7686274509803922, 0.3058823529411765, 0.3215686274509804)
        color_palette[3] = (0.5058823529411764, 0.4470588235294118, 0.6980392156862745)
        color_palette[5] = (0.8, 0.7254901960784313, 0.4549019607843137)  # alt: (0.39215686274509803, 0.7098039215686275, 0.803921568627451)
        color_palette["k"] = "k"
        color_palette['prohibited'] = 'grey'
        return color_palette
    # the snippet was truncated here; a plausible completion for the seaborn path:
    return sb.color_palette(name)
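
Because ColorDict.__getitem__ (Example #7) returns the key itself for unknown entries, the fallback palette degrades gracefully. A sketch, assuming seaborn is absent:

palette = get_color_palette()
print(palette[0])             # an RGB tuple from the table above
print(palette["prohibited"])  # 'grey'
print(palette["teal"])        # unknown key -> returned as-is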
Example #8
def load_dataset(config, variables=None):
    """
    Load a dataset according to a
    configuration file

    Args:
        config (str): json style config file

    Keyword Args:
        variables (list): read out only variables with these names
    """

    # FIXME: add os.path.exists checks
    cfg = commentjson.load(open(config))
    categories = dict()
    weightfunctions = dict()
    models = dict()
    files_basepath = cfg["files_basepath"]
    for cat in cfg["categories"].keys():
        thiscat = cfg["categories"][cat]
        if thiscat["datatype"] == "simulation":
            categories[cat] = c.Simulation(cat)
            # remember that json keys are strings, so
            # convert to int
            datasets = {int(x): int(thiscat["datasets"][x]) for x in thiscat["datasets"]}
            categories[cat].get_files(
                os.path.join(files_basepath, thiscat["subpath"]),
                prefix=thiscat["file_prefix"],
                datasets=datasets,
                ending=thiscat["file_type"],
            )
            try:
                fluxclass, flux = thiscat["model"].split(".")
                models[cat] = getattr(dict(inspect.getmembers(fluxes))[fluxclass], flux)
            except ValueError:
                Logger.warning(
                    "{} does not seem to be a valid model for {}. This might cause troubles. If not, it is probably fine!".format(
                        thiscat["model"], cat
                    )
                )
                models[cat] = None
            weightfunctions[cat] = dict(inspect.getmembers(wgt))[thiscat["model_method"]]
        elif thiscat["datatype"] == "data":
            categories[cat] = c.Data(cat)
            categories[cat].get_files(
                os.path.join(files_basepath, thiscat["subpath"]),
                prefix=thiscat["file_prefix"],
                ending=thiscat["file_type"],
            )
            models[cat] = float(thiscat["livetime"])
            weightfunctions[cat] = dict(inspect.getmembers(wgt))[thiscat["model_method"]]

        elif thiscat["datatype"] == "reweighted":
            pass
        else:
            raise TypeError("Data type not understood. Has to be either 'simulation', 'reweighted' or 'data'!!")
    # at last we can take care of reweighted categories
    for cat in cfg["categories"].keys():
        thiscat = cfg["categories"][cat]
        if thiscat["datatype"] == "reweighted":
            categories[cat] = c.ReweightedSimulation(cat, categories[thiscat["parent"]])
            if thiscat["model"]:
                fluxclass, flux = thiscat["model"].split(".")
                models[cat] = getattr(dict(inspect.getmembers(fluxes))[fluxclass], flux)
                weightfunctions[cat] = dict(inspect.getmembers(wgt))[thiscat["model_method"]]
        elif thiscat["datatype"] in ["data", "simulation"]:
            pass
        else:
            raise TypeError("Data type not understood. Has to be either 'simulation', 'reweighted' or 'data'!!")

    # combined categories
    combined_categories = dict()
    for k in cfg.get("combined_categories", dict()):
        combined_categories[k] = [categories[l] for l in cfg["combined_categories"][k]]

    # import variable defs
    vardefs = __import__(cfg["variable_definitions"])

    dataset = ds.Dataset(*categories.values(), combined_categories=combined_categories)
    dataset.read_variables(vardefs, names=variables)
    dataset.set_weightfunction(weightfunctions)
    dataset.get_weights(models=models)
    return dataset
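
A hypothetical minimal config, shown as the parsed structure. All field names appear in load_dataset above; every value is invented.

cfg = {
    "files_basepath": "/data",
    "variable_definitions": "my_variables",   # importable module with variable defs
    "combined_categories": {},
    "categories": {
        "numu": {
            "datatype": "simulation",
            "subpath": "numu/L2",
            "file_prefix": "L2_",
            "file_type": ".h5",
            "datasets": {"11069": 3000},         # json keys are strings
            "model": "powerlaws.E2Flux",         # hypothetical fluxclass.flux
            "model_method": "weight_simulation"  # hypothetical weight function
        },
        "exp": {
            "datatype": "data",
            "subpath": "exp/L2",
            "file_prefix": "L2_",
            "file_type": ".h5",
            "livetime": 3.1e7,
            "model_method": "weight_data"        # hypothetical
        }
    }
}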
Example #9
    def read_variables(self,names=None):
        """
        Harvest the variables in self.vardict

        Keyword Args:
            names (list): harvest only these variables
        """

        if names is None:
            names = self.vardict.keys()
        compound_variables = [] #harvest them later

        executor = fut.ProcessPoolExecutor(max_workers=MAX_CORES)
        future_to_varname = {}

        # first read out variables,
        # then compound variables
        # so make sure they are in the 
        # right order
        simple_vars = []
        for varname in names:
            try:
                # compound variables get harvested after their constituents
                if isinstance(self.vardict[varname], (variables.CompoundVariable,
                                                      variables.VariableList)):
                    compound_variables.append(varname)
                    continue
                simple_vars.append(varname)
            except KeyError:
                Logger.warning("Cannot find {} in variables!".format(varname))
                continue
        for varname in simple_vars:
            # FIXME: Make it an option to not use
            # multi cpu readout!
            #self.vardict[varname].data = variables.harvest(self.files,self.vardict[varname].definitions)
            future_to_varname[executor.submit(variables.harvest,
                                              self.files,
                                              self.vardict[varname].definitions)] = varname
        #for future in tqdm.tqdm(fut.as_completed(future_to_varname),desc="Reading {0} variables".format(self.name), leave=True):
        progbar = False
        try:
            import pyprind
            n_it = len(future_to_varname.keys())
            bar = pyprind.ProgBar(n_it,monitor=False,bar_char='#',title=self.name)
            progbar = True
        except ImportError:
            pass

        exc_caught = """"""
        for future in fut.as_completed(future_to_varname):
            varname = future_to_varname[future]
            Logger.debug("Reading {} finished".format(varname))
            try:
                data = future.result()
                Logger.debug("Found {} entries ...".format(len(data)))
                data = self.vardict[varname].transform(data)
            except Exception as exc:
                exc_caught += "Reading {} for {} generated an exception: {}\n".format(varname,self.name, exc)
                data = pd.Series([])

            self.vardict[varname].data = data
            self.vardict[varname].declare_harvested()
            if progbar: bar.update()
        for varname in compound_variables:
            #FIXME check if this causes a memory leak
            self.vardict[varname].rewire_variables(self.vardict)
            self.vardict[varname].harvest()
        if exc_caught:
            Logger.warning("During the variable readout some exceptions occured!\n" + exc_caught)
        self._is_harvested = True
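
A short usage sketch; the variable names are invented and `cat` stands for any category with files loaded:

cat.read_variables(names=["energy", "zenith"])   # harvest just these two
cat.read_variables()                             # or harvest everything in cat.vardict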
Example #10
import os
import pandas as pd
import tables
import abc

from pyevsel.utils import files as f
from pyevsel.utils.logger import Logger


DEFAULT_BINS = 70
REGISTERED_FILEEXTENSIONS = [".h5",".root"]

try:
    import root_numpy as rn
except ImportError:
    Logger.warning("No root_numpy found, root support is limited!")
    REGISTERED_FILEEXTENSIONS.remove(".root")

################################################################
# define a non-member function so that it can be used in a
# multiprocessing approach

#def harvest_single_file(filename, filetype, definitions):
#    """
#    Get the variable data from a fileobject
#    Optimized for hdf files
#
#    Args:
#        filename (str):
#        filetype (str): the extension of the filename, eg "h5"
#
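
A minimal sketch of why the harvester needs to be a module-level function: ProcessPoolExecutor pickles the callable it ships to worker processes, and plain module-level functions pickle cleanly, while bound methods of complex objects may not.

import concurrent.futures as fut

def square(x):   # module-level, therefore picklable
    return x * x

if __name__ == "__main__":
    with fut.ProcessPoolExecutor(max_workers=2) as executor:
        print(list(executor.map(square, [1, 2, 3])))   # [1, 4, 9]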