Example #1
    def get_inv_eff(self, signal_data=None, gen_data=None):
        this_hash = hash_obj(
            [self.true_binning.hash, self.output_str, 'inv_eff'],
            full_hash=self.full_hash)
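        # Either both signal_data and gen_data are supplied (recompute the
        # inverse-efficiency histogram) or neither is (load a cached one)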
        assert len(set([signal_data is None, gen_data is None])) == 1
        if signal_data is None and gen_data is None:
            if self.inv_eff_hash == this_hash:
                logging.trace('Loading inv eff from mem cache')
                return self._inv_eff
            if this_hash in self.disk_cache:
                logging.debug('Loading inv eff histogram from disk cache.')
                inv_eff = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'inverse efficiency histogram with correct hash not found '
                    'in disk_cache')
        else:
            this_hash = hash_obj([this_hash, self.fit_hash],
                                 full_hash=self.full_hash)
            if self.inv_eff_hash == this_hash:
                logging.trace('Loading inv eff from mem cache')
                return self._inv_eff
            inv_eff = self._get_inv_eff(signal_data, gen_data,
                                        self.true_binning, self.output_str)

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching inv eff histogram to disk.')
                    self.disk_cache[this_hash] = inv_eff

        self.inv_eff_hash = this_hash
        self._inv_eff = inv_eff
        return inv_eff
Example #2
    def create_response(self,
                        reco_norm_data=None,
                        true_norm_data=None,
                        data=None):
        """Create the response object from the signal data."""
        unfold_bg = self.params['unfold_bg'].value
        unfold_eff = self.params['unfold_eff'].value
        unfold_unweighted = self.params['unfold_unweighted'].value
        this_hash = hash_obj([
            self.reco_binning.hash, self.true_binning.hash, unfold_bg,
            unfold_eff, unfold_unweighted, self.output_str, 'response'
        ],
                             full_hash=self.full_hash)
        assert len(
            set([reco_norm_data is None, true_norm_data is None,
                 data is None])) == 1
        if reco_norm_data is None and true_norm_data is None and data is None:
            if self.response_hash == this_hash:
                logging.trace('Loading response from mem cache')
                return self._response
            else:
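                # Parameters changed: drop any stale in-memory response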
                try:
                    del self._response
                except AttributeError:
                    pass
            if this_hash in self.disk_cache:
                logging.debug('Loading response from disk cache.')
                response = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'response object with correct hash not found in disk_cache'
                )
        else:
            this_hash = hash_obj([this_hash, self.fit_hash] +
                                 list(self.params.values),
                                 full_hash=self.full_hash)
            if self.response_hash == this_hash:
                logging.debug('Loading response from mem cache')
                return self._response
            else:
                try:
                    del self._response
                    del self.t_th1d
                except AttributeError:
                    pass

            # Truth histogram also gets returned if response matrix is created
            response, self.t_th1d = self._create_response(
                reco_norm_data, true_norm_data, data, self.reco_binning,
                self.true_binning)

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching response object to disk.')
                    self.disk_cache[this_hash] = response

        self.response_hash = this_hash
        self._response = response
        return response
Example #3
    def hash(self):
        """Combines source_code_hash and params.hash for checking/tagging
        provenance of persisted (on-disk) objects."""
        objects_to_hash = [self.source_code_hash, self.params.hash]
        for attr in sorted(self._attrs_to_hash):
            objects_to_hash.append(
                hash_obj(getattr(self, attr), full_hash=self.full_hash))
        return hash_obj(objects_to_hash, full_hash=self.full_hash)
Example #4
    def calculate_fit_coeffs(self):
        """
        Calculate the fit coefficients for each systematic, flavint, bin
        for a polynomial.
        """
        this_hash = hash_obj(
            [self.fit_binning.hash, self.weight_hash] +
            [self.params[name].value for name in self.fit_params],
            full_hash=self.full_hash
        )
        if self.fitcoeffs_hash == this_hash:
            return self._fit_coeffs

        if self.neutrinos:
            nu_params = self.nu_params
        else:
            nu_params = None
        if self.muons:
            mu_params = self.mu_params
        else:
            mu_params = None

        if self.params['cache_fit'].value:
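            # Disk-cache key: built from the dataset metadata, fit binning and
            # fit-parameter values (not the volatile weight_hash used for the
            # in-memory check above)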
            this_cache_hash = hash_obj(
                [self._data.metadata['name'], self._data.metadata['sample'],
                 self._data.metadata['cuts'], self.fit_binning.hash] +
                [self.params[name].value for name in self.fit_params],
                full_hash=self.full_hash
            )

            if self.fitcoeffs_cache_hash == this_cache_hash:
                fit_coeffs = deepcopy(self._cached_fc)
            elif this_cache_hash in self.disk_cache:
                logging.info('Loading fit coefficients from cache.')
                self._cached_fc = self.disk_cache[this_cache_hash]
                fit_coeffs = deepcopy(self._cached_fc)
                self.fitcoeffs_cache_hash = this_cache_hash
            else:
                fit_coeffs = self._calculate_fit_coeffs(
                    self._data, ParamSet(p for p in self.params
                                         if p.name in self.fit_params),
                    self.fit_binning, nu_params, mu_params
                )
        else:
            fit_coeffs = self._calculate_fit_coeffs(
                self._data, ParamSet(p for p in self.params
                                     if p.name in self.fit_params),
                self.fit_binning, nu_params, mu_params
            )

        if self.params['cache_fit'].value:
            if this_cache_hash not in self.disk_cache:
                logging.info('Caching fit coefficients values to disk.')
                self.disk_cache[this_cache_hash] = fit_coeffs

        self.fitcoeffs_hash = this_hash
        self._fit_coeffs = fit_coeffs
        return fit_coeffs
Example #5
    def _derive_nominal_transforms_hash(self):
        """Derive a hash to uniquely identify the nominal transform. This
        should be unique across processes and invocations because the nominal
        transforms can be non-volatile (cached to disk) and must still be
        valid given their hash value upon loading from disk in the future.

        This implementation uses the nominal parameter values' hash
        combined with the source code hash to generate the final nominal
        transforms hash.

        Notes
        -----
        The hashing scheme implemented here might be sufficiently unique for
        many cases, but override this method in services according to the
        following guidelines:

        * Stages that use a nominal transform should override this method if
          the hash is more accurately computed differently from here.

        * Stages that use transforms but do not use nominal transforms can
          override this method with a simpler version that simply returns None
          to save computation time (if this method is found to be a significant
          performance hit). (This method is called each time an output
          is computed if `self.use_transforms == True`.)

        * Stages that use no transforms (i.e., `self.use_transforms == False`)
          will not call any built-in methods related to transforms, so
          overriding this method is irrelevant to such stages.

        If this method *is* overridden (and not just to return None), since the
        nominal transform may be stored to a disk cache, make sure that
        `self.source_code_hash` is included in the objects used to compute the
        final hash value. Even if all parameters are the same, a nominal
        transform stored to disk is ***invalid if the source code changes***,
        and `_derive_nominal_transforms_hash` must reflect this.

        """
        id_objects = []
        id_objects.append(self.params.nominal_values_hash)
        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)
        id_objects.append(self.source_code_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            nominal_transforms_hash = None
        else:
            nominal_transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)
        return nominal_transforms_hash
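The guidance in the docstring above boils down to two common overrides, sketched below. Both service classes and the import paths are hypothetical (assumptions for illustration only); they rely solely on attributes already used in the examples here (`params.nominal_values_hash`, `source_code_hash`, `full_hash`, `hash_obj`).

# Hypothetical sketch -- class names and import paths are assumptions
from pisa.core.stage import Stage
from pisa.utils.hash import hash_obj

class ServiceWithoutNominal(Stage):
    """Stage that uses transforms but no nominal transforms."""
    def _derive_nominal_transforms_hash(self):
        # Cheapest valid override: there is nothing to identify
        return None

class ServiceWithCustomHash(Stage):
    """Stage with its own nominal-transforms hashing scheme."""
    def _derive_nominal_transforms_hash(self):
        # Any custom scheme must still fold in source_code_hash, since a
        # nominal transform cached to disk is invalid once the code changes
        return hash_obj(
            [self.params.nominal_values_hash, self.source_code_hash],
            full_hash=self.full_hash)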
Example #6
    def _compute_outputs(self, inputs=None):
        # Following is just so that we only produce new maps when params
        # change, but produce the same maps with the same param values
        # (for a more realistic test of caching).
        seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
        np.random.seed(seed)

        # Convert a parameter that the user can specify in any (compatible)
        # units to the units used for computation
        height = self.params['test'].to('meter').magnitude

        output_maps = []
        for output_name in self.output_names:
            # Generate the fake per-bin "fluxes", modified by the parameter
            hist = np.random.random(self.output_binning.shape) * height

            # Put the "fluxes" into a Map object, give it the output_name
            m = Map(name=output_name, hist=hist, binning=self.output_binning)

            # Optionally turn on errors here, that will be propagated through
            # rest of pipeline (slows things down, but essential in some cases)
            #m.set_poisson_errors()
            output_maps.append(m)

        # Combine the output maps into a single MapSet object to return.
        # The MapSet contains the various things that are necessary to make
        # caching work and also provides a nice interface for the user to all
        # of the contained maps
        return MapSet(maps=output_maps, name='flux maps')
Example #7
    def load_gen_data(self):
        logging.debug('Loading generator level sample')
        unfold_pipeline_cfg = self.params['unfold_pipeline_cfg'].value
        if isinstance(unfold_pipeline_cfg, str):
            pipeline_cfg = from_file(unfold_pipeline_cfg)
            pipeline_hash = pipeline_cfg
            sa_cfg = from_file(
                pipeline_cfg.get('stage.data', 'param.data_sample_config'))
            template_maker = Pipeline(pipeline_cfg)
        elif isinstance(unfold_pipeline_cfg, Pipeline):
            pipeline_hash = unfold_pipeline_cfg.state_hash
            sa_cfg = from_file(
                unfold_pipeline_cfg.params['data_sample_config'].value)
            template_maker = unfold_pipeline_cfg
        gen_cfg = from_file(sa_cfg.get('neutrinos|gen_lvl', 'gen_cfg_file'))
        this_hash = hash_obj([gen_cfg, pipeline_hash, self.output_str],
                             full_hash=self.full_hash)
        if self.gen_data_hash == this_hash:
            return self._gen_data

        full_gen_data = template_maker.get_outputs()
        if not isinstance(full_gen_data, Data):
            raise AssertionError(
                'Output of pipeline is not a Data object, instead is type '
                '{0}'.format(type(full_gen_data)))
        trans_data = full_gen_data.transform_groups(self.output_str)
        gen_data = trans_data[self.output_str]

        self._gen_data = gen_data
        self.gen_data_hash = this_hash
        return gen_data
Example #8
    def split_data(self):
        this_hash = hash_obj([
            self.fit_hash, self.output_str, self._data.contains_muons,
            self._data.contains_noise
        ],
                             full_hash=self.full_hash)
        if self.split_data_hash == this_hash:
            return self._signal_data, self._bg_data, self._all_data

        if self.params['real_data'].value:
            return self._data, None, self._data

        trans_data = self._data.transform_groups(self.output_str)
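        # Drop the per-event sample weights and separate signal (output_str)
        # from background (all other groups, plus muons/noise when present)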
        for fig in trans_data:
            trans_data[fig].pop('sample_weight')
        bg_str = [fig for fig in trans_data if fig != self.output_str]
        if trans_data.contains_muons:
            trans_data['muons'].pop('sample_weight')
            bg_str.append('muons')
        if trans_data.contains_noise:
            trans_data['noise'].pop('sample_weight')
            bg_str.append('noise')

        signal_data = trans_data[self.output_str]
        bg_data = [trans_data[bg] for bg in bg_str]
        bg_data = reduce(Data._merge, bg_data)
        all_data = Data._merge(deepcopy(bg_data), signal_data)

        self._signal_data = signal_data
        self._bg_data = bg_data
        self._all_data = all_data
        self.split_data_hash = this_hash
        return signal_data, bg_data, all_data
Example #9
    def hash(self):
        """int : Hash for entire set of transforms"""
        hashes = self.hashes
        if len(hashes) > 0:
            if all([(h is not None and h == hashes[0]) for h in hashes]):
                return hashes[0]
            if all([(h is not None) for h in hashes]):
                return hash_obj(hashes)
        return None
Example #10
    def source_code_hash(self):
        """Hash for the source code of this object's class.

        Not meant to be perfect, but should suffice for tracking provenance of
        an object stored to disk that was produced by a Stage.
        """
        if self._source_code_hash is None:
            self._source_code_hash = hash_obj(inspect.getsource(self.__class__))
        return self._source_code_hash
Example #11
    def load_pid_energy_param(self, source):
        """Load pid energy-dependent parameterisation from file or dictionary.

        Parameters
        ----------
        source : string
            Resource location of the file

        """
        this_hash = hash_obj(source)
        if (self._pid_energy_param_hash is not None
                and this_hash == self._pid_energy_param_hash):
            return

        # Invalidate the hash and clear the entry, so we aren't left in an
        # inconsistent state if any of the below fails
        self._pid_energy_param_hash = None
        self.pid_energy_param_dict = None

        # Call external function for basic loading and conversion
        pid_energy_param_dict = load_pid_energy_param(source)

        # Perform validation
        for flavintgroup, subdict in pid_energy_param_dict.items():
            if set(subdict.keys()) != set(self.signatures):
                raise ValueError(
                    'Expected PID specs for %s, but the energy PID'
                    ' parameterization for %s specifies %s instead.'
                    % (self.signatures, flavintgroup, subdict.keys())
                )

        # Transform groups are implicitly defined by keys
        implicit_transform_groups = pid_energy_param_dict.keys()

        # Make sure these match the transform groups specified for the stage
        if set(implicit_transform_groups) != set(self.transform_groups):
            raise ValueError(
                'Transform groups (%s) defined implicitly by `source` "%s" do'
                ' not match those defined as the stage\'s configured'
                ' `transform_groups` (%s).'
                % (implicit_transform_groups, source, self.transform_groups)
            )

        # Verify that each input name--which specifies a flavint or
        # flavintgroup--is wholly encapsulated by one of the transform
        # flavintgroups
        for name in self.input_names:
            if not any(name in group for group in implicit_transform_groups):
                raise ValueError(
                    'Input "%s" either not present in or spans multiple'
                    ' transform groups (transform_groups = %s)'
                    % (name, implicit_transform_groups)
                )

        self.pid_energy_param_dict = pid_energy_param_dict
        self._pid_energy_param_hash = this_hash
Example #12
    def load_xsec_splines(self):
        """Load the cross-sections splines from the ROOT file."""
        xsec_file = self.params['xsec_file'].value
        this_hash = hash_obj(xsec_file, full_hash=self.full_hash)
        if this_hash == self.xsec_hash:
            self.xsec.reset()
            return

        logging.info('Extracting cross-section spline from file: %s', xsec_file)
        self.xsec = self.get_combined_xsec(xsec_file, ver='v2.10.0')
        self.xsec_hash = this_hash
Example #13
    def _derive_transforms_hash(self, nominal_transforms_hash=None):
        """Compute a hash that uniquely identifies the transforms that will be
        produced from the current configuration. Note that this hash needs only
        to be valid for this run (i.e., it is a volatile hash).

        This implementation returns a hash from the current parameters' values.

        """
        id_objects = []
        h = self.params.values_hash
        logging.trace("self.params.values_hash = %s" % h)
        id_objects.append(h)

        # Grab any provided nominal transforms hash, or derive it again
        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()
        # If a valid hash was obtained, include it
        if nominal_transforms_hash is not None:
            id_objects.append(nominal_transforms_hash)

        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            transforms_hash = None
        else:
            transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return transforms_hash, nominal_transforms_hash
Example #14
    def get_bg_hist(self, bg_data=None):
        """Histogram the bg data unless using real data, in which case load
        the bg hist from disk cache."""
        this_hash = hash_obj(
            [self.reco_binning.hash, self.output_str, 'bg_hist'],
            full_hash=self.full_hash)
        if bg_data is None:
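            # No background events supplied (e.g. when running on real data):
            # the histogram must already exist in the memory or disk cache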
            if self.bg_hist_hash == this_hash:
                logging.trace('Loading bg hist from mem cache')
                return self._bg_hist
            if this_hash in self.disk_cache:
                logging.debug('Loading bg hist from disk cache.')
                bg_hist = self.disk_cache[this_hash]
            else:
                raise ValueError(
                    'bg hist object with correct hash not found in disk_cache')
        else:
            this_hash = hash_obj([this_hash, self.fit_hash],
                                 full_hash=self.full_hash)
            if self.bg_hist_hash == this_hash:
                logging.trace('Loading bg hist from mem cache')
                return self._bg_hist
            bg_hist = self._histogram(events=bg_data,
                                      binning=self.reco_binning,
                                      weights=bg_data['pisa_weight'],
                                      errors=True,
                                      name='background',
                                      tex=r'\rm{background}')

            if self.disk_cache is not None:
                if this_hash not in self.disk_cache:
                    logging.debug('Caching bg hist to disk.')
                    self.disk_cache[this_hash] = bg_hist

        self.bg_hist_hash = this_hash
        self._bg_hist = bg_hist
        return bg_hist
Example #15
    def _compute_outputs(self, inputs=None):
        """Apply basic cuts and compute histograms for output channels."""

        logging.debug('Entering events_to_data._compute_outputs')

        #Hashing
        #TODO What should I hash??
        hash_property = [
            self.events_file, self.params['dataset'].value, self.output_names
        ]
        this_hash = hash_obj(hash_property, full_hash=self.full_hash)
        #if this_hash == self.sample_hash: #TODO Fix this and replace...
        #    return

        #TODO Check there are no inputs

        #Fill an events instance from a file
        events = Events(self.events_file)

        #TODO Handle nominal, etc, etc datasets?

        #Extract the neutrino data from the 'Events' instance
        nu_data = []
        flav_fidg = FlavIntDataGroup(flavint_groups=events.flavints)
        for flavint in events.present_flavints:
            flav_fidg[flavint] = {
                var: events[flavint][var]
                for var in events[flavint].keys()
            }
        nu_data.append(flav_fidg)

        #Create the data instance, including the metadata
        #Note that there is no muon or noise data  in the 'Events'
        data = Data(reduce(add, nu_data), metadata=deepcopy(events.metadata))

        # Make cuts on the newly created Data object
        if self.params['keep_criteria'].value is not None:
            # TODO Shivesh says this needs testing
            data.applyCut(self.params['keep_criteria'].value)
            data.update_hash()

        #Update hashes
        self.sample_hash = this_hash
        data.metadata['sample_hash'] = this_hash
        data.update_hash()

        return data
Example #16
    def cut_events(self, keep_criteria):
        """Apply a cut to `self.events`, keeping only events that pass
        `keep_criteria`.

        Parameters
        ----------
        keep_criteria : string
             See pisa.core.Events.applyCut for more info on specifying this.

        """
        if isinstance(keep_criteria, Param):
            keep_criteria = keep_criteria.value

        if keep_criteria is not None:
            events = self.events.applyCut(keep_criteria=keep_criteria)
            events_hash = hash_obj(events, full_hash=self.full_hash)

            self.events = events
            self._events_hash = events_hash
Example #17
    def load_events(self, events):
        """Load events from path given by `events`. Stored as `self.events`.

        Parameters
        ----------
        events : string or Events object
            If string, load events from that location. If Events object,
            deepcopy to obtain `self.events`

        """
        if isinstance(events, Param):
            events = events.value
        elif isinstance(events, basestring):
            events = find_resource(events)
        this_hash = hash_obj(events, full_hash=self.full_hash)
        if self._events_hash is not None and this_hash == self._events_hash:
            return
        logging.debug("Extracting events from Events obj or file: %s", events)
        events_obj = Events(events)
        events_hash = this_hash

        self.events = events_obj
        self._events_hash = events_hash
Example #18
    def apply(self, inputs):
        """Apply each transform to `inputs`; return computed outputs.

        Parameters
        ----------
        inputs : sequence of objects

        Returns
        -------
        outputs : container with computed outputs (no sideband objects)

        """
        output_names = []
        outputs = []

        # If any outputs have the same name, add them together to form a single
        # output for that name
        for xform in self:
            output = xform.apply(inputs)
            name = output.name
            try:
                idx = output_names.index(name)
                outputs[idx] = outputs[idx] + output
                outputs[idx].name = name
            except ValueError:
                outputs.append(output)
                output_names.append(name)

        # Automatically attach a sensible hash (this may be overwritten, but
        # the below should be a reasonable hash in most cases)
        if inputs.hash is None or self.hash is None:
            hash_ = None
        else:
            hash_ = hash_obj((inputs.hash, self.hash))

        # TODO: what to set for map set's name, tex, etc. ?
        return MapSet(maps=outputs, hash=hash_)
Example #19
    def _compute_transforms(self):
        """Compute new oscillation transforms."""
        # The seed is created from parameter values to produce different sets
        # of transforms for different sets of parameters
        seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
        np.random.seed(seed)

        # Read parameters in the units used for computation, e.g.
        theta23 = self.params.theta23.m_as('rad')

        transforms = []
        for out_idx, output_name in enumerate(self.output_names):
            if out_idx < 3:
                # neutrinos (-> input names are neutrinos)
                input_names = self.input_names[0:2]
            else:
                # anti-neutrinos (-> input names are anti-neutrinos)
                input_names = self.input_names[2:4]

            # generate the "oscillation probabilities"
            xform = self.create_dummy_osc_probs()

            # create object of type `BinnedTensorTransform` and attach
            # to list of transforms with correct set of input names for the
            # output name in question
            transforms.append(
                BinnedTensorTransform(
                    input_names=input_names,
                    output_name=output_name,
                    # we have already made sure that input and output binnings
                    # are identical
                    input_binning=self.input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform))

        return TransformSet(transforms=transforms)
Example #20
    def hash(self):
        """int : Hash of the state of the pipeline. This hashes together a hash
        of the Pipeline class's source code and a hash of the state of each
        contained stage."""
        return hash_obj([self.source_code_hash] +
                        [stage.hash for stage in self])
Example #21
    def _derive_outputs_hash(self):
        """Derive a hash value that unique identifies the outputs that will be
        generated based upon the current state of the stage.

        This implementation hashes together:
        * Input and output binning objects' hash values (if either input or
          output binning is not None)
        * Current params' values hash
        * Hashes from any input objects with names in `self.input_names`

        If any of the above objects is specified but returns None for its hash
        value, the entire output hash is invalidated, and None is returned.

        """
        id_objects = []

        # If stage uses inputs, grab hash from the inputs container object
        if self.outputs_cache is not None and len(self.input_names) > 0:
            inhash = self.inputs.hash
            logging.trace("inputs.hash = %s" % inhash)
            id_objects.append(inhash)

        # If stage uses transforms, get hash from the transforms
        transforms_hash = None
        if self.use_transforms:
            transforms_hash, nominal_transforms_hash = self._derive_transforms_hash()
            id_objects.append(transforms_hash)
            logging.trace("derived transforms hash = %s" % id_objects[-1])

        # Otherwise, generate sub-hash on binning and param values here
        else:
            transforms_hash, nominal_transforms_hash = None, None

            if self.outputs_cache is not None:
                id_subobjects = []
                # Include all parameter values
                id_subobjects.append(self.params.values_hash)

                # Include additional attributes of this object
                for attr in sorted(self._attrs_to_hash):
                    val = getattr(self, attr)
                    if hasattr(val, "hash"):
                        attr_hash = val.hash
                    elif self.full_hash:
                        norm_val = normQuant(val)
                        attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
                    else:
                        attr_hash = hash_obj(val, full_hash=self.full_hash)
                    id_subobjects.append(attr_hash)

                # Generate the "sub-hash"
                if any([(h is None) for h in id_subobjects]):
                    sub_hash = None
                else:
                    sub_hash = hash_obj(id_subobjects, full_hash=self.full_hash)
                id_objects.append(sub_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if self.outputs_cache is None or any([(h is None) for h in id_objects]):
            outputs_hash = None
        else:
            outputs_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return outputs_hash, transforms_hash, nominal_transforms_hash
Example #22
    def reweight(self):
        """Main rewighting function."""
        this_hash = hash_obj([self.weight_hash, self.params.values_hash],
                             full_hash=self.full_hash)
        if this_hash == self.fit_hash:
            return

        fit_coeffs = self.calculate_fit_coeffs()

        sample_config = from_file(self.params['discr_sys_sample_config'].value)
        degree = int(self.params['poly_degree'].value)
        force_through_nominal = self.params['force_through_nominal'].value
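        # Forcing the fit through nominal fixes the polynomial's constant term
        # to 1; otherwise the constant term is an extra free parameter (hence
        # degree += 1 below)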

        if force_through_nominal:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, [1.] + list(poly_coeffs))
        else:

            def fit_func(vals, *poly_coeffs):
                return np.polynomial.polynomial.polyval(
                    vals, list(poly_coeffs))

            # add free param for constant term
            degree += 1

        def parse(string):
            return string.replace(' ', '').split(',')

        if self.neutrinos:
            sys_list = parse(sample_config.get('neutrinos', 'sys_list'))

            for fig in self._data.keys():
                self._data[fig]['fit_weight'] = \
                    deepcopy(self._data[fig]['weight_weight'])

            for sys in sys_list:
                nominal = sample_config.get('neutrinos|' + sys, 'nominal')
                for fig in self._data.keys():
                    fit_map = unp.nominal_values(fit_coeffs[sys][fig].hist)

                    if self.params['smoothing'].value == 'gauss':
                        # TODO(shivesh): new MapSet functions?
                        for d in range(degree):
                            fit_map[..., d] = gaussian_filter(fit_map[..., d],
                                                              sigma=1)

                    shape = self.fit_binning.shape
                    transform = np.ones(shape)
                    sys_offset = self.params['nu_' +
                                             sys].value.m - float(nominal)
                    for idx in np.ndindex(shape):
                        transform[idx] *= fit_func(sys_offset, *fit_map[idx])

                    hist_idxs = self._data.digitize(
                        kinds=fig,
                        binning=self.fit_binning,
                    )

                    # Discrete systematics reweighting
                    # TODO(shivesh): speedup this
                    for idx, wght in enumerate(
                            np.nditer(self._data[fig]['fit_weight'],
                                      op_flags=['readwrite'])):
                        idx_slice = tuple(hist_idxs[idx])
                        if shape[0] == 0 or shape[1] == 0 or \
                           idx_slice[0] > shape[0] or idx_slice[1] > shape[1]:
                            # Outside binning range
                            wght *= 0
                        else:
                            wght *= transform[tuple([x - 1
                                                     for x in idx_slice])]

            for fig in self._data.keys():
                self._data[fig]['pisa_weight'] = \
                    deepcopy(self._data[fig]['fit_weight'])

        if self.muons:
            sys_list = parse(sample_config.get('muons', 'sys_list'))

            self._data['muons']['fit_weight'] = \
                deepcopy(self._data['muons']['weight_weight'])

            for sys in sys_list:
                fit_map = unp.nominal_values(fit_coeffs[sys]['muons'].hist)

                if self.params['smoothing'].value == 'gauss':
                    # TODO(shivesh): new MapSet functions?
                    for d in range(degree):
                        fit_map[..., d] = gaussian_filter(fit_map[..., d],
                                                          sigma=1)

                shape = self.fit_binning.shape
                transform = np.ones(shape)
                for idx in np.ndindex(shape):
                    transform[idx] *= fit_func(self.params['mu_' + sys].value,
                                               *fit_map[idx])

                hist_idxs = self._data.digitize(
                    kinds='muons',
                    binning=self.fit_binning,
                )

                # Discrete systematics reweighting
                # Iterate with np.nditer (as in the neutrino branch above) so
                # the in-place multiplication writes back to the array
                for idx, wght in enumerate(
                        np.nditer(self._data['muons']['fit_weight'],
                                  op_flags=['readwrite'])):
                    idx_slice = tuple(hist_idxs[idx])
                    if shape[0] == 0 or shape[1] == 0 or \
                       idx_slice[0] > shape[0] or idx_slice[1] > shape[1]:
                        # Outside binning range
                        wght *= 0
                    else:
                        wght *= transform[tuple([x - 1 for x in idx_slice])]

                self._data['muons']['pisa_weight'] = \
                    deepcopy(self._data['muons']['fit_weight'])

        self.fit_hash = this_hash
        self._data.metadata['fit_hash'] = self.fit_hash
        self._data.update_hash()
Example #23
    def store_recursively(fhandle,
                          node,
                          path=None,
                          attrs=None,
                          node_hashes=None):
        """Function for interatively doing the work"""
        path = [] if path is None else path
        node_hashes = OrderedDict() if node_hashes is None else node_hashes
        full_path = '/' + '/'.join(path)
        if attrs is not None:
            if isinstance(attrs, OrderedDict):
                sorted_attr_keys = attrs.keys()
            else:
                sorted_attr_keys = sorted(attrs.keys())
        if isinstance(node, Mapping):
            logging.trace('  creating Group "%s"', full_path)
            try:
                dset = fhandle.create_group(full_path)
                if attrs is not None:
                    for key in sorted_attr_keys:
                        dset.attrs[key] = attrs[key]
            except ValueError:
                pass

            for key in sorted(node.keys()):
                if isinstance(key, basestring):
                    key_str = key
                else:
                    key_str = str(key)
                    logging.warning(
                        'Making string from key "%s", %s for use as'
                        ' name in HDF5 file', key_str, type(key))
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle,
                                  node=val,
                                  path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = hash_obj(node)
            if node_hash in node_hashes:
                logging.trace('  creating hardlink for Dataset: "%s" -> "%s"',
                              full_path, node_hashes[node_hash])
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return
            # For now, convert None to np.nan since h5py appears to not handle
            # None
            if node is None:
                node = np.nan
                logging.warning(
                    '  encountered `None` at node "%s"; converting to'
                    ' np.nan', full_path)
            # "Scalar datasets don't support chunk/filter options". Shuffling
            # is a good idea otherwise since subsequent compression will
            # generally benefit; shuffling requires chunking. Compression is
            # not done here since it is slow, but can be done by
            # post-processing the generated file(s).
            if np.isscalar(node):
                shuffle = False
                chunks = None
            else:
                shuffle = True
                chunks = True
                # Store the node_hash for linking to later if this is more than
                # a scalar datatype. Assumed that "None" has
                node_hashes[node_hash] = full_path
            if isinstance(node, basestring):
                # TODO: Treat strings as follows? Would this break
                # compatibility with pytables/Pandas? What are benefits?
                # Leaving the following two lines out for now...

                #dtype = h5py.special_dtype(vlen=str)
                #fh.create_dataset(k,data=v,dtype=dtype)

                # ... Instead: creating length-1 array out of string; this
                # seems to be compatible with both h5py and pytables
                node = np.array(node)

            logging.trace('  creating dataset at node "%s", hash %s',
                          full_path, node_hash)
            try:
                dset = fhandle.create_dataset(name=full_path,
                                              data=node,
                                              chunks=chunks,
                                              compression=None,
                                              shuffle=shuffle,
                                              fletcher32=False)
            except TypeError:
                try:
                    shuffle = False
                    chunks = None
                    dset = fhandle.create_dataset(name=full_path,
                                                  data=node,
                                                  chunks=chunks,
                                                  compression=None,
                                                  shuffle=shuffle,
                                                  fletcher32=False)
                except Exception:
                    logging.error('  full_path: %s', full_path)
                    logging.error('  chunks   : %s', str(chunks))
                    logging.error('  shuffle  : %s', str(shuffle))
                    logging.error('  node     : %s', str(node))
                    raise

            if attrs is not None:
                for key in sorted_attr_keys:
                    dset.attrs[key] = attrs[key]
Example #24
    def _compute_nominal_transforms(self):
        """Compute parameterised effective area transforms"""
        energy_param_source = self.params.aeff_energy_paramfile.value
        coszen_param_source = self.params.aeff_coszen_paramfile.value

        energy_param_hash = hash_obj(energy_param_source)
        coszen_param_hash = hash_obj(coszen_param_source)

        load_energy = False
        load_coszen = False
        if (self._param_hashes['energy'] is None
                or energy_param_hash != self._param_hashes['energy']):
            load_energy = True

        if (self.has_cz
                and (self._param_hashes['coszen'] is None
                     or coszen_param_hash != self._param_hashes['coszen'])):
            load_coszen = True

        if energy_param_source is None:
            raise ValueError(
                'non-None energy parameterization params.aeff_energy_paramfile'
                ' must be provided'
            )
        if not self.has_cz and coszen_param_source is not None:
            raise ValueError(
                'true_coszen dimension was not found in the binning but a'
                ' coszen parameterisation file has been provided by'
                ' `params.aeff_coszen_paramfile`.'
            )

        if not (load_energy or load_coszen):
            return

        dims = ['energy', 'coszen']
        loads = [load_energy, load_coszen]
        sources = [energy_param_source, coszen_param_source]
        hashes = [energy_param_hash, coszen_param_hash]

        for dim, load, source, hash_ in zip(dims, loads, sources, hashes):
            if not load:
                continue
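            # Clear the stale hash/params entries before (re)loading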
            self._param_hashes[dim] = None
            self.aeff_params[dim] = None
            params = load_aeff_param(source)

            # Transform groups are implicitly defined by the contents of the
            # aeff parameterization file's keys
            implicit_transform_groups = params.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s aeff parameterizations "%s"  do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).'
                    % (implicit_transform_groups, dim, source,
                       self.transform_groups)
                )

            self.aeff_params[dim] = params
            self._param_hashes[dim] = hash_

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            energy_param_func = self.aeff_params['energy'][xform_flavints]
            coszen_param_func = None
            if self.aeff_params['coszen'] is not None:
                coszen_param_func = self.aeff_params['coszen'][xform_flavints]

            # Now calculate the 1D aeff along energy
            aeff_vs_e = energy_param_func(self.ecen)

            # NOTE/TODO: Below is taken from the PISA 2 implementation of this.
            # Almost certainly comes from the fact that the highest knot there
            # was 79.5 GeV with the upper energy bin edge being 80 GeV. There's
            # probably something better that could be done here...

            # Correct for final energy bin, since interpolation does not
            # extend to JUST right outside the final bin
            if aeff_vs_e[-1] == 0:
                aeff_vs_e[-1] = aeff_vs_e[-2]

            if self.has_cz:
                aeff_vs_e = self.input_binning.broadcast(
                    aeff_vs_e, from_dim='true_energy', to_dims='true_coszen'
                )

                if coszen_param_func is not None:
                    aeff_vs_cz = coszen_param_func(self.czcen)
                    # Normalize
                    aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz)
                else:
                    aeff_vs_cz = np.ones(shape=len(self.czcen))

                cz_broadcasted = self.input_binning.broadcast(
                    aeff_vs_cz, from_dim='true_coszen', to_dims='true_energy'
                )
                aeff_transform = aeff_vs_e * cz_broadcasted
            else:
                aeff_transform = aeff_vs_e

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)
Example #25
    def _compute_transforms(self):
        """
        Generate reconstruction "smearing kernels" by reading in a set of
        parameterisation functions from a json file. This should have the same
        dimensionality as the input binning i.e. if you have energy and
        coszenith input binning then the kernels provided should have both
        energy and coszenith resolution functions.

        Any superposition of distributions from scipy.stats is supported.
        """
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        reco_param_source = self.params.reco_paramfile.value

        if reco_param_source is None:
            raise ValueError(
                'non-None reco parameterization params.reco_paramfile'
                ' must be provided')

        reco_param_hash = hash_obj(reco_param_source)

        if (self._reco_param_hash is None
                or reco_param_hash != self._reco_param_hash):
            reco_param = load_reco_param(reco_param_source)

            # Transform groups are implicitly defined by the contents of the
            # reco paramfile's keys
            implicit_transform_groups = reco_param.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s reco parameterizations do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).' %
                    (implicit_transform_groups, reco_param_source,
                     self.transform_groups))

            self.param_dict = reco_param
            self._reco_param_hash = reco_param_hash

            self.eval_dict = self.evaluate_reco_param()
            self.reco_scales_and_biases_applicable()

        # everything seems to be fine, so rescale and shift distributions
        eval_dict = self.scale_and_shift_reco_dists()

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)
        en_centers_in = self.input_binning[
            'true_energy'].weighted_centers.magnitude
        en_edges_in = self.input_binning['true_energy'].bin_edges.magnitude
        cz_centers_in = self.input_binning[
            'true_coszen'].weighted_centers.magnitude
        cz_edges_in = self.input_binning['true_coszen'].bin_edges.magnitude
        en_edges_out = self.output_binning['reco_energy'].bin_edges.magnitude
        cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude

        n_e_in = len(en_centers_in)
        n_cz_in = len(cz_centers_in)
        n_e_out = len(en_edges_out) - 1
        n_cz_out = len(cz_edges_out) - 1

        if self.coszen_flipback:
            cz_edges_out, flipback_mask, keep = \
                self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernel..." % xform_flavints)

            this_params = eval_dict[xform_flavints]
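            # 4D kernel mapping (true energy, true coszen) -> (reco energy,
            # reco coszen)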
            reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out))

            for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)):
                e_kern_cdf = self.make_cdf(bin_edges=en_edges_out,
                                           enval=en_centers_in[i],
                                           enindex=i,
                                           czval=None,
                                           czindex=j,
                                           dist_params=this_params['energy'])
                cz_kern_cdf = self.make_cdf(bin_edges=cz_edges_out,
                                            enval=en_centers_in[i],
                                            enindex=i,
                                            czval=cz_centers_in[j],
                                            czindex=j,
                                            dist_params=this_params['coszen'])

                if self.coszen_flipback:
                    cz_kern_cdf = perform_coszen_flipback(
                        cz_kern_cdf, flipback_mask, keep)

                reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf)

            # Sanity check of reco kernels - intolerable negative values?
            logging.trace(" Ensuring reco kernel sanity...")
            kern_neg_invalid = reco_kernel < -EQUALITY_PREC
            if np.any(kern_neg_invalid):
                raise ValueError("Detected intolerable negative entries in"
                                 " reco kernel! Min.: %.15e" %
                                 np.min(reco_kernel))

            # Set values numerically compatible with zero to zero
            reco_kernel = np.where(np.abs(reco_kernel) < EQUALITY_PREC, 0,
                                   reco_kernel)
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            totals_large = totals > (1 + EQUALITY_PREC)
            if np.any(totals_large):
                raise ValueError("Detected overflow in reco kernel! Max.:"
                                 " %0.15e" % (np.max(totals)))

            if self.input_binning.basenames[0] == "coszen":
                # The reconstruction kernel has been set up with energy as its
                # first dimension, so swap axes if it is applied to an input
                # binning where 'coszen' is the first
                logging.trace(" Swapping kernel dimensions since 'coszen' has"
                              " been requested as the first.")
                reco_kernel = np.swapaxes(reco_kernel, 0, 1)
                reco_kernel = np.swapaxes(reco_kernel, 2, 3)

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            # If *not* combining grouped flavints:
            # Copy the transform for each input flavor, regardless if the
            # transform is computed from a combination of flavors.
            else:
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    for output_name in self.output_names:
                        if (output_name not in NuFlavIntGroup(input_name)
                                or output_name not in xform_flavints):
                            continue
                        logging.trace('  input: %s, output: %s, xform: %s',
                                      input_name, output_name, xform_flavints)

                        xform = BinnedTensorTransform(
                            input_names=input_name,
                            output_name=output_name,
                            input_binning=self.input_binning,
                            output_binning=self.output_binning,
                            xform_array=reco_kernel,
                            sum_inputs=self.sum_grouped_flavints)
                        xforms.append(xform)

        return TransformSet(transforms=xforms)
Example #26
    def hash(self):
        return hash_obj([self.source_code_hash] + [p.hash for p in self])
Example #27
    def __hash__(self):
        return hash_obj([(sec, (self.items(sec)))
                         for sec in sorted(self.sections())])
Example #28
    def store_recursively(fhandle, node, path=None, attrs=None,
                          node_hashes=None):
        """Function for iteratively doing the work"""
        path = [] if path is None else path
        full_path = '/' + '/'.join(path)
        node_hashes = OrderedDict() if node_hashes is None else node_hashes

        if attrs is None:
            sorted_attr_keys = []
        else:
            if isinstance(attrs, OrderedDict):
                sorted_attr_keys = attrs.keys()
            else:
                sorted_attr_keys = sorted(attrs.keys())

        if isinstance(node, Mapping):
            logging.trace('  creating Group "%s"', full_path)
            try:
                dset = fhandle.create_group(full_path)
                for key in sorted_attr_keys:
                    dset.attrs[key] = attrs[key]
            except ValueError:
                pass

            for key in sorted(node.keys()):
                if isinstance(key, str):
                    key_str = key
                else:
                    key_str = str(key)
                    logging.warning(
                        'Making string from key "%s", %s for use as'
                        ' name in HDF5 file', key_str, type(key)
                    )
                val = node[key]
                new_path = path + [key_str]
                store_recursively(fhandle=fhandle, node=val, path=new_path,
                                  node_hashes=node_hashes)
        else:
            # Check for existing node
            node_hash = hash_obj(node)
            if node_hash in node_hashes:
                logging.trace('  creating hardlink for Dataset: "%s" -> "%s"',
                              full_path, node_hashes[node_hash])
                # Hardlink the matching existing dataset
                fhandle[full_path] = fhandle[node_hashes[node_hash]]
                return

            # For now, convert None to np.nan since h5py appears to not handle
            # None
            if node is None:
                node = np.nan
                logging.warning(
                    '  encountered `None` at node "%s"; converting to'
                    ' np.nan', full_path
                )

            # "Scalar datasets don't support chunk/filter options". Shuffling
            # is a good idea otherwise since subsequent compression will
            # generally benefit; shuffling requires chunking. Compression is
            # not done here since it is slow, but can be done by
            # post-processing the generated file(s).
            if np.isscalar(node):
                shuffle = False
                chunks = None
            else:
                shuffle = True
                chunks = True
                # Store the node_hash for linking to later if this is more than
                # a scalar datatype. Assumed that "None" has
                node_hashes[node_hash] = full_path

            # -- Handle special types -- #

            # See h5py docs at
            #
            #   https://docs.h5py.org/en/stable/strings.html#how-to-store-text-strings
            #
            # where using `bytes` objects (i.e., in numpy, np.string_) is
            # deemed the most compatible way to encode objects, but apparently
            # we don't have pytables compatibility right now.
            #
            # For boolean support, see
            #
            #   https://docs.h5py.org/en/stable/faq.html#faq

            # TODO: make written hdf5 files compatible with pytables
            # see docs at https://www.pytables.org/usersguide/datatypes.html

            if isinstance(node, string_types):
                node = np.string_(node)
            elif isinstance(node, bool):  # includes np.bool
                node = np.bool_(node)  # same as np.bool8
            elif isinstance(node, np.ndarray):
                if issubclass(node.dtype.type, string_types):
                    node = node.astype(np.string_)
                elif node.dtype.type in (bool, np.bool_):
                    node = node.astype(np.bool_)

            logging.trace('  creating dataset at path "%s", hash %s',
                          full_path, node_hash)
            try:
                dset = fhandle.create_dataset(
                    name=full_path, data=node, chunks=chunks, compression=None,
                    shuffle=shuffle, fletcher32=False
                )
            except TypeError:
                try:
                    shuffle = False
                    chunks = None
                    dset = fhandle.create_dataset(
                        name=full_path, data=node, chunks=chunks,
                        compression=None, shuffle=shuffle, fletcher32=False
                    )
                except Exception:
                    logging.error('  full_path: "%s"', full_path)
                    logging.error('  chunks   : %s', str(chunks))
                    logging.error('  shuffle  : %s', str(shuffle))
                    logging.error('  node     : "%s"', str(node))
                    raise

            for key in sorted_attr_keys:
                dset.attrs[key] = attrs[key]
Example #29
    def load_sample_events(self):
        """Load the event sample given the configuration file and output
        groups. Hash this object using both the configuration file and
        the output types."""
        hash_property = [self.config, self.neutrinos, self.muons, self.noise,
                         self.params['dataset'].value]
        this_hash = hash_obj(hash_property, full_hash=self.full_hash)
        if this_hash == self.sample_hash:
            return

        name = self.config.get('general', 'name')
        event_types = split(self.config.get('general', 'event_type'))

        logging.info("Event types in data sample '%s': %s",
                     name, [str(e) for e in event_types])

        events = []
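        # Load each requested event type (neutrinos, muons, noise) and merge
        # everything below into a single Data object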
        if self.neutrinos:
            if 'neutrinos' not in event_types:
                raise AssertionError('`neutrinos` field not found in '
                                     'configuration file.')
            dataset = self.params['dataset'].value.lower()
            if 'neutrinos' not in dataset:
                dataset = 'nominal'
            nu_data = self.load_neutrino_events(
                config=self.config, dataset=dataset
            )
            events.append(nu_data)

        if self.muons:
            if 'muons' not in event_types:
                raise AssertionError('`muons` field not found in '
                                     'configuration file.')
            dataset = self.params['dataset'].value
            if 'muons' not in dataset:
                dataset = 'nominal'
            muon_events = self.load_muon_events(
                config=self.config, dataset=dataset
            )
            events.append(muon_events)

        if self.noise:
            if 'noise' not in event_types:
                raise AssertionError('`noise` field not found in '
                                     'configuration file.')
            dataset = self.params['dataset'].value
            if 'noise' not in dataset:
                dataset = 'nominal'
            noise_events = self.load_noise_events(
                config=self.config, dataset=dataset
            )
            events.append(noise_events)
        self._data = reduce(add, events)

        # If requested, fix the truth variable names
        if self.fix_truth_variable_names:
            for event_key in self._data.metadata["flavints_joined"]:
                for var in self.truth_variables:
                    if var in self._data[event_key]:
                        new_var = self.truth_variable_prefix + var
                        self._data[event_key][new_var] = self._data[event_key].pop(var)

        self.sample_hash = this_hash
        self._data.metadata['sample_hash'] = this_hash
        self._data.update_hash()
Example #30
    def update_hash(self):
        """Update the cached hash value"""
        self._hash = hash_obj(normQuant(self.metadata))
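Taken together, these examples all follow the same memoisation idiom: hash whatever determines a result, compare against the hash stored with the last result, and only recompute (optionally consulting a dict-like disk cache) on a mismatch. Below is a minimal, self-contained sketch of that idiom; the class and method names are illustrative only, and the built-in hash of a tuple stands in for hash_obj.

class CachedQuantity(object):
    """Memoise an expensive result, keyed by a hash of its inputs, with an
    optional dict-like disk cache (illustrative sketch only)."""

    def __init__(self, disk_cache=None):
        self.disk_cache = disk_cache  # any mapping, e.g. a shelve object
        self._hash = None
        self._result = None

    def get(self, inputs):
        # Stand-in for hash_obj(...): any stable hash of the inputs will do
        this_hash = hash(tuple(inputs))
        if this_hash == self._hash:
            return self._result                      # memory-cache hit
        if self.disk_cache is not None and this_hash in self.disk_cache:
            result = self.disk_cache[this_hash]      # disk-cache hit
        else:
            result = self._compute(inputs)           # miss: recompute
            if self.disk_cache is not None:
                self.disk_cache[this_hash] = result
        self._hash = this_hash
        self._result = result
        return result

    def _compute(self, inputs):
        return sum(inputs)  # placeholder for the expensive computation

For example, CachedQuantity(disk_cache={}).get([1, 2, 3]) computes the sum once; repeated calls with the same inputs return the cached value without recomputation.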