Beispiel #1
0
 def hashable_state(self):
     """OrderedDict : State of the objec that can be used for hashing"""
     state = super(BinnedTensorTransform, self).hashable_state
     state['xform_array'] = normQuant(self.nominal_values,
                                      sigfigs=HASH_SIGFIGS)
     state['error_array'] = normQuant(self.std_devs, sigfigs=HASH_SIGFIGS)
     return state
Beispiel #2
0
    def _derive_nominal_transforms_hash(self):
        """Derive a hash to uniquely identify the nominal transform. This
        should be unique across processes and invocations bacuase the nominal
        transforms can be non-volatile (cached to disk) and must still be
        valid given their hash value upon loading from disk in the future.

        This implementation uses the nominal parameter values' hash
        combined with the source code hash to generate the final nominal
        transforms hash.

        Notes
        -----
        The hashing scheme implemented here might be sufficiently unique for
        many cases, but override this method in services according to the
        following guidelines:

        * Stages that use a nominal transform should override this method if
          the hash is more accurately computed differently from here.

        * Stages that use transforms but do not use nominal transforms can
          override this method with a simpler version that simply returns None
          to save computation time (if this method is found to be a significant
          performance hit). (This method is called each time an output
          is computed if `self.use_transforms == True`.)

        * Stages that use no transforms (i.e., `self.use_transforms == False`)
          will not call any built-in methods related to transforms, so
          overriding this method is irrelevant to such stages.

        If this method *is* overridden (and not just to return None), since the
        nominal transform may be stored to a disk cache, make sure that
        `self.source_code_hash` is included in the objects used to compute the
        final hash value. Even if all parameters are the same, a nominal
        transform stored to disk is ***invalid if the source code changes***,
        and `_derive_nominal_transforms_hash` must reflect this.

        """
        id_objects = []
        id_objects.append(self.params.nominal_values_hash)
        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)
        id_objects.append(self.source_code_hash)

        # If any hashes are missing (i.e, None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            nominal_transforms_hash = None
        else:
            nominal_transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)
        return nominal_transforms_hash
Beispiel #3
0
    def _derive_transforms_hash(self, nominal_transforms_hash=None):
        """Compute a hash that uniquely identifies the transforms that will be
        produced from the current configuration. Note that this hash needs only
        to be valid for this run (i.e., it is a volatile hash).

        This implementation returns a hash from the current parameters' values.

        """
        id_objects = []
        h = self.params.values_hash
        logging.trace("self.params.values_hash = %s" % h)
        id_objects.append(h)

        # Grab any provided nominal transforms hash, or derive it again
        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()
        # If a valid hash has been gotten, include it
        if nominal_transforms_hash is not None:
            id_objects.append(nominal_transforms_hash)

        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)

        # If any hashes are missing (i.e, None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            transforms_hash = None
        else:
            transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return transforms_hash, nominal_transforms_hash
Beispiel #4
0
    def _derive_outputs_hash(self):
        """Derive a hash value that unique identifies the outputs that will be
        generated based upon the current state of the stage.

        This implementation hashes together:
        * Input and output binning objects' hash values (if either input or
          output binning is not None)
        * Current params' values hash
        * Hashes from any input objects with names in `self.input_names`

        If any of the above objects is specified but returns None for its hash
        value, the entire output hash is invalidated, and None is returned.

        """
        id_objects = []

        # If stage uses inputs, grab hash from the inputs container object
        if self.outputs_cache is not None and len(self.input_names) > 0:
            inhash = self.inputs.hash
            logging.trace("inputs.hash = %s" % inhash)
            id_objects.append(inhash)

        # If stage uses transforms, get hash from the transforms
        transforms_hash = None
        if self.use_transforms:
            transforms_hash, nominal_transforms_hash = self._derive_transforms_hash()
            id_objects.append(transforms_hash)
            logging.trace("derived transforms hash = %s" % id_objects[-1])

        # Otherwise, generate sub-hash on binning and param values here
        else:
            transforms_hash, nominal_transforms_hash = None, None

            if self.outputs_cache is not None:
                id_subobjects = []
                # Include all parameter values
                id_subobjects.append(self.params.values_hash)

                # Include additional attributes of this object
                for attr in sorted(self._attrs_to_hash):
                    val = getattr(self, attr)
                    if hasattr(val, "hash"):
                        attr_hash = val.hash
                    elif self.full_hash:
                        norm_val = normQuant(val)
                        attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
                    else:
                        attr_hash = hash_obj(val, full_hash=self.full_hash)
                    id_subobjects.append(attr_hash)

                # Generate the "sub-hash"
                if any([(h is None) for h in id_subobjects]):
                    sub_hash = None
                else:
                    sub_hash = hash_obj(id_subobjects, full_hash=self.full_hash)
                id_objects.append(sub_hash)

        # If any hashes are missing (i.e, None), invalidate the entire hash
        if self.outputs_cache is None or any([(h is None) for h in id_objects]):
            outputs_hash = None
        else:
            outputs_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return outputs_hash, transforms_hash, nominal_transforms_hash
Beispiel #5
0
 def update_hash(self):
     """Update the cached hash value"""
     self._hash = hash_obj(normQuant(self.metadata))