Esempio n. 1
0
    def get_hashval(self, hash_method=None):
        """Return a dictionary of our items with hashes for each file.

        Searches through dictionary items and if an item is a file, it
        calculates the md5 hash of the file contents and stores the
        file name and hash value as the new key value.

        However, the overall bunch hash is calculated only on the hash
        value of a file. The path and name of the file are not used in
        the overall hash calculation.

        Returns
        -------
        dict_withhash : dict
            Copy of our dictionary with the new file hashes included
            with each file.
        hashvalue : str
            The md5 hash value of the traited spec

        """

        dict_withhash = {}
        dict_nofilename = {}
        for name, val in sorted(self.get().items()):
            if isdefined(val):
                trait = self.trait(name)
                hash_files = not has_metadata(trait.trait_type, "hash_files", False)
                dict_nofilename[name] = self._get_sorteddict(val, hash_method=hash_method, hash_files=hash_files)
                dict_withhash[name] = self._get_sorteddict(val, True, hash_method=hash_method, hash_files=hash_files)
        return (dict_withhash, md5(str(dict_nofilename)).hexdigest())
Esempio n. 2
0
    def _get_hashval(self):
        """Return a hash of the input state"""
        self._get_inputs()
        if self._hashvalue is None and self._hashed_inputs is None:

            inputs = copy.deepcopy(self._interface.inputs)
            for f in self.ignore_cache:
                try:
                    delattr(inputs, f)
                except:
                    pass

            self._hashed_inputs, self._hashvalue = inputs.get_hashval(
                hash_method=self.config['execution']['hash_method'])

            rm_extra = self.config['execution']['remove_unnecessary_outputs']
            if str2bool(rm_extra) and self.needed_outputs:
                hashobject = md5()
                hashobject.update(self._hashvalue.encode())
                hashobject.update(str(self.needed_outputs).encode())
                self._hashvalue = hashobject.hexdigest()
                self._hashed_inputs.append(
                    ('needed_outputs', self.needed_outputs))

        return self._hashed_inputs, self._hashvalue
Esempio n. 3
0
    def _get_bunch_hash(self):
        """Return a dictionary of our items with hashes for each file.

        Searches through dictionary items and if an item is a file, it
        calculates the md5 hash of the file contents and stores the
        file name and hash value as the new key value.

        However, the overall bunch hash is calculated only on the hash
        value of a file. The path and name of the file are not used in
        the overall hash calculation.

        Returns
        -------
        dict_withhash : dict
            Copy of our dictionary with the new file hashes included
            with each file.
        hashvalue : str
            The md5 hash value of the `dict_withhash`

        """

        infile_list = []
        for key, val in self.items():
            if is_container(val):
                # XXX - SG this probably doesn't catch numpy arrays
                # containing embedded file names either.
                if isinstance(val, dict):
                    # XXX - SG should traverse dicts, but ignoring for now
                    item = None
                else:
                    if len(val) == 0:
                        raise AttributeError("%s attribute is empty" % key)
                    item = val[0]
            else:
                item = val
            try:
                if os.path.isfile(item):
                    infile_list.append(key)
            except TypeError:
                # `item` is not a file or string.
                continue
        dict_withhash = self.dictcopy()
        dict_nofilename = self.dictcopy()
        for item in infile_list:
            dict_withhash[item] = self._hash_infile(dict_withhash, item)
            dict_nofilename[item] = [val[1] for val in dict_withhash[item]]
        # Sort the items of the dictionary, before hashing the string
        # representation so we get a predictable order of the
        # dictionary.
        sorted_dict = str(sorted(dict_nofilename.items()))
        return (dict_withhash, md5(sorted_dict).hexdigest())
Esempio n. 4
0
 def _hash_infile(self, adict, key):
     # Inject file hashes into adict[key]
     stuff = adict[key]
     if not is_container(stuff):
         stuff = [stuff]
     file_list = []
     for afile in stuff:
         if os.path.isfile(afile):
             md5obj = md5()
             fp = file(afile, "rb")
             while True:
                 data = fp.read(8192)
                 if not data:
                     break
                 md5obj.update(data)
             fp.close()
             md5hex = md5obj.hexdigest()
         else:
             md5hex = None
         file_list.append((afile, md5hex))
     return file_list
Esempio n. 5
0
    def _get_hashval(self):
        """Compute hash including iterfield lists."""
        self._get_inputs()

        if self._hashvalue is not None and self._hashed_inputs is not None:
            return self._hashed_inputs, self._hashvalue

        self._check_iterfield()
        hashinputs = copy.deepcopy(self._interface.inputs)
        for name in self.iterfield:
            hashinputs.remove_trait(name)
            hashinputs.add_trait(
                name,
                InputMultiPath(
                    self._interface.inputs.traits()[name].trait_type))
            logger.debug('setting hashinput %s-> %s', name,
                         getattr(self._inputs, name))
            if self.nested:
                setattr(hashinputs, name, flatten(getattr(self._inputs, name)))
            else:
                setattr(hashinputs, name, getattr(self._inputs, name))

        for f in self.ignore_cache:
            try:
                delattr(hashinputs, f)
            except:
                pass

        hashed_inputs, hashvalue = hashinputs.get_hashval(
            hash_method=self.config['execution']['hash_method'])
        rm_extra = self.config['execution']['remove_unnecessary_outputs']
        if str2bool(rm_extra) and self.needed_outputs:
            hashobject = md5()
            hashobject.update(hashvalue.encode())
            sorted_outputs = sorted(self.needed_outputs)
            hashobject.update(str(sorted_outputs).encode())
            hashvalue = hashobject.hexdigest()
            hashed_inputs.append(('needed_outputs', sorted_outputs))
        self._hashed_inputs, self._hashvalue = hashed_inputs, hashvalue
        return self._hashed_inputs, self._hashvalue
Esempio n. 6
0
    def hashval(self):
        """Return a dictionary of our items with hashes for each file.

        Searches through dictionary items and if an item is a file, it
        calculates the md5 hash of the file contents and stores the
        file name and hash value as the new key value.

        However, the overall bunch hash is calculated only on the hash
        value of a file. The path and name of the file are not used in
        the overall hash calculation.

        Returns
        -------
        dict_withhash : dict
            Copy of our dictionary with the new file hashes included
            with each file.
        hashvalue : str
            The md5 hash value of the traited spec

        """
        dict_withhash = self._get_sorteddict(self.get(),True)
        dict_nofilename = self._get_sorteddict(self.get())
        return (dict_withhash, md5(str(dict_nofilename)).hexdigest())