def get_hashval(self, hash_method=None):
    """Return a dictionary of our items with hashes for each file.

    Searches through dictionary items and if an item is a file, it
    calculates the md5 hash of the file contents and stores the file
    name and hash value as the new key value.

    However, the overall bunch hash is calculated only on the hash
    value of a file. The path and name of the file are not used in
    the overall hash calculation.

    Parameters
    ----------
    hash_method : optional
        Forwarded unchanged to ``self._get_sorteddict`` for every item.
        Its accepted values are defined by that helper, not here.

    Returns
    -------
    dict_withhash : dict
        Maps each defined item name to the result of
        ``self._get_sorteddict(val, True, ...)``.
    hashvalue : str
        The md5 hex digest of the string form of the companion
        dictionary built without the positional ``True`` flag.
    """
    dict_withhash = {}
    dict_nofilename = {}
    # Items are visited in sorted name order so that the string form of
    # ``dict_nofilename`` (which is what gets hashed) is built in a
    # stable order.
    for name, val in sorted(self.get().items()):
        if not isdefined(val):
            continue
        trait = self.trait(name)
        # NOTE(review): presumably has_metadata(...) is true when the trait
        # carries hash_files=False, i.e. file hashing is on by default --
        # confirm against the helper's definition.
        hash_files = not has_metadata(trait.trait_type, "hash_files", False)
        dict_nofilename[name] = self._get_sorteddict(
            val, hash_method=hash_method, hash_files=hash_files)
        dict_withhash[name] = self._get_sorteddict(
            val, True, hash_method=hash_method, hash_files=hash_files)
    # md5 only accepts bytes on Python 3, so encode the text first.
    digest = md5(str(dict_nofilename).encode()).hexdigest()
    return (dict_withhash, digest)
def _get_hashval(self):
    """Return a hash of the input state.

    Calls ``self._get_inputs()`` first. If either ``self._hashvalue`` or
    ``self._hashed_inputs`` is already set, the stored pair is returned
    unchanged. Otherwise a deep copy of ``self._interface.inputs`` is
    taken, every attribute named in ``self.ignore_cache`` is removed from
    the copy (best effort), and the copy's ``get_hashval`` is asked for
    the hashed inputs and hash value.

    When the ``remove_unnecessary_outputs`` execution option is truthy
    (per ``str2bool``) and ``self.needed_outputs`` is non-empty, the
    sorted needed outputs are folded into the hash and appended to the
    hashed inputs as ``('needed_outputs', [...])``.

    Returns
    -------
    hashed_inputs
        The value returned by ``inputs.get_hashval`` (must support
        ``append``), possibly extended with the needed outputs.
    hashvalue : str
        The md5 hex digest describing the input state.
    """
    self._get_inputs()
    if self._hashvalue is not None or self._hashed_inputs is not None:
        # Something is already cached: hand it back as-is.
        return self._hashed_inputs, self._hashvalue

    inputs = copy.deepcopy(self._interface.inputs)
    for name in self.ignore_cache:
        try:
            delattr(inputs, name)
        except Exception:
            # Best effort: an input that cannot be removed is simply left
            # in place. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            pass

    hashed_inputs, hashvalue = inputs.get_hashval(
        hash_method=self.config['execution']['hash_method'])
    rm_extra = self.config['execution']['remove_unnecessary_outputs']
    if str2bool(rm_extra) and self.needed_outputs:
        # Sort so the hash does not depend on the order in which the
        # needed outputs happened to be collected.
        sorted_outputs = sorted(self.needed_outputs)
        hashobject = md5()
        hashobject.update(hashvalue.encode())
        hashobject.update(str(sorted_outputs).encode())
        hashvalue = hashobject.hexdigest()
        hashed_inputs.append(('needed_outputs', sorted_outputs))

    # Publish the cache only once the hash is complete, so a failure
    # above never leaves a half-computed value behind.
    self._hashed_inputs, self._hashvalue = hashed_inputs, hashvalue
    return self._hashed_inputs, self._hashvalue
def _get_bunch_hash(self):
    """Return a dictionary of our items with hashes for each file.

    Searches through dictionary items and if an item is a file, it
    calculates the md5 hash of the file contents and stores the file
    name and hash value as the new key value.

    However, the overall bunch hash is calculated only on the hash
    value of a file. The path and name of the file are not used in
    the overall hash calculation.

    Only the first element of a non-dict container is probed to decide
    whether the attribute holds files; dict values are never treated as
    files.

    Returns
    -------
    dict_withhash : dict
        Copy of our dictionary with the new file hashes included
        with each file.
    hashvalue : str
        The md5 hex digest of the sorted items of a second copy in
        which file entries are replaced by their hashes only.

    Raises
    ------
    AttributeError
        If an attribute is an empty (non-dict) container.
    """
    infile_list = []
    for key, val in self.items():
        if is_container(val):
            # XXX - SG this probably doesn't catch numpy arrays
            # containing embedded file names either.
            if isinstance(val, dict):
                # XXX - SG should traverse dicts, but ignoring for now
                item = None
            else:
                if len(val) == 0:
                    raise AttributeError("%s attribute is empty" % key)
                item = val[0]
        else:
            item = val
        # Probe only textual paths: os.path.isfile() interprets integers
        # as open file descriptors, so a numeric attribute could otherwise
        # be mistaken for a file.
        if isinstance(item, (str, bytes)) and os.path.isfile(item):
            infile_list.append(key)

    dict_withhash = self.dictcopy()
    dict_nofilename = self.dictcopy()
    for name in infile_list:
        dict_withhash[name] = self._hash_infile(dict_withhash, name)
        # Keep only the hash half of each (filename, hash) pair.
        dict_nofilename[name] = [pair[1] for pair in dict_withhash[name]]
    # Sort the items of the dictionary, before hashing the string
    # representation so we get a predictable order of the
    # dictionary.
    sorted_dict = str(sorted(dict_nofilename.items()))
    # md5 only accepts bytes on Python 3, so encode the text first.
    return (dict_withhash, md5(sorted_dict.encode()).hexdigest())
def _hash_infile(self, adict, key):
    """Pair each entry of ``adict[key]`` with the md5 of its file contents.

    ``adict`` itself is not modified; the caller decides where to store
    the returned list.

    Parameters
    ----------
    adict : dict
        Mapping that holds the value to inspect.
    key
        Key into ``adict``. The value may be a single entry or a
        container of entries (as judged by ``is_container``).

    Returns
    -------
    file_list : list of tuple
        One ``(entry, md5hex)`` pair per entry, in order. ``md5hex`` is
        the hex digest of the file contents, or ``None`` when the entry
        is not an existing regular file.
    """
    stuff = adict[key]
    if not is_container(stuff):
        stuff = [stuff]
    file_list = []
    for afile in stuff:
        md5hex = None
        if os.path.isfile(afile):
            md5obj = md5()
            # ``with`` guarantees the handle is closed even if a read
            # fails; reading in 8 KiB chunks avoids loading the whole
            # file into memory.
            with open(afile, "rb") as fp:
                for chunk in iter(lambda: fp.read(8192), b""):
                    md5obj.update(chunk)
            md5hex = md5obj.hexdigest()
        file_list.append((afile, md5hex))
    return file_list
def _get_hashval(self):
    """Compute hash including iterfield lists.

    Calls ``self._get_inputs()`` first and returns the stored
    ``(self._hashed_inputs, self._hashvalue)`` pair when both are
    already set. Otherwise, after ``self._check_iterfield()``, a deep
    copy of ``self._interface.inputs`` is prepared for hashing:

    * each trait named in ``self.iterfield`` is replaced by an
      ``InputMultiPath`` wrapping the original trait type and is given
      the value found on ``self._inputs`` (passed through ``flatten``
      when ``self.nested`` is true);
    * each attribute named in ``self.ignore_cache`` is removed
      (best effort).

    When the ``remove_unnecessary_outputs`` execution option is truthy
    (per ``str2bool``) and ``self.needed_outputs`` is non-empty, the
    sorted needed outputs are folded into the hash and appended to the
    hashed inputs as ``('needed_outputs', [...])``.

    Returns
    -------
    hashed_inputs
        The value returned by the prepared inputs' ``get_hashval``
        (must support ``append``), possibly extended with the needed
        outputs.
    hashvalue : str
        The md5 hex digest describing the input state.
    """
    self._get_inputs()
    if self._hashvalue is not None and self._hashed_inputs is not None:
        return self._hashed_inputs, self._hashvalue

    self._check_iterfield()
    hashinputs = copy.deepcopy(self._interface.inputs)
    for name in self.iterfield:
        # Swap the trait for a multi-valued one so the whole iterfield
        # list can be assigned and therefore take part in the hash.
        hashinputs.remove_trait(name)
        hashinputs.add_trait(
            name,
            InputMultiPath(
                self._interface.inputs.traits()[name].trait_type))
        value = getattr(self._inputs, name)
        logger.debug('setting hashinput %s-> %s', name, value)
        if self.nested:
            value = flatten(value)
        setattr(hashinputs, name, value)

    for name in self.ignore_cache:
        try:
            delattr(hashinputs, name)
        except Exception:
            # Best effort: an input that cannot be removed is simply left
            # in place. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            pass

    hashed_inputs, hashvalue = hashinputs.get_hashval(
        hash_method=self.config['execution']['hash_method'])
    rm_extra = self.config['execution']['remove_unnecessary_outputs']
    if str2bool(rm_extra) and self.needed_outputs:
        hashobject = md5()
        hashobject.update(hashvalue.encode())
        # Sorted so the hash is independent of collection order.
        sorted_outputs = sorted(self.needed_outputs)
        hashobject.update(str(sorted_outputs).encode())
        hashvalue = hashobject.hexdigest()
        hashed_inputs.append(('needed_outputs', sorted_outputs))
    self._hashed_inputs, self._hashvalue = hashed_inputs, hashvalue
    return self._hashed_inputs, self._hashvalue
def hashval(self):
    """Return a dictionary of our items with hashes for each file.

    Searches through dictionary items and if an item is a file, it
    calculates the md5 hash of the file contents and stores the file
    name and hash value as the new key value.

    However, the overall bunch hash is calculated only on the hash
    value of a file. The path and name of the file are not used in
    the overall hash calculation.

    Returns
    -------
    dict_withhash
        Result of ``self._get_sorteddict(self.get(), True)``.
    hashvalue : str
        The md5 hex digest of the string form of
        ``self._get_sorteddict(self.get())``.
    """
    dict_withhash = self._get_sorteddict(self.get(), True)
    dict_nofilename = self._get_sorteddict(self.get())
    # md5 only accepts bytes on Python 3, so encode the text first.
    digest = md5(str(dict_nofilename).encode()).hexdigest()
    return (dict_withhash, digest)