def process_key(key):
    """map a key object to its deterministic string encoding and 64-bit hash"""
    dkey = as_deterministic(key)
    keystr = encode(dkey)
    keyhash = hash_str_to_u64(keystr)
    return keystr, keyhash
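
# A minimal self-contained sketch of the process_key pipeline above, for
# illustration only: pickle stands in for encode(as_deterministic(...)), and a
# truncated sha256 stands in for hash_str_to_u64; the real definitions live
# elsewhere in this module.
import hashlib, pickle, struct

def _sketch_process_key(key):
    keystr = pickle.dumps(key, protocol=2)    # stand-in for the deterministic string encoding
    keyhash, = struct.unpack('<Q', hashlib.sha256(keystr).digest()[:8])    # stand-in 64-bit hash
    return keystr, keyhash
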
def hashing(obj):
    """hash an object via sha256 of its deterministic pickle"""
    return hashlib.sha256(pickling(as_deterministic(obj))).digest()
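
# Sketch of why the as_deterministic step matters before pickling: a mapping
# pickles differently depending on insertion/iteration order, so a canonical
# form is hashed instead. sorted_items below is a hypothetical stand-in for
# what as_deterministic might do to a dict.
import hashlib, pickle

def sorted_items(d):
    return tuple(sorted(d.items()))

assert (hashlib.sha256(pickle.dumps(sorted_items({'a': 1, 'b': 2}))).digest() ==
        hashlib.sha256(pickle.dumps(sorted_items({'b': 2, 'a': 1}))).digest())
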
def __call__(self, *args, **kwargs):
    """
    look up a hierarchical key object,
    fill in the missing parts, and perform the computation at the leaf if so required
    """
    if self.hierarchy:
        #apply the structure in hierarchy to the arguments
        fkey = kwargs.copy()
        fkey.update(enumerate(args))
        hkey = [[fkey.pop(a) for a in level] for level in self.hierarchy]
        if fkey:
            hkey.append(fkey)   #any arguments not part of the hierarchy spec are placed at the end
    else:
        hkey = [args + ((kwargs,) if kwargs else ())]   #put all args in a single key

    #preprocess subkeys; this minimizes time spent in locked state
    hkey = [as_deterministic(subkey) for subkey in hkey]

    with self.lock:     #fairly stupid thread locking. don't use threads; how about that?
        while True:
            try:
                with self.lock_file:
                    #hierarchical key lookup; first key is the prebound environment key
                    previouskey = Partial(self.envrowid)
                    for ikey, subkey in enumerate(hkey[:-1]):
                        partialkey = previouskey, subkey
                        rowid = self.shelve.getrowid(partialkey, *process_key(partialkey))  #read lock?
                        previouskey = Partial(rowid)
                    #leaf iteration
                    ikey = len(hkey) - 1
                    leafkey = previouskey, hkey[-1]
                    value = self.shelve[leafkey]    #read lock?

                if isinstance(value, Deferred):
                    #another process is computing this value; wait for it, or give up on it
                    if value.expired(self.deferred_timeout):
                        raise Exception('deferred value expired')
                    sleep(0.01)
                else:
                    if self.validate:
                        #check if the recomputed value is identical under deterministic serialization
                        newvalue = self.operation(*args, **kwargs)
                        try:
                            #note: new may differ from old in case aliasing in an ndarray was erased
                            #by the original serialization. is this an error?
                            #I'd say so; depending on whether we have a cache hit, downstream code may react differently
                            #perhaps it's best to use custom serialization for values too
                            assert as_deterministic(value) == as_deterministic(newvalue)
                        except Exception:
                            print 'Cache returned invalid value!'
                            print 'arguments:'
                            print args
                            print kwargs
                            print 'cached value:'
                            print value
                            print 'recomputed value:'
                            print newvalue
                            quit()
                    #yes! hitting this return is what we are doing this all for!
                    return value
            except Exception:
                #lock for the writing branch. multiprocess does not benefit here, but so be it.
                #worst case we make multiple insertions into the db, but this should do no harm for behavior
                if self.lock_file.is_locked():
                    #if the lock is not available, better to go back to waiting for a deferred to appear
                    sleep(0.001)
                else:
                    with self.lock_file:
                        #hierarchical key insertion, resuming where the lookup failed
                        for subkey in hkey[ikey:-1]:
                            partialkey = previouskey, subkey
                            kstr, khash = process_key(partialkey)
                            self.shelve.setitem(partialkey, None, kstr, khash)      #write lock
                            rowid = self.shelve.getrowid(partialkey, kstr, khash)   #read lock
                            previouskey = Partial(rowid)
                        #insert leaf node as a Deferred, marking it as in-progress for other processes
                        leafkey = previouskey, hkey[-1]
                        kstr, khash = process_key(leafkey)
                        self.shelve.setitem(leafkey, Deferred(), kstr, khash)       #write lock
                    #don't need the lock while doing expensive things
                    value = self.operation(*args, **kwargs)
                    with self.lock_file:
                        self.shelve.setitem(leafkey, value, kstr, khash)            #write lock
                    return value
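
# Hypothetical usage sketch (the cache class's constructor is defined elsewhere
# in this module, so the names and signature here are illustrative assumptions):
#
#   cache = Cache(operation=expensive_fn, hierarchy=[[0], [1]])
#   cache(dataset, params)   # miss: walks/creates the key chain, stores a
#                            # Deferred, computes, then stores the value
#   cache(dataset, params)   # hit: walks rowids down the hierarchy and
#                            # returns the stored leaf value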