def _function_wrapper(args_package):
    """Allow multiprocessing Pool to call generic functions/args/kwargs.

    Data are saved here rather than handed back to avoid the scenario
    where all of the output from a batch run is held in memory.
    """
    (identifier, directory, funcname, args, kwargs) = args_package

    readable = utils.readable_call(funcname, args, kwargs)
    filename = "%s/%s.shelve" % (directory, identifier)
    filename = re.sub('/+', '/', filename)
    print "%s -> %s" % (readable, filename)

    result = utils.func_exec(funcname, args, kwargs, printcall=False)

    outfile = shelve.open(filename, 'n', protocol=-1)
    outfile["identifier"] = identifier
    outfile["filename"] = filename
    outfile["funcname"] = funcname
    outfile["args"] = args
    outfile["kwargs"] = kwargs
    outfile["result"] = result
    outfile.close()

    return identifier
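# Hedged usage sketch (not part of the original module): _function_wrapper is
# written so that a multiprocessing.Pool can map it over pre-built
# args_package tuples, with each worker shelving its own output.  The output
# directory, the dotted function names, and the assumption that
# utils.func_exec resolves such names are illustrative guesses, not the
# original API.
import multiprocessing

def _example_batch_run(directory="/tmp/batch_out"):
    # one args_package per call: (identifier, directory, funcname, args, kwargs)
    packages = [("job%d" % index, directory, "numpy.mean",
                 ([1., 2., 3. + index],), {})
                for index in range(4)]

    pool = multiprocessing.Pool(processes=2)
    finished = pool.map(_function_wrapper, packages)
    pool.close()
    pool.join()
    print "completed identifiers:", finished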
def scatter(self, *args, **kwargs):
    if "execute_key" not in kwargs:
        print "need to identify an execution key for the task"
        return

    rehashed = [_make_serializable(item) for item in args]
    argpkl = pickle.dumps((self.funcname, rehashed,
                           tuple(sorted(kwargs.items()))), -1)
    identifier = hashlib.sha224(argpkl).hexdigest()
    readable = utils.readable_call(self.funcname, rehashed, kwargs)

    # remove the kwarg that only tags the output with an execution key,
    # so it does not interfere with the actual function call
    execute_key = kwargs["execute_key"]
    del kwargs["execute_key"]

    jobfile_name = "%s/%s.job" % (pd.job_directory, identifier)
    donefile_name = "%s/%s.done" % (pd.job_directory, identifier)

    # first remove any lurking completed jobs
    try:
        os.remove(donefile_name)
        os.remove(jobfile_name)
    except OSError:
        if self.verbose:
            print "all clear: ", donefile_name

    job_shelve = shelve.open(jobfile_name, "n", protocol=-1)
    job_shelve["funcname"] = self.funcname
    job_shelve["args"] = args
    job_shelve["kwargs"] = kwargs
    job_shelve["tag"] = execute_key
    job_shelve["identifier"] = identifier
    job_shelve["call"] = readable
    #print job_shelve
    job_shelve.close()

    self.call_stack.append(identifier)
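# A possible consumer loop (an assumption, not in the original source): the
# .job shelves written by scatter carry everything _function_wrapper needs,
# so a drain step could repackage them and farm them out with multiprocessing.
# The directory arguments and the name _example_drain_jobs are hypothetical,
# and how the real execute step signals completion through the .done files is
# not shown here.
import glob
import multiprocessing
import shelve

def _example_drain_jobs(job_directory="/tmp/jobs", out_directory="/tmp/out"):
    packages = []
    for jobfile_name in glob.glob("%s/*.job" % job_directory):
        job = shelve.open(jobfile_name, "r", protocol=-1)
        packages.append((job["identifier"], out_directory,
                         job["funcname"], job["args"], job["kwargs"]))
        job.close()

    pool = multiprocessing.Pool()
    pool.map(_function_wrapper, packages)
    pool.close()
    pool.join()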
def memoize(*args, **kwargs):
    funcname = func.__name__
    rehashed = [_make_serializable(item) for item in args]
    argpkl = pickle.dumps((funcname, rehashed,
                           tuple(sorted(kwargs.items()))), -1)
    identifier = hashlib.sha224(argpkl).hexdigest()

    readable = utils.readable_call(funcname, rehashed, kwargs)
    filename = "%s/%s.shelve" % (memoize_directory, identifier)
    filename = re.sub('/+', '/', filename)
    print "%s -> %s" % (readable, filename)

    done_filename = filename + ".done"
    busy_filename = filename + ".busy"

    # prevent the race condition where many threads want to
    # start writing a new cache file at the same time
    time.sleep(random.uniform(0, 0.5))

    # if the result is cached or being calculated elsewhere,
    if os.access(done_filename, os.F_OK) or \
       os.access(busy_filename, os.F_OK):
        printed = False
        # wait for the other calculation process to finish
        while not os.access(done_filename, os.F_OK):
            time.sleep(1.)
            if not printed:
                print "waiting for %s" % filename
                printed = True

        # if many threads were waiting to read, space their reads
        time.sleep(random.uniform(0, 0.5))
        print "ready to read %s" % filename

        try:
            input_shelve = shelve.open(filename, "r", protocol=-1)
            retval = input_shelve['result']
            input_shelve.close()
            print "used cached value %s" % filename
        except:
            raise ValueError("could not read cache file %s" % filename)
    else:
        # first flag the cachefile as busy so other threads wait
        busyfile = open(busy_filename, "w")
        busyfile.write("working")
        busyfile.close()

        # recalculate the function
        print "no cache, recalculating %s" % filename
        start = time.time()
        retval = func(*args, **kwargs)

        outfile = shelve.open(filename, "n", protocol=-1)
        outfile["signature"] = identifier
        outfile["filename"] = filename
        outfile["funcname"] = funcname
        outfile["args"] = rehashed
        outfile["kwargs"] = kwargs
        outfile["result"] = retval
        outfile.close()
        time.sleep(0.2)

        # indicate that the function is done being recalculated
        donefile = open(done_filename, "w")
        donefile.write("%10.15f\n" % (time.time() - start))
        donefile.close()

        # remove the busy flag
        os.remove(busy_filename)

    return retval
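# The memoize body above closes over `func` and `memoize_directory`, which
# suggests it is the inner function of a decorator factory.  A minimal sketch
# of the presumed surrounding wrapper follows; the factory name and the
# default directory are assumptions, not the original API.
import functools

def memoize_in_directory(memoize_directory="/tmp/memoize"):
    def decorator(func):
        @functools.wraps(func)
        def memoize(*args, **kwargs):
            # cache/busy/done logic exactly as in the body above
            pass
        return memoize
    return decorator

# so that a cached function could be declared as, e.g.:
# @memoize_in_directory("/scratch/cache")
# def expensive_spectrum(data, nbins=64):
#     ...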