import os
from multiprocessing import Pool

import numpy as np

# hdf5, jsdict, settings, process_data_sub_job, collect_metadata and
# accumulate_data are project-level helpers defined elsewhere in this codebase.


# Memoize the result of fn() as an HDF5 file. paths is a list of directory
# components ending with the cache filename; used for caching features in
# FeatureConcatPipeline.
def memoize(fn, paths):
    cwd = os.getcwd()

    # Create the target cache directories as needed and descend into them.
    def change_to_target_dir():
        for dir in paths[:-1]:
            try:
                os.mkdir(dir)
            except OSError:
                pass
            os.chdir(dir)

    change_to_target_dir()
    filename = paths[-1]

    # Cache hit: read the previously written data.
    if os.path.exists(filename):
        data = hdf5.read(filename)
        os.chdir(cwd)
        return data

    # Cache miss: compute the data, then write it to a per-process temp file
    # and rename it into place so concurrent writers cannot corrupt the cache.
    os.chdir(cwd)
    data = fn()
    change_to_target_dir()
    tmp = '%s.pid.%d.tmp' % (filename, os.getpid())
    hdf5.write(tmp, data)
    os.rename(tmp, filename)
    os.chdir(cwd)
    return jsdict(data)
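
# Usage sketch (illustrative, not part of the original module): memoize()
# takes a zero-argument callable and a list of path components whose last
# element is the cache filename. _example_compute() and the cache path below
# are hypothetical, and assume hdf5.write() accepts a dict of numpy arrays.
def _example_memoize_usage():
    def _example_compute():
        return {'X': np.random.randn(10, 16), 'y': np.ones(10)}

    # The first call computes and writes the cache file; subsequent calls
    # with the same paths read it back instead of recomputing.
    return memoize(_example_compute, ['data-cache', 'Dog_1', 'example.hdf5'])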
def process_and_merge_segments(target, data_type, out_dir, metadata, N_jobs):
    filename_out = os.path.join(out_dir, '%s_%s.hdf5' % (target, data_type))
    if os.path.exists(filename_out):
        return 0

    print('Processing %s ...' % filename_out)

    filename_in_fmt = '%s_%s_segment_%%.4d' % (target, data_type)
    filename_out_fmt = '%s/%s_%s_segment_%%d.hdf5' % (out_dir, target, data_type)

    # process_data_sub_job(settings, filename_in_fmt, filename_out_fmt, 0, 1)

    # Split the segment processing across N_jobs worker processes.
    pool = Pool(N_jobs)
    results = [
        pool.apply_async(
            process_data_sub_job,
            [settings, filename_in_fmt, filename_out_fmt, id, N_jobs])
        for id in range(N_jobs)
    ]
    pool.close()
    pool.join()
    num_processed = np.sum([r.get() for r in results])

    # Gather metadata from each processed segment file, then merge the
    # per-segment files into a single accumulated HDF5 file.
    for i in range(num_processed):
        data = hdf5.read(filename_out_fmt % i)
        collect_metadata(data, metadata)

    _, accum_meta = accumulate_data(settings, target, data_type, tag=None,
                                    output_to_original_data_dir=True, quiet=True)
    return accum_meta.num_segments
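
# Usage sketch (illustrative): a driver of this shape would call
# process_and_merge_segments() once per target and data type. The target and
# data-type names, output directory, job count, and the use of a plain dict
# for the shared metadata argument are all assumptions, not part of the
# original module.
def _example_process_all(out_dir='data-cache', N_jobs=4):
    metadata = {}
    for target in ('Dog_1', 'Dog_2'):
        for data_type in ('preictal', 'interictal', 'test'):
            num_segments = process_and_merge_segments(target, data_type,
                                                      out_dir, metadata, N_jobs)
            print('%s %s: %d segments' % (target, data_type, num_segments))
    return metadata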

# Fast process-if-not-yet-processed method for training data