def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride): """ This function is used for generating part indicator map for old data data_dir is the directory that you put all batch_datayes """ allfile = iu.getfilelist(data_dir, 'data_batch_\d+') meta_path = iu.fullfile(data_dir, 'batches.meta') iu.ensure_dir(save_dir) if iu.exists(meta_path, 'file'): d_meta = myio.unpickle(meta_path) if 'savedata_info' not in d_meta: d_meta['savedata_info'] = dict() d_meta['savedata_info']['indmap_para'] = dict() d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size d_meta['savedata_info']['indmap_para']['stride'] = stride d_meta['savedata_info']['indmap_para']['rate'] = rate myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta) for fn in allfile: print 'Processing %s' % fn d = myio.unpickle(iu.fullfile(data_dir, fn)) ndata = d['data'].shape[-1] nparts = 7 d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=np.bool) for i in range(ndata): jts = d['joints8'][..., i] d['indmap'][..., i] = HMLPE.create_part_indicatormap( jts, part_idx, mdim, rate, filter_size, stride) myio.pickle(iu.fullfile(save_dir, fn), d)
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride): """ This function is used for generating part indicator map for old data data_dir is the directory that you put all batch_datayes """ allfile = iu.getfilelist(data_dir, 'data_batch_\d+') meta_path = iu.fullfile(data_dir, 'batches.meta') iu.ensure_dir(save_dir) if iu.exists(meta_path, 'file'): d_meta = myio.unpickle(meta_path) if 'savedata_info' not in d_meta: d_meta['savedata_info'] = dict() d_meta['savedata_info']['indmap_para'] = dict() d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size d_meta['savedata_info']['indmap_para']['stride'] = stride d_meta['savedata_info']['indmap_para']['rate'] = rate myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta) for fn in allfile: print 'Processing %s' % fn d = myio.unpickle(iu.fullfile(data_dir, fn)) ndata = d['data'].shape[-1] nparts = 7 d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=np.bool) for i in range(ndata): jts = d['joints8'][...,i] d['indmap'][...,i] = HMLPE.create_part_indicatormap(jts, part_idx, mdim, rate, filter_size, stride) myio.pickle(iu.fullfile(save_dir, fn), d)
def merge_batch_data(data_dir_list, save_dir, is_symbolic=True, batch_start_num=1):
    """
    Merge all the data batches in data_dir_list into one folder and rename
    them accordingly. Of course, the meta data will be updated as well.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)
    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, r'data_batch_\d+')
        print 'I find %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(iu.fullfile(ddir, fn),
                                iu.fullfile(save_dir, 'data_batch_%d' % cur_id))
                cur_id = cur_id + 1
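# Hypothetical usage sketch for merge_batch_data; the directory names below are
# placeholders. With is_symbolic=True the merged folder only holds symlinks to
# the original batch files; pass is_symbolic=False to copy the files instead.
# merge_batch_data(['/data/batches_part1', '/data/batches_part2'],
#                  '/data/batches_merged', is_symbolic=True, batch_start_num=1)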
def test_addmeta(metadir):
    """
    Add the indicator-map dimension fields to an existing batches.meta file.
    """
    metapath = iu.fullfile(metadir, 'batches.meta')
    d_meta = myio.unpickle(metapath)
    d_meta['ind_dim'] = dict()
    d_meta['ind_dim']['part_indmap'] = (8, 8)
    d_meta['ind_dim']['joint_indmap'] = (8, 8)
    myio.pickle(metapath, d_meta)
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    Shuffle all the data in source_dir and save it to target_dir.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'),
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata - 1) / max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para']['joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print 'There are %d data in total, I need %d batch to hold it' % (ndata, nbatch)
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                  'joint_indmap': jtmdim},
                                 max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            saved = HMLPE.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                              'joint_indmap': jtmdim},
                                             cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, r'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12  # hold at most 12 target batches in memory per round
    nround = (nbatch - 1) / buf_cap + 1
    for rd in range(nround):
        print 'Round %d of %d' % (rd, nround)
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print 'Processing %s' % fn
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [i for i in range(cur_n)
                                if sidx <= perm[offset + i] < eidx]
                if len(cur_idx_list) == 0:
                    continue
                if b not in buf:
                    dsave = myio.unpickle(iu.fullfile(target_dir,
                                                      'data_batch_%d' % (b + 1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), buf[b])
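# Hypothetical usage sketch for shuffle_data; the paths are placeholders. The
# source and target directories must differ, and the source folder is expected
# to contain a batches.meta file plus data_batch_* files in the HMLPE format.
# shuffle_data('/data/batches_merged', '/data/batches_shuffled', max_per_file=4000)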