def collect_feature_meta(folder, re_exp='batch_feature_\w+$'):
    allfile = sorted(iu.getfilelist(folder, re_exp),
                     key=lambda x: extract_batch_num(x))
    feature_list_lst = []
    feature_dim = None
    indexes_lst = []
    if len(allfile) == 0:
        return dict()
    for f in allfile:
        print f
        p = iu.fullfile(folder, f)
        d = mio.unpickle(p)
        feature_list_lst += [d['feature_list']]
        if feature_dim is not None:
            if feature_dim != d['feature_dim']:
                raise Exception('feature dim inconsistent')
        else:
            feature_dim = d['feature_dim']
        indexes_lst += [d['info']['indexes']]
    indexes = np.concatenate(indexes_lst)
    n_feature, n_batch = len(feature_dim), len(allfile)
    # concatenate each feature over all batches along the last axis
    feature_list = [np.concatenate([feature_list_lst[i][k] for i in range(n_batch)],
                                   axis=-1)
                    for k in range(n_feature)]
    return {'feature_list': feature_list, 'feature_dim': feature_dim,
            'info': {'indexes': indexes,
                     'feature_names': d['info']['feature_names']}}
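# A minimal usage sketch for collect_feature_meta (the folder path is hypothetical;
# it assumes the folder holds pickled files named batch_feature_1, batch_feature_2,
# ... with the 'feature_list'/'feature_dim'/'info' fields read above):
def _example_collect_feature_meta():
    meta = collect_feature_meta('/tmp/feature_batches')
    if not meta:
        print 'No batch_feature_* files found'
        return
    # one array per feature, each concatenated over all batches along the last axis
    for name, feat in zip(meta['info']['feature_names'], meta['feature_list']):
        print name, feat.shape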
def MakeDataFromImages(imgdir, max_per_batch, save_dir=None, save_name=None):
    import iutils as iu
    import iconvnet_datacvt as icvt
    from PIL import Image
    if max_per_batch == 0:
        raise CifarError('max_per_batch cannot be zero')
    allfiles = iu.getfilelist(imgdir, '.*jpg|.*bmp|.*png$')
    ndata = len(allfiles)
    iu.ensure_dir(save_dir)
    d = PrepareData(min(max_per_batch, ndata))
    j = 0
    if save_name is None:
        save_name = 'data_batch'
    bid = 1
    for i, fn in enumerate(allfiles):
        if j == max_per_batch:
            # current batch is full: save it and start a new one
            j = 0
            if save_dir is not None:
                icvt.ut.pickle(iu.fullfile(save_dir, save_name + '_' + str(bid)), d)
            bid = bid + 1
            if ndata - i < max_per_batch:
                # the remaining images do not fill a whole batch
                d = PrepareData(ndata - i)
        fp = iu.fullfile(imgdir, fn)
        img = iu.imgproc.ensure_rgb(np.asarray(Image.open(fp)))
        img = Image.fromarray(img).resize((img_size[0], img_size[1]))
        arr_img = np.asarray(img).reshape((dim_data), order='F')
        d['data'][..., j] = arr_img
        j = j + 1
    if save_dir is not None:
        icvt.ut.pickle(iu.fullfile(save_dir, save_name + '_' + str(bid)), d)
def merge_batch_data(data_dir_list, save_dir, is_symbolic=True, batch_start_num=1):
    """
    Merge all the data batches in the directories of data_dir_list into one
    folder and renumber them accordingly. The meta data is updated as well.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)
    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, 'data_batch_\d+')
        print 'I find %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(iu.fullfile(ddir, fn),
                                iu.fullfile(save_dir, 'data_batch_%d' % cur_id))
                cur_id = cur_id + 1
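# A minimal usage sketch for merge_batch_data (the directories are hypothetical;
# it assumes each source directory contains data_batch_* files plus a batches.meta):
def _example_merge_batch_data():
    source_dirs = ['/tmp/batches_part1', '/tmp/batches_part2']
    # creates symbolic links data_batch_1, data_batch_2, ... under the merged folder
    merge_batch_data(source_dirs, '/tmp/batches_merged', is_symbolic=True,
                     batch_start_num=1)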
def generate_positive_data(self, generate_type, allfile=None):
    """
    generate_type = 'rt': random translation
                    'ct': center block
    """
    if allfile is None:
        allfile = iu.getfilelist(self.imgdata_info['imgdatapath'], '\w+\.mat')
    print 'imgdatapath=%s, %d files are found' % (
        self.imgdata_info['imgdatapath'], len(allfile))
    iu.ensure_dir(self.savedata_info['savedir'])
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print self.meta
    np.random.seed(7)
    for fn in allfile:
        print 'Processing %s' % fn
        mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        self.generate_positive_data_from_mat(generate_type, iu.fullfile(mpath))
    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1))
    else:
        self.meta['data_mean'] = 0
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    This function is used for generating the part indicator map for old data.
    data_dir is the directory containing all the data batches.
    """
    allfile = iu.getfilelist(data_dir, 'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size
        d_meta['savedata_info']['indmap_para']['stride'] = stride
        d_meta['savedata_info']['indmap_para']['rate'] = rate
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)
    for fn in allfile:
        print 'Processing %s' % fn
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        nparts = 7
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=np.bool)
        for i in range(ndata):
            jts = d['joints8'][..., i]
            d['indmap'][..., i] = HMLPE.create_part_indicatormap(
                jts, part_idx, mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
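# A minimal usage sketch for add_part_indicatormap (all paths and parameter values
# below are hypothetical; mdim is the spatial size of the part indicator map
# produced by HMLPE.create_part_indicatormap):
def _example_add_part_indicatormap():
    add_part_indicatormap(data_dir='/tmp/old_batches',
                          save_dir='/tmp/batches_with_indmap',
                          mdim=(8, 8), rate=0.3, filter_size=32, stride=12)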
def collect_feature(folder, item, re_exp='batch_feature_\w+$'):
    allfile = sorted(iu.getfilelist(folder, re_exp),
                     key=lambda x: extract_batch_num(x))
    l = []
    for f in allfile:
        p = iu.fullfile(folder, f)
        d = mio.unpickle(p)
        l = l + [d[item]]
    return np.concatenate(l, axis=1)
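# A minimal usage sketch for collect_feature (the folder path and the item name
# 'feature' are hypothetical; it collects one field from every batch_feature_*
# file and concatenates it along axis 1):
def _example_collect_feature():
    feature = collect_feature('/tmp/feature_batches', 'feature')
    print feature.shape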
def __init__(self, imgdir):
    self.Image = __import__('Image')
    self.imgdir = imgdir
    self.cur_idx = -1
    self.images_path = [iu.fullfile(imgdir, x) for x in
                        sorted(iu.getfilelist(imgdir, '.*\.(jpg|png)'))]
    if len(self.images_path) == 0:
        raise DemoError('I cannot find any image under %s' % self.imgdir)
    print 'I got %d images' % len(self.images_path)
    ICameraBasic.__init__(self)
def do_accveval(self):
    images_folder = self.op.get_value('images_folder')
    # get all jpg files in images_folder
    allfiles = iu.getfilelist(images_folder, '.*\.jpg')
    images_path = [iu.fullfile(images_folder, p) for p in allfiles]
    n_image = len(images_path)
    images = self.load_images(images_path)
    mean_image_path = self.op.get_value('mean_image_path')
    mean_image = sio.loadmat(mean_image_path)['cropped_mean_image']
    mean_image_arr = mean_image.reshape((-1, 1), order='F')
    input_images = images - mean_image_arr
    # pack the input images into batch data
    data = [input_images,
            np.zeros((51, n_image), dtype=np.single),
            np.zeros((1700, n_image), dtype=np.single)]
    # allocate the buffer for prediction
    pred_buffer = np.zeros((n_image, 51), dtype=np.single)
    data.append(pred_buffer)
    ext_data = [np.require(elem, dtype=np.single, requirements='C')
                for elem in data]
    # run the model
    ## get the index of the joint prediction layer
    self.pred_layer_idx = self.get_layer_idx('fc_j2', check_type='fc')
    self.libmodel.startFeatureWriter(ext_data, self.pred_layer_idx)
    self.finish_batch()
    raw_pred = ext_data[-1].T
    pred = dhmlpe_features.convert_relskel2rel(raw_pred) * 1200.0
    # show the first prediction
    show_idx = 0
    img = np.array(Image.open(images_path[show_idx]))
    fig = pl.figure(0)
    ax1 = fig.add_subplot(121)
    ax1.imshow(img)
    ax2 = fig.add_subplot(122, projection='3d')
    cur_pred = pred[..., show_idx].reshape((3, -1), order='F')
    part_idx = iread.h36m_hmlpe.part_idx
    params = {'elev': -94, 'azim': -86, 'linewidth': 6, 'order': 'z'}
    dutils.show_3d_skeleton(cur_pred.T, part_idx, params)
def generate_data(self, generate_type, allfile=None):
    """
    generate_type = 'rt' only
    """
    if allfile is None:
        allfile = iu.getfilelist(self.imgdata_info['imgdata_path'], '\w+\.mat')
    print 'imgdatapath=%s, %d files are found' % (
        self.imgdata_info['imgdata_path'], len(allfile))
    iu.ensure_dir(self.savedata_info['savedir'])
    self.batch_id = self.savedata_info['start_patch_id']
    ndata = 0
    self.meta = {'imgdata_info': self.imgdata_info,
                 'savedata_info': self.savedata_info}
    self.meta['num_vis'] = iu.prod(self.savedata_info['newdim'])
    self.meta['data_sum'] = 0
    self.meta['ndata'] = 0
    self.meta['nparts'] = len(part_idx)
    for fn in allfile:
        if generate_type == 'rt':
            mpath = iu.fullfile(self.imgdata_info['imgdata_path'], fn)
            self.generate_rt_data(iu.fullfile(mpath))
    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)
def process(op):
    data_folder = op.get_value('load_file')
    save_path = op.get_value('save_path')
    # data_folder = '/public/sijinli2/ibuffer/2015-01-16/net2_test_for_stat_2000'
    all_files = iu.getfilelist(data_folder, '\d+@\d+$')
    print all_files
    d = mio.unpickle(iu.fullfile(data_folder, all_files[0]))
    ms = d['model_state']
    if op.get_value('cost_name') is not None:
        cost_names = op.get_value('cost_name').split(',')
        n_cost = len(cost_names)
    else:
        n_cost = len(d['solver_params']['train_error'][0])
        cost_names = d['solver_params']['train_error'][0].keys()
    print 'Start to plot'
    start_time = time()
    for i in range(n_cost):
        pl.subplot(n_cost, 1, i + 1)
        plot_cost(op, d, cost_names[i])
    print 'Cost {} seconds'.format(time() - start_time)
    if save_path:
        imgproc.imsave_tight(save_path)
    pl.show()
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    This function shuffles all the data in source_dir and saves it to target_dir.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'),
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata - 1) / max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para']['joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d data in total, I need %d batches to hold them' % (ndata, nbatch))
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                  'joint_indmap': jtmdim},
                                 max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            # the last batch may hold fewer than max_per_file data points
            saved = HMLPE.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                              'joint_indmap': jtmdim},
                                             cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, 'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12  # keep at most buf_cap target batches in memory
    nround = (nbatch - 1) / buf_cap + 1
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [i for i in range(cur_n)
                                if perm[offset + i] >= sidx and perm[offset + i] < eidx]
                if len(cur_idx_list) == 0:
                    continue
                if not b in buf:
                    dsave = myio.unpickle(iu.fullfile(target_dir,
                                                      'data_batch_%d' % (b + 1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), buf[b])
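# A minimal usage sketch for shuffle_data (the directories are hypothetical; the
# source directory must contain data_batch_* files and a batches.meta produced by
# the HMLPE pipeline):
def _example_shuffle_data():
    shuffle_data('/tmp/hmlpe_batches', '/tmp/hmlpe_batches_shuffled',
                 max_per_file=4000)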
def generate_negative_data_from_image(self, generate_type, allfile=None):
    """
    generate_type = 'neg_sample'
    savedata_info should have 'neg_sample_num', indicating how many negative
    windows to sample per image.
    If an image is too small, it will generate as many windows as possible.
    """
    import Image
    if allfile is None:
        allfile = iu.getfilelist(self.imgdata_info['imgdatapath'],
                                 '\w+(\.png|\.jpg|\.pgm|.jpeg)')
    print 'imgdatapath=%s, %d images are found' % (
        self.imgdata_info['imgdatapath'], len(allfile))
    iu.ensure_dir(self.savedata_info['savedir'])
    savedir = self.savedata_info['savedir']
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print(self.meta)
    sample_num = self.savedata_info['neg_sample_num']
    totaldata = len(allfile) * sample_num
    self.meta['ndata'] = 0
    newdim = self.savedata_info['newdim']
    nparts = self.meta['nparts']
    njoints = self.meta['njoints']
    if njoints == 8:
        dicjtname = 'joints8'
    else:
        dicjtname = 'joints'
        # raise HMLPEError('njoints = %d are not supported yet' % njoints)
    filter_size = self.savedata_info['indmap_para']['filter_size']
    stride = self.savedata_info['indmap_para']['stride']
    # rate = self.savedata_info['indmap_para']['rate']
    mdim = self.get_indmapdim(newdim, filter_size, stride)
    self.meta['ind_dim']['part_indmap'] = mdim
    joint_filter_size = self.savedata_info['indmap_para']['joint_filter_size']
    joint_stride = self.savedata_info['indmap_para']['joint_stride']
    jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)
    self.meta['ind_dim']['joint_indmap'] = jtmdim
    per_size = min(totaldata, self.savedata_info['max_batch_size'])
    res = self.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                   'joint_indmap': jtmdim},
                                  per_size, nparts, njoints)
    res[dicjtname][:] = 0
    res['jointmasks'][:] = False
    res['indmap'][:] = False
    res['joint_indmap'][:] = False
    res['is_mirror'][:] = False
    res['is_positive'][:] = False
    pre_nc = 0
    nc = 0
    np.random.seed(7)
    for it, fn in enumerate(allfile):
        print('Processing %s' % fn)
        curimgpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
        imgdim = img.shape
        if imgdim[0] < newdim[0] or imgdim[1] < newdim[1]:
            print('small image, ignored')
            continue
        mesh = self.create_augumentation_mesh(imgdim, newdim, generate_type)
        ts = min(len(mesh), sample_num)
        l = (np.random.permutation(range(len(mesh))))[:ts]
        for p in l:
            r, c = mesh[p]
            # crop a newdim[0] x newdim[1] window with its top-left corner at (r, c)
            timg = img[r:r + newdim[0], c:c + newdim[1], :]
            res['data'][..., nc - pre_nc] = timg
            res['joint_sample_offset'][..., nc - pre_nc] = [c, r]
            res['filenames'][nc - pre_nc] = curimgpath
            res['oribbox'][..., nc - pre_nc] = [c, r, c + newdim[1] - 1,
                                                r + newdim[0] - 1]
            nc = nc + 1
        if sample_num + nc - pre_nc > per_size or it == len(allfile) - 1:
            # the buffer cannot hold another image's samples (or this is the last
            # image): flush the filled part of the buffer to disk
            tmpres = self.truncated_copydic(res, nc - pre_nc)
            tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc), order='F')
            self.meta['data_sum'] += tmpres['data'].sum(axis=1, dtype=float)
            self.meta['ndata'] += nc - pre_nc
            savepath = iu.fullfile(self.savedata_info['savedir'],
                                   self.savedata_info['savename'] +
                                   '_' + str(self.batch_id))
            myio.pickle(savepath, tmpres)
            self.batch_id = self.batch_id + 1
            pre_nc = nc
    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1), order='F')
    else:
        self.meta['data_mean'] = 0
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)