def merge_batch_data(data_dir_list, save_dir, is_symbolic=True, batch_start_num=1):
    """
    Merge the data batches of several directories into one folder.

    The 'batches.meta' of every directory in data_dir_list is folded together
    with HMLPE.merge_meta and written to save_dir. The batch files named
    data_batch_<number> are then symlinked (is_symbolic=True) or copied
    (is_symbolic=False) into save_dir, renumbered consecutively starting at
    batch_start_num, following the order of data_dir_list.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)
    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, r'data_batch_\d+')
        print('I find %d batches in %s' % (len(all_file), ddir))
        for fn in all_file:
            src = iu.fullfile(ddir, fn)
            dst = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
            if is_symbolic:
                # islink also catches a dangling link left by an earlier run,
                # which an existence test may not report.
                if iu.exists(dst, 'file') or os.path.islink(dst):
                    os.remove(dst)
                # A relative link target is resolved against the directory
                # holding the link, not the current directory, so the source
                # is made absolute to keep the link valid.
                os.symlink(os.path.abspath(src), dst)
            else:
                # Copying onto a leftover symlink would write through it
                # into the file it points at; drop the link first.
                if os.path.islink(dst):
                    os.remove(dst)
                shutil.copyfile(src, dst)
            cur_id = cur_id + 1
def merge_batch_data(data_dir_list, save_dir, is_symbolic=True, batch_start_num=1):
    """
    Gather all data batches listed in data_dir_list into save_dir.

    The meta files ('batches.meta') of the input directories are merged with
    HMLPE.merge_meta and the result is stored in save_dir. Every file named
    data_batch_<number> is then linked (is_symbolic=True) or copied
    (is_symbolic=False) to save_dir under a new consecutive id, the first
    id being batch_start_num.
    """
    import os
    import shutil

    iu.ensure_dir(save_dir)

    # Fold all meta dictionaries into one.
    meta = None
    for ddir in data_dir_list:
        meta = HMLPE.merge_meta(
            meta, myio.unpickle(iu.fullfile(ddir, 'batches.meta')))
    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)

    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, r'data_batch_\d+')
        print('I find %d batches in %s' % (len(all_file), ddir))
        for fn in all_file:
            source_path = iu.fullfile(ddir, fn)
            target_path = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
            # A symlink already sitting at the target (possibly dangling)
            # must go in both modes: os.symlink refuses to overwrite it, and
            # shutil.copyfile would write through it into the linked file.
            stale_link = os.path.islink(target_path)
            if is_symbolic:
                if stale_link or iu.exists(target_path, 'file'):
                    os.remove(target_path)
                # Link targets are interpreted relative to the link's own
                # directory, so a relative ddir needs to be made absolute.
                os.symlink(os.path.abspath(source_path), target_path)
            else:
                if stale_link:
                    os.remove(target_path)
                shutil.copyfile(source_path, target_path)
            cur_id += 1
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    Generate the part indicator map for already existing batch data.

    data_dir is the directory holding the data_batch_<number> files. If it
    has a 'batches.meta', a copy with filter_size, stride and rate recorded
    under ['savedata_info']['indmap_para'] is written to save_dir. Each
    batch gets a boolean 'indmap' array of shape
    (nparts, mdim[0], mdim[1], ndata) built from its 'joints8' entry, and is
    saved to save_dir under the same file name.
    """
    allfile = iu.getfilelist(data_dir, r'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        # setdefault covers both a missing 'savedata_info' and an existing
        # one that has no 'indmap_para' yet.
        savedata_info = d_meta.setdefault('savedata_info', dict())
        indmap_para = savedata_info.setdefault('indmap_para', dict())
        indmap_para['filter_size'] = filter_size
        indmap_para['stride'] = stride
        indmap_para['rate'] = rate
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)
    # NOTE(review): hard-coded; presumably equals len(part_idx) - confirm.
    nparts = 7
    for fn in allfile:
        print('Processing %s' % fn)
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        # Builtin bool instead of the np.bool alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=bool)
        for i in range(ndata):
            jts = d['joints8'][..., i]
            d['indmap'][..., i] = HMLPE.create_part_indicatormap(
                jts, part_idx, mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
def generate_positive_data(self, generate_type, allfile=None):
    """
    Generate positive batches from every mat file and save the meta data.

    generate_type = 'rt': random translation
                    'ct': center block
    allfile lists the mat file names inside imgdata_info['imgdatapath'];
    when it is None the directory is scanned for '*.mat' files.
    """
    mat_dir = self.imgdata_info['imgdatapath']
    if allfile is None:
        allfile = iu.getfilelist(mat_dir, '\w+\.mat')
    print('imgdatapath=%s, %d files are found' % (mat_dir, len(allfile)))

    iu.ensure_dir(self.savedata_info['savedir'])
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print(self.meta)

    # Fixed seed, set once before any mat file is processed.
    np.random.seed(7)
    for fn in allfile:
        print('Processing %s ' % fn)
        mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        self.generate_positive_data_from_mat(generate_type, iu.fullfile(mpath))

    # Turn the accumulated sum into a column-vector mean (0 when no data).
    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1))
    else:
        self.meta['data_mean'] = 0
    del self.meta['data_sum']

    meta_path = iu.fullfile(self.savedata_info['savedir'], 'batches.meta')
    myio.pickle(meta_path, self.meta)
def test_addmeta(metadir):
    """
    Rewrite metadir/batches.meta in place with an 'ind_dim' entry whose
    'part_indmap' and 'joint_indmap' dimensions are both (8, 8).
    """
    metapath = iu.fullfile(metadir, 'batches.meta')
    d_meta = myio.unpickle(metapath)
    # Replaces any 'ind_dim' entry that is already present.
    d_meta['ind_dim'] = {
        'part_indmap': (8, 8),
        'joint_indmap': (8, 8),
    }
    myio.pickle(metapath, d_meta)
def test_addmeta(metadir):
    """
    Load the batches.meta found in metadir, set its 'ind_dim' entry to
    (8, 8) for both 'part_indmap' and 'joint_indmap', and save it back to
    the same path.
    """
    metapath = iu.fullfile(metadir, 'batches.meta')
    d_meta = myio.unpickle(metapath)
    ind_dim = dict()
    for map_name in ('part_indmap', 'joint_indmap'):
        ind_dim[map_name] = (8, 8)
    d_meta['ind_dim'] = ind_dim
    myio.pickle(metapath, d_meta)
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    Add a part indicator map to batches that were saved without one.

    data_dir is the directory that holds all data_batch_<number> files.
    The meta file, when present, is copied to save_dir with the indicator
    map parameters (filter_size, stride, rate) stored under
    ['savedata_info']['indmap_para']. Every batch is extended with a boolean
    'indmap' of shape (nparts, mdim[0], mdim[1], ndata), computed sample by
    sample from 'joints8', and written to save_dir with its original name.
    """
    allfile = iu.getfilelist(data_dir, r'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)

    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        # Create each missing level on its own, so a meta that already has
        # 'savedata_info' but no 'indmap_para' does not raise KeyError.
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
        if 'indmap_para' not in d_meta['savedata_info']:
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para'].update({
            'filter_size': filter_size,
            'stride': stride,
            'rate': rate,
        })
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)

    for fn in allfile:
        print('Processing %s' % fn)
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        # NOTE(review): hard-coded; presumably equals len(part_idx) - confirm.
        nparts = 7
        # np.bool is gone from newer NumPy releases; the builtin bool gives
        # the same dtype on every version.
        indmap = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=bool)
        for i in range(ndata):
            indmap[..., i] = HMLPE.create_part_indicatormap(
                d['joints8'][..., i], part_idx, mdim, rate, filter_size,
                stride)
        d['indmap'] = indmap
        myio.pickle(iu.fullfile(save_dir, fn), d)
def generate_data(self, generate_type, allfile=None):
    """
    Generate batches from the mat files and write 'batches.meta'.

    generate_type = 'rt' only; files are skipped for any other value.
    allfile lists the mat file names inside imgdata_info['imgdata_path'];
    when it is None the directory is scanned for '*.mat' files.
    """
    if allfile is None:
        allfile = iu.getfilelist(self.imgdata_info['imgdata_path'], '\w+\.mat')
    print('imgdatapath=%s, %d files are found' % (
        self.imgdata_info['imgdata_path'], len(allfile)))

    iu.ensure_dir(self.savedata_info['savedir'])
    self.batch_id = self.savedata_info['start_patch_id']

    # Fresh meta record; 'data_sum' and 'ndata' start at zero.
    self.meta = {
        'imgdata_info': self.imgdata_info,
        'savedata_info': self.savedata_info,
    }
    self.meta['num_vis'] = iu.prod(self.savedata_info['newdim'])
    self.meta['data_sum'] = 0
    self.meta['ndata'] = 0
    self.meta['nparts'] = len(part_idx)

    for fn in allfile:
        if generate_type != 'rt':
            continue
        mpath = iu.fullfile(self.imgdata_info['imgdata_path'], fn)
        self.generate_rt_data(iu.fullfile(mpath))

    # 'data_mean' is only stored when at least one sample was generated.
    total = self.meta['ndata']
    if total > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / total
    del self.meta['data_sum']

    meta_path = iu.fullfile(self.savedata_info['savedir'], 'batches.meta')
    myio.pickle(meta_path, self.meta)
def generate_positive_data(self, generate_type, allfile=None):
    """
    Run the positive data generation over a list of mat files.

    generate_type = 'rt': random translation
                    'ct': center block
    When allfile is None, all '*.mat' files found in
    imgdata_info['imgdatapath'] are used. The meta data, including the data
    mean, is written to 'batches.meta' in savedata_info['savedir'].
    """
    if allfile is None:
        allfile = iu.getfilelist(self.imgdata_info['imgdatapath'], '\w+\.mat')
    found_msg = 'imgdatapath=%s, %d files are found' % (
        self.imgdata_info['imgdatapath'], len(allfile))
    print(found_msg)

    save_dir = self.savedata_info['savedir']
    iu.ensure_dir(save_dir)
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print(self.meta)

    # Seed once so the sampling sequence is the same on every run.
    np.random.seed(7)
    for fn in allfile:
        print('Processing %s ' % fn)
        mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        self.generate_positive_data_from_mat(generate_type, iu.fullfile(mpath))

    if not self.meta['ndata'] > 0:
        # No sample was generated.
        self.meta['data_mean'] = 0
    else:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1))
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    Shuffle all the data in source_dir and save it to target_dir.

    The meta file is copied unchanged. Empty target batches holding at most
    max_per_file samples are written first; the source batches are then
    scattered into them following a fixed-seed random permutation. The
    target batches are processed in rounds of buf_cap batches, and every
    source batch is read once per round.

    Raises HMLPEError if source_dir and target_dir are the same.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'),
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    # Floor division keeps the count an integer even under true division.
    nbatch = (ndata - 1) // max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    indmap_para = meta['savedata_info']['indmap_para']
    mdim = HMLPE.get_indmapdim(newdim, indmap_para['filter_size'],
                               indmap_para['stride'])
    jtmdim = HMLPE.get_indmapdim(newdim, indmap_para['joint_filter_size'],
                                 indmap_para['joint_stride'])
    dims = {'data': newdim, 'part_indmap': mdim, 'joint_indmap': jtmdim}
    print('There are %d data in total, I need %d batch to hold it'
          % (ndata, nbatch))

    print('Begin creating empty files')
    rest = ndata
    # One full-size empty buffer is reused for every batch but the last.
    full_buf = HMLPE.prepare_savebuffer(dict(dims), max_per_file, nparts,
                                        njoints)
    HMLPE.adjust_savebuffer_shape(full_buf)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = full_buf
        else:
            # The last batch only holds the remaining cur_n samples.
            saved = HMLPE.prepare_savebuffer(dict(dims), cur_n, nparts,
                                             njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print('End creating')

    allbatchfn = iu.getfilelist(source_dir, r'data_batch_\d+')
    np.random.seed(7)
    # perm[k] is the target position of the k-th source sample. A real list
    # is required because it is shuffled in place.
    perm = list(range(ndata))
    np.random.shuffle(perm)
    buf_cap = 12  # keep at most this many target batches in memory
    nround = (nbatch - 1) // buf_cap + 1
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        buf = dict()
        offset = 0  # index of the first sample of the current source batch
        round_batches = range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap))
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in round_batches:
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                # Samples of this source batch that land in target batch b.
                cur_idx_list = [i for i in range(cur_n)
                                if sidx <= perm[offset + i] < eidx]
                if len(cur_idx_list) == 0:
                    continue
                if b not in buf:
                    buf[b] = myio.unpickle(
                        iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)))
                dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx
                                 for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
            print('Finished %s' % fn)
            offset = offset + cur_n
        # Write back all target batches filled in this round.
        for b in round_batches:
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)),
                        buf[b])
def generate_positive_data_from_mat(self, generate_type, matpath):
    """
    Generate positive samples from one mat file and save them as batches.

    The mat file is read for 'dim', 'X' (image data), the joint array named
    by fieldpool['Y2dname'] and, if present, 'imagepathlist'.

    generate_type = 'rt': savedata_info['sample_num'] shifted samples per
                          image, each one also stored mirrored
                    'ct': one sample per image, no mirroring

    Raises HMLPEError when newdim exceeds the mat dimension, when the mat
    dimension differs from the one recorded in meta['matdim'], when
    generate_type is not supported, or when sample_num exceeds the number of
    possible offsets.
    """
    mat = sio.loadmat(matpath)
    dim = mat['dim'][0]
    newdim = self.savedata_info['newdim']
    if newdim[0] > dim[0] or newdim[1] > dim[1]:
        raise HMLPEError('Invalid new size ')
    if self.meta['matdim'] is None:
        # record the dimension before sampling
        self.meta['matdim'] = dim
    elif np.any(self.meta['matdim'] != dim):
        raise HMLPEError(
            'Inconsistent matdim: Previous dim is %s, current mat dim is %s'
            % (str(self.meta['matdim']), str(dim)))
    ndata = (mat['X'].shape)[1]
    if generate_type == 'rt':
        sample_num = self.savedata_info['sample_num']
        totaldata = sample_num * ndata * 2
        do_mirror = True
    elif generate_type == 'ct':
        sample_num = 1
        totaldata = sample_num * ndata
        do_mirror = False
    else:
        # Fail with a clear message rather than a NameError on the first
        # use of sample_num below.
        raise HMLPEError('Unsupported generate_type: %s' % str(generate_type))
    if (dim[0] - newdim[0] + 1) * (dim[1] - newdim[1] + 1) < sample_num:
        raise HMLPEError(' Invalid sample_num')
    self.meta['ndata'] += totaldata

    fieldpool = self.get_fieldpool_for_positive_mat_data()
    fieldpool['mat'] = mat
    self.meta['ind_dim']['part_indmap'] = fieldpool['mdim']
    self.meta['ind_dim']['joint_indmap'] = fieldpool['jtmdim']
    per_size = min(totaldata, self.savedata_info['max_batch_size'])
    allX = mat['X'].reshape((dim[0], dim[1], dim[2], ndata), order='F')
    Y2dname = fieldpool['Y2dname']
    allY = mat[Y2dname].reshape((2, -1, ndata), order='F')
    # prepare data buffer
    res = self.prepare_savebuffer({'data': fieldpool['newdim'],
                                   'part_indmap': fieldpool['mdim'],
                                   'joint_indmap': fieldpool['jtmdim']},
                                  per_size, self.meta['nparts'],
                                  self.meta['njoints'])
    pre_nc = 0  # number of samples already written to disk
    nc = 0      # number of samples generated so far
    res['is_positive'][:] = True
    # Number of samples one image adds to the buffer.
    per_image = (2 if do_mirror else 1) * sample_num
    for it in range(ndata):
        curX = allX[..., it]
        curY = allY[..., it].transpose()
        if 'imagepathlist' in mat:
            curfilename = str(mat['imagepathlist'][0, it][0])
        else:
            curfilename = ''
        mesh = self.create_augumentation_mesh(dim, newdim, generate_type)
        picks = (np.random.permutation(range(len(mesh))))[:sample_num]
        fieldpool['matidx'] = it
        fieldpool['curfilename'] = curfilename
        for p in picks:
            r, c = mesh[p]
            # Roll the image so that offset (r, c) moves to the origin, and
            # shift the joints by the same amount (after subtracting 1).
            tmpX = np.roll(curX, shift=-int(r), axis=0)
            tmpX = np.roll(tmpX, shift=-int(c), axis=1)
            tmpY = curY - 1 + np.asarray([-c, -r])
            fieldpool['r'] = r
            fieldpool['c'] = c
            fieldpool['curX'] = tmpX
            fieldpool['Y'] = tmpY
            self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc,
                                                  fieldpool, False)
            nc = nc + 1
            if not do_mirror:
                continue
            # flip image
            tmpX = tmpX[:, ::-1, :]
            tmpY = self.flip_joints(newdim, tmpY)
            fieldpool['curX'] = tmpX
            fieldpool['Y'] = tmpY
            self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc,
                                                  fieldpool, True)
            nc = nc + 1
        # Flush when the next image would not fit, or after the last sample.
        if nc - pre_nc + per_image > per_size or nc == totaldata:
            tmpres = self.truncated_copydic(res, nc - pre_nc)
            tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc),
                                                    order='F')
            self.meta['data_sum'] = self.meta['data_sum'] + \
                tmpres['data'].sum(axis=1, dtype=float)
            savepath = iu.fullfile(
                self.savedata_info['savedir'],
                self.savedata_info['savename'] + '_' + str(self.batch_id))
            myio.pickle(savepath, tmpres)
            self.batch_id = self.batch_id + 1
            pre_nc = nc
def generate_negative_data_from_image(self, generate_type, allfile=None):
    """
    Sample negative windows from images and save them as batches.

    generate_type = 'neg_sample'
    savedata_info should have 'neg_sample_num': how many negative windows
    are sampled per image. If an image yields fewer candidate windows, as
    many as possible are taken. Images smaller than newdim are ignored.

    allfile lists the image file names inside imgdata_info['imgdatapath'];
    when it is None the directory is scanned for png/jpg/pgm/jpeg files.
    The meta data, including the data mean, is written to 'batches.meta'.
    """
    try:
        import Image
    except ImportError:
        # Newer PIL/Pillow installs only expose the package form.
        from PIL import Image
    if allfile is None:
        # The dot before 'jpeg' is escaped so only a real extension matches.
        allfile = iu.getfilelist(self.imgdata_info['imgdatapath'],
                                 r'\w+(\.png|\.jpg|\.pgm|\.jpeg)')
    print('imgdatapath=%s, %d images are found' % (
        self.imgdata_info['imgdatapath'], len(allfile)))
    iu.ensure_dir(self.savedata_info['savedir'])
    savedir = self.savedata_info['savedir']
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print(self.meta)
    sample_num = self.savedata_info['neg_sample_num']
    totaldata = len(allfile) * sample_num
    self.meta['ndata'] = 0
    newdim = self.savedata_info['newdim']
    nparts = self.meta['nparts']
    njoints = self.meta['njoints']
    dicjtname = 'joints8' if njoints == 8 else 'joints'
    indmap_para = self.savedata_info['indmap_para']
    mdim = self.get_indmapdim(newdim, indmap_para['filter_size'],
                              indmap_para['stride'])
    self.meta['ind_dim']['part_indmap'] = mdim
    jtmdim = self.get_indmapdim(newdim, indmap_para['joint_filter_size'],
                                indmap_para['joint_stride'])
    self.meta['ind_dim']['joint_indmap'] = jtmdim
    per_size = min(totaldata, self.savedata_info['max_batch_size'])
    res = self.prepare_savebuffer({'data': newdim, 'part_indmap': mdim,
                                   'joint_indmap': jtmdim},
                                  per_size, nparts, njoints)
    # Negative samples carry no joints: clear every annotation field once.
    res[dicjtname][:] = 0
    res['jointmasks'][:] = False
    res['indmap'][:] = False
    res['joint_indmap'][:] = False
    res['is_mirror'][:] = False
    res['is_positive'][:] = False
    pre_nc = 0  # number of samples already written to disk
    nc = 0      # number of samples generated so far
    np.random.seed(7)
    last_it = len(allfile) - 1
    for it, fn in enumerate(allfile):
        print('Processing %s' % fn)
        curimgpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        # NOTE(review): the crop below indexes three axes, so a 2-D
        # (grayscale) image would fail here - confirm inputs are colour.
        img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
        imgdim = img.shape
        if imgdim[0] < newdim[0] or imgdim[1] < newdim[1]:
            print('small image, ignored')
        else:
            mesh = self.create_augumentation_mesh(imgdim, newdim,
                                                  generate_type)
            ts = min(len(mesh), sample_num)
            picks = (np.random.permutation(range(len(mesh))))[:ts]
            for p in picks:
                r, c = mesh[p]
                # Rows span newdim[0] and columns newdim[1], matching the
                # bounding box stored in 'oribbox'.
                timg = img[r:r + newdim[0], c:c + newdim[1], :]
                res['data'][..., nc - pre_nc] = timg
                res['joint_sample_offset'][..., nc - pre_nc] = [c, r]
                res['filenames'][nc - pre_nc] = curimgpath
                res['oribbox'][..., nc - pre_nc] = [
                    c, r, c + newdim[1] - 1, r + newdim[0] - 1]
                nc = nc + 1
        # The flush test also runs for ignored images, so samples still
        # buffered when the last image is too small are not lost. Nothing
        # is written while the buffer is empty.
        nbuffered = nc - pre_nc
        if nbuffered > 0 and (sample_num + nbuffered > per_size
                              or it == last_it):
            tmpres = self.truncated_copydic(res, nbuffered)
            tmpres['data'] = tmpres['data'].reshape((-1, nbuffered),
                                                    order='F')
            self.meta['data_sum'] += tmpres['data'].sum(axis=1, dtype=float)
            self.meta['ndata'] += nbuffered
            savepath = iu.fullfile(
                savedir,
                self.savedata_info['savename'] + '_' + str(self.batch_id))
            myio.pickle(savepath, tmpres)
            self.batch_id = self.batch_id + 1
            pre_nc = nc
    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1),
                                                                order='F')
    else:
        self.meta['data_mean'] = 0
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    Shuffle all the data in source_dir and save it to target_dir.

    'batches.meta' is copied as is. The samples are redistributed over new
    batches of at most max_per_file samples each, following a random
    permutation drawn with a fixed seed. To bound memory use, the target
    batches are filled in rounds of buf_cap batches; each round reads all
    source batches once.

    Raises HMLPEError if source_dir and target_dir are the same.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys

    def target_path(batch_index):
        # Target batches are numbered from 1.
        return iu.fullfile(target_dir, 'data_batch_%d' % (batch_index + 1))

    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'),
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    # '//' keeps the batch count integral under true division as well.
    nbatch = (ndata - 1) // max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = \
        meta['savedata_info']['indmap_para']['joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d data in total, I need %d batch to hold it'
          % (ndata, nbatch))

    print('Begin creating empty files')
    rest = ndata
    template = HMLPE.prepare_savebuffer(
        {'data': newdim, 'part_indmap': mdim, 'joint_indmap': jtmdim},
        max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(template)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b == nbatch - 1:
            # The final batch is sized for the cur_n samples that remain.
            saved = HMLPE.prepare_savebuffer(
                {'data': newdim, 'part_indmap': mdim, 'joint_indmap': jtmdim},
                cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        else:
            saved = template
        myio.pickle(target_path(b), saved)
        rest = rest - cur_n
    print('End creating')

    allbatchfn = iu.getfilelist(source_dir, r'data_batch_\d+')
    np.random.seed(7)
    # Source sample k goes to target position perm[k]. shuffle works in
    # place and therefore needs a list, not a range object.
    perm = list(range(ndata))
    np.random.shuffle(perm)
    buf_cap = 12  # number of target batches buffered per round
    nround = (nbatch - 1) // buf_cap + 1
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        first_b = rd * buf_cap
        stop_b = min(nbatch, (rd + 1) * buf_cap)
        buf = dict()
        offset = 0  # position of the current source batch in perm
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(first_b, stop_b):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                # Local indices whose destination is inside target batch b.
                cur_idx_list = [
                    i for i in range(cur_n)
                    if perm[offset + i] >= sidx and perm[offset + i] < eidx
                ]
                if len(cur_idx_list) == 0:
                    continue
                if b not in buf:
                    buf[b] = myio.unpickle(target_path(b))
                dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx
                                 for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
            print('Finished %s' % fn)
            offset = offset + cur_n
        # Store every target batch handled in this round.
        for b in range(first_b, stop_b):
            myio.pickle(target_path(b), buf[b])
def generate_positive_data_from_mat(self, generate_type, matpath):
    """
    Create positive samples from a single mat file and pickle the batches.

    Read from the mat file: 'dim', 'X' (image data), the joint array whose
    name is fieldpool['Y2dname'], and optionally 'imagepathlist'.

    generate_type = 'rt': random translation, savedata_info['sample_num']
                          samples per image plus their mirrored versions
                    'ct': center block, one sample per image, no mirror

    Raises HMLPEError for a newdim larger than the mat dimension, a mat
    dimension inconsistent with meta['matdim'], an unsupported
    generate_type, or a sample_num larger than the number of offsets.
    """
    mat = sio.loadmat(matpath)
    dim = mat['dim'][0]
    newdim = self.savedata_info['newdim']
    if newdim[0] > dim[0] or newdim[1] > dim[1]:
        raise HMLPEError('Invalid new size ')
    if self.meta['matdim'] is None:
        self.meta['matdim'] = dim  # record the dimension before sampling
    else:
        if np.any(self.meta['matdim'] != dim):
            raise HMLPEError(
                'Inconsistent matdim: Previous dim is %s, current mat dim is %s'
                % (str(self.meta['matdim']), str(dim)))
    ndata = (mat['X'].shape)[1]

    if generate_type == 'rt':
        sample_num = self.savedata_info['sample_num']
        do_mirror = True
    elif generate_type == 'ct':
        sample_num = 1
        do_mirror = False
    else:
        # Without this branch sample_num stays undefined and the check
        # below dies with a NameError.
        raise HMLPEError('Unsupported generate_type: %s' % str(generate_type))
    copies = 2 if do_mirror else 1  # stored samples per sampled window
    totaldata = sample_num * ndata * copies
    if (dim[0] - newdim[0] + 1) * (dim[1] - newdim[1] + 1) < sample_num:
        raise HMLPEError(' Invalid sample_num')
    self.meta['ndata'] += totaldata

    fieldpool = self.get_fieldpool_for_positive_mat_data()
    fieldpool['mat'] = mat
    self.meta['ind_dim']['part_indmap'] = fieldpool['mdim']
    self.meta['ind_dim']['joint_indmap'] = fieldpool['jtmdim']
    per_size = min(totaldata, self.savedata_info['max_batch_size'])
    allX = mat['X'].reshape((dim[0], dim[1], dim[2], ndata), order='F')
    allY = mat[fieldpool['Y2dname']].reshape((2, -1, ndata), order='F')

    # prepare data buffer
    buffer_dims = {'data': fieldpool['newdim'],
                   'part_indmap': fieldpool['mdim'],
                   'joint_indmap': fieldpool['jtmdim']}
    res = self.prepare_savebuffer(buffer_dims, per_size,
                                  self.meta['nparts'], self.meta['njoints'])
    res['is_positive'][:] = True
    pre_nc = 0  # samples flushed to disk so far
    nc = 0      # samples generated so far
    for it in range(ndata):
        curX = allX[..., it]
        curY = allY[..., it].transpose()
        curfilename = ''
        if 'imagepathlist' in mat:
            curfilename = str(mat['imagepathlist'][0, it][0])
        mesh = self.create_augumentation_mesh(dim, newdim, generate_type)
        chosen = (np.random.permutation(range(len(mesh))))[:sample_num]
        fieldpool['matidx'] = it
        fieldpool['curfilename'] = curfilename
        for p in chosen:
            r, c = mesh[p]
            # Bring offset (r, c) to the origin by rolling rows, then
            # columns; joints get the same shift after subtracting 1.
            tmpX = np.roll(curX, shift=-int(r), axis=0)
            tmpX = np.roll(tmpX, shift=-int(c), axis=1)
            tmpY = curY - 1 + np.asarray([-c, -r])
            fieldpool['r'] = r
            fieldpool['c'] = c
            fieldpool['curX'] = tmpX
            fieldpool['Y'] = tmpY
            self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc,
                                                  fieldpool, False)
            nc = nc + 1
            if do_mirror:
                # flip image
                tmpX = tmpX[:, ::-1, :]
                tmpY = self.flip_joints(newdim, tmpY)
                fieldpool['curX'] = tmpX
                fieldpool['Y'] = tmpY
                self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc,
                                                      fieldpool, True)
                nc = nc + 1
        # Write the buffer out once another image would overflow it, or
        # when all samples of this mat file have been produced.
        if nc - pre_nc + copies * sample_num > per_size or nc == totaldata:
            tmpres = self.truncated_copydic(res, nc - pre_nc)
            tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc),
                                                    order='F')
            self.meta['data_sum'] = self.meta['data_sum'] + \
                tmpres['data'].sum(axis=1, dtype=float)
            savename = self.savedata_info['savename'] + '_' + \
                str(self.batch_id)
            savepath = iu.fullfile(self.savedata_info['savedir'], savename)
            myio.pickle(savepath, tmpres)
            self.batch_id = self.batch_id + 1
            pre_nc = nc
def generate_negative_data_from_image(self, generate_type, allfile=None):
    """
    Generate negative samples by cropping random windows out of images.

    generate_type = 'neg_sample'
    savedata_info should have 'neg_sample_num': the number of negative
    windows sampled per image. When an image offers fewer candidate
    windows, as many as possible are generated. Images smaller than newdim
    are ignored.

    When allfile is None, the png/jpg/pgm/jpeg files found in
    imgdata_info['imgdatapath'] are used. Batches and 'batches.meta' are
    written to savedata_info['savedir'].
    """
    try:
        import Image
    except ImportError:
        # Fall back to the package layout used by current PIL/Pillow.
        from PIL import Image

    imgdir = self.imgdata_info['imgdatapath']
    if allfile is None:
        # '\.jpeg' (escaped dot) so names merely ending in 'jpeg' are not
        # picked up.
        allfile = iu.getfilelist(imgdir, r'\w+(\.png|\.jpg|\.pgm|\.jpeg)')
    print('imgdatapath=%s, %d images are found' % (imgdir, len(allfile)))
    savedir = self.savedata_info['savedir']
    iu.ensure_dir(savedir)
    self.batch_id = self.savedata_info['start_patch_id']
    self.init_meta(generate_type)
    print(self.meta)

    sample_num = self.savedata_info['neg_sample_num']
    totaldata = len(allfile) * sample_num
    self.meta['ndata'] = 0
    newdim = self.savedata_info['newdim']
    nparts = self.meta['nparts']
    njoints = self.meta['njoints']
    if njoints == 8:
        dicjtname = 'joints8'
    else:
        dicjtname = 'joints'
    filter_size = self.savedata_info['indmap_para']['filter_size']
    stride = self.savedata_info['indmap_para']['stride']
    mdim = self.get_indmapdim(newdim, filter_size, stride)
    self.meta['ind_dim']['part_indmap'] = mdim
    joint_filter_size = self.savedata_info['indmap_para']['joint_filter_size']
    joint_stride = self.savedata_info['indmap_para']['joint_stride']
    jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)
    self.meta['ind_dim']['joint_indmap'] = jtmdim
    per_size = min(totaldata, self.savedata_info['max_batch_size'])
    res = self.prepare_savebuffer(
        {'data': newdim, 'part_indmap': mdim, 'joint_indmap': jtmdim},
        per_size, nparts, njoints)
    # No joints exist for negative windows; reset all annotation fields.
    res[dicjtname][:] = 0
    for field in ('jointmasks', 'indmap', 'joint_indmap', 'is_mirror',
                  'is_positive'):
        res[field][:] = False

    pre_nc = 0  # samples flushed to disk so far
    nc = 0      # samples generated so far
    np.random.seed(7)
    for it, fn in enumerate(allfile):
        print('Processing %s' % fn)
        curimgpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
        # NOTE(review): a 2-D (grayscale) array would fail in the 3-axis
        # crop below - confirm that all inputs have a channel axis.
        img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
        imgdim = img.shape
        too_small = imgdim[0] < newdim[0] or imgdim[1] < newdim[1]
        if too_small:
            print('small image, ignored')
        else:
            mesh = self.create_augumentation_mesh(imgdim, newdim,
                                                  generate_type)
            ts = min(len(mesh), sample_num)
            chosen = (np.random.permutation(range(len(mesh))))[:ts]
            for p in chosen:
                r, c = mesh[p]
                slot = nc - pre_nc
                # Window height is newdim[0] and width newdim[1], in line
                # with the 'oribbox' entry.
                res['data'][..., slot] = img[r:r + newdim[0],
                                             c:c + newdim[1], :]
                res['joint_sample_offset'][..., slot] = [c, r]
                res['filenames'][slot] = curimgpath
                res['oribbox'][..., slot] = [c, r, c + newdim[1] - 1,
                                             r + newdim[0] - 1]
                nc = nc + 1
        # Checked for every image, including ignored ones, so the final
        # partial batch is saved even if the last image is too small. An
        # empty buffer is never written.
        pending = nc - pre_nc
        is_last = it == len(allfile) - 1
        if pending > 0 and (sample_num + pending > per_size or is_last):
            tmpres = self.truncated_copydic(res, pending)
            tmpres['data'] = tmpres['data'].reshape((-1, pending), order='F')
            self.meta['data_sum'] += tmpres['data'].sum(axis=1, dtype=float)
            self.meta['ndata'] += pending
            savepath = iu.fullfile(
                self.savedata_info['savedir'],
                self.savedata_info['savename'] + '_' + str(self.batch_id))
            myio.pickle(savepath, tmpres)
            self.batch_id = self.batch_id + 1
            pre_nc = nc

    if self.meta['ndata'] > 0:
        self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1),
                                                                order='F')
    else:
        self.meta['data_mean'] = 0
    del self.meta['data_sum']
    myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                self.meta)