Example #1
def merge_batch_data(data_dir_list,
                     save_dir,
                     is_symbolic=True,
                     batch_start_num=1):
    """
    This function merges all the data batches in each directory of
    data_dir_list into one folder and renames them sequentially.
    Of course, the metadata is updated as well.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)

    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, r'data_batch_\d+')
        print 'I find %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(
                    iu.fullfile(ddir, fn),
                    iu.fullfile(save_dir, 'data_batch_%d' % cur_id))
                cur_id = cur_id + 1
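A minimal usage sketch (the directory paths below are hypothetical, and iu, myio, and HMLPE must already be importable from the surrounding module):

merge_batch_data(['/data/run1', '/data/run2'],
                 '/data/merged',
                 is_symbolic=False,  # copy the batches instead of symlinking
                 batch_start_num=1)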
Example #2
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    This function generates the part indicator maps for existing (old) data.
    data_dir is the directory that holds all the data batches.
    """
    allfile = iu.getfilelist(data_dir, r'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size
        d_meta['savedata_info']['indmap_para']['stride'] = stride
        d_meta['savedata_info']['indmap_para']['rate'] = rate
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)
    for fn in allfile:
        print 'Processing %s' % fn
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        nparts = 7
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata),
                               dtype=np.bool_)
        for i in range(ndata):
            jts = d['joints8'][..., i]
            # part_idx is expected to be a module-level global
            d['indmap'][..., i] = HMLPE.create_part_indicatormap(
                jts, part_idx, mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
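A hedged usage sketch; the values below are illustrative only, and mdim must agree with the indicator-map size implied by the crop size, filter_size, and stride:

add_part_indicatormap('/data/old_batches', '/data/batches_with_indmap',
                      mdim=(8, 8), rate=0.3, filter_size=30, stride=12)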
Example #3
    def generate_positive_data(self, generate_type, allfile=None):
        """
        generate_type = 'rt': random translation
                        'ct': center block
        """
        if allfile is None:
            allfile = iu.getfilelist(self.imgdata_info['imgdatapath'],
                                     r'\w+\.mat')
        print 'imgdatapath=%s, %d files are found' % (
            self.imgdata_info['imgdatapath'], len(allfile))
        iu.ensure_dir(self.savedata_info['savedir'])
        self.batch_id = self.savedata_info['start_patch_id']
        self.init_meta(generate_type)
        print self.meta
        np.random.seed(7)
        for fn in allfile:
            print 'Processing %s ' % fn
            mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
            # mpath is already a full path; no need to wrap it again
            self.generate_positive_data_from_mat(generate_type, mpath)
        if self.meta['ndata'] > 0:
            self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
            self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1))
        else:
            self.meta['data_mean'] = 0
        del self.meta['data_sum']
        myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                    self.meta)
Example #4
def test_addmeta(metadir):
    metapath = iu.fullfile(metadir, 'batches.meta')
    d_meta = myio.unpickle(metapath)
    d_meta['ind_dim'] = dict()
    d_meta['ind_dim']['part_indmap'] = (8, 8)
    d_meta['ind_dim']['joint_indmap'] = (8, 8)
    myio.pickle(metapath, d_meta)
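Usage is a single call on any directory holding a batches.meta file (path hypothetical):

test_addmeta('/data/merged')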
Example #5
    def generate_data(self, generate_type, allfile=None):
        """
        generate_type = 'rt' only
        """
        if allfile is None:
            allfile = iu.getfilelist(self.imgdata_info['imgdata_path'],
                                     r'\w+\.mat')
        print 'imgdatapath=%s, %d files are found' % (
            self.imgdata_info['imgdata_path'], len(allfile))
        iu.ensure_dir(self.savedata_info['savedir'])
        self.batch_id = self.savedata_info['start_patch_id']
        self.meta = {'imgdata_info': self.imgdata_info,
                     'savedata_info': self.savedata_info}
        self.meta['num_vis'] = iu.prod(self.savedata_info['newdim'])
        self.meta['data_sum'] = 0
        self.meta['ndata'] = 0
        self.meta['nparts'] = len(part_idx)  # part_idx is a module-level global
        for fn in allfile:
            if generate_type == 'rt':
                mpath = iu.fullfile(self.imgdata_info['imgdata_path'], fn)
                self.generate_rt_data(mpath)
        if self.meta['ndata'] > 0:
            self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
        else:
            self.meta['data_mean'] = 0
        del self.meta['data_sum']
        myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                    self.meta)
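Both generators compute the dataset mean by accumulating data_sum batch by batch and dividing by the final count, so no batch has to stay resident in memory. A self-contained sketch of the same pattern, with toy arrays standing in for real batches:

import numpy as np

batches = [np.random.rand(6, 100) for _ in range(3)]  # toy stand-ins
data_sum = 0
ndata = 0
for b in batches:
    data_sum = data_sum + b.sum(axis=1, dtype=float)
    ndata += b.shape[-1]
data_mean = data_sum / ndata if ndata > 0 else 0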
Example #6
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    This function shuffles all the data in source_dir
    and saves it to target_dir.
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'), \
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata - 1) // max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para'][
        'joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d data in total, I need %d batch to hold it' %
          (ndata, nbatch))
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            saved = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, r'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12  # hold at most twelve target batches in memory per round
    nround = (nbatch - 1) // buf_cap + 1
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [
                    i for i in range(cur_n)
                    if perm[offset + i] >= sidx and perm[offset + i] < eidx
                ]
                if len(cur_idx_list) == 0:
                    continue
                if not b in buf:
                    dsave = myio.unpickle(
                        iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)),
                        buf[b])
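shuffle_data routes source example j to target batch perm[j] // max_per_file and slot perm[j] - sidx within it, buffering at most buf_cap target batches per pass over the source files. The index arithmetic in isolation:

import numpy as np

ndata, max_per_file = 10, 4
np.random.seed(7)
perm = np.random.permutation(ndata)  # shuffled position of each source item
for b in range((ndata - 1) // max_per_file + 1):
    sidx = b * max_per_file
    eidx = min(ndata, sidx + max_per_file)
    src = [j for j in range(ndata) if sidx <= perm[j] < eidx]
    dst = [perm[j] - sidx for j in src]
    print('batch %d takes source items %s into slots %s' % (b + 1, src, dst))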
Example #7
    def generate_positive_data_from_mat(self, generate_type, matpath):
        """
        in each mat
                        mat['X'] is image data
                        mat['Y'] is npart x ndata array
        
        """
        mat = sio.loadmat(matpath)
        dim = mat['dim'][0]
        newdim = self.savedata_info['newdim']
        if newdim[0] > dim[0] or newdim[1] > dim[1]:
            raise HMLPEError('Invalid new size ')
        if self.meta['matdim'] is None:
            self.meta['matdim'] = dim  # record the dimension before sampling
        else:
            if np.any(self.meta['matdim'] != dim):
                raise HMLPEError(
                    'Inconsistent matdim: Previous dim is %s, current mat dim is %s'
                    % (str(self.meta['matdim']), str(dim)))
        ndata = (mat['X'].shape)[1]
        if generate_type == 'rt':
            sample_num = self.savedata_info['sample_num']
            totaldata = sample_num * ndata * 2
            do_mirror = True
        elif generate_type == 'ct':
            sample_num = 1
            totaldata = sample_num * ndata
            do_mirror = False
        else:
            # guard against sample_num/do_mirror being undefined further down
            raise HMLPEError('Unsupported generate_type %s' % generate_type)
        if (dim[0] - newdim[0] + 1) * (dim[1] - newdim[1] + 1) < sample_num:
            raise HMLPEError('Invalid sample_num')

        nparts = self.meta['nparts']
        self.meta['ndata'] += totaldata

        ### BEGIN COMMENT
        # njoints = self.meta['njoints']
        # if njoints == 8:
        #     dicjtname = 'joints8'
        # else:
        #     #raise HMLPEError('njoints = %d No supported yet' % (njoints))
        #     dicjtname = 'joints'
        # newdim = self.savedata_info['newdim']
        # filter_size = self.savedata_info['indmap_para']['filter_size']
        # stride =  self.savedata_info['indmap_para']['stride']
        # rate = self.savedata_info['indmap_para']['rate']
        # mdim = self.get_indmapdim(newdim, filter_size, stride)

        # if newdim[0] > dim[0] or newdim[1] > dim[1]:
        #     raise HMLPEError('Invalid new size ')
        # if (dim[0] - newdim[0] + 1) * (dim[1] - newdim[1] + 1) < sample_num:
        #     raise HMLPEError(' Invalid sample_num')
        # joint_filter_size = self.savedata_info['indmap_para']['joint_filter_size']
        # joint_stride = self.savedata_info['indmap_para']['joint_stride']
        # jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)

        ### END COMMENT
        fieldpool = self.get_fieldpool_for_positive_mat_data()
        fieldpool['mat'] = mat
        self.meta['ind_dim']['part_indmap'] = fieldpool['mdim']
        self.meta['ind_dim']['joint_indmap'] = fieldpool['jtmdim']
        res = {}
        per_size = min(totaldata, self.savedata_info['max_batch_size'])

        allX = mat['X'].reshape((dim[0], dim[1], dim[2], ndata), order='F')
        Y2dname = fieldpool['Y2dname']
        allY = mat[Y2dname].reshape((2, -1, ndata), order='F')
        newlen = iu.prod(newdim)
        # prepare data buffer
        res = self.prepare_savebuffer({'data':fieldpool['newdim'], 'part_indmap':fieldpool['mdim'], 'joint_indmap': fieldpool['jtmdim']},\
                                       per_size, self.meta['nparts'],\
                                        self.meta['njoints'])
        tmpres = dict()
        pre_nc = 0
        nc = 0
        res['is_positive'][:] = True
        for it in range(ndata):
            curX = allX[..., it]
            curY = allY[..., it].transpose()
            curfilename = str(mat['imagepathlist'][0, it][0]) \
                if 'imagepathlist' in mat else ''
            mesh = self.create_augumentation_mesh(dim, newdim, generate_type)
            l = (np.random.permutation(range(len(mesh))))[:sample_num]
            fieldpool['matidx'] = it
            fieldpool['curfilename'] = curfilename
            for p in l:
                r, c = mesh[p]
                tmpX = curX
                tmpX = np.roll(tmpX, shift=-int(r), axis=0)
                tmpX = np.roll(tmpX, shift=-int(c), axis=1)
                tmpY = curY - 1 + np.asarray([-c, -r])
                fieldpool['r'] = r
                fieldpool['c'] = c
                ####
                fieldpool['curX'] = tmpX
                fieldpool['Y'] = tmpY

                # tmpX = tmpX[:newdim[0], :newdim[1],:]
                # res['data'][...,nc - pre_nc] = tmpX
                # res[dicjtname][..., nc - pre_nc] = tmpY
                # res['jointmasks'][...,nc - pre_nc] = self.makejointmask(newdim, tmpY)
                # res['filenames'][nc - pre_nc] = curfilename
                # res['oribbox'][...,nc-pre_nc] = mat['oribbox'][...,it]
                # res['indmap'][...,nc-pre_nc] = self.create_part_indicatormap(tmpY, self.meta['savedata_info']['part_idx'], mdim, rate, filter_size, stride)
                # res['joint_indmap'][...,nc-pre_nc] = self.create_joint_indicatormap(tmpY, jtmdim, joint_filter_size, joint_stride)
                # res['joint_sample_offset'][...,nc-pre_nc] = [c, r]
                # res['is_mirror'][...,nc-pre_nc] = False
                self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc, \
                                                      fieldpool, False)
                nc = nc + 1
                if not do_mirror:
                    continue
                #flip image
                tmpX = tmpX[:, ::-1, :]
                tmpY = self.flip_joints(newdim, tmpY)
                fieldpool['curX'] = tmpX
                fieldpool['Y'] = tmpY
                self.fill_in_positive_mat_data_to_dic(res, nc - pre_nc, \
                                                      fieldpool, True)
                # res['data'][...,nc - pre_nc] = tmpX
                # res[dicjtname][...,nc -pre_nc] = tmpY
                # res['jointmasks'][...,nc - pre_nc] = self.makejointmask(newdim, tmpY)
                # res['filenames'][nc - pre_nc] = curfilename

                # res['oribbox'][...,nc-pre_nc] = mat['oribbox'][...,it]
                # res['indmap'][...,nc-pre_nc] = self.create_part_indicatormap(tmpY, part_idx, mdim, rate, filter_size, stride)
                # res['joint_indmap'][...,nc-pre_nc] = self.create_joint_indicatormap(tmpY, jtmdim, joint_filter_size, joint_stride)
                # res['joint_sample_offset'][...,nc-pre_nc] = [c, r]
                # res['is_mirror'][...,nc-pre_nc] = True
                nc = nc + 1
            t = 2 if do_mirror else 1
            if nc - pre_nc + t * sample_num > per_size or nc == totaldata:
                tmpres = self.truncated_copydic(res, nc - pre_nc)
                tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc),
                                                        order='F')
                self.meta['data_sum'] = self.meta['data_sum'] + tmpres[
                    'data'].sum(axis=1, dtype=float)
                savepath = iu.fullfile(self.savedata_info['savedir'], \
                                       self.savedata_info['savename'] + \
                                       '_' +  str(self.batch_id))
                myio.pickle(savepath, tmpres)
                self.batch_id = self.batch_id + 1
                pre_nc = nc
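The augmentation loop shifts the whole image with np.roll and lets fill_in_positive_mat_data_to_dic keep the top-left newdim window (see the commented-out crop line above); for in-range offsets this equals a direct crop at (r, c), with the joints translated by the same offset. A standalone check of that equivalence:

import numpy as np

img = np.arange(6 * 8 * 3).reshape(6, 8, 3)
newdim, r, c = (4, 5), 2, 3
rolled = np.roll(np.roll(img, -r, axis=0), -c, axis=1)
assert np.array_equal(rolled[:newdim[0], :newdim[1], :],
                      img[r:r + newdim[0], c:c + newdim[1], :])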
Example #8
    def generate_negative_data_from_image(self, generate_type, allfile=None):
        """
        generate_type = 'neg_sample'
        savedata_info should have 'neg_sample_num':
                      indicating sampling how many negative window per image
        If some image is small, then it will try to generate as much as possible
                      
        """
        import Image
        if allfile is None:
            allfile = iu.getfilelist(self.imgdata_info['imgdatapath'], \
                                     '\w+(\.png|\.jpg|\.pgm|.jpeg)')
        print 'imgdatapath=%s, %d images are found' % (
            self.imgdata_info['imgdatapath'], len(allfile))
        iu.ensure_dir(self.savedata_info['savedir'])
        savedir = self.savedata_info['savedir']
        self.batch_id = self.savedata_info['start_patch_id']
        self.init_meta(generate_type)
        print(self.meta)
        sample_num = self.savedata_info['neg_sample_num']
        totaldata = len(allfile) * sample_num
        self.meta['ndata'] = 0
        newdim = self.savedata_info['newdim']
        nparts = self.meta['nparts']
        njoints = self.meta['njoints']
        if njoints == 8:
            dicjtname = 'joints8'
        else:
            dicjtname = 'joints'
            #raise HMLPEError('njoints = %d are not supported yet' % njoints)
        filter_size = self.savedata_info['indmap_para']['filter_size']
        stride = self.savedata_info['indmap_para']['stride']
        #rate = self.savedata_info['indmap_para']['rate']
        mdim = self.get_indmapdim(newdim, filter_size, stride)
        self.meta['ind_dim']['part_indmap'] = mdim
        joint_filter_size = self.savedata_info['indmap_para'][
            'joint_filter_size']
        joint_stride = self.savedata_info['indmap_para']['joint_stride']
        jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)
        self.meta['ind_dim']['joint_indmap'] = jtmdim
        per_size = min(totaldata, self.savedata_info['max_batch_size'])
        res = self.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, per_size, nparts, njoints)
        res[dicjtname][:] = 0
        res['jointmasks'][:] = False
        res['indmap'][:] = False
        res['joint_indmap'][:] = False
        res['is_mirror'][:] = False
        res['is_positive'][:] = False
        pre_nc = 0
        nc = 0
        np.random.seed(7)
        for it, fn in enumerate(allfile):
            print('Processing %s' % fn)
            curimgpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
            img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
            imgdim = img.shape
            if imgdim[0] < newdim[0] or imgdim[1] < newdim[1]:
                print('small image, ignored')
                continue
            mesh = self.create_augumentation_mesh(imgdim, newdim,
                                                  generate_type)
            ts = min(len(mesh), sample_num)
            l = (np.random.permutation(range(len(mesh))))[:ts]
            for p in l:
                r, c = mesh[p]
                timg = img[r:r + newdim[0], c:c + newdim[1], :]  # height x width crop
                res['data'][..., nc - pre_nc] = timg
                res['joint_sample_offset'][..., nc - pre_nc] = [c, r]
                res['filenames'][nc - pre_nc] = curimgpath
                res['oribbox'][..., nc - pre_nc] = [
                    c, r, c + newdim[1] - 1, r + newdim[0] - 1
                ]
                nc = nc + 1
            if sample_num + nc - pre_nc > per_size or it == len(allfile) - 1:
                tmpres = self.truncated_copydic(res, nc - pre_nc)
                tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc),
                                                        order='F')
                self.meta['data_sum'] += tmpres['data'].sum(axis=1,
                                                            dtype=float)
                self.meta['ndata'] += nc - pre_nc
                savepath = iu.fullfile(self.savedata_info['savedir'], \
                                       self.savedata_info['savename'] + \
                                       '_' +  str(self.batch_id))
                myio.pickle(savepath, tmpres)
                self.batch_id = self.batch_id + 1
                pre_nc = nc
        if self.meta['ndata'] > 0:
            self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
            self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1),
                                                                    order='F')
        else:
            self.meta['data_mean'] = 0
        del self.meta['data_sum']

        myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                    self.meta)
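Everything this method reads comes from two configuration dictionaries on the instance. A sketch of the fields it touches, with hypothetical values:

imgdata_info = {'imgdatapath': '/data/neg_images'}  # hypothetical path
savedata_info = {
    'savedir': '/data/neg_batches',                 # hypothetical path
    'savename': 'data_batch',
    'start_patch_id': 1,
    'neg_sample_num': 10,        # negative windows sampled per image
    'newdim': (112, 112, 3),     # crop size: (height, width, channels)
    'max_batch_size': 4000,
    'indmap_para': {'filter_size': 30, 'stride': 12,
                    'joint_filter_size': 30, 'joint_stride': 12},
}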