Example #1
def collect_feature_meta(folder, re_exp='batch_feature_\w+$'):
    allfile = sorted(iu.getfilelist(folder, re_exp), key=lambda x:extract_batch_num(x))
    feature_list_lst = []
    feature_dim = None
    indexes_lst = []
    feature_names = None
    if len(allfile) == 0:
        return dict()
    for f in allfile:
        print f
        p =  iu.fullfile(folder, f)
        d = mio.unpickle(p)
        feature_list_lst += [d['feature_list']]
        if feature_dim:
            if feature_dim != d['feature_dim']:
                raise Exception('feature dim inconsistent')
        else:
            feature_dim = d['feature_dim']
        indexes_lst += [d['info']['indexes']]
    indexes = np.concatenate(indexes_lst)
    n_feature, n_batch = len(feature_dim), len(allfile)
    feature_list = [np.concatenate([feature_list_lst[i][k] for i in range(n_batch)],
                                   axis=-1)
                    for k in range(n_feature)]
    return {'feature_list': feature_list,
            'feature_dim': feature_dim,
            'info': {'indexes': indexes,
                     'feature_names': d['info']['feature_names']}}
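Note: both collect_feature_meta (above) and collect_feature (Example #9 below) sort the batch files with an extract_batch_num key that is not defined on this page. A minimal sketch of what it likely does, assuming batch file names end in a numeric suffix such as 'batch_feature_12':

import re

def extract_batch_num(filename):
    # Pull the trailing digits out of the batch file name so that batches sort
    # numerically (batch_feature_2 before batch_feature_10) rather than lexically.
    m = re.search(r'(\d+)$', filename)
    return int(m.group(1)) if m else -1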
Example #2
def MakeDataFromImages(imgdir, max_per_batch, save_dir=None, save_name=None):
    import iutils as iu
    import iconvnet_datacvt as icvt
    from PIL import Image
    if max_per_batch == 0:
        raise CifarError("max_per_batch can't be zero")
    allfiles = iu.getfilelist(imgdir, '.*jpg|.*bmp|.*png$')
    ndata = len(allfiles)
    iu.ensure_dir(save_dir)
    d = PrepareData(min(max_per_batch, ndata))
    j = 0
    if save_name is None:
        save_name = 'data_batch'
    bid = 1
    for i,fn in enumerate(allfiles):
        if j == max_per_batch:
            j = 0
            if save_dir is not None:
                icvt.ut.pickle(iu.fullfile(save_dir, save_name + '_' + str(bid)), d)
                bid = bid + 1 
            if ndata - i < max_per_batch:
                d = PrepareData(ndata-i)
        fp = iu.fullfile(imgdir, fn)
        
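        # Note: img_size and dim_data are not defined in this snippet; they are
        # assumed to be module-level constants (the target image size and the
        # flattened pixel count) defined elsewhere in the original module.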
        img = iu.imgproc.ensure_rgb(np.asarray(Image.open(fp)))
        img = Image.fromarray(img).resize((img_size[0],img_size[1]))
        arr_img = np.asarray(img).reshape((dim_data), order='F')
        d['data'][...,j] = arr_img
        j = j + 1
    if save_dir is not None:
        icvt.ut.pickle(iu.fullfile(save_dir, save_name + '_' + str(bid)), d)         
Example #3
def MakeDataFromImages(imgdir, max_per_batch, save_dir=None, save_name=None):
    import iutils as iu
    import iconvnet_datacvt as icvt
    from PIL import Image
    if max_per_batch == 0:
        raise CifarError("max_per_batch can't be zero")
    allfiles = iu.getfilelist(imgdir, '.*jpg|.*bmp|.*png$')
    ndata = len(allfiles)
    iu.ensure_dir(save_dir)
    d = PrepareData(min(max_per_batch, ndata))
    j = 0
    if save_name is None:
        save_name = 'data_batch'
    bid = 1
    for i, fn in enumerate(allfiles):
        if j == max_per_batch:
            j = 0
            if save_dir is not None:
                icvt.ut.pickle(
                    iu.fullfile(save_dir, save_name + '_' + str(bid)), d)
                bid = bid + 1
            if ndata - i < max_per_batch:
                d = PrepareData(ndata - i)
        fp = iu.fullfile(imgdir, fn)

        img = iu.imgproc.ensure_rgb(np.asarray(Image.open(fp)))
        img = Image.fromarray(img).resize((img_size[0], img_size[1]))
        arr_img = np.asarray(img).reshape((dim_data), order='F')
        d['data'][..., j] = arr_img
        j = j + 1
    if save_dir is not None:
        icvt.ut.pickle(iu.fullfile(save_dir, save_name + '_' + str(bid)), d)
Example #4
def merge_batch_data(data_dir_list, save_dir, is_symbolic=True, batch_start_num=1):
    """
    This function will merge all the data_batches in data_dir into one folder
    and rename them accordingly.
    Of course, the meta data will be updated.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))    
        meta = HMLPE.merge_meta(meta, cur_meta)

    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, 'data_batch_\d+')
        print 'I find %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' %  cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(iu.fullfile(ddir, fn), iu.fullfile(save_dir, 'data_batch_%d' %  cur_id))
                cur_id = cur_id + 1
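A hypothetical call (the directory paths below are placeholders, not from the original project): merge the batches of two runs into one folder, creating symbolic links instead of copies and numbering the merged batches from 1.

merge_batch_data(['/data/run1', '/data/run2'], '/data/merged',
                 is_symbolic=True, batch_start_num=1)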
Example #5
 def generate_positive_data(self, generate_type, allfile=None):
     """
     generate_type = 'rt': random translation
                     'ct': center block
     """
     if allfile is None:
         allfile = iu.getfilelist(self.imgdata_info['imgdatapath'],
                                  '\w+\.mat')
     print 'imgdatapath=%s, %d files are found' % (
         self.imgdata_info['imgdatapath'], len(allfile))
     iu.ensure_dir(self.savedata_info['savedir'])
     self.batch_id = self.savedata_info['start_patch_id']
     self.init_meta(generate_type)
     print self.meta
     np.random.seed(7)
     for fn in allfile:
         print 'Processing %s ' % fn
         mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
         self.generate_positive_data_from_mat(generate_type,
                                              iu.fullfile(mpath))
     if self.meta['ndata'] > 0:
         self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
         self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1))
     else:
         self.meta['data_mean'] = 0
     del self.meta['data_sum']
     myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                 self.meta)
Example #6
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    This function is used for generating the part indicator map for old data.
    data_dir is the directory where you put all the data_batch files.
    """
    allfile = iu.getfilelist(data_dir, 'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'): 
        d_meta = myio.unpickle(meta_path)
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size
        d_meta['savedata_info']['indmap_para']['stride'] = stride
        d_meta['savedata_info']['indmap_para']['rate'] = rate 
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)        
    for fn in allfile:
        print 'Processing %s' % fn
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        nparts = 7
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata), dtype=np.bool) 
        for i in range(ndata):
            jts = d['joints8'][...,i]
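            # part_idx is assumed to be a module-level list of part definitions
            # (compare part_idx = iread.h36m_hmlpe.part_idx in Example #12).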
            d['indmap'][...,i] = HMLPE.create_part_indicatormap(jts, part_idx,  mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
Example #7
def add_part_indicatormap(data_dir, save_dir, mdim, rate, filter_size, stride):
    """
    This function is used for generating the part indicator map for old data.
    data_dir is the directory where you put all the data_batch files.
    """
    allfile = iu.getfilelist(data_dir, 'data_batch_\d+')
    meta_path = iu.fullfile(data_dir, 'batches.meta')
    iu.ensure_dir(save_dir)
    if iu.exists(meta_path, 'file'):
        d_meta = myio.unpickle(meta_path)
        if 'savedata_info' not in d_meta:
            d_meta['savedata_info'] = dict()
            d_meta['savedata_info']['indmap_para'] = dict()
        d_meta['savedata_info']['indmap_para']['filter_size'] = filter_size
        d_meta['savedata_info']['indmap_para']['stride'] = stride
        d_meta['savedata_info']['indmap_para']['rate'] = rate
        myio.pickle(iu.fullfile(save_dir, 'batches.meta'), d_meta)
    for fn in allfile:
        print 'Processing %s' % fn
        d = myio.unpickle(iu.fullfile(data_dir, fn))
        ndata = d['data'].shape[-1]
        nparts = 7
        d['indmap'] = np.zeros((nparts, mdim[0], mdim[1], ndata),
                               dtype=np.bool)
        for i in range(ndata):
            jts = d['joints8'][..., i]
            d['indmap'][..., i] = HMLPE.create_part_indicatormap(
                jts, part_idx, mdim, rate, filter_size, stride)
        myio.pickle(iu.fullfile(save_dir, fn), d)
Example #8
def merge_batch_data(data_dir_list,
                     save_dir,
                     is_symbolic=True,
                     batch_start_num=1):
    """
    This function will merge all the data_batches in data_dir into one folder
    and rename them accordingly.
    Of course, the meta data will be updated.
    """
    import os
    import shutil
    iu.ensure_dir(save_dir)
    meta = None
    for ddir in data_dir_list:
        cur_meta = myio.unpickle(iu.fullfile(ddir, 'batches.meta'))
        meta = HMLPE.merge_meta(meta, cur_meta)

    myio.pickle(iu.fullfile(save_dir, 'batches.meta'), meta)
    cur_id = batch_start_num
    for ddir in data_dir_list:
        all_file = iu.getfilelist(ddir, 'data_batch_\d+')
        print 'I find %d batches in %s' % (len(all_file), ddir)
        if is_symbolic:
            for fn in all_file:
                sn = iu.fullfile(save_dir, 'data_batch_%d' % cur_id)
                if iu.exists(sn, 'file'):
                    os.remove(sn)
                os.symlink(iu.fullfile(ddir, fn), sn)
                cur_id = cur_id + 1
        else:
            for fn in all_file:
                shutil.copyfile(
                    iu.fullfile(ddir, fn),
                    iu.fullfile(save_dir, 'data_batch_%d' % cur_id))
                cur_id = cur_id + 1
Example #9
def collect_feature(folder, item, re_exp='batch_feature_\w+$'):
    allfile = sorted(iu.getfilelist(folder, re_exp), key=lambda x:extract_batch_num(x))
    l = []
    for f in allfile:
        p =  iu.fullfile(folder, f)
        d = mio.unpickle(p)
        l = l + [d[item]]
    return np.concatenate(l, axis=1)
Example #10
 def __init__(self, imgdir):
     self.Image = __import__('Image')
     self.imgdir = imgdir
     self.cur_idx = -1
     self.images_path = [iu.fullfile(imgdir, x) for x in \
                         sorted(iu.getfilelist(imgdir, '.*\.(jpg|png)'))]
     if len(self.images_path) == 0:
         raise DemoError('I cannot find any image under %s' % self.imgdir)
     print 'I got %d images' % len(self.images_path)
     ICameraBasic.__init__(self)
Example #11
 def __init__(self, imgdir):
     self.Image = __import__('Image')
     self.imgdir = imgdir
     self.cur_idx = -1
     self.images_path = [iu.fullfile(imgdir, x) for x in \
                         sorted(iu.getfilelist(imgdir, '.*\.(jpg|png)'))]
     if len(self.images_path) == 0:
         raise DemoError('I cannot find any image under %s' % self.imgdir)
     print 'I got %d images' % len(self.images_path)   
     ICameraBasic.__init__(self)
Example #12
    def do_accveval(self):
        images_folder = self.op.get_value('images_folder')
        # get all jpg file in images_folder
        allfiles = iu.getfilelist(images_folder, '.*\.jpg')
        images_path = [iu.fullfile(images_folder, p) for p in allfiles]
        n_image = len(images_path)
        images = self.load_images(images_path)
        mean_image_path = self.op.get_value('mean_image_path')
        mean_image = sio.loadmat(mean_image_path)['cropped_mean_image']
        mean_image_arr = mean_image.reshape((-1, 1), order='F')
        input_images = images - mean_image_arr
        # pack input images into batch data
        data = [
            input_images,
            np.zeros((51, n_image), dtype=np.single),
            np.zeros((1700, n_image), dtype=np.single)
        ]
        # allocate the buffer for prediction
        pred_buffer = np.zeros((n_image, 51), dtype=np.single)
        data.append(pred_buffer)

        ext_data = [
            np.require(elem, dtype=np.single, requirements='C')
            for elem in data
        ]
        # run the model
        ## get the joint prediction layer indexes
        self.pred_layer_idx = self.get_layer_idx('fc_j2', check_type='fc')
        self.libmodel.startFeatureWriter(ext_data, self.pred_layer_idx)
        self.finish_batch()

        raw_pred = ext_data[-1].T
        pred = dhmlpe_features.convert_relskel2rel(raw_pred) * 1200.0

        # show the first prediction
        show_idx = 0
        img = np.array(Image.open(images_path[show_idx]))
        fig = pl.figure(0)
        ax1 = fig.add_subplot(121)
        ax1.imshow(img)
        ax2 = fig.add_subplot(122, projection='3d')
        cur_pred = pred[..., show_idx].reshape((3, -1), order='F')
        part_idx = iread.h36m_hmlpe.part_idx
        params = {'elev': -94, 'azim': -86, 'linewidth': 6, 'order': 'z'}
        dutils.show_3d_skeleton(cur_pred.T, part_idx, params)
Example #13
    def do_accveval(self):
        images_folder = self.op.get_value('images_folder')
        # get all jpg file in images_folder
        allfiles = iu.getfilelist(images_folder, '.*\.jpg')
        images_path = [iu.fullfile(images_folder, p) for p in allfiles]
        n_image = len(images_path)
        images = self.load_images(images_path)
        mean_image_path = self.op.get_value('mean_image_path')
        mean_image = sio.loadmat(mean_image_path)['cropped_mean_image']
        mean_image_arr = mean_image.reshape((-1,1),order='F')
        input_images = images - mean_image_arr
        # pack input images into batch data
        data = [input_images, np.zeros((51,n_image),dtype=np.single),
                np.zeros((1700,n_image), dtype=np.single)]
        # allocate the buffer for prediction
        pred_buffer = np.zeros((n_image, 51),dtype=np.single)
        data.append(pred_buffer)

        ext_data = [np.require(elem,dtype=np.single, requirements='C') for elem in data]
        # run the model
        ## get the joint prediction layer indexes
        self.pred_layer_idx = self.get_layer_idx('fc_j2',check_type='fc')
        self.libmodel.startFeatureWriter(ext_data, self.pred_layer_idx)
        self.finish_batch()

        raw_pred = ext_data[-1].T
        pred = dhmlpe_features.convert_relskel2rel(raw_pred) * 1200.0

        # show the first prediction
        show_idx = 0
        img = np.array(Image.open(images_path[show_idx]))
        fig = pl.figure(0)
        ax1 = fig.add_subplot(121)
        ax1.imshow(img)
        ax2 = fig.add_subplot(122,projection='3d')
        cur_pred = pred[..., show_idx].reshape((3,-1),order='F')
        part_idx = iread.h36m_hmlpe.part_idx
        params =  {'elev':-94, 'azim':-86, 'linewidth':6, 'order':'z'}
        dutils.show_3d_skeleton(cur_pred.T, part_idx, params)
Example #14
 def generate_data(self, generate_type, allfile = None):
     """
     generate_type = 'rt' only
     """
     if allfile is None:
         allfile = iu.getfilelist( self.imgdata_info['imgdata_path'], '\w+\.mat')
     print 'imgdatapath=%s, %d files are found' % (self.imgdata_info['imgdata_path'], len(allfile))
     iu.ensure_dir(self.savedata_info['savedir'])
     self.batch_id = self.savedata_info['start_patch_id']
     ndata = 0
     self.meta = {'imgdata_info':self.imgdata_info,'savedata_info':self.savedata_info}
     self.meta['num_vis'] = iu.prod(self.savedata_info['newdim'])
     self.meta['data_sum'] = 0
     self.meta['ndata'] = 0
     self.meta['nparts'] = len(part_idx) 
     for fn in allfile:
         if generate_type == 'rt':
             mpath = iu.fullfile(self.imgdata_info['imgdata_path'], fn)
             self.generate_rt_data(iu.fullfile(mpath))
     if self.meta['ndata'] > 0:
         self.meta['data_mean']  = self.meta['data_sum'] / self.meta['ndata']
     del self.meta['data_sum']
     myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'), self.meta)
Example #15
def process(op):
    data_folder = op.get_value('load_file')
    save_path = op.get_value('save_path')
    # data_folder = '/public/sijinli2/ibuffer/2015-01-16/net2_test_for_stat_2000'
    all_files = iu.getfilelist(data_folder, '\d+@\d+$')
    print all_files
    d = mio.unpickle(iu.fullfile(data_folder, all_files[0]))
    ms = d['model_state']
    if op.get_value('cost_name') is not None:
        cost_names = op.get_value('cost_name').split(',')
        n_cost = len(cost_names)
    else:
        n_cost = len(d['solver_params']['train_error'][0])
        cost_names = d['solver_params']['train_error'][0].keys()
    print 'Start to plot'
    start_time = time()
    for i in range(n_cost):
        pl.subplot(n_cost, 1, i + 1)
        plot_cost(op, d, cost_names[i])
    print 'Cost {} seconds '.format(time() - start_time)
    if save_path:
        imgproc.imsave_tight(save_path)
    pl.show()
Example #16
def process(op):
    data_folder = op.get_value('load_file')
    save_path = op.get_value('save_path')
    # data_folder = '/public/sijinli2/ibuffer/2015-01-16/net2_test_for_stat_2000'
    all_files = iu.getfilelist(data_folder, '\d+@\d+$')
    print all_files
    d = mio.unpickle(iu.fullfile(data_folder, all_files[0]))
    ms = d['model_state']
    if op.get_value('cost_name') is not None:
        cost_names = op.get_value('cost_name').split(',')
        n_cost = len(cost_names)
    else:
        n_cost = len(d['solver_params']['train_error'][0])
        cost_names = d['solver_params']['train_error'][0].keys()
    print 'Start to plot'
    start_time = time()
    for i in range(n_cost):
        pl.subplot(n_cost, 1, i + 1)
        plot_cost(op, d, cost_names[i])
    print 'Cost {} seconds '.format(time() - start_time)
    if save_path:
        imgproc.imsave_tight(save_path)
    pl.show()
Example #17
 def generate_positive_data(self, generate_type, allfile = None):
     """
     generate_type = 'rt': random translation
                     'ct': center block
     """
     if allfile is None:
         allfile = iu.getfilelist( self.imgdata_info['imgdatapath'], '\w+\.mat')
     print 'imgdatapath=%s, %d files are found' % (self.imgdata_info['imgdatapath'], len(allfile))
     iu.ensure_dir(self.savedata_info['savedir'])
     self.batch_id = self.savedata_info['start_patch_id']
     self.init_meta(generate_type)
     print self.meta
     np.random.seed(7)
     for fn in allfile:
         print 'Processing %s ' % fn
         mpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
         self.generate_positive_data_from_mat(generate_type ,iu.fullfile(mpath))
     if self.meta['ndata'] > 0:
         self.meta['data_mean']  = self.meta['data_sum'] / self.meta['ndata']
         self.meta['data_mean'] = self.meta['data_mean'].reshape((-1,1))
     else:
         self.meta['data_mean'] = 0
     del self.meta['data_sum']
     myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'), self.meta)
Example #18
def shuffle_data(source_dir, target_dir, max_per_file=4000):
    """
    This function will shuffle all the data in source_dir
    and save it to target_dir
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir(target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'), \
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata - 1) / max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para'][
        'joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d data in total, I need %d batch to hold it' %
          (ndata, nbatch))
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            saved = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, 'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12  # store at most 12 batches in memory
    nround = (nbatch - 1) / buf_cap + 1
    for rd in range(nround):
        print('Round %d of %d' % (rd, nround))
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print('Processing %s' % fn)
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush()
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [
                    i for i in range(cur_n)
                    if perm[offset + i] >= sidx and perm[offset + i] < eidx
                ]
                if len(cur_idx_list) == 0:
                    continue
                if not b in buf:
                    dsave = myio.unpickle(
                        iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd + 1) * buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)),
                        buf[b])
Example #19
    def generate_negative_data_from_image(self, generate_type, allfile=None):
        """
        generate_type = 'neg_sample'
        savedata_info should have 'neg_sample_num',
                      indicating how many negative windows to sample per image.
        If an image is too small, it will generate as many windows as possible.
        """
        import Image
        if allfile is None:
            allfile = iu.getfilelist(self.imgdata_info['imgdatapath'], \
                                     '\w+(\.png|\.jpg|\.pgm|.jpeg)')
        print 'imgdatapath=%s, %d images are found' % (self.imgdata_info['imgdatapath'], len(allfile))
        iu.ensure_dir(self.savedata_info['savedir'])
        savedir = self.savedata_info['savedir']
        self.batch_id = self.savedata_info['start_patch_id']
        self.init_meta(generate_type)
        print(self.meta)
        sample_num = self.savedata_info['neg_sample_num']
        totaldata = len(allfile) * sample_num
        self.meta['ndata'] = 0
        newdim = self.savedata_info['newdim']
        nparts = self.meta['nparts']
        njoints = self.meta['njoints']
        if njoints == 8:
            dicjtname = 'joints8'
        else:
            dicjtname = 'joints'
            #raise HMLPEError('njoints = %d are not supported yet' % njoints)
        filter_size = self.savedata_info['indmap_para']['filter_size']
        stride =  self.savedata_info['indmap_para']['stride']
        #rate = self.savedata_info['indmap_para']['rate']
        mdim = self.get_indmapdim(newdim, filter_size, stride)
        self.meta['ind_dim']['part_indmap'] = mdim
        joint_filter_size = self.savedata_info['indmap_para']['joint_filter_size']
        joint_stride = self.savedata_info['indmap_para']['joint_stride']
        jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)
        self.meta['ind_dim']['joint_indmap'] = jtmdim
        per_size = min(totaldata, self.savedata_info['max_batch_size'])
        res = self.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, per_size, nparts, njoints)
        res[dicjtname][:] = 0
        res['jointmasks'][:] = False
        res['indmap'][:] = False
        res['joint_indmap'][:] = False
        res['is_mirror'][:] = False
        res['is_positive'][:] = False
        pre_nc = 0
        nc = 0
        np.random.seed(7)
        for it, fn in enumerate(allfile):
            print('Processing %s' % fn)
            curimgpath= iu.fullfile(self.imgdata_info['imgdatapath'], fn)
            img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
            imgdim = img.shape
            if imgdim[0] < newdim[0] or imgdim[1] < newdim[1]:
                print('small image, ignored')
                continue
            mesh = self.create_augumentation_mesh(imgdim, newdim, generate_type)
            ts = min(len(mesh), sample_num)
            l = (np.random.permutation(range(len(mesh))))[:ts]
            for p in l:
                r, c = mesh[p]
                # crop a newdim[0] x newdim[1] window at (r, c); column extent uses newdim[1]
                timg = img[r:r+newdim[0], c:c+newdim[1], :]
                res['data'][...,nc-pre_nc] = timg
                res['joint_sample_offset'][...,nc-pre_nc] = [c,r]
                res['filenames'][nc-pre_nc] = curimgpath
                res['oribbox'][...,nc-pre_nc] = [c,r,c+newdim[1]-1,r+newdim[0]-1]
                nc = nc + 1
            if sample_num + nc-pre_nc > per_size or it == len(allfile)-1:
                tmpres = self.truncated_copydic(res, nc-pre_nc)
                tmpres['data'] = tmpres['data'].reshape((-1,nc-pre_nc),order='F')
                self.meta['data_sum'] += tmpres['data'].sum(axis=1,dtype=float)
                self.meta['ndata'] += nc - pre_nc
                savepath = iu.fullfile(self.savedata_info['savedir'], \
                                       self.savedata_info['savename'] + \
                                       '_' +  str(self.batch_id))
                myio.pickle(savepath, tmpres)
                self.batch_id = self.batch_id + 1
                pre_nc = nc
        if self.meta['ndata'] > 0:
            self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
            self.meta['data_mean'] = self.meta['data_mean'].reshape((-1,1),order='F')
        else:
            self.meta['data_mean'] = 0
        del self.meta['data_sum']

        myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'), self.meta)
Example #20
def shuffle_data(source_dir, target_dir, max_per_file = 4000):
    """
    This function will shuffle all the data in source_dir
    and save it to target_dir
    """
    if source_dir == target_dir:
        raise HMLPEError('source dir can not be the same as target dir')
    import shutil
    import sys
    iu.ensure_dir( target_dir)
    shutil.copy(iu.fullfile(source_dir, 'batches.meta'), \
                iu.fullfile(target_dir, 'batches.meta'))
    meta = myio.unpickle(iu.fullfile(source_dir, 'batches.meta'))
    ndata = meta['ndata']
    nbatch = (ndata  - 1) / max_per_file + 1
    nparts = meta['nparts']
    njoints = meta['njoints']
    newdim = meta['savedata_info']['newdim']
    filter_size = meta['savedata_info']['indmap_para']['filter_size']
    stride = meta['savedata_info']['indmap_para']['stride']
    joint_filter_size = meta['savedata_info']['indmap_para']['joint_filter_size']
    joint_stride = meta['savedata_info']['indmap_para']['joint_stride']
    mdim = HMLPE.get_indmapdim(newdim, filter_size, stride)
    jtmdim = HMLPE.get_indmapdim(newdim, joint_filter_size, joint_stride)
    print('There are %d data in total, I need %d batch to hold it' %(ndata, nbatch))
    print 'Begin creating empty files'
    rest = ndata
    d = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, max_per_file, nparts, njoints)
    HMLPE.adjust_savebuffer_shape(d)
    for b in range(nbatch):
        cur_n = min(max_per_file, rest)
        if b != nbatch - 1:
            saved = d
        else:
            saved = HMLPE.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, cur_n, nparts, njoints)
            HMLPE.adjust_savebuffer_shape(saved)
        myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b + 1)), saved)
        rest = rest - cur_n
    print 'End creating'
    allbatchfn = iu.getfilelist(source_dir, 'data_batch_\d+')
    np.random.seed(7)
    perm = range(ndata)
    np.random.shuffle(perm)
    buf_cap = 12 # store at most 12 batches in memory
    nround = (nbatch - 1)/buf_cap + 1
    for rd in range(nround):
        print ('Round %d of %d' % (rd,nround))
        buf = dict()
        offset = 0
        for fn in allbatchfn:
            print( 'Processing %s' % fn )
            d = myio.unpickle(iu.fullfile(source_dir, fn))
            cur_n = d['data'].shape[-1]
            for b in range(rd * buf_cap, min(nbatch, (rd+1)*buf_cap)):
                sys.stdout.write('\rpadding %d of %d' % (b + 1, nbatch))
                sys.stdout.flush() 
                sidx = b * max_per_file
                eidx = min(ndata, sidx + max_per_file)
                cur_idx_list = [i for i in range(cur_n) if perm[offset + i] >= sidx and perm[offset + i] < eidx]
                if len(cur_idx_list) == 0:
                    continue
                if not b in buf:
                    dsave = myio.unpickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)))
                    buf[b] = dsave
                else:
                    dsave = buf[b]
                save_idx_list = [perm[ x + offset] - sidx for x in cur_idx_list]
                HMLPE.selective_copydic(d, dsave, cur_idx_list, save_idx_list)
                # myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), dsave)
            print 'Finished %s' % fn
            offset = offset + cur_n
        for b in range(rd * buf_cap, min(nbatch, (rd+1)*buf_cap)):
            myio.pickle(iu.fullfile(target_dir, 'data_batch_%d' % (b+1)), buf[b])
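A hypothetical call (paths are placeholders): reshuffle an existing batch folder into a new folder holding at most 4000 samples per data_batch file.

shuffle_data('/data/hmlpe_batches', '/data/hmlpe_batches_shuffled', max_per_file=4000)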
Example #21
    def generate_negative_data_from_image(self, generate_type, allfile=None):
        """
        generate_type = 'neg_sample'
        savedata_info should have 'neg_sample_num',
                      indicating how many negative windows to sample per image.
        If an image is too small, it will generate as many windows as possible.
        """
        import Image
        if allfile is None:
            allfile = iu.getfilelist(self.imgdata_info['imgdatapath'], \
                                     '\w+(\.png|\.jpg|\.pgm|.jpeg)')
        print 'imgdatapath=%s, %d images are found' % (
            self.imgdata_info['imgdatapath'], len(allfile))
        iu.ensure_dir(self.savedata_info['savedir'])
        savedir = self.savedata_info['savedir']
        self.batch_id = self.savedata_info['start_patch_id']
        self.init_meta(generate_type)
        print(self.meta)
        sample_num = self.savedata_info['neg_sample_num']
        totaldata = len(allfile) * sample_num
        self.meta['ndata'] = 0
        newdim = self.savedata_info['newdim']
        nparts = self.meta['nparts']
        njoints = self.meta['njoints']
        if njoints == 8:
            dicjtname = 'joints8'
        else:
            dicjtname = 'joints'
            #raise HMLPEError('njoints = %d are not supported yet' % njoints)
        filter_size = self.savedata_info['indmap_para']['filter_size']
        stride = self.savedata_info['indmap_para']['stride']
        #rate = self.savedata_info['indmap_para']['rate']
        mdim = self.get_indmapdim(newdim, filter_size, stride)
        self.meta['ind_dim']['part_indmap'] = mdim
        joint_filter_size = self.savedata_info['indmap_para'][
            'joint_filter_size']
        joint_stride = self.savedata_info['indmap_para']['joint_stride']
        jtmdim = self.get_indmapdim(newdim, joint_filter_size, joint_stride)
        self.meta['ind_dim']['joint_indmap'] = jtmdim
        per_size = min(totaldata, self.savedata_info['max_batch_size'])
        res = self.prepare_savebuffer({'data':newdim, 'part_indmap':mdim, \
                                       'joint_indmap': jtmdim}, per_size, nparts, njoints)
        res[dicjtname][:] = 0
        res['jointmasks'][:] = False
        res['indmap'][:] = False
        res['joint_indmap'][:] = False
        res['is_mirror'][:] = False
        res['is_positive'][:] = False
        pre_nc = 0
        nc = 0
        np.random.seed(7)
        for it, fn in enumerate(allfile):
            print('Processing %s' % fn)
            curimgpath = iu.fullfile(self.imgdata_info['imgdatapath'], fn)
            img = np.asarray(Image.open(curimgpath), dtype=np.uint8)
            imgdim = img.shape
            if imgdim[0] < newdim[0] or imgdim[1] < newdim[1]:
                print('small image, ignored')
                continue
            mesh = self.create_augumentation_mesh(imgdim, newdim,
                                                  generate_type)
            ts = min(len(mesh), sample_num)
            l = (np.random.permutation(range(len(mesh))))[:ts]
            for p in l:
                r, c = mesh[p]
                # crop a newdim[0] x newdim[1] window at (r, c); column extent uses newdim[1]
                timg = img[r:r + newdim[0], c:c + newdim[1], :]
                res['data'][..., nc - pre_nc] = timg
                res['joint_sample_offset'][..., nc - pre_nc] = [c, r]
                res['filenames'][nc - pre_nc] = curimgpath
                res['oribbox'][..., nc - pre_nc] = [
                    c, r, c + newdim[1] - 1, r + newdim[0] - 1
                ]
                nc = nc + 1
            if sample_num + nc - pre_nc > per_size or it == len(allfile) - 1:
                tmpres = self.truncated_copydic(res, nc - pre_nc)
                tmpres['data'] = tmpres['data'].reshape((-1, nc - pre_nc),
                                                        order='F')
                self.meta['data_sum'] += tmpres['data'].sum(axis=1,
                                                            dtype=float)
                self.meta['ndata'] += nc - pre_nc
                savepath = iu.fullfile(self.savedata_info['savedir'], \
                                       self.savedata_info['savename'] + \
                                       '_' +  str(self.batch_id))
                myio.pickle(savepath, tmpres)
                self.batch_id = self.batch_id + 1
                pre_nc = nc
        if self.meta['ndata'] > 0:
            self.meta['data_mean'] = self.meta['data_sum'] / self.meta['ndata']
            self.meta['data_mean'] = self.meta['data_mean'].reshape((-1, 1),
                                                                    order='F')
        else:
            self.meta['data_mean'] = 0
        del self.meta['data_sum']

        myio.pickle(iu.fullfile(self.savedata_info['savedir'], 'batches.meta'),
                    self.meta)