import time
from datetime import timedelta

import numpy as np
import caffe

# Project-local helpers assumed importable from this repo's utility modules:
# lmdbs (LMDB reader), biproto2py (binaryproto -> numpy array),
# SimpleAugment (mean subtraction / crop / scale), py2mat, helper.


class batchloader(object):
    """Load an LMDB in mini-batches, rolling over at the end of the epoch.

    Requirement: NIMGS must be divisible by batch_size.
    Useful in the data layer of a CNN training network.
    """

    def __init__(self, params):
        self.batch_size = params['batch_size']
        self.outshape = params['shape']
        self.lmdb = lmdbs(params['source'])
        self.labels = self.lmdb.get_label_list()
        self.img_mean = biproto2py(params['mean_file']).squeeze()
        self.NIMGS = len(self.labels)
        assert self.NIMGS % self.batch_size == 0, \
            'NIMGS {} not divisible by batch_size {}'.format(
                self.NIMGS, self.batch_size)
        self.num_batches = self.NIMGS // self.batch_size
        self._cur = 0  # index of the current batch
        # Fixed batch size, so labels can be pre-split into a 2-D table:
        # one row per batch.
        self.labels_tab = self.labels.reshape((self.num_batches,
                                               self.batch_size))
        # This class does some simple data manipulations.
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=params['shape'],
                                         scale=params['scale'])
        # Create thread pools for parallel augmentation.
        #self.pool = ThreadPool()  # 4

    def get_info(self):
        return dict(NIMGS=self.NIMGS, batch_size=self.batch_size,
                    num_batches=self.num_batches, cur=self._cur)

    def get_labels(self):
        return self.labels

    def IsEpochEnded(self):
        return self._cur == self.num_batches

    def load_next_batch(self):
        if self._cur == self.num_batches:  # roll over into a new epoch
            self._cur = 0
        batch_lst = np.arange(self.batch_size) + self._cur * self.batch_size
        chunk = self.img_augment.augment_deploy(self.lmdb.get_data(batch_lst))
        labels = self.labels_tab[self._cur]
        self._cur += 1
        return chunk, labels
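
# Usage sketch for one training epoch. Illustrative only: the LMDB path,
# mean file, shape, and scale below are hypothetical placeholders, not
# files shipped with this repo.
def _example_train_epoch():
    params = dict(source='data/train_lmdb',
                  mean_file='data/mean.binaryproto',
                  shape=(224, 224), scale=1.0, batch_size=64)
    loader = batchloader(params)
    # IsEpochEnded() flips to True after exactly num_batches calls,
    # so this loop visits every image once.
    while not loader.IsEpochEnded():
        chunk, labels = loader.load_next_batch()
        # feed (chunk, labels) to the training network here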
class batchloader2(object):
    """General batch loader: load an LMDB in mini-batches, rolling over at
    the end of the epoch.

    DIFFERENT from batchloader: it does not require NIMGS to be divisible
    by batch_size, so the last batch may be smaller. Useful in CNN deploy.
    """

    def __init__(self, params):
        self.batch_size = params['batch_size']
        self.outshape = params['shape']
        self.lmdb = lmdbs(params['source'])
        self.labels = self.lmdb.get_label_list()
        self.img_mean = biproto2py(params['mean_file']).squeeze()
        self.NIMGS = len(self.labels)
        self.num_batches = int(np.ceil(self.NIMGS / float(self.batch_size)))
        self._cur = 0  # index of the current batch
        # This class does some simple data manipulations.
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=params['shape'],
                                         scale=params['scale'])
        # Create thread pools for parallel augmentation.
        #self.pool = ThreadPool()  # 4

    def get_info(self):
        return dict(NIMGS=self.NIMGS, batch_size=self.batch_size,
                    num_batches=self.num_batches, cur=self._cur)

    def get_labels(self):
        return self.labels

    def IsEpochEnded(self):
        return self._cur == self.num_batches

    def load_next_batch(self):
        if self._cur == self.num_batches:  # roll over into a new epoch
            self._cur = 0
        # Clamp the upper bound so the last batch may be smaller.
        batch_lst = np.arange(self._cur * self.batch_size,
                              min(self.NIMGS,
                                  (self._cur + 1) * self.batch_size))
        chunk = self.img_augment.augment_deploy(self.lmdb.get_data(batch_lst))
        labels = self.labels[batch_lst]
        self._cur += 1
        return chunk, labels
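
# Usage sketch for a deploy-time pass over the whole LMDB. Illustrative
# only; `params` is the same hypothetical dict as in _example_train_epoch.
# With, say, NIMGS = 10 and batch_size = 4 the loader yields batches of
# sizes 4, 4 and 2 -- the last batch is simply shorter.
def _example_deploy_pass(params):
    loader = batchloader2(params)
    outputs = []
    while not loader.IsEpochEnded():
        chunk, labels = loader.load_next_batch()
        outputs.append((chunk, labels))  # run the net on chunk here instead
    return outputs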
def extract_cnn_feat(net_params, DB, OUT, layer=0, verbose=True):
    """Extract features from a CNN.

    DB: LMDB containing the data to extract features from.
    net_params: dict with keys 'DEPLOY_PRO', 'data_mean', 'WEIGHTS',
        'scale_factor', 'batch_size'.
    OUT: output path; features are saved as .mat, .npz, or pickle.
    layer: 0 for the last layer, -1 for the layer before the last, -2: ...
    """
    assert layer <= 0, 'layer should be a non-positive integer'
    DEPLOY_PRO = net_params['DEPLOY_PRO']
    WEIGHTS = net_params['WEIGHTS']
    scale_factor = net_params['scale_factor']
    data_mean = net_params['data_mean']
    batch_size = net_params['batch_size']

    net = caffe.Net(DEPLOY_PRO, WEIGHTS, caffe.TEST)
    if verbose:
        print 'Extracting cnn feats...'
        print ' Model def: {}\n Weights: {}'.format(DEPLOY_PRO, WEIGHTS)
    start_t = time.time()

    db = lmdbs(DB)
    labels = db.get_label_list()
    NIMGS = labels.size
    img_mean = biproto2py(data_mean)

    inblob = net.inputs[0]
    in_dim = net.blobs[inblob].data.shape[1:]
    prep = SimpleAugment(mean=img_mean, shape=in_dim[-2:])

    # layer=0 selects the last blob, layer=-1 the one before it, etc.
    feat_l = net.blobs.keys()[layer - 1]
    out_dim = net.blobs[feat_l].data.squeeze().shape[-1]
    feats = np.zeros((NIMGS, out_dim), dtype=np.float32)

    for i in xrange(0, NIMGS, batch_size):
        batch = range(i, min(i + batch_size, NIMGS))
        if verbose:
            print 'Processing sample #{} - {}'.format(batch[0], batch[-1])
        # Resize the input blob to the (possibly smaller) last batch.
        new_shape = (len(batch),) + in_dim
        net.blobs[inblob].reshape(*new_shape)
        chunk = db.get_data(batch)
        net.blobs[inblob].data[...] = prep.augment_deploy(chunk)
        net.forward()
        feats[batch] = net.blobs[feat_l].data.squeeze()

    # Apply the scale factor.
    feats *= scale_factor

    if OUT.endswith('.mat'):
        py2mat(dict(feats=feats, labels=labels), OUT)
    elif OUT.endswith('.npz'):
        np.savez(OUT, feats=feats, labels=labels)
    else:  # assume pickle
        helps = helper()
        helps.save(OUT, feats=feats, labels=labels)

    net = None
    if verbose:
        end_t = time.time()
        print 'Save features to {}.'.format(OUT)
        print 'Time: {}\n'.format(timedelta(seconds=int(end_t - start_t)))
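
# Usage sketch for feature extraction. All paths and the scale factor are
# hypothetical placeholders; substitute your own deploy prototxt,
# caffemodel, mean file, and LMDB.
def _example_extract():
    net_params = dict(DEPLOY_PRO='models/deploy.prototxt',
                      WEIGHTS='models/snapshot.caffemodel',
                      data_mean='data/mean.binaryproto',
                      scale_factor=1.0, batch_size=100)
    # layer=-1 grabs the blob before the last one (e.g. an fc7-style layer
    # instead of the final classifier output).
    extract_cnn_feat(net_params, 'data/test_lmdb',
                     'feats/test_feats.npz', layer=-1)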