コード例 #1
0
class batchloader(object):
    """Serve an LMDB dataset in fixed-size mini-batches, rolling over per epoch.

    Requirement: the number of images (NIMGS) must be divisible by
    ``batch_size`` so that every batch has exactly the same size.
    Useful as the data layer of a CNN training network.

    ``params`` is a dict providing the keys 'batch_size', 'shape', 'source',
    'mean_file' and 'scale'.
    """

    def __init__(self, params):
        """Open the LMDB, read its labels and set up the batch bookkeeping.

        Raises:
            AssertionError: if the number of labels is not a multiple of
                ``params['batch_size']``.
        """
        self.batch_size = params['batch_size']
        self.outshape = params['shape']

        self.lmdb = lmdbs(params['source'])
        self.labels = self.lmdb.get_label_list()
        self.img_mean = biproto2py(params['mean_file']).squeeze()

        self.NIMGS = len(self.labels)
        assert self.NIMGS % self.batch_size == 0, \
            'NIMGS {} not dividible by batchsize {}'.format(
                self.NIMGS, self.batch_size)

        # Floor division keeps the batch count an integer regardless of the
        # interpreter's division semantics; a float here would break both the
        # reshape below and the ``_cur == num_batches`` roll-over test.
        self.num_batches = self.NIMGS // self.batch_size
        self._cur = 0  # index of the next batch to serve
        # One row of labels per batch.
        # NOTE(review): relies on get_label_list() returning an object with
        # .reshape (presumably a numpy array) -- confirm against lmdbs.
        self.labels_tab = self.labels.reshape(
            (self.num_batches, self.batch_size))

        # This helper performs the simple per-image data manipulations.
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=params['shape'],
                                         scale=params['scale'])

    def get_info(self):
        """Return a dict with NIMGS, batch_size, num_batches and cur."""
        return dict(NIMGS=self.NIMGS, batch_size=self.batch_size,
                    num_batches=self.num_batches, cur=self._cur)

    def get_labels(self):
        """Return the full label list as read from the LMDB."""
        return self.labels

    def IsEpochEnded(self):
        """Return True once every batch of the current epoch has been served."""
        return self._cur == self.num_batches

    def load_next_batch(self):
        """Return ``(chunk, labels)`` for the next batch.

        Wraps around to the first batch when the previous call served the
        last batch of the epoch.
        """
        if self._cur == self.num_batches:
            self._cur = 0

        first = self._cur * self.batch_size
        batch_lst = np.arange(first, first + self.batch_size)
        chunk = self.img_augment.augment_deploy(self.lmdb.get_data(batch_lst))
        labels = self.labels_tab[self._cur]
        self._cur += 1

        return chunk, labels
コード例 #2
0
class batchloader2(object):
    """General-purpose LMDB mini-batch loader with epoch roll-over.

    Unlike the fixed-size variant, the number of images does not have to be
    a multiple of ``batch_size``; the final batch of an epoch may therefore
    be shorter than the others. Useful for CNN deployment.
    """

    def __init__(self, params):
        """Open the LMDB named by ``params['source']`` and prepare batching."""
        self.batch_size = params['batch_size']
        self.outshape = params['shape']

        self.lmdb = lmdbs(params['source'])
        self.labels = self.lmdb.get_label_list()
        mean_blob = biproto2py(params['mean_file'])
        self.img_mean = mean_blob.squeeze()

        self.NIMGS = len(self.labels)

        # Round up so a trailing partial batch is still counted.
        batches_exact = self.NIMGS / float(self.batch_size)
        self.num_batches = int(np.ceil(batches_exact))
        self._cur = 0  # index of the next batch to serve

        # Helper that applies the simple per-image data manipulations.
        self.img_augment = SimpleAugment(
            mean=self.img_mean,
            shape=params['shape'],
            scale=params['scale'],
        )

    def get_info(self):
        """Return the loader's counters as a dict."""
        return {
            'NIMGS': self.NIMGS,
            'batch_size': self.batch_size,
            'num_batches': self.num_batches,
            'cur': self._cur,
        }

    def get_labels(self):
        """Return all labels read from the LMDB."""
        return self.labels

    def IsEpochEnded(self):
        """Return True when the batch cursor has reached the end of the epoch."""
        return self.num_batches == self._cur

    def load_next_batch(self):
        """Return ``(chunk, labels)`` for the next batch, wrapping at epoch end."""
        if self._cur == self.num_batches:
            self._cur = 0

        # Clamp the upper bound so the last batch stops at the final image.
        start = self._cur * self.batch_size
        stop = min(self.NIMGS, start + self.batch_size)
        indices = np.arange(start, stop)

        raw = self.lmdb.get_data(indices)
        chunk = self.img_augment.augment_deploy(raw)
        labels = self.labels[indices]
        self._cur += 1

        return chunk, labels
コード例 #3
0
def extract_cnn_feat(net_params, DB, OUT, layer=0, verbose=True):
    """Extract features from a CNN for every sample of an LMDB and save them.

    Args:
        net_params: dictionary with keys "DEPLOY_PRO", "WEIGHTS",
            "scale_factor", "data_mean" and "batch_size".
        DB: lmdb data you want to extract features from.
        OUT: output path. A '.mat' suffix is written with py2mat, '.npz'
            with np.savez; any other name is handed to helper().save
            (assumed to be a pickle).
        layer: 0 for the last blob, -1 for the one before the last, -2: ...
        verbose: when true, print progress and timing information.

    Raises:
        AssertionError: if ``layer`` is positive.
    """
    assert layer <= 0, 'layer should be a non-positive integer'
    DEPLOY_PRO = net_params['DEPLOY_PRO']
    WEIGHTS = net_params['WEIGHTS']
    scale_factor = net_params['scale_factor']
    data_mean = net_params['data_mean']
    batch_size = net_params['batch_size']

    net = caffe.Net(DEPLOY_PRO, WEIGHTS, caffe.TEST)
    start_t = time.time()
    if verbose:
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print('Extracting cnn feats...')
        print('  Model def: {}\n  Weights: {}'.format(DEPLOY_PRO, WEIGHTS))
    db = lmdbs(DB)
    labels = db.get_label_list()
    NIMGS = labels.size

    img_mean = biproto2py(data_mean)
    inblob = net.inputs[0]
    in_dim = net.blobs[inblob].data.shape[1:]
    prep = SimpleAugment(mean=img_mean, shape=in_dim[-2:])

    # list(...) because dict key views cannot be indexed on Python 3.
    feat_l = list(net.blobs.keys())[layer - 1]
    # Size the feature matrix from the per-sample dimensions instead of a
    # squeeze(): squeezing collapses a size-1 feature axis and would then
    # report the batch size as the feature length.
    # Assumes axis 0 of the blob is the batch axis (Caffe convention).
    out_dim = int(np.prod(net.blobs[feat_l].data.shape[1:]))
    feats = np.zeros((NIMGS, out_dim), dtype=np.float32)

    for start in range(0, NIMGS, batch_size):
        stop = min(start + batch_size, NIMGS)
        batch = list(range(start, stop))
        if verbose:
            print('Processing sample #{} - {}'.format(start, stop - 1))
        # The last batch may be shorter, so resize the input blob each time.
        new_shape = (len(batch), ) + in_dim
        net.blobs[inblob].reshape(*new_shape)

        chunk = db.get_data(batch)
        net.blobs[inblob].data[...] = prep.augment_deploy(chunk)
        net.forward()
        # Flatten every sample to one row; keeps the batch axis intact even
        # for a one-sample batch or a single-output layer.
        feats[start:stop] = net.blobs[feat_l].data.reshape(len(batch), -1)

    # apply scale factor
    feats *= scale_factor

    if OUT.endswith('.mat'):
        py2mat(dict(feats=feats, labels=labels), OUT)
    elif OUT.endswith('.npz'):
        np.savez(OUT, feats=feats, labels=labels)
    else:  # assume it is pickle
        helps = helper()
        helps.save(OUT, feats=feats, labels=labels)
    net = None  # drop the reference to the network
    if verbose:
        end_t = time.time()
        print('Save features to {}.'.format(OUT))
        print('Time: {}\n'.format(timedelta(seconds=int(end_t - start_t))))