Example #1
  def predict(self, save_layers=None, filename=None):
    self.net.save_layerouput(save_layers)
    self.print_net_summary()
    util.log('Starting predict...')
    save_output = []
    while self.curr_epoch < 2:
      start = time.time()
      self.test_data = self.test_dp.get_next_batch()
      self.curr_epoch = self.test_data.epoch
      self.curr_batch = self.test_data.batchnum

      self.num_test_minibatch = divup(self.test_data.data.shape[1], self.batch_size)
      for i in range(self.num_test_minibatch):
        input, label = self.get_next_minibatch(i, TEST)
        self.net.train_batch(input, label, TEST)
      cost, correct, numCase = self.net.get_batch_information()
      print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)
      if save_layers is not None:
        save_output.extend(self.net.get_save_output())

    if save_layers is not None:
      if filename is not None:
        with open(filename, 'wb') as f:  # binary mode: protocol=-1 selects a binary pickle format
          cPickle.dump(save_output, f, protocol=-1)
        util.log('save layer output finished')
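predict() above, and the loops in the later examples, size their inner loop with divup(). That helper is not shown in this listing; a minimal sketch, assuming it is plain ceiling division (the usual reading of the name):

def divup(a, b):
  # Ceiling division: the number of size-b minibatches needed to cover
  # a items; the last minibatch may be smaller than b.
  return (a + b - 1) // b

assert divup(100, 128) == 1   # one partial minibatch
assert divup(256, 128) == 2
assert divup(257, 128) == 3   # one extra item spills into a third minibatch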
Example #2
  def __init__(self, data_dir, batch_range=None, category_range=None, batch_size=128):
    ParallelDataProvider.__init__(self, data_dir, batch_range)
    self.img_size = 256
    self.border_size = 16
    self.inner_size = 224
    self.batch_size = batch_size

    # self.multiview = dp_params['multiview_test'] and test
    self.multiview = 0
    self.num_views = 5 * 2
    self.data_mult = self.num_views if self.multiview else 1

    self.buffer_idx = 0

    dirs = glob.glob(data_dir + '/n*')
    synid_to_dir = {}
    for d in dirs:
      synid_to_dir[basename(d)[1:]] = d

    if category_range is None:
      cat_dirs = dirs
    else:
      cat_dirs = []
      for i in category_range:
        synid = self.batch_meta['label_to_synid'][i]
        # util.log('Using category: %d, synid: %s, label: %s', i, synid, self.batch_meta['label_names'][i])
        cat_dirs.append(synid_to_dir[synid])

    self.images = []
    batch_set = set(self.batch_range)  # only membership is tested below, so a set is clearer than a dict

    for d in cat_dirs:
      imgs = [v for i, v in enumerate(glob.glob(d + '/*.jpg')) if i in batch_set]
      self.images.extend(imgs)

    self.images = np.array(self.images)

    # build index vector into 'images' and split into groups of batch-size
    image_index = np.arange(len(self.images))
    np.random.shuffle(image_index)

    self.batches = np.array_split(image_index,
                                  util.divup(len(self.images), batch_size))

    self.batch_range = range(len(self.batches))

    util.log('Starting data provider with %d batches', len(self.batches))
    np.random.shuffle(self.batch_range)

    # Explicit '/' so data_dir need not end with a slash (matching the glob above).
    with open(data_dir + '/image-mean.pickle', 'rb') as f:
      imagemean = cPickle.load(f)
    # Center-crop the 3x256x256 mean image to the inner 224x224 window,
    # then flatten it into a column vector matching get_data_dims().
    crop = slice(self.border_size, self.border_size + self.inner_size)
    self.data_mean = (imagemean['data']
        .astype(np.single)
        .T
        .reshape((3, 256, 256))[:, crop, crop]
        .reshape((self.get_data_dims(), 1)))
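The data-mean block at the end is easy to misread. A standalone NumPy sketch of the same center crop on dummy data, assuming get_data_dims() returns 3 * 224 * 224 (consistent with the shapes used above):

import numpy as np

# Dummy 3x256x256 mean image, cropped to the inner 224x224 window that
# starts border_size=16 pixels in, then flattened to a single column.
mean = np.zeros((3, 256, 256), dtype=np.single)
border, inner = 16, 224
crop = mean[:, border:border + inner, border:border + inner]
assert crop.shape == (3, 224, 224)
column = crop.reshape((3 * inner * inner, 1))
assert column.shape == (150528, 1)   # 3 * 224 * 224 = 150528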
Example #3
  def get_test_error(self):
    start = time.time()
    self.test_data = self.test_dp.get_next_batch()

    self.num_test_minibatch = divup(self.test_data.data.shape[1], self.batch_size)
    for i in range(self.num_test_minibatch):
      input, label = self.get_next_minibatch(i, TEST)
      self.net.train_batch(input, label, TEST)
      self._capture_test_data()
    
    cost, correct, numCase = self.net.get_batch_information()
    self.test_outputs += [({'logprob': [cost, 1 - correct]}, numCase, time.time() - start)]
    print >> sys.stderr, '[%d] error: %f logreg: %f time: %f' % (self.test_data.batchnum, 1 - correct, cost, time.time() - start)
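Each call appends one ({'logprob': [cost, error]}, numCase, elapsed) tuple to self.test_outputs. A hypothetical helper (not part of this codebase) showing how those tuples could be reduced to a case-weighted mean error:

def mean_test_error(test_outputs):
  # Each entry is ({'logprob': [cost, error]}, numCase, elapsed).
  total = sum(n for _, n, _ in test_outputs)
  weighted = sum(out['logprob'][1] * n for out, n, _ in test_outputs)
  return weighted / float(total)

outputs = [({'logprob': [2.3, 0.5]}, 100, 1.2),
           ({'logprob': [1.9, 0.3]}, 50, 0.6)]
assert abs(mean_test_error(outputs) - 13.0 / 30) < 1e-9  # (0.5*100 + 0.3*50) / 150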
Example #4
  def train(self):
    self.print_net_summary()
    util.log('Starting training...')
    while self.should_continue_training():
      self.train_data = self.train_dp.get_next_batch()  # self.train_dp.wait()
      self.curr_epoch = self.train_data.epoch
      self.curr_batch = self.train_data.batchnum

      start = time.time()
      self.num_train_minibatch = divup(self.train_data.data.shape[1], self.batch_size)
      t = 0  # accumulates time spent inside net.train_batch(); not otherwise used in the code shown

      for i in range(self.num_train_minibatch):
        input, label = self.get_next_minibatch(i)
        stime = time.time()
        self.net.train_batch(input, label)
        self._capture_training_data()
        t += time.time() - stime
        self.curr_minibatch += 1

      cost, correct, numCase = self.net.get_batch_information()
      self.train_outputs += [({'logprob': [cost, 1 - correct]}, numCase, time.time() - start)]
      print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)

      self.num_batch += 1
      if self.check_test_data():
        print >> sys.stderr,  '---- test ----'
        self.get_test_error()
        print >> sys.stderr,  '------------'

      if self.factor != 1.0 and self.check_adjust_lr():
        print >> sys.stderr,  '---- adjust learning rate ----'
        self.net.adjust_learning_rate(self.factor)
        print >> sys.stderr,  '--------'

      if self.check_save_checkpoint():
        print >> sys.stderr,  '---- save checkpoint ----'
        self.save_checkpoint()
        print >> sys.stderr,  '------------'


    self.get_test_error()
    self.save_checkpoint()
    self.report()
    self._finished_training()
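net.adjust_learning_rate(self.factor) is called above but not defined in this listing. A minimal sketch of a multiplicative in-place adjustment, assuming per-layer rate attributes named epsW and epsB (hypothetical names, not confirmed by this code):

def adjust_learning_rate(layers, factor):
  # Scale every layer's weight and bias learning rates in place;
  # with factor < 1.0 this gives a stepwise multiplicative decay.
  for layer in layers:
    layer.epsW *= factor
    layer.epsB *= factor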