Example #1
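
All of the examples below are trainer-class methods and assume module-level imports of gc, logging, torch, torch.nn.functional as F (used by Example #4's F.relu; not to be confused with the feature matrices F0/F1 pulled out via .F), and MinkowskiEngine as ME, with AverageMeter and Timer coming from the project's util module. Those helper implementations are not part of these snippets; the minimal sketch below is inferred from how the examples call them (tic(), toc(average=...), update(), reset(), .avg) and may differ from the originals.

import time

class AverageMeter:
    """Tracks a running average of scalar values."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class Timer(AverageMeter):
    """AverageMeter over wall-clock intervals bracketed by tic()/toc()."""

    def tic(self):
        self.start = time.time()

    def toc(self, average=True):
        diff = time.time() - self.start
        self.update(diff)
        return self.avg if average else diff
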
    def train(self):

        curr_iter = self.curr_iter
        data_loader = self.data_loader
        data_loader_iter = iter(self.data_loader)
        data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()

        total_loss = 0
        total_num = 0.0

        while curr_iter < self.config.opt.max_iter:

            curr_iter += 1
            epoch = curr_iter / len(self.data_loader)  # fractional epoch, for logging
            batch_loss, batch_pos_loss, batch_neg_loss = self._train_iter(
                data_loader_iter, [data_meter, data_timer, total_timer])
            total_loss += batch_loss
            total_num += 1

            # Step the scheduler and checkpoint periodically (and once at iter 1)
            if curr_iter % self.lr_update_freq == 0 or curr_iter == 1:
                lr = self.scheduler.get_last_lr()
                self.scheduler.step()
                if self.is_master:
                    logging.info(f" Epoch: {epoch}, LR: {lr}")
                    self._save_checkpoint(curr_iter,
                                          'checkpoint_' + str(curr_iter))

            if curr_iter % self.config.trainer.stat_freq == 0 and self.is_master:
                self.writer.add_scalar('train/loss', batch_loss, curr_iter)
                self.writer.add_scalar('train/pos_loss', batch_pos_loss,
                                       curr_iter)
                self.writer.add_scalar('train/neg_loss', batch_neg_loss,
                                       curr_iter)
                logging.info(
                    "Train Epoch: {:.3f} [{}/{}], Current Loss: {:.3e}".format(
                        epoch, curr_iter, len(self.data_loader), batch_loss) +
                    "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}, LR: {}"
                    .format(data_meter.avg, total_timer.avg - data_meter.avg,
                            total_timer.avg, self.scheduler.get_last_lr()))
                data_meter.reset()
                total_timer.reset()
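
Example #1 delegates the actual optimization step to a _train_iter helper that is not shown. Judging from the inner loops of Examples #3 and #5, it most likely runs the same iter_size gradient-accumulation loop; the reconstruction below is a hypothetical sketch (config field names borrowed from Example #3), not the project's actual method.

def _train_iter(self, data_loader_iter, timers):
    data_meter, data_timer, total_timer = timers
    self.optimizer.zero_grad()
    batch_loss, batch_pos_loss, batch_neg_loss = 0, 0, 0
    total_timer.tic()
    for _ in range(self.iter_size):
        data_timer.tic()
        input_dict = next(data_loader_iter)
        data_meter.update(data_timer.toc(average=False))

        # Forward both fragments through the shared feature extractor
        sinput0 = ME.SparseTensor(
            input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
        sinput1 = ME.SparseTensor(
            input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
        F0, F1 = self.model(sinput0).F, self.model(sinput1).F

        pos_loss, neg_loss = self.contrastive_hardest_negative_loss(
            F0, F1, input_dict['correspondences'],
            num_pos=self.config.num_pos_per_batch * self.config.batch_size,
            num_hn_samples=self.config.num_hn_samples_per_batch * self.config.batch_size)

        # Rescale so the accumulated gradient matches one large batch
        pos_loss, neg_loss = pos_loss / self.iter_size, neg_loss / self.iter_size
        loss = pos_loss + self.neg_weight * neg_loss
        loss.backward()
        batch_loss += loss.item()
        batch_pos_loss += pos_loss.item()
        batch_neg_loss += neg_loss.item()

    self.optimizer.step()
    total_timer.toc()
    return batch_loss, batch_pos_loss, batch_neg_loss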

Example #2

  def _train_epoch(self, epoch):
    config = self.config

    gc.collect()
    self.model.train()

    # Epoch starts from 1
    total_loss = 0
    total_num = 0.0
    data_loader = self.data_loader
    data_loader_iter = iter(self.data_loader)
    iter_size = self.iter_size
    data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
    pos_dist_meter, neg_dist_meter = AverageMeter(), AverageMeter()
    start_iter = (epoch - 1) * (len(data_loader) // iter_size)
    for curr_iter in range(len(data_loader) // iter_size):
      self.optimizer.zero_grad()
      batch_loss = 0
      data_time = 0
      total_timer.tic()
      for iter_idx in range(iter_size):
        data_timer.tic()
        input_dict = next(data_loader_iter)
        data_time += data_timer.toc(average=False)

        # pairs consist of (xyz0 index, xyz1 index)
        sinput0 = ME.SparseTensor(
            input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
        F0 = self.model(sinput0).F

        sinput1 = ME.SparseTensor(
            input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
        F1 = self.model(sinput1).F

        pos_pairs = input_dict['correspondences']
        loss, pos_dist, neg_dist = self.triplet_loss(
            F0,
            F1,
            pos_pairs,
            num_pos=config.triplet_num_pos * config.batch_size,
            num_hn_samples=config.triplet_num_hn * config.batch_size,
            num_rand_triplet=config.triplet_num_rand * config.batch_size)
        loss /= iter_size
        loss.backward()
        batch_loss += loss.item()
        pos_dist_meter.update(pos_dist)
        neg_dist_meter.update(neg_dist)

      self.optimizer.step()
      gc.collect()

      torch.cuda.empty_cache()

      total_loss += batch_loss
      total_num += 1.0
      total_timer.toc()
      data_meter.update(data_time)

      if curr_iter % self.config.stat_freq == 0:
        self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
        logging.info(
            "Train Epoch: {} [{}/{}], Current Loss: {:.3e}, Pos dist: {:.3e}, Neg dist: {:.3e}"
            .format(epoch, curr_iter,
                    len(self.data_loader) //
                    iter_size, batch_loss, pos_dist_meter.avg, neg_dist_meter.avg) +
            "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
                data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
        pos_dist_meter.reset()
        neg_dist_meter.reset()
        data_meter.reset()
        total_timer.reset()
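
Example #2 calls self.triplet_loss, which is not shown. The sketch below is a minimal triplet objective with the same return signature (loss, mean positive distance, mean negative distance); the function name and margin are assumptions, and the real method also mines hard negatives (num_hn_samples) and random triplets (num_rand_triplet), which this sketch replaces with purely random negatives.

import torch

def basic_triplet_loss(F0, F1, pos_pairs, num_pos=1024, margin=1.0):
    # Subsample positive correspondences: anchor from F0, positive from F1
    sel = torch.randperm(len(pos_pairs))[:num_pos]
    idx = pos_pairs[sel].long().to(F0.device)
    anchor, positive = F0[idx[:, 0]], F1[idx[:, 1]]
    # Random negatives drawn from F1 (the real loss mines hard ones instead)
    negative = F1[torch.randint(len(F1), (len(sel),), device=F1.device)]

    pos_dist = (anchor - positive).pow(2).sum(1).sqrt()
    neg_dist = (anchor - negative).pow(2).sum(1).sqrt()
    loss = torch.relu(pos_dist - neg_dist + margin).mean()
    return loss, pos_dist.mean().item(), neg_dist.mean().item()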

Example #3

  def _train_epoch(self, epoch):
    gc.collect()
    self.model.train()
    # Epoch starts from 1
    total_loss = 0
    total_num = 0.0
    data_loader = self.data_loader
    data_loader_iter = iter(self.data_loader)
    iter_size = self.iter_size
    data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
    start_iter = (epoch - 1) * (len(data_loader) // iter_size)
    for curr_iter in range(len(data_loader) // iter_size):
      self.optimizer.zero_grad()
      batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

      data_time = 0
      total_timer.tic()
      for iter_idx in range(iter_size):
        data_timer.tic()
        input_dict = next(data_loader_iter)
        data_time += data_timer.toc(average=False)

        sinput0 = ME.SparseTensor(
            input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
        F0 = self.model(sinput0).F

        sinput1 = ME.SparseTensor(
            input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)

        F1 = self.model(sinput1).F

        pos_pairs = input_dict['correspondences']
        pos_loss, neg_loss = self.contrastive_hardest_negative_loss(
            F0,
            F1,
            pos_pairs,
            num_pos=self.config.num_pos_per_batch * self.config.batch_size,
            num_hn_samples=self.config.num_hn_samples_per_batch *
            self.config.batch_size)

        pos_loss /= iter_size
        neg_loss /= iter_size
        loss = pos_loss + self.neg_weight * neg_loss
        loss.backward()

        batch_loss += loss.item()
        batch_pos_loss += pos_loss.item()
        batch_neg_loss += neg_loss.item()

      self.optimizer.step()
      gc.collect()

      torch.cuda.empty_cache()

      total_loss += batch_loss
      total_num += 1.0
      total_timer.toc()
      data_meter.update(data_time)

      if curr_iter % self.config.stat_freq == 0:
        self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
        self.writer.add_scalar('train/pos_loss', batch_pos_loss, start_iter + curr_iter)
        self.writer.add_scalar('train/neg_loss', batch_neg_loss, start_iter + curr_iter)
        logging.info(
            "Train Epoch: {} [{}/{}], Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}"
            .format(epoch, curr_iter,
                    len(self.data_loader) //
                    iter_size, batch_loss, batch_pos_loss, batch_neg_loss) +
            "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
                data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
        data_meter.reset()
        total_timer.reset()
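
Example #3 relies on contrastive_hardest_negative_loss, which is also not shown. A sketch of the idea, under assumed margins pos_thresh/neg_thresh: penalize corresponding pairs that are farther apart than pos_thresh, and for each anchor mine the hardest (closest) negative from a random candidate pool, penalizing it when it is closer than neg_thresh. The real implementation additionally masks out candidates that are true correspondences.

import torch

def hardest_negative_contrastive_loss(F0, F1, pos_pairs, num_pos, num_hn_samples,
                                      pos_thresh=0.1, neg_thresh=1.4):
    sel = torch.randperm(len(pos_pairs))[:num_pos]
    idx = pos_pairs[sel].long().to(F0.device)
    anchor, positive = F0[idx[:, 0]], F1[idx[:, 1]]

    # Positive term: pull correspondences within pos_thresh
    pos_dist = (anchor - positive).pow(2).sum(1).sqrt()
    pos_loss = torch.relu(pos_dist - pos_thresh).pow(2).mean()

    # Hardest-negative term: the closest candidate per anchor must stay at
    # least neg_thresh away (true matches should be masked out here)
    cand = F1[torch.randint(len(F1), (num_hn_samples,), device=F1.device)]
    hn_dist = torch.cdist(anchor, cand).min(dim=1).values
    neg_loss = torch.relu(neg_thresh - hn_dist).pow(2).mean()
    return pos_loss, neg_loss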

Example #4

  def _train_epoch(self, epoch):
    gc.collect()
    self.model.train()
    # Epoch starts from 1
    total_loss = 0
    total_num = 0.0

    data_loader = self.data_loader
    data_loader_iter = iter(self.data_loader)

    iter_size = self.iter_size
    start_iter = (epoch - 1) * (len(data_loader) // iter_size)

    data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()

    # Main training
    for curr_iter in range(len(data_loader) // iter_size):
      self.optimizer.zero_grad()
      batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

      data_time = 0
      total_timer.tic()
      for iter_idx in range(iter_size):
        # Caffe-style iter_size: accumulate gradients over several passes before stepping
        data_timer.tic()
        input_dict = next(data_loader_iter)
        data_time += data_timer.toc(average=False)

        # pairs consist of (xyz0 index, xyz1 index)
        sinput0 = ME.SparseTensor(
            input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
        F0 = self.model(sinput0).F

        sinput1 = ME.SparseTensor(
            input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
        F1 = self.model(sinput1).F

        N0, N1 = len(sinput0), len(sinput1)

        pos_pairs = input_dict['correspondences']
        neg_pairs = self.generate_rand_negative_pairs(pos_pairs, max(N0, N1), N0, N1)
        pos_pairs = pos_pairs.long().to(self.device)
        neg_pairs = torch.from_numpy(neg_pairs).long().to(self.device)

        neg0 = F0.index_select(0, neg_pairs[:, 0])
        neg1 = F1.index_select(0, neg_pairs[:, 1])
        pos0 = F0.index_select(0, pos_pairs[:, 0])
        pos1 = F1.index_select(0, pos_pairs[:, 1])

        # Positive loss
        pos_loss = (pos0 - pos1).pow(2).sum(1)

        # Negative loss: hinge that pushes random negative pairs at least neg_thresh apart
        neg_loss = F.relu(self.neg_thresh -
                          ((neg0 - neg1).pow(2).sum(1) + 1e-4).sqrt()).pow(2)

        pos_loss_mean = pos_loss.mean() / iter_size
        neg_loss_mean = neg_loss.mean() / iter_size

        # Weighted loss
        loss = pos_loss_mean + self.neg_weight * neg_loss_mean
        # Accumulate gradients; zero them only at the beginning of the iter_size loop
        loss.backward()
        batch_loss += loss.item()
        batch_pos_loss += pos_loss_mean.item()
        batch_neg_loss += neg_loss_mean.item()

      self.optimizer.step()

      torch.cuda.empty_cache()

      total_loss += batch_loss
      total_num += 1.0
      total_timer.toc()
      data_meter.update(data_time)

      # Print logs
      if curr_iter % self.config.stat_freq == 0:
        self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
        self.writer.add_scalar('train/pos_loss', batch_pos_loss, start_iter + curr_iter)
        self.writer.add_scalar('train/neg_loss', batch_neg_loss, start_iter + curr_iter)
        logging.info(
            "Train Epoch: {} [{}/{}], Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}"
            .format(epoch, curr_iter,
                    len(self.data_loader) //
                    iter_size, batch_loss, batch_pos_loss, batch_neg_loss) +
            "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
                data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
        data_meter.reset()
        total_timer.reset()
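
Example #4's generate_rand_negative_pairs helper is not shown either. A plausible sketch, assuming the second argument (max(N0, N1)) serves as a hashing base for cheap collision checks against the positive set; the project's real helper may differ.

import numpy as np

def generate_rand_negative_pairs(self, positive_pairs, hash_base, N0, N1):
    positive_pairs = np.asarray(positive_pairs)
    # Hash each (i, j) pair to one integer for fast membership tests
    pos_keys = positive_pairs[:, 0] * hash_base + positive_pairs[:, 1]

    n = len(positive_pairs)
    neg0 = np.random.randint(N0, size=n)
    neg1 = np.random.randint(N1, size=n)
    neg_keys = neg0 * hash_base + neg1

    # Reject sampled pairs that are actually correspondences
    keep = ~np.isin(neg_keys, pos_keys)
    return np.stack([neg0[keep], neg1[keep]], axis=1)
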
Example #5
    def _train_epoch(self, epoch, data_loader_iter):
        # Epoch starts from 1
        total_loss = 0
        total_num = 0.0
        iter_size = self.iter_size
        data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
        for curr_iter in range(self.train_max_iter):
            self.optimizer.zero_grad()
            batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

            data_time = 0
            total_timer.tic()
            for iter_idx in range(iter_size):
                data_timer.tic()
                input_dict = self.get_data(data_loader_iter)
                data_time += data_timer.toc(average=False)

                F0 = self.model(input_dict['img0'].to(self.device))
                F1 = self.model(input_dict['img1'].to(self.device))

                # images were presumably normalized to [-0.5, 0.5]; shift back before sampling
                pos_loss, neg_loss = self.contrastive_loss(
                    input_dict['img0'].numpy() + 0.5,
                    input_dict['img1'].numpy() + 0.5,
                    F0,
                    F1,
                    input_dict['pairs'],
                    num_pos=self.config.num_pos_per_batch,
                    num_hn_samples=self.config.num_hn_samples_per_batch)

                pos_loss /= iter_size
                neg_loss /= iter_size
                loss = pos_loss + self.neg_weight * neg_loss
                loss.backward()

                batch_loss += loss.item()
                batch_pos_loss += pos_loss.item()
                batch_neg_loss += neg_loss.item()

            self.optimizer.step()
            gc.collect()

            torch.cuda.empty_cache()

            total_loss += batch_loss
            total_num += 1.0
            total_timer.toc()
            data_meter.update(data_time)

            if curr_iter % self.config.stat_freq == 0:
                self.writer.add_scalar('train/loss', batch_loss, curr_iter)
                self.writer.add_scalar('train/pos_loss', batch_pos_loss,
                                       curr_iter)
                self.writer.add_scalar('train/neg_loss', batch_neg_loss,
                                       curr_iter)
                logging.info(
                    "Train epoch {}, iter {}, Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}"
                    .format(epoch, curr_iter, batch_loss, batch_pos_loss,
                            batch_neg_loss) +
                    "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}"
                    .format(data_meter.avg, total_timer.avg -
                            data_meter.avg, total_timer.avg))
                data_meter.reset()
                total_timer.reset()
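
The common thread in all five examples is the Caffe-style iter_size trick flagged in Example #4's comment: gradients from iter_size forward/backward passes accumulate into one optimizer step, emulating a batch iter_size times larger than what fits in memory. A minimal standalone illustration:

import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
iter_size = 4

optimizer.zero_grad()                # zero once per virtual batch
for _ in range(iter_size):
    x, y = torch.randn(16, 8), torch.randn(16, 1)
    loss = torch.nn.functional.mse_loss(model(x), y) / iter_size  # rescale
    loss.backward()                  # gradients accumulate across passes
optimizer.step()                     # single update for the virtual batch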