# Module-level imports these trainer methods rely on; added here since the excerpt omits them.
import gc
import logging
import time

import numpy as np
import torch
import torch.nn.functional as F
import MinkowskiEngine as ME


def train(self):
  curr_iter = self.curr_iter
  data_loader = self.data_loader
  data_loader_iter = iter(self.data_loader)
  data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
  total_loss = 0
  total_num = 0.0

  while curr_iter < self.config.opt.max_iter:
    curr_iter += 1
    epoch = curr_iter / len(self.data_loader)  # fractional epoch for logging
    batch_loss, batch_pos_loss, batch_neg_loss = self._train_iter(
        data_loader_iter, [data_meter, data_timer, total_timer])
    total_loss += batch_loss
    total_num += 1

    # Update the learning rate and save a checkpoint every lr_update_freq iterations.
    if curr_iter % self.lr_update_freq == 0 or curr_iter == 1:
      lr = self.scheduler.get_last_lr()
      self.scheduler.step()
      if self.is_master:
        logging.info(f" Epoch: {epoch}, LR: {lr}")
        self._save_checkpoint(curr_iter, 'checkpoint_' + str(curr_iter))

    if curr_iter % self.config.trainer.stat_freq == 0 and self.is_master:
      self.writer.add_scalar('train/loss', batch_loss, curr_iter)
      self.writer.add_scalar('train/pos_loss', batch_pos_loss, curr_iter)
      self.writer.add_scalar('train/neg_loss', batch_neg_loss, curr_iter)
      logging.info(
          "Train Epoch: {:.3f} [{}/{}], Current Loss: {:.3e}".format(
              epoch, curr_iter, len(self.data_loader), batch_loss) +
          "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}, LR: {}".format(
              data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg,
              self.scheduler.get_last_lr()))
      data_meter.reset()
      total_timer.reset()

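# The loops above and below lean on two small bookkeeping helpers, AverageMeter and
# Timer, whose definitions are not part of this excerpt. The sketch below is a minimal
# stand-in for the interface they appear to expose (reset/update/avg and
# tic()/toc(average=...)); the actual implementations in the codebase may differ.
class AverageMeter:
  """Running average of a scalar, e.g. per-batch data-loading time."""

  def __init__(self):
    self.reset()

  def reset(self):
    self.sum, self.count, self.avg = 0.0, 0, 0.0

  def update(self, val, n=1):
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count


class Timer(AverageMeter):
  """Stopwatch: tic() starts timing, toc() stops, folds the elapsed time into the
  running average, and returns it; toc(average=False) returns just the last interval."""

  def tic(self):
    self.start = time.time()

  def toc(self, average=True):
    elapsed = time.time() - self.start
    self.update(elapsed)
    return self.avg if average else elapsed
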
def _train_epoch(self, epoch):
  config = self.config

  gc.collect()
  self.model.train()

  # Epoch starts from 1
  total_loss = 0
  total_num = 0.0
  data_loader = self.data_loader
  data_loader_iter = iter(self.data_loader)
  iter_size = self.iter_size
  data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
  pos_dist_meter, neg_dist_meter = AverageMeter(), AverageMeter()
  start_iter = (epoch - 1) * (len(data_loader) // iter_size)

  for curr_iter in range(len(data_loader) // iter_size):
    self.optimizer.zero_grad()
    batch_loss = 0
    data_time = 0
    total_timer.tic()
    for iter_idx in range(iter_size):
      data_timer.tic()
      input_dict = next(data_loader_iter)
      data_time += data_timer.toc(average=False)

      # pairs consist of (xyz1 index, xyz0 index)
      sinput0 = ME.SparseTensor(
          input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
      F0 = self.model(sinput0).F

      sinput1 = ME.SparseTensor(
          input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
      F1 = self.model(sinput1).F

      pos_pairs = input_dict['correspondences']
      loss, pos_dist, neg_dist = self.triplet_loss(
          F0,
          F1,
          pos_pairs,
          num_pos=config.triplet_num_pos * config.batch_size,
          num_hn_samples=config.triplet_num_hn * config.batch_size,
          num_rand_triplet=config.triplet_num_rand * config.batch_size)
      loss /= iter_size
      loss.backward()
      batch_loss += loss.item()
      pos_dist_meter.update(pos_dist)
      neg_dist_meter.update(neg_dist)

    # Gradients were accumulated over iter_size mini-batches; take one optimizer step.
    self.optimizer.step()
    gc.collect()
    torch.cuda.empty_cache()

    total_loss += batch_loss
    total_num += 1.0
    total_timer.toc()
    data_meter.update(data_time)

    if curr_iter % self.config.stat_freq == 0:
      self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
      logging.info(
          "Train Epoch: {} [{}/{}], Current Loss: {:.3e}, Pos dist: {:.3e}, Neg dist: {:.3e}"
          .format(epoch, curr_iter, len(self.data_loader) // iter_size, batch_loss,
                  pos_dist_meter.avg, neg_dist_meter.avg) +
          "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
              data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
      pos_dist_meter.reset()
      neg_dist_meter.reset()
      data_meter.reset()
      total_timer.reset()

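# self.triplet_loss is not shown in this excerpt. The sketch below is one plausible
# shape for it, inferred only from its call signature above: it mixes hardest-negative
# triplets (mined from a random candidate subset of F1) with purely random triplets and
# reports mean positive/negative distances for logging. The margin value is a
# placeholder, and unlike a real implementation this sketch does not exclude candidates
# that are themselves true correspondences.
def triplet_loss_sketch(F0, F1, pos_pairs, num_pos, num_hn_samples, num_rand_triplet,
                        margin=0.1):
  pos_pairs = pos_pairs.long().to(F0.device)

  # Anchor/positive pairs subsampled from the correspondence list.
  sel = torch.randperm(len(pos_pairs), device=F0.device)[:num_pos]
  anc, pos = F0[pos_pairs[sel, 0]], F1[pos_pairs[sel, 1]]
  pos_dist = (anc - pos).pow(2).sum(1).sqrt()

  # Hardest negative per anchor, mined from a random candidate subset of F1.
  cand = F1[torch.randperm(len(F1), device=F1.device)[:num_hn_samples]]
  hn_dist = torch.cdist(anc, cand).min(1).values

  # Additional triplets with purely random negatives.
  rand_sel = torch.randperm(len(pos_pairs), device=F0.device)[:num_rand_triplet]
  r_anc, r_pos = F0[pos_pairs[rand_sel, 0]], F1[pos_pairs[rand_sel, 1]]
  r_neg = F1[torch.randint(len(F1), (len(rand_sel),), device=F1.device)]
  r_pos_dist = (r_anc - r_pos).pow(2).sum(1).sqrt()
  r_neg_dist = (r_anc - r_neg).pow(2).sum(1).sqrt()

  loss = (torch.relu(pos_dist - hn_dist + margin).mean() +
          torch.relu(r_pos_dist - r_neg_dist + margin).mean())
  neg_dist = torch.cat([hn_dist, r_neg_dist]).mean()
  return loss, pos_dist.mean().item(), neg_dist.item()
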
def _train_epoch(self, epoch):
  gc.collect()
  self.model.train()

  # Epoch starts from 1
  total_loss = 0
  total_num = 0.0
  data_loader = self.data_loader
  data_loader_iter = iter(self.data_loader)
  iter_size = self.iter_size
  data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()
  start_iter = (epoch - 1) * (len(data_loader) // iter_size)

  for curr_iter in range(len(data_loader) // iter_size):
    self.optimizer.zero_grad()
    batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

    data_time = 0
    total_timer.tic()
    for iter_idx in range(iter_size):
      data_timer.tic()
      input_dict = next(data_loader_iter)
      data_time += data_timer.toc(average=False)

      sinput0 = ME.SparseTensor(
          input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
      F0 = self.model(sinput0).F

      sinput1 = ME.SparseTensor(
          input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
      F1 = self.model(sinput1).F

      pos_pairs = input_dict['correspondences']
      pos_loss, neg_loss = self.contrastive_hardest_negative_loss(
          F0,
          F1,
          pos_pairs,
          num_pos=self.config.num_pos_per_batch * self.config.batch_size,
          num_hn_samples=self.config.num_hn_samples_per_batch * self.config.batch_size)

      pos_loss /= iter_size
      neg_loss /= iter_size
      loss = pos_loss + self.neg_weight * neg_loss
      loss.backward()

      batch_loss += loss.item()
      batch_pos_loss += pos_loss.item()
      batch_neg_loss += neg_loss.item()

    # Gradients were accumulated over iter_size mini-batches; take one optimizer step.
    self.optimizer.step()
    gc.collect()
    torch.cuda.empty_cache()

    total_loss += batch_loss
    total_num += 1.0
    total_timer.toc()
    data_meter.update(data_time)

    if curr_iter % self.config.stat_freq == 0:
      self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
      self.writer.add_scalar('train/pos_loss', batch_pos_loss, start_iter + curr_iter)
      self.writer.add_scalar('train/neg_loss', batch_neg_loss, start_iter + curr_iter)
      logging.info(
          "Train Epoch: {} [{}/{}], Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}".format(
              epoch, curr_iter, len(self.data_loader) // iter_size, batch_loss,
              batch_pos_loss, batch_neg_loss) +
          "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
              data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
      data_meter.reset()
      total_timer.reset()

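# contrastive_hardest_negative_loss is defined elsewhere in the codebase. As a rough
# illustration of the technique it names, the sketch below pulls sampled positive pairs
# together (down to pos_thresh) and pushes each anchor away from its hardest, i.e.
# closest, candidate in the other point cloud (up to neg_thresh). Both thresholds are
# placeholders, and a real implementation would also mine negatives in the opposite
# direction and exclude candidates that are true correspondences.
def hardest_contrastive_loss_sketch(F0, F1, pos_pairs, num_pos, num_hn_samples,
                                    pos_thresh=0.1, neg_thresh=1.4):
  pos_pairs = pos_pairs.long().to(F0.device)

  # Subsample positive correspondences and compute the positive (attraction) term.
  sel = torch.randperm(len(pos_pairs), device=F0.device)[:num_pos]
  anc, pos = F0[pos_pairs[sel, 0]], F1[pos_pairs[sel, 1]]
  pos_loss = torch.relu((anc - pos).pow(2).sum(1).sqrt() - pos_thresh).pow(2).mean()

  # Hardest negative per anchor from a random candidate subset of F1 (repulsion term).
  cand = F1[torch.randperm(len(F1), device=F1.device)[:num_hn_samples]]
  hardest = torch.cdist(anc, cand).min(1).values
  neg_loss = torch.relu(neg_thresh - hardest).pow(2).mean()
  return pos_loss, neg_loss
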
def _train_epoch(self, epoch):
  gc.collect()
  self.model.train()

  # Epoch starts from 1
  total_loss = 0
  total_num = 0.0

  data_loader = self.data_loader
  data_loader_iter = iter(self.data_loader)

  iter_size = self.iter_size
  start_iter = (epoch - 1) * (len(data_loader) // iter_size)

  data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()

  # Main training
  for curr_iter in range(len(data_loader) // iter_size):
    self.optimizer.zero_grad()
    batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

    data_time = 0
    total_timer.tic()
    for iter_idx in range(iter_size):  # Caffe-style iter size (gradient accumulation)
      data_timer.tic()
      input_dict = next(data_loader_iter)
      data_time += data_timer.toc(average=False)

      # pairs consist of (xyz1 index, xyz0 index)
      sinput0 = ME.SparseTensor(
          input_dict['sinput0_F'], coords=input_dict['sinput0_C']).to(self.device)
      F0 = self.model(sinput0).F

      sinput1 = ME.SparseTensor(
          input_dict['sinput1_F'], coords=input_dict['sinput1_C']).to(self.device)
      F1 = self.model(sinput1).F

      N0, N1 = len(sinput0), len(sinput1)

      pos_pairs = input_dict['correspondences']
      neg_pairs = self.generate_rand_negative_pairs(pos_pairs, max(N0, N1), N0, N1)
      pos_pairs = pos_pairs.long().to(self.device)
      neg_pairs = torch.from_numpy(neg_pairs).long().to(self.device)

      neg0 = F0.index_select(0, neg_pairs[:, 0])
      neg1 = F1.index_select(0, neg_pairs[:, 1])
      pos0 = F0.index_select(0, pos_pairs[:, 0])
      pos1 = F1.index_select(0, pos_pairs[:, 1])

      # Positive loss: squared distance between corresponding features
      pos_loss = (pos0 - pos1).pow(2).sum(1)

      # Negative loss: hinge on the distance between random non-corresponding features
      neg_loss = F.relu(self.neg_thresh -
                        ((neg0 - neg1).pow(2).sum(1) + 1e-4).sqrt()).pow(2)

      pos_loss_mean = pos_loss.mean() / iter_size
      neg_loss_mean = neg_loss.mean() / iter_size

      # Weighted loss
      loss = pos_loss_mean + self.neg_weight * neg_loss_mean
      loss.backward()  # To accumulate gradients, zero them only at the beginning of each iter_size block
      batch_loss += loss.item()
      batch_pos_loss += pos_loss_mean.item()
      batch_neg_loss += neg_loss_mean.item()

    self.optimizer.step()
    torch.cuda.empty_cache()

    total_loss += batch_loss
    total_num += 1.0
    total_timer.toc()
    data_meter.update(data_time)

    # Print logs
    if curr_iter % self.config.stat_freq == 0:
      self.writer.add_scalar('train/loss', batch_loss, start_iter + curr_iter)
      self.writer.add_scalar('train/pos_loss', batch_pos_loss, start_iter + curr_iter)
      self.writer.add_scalar('train/neg_loss', batch_neg_loss, start_iter + curr_iter)
      logging.info(
          "Train Epoch: {} [{}/{}], Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}".format(
              epoch, curr_iter, len(self.data_loader) // iter_size, batch_loss,
              batch_pos_loss, batch_neg_loss) +
          "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
              data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
      data_meter.reset()
      total_timer.reset()

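# generate_rand_negative_pairs is also defined outside this excerpt. Based on its call
# site above (pos_pairs, max(N0, N1), N0, N1) and the fact that its result is fed to
# torch.from_numpy, a plausible sketch is: draw random index pairs into the two point
# clouds and drop any that collide with a known positive correspondence, using a scalar
# hash to compare pairs. The num_neg default below is an assumption.
def generate_rand_negative_pairs_sketch(pos_pairs, hash_seed, N0, N1, num_neg=None):
  pos_pairs = np.asarray(pos_pairs, dtype=np.int64)
  if num_neg is None:
    num_neg = len(pos_pairs) * 2

  # Hash each (i, j) pair into a single integer so membership tests are cheap;
  # hash_seed >= max(N0, N1) makes the hash collision-free.
  pos_keys = pos_pairs[:, 0] * hash_seed + pos_pairs[:, 1]
  neg0 = np.random.choice(N0, num_neg, replace=True)
  neg1 = np.random.choice(N1, num_neg, replace=True)
  neg_keys = neg0 * hash_seed + neg1

  # Keep only pairs that are not true correspondences.
  mask = ~np.isin(neg_keys, pos_keys)
  return np.stack([neg0[mask], neg1[mask]], axis=1)
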
def _train_epoch(self, epoch, data_loader_iter):
  # Epoch starts from 1
  total_loss = 0
  total_num = 0.0
  iter_size = self.iter_size
  data_meter, data_timer, total_timer = AverageMeter(), Timer(), Timer()

  for curr_iter in range(self.train_max_iter):
    self.optimizer.zero_grad()
    batch_pos_loss, batch_neg_loss, batch_loss = 0, 0, 0

    data_time = 0
    total_timer.tic()
    for iter_idx in range(iter_size):
      data_timer.tic()
      input_dict = self.get_data(data_loader_iter)
      data_time += data_timer.toc(average=False)

      F0 = self.model(input_dict['img0'].to(self.device))
      F1 = self.model(input_dict['img1'].to(self.device))

      pos_loss, neg_loss = self.contrastive_loss(
          input_dict['img0'].numpy() + 0.5,
          input_dict['img1'].numpy() + 0.5,
          F0,
          F1,
          input_dict['pairs'],
          num_pos=self.config.num_pos_per_batch,
          num_hn_samples=self.config.num_hn_samples_per_batch)

      pos_loss /= iter_size
      neg_loss /= iter_size
      loss = pos_loss + self.neg_weight * neg_loss
      loss.backward()

      batch_loss += loss.item()
      batch_pos_loss += pos_loss.item()
      batch_neg_loss += neg_loss.item()

    self.optimizer.step()
    gc.collect()
    torch.cuda.empty_cache()

    total_loss += batch_loss
    total_num += 1.0
    total_timer.toc()
    data_meter.update(data_time)
    torch.cuda.empty_cache()

    if curr_iter % self.config.stat_freq == 0:
      self.writer.add_scalar('train/loss', batch_loss, curr_iter)
      self.writer.add_scalar('train/pos_loss', batch_pos_loss, curr_iter)
      self.writer.add_scalar('train/neg_loss', batch_neg_loss, curr_iter)
      logging.info(
          "Train epoch {}, iter {}, Current Loss: {:.3e} Pos: {:.3f} Neg: {:.3f}".format(
              epoch, curr_iter, batch_loss, batch_pos_loss, batch_neg_loss) +
          "\tData time: {:.4f}, Train time: {:.4f}, Iter time: {:.4f}".format(
              data_meter.avg, total_timer.avg - data_meter.avg, total_timer.avg))
      data_meter.reset()
      total_timer.reset()