import torch


def _train_epoch(self, epoch):
    """
    Training logic for an epoch

    :param epoch: Integer, current training epoch.
    :return: A log that contains average loss and metric in this epoch.
    """
    self.model.train()
    self.train_metrics.reset()
    for batch_idx, (data, target) in enumerate(self.data_loader):
        data, target = data.to(self.device), target.to(self.device)

        self.optimizer.zero_grad()

        if self.mixup:
            # Mixup: blend random pairs of inputs and keep both label sets
            inputs, targets_a, targets_b, lam = mixup_data(
                data, target, alpha=self.mixup_alpha,
                use_cuda=torch.cuda.is_available())
            # Forward pass on the mixed batch
            output = self.model(inputs)
            # Loss: lam-weighted combination of the losses on both label sets
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            loss = loss_func(self.criterion, output)
        else:
            output = self.model(data)
            loss = self.criterion(output, target)

        loss.backward()
        self.optimizer.step()

        self.writer.set_step((epoch - 1) * self.len_epoch + batch_idx)
        self.train_metrics.update('loss', loss.item())
        for met in self.train_metric_ftns:
            self.train_metrics.update(met.__name__, met(output, target))

        if batch_idx % self.log_step == 0:
            self.logger.debug('Train Epoch: {} {} Loss: {:.6f}'.format(
                epoch, self._progress(batch_idx), loss.item()))

        if batch_idx == self.len_epoch:
            break

    log = self.train_metrics.result()

    if self.do_validation:
        val_log = self._valid_epoch(epoch)
        log.update(**{'val_' + k: v for k, v in val_log.items()})
        # val_log only exists when validation ran, so step the scheduler here
        if self.lr_scheduler is not None:
            self.lr_scheduler.step(val_log['utt_accuracy'])

    return log
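# NOTE: `mixup_data` and `mixup_criterion` are defined elsewhere in the
# project. The sketch below shows the shapes `_train_epoch` assumes,
# following the widely used mixup-cifar10 reference implementation of mixup
# (Zhang et al., "mixup: Beyond Empirical Risk Minimization"); the project's
# actual helpers may differ in detail.
import numpy as np
import torch


def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Return mixed inputs, both original label sets, and the mixing weight."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0))
    if use_cuda:
        index = index.cuda()
    # Convex combination of each example with a randomly paired one
    mixed_x = lam * x + (1 - lam) * x[index, :]
    return mixed_x, y, y[index], lam


def mixup_criterion(y_a, y_b, lam):
    """Return a closure computing the lam-weighted loss on both label sets."""
    return lambda criterion, pred: (lam * criterion(pred, y_a)
                                    + (1 - lam) * criterion(pred, y_b))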
import sys

import torch
from torch.nn.utils import clip_grad_norm_


def train(self, train_loader, model, criterion, optimizer,
          data_augmentation=None):
    # Tell PyTorch that we are training the model
    model.train()

    train_metrics = {'loss': 0.0, 'acc': 0.0}
    correct = 0
    total = 0

    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the GPU when one is available
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()

        # Apply data augmentation
        if data_augmentation == 'bc+':
            images, labels_a, _ = between_class(images, labels)
            labels = torch.max(labels_a, 1)[1]
        elif data_augmentation == 'mixup':
            images, labels_a, labels_b, lam = mixup_data(images, labels)

        # Clear gradient buffers
        model.zero_grad()

        # Pass images through the network
        outputs = model(images)

        # Compute the loss
        if data_augmentation == 'bc+':
            # BC+ produces soft label ratios, so feed log-probabilities
            loss = criterion(torch.softmax(outputs, dim=1).log(), labels_a)
        elif data_augmentation == 'mixup':
            loss = mixup_criterion(criterion, outputs, labels_a, labels_b, lam)
        else:
            loss = criterion(outputs, labels)

        # Compute gradients
        loss.backward()

        # Clip gradients to stabilise training
        clip_grad_norm_(model.parameters(), 5)

        # Update parameters
        optimizer.step()

        # Compute metrics
        ## Loss, weighted by batch size for a dataset-level average
        train_metrics['loss'] += loss.item() * len(images)

        ## Accuracy (mixup scores the lam-weighted match on both label sets)
        pred = torch.max(outputs.data, 1)[1]
        if data_augmentation == 'mixup':
            correct += eval_metrics('acc', pred.cpu().int(),
                                    labels_a.cpu().int(),
                                    labels_b.cpu().int(), lam) * len(images)
        else:
            correct += eval_metrics('acc', pred.cpu().int(),
                                    labels.cpu().int()) * len(images)
        total += labels.size(0)
        train_metrics['acc'] = 100.0 * float(correct) / total

        ## Progress bar with completed percentage and running accuracy
        p = (100.0 * (i + 1)) / len(train_loader)
        sys.stdout.write('\r[%s][%.2f%%][ACC:%.2f]' %
                         ('=' * round(p / 2) + '-' * (50 - round(p / 2)),
                          p, train_metrics['acc']))
        sys.stdout.flush()
    print('')

    train_metrics['loss'] = train_metrics['loss'] / len(train_loader.dataset)

    return train_metrics
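# NOTE: this file uses the direct form of `mixup_criterion` (it returns the
# loss immediately) rather than the closure form used in `_train_epoch`
# above, and `between_class` implements BC+ learning (Tokozume et al.,
# "Between-class Learning for Image Classification"). Neither helper is
# shown here; the sketches below are simplified, hypothetical versions for
# readability only, with `num_classes` an assumed parameter.
import torch


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # lam-weighted combination of the losses on both label sets
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


def between_class(x, y, num_classes=None):
    # Simplified BC+ sketch: mix random pairs with ratio r, rescale to
    # roughly preserve energy, and return soft labels in the same ratio.
    num_classes = num_classes or int(y.max()) + 1
    index = torch.randperm(x.size(0), device=x.device)
    r = torch.rand(x.size(0), 1, 1, 1, device=x.device)
    mixed_x = (r * x + (1 - r) * x[index]) / torch.sqrt(r ** 2 + (1 - r) ** 2)
    one_hot = torch.eye(num_classes, device=x.device)[y]
    soft_y = r.view(-1, 1) * one_hot + (1 - r.view(-1, 1)) * one_hot[index]
    return mixed_x, soft_y, r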
import torch
from torch.nn.utils import clip_grad_norm_


def train(self, train_loader, model, criterion, optimizer,
          data_augmentation=None):
    # Tell PyTorch that we are training the model
    model.train()

    train_metrics = {'loss': 0.0, 'dis_acc': 0.0, 'sev_acc': 0.0}

    for images, labels_dis, labels_sev in train_loader:
        # Move the batch to the GPU when one is available
        if torch.cuda.is_available():
            images = images.cuda()
            labels_dis, labels_sev = labels_dis.cuda(), labels_sev.cuda()

        # Apply data augmentation
        if data_augmentation == 'bc+':
            images, labels_dis_a, labels_sev_a = between_class(
                images, labels_dis, labels_sev)
            labels_dis = torch.max(labels_dis_a, 1)[1]
            labels_sev = torch.max(labels_sev_a, 1)[1]
        elif data_augmentation == 'mixup':
            (images, labels_dis_a, labels_dis_b,
             labels_sev_a, labels_sev_b, lam) = mixup_data(
                images, labels_dis, labels_sev)

        # Pass images through the network (one head per task)
        outputs_dis, outputs_sev = model(images)

        # Compute the error for each task
        if data_augmentation == 'bc+':
            # BC+ produces soft label ratios, so feed log-probabilities
            loss_dis = criterion(torch.softmax(outputs_dis, dim=1).log(),
                                 labels_dis_a)
            loss_sev = criterion(torch.softmax(outputs_sev, dim=1).log(),
                                 labels_sev_a)
        elif data_augmentation == 'mixup':
            loss_dis = mixup_criterion(criterion, outputs_dis,
                                       labels_dis_a, labels_dis_b, lam)
            loss_sev = mixup_criterion(criterion, outputs_sev,
                                       labels_sev_a, labels_sev_b, lam)
        else:
            loss_dis = criterion(outputs_dis, labels_dis)
            loss_sev = criterion(outputs_sev, labels_sev)

        # Clear gradient buffers
        model.zero_grad()

        # Compute gradients on the joint loss
        (loss_dis + loss_sev).backward()

        # Clip gradients to stabilise training
        clip_grad_norm_(model.parameters(), 5)

        # Update parameters
        optimizer.step()

        # Compute metrics
        # Loss: average of the two tasks, weighted by batch size
        train_metrics['loss'] += (loss_dis + loss_sev).item() / 2 * len(images)

        # Biotic stress (disease) accuracy
        pred = torch.max(outputs_dis.data, 1)[1]
        if data_augmentation == 'mixup':
            train_metrics['dis_acc'] += eval_metrics(
                'acc', pred.cpu().int(), labels_dis_a.cpu().int(),
                labels_dis_b.cpu().int(), lam) * len(images)
        else:
            train_metrics['dis_acc'] += eval_metrics(
                'acc', pred.cpu().int(), labels_dis.cpu().int()) * len(images)

        # Severity accuracy
        pred = torch.max(outputs_sev.data, 1)[1]
        if data_augmentation == 'mixup':
            train_metrics['sev_acc'] += eval_metrics(
                'acc', pred.cpu().int(), labels_sev_a.cpu().int(),
                labels_sev_b.cpu().int(), lam) * len(images)
        else:
            train_metrics['sev_acc'] += eval_metrics(
                'acc', pred.cpu().int(), labels_sev.cpu().int()) * len(images)

    # Average the accumulated metrics over the whole dataset
    for x in train_metrics:
        if x != 'loss':
            train_metrics[x] = 100.0 * train_metrics[x] / len(
                train_loader.dataset)
        else:
            train_metrics[x] = train_metrics[x] / len(train_loader.dataset)

    return train_metrics
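# NOTE: the multi-task `mixup_data` and the `eval_metrics` helper used above
# are defined elsewhere. These are minimal sketches of what the calls assume:
# the multi-task mixup shares one permutation and one lam across both label
# sets, and mixup accuracy is the lam-weighted match against both label sets,
# as in the mixup-cifar10 reference code. Names and signatures are
# assumptions, not the project's confirmed API.
import numpy as np
import torch


def mixup_data(x, y_dis, y_sev, alpha=1.0):
    # One permutation and one mixing weight shared by both tasks
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y_dis, y_dis[index], y_sev, y_sev[index], lam


def eval_metrics(name, pred, labels_a, labels_b=None, lam=None):
    if name != 'acc':
        raise ValueError('only accuracy is sketched here')
    if labels_b is None:
        # Plain accuracy: fraction of exact matches
        return pred.eq(labels_a).float().mean().item()
    # Mixup accuracy: weight the matches against both label sets by lam
    return (lam * pred.eq(labels_a).float().mean().item()
            + (1 - lam) * pred.eq(labels_b).float().mean().item())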