def cal_loss(self, output, target):
    """Compute the three YOLO loss terms for one batch.

    Shapes below are the ones stated by the original documentation of this
    function; they are not checked here.

    Arguments:
    output -- tuple (delta_pred, conf_pred, class_score), output of the network
        delta_pred  -- (B, H * W * num_anchors, 4), predicted box deltas
        conf_pred   -- (B, H * W * num_anchors, 1), predicted IoU score
        class_score -- (B, H * W * num_anchors, num_classes), class scores
    target -- tuple (iou_target, iou_mask, box_target, box_mask,
              class_target, class_mask), target label data
        iou_target   -- (B, H * W * num_anchors, 1)
        iou_mask     -- (B, H * W * num_anchors, 1)
        box_target   -- (B, H * W * num_anchors, 4)
        box_mask     -- (B, H * W * num_anchors, 1)
        class_target -- (B, H * W * num_anchors, 1)
        class_mask   -- (B, H * W * num_anchors, 1)

    Return:
    (box_loss, iou_loss, class_loss) -- the three terms, returned separately
    (they are not summed here), each divided by the batch size B.
    """
    delta_pred, conf_pred, class_score = output[0], output[1], output[2]
    iou_target, iou_mask = target[0], target[1]
    box_target, box_mask = target[2], target[3]
    class_target, class_mask = target[4], target[5]

    batch_size, _, num_classes = class_score.size()
    per_batch = 1 / batch_size

    # Flatten to one row per anchor and keep only the anchors whose class mask
    # is non-zero, so the other anchors contribute nothing to the class loss.
    flat_scores = class_score.view(-1, num_classes)
    flat_labels = class_target.view(-1)
    keep_idx = class_mask.view(-1).nonzero().squeeze(1)
    kept_scores = flat_scores[keep_idx, :]
    kept_labels = flat_labels[keep_idx]

    # Masked sum-of-squares for the regression terms, summed cross-entropy
    # for the classification term.
    box_sse = F.mse_loss(delta_pred * box_mask,
                         box_target * box_mask,
                         reduction='sum')
    iou_sse = F.mse_loss(conf_pred * iou_mask,
                         iou_target * iou_mask,
                         reduction='sum')
    class_ce = F.cross_entropy(kept_scores, kept_labels, reduction='sum')

    # Normalise by batch size; the squared-error terms are additionally halved.
    box_loss = per_batch * box_sse / 2.0
    iou_loss = per_batch * iou_sse / 2.0
    class_loss = per_batch * class_ce
    return box_loss, iou_loss, class_loss
def validation(model, device, optimizer, test_loader):
    """Evaluate an encoder/decoder pair on ``test_loader`` and checkpoint it.

    Args:
        model: pair ``(cnn_encoder, rnn_decoder)``; the decoder is applied to
            the encoder's output.
        device: device the input batches are moved to.
        optimizer: optimizer whose ``state_dict`` is saved with the models.
        test_loader: iterable of ``(X, y)`` batches that also exposes
            ``.dataset`` (its length is used to average the loss).

    Returns:
        ``(test_loss, test_score)``: summed cross-entropy divided by
        ``len(test_loader.dataset)``, and the accuracy over all samples.

    Note:
        ``epoch`` and ``save_model_path`` are not parameters; they are read as
        names from the enclosing module and must exist there when this runs.
    """
    # set model as testing mode
    cnn_encoder, rnn_decoder = model
    cnn_encoder.eval()
    rnn_decoder.eval()

    test_loss = 0
    label_batches = []
    pred_batches = []
    with torch.no_grad():
        for X, y in test_loader:
            # distribute data to device
            X, y = X.to(device), y.to(device).view(-1, )

            output = rnn_decoder(cnn_encoder(X))

            loss = F.cross_entropy(output, y, reduction='sum')
            test_loss += loss.item()  # sum up batch loss
            # index of the highest score per sample, shape (batch, 1)
            y_pred = output.max(1, keepdim=True)[1]

            # Collect whole batches; extending a list with a tensor would
            # iterate it element by element.
            label_batches.append(y)
            pred_batches.append(y_pred)

    test_loss /= len(test_loader.dataset)

    # compute accuracy
    all_y = torch.cat(label_batches, dim=0)
    all_y_pred = torch.cat(pred_batches, dim=0)
    # reshape(-1) always yields 1-D arrays; squeeze() would collapse a
    # single-sample result to a 0-d array.
    test_score = accuracy_score(all_y.cpu().reshape(-1).numpy(),
                                all_y_pred.cpu().reshape(-1).numpy())

    # show information
    print(
        '\nTest set ({:d} samples): Average loss: {:.4f}, Accuracy: {:.2f}%\n'.
        format(len(all_y), test_loss, 100 * test_score))

    # save the encoder, decoder and optimizer state for this epoch
    torch.save(
        cnn_encoder.state_dict(),
        os.path.join(save_model_path,
                     'cnn_encoder_epoch{}.pth'.format(epoch + 1)))
    torch.save(
        rnn_decoder.state_dict(),
        os.path.join(save_model_path,
                     'rnn_decoder_epoch{}.pth'.format(epoch + 1)))
    torch.save(
        optimizer.state_dict(),
        os.path.join(save_model_path,
                     'optimizer_epoch{}.pth'.format(epoch + 1)))
    print("Epoch {} model saved!".format(epoch + 1))

    return test_loss, test_score
def train(config, model, train_iter, dev_iter, test_iter):
    """Train ``model`` with Adam, validating periodically and stopping early.

    Every 100 batches the current batch accuracy and the dev-set metrics are
    computed, printed and written to TensorBoard; the model weights are saved
    to ``config.save_path`` whenever the dev loss improves. Training stops
    once more than ``config.require_improvement`` batches have passed since
    the last improvement, or after ``config.num_epochs`` epochs. Finally the
    model is passed to ``test`` together with ``test_iter``.

    Args:
        config: object providing ``learning_rate``, ``log_path``,
            ``num_epochs``, ``save_path`` and ``require_improvement``.
        model: the module to train.
        train_iter: iterable of ``(inputs, labels)`` training batches.
        dev_iter: validation batches, forwarded to ``evaluate``.
        test_iter: test batches, forwarded to ``test``.
    """
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    total_batch = 0  # number of batches processed so far
    dev_best_loss = float('inf')
    last_improve = 0  # batch index at which the dev loss last decreased
    stalled = False  # set once there has been no improvement for too long
    run_stamp = time.strftime('%m-%d_%H.%M', time.localtime())
    writer = SummaryWriter(log_dir=config.log_path + '/' + run_stamp)

    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for train_1, labels in train_iter:
            outputs = model(train_1)
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            if total_batch % 100 == 0:
                # Report performance on the current batch and on the dev set.
                true = labels.data.cpu()
                predic = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predic)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)

                improve = ''
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improve = total_batch

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss.item(), train_acc, dev_loss,
                               dev_acc, time_dif, improve))

                writer.add_scalar("loss/train", loss.item(), total_batch)
                writer.add_scalar("loss/dev", dev_loss, total_batch)
                writer.add_scalar("acc/train", train_acc, total_batch)
                writer.add_scalar("acc/dev", dev_acc, total_batch)
                # switch back to training mode after the dev-set evaluation
                model.train()

            total_batch += 1
            if total_batch - last_improve > config.require_improvement:
                # The dev loss has not decreased for too many batches.
                print("No optimization for a long time, auto-stopping...")
                stalled = True
                break
        if stalled:
            break

    writer.close()
    test(config, model, test_iter)
def cross_entropy_with_probs(
    input,
    target,
    weight=None,
    reduction="mean",
):
    # From Snorkel library
    """Calculate cross-entropy loss when targets are probabilities (floats), not ints.

    This helper is for the case where F.cross_entropy() is used with integer
    class labels, which does not cover probabilistic labels. The loss
    contributed by each class is computed and accumulated, weighted by the
    target probability of that class.
    NOTE(review): recent PyTorch releases document class-probability targets
    for F.cross_entropy(); confirm before replacing this helper.

    Note that the method signature is intentionally very similar to
    F.cross_entropy() so that it can be used as a drop-in replacement when
    target labels are changed from a 1D tensor of ints to a 2D tensor of
    probabilities.

    Parameters
    ----------
    input
        A [num_points, num_classes] tensor of logits
    target
        A [num_points, num_classes] tensor of probabilistic target labels
    weight
        An optional [num_classes] array of weights to multiply the loss by per class
    reduction
        One of "none", "mean", "sum", indicating whether to return one loss per data
        point, the mean loss, or the sum of losses

    Returns
    -------
    torch.Tensor
        The calculated loss

    Raises
    ------
    ValueError
        If an invalid reduction keyword is submitted
    """
    # Fail fast: reject a bad keyword before doing any tensor work.
    if reduction not in ("none", "mean", "sum"):
        raise ValueError("Keyword 'reduction' must be one of ['none', 'mean', 'sum']")

    num_points, num_classes = input.shape
    # Column y holds the cross-entropy of every point against hard label y.
    # Computing the log-softmax once avoids redoing it for every class.
    neg_log_probs = -F.log_softmax(input, dim=1)

    # Note that t.new_zeros puts the tensor on the same device as t
    cum_losses = input.new_zeros(num_points)
    for y in range(num_classes):
        y_loss = neg_log_probs[:, y]
        if weight is not None:
            y_loss = y_loss * weight[y]
        cum_losses += target[:, y].float() * y_loss

    if reduction == "none":
        return cum_losses
    if reduction == "mean":
        return cum_losses.mean()
    return cum_losses.sum()
def forward(self, predict, target, weight=None):
    """Pixel-wise cross-entropy between a score map and a label map.

    Args:
        predict: (n, c, h, w) class scores.
        target: (n, 1, h, w) class indices; must not require grad.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size "nclasses".

    Returns:
        The cross-entropy over all n * h * w positions, averaged when
        ``self.size_average`` is None or truthy and summed otherwise.
    """
    assert not target.requires_grad
    assert predict.dim() == 4
    assert predict.size(0) == target.size(0)
    assert predict.size(2) == target.size(2)
    assert predict.size(3) == target.size(3)

    # Read the class count before permuting: afterwards dimension 1 is the
    # height, not the number of classes.
    num_classes = predict.size(1)
    # (n, c, h, w) -> (n, h, w, c) -> (n * h * w, c), one row per pixel.
    predict = predict.permute(0, 2, 3, 1).contiguous()
    predict = predict.view(-1, num_classes)
    target = target.view(-1)

    # `size_average` is deprecated in F.cross_entropy; this is the
    # equivalent `reduction` value (None/truthy -> mean, otherwise sum).
    if self.size_average is None or self.size_average:
        reduction = 'mean'
    else:
        reduction = 'sum'
    loss = F.cross_entropy(predict, target, weight=weight, reduction=reduction)
    return loss
def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Run one training epoch for an encoder/decoder pair.

    Args:
        log_interval: currently unused; the interval check that guarded the
            progress print is disabled, so progress is printed every batch.
        model: pair ``(cnn_encoder, rnn_decoder)``; the decoder is applied to
            the encoder's output.
        device: device the input batches are moved to.
        train_loader: iterable of ``(X, y)`` batches that supports ``len()``
            and exposes ``.dataset``.
        optimizer: optimizer stepped once per batch.
        epoch: zero-based epoch index, used only in the progress message.

    Returns:
        ``(losses, scores)``: per-batch loss values and per-batch accuracies.
    """
    # set model as training mode
    cnn_encoder, rnn_decoder = model
    cnn_encoder.train()
    rnn_decoder.train()

    losses = []
    scores = []
    N_count = 0  # number of samples seen so far in this epoch
    print(len(train_loader))
    for batch_idx, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device).view(-1, )
        N_count += X.size(0)

        optimizer.zero_grad()
        output = rnn_decoder(
            cnn_encoder(X))  # shape: (batch_size, num_of_classes)

        loss = F.cross_entropy(output, y)
        losses.append(loss.item())

        # Training accuracy for this batch. `y` and `y_pred` are already 1-D,
        # so they are passed as-is: squeeze() would turn a batch of one sample
        # into a 0-d array.
        y_pred = torch.max(output, 1)[1]
        step_score = accuracy_score(y.detach().cpu().numpy(),
                                    y_pred.detach().cpu().numpy())
        scores.append(step_score)

        loss.backward()
        optimizer.step()

        # display the training information
        # NOTE: printed for every batch; `log_interval` is not consulted.
        print('Train epoch: {} [{}/{} ({:.0f}%)]\tLoss:{:.6f},Accu:{:.2f}%'.
              format(epoch + 1, N_count, len(train_loader.dataset),
                     100. * (batch_idx + 1) / len(train_loader), loss.item(),
                     100 * step_score))

    return losses, scores
def evaluate(config, model, data_iter, test=False):
    """Compute accuracy and average batch loss of ``model`` over ``data_iter``.

    Args:
        config: only ``config.class_list`` is read, and only when ``test`` is
            true (as the target names of the classification report).
        model: module to evaluate; it is put into eval mode and left there.
        data_iter: iterable of ``(texts, labels)`` batches supporting ``len()``.
        test: when true, also return a classification report and a confusion
            matrix.

    Returns:
        ``(acc, avg_loss)``, or ``(acc, avg_loss, report, confusion)`` when
        ``test`` is true. ``avg_loss`` is the sum of the per-batch
        cross-entropy values divided by ``len(data_iter)``; because the batch
        losses are accumulated as tensors, it is a tensor, not a float.
    """
    model.eval()
    loss_total = 0
    # Collect per-batch arrays and join them once at the end; np.append in
    # the loop would copy everything gathered so far on every batch. The
    # leading empty int array keeps the result well-defined (and the dtype
    # promotion unchanged) when there are no batches.
    label_chunks = [np.array([], dtype=int)]
    predict_chunks = [np.array([], dtype=int)]
    with torch.no_grad():
        for texts, labels in data_iter:
            outputs = model(texts)
            loss = F.cross_entropy(outputs, labels)
            loss_total += loss
            labels = labels.data.cpu().numpy()
            predic = torch.max(outputs.data, 1)[1].cpu().numpy()
            label_chunks.append(np.ravel(labels))
            predict_chunks.append(np.ravel(predic))

    labels_all = np.concatenate(label_chunks)
    predict_all = np.concatenate(predict_chunks)

    acc = metrics.accuracy_score(labels_all, predict_all)
    if test:
        report = metrics.classification_report(labels_all,
                                               predict_all,
                                               target_names=config.class_list,
                                               digits=4)
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        return acc, loss_total / len(data_iter), report, confusion
    return acc, loss_total / len(data_iter)
def train(self, print_every=10, epochs=1):
    """
    Train a model using the PyTorch Module API.

    Arguments:
    - print_every: (Optional) When self.verbose is set, print the loss and the
      validation accuracy every print_every iterations.
    - epochs: (Optional) A Python integer giving the number of epochs to train for.

    Returns: Nothing. Appends to self.loss_history, self.train_accuracy_history
    and self.val_accuracy_history, and updates self.best_val_accuracy and
    self.best_params whenever the validation accuracy improves.
    """
    # Local import: deepcopy is needed to take a real snapshot of the state
    # dicts below.
    import copy

    # Move the model parameters to CPU / GPU
    model = self.model.to(device=self.device)
    optimizer = self.optimizer

    # Initialize iteration
    t = 0

    for epoch in range(epochs):
        start = time.time()
        for train_batch in self.loader_train:
            # Put model to training mode
            model.train()

            # Load x and y
            x = train_batch.text.transpose(
                1, 0)  # reshape to [batch_size, len_seq]
            y = train_batch.target.type(torch.LongTensor)

            # Move to device, e.g. CPU
            x = x.to(device=self.device)
            y = y.to(device=self.device)

            # Compute scores and softmax loss
            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the
            # optimizer will update.
            optimizer.zero_grad()

            # Backwards pass: compute the gradient of the loss with
            # respect to each parameter of the model.
            loss.backward()

            # Update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            # Save loss
            self.loss_history.append(loss.item())

            # Display information
            if self.verbose and t % print_every == 0:
                print('Iteration %d, loss = %.4f' %
                      (t, self.loss_history[-1]))
                acc = self.compute_accuracy(validation=True)
                print('Accuracy :', acc)
                print()

            t += 1

        end = time.time()
        print('Epoch {0} / {1}, time = {2} secs'.format(
            epoch, epochs, end - start))

        # Compute train and val accuracy at the end of each epoch.
        train_accuracy = self.compute_accuracy(validation=False)
        val_accuracy = self.compute_accuracy(validation=True)
        self.train_accuracy_history.append(train_accuracy)
        self.val_accuracy_history.append(val_accuracy)

        # Print useful information
        if self.verbose:
            print('(Epoch %d / %d) Train acc: %f; Val acc: %f' %
                  (epoch, epochs, train_accuracy, val_accuracy))

        # Keep track of the best model
        if val_accuracy > self.best_val_accuracy:
            self.best_val_accuracy = val_accuracy
            # A shallow dict copy would still reference the live parameter
            # and optimizer tensors, which later optimizer steps modify in
            # place; deepcopy freezes the values as they are now.
            self.best_params['state_dict'] = copy.deepcopy(model.state_dict())
            self.best_params['optimizer'] = copy.deepcopy(
                optimizer.state_dict())

    # Save best model
    # NOTE(review): the nesting of this step is ambiguous in the collapsed
    # source; it is placed after the epoch loop here -- confirm against the
    # original layout.
    if self.save_model:
        self._save_model('/Users/robin/Projects/zelros/',
                         self.best_params['state_dict'],
                         self.best_params['optimizer'])
def forward(self, input, target, reduction='mean'):
    """Weighted sum of three cross-entropy losses, one per output head.

    Args:
        input: sequence of exactly three score tensors, one per head.
        target: tensor whose columns 0, 1 and 2 hold the class indices for
            the first, second and third head; it is cast to long.
        reduction: forwarded unchanged to each ``F.cross_entropy`` call.

    Returns:
        ``0.7 * loss_1 + 0.1 * loss_2 + 0.2 * loss_3``.
    """
    first, second, third = input
    heads = (first.float(), second.float(), third.float())
    labels = target.long()
    head_weights = (0.7, 0.1, 0.2)

    total = None
    for column, (coef, logits) in enumerate(zip(head_weights, heads)):
        term = coef * F.cross_entropy(
            logits, labels[:, column], reduction=reduction)
        total = term if total is None else total + term
    return total