예제 #1
0
    def cal_loss(self, output, target):
        """
        Compute the box, IoU and classification terms of the YOLO loss.

        Arguments:
        output -- sequence whose items 0..2 are (delta_pred, conf_pred, class_score)
        target -- sequence whose items 0..5 are
                  (iou_target, iou_mask, box_target, box_mask, class_target, class_mask)

        Expected shapes, with N = H * W * num_anchors (not validated here):
        delta_pred   -- (B, N, 4), predicted box deltas
        conf_pred    -- (B, N, 1), predicted IoU score
        class_score  -- (B, N, num_classes), class logits
        iou_target   -- (B, N, 1)
        iou_mask     -- (B, N, 1)
        box_target   -- (B, N, 4)
        box_mask     -- (B, N, 1)
        class_target -- (B, N, 1), integer class indices
        class_mask   -- (B, N, 1), non-zero where the class term applies

        Return:
        (box_loss, iou_loss, class_loss) -- the three terms, each summed over all
        entries and divided by the batch size B; the two squared-error terms are
        additionally halved. The terms are NOT combined into a single scalar.
        """
        delta_pred, conf_pred, class_logits = output[0], output[1], output[2]
        iou_target, iou_mask = target[0], target[1]
        box_target, box_mask = target[2], target[3]
        class_target, class_mask = target[4], target[5]

        batch_size, _, num_classes = class_logits.size()
        inv_batch = 1 / batch_size

        # Flatten the (B, N) grid so each row is one anchor prediction.
        flat_logits = class_logits.view(-1, num_classes)
        flat_labels = class_target.view(-1)

        # Only rows with a non-zero class mask contribute to the class term,
        # so rows without an object produce no classification gradient.
        keep_idx = class_mask.view(-1).nonzero().squeeze(1)
        kept_logits = flat_logits[keep_idx, :]
        kept_labels = flat_labels[keep_idx]

        # Masking both prediction and target zeroes the error of ignored rows.
        box_sq_err = F.mse_loss(delta_pred * box_mask,
                                box_target * box_mask,
                                reduction='sum')
        iou_sq_err = F.mse_loss(conf_pred * iou_mask,
                                iou_target * iou_mask,
                                reduction='sum')
        class_ce = F.cross_entropy(kept_logits, kept_labels, reduction='sum')

        box_loss = inv_batch * 1 * box_sq_err / 2.0
        iou_loss = inv_batch * iou_sq_err / 2.0
        class_loss = inv_batch * 1 * class_ce

        return box_loss, iou_loss, class_loss
예제 #2
0
def validation(model, device, optimizer, test_loader):
    """Evaluate an encoder/decoder pair on ``test_loader`` and checkpoint it.

    Args:
        model: pair ``(cnn_encoder, rnn_decoder)``; the decoder is applied to
            the encoder's output.
        device: device that each batch is moved to.
        optimizer: optimizer whose ``state_dict()`` is saved with the models.
        test_loader: iterable of ``(X, y)`` batches exposing ``.dataset``.

    Returns:
        ``(test_loss, test_score)``: the summed cross-entropy divided by
        ``len(test_loader.dataset)``, and the accuracy over all samples.

    Note:
        ``save_model_path`` and ``epoch`` are not parameters; they must be
        defined in the enclosing module scope when this function runs.
    """
    encoder, decoder = model
    encoder.eval()
    decoder.eval()

    summed_loss = 0
    labels_seen = []
    preds_seen = []
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device).view(-1, )

            logits = decoder(encoder(X))

            # Summed (not averaged) so the final division by the dataset
            # size yields a per-sample mean.
            summed_loss += F.cross_entropy(logits, y, reduction='sum').item()

            # Index of the largest score per row, kept as a column.
            predicted = logits.max(1, keepdim=True)[1]

            # Iterating a tensor yields its rows, so these lists hold one
            # entry per sample.
            labels_seen.extend(y)
            preds_seen.extend(predicted)

    test_loss = summed_loss / len(test_loader.dataset)

    all_y = torch.stack(labels_seen, dim=0)
    all_y_pred = torch.stack(preds_seen, dim=0)
    y_true = all_y.cpu().data.squeeze().numpy()
    y_hat = all_y_pred.cpu().data.squeeze().numpy()
    test_score = accuracy_score(y_true, y_hat)

    print(
        '\nTest set ({:d} samples): Average loss: {:.4f}, Accuracy: {:.2f}%\n'.
        format(len(all_y), test_loss, 100 * test_score))

    # Persist encoder, decoder and optimizer state, tagged with the
    # 1-based epoch number.
    epoch_no = epoch + 1
    checkpoints = (
        (encoder, 'cnn_encoder_epoch{}.pth'),
        (decoder, 'rnn_decoder_epoch{}.pth'),
        (optimizer, 'optimizer_epoch{}.pth'),
    )
    for component, name_template in checkpoints:
        torch.save(
            component.state_dict(),
            os.path.join(save_model_path, name_template.format(epoch_no)))
    print("Epoch {} model saved!".format(epoch_no))

    return test_loss, test_score
예제 #3
0
def train(config, model, train_iter, dev_iter, test_iter):
    """Train ``model`` with Adam, validating periodically and stopping early.

    Every 100 batches the current batch's loss/accuracy and the dev-set
    loss/accuracy (from ``evaluate``) are printed and written to a
    ``SummaryWriter``. Whenever the dev loss reaches a new minimum the
    weights are saved to ``config.save_path``. Training ends after
    ``config.num_epochs`` epochs, or earlier once more than
    ``config.require_improvement`` batches have passed since the last
    improvement. ``test`` is then run on ``test_iter``.

    Args:
        config: object providing ``learning_rate``, ``log_path``,
            ``num_epochs``, ``save_path`` and ``require_improvement``.
        model: module to optimise.
        train_iter: iterable of ``(inputs, labels)`` training batches.
        dev_iter: validation data forwarded to ``evaluate``.
        test_iter: test data forwarded to ``test``.
    """
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    batches_seen = 0  # number of batches processed so far
    best_dev_loss = float('inf')
    last_improved_at = 0  # value of batches_seen at the last dev-loss improvement
    stop_early = False  # set once improvement has stalled for too long

    run_stamp = time.strftime('%m-%d_%H.%M', time.localtime())
    writer = SummaryWriter(log_dir=config.log_path + '/' + run_stamp)
    report = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%},  Time: {5} {6}'

    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for batch_inputs, labels in train_iter:
            outputs = model(batch_inputs)
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            if batches_seen % 100 == 0:
                # Periodic report on this training batch and the dev set.
                train_loss = loss.item()
                gold = labels.data.cpu()
                guessed = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(gold, guessed)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < best_dev_loss:
                    best_dev_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improved_at = batches_seen
                else:
                    improve = ''
                elapsed = get_time_dif(start_time)
                print(
                    report.format(batches_seen, train_loss, train_acc,
                                  dev_loss, dev_acc, elapsed, improve))
                writer.add_scalar("loss/train", train_loss, batches_seen)
                writer.add_scalar("loss/dev", dev_loss, batches_seen)
                writer.add_scalar("acc/train", train_acc, batches_seen)
                writer.add_scalar("acc/dev", dev_acc, batches_seen)
                # Switch back to training mode after the evaluation pass.
                model.train()

            batches_seen += 1
            if batches_seen - last_improved_at > config.require_improvement:
                # Dev loss has not improved for too many batches: give up.
                print("No optimization for a long time, auto-stopping...")
                stop_early = True
                break
        if stop_early:
            break

    writer.close()
    test(config, model, test_iter)
def cross_entropy_with_probs(
    input,
    target,
    weight=None,
    reduction="mean",
):
    # From Snorkel library
    """Calculate cross-entropy loss when targets are probabilities (floats), not ints.

    The signature intentionally mirrors ``F.cross_entropy()`` so this can be
    used as a drop-in replacement when target labels change from a 1D tensor
    of integer class indices to a 2D tensor of per-class probabilities.

    For each data point the loss is::

        sum_c  weight[c] * target[c] * -log_softmax(input)[c]

    which equals accumulating, for every class ``c``, the hard-label
    cross-entropy for ``c`` scaled by ``weight[c]`` and by the probability
    the target assigns to ``c``. It is computed in one vectorised pass
    rather than one ``F.cross_entropy`` call per class.

    Parameters
    ----------
    input
        A [num_points, num_classes] tensor of logits
    target
        A [num_points, num_classes] tensor of probabilistic target labels
    weight
        An optional [num_classes] array of weights to multiply the loss by per class
    reduction
        One of "none", "mean", "sum", indicating whether to return one loss per data
        point, the mean loss, or the sum of losses

    Returns
    -------
    torch.Tensor
        The calculated loss, in the dtype of ``input``

    Raises
    ------
    ValueError
        If an invalid reduction keyword is submitted
    """
    # Reject a bad keyword before doing any tensor work.
    if reduction not in ("none", "mean", "sum"):
        raise ValueError("Keyword 'reduction' must be one of ['none', 'mean', 'sum']")

    _, num_classes = input.shape

    # -log p(c | x) for every point and class; this is exactly what
    # F.cross_entropy(..., reduction="none") returns for hard label c.
    neg_log_probs = -F.log_softmax(input, dim=1)
    if weight is not None:
        # Accept lists / arrays / tensors and place them next to the logits.
        class_weights = torch.as_tensor(
            weight, dtype=neg_log_probs.dtype, device=neg_log_probs.device
        )
        neg_log_probs = neg_log_probs * class_weights[:num_classes]

    # Only the first num_classes target columns take part.
    soft_targets = target[:, :num_classes].float()
    losses = (soft_targets * neg_log_probs).sum(dim=1).to(input.dtype)

    if reduction == "none":
        return losses
    if reduction == "mean":
        return losses.mean()
    return losses.sum()
예제 #5
0
    def forward(self, predict, target, weight=None):
        """
        Compute the per-pixel cross-entropy between class scores and labels.

        Args:
            predict: class scores of shape (n, c, h, w)
            target: integer class labels of shape (n, 1, h, w)
            weight (Tensor, optional): a manual rescaling weight given to each class.
                                       If given, has to be a Tensor of size "nclasses"

        Returns:
            Scalar loss: the mean over all pixels when ``self.size_average``
            is truthy or None, otherwise the sum.
        """
        assert not target.requires_grad
        assert predict.dim() == 4
        assert predict.size(0) == target.size(0)
        assert predict.size(2) == target.size(2)
        assert predict.size(3) == target.size(3)

        # Read the class count BEFORE permuting: afterwards dim 1 is the
        # height, and flattening by it would mis-shape the logits.
        num_classes = predict.size(1)

        # (n, c, h, w) -> (n, h, w, c) -> (n*h*w, c): one row of logits per pixel.
        predict = predict.permute(0, 2, 3, 1).contiguous()
        predict = predict.view(-1, num_classes)
        target = target.reshape(-1)

        # `size_average` is deprecated in F.cross_entropy; map it onto the
        # equivalent `reduction` (None behaves like True, i.e. 'mean').
        size_average = self.size_average
        if size_average is None or size_average:
            reduction = 'mean'
        else:
            reduction = 'sum'

        loss = F.cross_entropy(predict,
                               target,
                               weight=weight,
                               reduction=reduction)
        return loss
예제 #6
0
def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Run one training epoch for an encoder/decoder pair.

    Args:
        log_interval: currently unused; progress is printed for every batch.
        model: pair ``(cnn_encoder, rnn_decoder)``; the decoder is applied to
            the encoder's output.
        device: device that each batch is moved to.
        train_loader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        optimizer: optimizer stepped once per batch.
        epoch: zero-based epoch index, printed as ``epoch + 1``.

    Returns:
        ``(losses, scores)``: per-batch loss values and per-batch accuracies.
    """
    encoder, decoder = model
    encoder.train()
    decoder.train()

    losses, scores = [], []
    samples_seen = 0
    num_batches = len(train_loader)
    print(num_batches)

    progress = 'Train epoch: {} [{}/{} ({:.0f}%)]\tLoss:{:.6f},Accu:{:.2f}%'

    for batch_no, (X, y) in enumerate(train_loader, start=1):
        X = X.to(device)
        y = y.to(device).view(-1, )

        samples_seen += X.size(0)
        optimizer.zero_grad()

        # One row of class scores per sample.
        logits = decoder(encoder(X))

        loss = F.cross_entropy(logits, y)
        batch_loss = loss.item()
        losses.append(batch_loss)

        # Accuracy of this batch, from the arg-max class of each row.
        predicted = torch.max(logits, 1)[1]
        y_true = y.cpu().data.squeeze().numpy()
        y_hat = predicted.cpu().data.squeeze().numpy()
        batch_acc = accuracy_score(y_true, y_hat)
        scores.append(batch_acc)

        loss.backward()
        optimizer.step()

        print(progress.format(epoch + 1, samples_seen,
                              len(train_loader.dataset),
                              100. * batch_no / num_batches, batch_loss,
                              100 * batch_acc))

    return losses, scores
예제 #7
0
def evaluate(config, model, data_iter, test=False):
    """Evaluate ``model`` on ``data_iter`` and report accuracy and mean loss.

    Args:
        config: object providing ``class_list`` (read only when ``test`` is true).
        model: module to evaluate; it is switched to eval mode and left there.
        data_iter: sized iterable of ``(texts, labels)`` batches.
        test: when true, also build a classification report and confusion matrix.

    Returns:
        ``(acc, mean_loss)``, or ``(acc, mean_loss, report, confusion)`` when
        ``test`` is true. ``mean_loss`` is the sum of the per-batch
        cross-entropy tensors divided by ``len(data_iter)``, so it is a
        tensor rather than a Python float whenever at least one batch was seen.
    """
    model.eval()
    loss_total = 0

    # Collect per-batch arrays and join them once at the end; growing an
    # array with np.append re-copies everything gathered so far on every
    # batch. The empty int seed keeps the result dtype and the
    # empty-iterator result identical to the np.append formulation.
    label_chunks = [np.array([], dtype=int)]
    predict_chunks = [np.array([], dtype=int)]

    with torch.no_grad():
        for texts, labels in data_iter:
            outputs = model(texts)
            loss_total += F.cross_entropy(outputs, labels)
            label_chunks.append(labels.data.cpu().numpy().ravel())
            # Predicted class = index of the largest score in each row.
            predic = torch.max(outputs.data, 1)[1].cpu().numpy()
            predict_chunks.append(predic.ravel())

    labels_all = np.concatenate(label_chunks)
    predict_all = np.concatenate(predict_chunks)

    acc = metrics.accuracy_score(labels_all, predict_all)
    if test:
        report = metrics.classification_report(labels_all,
                                               predict_all,
                                               target_names=config.class_list,
                                               digits=4)
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        return acc, loss_total / len(data_iter), report, confusion
    return acc, loss_total / len(data_iter)
예제 #8
0
    def train(self, print_every=10, epochs=1):
        """
        Train a model using the PyTorch Module API.

        Arguments:
        - print_every: (Optional) When ``self.verbose`` is set, print the loss
          and validation accuracy every print_every iterations.
        - epochs: (Optional) A Python integer giving the number of epochs to train for.

        Returns: Nothing. Side effects: appends to ``self.loss_history``,
        ``self.train_accuracy_history`` and ``self.val_accuracy_history``;
        updates ``self.best_val_accuracy`` and ``self.best_params``; calls
        ``self._save_model`` at the end when ``self.save_model`` is set.
        """
        # Imported locally so this method does not rely on a module-level import.
        import copy

        # Move the model parameters to CPU / GPU
        model = self.model.to(device=self.device)
        optimizer = self.optimizer

        # Global iteration counter across all epochs
        t = 0

        for epoch in range(epochs):
            start = time.time()
            for train_batch in self.loader_train:

                # Re-enter training mode on every batch; presumably
                # compute_accuracy switches the model to eval mode.
                model.train()

                # Load x and y. The swap of the first two axes presumably
                # yields [batch_size, len_seq] -- confirm against the loader.
                x = train_batch.text.transpose(1, 0)
                y = train_batch.target.type(torch.LongTensor)

                # Move to device, e.g. CPU
                x = x.to(device=self.device)
                y = y.to(device=self.device)

                # Compute scores and softmax loss
                scores = model(x)
                loss = F.cross_entropy(scores, y)

                # Zero out all of the gradients for the variables which the
                # optimizer will update.
                optimizer.zero_grad()

                # Backwards pass: compute the gradient of the loss with
                # respect to each parameter of the model.
                loss.backward()

                # Update the parameters of the model using the gradients
                # computed by the backwards pass.
                optimizer.step()

                # Save loss
                self.loss_history.append(loss.item())

                # Display information
                if self.verbose and t % print_every == 0:
                    print('Iteration %d, loss = %.4f' %
                          (t, self.loss_history[-1]))
                    acc = self.compute_accuracy(validation=True)
                    print('Accuracy :', acc)
                    print()

                t += 1

            end = time.time()
            print('Epoch {0} / {1}, time = {2} secs'.format(
                epoch, epochs, end - start))

            # Compute train and val accuracy at the end of each epoch.
            train_accuracy = self.compute_accuracy(validation=False)
            val_accuracy = self.compute_accuracy(validation=True)

            self.train_accuracy_history.append(train_accuracy)
            self.val_accuracy_history.append(val_accuracy)

            # Print useful information
            if self.verbose:
                print('(Epoch %d / %d) Train acc: %f; Val acc: %f' %
                      (epoch, epochs, train_accuracy, val_accuracy))

            # Keep track of the best model
            if val_accuracy > self.best_val_accuracy:
                self.best_val_accuracy = val_accuracy
                # Deep-copy the snapshots: state_dict() values reference the
                # live parameter / optimizer tensors, so a shallow dict copy
                # would keep changing as training continues and the "best"
                # parameters would silently become the latest ones.
                self.best_params['state_dict'] = copy.deepcopy(
                    model.state_dict())
                self.best_params['optimizer'] = copy.deepcopy(
                    optimizer.state_dict())

        # Save best model
        # NOTE(review): the destination is a hard-coded absolute path;
        # consider making it configurable.
        if self.save_model:
            self._save_model('/Users/robin/Projects/zelros/',
                             self.best_params['state_dict'],
                             self.best_params['optimizer'])
 def forward(self, input, target, reduction='mean'):
     """Weighted sum of three cross-entropy losses, one per output head.

     ``input`` unpacks into exactly three logit tensors; head ``k`` is
     scored against column ``k`` of ``target``. The heads are weighted
     0.7, 0.1 and 0.2 respectively. Logits are cast to float and labels
     to long first. ``reduction`` is forwarded to every
     ``F.cross_entropy`` call.
     """
     head_a, head_b, head_c = input
     head_a, head_b, head_c = head_a.float(), head_b.float(), head_c.float()
     labels = target.long()

     loss_a = F.cross_entropy(head_a, labels[:, 0], reduction=reduction)
     loss_b = F.cross_entropy(head_b, labels[:, 1], reduction=reduction)
     loss_c = F.cross_entropy(head_c, labels[:, 2], reduction=reduction)

     return 0.7 * loss_a + 0.1 * loss_b + 0.2 * loss_c