Example #1
0
    def forward(self, sentences, lengths, cat_in=0, cat_out=0):
        """Encode every sentence and decode its neighbours (skip-thought style).

        Args:
            sentences: (B, maxlen) word ids; consecutive rows are assumed to be
                consecutive sentences in the corpus, so row i's neighbours are
                rows i-1 and i+1.
            lengths: (B,) true sentence lengths, used to mask padding.
            cat_in, cat_out: category conditioning forwarded to the encoder and
                decoders (presumably (n_categories,) — TODO confirm semantics).

        Returns:
            (loss, first sentence, second sentence, argmax word ids from the
            previous-sentence decoder, argmax word ids from the next-sentence
            decoder); the last four are for qualitative inspection only.
        """
        # cat_in = cat_out = (n_categories)
        # sentences = (B, maxlen)
        # lengths = (B)

        # Compute Thought Vectors for each sentence. Also get the actual word embeddings for teacher forcing.
        thoughts, word_embeddings = self.encoder(sentences, cat_in)  # thoughts = (B, thought_size), word_embeddings = (B, maxlen, word_size)

        # Predict the words for previous and next sentences.
        prev_pred, next_pred = self.decoders(thoughts, word_embeddings, cat_out)  # both = (batch-1, maxlen, VOCAB_SIZE)

        # mask the predictions, so that loss for beyond-EOS word predictions is cancelled.
        # NOTE(review): zeroing *logits* is not the same as excluding those
        # positions from the loss — all-zero logits still contribute a uniform
        # -log(1/V) term to cross_entropy.  Confirm this is intended.
        prev_mask = self.create_mask(prev_pred, lengths[:-1])
        next_mask = self.create_mask(next_pred, lengths[1:])

        masked_prev_pred = prev_pred * prev_mask
        masked_next_pred = next_pred * next_mask

        # Targets line up with the (batch-1)-sized predictions: the decoder for
        # thought i predicts sentence i-1 (prev) and sentence i+1 (next).
        prev_loss = F.cross_entropy(masked_prev_pred.view(-1, VOCAB_SIZE), sentences[:-1, :].view(-1))
        next_loss = F.cross_entropy(masked_next_pred.view(-1, VOCAB_SIZE), sentences[1:, :].view(-1))

        loss = prev_loss + next_loss

        # Greedy (argmax) decodings for the first batch element, for display.
        _, prev_pred_ids = prev_pred[0].max(1)
        _, next_pred_ids = next_pred[0].max(1)

        return loss, sentences[0], sentences[1], prev_pred_ids, next_pred_ids
 def forward(self, input, target):
     """Cross-entropy that accepts (N, C) logits or (N, C, H, W) maps.

     4-D inputs are rearranged so every spatial position becomes one row of
     logits.  ``target`` must not require gradients.  When
     ``self.size_average`` is False the summed loss is still divided by the
     total number of targets (so ignored classes are not excluded from the
     denominator).
     """
     assert not target.requires_grad
     if len(input.shape) == 4:
         # (N, C, H, W) -> (N, H, W, C): one logits row per pixel
         input = input.permute(0, 2, 3, 1).contiguous()
     flat_logits = input.view(-1, self.n_classes)
     flat_target = target.view(-1)
     assert flat_logits.shape[:1] == flat_target.shape
     if self.size_average:
         return F.cross_entropy(flat_logits, flat_target, size_average=True)
     summed = F.cross_entropy(flat_logits, flat_target, size_average=False)
     return summed.mul_(1.0 / flat_target.size(0))
Example #3
0
    def _add_losses(self, sigma_rpn=3.0):
        """Build all four Faster R-CNN training losses and cache them.

        Combines RPN classification / box-regression losses with the RCNN head
        classification / box-regression losses, stores every term (plus the
        total) in ``self._losses``, mirrors them into
        ``self._event_summaries`` for logging, and returns the total.

        Args:
            sigma_rpn: smooth-L1 transition point for the RPN box loss.
        """
        # RPN, class loss
        rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(-1, 2)
        rpn_label = self._anchor_targets['rpn_labels'].view(-1)
        # keep only anchors whose label is not -1 ("don't care")
        rpn_select = (rpn_label.data != -1).nonzero().view(-1)
        rpn_cls_score = rpn_cls_score.index_select(
            0, rpn_select).contiguous().view(-1, 2)
        rpn_label = rpn_label.index_select(0, rpn_select).contiguous().view(-1)
        rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

        # RPN, bbox loss
        rpn_bbox_pred = self._predictions['rpn_bbox_pred']
        rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets']
        rpn_bbox_inside_weights = self._anchor_targets[
            'rpn_bbox_inside_weights']
        rpn_bbox_outside_weights = self._anchor_targets[
            'rpn_bbox_outside_weights']
        rpn_loss_box = self._smooth_l1_loss(
            rpn_bbox_pred,
            rpn_bbox_targets,
            rpn_bbox_inside_weights,
            rpn_bbox_outside_weights,
            sigma=sigma_rpn,
            dim=[1, 2, 3])

        # RCNN, class loss
        cls_score = self._predictions["cls_score"]
        label = self._proposal_targets["labels"].view(-1)
        cross_entropy = F.cross_entropy(
            cls_score.view(-1, self._num_classes), label)

        # RCNN, bbox loss
        bbox_pred = self._predictions['bbox_pred']
        bbox_targets = self._proposal_targets['bbox_targets']
        bbox_inside_weights = self._proposal_targets['bbox_inside_weights']
        bbox_outside_weights = self._proposal_targets['bbox_outside_weights']
        loss_box = self._smooth_l1_loss(
            bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)

        self._losses['cross_entropy'] = cross_entropy
        self._losses['loss_box'] = loss_box
        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box

        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        self._losses['total_loss'] = loss

        # expose every loss term to the event/summary logger
        for k in self._losses.keys():
            self._event_summaries[k] = self._losses[k]

        return loss
Example #4
0
    def forward(self, task=None, input1=None, input2=None, label=None):
        '''
        Predict through the shared encoder and the task-specific prediction
        layer; when a label is given, also compute the loss and update the
        task's scorers.

        Args:
            - task: task object providing ``pair_input``, ``name`` and scorers
            - input1: first (or only) sentence batch
            - input2: second sentence batch (pair tasks only)
            - label: optional gold labels

        Returns:
            - out: dict with 'logits' and, when label is given, 'loss'
        '''
        pair_input = task.pair_input
        # each task registers its own head as '<task name>_pred_layer'
        pred_layer = getattr(self, '%s_pred_layer' % task.name)
        if pair_input:
            if self.pair_enc_type == 'bow':
                sent1 = self.sent_encoder(input1)
                sent2 = self.sent_encoder(input2) # causes a bug with BiDAF
                # InferSent-style pair features: [u, v, |u - v|, u * v]
                logits = pred_layer(torch.cat([sent1, sent2, torch.abs(sent1 - sent2),
                                               sent1 * sent2], 1))
            else:
                pair_emb = self.pair_encoder(input1, input2)
                logits = pred_layer(pair_emb)

        else:
            sent_emb = self.sent_encoder(input1)
            logits = pred_layer(sent_emb)
        out = {'logits': logits}
        if label is not None:
            if isinstance(task, (STS14Task, STSBTask)):
                # regression tasks: MSE loss, Pearson/Spearman as scorers
                loss = F.mse_loss(logits, label)
                label = label.squeeze(-1).data.cpu().numpy()
                logits = logits.squeeze(-1).data.cpu().numpy()
                task.scorer1(pearsonr(logits, label)[0])
                task.scorer2(spearmanr(logits, label)[0])
            elif isinstance(task, CoLATask):
                # CoLA: cross-entropy loss, Matthews correlation as scorer1
                label = label.squeeze(-1)
                loss = F.cross_entropy(logits, label)
                task.scorer2(logits, label)
                label = label.data.cpu().numpy()
                _, preds = logits.max(dim=1)
                task.scorer1(matthews_corrcoef(label, preds.data.cpu().numpy()))
            else:
                # default: plain classification with task-defined scorers
                label = label.squeeze(-1)
                loss = F.cross_entropy(logits, label)
                task.scorer1(logits, label)
                if task.scorer2 is not None:
                    task.scorer2(logits, label)
            out['loss'] = loss
        return out
def l2l_train(model, cluster_center, n_epoch=10000, trunc_step=10):
    """Meta-training loop: the model emits per-sample weights (w) and biases
    (b); per-class prototypes are running-averaged across truncation steps and
    used as a linear classifier whose cross-entropy is accumulated and
    backpropagated once per epoch.

    NOTE(review): ``M_all`` / ``B_all`` are returned but never written after
    their zero-initialisation — confirm whether they were meant to accumulate
    the final ``M_`` / ``B_`` values.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    M_all = Variable(torch.zeros(model.n_class, model.n_dim))
    B_all = Variable(torch.zeros(model.n_class))
    for epoch in range(n_epoch):
        loss = 0
        M_step, B_step = [], []
        for step in range(trunc_step):
            data = generate_data(cluster_center)
            optimizer.zero_grad()
            x, y = Variable(torch.from_numpy(data[0])).float(), Variable(torch.from_numpy(data[1]))
            w, b = model(x)
            # per-class prototype weights (M) and biases (B) for the "new"
            # classes, i.e. labels n_class_l .. n_class_l + n_class_n - 1
            M = Variable(torch.zeros(model.n_class_n, model.n_dim))
            B = Variable(torch.zeros(model.n_class_n))
            for k in range(model.n_class_n):
                # mean of the two weight columns over samples of this class
                M[k] = torch.cat((w[:, 0][y == model.n_class_l + k].view(-1, 1),
                                  w[:, 1][y == model.n_class_l + k].view(-1, 1)), 1).mean(0)
                B[k] = b[y == model.n_class_l + k].mean()
            if step == 0:
                M_ = M
                B_ = B
            else:
                # running mean over steps: M_t = (t * M_{t-1} + M) / (t + 1)
                M_ = step / (step + 1) * M_step[-1] + 1 / (step + 1) * M
                B_ = step / (step + 1) * B_step[-1] + 1 / (step + 1) * B
            M_step.append(M_)
            B_step.append(B_)
            # classify with the running prototypes; accumulate the loss so a
            # single backward() covers all truncation steps
            pred = torch.mm(x, M_.t()) + B_.view(1, -1).expand_as(torch.mm(x, M_.t()))
            loss += F.cross_entropy(pred, y)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
    return M_all, B_all, cluster_center
Example #6
0
def single_scale_rpn_losses(
        rpn_cls_logits, rpn_bbox_pred,
        rpn_labels_int32_wide, rpn_bbox_targets_wide,
        rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide):
    """Add losses for a single scale RPN model (i.e., no FPN).

    The ``*_wide`` targets come on a padded canvas, so each one is first
    cropped to the spatial size of the corresponding prediction map.

    Returns:
        (loss_rpn_cls, loss_rpn_bbox)
    """
    h, w = rpn_cls_logits.shape[2:]
    rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w]   # -1 means ignore
    h, w = rpn_bbox_pred.shape[2:]
    rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w]
    rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w]
    rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w]

    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        # reshape (B, 2*A, H, W) -> (B*A*H*W, 2) so each row is one anchor's
        # fg/bg logit pair
        B, C, H, W = rpn_cls_logits.size()
        rpn_cls_logits = rpn_cls_logits.view(
            B, 2, C // 2, H, W).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
        rpn_labels_int32 = rpn_labels_int32.contiguous().view(-1).long()
        # the loss is averaged over non-ignored targets
        loss_rpn_cls = F.cross_entropy(
            rpn_cls_logits, rpn_labels_int32, ignore_index=-1)
    else:
        # sigmoid activation: the weight masks out the ignored (-1) anchors,
        # and the summed loss is normalised by the number that contribute
        weight = (rpn_labels_int32 >= 0).float()
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            rpn_cls_logits, rpn_labels_int32.float(), weight, size_average=False)
        loss_rpn_cls /= weight.sum()

    loss_rpn_bbox = net_utils.smooth_l1_loss(
        rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights,
        beta=1/9)

    return loss_rpn_cls, loss_rpn_bbox
Example #7
0
def keypoint_losses(kps_pred, keypoint_locations_int32, keypoint_weights,
                    keypoint_loss_normalizer=None):
    """Mask R-CNN keypoint specific losses.

    Args:
        kps_pred: predicted heatmaps on GPU; flattened to
            (-1, HEATMAP_SIZE**2) so each spatial location acts as a class.
        keypoint_locations_int32: numpy int array of target heatmap indices.
        keypoint_weights: numpy array of per-keypoint weights (0 = invisible).
        keypoint_loss_normalizer: np.float32 scalar; used (and required) only
            when cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False.
    """
    device_id = kps_pred.get_device()
    kps_target = Variable(torch.from_numpy(
        keypoint_locations_int32.astype('int64'))).cuda(device_id)
    keypoint_weights = Variable(torch.from_numpy(keypoint_weights)).cuda(device_id)
    # Softmax across **space** (woahh....space!)
    # Note: this is not what is commonly called "spatial softmax"
    # (i.e., softmax applied along the channel dimension at each spatial
    # location); This is softmax applied over a set of spatial locations (i.e.,
    # each spatial location is a "class").
    loss = F.cross_entropy(
        kps_pred.view(-1, cfg.KRCNN.HEATMAP_SIZE**2), kps_target, reduce=False)
    # weighted mean over keypoints: invisible keypoints contribute zero
    loss = torch.sum(loss * keypoint_weights) / torch.sum(keypoint_weights)
    loss *= cfg.KRCNN.LOSS_WEIGHT

    if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
        # Discussion: the softmax loss above will average the loss by the sum of
        # keypoint_weights, i.e. the total number of visible keypoints. Since
        # the number of visible keypoints can vary significantly between
        # minibatches, this has the effect of up-weighting the importance of
        # minibatches with few visible keypoints. (Imagine the extreme case of
        # only one visible keypoint versus N: in the case of N, each one
        # contributes 1/N to the gradient compared to the single keypoint
        # determining the gradient direction). Instead, we can normalize the
        # loss by the total number of keypoints, if it were the case that all
        # keypoints were visible in a full minibatch. (Returning to the example,
        # this means that the one visible keypoint contributes as much as each
        # of the N keypoints.)
        loss *= keypoint_loss_normalizer.item() # np.float32 to float
    return loss
Example #8
0
def validate():
    """Evaluate the language model on ``val_iter`` (module-level globals).

    Returns:
        (accuracy over non-pad tokens, rank-weighted top-20 precision per
        token, perplexity = exp(total cross-entropy / total tokens)).
    """
    softmaxer = torch.nn.Softmax(dim=1)
    model.eval()
    correct = total = 0
    # rank weights 1..20: reversed cumulative sum of 1/rank
    precisionmat = (1/np.arange(1,21))[::-1].cumsum()[::-1]
    precisionmat = torch.cuda.FloatTensor(precisionmat.copy())
    precision = 0
    crossentropy = 0
    hidden = model.initHidden()
    for batch in iter(val_iter):
        sentences = batch.text # n=32,bs
        if torch.cuda.is_available():
            sentences = sentences.cuda()
        out, hidden = model(sentences, hidden)
        # predict token j+1 from the model output at position j
        for j in range(sentences.size(0)-1):
            outj = out[j] # bs,|V|
            labelsj = sentences[j+1] # bs
            # cross entropy
            crossentropy += F.cross_entropy(outj,labelsj,size_average=False,ignore_index=padidx)
            # precision
            outj, labelsj = softmaxer(outj).data, labelsj.data
            _, outsort = torch.sort(outj,dim=1,descending=True)
            outsort = outsort[:,:20]
            # indicator over the top-20 ranks where the gold token appears
            inds = (outsort-labelsj.unsqueeze(1)==0)
            inds = inds.sum(dim=0).type(torch.cuda.FloatTensor)
            precision += inds.dot(precisionmat)
            # plain ol accuracy
            _, predicted = torch.max(outj, 1)
            total += labelsj.ne(padidx).int().sum()
            correct += (predicted==labelsj).sum()
            # DEBUGGING: see the rest in trigram.py
        # detach the hidden state so BPTT graphs do not chain across batches
        hidden = repackage_hidden(hidden)
    return correct/total, precision/total, torch.exp(crossentropy/total).data[0]
Example #9
0
 def forward(self, input, target, kl_weight=1.0):
     """Variational objective: rescaled cross-entropy data term plus the
     summed KL regularisers collected from every submodule of ``self.net``
     that defines a ``kl_reg()`` method.
     """
     assert not target.requires_grad
     total_kl = 0.0
     for module in self.net.modules():
         if hasattr(module, 'kl_reg'):
             total_kl = total_kl + module.kl_reg()
     data_term = F.cross_entropy(input, target, size_average=True) * self.train_size
     return data_term + kl_weight * total_kl
Example #10
0
    def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
        """RCNN head losses with Online Hard Example Mining.

        RoIs are ranked by an approximate cross-entropy computed on detached
        scores; only the ``num_hard`` hardest are kept for the real class
        loss.  All positives are forced into the kept set by assigning them a
        very large ranking value.  The box loss uses positives only.

        Returns:
            (loss_cls, loss_box)
        """

        def log_sum_exp(x):
            # numerically stable log(sum(exp(x))) per row
            x_max = x.data.max()
            return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max

        num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
        pos_idx = rois_label > 0
        num_pos = pos_idx.int().sum()

        # classification loss
        num_classes = cls_score.size(1)
        # down-weight the background class by the positive fraction
        weight = cls_score.data.new(num_classes).fill_(1.)
        weight[0] = num_pos.data[0] / num_hard

        # detached copies: the ranking pass must not produce gradients
        conf_p = cls_score.detach()
        conf_t = rois_label.detach()

        # rank on cross_entropy loss
        loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
        loss_c[pos_idx] = 100. # include all positive samples
        _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
        loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)

        # bounding box regression L1 loss
        pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
        loc_p = bbox_pred[pos_idx].view(-1, 4)
        loc_t = rois_target[pos_idx].view(-1, 4)
        loss_box = F.smooth_l1_loss(loc_p, loc_t)

        return loss_cls, loss_box
def avg_cross_entropy_loss(predicted, targets):
    """ Helper function for computing the simple mean
        cross entropy loss between the predicted one-hot
        and the target class.

    Args:
        predicted: sequence of logit tensors, one per example.
        targets: sequence of matching class-index arrays / lists.

    Returns:
        Scalar tensor: mean of the per-example cross-entropy losses.

    Raises:
        ValueError: if ``predicted`` is empty (the mean is undefined; the
            original code raised a bare IndexError here).
    """
    length = len(predicted)
    if length == 0:
        raise ValueError("avg_cross_entropy_loss() needs at least one prediction")

    losses = []
    for i in range(length):
        # Build the class-index tensor directly as int64 instead of the
        # original float32 -> long round trip.
        target = Variable(torch.as_tensor(np.asarray(targets[i]), dtype=torch.long))
        losses.append(F.cross_entropy(predicted[i], target))

    # sum() replaces the original manual accumulation loop.
    return sum(losses) / length
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print the average loss,
    accuracy, a classification report and a confusion matrix.

    The model output is assumed to be (N, C, ...); trailing dimensions are
    mean-pooled down to (N, C) before the softmax.  ``metrics`` (sklearn) and
    ``np`` are expected to be imported at module level.
    """
    model.to(device)
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        y_pred = []
        y_true = []
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # mean-pool any trailing dimensions down to (N, C)
            output = torch.mean(output.view(output.size(0), output.size(1), -1), dim=2)
            # .item() keeps the running loss a Python float instead of a
            # tensor pinned to ``device`` (the original accumulated a tensor)
            test_loss += F.cross_entropy(output, target).item()
            output = F.softmax(output, dim=1)
            confidence, pred = output.max(1)
            print('confidence: {}, prediction: {}, ground truth: {}'.format(confidence.cpu().numpy(), pred.cpu().numpy(), target.cpu().numpy()))
            y_pred += pred.data.tolist()
            y_true += target.data.tolist()
            correct += pred.eq(target.view_as(pred)).sum().item()

    # BUG FIX: the original printed the *sum* of per-batch mean losses while
    # labelling it "Average loss"; divide by the number of batches.
    test_loss /= max(len(test_loader), 1)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    print(metrics.classification_report(np.asarray(y_true), np.asarray(y_pred)))
    print('confusion matrix: \n', metrics.confusion_matrix(np.asarray(y_true), np.asarray(y_pred)))
    print('\n')
Example #13
0
    def forward(self, model, sample, reduce=True):
        """Compute the loss for the given sample.

        Returns a tuple with three elements:
        1) the loss
        2) the sample size, which is used as the denominator for the gradient
        3) logging outputs to display while training
        """

        # this criterion only makes sense for decoders with adaptive softmax
        assert hasattr(model.decoder, 'adaptive_softmax') and model.decoder.adaptive_softmax is not None
        adaptive_softmax = model.decoder.adaptive_softmax

        net_output = model(**sample['net_input'])
        target = model.get_targets(sample, net_output).view(-1)

        bsz = target.size(0)

        # the adaptive softmax splits the vocabulary into clusters and returns
        # one (logits, target) pair per cluster; entries may be None when a
        # cluster has no targets in this batch
        logits, target = adaptive_softmax(net_output[0], target)
        assert len(target) == len(logits)

        # scalar accumulator when reducing, otherwise one slot per token
        loss = net_output[0].new(1 if reduce else bsz).zero_()

        for i in range(len(target)):
            if target[i] is not None:
                assert (target[i].min() >= 0 and target[i].max() <= logits[i].size(1))
                loss += F.cross_entropy(logits[i], target[i], size_average=False, ignore_index=self.padding_idx,
                                        reduce=reduce)

        # gradient denominator: sentences or tokens, per training config
        sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
        logging_output = {
            'loss': utils.item(loss.data) if reduce else loss.data,
            'ntokens': sample['ntokens'],
            'sample_size': sample_size,
        }
        return loss, sample_size, logging_output
Example #14
0
 def forward(self, predict, target, weight=None):
     """2-D segmentation cross-entropy with an ignore label.

         Args:
             predict:(n, c, h, w)
             target:(n, h, w)
             weight (Tensor, optional): a manual rescaling weight given to each class.
                                        If given, has to be a Tensor of size "nclasses"

         Returns:
             scalar loss over the non-ignored pixels, or a zero tensor when
             every pixel is ignored.
     """
     assert not target.requires_grad
     assert predict.dim() == 4
     assert target.dim() == 3
     assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
     assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
     # BUG FIX: the message previously read target.size(3), which raised
     # IndexError on the 3-D target whenever this assertion actually fired.
     assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))
     n, c, h, w = predict.size()
     # keep only pixels that are labelled and not the ignore class
     target_mask = (target >= 0) * (target != self.ignore_label)
     target = target[target_mask]
     if not target.data.dim():
         # no valid pixels in this batch: contribute zero loss
         return Variable(torch.zeros(1))
     predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
     # contiguous(): returns a tensor laid out contiguously in memory
     predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
     # target [N] predict [N,C]
     loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
     return loss
Example #15
0
def mrcnn_class_loss(target_class_ids, pred_class_logits, active_class_ids, config):
    """Loss for the classifier head of Mask RCNN.
    target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero
        padding to fill in the array.
    pred_class_logits: [batch, num_rois, num_classes]
    active_class_ids: [batch, num_classes]. Has a value of 1 for
        classes that are in the dataset of the image, and 0
        for classes that are not in the dataset.

    NOTE(review): the active-class masking below is commented out, so
    ``active_class_ids`` is currently unused and the plain mean CE is
    returned.
    """

    # Find predictions of classes that are not in the dataset.
    pred_class_logits = pred_class_logits.contiguous().view(-1, config.NUM_CLASSES)

    # flatten targets to int64 class indices on the GPU
    target_class_ids = target_class_ids.contiguous().view(-1).type(torch.cuda.LongTensor)
    # Loss
    loss = F.cross_entropy(
        pred_class_logits, target_class_ids, weight=None, size_average=True)

    # Erase losses of predictions of classes that are not in the active
    # classes of the image.
#    loss = loss * pred_active

    # Computer loss mean. Use only predictions that contribute
    # to the loss to get a correct mean.
#    loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active)
    return loss
Example #16
0
def cross_entropy_loss(input, target):
    """Soft-target cross entropy.

    Args:
        input: (N, C) logits.
        target: (N, C) per-class weights (e.g. soft labels); the CE against
            each class is weighted by the corresponding target entry.

    Returns:
        0-d tensor: target-weighted cross-entropy summed over classes and
        samples, divided by the batch size N.
    """
    # Vectorized form of the original per-class loop: cross-entropy against
    # class i for every row is exactly -log_softmax(input)[:, i], so the
    # weighted sum over classes collapses to one elementwise product.
    log_probs = F.log_softmax(input, dim=1)
    total_loss = -(target * log_probs).sum()
    return total_loss / input.shape[0]
Example #17
0
def loss(anchors, data, pred, threshold):
    """YOLO-style multi-part loss: IoU objectness, box regression and class
    terms, each normalised by the number of anchor cells.

    Args:
        anchors: anchor sizes; moved to the prediction's device.
        data: ground-truth dict (contains 'yx_min', 'yx_max', 'cls', ...).
        pred: prediction dict ('iou', 'feature', 'yx_min', 'yx_max',
            'center_offset', 'size_norm', optionally 'logits').
        threshold: IoU below which an unmatched anchor counts as negative.

    Returns:
        (dict of named loss terms, dict of matching diagnostics)
    """
    iou = pred['iou']
    device_id = iou.get_device() if torch.cuda.is_available() else None
    rows, cols = pred['feature'].size()[-2:]
    iou_matrix, _iou, _, _data = iou_match(pred['yx_min'].data, pred['yx_max'].data, data)
    anchors = utils.ensure_device(anchors, device_id)
    positive = fit_positive(rows, cols, *(data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    # negatives: not positive and matched IoU below the threshold
    negative = ~positive & (_iou < threshold)
    _center_offset, _size_norm = fill_norm(*(_data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    positive, negative, _iou, _center_offset, _size_norm, _cls = (torch.autograd.Variable(t) for t in (positive, negative, _iou, _center_offset, _size_norm, _data['cls']))
    _positive = torch.unsqueeze(positive, -1)
    loss = {}
    # iou
    loss['foreground'] = F.mse_loss(iou[positive], _iou[positive], size_average=False)
    loss['background'] = torch.sum(square(iou[negative]))
    # bbox
    loss['center'] = F.mse_loss(pred['center_offset'][_positive], _center_offset[_positive], size_average=False)
    loss['size'] = F.mse_loss(pred['size_norm'][_positive], _size_norm[_positive], size_average=False)
    # cls
    if 'logits' in pred:
        logits = pred['logits']
        if len(_cls.size()) > 3:
            # soft / one-hot class targets: match the softmax output with MSE
            loss['cls'] = F.mse_loss(F.softmax(logits, -1)[_positive], _cls[_positive], size_average=False)
        else:
            # hard class indices.
            # NOTE(review): the mask here is ``positive`` for _cls but
            # ``_positive`` for logits — presumably because class ids have one
            # fewer trailing dim; confirm against iou_match's output shapes.
            loss['cls'] = F.cross_entropy(logits[_positive].view(-1, logits.size(-1)), _cls[positive].view(-1))
    # normalize
    cnt = float(np.multiply.reduce(positive.size()))
    for key in loss:
        loss[key] /= cnt
    return loss, dict(iou=_iou, data=_data, positive=positive, negative=negative)
Example #18
0
File: ssd.py Project: zhliue/objdet
    def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
        """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

        Args:
          loc_preds: (tensor) predicted locations, sized [N, #anchors, 4].
          loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4].
          cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes].
          cls_targets: (tensor) encoded target labels, sized [N, #anchors].

        loss:
          (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(cls_preds, cls_targets).
        """
        pos = cls_targets > 0  # [N,#anchors]
        batch_size = pos.size(0)
        num_pos = pos.sum().item()

        # ===============================================================
        # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
        # ===============================================================
        mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
        loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], size_average=False)

        # ===============================================================
        # cls_loss = CrossEntropyLoss(cls_preds, cls_targets)
        # ===============================================================
        cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes), cls_targets.view(-1), reduce=False)  # [N*#anchors,]
        cls_loss = cls_loss.view(batch_size, -1)
        cls_loss[cls_targets < 0] = 0  # set ignored loss to 0
        # hard negative mining keeps the most confusing background anchors
        neg = self._hard_negative_mining(cls_loss, pos)  # [N,#anchors]
        cls_loss = cls_loss[pos | neg].sum()

        # both terms are normalised by the number of positive anchors
        print('loc_loss: {} | cls_loss: {}'.format(loc_loss.item() / num_pos, cls_loss.item() / num_pos))
        loss = (loc_loss + cls_loss) / num_pos
        return loss
def eval(data_iter, model, args, scheduler):
    """Run one evaluation pass over ``data_iter``: prints the average loss and
    accuracy, puts the model back into train mode, and returns None.

    Args:
        data_iter: iterable of batches with ``.text`` / ``.label``; must also
            expose ``.dataset`` for sizing.
        model: callable module; switched to eval() during the pass.
        args: namespace read for ``args.cuda``.
        scheduler: unused (kept for interface compatibility).
    """
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            # BUG FIX: the original moved ``feature`` twice and left
            # ``target`` on the CPU (feature.cuda(), feature.cuda()).
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)

        # .item() replaces the removed 0-dim indexing ``loss.data[0]``
        avg_loss += loss.item()
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    # BUG FIX: the accumulated sum was previously discarded in favour of the
    # *last* batch's loss (avg_loss = loss.data[0]/size); average the total.
    avg_loss /= size
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(avg_loss,
                                                                       accuracy,
                                                                       corrects,
                                                                       size))
Example #20
0
    def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
        """Return the RCNN head losses as (classification CE, smooth-L1 box loss)."""
        # cross-entropy over all sampled RoIs
        cls_loss = F.cross_entropy(cls_score, rois_label)
        # box regression, inside/outside-weighted as in Fast R-CNN
        box_loss = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
        return cls_loss, box_loss
def test_eval(data_iter, model, save_path, args, model_count):
    """Evaluate, append the result to ./Test_Result.txt, record the best
    score so far, copy the log into the snapshot dir, and optionally delete
    the evaluated model file.

    Args:
        data_iter: iterable of batches with ``.text`` / ``.label`` and a
            ``.dataset`` attribute.
        model: callable module; restored to train mode afterwards.
        save_path: path of the model checkpoint being evaluated.
        args: namespace read for ``cuda``, ``mulu`` and ``rm_model``.
        model_count: identifier written into the result log.
    """
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            # BUG FIX: the original moved ``feature`` twice and left
            # ``target`` on the CPU.
            feature, target = feature.cuda(), target.cuda()

        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)

        # .item() replaces the removed 0-dim indexing ``loss.data[0]``
        avg_loss += loss.item()
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()

    size = len(data_iter.dataset)
    # BUG FIX: the accumulated sum was previously discarded in favour of the
    # *last* batch's loss; average the accumulated total instead.
    avg_loss /= size
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(avg_loss,
                                                                       accuracy,
                                                                       corrects,
                                                                       size))
    print("model_count {}".format(model_count))
    # test result
    if os.path.exists("./Test_Result.txt"):
        file = open("./Test_Result.txt", "a")
    else:
        file = open("./Test_Result.txt", "w")
    file.write("model " + save_path + "\n")
    file.write("Evaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n".format(avg_loss, accuracy, corrects, size))
    file.write("model_count {} \n".format(model_count))
    file.write("\n")
    file.close()
    # calculate the best score in current file
    resultlist = []
    if os.path.exists("./Test_Result.txt"):
        file = open("./Test_Result.txt")
        for line in file.readlines():
            if line[:10] == "Evaluation":
                # NOTE: fixed-column parse of the accuracy field; fragile if
                # the log format above ever changes.
                resultlist.append(float(line[34:41]))
        result = sorted(resultlist)
        file.close()
        file = open("./Test_Result.txt", "a")
        file.write("\nThe Current Best Result is : " + str(result[len(result) - 1]))
        file.write("\n\n")
        file.close()
    # robustness: make sure the snapshot directory exists before copying
    dest_dir = os.path.join("./snapshot", args.mulu)
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy("./Test_Result.txt", os.path.join(dest_dir, "Test_Result.txt"))
    # whether to delete the model after test acc so that to save space
    if os.path.isfile(save_path) and args.rm_model is True:
        os.remove(save_path)
Example #22
0
def softmax_loss_1d(x, l):
    """ log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
    first_channel = x[:, 0, :, :]
    n_bins = int(l.size(1))
    # map values in [-1, 1] onto integer bin ids {0, ..., n_bins - 1}
    bin_ids = ((first_channel + 1) * n_bins / 2).long().clamp(max=n_bins - 1)
    per_position = F.cross_entropy(l, bin_ids, reduce=False)
    # sum the per-position losses within each batch element
    return per_position.view(per_position.size(0), -1).sum(dim=1)
Example #23
0
def train(model, cluster_center, n_epoch=5000):
    """Fit ``model`` with Adam on freshly sampled batches drawn from
    ``cluster_center`` via generate_data(); prints the loss every epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(n_epoch):
        batch = generate_data(cluster_center)
        inputs = Variable(torch.from_numpy(batch[0])).float()
        targets = Variable(torch.from_numpy(batch[1]))
        optimizer.zero_grad()
        logits = model(inputs)
        loss = F.cross_entropy(logits, targets)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
Example #24
0
    def forward(self, predictions, priors, targets):
        """MultiBox loss: match priors to ground truth, then compute the
        smooth-L1 localisation loss on positives and the cross-entropy
        confidence loss on positives plus hard-mined negatives.

        Returns:
            (loss_l, loss_c), each divided by the positive count N.
        """
        loc_data, conf_data, _ = predictions  # predicted boxes and class scores: (bs,-1,4), (bs,-1,2)
        priors = priors
        num = loc_data.size(0)  # bs
        num_priors = priors.size(0)

        # (bs, 21824, 4)
        loc_t = torch.Tensor(num, num_priors, 4)
        # (bs, 21824)
        conf_t = torch.LongTensor(num, num_priors)
        # (bs,num_obj, 5)
        for idx in range(num):
            truths = targets[idx][:, :-1].data  # cx,cy,w,h
            labels = targets[idx][:, -1].data  # 1 or 0
            defaults = priors.data  # default boxes
            # fills loc_t / conf_t in place for this image
            match(0.35, truths, defaults, [0.1, 0.2], labels, loc_t, conf_t, idx)

        if self.device.type == 'cuda':
            loc_t = loc_t.to(self.device)
            conf_t = conf_t.to(self.device)

        # indices whose matched label is > 0 (positives)
        pos = conf_t > 0
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

        # flatten everything and compute the localisation loss
        loc_p = loc_data[pos_idx].view(-1, 4)  # predict
        loc_t = loc_t[pos_idx].view(-1, 4)  # label
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        batch_conf = conf_data.view(-1, 2)
        # per-anchor CE surrogate used only to rank negatives for mining
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        # keep at most negpos_ratio negatives per positive
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
Example #25
0
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        """RPN forward pass: propose RoIs and, in training, build RPN losses.

        Args:
            base_feat: backbone feature map, (B, C, H, W).
            im_info: image size/scale info for the proposal layer.
            gt_boxes: ground-truth boxes (required when training).
            num_boxes: number of valid gt boxes per image.

        Returns:
            (rois, rpn_loss_cls, rpn_loss_box); both losses are 0 outside
            training.
        """

        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        # softmax over each anchor's fg/bg pair, then back to anchor layout
        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                 im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            # drop anchors labelled -1 ("don't care")
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                            rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
Example #26
0
 def _information_loss(model: InfoGAN, fake_hidden, latent):
     """Reconstruction ('information') loss for the InfoGAN latent codes:
     categorical CE + 0.1 * Gaussian NLL for continuous codes + 2 * BCE for
     binary codes; inactive code groups (dim 0) contribute nothing.
     """
     cat_logit, cont_mean, cont_logvar, bin_logit = model.rec(fake_hidden)
     total = 0.
     if model.cat_dim > 0:
         # categorical code: CE against the argmax of the one-hot slice
         cat_code = latent[:, model.cat_idx]
         total = total + F.cross_entropy(cat_logit, cat_code.argmax(1))
     if model.cont_dim > 0:
         cont_code = latent[:, model.cont_idx]
         total = total + .1 * _gaussian_loss(cont_code, cont_mean, cont_logvar)
     if model.bin_dim > 0:
         bin_code = latent[:, model.bin_idx]
         total = total + 2 * F.binary_cross_entropy_with_logits(bin_logit, bin_code)
     return total
Example #27
0
def evaluate(model, val_iter, vocab_size, DE, EN):
    """Return the average cross-entropy loss of `model` over `val_iter`.

    The first target token (position 0, the <s>/<start> symbol) is excluded
    from the loss, and <pad> positions are ignored via `ignore_index`.

    Fix: the original used `Variable(..., volatile=True)` and `loss.data[0]`,
    both removed in PyTorch >= 0.4/0.5; replaced with `torch.no_grad()` and
    `loss.item()` (the latter is already used elsewhere in this file).
    """
    model.eval()
    pad = EN.vocab.stoi['<pad>']
    total_loss = 0
    with torch.no_grad():  # no autograd bookkeeping during evaluation
        for b, batch in enumerate(val_iter):
            src, len_src = batch.src
            trg, len_trg = batch.trg
            src = src.cuda()
            trg = trg.cuda()
            output = model(src, trg)  # (seq_len, batch, vocab) — TODO confirm layout
            loss = F.cross_entropy(output[1:].view(-1, vocab_size),
                                   trg[1:].contiguous().view(-1),
                                   ignore_index=pad)
            total_loss += loss.item()
    return total_loss / len(val_iter)
Example #28
0
def train(epoch):
    """Run one training epoch over the global `train_loader`.

    Uses the module-level `model`, `optimizer` and `log_interval`; prints a
    progress line every `log_interval` batches.
    """
    model.train()
    samples_seen = 0
    dataset_size = len(train_loader.dataset)
    for data, target in train_loader:
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
        batch_size = data.size(0)
        samples_seen += batch_size
        if (samples_seen // batch_size) % log_interval == 0:
            progress = 100. * samples_seen / dataset_size
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, dataset_size, progress, loss.item()))
def train(epoch, model):
    """Train `model` for one epoch over the global `train_loader`.

    Uses the module-level `args`, `optimizer` and `train_loader`; moves data
    to GPU when `args.cuda` is set and logs every `args.log_interval` batches.

    Fix: `loss.data[0]` is invalid on PyTorch >= 0.5 (0-dim tensor indexing);
    replaced with `loss.item()`, matching the sibling `train` in this file.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def validate():
    """Evaluate the mixture model over the global `val_iter`.

    Runs the three frozen component LMs (fLSTM, fGRU, fNNLM) plus the mixer
    `model` over the validation set and accumulates, per next-word position:
    accuracy, a precision@20 score, and total cross-entropy.

    Returns (accuracy, precision/total, perplexity) — perplexity as
    exp(total cross-entropy / token count), extracted with the legacy
    `.data[0]` accessor (pre-0.5 PyTorch style, consistent with this file).

    Uses many module-level globals: model, fLSTM, fGRU, fNNLM, val_iter,
    TEXT, n, padidx, repackage_hidden. Requires CUDA (cuda tensor types are
    constructed unconditionally below).
    """
    softmaxer = torch.nn.Softmax(dim=1)
    model.eval()
    fLSTM.eval()
    fGRU.eval()
    fNNLM.eval()
    correct = total = 0
    # Weights for precision@20: rank r gets sum_{i>=r} 1/i (reversed cumsum).
    precisionmat = (1/np.arange(1,21))[::-1].cumsum()[::-1]
    precisionmat = torch.cuda.FloatTensor(precisionmat.copy())
    precision = 0
    crossentropy = 0
    LSTMhidden = fLSTM.initHidden()
    GRUhidden = fGRU.initHidden()
    for batch in iter(val_iter):
        sentences = batch.text # n=32,bs
        if torch.cuda.is_available():
            sentences = sentences.cuda()
        LSTMout, LSTMhidden = fLSTM(sentences, LSTMhidden)
        GRUout, GRUhidden = fGRU(sentences, GRUhidden)
        # Left-pad so every position has an n-gram context window for the NNLM.
        word_pad = (32 + n - 1) - sentences.size(0)
        pads = Variable(torch.zeros(word_pad,sentences.size(1))).type(torch.cuda.LongTensor)
        padsentences = torch.cat([pads,sentences],dim=0)
        #print("sentence_dim: {}\npadded_dim: {}".format(sentences.size(),padsentences.size()))
        # Slide an n-word window over each column (sentence) and stack the
        # per-position NNLM predictions back into (seq, batch, vocab).
        NNLMout = torch.stack([ fNNLM(torch.cat([ padsentences[:,a:a+1][b:b+n,:] for b in range(32) ],dim=1).t()) for a in range(sentences.size(1)) ],dim=1)
        #eOUT = torch.cat([LSTMout,GRUout,NNLMout],dim=2)
        NNLMout = NNLMout[-sentences.size(0):,:sentences.size(1),:len(TEXT.vocab)]
        tOUT = model(sentences.t(),LSTMout,GRUout,NNLMout)
        out  = tOUT
        # Score each position's prediction against the following word.
        for j in range(sentences.size(0)-1):
            outj = out[j] # bs,|V|
            labelsj = sentences[j+1] # bs
            # cross entropy
            crossentropy += F.cross_entropy(outj,labelsj,size_average=False,ignore_index=padidx)
            # precision
            outj, labelsj = softmaxer(outj).data, labelsj.data
            _, outsort = torch.sort(outj,dim=1,descending=True)
            outsort = outsort[:,:20]
            # One-hot mask of where the true label appears in the top-20 ranks.
            inds = (outsort-labelsj.unsqueeze(1)==0)
            inds = inds.sum(dim=0).type(torch.cuda.FloatTensor)
            precision += inds.dot(precisionmat)
            # plain ol accuracy
            _, predicted = torch.max(outj, 1)
            total += labelsj.ne(padidx).int().sum()
            correct += (predicted==labelsj).sum()
            # DEBUGGING: see the rest in trigram.py
        # Detach hidden states so the graph doesn't grow across batches.
        LSTMhidden = repackage_hidden(LSTMhidden)
        GRUhidden  = repackage_hidden(GRUhidden)
    return correct/total, precision/total, torch.exp(crossentropy/total).data[0]
Example #31
0
def go(arg):
    """Train and evaluate a relational graph-attention node classifier.

    Loads a relational graph dataset, defines a GAT-style model with one
    (k x k) bilinear matrix per relation, trains with full-batch Adam on the
    train split, and prints train/test accuracy after every epoch.
    """
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'

    edges, (n2i, i2n), (r2i, i2r), train, test = data.load(arg.name,
                                                           final=arg.final,
                                                           limit=arg.limit)

    # Convert test and train to tensors
    train_idx = [n2i[name] for name, _ in train.items()]
    train_lbl = [cls for _, cls in train.items()]
    train_idx = torch.tensor(train_idx, dtype=torch.long, device=dev)
    train_lbl = torch.tensor(train_lbl, dtype=torch.long, device=dev)

    test_idx = [n2i[name] for name, _ in test.items()]
    test_lbl = [cls for _, cls in test.items()]
    test_idx = torch.tensor(test_idx, dtype=torch.long, device=dev)
    test_lbl = torch.tensor(test_lbl, dtype=torch.long, device=dev)

    # count nr of classes
    cls = set([int(l) for l in test_lbl] + [int(l) for l in train_lbl])
    """
    Define model
    """
    num_cls = len(cls)

    # One relational attention layer: mixes node embeddings along typed edges.
    class GATLayer(nn.Module):
        def __init__(self, graph):
            super().__init__()

            self.i2n, self.i2r, self.edges = graph

            # Flatten all (from, to) index pairs across relations into two
            # parallel index lists, registered as buffers so they follow the
            # module between devices.
            froms, tos = [], []

            for p in edges.keys():
                froms.extend(edges[p][0])
                tos.extend(edges[p][1])

            self.register_buffer('froms', torch.tensor(froms,
                                                       dtype=torch.long))
            self.register_buffer('tos', torch.tensor(tos, dtype=torch.long))

        def forward(self, nodes, rels, sample=None):

            n, k = nodes.size()
            k, k, r = rels.size()  # NOTE(review): rebinds k; assumes rels is (k, k, r)

            if arg.dense:

                # Dense path: one full n x n attention matrix per relation.
                froms = nodes[None, :, :].expand(r, n, k)
                rels = rels.permute(2, 0, 1)

                froms = torch.bmm(froms, rels)

                froms = froms.view(r * n, k)
                adj = torch.mm(froms, nodes.t())  # stacked adjacencies
                adj = F.softmax(adj, dim=0)

                nwnodes = torch.mm(adj, nodes)
                nwnodes = nwnodes.view(r, n, k)
                nwnodes = nwnodes.mean(dim=0)

                return nwnodes

            else:
                # Sparse path: score only the existing edges, relation by
                # relation, then aggregate with a sparse matmul.
                rels = [
                    rels[None, :, :, p].expand(len(self.edges[p][0]), k, k)
                    for p in range(r)
                ]
                rels = torch.cat(rels, dim=0)

                assert len(self.froms) == rels.size(0)

                froms = nodes[self.froms, :]
                tos = nodes[self.tos, :]

                froms, tos = froms[:, None, :], tos[:, :, None]

                # unnormalized attention weights
                att = torch.bmm(torch.bmm(froms, rels), tos).squeeze()

                if sample is None:

                    indices = torch.cat(
                        [self.froms[:, None], self.tos[:, None]], dim=1)
                    values = att

                else:

                    # NOTE(review): unimplemented — with sample not None,
                    # `indices`/`values` are never assigned and the lines
                    # below raise NameError. Confirm intended behavior.
                    pass

                self.values = values
                self.values.retain_grad()

                # normalize the values (TODO try sparsemax)

                values = util.logsoftmax(indices,
                                         values, (n, n),
                                         p=10,
                                         row=True)
                values = torch.exp(values)

                mm = util.sparsemm(torch.cuda.is_available())

                return mm(indices.t(), values, (n, n), nodes)

    # Full model: learned node embeddings, `depth` GAT layers, linear+softmax head.
    class Model(nn.Module):
        def __init__(self, k, num_classes, graph, depth=3):
            super().__init__()

            self.i2n, self.i2r, self.edges = graph
            self.num_classes = num_classes

            n = len(self.i2n)

            # relation embeddings
            self.rels = nn.Parameter(
                torch.randn(k, k,
                            len(self.i2r) +
                            1))  # TODO initialize properly (like distmult?)

            # node embeddings (layer 0)
            self.nodes = nn.Parameter(torch.randn(
                n, k))  # TODO initialize properly (like embedding?)

            self.layers = nn.ModuleList()
            for _ in range(depth):
                self.layers.append(GATLayer(graph))

            self.toclass = nn.Sequential(nn.Linear(k, num_classes),
                                         nn.Softmax(dim=-1))

        def forward(self, sample=None):

            nodes = self.nodes
            for layer in self.layers:
                nodes = layer(nodes, self.rels, sample=sample)

            return self.toclass(nodes)

    model = Model(k=arg.emb_size,
                  depth=arg.depth,
                  num_classes=num_cls,
                  graph=(i2n, i2r, edges))

    if torch.cuda.is_available():
        model.cuda()
        train_lbl = train_lbl.cuda()
        test_lbl = test_lbl.cuda()

    opt = torch.optim.Adam(model.parameters(), lr=arg.lr)

    # Full-batch training loop: one forward/backward over the whole graph per epoch.
    for e in tqdm.trange(arg.epochs):

        opt.zero_grad()

        cls = model()[train_idx, :]

        # NOTE(review): model ends in Softmax, so this is cross-entropy on
        # probabilities rather than logits — unusual; confirm intentional.
        loss = F.cross_entropy(cls, train_lbl)

        loss.backward()
        opt.step()

        print(e, loss.item(), e)  # NOTE(review): epoch index printed twice

        # Evaluate
        with torch.no_grad():
            cls = model()[train_idx, :]
            agreement = cls.argmax(dim=1) == train_lbl
            accuracy = float(agreement.sum()) / agreement.size(0)

            print('   train accuracy ', float(accuracy))

            cls = model()[test_idx, :]
            agreement = cls.argmax(dim=1) == test_lbl
            accuracy = float(agreement.sum()) / agreement.size(0)

            print('   test accuracy ', float(accuracy))

    print('training finished.')
Example #32
0
    def forward(self, images, features, proposals, targets=None):
        """
        Same as StandardROIHeads.forward but add logic for subclass.

        Runs the standard box head, then an extra `subclass_head` on the
        pooled box features. During training this adds a "loss_subclass"
        cross-entropy term; during inference it attaches per-instance
        subclass probabilities (`pred_subclass_prob`).
        """
        # Fall back to the vanilla behavior when the subclass head is disabled.
        if not self.subclass_on:
            return super().forward(images, features, proposals, targets)

        # --- start copy -------------------------------------------------------
        del images

        if self.training:
            proposals = self.label_and_sample_proposals(proposals, targets)
            # NOTE: `has_gt` = False for negatives and we must manually register `gt_subclasses`,
            #  because custom gt_* fields will not be automatically registered in sampled proposals.
            for pp_per_im in proposals:
                if not pp_per_im.has("gt_subclasses"):
                    background_subcls_idx = 0
                    # NOTE(review): constructs a CUDA tensor unconditionally —
                    # this path fails on CPU-only runs; confirm GPU-only training.
                    pp_per_im.gt_subclasses = torch.cuda.LongTensor(
                        len(pp_per_im)
                    ).fill_(background_subcls_idx)
        del targets

        features_list = [features[f] for f in self.in_features]

        # Standard box branch: ROI-pool, box head, class/box predictions.
        box_features = self.box_pooler(
            features_list, [x.proposal_boxes for x in proposals]
        )
        box_features = self.box_head(box_features)
        predictions = self.box_predictor(box_features)
        # --- end copy ---------------------------------------------------------

        # NOTE: don't delete box_features, keep it temporarily
        # del box_features
        # Flatten pooled features to (num_boxes, -1) for the subclass head.
        box_features = box_features.view(
            box_features.shape[0], np.prod(box_features.shape[1:])
        )
        pred_subclass_logits = self.subclass_head(box_features)

        if self.training:
            losses = self.box_predictor.losses(predictions, proposals)
            # During training the proposals used by the box head are
            # used by the mask, keypoint (and densepose) heads.
            losses.update(self._forward_mask(features, proposals))
            losses.update(self._forward_keypoint(features, proposals))

            # subclass head
            gt_subclasses = cat([p.gt_subclasses for p in proposals], dim=0)
            loss_subclass = F.cross_entropy(
                pred_subclass_logits, gt_subclasses, reduction="mean"
            )
            losses.update({"loss_subclass": loss_subclass})

            return proposals, losses
        else:
            pred_instances, kept_indices = self.box_predictor.inference(
                predictions, proposals
            )
            # During inference cascaded prediction is used: the mask and keypoints
            # heads are only applied to the top scoring box detections.
            pred_instances = self.forward_with_given_boxes(features, pred_instances)

            # subclass head
            # Keep only the subclass probabilities of boxes that survived NMS.
            probs = F.softmax(pred_subclass_logits, dim=-1)
            for pred_instances_i, kept_indices_i in zip(pred_instances, kept_indices):
                pred_instances_i.pred_subclass_prob = torch.index_select(
                    probs,
                    dim=0,
                    index=kept_indices_i.to(torch.int64),
                )

            # Give the output a stable name for ONNX export consumers.
            if torch.onnx.is_in_onnx_export():
                assert len(pred_instances) == 1
                pred_instances[0].pred_subclass_prob = alias(
                    pred_instances[0].pred_subclass_prob, "subclass_prob_nms"
                )

            return pred_instances, {}
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100


# Top-level training loop: standard classification with cross-entropy.
# Relies on module-level globals: model, train_loader, optimizer, device,
# num_epochs, compute_accuracy.
start_time = time.time()
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        # model is assumed to return (logits, probabilities) — TODO confirm
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            # NOTE(review): `cost` is a 0-dim tensor formatted with %.4f —
            # works via __float__, but `cost.item()` would be more explicit.
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' %
                  (epoch + 1, num_epochs, batch_idx, len(train_loader), cost))

    model = model.eval()
    print('Epoch: %03d/%03d training accuracy: %.2f%%' %
          (epoch + 1, num_epochs, compute_accuracy(model, train_loader)))
Example #34
0
    def _add_losses(self, sigma_rpn=3.0):
        if cfg.TRAIN.IMS_PER_BATCH == 1:
            # RPN, class loss
            rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
                -1, 2)  #[前景loss,背景loss][Anchorsize*width*height]个anchor
            rpn_label = self._anchor_targets['rpn_labels'].view(-1)
            rpn_select = (rpn_label.data != -1).nonzero().view(-1)  #选取的前景及背景
            rpn_cls_score = rpn_cls_score.index_select(
                0, rpn_select).contiguous().view(-1, 2)  #[256,gt]
            rpn_label = rpn_label.index_select(0,
                                               rpn_select).contiguous().view(
                                                   -1)  #[256]
            # 是rpn部分的loss
            rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

            # RPN, bbox loss
            rpn_bbox_pred = self._predictions[
                'rpn_bbox_pred']  # batch * h * w * (num_anchors*4) 回归框预测的坐标
            rpn_bbox_targets = self._anchor_targets[
                'rpn_bbox_targets']  # [1,height,width ,9*4] 回归框目标的坐标(和gt的回归值)
            rpn_bbox_inside_weights = self._anchor_targets[
                'rpn_bbox_inside_weights']  # [1,height,width ,9*4]
            rpn_bbox_outside_weights = self._anchor_targets[
                'rpn_bbox_outside_weights']  # [1,height,width ,9*4]
            # 是rpn部分的loss
            rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=sigma_rpn,
                                                dim=[1, 2, 3])
        elif cfg.TRAIN.IMS_PER_BATCH == 2:

            ############ img1
            # RPN, class loss
            rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
                -1, 2)  #[前景loss,背景loss][Anchorsize*width*height]个anchor
            rpn_label = self._anchor_targets['rpn_labels'].view(-1)
            rpn_select = (rpn_label.data != -1).nonzero().view(-1)  #选取的前景及背景
            rpn_cls_score = rpn_cls_score.index_select(
                0, rpn_select).contiguous().view(-1, 2)  #[256,gt]
            rpn_label = rpn_label.index_select(0,
                                               rpn_select).contiguous().view(
                                                   -1)  #[256]
            # 是rpn部分的loss
            rpn_cross_entropy1 = F.cross_entropy(rpn_cls_score, rpn_label)

            # RPN, bbox loss
            rpn_bbox_pred = self._predictions[
                'rpn_bbox_pred']  # batch * h * w * (num_anchors*4) 回归框预测的坐标
            rpn_bbox_targets = self._anchor_targets[
                'rpn_bbox_targets']  # [1,height,width ,9*4] 回归框目标的坐标(和gt的回归值)
            rpn_bbox_inside_weights = self._anchor_targets[
                'rpn_bbox_inside_weights']  # [1,height,width ,9*4]
            rpn_bbox_outside_weights = self._anchor_targets[
                'rpn_bbox_outside_weights']  # [1,height,width ,9*4]
            # 是rpn部分的loss
            rpn_loss_box1 = self._smooth_l1_loss(rpn_bbox_pred,
                                                 rpn_bbox_targets,
                                                 rpn_bbox_inside_weights,
                                                 rpn_bbox_outside_weights,
                                                 sigma=sigma_rpn,
                                                 dim=[1, 2, 3])

            ############img2
            # RPN, class loss
            rpn_label2 = self._anchor_targets['rpn_labels2'].view(-1)
            rpn_select2 = (rpn_label2.data != -1).nonzero().view(-1)  #选取的前景及背景
            rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
                -1, 2)  #[前景loss,背景loss][Anchorsize*width*height]个anchor
            rpn_cls_score2 = rpn_cls_score.index_select(
                0, rpn_select2).contiguous().view(-1, 2)  #[256,gt]
            rpn_label2 = rpn_label2.index_select(
                0, rpn_select2).contiguous().view(-1)  #[256]
            # 是rpn部分的loss
            rpn_cross_entropy2 = F.cross_entropy(rpn_cls_score2, rpn_label2)

            # RPN, bbox loss
            rpn_bbox_targets2 = self._anchor_targets[
                'rpn_bbox_targets2']  # [1,height,width ,9*4] 回归框目标的坐标(和gt的回归值)
            rpn_bbox_inside_weights2 = self._anchor_targets[
                'rpn_bbox_inside_weights2']  # [1,height,width ,9*4]
            rpn_bbox_outside_weights2 = self._anchor_targets[
                'rpn_bbox_outside_weights2']  # [1,height,width ,9*4]

            # 是rpn部分的loss

            rpn_loss_box2 = self._smooth_l1_loss(rpn_bbox_pred,
                                                 rpn_bbox_targets2,
                                                 rpn_bbox_inside_weights2,
                                                 rpn_bbox_outside_weights2,
                                                 sigma=sigma_rpn,
                                                 dim=[1, 2, 3])
            ##############################################3
            lam = cfg.lamda
            rpn_cross_entropy = lam * rpn_cross_entropy1 + (
                1 - lam) * rpn_cross_entropy2
            rpn_loss_box = lam * rpn_loss_box1 + (1 - lam) * rpn_loss_box2
        else:
            raise Exception(
                "check cfg.TRAIN.IMS_PER_BACTH in /lib/model/config.py or experiments/cfgs/*.yml"
            )

        if cfg.loss_strategy == 'RCNN_ONLY' or cfg.loss_strategy == 'RCNN+RPN' or cfg.loss_strategy == 'NOCHANGE':
            # RCNN, class loss
            cls_score = self._predictions["cls_score"]  # [256,21]
            label = self._proposal_targets["labels"].view(-1)  #[256]
            # RCNN的loss
            cross_entropy = F.cross_entropy(
                cls_score.view(-1, self._num_classes), label)

            # RCNN, bbox loss
            bbox_pred = self._predictions['bbox_pred']  # [256,84]
            bbox_targets = self._proposal_targets['bbox_targets']  # [256,84]
            bbox_inside_weights = self._proposal_targets[
                'bbox_inside_weights']  # [256,84]
            bbox_outside_weights = self._proposal_targets[
                'bbox_outside_weights']  # [256,84]
            # RCNN box的loss

            loss_box = self._smooth_l1_loss(bbox_pred, bbox_targets,
                                            bbox_inside_weights,
                                            bbox_outside_weights)

        if cfg.loss_strategy == 'RCNN_ONLY' or cfg.loss_strategy == 'RCNN+RPN':
            lam = cfg.lamda
            label2 = self._proposal_targets['labels'][
                self.rcnn_mix_index, :].view(-1)
            cross_entropy2 = F.cross_entropy(
                cls_score.view(-1, self._num_classes), label2)
            cross_entropy = lam * cross_entropy + (1 - lam) * cross_entropy2

            bbox_targets2 = self._proposal_targets['bbox_targets'][
                self.rcnn_mix_index, :]
            bbox_inside_weights2 = self._proposal_targets[
                'bbox_inside_weights'][self.rcnn_mix_index, :]
            bbox_outside_weights2 = self._proposal_targets[
                'bbox_outside_weights'][self.rcnn_mix_index, :]
            loss_box2 = self._smooth_l1_loss(bbox_pred, bbox_targets2,
                                             bbox_inside_weights2,
                                             bbox_outside_weights2)
            loss_box = lam * loss_box + (1 - lam) * loss_box2

        if cfg.loss_strategy == 'RPN_ONLY':
            pass

        if cfg.loss_strategy == 'RCNN+RPN' or cfg.loss_strategy == 'NOCHANGE':
            self._losses['cross_entropy'] = cross_entropy
            self._losses['loss_box'] = loss_box
            self._losses['rpn_cross_entropy'] = rpn_cross_entropy
            self._losses['rpn_loss_box'] = rpn_loss_box

            loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
        elif cfg.loss_strategy == 'RPN_ONLY':
            loss = rpn_cross_entropy + rpn_loss_box
            self._losses['rpn_cross_entropy'] = rpn_cross_entropy
            self._losses['rpn_loss_box'] = rpn_loss_box

        elif cfg.loss_strategy == 'RCNN_ONLY':
            loss = cross_entropy + loss_box
            self._losses['cross_entropy'] = cross_entropy
            self._losses['loss_box'] = loss_box

        else:
            raise Exception(
                "check cfg.TRAIN.loss_strategy in /lib/model/config.py or experiments/cfgs/*.yml"
            )

##################################################################################################################
        self._losses['total_loss'] = loss

        for k in self._losses.keys():
            self._event_summaries[k] = self._losses[k]

        return loss
 def training_step(self, batch):
     """One training step: forward pass and cross-entropy loss on a batch."""
     inputs, targets = batch
     logits = self(inputs)  # Generate predictions
     return F.cross_entropy(logits, targets)  # Calculate loss
Example #36
0
 def _loss(self, logits, labels):
     return F.cross_entropy(logits, labels.view(-1).long())
Example #37
0
def train(train_loader, model, criterion, optimizer, epoch, logger, writer, args):
    """One training epoch of the transformer-style captioning model.

    Teacher-forces with caps[:-1] as decoder input and caps[1:] as targets,
    adds an auxiliary code-length cross-entropy weighted by
    args.alpha_codelen, and logs running loss / top-1 / top-5 to tqdm and
    TensorBoard. Uses the module-level `device` and `accuracy` helper.
    """

    model.train()

    losses = AverageMeter()  # loss (per word decoded)
    aux_top1 = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()  # top5 accuracy

    # Batches
    progress = tqdm(enumerate(train_loader), total=len(train_loader), leave=False)
    for i, (imgs, caps, caplens, chidx) in progress:
        # Move to GPU, if available
        # Shifted teacher-forcing pair: input drops the last token, target
        # drops the first.  tgt_y is permuted to (batch, seq).
        tgt = caps[:-1].to(device)
        tgt_y = caps[1:].to(device).permute(1, 0)

        imgs = imgs.to(device)
        # caps = caps.to(device)
        caplens = caplens.to(device)
        chidx = chidx.to(device)

        # Forward prop.
        # scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(feature, caps, caplens)
        scores, aux_codelen = model(imgs, tgt)
        # print(scores.size())

        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        # targets = caps_sorted[:, 1:]
        # print(scores.topk(1, dim=-1).indices.view(len(scores), -1))

        logger(scores, chidx, caps, 'train: ')
        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this

        # Calculate loss
        # loss = criterion(scores.permute(1, 0, 2).view(-1, scores.size(-1)), tgt_y.view(-1, tgt_y.size(-1)))
        # Flatten (seq, batch, vocab) predictions and targets for the criterion.
        scores = scores.permute(1, 0, 2).reshape(-1, scores.size(-1))
        tgt_y = tgt_y.reshape(-1)
        # print(scores.size(), tgt_y.size())
        loss = criterion(scores, tgt_y)
        # loss_aux = criterion(aux_out, chidx)
        # Auxiliary code-length head; `caplens - 3` presumably maps lengths to
        # class indices (offset for special tokens) — TODO confirm.
        loss += args.alpha_codelen * F.cross_entropy(aux_codelen, caplens - 3)

        # Add doubly stochastic attention regularization
        # loss += args.alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()

        # Back prop.
        optimizer.zero_grad()
        # (loss + 10 * loss_aux).backward()
        loss.backward()

        # Clip gradients
        if args.grad_clip is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)


        # Update weights
        optimizer.step()

        # Keep track of metrics
        losses.update(loss.item(), sum(caplens - 1))
        top5.update(accuracy(scores, tgt_y, 5), sum(caplens - 1))
        top1.update(accuracy(scores, tgt_y, 1), sum(caplens - 1))
        progress.set_description("train loss: %.4f, top1: %2.2f%%, top5: %2.2f%%"%(losses.avg, top1.avg, top5.avg))

    writer.add_scalar('Loss/train', losses.avg, epoch)
    writer.add_scalar('Accuracy/train', top1.avg, epoch)
    def train(self, current_state, action, next_state, optimizer, eta = 1e-3):
        """Compute a curiosity reward for (state, action) and periodically
        train the curiosity network on a replayed minibatch.

        The reward is a hand-crafted misclassification count (one per
        component of next_state that the network's one-hot prediction gets
        wrong), scaled by `eta`. Every `self.train_every` calls — once the
        buffer holds at least `self.batch_size` items — a minibatch is
        sampled and the network is trained with per-component cross-entropy.

        Returns the scaled curiosity reward (a plain number, not a tensor).
        """

        current_state = self.oneHotEncoding(np.append(current_state,action))
        current_state = torch.from_numpy(current_state).float()

        #compute curiosity reward
        self.curiosity_net.eval()
        current_state = current_state.unsqueeze(0).to(self.device)
        pred_state=self.curiosity_net.forward(current_state)

        #next_state = torch.from_numpy(next_state).float().to(self.device)

        loss = 0
        for i in range(len(pred_state)):
            target = torch.from_numpy(np.array([next_state[i]])).to(self.device)
            #loss += F.cross_entropy(input=pred_state[i],target=target)
            #loss += self.marginal_loss(pred_state[i], target)

            #Hand crafted loss. +1 for every missclasification in the one-hot encoding
            if np.argmax(pred_state[i].cpu().detach().numpy()) != target.cpu().detach().numpy()[0]:
                loss += 1
        loss = eta * loss

        #save states into memory buffer
        self.push(next_state, current_state)

        # Periodic replay training once enough samples are buffered.
        if (self.train_counter % self.train_every) == 0 and (len(self.memory_target) >= self.batch_size):
            batch_mask = self.sample_index(self.batch_size)
            loss_batch = 0
            self.curiosity_net.train()
            optimizer.zero_grad()
            current_batch = [self.memory_prediction[i] for i in batch_mask]
            current_batch = torch.cat(current_batch)
            current_batch = torch.reshape(current_batch, (self.batch_size,-1))

            prediction_batch = self.curiosity_net.forward(current_batch)

            # One cross-entropy term per predicted state component.
            for j in range(len(next_state)):

                prediction_batch_sub = prediction_batch[j]

                prediction_batch_sub = torch.reshape(prediction_batch_sub, (self.batch_size,-1))

                target_batch = [torch.from_numpy(np.array([self.memory_target[i][j]])).to(self.device) for i in batch_mask]
                target_batch = torch.cat(target_batch)
                #target_batch = torch.reshape(target_batch, (self.batch_size))
                #print(target_batch.size())
                #print(target_batch)
                #print(prediction_batch.size())
                #print(prediction_batch)

                loss_batch += F.cross_entropy(input=prediction_batch_sub,target=target_batch)
                #loss_batch += self.marginal_loss(prediction_batch_sub, target_batch)
            loss_batch = (1/self.batch_size) * loss_batch

            #print('TRAINED')
            #print('LOSS: ', loss_batch)
            loss_batch.backward(retain_graph=True)
            optimizer.step()

        self.train_counter += 1

        #return loss.cpu().detach().numpy()
        #return loss.detach().numpy()
        return loss
def main(args):
    """Train the neural-motifs scene-graph-to-image GAN on Visual Genome.

    Builds the VG train/val data loaders, the combined
    generator/discriminator model and its optimizers, then runs the main
    training loop: a generator step, an object-discriminator step and an
    image-discriminator step per batch, with periodic console/TensorBoard
    logging and checkpointing.

    Args:
        args: parsed command-line namespace (see check_args for validation).
    """
    print(args)
    check_args(args)
    if not exists(args.output_dir):
        os.makedirs(args.output_dir)
    summary_writer = SummaryWriter(args.output_dir)

    # if args.coco:
    #     train, val = CocoDetection.splits()
    #     val.ids = val.ids[:args.val_size]
    #     train.ids = train.ids
    #     train_loader, val_loader = CocoDataLoader.splits(train, val, batch_size=args.batch_size,
    #                                                      num_workers=args.num_workers,
    #                                                      num_gpus=args.num_gpus)
    # else:
    train, val, _ = VG.splits(transform=transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.ToTensor(),
        # ImageNet normalization statistics.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]))
    train_loader, val_loader = VGDataLoader.splits(
        train,
        val,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        num_gpus=args.num_gpus)
    print(train.ind_to_classes)

    all_in_one_model = neural_motifs_sg2im_model(args, train.ind_to_classes)
    # Freeze the detector
    # for n, param in all_in_one_model.detector.named_parameters():
    #     param.requires_grad = False
    all_in_one_model.cuda()
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    # t = global iteration counter; epoch/checkpoint may be restored state.
    t, epoch, checkpoint = all_in_one_model.t, all_in_one_model.epoch, all_in_one_model.checkpoint
    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for step, batch in enumerate(
                tqdm(train_loader,
                     desc='Training Epoch %d' % epoch,
                     total=len(train_loader))):
            # After eval_mode_after iterations, freeze batch-norm statistics
            # by switching to eval mode and restart the optimizer.
            if t == args.eval_mode_after:
                print('switching to eval mode')
                all_in_one_model.model.eval()
                all_in_one_model.optimizer = optim.Adam(
                    all_in_one_model.parameters(), lr=args.learning_rate)
            # Optional schedules for the L1 pixel-loss weight: either stepwise
            # at given iterations, or linear interpolation over training.
            if args.l1_mode == "change" and t in args.l1_change_iters:
                old_l1_weight = args.l1_pixel_loss_weight
                args.l1_pixel_loss_weight = args.l1_change_vals[
                    args.l1_change_iters.index(t)]
                print(
                    # fixed: was "%10.f" (width 10, zero decimals), which hid
                    # the old weight's fractional part; "%.10f" was intended.
                    "Change l1_pixel_loss_weight from %.10f to %.10f at iteration %d"
                    % (old_l1_weight, args.l1_pixel_loss_weight, t))
            elif args.l1_mode == "change_linear":
                old_l1_weight = args.l1_pixel_loss_weight
                args.l1_pixel_loss_weight = args.l1_change_vals[0] + (
                    args.l1_change_vals[1] -
                    args.l1_change_vals[0]) * t / args.num_iterations
                print(
                    # fixed: same "%10.f" -> "%.10f" typo as above.
                    "Change l1_pixel_loss_weight from %.10f to %.10f at iteration %d"
                    % (old_l1_weight, args.l1_pixel_loss_weight, t))

            # Analogous schedules for the noise standard deviation.
            if args.noise_std_mode == "change" and t in args.noise_std_change_iters:
                old_noise_std = args.noise_std
                args.noise_std = args.noise_std_change_vals[
                    args.noise_std_change_iters.index(t)]
                print("Change noise_std from %.10f to %.10f at iteration %d" %
                      (old_noise_std, args.noise_std, t))
            elif args.noise_std_mode == "change_linear":
                old_noise_std = args.noise_std
                args.noise_std = args.noise_std_change_vals[0] + (
                    args.noise_std_change_vals[1] -
                    args.noise_std_change_vals[0]) * t / args.num_iterations
                print("Change noise_std from %.10f to %.10f at iteration %d" %
                      (old_noise_std, args.noise_std, t))

            t += 1

            with timeit('forward', args.timing):
                result = all_in_one_model[batch]
                imgs, imgs_pred, objs, g_scores_fake_crop, g_obj_scores_fake_crop, g_scores_fake_img, \
                d_scores_fake_crop, d_obj_scores_fake_crop, d_scores_real_crop, d_obj_scores_real_crop, \
                d_scores_fake_img, d_scores_real_img = result.imgs, result.imgs_pred, result.objs, \
                result.g_scores_fake_crop, result.g_obj_scores_fake_crop, result.g_scores_fake_img, \
                result.d_scores_fake_crop, result.d_obj_scores_fake_crop, result.d_scores_real_crop, \
                result.d_obj_scores_real_crop, result.d_scores_fake_img, result.d_scores_real_img

            with timeit('loss', args.timing):
                # Base reconstruction losses (e.g. L1 pixel loss).
                total_loss, losses = calculate_model_losses(
                    args, imgs, imgs_pred)

                if all_in_one_model.obj_discriminator is not None:
                    # Auxiliary-classifier loss on fake crops plus the
                    # generator's adversarial loss from the object D.
                    total_loss = add_loss(
                        total_loss,
                        F.cross_entropy(g_obj_scores_fake_crop, objs), losses,
                        'ac_loss', args.ac_loss_weight)
                    weight = args.discriminator_loss_weight * args.d_obj_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_crop),
                                          losses, 'g_gan_obj_loss', weight)

                if all_in_one_model.img_discriminator is not None:
                    weight = args.discriminator_loss_weight * args.d_img_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_img),
                                          losses, 'g_gan_img_loss', weight)

            losses['total_loss'] = total_loss.item()
            if not math.isfinite(losses['total_loss']):
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            with timeit('backward', args.timing):
                all_in_one_model.optimizer.zero_grad()
                total_loss.backward()
                all_in_one_model.optimizer.step()

            # Object-discriminator update: GAN loss + AC loss on real & fake.
            if all_in_one_model.obj_discriminator is not None:
                with timeit('d_obj loss', args.timing):
                    d_obj_losses = LossManager()
                    d_obj_gan_loss = gan_d_loss(d_scores_real_crop,
                                                d_scores_fake_crop)
                    d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
                    d_obj_losses.add_loss(
                        F.cross_entropy(d_obj_scores_real_crop, objs),
                        'd_ac_loss_real')
                    d_obj_losses.add_loss(
                        F.cross_entropy(d_obj_scores_fake_crop, objs),
                        'd_ac_loss_fake')

                with timeit('d_obj backward', args.timing):
                    all_in_one_model.optimizer_d_obj.zero_grad()
                    d_obj_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_obj.step()

            # Image-discriminator update.
            if all_in_one_model.img_discriminator is not None:
                with timeit('d_img loss', args.timing):
                    d_img_losses = LossManager()
                    d_img_gan_loss = gan_d_loss(d_scores_real_img,
                                                d_scores_fake_img)
                    d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')

                with timeit('d_img backward', args.timing):
                    all_in_one_model.optimizer_d_img.zero_grad()
                    d_img_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_img.step()

            # Periodic console + TensorBoard logging of G and D losses.
            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                G_loss_list = []
                for name, val in losses.items():
                    G_loss_list.append('[%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                    summary_writer.add_scalar("G_%s" % name, val, t)
                print("G: %s" % ", ".join(G_loss_list))
                checkpoint['losses_ts'].append(t)

                if all_in_one_model.obj_discriminator is not None:
                    D_obj_loss_list = []
                    for name, val in d_obj_losses.items():
                        D_obj_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_obj_%s" % name, val, t)
                    print("D_obj: %s" % ", ".join(D_obj_loss_list))

                if all_in_one_model.img_discriminator is not None:
                    D_img_loss_list = []
                    for name, val in d_img_losses.items():
                        D_img_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_img_%s" % name, val, t)
                    print("D_img: %s" % ", ".join(D_img_loss_list))

            # Periodic evaluation on train/val subsets plus checkpointing.
            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, train_loader,
                                            all_in_one_model)
                t_losses, t_samples = train_results

                checkpoint['train_samples'].append(t_samples)
                checkpoint['checkpoint_ts'].append(t)
                for name, images in t_samples.items():
                    summary_writer.add_image("train_%s" % name, images, t)

                print('checking on val')
                val_results = check_model(args, val_loader, all_in_one_model)
                val_losses, val_samples = val_results
                checkpoint['val_samples'].append(val_samples)
                for name, images in val_samples.items():
                    summary_writer.add_image("val_%s" % name, images, t)

                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)
                    summary_writer.add_scalar("val_%s" % k, v, t)
                checkpoint['model_state'] = all_in_one_model.model.state_dict()

                if all_in_one_model.obj_discriminator is not None:
                    checkpoint[
                        'd_obj_state'] = all_in_one_model.obj_discriminator.state_dict(
                        )
                    checkpoint[
                        'd_obj_optim_state'] = all_in_one_model.optimizer_d_obj.state_dict(
                        )

                if all_in_one_model.img_discriminator is not None:
                    checkpoint[
                        'd_img_state'] = all_in_one_model.img_discriminator.state_dict(
                        )
                    checkpoint[
                        'd_img_optim_state'] = all_in_one_model.optimizer_d_img.state_dict(
                        )

                checkpoint[
                    'optim_state'] = all_in_one_model.optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to ', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = [
                    'model_state', 'optim_state', 'model_best_state',
                    'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                    'd_img_state', 'd_img_optim_state', 'd_img_best_state'
                ]
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
Example #40
0
    # NOTE(review): the enclosing function header is not visible in this
    # chunk; names such as model, train_loader, optimizer, lr_scheduler,
    # device, num_epochs, N_shot, N_query, train_way, euclidean_metric,
    # count_acc and model_dir come from the enclosing scope.
    # Episodic training loop for a prototypical network (N-way, K-shot).
    model.train()
    for epoch in range(num_epochs):

        for i, batch in enumerate(train_loader, 1):
            data, _ = batch
            data = data.to(device)
            # The first N_shot * train_way samples form the support set; the
            # remainder is the query set.
            p = N_shot * train_way
            data_support, data_query = data[:p], data[p:]
            proto = model(data_support)
            # Class prototypes: mean embedding over the N_shot support
            # examples of each of the train_way classes.
            proto = proto.reshape(N_shot, train_way, -1).mean(dim=0)

            # Query labels repeat 0..train_way-1 for each of the N_query
            # queries, matching the loader's ordering — assumption from the
            # slicing above; TODO confirm against the sampler.
            label = torch.arange(train_way).repeat(N_query)
            label = label.type(torch.cuda.LongTensor)

            # Distances to prototypes act as classification logits.
            logits = euclidean_metric(model(data_query), proto)
            loss = F.cross_entropy(logits, label)
            acc = count_acc(logits, label)
            print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
                epoch + 1, i, len(train_loader), loss.item(), acc))

            # NOTE(review): saves a checkpoint on every batch and before the
            # optimizer step; likely intended once per epoch — confirm.
            torch.save(model.state_dict(),
                       os.path.join(model_dir, 'p1_model.pth'))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Drop tensor references so CUDA memory can be reclaimed.
            proto = None
            logits = None
            loss = None
        lr_scheduler.step()
Example #41
0
def train(args):
    """Train a hierarchical attention network: a word-level attention GRU
    feeding a sentence-level attention GRU for document classification.

    Logs training accuracy every ``args.log_interval`` steps, evaluates on
    the dev set every ``args.test_interval`` steps, saves the best models
    and early-stops (by raising KeyboardInterrupt, caught by the caller)
    after ``args.early_stopping`` steps without improvement.

    Args:
        args: parsed hyper-parameter namespace (lr, epoch, iterations,
            batch_size, cuda, save_dir, ...).
    """
    text = data_processor.load_data(args)
    train_iter = data_processor.gen_batch(args, text)
    dev_iter = data_processor.gen_test(text, args)
    print('加载数据完成')
    word_attn_model = AttentionWordGRU(args)
    sent_attn_model = AttentionSentGRU(args)
    # BUG FIX: the original set `model = None` and then called
    # `model.cuda()`, which raised AttributeError whenever args.cuda was
    # set and never moved the actual models to the GPU.
    if args.cuda:
        word_attn_model.cuda()
        sent_attn_model.cuda()
    word_optimizer = torch.optim.Adam(word_attn_model.parameters(), lr=args.lr)
    sent_optimizer = torch.optim.Adam(sent_attn_model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    for epoch in range(1, args.epoch + 1):
        for i in range(args.iterations):
            word_optimizer.zero_grad()
            sent_optimizer.zero_grad()
            doc_texts, targets = next(train_iter)

            # doc_texts has shape (batch_size, sents_num, words_num)
            word_attn_vectors = None
            for doc_text in doc_texts:
                # word_attn_vector has shape (sent_num, hidden_size)
                word_attn_vector = word_attn_model(doc_text)
                # Add a leading batch axis: (1, sent_num, hidden_size)
                word_attn_vector = word_attn_vector.unsqueeze(0)
                if word_attn_vectors is None:
                    word_attn_vectors = word_attn_vector
                else:
                    # word_attn_vectors has shape
                    # (batch_size, sent_num, hidden_size)
                    word_attn_vectors = torch.cat(
                        (word_attn_vectors, word_attn_vector), 0)
            logits = sent_attn_model(word_attn_vectors)
            loss = F.cross_entropy(logits, targets)
            loss.backward()
            word_optimizer.step()
            sent_optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns, per row, the max value and
                # its column index; the index is the predicted class.
                corrects = (torch.max(logits, 1)[1] == targets).sum()
                train_acc = 100.0 * corrects / args.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        args.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, word_attn_model, sent_attn_model)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(
                            best_acc))
                        save(word_attn_model, args.save_dir, 'best', steps)
                        save(sent_attn_model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
Example #42
0
    def compute_loss(
        self,
        targets: List[Dict[str, Tensor]],
        head_outputs: Dict[str, Tensor],
        anchors: List[Tensor],
        matched_idxs: List[Tensor],
    ) -> Dict[str, Tensor]:
        """SSD multibox loss: smooth-L1 box regression on foreground anchors
        plus cross-entropy classification with hard negative mining.

        Args:
            targets: per-image ground truth dicts with "boxes" and "labels".
            head_outputs: dict with "bbox_regression" and "cls_logits".
            anchors: per-image anchor boxes.
            matched_idxs: per-image index of the matched ground-truth box for
                each anchor; negative values mean background.

        Returns:
            Dict with "bbox_regression" and "classification" losses, each
            normalized by the total number of foreground anchors.
        """
        bbox_regression = head_outputs["bbox_regression"]
        cls_logits = head_outputs["cls_logits"]

        # Match original targets with default boxes
        num_foreground = 0
        bbox_loss = []
        cls_targets = []
        for (
            targets_per_image,
            bbox_regression_per_image,
            cls_logits_per_image,
            anchors_per_image,
            matched_idxs_per_image,
        ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs):
            # produce the matching between boxes and targets
            foreground_idxs_per_image = torch.where(matched_idxs_per_image >= 0)[0]
            foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image]
            num_foreground += foreground_matched_idxs_per_image.numel()

            # Calculate regression loss (only on foreground anchors, against
            # box-coder-encoded offsets)
            matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image]
            bbox_regression_per_image = bbox_regression_per_image[foreground_idxs_per_image, :]
            anchors_per_image = anchors_per_image[foreground_idxs_per_image, :]
            target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image)
            bbox_loss.append(
                torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum")
            )

            # Estimate ground truth for class targets: background (0) for all
            # anchors, the matched GT label for foreground anchors
            gt_classes_target = torch.zeros(
                (cls_logits_per_image.size(0),),
                dtype=targets_per_image["labels"].dtype,
                device=targets_per_image["labels"].device,
            )
            gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][
                foreground_matched_idxs_per_image
            ]
            cls_targets.append(gt_classes_target)

        bbox_loss = torch.stack(bbox_loss)
        cls_targets = torch.stack(cls_targets)

        # Calculate classification loss per anchor (reduction="none" keeps
        # the per-anchor losses for hard negative mining below)
        num_classes = cls_logits.size(-1)
        cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view(
            cls_targets.size()
        )

        # Hard Negative Sampling: keep only the neg_to_pos_ratio highest-loss
        # background anchors per image
        foreground_idxs = cls_targets > 0
        num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True)
        # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio
        negative_loss = cls_loss.clone()
        negative_loss[foreground_idxs] = -float("inf")  # use -inf to detect positive values that creeped in the sample
        values, idx = negative_loss.sort(1, descending=True)
        # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values))
        # rank of each anchor in the descending-loss order; keep the top
        # num_negative background anchors
        background_idxs = idx.sort(1)[1] < num_negative

        N = max(1, num_foreground)
        return {
            "bbox_regression": bbox_loss.sum() / N,
            "classification": (cls_loss[foreground_idxs].sum() + cls_loss[background_idxs].sum()) / N,
        }
Example #43
0
 def validation_step(self, batch, batch_idx):
     # OPTIONAL
     """Compute cross-entropy on one validation batch.

     Returns a dict with the single key 'val_loss'.
     """
     inputs, targets = batch
     predictions = self.forward(inputs)
     loss = F.cross_entropy(predictions, targets)
     return {'val_loss': loss}
Example #44
0
 def training_step(self, batch, batch_idx):
     # REQUIRED
     """Compute, log and return the training cross-entropy loss.

     BUG FIX: the original returned ``self.log('loss', loss)``;
     ``LightningModule.log`` returns None, and a training_step that
     returns None makes Lightning skip the batch, so no training
     happened. Log first, then return the loss tensor.
     """
     x, y = batch
     y_hat = self.forward(x)
     loss = F.cross_entropy(y_hat, y)
     self.log('loss', loss)
     return loss
Example #45
0
    def otf_bt(self, batch, lambda_xe, use_pointer=False, gamma=0):
        """
        On the fly back-translation.

        Runs one supervised training step on a (src -> tgt) pair produced
        by back-translation, optionally mixing in a REINFORCE-style policy
        loss when ``self.params.rl_finetune`` is set.

        Args:
            batch: dict with 'src_type', 'tgt_type' and 'data' holding the
                (src_seq, tgt_seq, src_pos, tgt_pos) tensors.
            lambda_xe: NOTE(review): unused in this body — presumably meant
                to scale the cross-entropy loss; confirm against callers.
            use_pointer: if True, use NLL loss (pointer network emits log
                probabilities); otherwise cross-entropy on raw logits.
            gamma: interpolation weight between the XE loss (1 - gamma) and
                the policy-gradient loss (gamma).
        """
        params = self.params
        src_type, tgt_type, data = batch['src_type'], batch['tgt_type'], batch[
            'data']
        src_seq, tgt_seq, src_pos, tgt_pos = map(
            lambda x: x.to(Constants.device), data)
        batch_size = src_seq.size(0)
        src_id, tgt_id = self.type_dict[src_type], self.type_dict[tgt_type]

        self.model.train()

        pred = self.model(src_seq, src_pos, tgt_seq, tgt_pos, src_id, tgt_id)
        # Gold targets are the tgt sequence shifted left by one (teacher
        # forcing); PAD positions are ignored in the loss.
        gold = tgt_seq[:, 1:].contiguous().view(-1)
        if use_pointer:
            loss = F.nll_loss(pred,
                              gold,
                              ignore_index=Constants.PAD,
                              reduction='sum')
        else:
            loss = F.cross_entropy(pred,
                                   gold,
                                   ignore_index=Constants.PAD,
                                   reduction='sum')

        if self.params.rl_finetune:
            # Self-critical policy gradient: greedy decode is the baseline,
            # a sampled sentence is the policy rollout.
            prob = F.softmax(pred.view(batch_size, -1, pred.size(-1)), dim=-1)
            mask = (tgt_seq[:, 1:] != Constants.PAD).long()

            _, gred_sent = prob.max(dim=-1)
            gred_sent = gred_sent * mask
            distribution = Categorical(prob)
            samp_sent = distribution.sample()
            samp_sent = samp_sent * mask
            log_probs = distribution.log_prob(samp_sent)

            baseline = self.get_reward(input_seq=src_seq,
                                       policy_gen=gred_sent,
                                       tgt_seq=tgt_seq,
                                       type=tgt_type)
            rewards = self.get_reward(input_seq=src_seq,
                                      policy_gen=samp_sent,
                                      tgt_seq=tgt_seq,
                                      type=tgt_type)

            if tgt_type == 'simp':
                avg_reward = rewards.sum() / rewards.shape[0]
                self.rewards_simp.append(float(avg_reward))

            baseline = torch.Tensor(baseline).float().unsqueeze(-1).to(
                Constants.device)
            rewards = torch.Tensor(rewards).float().unsqueeze(-1).to(
                Constants.device)

            # REINFORCE with baseline: higher-than-baseline rewards increase
            # the sampled tokens' log-probabilities.
            policy_loss = -(log_probs *
                            (rewards - baseline)).sum() / batch_size

            loss = (1 - gamma) * loss + gamma * policy_loss

        else:
            loss = (1 - gamma) * loss

        self.stat[src_type + '_' + tgt_type + '_otf_loss'].append(loss.item())

        self.model_optimizer.zero_grad()
        loss.backward()
        self.model_optimizer.step()
Example #46
0
def one_hot_cross_entropy_loss(y_hat, y):
    """Cross-entropy between logits ``y_hat`` and one-hot targets ``y``.

    The one-hot target rows are converted to class indices with argmax
    before delegating to ``F.cross_entropy``.
    """
    class_indices = y.argmax(dim=1)
    return F.cross_entropy(y_hat, class_indices)
Example #47
0
    def forward(self, imgs, bboxes, labels, scale):
        """One Faster R-CNN training step for a single image.

        Computes the four training losses (RPN localization, RPN
        classification, RoI localization, RoI classification) plus their sum.

        Args:
            imgs: image batch of shape (1, C, H, W) — only batch size 1 is
                supported.
            bboxes: ground-truth boxes, shape (1, num_boxes, 4).
            labels: ground-truth class labels, shape (1, num_boxes).
            scale: image rescale factor applied during preprocessing.

        Returns:
            LossTuple of the four losses and their total.
        """
        n = imgs.shape[0]
        if n != 1:  # only a single image per batch is supported
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        # Ground-truth boxes and labels for the (single) image.
        bbox = bboxes[0]
        label = labels[0]

        # Shared feature map from the backbone extractor.
        features = self.faster_rcnn.extractor(imgs)

        # Region proposals from the RPN.
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)

        # Per-image RPN scores and regression offsets.
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois
        # ------------------------------------------ #
        #               RPN losses
        # ------------------------------------------ #
        # Build the targets the RPN should predict for each anchor.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox.cpu().numpy(), anchor, img_size)

        gt_rpn_label = torch.Tensor(gt_rpn_label).long()
        gt_rpn_loc = torch.Tensor(gt_rpn_loc)

        # RPN localization and classification losses; label -1 marks anchors
        # excluded from the classification loss.
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)

        # ------------------------------------------ #
        #            Classifier-head losses
        # ------------------------------------------ #
        # Sample RoIs and build their regression/classification targets.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            bbox.cpu().numpy(),
            label.cpu().numpy(), self.loc_normalize_mean,
            self.loc_normalize_std)

        # All sampled RoIs belong to image 0 (batch size is 1).
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        n_sample = roi_cls_loc.shape[0]
        # Select the regression output of each RoI's ground-truth class.
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(), \
                              torch.Tensor(gt_roi_label).long()]
        gt_roi_label = torch.Tensor(gt_roi_label).long()
        gt_roi_loc = torch.Tensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)
Example #48
0
def train_one_epoch(
    epoch,
    model,
    criteria_x,
    criteria_u,
    optim,
    lr_schdlr,
    ema,
    dltrain_x,
    dltrain_u,
    lb_guessor,
    lambda_u,
    n_iters,
    logger,
):
    """Run one FixMatch-style training epoch.

    Combines a supervised loss on weakly-augmented labeled data with a
    pseudo-label consistency loss on strongly-augmented unlabeled data,
    updating EMA parameters after every optimizer step.

    Args:
        epoch: current epoch index (for logging).
        model: the classifier being trained.
        criteria_x / criteria_u: losses for labeled / unlabeled batches.
        optim, lr_schdlr, ema: optimizer, LR scheduler, EMA tracker.
        dltrain_x, dltrain_u: labeled / unlabeled data loaders.
        lb_guessor: NOTE(review): unused — only referenced in the
            commented-out alternative path below; confirm before removing.
        lambda_u: weight of the unsupervised loss term.
        n_iters: number of iterations per epoch.
        logger: logger for periodic progress messages.

    Returns:
        Epoch averages: (loss, loss_x, loss_u, loss_u_real, mask ratio).
    """
    model.train()
    # loss_meter, loss_x_meter, loss_u_meter, loss_u_real_meter = [], [], [], []
    loss_meter = AverageMeter()
    loss_x_meter = AverageMeter()
    loss_u_meter = AverageMeter()
    loss_u_real_meter = AverageMeter()
    # the number of correctly-predicted and gradient-considered unlabeled data
    n_correct_u_lbs_meter = AverageMeter()
    # the number of gradient-considered strong augmentation (logits above threshold) of unlabeled samples
    n_strong_aug_meter = AverageMeter()
    mask_meter = AverageMeter()

    epoch_start = time.time()  # start time
    dl_x, dl_u = iter(dltrain_x), iter(dltrain_u)
    for it in range(n_iters):
        ims_x_weak, ims_x_strong, lbs_x = next(dl_x)
        ims_u_weak, ims_u_strong, lbs_u_real = next(dl_u)

        lbs_x = lbs_x.cuda()
        # Real unlabeled labels are only used for diagnostics, never for
        # gradients on the unsupervised path.
        lbs_u_real = lbs_u_real.cuda()

        # --------------------------------------

        # Forward labeled-weak, unlabeled-weak and unlabeled-strong images in
        # one interleaved batch (keeps batch-norm statistics consistent).
        bt = ims_x_weak.size(0)
        mu = int(ims_u_weak.size(0) // bt)
        imgs = torch.cat([ims_x_weak, ims_u_weak, ims_u_strong], dim=0).cuda()
        imgs = interleave(imgs, 2 * mu + 1)
        logits = model(imgs)
        logits = de_interleave(logits, 2 * mu + 1)

        logits_x = logits[:bt]
        logits_u_w, logits_u_s = torch.split(logits[bt:], bt * mu)

        loss_x = criteria_x(logits_x, lbs_x)

        # Pseudo-labels from the weak view; only predictions above the 0.95
        # confidence threshold contribute to the unsupervised loss.
        with torch.no_grad():
            probs = torch.softmax(logits_u_w, dim=1)
            scores, lbs_u_guess = torch.max(probs, dim=1)
            mask = scores.ge(0.95).float()

        loss_u = (criteria_u(logits_u_s, lbs_u_guess) * mask).mean()
        loss = loss_x + lambda_u * loss_u
        # Diagnostic only: loss of strong-view predictions vs. true labels.
        loss_u_real = (F.cross_entropy(logits_u_s, lbs_u_real) * mask).mean()

        # --------------------------------------

        # mask, lbs_u_guess = lb_guessor(model, ims_u_weak.cuda())
        # n_x = ims_x_weak.size(0)
        # ims_x_u = torch.cat([ims_x_weak, ims_u_strong]).cuda()
        # logits_x_u = model(ims_x_u)
        # logits_x, logits_u = logits_x_u[:n_x], logits_x_u[n_x:]
        # loss_x = criteria_x(logits_x, lbs_x)
        # loss_u = (criteria_u(logits_u, lbs_u_guess) * mask).mean()
        # loss = loss_x + lambda_u * loss_u
        # loss_u_real = (F.cross_entropy(logits_u, lbs_u_real) * mask).mean()

        optim.zero_grad()
        loss.backward()
        optim.step()
        ema.update_params()
        lr_schdlr.step()

        loss_meter.update(loss.item())
        loss_x_meter.update(loss_x.item())
        loss_u_meter.update(loss_u.item())
        loss_u_real_meter.update(loss_u_real.item())
        mask_meter.update(mask.mean().item())

        # Track how many above-threshold pseudo-labels were actually correct.
        corr_u_lb = (lbs_u_guess == lbs_u_real).float() * mask
        n_correct_u_lbs_meter.update(corr_u_lb.sum().item())
        n_strong_aug_meter.update(mask.sum().item())

        # Periodic progress logging (every 512 iterations).
        if (it + 1) % 512 == 0:
            t = time.time() - epoch_start

            lr_log = [pg['lr'] for pg in optim.param_groups]
            lr_log = sum(lr_log) / len(lr_log)

            logger.info(
                "epoch:{}, iter: {}. loss: {:.4f}. loss_u: {:.4f}. loss_x: {:.4f}. loss_u_real: {:.4f}. "
                "n_correct_u: {:.2f}/{:.2f}. Mask:{:.4f} . LR: {:.4f}. Time: {:.2f}"
                .format(epoch, it + 1, loss_meter.avg, loss_u_meter.avg,
                        loss_x_meter.avg, loss_u_real_meter.avg,
                        n_correct_u_lbs_meter.avg, n_strong_aug_meter.avg,
                        mask_meter.avg, lr_log, t))

            epoch_start = time.time()

    ema.update_buffer()
    return loss_meter.avg, loss_x_meter.avg, loss_u_meter.avg, loss_u_real_meter.avg, mask_meter.avg
Example #49
0
    def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):
        """Multi-scale (FPN) region proposal network forward pass.

        Runs the RPN head on each pyramid feature map, concatenates the
        per-scale scores/offsets, produces RoIs via the proposal layer, and
        (in training) computes the RPN classification and box losses.

        Args:
            rpn_feature_maps: list of feature maps, one per pyramid level.
            im_info: per-image (height, width, scale) info.
            gt_boxes: ground-truth boxes (required when training).
            num_boxes: number of valid ground-truth boxes per image.

        Returns:
            (rois, rpn_loss_cls, rpn_loss_box); the losses are 0 in eval mode.
        """
        n_feat_maps = len(rpn_feature_maps)
        rpn_cls_scores = []
        rpn_cls_probs = []
        rpn_bbox_preds = []
        rpn_shapes = []

        for i in range(n_feat_maps):
            feat_map = rpn_feature_maps[i]
            batch_size = feat_map.size(0)
            
            # return feature map after convrelu layer
            rpn_conv1 = F.relu(self.rpn_Conv(feat_map), inplace=True)
            # get rpn classification score
            rpn_cls_score = self.rpn_cls_score(rpn_conv1)

            # Reshape to 2 channels (fg/bg) so softmax normalizes over them,
            # then reshape back to the full anchor-score layout.
            rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
            # NOTE(review): F.softmax without dim= relies on deprecated
            # implicit-dim behavior — confirm intended dim (likely 1).
            rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape)
            rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

            # get rpn offsets to the anchor boxes
            rpn_bbox_pred = self.rpn_bbox_pred(rpn_conv1)

            # Record the spatial shape per level, and flatten predictions to
            # (batch, num_anchors_at_level, {2 or 4}) for concatenation.
            rpn_shapes.append([rpn_cls_score.size()[2], rpn_cls_score.size()[3]])
            rpn_cls_scores.append(rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
            rpn_cls_probs.append(rpn_cls_prob.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
            rpn_bbox_preds.append(rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4))

        # Concatenate predictions from all pyramid levels along the anchor axis.
        rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
        rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
        rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)

        n_rpn_pred = rpn_cls_score_alls.size(1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.rpn_proposal((rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data, im_info, cfg_key, rpn_shapes))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        if self.training:
            assert gt_boxes is not None

            rpn_data = self.rpn_anchor_target((rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes, rpn_shapes))

            # NOTE(review): batch_size here is the value left over from the
            # last loop iteration; all levels share the same batch size so
            # this works, but it requires n_feat_maps >= 1.
            rpn_label = rpn_data[0].view(batch_size, -1)
            # Keep only anchors with label != -1 (the "don't care" anchors
            # are excluded from the classification loss).
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1,2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
            # Broadcast the per-anchor weights over the 4 box coordinates.
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2).expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2).expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
            rpn_bbox_targets = Variable(rpn_bbox_targets)
            
            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls, rpn_bbox_targets, rpn_bbox_inside_weights, 
                            rpn_bbox_outside_weights, sigma=3)

        return rois, self.rpn_loss_cls, self.rpn_loss_box
Example #50
0
def main():
    """Train a PointerNetwork summarizer and evaluate it each epoch.

    Parses hyper-parameters from the module-level ``parser``, builds the
    train split and validation evaluator for SumMe/TVSum, then runs the
    joint classification + localization training loop, logging and
    checkpointing after every epoch.
    """
    global args
    args = (parser.parse_args())
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    script_name_stem = dir_utils.get_stem(__file__)
    # The checkpoint directory name encodes every hyper-parameter of the run.
    save_directory = dir_utils.get_dir(
        os.path.join(
            project_root, 'ckpts',
            '{:s}-{:s}-{:s}-split-{:d}-claweight-{:s}-{:.1f}-assgin{:.2f}-alpha{:.4f}-dim{:d}-dropout{:.4f}-seqlen{:d}-samplerate-{:d}-{:s}-{:s}'
            .format(script_name_stem, args.dataset, args.eval_metrics,
                    args.split, str(args.set_cls_weight), args.cls_pos_weight,
                    args.hassign_thres, args.alpha, args.hidden_dim,
                    args.dropout, args.seq_len, args.sample_rate,
                    loss_type[args.EMD], match_type[args.hmatch])))
    log_file = os.path.join(save_directory,
                            'log-{:s}.txt'.format(dir_utils.get_date_str()))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)

    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs,
                           dropout=args.dropout,
                           n_enc_layers=2)
    hassign_thres = args.hassign_thres
    logger.info("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    logger.info('Saving logs to {:s}'.format(log_file))

    if args.resume is not None:

        # NOTE(review): checkpoint index is hard-coded instead of coming from
        # args — confirm this is intentional before reusing this script.
        ckpt_idx = 48

        ckpt_filename = args.resume.format(ckpt_idx)
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'

        # Load on CPU first; the model is moved to GPU further below.
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        train_iou = checkpoint['IoU']
        args.start_epoch = checkpoint['epoch']

        logger.info("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))

    model = cuda_model.convertModel2Cuda(model,
                                         gpu_id=args.gpu_id,
                                         multiGpu=args.multiGpu)
    # get train/val split
    # NOTE(review): any dataset other than SumMe/TVSum leaves train_val_perms
    # undefined and raises NameError below — confirm that is acceptable.
    if args.dataset == 'SumMe':
        train_val_perms = np.arange(25)
    elif args.dataset == 'TVSum':
        train_val_perms = np.arange(50)
    # fixed permutation: seeded shuffle keeps the 5-fold split reproducible
    random.Random(0).shuffle(train_val_perms)
    train_val_perms = train_val_perms.reshape([5, -1])
    # fold `args.split` is held out for validation, the rest is training data
    train_perms = np.delete(train_val_perms, args.split, 0).reshape([-1])
    val_perms = train_val_perms[args.split]
    logger.info(" training split: " + str(train_perms))
    logger.info(" val split: " + str(val_perms))

    if args.location == 'home':
        data_path = os.path.join(os.path.expanduser('~'), 'datasets')
    else:
        data_path = os.path.join('/nfs/%s/boyu/SDN' % (args.location),
                                 'datasets')
    train_dataset = vsSumLoader3_c3dd.cDataset(dataset_name=args.dataset,
                                               split='train',
                                               seq_length=args.seq_len,
                                               overlap=0.9,
                                               sample_rate=[args.sample_rate],
                                               train_val_perms=train_perms,
                                               data_path=data_path)
    # val_dataset = vsSumLoader3_c3dd.cDataset(dataset_name=args.dataset, split='val', seq_length=args.seq_len, overlap=0.9, sample_rate=[8])
    val_evaluator = Evaluator.Evaluator(dataset_name=args.dataset,
                                        split='val',
                                        seq_length=args.seq_len,
                                        overlap=0.9,
                                        sample_rate=[args.sample_rate],
                                        sum_budget=0.15,
                                        train_val_perms=val_perms,
                                        eval_metrics=args.eval_metrics,
                                        data_path=data_path)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    # val_dataloader = DataLoader(val_dataset,
    #                               batch_size=args.batch_size,
    #                               shuffle=False,
    #                               num_workers=4)

    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=float(args.lr))
    optim_scheduler = optim.lr_scheduler.ReduceLROnPlateau(model_optim,
                                                           'min',
                                                           patience=10)

    alpha = args.alpha
    # cls_weights = torch.FloatTensor([0.2, 1.0]).cuda()
    # Class weights: derived from the positive-sample ratio when requested,
    # otherwise a fixed 50/50.
    if args.set_cls_weight:
        cls_weights = torch.FloatTensor([
            1. * train_dataset.n_positive_train_samples /
            train_dataset.n_total_train_samples, args.cls_pos_weight
        ]).cuda()
    else:
        cls_weights = torch.FloatTensor([0.5, 0.5]).cuda()
    logger.info(" total: {:d}, total pos: {:d}".format(
        train_dataset.n_total_train_samples,
        train_dataset.n_positive_train_samples))
    logger.info(" classify weight: " + str(cls_weights[0]) +
                str(cls_weights[1]))
    for epoch in range(args.start_epoch, args.nof_epoch + args.start_epoch):
        total_losses = AverageMeter()
        loc_losses = AverageMeter()
        cls_losses = AverageMeter()
        Accuracy = AverageMeter()
        IOU = AverageMeter()
        ordered_IOU = AverageMeter()
        model.train()
        pbar = progressbar.ProgressBar(max_value=len(train_dataloader))
        for i_batch, sample_batch in enumerate(train_dataloader):
            pbar.update(i_batch)

            feature_batch = Variable(sample_batch[0])
            start_indices = Variable(sample_batch[1])
            end_indices = Variable(sample_batch[2])
            gt_valids = Variable(sample_batch[3])
            # seq_labels = Variable(sample_batch[3])

            if use_cuda:
                feature_batch = feature_batch.cuda()
                start_indices = start_indices.cuda()
                end_indices = end_indices.cuda()

            # Ground-truth segments as (start, end) index pairs.
            gt_positions = torch.stack([start_indices, end_indices], dim=-1)

            head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                feature_batch)

            pred_positions = torch.stack([head_positions, tail_positions],
                                         dim=-1)
            # Match predictions to ground truth: h_match or f_match variant.
            if args.hmatch:
                assigned_scores, assigned_locations, total_valid, total_iou = h_match.Assign_Batch_v2(
                    gt_positions,
                    pred_positions,
                    gt_valids,
                    thres=hassign_thres)

            else:
                assigned_scores, assigned_locations = f_match.Assign_Batch(
                    gt_positions,
                    pred_positions,
                    gt_valids,
                    thres=hassign_thres)
                # h_match is still called here only to get the IoU statistic.
                _, _, total_valid, total_iou = h_match.Assign_Batch_v2(
                    gt_positions,
                    pred_positions,
                    gt_valids,
                    thres=hassign_thres)

            if total_valid > 0:
                IOU.update(total_iou / total_valid, total_valid)

            assigned_scores = Variable(torch.LongTensor(assigned_scores),
                                       requires_grad=False)
            assigned_locations = Variable(torch.LongTensor(assigned_locations),
                                          requires_grad=False)
            if use_cuda:
                assigned_scores = assigned_scores.cuda()
                assigned_locations = assigned_locations.cuda()

            # Flatten to (B * n_pred, n_classes) for the classification loss.
            cls_scores = cls_scores.contiguous().view(-1,
                                                      cls_scores.size()[-1])
            assigned_scores = assigned_scores.contiguous().view(-1)

            cls_loss = F.cross_entropy(cls_scores,
                                       assigned_scores,
                                       weight=cls_weights)

            if total_valid > 0:
                # Localization loss over predictions matched to a gt segment.
                assigned_head_positions = assigned_locations[:, :, 0]
                assigned_head_positions = assigned_head_positions.contiguous(
                ).view(-1)
                #
                assigned_tail_positions = assigned_locations[:, :, 1]
                assigned_tail_positions = assigned_tail_positions.contiguous(
                ).view(-1)

                head_pointer_probs = head_pointer_probs.contiguous().view(
                    -1,
                    head_pointer_probs.size()[-1])
                tail_pointer_probs = tail_pointer_probs.contiguous().view(
                    -1,
                    tail_pointer_probs.size()[-1])

                # Keep only the rows whose assigned score is non-zero (matched).
                assigned_head_positions = torch.masked_select(
                    assigned_head_positions, assigned_scores.byte())
                assigned_tail_positions = torch.masked_select(
                    assigned_tail_positions, assigned_scores.byte())

                head_pointer_probs = torch.index_select(
                    head_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                tail_pointer_probs = torch.index_select(
                    tail_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))

                if args.EMD:
                    # Earth-mover-style loss against one-hot position targets.
                    assigned_head_positions = to_one_hot(
                        assigned_head_positions, args.seq_len)
                    assigned_tail_positions = to_one_hot(
                        assigned_tail_positions, args.seq_len)

                    prediction_head_loss = EMD_L2(head_pointer_probs,
                                                  assigned_head_positions,
                                                  needSoftMax=True)
                    prediction_tail_loss = EMD_L2(tail_pointer_probs,
                                                  assigned_tail_positions,
                                                  needSoftMax=True)
                else:
                    prediction_head_loss = F.cross_entropy(
                        head_pointer_probs, assigned_head_positions)
                    prediction_tail_loss = F.cross_entropy(
                        tail_pointer_probs, assigned_tail_positions)
                loc_losses.update(
                    prediction_head_loss.data.item() +
                    prediction_tail_loss.data.item(), feature_batch.size(0))
                total_loss = alpha * (prediction_head_loss +
                                      prediction_tail_loss) + cls_loss
            else:
                total_loss = cls_loss

            model_optim.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            model_optim.step()
            cls_losses.update(cls_loss.data.item(), feature_batch.size(0))
            total_losses.update(total_loss.item(), feature_batch.size(0))

        logger.info(
            "Train -- Epoch :{:06d}, LR: {:.6f},\tloss={:.4f}, \t c-loss:{:.4f}, \tloc-loss:{:.4f}\tcls-Accuracy:{:.4f}\tloc-Avg-IOU:{:.4f}\t topIOU:{:.4f}"
            .format(epoch, model_optim.param_groups[0]['lr'], total_losses.avg,
                    cls_losses.avg, loc_losses.avg, Accuracy.avg, IOU.avg,
                    ordered_IOU.avg))

        optim_scheduler.step(total_losses.avg)

        model.eval()

        # IOU = AverageMeter()
        # pbar = progressbar.ProgressBar(max_value=len(val_evaluator))
        # for i_batch, sample_batch in enumerate(val_dataloader):
        #     pbar.update(i_batch)

        #     feature_batch = Variable(sample_batch[0])
        #     start_indices = Variable(sample_batch[1])
        #     end_indices = Variable(sample_batch[2])
        #     gt_valids = Variable(sample_batch[3])
        #     # valid_indices = Variable(sample_batch[3])

        #     if use_cuda:
        #         feature_batch = feature_batch.cuda()
        #         start_indices = start_indices.cuda()
        #         end_indices = end_indices.cuda()

        #     gt_positions = torch.stack([start_indices, end_indices], dim=-1)

        #     head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
        #         feature_batch)#Update: compared to the previous version, we now update the matching rules

        #     pred_positions = torch.stack([head_positions, tail_positions], dim=-1)
        #     pred_scores = cls_scores[:, :, -1]
        #     #TODO: should NOT change here for evaluation!
        #     assigned_scores, assigned_locations, total_valid, total_iou = h_match.Assign_Batch_v2(gt_positions, pred_positions, gt_valids, thres=hassign_thres)
        #     if total_valid>0:
        #         IOU.update(total_iou / total_valid, total_valid)

        F1s = val_evaluator.Evaluate(model)

        logger.info("Val -- Epoch :{:06d}, LR: {:.6f},\tF1s:{:.4f}".format(
            epoch, model_optim.param_groups[0]['lr'], F1s))

        if epoch % 1 == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': total_losses.avg,
                    'cls_loss': cls_losses.avg,
                    'loc_loss': loc_losses.avg,
                    'IoU': IOU.avg,
                    'val_F1s': F1s
                }, (epoch + 1),
                file_direcotry=save_directory)
Example #51
0
        # Build the dev/validation set and its loader (same batch size as training).
        x_dev, y_dev, _ = get_data(dev_path)
        dataset_dev = DealDataset(x_dev, y_dev, device)
        dataloader_dev = DataLoader(dataset=dataset_dev,
                                    batch_size=batch_size,
                                    shuffle=True)

        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
        model.train()
        best_acc = 0  # NOTE(review): never updated while the save block below is commented out
        for i in range(epoch):
            index = 0
            for datas, labels in tqdm(dataloader):
                model.zero_grad()
                output = model(datas)
                loss = F.cross_entropy(output, labels)
                loss.backward()
                optimizer.step()
                index += 1
                if index % 50 == 0:
                    # Every 50 batches, report accuracy on this train batch and on the dev set.
                    true = labels.data.cpu()
                    predic = torch.max(output.data, 1)[1].cpu()
                    train_acc = metrics.accuracy_score(true, predic)
                    dev_acc = evaluate(model, dataloader_dev)
                    print(
                        f'epoch:{i} batch:{index} loss:{loss} train_acc:{train_acc} dev_acc:{dev_acc}'
                    )
                    # if dev_acc > best_acc:
                    #     torch.save(model, f'{output_path}/{model_name}/model.pt')
                    model.train()
Example #52
0
    # Persist the trained weights, then reload them into a fresh network to
    # verify the checkpoint before evaluation.
    PATH = '../model/gesture_recognition_3-1.pth'
    torch.save(model.state_dict(), PATH)

    model = CNN()
    model.load_state_dict(torch.load(PATH))

    model.eval()
    test_loader = load_data('eval')
    acc_set = []
    avg_loss_set = []
    for batch_id, data in enumerate(test_loader()):
        images, labels = data
        image = torch.from_numpy(images)
        label = torch.from_numpy(labels).squeeze()
        outputs = model(image)
        loss = F.cross_entropy(outputs, label)
        _, predicted = torch.max(outputs, 1)
        # NOTE(review): 15 presumably is the batch size — confirm it matches the loader.
        acc = (predicted == label).sum().item() / 15
        acc_set.append(acc)
        avg_loss_set.append(float(loss.detach().numpy()))

    # Average loss and accuracy over all evaluation batches.
    acc_val_mean = np.array(acc_set).mean()
    avg_loss_val_mean = np.array(avg_loss_set).mean()

    print('loss={}, acc={}'.format(avg_loss_val_mean, acc_val_mean))

    # BatchSize=50, epoch=30, loss=0.6554504831631979, acc=0.899999996026357
    # BatchSize=50, epoch=30, loss=0.659913182258606, acc=0.8999999999999999
    # BatchSize=15, epoch=50, loss=0.6402452290058136, acc=0.9066666666666666
 def validation_step(self, batch):
     """Evaluate one validation batch; return detached loss and accuracy score."""
     images, labels = batch
     predictions = self(images)
     batch_loss = F.cross_entropy(predictions, labels)
     batch_score = accuracy(predictions, labels)
     return {'val_loss': batch_loss.detach(), 'val_score': batch_score}
def train():
    """Alternating-source training loop for joint saliency + segmentation.

    Each iteration draws one batch from the saliency loader ("loss 1") and one
    from the VOC segmentation loader ("loss 2"), sums the two losses and steps
    a single Adam optimizer over the module-level ``net``. The learning rate is
    halved every 3000 iterations by rebuilding the optimizer. Summaries are
    written every 50 iterations; validation and checkpointing run every 500.
    Relies on module-level globals (``net``, the two loaders, ``writer``,
    ``learn_rate``, ``start_iter``, ``train_iters``, ``mean``, ``std``,
    ``c_output``, ``path_save_checkpoints``, ``experiment_name``, ...).
    """
    print("============================= TRAIN ============================")

    voc_train_iter = iter(voc_train_loader)
    voc_it = 0
    sal_train_iter = iter(sal_train_loader)
    sal_it = 0
    log = {'best_miou': 0, 'best_it_miou': 0, 
            'best_mae': 1000, 'best_it_mae':0, 'best_fm':0, 'best_it_fm':0}
    optimizer = torch.optim.Adam([{'params': net.parameters(), 
        'lr': learn_rate, 'betas':(0.95, 0.999)}])
    if start_iter > 0:
        # Resume from the checkpoint written at iteration `start_iter`.
        net.load_state_dict(torch.load(os.path.join(
            path_save_checkpoints, "{}.pth".format(start_iter))))

    for i in range(start_iter, train_iters):
        if i % 3000 == 0:
            # Halve the learning rate every 3000 iters by recreating the optimizer.
            _lr = learn_rate / float(2**(i//3000))
            optimizer = torch.optim.Adam([{'params': net.parameters(), 
                'lr': _lr, 'betas':(0.95, 0.999)}])
        """loss 1 """
        # Restart the saliency iterator when the epoch is exhausted.
        if sal_it >= len(sal_train_loader):
            sal_train_iter = iter(sal_train_loader)
            sal_it = 0
        # Fix: use builtin next(); `iterator.next()` only exists on Python 2.
        img_sal, gt_sal = next(sal_train_iter)
        sal_it += 1
        img_sal_raw = img_sal
        gt_sal = gt_sal[:, None, ...].cuda()
        gt_sal = gt_sal.squeeze(1).long()
        img_sal = (img_sal.cuda()-mean)/std

        pred_seg, v_sal, _ = net(img_sal)
        # Fold per-class probabilities into a 2-way (bg, fg) saliency map.
        pred_seg = torch.softmax(pred_seg, 1)
        bg = pred_seg[:, :1]
        fg = (pred_seg[:, 1:]*v_sal[:, 1:]).sum(1, keepdim=True)
        pred_sal = torch.cat((bg, fg), 1)
        # NOTE(review): nll_loss expects log-probabilities but pred_sal holds
        # probabilities — confirm this is intentional.
        loss_sal = F.nll_loss(pred_sal, gt_sal)

        """loss 2 """
        if voc_it >= len(voc_train_loader):
            voc_train_iter = iter(voc_train_loader)
            voc_it = 0
        img_seg, gt_seg, plbl_seg = next(voc_train_iter)
        voc_it += 1
        # Per-image multi-hot class presence derived from the gt segmentation.
        gt_cls = gt_seg[:, None, ...] == torch.arange(c_output)[None, ..., None, None]
        gt_cls = (gt_cls.sum(3).sum(2)>0).float().cuda()
        img_seg_raw = img_seg
        img_seg = (img_seg.cuda()-mean)/std
        pred_seg, _, cls_fc = net(img_seg)
        loss_cls = F.cross_entropy(pred_seg,plbl_seg.cuda())+\
                F.binary_cross_entropy_with_logits(cls_fc[:, 1:], gt_cls[:, 1:])
        loss = loss_cls+loss_sal

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        """output """
        if i % 50 == 0:
            # Periodic summaries: scalar losses plus image grids of inputs,
            # ground truth and predictions for both tasks.
            writer.add_scalar("sal_loss", loss_sal.item(), i)
            writer.add_scalar("cls_loss", loss_cls.item(), i)
            num_show = _num_show if img_seg.size(0) > _num_show else img_seg.size(0)
            img = img_seg_raw[-num_show:]
            writer.add_image('image_seg', torchvision.utils.make_grid(img), i)

            pred = plbl_seg[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('gt_seg', pred,i)
            _, pred_label = pred_seg.max(1)
            pred = pred_label[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('pred_seg', pred,i)

            img = img_sal_raw[-num_show:]
            writer.add_image('image_sal', torchvision.utils.make_grid(img), i)

            pred = gt_sal[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('gt_sal', pred,i)

            pred = fg[-num_show:,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            writer.add_image('pred_sal', pred,i)
            writer.write_html()
            print("iter %d loss_sal %.4f; loss_cls %.4f"%(i, loss_sal.item(), loss_cls.item()))
        """validation"""
        if i!=0 and i % 500 == 0:
            # Checkpoint, run both validators and track the best metrics so far.
            log[i] = {}
            save_dict = net.state_dict()
            torch.save(save_dict, "{}/{}.pth".format(path_save_checkpoints, i))
            miou = val_voc()
            writer.add_scalar("miou", miou, i)
            log[i]['miou'] = miou
            if miou > log['best_miou']:
                log['best_miou'] = miou
                log['best_it_miou'] = i
            print("validation: iter %d; miou %.4f; best %d:%.4f"%(i, miou, log['best_it_miou'], log['best_miou']))
            fm, mae = val_sal()
            writer.add_scalar("mae", mae, i)
            writer.add_scalar("fm", fm, i)
            log[i]['mae'] = mae
            log[i]['fm'] = fm
            if mae < log['best_mae']:
                log['best_mae'] = mae
                log['best_it_mae'] = i
            if fm > log['best_fm']:
                log['best_fm'] = fm
                log['best_it_fm'] = i
            print("mae %.4f; best %d:%.4f"%(mae, log['best_it_mae'], log['best_mae']))
            print("fm %.4f; best %d:%.4f"%(fm, log['best_it_fm'], log['best_fm']))
            with open("output/{}.json".format(experiment_name), "w") as f:
                json.dump(log, f)
Example #55
0
    def forward(self, input):
        '''
        Siamese-style RPN forward pass over one target image and N templates.

        The inputs are two tuples. One for each image.
        :param input holds data (target_feat, im_info, template_feat, gt_boxes, num_boxes)
                target_feat is of size (1, C, H, W)
                gt_boxes is a batch of gt_boxes for tracking, and is of size (N, 1, 6). 6 represents: x1,y1,x2,y2,class,trackid.
                template_feat is of size (N, C, kH, kW).
        :return: (rois, scores, rpn_loss_cls, rpn_loss_box); losses are 0 at
                test time and None when no anchor survives label filtering.
        '''
        if self.training:
            target_feat, im_info, template_feat, gt_boxes, num_boxes = input
            # Keep only x1,y1,x2,y2,class — drop the trackid column.
            gt_boxes = gt_boxes[:, :, :5]
        else:
            target_feat, im_info, template_feat = input

        n_templates = template_feat.size(0)
        nC = template_feat.size(1)
        kh = template_feat.size(2)
        kw = template_feat.size(3)
        assert self.din == nC, 'The feature dims are not compatible.{}!={}'.format(self.din, nC)
        assert nC == target_feat.size(1), 'The feature dims of template_feat and target_feat should be same.'
        assert target_feat.size(0) == 1, 'Input target_feat should have a batch size of 1.'

        # target branch.
        target_feat_cls = self.RPN_Conv_cls(target_feat)
        target_feat_bbox = self.RPN_Conv_bbox(target_feat)

        # template branch.
        template_feat_cls = self.RPN_cls_score(template_feat)
        template_feat_bbox = self.RPN_bbox_pred(template_feat)


        template_feat_cls = template_feat_cls.view(n_templates, self.nc_score_out, -1, template_feat_cls.size(2),
                                                   template_feat_cls.size(3))
        template_feat_bbox = template_feat_bbox.view(n_templates, self.nc_bbox_out, -1, template_feat_bbox.size(2),
                                                     template_feat_bbox.size(3))

        # correlation: template features act as kernels over the target features.
        if self.use_separable_correlation:
            rpn_cls_score = self.depth_wise_cross_correlation_cls(target_feat_cls, template_feat_cls, self.bias_cls)
            rpn_bbox_pred = self.depth_wise_cross_correlation_box(target_feat_bbox, template_feat_bbox, self.bias_bbox)
        else:
            rpn_cls_score = self.cross_correlation(target_feat_cls, template_feat_cls, self.bias_cls)
            rpn_bbox_pred = self.cross_correlation(target_feat_bbox, template_feat_bbox, self.bias_bbox)

        # Scale down the raw correlation responses before the adjust layer.
        rpn_cls_score = rpn_cls_score*0.1
        rpn_bbox_pred = rpn_bbox_pred*0.1
        # adjust
        rpn_bbox_pred = self.RPN_bbox_adjust(rpn_bbox_pred)

        # Softmax over the 2-way (bg/fg) scores, then reshape back.
        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        im_info = im_info.expand((rpn_cls_prob.size(0), im_info.size(1)))
        rois, scores = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                  im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            batch_size = n_templates
            rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            # Anchors labelled -1 are "don't care" and excluded from the loss.
            rpn_keep = rpn_label.view(-1).ne(-1).nonzero().view(-1)
            if len(rpn_keep)>0:
                rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, rpn_keep)
                rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
                rpn_label = rpn_label.long()

                self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
                fg_cnt = torch.sum(rpn_label.data.ne(0))

                rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

                # compute bbox regression loss
                self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                    rpn_bbox_outside_weights, sigma=3, dim=[1, 2, 3])
            else:
                # No valid anchors in this batch — signal the caller with None losses.
                self.rpn_loss_cls, self.rpn_loss_box = None, None
        return rois, scores, self.rpn_loss_cls, self.rpn_loss_box
    # Training loop: cross-entropy + Dice on the segmentation head plus MSE
    # on the predicted distance map.
    net.train()
    for epoch_num in tqdm(range(max_epoch), ncols=70):
        time1 = time.time()
        for i_batch, sampled_batch in enumerate(trainloader):
            time2 = time.time()
            # print('fetch data cost {}'.format(time2-time1))
            volume_batch, label_batch = sampled_batch['image'], sampled_batch[
                'label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()
            outputs, out_dis = net(volume_batch)
            # NOTE(review): import inside the loop body — consider hoisting to module top.
            from utils.losses import compute_fore_dist
            with torch.no_grad():
                # Distance-map target computed from the labels; no gradient needed.
                gt_dis = compute_fore_dist(label_batch.cpu().numpy())
                gt_dis = torch.from_numpy(gt_dis).float().cuda()

            loss_seg = F.cross_entropy(outputs, label_batch)
            outputs_soft = F.softmax(outputs, dim=1)
            loss_seg_dice = dice_loss(outputs_soft[:, 1, :, :, :],
                                      label_batch == 1)
            # print('out_dis, label_batch shapes', out_dis.shape, label_batch.shape)
            # out_dis.shape=(b,1,x,y,z); label_batch.shape=(b,x,y,z)
            dist_mse = F.mse_loss(out_dis, gt_dis)  #  # sdf_kl_loss
            # print('sdf_kl.shape: ', sdf_kl.shape)

            # Total objective: CE + Dice + distance-map regression.
            loss = loss_seg + loss_seg_dice + dist_mse

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_num = iter_num + 1
Example #57
0
def prog_epoch_pass(net, optimizer, loader):
    """Run one epoch of the joint progression/KL-grade model.

    Acts as a training pass when ``optimizer`` is given (gradients enabled,
    backward + step per batch) and as a validation pass when ``optimizer`` is
    None (softmax predictions and ground truth are collected instead).

    Returns:
        training:   running_loss / n_batches
        validation: (avg loss, ids, gt_progression, preds_progression,
                     gt_kl, preds_kl)
    """
    kvs = GlobalKVS()
    running_loss, pbar, n_batches, epoch, max_epoch, device = init_epoch_pass(net, optimizer, loader)

    preds_progression = []
    gt_progression = []
    ids = []
    preds_kl = []
    gt_kl = []

    # Gradients only when an optimizer is supplied (training mode).
    with torch.set_grad_enabled(optimizer is not None):
        for i, batch in enumerate(loader):
            if optimizer is not None:
                optimizer.zero_grad()
            # forward + backward + optimize if train
            labels_prog = batch['label'].long().to(device)
            labels_kl = batch['KL'].long().to(device)

            inputs = batch['img'].to(device)

            outputs_kl, outputs_prog = net(inputs)
            loss_kl = F.cross_entropy(outputs_kl, labels_kl)
            loss_prog = F.cross_entropy(outputs_prog, labels_prog)

            # Convex combination of the two task losses via loss_weight.
            loss = loss_prog.mul(kvs['args'].loss_weight) + loss_kl.mul(1 - kvs['args'].loss_weight)

            if optimizer is not None:
                loss.backward()
                if kvs['args'].clip_grad:
                    torch.nn.utils.clip_grad_norm_(net.parameters(), kvs['args'].clip_grad_norm)
                optimizer.step()
            else:
                # Validation: accumulate softmax probabilities and targets.
                probs_progression_batch = F.softmax(outputs_prog, 1).data.to('cpu').numpy()
                probs_kl_batch = F.softmax(outputs_kl, 1).data.to('cpu').numpy()

                preds_progression.append(probs_progression_batch)
                gt_progression.append(batch['label'].numpy())

                preds_kl.append(probs_kl_batch)
                gt_kl.append(batch['KL'])
                ids.extend(batch['ID_SIDE'])

            running_loss += loss.item()
            if optimizer is not None:
                pbar.set_description(f'Training   [{epoch} / {max_epoch}]:: {running_loss / (i + 1):.3f}')
            else:
                pbar.set_description(f'Validating [{epoch} / {max_epoch}]:')
            pbar.update()

            gc.collect()

    if optimizer is None:
        # Stack per-batch arrays into epoch-level arrays for the caller.
        preds_progression = np.vstack(preds_progression)
        gt_progression = np.hstack(gt_progression)

        preds_kl = np.vstack(preds_kl)
        gt_kl = np.hstack(gt_kl)

    gc.collect()
    pbar.close()

    if optimizer is not None:
        return running_loss / n_batches
    else:
        return running_loss / n_batches, ids, gt_progression, preds_progression, gt_kl, preds_kl
Example #58
0
def run(train_iter, val_iter, Text_vocab, save_model_path):
    """
    Create, train and save the deep-learning model.

    Trains ``Net`` on ``train_iter``, periodically evaluates on ``val_iter``,
    checkpoints the best model (by validation loss) to ``save_model_path``,
    and writes accuracy/loss curves to ``results/results.jpg``.
    """
    # Configuration: fix every RNG seed for reproducibility.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # make results repeatable across runs
    # --------------------- model creation, training and saving ---------------------
    vocab_size = len(Text_vocab)
    learning_rate = 1e-3
    num_epochs = 20
    require_improvement = 1000  # early-stop patience (batches); unused while the stop block below stays commented out
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # -----------------------------------------------------------------------------
    start_time = time.time()
    print("载入模型...")
    model = Net(vocab_size).to(device)
    print("模型载入完成...")
    time_diff = get_time_diff(start_time)
    print("Time usage:", time_diff)

    print("打印模型参数...")
    print(model.parameters)
    for name, parameters in model.named_parameters():
        print(name, ':', parameters.shape)

    # Model training
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Exponential LR decay, per epoch: lr = gamma * lr
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    total_batch = 0  # number of batches processed so far
    val_best_loss = float('inf')
    last_improve = 0  # batch index of the last validation-loss improvement
    stop = False  # whether training has gone too long without improvement
    # plot
    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []
    # Training loop
    for epoch in range(num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
        # scheduler.step() # apply LR decay
        for i, batch in enumerate(train_iter):
            texts, labels = batch.text, batch.category
            outputs = model(texts)
            # model.zero_grad()
            optimizer.zero_grad()
            loss = F.cross_entropy(outputs, labels.long())
            # loss.backward(retain_graph = True)
            loss.backward()
            optimizer.step()

            if total_batch % 100 == 0:
                # Every 100 batches, report metrics on this train batch and the val set.
                labels = labels.cpu()
                predict = torch.argmax(outputs, 1).cpu()
                train_acc = metrics.accuracy_score(labels, predict)
                val_acc, val_loss = evaluate(model, val_iter)

                if val_loss < val_best_loss:
                    val_best_loss = val_loss
                    # Checkpoint whenever validation loss improves.
                    torch.save(model.state_dict(), save_model_path)
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''

                time_diff = get_time_diff(start_time)
                msg = 'Iter: {0:>6},  Train Loss: {1:>5.2},  Train Acc: {2:>6.2%},  Val Loss: {3:>5.2},  Val Acc: {4:>6.2%},  Time: {5} {6}'
                print(
                    msg.format(total_batch, loss.item(), train_acc, val_loss,
                               val_acc, time_diff, improve))
                # plot mo platform
                # print('{{"metric": "Train Loss", "value": {}}}'.format(loss.item()))
                # print('{{"metric": "Train Acc", "value": {}}}'.format(train_acc))
                # print('{{"metric": "Val Loss", "value": {}}}'.format(val_loss))
                # print('{{"metric": "Val Acc", "value": {}}}'.format(val_acc))

                # plot
                train_loss_list.append(loss.item())
                train_acc_list.append(train_acc)
                val_loss_list.append(val_loss)
                val_acc_list.append(val_acc)
            total_batch = total_batch + 1
        #     if total_batch - last_improve > require_improvement:
        #         # stop if val loss has not improved for 1000 batches
        #         print("No optimization for a long time, auto-stopping...")
        #         stop = True
        #         break
        # if stop:
        #     break
    # Save the model (path and name chosen by the caller)
    # torch.save(model.state_dict(), save_model_path)
    # Plot curves
    plt.figure(figsize=(15, 5.5))
    plt.subplot(121)
    plt.plot(train_acc_list, label='train acc')
    plt.plot(val_acc_list, label='val acc')
    plt.title("acc")
    plt.subplot(122)
    plt.plot(train_loss_list, label='train loss')
    plt.plot(val_loss_list, label='val loss')
    plt.title("loss")
    plt.legend()
    plt.savefig('results/results.jpg')
def cross_entropy(
    inputs,
    target,
    weight=None,
    ignore_index=-100,
    reduction="mean",
    smooth_eps=None,
    smooth_dist=None,
    from_logits=True,
):
    """Cross-entropy loss with support for soft target distributions and
    label smoothing (https://arxiv.org/abs/1512.00567).

    Args:
        inputs: ``(N, C)`` scores — raw logits when ``from_logits`` is True,
            otherwise already log-probabilities.
        target: either a long tensor of class indices ``(N,)`` or a float
            tensor of per-class target distributions ``(N, C)``.
        weight: optional ``(C,)`` per-class rescaling weights.
        ignore_index: target value whose loss contribution is zeroed.  On the
            smoothing path only non-negative values are honoured; on the fast
            path it is forwarded to ``F.cross_entropy``/``F.nll_loss``.
        reduction: ``"none"`` | ``"sum"`` | ``"mean"``.
        smooth_eps: label-smoothing factor in ``[0, 1]``; ``None``/0 disables
            smoothing.
        smooth_dist: optional distribution to smooth towards; when ``None``,
            hard targets are smoothed uniformly over the classes.
        from_logits: whether ``inputs`` are unnormalized logits.

    Returns:
        The reduced loss (scalar for ``"sum"``/``"mean"``) or the per-sample
        loss tensor for ``reduction="none"``.
    """
    smooth_eps = smooth_eps or 0

    # Fast path: ordinary hard-label log-likelihood — defer to torch's
    # fused implementations, which also handle ignore_index/weight/reduction.
    if _is_long(target) and smooth_eps == 0:
        if from_logits:
            return F.cross_entropy(inputs,
                                   target,
                                   weight,
                                   ignore_index=ignore_index,
                                   reduction=reduction)
        else:
            return F.nll_loss(inputs,
                              target,
                              weight,
                              ignore_index=ignore_index,
                              reduction=reduction)

    if from_logits:
        # log-softmax of inputs
        lsm = F.log_softmax(inputs, dim=-1)
    else:
        lsm = inputs

    masked_indices = None
    num_classes = inputs.size(-1)

    if _is_long(target) and ignore_index >= 0:
        masked_indices = target.eq(ignore_index)

    if smooth_eps > 0 and smooth_dist is not None:
        if _is_long(target):
            target = onehot(target, num_classes).type_as(inputs)
        if smooth_dist.dim() < target.dim():
            smooth_dist = smooth_dist.unsqueeze(0)
        # BUGFIX: use the out-of-place lerp so a float `target` passed by the
        # caller is not silently mutated by this loss function.
        target = target.lerp(smooth_dist, smooth_eps)

    if weight is not None:
        # Per-class weights scale the log-probabilities before reduction.
        lsm = lsm * weight.unsqueeze(0)

    if _is_long(target):
        # Uniform label smoothing over hard targets: the true class ends up
        # with total mass eps_nll + eps_sum = 1 - smooth_eps, and every class
        # (true one included, via lsm.sum) receives eps_sum.
        eps_sum = smooth_eps / num_classes
        eps_nll = 1.0 - eps_sum - smooth_eps
        likelihood = lsm.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        loss = -(eps_nll * likelihood + eps_sum * lsm.sum(-1))
    else:
        # Soft-target cross entropy: -sum_c target_c * log p_c.
        loss = -(target * lsm).sum(-1)

    if masked_indices is not None:
        loss.masked_fill_(masked_indices, 0)

    if reduction == "sum":
        loss = loss.sum()
    elif reduction == "mean":
        if masked_indices is None:
            loss = loss.mean()
        else:
            # Average only over the non-ignored positions.
            loss = loss.sum() / float(loss.size(0) - masked_indices.sum())

    return loss
    def forward(
        self,
        trajectories: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        targets: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ):
        r"""
        Run the GPT-style trajectory transformer over a batch of tokenized
        trajectories, optionally computing a weighted cross-entropy loss
        against `targets`.

        Returns:

        Examples:

        ```python
        >>> from transformers import TrajectoryTransformerModel
        >>> import torch
        >>> import numpy as np

        >>> model = TrajectoryTransformerModel.from_pretrained(
        ...     "CarlCochet/trajectory-transformer-halfcheetah-medium-v2"
        ... )
        >>> model.to(device)
        >>> model.eval()

        >>> observations_dim, action_dim, batch_size = 17, 6, 256
        >>> seq_length = observations_dim + action_dim + 1

        >>> trajectories = torch.LongTensor([np.random.permutation(seq_length) for _ in range(batch_size)]).to(
        ...     device
        ... )
        >>> targets = torch.LongTensor([np.random.permutation(seq_length) for _ in range(batch_size)]).to(device)

        >>> outputs = model(
        ...     trajectories,
        ...     targets=targets,
        ...     use_cache=True,
        ...     output_attentions=True,
        ...     output_hidden_states=True,
        ...     return_dict=True,
        ... )
        ```
        """
        # Fall back to the model config for unspecified output flags.
        # NOTE(review): `use_cache` and `return_dict` are NOT defaulted from
        # the config here (unlike output_attentions/output_hidden_states);
        # passing them as None disables caching and forces the tuple return —
        # confirm this asymmetry is intended.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (output_hidden_states
                                if output_hidden_states is not None else
                                self.config.output_hidden_states)

        # One (possibly empty) past entry per transformer block.
        if past_key_values is None:
            past_key_values = tuple([None] * len(self.blocks))

        batch_size, sequence_length = trajectories.size()

        # Positional embeddings only cover `block_size` positions.
        if sequence_length > self.block_size:
            raise ValueError("Cannot forward, model block size is exhausted.")

        offset_trajectories = self.offset_tokens(trajectories)
        # [ batch_size x sequence_length x embedding_dim ]
        # forward the GPT model
        token_embeddings = self.tok_emb(
            offset_trajectories)  # each index maps to a (learnable) vector
        position_embeddings = self.pos_emb[:, :
                                           sequence_length, :]  # each position maps to a (learnable) vector

        hidden_states = self.drop(token_embeddings + position_embeddings)

        # Accumulators are only allocated when the caller asked for them.
        presents = () if use_cache else None
        all_self_attentions = () if output_attentions else None
        all_hidden_states = () if output_hidden_states else None

        for i, (block,
                layer_past) in enumerate(zip(self.blocks, past_key_values)):

            # Record the INPUT to each block (the final post-LN state is
            # appended after the loop).
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states, )

            if self.gradient_checkpointing and self.training:
                # Caching is incompatible with recomputing activations in the
                # backward pass, so it is force-disabled here.
                if use_cache:
                    logger.warning(
                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                    )
                    use_cache = False

                # Closure wrapper so checkpoint() can re-invoke the block with
                # the same positional arguments during recomputation.
                def create_custom_forward(module):
                    def custom_forward(*inputs):
                        return module(*inputs)

                    return custom_forward

                outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(block),
                    hidden_states,
                    layer_past,
                    use_cache,
                    output_attentions,
                )
            else:
                outputs = block(hidden_states, layer_past, use_cache,
                                output_attentions)

            hidden_states = outputs[0]
            if use_cache is True:
                # outputs[1] is the block's present key/value pair.
                presents = presents + (outputs[1], )

            if output_attentions:
                # Attention tensor position shifts by one when a present
                # key/value pair was also returned.
                all_self_attentions = all_self_attentions + (
                    outputs[2 if use_cache else 1], )

        # [ batch_size x sequence_length x embedding_dim ]
        hidden_state = self.ln_f(hidden_states)

        # NOTE(review): this appends the pre-LayerNorm `hidden_states`, not
        # the normalized `hidden_state` computed just above — confirm intended.
        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states, )

        # Pad so the sequence splits evenly into full transitions before the
        # vocabulary head; `n_pad` positions are stripped from logits below.
        hidden_states_pad, n_pad = self.pad_to_full_observation(hidden_state)

        logits = self.head(hidden_states_pad)
        logits = logits.reshape(batch_size, sequence_length + n_pad,
                                self.vocab_size + 1)
        logits = logits[:, :sequence_length]

        # if we are given some desired targets also calculate the loss
        if targets is not None:
            # Per-token loss; reduction deferred so per-dimension weights and
            # the attention mask can be applied first.
            loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)),
                                   targets.view(-1),
                                   reduction="none")
            if self.action_weight != 1 or self.reward_weight != 1 or self.value_weight != 1:
                # make weights: one weight per token of a transition, laid out
                # as [observation..., action..., reward, value], then tiled
                # across the n_states transitions in the sequence.
                n_states = int(np.ceil(sequence_length / self.transition_dim))
                weights = torch.cat([
                    torch.ones(self.observation_dim,
                               device=trajectories.device),
                    torch.ones(self.action_dim, device=trajectories.device) *
                    self.action_weight,
                    torch.ones(1, device=trajectories.device) *
                    self.reward_weight,
                    torch.ones(1, device=trajectories.device) *
                    self.value_weight,
                ])
                weights = weights.repeat(n_states)
                # NOTE(review): `[1:]` drops the first position's weight —
                # presumably because targets are shifted by one token relative
                # to inputs; confirm against the tokenization scheme.
                weights = weights[1:].repeat(batch_size, 1)
                loss = loss * weights.view(-1)
            # Mask out padded positions, then average over all tokens.
            # NOTE(review): `attention_mask` must be non-None whenever
            # `targets` is given, or this line raises.
            loss = (loss * attention_mask.view(-1)).mean()
        else:
            loss = None

        if not return_dict:
            # Tuple return: drop entries the caller did not request.
            return tuple(v for v in [
                loss, logits, presents, all_hidden_states, all_self_attentions
            ] if v is not None)

        return TrajectoryTransformerOutput(
            loss=loss,
            logits=logits,
            past_key_values=presents,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )