def forward(self, sentences, lengths, cat_in=0, cat_out=0):
    """Skip-thought style forward pass: encode every sentence, decode the
    previous and next sentences, and return the summed cross-entropy loss.

    Args:
        sentences: (B, maxlen) LongTensor of word ids.
        lengths:   (B,) actual sentence lengths, used to build the loss masks.
        cat_in / cat_out: category conditioning for encoder / decoders
            (per the original comment both are (n_categories) — TODO confirm).

    Returns:
        (loss, sentences[0], sentences[1], prev_pred_ids, next_pred_ids) —
        the id tensors are the greedy decodes for sample 0, for inspection.
    """
    # Compute Thought Vectors for each sentence. Also get the actual word
    # embeddings for teacher forcing.
    # thoughts = (B, thought_size), word_embeddings = (B, maxlen, word_size)
    thoughts, word_embeddings = self.encoder(sentences, cat_in)
    # Predict the words for previous and next sentences.
    # both = (batch-1, maxlen, VOCAB_SIZE)
    prev_pred, next_pred = self.decoders(thoughts, word_embeddings, cat_out)
    # mask the predictions, so that loss for beyond-EOS word predictions is
    # cancelled.
    # NOTE(review): the masks zero the *logits*, not the per-token losses, so
    # padded positions still contribute the log-softmax of an all-zero row —
    # confirm this is intended.
    prev_mask = self.create_mask(prev_pred, lengths[:-1])
    next_mask = self.create_mask(next_pred, lengths[1:])
    masked_prev_pred = prev_pred * prev_mask
    masked_next_pred = next_pred * next_mask
    # Sentence i-1 / i+1 act as targets for the previous / next decoder.
    prev_loss = F.cross_entropy(masked_prev_pred.view(-1, VOCAB_SIZE), sentences[:-1, :].view(-1))
    next_loss = F.cross_entropy(masked_next_pred.view(-1, VOCAB_SIZE), sentences[1:, :].view(-1))
    loss = prev_loss + next_loss
    # Greedy word ids for the first sample, for qualitative debugging output.
    _, prev_pred_ids = prev_pred[0].max(1)
    _, next_pred_ids = next_pred[0].max(1)
    return loss, sentences[0], sentences[1], prev_pred_ids, next_pred_ids
def forward(self, input, target):
    """Cross-entropy that accepts either (N, C) logits or (N, C, H, W) maps.

    4-D inputs are flattened so every spatial location counts as one sample.
    With ``self.size_average`` the standard mean CE is returned; otherwise the
    summed CE is divided by the number of samples in-place (numerically the
    same mean, kept for parity with the original formulation).
    """
    assert not target.requires_grad
    # Flatten spatial logits (N, C, H, W) -> (N*H*W, C).
    if input.dim() == 4:
        flat_logits = input.permute(0, 2, 3, 1).contiguous().view(-1, self.n_classes)
    else:
        flat_logits = input
    flat_labels = target.view(-1)
    assert flat_logits.shape[:1] == flat_labels.shape
    if self.size_average:
        return F.cross_entropy(flat_logits, flat_labels, size_average=True)
    return F.cross_entropy(flat_logits, flat_labels, size_average=False).mul_(1.0 / flat_labels.size(0))
def _add_losses(self, sigma_rpn=3.0):
    """Assemble the Faster R-CNN training losses.

    Builds the RPN class/box losses from self._anchor_targets and the RCNN
    class/box losses from self._proposal_targets, stores each in
    self._losses (mirrored into self._event_summaries) and returns their sum.

    Args:
        sigma_rpn: smooth-L1 sigma for the RPN box loss.
    """
    # RPN, class loss
    rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(-1, 2)
    rpn_label = self._anchor_targets['rpn_labels'].view(-1)
    # Drop anchors labelled -1 ("ignore") before computing the loss.
    rpn_select = (rpn_label.data != -1).nonzero().view(-1)
    rpn_cls_score = rpn_cls_score.index_select(
        0, rpn_select).contiguous().view(-1, 2)
    rpn_label = rpn_label.index_select(0, rpn_select).contiguous().view(-1)
    rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

    # RPN, bbox loss
    rpn_bbox_pred = self._predictions['rpn_bbox_pred']
    rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets']
    rpn_bbox_inside_weights = self._anchor_targets[
        'rpn_bbox_inside_weights']
    rpn_bbox_outside_weights = self._anchor_targets[
        'rpn_bbox_outside_weights']
    rpn_loss_box = self._smooth_l1_loss(
        rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights, sigma=sigma_rpn, dim=[1, 2, 3])

    # RCNN, class loss
    cls_score = self._predictions["cls_score"]
    label = self._proposal_targets["labels"].view(-1)
    cross_entropy = F.cross_entropy(
        cls_score.view(-1, self._num_classes), label)

    # RCNN, bbox loss
    bbox_pred = self._predictions['bbox_pred']
    bbox_targets = self._proposal_targets['bbox_targets']
    bbox_inside_weights = self._proposal_targets['bbox_inside_weights']
    bbox_outside_weights = self._proposal_targets['bbox_outside_weights']
    loss_box = self._smooth_l1_loss(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)

    self._losses['cross_entropy'] = cross_entropy
    self._losses['loss_box'] = loss_box
    self._losses['rpn_cross_entropy'] = rpn_cross_entropy
    self._losses['rpn_loss_box'] = rpn_loss_box

    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
    self._losses['total_loss'] = loss

    # Expose every loss term to the event/summary writer.
    for k in self._losses.keys():
        self._event_summaries[k] = self._losses[k]
    return loss
def forward(self, task=None, input1=None, input2=None, label=None):
    '''
    Predict through model and task-specific prediction layer.

    Encodes the input(s), applies the prediction layer registered for the
    task, and — when `label` is given — also computes the loss and updates
    the task's scorer(s).

    Args:
        - task: task object exposing `name`, `pair_input` and scorers
        - input1 / input2: sentence batches (input2 only for pair tasks)
        - label: gold labels; when None only logits are produced

    Returns:
        - out (dict): {'logits': ...}, plus 'loss' when label is not None
    '''
    pair_input = task.pair_input
    # Prediction layers are registered per task, e.g. "mnli_pred_layer".
    pred_layer = getattr(self, '%s_pred_layer' % task.name)
    if pair_input:
        if self.pair_enc_type == 'bow':
            sent1 = self.sent_encoder(input1)
            sent2 = self.sent_encoder(input2)  # causes a bug with BiDAF
            # Standard sentence-pair feature vector: [u, v, |u-v|, u*v].
            logits = pred_layer(torch.cat([sent1, sent2, torch.abs(sent1 - sent2),
                                           sent1 * sent2], 1))
        else:
            pair_emb = self.pair_encoder(input1, input2)
            logits = pred_layer(pair_emb)
    else:
        sent_emb = self.sent_encoder(input1)
        logits = pred_layer(sent_emb)
    out = {'logits': logits}
    if label is not None:
        if isinstance(task, (STS14Task, STSBTask)):
            # Regression tasks: MSE loss; Pearson/Spearman as the scorers.
            loss = F.mse_loss(logits, label)
            label = label.squeeze(-1).data.cpu().numpy()
            logits = logits.squeeze(-1).data.cpu().numpy()
            task.scorer1(pearsonr(logits, label)[0])
            task.scorer2(spearmanr(logits, label)[0])
        elif isinstance(task, CoLATask):
            # CoLA: cross-entropy loss; Matthews correlation as scorer1.
            label = label.squeeze(-1)
            loss = F.cross_entropy(logits, label)
            task.scorer2(logits, label)
            label = label.data.cpu().numpy()
            _, preds = logits.max(dim=1)
            task.scorer1(matthews_corrcoef(label, preds.data.cpu().numpy()))
        else:
            # Default classification path.
            label = label.squeeze(-1)
            loss = F.cross_entropy(logits, label)
            task.scorer1(logits, label)
            if task.scorer2 is not None:
                task.scorer2(logits, label)
        out['loss'] = loss
    return out
def l2l_train(model, cluster_center, n_epoch=10000, trunc_step=10):
    """Learning-to-learn training loop with a truncated running average.

    Each epoch runs `trunc_step` inner steps: generate a fresh batch, let
    `model` predict per-sample weights/biases, average them per *new* class
    into prototypes (M, B), maintain a running mean across steps (M_, B_),
    and accumulate the cross-entropy of the induced linear classifier.
    Backprop happens once per epoch on the summed loss.

    Returns:
        (M_all, B_all, cluster_center). NOTE(review): M_all/B_all are
        allocated but never written inside the loop, so they are returned
        as zeros — confirm they were meant to capture the final M_/B_.
    """
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    M_all = Variable(torch.zeros(model.n_class, model.n_dim))
    B_all = Variable(torch.zeros(model.n_class))
    for epoch in range(n_epoch):
        loss = 0
        M_step, B_step = [], []
        for step in range(trunc_step):
            data = generate_data(cluster_center)
            optimizer.zero_grad()
            x, y = Variable(torch.from_numpy(data[0])).float(), Variable(torch.from_numpy(data[1]))
            w, b = model(x)
            # Per-class prototypes over the new classes only
            # (labels n_class_l .. n_class_l + n_class_n - 1).
            M = Variable(torch.zeros(model.n_class_n, model.n_dim))
            B = Variable(torch.zeros(model.n_class_n))
            for k in range(model.n_class_n):
                M[k] = torch.cat((w[:, 0][y == model.n_class_l + k].view(-1, 1),
                                  w[:, 1][y == model.n_class_l + k].view(-1, 1)), 1).mean(0)
                B[k] = b[y == model.n_class_l + k].mean()
            # Running average of the prototypes across the inner steps.
            if step == 0:
                M_ = M
                B_ = B
            else:
                M_ = step / (step + 1) * M_step[-1] + 1 / (step + 1) * M
                B_ = step / (step + 1) * B_step[-1] + 1 / (step + 1) * B
            M_step.append(M_)
            B_step.append(B_)
            # Linear classifier induced by the averaged prototypes.
            pred = torch.mm(x, M_.t()) + B_.view(1, -1).expand_as(torch.mm(x, M_.t()))
            loss += F.cross_entropy(pred, y)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
    return M_all, B_all, cluster_center
def single_scale_rpn_losses(
        rpn_cls_logits, rpn_bbox_pred,
        rpn_labels_int32_wide, rpn_bbox_targets_wide,
        rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide):
    """Add losses for a single scale RPN model (i.e., no FPN).

    Crops the "wide" target maps to the spatial size of the predictions,
    then computes the RPN classification loss (softmax or sigmoid depending
    on cfg.RPN.CLS_ACTIVATION) and the smooth-L1 box regression loss.

    Returns:
        (loss_rpn_cls, loss_rpn_bbox)
    """
    h, w = rpn_cls_logits.shape[2:]
    rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w]  # -1 means ignore
    h, w = rpn_bbox_pred.shape[2:]
    rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w]
    rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w]
    rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w]

    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        # Reshape (B, 2*A, H, W) logits into (B*A*H*W, 2) two-way scores.
        B, C, H, W = rpn_cls_logits.size()
        rpn_cls_logits = rpn_cls_logits.view(
            B, 2, C // 2, H, W).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
        rpn_labels_int32 = rpn_labels_int32.contiguous().view(-1).long()
        # the loss is averaged over non-ignored targets
        loss_rpn_cls = F.cross_entropy(
            rpn_cls_logits, rpn_labels_int32, ignore_index=-1)
    else:
        # Sigmoid activation: `weight` masks out ignored (-1) anchors, then
        # the summed loss is normalized by the number of contributing anchors.
        weight = (rpn_labels_int32 >= 0).float()
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            rpn_cls_logits, rpn_labels_int32.float(), weight,
            size_average=False)
        loss_rpn_cls /= weight.sum()

    loss_rpn_bbox = net_utils.smooth_l1_loss(
        rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights, beta=1/9)

    return loss_rpn_cls, loss_rpn_bbox
def keypoint_losses(kps_pred, keypoint_locations_int32, keypoint_weights,
                    keypoint_loss_normalizer=None):
    """Mask R-CNN keypoint specific losses.

    Args:
        kps_pred: predicted keypoint heatmaps; each keypoint is treated as a
            HEATMAP_SIZE**2-way classification over spatial bins.
        keypoint_locations_int32: numpy int32 array of target heatmap bins.
        keypoint_weights: numpy array of per-keypoint visibility weights.
        keypoint_loss_normalizer: scalar used when not normalizing by visible
            keypoints. NOTE(review): must not be None when
            cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False, otherwise the
            `.item()` call below raises — confirm callers guarantee this.
    """
    device_id = kps_pred.get_device()
    kps_target = Variable(torch.from_numpy(
        keypoint_locations_int32.astype('int64'))).cuda(device_id)
    keypoint_weights = Variable(torch.from_numpy(keypoint_weights)).cuda(device_id)
    # Softmax across **space** (woahh....space!)
    # Note: this is not what is commonly called "spatial softmax"
    # (i.e., softmax applied along the channel dimension at each spatial
    # location); This is softmax applied over a set of spatial locations (i.e.,
    # each spatial location is a "class").
    loss = F.cross_entropy(
        kps_pred.view(-1, cfg.KRCNN.HEATMAP_SIZE**2), kps_target, reduce=False)
    # Visibility-weighted mean over all keypoints.
    loss = torch.sum(loss * keypoint_weights) / torch.sum(keypoint_weights)
    loss *= cfg.KRCNN.LOSS_WEIGHT

    if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
        # Discussion: the softmax loss above will average the loss by the sum of
        # keypoint_weights, i.e. the total number of visible keypoints. Since
        # the number of visible keypoints can vary significantly between
        # minibatches, this has the effect of up-weighting the importance of
        # minibatches with few visible keypoints. (Imagine the extreme case of
        # only one visible keypoint versus N: in the case of N, each one
        # contributes 1/N to the gradient compared to the single keypoint
        # determining the gradient direction). Instead, we can normalize the
        # loss by the total number of keypoints, if it were the case that all
        # keypoints were visible in a full minibatch. (Returning to the example,
        # this means that the one visible keypoint contributes as much as each
        # of the N keypoints.)
        loss *= keypoint_loss_normalizer.item()  # np.float32 to float
    return loss
def validate():
    """Evaluate the global `model` on `val_iter`.

    Relies on module-level globals: `model`, `val_iter`, `padidx`,
    `repackage_hidden` — TODO confirm they exist at call time.

    Returns:
        (accuracy, precision@20 per token, perplexity), where perplexity is
        exp(total cross-entropy / token count).
    """
    softmaxer = torch.nn.Softmax(dim=1)
    model.eval()
    correct = total = 0
    # precisionmat[k] = sum_{j >= k} 1/(j+1): credit for a hit at rank k.
    precisionmat = (1/np.arange(1,21))[::-1].cumsum()[::-1]
    precisionmat = torch.cuda.FloatTensor(precisionmat.copy())
    precision = 0
    crossentropy = 0
    hidden = model.initHidden()
    for batch in iter(val_iter):
        sentences = batch.text  # n=32,bs
        if torch.cuda.is_available():
            sentences = sentences.cuda()
        out, hidden = model(sentences, hidden)
        # Score each position against the *following* word.
        for j in range(sentences.size(0)-1):
            outj = out[j]  # bs,|V|
            labelsj = sentences[j+1]  # bs
            # cross entropy
            crossentropy += F.cross_entropy(outj, labelsj, size_average=False, ignore_index=padidx)
            # precision: hits within the top-20 ranked predictions
            outj, labelsj = softmaxer(outj).data, labelsj.data
            _, outsort = torch.sort(outj, dim=1, descending=True)
            outsort = outsort[:, :20]
            inds = (outsort - labelsj.unsqueeze(1) == 0)
            inds = inds.sum(dim=0).type(torch.cuda.FloatTensor)
            precision += inds.dot(precisionmat)
            # plain ol accuracy
            _, predicted = torch.max(outj, 1)
            total += labelsj.ne(padidx).int().sum()
            correct += (predicted == labelsj).sum()
            # DEBUGGING: see the rest in trigram.py
        # Detach the hidden state so history does not span batches.
        hidden = repackage_hidden(hidden)
    return correct/total, precision/total, torch.exp(crossentropy/total).data[0]
def forward(self, input, target, kl_weight=1.0):
    """SGVB-style objective: rescaled cross-entropy plus weighted KL terms.

    Sums `kl_reg()` over every submodule of ``self.net`` that defines it,
    then returns mean cross-entropy scaled by ``self.train_size`` plus
    ``kl_weight`` times the accumulated KL regularizer.
    """
    assert not target.requires_grad
    # Accumulate the KL contribution of every module exposing kl_reg().
    total_kl = sum(
        (m.kl_reg() for m in self.net.modules() if hasattr(m, 'kl_reg')),
        0.0,
    )
    data_term = F.cross_entropy(input, target, size_average=True) * self.train_size
    return data_term + kl_weight * total_kl
def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
    """Online Hard Example Mining (OHEM) detection losses.

    Ranks RoIs by a cheap cross-entropy surrogate computed on detached
    scores, keeps the `num_hard` hardest (all positives are force-included),
    and computes the classification loss on that subset; the box loss uses
    positives only.

    Returns:
        (loss_cls, loss_box)
    """

    def log_sum_exp(x):
        # Numerically stable log(sum(exp(x))) along dim 1.
        x_max = x.data.max()
        return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max

    num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
    pos_idx = rois_label > 0
    num_pos = pos_idx.int().sum()

    # classification loss: background weight scaled by the positive fraction
    num_classes = cls_score.size(1)
    weight = cls_score.data.new(num_classes).fill_(1.)
    weight[0] = num_pos.data[0] / num_hard

    # Detached copies: the ranking itself must not receive gradients.
    conf_p = cls_score.detach()
    conf_t = rois_label.detach()

    # rank on cross_entropy loss
    loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1,1))
    # NOTE(review): loss_c is (N, 1) while pos_idx is (N,) — confirm this
    # masked assignment broadcasts as intended on the torch version in use.
    loss_c[pos_idx] = 100.  # include all positive samples
    _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
    loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)

    # bounding box regression L1 loss (positives only)
    pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
    loc_p = bbox_pred[pos_idx].view(-1, 4)
    loc_t = rois_target[pos_idx].view(-1, 4)
    loss_box = F.smooth_l1_loss(loc_p, loc_t)

    return loss_cls, loss_box
def avg_cross_entropy_loss(predicted, targets):
    """
    Helper function for computing the simple mean cross entropy loss between
    the predicted one-hot and the target class.

    Args:
        predicted: sequence of (N_i, C) logit tensors.
        targets:   sequence of class-index arrays/lists, one per tensor.

    Returns:
        Scalar tensor: the mean of the per-element cross-entropy losses.

    Raises:
        ValueError: if `predicted` is empty (the original crashed with an
            opaque IndexError in that case).
    """
    length = len(predicted)
    if length == 0:
        raise ValueError('avg_cross_entropy_loss needs at least one prediction')
    losses = []
    for i in range(length):
        # Round-trip through float32 so plain lists are accepted, then cast
        # to the int64 class indices cross_entropy expects.
        target = Variable(torch.from_numpy(np.array(targets[i], dtype=np.float32))).long()
        losses.append(F.cross_entropy(predicted[i], target))
    # sum()/len replaces the manual accumulation loop of the original.
    return sum(losses) / length
def test(model, device, test_loader):
    """Evaluate `model` on `test_loader`: prints per-batch predictions, the
    average cross-entropy loss, accuracy, a classification report and the
    confusion matrix.

    Args:
        model: module producing (B, C, ...) class scores.
        device: torch device to evaluate on.
        test_loader: iterable of (data, target) batches exposing `.dataset`.
    """
    model.to(device)
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        y_pred = []
        y_true = []
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Pool the per-location scores so output becomes (B, C).
            output = torch.mean(output.view(output.size(0), output.size(1), -1), dim=2)
            # BUG FIX: accumulate a python float instead of a tensor so no
            # tensor state is carried across batches.
            test_loss += F.cross_entropy(output, target).item()
            output = F.softmax(output, dim=1)
            confidence, pred = output.max(1)
            print('confidence: {}, prediction: {}, ground truth: {}'.format(confidence.cpu().numpy(), pred.cpu().numpy(), target.cpu().numpy()))
            y_pred += pred.data.tolist()
            y_true += target.data.tolist()
            correct += pred.eq(target.view_as(pred)).sum().item()
    # BUG FIX: the report claims an *average* loss, so divide the summed
    # per-batch means by the number of batches (the original printed the sum).
    test_loss /= len(test_loader)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    print(metrics.classification_report(np.asarray(y_true), np.asarray(y_pred)))
    print('confusion matrix: \n', metrics.confusion_matrix(np.asarray(y_true), np.asarray(y_pred)))
    print('\n')
def forward(self, model, sample, reduce=True):
    """Compute the loss for the given sample.

    Returns a tuple with three elements:
    1) the loss
    2) the sample size, which is used as the denominator for the gradient
    3) logging outputs to display while training
    """
    # This criterion only works with decoders exposing an adaptive softmax.
    assert hasattr(model.decoder, 'adaptive_softmax') and model.decoder.adaptive_softmax is not None
    adaptive_softmax = model.decoder.adaptive_softmax

    net_output = model(**sample['net_input'])
    target = model.get_targets(sample, net_output).view(-1)
    bsz = target.size(0)

    # The adaptive softmax splits the flat target into head/tail cluster
    # chunks, each paired with its own logits.
    logits, target = adaptive_softmax(net_output[0], target)
    assert len(target) == len(logits)

    # Loss accumulator on the same device/dtype as the network output:
    # a single summed scalar when reduce=True, per-sample otherwise.
    loss = net_output[0].new(1 if reduce else bsz).zero_()

    for i in range(len(target)):
        if target[i] is not None:
            assert (target[i].min() >= 0 and target[i].max() <= logits[i].size(1))
            loss += F.cross_entropy(logits[i], target[i], size_average=False,
                                    ignore_index=self.padding_idx, reduce=reduce)

    # Normalize per-sentence or per-token depending on the training flag.
    sample_size = sample['target'].size(0) if self.args.sentence_avg else sample['ntokens']
    logging_output = {
        'loss': utils.item(loss.data) if reduce else loss.data,
        'ntokens': sample['ntokens'],
        'sample_size': sample_size,
    }
    return loss, sample_size, logging_output
def forward(self, predict, target, weight=None):
    """Pixel-wise cross entropy with an ignore label.

    Args:
        predict: (n, c, h, w) class scores.
        target:  (n, h, w) integer labels; `self.ignore_label` and negative
                 values are excluded from the loss.
        weight (Tensor, optional): a manual rescaling weight given to each
                 class. If given, has to be a Tensor of size "nclasses".

    Returns:
        Scalar loss (zero tensor when no valid pixel remains).
    """
    assert not target.requires_grad
    assert predict.dim() == 4
    assert target.dim() == 3
    assert predict.size(0) == target.size(0), "{0} vs {1} ".format(predict.size(0), target.size(0))
    assert predict.size(2) == target.size(1), "{0} vs {1} ".format(predict.size(2), target.size(1))
    # BUG FIX: the failure message used target.size(3); target is 3-D, so
    # building the message raised IndexError and masked the real assertion.
    assert predict.size(3) == target.size(2), "{0} vs {1} ".format(predict.size(3), target.size(2))
    n, c, h, w = predict.size()
    # Keep only pixels that carry a real label (non-negative, not ignored).
    target_mask = (target >= 0) * (target != self.ignore_label)
    target = target[target_mask]
    if not target.data.dim():
        # No valid pixel at all: contribute a zero loss.
        return Variable(torch.zeros(1))
    # (n, c, h, w) -> (n, h, w, c) so the mask selects per-pixel class rows.
    predict = predict.transpose(1, 2).transpose(2, 3).contiguous()
    predict = predict[target_mask.view(n, h, w, 1).repeat(1, 1, 1, c)].view(-1, c)
    # target: [N]   predict: [N, C]
    loss = F.cross_entropy(predict, target, weight=weight, size_average=self.size_average)
    return loss
def mrcnn_class_loss(target_class_ids, pred_class_logits, active_class_ids, config):
    """Loss for the classifier head of Mask RCNN.

    target_class_ids: [batch, num_rois]. Integer class IDs (zero padded).
    pred_class_logits: [batch, num_rois, num_classes].
    active_class_ids: [batch, num_classes]. 1 for classes present in the
        image's dataset, 0 otherwise. Currently unused — kept for interface
        compatibility with the reference implementation.
    """
    # Flatten RoIs across the batch; move targets to int64 on the GPU.
    logits_flat = pred_class_logits.contiguous().view(-1, config.NUM_CLASSES)
    targets_flat = target_class_ids.contiguous().view(-1).type(torch.cuda.LongTensor)
    # NOTE: losses of predictions for classes outside the image's active set
    # are NOT erased here; the reference TF code masks them with pred_active
    # and renormalizes by its sum.
    return F.cross_entropy(logits_flat, targets_flat, weight=None, size_average=True)
def cross_entropy_loss(input, target):
    """Soft-label cross entropy.

    For each class column, computes the per-sample cross-entropy as if every
    sample belonged to that class, weights it by the matching column of
    `target`, and returns the batch-averaged total.
    """
    batch_size, num_classes = input.size(0), input.size(1)
    accum = torch.tensor(0.0)
    for cls in range(num_classes):
        # Per-sample CE against the fixed class `cls`.
        fixed_labels = torch.full((batch_size,), cls, dtype=torch.long)
        per_sample = F.cross_entropy(input, fixed_labels, reduce=False)
        accum = accum + target[:, cls].dot(per_sample)
    return accum / batch_size
def loss(anchors, data, pred, threshold):
    """YOLO-style multi-part detection loss.

    Matches predicted boxes to ground truth, splits anchor cells into
    positive (responsible for a gt box) and negative (below the IoU
    threshold) sets, and returns the per-component losses — foreground /
    background IoU, box center, box size and optionally class — each
    normalized by the number of anchor cells, plus a debug dict.
    """
    iou = pred['iou']
    device_id = iou.get_device() if torch.cuda.is_available() else None
    rows, cols = pred['feature'].size()[-2:]
    iou_matrix, _iou, _, _data = iou_match(pred['yx_min'].data, pred['yx_max'].data, data)
    anchors = utils.ensure_device(anchors, device_id)
    # Positive cells: anchor cells responsible for a ground-truth box.
    positive = fit_positive(rows, cols, *(data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    # Negative cells: not positive and with matched IoU below the threshold.
    negative = ~positive & (_iou < threshold)
    _center_offset, _size_norm = fill_norm(*(_data[key] for key in 'yx_min, yx_max'.split(', ')), anchors)
    positive, negative, _iou, _center_offset, _size_norm, _cls = (torch.autograd.Variable(t) for t in (positive, negative, _iou, _center_offset, _size_norm, _data['cls']))
    _positive = torch.unsqueeze(positive, -1)
    loss = {}
    # iou
    loss['foreground'] = F.mse_loss(iou[positive], _iou[positive], size_average=False)
    loss['background'] = torch.sum(square(iou[negative]))
    # bbox
    loss['center'] = F.mse_loss(pred['center_offset'][_positive], _center_offset[_positive], size_average=False)
    loss['size'] = F.mse_loss(pred['size_norm'][_positive], _size_norm[_positive], size_average=False)
    # cls
    if 'logits' in pred:
        logits = pred['logits']
        if len(_cls.size()) > 3:
            # Soft (distribution) class targets: MSE against the softmax.
            loss['cls'] = F.mse_loss(F.softmax(logits, -1)[_positive], _cls[_positive], size_average=False)
        else:
            # Hard class indices: cross-entropy.
            # NOTE(review): logits is masked with _positive but _cls with
            # positive — confirm the two selections line up element-wise.
            loss['cls'] = F.cross_entropy(logits[_positive].view(-1, logits.size(-1)), _cls[positive].view(-1))
    # normalize by the total number of anchor cells
    cnt = float(np.multiply.reduce(positive.size()))
    for key in loss:
        loss[key] /= cnt
    return loss, dict(iou=_iou, data=_data, positive=positive, negative=negative)
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
    """Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).

    Args:
      loc_preds: (tensor) predicted locations, sized [N, #anchors, 4].
      loc_targets: (tensor) encoded target locations, sized [N, #anchors, 4].
      cls_preds: (tensor) predicted class confidences, sized [N, #anchors, #classes].
      cls_targets: (tensor) encoded target labels, sized [N, #anchors].

    loss:
      (tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + CrossEntropyLoss(cls_preds, cls_targets).
    """
    pos = cls_targets > 0  # [N,#anchors]
    batch_size = pos.size(0)
    num_pos = pos.sum().item()
    # NOTE(review): a batch with no positive anchors gives num_pos == 0 and
    # the final division raises ZeroDivisionError — confirm upstream
    # guarantees at least one positive per batch.

    # ===============================================================
    # loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
    # ===============================================================
    mask = pos.unsqueeze(2).expand_as(loc_preds)  # [N,#anchors,4]
    loc_loss = F.smooth_l1_loss(loc_preds[mask], loc_targets[mask], size_average=False)

    # ===============================================================
    # cls_loss = CrossEntropyLoss(cls_preds, cls_targets)
    # ===============================================================
    cls_loss = F.cross_entropy(cls_preds.view(-1, self.num_classes), cls_targets.view(-1), reduce=False)  # [N*#anchors,]
    cls_loss = cls_loss.view(batch_size, -1)
    cls_loss[cls_targets < 0] = 0  # set ignored loss to 0
    # Keep positives plus only the hardest-scoring negatives.
    neg = self._hard_negative_mining(cls_loss, pos)  # [N,#anchors]
    cls_loss = cls_loss[pos | neg].sum()

    print('loc_loss: {} | cls_loss: {}'.format(loc_loss.item() / num_pos, cls_loss.item() / num_pos))
    loss = (loc_loss + cls_loss) / num_pos
    return loss
def eval(data_iter, model, args, scheduler):
    """Run one evaluation pass over `data_iter`, printing average loss and
    accuracy, then restore the model to training mode.

    Args:
        data_iter: batch iterator exposing `.dataset`; batches have
            `.text` / `.label`.
        model: classifier producing (B, C) logits.
        args: needs `.cuda`.
        scheduler: unused; kept for interface compatibility.

    Note: shadows the builtin `eval`; name kept for callers.
    """
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            # BUG FIX: the original assigned feature.cuda() to *both* names,
            # leaving target on the CPU (device-mismatch on GPU runs).
            feature, target = feature.cuda(), target.cuda()
        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)
        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    size = len(data_iter.dataset)
    # BUG FIX: average the *accumulated* loss; the original divided only the
    # final batch's loss by the dataset size.
    avg_loss = avg_loss / size
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
    """Fast R-CNN head losses: softmax classification + smooth-L1 regression.

    Returns:
        (classification loss, bounding-box regression loss) as a tuple.
    """
    # Mean cross-entropy over the sampled RoIs.
    cls_loss = F.cross_entropy(cls_score, rois_label)
    # Weighted smooth-L1 on the box deltas; the inside/outside weights
    # select which RoIs/coordinates contribute.
    box_loss = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
    return cls_loss, box_loss
def test_eval(data_iter, model, save_path, args, model_count):
    """Evaluate `model` on `data_iter`, append the result to
    ./Test_Result.txt, record the best accuracy seen so far, and optionally
    delete the checkpoint afterwards.

    Args:
        data_iter: batch iterator exposing `.dataset`.
        model: classifier producing (B, C) logits.
        save_path: path of the evaluated checkpoint.
        args: needs `.cuda`, `.mulu`, `.rm_model`.
        model_count: index written into the result file.
    """
    model.eval()
    corrects, avg_loss = 0, 0
    for batch in data_iter:
        feature, target = batch.text, batch.label
        feature.data.t_(), target.data.sub_(1)  # batch first, index align
        if args.cuda:
            # BUG FIX: the original assigned feature.cuda() to *both* names,
            # leaving target on the CPU.
            feature, target = feature.cuda(), target.cuda()
        logit = model(feature)
        loss = F.cross_entropy(logit, target, size_average=False)
        avg_loss += loss.data[0]
        corrects += (torch.max(logit, 1)
                     [1].view(target.size()).data == target.data).sum()
    size = len(data_iter.dataset)
    # BUG FIX: average the *accumulated* loss; the original divided only the
    # final batch's loss by the dataset size.
    avg_loss = avg_loss / size
    accuracy = 100.0 * corrects / size
    model.train()
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n'.format(avg_loss, accuracy, corrects, size))
    print("model_count {}".format(model_count))
    # append this run's result to the running result file
    if os.path.exists("./Test_Result.txt"):
        file = open("./Test_Result.txt", "a")
    else:
        file = open("./Test_Result.txt", "w")
    file.write("model " + save_path + "\n")
    file.write("Evaluation - loss: {:.6f} acc: {:.4f}%({}/{}) \n".format(avg_loss, accuracy, corrects, size))
    file.write("model_count {} \n".format(model_count))
    file.write("\n")
    file.close()
    # calculate the best score in current file
    resultlist = []
    if os.path.exists("./Test_Result.txt"):
        file = open("./Test_Result.txt")
        for line in file.readlines():
            # accuracy lives at a fixed column range of each Evaluation line
            if line[:10] == "Evaluation":
                resultlist.append(float(line[34:41]))
        result = sorted(resultlist)
        file.close()
        file = open("./Test_Result.txt", "a")
        file.write("\nThe Current Best Result is : " + str(result[len(result) - 1]))
        file.write("\n\n")
        file.close()
    shutil.copy("./Test_Result.txt", "./snapshot/" + args.mulu + "/Test_Result.txt")
    # whether to delete the model after test acc so that to save space
    if os.path.isfile(save_path) and args.rm_model is True:
        os.remove(save_path)
def softmax_loss_1d(x, l):
    """ log-likelihood for mixture of discretized logistics, assumes
    the data has been rescaled to [-1,1] interval """
    # Use only the first channel of the input image.
    img = x[:, 0, :, :]
    num_bins = int(l.size()[1])
    # Map [-1, 1] onto the integer bin ids {0, ..., num_bins - 1}.
    bin_ids = ((img + 1) * num_bins / 2).long().clamp(max=num_bins - 1)
    # Per-pixel cross-entropy against the binned targets.
    per_pixel = F.cross_entropy(l, bin_ids, reduce=False)
    # Sum the per-pixel losses over all spatial positions of each sample.
    return per_pixel.view(per_pixel.size(0), -1).sum(dim=1)
def train(model, cluster_center, n_epoch=5000):
    """Fit `model` with Adam on freshly generated batches for `n_epoch`
    steps, printing the cross-entropy loss after every epoch."""
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(n_epoch):
        # A new synthetic batch is drawn for every epoch.
        batch = generate_data(cluster_center)
        x = Variable(torch.from_numpy(batch[0])).float()
        y = Variable(torch.from_numpy(batch[1]))
        optimizer.zero_grad()
        loss = F.cross_entropy(model(x), y)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
def forward(self, predictions, priors, targets): loc_data, conf_data, _ = predictions # 预测的框以及类别概率(bs,-1,4) (bs,-1,2) priors = priors num = loc_data.size(0) # bs num_priors = priors.size(0) # (bs, 21824, 4) loc_t = torch.Tensor(num, num_priors, 4) # (bs, 21824) conf_t = torch.LongTensor(num, num_priors) # (bs,num_obj, 5) for idx in range(num): truths = targets[idx][:, :-1].data # cx,cy,w,h labels = targets[idx][:, -1].data # 1 or 0 defaults = priors.data # default boxes match(0.35, truths, defaults, [0.1, 0.2], labels, loc_t, conf_t, idx) if self.device.type == 'cuda': loc_t = loc_t.to(self.device) conf_t = conf_t.to(self.device) # 得到概率 >0 的idx pos = conf_t > 0 pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) # 全部展开 计算loss loc_p = loc_data[pos_idx].view(-1, 4) # predict loc_t = loc_t[pos_idx].view(-1, 4) # label loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') batch_conf = conf_data.view(-1, 2) loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) # Hard Negative Mining loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now loss_c = loss_c.view(num, -1) _, loss_idx = loss_c.sort(1, descending=True) _, idx_rank = loss_idx.sort(1) num_pos = pos.long().sum(1, keepdim=True) num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) neg = idx_rank < num_neg.expand_as(idx_rank) pos_idx = pos.unsqueeze(2).expand_as(conf_data) neg_idx = neg.unsqueeze(2).expand_as(conf_data) conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, 2) targets_weighted = conf_t[(pos + neg).gt(0)] loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N N = max(num_pos.data.sum().float(), 1) loss_l /= N loss_c /= N return loss_l, loss_c
def forward(self, base_feat, im_info, gt_boxes, num_boxes): batch_size = base_feat.size(0) # return feature map after convrelu layer rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) # get rpn classification score rpn_cls_score = self.RPN_cls_score(rpn_conv1) rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1) rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) # get rpn offsets to the anchor boxes rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1) # proposal layer cfg_key = 'TRAIN' if self.training else 'TEST' rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key)) self.rpn_loss_cls = 0 self.rpn_loss_box = 0 # generating training labels and build the rpn loss if self.training: assert gt_boxes is not None rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes)) # compute classification loss rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) rpn_label = rpn_data[0].view(batch_size, -1) rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep) rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data) rpn_label = Variable(rpn_label.long()) self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) fg_cnt = torch.sum(rpn_label.data.ne(0)) rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:] # compute bbox regression loss rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) rpn_bbox_targets = Variable(rpn_bbox_targets) self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, sigma=3, dim=[1,2,3]) return rois, self.rpn_loss_cls, self.rpn_loss_box
def _information_loss(model: InfoGAN, fake_hidden, latent):
    """Mutual-information reconstruction loss for InfoGAN.

    Runs the recognition head on `fake_hidden` and scores each latent group
    the model actually uses: categorical (cross-entropy), continuous
    (Gaussian NLL, weight 0.1) and binary (BCE-with-logits, weight 2).
    """
    rec_cat, rec_mean, rec_logvar, rec_bin = model.rec(fake_hidden)
    total = 0.
    if model.cat_dim > 0:
        # Categorical code: logits vs the one-hot target's argmax.
        total = total + F.cross_entropy(rec_cat, latent[:, model.cat_idx].argmax(1))
    if model.cont_dim > 0:
        # Continuous code: Gaussian reconstruction, down-weighted by 0.1.
        total = total + .1 * _gaussian_loss(latent[:, model.cont_idx], rec_mean, rec_logvar)
    if model.bin_dim > 0:
        # Binary code: BCE with logits, up-weighted by 2.
        total = total + 2 * F.binary_cross_entropy_with_logits(rec_bin, latent[:, model.bin_idx])
    return total
def evaluate(model, val_iter, vocab_size, DE, EN):
    """Return the mean per-batch cross-entropy of `model` over the
    validation iterator, ignoring target-side <pad> tokens."""
    model.eval()
    pad_token = EN.vocab.stoi['<pad>']
    running = 0
    for batch in val_iter:
        src_seq, _src_len = batch.src
        trg_seq, _trg_len = batch.trg
        # Inference-mode (volatile) GPU copies of the batch.
        src_seq = Variable(src_seq.data.cuda(), volatile=True)
        trg_seq = Variable(trg_seq.data.cuda(), volatile=True)
        preds = model(src_seq, trg_seq)
        # Drop the first time step on both sides before scoring.
        flat_logits = preds[1:].view(-1, vocab_size)
        flat_targets = trg_seq[1:].contiguous().view(-1)
        batch_loss = F.cross_entropy(flat_logits, flat_targets, ignore_index=pad_token)
        running += batch_loss.data[0]
    return running / len(val_iter)
def train(epoch):
    """Run one training epoch over the global `train_loader`, logging every
    `log_interval` batches."""
    model.train()
    seen = 0
    dataset_size = len(train_loader.dataset)
    for inputs, labels in train_loader:
        inputs, labels = Variable(inputs), Variable(labels)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(inputs), labels)
        loss.backward()
        optimizer.step()
        seen += inputs.size(0)
        # seen // batch-size counts the batches processed so far.
        if (seen // inputs.size(0)) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, dataset_size,
                100. * seen / dataset_size, loss.item()))
def train(epoch, model):
    """Run one training epoch (optionally on CUDA), logging every
    `args.log_interval` batches."""
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        if args.cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs), Variable(labels)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if step % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(inputs), len(train_loader.dataset),
                100. * step / len(train_loader), loss.data[0]))
def validate():
    """Evaluate the global ensemble (fLSTM + fGRU + fNNLM, combined by
    `model`) on `val_iter`.

    Relies on module-level globals: `model`, `fLSTM`, `fGRU`, `fNNLM`,
    `val_iter`, `TEXT`, `n`, `padidx`, `repackage_hidden` — TODO confirm.

    Returns:
        (accuracy, precision@20 per token, perplexity).
    """
    softmaxer = torch.nn.Softmax(dim=1)
    model.eval()
    fLSTM.eval()
    fGRU.eval()
    fNNLM.eval()
    correct = total = 0
    # precisionmat[k]: credit assigned to a correct answer found at rank k.
    precisionmat = (1/np.arange(1,21))[::-1].cumsum()[::-1]
    precisionmat = torch.cuda.FloatTensor(precisionmat.copy())
    precision = 0
    crossentropy = 0
    LSTMhidden = fLSTM.initHidden()
    GRUhidden = fGRU.initHidden()
    for batch in iter(val_iter):
        sentences = batch.text  # n=32,bs
        if torch.cuda.is_available():
            sentences = sentences.cuda()
        LSTMout, LSTMhidden = fLSTM(sentences, LSTMhidden)
        GRUout, GRUhidden = fGRU(sentences, GRUhidden)
        # Left-pad so every position has a full n-gram context for the NNLM.
        word_pad = (32 + n - 1) - sentences.size(0)
        pads = Variable(torch.zeros(word_pad,sentences.size(1))).type(torch.cuda.LongTensor)
        padsentences = torch.cat([pads,sentences],dim=0)
        #print("sentence_dim: {}\npadded_dim: {}".format(sentences.size(),padsentences.size()))
        # Slide the NNLM over every n-gram window of every column (sentence).
        NNLMout = torch.stack([ fNNLM(torch.cat([ padsentences[:,a:a+1][b:b+n,:] for b in range(32) ],dim=1).t()) for a in range(sentences.size(1)) ],dim=1)
        #eOUT = torch.cat([LSTMout,GRUout,NNLMout],dim=2)
        # Trim back to the unpadded sequence length and vocabulary size.
        NNLMout = NNLMout[-sentences.size(0):,:sentences.size(1),:len(TEXT.vocab)]
        # Ensemble the three model outputs.
        tOUT = model(sentences.t(),LSTMout,GRUout,NNLMout)
        out = tOUT
        # Score each position against the following word.
        for j in range(sentences.size(0)-1):
            outj = out[j]  # bs,|V|
            labelsj = sentences[j+1]  # bs
            # cross entropy
            crossentropy += F.cross_entropy(outj,labelsj,size_average=False,ignore_index=padidx)
            # precision: hits within the top-20 ranked predictions
            outj, labelsj = softmaxer(outj).data, labelsj.data
            _, outsort = torch.sort(outj,dim=1,descending=True)
            outsort = outsort[:,:20]
            inds = (outsort-labelsj.unsqueeze(1)==0)
            inds = inds.sum(dim=0).type(torch.cuda.FloatTensor)
            precision += inds.dot(precisionmat)
            # plain ol accuracy
            _, predicted = torch.max(outj, 1)
            total += labelsj.ne(padidx).int().sum()
            correct += (predicted==labelsj).sum()
            # DEBUGGING: see the rest in trigram.py
        # Detach recurrent state so history does not span batches.
        LSTMhidden = repackage_hidden(LSTMhidden)
        GRUhidden = repackage_hidden(GRUhidden)
    return correct/total, precision/total, torch.exp(crossentropy/total).data[0]
def go(arg):
    """Train and evaluate a relational graph-attention node classifier.

    Loads a relational graph dataset named `arg.name`, builds a GAT-style
    model whose layers attend over typed edges, trains it with full-batch
    Adam, and prints train/test accuracy.  Relies on module-level
    `data`, `util`, `nn`, `F`, `torch`, `tqdm`.
    """
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'
    edges, (n2i, i2n), (r2i, i2r), train, test = data.load(arg.name, final=arg.final, limit=arg.limit)

    # Convert test and train to tensors
    train_idx = [n2i[name] for name, _ in train.items()]
    train_lbl = [cls for _, cls in train.items()]
    train_idx = torch.tensor(train_idx, dtype=torch.long, device=dev)
    train_lbl = torch.tensor(train_lbl, dtype=torch.long, device=dev)

    test_idx = [n2i[name] for name, _ in test.items()]
    test_lbl = [cls for _, cls in test.items()]
    test_idx = torch.tensor(test_idx, dtype=torch.long, device=dev)
    test_lbl = torch.tensor(test_lbl, dtype=torch.long, device=dev)

    # count nr of classes
    cls = set([int(l) for l in test_lbl] + [int(l) for l in train_lbl])
    """ Define model """
    num_cls = len(cls)

    class GATLayer(nn.Module):
        """One attention layer over the typed edge list (closes over `arg` and `edges`)."""

        def __init__(self, graph):
            super().__init__()
            self.i2n, self.i2r, self.edges = graph
            # Flatten the per-relation (from, to) lists into two index vectors.
            froms, tos = [], []
            for p in edges.keys():
                froms.extend(edges[p][0])
                tos.extend(edges[p][1])
            self.register_buffer('froms', torch.tensor(froms, dtype=torch.long))
            self.register_buffer('tos', torch.tensor(tos, dtype=torch.long))

        def forward(self, nodes, rels, sample=None):
            # nodes: (n, k) node embeddings; rels: (k, k, r) relation matrices.
            n, k = nodes.size()
            k, k, r = rels.size()

            if arg.dense:
                # Dense path: build all r stacked n x n attention matrices.
                froms = nodes[None, :, :].expand(r, n, k)
                rels = rels.permute(2, 0, 1)
                froms = torch.bmm(froms, rels)
                froms = froms.view(r * n, k)
                adj = torch.mm(froms, nodes.t())  # stacked adjacencies
                # NOTE(review): softmax over dim=0 normalizes down columns of the
                # stacked (r*n, n) matrix — confirm this is the intended axis.
                adj = F.softmax(adj, dim=0)
                nwnodes = torch.mm(adj, nodes)
                nwnodes = nwnodes.view(r, n, k)
                nwnodes = nwnodes.mean(dim=0)
                return nwnodes
            else:
                # Sparse path: score only the edges that actually exist.
                # Repeat each relation matrix once per edge of that relation.
                rels = [
                    rels[None, :, :, p].expand(len(self.edges[p][0]), k, k)
                    for p in range(r)
                ]
                rels = torch.cat(rels, dim=0)
                assert len(self.froms) == rels.size(0)
                froms = nodes[self.froms, :]
                tos = nodes[self.tos, :]
                froms, tos = froms[:, None, :], tos[:, :, None]
                # unnormalized attention weights: from^T R to, per edge
                att = torch.bmm(torch.bmm(froms, rels), tos).squeeze()
                if sample is None:
                    indices = torch.cat(
                        [self.froms[:, None], self.tos[:, None]], dim=1)
                    values = att
                else:
                    # Edge sampling is not implemented; `sample` must be None,
                    # otherwise `values` below is unbound (NameError).
                    pass
                self.values = values
                self.values.retain_grad()  # keep grads for inspection/debugging
                # normalize the values (TODO try sparsemax)
                values = util.logsoftmax(indices, values, (n, n), p=10, row=True)
                values = torch.exp(values)
                # Sparse matrix-multiply of the attention matrix with the nodes.
                mm = util.sparsemm(torch.cuda.is_available())
                return mm(indices.t(), values, (n, n), nodes)

    class Model(nn.Module):
        """Stack of GATLayers followed by a linear + softmax classifier head."""

        def __init__(self, k, num_classes, graph, depth=3):
            super().__init__()
            self.i2n, self.i2r, self.edges = graph
            self.num_classes = num_classes
            n = len(self.i2n)
            # relation embeddings (+1 relation slot, presumably for self-loops
            # — TODO confirm against data.load)
            self.rels = nn.Parameter(
                torch.randn(k, k, len(self.i2r) + 1))  # TODO initialize properly (like distmult?)
            # node embeddings (layer 0)
            self.nodes = nn.Parameter(torch.randn(
                n, k))  # TODO initialize properly (like embedding?)
            self.layers = nn.ModuleList()
            for _ in range(depth):
                self.layers.append(GATLayer(graph))
            self.toclass = nn.Sequential(nn.Linear(k, num_classes),
                                         nn.Softmax(dim=-1))

        def forward(self, sample=None):
            # Full-graph forward: returns class probabilities for every node.
            nodes = self.nodes
            for layer in self.layers:
                nodes = layer(nodes, self.rels, sample=sample)
            return self.toclass(nodes)

    model = Model(k=arg.emb_size, depth=arg.depth, num_classes=num_cls,
                  graph=(i2n, i2r, edges))
    if torch.cuda.is_available():
        model.cuda()
        train_lbl = train_lbl.cuda()
        test_lbl = test_lbl.cuda()

    opt = torch.optim.Adam(model.parameters(), lr=arg.lr)

    # Full-batch training: one forward over the whole graph per epoch.
    # NOTE(review): the model outputs softmax probabilities but
    # F.cross_entropy expects raw logits — double-normalization; confirm
    # whether this is intentional.
    for e in tqdm.trange(arg.epochs):
        opt.zero_grad()
        cls = model()[train_idx, :]
        loss = F.cross_entropy(cls, train_lbl)
        loss.backward()
        opt.step()
        print(e, loss.item(), e)

    # Evaluate
    with torch.no_grad():
        cls = model()[train_idx, :]
        agreement = cls.argmax(dim=1) == train_lbl
        accuracy = float(agreement.sum()) / agreement.size(0)
        print(' train accuracy ', float(accuracy))

        cls = model()[test_idx, :]
        agreement = cls.argmax(dim=1) == test_lbl
        accuracy = float(agreement.sum()) / agreement.size(0)
        print(' test accuracy ', float(accuracy))

    print('training finished.')
def forward(self, images, features, proposals, targets=None):
    """ Same as StandardROIHeads.forward but add logic for subclass.

    When `self.subclass_on` is set, additionally runs `self.subclass_head`
    on the pooled box features: during training it contributes a
    `loss_subclass` cross-entropy term; during inference it attaches
    `pred_subclass_prob` to each predicted instance.

    Returns (proposals, losses) in training, (pred_instances, {}) otherwise
    — same contract as StandardROIHeads.forward.
    """
    if not self.subclass_on:
        return super().forward(images, features, proposals, targets)

    # --- start copy -------------------------------------------------------
    # This section mirrors StandardROIHeads.forward up to the box predictor.
    del images
    if self.training:
        proposals = self.label_and_sample_proposals(proposals, targets)
        # NOTE: `has_gt` = False for negatives and we must manually register `gt_subclasses`,
        # because custom gt_* fields will not be automatically registered in sampled proposals.
        for pp_per_im in proposals:
            if not pp_per_im.has("gt_subclasses"):
                background_subcls_idx = 0
                pp_per_im.gt_subclasses = torch.cuda.LongTensor(
                    len(pp_per_im)
                ).fill_(background_subcls_idx)
    del targets

    features_list = [features[f] for f in self.in_features]

    box_features = self.box_pooler(
        features_list, [x.proposal_boxes for x in proposals]
    )
    box_features = self.box_head(box_features)
    predictions = self.box_predictor(box_features)
    # --- end copy ---------------------------------------------------------

    # NOTE: don't delete box_features, keep it temporarily
    # del box_features

    # Flatten pooled features so the subclass head sees one vector per box.
    box_features = box_features.view(
        box_features.shape[0], np.prod(box_features.shape[1:])
    )
    pred_subclass_logits = self.subclass_head(box_features)

    if self.training:
        losses = self.box_predictor.losses(predictions, proposals)
        # During training the proposals used by the box head are
        # used by the mask, keypoint (and densepose) heads.
        losses.update(self._forward_mask(features, proposals))
        losses.update(self._forward_keypoint(features, proposals))

        # subclass head
        gt_subclasses = cat([p.gt_subclasses for p in proposals], dim=0)
        loss_subclass = F.cross_entropy(
            pred_subclass_logits, gt_subclasses, reduction="mean"
        )
        losses.update({"loss_subclass": loss_subclass})

        return proposals, losses
    else:
        pred_instances, kept_indices = self.box_predictor.inference(
            predictions, proposals
        )
        # During inference cascaded prediction is used: the mask and keypoints
        # heads are only applied to the top scoring box detections.
        pred_instances = self.forward_with_given_boxes(features, pred_instances)

        # subclass head: gather per-kept-box probabilities back onto instances.
        probs = F.softmax(pred_subclass_logits, dim=-1)
        for pred_instances_i, kept_indices_i in zip(pred_instances, kept_indices):
            pred_instances_i.pred_subclass_prob = torch.index_select(
                probs,
                dim=0,
                index=kept_indices_i.to(torch.int64),
            )

        if torch.onnx.is_in_onnx_export():
            assert len(pred_instances) == 1
            # Alias the output tensor so it gets a stable name in the ONNX graph.
            pred_instances[0].pred_subclass_prob = alias(
                pred_instances[0].pred_subclass_prob, "subclass_prob_nms"
            )

        return pred_instances, {}
# NOTE(review): the first three statements below are the tail of an accuracy
# helper whose `def` line lies outside this chunk (presumably
# `compute_accuracy(model, data_loader)`, given the call at the bottom) —
# confirm against the full file before relying on this nesting.
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    # Percentage of correctly classified examples over the whole loader.
    return correct_pred.float() / num_examples * 100


# --- top-level training script ---------------------------------------------
# Relies on globals: model, train_loader, optimizer, device, num_epochs.
start_time = time.time()
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        # Model returns both raw logits and probabilities; the loss uses logits.
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:  # i.e. every 50th batch
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch + 1, num_epochs, batch_idx,
                     len(train_loader), cost))

    # Switch to eval mode before measuring training accuracy.
    model = model.eval()
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
        epoch + 1, num_epochs,
        compute_accuracy(model, train_loader)))
def _add_losses(self, sigma_rpn=3.0):
    """Build the Faster R-CNN training loss, with optional mixup-style blending.

    With cfg.TRAIN.IMS_PER_BATCH == 2 the RPN losses of the two mixed images
    are combined with weight `cfg.lamda`; `cfg.loss_strategy` then selects
    which of the RPN / RCNN loss groups enter the total.  All components are
    stored in `self._losses` (and mirrored into `self._event_summaries`).

    Returns the total scalar loss.
    """
    if cfg.TRAIN.IMS_PER_BATCH == 1:
        # RPN, class loss
        rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
            -1, 2)  # [fg score, bg score] per anchor (num_anchors*width*height rows)
        rpn_label = self._anchor_targets['rpn_labels'].view(-1)
        rpn_select = (rpn_label.data != -1).nonzero().view(-1)  # sampled fg/bg anchors
        rpn_cls_score = rpn_cls_score.index_select(
            0, rpn_select).contiguous().view(-1, 2)  # [256, 2]
        rpn_label = rpn_label.index_select(0, rpn_select).contiguous().view(
            -1)  # [256]
        # RPN classification loss
        rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)

        # RPN, bbox loss
        rpn_bbox_pred = self._predictions[
            'rpn_bbox_pred']  # batch * h * w * (num_anchors*4) predicted box deltas
        rpn_bbox_targets = self._anchor_targets[
            'rpn_bbox_targets']  # [1,height,width,9*4] regression targets w.r.t. gt
        rpn_bbox_inside_weights = self._anchor_targets[
            'rpn_bbox_inside_weights']  # [1,height,width,9*4]
        rpn_bbox_outside_weights = self._anchor_targets[
            'rpn_bbox_outside_weights']  # [1,height,width,9*4]
        # RPN regression loss
        rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred,
                                            rpn_bbox_targets,
                                            rpn_bbox_inside_weights,
                                            rpn_bbox_outside_weights,
                                            sigma=sigma_rpn,
                                            dim=[1, 2, 3])
    elif cfg.TRAIN.IMS_PER_BATCH == 2:
        ############ img1
        # RPN, class loss
        rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
            -1, 2)  # [fg score, bg score] per anchor
        rpn_label = self._anchor_targets['rpn_labels'].view(-1)
        rpn_select = (rpn_label.data != -1).nonzero().view(-1)  # sampled fg/bg anchors
        rpn_cls_score = rpn_cls_score.index_select(
            0, rpn_select).contiguous().view(-1, 2)  # [256, 2]
        rpn_label = rpn_label.index_select(0, rpn_select).contiguous().view(
            -1)  # [256]
        # RPN classification loss against image-1 targets
        rpn_cross_entropy1 = F.cross_entropy(rpn_cls_score, rpn_label)

        # RPN, bbox loss
        rpn_bbox_pred = self._predictions[
            'rpn_bbox_pred']  # batch * h * w * (num_anchors*4) predicted box deltas
        rpn_bbox_targets = self._anchor_targets[
            'rpn_bbox_targets']  # [1,height,width,9*4] regression targets w.r.t. gt
        rpn_bbox_inside_weights = self._anchor_targets[
            'rpn_bbox_inside_weights']  # [1,height,width,9*4]
        rpn_bbox_outside_weights = self._anchor_targets[
            'rpn_bbox_outside_weights']  # [1,height,width,9*4]
        # RPN regression loss against image-1 targets
        rpn_loss_box1 = self._smooth_l1_loss(rpn_bbox_pred,
                                             rpn_bbox_targets,
                                             rpn_bbox_inside_weights,
                                             rpn_bbox_outside_weights,
                                             sigma=sigma_rpn,
                                             dim=[1, 2, 3])

        ############ img2
        # RPN, class loss: same predictions, scored against image-2 targets.
        rpn_label2 = self._anchor_targets['rpn_labels2'].view(-1)
        rpn_select2 = (rpn_label2.data != -1).nonzero().view(-1)  # sampled fg/bg anchors
        rpn_cls_score = self._predictions['rpn_cls_score_reshape'].view(
            -1, 2)  # [fg score, bg score] per anchor
        rpn_cls_score2 = rpn_cls_score.index_select(
            0, rpn_select2).contiguous().view(-1, 2)  # [256, 2]
        rpn_label2 = rpn_label2.index_select(
            0, rpn_select2).contiguous().view(-1)  # [256]
        # RPN classification loss against image-2 targets
        rpn_cross_entropy2 = F.cross_entropy(rpn_cls_score2, rpn_label2)

        # RPN, bbox loss (reuses rpn_bbox_pred from above)
        rpn_bbox_targets2 = self._anchor_targets[
            'rpn_bbox_targets2']  # [1,height,width,9*4] regression targets w.r.t. gt
        rpn_bbox_inside_weights2 = self._anchor_targets[
            'rpn_bbox_inside_weights2']  # [1,height,width,9*4]
        rpn_bbox_outside_weights2 = self._anchor_targets[
            'rpn_bbox_outside_weights2']  # [1,height,width,9*4]
        # RPN regression loss against image-2 targets
        rpn_loss_box2 = self._smooth_l1_loss(rpn_bbox_pred,
                                             rpn_bbox_targets2,
                                             rpn_bbox_inside_weights2,
                                             rpn_bbox_outside_weights2,
                                             sigma=sigma_rpn,
                                             dim=[1, 2, 3])

        # Blend the two images' RPN losses with mixup weight lambda.
        lam = cfg.lamda
        rpn_cross_entropy = lam * rpn_cross_entropy1 + (
            1 - lam) * rpn_cross_entropy2
        rpn_loss_box = lam * rpn_loss_box1 + (1 - lam) * rpn_loss_box2
    else:
        raise Exception(
            "check cfg.TRAIN.IMS_PER_BACTH in /lib/model/config.py or experiments/cfgs/*.yml"
        )

    if cfg.loss_strategy == 'RCNN_ONLY' or cfg.loss_strategy == 'RCNN+RPN' or cfg.loss_strategy == 'NOCHANGE':
        # RCNN, class loss
        cls_score = self._predictions["cls_score"]  # [256,21]
        label = self._proposal_targets["labels"].view(-1)  # [256]
        # RCNN classification loss
        cross_entropy = F.cross_entropy(
            cls_score.view(-1, self._num_classes), label)

        # RCNN, bbox loss
        bbox_pred = self._predictions['bbox_pred']  # [256,84]
        bbox_targets = self._proposal_targets['bbox_targets']  # [256,84]
        bbox_inside_weights = self._proposal_targets[
            'bbox_inside_weights']  # [256,84]
        bbox_outside_weights = self._proposal_targets[
            'bbox_outside_weights']  # [256,84]
        # RCNN regression loss
        loss_box = self._smooth_l1_loss(bbox_pred, bbox_targets,
                                        bbox_inside_weights,
                                        bbox_outside_weights)

        if cfg.loss_strategy == 'RCNN_ONLY' or cfg.loss_strategy == 'RCNN+RPN':
            # Mixup on the RCNN head: score the same predictions against a
            # permuted set of proposal targets (self.rcnn_mix_index).
            lam = cfg.lamda
            label2 = self._proposal_targets['labels'][
                self.rcnn_mix_index, :].view(-1)
            cross_entropy2 = F.cross_entropy(
                cls_score.view(-1, self._num_classes), label2)
            cross_entropy = lam * cross_entropy + (1 - lam) * cross_entropy2

            bbox_targets2 = self._proposal_targets['bbox_targets'][
                self.rcnn_mix_index, :]
            bbox_inside_weights2 = self._proposal_targets[
                'bbox_inside_weights'][self.rcnn_mix_index, :]
            bbox_outside_weights2 = self._proposal_targets[
                'bbox_outside_weights'][self.rcnn_mix_index, :]
            loss_box2 = self._smooth_l1_loss(bbox_pred, bbox_targets2,
                                             bbox_inside_weights2,
                                             bbox_outside_weights2)
            loss_box = lam * loss_box + (1 - lam) * loss_box2

    if cfg.loss_strategy == 'RPN_ONLY':
        # RCNN losses are intentionally skipped.
        pass

    # Assemble the total loss according to the configured strategy.
    if cfg.loss_strategy == 'RCNN+RPN' or cfg.loss_strategy == 'NOCHANGE':
        self._losses['cross_entropy'] = cross_entropy
        self._losses['loss_box'] = loss_box
        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box
        loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
    elif cfg.loss_strategy == 'RPN_ONLY':
        loss = rpn_cross_entropy + rpn_loss_box
        self._losses['rpn_cross_entropy'] = rpn_cross_entropy
        self._losses['rpn_loss_box'] = rpn_loss_box
    elif cfg.loss_strategy == 'RCNN_ONLY':
        loss = cross_entropy + loss_box
        self._losses['cross_entropy'] = cross_entropy
        self._losses['loss_box'] = loss_box
    else:
        raise Exception(
            "check cfg.TRAIN.loss_strategy in /lib/model/config.py or experiments/cfgs/*.yml"
        )
    ##################################################################
    self._losses['total_loss'] = loss

    # Mirror every stored loss into the event summaries for logging.
    for k in self._losses.keys():
        self._event_summaries[k] = self._losses[k]

    return loss
def training_step(self, batch):
    """One training step: forward the images and return the cross-entropy loss."""
    inputs, targets = batch
    predictions = self(inputs)  # forward pass
    return F.cross_entropy(predictions, targets)
def _loss(self, logits, labels): return F.cross_entropy(logits, labels.view(-1).long())
def train(train_loader, model, criterion, optimizer, epoch, logger, writer, args):
    """One training epoch for a seq2seq captioning model with an auxiliary
    code-length head.

    Args:
        train_loader: yields (imgs, caps, caplens, chidx) batches; caps is
            (seq_len, batch) — time-major, presumably — TODO confirm.
        model: returns (scores, aux_codelen) given (imgs, tgt).
        criterion: token-level loss over flattened (N, vocab) scores.
        optimizer, epoch, logger, writer, args: standard training plumbing;
            args provides alpha_codelen and grad_clip.

    Relies on module-level globals `device`, `AverageMeter`, `accuracy`,
    and `tqdm`.
    """
    model.train()

    losses = AverageMeter()  # loss (per word decoded)
    aux_top1 = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()  # top5 accuracy

    # Batches
    progress = tqdm(enumerate(train_loader), total=len(train_loader), leave=False)
    for i, (imgs, caps, caplens, chidx) in progress:
        # Move to GPU, if available.
        # Teacher forcing: input is caps[:-1], target is caps shifted by one.
        tgt = caps[:-1].to(device)
        tgt_y = caps[1:].to(device).permute(1, 0)
        imgs = imgs.to(device)
        # caps = caps.to(device)
        caplens = caplens.to(device)
        chidx = chidx.to(device)

        # Forward prop.
        # scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(feature, caps, caplens)
        scores, aux_codelen = model(imgs, tgt)
        # print(scores.size())

        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        # targets = caps_sorted[:, 1:]
        # print(scores.topk(1, dim=-1).indices.view(len(scores), -1))
        logger(scores, chidx, caps, 'train: ')

        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this

        # Calculate loss: flatten to (batch*seq, vocab) vs (batch*seq,).
        # loss = criterion(scores.permute(1, 0, 2).view(-1, scores.size(-1)), tgt_y.view(-1, tgt_y.size(-1)))
        scores = scores.permute(1, 0, 2).reshape(-1, scores.size(-1))
        tgt_y = tgt_y.reshape(-1)
        # print(scores.size(), tgt_y.size())
        loss = criterion(scores, tgt_y)
        # loss_aux = criterion(aux_out, chidx)

        # Auxiliary code-length prediction loss; the `- 3` offset presumably
        # removes special tokens from the length — TODO confirm.
        loss += args.alpha_codelen * F.cross_entropy(aux_codelen, caplens - 3)

        # Add doubly stochastic attention regularization
        # loss += args.alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()

        # Back prop.
        optimizer.zero_grad()
        # (loss + 10 * loss_aux).backward()
        loss.backward()

        # Clip gradients
        if args.grad_clip is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)

        # Update weights
        optimizer.step()

        # Keep track of metrics (weighted by number of decoded words).
        losses.update(loss.item(), sum(caplens - 1))
        top5.update(accuracy(scores, tgt_y, 5), sum(caplens - 1))
        top1.update(accuracy(scores, tgt_y, 1), sum(caplens - 1))
        progress.set_description(
            "train loss: %.4f, top1: %2.2f%%, top5: %2.2f%%"
            % (losses.avg, top1.avg, top5.avg))

    # Log per-epoch averages.
    writer.add_scalar('Loss/train', losses.avg, epoch)
    writer.add_scalar('Accuracy/train', top1.avg, epoch)
def train(self, current_state, action, next_state, optimizer, eta = 1e-3):
    """Compute a curiosity reward for (state, action) and periodically train
    the curiosity network from the replay buffers.

    Args:
        current_state, action: encoded together into the network input.
        next_state: ground-truth outcome; also used as the prediction target.
        optimizer: optimizer over self.curiosity_net parameters.
        eta: scaling of the hand-crafted misclassification reward.

    Returns the scaled curiosity reward (a plain number, not a tensor).
    """
    current_state = self.oneHotEncoding(np.append(current_state,action))
    current_state = torch.from_numpy(current_state).float()
    # compute curiosity reward
    self.curiosity_net.eval()
    current_state = current_state.unsqueeze(0).to(self.device)
    pred_state=self.curiosity_net.forward(current_state)
    #next_state = torch.from_numpy(next_state).float().to(self.device)
    loss = 0
    for i in range(len(pred_state)):
        target = torch.from_numpy(np.array([next_state[i]])).to(self.device)
        #loss += F.cross_entropy(input=pred_state[i],target=target)
        #loss += self.marginal_loss(pred_state[i], target)
        # Hand crafted loss. +1 for every missclasification in the one-hot encoding
        if np.argmax(pred_state[i].cpu().detach().numpy()) != target.cpu().detach().numpy()[0]:
            loss += 1
    loss = eta * loss
    # save states into memory buffer
    self.push(next_state, current_state)
    # Train every `train_every` calls once the buffer holds a full batch.
    if (self.train_counter % self.train_every) == 0 and (len(self.memory_target) >= self.batch_size):
        batch_mask = self.sample_index(self.batch_size)
        loss_batch = 0
        self.curiosity_net.train()
        optimizer.zero_grad()
        current_batch = [self.memory_prediction[i] for i in batch_mask]
        current_batch = torch.cat(current_batch)
        current_batch = torch.reshape(current_batch, (self.batch_size,-1))
        prediction_batch = self.curiosity_net.forward(current_batch)
        # One cross-entropy term per predicted state component.
        for j in range(len(next_state)):
            prediction_batch_sub = prediction_batch[j]
            prediction_batch_sub = torch.reshape(prediction_batch_sub, (self.batch_size,-1))
            target_batch = [torch.from_numpy(np.array([self.memory_target[i][j]])).to(self.device) for i in batch_mask]
            target_batch = torch.cat(target_batch)
            #target_batch = torch.reshape(target_batch, (self.batch_size))
            #print(target_batch.size())
            #print(target_batch)
            #print(prediction_batch.size())
            #print(prediction_batch)
            loss_batch += F.cross_entropy(input=prediction_batch_sub,target=target_batch)
            #loss_batch += self.marginal_loss(prediction_batch_sub, target_batch)
        # Average over the batch before backprop.
        loss_batch = (1/self.batch_size) * loss_batch
        #print('TRAINED')
        #print('LOSS: ', loss_batch)
        loss_batch.backward(retain_graph=True)
        optimizer.step()
    # Incremented on every call, otherwise the modulo gate above would
    # stick and training would happen at most once.
    self.train_counter += 1
    #return loss.cpu().detach().numpy()
    #return loss.detach().numpy()
    return loss
def main(args):
    """Train the neural-motifs sg2im GAN on Visual Genome.

    Builds the data loaders and combined model, then runs the alternating
    generator / discriminator optimization loop with periodic logging to
    TensorBoard and checkpointing.  Relies on module-level helpers:
    `check_args`, `VG`, `VGDataLoader`, `neural_motifs_sg2im_model`,
    `get_gan_losses`, `calculate_model_losses`, `add_loss`, `LossManager`,
    `check_model`, `timeit`.
    """
    print(args)
    check_args(args)
    if not exists(args.output_dir):
        os.makedirs(args.output_dir)
    summary_writer = SummaryWriter(args.output_dir)

    # if args.coco:
    #     train, val = CocoDetection.splits()
    #     val.ids = val.ids[:args.val_size]
    #     train.ids = train.ids
    #     train_loader, val_loader = CocoDataLoader.splits(train, val, batch_size=args.batch_size,
    #                                                      num_workers=args.num_workers,
    #                                                      num_gpus=args.num_gpus)
    # else:
    train, val, _ = VG.splits(transform=transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.ToTensor(),
        # ImageNet normalization constants.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]))
    train_loader, val_loader = VGDataLoader.splits(
        train, val, batch_size=args.batch_size,
        num_workers=args.num_workers,
        num_gpus=args.num_gpus)
    print(train.ind_to_classes)

    all_in_one_model = neural_motifs_sg2im_model(args, train.ind_to_classes)
    # Freeze the detector
    # for n, param in all_in_one_model.detector.named_parameters():
    #     param.requires_grad = False
    all_in_one_model.cuda()
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    # Resume counters/state from the model wrapper (supports restart).
    t, epoch, checkpoint = all_in_one_model.t, all_in_one_model.epoch, all_in_one_model.checkpoint
    while True:
        if t >= args.num_iterations:
            break
        epoch += 1
        print('Starting epoch %d' % epoch)

        for step, batch in enumerate(
                tqdm(train_loader, desc='Training Epoch %d' % epoch,
                     total=len(train_loader))):
            # Optionally freeze batch-norm style behavior after a warmup
            # and rebuild the optimizer at that point.
            if t == args.eval_mode_after:
                print('switching to eval mode')
                all_in_one_model.model.eval()
                all_in_one_model.optimizer = optim.Adam(
                    all_in_one_model.parameters(), lr=args.learning_rate)

            # Scheduled adjustment of the L1 pixel-loss weight.
            if args.l1_mode == "change" and t in args.l1_change_iters:
                old_l1_weight = args.l1_pixel_loss_weight
                args.l1_pixel_loss_weight = args.l1_change_vals[
                    args.l1_change_iters.index(t)]
                print(
                    "Change l1_pixel_loss_weight from %10.f to %.10f at iteration %d"
                    % (old_l1_weight, args.l1_pixel_loss_weight, t))
            elif args.l1_mode == "change_linear":
                old_l1_weight = args.l1_pixel_loss_weight
                args.l1_pixel_loss_weight = args.l1_change_vals[0] + (
                    args.l1_change_vals[1] -
                    args.l1_change_vals[0]) * t / args.num_iterations
                print(
                    "Change l1_pixel_loss_weight from %10.f to %.10f at iteration %d"
                    % (old_l1_weight, args.l1_pixel_loss_weight, t))

            # Scheduled adjustment of the injected noise std.
            if args.noise_std_mode == "change" and t in args.noise_std_change_iters:
                old_noise_std = args.noise_std
                args.noise_std = args.noise_std_change_vals[
                    args.noise_std_change_iters.index(t)]
                print("Change noise_std from %.10f to %.10f at iteration %d"
                      % (old_noise_std, args.noise_std, t))
            elif args.noise_std_mode == "change_linear":
                old_noise_std = args.noise_std
                args.noise_std = args.noise_std_change_vals[0] + (
                    args.noise_std_change_vals[1] -
                    args.noise_std_change_vals[0]) * t / args.num_iterations
                print("Change noise_std from %.10f to %.10f at iteration %d"
                      % (old_noise_std, args.noise_std, t))

            t += 1

            with timeit('forward', args.timing):
                # The wrapper's __getitem__ runs the full forward pass.
                result = all_in_one_model[batch]
                imgs, imgs_pred, objs, g_scores_fake_crop, g_obj_scores_fake_crop, g_scores_fake_img, \
                d_scores_fake_crop, d_obj_scores_fake_crop, d_scores_real_crop, d_obj_scores_real_crop, \
                d_scores_fake_img, d_scores_real_img = result.imgs, result.imgs_pred, result.objs, \
                    result.g_scores_fake_crop, result.g_obj_scores_fake_crop, result.g_scores_fake_img, \
                    result.d_scores_fake_crop, result.d_obj_scores_fake_crop, result.d_scores_real_crop, \
                    result.d_obj_scores_real_crop, result.d_scores_fake_img, result.d_scores_real_img

            with timeit('loss', args.timing):
                # Generator-side losses: reconstruction + adversarial terms.
                total_loss, losses = calculate_model_losses(
                    args, imgs, imgs_pred)
                if all_in_one_model.obj_discriminator is not None:
                    # Auxiliary-classifier loss: fake crops must be classifiable.
                    total_loss = add_loss(
                        total_loss,
                        F.cross_entropy(g_obj_scores_fake_crop, objs),
                        losses, 'ac_loss', args.ac_loss_weight)
                    weight = args.discriminator_loss_weight * args.d_obj_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_crop),
                                          losses, 'g_gan_obj_loss', weight)

                if all_in_one_model.img_discriminator is not None:
                    weight = args.discriminator_loss_weight * args.d_img_weight
                    total_loss = add_loss(total_loss,
                                          gan_g_loss(g_scores_fake_img),
                                          losses, 'g_gan_img_loss', weight)

            losses['total_loss'] = total_loss.item()
            if not math.isfinite(losses['total_loss']):
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            with timeit('backward', args.timing):
                all_in_one_model.optimizer.zero_grad()
                total_loss.backward()
                all_in_one_model.optimizer.step()

            # Discriminator updates (object crops).
            if all_in_one_model.obj_discriminator is not None:
                with timeit('d_obj loss', args.timing):
                    d_obj_losses = LossManager()
                    d_obj_gan_loss = gan_d_loss(d_scores_real_crop,
                                                d_scores_fake_crop)
                    d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
                    d_obj_losses.add_loss(
                        F.cross_entropy(d_obj_scores_real_crop, objs),
                        'd_ac_loss_real')
                    d_obj_losses.add_loss(
                        F.cross_entropy(d_obj_scores_fake_crop, objs),
                        'd_ac_loss_fake')
                with timeit('d_obj backward', args.timing):
                    all_in_one_model.optimizer_d_obj.zero_grad()
                    d_obj_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_obj.step()

            # Discriminator updates (whole image).
            if all_in_one_model.img_discriminator is not None:
                with timeit('d_img loss', args.timing):
                    d_img_losses = LossManager()
                    d_img_gan_loss = gan_d_loss(d_scores_real_img,
                                                d_scores_fake_img)
                    d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')
                with timeit('d_img backward', args.timing):
                    all_in_one_model.optimizer_d_img.zero_grad()
                    d_img_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_img.step()

            # Periodic console/TensorBoard logging.
            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                G_loss_list = []
                for name, val in losses.items():
                    G_loss_list.append('[%s]: %.4f' % (name, val))
                    checkpoint['losses'][name].append(val)
                    summary_writer.add_scalar("G_%s" % name, val, t)
                print("G: %s" % ", ".join(G_loss_list))
                checkpoint['losses_ts'].append(t)

                if all_in_one_model.obj_discriminator is not None:
                    D_obj_loss_list = []
                    for name, val in d_obj_losses.items():
                        D_obj_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_obj_%s" % name, val, t)
                    print("D_obj: %s" % ", ".join(D_obj_loss_list))

                if all_in_one_model.img_discriminator is not None:
                    D_img_loss_list = []
                    for name, val in d_img_losses.items():
                        D_img_loss_list.append('[%s]: %.4f' % (name, val))
                        checkpoint['d_losses'][name].append(val)
                        summary_writer.add_scalar("D_img_%s" % name, val, t)
                    print("D_img: %s" % ", ".join(D_img_loss_list))

            # Periodic evaluation + checkpoint dump.
            if t % args.checkpoint_every == 0:
                print('checking on train')
                train_results = check_model(args, train_loader,
                                            all_in_one_model)
                t_losses, t_samples = train_results
                checkpoint['train_samples'].append(t_samples)
                checkpoint['checkpoint_ts'].append(t)
                for name, images in t_samples.items():
                    summary_writer.add_image("train_%s" % name, images, t)

                print('checking on val')
                val_results = check_model(args, val_loader, all_in_one_model)
                val_losses, val_samples = val_results
                checkpoint['val_samples'].append(val_samples)
                for name, images in val_samples.items():
                    summary_writer.add_image("val_%s" % name, images, t)
                for k, v in val_losses.items():
                    checkpoint['val_losses'][k].append(v)
                    summary_writer.add_scalar("val_%s" % k, v, t)

                checkpoint['model_state'] = all_in_one_model.model.state_dict()
                if all_in_one_model.obj_discriminator is not None:
                    checkpoint[
                        'd_obj_state'] = all_in_one_model.obj_discriminator.state_dict(
                        )
                    checkpoint[
                        'd_obj_optim_state'] = all_in_one_model.optimizer_d_obj.state_dict(
                        )
                if all_in_one_model.img_discriminator is not None:
                    checkpoint[
                        'd_img_state'] = all_in_one_model.img_discriminator.state_dict(
                        )
                    checkpoint[
                        'd_img_optim_state'] = all_in_one_model.optimizer_d_img.state_dict(
                        )
                checkpoint[
                    'optim_state'] = all_in_one_model.optimizer.state_dict()
                checkpoint['counters']['t'] = t
                checkpoint['counters']['epoch'] = epoch
                checkpoint_path = os.path.join(
                    args.output_dir,
                    '%s_with_model.pt' % args.checkpoint_name)
                print('Saving checkpoint to ', checkpoint_path)
                torch.save(checkpoint, checkpoint_path)

                # Save another checkpoint without any model or optim state
                checkpoint_path = os.path.join(
                    args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
                key_blacklist = [
                    'model_state', 'optim_state', 'model_best_state',
                    'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                    'd_img_state', 'd_img_optim_state', 'd_img_best_state'
                ]
                small_checkpoint = {}
                for k, v in checkpoint.items():
                    if k not in key_blacklist:
                        small_checkpoint[k] = v
                torch.save(small_checkpoint, checkpoint_path)
# Prototypical-network training loop (top-level script).
# Relies on globals: model, train_loader, optimizer, lr_scheduler, device,
# num_epochs, N_shot, N_query, train_way, euclidean_metric, count_acc,
# model_dir.  Assumes each batch is laid out as support examples first,
# then query examples.
model.train()
for epoch in range(num_epochs):
    for i, batch in enumerate(train_loader, 1):
        data, _ = batch
        data = data.to(device)
        # First N_shot*train_way samples are the support set.
        p = N_shot * train_way
        data_support, data_query = data[:p], data[p:]
        # Class prototypes: mean embedding over the N_shot support samples.
        proto = model(data_support)
        proto = proto.reshape(N_shot, train_way, -1).mean(dim=0)
        # Query labels 0..train_way-1 repeated per query; assumes the query
        # samples are ordered class-by-class within each repeat — TODO confirm
        # against the sampler.
        label = torch.arange(train_way).repeat(N_query)
        label = label.type(torch.cuda.LongTensor)
        # Classify queries by (negative) distance to each prototype.
        logits = euclidean_metric(model(data_query), proto)
        loss = F.cross_entropy(logits, label)
        acc = count_acc(logits, label)
        print('epoch {}, train {}/{}, loss={:.4f} acc={:.4f}'.format(
            epoch + 1, i, len(train_loader), loss.item(), acc))
        # NOTE(review): the checkpoint is overwritten every iteration, and
        # before this iteration's optimizer step.
        torch.save(model.state_dict(),
                   os.path.join(model_dir, 'p1_model.pth'))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Drop references so the graph/activations can be freed.
        proto = None
        logits = None
        loss = None
    lr_scheduler.step()
def train(args):
    """Train the hierarchical attention network (word GRU + sentence GRU).

    Builds the data iterators, trains both attention models jointly with
    separate Adam optimizers, logs training accuracy every
    `args.log_interval` steps, evaluates every `args.test_interval` steps,
    saves the best models, and early-stops after `args.early_stopping`
    steps without improvement (by raising KeyboardInterrupt, matching the
    surrounding project's convention).

    Relies on module-level helpers: `data_processor`, `AttentionWordGRU`,
    `AttentionSentGRU`, `eval`, `save`.
    """
    text = data_processor.load_data(args)
    train_iter = data_processor.gen_batch(args, text)
    dev_iter = data_processor.gen_test(text, args)
    print('加载数据完成')
    word_attn_model = AttentionWordGRU(args)
    sent_attn_model = AttentionSentGRU(args)
    # BUGFIX: the original did `model = None` followed by `model.cuda()`,
    # which raised AttributeError whenever args.cuda was set.  The models
    # actually used below are the two attention sub-models, so move them
    # to the GPU instead.
    if args.cuda:
        word_attn_model.cuda()
        sent_attn_model.cuda()
    word_optimizer = torch.optim.Adam(word_attn_model.parameters(), lr=args.lr)
    sent_optimizer = torch.optim.Adam(sent_attn_model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    for epoch in range(1, args.epoch + 1):
        for i in range(args.iterations):
            word_optimizer.zero_grad()
            sent_optimizer.zero_grad()
            # doc_texts has shape (batch_size, sents_num, words_num)
            doc_texts, targets = next(train_iter)
            word_attn_vectors = None
            for doc_text in doc_texts:
                # word_attn_vector: (sent_num, hidden_size)
                word_attn_vector = word_attn_model(doc_text)
                # reshape to (1, sent_num, hidden_size) for batching
                word_attn_vector = word_attn_vector.unsqueeze(0)
                if word_attn_vectors is None:
                    word_attn_vectors = word_attn_vector
                else:
                    # word_attn_vectors: (batch_size, sent_num, hidden_size)
                    word_attn_vectors = torch.cat(
                        (word_attn_vectors, word_attn_vector), 0)
            logits = sent_attn_model(word_attn_vectors)
            loss = F.cross_entropy(logits, targets)
            loss.backward()
            word_optimizer.step()
            sent_optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns (max values, argmax indices);
                # the indices are the predicted classes.
                corrects = (torch.max(logits, 1)[1] == targets).sum()
                train_acc = 100.0 * corrects / args.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        args.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, word_attn_model, sent_attn_model)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(
                            best_acc))
                        save(word_attn_model, args.save_dir, 'best', steps)
                        save(sent_attn_model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
def compute_loss(
    self,
    targets: List[Dict[str, Tensor]],
    head_outputs: Dict[str, Tensor],
    anchors: List[Tensor],
    matched_idxs: List[Tensor],
) -> Dict[str, Tensor]:
    """SSD multibox loss: smooth-L1 box regression on foreground anchors and
    cross-entropy classification with hard negative mining.

    Args:
        targets: per-image dicts with "boxes" and "labels".
        head_outputs: "bbox_regression" (N, A, 4) and "cls_logits" (N, A, C).
        anchors: per-image default boxes.
        matched_idxs: per-image, per-anchor index of the matched gt box
            (negative for background anchors).

    Returns a dict with "bbox_regression" and "classification" losses, both
    normalized by the number of foreground anchors.
    """
    bbox_regression = head_outputs["bbox_regression"]
    cls_logits = head_outputs["cls_logits"]

    # Match original targets with default boxes
    num_foreground = 0
    bbox_loss = []
    cls_targets = []
    for (
        targets_per_image,
        bbox_regression_per_image,
        cls_logits_per_image,
        anchors_per_image,
        matched_idxs_per_image,
    ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs):
        # produce the matching between boxes and targets
        foreground_idxs_per_image = torch.where(matched_idxs_per_image >= 0)[0]
        foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image]
        num_foreground += foreground_matched_idxs_per_image.numel()

        # Calculate regression loss (foreground anchors only)
        matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image]
        bbox_regression_per_image = bbox_regression_per_image[foreground_idxs_per_image, :]
        anchors_per_image = anchors_per_image[foreground_idxs_per_image, :]
        target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image)
        bbox_loss.append(
            torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum")
        )

        # Estimate ground truth for class targets: 0 (background) everywhere,
        # matched gt label on foreground anchors.
        gt_classes_target = torch.zeros(
            (cls_logits_per_image.size(0),),
            dtype=targets_per_image["labels"].dtype,
            device=targets_per_image["labels"].device,
        )
        gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][
            foreground_matched_idxs_per_image
        ]
        cls_targets.append(gt_classes_target)

    bbox_loss = torch.stack(bbox_loss)
    cls_targets = torch.stack(cls_targets)

    # Calculate classification loss (per-anchor, unreduced, reshaped back to
    # (num_images, num_anchors) for the mining step below).
    num_classes = cls_logits.size(-1)
    cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view(
        cls_targets.size()
    )

    # Hard Negative Sampling: keep the `neg_to_pos_ratio` highest-loss
    # background anchors per image.
    foreground_idxs = cls_targets > 0
    num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True)
    # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio
    negative_loss = cls_loss.clone()
    negative_loss[foreground_idxs] = -float("inf")  # use -inf to detect positive values that creeped in the sample
    values, idx = negative_loss.sort(1, descending=True)
    # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values))
    # Rank of each anchor in the sorted-by-loss order; keep the top ranks.
    background_idxs = idx.sort(1)[1] < num_negative

    N = max(1, num_foreground)
    return {
        "bbox_regression": bbox_loss.sum() / N,
        "classification": (cls_loss[foreground_idxs].sum() + cls_loss[background_idxs].sum()) / N,
    }
def validation_step(self, batch, batch_idx):
    # OPTIONAL Lightning hook: report the batch validation loss.
    inputs, targets = batch
    predictions = self.forward(inputs)
    loss = F.cross_entropy(predictions, targets)
    return {'val_loss': loss}
def training_step(self, batch, batch_idx):
    """Lightning training hook: forward the batch, log and return the CE loss.

    Bug fix: the original returned `self.log('loss', loss)`, but `log()`
    returns None, so the trainer never received a loss tensor and could
    not run the backward pass. Log the scalar, then return the loss.
    """
    # REQUIRED
    x, y = batch
    y_hat = self.forward(x)
    loss = F.cross_entropy(y_hat, y)
    self.log('loss', loss)
    return loss
def otf_bt(self, batch, lambda_xe, cat_in=0, use_pointer=False, gamma=0) if False else None  # noqa: placeholder removed
def one_hot_cross_entropy_loss(y_hat, y):
    """Cross-entropy for one-hot encoded targets.

    Converts the one-hot matrix `y` back to class indices along dim 1 and
    delegates to F.cross_entropy, which expects integer class targets.
    """
    class_indices = y.argmax(dim=1)
    return F.cross_entropy(y_hat, class_indices)
def forward(self, imgs, bboxes, labels, scale):
    """One Faster R-CNN training forward pass: returns RPN + head losses.

    Args:
        imgs: image batch; only batch size 1 is supported.
        bboxes: ground-truth boxes, (1, R, 4).
        labels: ground-truth class labels, (1, R).
        scale: image resize scale passed to the RPN.

    Returns:
        LossTuple of (rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total).
    """
    n = imgs.shape[0]
    if n != 1:  # only a single image per batch is supported
        raise ValueError('Currently only batch size 1 is supported.')
    _, _, H, W = imgs.shape
    img_size = (H, W)
    # Ground-truth boxes and labels for the single image
    bbox = bboxes[0]
    label = labels[0]
    # Shared backbone feature map
    features = self.faster_rcnn.extractor(imgs)
    # Region proposals from the RPN
    rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
        features, img_size, scale)
    # Per-image RPN scores and regression offsets
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois
    # ------------------------------------------ #
    #   RPN losses
    # ------------------------------------------ #
    # Build the targets the RPN should have predicted
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        bbox.cpu().numpy(), anchor, img_size)
    gt_rpn_label = torch.Tensor(gt_rpn_label).long()
    gt_rpn_loc = torch.Tensor(gt_rpn_loc)
    # Localization + classification losses; label -1 anchors are ignored
    rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc, gt_rpn_label.data,
                                       self.rpn_sigma)
    rpn_cls_loss = F.cross_entropy(rpn_score,
                                   gt_rpn_label.cuda(),
                                   ignore_index=-1)
    # ------------------------------------------ #
    #   Classifier (RoI head) losses
    # ------------------------------------------ #
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi, bbox.cpu().numpy(), label.cpu().numpy(), self.loc_normalize_mean,
        self.loc_normalize_std)
    sample_roi_index = torch.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                   sample_roi_index)
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    # Select the regression offsets of each sample's ground-truth class
    roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(), \
                          torch.Tensor(gt_roi_label).long()]
    gt_roi_label = torch.Tensor(gt_roi_label).long()
    gt_roi_loc = torch.Tensor(gt_roi_loc)
    roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                       gt_roi_label.data, self.roi_sigma)
    roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())
    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)]  # append the total loss
    return LossTuple(*losses)
def train_one_epoch( epoch, model, criteria_x, criteria_u, optim, lr_schdlr, ema, dltrain_x, dltrain_u, lb_guessor, lambda_u, n_iters, logger, ): model.train() # loss_meter, loss_x_meter, loss_u_meter, loss_u_real_meter = [], [], [], [] loss_meter = AverageMeter() loss_x_meter = AverageMeter() loss_u_meter = AverageMeter() loss_u_real_meter = AverageMeter() # the number of correctly-predicted and gradient-considered unlabeled data n_correct_u_lbs_meter = AverageMeter() # the number of gradient-considered strong augmentation (logits above threshold) of unlabeled samples n_strong_aug_meter = AverageMeter() mask_meter = AverageMeter() epoch_start = time.time() # start time dl_x, dl_u = iter(dltrain_x), iter(dltrain_u) for it in range(n_iters): ims_x_weak, ims_x_strong, lbs_x = next(dl_x) ims_u_weak, ims_u_strong, lbs_u_real = next(dl_u) lbs_x = lbs_x.cuda() lbs_u_real = lbs_u_real.cuda() # -------------------------------------- bt = ims_x_weak.size(0) mu = int(ims_u_weak.size(0) // bt) imgs = torch.cat([ims_x_weak, ims_u_weak, ims_u_strong], dim=0).cuda() imgs = interleave(imgs, 2 * mu + 1) logits = model(imgs) logits = de_interleave(logits, 2 * mu + 1) logits_x = logits[:bt] logits_u_w, logits_u_s = torch.split(logits[bt:], bt * mu) loss_x = criteria_x(logits_x, lbs_x) with torch.no_grad(): probs = torch.softmax(logits_u_w, dim=1) scores, lbs_u_guess = torch.max(probs, dim=1) mask = scores.ge(0.95).float() loss_u = (criteria_u(logits_u_s, lbs_u_guess) * mask).mean() loss = loss_x + lambda_u * loss_u loss_u_real = (F.cross_entropy(logits_u_s, lbs_u_real) * mask).mean() # -------------------------------------- # mask, lbs_u_guess = lb_guessor(model, ims_u_weak.cuda()) # n_x = ims_x_weak.size(0) # ims_x_u = torch.cat([ims_x_weak, ims_u_strong]).cuda() # logits_x_u = model(ims_x_u) # logits_x, logits_u = logits_x_u[:n_x], logits_x_u[n_x:] # loss_x = criteria_x(logits_x, lbs_x) # loss_u = (criteria_u(logits_u, lbs_u_guess) * mask).mean() # loss = loss_x + lambda_u * 
loss_u # loss_u_real = (F.cross_entropy(logits_u, lbs_u_real) * mask).mean() optim.zero_grad() loss.backward() optim.step() ema.update_params() lr_schdlr.step() loss_meter.update(loss.item()) loss_x_meter.update(loss_x.item()) loss_u_meter.update(loss_u.item()) loss_u_real_meter.update(loss_u_real.item()) mask_meter.update(mask.mean().item()) corr_u_lb = (lbs_u_guess == lbs_u_real).float() * mask n_correct_u_lbs_meter.update(corr_u_lb.sum().item()) n_strong_aug_meter.update(mask.sum().item()) if (it + 1) % 512 == 0: t = time.time() - epoch_start lr_log = [pg['lr'] for pg in optim.param_groups] lr_log = sum(lr_log) / len(lr_log) logger.info( "epoch:{}, iter: {}. loss: {:.4f}. loss_u: {:.4f}. loss_x: {:.4f}. loss_u_real: {:.4f}. " "n_correct_u: {:.2f}/{:.2f}. Mask:{:.4f} . LR: {:.4f}. Time: {:.2f}" .format(epoch, it + 1, loss_meter.avg, loss_u_meter.avg, loss_x_meter.avg, loss_u_real_meter.avg, n_correct_u_lbs_meter.avg, n_strong_aug_meter.avg, mask_meter.avg, lr_log, t)) epoch_start = time.time() ema.update_buffer() return loss_meter.avg, loss_x_meter.avg, loss_u_meter.avg, loss_u_real_meter.avg, mask_meter.avg
def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):
    """Multi-level (FPN) RPN forward: proposals plus cls/box losses in training.

    Args:
        rpn_feature_maps: list of feature maps, one per pyramid level.
        im_info: image size/scale info.
        gt_boxes: ground-truth boxes (required when training).
        num_boxes: number of valid gt boxes per image.

    Returns:
        (rois, rpn_loss_cls, rpn_loss_box); losses are 0 at inference.
    """
    n_feat_maps = len(rpn_feature_maps)

    rpn_cls_scores = []
    rpn_cls_probs = []
    rpn_bbox_preds = []
    rpn_shapes = []

    for i in range(n_feat_maps):
        feat_map = rpn_feature_maps[i]
        batch_size = feat_map.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.rpn_Conv(feat_map), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.rpn_cls_score(rpn_conv1)

        # reshape to 2 channels for fg/bg softmax, then back
        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.rpn_bbox_pred(rpn_conv1)

        rpn_shapes.append([rpn_cls_score.size()[2], rpn_cls_score.size()[3]])
        # flatten each level to (B, H*W*A, C) so levels can be concatenated
        rpn_cls_scores.append(rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
        rpn_cls_probs.append(rpn_cls_prob.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
        rpn_bbox_preds.append(rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4))

    rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
    rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
    rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)

    n_rpn_pred = rpn_cls_score_alls.size(1)

    # proposal layer
    cfg_key = 'TRAIN' if self.training else 'TEST'

    rois = self.rpn_proposal((rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data, im_info, cfg_key, rpn_shapes))

    self.rpn_loss_cls = 0
    self.rpn_loss_box = 0

    if self.training:
        assert gt_boxes is not None

        rpn_data = self.rpn_anchor_target((rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes, rpn_shapes))

        # keep only anchors whose label != -1 (i.e. not "don't care")
        rpn_label = rpn_data[0].view(batch_size, -1)
        rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
        rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1,2), 0, rpn_keep)
        rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
        rpn_label = Variable(rpn_label.long())
        self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

        rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
        # broadcast the per-anchor weights over the 4 box coordinates
        rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2).expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
        rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2).expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
        rpn_bbox_targets = Variable(rpn_bbox_targets)

        self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights, sigma=3)

    return rois, self.rpn_loss_cls, self.rpn_loss_box
def main():
    """Train a PointerNetwork video summarizer and periodically validate/checkpoint.

    Reads hyperparameters from the module-level argparse `parser`, builds the
    model, data loaders and optimizer, then runs the epoch loop.
    """
    global args
    args = (parser.parse_args())
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)

    # Checkpoint/log directory name encodes the full hyperparameter set
    script_name_stem = dir_utils.get_stem(__file__)
    save_directory = dir_utils.get_dir(
        os.path.join(
            project_root, 'ckpts',
            '{:s}-{:s}-{:s}-split-{:d}-claweight-{:s}-{:.1f}-assgin{:.2f}-alpha{:.4f}-dim{:d}-dropout{:.4f}-seqlen{:d}-samplerate-{:d}-{:s}-{:s}'
            .format(script_name_stem, args.dataset, args.eval_metrics,
                    args.split, str(args.set_cls_weight), args.cls_pos_weight,
                    args.hassign_thres, args.alpha, args.hidden_dim,
                    args.dropout, args.seq_len, args.sample_rate,
                    loss_type[args.EMD], match_type[args.hmatch])))
    log_file = os.path.join(save_directory,
                            'log-{:s}.txt'.format(dir_utils.get_date_str()))
    logger = log_utils.get_logger(log_file)
    log_utils.print_config(vars(args), logger)

    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs,
                           dropout=args.dropout,
                           n_enc_layers=2)
    hassign_thres = args.hassign_thres
    logger.info("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    logger.info('Saving logs to {:s}'.format(log_file))

    if args.resume is not None:
        # NOTE(review): checkpoint index is hard-coded to 48 — presumably a
        # leftover from a specific experiment; confirm before reuse.
        ckpt_idx = 48
        ckpt_filename = args.resume.format(ckpt_idx)
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        train_iou = checkpoint['IoU']
        args.start_epoch = checkpoint['epoch']
        logger.info("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))

    model = cuda_model.convertModel2Cuda(model,
                                         gpu_id=args.gpu_id,
                                         multiGpu=args.multiGpu)

    # get train/val split
    # NOTE(review): only SumMe/TVSum are handled; other dataset values leave
    # train_val_perms undefined (NameError below) — verify upstream validation.
    if args.dataset == 'SumMe':
        train_val_perms = np.arange(25)
    elif args.dataset == 'TVSum':
        train_val_perms = np.arange(50)
    # fixed permutation
    random.Random(0).shuffle(train_val_perms)
    train_val_perms = train_val_perms.reshape([5, -1])
    train_perms = np.delete(train_val_perms, args.split, 0).reshape([-1])
    val_perms = train_val_perms[args.split]
    logger.info(" training split: " + str(train_perms))
    logger.info(" val split: " + str(val_perms))

    if args.location == 'home':
        data_path = os.path.join(os.path.expanduser('~'), 'datasets')
    else:
        data_path = os.path.join('/nfs/%s/boyu/SDN' % (args.location),
                                 'datasets')
    train_dataset = vsSumLoader3_c3dd.cDataset(dataset_name=args.dataset,
                                               split='train',
                                               seq_length=args.seq_len,
                                               overlap=0.9,
                                               sample_rate=[args.sample_rate],
                                               train_val_perms=train_perms,
                                               data_path=data_path)
    # val_dataset = vsSumLoader3_c3dd.cDataset(dataset_name=args.dataset, split='val', seq_length=args.seq_len, overlap=0.9, sample_rate=[8])
    val_evaluator = Evaluator.Evaluator(dataset_name=args.dataset,
                                        split='val',
                                        seq_length=args.seq_len,
                                        overlap=0.9,
                                        sample_rate=[args.sample_rate],
                                        sum_budget=0.15,
                                        train_val_perms=val_perms,
                                        eval_metrics=args.eval_metrics,
                                        data_path=data_path)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    # val_dataloader = DataLoader(val_dataset,
    #                             batch_size=args.batch_size,
    #                             shuffle=False,
    #                             num_workers=4)

    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=float(args.lr))
    optim_scheduler = optim.lr_scheduler.ReduceLROnPlateau(model_optim,
                                                           'min',
                                                           patience=10)

    alpha = args.alpha
    # cls_weights = torch.FloatTensor([0.2, 1.0]).cuda()
    if args.set_cls_weight:
        # Weight the (negative, positive) classes by the training-set ratio
        cls_weights = torch.FloatTensor([
            1. * train_dataset.n_positive_train_samples /
            train_dataset.n_total_train_samples, args.cls_pos_weight
        ]).cuda()
    else:
        cls_weights = torch.FloatTensor([0.5, 0.5]).cuda()
    logger.info(" total: {:d}, total pos: {:d}".format(
        train_dataset.n_total_train_samples,
        train_dataset.n_positive_train_samples))
    logger.info(" classify weight: " + str(cls_weights[0]) +
                str(cls_weights[1]))
    for epoch in range(args.start_epoch, args.nof_epoch + args.start_epoch):
        total_losses = AverageMeter()
        loc_losses = AverageMeter()
        cls_losses = AverageMeter()
        Accuracy = AverageMeter()
        IOU = AverageMeter()
        ordered_IOU = AverageMeter()
        model.train()
        pbar = progressbar.ProgressBar(max_value=len(train_dataloader))
        for i_batch, sample_batch in enumerate(train_dataloader):
            pbar.update(i_batch)

            feature_batch = Variable(sample_batch[0])
            start_indices = Variable(sample_batch[1])
            end_indices = Variable(sample_batch[2])
            gt_valids = Variable(sample_batch[3])
            # seq_labels = Variable(sample_batch[3])

            if use_cuda:
                feature_batch = feature_batch.cuda()
                start_indices = start_indices.cuda()
                end_indices = end_indices.cuda()

            gt_positions = torch.stack([start_indices, end_indices], dim=-1)

            head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                feature_batch)

            pred_positions = torch.stack([head_positions, tail_positions],
                                         dim=-1)

            # Match predicted segments to ground truth (Hungarian or fixed order)
            if args.hmatch:
                assigned_scores, assigned_locations, total_valid, total_iou = h_match.Assign_Batch_v2(
                    gt_positions, pred_positions, gt_valids, thres=hassign_thres)
            else:
                assigned_scores, assigned_locations = f_match.Assign_Batch(
                    gt_positions, pred_positions, gt_valids, thres=hassign_thres)
                _, _, total_valid, total_iou = h_match.Assign_Batch_v2(
                    gt_positions, pred_positions, gt_valids, thres=hassign_thres)

            if total_valid > 0:
                IOU.update(total_iou / total_valid, total_valid)

            assigned_scores = Variable(torch.LongTensor(assigned_scores),
                                       requires_grad=False)
            assigned_locations = Variable(torch.LongTensor(assigned_locations),
                                          requires_grad=False)
            if use_cuda:
                assigned_scores = assigned_scores.cuda()
                assigned_locations = assigned_locations.cuda()

            cls_scores = cls_scores.contiguous().view(-1,
                                                      cls_scores.size()[-1])
            assigned_scores = assigned_scores.contiguous().view(-1)

            cls_loss = F.cross_entropy(cls_scores, assigned_scores,
                                       weight=cls_weights)

            if total_valid > 0:
                assigned_head_positions = assigned_locations[:, :, 0]
                assigned_head_positions = assigned_head_positions.contiguous(
                ).view(-1)
                #
                assigned_tail_positions = assigned_locations[:, :, 1]
                assigned_tail_positions = assigned_tail_positions.contiguous(
                ).view(-1)

                head_pointer_probs = head_pointer_probs.contiguous().view(
                    -1, head_pointer_probs.size()[-1])
                tail_pointer_probs = tail_pointer_probs.contiguous().view(
                    -1, tail_pointer_probs.size()[-1])

                # Keep only positions whose assignment score is positive
                # NOTE(review): .byte() as a boolean mask is deprecated in
                # modern PyTorch (use .bool()) — left as-is for compatibility.
                assigned_head_positions = torch.masked_select(
                    assigned_head_positions, assigned_scores.byte())
                assigned_tail_positions = torch.masked_select(
                    assigned_tail_positions, assigned_scores.byte())

                head_pointer_probs = torch.index_select(
                    head_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                tail_pointer_probs = torch.index_select(
                    tail_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))

                if args.EMD:
                    # Earth-mover's-distance style localization loss
                    assigned_head_positions = to_one_hot(
                        assigned_head_positions, args.seq_len)
                    assigned_tail_positions = to_one_hot(
                        assigned_tail_positions, args.seq_len)

                    prediction_head_loss = EMD_L2(head_pointer_probs,
                                                  assigned_head_positions,
                                                  needSoftMax=True)
                    prediction_tail_loss = EMD_L2(tail_pointer_probs,
                                                  assigned_tail_positions,
                                                  needSoftMax=True)
                else:
                    prediction_head_loss = F.cross_entropy(
                        head_pointer_probs, assigned_head_positions)
                    prediction_tail_loss = F.cross_entropy(
                        tail_pointer_probs, assigned_tail_positions)
                loc_losses.update(
                    prediction_head_loss.data.item() +
                    prediction_tail_loss.data.item(), feature_batch.size(0))
                total_loss = alpha * (prediction_head_loss +
                                      prediction_tail_loss) + cls_loss
            else:
                total_loss = cls_loss

            model_optim.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            model_optim.step()
            cls_losses.update(cls_loss.data.item(), feature_batch.size(0))
            total_losses.update(total_loss.item(), feature_batch.size(0))

        logger.info(
            "Train -- Epoch :{:06d}, LR: {:.6f},\tloss={:.4f}, \t c-loss:{:.4f}, \tloc-loss:{:.4f}\tcls-Accuracy:{:.4f}\tloc-Avg-IOU:{:.4f}\t topIOU:{:.4f}"
            .format(epoch, model_optim.param_groups[0]['lr'], total_losses.avg,
                    cls_losses.avg, loc_losses.avg, Accuracy.avg, IOU.avg,
                    ordered_IOU.avg))

        optim_scheduler.step(total_losses.avg)

        model.eval()

        # IOU = AverageMeter()
        # pbar = progressbar.ProgressBar(max_value=len(val_evaluator))
        # for i_batch, sample_batch in enumerate(val_dataloader):
        #     pbar.update(i_batch)
        #     feature_batch = Variable(sample_batch[0])
        #     start_indices = Variable(sample_batch[1])
        #     end_indices = Variable(sample_batch[2])
        #     gt_valids = Variable(sample_batch[3])
        #     # valid_indices = Variable(sample_batch[3])
        #     if use_cuda:
        #         feature_batch = feature_batch.cuda()
        #         start_indices = start_indices.cuda()
        #         end_indices = end_indices.cuda()
        #     gt_positions = torch.stack([start_indices, end_indices], dim=-1)
        #     head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
        #         feature_batch)#Update: compared to the previous version, we now update the matching rules
        #     pred_positions = torch.stack([head_positions, tail_positions], dim=-1)
        #     pred_scores = cls_scores[:, :, -1]
        #     #TODO: should NOT change here for evaluation!
        #     assigned_scores, assigned_locations, total_valid, total_iou = h_match.Assign_Batch_v2(gt_positions, pred_positions, gt_valids, thres=hassign_thres)
        #     if total_valid>0:
        #         IOU.update(total_iou / total_valid, total_valid)

        F1s = val_evaluator.Evaluate(model)
        logger.info("Val -- Epoch :{:06d}, LR: {:.6f},\tF1s:{:.4f}".format(
            epoch, model_optim.param_groups[0]['lr'], F1s))

        if epoch % 1 == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': total_losses.avg,
                    'cls_loss': cls_losses.avg,
                    'loc_loss': loc_losses.avg,
                    'IoU': IOU.avg,
                    'val_F1s': F1s
                }, (epoch + 1),
                file_direcotry=save_directory)
# Build the dev-set loader, then train `model` for `epoch` epochs with Adam,
# reporting train/dev accuracy every 50 batches.
# NOTE(review): relies on names defined earlier in the file (get_data, model,
# dataloader, epoch, batch_size, device, config, evaluate) — not visible here.
x_dev, y_dev, _ = get_data(dev_path)
dataset_dev = DealDataset(x_dev, y_dev, device)
dataloader_dev = DataLoader(dataset=dataset_dev,
                            batch_size=batch_size,
                            shuffle=True)
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
model.train()
best_acc = 0
for i in range(epoch):
    index = 0
    for datas, labels in tqdm(dataloader):
        model.zero_grad()
        output = model(datas)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()
        index += 1
        if index % 50 == 0:
            # Every 50 batches report accuracy on the train batch and dev set
            true = labels.data.cpu()
            predic = torch.max(output.data, 1)[1].cpu()
            train_acc = metrics.accuracy_score(true, predic)
            dev_acc = evaluate(model, dataloader_dev)
            print(
                f'epoch:{i} batch:{index} loss:{loss} train_acc:{train_acc} dev_acc:{dev_acc}'
            )
            # if dev_acc > best_acc:
            #     torch.save(model, f'{output_path}/{model_name}/model.pt')
            model.train()
# Save the trained weights, reload them into a fresh CNN, and evaluate
# loss/accuracy over the 'eval' split.
PATH = '../model/gesture_recognition_3-1.pth'
torch.save(model.state_dict(), PATH)

model = CNN()
model.load_state_dict(torch.load(PATH))
model.eval()

test_loader = load_data('eval')
acc_set = []
avg_loss_set = []
for batch_id, data in enumerate(test_loader()):
    images, labels = data
    image = torch.from_numpy(images)
    label = torch.from_numpy(labels).squeeze()
    outputs = model(image)
    loss = F.cross_entropy(outputs, label)
    _, predicted = torch.max(outputs, 1)
    # NOTE(review): the divisor 15 assumes a fixed batch size of 15 — a final
    # partial batch would skew this accuracy; confirm against load_data().
    acc = (predicted == label).sum().item() / 15
    acc_set.append(acc)
    avg_loss_set.append(float(loss.detach().numpy()))

# Average loss and accuracy across all batches
acc_val_mean = np.array(acc_set).mean()
avg_loss_val_mean = np.array(avg_loss_set).mean()
print('loss={}, acc={}'.format(avg_loss_val_mean, acc_val_mean))
# BatchSize=50, epoch=30, loss=0.6554504831631979, acc=0.899999996026357
# BatchSize=50, epoch=30, loss=0.659913182258606, acc=0.8999999999999999
# BatchSize=15, epoch=50, loss=0.6402452290058136, acc=0.9066666666666666
def validation_step(self, batch):
    """Evaluate one batch: forward pass, then loss and accuracy.

    Returns a dict with 'val_loss' (detached CE loss) and 'val_score' (accuracy).
    """
    images, labels = batch
    predictions = self(images)                       # Generate predictions
    batch_loss = F.cross_entropy(predictions, labels)  # Calculate loss
    batch_acc = accuracy(predictions, labels)          # Calculate accuracy
    return {'val_loss': batch_loss.detach(), 'val_score': batch_acc}
def train():
    """Jointly train `net` on a saliency loader and a VOC segmentation loader.

    Alternates one saliency batch and one segmentation batch per iteration,
    logs to `writer`, and validates/checkpoints every 500 iterations.
    Relies on module-level globals: net, loaders, mean/std, writer, etc.
    """
    print("============================= TRAIN ============================")
    voc_train_iter = iter(voc_train_loader)
    voc_it = 0
    sal_train_iter = iter(sal_train_loader)
    sal_it = 0
    log = {'best_miou': 0, 'best_it_miou': 0, 'best_mae': 1000, 'best_it_mae':0, 'best_fm':0, 'best_it_fm':0}
    optimizer = torch.optim.Adam([{'params': net.parameters(), 'lr': learn_rate, 'betas':(0.95, 0.999)}])
    if start_iter > 0:
        net.load_state_dict(torch.load(os.path.join(
            path_save_checkpoints, "{}.pth".format(start_iter))))
    for i in range(start_iter, train_iters):
        # Halve the learning rate every 3000 iterations (rebuilds the optimizer)
        if i % 3000 == 0:
            _lr = learn_rate / float(2**(i//3000))
            optimizer = torch.optim.Adam([{'params': net.parameters(), 'lr': _lr, 'betas':(0.95, 0.999)}])
        """loss 1 """
        # Saliency branch: restart the loader when exhausted
        if sal_it >= len(sal_train_loader):
            sal_train_iter = iter(sal_train_loader)
            sal_it = 0
        # NOTE(review): `.next()` is Python-2 iterator syntax; under Python 3
        # this raises AttributeError — should be `next(sal_train_iter)`. Confirm
        # the intended interpreter before changing.
        img_sal, gt_sal = sal_train_iter.next()
        sal_it += 1
        img_sal_raw = img_sal
        gt_sal = gt_sal[:, None, ...].cuda()
        gt_sal = gt_sal.squeeze(1).long()
        img_sal_raw = img_sal
        img_sal = (img_sal.cuda()-mean)/std
        pred_seg, v_sal, _ = net(img_sal)
        # Collapse class probabilities into background vs. (weighted) foreground
        pred_seg = torch.softmax(pred_seg, 1)
        bg = pred_seg[:, :1]
        fg = (pred_seg[:, 1:]*v_sal[:, 1:]).sum(1, keepdim=True)
        pred_sal = torch.cat((bg, fg), 1)
        # NOTE(review): F.nll_loss expects log-probabilities, but `pred_sal`
        # holds raw softmax probabilities here — verify this is intentional.
        loss_sal = F.nll_loss(pred_sal, gt_sal)
        """loss 2 """
        # Segmentation branch: restart the loader when exhausted
        if voc_it >= len(voc_train_loader):
            voc_train_iter = iter(voc_train_loader)
            voc_it = 0
        img_seg, gt_seg, plbl_seg = voc_train_iter.next()
        voc_it += 1
        # Per-image multi-label class presence vector from the GT mask
        gt_cls = gt_seg[:, None, ...] == torch.arange(c_output)[None, ..., None, None]
        gt_cls = (gt_cls.sum(3).sum(2)>0).float().cuda()
        img_seg_raw = img_seg
        img_seg = (img_seg.cuda()-mean)/std
        pred_seg, _, cls_fc = net(img_seg)
        # Pixel-wise CE against pseudo-labels + image-level BCE against class presence
        loss_cls = F.cross_entropy(pred_seg,plbl_seg.cuda())+\
                   F.binary_cross_entropy_with_logits(cls_fc[:, 1:], gt_cls[:, 1:])
        loss = loss_cls+loss_sal
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        """output """
        if i % 50 == 0:
            writer.add_scalar("sal_loss", loss_sal.item(), i)
            writer.add_scalar("cls_loss", loss_cls.item(), i)
            num_show = _num_show if img_seg.size(0) > _num_show else img_seg.size(0)
            img = img_seg_raw[-num_show:]
            writer.add_image('image_seg', torchvision.utils.make_grid(img), i)
            pred = plbl_seg[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('gt_seg', pred,i)
            _, pred_label = pred_seg.max(1)
            pred = pred_label[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('pred_seg', pred,i)
            img = img_sal_raw[-num_show:]
            writer.add_image('image_sal', torchvision.utils.make_grid(img), i)
            pred = gt_sal[-num_show:,None,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            pred = pred[0]
            writer.add_label('gt_sal', pred,i)
            pred = fg[-num_show:,...]
            pred = torchvision.utils.make_grid(pred.expand(-1, 3, -1,-1))
            writer.add_image('pred_sal', pred,i)
            writer.write_html()
            print("iter %d loss_sal %.4f; loss_cls %.4f"%(i, loss_sal.item(), loss_cls.item()))
        """validation"""
        if i!=0 and i % 500 == 0:
            log[i] = {}
            save_dict = net.state_dict()
            torch.save(save_dict, "{}/{}.pth".format(path_save_checkpoints, i))
            miou = val_voc()
            writer.add_scalar("miou", miou, i)
            log[i]['miou'] = miou
            if miou > log['best_miou']:
                log['best_miou'] = miou
                log['best_it_miou'] = i
            print("validation: iter %d; miou %.4f; best %d:%.4f"%(i, miou, log['best_it_miou'], log['best_miou']))
            fm, mae = val_sal()
            writer.add_scalar("mae", mae, i)
            writer.add_scalar("fm", fm, i)
            log[i]['mae'] = mae
            log[i]['fm'] = fm
            if mae < log['best_mae']:
                log['best_mae'] = mae
                log['best_it_mae'] = i
            if fm > log['best_fm']:
                log['best_fm'] = fm
                log['best_it_fm'] = i
            print("mae %.4f; best %d:%.4f"%(mae, log['best_it_mae'], log['best_mae']))
            print("fm %.4f; best %d:%.4f"%(fm, log['best_it_fm'], log['best_fm']))
            with open("output/{}.json".format(experiment_name), "w") as f:
                json.dump(log, f)
def forward(self, input):
    '''
    The inputs are two tuples. One for each image.
    :param input holds data (target_feat, im_info, template_feat, gt_boxes, num_boxes)
        target_feat is of size (1, C, H, W)
        gt_boxes is a batch of gt_boxes for tracking, and is of size (N, 1, 6).
            6 represents: x1,y1,x2,y2,class,trackid.
        template_feat is of size (N, C, kH, kW).
    :return: (rois, scores, rpn_loss_cls, rpn_loss_box); losses are None at inference.
    '''
    if self.training:
        target_feat, im_info, template_feat, gt_boxes, num_boxes = input
        # drop the trackid column; only x1,y1,x2,y2,class are used downstream
        gt_boxes = gt_boxes[:, :, :5]
    else:
        target_feat, im_info, template_feat = input
    n_templates = template_feat.size(0)
    nC = template_feat.size(1)
    kh = template_feat.size(2)
    kw = template_feat.size(3)
    assert self.din == nC, 'The feature dims are not compatible.{}!={}'.format(self.din, nC)
    assert nC == target_feat.size(1), 'The feature dims of template_feat and target_feat should be same.'
    assert target_feat.size(0) == 1, 'Input target_feat should have a batch size of 1.'
    # target branch.
    target_feat_cls = self.RPN_Conv_cls(target_feat)
    target_feat_bbox = self.RPN_Conv_bbox(target_feat)
    # template branch.
    template_feat_cls = self.RPN_cls_score(template_feat)
    template_feat_bbox = self.RPN_bbox_pred(template_feat)
    # reshape templates to (N, out_channels, C/out, kH, kW) for correlation
    template_feat_cls = template_feat_cls.view(n_templates, self.nc_score_out, -1,
                                               template_feat_cls.size(2), template_feat_cls.size(3))
    template_feat_bbox = template_feat_bbox.view(n_templates, self.nc_bbox_out, -1,
                                                 template_feat_bbox.size(2), template_feat_bbox.size(3))
    # correlation
    if self.use_separable_correlation:
        rpn_cls_score = self.depth_wise_cross_correlation_cls(target_feat_cls, template_feat_cls, self.bias_cls)
        rpn_bbox_pred = self.depth_wise_cross_correlation_box(target_feat_bbox, template_feat_bbox, self.bias_bbox)
    else:
        rpn_cls_score = self.cross_correlation(target_feat_cls, template_feat_cls, self.bias_cls)
        rpn_bbox_pred = self.cross_correlation(target_feat_bbox, template_feat_bbox, self.bias_bbox)
    # scale down correlation responses; presumably for numeric stability — TODO confirm
    rpn_cls_score = rpn_cls_score*0.1
    rpn_bbox_pred = rpn_bbox_pred*0.1
    # adjust
    rpn_bbox_pred = self.RPN_bbox_adjust(rpn_bbox_pred)
    rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
    rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
    rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
    # proposal layer
    cfg_key = 'TRAIN' if self.training else 'TEST'
    im_info = im_info.expand((rpn_cls_prob.size(0), im_info.size(1)))
    rois, scores = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))
    self.rpn_loss_cls = 0
    self.rpn_loss_box = 0
    # generating training labels and build the rpn loss
    if self.training:
        assert gt_boxes is not None
        batch_size = n_templates
        rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
        # compute classification loss
        rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        rpn_label = rpn_data[0].view(batch_size, -1)
        # anchors labeled -1 are "don't care" and excluded from the loss
        rpn_keep = rpn_label.view(-1).ne(-1).nonzero().view(-1)
        if len(rpn_keep)>0:
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = rpn_label.long()
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))
            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
            # compute bbox regression loss
            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets,
                                                rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                                                sigma=3, dim=[1, 2, 3])
    else:
        self.rpn_loss_cls, self.rpn_loss_box = None, None
    return rois, scores, self.rpn_loss_cls, self.rpn_loss_box
# Training loop for a 3D segmentation net with an auxiliary distance-map head:
# CE + Dice on the segmentation output, MSE on the predicted distance map.
# Relies on names defined earlier in the file (trainloader, optimizer,
# dice_loss, iter_num, max_epoch) — not visible here.
net.train()
for epoch_num in tqdm(range(max_epoch), ncols=70):
    time1 = time.time()
    for i_batch, sampled_batch in enumerate(trainloader):
        time2 = time.time()
        # print('fetch data cost {}'.format(time2-time1))
        volume_batch, label_batch = sampled_batch['image'], sampled_batch[
            'label']
        volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()
        outputs, out_dis = net(volume_batch)
        # NOTE(review): import inside the loop — harmless (module cache) but
        # conventionally belongs at the top of the file.
        from utils.losses import compute_fore_dist
        # Ground-truth foreground distance map, computed without gradients
        with torch.no_grad():
            gt_dis = compute_fore_dist(label_batch.cpu().numpy())
            gt_dis = torch.from_numpy(gt_dis).float().cuda()
        loss_seg = F.cross_entropy(outputs, label_batch)
        outputs_soft = F.softmax(outputs, dim=1)
        # Dice on the foreground channel only
        loss_seg_dice = dice_loss(outputs_soft[:, 1, :, :, :],
                                  label_batch == 1)
        # print('out_dis, label_batch shapes', out_dis.shape, label_batch.shape)
        # out_dis.shape=(b,1,x,y,z); label_batch.shape=(b,x,y,z)
        dist_mse = F.mse_loss(out_dis, gt_dis)
        # # sdf_kl_loss
        # print('sdf_kl.shape: ', sdf_kl.shape)
        loss = loss_seg + loss_seg_dice + dist_mse

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        iter_num = iter_num + 1
def prog_epoch_pass(net, optimizer, loader):
    """One epoch over `loader` for the joint progression/KL-grade model.

    Trains when `optimizer` is not None; otherwise runs validation and also
    collects softmax predictions and ground truth.

    Returns:
        training: average running loss.
        validation: (avg loss, ids, gt_progression, preds_progression, gt_kl, preds_kl).
    """
    kvs = GlobalKVS()
    running_loss, pbar, n_batches, epoch, max_epoch, device = init_epoch_pass(net, optimizer, loader)

    preds_progression = []
    gt_progression = []
    ids = []
    preds_kl = []
    gt_kl = []
    # gradients only when training (optimizer supplied)
    with torch.set_grad_enabled(optimizer is not None):
        for i, batch in enumerate(loader):
            if optimizer is not None:
                optimizer.zero_grad()
            # forward + backward + optimize if train
            labels_prog = batch['label'].long().to(device)
            labels_kl = batch['KL'].long().to(device)
            inputs = batch['img'].to(device)
            outputs_kl, outputs_prog = net(inputs)
            loss_kl = F.cross_entropy(outputs_kl, labels_kl)
            loss_prog = F.cross_entropy(outputs_prog, labels_prog)
            # convex combination of the two task losses
            loss = loss_prog.mul(kvs['args'].loss_weight) + loss_kl.mul(1 - kvs['args'].loss_weight)
            if optimizer is not None:
                loss.backward()
                if kvs['args'].clip_grad:
                    torch.nn.utils.clip_grad_norm_(net.parameters(), kvs['args'].clip_grad_norm)
                optimizer.step()
            else:
                # validation: accumulate per-batch probabilities and targets
                probs_progression_batch = F.softmax(outputs_prog, 1).data.to('cpu').numpy()
                probs_kl_batch = F.softmax(outputs_kl, 1).data.to('cpu').numpy()
                preds_progression.append(probs_progression_batch)
                gt_progression.append(batch['label'].numpy())
                preds_kl.append(probs_kl_batch)
                gt_kl.append(batch['KL'])
                ids.extend(batch['ID_SIDE'])
            running_loss += loss.item()
            if optimizer is not None:
                pbar.set_description(f'Training [{epoch} / {max_epoch}]:: {running_loss / (i + 1):.3f}')
            else:
                pbar.set_description(f'Validating [{epoch} / {max_epoch}]:')
            pbar.update()
            gc.collect()

    if optimizer is None:
        preds_progression = np.vstack(preds_progression)
        gt_progression = np.hstack(gt_progression)
        preds_kl = np.vstack(preds_kl)
        gt_kl = np.hstack(gt_kl)
    gc.collect()
    pbar.close()
    if optimizer is not None:
        return running_loss / n_batches
    else:
        return running_loss / n_batches, ids, gt_progression, preds_progression, gt_kl, preds_kl
def run(train_iter, val_iter, Text_vocab, save_model_path):
    """Create, train and save the deep learning model.

    Trains `Net` on `train_iter` with Adam, evaluating on `val_iter` every
    100 batches; the checkpoint with the best validation loss is written to
    `save_model_path`, and accuracy/loss curves are saved to
    'results/results.jpg'.

    Args:
        train_iter: training batch iterator yielding .text / .category.
        val_iter: validation batch iterator (consumed by `evaluate`).
        Text_vocab: vocabulary; only its length is used.
        save_model_path: destination path for the best state_dict.
    """
    # Fix all random seeds so repeated runs are reproducible.
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True

    # --------------------- model creation / training configuration ---------------------
    vocab_size = len(Text_vocab)
    learning_rate = 1e-3
    num_epochs = 20
    require_improvement = 1000  # early-stop patience (batches); see commented block below
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # -----------------------------------------------------------------------------

    start_time = time.time()
    print("载入模型...")
    model = Net(vocab_size).to(device)
    print("模型载入完成...")
    time_diff = get_time_diff(start_time)
    print("Time usage:", time_diff)

    print("打印模型参数...")
    # NOTE(review): this prints the bound method object, not the parameters;
    # `print(model)` was probably intended — confirm before changing output.
    print(model.parameters)
    for name, parameters in model.named_parameters():
        print(name, ':', parameters.shape)

    # Model training.
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Exponential LR decay per epoch: lr = gamma * lr
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    total_batch = 0  # number of batches processed so far
    val_best_loss = float('inf')
    last_improve = 0  # batch index of the last validation-loss improvement
    stop = False  # whether training should stop early
    # Curves for plotting.
    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, num_epochs))
        # scheduler.step()  # learning-rate decay
        for i, batch in enumerate(train_iter):
            texts, labels = batch.text, batch.category
            outputs = model(texts)
            optimizer.zero_grad()
            loss = F.cross_entropy(outputs, labels.long())
            loss.backward()
            optimizer.step()
            if total_batch % 100 == 0:
                # Periodically report train/validation metrics.
                labels = labels.cpu()
                predict = torch.argmax(outputs, 1).cpu()
                train_acc = metrics.accuracy_score(labels, predict)
                val_acc, val_loss = evaluate(model, val_iter)
                # NOTE(review): if evaluate() switches the model to eval mode,
                # train mode is never restored here — verify evaluate().
                if val_loss < val_best_loss:
                    val_best_loss = val_loss
                    torch.save(model.state_dict(), save_model_path)
                    improve = '*'  # marks a new best checkpoint in the log line
                    last_improve = total_batch
                else:
                    improve = ''
                time_diff = get_time_diff(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss.item(), train_acc, val_loss,
                               val_acc, time_diff, improve))
                # Record curves (sampled every 100 batches).
                train_loss_list.append(loss.item())
                train_acc_list.append(train_acc)
                val_loss_list.append(val_loss)
                val_acc_list.append(val_acc)
            total_batch = total_batch + 1
            # if total_batch - last_improve > require_improvement:
            #     # stop when validation loss has not improved for 1000 batches
            #     print("No optimization for a long time, auto-stopping...")
            #     stop = True
            #     break
        # if stop:
        #     break

    # Plot the curves. FIX: legend() applies only to the current Axes, so it
    # must be called once per subplot — the original called it only after the
    # loss subplot, leaving the accuracy subplot without a legend.
    plt.figure(figsize=(15, 5.5))
    plt.subplot(121)
    plt.plot(train_acc_list, label='train acc')
    plt.plot(val_acc_list, label='val acc')
    plt.title("acc")
    plt.legend()
    plt.subplot(122)
    plt.plot(train_loss_list, label='train loss')
    plt.plot(val_loss_list, label='val loss')
    plt.title("loss")
    plt.legend()
    plt.savefig('results/results.jpg')
def cross_entropy(
    inputs,
    target,
    weight=None,
    ignore_index=-100,
    reduction="mean",
    smooth_eps=None,
    smooth_dist=None,
    from_logits=True,
):
    """Cross entropy loss, with support for target distributions and label
    smoothing (https://arxiv.org/abs/1512.00567).

    Args:
        inputs: class scores of shape (..., num_classes); treated as raw
            logits when ``from_logits`` is True, otherwise as already
            log-normalized values.
        target: either a LongTensor of class indices ("hard" targets) or a
            float tensor of per-class target probabilities.
        weight: optional per-class rescaling weights.
        ignore_index: class index whose per-element loss is zeroed
            (only applied to hard targets).
        reduction: "none" | "sum" | "mean".
        smooth_eps: label-smoothing factor; None or 0 disables smoothing.
        smooth_dist: optional distribution to smooth towards; when None,
            a uniform spread of ``smooth_eps`` over classes is used.
        from_logits: whether ``inputs`` need a log-softmax first.

    Returns:
        The (possibly reduced) loss tensor.
    """
    smooth_eps = smooth_eps or 0

    # ordinary log-likelihood — hard targets without smoothing: defer to the
    # fused implementations in torch.nn.functional.
    if _is_long(target) and smooth_eps == 0:
        if from_logits:
            return F.cross_entropy(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)
        else:
            return F.nll_loss(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)

    if from_logits:
        # log-softmax of inputs
        lsm = F.log_softmax(inputs, dim=-1)
    else:
        lsm = inputs

    masked_indices = None
    num_classes = inputs.size(-1)

    # Remember which elements to zero out before target may be converted to
    # a one-hot float tensor below.
    if _is_long(target) and ignore_index >= 0:
        masked_indices = target.eq(ignore_index)

    # Smoothing towards an explicit distribution: blend target with
    # smooth_dist by factor smooth_eps.
    if smooth_eps > 0 and smooth_dist is not None:
        if _is_long(target):
            target = onehot(target, num_classes).type_as(inputs)
        if smooth_dist.dim() < target.dim():
            smooth_dist = smooth_dist.unsqueeze(0)
        # NOTE(review): lerp_ mutates `target` in place — a caller passing a
        # float distribution will see its tensor modified; confirm intended.
        target.lerp_(smooth_dist, smooth_eps)

    if weight is not None:
        lsm = lsm * weight.unsqueeze(0)

    if _is_long(target):
        # Uniform label smoothing over hard targets: the true class keeps
        # (1 - smooth_eps) + eps_sum mass, every class gets eps_sum.
        eps_sum = smooth_eps / num_classes
        eps_nll = 1.0 - eps_sum - smooth_eps
        likelihood = lsm.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        loss = -(eps_nll * likelihood + eps_sum * lsm.sum(-1))
    else:
        # Soft targets: full cross-entropy against the distribution.
        loss = -(target * lsm).sum(-1)

    if masked_indices is not None:
        loss.masked_fill_(masked_indices, 0)

    if reduction == "sum":
        loss = loss.sum()
    elif reduction == "mean":
        if masked_indices is None:
            loss = loss.mean()
        else:
            # Average over the non-ignored elements only.
            # NOTE(review): assumes a 1-D per-sample loss (size(0) == number
            # of elements) — confirm for inputs with extra leading dims.
            loss = loss.sum() / float(loss.size(0) - masked_indices.sum())

    return loss
def forward(
    self,
    trajectories: Optional[torch.LongTensor] = None,
    past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
    targets: Optional[torch.FloatTensor] = None,
    attention_mask: Optional[torch.FloatTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
):
    r"""
    Returns:

    Examples:

    ```python
    >>> from transformers import TrajectoryTransformerModel
    >>> import torch

    >>> model = TrajectoryTransformerModel.from_pretrained(
    ...     "CarlCochet/trajectory-transformer-halfcheetah-medium-v2"
    ... )
    >>> model.to(device)
    >>> model.eval()

    >>> observations_dim, action_dim, batch_size = 17, 6, 256
    >>> seq_length = observations_dim + action_dim + 1

    >>> trajectories = torch.LongTensor([np.random.permutation(self.seq_length) for _ in range(batch_size)]).to(
    ...     device
    ... )
    >>> targets = torch.LongTensor([np.random.permutation(self.seq_length) for _ in range(batch_size)]).to(device)

    >>> outputs = model(
    ...     trajectories,
    ...     targets=targets,
    ...     use_cache=True,
    ...     output_attentions=True,
    ...     output_hidden_states=True,
    ...     return_dict=True,
    ... )
    ```
    """
    # Resolve flags from the config when the caller passed None.
    # NOTE(review): `use_cache` and `return_dict` are NOT defaulted from
    # self.config here (unlike the two flags below) — with both left as
    # None this returns a tuple and caches nothing; confirm that is intended.
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (output_hidden_states if output_hidden_states
                            is not None else self.config.output_hidden_states)

    if past_key_values is None:
        past_key_values = tuple([None] * len(self.blocks))

    # NOTE(review): trajectories is Optional in the signature but .size()
    # would fail on None — callers must always supply it.
    batch_size, sequence_length = trajectories.size()

    if sequence_length > self.block_size:
        raise ValueError("Cannot forward, model block size is exhausted.")

    offset_trajectories = self.offset_tokens(trajectories)
    # [ batch_size x sequence_length x embedding_dim ]
    # forward the GPT model
    token_embeddings = self.tok_emb(
        offset_trajectories)  # each index maps to a (learnable) vector
    position_embeddings = self.pos_emb[:, :
                                       sequence_length, :]  # each position maps to a (learnable) vector

    hidden_states = self.drop(token_embeddings + position_embeddings)

    # Accumulators are tuples (or None when the corresponding flag is off).
    presents = () if use_cache else None
    all_self_attentions = () if output_attentions else None
    all_hidden_states = () if output_hidden_states else None

    for i, (block, layer_past) in enumerate(zip(self.blocks,
                                                past_key_values)):
        if output_hidden_states:
            # Record the input to this block (pre-block hidden states).
            all_hidden_states = all_hidden_states + (hidden_states, )

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

            # Wrapper so torch checkpointing can re-run the block with the
            # captured positional arguments.
            def create_custom_forward(module):
                def custom_forward(*inputs):
                    return module(*inputs)

                return custom_forward

            outputs = torch.utils.checkpoint.checkpoint(
                create_custom_forward(block),
                hidden_states,
                layer_past,
                use_cache,
                output_attentions,
            )
        else:
            outputs = block(hidden_states, layer_past, use_cache,
                            output_attentions)

        hidden_states = outputs[0]
        if use_cache is True:
            presents = presents + (outputs[1], )

        if output_attentions:
            # Attention tensor index shifts by one when a present was emitted.
            all_self_attentions = all_self_attentions + (
                outputs[2 if use_cache else 1], )

    # [ batch_size x sequence_length x embedding_dim ]
    hidden_state = self.ln_f(hidden_states)

    if output_hidden_states:
        # NOTE(review): appends the pre-LayerNorm `hidden_states`, not the
        # normalized `hidden_state` — confirm this is the intended output.
        all_hidden_states = all_hidden_states + (hidden_states, )

    hidden_states_pad, n_pad = self.pad_to_full_observation(hidden_state)

    logits = self.head(hidden_states_pad)
    logits = logits.reshape(batch_size, sequence_length + n_pad,
                            self.vocab_size + 1)
    # Drop the padded tail so logits align with the input sequence again.
    logits = logits[:, :sequence_length]

    # if we are given some desired targets also calculate the loss
    if targets is not None:
        loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)),
                               targets.view(-1),
                               reduction="none")
        if self.action_weight != 1 or self.reward_weight != 1 or self.value_weight != 1:
            # make weights: one entry per transition slot
            # (observation..., action..., reward, value), tiled over time.
            n_states = int(np.ceil(sequence_length / self.transition_dim))
            weights = torch.cat([
                torch.ones(self.observation_dim, device=trajectories.device),
                torch.ones(self.action_dim, device=trajectories.device) *
                self.action_weight,
                torch.ones(1, device=trajectories.device) *
                self.reward_weight,
                torch.ones(1, device=trajectories.device) * self.value_weight,
            ])
            weights = weights.repeat(n_states)
            # Shift by one: targets are the next token, so weights align off
            # by one position relative to the input.
            weights = weights[1:].repeat(batch_size, 1)
            loss = loss * weights.view(-1)
        # NOTE(review): attention_mask is required (not Optional in effect)
        # whenever targets is given — .view(-1) on None would raise.
        loss = (loss * attention_mask.view(-1)).mean()
    else:
        loss = None

    if not return_dict:
        return tuple(v for v in [
            loss, logits, presents, all_hidden_states, all_self_attentions
        ] if v is not None)

    return TrajectoryTransformerOutput(
        loss=loss,
        logits=logits,
        past_key_values=presents,
        hidden_states=all_hidden_states,
        attentions=all_self_attentions,
    )