def forward(self, im_data, im_info, gt_boxes, num_boxes):
    """Run the FPN detector on one batch and return detections and losses.

    The method builds a feature pyramid from the backbone stages, asks the
    RPN for proposals, pools per-RoI features from the pyramid, and feeds
    them to the box-regression and classification heads.  In training mode
    the proposals are first matched to ground truth by
    ``self.RCNN_proposal_target`` and the RCNN losses are computed.

    Args:
        im_data: input image batch; ``im_data.size(0)`` is the batch size.
        im_info: per-image metadata; only its ``.data`` is used.
        gt_boxes: ground-truth boxes; only its ``.data`` is used.
        num_boxes: number of ground-truth boxes; only its ``.data`` is used.

    Returns:
        An 8-tuple ``(rois, cls_prob, bbox_pred, rpn_loss_cls,
        rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label)``.
        ``rois``, ``cls_prob`` and ``bbox_pred`` are reshaped to
        ``(batch_size, -1, last_dim)``.  In training mode the four losses
        get a leading dimension via ``torch.unsqueeze(..., 0)`` and
        ``rois_label`` is reshaped to ``(batch_size, -1)``.  In evaluation
        mode the four losses are the integer ``0`` and ``rois_label`` is
        ``None``.
    """
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # Bottom-up pathway: feed the image through the backbone stages.
    c1 = self.RCNN_layer0(im_data)
    c2 = self.RCNN_layer1(c1)
    c3 = self.RCNN_layer2(c2)
    c4 = self.RCNN_layer3(c3)
    c5 = self.RCNN_layer4(c4)

    # Top-down pathway: each level is merged with its lateral connection
    # and smoothed before it feeds the next (finer) level.
    p5 = self.RCNN_toplayer(c5)
    p4 = self.RCNN_smooth1(self._upsample_add(p5, self.RCNN_latlayer1(c4)))
    p3 = self.RCNN_smooth2(self._upsample_add(p4, self.RCNN_latlayer2(c3)))
    p2 = self.RCNN_smooth3(self._upsample_add(p3, self.RCNN_latlayer3(c2)))
    p6 = self.maxpool2d(p5)

    # The RPN additionally sees p6; RoI pooling only uses p2..p5.
    rpn_feature_maps = [p2, p3, p4, p5, p6]
    mrcnn_feature_maps = [p2, p3, p4, p5]

    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
        rpn_feature_maps, im_info, gt_boxes, num_boxes)

    if self.training:
        # Training phase: use the ground-truth boxes to sample and label
        # proposals and to build the regression targets.
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        (rois, rois_label, _gt_assign, rois_target,
         rois_inside_ws, rois_outside_ws) = roi_data

        rois = Variable(rois.view(-1, 5))
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(
            rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(
            rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        rois = Variable(rois.view(-1, 5))

    # Pool per-RoI features from the pyramid levels.
    roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois, im_info)

    # Experimental score-level ensembling with auxiliary networks.  These
    # switches are hard-coded; with ``use_ensemble = False`` none of the
    # auxiliary code below runs.
    use_ensemble = False
    ensemble_vgg, ensemble_detnet = False, True

    if use_ensemble:
        if ensemble_vgg:
            # NOTE(review): the auxiliary network is rebuilt and its weights
            # are re-read from a hard-coded absolute path on every forward
            # call; consider constructing it once outside forward().
            model_vgg = Cnn()
            model_vgg = model_vgg.cuda()
            # Location of the saved pretrained parameters.
            pretrained_model_vgg = '/home/lab30202/lq/ai_future/single_classsification_vgg/model_save/galxay_star_classification_vgg.pth'
            pretrained_dict = torch.load(pretrained_model_vgg)
            model_dict = model_vgg.state_dict()
            # Only keep checkpoint entries the model actually has.
            model_dict.update({
                k: v for k, v in pretrained_dict.items() if k in model_dict
            })
            model_vgg.load_state_dict(model_dict)

            feature_map_vgg = model_vgg.convnet(im_data)
            # NOTE(review): 128 / 300 are hard-coded RoI counts for the
            # training / evaluation case - confirm they match the config.
            idx_l = torch.arange(0, 128 if self.training else 300).long()
            feat = self.RCNN_roi_align(feature_map_vgg, rois[idx_l], 0.5)
            roi_pool_vgg = feat.view(feat.shape[0], -1)
            cls_score_vgg = model_vgg.fc(roi_pool_vgg)

        if ensemble_detnet:
            # NOTE(review): rebuilt on every forward call, see above.
            detnet = Detnet()
            detnet = detnet.cuda()

            # Bottom-up pathway of the auxiliary backbone.
            c1_det = detnet.RCNN_layer0_det(im_data)
            c2_det = detnet.RCNN_layer1_det(c1_det)
            c3_det = detnet.RCNN_layer2_det(c2_det)
            c4_det = detnet.RCNN_layer3_det(c3_det)
            c5_det = detnet.RCNN_layer4_det(c4_det)
            c6_det = detnet.RCNN_layer5_det(c5_det)

            # Top-down pathway; the three coarsest levels are summed
            # directly (no upsampling), the two finest use _upsample_add.
            p6_det = detnet.RCNN_toplayer_det(c6_det)
            p5_det = detnet.RCNN_latlayer1_det(c5_det) + p6_det
            p4_det = detnet.RCNN_latlayer2_det(c4_det) + p5_det
            p3_det = detnet.RCNN_smooth1_det(detnet._upsample_add(
                p4_det, detnet.RCNN_latlayer3_det(c3_det)))
            p2_det = detnet.RCNN_smooth2_det(detnet._upsample_add(
                p3_det, detnet.RCNN_latlayer4_det(c2_det)))
            mrcnn_feature_maps_det = [p2_det, p3_det, p4_det, p5_det]

            # The auxiliary features are pooled with the RoIs of the main
            # RPN.  An earlier revision also ran the RPN and the proposal
            # target layer on the auxiliary pyramid, but none of those
            # results were ever read, so that pass has been dropped.
            feat_det = self._PyramidRoI_Feat(
                mrcnn_feature_maps_det, rois, im_info)
            pooled_feat_det = detnet._head_to_tail(feat_det)
            cls_score_det = self.RCNN_cls_score(pooled_feat_det)

    pooled_feat = self._head_to_tail(roi_pool_feat)

    # Bounding-box offsets.
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # Keep only the 4 regression outputs that belong to each RoI's label.
        bbox_pred_view = bbox_pred.view(
            bbox_pred.size(0), bbox_pred.size(1) // 4, 4)
        label_index = rois_label.long().view(
            rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)
        bbox_pred = torch.gather(bbox_pred_view, 1, label_index).squeeze(1)

    # Classification scores (computed once, optionally fused below).
    cls_score = self.RCNN_cls_score(pooled_feat)
    if use_ensemble:
        if ensemble_detnet and ensemble_vgg:
            cls_score_liner = (0.5 * cls_score + 0.3 * cls_score_vgg
                               + 0.2 * cls_score_det)
            cls_score = model_vgg.fc_new(cls_score_liner)
        elif ensemble_vgg:
            cls_score = model_vgg.fc_new(cls_score + cls_score_vgg)
        elif ensemble_detnet:
            cls_score = detnet.fc_add(cls_score + cls_score_det)
    # Always defined, even if the ensemble switch is on but both
    # sub-switches are off.
    cls_prob = F.softmax(cls_score, dim=1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0

    if self.training:
        # Hard-coded loss selection switches.
        use_focal_loss = True
        use_label_smoothing = False
        use_giou_loss = False

        # Classification loss.
        if use_focal_loss:
            FL = FocalLoss(class_num=self.n_classes, alpha=1, gamma=2)
            RCNN_loss_cls = FL(cls_score, rois_label)
            # Cast to float32 on the GPU without a device -> host -> device
            # round trip.
            RCNN_loss_cls = RCNN_loss_cls.float().cuda()
        elif use_label_smoothing:
            criteria = LabelSmoothSoftmaxCE(lb_pos=0.9, lb_neg=5e-3)
            RCNN_loss_cls = criteria(cls_score, rois_label)
        else:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

        # Bounding-box regression loss.
        if use_giou_loss:
            # NOTE(review): this path works on ``.data`` and compares the
            # decoded boxes against the proposals themselves - confirm the
            # intended target and whether gradients are expected to flow.
            rois1 = rois.view(batch_size, -1, rois.size(1))
            boxes = rois1.data[:, :, 1:5]
            bbox_pred1 = bbox_pred.view(batch_size, -1, bbox_pred.size(1))
            box_deltas = bbox_pred1.data
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            pred_boxes /= im_info[0][2].cuda()
            _, _, RCNN_loss_bbox = Giou_np(pred_boxes, boxes)
        else:
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    rois = rois.view(batch_size, -1, rois.size(1))
    cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))
    bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

    if self.training:
        rois_label = rois_label.view(batch_size, -1)
        # Give every loss a leading dimension; in evaluation mode the
        # losses are plain ints and cannot be unsqueezed.
        rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
        rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
        RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
        RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

    return (rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label)
args.session = checkpoint['session'] args.start_epoch = checkpoint['epoch'] fasterRCNN.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr = optimizer.param_groups[0]['lr'] if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print("loaded checkpoint %s" % (load_name)) if args.mGPUs: fasterRCNN = nn.DataParallel(fasterRCNN) iters_per_epoch = int(10000 / args.batch_size) if args.ef: FL = EFocalLoss(class_num=2, gamma=args.gamma) else: FL = FocalLoss(class_num=2, gamma=args.gamma) if args.use_tfboard: from tensorboardX import SummaryWriter logger = SummaryWriter("logs") count_iter = 0 for epoch in range(args.start_epoch, args.max_epochs + 1): # setting to train mode fasterRCNN.train() loss_temp = 0 start = time.time() if epoch % (args.lr_decay_step + 1) == 0: adjust_learning_rate(optimizer, args.lr_decay_gamma) lr *= args.lr_decay_gamma
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets,
                   device, net, optimizer, num_workers, lr, batch_size, start_epoch, max_epochs,
                   lr_decay_gamma, lr_decay_step, resume, load_name,
                   eta, gamma, ef, class_agnostic, lc, gc, LA_ATT, MID_ATT,
                   debug, _run):
    """Train an HTCN model on mixed source/target data, checkpointing every epoch.

    Args:
        cfg_file: optional config file; loaded with ``cfg_from_file`` when
            not ``None``.
        output_dir: checkpoint directory prefix; ``"_<run id>"`` is appended.
        dataset_source, dataset_target: dataset identifiers used to build
            the training ``Args``.
        val_datasets: dataset identifier(s) used to build the validation
            ``Args``.
        device: device spec accepted by ``torch.device`` (e.g. ``'cuda'``,
            ``'cuda:0'``, ``'cpu'``).
        net: backbone identifier forwarded to ``Args`` and the model factory.
        optimizer, lr, resume, load_name, start_epoch, class_agnostic, lc,
            gc, LA_ATT, MID_ATT: forwarded to ``init_htcn_model_optimizer``,
            which returns the actual model, optimizer, lr, session and
            start epoch used below.
        num_workers, batch_size: data-loader settings.
        max_epochs: last epoch index (inclusive).
        lr_decay_gamma: factor applied to the learning rate on decay epochs.
        lr_decay_step: container of epoch numbers; the learning rate is
            decayed at the start of epoch ``e`` when ``e - 1`` is in it.
        eta: forwarded to the per-epoch training routine and recorded in
            the checkpoint file name.
        gamma: focal-loss gamma.
        ef: if truthy use ``EFocalLoss``, otherwise ``FocalLoss``.
        debug: unused here.
        _run: run object whose ``_id`` is appended to ``output_dir``.

    Returns:
        ``0`` once all epochs have finished.
    """
    args = Args(dataset=dataset_source, dataset_t=dataset_target,
                imdb_name_target=[], cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)

    args_val = Args(dataset=dataset_source, dataset_t=val_datasets,
                    imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    logger = LoggerForSacred(None, ex, True)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    # Normalise first so that 'cuda:0' or an existing torch.device object
    # also enables GPU NMS, not only the exact string 'cuda'.
    device = torch.device(device)
    cfg.USE_GPU_NMS = device.type == 'cuda'

    output_dir = output_dir + "_{}".format(_run._id)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    dataloader_s, dataloader_t, imdb = init_dataloaders_1s_mixed_mt(
        args, batch_size, num_workers)
    # The validation loaders are built but not consumed in this function.
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(
        args_val, 1, num_workers)

    session = 1
    fasterRCNN, lr, optimizer, session, start_epoch, _ = init_htcn_model_optimizer(
        lr, LA_ATT, MID_ATT, class_agnostic, device, gc, imdb, lc,
        load_name, net, optimizer, resume, session, start_epoch)

    # Evaluate once so wrapping and un-wrapping can never disagree.
    multi_gpu = torch.cuda.device_count() > 1
    if multi_gpu:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(10000 / batch_size)

    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    total_step = 0
    for epoch in range(start_epoch, max_epochs + 1):
        # Setting to train mode.
        fasterRCNN.train()

        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        total_step = frcnn_utils.train_htcn_one_epoch(
            args, FL, total_step, dataloader_s, dataloader_t,
            iters_per_epoch, fasterRCNN, optimizer, device, eta, logger)

        # The name records the eta that was actually passed to training,
        # like lc / gc / gamma, which also come from the arguments.
        save_name = os.path.join(
            output_dir,
            'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'.format(
                args.dataset_t, eta, lc, gc, gamma, session, epoch, total_step))
        save_checkpoint({
            'session': session,
            'epoch': epoch + 1,
            # Store the bare module's weights so the checkpoint loads with
            # or without DataParallel.
            'model': fasterRCNN.module.state_dict() if multi_gpu else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': class_agnostic,
        }, save_name)

    return 0