def load_client_dataset(imdb_list):
    """Create a training DataLoader (plus iteration count) for every client imdb.

    Each entry of *imdb_list* names a cached ground-truth roidb pickle under
    the module-level ``data_cache_path``; the pickle is loaded, filtered,
    aspect-ratio ranked, and wrapped in a ``roibatchLoader``-backed
    DataLoader driven by the module-level ``sampler``.

    Returns:
        tuple[list, list]: the DataLoaders and, in the same order, the
        number of iterations per epoch for each one.
    """
    loaders, epoch_iters = [], []
    for name in imdb_list:
        # Cached ground-truth roidb produced by an earlier preparation step.
        cache_file = os.path.join(data_cache_path, name + '_gt_roidb.pkl')
        with open(cache_file, 'rb') as handle:
            roidb = pickle.load(handle)
        roidb = filter_roidb(roidb)
        ratio_list, ratio_index = rank_roidb_ratio(roidb)

        n_images = len(roidb)
        print(n_images)
        per_epoch = int(n_images / args.batch_size)
        print('iters_per_epoch: ' + str(per_epoch))
        epoch_iters.append(per_epoch)

        # One sampler per client so each loader cycles its own roidb.
        batch_sampler = sampler(n_images, args.batch_size)
        dataset = roibatchLoader(roidb, ratio_list, ratio_index,
                                 args.batch_size, imdb_classes, training=True)
        loaders.append(
            torch.utils.data.DataLoader(dataset,
                                        batch_size=args.batch_size,
                                        sampler=batch_sampler,
                                        num_workers=args.num_workers))
    return loaders, epoch_iters
# ---------------------------------------------------------------------------
# NOTE(review): incomplete fragment of a few-shot training-setup routine.
# It begins mid-expression (`phase=args.phase)` closes a dataset constructor
# started outside this view) and ends mid-`if`, so it must not be
# reformatted or restyled in isolation; only comments are added here.
#
# What the visible code does:
#   * wraps `metadataset` in a single-item, unshuffled, pinned-memory
#     DataLoader (`metaloader`) and grabs its class list (`metaclass`);
#   * builds the combined roidb for args.imdb_name;
#   * phase 1 (any dataset except "pascal_voc_0712"): re-filters the roidb
#     via filter_class_roidb_flip(roidb, 0, imdb, base_num) — presumably
#     restricting to the `base_num` base classes; TODO confirm the helper's
#     semantics, it is defined elsewhere;
#   * phase 2: same re-filter but with `args.shots` — presumably keeping at
#     most `shots` instances per novel class; verify against the helper;
#   * after either filter the aspect-ratio ranking and the imdb's roidb are
#     refreshed, then `train_size` / `output_dir` bookkeeping begins (the
#     `if not os.path.exists(output_dir):` body lies outside this view).
# ---------------------------------------------------------------------------
phase=args.phase) metaclass = metadataset.metaclass metaloader = torch.utils.data.DataLoader(metadataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) # filter out training samples from novel categories according to shot number print('\n before class filtering, there are %d images...' % (len(roidb))) if args.dataset != "pascal_voc_0712" and args.phase == 1: roidb = filter_class_roidb_flip(roidb, 0, imdb, base_num) ratio_list, ratio_index = rank_roidb_ratio(roidb) imdb.set_roidb(roidb) # filter roidb for the second phase if args.phase == 2: roidb = filter_class_roidb_flip(roidb, args.shots, imdb, base_num) ratio_list, ratio_index = rank_roidb_ratio(roidb) imdb.set_roidb(roidb) print('after class filtering, there are %d images...\n' % (len(roidb))) train_size = len(roidb) print('{:d} roidb entries'.format(len(roidb))) sys.stdout.flush() output_dir = args.save_dir if not os.path.exists(output_dir):
# ---------------------------------------------------------------------------
# NOTE(review): incomplete fragment of a test/inference entry point — the
# enclosing function or `if __name__ == '__main__':` header is outside this
# view, and the trailing `if/elif` network-selection chain continues past
# the last visible statement.  Code left byte-identical; comments only.
#
# Visible behavior:
#   * parse CLI args and seed numpy with cfg.RNG_SEED;
#   * pick cfgs/{net}_ls.yml vs cfgs/{net}.yml depending on --large_scale,
#     load it, then apply any --set_cfgs overrides;
#   * disable flipped images for testing (cfg.TRAIN.USE_FLIPPED = False);
#   * build a TestFolder image set (flip=True) and rank its aspect ratios;
#   * instantiate the detector for a 2-class problem
#     ('__background__', 'Car').
#
# NOTE(review): the vgg16 branch re-spells the class tuple inline instead
# of using the `classes` variable defined just above, and it loads
# pretrained=True (with is_deconv=True) while the res101 branch uses
# pretrained=False — this asymmetry looks unintentional; confirm.
# ---------------------------------------------------------------------------
args = parse_args() np.random.seed(cfg.RNG_SEED) args.cfg_file = "cfgs/{}_ls.yml".format( args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) cfg.TRAIN.USE_FLIPPED = False test_img_set = TestFolder(args, args.root_path, args.seed, flip=True) test_ratio_list, test_ratio_index = rank_roidb_ratio(test_img_set) print('{:d} roidb entries'.format(len(test_img_set))) classes = ('__background__', 'Car') # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(('__background__', 'Car'), pretrained=True, class_agnostic=args.class_agnostic, is_deconv=True) elif args.net == 'res101': fasterRCNN = resnet(classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
# ---------------------------------------------------------------------------
# NOTE(review): incomplete fragment that appends COCO images (plus flipped
# copies) to an unlabeled roidb.  `lines`, `save_coco_unflip`,
# `save_coco_flip`, `unlabel_roidb`, `coco_roidb` and `coco_flip_roidb` are
# all created outside this view, so the fragment is documented in place
# rather than rewritten.
#
# BUG(review): inside the loop the SAME two dicts (`save_coco_unflip`,
# `save_coco_flip`) are mutated and appended on every iteration — unless
# they are re-created per iteration somewhere outside this view (not
# visible here), every element of `coco_roidb` / `coco_flip_roidb` aliases
# one dict that ends up holding only the LAST image's path and size.  A
# per-iteration copy (e.g. `coco_roidb.append(dict(save_coco_unflip))`)
# would be the fix.
#
# Other observations:
#   * `lines[a][:-1]` strips the trailing newline from each file-list entry;
#   * the image root "/home/user/JISOO/..." is hard-coded — should come
#     from config;
#   * the first `print('{:d} roidb entries'...)` reports `roidb` (the
#     labeled set from elsewhere), not the unlabeled set being built here;
#   * the last two assignments split each batch into 1 supervised sample
#     and (args.batch_size - 1) unsupervised samples.
# ---------------------------------------------------------------------------
for a in range(len(lines)): save_coco_unflip['image'] ="/home/user/JISOO/R-FCN.pytorch-master/data/coco/images/" + lines[a][:-1] save_coco_flip['image'] ="/home/user/JISOO/R-FCN.pytorch-master/data/coco/images/" + lines[a][:-1] img = cv2.imread(save_coco_unflip['image']) height, width, channels = img.shape save_coco_unflip['width'] = width save_coco_unflip['height'] = height coco_roidb.append(save_coco_unflip) coco_flip_roidb.append(save_coco_flip) coco_roidb = coco_roidb + coco_flip_roidb unlabel_roidb = unlabel_roidb + coco_roidb unlabel_ratio_list, unlabel_ratio_index = rank_roidb_ratio(unlabel_roidb) print('{:d} roidb entries'.format(len(roidb))) print('{:d} roidb entries'.format(len(unlabel_roidb))) # print('{:d} roidb entries'.format(len(coco_unlabel_roidb))) output_dir = os.path.join(args.save_dir, args.arch, args.net, args.dataset) if not os.path.exists(output_dir): os.makedirs(output_dir) supervised_batch_size = 1 unsupervised_batch_size = args.batch_size - supervised_batch_size
# ===========================================================================
# bld_train — one active-learning training step (self-training flavour).
#
# NOTE(review): this block is stored with many statements collapsed onto a
# few physical lines, and several of those lines break MID-STATEMENT
# (e.g. "... if" / "args.set_cfgs is not None:", "save_name =" /
# "os.path.join(...)").  Only comments are added here — the code text is
# left byte-identical, and new comments are inserted only at boundaries
# that fall between complete statements.
#
# High-level flow, as visible below:
#   1. Per-dataset imdb names plus ANCHOR_* / MAX_NUM_GT_BOXES overrides,
#      then cfg-file loading.  USE_FLIPPED is forced on; GPU NMS follows
#      --cuda.
#   2. combined_roidb() is called twice: a fully-labeled "source" set and a
#      "target" set (ann_path/source vs ann_path/target); the target set
#      gets its own DataLoader with batch size 4 and no sampler.
#   3. Input tensors (im_data, im_info, gt_boxes, num_boxes, image_label,
#      confidence) are allocated once and resized in place per batch — the
#      Variable / loss.data[0] / xrange usage pins this code to Python 2
#      with PyTorch <= 0.3.
#   4. Two networks are built: fasterRCNN (the one being trained) and
#      fasterRCNN_val, a frozen "expectation" model loaded from
#      args.model_path that generates pseudo labels on the target set.
#   5. Pseudo-labeling pass (guarded on "noisy_annotations.pkl" being
#      absent): for every target batch, decode the bbox regression, keep
#      only classes present in the image-level label, halve the 0.5 score
#      threshold until at least one detection survives, apply NMS at
#      IoU 0.2, then write at most 4 top-scoring boxes per class into the
#      first empty gt slots (hard cap of 20 boxes per image) and copy the
#      result back into roidb_tg.
#   6. Source and pseudo-labeled target roidbs are merged, images with no
#      boxes are dropped, and a standard Faster R-CNN loop runs with step
#      lr decay, optional tensorboard logging, and checkpointing.
#
# NOTE(review): os.path.join('./.logs', args.active_method,
# "/activestep" + str(step)) — per os.path.join semantics a component
# starting with '/' discards everything before it, so logs land in
# /activestep<N> at the filesystem root; the leading slash should go.
# NOTE(review): in the `while i < len(roidb)` filter both arms of
# `if True:` contain identical code — the else arm is dead.
# NOTE(review): the `pickle.dump(roidb_tg, f)` that would create
# noisy_annotations.pkl is commented out, so the os.path.isfile(fname)
# guard can never become true across runs; pseudo-labeling always reruns.
# ===========================================================================
def bld_train(args, ann_path=None, step=0): # print('Train from annotaion {}'.format(ann_path)) # print('Called with args:') # print(args) if args.use_tfboard: from model.utils.logger import Logger # Set the logger logger = Logger( os.path.join('./.logs', args.active_method, "/activestep" + str(step))) if args.dataset == "pascal_voc": args.imdb_name = "voc_2007_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20' ] elif args.dataset == "pascal_voc_0712": args.imdb_name = "voc_2007_trainval+voc_2012_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20' ] elif args.dataset == "coco": args.imdb_name = "coco_2014_train" args.imdbval_name = "coco_2014_minival" args.set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50' ] elif args.dataset == "imagenet": args.imdb_name = "imagenet_train" args.imdbval_name = "imagenet_val" args.set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30' ] elif args.dataset == "vg": # train sizes: train, smalltrain, minitrain # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20'] args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50' ] elif args.dataset == "voc_coco": args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val" args.imdbval_name = "voc_coco_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20' ] else: raise NotImplementedError args.cfg_file = "cfgs/{}_ls.yml".format( args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if 
args.set_cfgs is not None: cfg_from_list(args.set_cfgs) # print('Using config:') # pprint.pprint(cfg) # np.random.seed(cfg.RNG_SEED) # torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # train set = source set + target set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda # source train set, fully labeled #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json') #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json') imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdb_name, ann_path=os.path.join(ann_path, 'source')) imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb( args.imdb_name, ann_path=os.path.join(ann_path, 'target')) print('{:d} roidb entries for source set'.format(len(roidb))) print('{:d} roidb entries for target set'.format(len(roidb_tg))) output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str( step) if not os.path.exists(output_dir): os.makedirs(output_dir) sampler_batch_tg = None # do not sample target set bs_tg = 4 dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \ imdb_tg.num_classes, training=True) assert imdb.num_classes == imdb_tg.num_classes dataloader_tg = torch.utils.data.DataLoader(dataset_tg, batch_size=bs_tg, sampler=sampler_batch_tg, num_workers=args.num_workers, worker_init_fn=_rand_fn()) # initilize the tensor holder here. 
# -- input holders: allocated once, .resize_()'d and copied per batch --
im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) image_label = torch.FloatTensor(1) confidence = torch.FloatTensor(1) # ship to cuda if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() image_label = image_label.cuda() confidence = confidence.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) image_label = Variable(image_label) confidence = Variable(confidence) if args.cuda: cfg.CUDA = True # initialize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") raise NotImplementedError # initialize the expectation network. 
# -- second, frozen copy of the detector used only to produce pseudo labels --
if args.net == 'vgg16': fasterRCNN_val = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN_val = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN_val = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN_val = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") raise NotImplementedError fasterRCNN.create_architecture() fasterRCNN_val.create_architecture() # lr = cfg.TRAIN.LEARNING_RATE lr = args.lr # tr_momentum = cfg.TRAIN.MOMENTUM # tr_momentum = args.momentum params = [] for key, value in dict(fasterRCNN.named_parameters()).items(): if value.requires_grad: if 'bias' in key: params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \ 'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}] else: params += [{ 'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY }] if args.optimizer == "adam": lr = lr * 0.1 optimizer = torch.optim.Adam(params) elif args.optimizer == "sgd": optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM) else: raise NotImplementedError if args.resume: load_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) print("loading checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) args.session = checkpoint['session'] args.start_epoch = checkpoint['epoch'] fasterRCNN.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr = optimizer.param_groups[0]['lr'] if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print("loaded checkpoint %s" % (load_name)) # expectation model print("load checkpoint for expectation model: %s" % args.model_path) checkpoint = torch.load(args.model_path) 
# -- load the frozen expectation model, then run the pseudo-labeling pass --
fasterRCNN_val.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] fasterRCNN_val = fasterRCNN_val fasterRCNN_val.eval() if args.mGPUs: fasterRCNN = nn.DataParallel(fasterRCNN) #fasterRCNN_val = nn.DataParallel(fasterRCNN_val) if args.cuda: fasterRCNN.cuda() fasterRCNN_val.cuda() # Evaluation # data_iter = iter(dataloader_tg) # for target_k in range( int(train_size_tg / args.batch_size)): fname = "noisy_annotations.pkl" if not os.path.isfile(fname): for batch_k, data in enumerate(dataloader_tg): im_data.data.resize_(data[0].size()).copy_(data[0]) im_info.data.resize_(data[1].size()).copy_(data[1]) gt_boxes.data.resize_(data[2].size()).copy_(data[2]) num_boxes.data.resize_(data[3].size()).copy_(data[3]) image_label.data.resize_(data[4].size()).copy_(data[4]) b_size = len(im_data) # expactation pass rois, cls_prob, bbox_pred, \ _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TRAIN.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(b_size, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) ) box_deltas = box_deltas.view(b_size, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) # TODO: data distalliation # Choose 
the confident samples for b_idx in range(b_size): # fill one confidence # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0) # resize prediction pred_boxes[b_idx] /= data[1][b_idx][2] for j in xrange(1, imdb.num_classes): if image_label.data[b_idx, j] != 1: continue # next if no image label # filtering box outside of the image not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \ (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3]) keep = torch.nonzero(not_keep == 0).view(-1) # decease the number of pgts thresh = 0.5 while torch.nonzero( scores[b_idx, :, j][keep] > thresh).view(-1).numel() <= 0: thresh = thresh * 0.5 inds = torch.nonzero( scores[b_idx, :, j][keep] > thresh).view(-1) # if there is no det, error if inds.numel() <= 0: print('Warning!!!!!!! It should not appear!!') continue # find missing ID missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0] if (len(missing_list) == 0): continue missing_id = missing_list[0] cls_scores = scores[b_idx, :, j][keep][inds] cls_boxes = pred_boxes[b_idx][keep][inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) keep = nms(cls_dets, 0.2) # Magic number ???? 
# -- keep at most 4 highest-scoring NMS survivors per class as pseudo GT --
keep = keep.view(-1).tolist() sys.stdout.write( 'from {} predictions choose-> min({},4) as pseudo label \r' .format(len(cls_scores), len(keep))) sys.stdout.flush() _, order = torch.sort(cls_scores[keep], 0, True) if len(keep) == 0: continue max_keep = 4 for pgt_k in range(max_keep): if len(order) <= pgt_k: break if missing_id + pgt_k >= 20: break gt_boxes.data[b_idx, missing_id + pgt_k, :4] = cls_boxes[keep][order[ len(order) - 1 - pgt_k]] gt_boxes.data[b_idx, missing_id + pgt_k, 4] = j # class #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]] num_boxes[b_idx] = num_boxes[b_idx] + 1 sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg + b_idx]] pgt_boxes = np.array([ gt_boxes[b_idx, x, :4].cpu().data.numpy() for x in range(int(num_boxes[b_idx])) ]) pgt_classes = np.array([ gt_boxes[b_idx, x, 4].cpu().data[0] for x in range(int(num_boxes[b_idx])) ]) sample["boxes"] = pgt_boxes sample["gt_classes"] = pgt_classes # DEBUG assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \ "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy()) #with open(fname, 'w') as f: # pickle.dump(roidb_tg, f) else: pass # with open(fname) as f: # Python 3: open(..., 'rb') # roidb_tg = pickle.load(f) print("-- Optimization Stage --") # Optimization print("######################################################l") roidb.extend(roidb_tg) # merge two datasets print('before filtering, there are %d images...' % (len(roidb))) i = 0 while i < len(roidb): if True: if len(roidb[i]['boxes']) == 0: del roidb[i] i -= 1 else: if len(roidb[i]['boxes']) == 0: del roidb[i] i -= 1 i += 1 print('after filtering, there are %d images...' 
% (len(roidb))) from roi_data_layer.roidb import rank_roidb_ratio ratio_list, ratio_index = rank_roidb_ratio(roidb) train_size = len(roidb) sampler_batch = sampler(train_size, args.batch_size) dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, sampler=sampler_batch, num_workers=args.num_workers, worker_init_fn=_rand_fn()) iters_per_epoch = int(train_size / args.batch_size) print("Training set size is {}".format(train_size)) for epoch in range(args.start_epoch, args.max_epochs + 1): fasterRCNN.train() loss_temp = 0 start = time.time() epoch_start = start # adjust learning rate if epoch % (args.lr_decay_step + 1) == 0: adjust_learning_rate(optimizer, args.lr_decay_gamma) lr *= args.lr_decay_gamma # one step data_iter = iter(dataloader) for step in range(iters_per_epoch): data = next(data_iter) im_data.data.resize_(data[0].size()).copy_(data[0]) im_info.data.resize_(data[1].size()).copy_(data[1]) gt_boxes.data.resize_(data[2].size()).copy_(data[2]) num_boxes.data.resize_(data[3].size()).copy_(data[3]) image_label.data.resize_(data[4].size()).copy_(data[4]) #gt_boxes.data = \ # torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2) conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda() confidence.data.resize_(conf_data.size()).copy_(conf_data) fasterRCNN.zero_grad() # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence) rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence) loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \ + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean() loss_temp += loss.data[0] # backward optimizer.zero_grad() loss.backward() if args.net == "vgg16": 
clip_gradient(fasterRCNN, 10.) optimizer.step() if step % args.disp_interval == 0: end = time.time() if step > 0: loss_temp /= args.disp_interval if args.mGPUs: loss_rpn_cls = rpn_loss_cls.mean().data[0] loss_rpn_box = rpn_loss_box.mean().data[0] loss_rcnn_cls = RCNN_loss_cls.mean().data[0] loss_rcnn_box = RCNN_loss_bbox.mean().data[0] fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt else: loss_rpn_cls = rpn_loss_cls.data[0] loss_rpn_box = rpn_loss_box.data[0] loss_rcnn_cls = RCNN_loss_cls.data[0] loss_rcnn_box = RCNN_loss_bbox.data[0] fg_cnt = torch.sum(rois_label.data.ne(0)) bg_cnt = rois_label.data.numel() - fg_cnt print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \ % (args.session, epoch, step, iters_per_epoch, loss_temp, lr)) print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start)) print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \ % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box)) if args.use_tfboard: info = { 'loss': loss_temp, 'loss_rpn_cls': loss_rpn_cls, 'loss_rpn_box': loss_rpn_box, 'loss_rcnn_cls': loss_rcnn_cls, 'loss_rcnn_box': loss_rcnn_box } for tag, value in info.items(): logger.scalar_summary(tag, value, step) images = [] for k in range(args.batch_size): image = draw_bounding_boxes( im_data[k].data.cpu().numpy(), gt_boxes[k].data.cpu().numpy(), im_info[k].data.cpu().numpy(), num_boxes[k].data.cpu().numpy()) images.append(image) logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \ images, step) loss_temp = 0 start = time.time() if False: break if args.mGPUs: save_name = os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint( { 'session': args.session, 'epoch': epoch + 1, 'model': fasterRCNN.module.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) else: save_name = 
os.path.join( output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint( { 'session': args.session, 'epoch': epoch + 1, 'model': fasterRCNN.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name)) epoch_end = time.time() print('Epoch time cost: {}'.format(epoch_end - epoch_start)) print('finished!')