def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets, device, net,
                   optimizer, num_workers, teacher_pth, student_pth, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume, load_name, imitation_loss_weight,
                   eta, gamma, ef, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Cross-evaluate teacher (res101) and student (res50) HTCN detectors.

    Loads one teacher and two copies of a student checkpoint, then for each
    validation dataset logs mAP for three configurations: the student body
    with the teacher's RPN, the teacher itself, and the teacher body with
    the (second) student's RPN.  The many training-related parameters
    (lr, optimizer, batch_size, ...) are accepted for Sacred-config
    compatibility but are unused in this evaluation-only experiment.
    """
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)
    logger = LoggerForSacred(None, ex, False)  # last flag presumably toggles visdom/extra logging -- TODO confirm
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(args_val, 1, num_workers)

    session = 1
    # All three models share the class layout of the first validation imdb.
    teacher = init_frcnn_utils.init_model_only(device, "res101", htcn_resnet, val_imdb_ts[0], teacher_pth,
                                               class_agnostic=class_agnostic, lc=lc, gc=gc,
                                               la_attention=LA_ATT, mid_attention=MID_ATT)
    fasterRCNN = init_frcnn_utils.init_model_only(device, "res50", htcn_resnet, val_imdb_ts[0], student_pth,
                                                  class_agnostic=class_agnostic, lc=lc, gc=gc,
                                                  la_attention=LA_ATT, mid_attention=MID_ATT)
    fasterRCNN_2 = init_frcnn_utils.init_model_only(device, "res50", htcn_resnet, val_imdb_ts[0], student_pth,
                                                    class_agnostic=class_agnostic, lc=lc, gc=gc,
                                                    la_attention=LA_ATT, mid_attention=MID_ATT)

    # Graft the teacher's RPN module onto the student (shared reference, not a copy).
    fasterRCNN.RCNN_rpn = teacher.RCNN_rpn

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    total_step = 0
    best_ap = 0.
    if isinstance(val_datasets, list):
        avg_ap = 0
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # NOTE(review): `map` shadows the Python builtin of the same name.
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t, fasterRCNN, device, val_imdb_ts[i])
            logger.log_scalar("student with teacher rpn map on {}".format(val_datasets[i]), map, 0)
            # NOTE(review): from the second iteration onward the teacher still
            # carries the student RPN assigned below, so "teacher original" is
            # only truly original for i == 0 -- confirm this is intended.
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t, teacher, device, val_imdb_ts[i])
            logger.log_scalar("teacher original map on {}".format(val_datasets[i]), map, 0)
            # Swap the second (unmodified) student's RPN into the teacher.
            teacher.RCNN_rpn = fasterRCNN_2.RCNN_rpn
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t, teacher, device, val_imdb_ts[i])
            logger.log_scalar("teacher with stu rpn map on {}".format(val_datasets[i]), map, 0)
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets, model_type, device, net,
                   optimizer, num_workers, model_pth, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Evaluate a single detector checkpoint on one or more validation datasets.

    Builds the validation dataloaders, selects the backbone constructor from
    (`net`, `model_type`), loads the weights at `model_pth`, and logs
    per-dataset and per-class mAP to the Sacred logger.

    Returns the mAP averaged over all validation datasets as a plain float
    (0.0 when `val_datasets` is not a list).
    """
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[],
                    cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)
    logger = LoggerForSacred(None, ex, True)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(args_val, 1, num_workers)

    session = 1

    # Pick the backbone constructor from the (net family, model_type) pair;
    # HTCN resnet is the default.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # No saitp vgg16 backbone exists; model init below would fail.
            backbone_fn = None

    model = init_frcnn_utils.init_model_only(device, net, backbone_fn, val_imdb_ts[0], model_pth,
                                             class_agnostic=class_agnostic, lc=lc, gc=gc,
                                             la_attention=LA_ATT, mid_attention=MID_ATT)

    total_step = 0
    best_ap = 0.
    avg_ap = 0.
    avg_ap_per_class = {}
    if isinstance(val_datasets, list):
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # Renamed from `map`, which shadowed the builtin.
            mean_ap, ap_per_class = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t, model,
                                                                    device, val_imdb_ts[i],
                                                                    return_ap_class=True)
            logger.log_scalar(" map on {}".format(val_datasets[i]), mean_ap, 0)
            # Accumulate per-class AP across datasets.
            for cls, ap in ap_per_class.items():
                avg_ap_per_class[cls] = avg_ap_per_class.get(cls, 0) + ap
            avg_ap += mean_ap
        avg_ap /= len(val_dataloader_ts)
        for cls, ap in avg_ap_per_class.items():
            ap /= len(val_dataloader_ts)
            logger.log_scalar(" map of class {}".format(cls), ap, 0)

    logger.log_scalar("avp map", avg_ap, 0)
    # BUGFIX: the original `avg_ap.item()` raises AttributeError when the
    # accumulated value is a plain Python float (always the case when
    # `val_datasets` is not a list, since avg_ap stays 0.0).  float() accepts
    # floats, numpy scalars and one-element tensors alike.
    return float(avg_ap)
def get_ready(query_img_path):
    """Build the one-shot detector and a preprocessed query blob.

    Reads configuration from the module-level `args` and global `cfg`,
    loads the res50 one-shot Faster R-CNN checkpoint, and prepares the
    query image found at `query_img_path`.

    Returns (fasterRCNN, all_boxes, query, _query_im) where `query` is the
    (1, C, H, W) query tensor on GPU and `_query_im` the resized raw image.
    """
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    # group != 0 selects a group-specific config file.
    args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    cfg.TRAIN.USE_FLIPPED = False

    # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(args.imdbval_name, False, seen=args.seen)
    # imdb_vs.competition_mode(on=True)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(input_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

    # initilize the network here.  Only res50 is supported.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')
    dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1,
                                imdb_vu.num_classes, training=False, seen=args.seen)

    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg
    num_images_vu = len(imdb_vu.image_index)
    # all_boxes[class][image]: per-class, per-image detection accumulators.
    # NOTE(review): xrange is Python 2 / six.moves -- confirm runtime.
    all_boxes = [[[] for _ in xrange(num_images_vu)] for _ in xrange(imdb_vu.num_classes)]

    # NOTE(review): these hold start timestamps, not Timer objects.
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu, 'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    # make query data
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)
    _query_im = np.copy(query_im)
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)  # add batch dim: (1, H, W, C)
    query_im = query_im.transpose(1, 3)      # swap dims 1,3 -> (1, C, W, H)
    query = query_im.transpose(2, 3)         # swap dims 2,3 -> (1, C, H, W)
    query = query.cuda()

    return fasterRCNN, all_boxes, query, _query_im
def extract_feature():
    """Extract region (pooled) features for every image in args.image_dir.

    Runs a 1601-class res101 Faster R-CNN over each image, keeps between
    MIN_BOXES and MAX_BOXES regions ranked by best per-box class confidence
    after per-class NMS, and writes one .npy file per image to
    args.output_dir (optionally including the boxes).  CUDA is required.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601   # presumably 1600 object classes + background -- TODO confirm against checkpoint
    CONF_THRESH = 0.2

    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    # device = torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        blobs, im_scales = get_image_blob(im)
        assert len(im_scales) == 1, 'Only single-image batch is implemented'

        im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).to(device)  # NHWC -> NCHW
        im_info = torch.tensor([[blobs.shape[1], blobs.shape[2], im_scales[0]]]).to(device)
        # Dummy ground truth: inference only.
        gt_boxes = torch.zeros(1, 1, 5).to(device)
        num_boxes = torch.zeros(1).to(device)

        with torch.set_grad_enabled(False):
            rois, cls_prob, _, _, _, _, _, _, \
                pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        # Columns 1:5 are (x1, y1, x2, y2); column 0 is the batch index.
        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        boxes /= im_scales[0]   # back to original image coordinates
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections: for each box record the highest class
        # score that survives per-class NMS.
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):  # skip background (index 0)
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        # Clamp the number of kept regions to [MIN_BOXES, MAX_BOXES] by confidence rank.
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        image_feat = pooled_feat[keep_boxes]
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        output_file = os.path.join(args.output_dir, im_file.split('.')[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)
def __init__(self):
    """Configure and load the VRD vgg16 Faster R-CNN detector in eval mode.

    Hard-codes all options (dataset, config file, checkpoint path), reads
    the VRD object vocabulary, loads the checkpoint, and allocates the
    reusable input tensor holders on GPU when CUDA is enabled.
    """
    self.args = EasyDict()
    self.args.dataset = 'vrd'
    self.args.cfg_file = 'faster-rcnn/cfgs/vgg16.yml'
    self.args.net = 'vgg16'
    # NOTE(review): despite the name, load_dir is a checkpoint *file* path.
    self.args.load_dir = 'models/faster_rcnn_1_20_7559.pth'
    self.args.cuda = True
    self.args.mGPUs = False
    self.args.class_agnostic = False
    self.args.parallel_type = 0
    self.args.batch_size = 1
    print(self.args)

    if self.args.cfg_file is not None:
        cfg_from_file(self.args.cfg_file)
    cfg.USE_GPU_NMS = self.args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    load_name = self.args.load_dir

    # Class 0 must be the background entry to match the training label layout.
    with open('data/vrd/obj.txt') as f:
        self.vrd_classes = [x.strip() for x in f.readlines()]
    self.vrd_classes = ['__background__'] + self.vrd_classes

    self.fasterRCNN = vgg16(self.vrd_classes, pretrained=False, class_agnostic=self.args.class_agnostic)
    self.fasterRCNN.create_architecture()

    if self.args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap GPU-saved tensors onto CPU.
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    print("load checkpoint %s" % (load_name))

    # initilize the tensor holder here.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if self.args.cuda > 0:
        self.im_data = self.im_data.cuda()
        self.im_info = self.im_info.cuda()
        self.num_boxes = self.num_boxes.cuda()
        self.gt_boxes = self.gt_boxes.cuda()

    # make variable
    # NOTE(review): `volatile=True` is the pre-0.4 PyTorch inference flag;
    # it has no effect (or errors) on modern torch, where `torch.no_grad()`
    # is the replacement -- confirm the targeted torch version.
    self.im_data = Variable(self.im_data, volatile=True)
    self.im_info = Variable(self.im_info, volatile=True)
    self.num_boxes = Variable(self.num_boxes, volatile=True)
    self.gt_boxes = Variable(self.gt_boxes, volatile=True)

    if self.args.cuda > 0:
        cfg.CUDA = True
    if self.args.cuda > 0:
        self.fasterRCNN.cuda()
    self.fasterRCNN.eval()
def load_model(args):
    """Build a Faster R-CNN matching the pre-trained checkpoint and load it.

    Sets anchor configuration according to args.dataset, loads the object
    vocabulary from args.classes_dir, constructs the backbone selected by
    args.net, and loads the checkpoint from args.load_dir.

    Returns (classes, fasterRCNN); the model is not yet moved to GPU nor
    switched to eval mode -- the caller is expected to do that.
    """
    # set cfg according to the dataset used to train the pre-trained model
    if args.dataset == "pascal_voc":
        args.set_cfgs = ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"]
    elif args.dataset == "pascal_voc_0712":
        args.set_cfgs = ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"]
    elif args.dataset == "coco":
        args.set_cfgs = ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]", ]
    elif args.dataset == "imagenet":
        args.set_cfgs = ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"]
    elif args.dataset == "vg":
        args.set_cfgs = ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]", ]

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    cfg.USE_GPU_NMS = args.cuda

    print("Using config:")
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Load classes: first column of each comma-separated vocabulary line.
    classes = ["__background__"]
    with open(os.path.join(args.classes_dir, "objects_vocab.txt")) as f:
        for object in f.readlines():  # NOTE(review): `object` shadows the builtin
            classes.append(object.split(",")[0].lower().strip())

    if not os.path.exists(args.load_dir):
        raise Exception("There is no input directory for loading network from " + args.load_dir)
    load_name = os.path.join(args.load_dir, "faster_rcnn_{}_{}.pth".format(args.net, args.dataset))

    # initilize the network here: the network used to train the pre-trained model.
    if args.net == "vgg16":
        fasterRCNN = vgg16(classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == "res101":
        fasterRCNN = resnet(classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == "res50":
        fasterRCNN = resnet(classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == "res152":
        fasterRCNN = resnet(classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap GPU-saved tensors onto CPU.
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint["model"])
    if "pooling_mode" in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint["pooling_mode"]
    print("load model successfully!")
    print("load model %s" % (load_name))

    return classes, fasterRCNN
# NOTE(review): fragment -- tail of a dataset-selection if/elif chain whose
# beginning (and any enclosing definition) is outside this view; the leading
# `]` closes the previous branch's set_cfgs list.
    ]
elif args.dataset == "vg":
    # train sizes: train, smalltrain, minitrain
    # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
    args.imdb_name = "vg_150-50-50_minitrain"
    args.imdbval_name = "vg_150-50-50_minival"
    args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']

args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)
#torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
def frcnn(train):
    """Run a COCO-class Faster R-CNN over args.image_dir and dump scores.

    When `train` is falsy: for every image, records the best detection score
    per label listed in labels.json into a vector and writes the mapping
    {digits-of-filename: score-vector} to annotation_dict.json.  When `train`
    is truthy the function does nothing.
    """
    args = parse_args()

    print('Called with args:')
    print(args)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    from model.utils.config import cfg
    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(input_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

    # NOTE(review): index 0 is spelled with THREE leading underscores
    # ('___background__'); harmless here because the background entry is
    # never matched by name, but inconsistent with the usual '__background__'.
    pascal_classes = np.asarray([
        '___background__', u'person', u'bicycle', u'car', u'motorcycle', u'airplane', u'bus', u'train',
        u'truck', u'boat', u'traffic light', u'fire hydrant', u'stop sign', u'parking meter', u'bench',
        u'bird', u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear', u'zebra', u'giraffe',
        u'backpack', u'umbrella', u'handbag', u'tie', u'suitcase', u'frisbee', u'skis', u'snowboard',
        u'sports ball', u'kite', u'baseball bat', u'baseball glove', u'skateboard', u'surfboard',
        u'tennis racket', u'bottle', u'wine glass', u'cup', u'fork', u'knife', u'spoon', u'bowl',
        u'banana', u'apple', u'sandwich', u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza',
        u'donut', u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table', u'toilet',
        u'tv', u'laptop', u'mouse', u'remote', u'keyboard', u'cell phone', u'microwave', u'oven',
        u'toaster', u'sink', u'refrigerator', u'book', u'clock', u'vase', u'scissors', u'teddy bear',
        u'hair drier', u'toothbrush'
    ])

    # initilize the network here.
    #args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
    # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap GPU-saved tensors onto CPU.
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # pdb.set_trace()
    print("load checkpoint %s" % (load_name))

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    thresh = 0.5
    webcam_num = args.webcam_num
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    import json, re
    from tqdm import tqdm
    d = {}
    pbar = tqdm(imglist)
    if not train:
        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            # im = cv2.imread(im_file)
            im_name = i
            im_in = np.array(imread(im_file))
            if len(im_in.shape) == 2:
                # grayscale -> replicate to 3 channels
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
            im_info_pt = torch.from_numpy(im_info_np)

            # Reuse the preallocated holders (resize + copy in place).
            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # NOTE(review): hard-coded absolute path to the label vocabulary.
            lis = json.load(open('/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json', 'r'))
            sm_lis = np.zeros(len(lis))
            for j in xrange(1, len(pascal_classes)):  # NOTE(review): xrange is Py2/six.moves
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    #cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    score = cls_dets[0][-1]  # best score for this class
                    # NOTE(review): `score.numpy()` fails on a CUDA tensor and the
                    # bare except silently drops the entry (also swallows labels
                    # missing from labels.json) -- confirm intent; "\D" should be
                    # a raw string r"\D" below.
                    try:
                        sm_lis[lis.index(pascal_classes[j])] = score.numpy()
                    except:
                        pass
            d[re.sub("\D", "", im_name)] = sm_lis.tolist()
        json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2)
    else:
        pass
args.set_cfgs = ['ANCHOR_SCALES', '[2,4,5,6,8,9,10,12,14,16]', 'NUM_CLASSES', args.num_classes] #args.set_cfgs = ['ANCHOR_SCALES', '[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56]', 'NUM_CLASSES', args.num_classes] elif args.dataset == "activitynet": args.imdb_name = "train_data_25fps_flipped.pkl" #_192.pkl" args.imdbval_name = "val_data_25fps.pkl" args.num_classes = 201 #args.set_cfgs = ['ANCHOR_SCALES', '[1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64]', 'NUM_CLASSES', args.num_classes] / stride args.set_cfgs = ['ANCHOR_SCALES', '[1,1.25, 1.5,1.75, 2,2.5, 3,3.5, 4,4.5, 5,5.5, 6,7, 8,9,10,11,12,14,16,18,20,22,24,28,32,36,40,44,52,60,68,76,84,92,100]', 'NUM_CLASSES', args.num_classes] args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.dataset) cfg.CUDA = True cfg.USE_GPU_NMS = True if args.cfg_file is not None: cfg_from_file(args.cfg_file) #根据配置文件中的参数更新配置信息 if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) # for reproduce np.random.seed(cfg.RNG_SEED) torch.manual_seed(cfg.RNG_SEED) if cfg.CUDA: torch.cuda.manual_seed_all(cfg.RNG_SEED) cudnn.benchmark = True #设置这个 flag 可以让内置的 cuDNN 的 auto-tuner 自动寻找最适合当前配置的高效算法,来达到优化运行效率的问题。 # train set
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, resume, load_name, pretrained, eta, gamma, ef, class_agnostic,
                   lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Train an HTCN domain-adaptive detector (source -> multiple targets).

    Builds source/target dataloaders, initializes the HTCN model and its
    optimizer (optionally resuming from `load_name`), then runs
    `train_htcn_one_epoch_inc_union` for each epoch, saving a checkpoint per
    epoch into `output_dir` suffixed with the Sacred run id.  Returns 0.
    """
    args = Args(dataset=dataset_source, dataset_t=dataset_target, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    # These backbones take BGR input -- presumably Caffe-pretrained weights; confirm.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True

    logger = LoggerForSacred(None, ex, True)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # One output directory per Sacred run.
    output_dir = output_dir + "_{}".format(_run._id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataloader_s, m_dataloader_t, imdb, m_imdb_t = init_dataloaders_1s_mt(args, batch_size, num_workers, is_bgr)
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(args_val, 1, num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc, imdb, lc, load_name, net,
                                 pretrained=pretrained)
    #fasterRCNN.re_init_da_layers(device)

    lr, optimizer, session, start_epoch = init_optimizer(lr, fasterRCNN, optimizer, resume, load_name,
                                                         session, start_epoch, is_all_params=True)
    # _, optimizer_unsup, _, _ = init_optimizer(lr, fasterRCNN, optimizer, resume, load_name, session,
    #                                           start_epoch, is_all_params=True)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    # Fixed-length epochs of 10000 samples regardless of dataset size.
    iters_per_epoch = int(10000 / batch_size)

    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    total_step = 0
    if resume:
        # Reconstruct the global step counter from the resumed epoch.
        total_step = (start_epoch - 1) * 10000

    for epoch in range(start_epoch, max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()

        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        total_step = inc_frcnn_utils.train_htcn_one_epoch_inc_union(args, FL, total_step, dataloader_s,
                                                                    m_dataloader_t, iters_per_epoch,
                                                                    fasterRCNN, optimizer, device, eta, logger)

        save_name = os.path.join(output_dir,
                                 'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'.format(
                                     args.dataset_t, args.eta, lc, gc, gamma, session, epoch, total_step))
        save_checkpoint({
            'session': session,
            'epoch': epoch + 1,
            'model': fasterRCNN.module.state_dict() if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': class_agnostic,
        }, save_name)

    return 0
def load(self, path, use_cuda):
    """Load the detector (FPN-101 multi-head) and the food classifier from `path`.

    Reads the bundled config/checkpoint files under `path`, constructs the
    multi-task FPN detector plus the SENet food classifier, loads the
    detector weights (CPU-remapped when `use_cuda` is falsy), allocates the
    reusable input tensor holders, and leaves the detector in eval mode.
    """
    # define options
    path_model_detector = os.path.join(path, 'fpn101_1_10_9999.pth')  # model for detector (food, tableware, drink)
    dataset = 'CloudStatus_val'
    imdb_name2 = 'CloudTableThings_val'
    total_imdb_name = 'CloudStatusTableThings_val'
    load_name = path_model_detector
    self.use_share_regress = True
    self.use_progress = True
    net = 'resnet101'
    self.cuda = use_cuda
    self.class_agnostic = False
    self.att_type = 'None'
    self.vis = True  # generate debug images

    # Load food classifier
    # possible dbname='FoodX251', 'Food101', 'Kfood'
    # possible eval_crop_type='CenterCrop', 'TenCrop'
    self.food_classifier = FoodClassifier(net='senet154', dbname='Kfood',
                                          eval_crop_type='CenterCrop',
                                          ck_file_folder=path, use_cuda=use_cuda,
                                          pretrained=False)

    cfg_file = os.path.join(path, '{}_ls.yml'.format(net))
    set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    # BUGFIX: the original wrote to a throwaway local (`USE_GPU_NMS = self.cuda`)
    # and never touched the global config, so GPU NMS was never enabled here.
    cfg.USE_GPU_NMS = self.cuda

    print('Using config:')
    pprint.pprint(cfg)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    # NOTE(review): this is a checkpoint *file*, despite the 'directory' wording below.
    input_dir = load_name
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)

    self.list_box_color = [(0, 0, 0), (0, 0, 200), (0, 200, 0), (200, 200, 0),
                           (200, 0, 200), (0, 200, 200), (200, 0, 200)]

    self.classes0 = self.get_class_list(dataset)
    self.classes1 = self.get_class_list(imdb_name2)
    self.classes_total = self.get_class_list(total_imdb_name)

    from model.fpn.resnet_multi_CBAM import resnet
    self.fasterRCNN = resnet(self.classes0, self.classes1, use_pretrained=False, num_layers=101,
                             class_agnostic=self.class_agnostic,
                             use_share_regress=self.use_share_regress, use_progress=self.use_progress,
                             att_type=self.att_type)
    self.fasterRCNN.create_architecture()

    print("loading checkpoint %s..." % (load_name))
    if self.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap GPU-saved tensors onto CPU.
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('succeeded')

    # initilize the tensor holder here.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if self.cuda > 0:
        self.im_data = self.im_data.cuda()
        self.im_info = self.im_info.cuda()
        self.num_boxes = self.num_boxes.cuda()
        self.gt_boxes = self.gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        self.im_data = Variable(self.im_data)
        self.im_info = Variable(self.im_info)
        self.num_boxes = Variable(self.num_boxes)
        self.gt_boxes = Variable(self.gt_boxes)

    if self.cuda > 0:
        cfg.CUDA = True
    if self.cuda > 0:
        self.fasterRCNN.cuda()
    self.fasterRCNN.eval()

    print('- models loaded from {}'.format(path))
def prep_model(input_dir):
    """Build a COCO-class res101 Faster R-CNN and load the checkpoint at `input_dir`.

    `input_dir` is the checkpoint *file* path despite its name.  Returns the
    network with weights loaded; the caller is expected to move it to GPU
    and switch it to eval mode.
    """
    args = parse_args()

    print('Called with args:')
    print(args)

    args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    cfg.USE_GPU_NMS = 1

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    # NOTE(review): one-argument os.path.join is a no-op; load_name == input_dir.
    load_name = os.path.join(input_dir)

    # pascal_classes = np.asarray(['__background__',
    #                              'aeroplane', 'bicycle', 'bird', 'boat',
    #                              'bottle', 'bus', 'car', 'cat', 'chair',
    #                              'cow', 'diningtable', 'dog', 'horse',
    #                              'motorbike', 'person', 'pottedplant',
    #                              'sheep', 'sofa', 'train', 'tvmonitor'])
    # COCO 80-class vocabulary (with VOC-style spellings for some labels).
    pascal_classes = np.asarray([
        '__background__', "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
        "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
        "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
        "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
        "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
        "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
        "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
        "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    ])

    # initilize the network here.
    fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    # fasterRCNN.load_state_dict(checkpoint)
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    return fasterRCNN
def bld_train(args, ann_path=None, step=0):
    """Pseudo-label a weakly-labeled target set with an 'expectation' model,
    merge it with the fully-labeled source set, and train Faster R-CNN.

    Args:
        args: namespace with dataset/net/optimizer/checkpoint options.
        ann_path: directory containing 'source' and 'target' annotation dirs.
        step: active-learning step index, used only for log/output paths.

    Side effects: mutates global ``cfg``, writes checkpoints to disk.
    NOTE(review): written for Python 2 / PyTorch <=0.3 (``xrange``,
    ``Variable``, ``loss.data[0]``).
    """
    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger.
        # NOTE(review): "/activestep..." starts with a slash, so
        # os.path.join discards the './.logs/<method>' prefix and the log
        # dir resolves to '/activestep<step>' — likely unintended.
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))
    # Per-dataset imdb names and anchor/GT-box settings.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80',
        # '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError
    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    # Train set = source set + target set.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # Source train set (fully labeled) and target set (weak labels only).
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))
    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))
    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    sampler_batch_tg = None  # do not sample target set
    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                                imdb_tg.num_classes, training=True)
    assert imdb.num_classes == imdb_tg.num_classes
    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())
    # Tensor holders, resized in-place for every batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)
    # Ship to CUDA.
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()
    # Wrap in autograd Variables (pre-0.4 PyTorch API).
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)
    if args.cuda:
        cfg.CUDA = True
    # Initialize the trainee network.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError
    # Initialize the expectation (pseudo-labeling) network with the same
    # architecture choice.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError
    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()
    lr = args.lr
    # Per-parameter options: biases get doubled LR (if DOUBLE_BIAS) and
    # optionally no weight decay.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]
    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError
    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))
    # Expectation model: always loaded from args.model_path.
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    fasterRCNN_val = fasterRCNN_val  # NOTE(review): no-op self-assignment
    fasterRCNN_val.eval()
    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()
    # ---- Pseudo-labeling pass over the target set ----
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # Expectation pass: only rois/cls_prob/bbox_pred are used.
            rois, cls_prob, bbox_pred, \
                _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas.
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally de-normalize targets by the precomputed
                    # mean and stdev.
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))
                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class.
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))
            # Choose confident detections as pseudo ground truth, per image.
            for b_idx in range(b_size):
                # Undo the image-scale factor stored in im_info[2].
                pred_boxes[b_idx] /= data[1][b_idx][2]
                for j in xrange(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image-level label for class j
                    # Drop degenerate boxes (zero width or height).
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # Halve the score threshold until at least one
                    # detection survives.
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :, j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)
                    # Should be unreachable given the loop above.
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue
                    # First empty GT slot (class id 0 marks unused rows).
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0):
                        continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    # NOTE(review): 0.2 NMS IoU threshold is a magic number.
                    keep = nms(cls_dets, 0.2)
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0:
                        continue
                    # Keep at most 4 highest-scoring boxes as pseudo GT;
                    # 20 is the GT-slot capacity.
                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k:
                            break
                        if missing_id + pgt_k >= 20:
                            break
                        gt_boxes.data[b_idx, missing_id + pgt_k, :4] = cls_boxes[keep][order[
                            len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k, 4] = j  # class
                        num_boxes[b_idx] = num_boxes[b_idx] + 1
                # Write the pseudo boxes back into the target roidb entry.
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg + b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # Sanity check: image-level labels must round-trip.
                assert np.array_equal(sample["label"], image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"], image_label[b_idx].cpu().data.numpy())
    else:
        # Cached pseudo annotations exist; loading is disabled (no-op).
        pass
    print("-- Optimization Stage --")
    print("######################################################l")
    # Merge source and pseudo-labeled target sets, then drop images that
    # ended up with no boxes.
    roidb.extend(roidb_tg)
    print('before filtering, there are %d images...' % (len(roidb)))
    i = 0
    while i < len(roidb):
        # NOTE(review): both branches of this 'if True' are identical, so
        # the else-arm is dead code.
        if True:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        else:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        i += 1
    print('after filtering, there are %d images...' % (len(roidb)))
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))
    # ---- Training loop ----
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()
        epoch_start = start
        # Step the learning rate every lr_decay_step+1 epochs.
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma
        data_iter = iter(dataloader)
        # NOTE(review): loop variable 'step' shadows the function's 'step'
        # parameter from here on.
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            # Confidence tensor is zero-filled; not passed to the model here.
            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)
            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # Sum of the four Faster R-CNN losses.
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            # Gradient clipping only for VGG16.
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()
            # Periodic console / TensorBoard logging.
            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval
                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)
                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                         images, step)
                loss_temp = 0
                start = time.time()
            if False:
                break
        # Save a checkpoint at the end of every epoch.
        if args.mGPUs:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))
        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))
    print('finished!')
def __init__(self):
    """Set up a VGG16 Faster R-CNN grasp detector for inference.

    Loads hard-coded config ('cfgs/vgg16.yml') and a fixed checkpoint
    ('./models/vgg16/grasp/faster_rcnn_1_5_899.pth'), builds the network
    over 5 grasp classes and 19 pose bins, moves it to CUDA, and switches
    it to eval mode.

    Raises:
        Exception: if the checkpoint directory does not exist.
    """
    self.cuda = True
    cfg.USE_GPU_NMS = self.cuda
    cfg_file = 'cfgs/vgg16.yml'
    cfg_from_file(cfg_file)
    dataset = 'grasp'
    net = 'vgg16'
    load_dir = './models'
    self.class_agnostic = True
    # Fixed checkpoint coordinates: session 1, epoch 5, iteration 899.
    checksession = 1
    checkepoch = 5
    checkpoint = 899
    input_dir = load_dir + "/" + net + "/" + dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch,
                                          checkpoint))
    self.grasp_classes = np.asarray(
        ['__background__', 'bolt', 'hammer', 'scissors', 'tape'])
    self.grasp_poses = np.asarray(['__background__',  # always index 0
                                   'bin01', 'bin02', 'bin03', 'bin04', 'bin05', 'bin06', \
                                   'bin07', 'bin08', 'bin09', 'bin10', 'bin11', 'bin12', \
                                   'bin13', 'bin14', 'bin15', 'bin16', 'bin17', 'bin18', \
                                   'binAll'])
    # Initialize the network (weights come from the checkpoint).
    self.fasterRCNN = vgg16(self.grasp_classes,
                            self.grasp_poses,
                            pretrained=False,
                            class_agnostic=self.class_agnostic)
    self.fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    if self.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # CPU fallback: remap storages off the GPU.
        checkpoint = torch.load(
            load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    print("load checkpoint %s" % (load_name))
    with torch.no_grad():
        # Tensor holders, resized per image at inference time.
        self.im_data = torch.FloatTensor(1)
        self.im_info = torch.FloatTensor(1)
        self.num_boxes = torch.LongTensor(1)
        self.gt_boxes = torch.FloatTensor(1)
        # Ship to CUDA.
        if self.cuda > 0:
            self.im_data = self.im_data.cuda()
            self.im_info = self.im_info.cuda()
            self.num_boxes = self.num_boxes.cuda()
            self.gt_boxes = self.gt_boxes.cuda()
    cfg.CUDA = self.cuda
    self.fasterRCNN.cuda()
    self.fasterRCNN.eval()
    # Detection score threshold and visualization flag used elsewhere.
    self.thresh = 0.05
    self.vis = True
def train(dataset="kaggle_pna",
          train_ds="train",
          arch="couplenet",
          net="res152",
          start_epoch=1,
          max_epochs=20,
          disp_interval=100,
          save_dir="save",
          num_workers=4,
          cuda=True,
          large_scale=False,
          mGPUs=True,
          batch_size=4,
          class_agnostic=False,
          anchor_scales=4,
          optimizer="sgd",
          lr_decay_step=10,
          lr_decay_gamma=.1,
          session=1,
          resume=False,
          checksession=1,
          checkepoch=1,
          checkpoint=0,
          use_tfboard=False,
          flip_prob=0.0,
          scale=0.0,
          scale_prob=0.0,
          translate=0.0,
          translate_prob=0.0,
          angle=0.0,
          dist="cont",
          rotate_prob=0.0,
          shear_factor=0.0,
          shear_prob=0.0,
          rpn_loss_cls_wt=1,
          rpn_loss_box_wt=1,
          RCNN_loss_cls_wt=1,
          RCNN_loss_bbox_wt=1,
          **kwargs):
    """Train an RCNN/RFCN/CoupleNet detector on the pneumonia (PNA) dataset
    with on-the-fly augmentations and weighted multi-task loss.

    Remaining ``kwargs`` are merged into the global ``cfg`` (with optional
    TRAIN/RESNET/MOBILENET sub-dicts). Checkpoints go to ``cfg.MODEL_DIR``.
    NOTE(review): Py2/old-PyTorch style (``loss.data[0]``, ``Variable``).
    """
    print("Train Arguments: {}".format(locals()))
    # Import the network definition matching the requested architecture.
    if arch == 'rcnn':
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet
    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb
    print('Called with kwargs:')
    print(kwargs)
    # Set up logger.
    if use_tfboard:
        from model.utils.logger import Logger
        logger = Logger('./logs')
    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32].
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]
    # Dataset selection.
    if train_ds == "train":
        imdb_name = "pna_2018_train"
    elif train_ds == "trainval":
        imdb_name = "pna_2018_trainval"
    set_cfgs = [
        'ANCHOR_SCALES',
        str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'
    ]
    # Resolve cfg files relative to the model package's repo root.
    import model
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))
    cfg_file = "cfgs/{}_ls.yml".format(
        net) if large_scale else "cfgs/{}.yml".format(net)
    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)
    # Fold optional kwargs sections into the global config.
    train_kwargs = kwargs.pop("TRAIN", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)
    if train_kwargs is not None:
        for key, value in train_kwargs.items():
            cfg["TRAIN"][key] = value
    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value
    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value
    if kwargs is not None:
        for key, value in kwargs.items():
            cfg[key] = value
    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.TRAIN_DATA_CLEAN_PATH = os.path.abspath(cfg.TRAIN_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    print("LEARNING RATE: {}".format(cfg.TRAIN.LEARNING_RATE))
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)
    print('{:d} roidb entries'.format(len(roidb)))
    output_dir = cfg.MODEL_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    sampler_batch = sampler(train_size, batch_size)
    dataset = roibatchLoader(roidb,
                             ratio_list,
                             ratio_index,
                             batch_size,
                             imdb.num_classes,
                             training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler_batch,
                                             num_workers=num_workers)
    # Tensor holders, resized in-place per batch.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if cuda:
        cfg.CUDA = True
    # Initialize the network.
    # NOTE(review): 'model' here rebinds the module imported above.
    if net == 'vgg16':
        print("Pretrained model is not downloaded and network is not used")
    elif net == 'res18':
        model = resnet(imdb.classes, 18, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res34':
        model = resnet(imdb.classes, 34, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res50':
        model = resnet(imdb.classes, 50, pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res101':
        model = resnet(imdb.classes, 101, pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes, 152, pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    model.create_architecture()
    # Per-parameter optimizer options (bias LR doubling / decay rules).
    lr = cfg.TRAIN.LEARNING_RATE
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]
    # Optimizer (the string parameter is rebound to the optimizer object).
    if optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    # Resume training from a saved checkpoint.
    if resume:
        load_name = os.path.join(
            output_dir,
            '{}_{}_{}_{}.pth'.format(arch, checksession, checkepoch,
                                     checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        session = checkpoint['session'] + 1
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))
    if mGPUs:
        model = nn.DataParallel(model)
    if cuda:
        model.cuda()
    # ---- Training loop ----
    iters_per_epoch = int(train_size / batch_size)
    sys.stdout.flush()
    for epoch in range(start_epoch, max_epochs + 1):
        # Re-shuffle/resize batches for augmentation each epoch.
        dataset.resize_batch()
        model.train()
        loss_temp = 0
        start = time.time()
        if epoch % (lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            sys.stdout.flush()
            data = next(data_iter)
            # Apply geometric augmentations to images and boxes jointly.
            aug_img_tensors, aug_bbox_tensors = apply_augmentations(
                data[0],
                data[2],
                flip_prob=flip_prob,
                scale=scale,
                scale_prob=scale_prob,
                translate=translate,
                translate_prob=translate_prob,
                angle=angle,
                dist=dist,
                rotate_prob=rotate_prob,
                shear_factor=shear_factor,
                shear_prob=shear_prob)
            im_data.data.resize_(aug_img_tensors.size()).copy_(aug_img_tensors)
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(
                aug_bbox_tensors.size()).copy_(aug_bbox_tensors)
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            # Weighted multi-task loss.
            model.zero_grad()
            rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = model(im_data, im_info, gt_boxes, num_boxes)
            loss = rpn_loss_cls_wt * rpn_loss_cls.mean() + rpn_loss_box_wt * rpn_loss_box.mean() + \
                RCNN_loss_cls_wt * RCNN_loss_cls.mean() + RCNN_loss_bbox_wt * RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            if net == "vgg16":
                clip_gradient(model, 10.)
            optimizer.step()
            # Display training stats on the terminal.
            if step % disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= disp_interval
                if mGPUs:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\t batch_loss: %.4f, rpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (batch_loss, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)
                loss_temp = 0
                start = time.time()
        # Save model at the end of each epoch.
        if mGPUs:
            save_name = os.path.join(
                output_dir,
                '{}_{}_{}_{}.pth'.format(arch, session, epoch, step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                '{}_{}_{}_{}.pth'.format(arch, session, epoch, step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))
        end = time.time()
        # NOTE(review): 'i' is not defined anywhere in this function —
        # this line raises NameError when reached; probably meant 'session'.
        delete_older_checkpoints(
            os.path.join(cfg.MODEL_DIR, "couplenet_{}_*.pth".format(i)))
        print("Run Time: ", end - start)
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers, lr,
                   batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, mask_load_p, resume, load_name, pretrained,
                   model_type, eta, gamma, ef, class_agnostic, lc, gc, LA_ATT,
                   MID_ATT, debug, _run):
    """Profile FLOPs/params of an HTCN-style detector and a small conv mask.

    Builds the backbone selected by ``net``/``model_type``, runs ``thop``'s
    ``profile``/``clever_format`` on a dummy 600x1200 input, prints the
    counts, then iterates up to ~1000 source-domain samples moving them to
    the device (no training is performed — the loop body ends in ``pass``).

    Side effects: mutates global ``cfg``, seeds NumPy's RNG.
    """
    args = Args(dataset=dataset_source, dataset_t=dataset_target,
                cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)
    # Torchvision-style backbones expect BGR preprocessing in this codebase.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True
    logger = LoggerForSacred(None, ex, False)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)
    # Select the backbone constructor from net family x model_type.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # NOTE(review): leaves backbone_fn = None; init_model_only
            # below would then receive None for the vgg/saitp combination.
            backbone_fn = None
    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr, False)
    model = init_frcnn_utils.init_model_only(device, net, backbone_fn, imdb_t,
                                             '',
                                             class_agnostic=class_agnostic,
                                             lc=lc,
                                             gc=gc,
                                             la_attention=LA_ATT,
                                             mid_attention=MID_ATT)
    model.eval()
    # Dummy inputs for FLOPs counting (im_info = [height, width, scale]).
    im_data = torch.randn(1, 3, 600, 1200).to(device)
    im_info = torch.FloatTensor([[600, 900, 2]]).to(device)
    gt_boxes = torch.zeros((1, 1, 5)).to(device)
    num_boxes = torch.zeros([1]).to(device)
    macs, params = profile(model,
                           inputs=(im_data, im_info, gt_boxes, num_boxes))
    macs, params = clever_format([macs, params], "%.3f")
    print("Model CFLOPS: {}".format(macs))
    print("Model Cparams: {}".format(params))
    # Small 3->256->3 1x1-conv "mask" network, profiled for comparison.
    random_mask = nn.Sequential(
        nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False), nn.ReLU(),
        nn.Conv2d(256, 3, 1)).to(device)
    macs, params = profile(random_mask, inputs=(im_data, ))
    macs, params = clever_format([macs, params], "%.3f")
    print("Mask CFLOPS: {}".format(macs))
    print("Mask Cparams: {}".format(params))
    # Walk ~1000 source samples; the iterator is restarted on exhaustion.
    iters_per_epoch = int(1000 / batch_size)
    data_iter_s = iter(dataloader_s)
    for step in range(1, iters_per_epoch + 1):
        try:
            data_s = next(data_iter_s)
        except:
            data_iter_s = iter(dataloader_s)
            data_s = next(data_iter_s)
        im_data = data_s[0].to(device)
        im_info = data_s[1].to(device)
        gt_boxes = data_s[2].to(device)
        num_boxes = data_s[3].to(device)
        # Intentionally no computation here — data-loading walk only.
        pass
def load_model(args):
    """Build the faster-RCNN for ``args.net``, load its checkpoint, and
    publish the model and class list via the module-level globals
    ``fasterRCNN`` and ``pascal_classes``.

    The checkpoint path is derived from ``args.load_dir``, ``args.net``,
    ``args.dataset`` and the (checksession, checkepoch, checkpoint) triple.

    Raises:
        Exception: if the checkpoint directory does not exist.
        ValueError: if ``args.net`` is not one of vgg16/res50/res101/res152.
    """
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    np.random.seed(cfg.RNG_SEED)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    global pascal_classes
    pascal_classes = np.asarray([
        '__background__',  # always index 0
        'element in the Array-list', 'root node in the Binary-tree',
        'internal node in the Binary-tree', 'leaf node in the Binary-tree',
        'vertex in the Graph', 'process in the Deadlock',
        'resource in the Deadlock', 'head element in the Queue',
        'element in the Queue', 'tail element in the Queue',
        'head node in the Queue', 'pointer in the Queue',
        'node in the Non-binary-tree', 'node in the Network_topology',
        'head element in the Linked_List', 'element in the Linked_List',
        'tail element in the Linked_List',
        'insert element in the Linked_List', 'head node in the Linked_List',
        'arrow', 'edge', 'top element in the Stack',
        'bottom element in the Stack', 'push element in the Stack',
        'pop element in the Stack', 'internal element in the Stack',
        'empty stack in the Stack', 'terminal in the Flowchart',
        'process in the Flowchart', 'decision in the Flowchart',
        'flowline in the Flowchart', 'document in the Flowchart',
        'input in the Flowchart', 'output in the Flowchart',
        'annotation in the Flowchart', 'database in the Flowchart',
        'manual operation in the Flowchart',
        'predefined process in the Flowchart',
        'on-page connector in the Flowchart'
    ])

    # Initialize the network. The three res* branches differed only in depth,
    # so derive the depth from the name instead of duplicating the call.
    global fasterRCNN
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net in ('res50', 'res101', 'res152'):
        depth = int(args.net[3:])  # 'res101' -> 101
        fasterRCNN = resnet(pascal_classes, depth, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        # Was print(...) + pdb.set_trace(): a debugger trap in library code.
        # Fail loudly and unambiguously instead.
        raise ValueError("network is not defined: {}".format(args.net))

    fasterRCNN.create_architecture()

    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap CUDA-saved tensors onto the CPU when running without a GPU.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, pretrained, eta, gamma, ef, class_agnostic,
                   lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Train a small domain-transfer mask (DTM) CNN against a frozen HTCN
    faster-RCNN loaded from *load_name*, then save the DTM to *output_dir*.

    Returns:
        int: 0 on completion (sacred-style experiment result).
    """
    args = Args(dataset=dataset_source, dataset_t=[], imdb_name_target=[],
                cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)

    # torchvision-style backbones in this codebase consume BGR images.
    is_bgr = net in ['res101', 'res50', 'res152', 'vgg16']

    logger = LoggerForSacred(None, ex)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Tag the output directory with digits from the checkpoint name so runs
    # from different checkpoints don't overwrite each other. Raw-string
    # pattern (was "\d+", an invalid escape), computed once, and guarded:
    # the original indexed [0] and crashed on digit-free names even though
    # the later find_id logic already handled that case.
    checkpoint_ids = re.findall(r"\d+", load_name)
    load_id = checkpoint_ids[0] if checkpoint_ids else "0"
    output_dir = output_dir + "_{}".format(load_id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataloader_s, _, imdb, _ = init_dataloaders_1s_mt(args, batch_size,
                                                      num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc,
                                 imdb, lc, load_name, net, strict=False,
                                 pretrained=pretrained)

    # The trainable DTM: a tiny 1x1-conv bottleneck network on RGB images.
    dtm = nn.Sequential(
        nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False),
        nn.ReLU(),
        nn.Conv2d(256, 3, 1))
    dtm.to(device)
    # Only the DTM is optimized; the detector stays frozen.
    optimizer = torch.optim.SGD(dtm.parameters(), lr=lr, momentum=0.9)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(10000 / batch_size)

    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    dtm_util.get_mask_for_target(args, FL, 0, dataloader_s, iters_per_epoch,
                                 fasterRCNN, dtm, optimizer, device, logger)

    # Preserve original naming behavior: last digit-run of the checkpoint
    # name, or the integer 0 when the name contains no digits.
    find_id = checkpoint_ids[-1] if checkpoint_ids else 0
    torch.save(
        dtm,
        os.path.join(output_dir,
                     'dtm_target_cnn_{}_{}.p'.format(load_id, find_id)))
    return 0
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50'] elif args.dataset == "imagenet": args.imdb_name = "imagenet_train" args.imdbval_name = "imagenet_val" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'] elif args.dataset == "vg": # train sizes: train, smalltrain, minitrain # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20'] args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50'] # .yml: yet another markup language 其中的冒号表示键值对 args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) # 做某些配置操作 if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) # 输出配置情况 np.random.seed(cfg.RNG_SEED) # (For reproducibility) ?? #torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") # train set # -- Note: Use validation set and disable the flipped to enable faster loading. 加速 # cfg: configuration
return parser.parse_args() if __name__ == '__main__': print(_init_paths.lib_path) args = parse_args() print('Called with args:') print(args) logger = SummaryWriter( os.path.join('logs', '{}_{}'.format(args.session, args.dataset))) args.imdb_name = "voc_{}_trainval".format(args.dataset) args.imdbval_name = "voc_{}_test".format(args.dataset) cfg_from_file("cfgs/{}{}.yml".format(args.net, "_ls" if args.large_scale else "")) if args.config_file: cfg_from_file(args.config_file) cfg_fix() print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) output_dir = os.path.join(args.save_dir, str(args.session), args.net, args.dataset) os.makedirs(output_dir, exist_ok=True) cfg_to_json = deepcopy(cfg) del cfg_to_json["PIXEL_MEANS"] json.dump(cfg_to_json,
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50'] elif args.dataset == "imagenet": args.imdb_name = "imagenet_train" args.imdbval_name = "imagenet_val" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'] elif args.dataset == "vg": # train sizes: train, smalltrain, minitrain # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20'] args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50'] args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) #torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda
out.write(frame_array[i]) out.release() if __name__ == '__main__': imdb_name = "imagenet_vid_train" imdbval_name = "imagenet_vid_test" set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30' ] cfg_file = "cfgs/{}.yml".format(net) cfg_from_file(cfg_file) print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. input_dir = load_dir + "/" + net + "/" + dataset if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir,