# NOTE(review): this file is a whitespace-mangled concatenation of several
# Faster R-CNN demo/test scripts; the original newlines/indentation were lost,
# so each "line" below holds many statements and is not runnable as-is.  Code
# is kept byte-identical; comments flag issues to fix once reflowed.
# Chunk: Pascal-VOC demo setup — builds the checkpoint path from
# load_dir/net/dataset, defines the 20 VOC classes (+ '__background__'),
# constructs the backbone-specific FasterRCNN (pretrained=False because the
# checkpoint supplies the weights), and starts loading the checkpoint.
# NOTE(review): the unknown-net branch drops into pdb.set_trace() — debugging
# left in, not error handling.  Cut off mid `if args.cuda > 0:`; the CPU
# map_location branch is presumably on the missing continuation.
input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) load_name = os.path.join(input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) pascal_classes = np.asarray(['__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) if args.cuda > 0: checkpoint = torch.load(load_name)
# Chunk: COCO-model evaluation setup.  NOTE(review): concrete bug —
# os.path.join(input_dir, '/fldata/pytorch-model/faster_rcnn_vgg16_coco-jwy.pth')
# discards input_dir entirely because the second component is absolute
# (documented os.path.join behaviour), so the os.path.exists(input_dir) guard
# validates a directory that is never used for loading.  Cut off mid the
# res152 constructor call.
imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) input_dir = args.load_dir if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, '/fldata/pytorch-model/faster_rcnn_vgg16_coco-jwy.pth') # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=False,
im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, imdb.super_classes, imdb.super_classes_range, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr #tr_momentum = cfg.TRAIN.MOMENTUM
# NOTE(review): in the training fragment above, the vgg16 branch passes
# imdb.super_classes / imdb.super_classes_range while the resnet branches do
# not — presumably a super-class-aware VGG variant; confirm the resnet
# constructor signature before unifying.  Also `lr = cfg.TRAIN.LEARNING_RATE`
# is immediately overwritten by `lr = args.lr` — the first assignment is dead.
# Chunk: hand/object-contact demo — main(args) + _get_image_blob(im) + the
# per-image detection loop that dumps base features to FEATS_PATH.
# NOTE(review) concrete issues, code kept verbatim:
#   * args.set_cfgs is assigned AFTER cfg_from_list(args.set_cfgs) has
#     already run, so the ANCHOR_SCALES/ANCHOR_RATIOS override here has no
#     effect — dead assignment.
#   * load_name is hard-coded to models/res101_handobj_100K/... regardless of
#     --net, so e.g. --net vgg16 loads res101 weights.
#   * `raise Exception` after pdb.set_trace() raises a bare Exception with no
#     message; prefer a ValueError naming the bad --net.
#   * when webcam_num >= 0, num_images is never decremented, so the
#     `while (num_images >= 0)` loop runs until the process is killed —
#     presumably intentional for a live webcam; confirm.
#   * _get_image_blob's docstring and the loop below are split mid-token by
#     the lost line breaks; do not insert anything between these chunks.
def main(args): if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) cfg.USE_GPU_NMS = args.cuda np.random.seed(cfg.RNG_SEED) pascal_classes = np.asarray(['__background__', 'targetobject', 'hand']) args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32, 64]', 'ANCHOR_RATIOS', '[0.5, 1, 2]' ] # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() raise Exception fasterRCNN.create_architecture() load_name = 'models/res101_handobj_100K/pascal_voc/faster_rcnn_1_8_132028.pth' print("load checkpoint %s" % (load_name)) if args.cuda > 0: checkpoint = torch.load(load_name) else: checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage)) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY def _get_image_blob(im): """Converts an image into a network input. 
Arguments: im (ndarray): a color image in BGR order Returns: blob (ndarray): a data blob holding an image pyramid im_scale_factors (list): list of image scales (relative to im) used in the image pyramid """ im_orig = im.astype(np.float32, copy=True) im_orig -= cfg.PIXEL_MEANS im_shape = im_orig.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) processed_ims = [] im_scale_factors = [] for target_size in cfg.TEST.SCALES: im_scale = float(target_size) / float(im_size_min) # Prevent the biggest axis from being more than MAX_SIZE if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) im_scale_factors.append(im_scale) processed_ims.append(im) # Create a blob to hold the input images blob = im_list_to_blob(processed_ims) return blob, np.array(im_scale_factors) # initilize the tensor holder here. im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) box_info = torch.FloatTensor(1) # ship to cuda if args.cuda > 0: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() with torch.no_grad(): if args.cuda > 0: cfg.CUDA = True if args.cuda > 0: fasterRCNN.cuda() fasterRCNN.eval() with torch.no_grad(): start = time.time() max_per_image = 100 thresh_hand = args.thresh_hand thresh_obj = args.thresh_obj vis = args.vis # print(f'thresh_hand = {thresh_hand}') # print(f'thnres_obj = {thresh_obj}') webcam_num = args.webcam_num # Set up webcam or get image directories if webcam_num >= 0: cap = cv2.VideoCapture(webcam_num) num_images = 0 else: print(f'image dir = {args.image_dir}') print(f'save dir = {args.save_dir}') imglist = os.listdir(args.image_dir) num_images = len(imglist) print('Loaded Photo: {} images.'.format(num_images)) while (num_images >= 0): total_tic = time.time() if 
webcam_num == -1: num_images -= 1 # Get image from the webcam if webcam_num >= 0: if not cap.isOpened(): raise RuntimeError( "Webcam could not open. Please check connection.") ret, frame = cap.read() im_in = np.array(frame) # Load the demo image else: im_file = os.path.join(args.image_dir, imglist[num_images]) im_in = np.array(imread(im_file)) # resize # im_in = np.array(Image.fromarray(im_in).resize((640, 360))) if len(im_in.shape) == 2: im_in = im_in[:, :, np.newaxis] im_in = np.concatenate((im_in, im_in, im_in), axis=2) # rgb -> bgr im = im_in[:, :, ::-1] blobs, im_scales = _get_image_blob(im) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array( [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) feats_file_base = os.path.join( FEATS_PATH, os.path.splitext(os.path.basename(imglist[num_images]))[0]) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) with torch.no_grad(): im_data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.resize_(1, 1, 5).zero_() num_boxes.resize_(1).zero_() box_info.resize_(1, 1, 5).zero_() # pdb.set_trace() det_tic = time.time() base_feats = fasterRCNN.forward_base_features( im_data, im_info, gt_boxes, num_boxes, box_info) #pooled_feats, pooled_feats_padded = fasterRCNN.forward_pooled_feats(im_data, im_info, gt_boxes, num_boxes, box_info) torch.save(base_feats, feats_file_base + '_base.pt') #torch.save(pooled_feats, feats_file_base + '_pooled.pt') #torch.save(pooled_feats_padded, feats_file_base + '_pooled_padded.pt') det_toc = time.time() detect_time = det_toc - det_tic print('Inference time: ', detect_time, 's | ', feats_file_base)
# One-shot / query-based detection evaluation.
# Builds (or reuses, via the optional `model` argument) a FasterRCNN that is
# conditioned on a query image per category, runs multi-scale inference over
# the validation roidb, merges per-scale detections, NMS-filters, caps
# detections per image, optionally dumps visualisations, and finally calls
# imdb_vu.evaluate_detections, returning the resulting mAP.
# NOTE(review): this chunk lost its original line breaks; code kept verbatim.
def test(args, model=None): if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # Load dataset imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb( args.imdbval_name, False) imdb_vu.competition_mode(on=True) dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1, imdb_vu._classes, training=False) # initilize the network here. if not model: if args.net == 'vgg16': fasterRCNN = vgg16(imdb_vu.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb_vu.classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb_vu.classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") fasterRCNN.create_architecture() # Load checkpoint print("load checkpoint %s" % (args.weights)) checkpoint = torch.load(args.weights) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') else: # evaluate constructed model fasterRCNN = model # initialize the tensor holder here. 
# Holder tensors are allocated once and resize_/copy_'d per batch below.
# `catgory` is a (misspelled "category") per-image class-id tensor.
# One DataLoader iterator is created per cfg.TEST.SCALES entry for the
# multi-scale pass.  NOTE(review): the `_t` timing dict is written once and
# never read — dead state.
im_data = torch.FloatTensor(1) query = torch.FloatTensor(1) im_info = torch.FloatTensor(1) catgory = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda: cfg.CUDA = True fasterRCNN.cuda() im_data = im_data.cuda() query = query.cuda() im_info = im_info.cuda() catgory = catgory.cuda() gt_boxes = gt_boxes.cuda() # record time start = time.time() # visiualization vis = args.vis if hasattr(args, 'vis') else None if vis: thresh = 0.05 else: thresh = 0.0 max_per_image = 100 fasterRCNN.eval() dataset_vu.query_position = 0 test_scales = cfg.TEST.SCALES multiscale_iterators = [] for i_scale, test_scale in enumerate(test_scales): cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) cur_data_iter_vu = iter(cur_dataloader_vu) multiscale_iterators.append(cur_data_iter_vu) # total quantity of testing images, each images include multiple detect class num_images_vu = len(imdb_vu.image_index) num_detect = len(ratio_index_vu[0]) all_boxes = [[[] for _ in range(num_images_vu)] for _ in range(imdb_vu.num_classes)] _t = {'im_detect': time.time(), 'misc': time.time()} for i, index in enumerate(ratio_index_vu[0]): det_tic = time.time() multiscale_boxes = [] multiscale_scores = [] for i_scale, (data_iter_vu, test_scale) in enumerate( zip(multiscale_iterators, test_scales)): # need to rewrite cfg.TRAIN.SCALES - very hacky! 
# Per scale: temporarily rewrite cfg.TRAIN.SCALES (author-acknowledged hack),
# pull one batch, run the model once per (optionally rot90-augmented) query,
# apply bbox regression (class-agnostic or per-class), clip to the image and
# rescale boxes back to original-image coordinates.  NOTE(review): in the
# `not cfg.TEST.BBOX_REG` branch pred_boxes becomes a NumPy array via
# np.tile, yet torch.cat below expects tensors — that path would fail at
# runtime; presumably BBOX_REG is always enabled here (confirm).  Also
# NOTE(review): all_boxes is indexed with the `catgory` tensor directly
# (works only because it holds a single element), and the max_per_image
# limiter swallows every error with a bare `except: pass` — at minimum this
# should catch IndexError/TypeError explicitly.
BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES cfg.TRAIN.SCALES = [test_scale] data = next(data_iter_vu) cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) query.resize_(data[1].size()).copy_(data[1]) im_info.resize_(data[2].size()).copy_(data[2]) gt_boxes.resize_(data[3].size()).copy_(data[3]) catgory.data.resize_(data[4].size()).copy_(data[4]) # Run Testing if not hasattr(args, "class_image_augmentation" ) or not args.class_image_augmentation: queries = [query] elif args.class_image_augmentation.lower() == "rotation90": queries = [query] for _ in range(3): queries.append(queries[-1].rot90(1, [2, 3])) else: raise RuntimeError( "Unknown class_image_augmentation: {}".format( args.class_image_augmentation)) for q in queries: rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, _, RCNN_loss_bbox, \ rois_label, weight = fasterRCNN(im_data, q, im_info, gt_boxes, catgory) scores = cls_prob.data boxes = rois.data[:, :, 1:5] # Apply bounding-box regression if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view( 1, -1, 4 * len(imdb_vu.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) # Resize to original ratio pred_boxes /= data[2][0][2].item() # Remove batch_size dimension scores = 
scores.squeeze() pred_boxes = pred_boxes.squeeze() multiscale_scores.append(scores) multiscale_boxes.append(pred_boxes) scores = torch.cat(multiscale_scores, dim=0) pred_boxes = torch.cat(multiscale_boxes, dim=0) # Record time det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() # Post processing inds = torch.nonzero(scores > thresh).view(-1) if inds.numel() > 0: # remove useless indices cls_scores = scores[inds] cls_boxes = pred_boxes[inds, :] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # rearrange order _, order = torch.sort(cls_scores, 0, True) cls_dets = cls_dets[order] # NMS keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] all_boxes[catgory][index] = cls_dets.cpu().numpy() # Limit to max_per_image detections *over all classes* if max_per_image > 0: try: image_scores = all_boxes[catgory][index][:, -1] if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] keep = np.where( all_boxes[catgory][index][:, -1] >= image_thresh)[0] all_boxes[catgory][index] = all_boxes[catgory][index][ keep, :] except: pass misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_detect, detect_time, nms_time)) sys.stdout.flush() # save test image if vis and i % 1 == 0: im2show = cv2.imread( dataset_vu._roidb[dataset_vu.ratio_index[i]]['image']) im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.3) o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy() o_query *= [0.229, 0.224, 0.225] o_query += [0.485, 0.456, 0.406] o_query *= 255 o_query = o_query[:, :, ::-1] (h, w, c) = im2show.shape o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR) im2show = np.concatenate((im2show, o_query), axis=1) vis_path = "./test_img" if not os.path.isdir(vis_path): os.makedirs(vis_path) cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show) 
# Final step: score the accumulated detections and report wall-clock time.
print('Evaluating detections') mAP = imdb_vu.evaluate_detections(all_boxes, None) end = time.time() print("test time: %0.4fs" % (end - start)) return mAP
# Chunk: pedestrian-detector training fragment (starts mid-stream).
# NOTE(review): the vgg16 branch hard-codes the class tuple
# ('__background__', 'ped') while the resnet branches use imdb.classes —
# inconsistent; pick one source of truth.  Also `lr = cfg.TRAIN.LEARNING_RATE`
# is immediately overwritten by `lr = args.lr` (dead assignment), and the
# unknown-net branch leaves fasterRCNN undefined.
# make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True #import pdb #pdb.set_trace() # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(('__background__', 'ped'), pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") #pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr #tr_momentum = cfg.TRAIN.MOMENTUM
# Chunk: open-vocabulary training fragment — same holder/Variable boilerplate,
# but the class list comes from a `vocabulary` name defined outside this
# chunk (TODO confirm its type matches what vgg16/resnet expect).
# NOTE(review): `lr = cfg.TRAIN.LEARNING_RATE` is dead — overwritten by
# `lr = args.lr` on the very next statement.
im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(vocabulary, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(vocabulary, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(vocabulary, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(vocabulary, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr # tr_momentum = cfg.TRAIN.MOMENTUM
# Chunk: knowledge-distillation setup — the student is vgg11 (sup=True) or
# vgg16; the teacher is a separately constructed vgg16 (sup=True) loaded from
# the hard-coded checkpoint ./data/VGG16-FRCNN/faster_rcnn_1_7_10021.pth
# (consider making this a CLI argument).  NOTE(review): falling through the
# unknown-net branch (print + pdb.set_trace()) leaves fasterRCNN undefined.
im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg11': fasterRCNN = vgg11(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic, sup=True) elif args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() # teacher fasterRCNN_sup_vgg16 = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic, sup=True) fasterRCNN_sup_vgg16.create_architecture() supervisor_path = './data/VGG16-FRCNN/faster_rcnn_1_7_10021.pth' print("load checkpoint %s" % (supervisor_path)) checkpoint = torch.load(supervisor_path) fasterRCNN_sup_vgg16.load_state_dict(checkpoint['model'])
# Detector-as-callable: hard-codes every option (vgg16 backbone, imagenet
# checkpoint faster_rcnn_1_6_10021.pth under ./mydetector/model), loads the
# model, then runs a demo-style VOC detection loop over `image_dir`, writing
# *_det.jpg results next to the inputs.
# NOTE(review) concrete issues, code kept verbatim:
#   * `cfgs = 'vgg16.vml'` looks like a '.yml' typo — harmless today only
#     because it is dead; cfg_from_file uses the hard-coded cfg_file instead.
#   * cfg.CUDA is forced True and .cuda() is called unconditionally, so this
#     path hard-requires a GPU.
#   * "load checkpoint %s" is printed twice for the same checkpoint.
#   * *args/**kwargs are accepted but ignored.
def __call__(self, *args, **kwargs): net = 'vgg16' checksession = 1 checkepoch = 6 checkpoint = 10021 load_dir = './mydetector/model' cfgs = 'vgg16.vml' set_cfgs = None dataset = 'imagenet' image_dir = 'images' webcam_num = -1 cfg_file = './mydetector/cfgs/vgg16.yml' vis = False cfg.CUDA = True cfg_from_file(cfg_file) if set_cfgs is not None: cfg_from_list(set_cfgs) print('Using config:') pprint.pprint(cfg) np.random.seed(1) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. #加载预训练模型 input_dir = load_dir + "/" + net + "/" + dataset if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch, checkpoint)) pascal_classes = np.asarray([ '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]) # initilize the network here. if net == 'vgg16': fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=False) elif net == 'res101': fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=False) elif net == 'res50': fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=False) elif net == 'res152': fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=False) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') # pdb.set_trace() print("load checkpoint %s" % (load_name)) # initilize the tensor holder here. 
# NOTE(review): Variable(..., volatile=True) is the pre-0.4 PyTorch idiom;
# on modern PyTorch `volatile` is removed and torch.no_grad() is the
# replacement (the resize_/copy_ section below already uses no_grad).  The
# while-loop walks imglist backwards via num_images and terminates when it
# goes negative; the webcam branch is unreachable here because webcam_num is
# fixed to -1 above.
im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data, volatile=True) im_info = Variable(im_info, volatile=True) num_boxes = Variable(num_boxes, volatile=True) gt_boxes = Variable(gt_boxes, volatile=True) fasterRCNN.cuda() fasterRCNN.eval() start = time.time() max_per_image = 100 thresh = 0.05 vis = True imglist = os.listdir(image_dir) num_images = len(imglist) print('Loaded Photo: {} images.'.format(num_images)) while (num_images >= 0): total_tic = time.time() if webcam_num == -1: num_images -= 1 im_file = os.path.join(image_dir, imglist[num_images]) # im = cv2.imread(im_file) im_in = np.array(imread(im_file)) if len(im_in.shape) == 2: im_in = im_in[:, :, np.newaxis] im_in = np.concatenate((im_in, im_in, im_in), axis=2) # rgb -> bgr im = im_in[:, :, ::-1] blobs, im_scales = _get_image_blob(im) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array( [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) with torch.no_grad(): im_data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.resize_(1, 1, 5).zero_() num_boxes.resize_(1).zero_() # pdb.set_trace() det_tic = time.time() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev box_deltas = 
box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im2show = np.copy(im) for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join( image_dir, imglist[num_images][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show) else: im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB) cv2.imshow("frame", im2showRGB) total_toc = time.time() total_time = total_toc - total_tic frame_rate = 1 / total_time print('Frame rate:', frame_rate) if cv2.waitKey(1) & 0xFF == ord('q'): break
# Detection-to-attribute pipeline over the 80 COCO classes: loads a
# checkpoint named from args, runs the detector over args.image_dir, and
# dumps each image's per-label top detection score (aligned to an external
# label list) to a JSON dict keyed by the digits of the file name.
# NOTE(review) concrete issues, left in place because this chunk's original
# formatting is lost:
#   * the first class is spelled '___background__' (THREE leading
#     underscores) — the conventional token is '__background__'; any
#     name/index lookup against a standard class list will silently mismatch.
#   * re.sub("\D", ...) further down uses a non-raw pattern; it happens to
#     work but is an invalid-escape DeprecationWarning — should be r"\D".
#   * `from model.utils.config import cfg` re-imports cfg *inside* the
#     function, shadowing any module-level cfg binding.
def frcnn(train): args = parse_args() print('Called with args:') print(args) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) from model.utils.config import cfg cfg.USE_GPU_NMS = args.cuda print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) load_name = os.path.join(input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) pascal_classes = np.asarray(['___background__', u'person', u'bicycle', u'car', u'motorcycle', u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light', u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird', u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear', u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie', u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball', u'kite', u'baseball bat', u'baseball glove', u'skateboard', u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup', u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple', u'sandwich', u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut', u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table', u'toilet', u'tv', u'laptop', u'mouse', u'remote', u'keyboard', u'cell phone', u'microwave', u'oven', u'toaster', u'sink', u'refrigerator', u'book', u'clock', u'vase', u'scissors', u'teddy bear', u'hair drier', u'toothbrush']) # initilize the network here. 
# Network construction.  NOTE(review): the vgg16 branch uses pretrained=True
# while every resnet branch uses pretrained=False — inconsistent, though
# moot since load_state_dict overwrites the weights right after.
#args.imdb_name = "coco_2014_train+coco_2014_valminusminival" # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) if args.net == 'vgg16': fasterRCNN = vgg16(pascal_classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) if args.cuda > 0: checkpoint = torch.load(load_name) else: checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage)) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') # pdb.set_trace() print("load checkpoint %s" % (load_name)) # initilize the tensor holder here. 
# Inference loop.  Per image: build the blob, run the model, apply
# (optionally normalized) bbox regression, per-class NMS, then record the
# top score per label into sm_lis.  NOTE(review): `score.numpy()` raises on
# a CUDA tensor (needs .cpu() first); the surrounding bare `try/except:
# pass` silently hides that, so GPU runs can emit all-zero score vectors.
# The train/non-train branches below are near-duplicates differing mainly in
# the label-list source and output file — candidates for a shared helper
# once the file is reflowed.
im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda > 0: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable with torch.no_grad(): im_data =Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda > 0: cfg.CUDA = True if args.cuda > 0: fasterRCNN.cuda() fasterRCNN.eval() thresh = 0.05 webcam_num = args.webcam_num imglist = os.listdir(args.image_dir) num_images = len(imglist) print('Loaded Photo: {} images.'.format(num_images)) import json,re from tqdm import tqdm d = {} pbar = tqdm(imglist) if not train: for i in pbar: im_file = os.path.join(args.image_dir, i) # im = cv2.imread(im_file) im_name = i im_in = np.array(imread(im_file)) if len(im_in.shape) == 2: im_in = im_in[:,:,np.newaxis] im_in = np.concatenate((im_in,im_in,im_in), axis=2) # rgb -> bgr im = im_in[:,:,::-1] blobs, im_scales = _get_image_blob(im) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.data.resize_(1, 1, 5).zero_() num_boxes.data.resize_(1).zero_() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: if args.cuda > 0: 
box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4) else: if args.cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: #Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() lis = json.load(open('/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json', 'r')) sm_lis = np.zeros(len(lis)) for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:,j]>thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) #cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] score = cls_dets[0][-1] try: sm_lis[lis.index(pascal_classes[j])] = score.numpy() except: pass d[re.sub("\D", "", im_name)] = sm_lis.tolist() json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2) else: for i in pbar: print("training") im_file = os.path.join(args.image_dir, i) # im 
= cv2.imread(im_file) im_name = i im_in = np.array(imread(im_file)) if len(im_in.shape) == 2: im_in = im_in[:, :, np.newaxis] im_in = np.concatenate((im_in, im_in, im_in), axis=2) # rgb -> bgr im = im_in[:, :, ::-1] blobs, im_scales = _get_image_blob(im) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.data.resize_(1, 1, 5).zero_() num_boxes.data.resize_(1).zero_() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: if args.cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4) else: if args.cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = 
clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() lis = ['__background__', u'person', u'bicycle', u'car', u'motorcycle', u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light', u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird', u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear', u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie', u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball', u'kite', u'baseball bat', u'baseball glove', u'skateboard', u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup', u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple', u'sandwich', u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut', u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table', u'toilet', u'tv', u'laptop', u'mouse', u'remote', u'keyboard', u'cell phone', u'microwave', u'oven', u'toaster', u'sink', u'refrigerator', u'book', u'clock', u'vase', u'scissors', u'teddy bear', u'hair drier', u'toothbrush'] assert len(lis) == 81 sm_lis = np.zeros(len(lis)) for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] score = cls_dets[0][-1] try: sm_lis[lis.index(pascal_classes[j])] = score.numpy() except: pass print(sm_lis.tolist()) d[re.sub("\D", "", im_name)] = sm_lis.tolist() json.dump(d, 
open('concat_dict' + '.json', 'w'), indent=2)
def run(args):
    """Evaluate a trained Faster R-CNN checkpoint on the validation split of
    ``args.dataset``, using soft-NMS for per-class suppression, then run the
    dataset's own evaluation.

    Side effects: mutates the global ``cfg`` (config file / set_cfgs /
    USE_FLIPPED / CUDA / POOLING_MODE), reads a checkpoint from
    ``args.load_dir``, writes ``detections.pkl`` into the output directory,
    and prints progress to stdout.
    """
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    # Py2/Py3 shim: on Python 3 the bare `xrange` lookup raises
    # UnboundLocalError (a NameError subclass), so we alias it to range.
    try:
        xrange  # Python 2
    except NameError:
        xrange = range  # Python 3
    #args = parse_args()
    print('Called with args:')
    print(args)
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    np.random.seed(cfg.RNG_SEED)
    # Select the imdb names and anchor configuration for the chosen dataset.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    # Disable flipping: evaluation only needs the un-augmented images.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)
    print('{:d} roidb entries'.format(len(roidb)))
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))
    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        fasterRCNN.cuda()
    start = time.time()
    max_per_image = 100
    vis = args.vis
    # NOTE(review): thresh is 0.05 when visualizing but 0.0 otherwise —
    # presumably intentional (keep all detections for evaluation); confirm.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    #pdb.set_trace()
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize = False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    fasterRCNN.eval()
    # Placeholder for classes with no detections in an image: shape (0, 5).
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        # Copy the batch into the persistent holders in place.
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the test-time image rescale (data[1][0][2] is the scale factor).
        pred_boxes /= data[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + soft-NMS, skipping the background class 0.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                # cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # soft-NMS returns the surviving detections directly (not indices).
                keep = softnms_cpu_torch(cls_dets)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = keep
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)
    # Persist raw detections, then run the dataset-specific evaluation.
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
args.imdbval_name, False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) input_dir = args.load_dir + "/" + args.net if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network') load_name = os.path.join( input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=False) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=False) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=False) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=False) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) fasterRCNN.load_state_dict(checkpoint['model'])
np.random.seed(cfg.RNG_SEED) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. ''' input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) ''' load_name = "/root/workspace/project/models/vgg16/faster_rcnn_1_5_29069.pth" ships_classes = np.asarray(['__background__', 'ship']) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(ships_classes, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(ships_classes, 101, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(ships_classes, 50, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(ships_classes, 152, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace()
im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr #tr_momentum = cfg.TRAIN.MOMENTUM
pretrained_rfcn=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': RFCN = resnet(imdb_classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res18': RFCN = resnet(imdb_classes, 18, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'vgg16': RFCN = vgg16(imdb_classes, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() RFCN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr params = [] for key, value in dict(RFCN.named_parameters()).items(): if value.requires_grad: if 'bias' in key: params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \ 'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
def testing_rgb():
    """Evaluate a Faster R-CNN checkpoint on an RGB test set using hard NMS.

    Relies on module-level globals prepared by the caller: ``args``, ``imdb``,
    ``roidb``, ``ratio_list``, ``ratio_index``, ``load_name`` and
    ``output_dir`` — TODO confirm against the surrounding script.
    Side effects: loads the checkpoint, writes ``detections.pkl`` and
    (optionally) per-image visualizations under ``output_dir``, and runs the
    imdb evaluation.
    """
    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        # pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    Log.info(cfg.POOLING_MODE)
    print('load model successfully!')
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        fasterRCNN.cuda()
    start = time.time()
    max_per_image = 100
    vis = args.vis
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    fasterRCNN.eval()
    # Placeholder for classes with no detections in an image: shape (0, 5).
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        # In-place copy of the batch into the persistent holders.
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the test-time image rescale (data[1][0][2] is the scale factor).
        pred_boxes /= data[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + NMS, skipping the background class 0.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), roidb[i],
                                             0.8)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
        pic_path = os.path.join(output_dir, 'pic')
        if not os.path.exists(pic_path):
            os.makedirs(pic_path)
        if vis:
            #print(pic_path)
            cv2.imwrite(pic_path + '/result_%s.png' % str(i), im2show)
            #pdb.set_trace()
        # NOTE(review): `i` is the for-loop variable; this increment is
        # overwritten by the next iteration and has no effect.
        i += 1
        #if vis:
        #    cv2.imwrite('result.png', im2show)
        #    pdb.set_trace()
        #    cv2.imshow('test', im2show)
        #    cv2.waitKey(10)
    # Persist raw detections, then run the dataset-specific evaluation.
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
    print("evaluate end")
    end = time.time()
    print("test time: %0.4fs" % (end - start))
im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. fasterRCNN = vgg16(['bg', 'Car'], pretrained=False, class_agnostic=args.class_agnostic, img_channels=6) fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr params = [] for key, value in list(dict(fasterRCNN.named_parameters()).items()): if value.requires_grad: if 'bias' in key: params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \ 'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}] else: params += [{ 'params': [value],
def __init__(self,
             baseFolder='models',
             filename='faster_rcnn_1_10_9999_mosaicCL3to5_CBAM_Gblur.pth',
             threshold=0.9,
             att_type='CBAM'):  # att_type=None
    """Build the AIR-15 household-object detector.

    Loads a Faster R-CNN (CBAM-attention ResNet variant) checkpoint from
    ``baseFolder/filename``, configures it via a synthetic argparse command
    line, and leaves the model in eval mode ready for inference.

    Args:
        baseFolder: directory holding the checkpoint and the cfgs/ yml files.
        filename: checkpoint file name inside ``baseFolder``.
        threshold: detection score threshold stored as ``self.thresh``.
        att_type: attention module type passed to the resnet constructor.
    """
    super(DetectorAIR15, self).__init__()
    # Late import so the 'model' package is resolved at construction time.
    self.cfg = __import__('model').utils.config.cfg

    def parse_args():
        """ Parse input arguments """
        parser = argparse.ArgumentParser(
            description='Train a Fast R-CNN network')
        parser.add_argument('--cfg',
                            dest='cfg_file',
                            help='optional config file',
                            default='cfgs/vgg16.yml',
                            type=str)
        parser.add_argument('--net',
                            dest='net',
                            help='vgg16, res50, res101, res152',
                            default='res101',
                            type=str)
        parser.add_argument('--set',
                            dest='set_cfgs',
                            help='set config keys',
                            default=None,
                            nargs=argparse.REMAINDER)
        parser.add_argument('--cuda',
                            dest='cuda',
                            help='whether use CUDA',
                            action='store_true')
        parser.add_argument('--mGPUs',
                            dest='mGPUs',
                            help='whether use multiple GPUs',
                            action='store_true')
        parser.add_argument(
            '--cag',
            dest='class_agnostic',
            help='whether perform class_agnostic bbox regression',
            action='store_true')
        parser.add_argument(
            '--parallel_type',
            dest='parallel_type',
            help=
            'which part of model to parallel, 0: all, 1: model before roi pooling',
            default=0,
            type=int)
        parser.add_argument('--ls',
                            dest='large_scale',
                            help='whether use large imag scale',
                            action='store_true')
        return parser

    # Fixed command line: this detector always runs res101 / large-scale / CUDA.
    cmd_args = [
        '--net',
        'res101',
        '--ls',
        '--cuda',
    ]
    load_name = os.path.join(baseFolder, filename)  # w/o bottle class
    self.thresh = threshold
    parser = parse_args()
    self.args = parser.parse_args(cmd_args)
    self.args.set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
    ]
    self.args.cfg_file = "{}/cfgs/{}_ls.yml".format(
        baseFolder, self.args.net
    ) if self.args.large_scale else "{}/cfgs/{}.yml".format(
        baseFolder, self.args.net)
    print('Called with args:')
    print(self.args)
    if self.args.cfg_file is not None:  # check cfg file and copy
        cfg_from_file(self.args.cfg_file)
    if self.args.set_cfgs is not None:
        cfg_from_list(self.args.set_cfgs)
    self.cfg.USE_GPU_NMS = self.args.cuda
    print('Using config:')
    pprint.pprint(self.cfg)
    np.random.seed(self.cfg.RNG_SEED)
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    #
    # input_dir = self.args.load_dir + "/" + self.args.net + "/" + self.args.dataset
    # if not os.path.exists(input_dir):
    #     raise Exception('There is no input directory for loading network from ' + input_dir)
    # load_name = os.path.join(input_dir,
    #     'faster_rcnn_{}_{}_{}.pth'.format(self.args.checksession, self.args.checkepoch, self.args.checkpoint))
    # The 15 detectable object classes (+ background at index 0).
    self.classes = np.asarray([
        '__background__',  # always index 0
        'cup',
        'pen',
        'hat',
        'mobile_phone',
        'sock',
        'glasses',
        'towel',
        'cane_stick',
        'newspaper',
        'remote',
        'key',
        'wallet',
        'pack',
        'medicine_case',
        # 'bottle',
        'medicine_packet',
    ])
    # self.display_classes = self.classes
    # Korean display names keyed by class name (used for UI labels).
    self.display_classes = {
        'cup': '컵',
        'pen': '펜',
        'hat': '모자',
        'mobile_phone': '핸드폰',
        'sock': '양말',
        'glasses': '안경',
        'towel': '수건',
        'cane_stick': '지팡이',
        'newspaper': '신문',
        'remote': '리모컨',
        'key': '열쇠',
        'wallet': '지갑',
        'pack': '담배갑',
        'medicine_case': '약통',
        # 'bottle': '약통',
        'medicine_packet': '약봉지',
    }
    # initilize the network here.
    if self.args.net == 'vgg16':
        self.fasterRCNN = vgg16(self.classes,
                                pretrained=False,
                                class_agnostic=self.args.class_agnostic)
    elif 'res' in self.args.net:
        # from model.faster_rcnn.resnet import resnet
        from model.faster_rcnn.resnet_AIRvar_CBAM import resnet
        if self.args.net == 'res101':
            self.fasterRCNN = resnet(
                self.classes,
                101,
                pretrained=False,
                class_agnostic=self.args.class_agnostic,
                att_type=att_type)
        elif self.args.net == 'res50':
            self.fasterRCNN = resnet(
                self.classes,
                50,
                pretrained=False,
                class_agnostic=self.args.class_agnostic,
                att_type=att_type)
        elif self.args.net == 'res152':
            self.fasterRCNN = resnet(
                self.classes,
                152,
                pretrained=False,
                class_agnostic=self.args.class_agnostic,
                att_type=att_type)
    else:
        print("network is not defined")
        pdb.set_trace()
    self.fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    # Map the checkpoint to CPU when CUDA is unavailable.
    if self.args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(
            load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        self.cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    # initilize the tensor holder here.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if self.args.cuda > 0:
        self.im_data = self.im_data.cuda()
        self.im_info = self.im_info.cuda()
        self.num_boxes = self.num_boxes.cuda()
        self.gt_boxes = self.gt_boxes.cuda()
    # make variable
    with torch.no_grad():
        self.im_data = Variable(self.im_data)
        self.im_info = Variable(self.im_info)
        self.num_boxes = Variable(self.num_boxes)
        self.gt_boxes = Variable(self.gt_boxes)
    if self.args.cuda > 0:
        self.cfg.CUDA = True
    if self.args.cuda > 0:
        self.fasterRCNN.cuda()
    # Inference-only: keep the model in eval mode.
    self.fasterRCNN.eval()
    self.max_per_image = 100
def main():
    """Train a Faster R-CNN model on the dataset selected by the CLI args.

    Side effects: mutates the global ``cfg``, saves per-epoch checkpoints
    under ``args.save_dir``, optionally logs losses to tensorboardX, and
    prints training progress to stdout.
    """
    args = parse_args()
    print('Called with args:')
    print(args)
    # Select imdb names, anchors and GT-box cap for the chosen dataset.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    # Flipped copies double the training set (data augmentation).
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    train_size = len(roidb)
    print('{:d} roidb entries'.format(len(roidb)))
    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers)
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    if args.cuda:
        cfg.CUDA = True
    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    #tr_momentum = cfg.TRAIN.MOMENTUM
    #tr_momentum = args.momentum
    # Per-parameter options: biases get doubled lr and (usually) no decay.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]
    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    if args.cuda:
        fasterRCNN.cuda()
    # Optionally resume from a saved session checkpoint.
    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))
    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    iters_per_epoch = int(train_size / args.batch_size)
    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()
        # Step-decay the learning rate every lr_decay_step epochs.
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            # In-place copy of the batch into the persistent holders.
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # Total loss = RPN (cls + box) + RCNN head (cls + box).
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.item()
            # backward
            optimizer.zero_grad()
            loss.backward()
            # VGG16 trains more stably with gradient clipping.
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()
            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= (args.disp_interval + 1)
                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    logger.add_scalars("logs_s_{}/losses".format(args.session),
                                       info,
                                       (epoch - 1) * iters_per_epoch + step)
                loss_temp = 0
                start = time.time()
        # Save a checkpoint at the end of each epoch.
        save_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
        save_checkpoint(
            {
                'session': args.session,
                'epoch': epoch + 1,
                'model': fasterRCNN.module.state_dict()
                if args.mGPUs else fasterRCNN.state_dict(),
                'optimizer': optimizer.state_dict(),
                'pooling_mode': cfg.POOLING_MODE,
                'class_agnostic': args.class_agnostic,
            }, save_name)
        print('save model: {}'.format(save_name))
    if args.use_tfboard:
        logger.close()
im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16': model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': model = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': model = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': model = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() model.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr #tr_momentum = cfg.TRAIN.MOMENTUM
args.checkepoch, args.checkpoint)) device = torch.device("cuda" if args.cuda > 0 else "cpu") pascal_classes = np.asarray([ '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]) # initilize the network here. if args.net == 'vgg16': _RCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res101': _RCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res50': _RCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res152': _RCNN = resnet(pascal_classes,
# make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True if args.lighthead: lighthead = True # initilize the network here. if args.net == 'vgg16': _RCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res101': _RCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res50': _RCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic, lighthead=lighthead) elif args.net == 'res152': _RCNN = resnet(imdb.classes,
# NOTE(review): whitespace-mangled excerpt, cut mid-call at the end
# (`resnet(labels, 152, pretrained=False,`) — code kept verbatim, comments only.
# Builds the checkpoint path as load_dir/net/dataset/faster_rcnn_{session}_{epoch}_{point}.pth
# and constructs the detector over a `labels` class list defined outside this view.
#pdb.set_trace() input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(labels, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(labels, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(labels, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(labels, 152, pretrained=False,
def load_model(args):
    """Build a Faster R-CNN network and load its pre-trained checkpoint.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``dataset``, ``cfg_file``, ``set_cfgs``, ``cuda``,
        ``classes_dir``, ``load_dir``, ``net`` and ``class_agnostic``.
        ``args.set_cfgs`` is overwritten in place when ``args.dataset``
        matches a known dataset.

    Returns
    -------
    tuple
        ``(classes, fasterRCNN)`` — the class-name list (index 0 is
        ``'__background__'``) and the network with checkpoint weights loaded.

    Raises
    ------
    Exception
        If ``args.load_dir`` does not exist.
    """
    # Anchor settings must match those used to train the pre-trained model.
    anchor_cfgs = {
        "pascal_voc": ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'],
        "pascal_voc_0712": ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'],
        "coco": ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'],
        "imagenet": ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'],
        "vg": ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'],
    }
    # Unknown datasets intentionally leave args.set_cfgs untouched.
    if args.dataset in anchor_cfgs:
        args.set_cfgs = anchor_cfgs[args.dataset]

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Load class names; each vocab line may carry comma-separated synonyms,
    # of which only the first is kept.
    classes = ['__background__']
    with open(os.path.join(args.classes_dir, 'objects_vocab.txt')) as f:
        for line in f:  # stream lines; don't shadow builtin `object`
            classes.append(line.split(',')[0].lower().strip())

    if not os.path.exists(args.load_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            args.load_dir)
    load_name = os.path.join(
        args.load_dir,
        'faster_rcnn_{}_{}.pth'.format(args.net, args.dataset))

    # Initialize the same backbone that was used to train the checkpoint;
    # pretrained=False because all weights come from the checkpoint below.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(classes, pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(classes, 101, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(classes, 152, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap CUDA-saved tensors onto the CPU.
        checkpoint = torch.load(
            load_name, map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    print("load model %s" % (load_name))

    return classes, fasterRCNN
# NOTE(review): whitespace-mangled excerpt of a knowledge-distillation script, cut
# mid-statement at the end (`raise Exception(`) — code kept verbatim, comments only.
# Builds an alexnet student (pretrained=True) and a vgg16 teacher (pretrained=False,
# teaching=True), calls create_architecture() on both, then starts resolving the
# teacher checkpoint directory load_dir/t_net/dataset.
if args.cuda: cfg.CUDA = True # initilize the network here. if args.s_net == 'alexnet': student_net = alexnet(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) else: print("student network is not defined") pdb.set_trace() if args.t_net == 'vgg16': teacher_net = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic, teaching=True) else: print("teacher network is not defined") pdb.set_trace() ##CREATE ARCHITECTURES teacher_net.create_architecture() student_net.create_architecture() #LOAD TEACHER NET input_dir = args.load_dir + "/" + args.t_net + "/" + args.dataset print(input_dir) if not os.path.exists(input_dir): raise Exception(
# NOTE(review): whitespace-mangled excerpt of a sketch-query evaluation script, cut
# mid-call at the end (`resnet(imdb_vu.classes,`) — code kept verbatim, comments only.
# Enables competition mode on the unseen-split imdb, builds a roibatchLoader fed with
# sketch paths/labels (training=False, seen=args.seen), and constructs a backbone
# parameterized by model_type=args.model_type.
imdb_vu.competition_mode(on=True) dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1, imdb_vu.num_classes, args.sketch_path, args.sketch_class_2_label, training=False, seen=args.seen) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb_vu.classes, pretrained=False, class_agnostic=args.class_agnostic, model_type=args.model_type) elif args.net == 'res101': fasterRCNN = resnet(imdb_vu.classes, 101, pretrained=False, class_agnostic=args.class_agnostic, model_type=args.model_type) elif args.net == 'res50': fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic, model_type=args.model_type) elif args.net == 'res152': fasterRCNN = resnet(imdb_vu.classes,
# NOTE(review): whitespace-mangled excerpt of a two-class ('__background__',
# 'abnormal') test script — code kept verbatim, comments only. Seeds numpy from
# cfg.RNG_SEED, assembles the checkpoint path, builds the detector with
# pretrained=False, and begins loading the checkpoint. The trailing
# `checkpoint = torch.load(load_name)` is the body of `if args.cuda > 0:` whose
# `else` branch falls outside this excerpt, so nesting cannot be safely restored here.
np.random.seed(cfg.RNG_SEED) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) load_name = os.path.join(input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) own_data_classes = np.asarray(['__background__', 'abnormal']) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(own_data_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(own_data_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(own_data_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(own_data_classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) if args.cuda > 0: checkpoint = torch.load(load_name)
# NOTE(review): whitespace-mangled excerpt, cut mid-dict at the start and mid-call at
# the end — code kept verbatim, comments only. Closes a mapping of human-readable
# names to what appear to be Open Images MID identifiers ('/m/...') — TODO confirm —
# then constructs the detector over `pascal_classes` defined outside this view.
'Backpack': '/m/01940j', 'Suitcase': '/m/01s55n', 'Bench': '/m/0cvnqh', 'Dog': '/m/0bt9lr', 'Motorcycle': '/m/04_sv', 'Woman': '/m/03bt1vf', 'Microwave oven': '/m/0fx9l', 'Sofa bed': '/m/03m3pdh', 'Handbag': '/m/080hkjn' } # initilize the network here. if args.net == 'vgg16': print(pascal_classes) fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(pascal_classes, 152, pretrained=False,
# NOTE(review): whitespace-mangled excerpt, starting inside a commented-out COCO
# setup block and cut mid-call at the end — code kept verbatim, comments only.
# Builds the detector over `KAIST_classes` (defined outside this view). Possible
# inconsistency to verify: vgg16/res101 pass pretrained=True while res50/res152
# pass pretrained=False — the sibling snippets in this file use one value for all
# branches, so this asymmetry may be unintentional.
# args.imdbval_name = "MI3_val" # args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] # elif args.dataset == "coco": # args.imdb_name = "coco_2014_train+coco_2014_valminusminival" # args.imdbval_name = "coco_2014_minival" # args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False) # imdb.competition_mode(on=True) # ('__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') # pascal_classes=np.asarray(imdb.classes) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(KAIST_classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(KAIST_classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(KAIST_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(KAIST_classes, 152, pretrained=False,
# NOTE(review): whitespace-mangled excerpt of a training script (source-domain imdb_s
# variant of the fragment on the earlier line) — code kept verbatim, comments only.
# Moves tensors to CUDA, wraps them in legacy `Variable`, builds the backbone with
# pretrained=True, then sets lr (cfg value immediately overridden by args.lr). The
# leading `.cuda()` calls were presumably inside an `if args.cuda:` block originally —
# TODO confirm; indentation is unrecoverable from this excerpt.
num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb_s.classes, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb_s.classes, 101, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb_s.classes, 50, pretrained=True, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb_s.classes, 152, pretrained=True, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() lr = cfg.TRAIN.LEARNING_RATE lr = args.lr #tr_momentum = cfg.TRAIN.MOMENTUM
# NOTE(review): whitespace-mangled excerpt, starting inside a `raise Exception(` call
# and cut after `elif args.net == 'res50':` — code kept verbatim, comments only.
# Unlike the sibling snippets, `load_name` here is a LIST of checkpoint paths (one per
# entry in `models`); downstream code must iterate it. The triple-quoted block is dead
# code (and contains `if it 20:`, which would be a syntax error if revived). Supports
# extra backbones vgg16_4ch / vgg16_5ch for 4- and 5-channel input.
'There is no input directory for loading network from ' + input_dir) load_name = [os.path.join(input_dir, model) for model in models] '''it = 0 for model in models: if it 20: load_name = os.path.join(input_dir,model) it += 1''' detection_classes = np.asarray(['__background__', 'Poma']) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(detection_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'vgg16_4ch': fasterRCNN = vgg16_4ch(detection_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'vgg16_5ch': fasterRCNN = vgg16_5ch(detection_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(detection_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50':
# NOTE(review): whitespace-mangled excerpt, cut mid-call at the end
# (`resnet(tt100k_classes, 152, pretrained=False,`) — code kept verbatim, comments
# only. Recreates `output_dir` from scratch (shutil.rmtree then os.mkdir — destroys
# previous results), defines the TT100K traffic-sign label set (45 codes plus
# '__background__'), and constructs the detector with pretrained=False.
if os.path.exists(output_dir): shutil.rmtree(output_dir) os.mkdir(output_dir) tt100k_classes = np.asarray([ '__background__', 'p11', 'pl5', 'pne', 'il60', 'pl80', 'pl100', 'il80', 'po', 'w55', 'pl40', 'pn', 'pm55', 'w32', 'pl20', 'p27', 'p26', 'p12', 'i5', 'pl120', 'pl60', 'pl30', 'pl70', 'pl50', 'ip', 'pg', 'p10', 'io', 'pr40', 'p5', 'p3', 'i2', 'i4', 'ph4', 'wo', 'pm30', 'ph5', 'p23', 'pm20', 'w57', 'w13', 'p19', 'w59', 'il100', 'p6', 'ph4.5' ]) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(tt100k_classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(tt100k_classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(tt100k_classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(tt100k_classes, 152, pretrained=False,