Esempio n. 1
0
def prep_model(input_dir):

    args = parse_args()

    print('Called with args:')
    print(args)
    args.set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
    ]

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = 1

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(input_dir)

    #   pascal_classes = np.asarray(['__background__',
    #                        'aeroplane', 'bicycle', 'bird', 'boat',
    #                        'bottle', 'bus', 'car', 'cat', 'chair',
    #                        'cow', 'diningtable', 'dog', 'horse',
    #                        'motorbike', 'person', 'pottedplant',
    #                        'sheep', 'sofa', 'train', 'tvmonitor'])
    pascal_classes = np.asarray([
        '__background__', "person", "bicycle", "car", "motorbike", "aeroplane",
        "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
        "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
        "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
        "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
        "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
        "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
        "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
        "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
        "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book",
        "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
    ])

    # initilize the network here.
    fasterRCNN = resnet(pascal_classes,
                        101,
                        pretrained=False,
                        class_agnostic=args.class_agnostic)

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    #   fasterRCNN.load_state_dict(checkpoint)
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    return fasterRCNN
Esempio n. 2
0
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
Esempio n. 3
0
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(nusc_classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(nusc_classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(nusc_classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(nusc_classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")

    fasterRCNN.create_architecture()
Esempio n. 4
0
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        '{}_{}_{}_{}.pth'.format(args.dataset, str(args.net),
                                 args.checksession, args.checkepoch))
    # initilize the network here.
    if args.net == 'metarcnn':
        num_layers = 101 if args.dataset == 'pascal_voc_0712' else 50

        num_cls = imdb.num_classes

        fasterRCNN = resnet(num_cls,
                            num_layers,
                            pretrained=True,
                            class_agnostic=args.class_agnostic,
                            meta_train=False,
                            meta_test=args.meta_test,
                            meta_loss=args.meta_loss)
    else:
        print('No module define')

    load_name = os.path.join(
        input_dir,
        '{}_{}_{}_{}.pth'.format(args.dataset, str(args.net),
                                 args.checksession, args.checkepoch))
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
Esempio n. 5
0
                                             pin_memory=False)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'rfcn_detect_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'res101':
        RFCN = resnet(imdb.classes,
                      101,
                      pretrained=False,
                      class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    RFCN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    RFCN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # initilize the tensor holder here.
Esempio n. 6
0
    load_name = os.path.join(
        input_dir,
        "faster_rcnn_{}_{}_{}.pth".format(args.checksession, args.checkepoch,
                                          args.checkpoint),
    )

    # initilize the network here.
    if args.net == "vgg16":
        fasterRCNN = vgg16(imdb._action_classes,
                           imdb._obj_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == "res101":
        fasterRCNN = resnet(imdb._action_classes,
                            imdb._obj_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint["model"])
    kp_dist_mean = checkpoint["kp_dist_mean"]
    kp_dist_var = checkpoint["kp_dist_var"]
    kp_selection = checkpoint["kp_selection"]
Esempio n. 7
0
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    elif args.net == 'res18':
        fasterRCNN = resnet(imdb.classes, 18, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    elif args.net == 'res34':
        fasterRCNN = resnet(imdb.classes, 34, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes, 50, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes, 152, pretrained=args.pretrained, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    lr = cfg.TRAIN.LEARNING_RATE
Esempio n. 8
0
    gt_boxes = gt_boxes.cuda()

  # make variable
  im_data = Variable(im_data)
  im_info = Variable(im_info)
  num_boxes = Variable(num_boxes)
  gt_boxes = Variable(gt_boxes)

  if args.cuda:
    cfg.CUDA = True

  # initilize the network here.
  if args.net == 'vgg16':
    fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res101':
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res50':
    fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res152':
    fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
  else:
    print("network is not defined")
    pdb.set_trace()

  fasterRCNN.create_architecture()

  lr = cfg.TRAIN.LEARNING_RATE
  lr = args.lr
  #tr_momentum = cfg.TRAIN.MOMENTUM
  #tr_momentum = args.momentum
Esempio n. 9
0
    input_dir = os.path.join(args.load_dir, args.net, args.dataset)
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes, pretrained=False)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes, 101, pretrained=False)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes, 50, pretrained=False)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes, 152, pretrained=False)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    tmp_state_dict = checkpoint['model']
    correct_state_dict = {
        k: tmp_state_dict['module.' + k]
Esempio n. 10
0
    if args.cuda:
        cfg.CUDA = True

    if args.lighthead:
        lighthead = True

    # initilize the network here.
    if args.net == 'vgg16':
        _RCNN = vgg16(imdb.classes,
                      pretrained=True,
                      class_agnostic=args.class_agnostic,
                      lighthead=lighthead)
    elif args.net == 'res101':
        _RCNN = resnet(imdb.classes,
                       101,
                       pretrained=True,
                       class_agnostic=args.class_agnostic,
                       lighthead=lighthead)
    elif args.net == 'res50':
        _RCNN = resnet(imdb.classes,
                       50,
                       pretrained=True,
                       class_agnostic=args.class_agnostic,
                       lighthead=lighthead)
    elif args.net == 'res152':
        _RCNN = resnet(imdb.classes,
                       152,
                       pretrained=True,
                       class_agnostic=args.class_agnostic,
                       lighthead=lighthead)
    elif args.net == 'xception':
Esempio n. 11
0
def extract_feature():
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601
    CONF_THRESH = 0.2
    args = parse_args()

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    # device = torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        blobs, im_scales = get_image_blob(im)
        assert len(im_scales) == 1, 'Only single-image batch is implemented'

        im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).to(device)
        im_info = torch.tensor([[blobs.shape[1], blobs.shape[2],
                                 im_scales[0]]]).to(device)
        gt_boxes = torch.zeros(1, 1, 5).to(device)
        num_boxes = torch.zeros(1).to(device)

        with torch.set_grad_enabled(False):
            rois, cls_prob, _, _, _, _, _, _, \
            pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections.
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        image_feat = pooled_feat[keep_boxes]
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        output_file = os.path.join(args.output_dir,
                                   im_file.split('.')[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)
Esempio n. 12
0
    args = parse_args()

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    # device = torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        blobs, im_scales = get_image_blob(im)
    #  imagenet_vid_classes = ['__background__','person']
    # =============================================================================
    #   imagenet_vid_classes = ['__background__',  # always index 0
    #           'airplane', 'antelope', 'bear', 'bicycle',
    #           'bird', 'bus', 'car', 'cattle',
    #           'dog', 'domestic_cat', 'elephant', 'fox',
    #           'giant_panda', 'hamster', 'horse', 'lion',
    #           'lizard', 'monkey', 'motorcycle', 'rabbit',
    #           'red_panda', 'sheep', 'snake', 'squirrel',
    #           'tiger', 'train', 'turtle', 'watercraft',
    # 'whale', 'zebra']
    # =============================================================================
    # initilize the network here.
    if net == 'res101':
        RFCN = resnet(imagenet_vid_classes,
                      101,
                      pretrained=False,
                      class_agnostic=class_agnostic)
    elif net == 'res50':
        RFCN = resnet(imagenet_vid_classes,
                      50,
                      pretrained=False,
                      class_agnostic=class_agnostic)
    elif net == 'res18':
        RFCN = resnet(imagenet_vid_classes,
                      18,
                      pretrained=False,
                      class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
    gt_boxes = gt_boxes.cuda()

  # make variable
  im_data = Variable(im_data)
  im_info = Variable(im_info)
  num_boxes = Variable(num_boxes)
  gt_boxes = Variable(gt_boxes)

  if args.cuda:
    cfg.CUDA = True

  # initilize the network here.
  if args.net == 'vgg16':
    fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res101':
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res50':
    fasterRCNN = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
  elif args.net == 'res18':
    fasterRCNN = resnet(imdb.classes, 18, pretrained=True, class_agnostic=args.class_agnostic, \
                        pretrained_path=args.pretrained_path)
  elif args.net == 'res152':
    fasterRCNN = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
  else:
    print("network is not defined")
    pdb.set_trace()

  fasterRCNN.create_architecture()

  if args.cuda:
    fasterRCNN.cuda()
Esempio n. 15
0
def bld_train(args, ann_path=None, step=0):

    # print('Train from annotaion {}'.format(ann_path))
    # print('Called with args:')
    # print(args)

    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # print('Using config:')
    # pprint.pprint(cfg)
    # np.random.seed(cfg.RNG_SEED)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # source train set, fully labeled
    #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json')
    #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set

    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                             imdb_tg.num_classes, training=True)

    assert imdb.num_classes == imdb_tg.num_classes

    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # initialize the expectation network.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    # lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # expectation model
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    fasterRCNN_val = fasterRCNN_val
    fasterRCNN_val.eval()

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
        #fasterRCNN_val = nn.DataParallel(fasterRCNN_val)

    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # Evaluation
    # data_iter = iter(dataloader_tg)
    # for target_k in range( int(train_size_tg / args.batch_size)):
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # expactation pass
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) )
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # TODO: data distalliation
            # Choose the confident samples
            for b_idx in range(b_size):
                # fill one confidence
                # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0)
                # resize prediction
                pred_boxes[b_idx] /= data[1][b_idx][2]
                for j in xrange(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image label

                    # filtering box outside of the image
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # decease the number of pgts
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :,
                                   j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)

                    # if there is no det, error
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue

                    # find missing ID
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0): continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j *
                                                              4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    keep = nms(cls_dets, 0.2)  # Magic number ????
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label  \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0: continue

                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k,
                                      4] = j  # class
                        #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]]
                        num_boxes[b_idx] = num_boxes[b_idx] + 1
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # DEBUG
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

        #with open(fname, 'w') as f:
        # pickle.dump(roidb_tg, f)
    else:
        pass
        # with open(fname) as f:  # Python 3: open(..., 'rb')
        # roidb_tg = pickle.load(f)

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")

    roidb.extend(roidb_tg)  # merge two datasets
    print('before filtering, there are %d images...' % (len(roidb)))
    i = 0
    while i < len(roidb):
        if True:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        else:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        i += 1

    print('after filtering, there are %d images...' % (len(roidb)))
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()

        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        # one step
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])

            #gt_boxes.data = \
            #    torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2)
            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()

            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)

            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                          images, step)
                loss_temp = 0
                start = time.time()
                if False:
                    break

        if args.mGPUs:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')
Esempio n. 16
0
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(input_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

    pascal_classes = np.asarray(['__background__',
                                 'aeroplane', 'bicycle', 'bird', 'boat',
                                 'bottle', 'bus', 'car', 'cat', 'chair',
                                 'cow', 'diningtable', 'dog', 'horse',
                                 'motorbike', 'person', 'pottedplant',
                                 'sheep', 'sofa', 'train', 'tvmonitor'])

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res18':
        fasterRCNN = resnet(pascal_classes,
                            18,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res34':
        fasterRCNN = resnet(pascal_classes,
                            34,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
Esempio n. 18
0
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # initilize the network here.
    if args.net == 'vgg16':
        model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        model = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        model = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        model = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    model.create_architecture()

    lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    #tr_momentum = cfg.TRAIN.MOMENTUM
    #tr_momentum = args.momentum
Esempio n. 19
0
def training_fusion():
    # initilize the tensor holder here.
    im_data1 = torch.FloatTensor(1)
    im_data2 = torch.FloatTensor(1)

    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data1 = im_data1.cuda()
        im_data2 = im_data2.cuda()

        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data1 = Variable(im_data1)
    im_data2 = Variable(im_data2)

    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # initilize the network here.
    if args.net == 'vgg16f':
        fasterRCNN = vgg16f(imdb.classes,
                            pretrained=True,
                            class_agnostic=args.class_agnostic,
                            fusion_mode=args.fusion_mode)

    elif args.net == 'vgg16c':
        fasterRCNN = vgg16c(imdb.classes,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)

    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.cuda:
        fasterRCNN.cuda()

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(train_size / args.batch_size)

    # tfboard
    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")
        #TODO
        #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes))
        #TODO
    if args.net == 'vgg16f':
        for epoch in range(args.start_epoch, args.max_epochs + 1):
            # setting to train mode
            fasterRCNN.train()
            loss_temp = 0
            start = time.time()

            if epoch % (args.lr_decay_step + 1) == 0:
                adjust_learning_rate(optimizer, args.lr_decay_gamma)
                lr *= args.lr_decay_gamma

            data_iter = iter(dataloader)
            for step in range(iters_per_epoch):
                data = next(data_iter)

                im_data1.resize_(data[0].size()).copy_(data[0])
                im_data2.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                num_boxes.resize_(data[4].size()).copy_(data[4])

                fasterRCNN.zero_grad()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes)

                loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                       + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
                loss_temp += loss.item()

                # backward
                optimizer.zero_grad()
                loss.backward()
                if args.net == "vgg16":
                    clip_gradient(fasterRCNN, 10.)
                optimizer.step()

                if step % args.disp_interval == 0:
                    end = time.time()
                    if step > 0:
                        loss_temp /= (args.disp_interval + 1)

                    if args.mGPUs:
                        loss_rpn_cls = rpn_loss_cls.mean().item()
                        loss_rpn_box = rpn_loss_box.mean().item()
                        loss_rcnn_cls = RCNN_loss_cls.mean().item()
                        loss_rcnn_box = RCNN_loss_bbox.mean().item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt
                    else:
                        loss_rpn_cls = rpn_loss_cls.item()
                        loss_rpn_box = rpn_loss_box.item()
                        loss_rcnn_cls = RCNN_loss_cls.item()
                        loss_rcnn_box = RCNN_loss_bbox.item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt

                    Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                          % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                    Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                             (fg_cnt, bg_cnt, end - start))
                    Log.info("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                          % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                    if args.use_tfboard:
                        info = {
                            'loss': loss_temp,
                            'loss_rpn_cls': loss_rpn_cls,
                            'loss_rpn_box': loss_rpn_box,
                            'loss_rcnn_cls': loss_rcnn_cls,
                            'loss_rcnn_box': loss_rcnn_box
                        }
                        logger.add_scalars(
                            "logs_s_{}/losses".format(args.session), info,
                            (epoch - 1) * iters_per_epoch + step)

                    loss_temp = 0
                    start = time.time()

            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))

            save_checkpoint(
                {
                    'session':
                    args.session,
                    'epoch':
                    epoch + 1,
                    'model':
                    fasterRCNN.module.state_dict()
                    if args.mGPUs else fasterRCNN.state_dict(),
                    'optimizer':
                    optimizer.state_dict(),
                    'pooling_mode':
                    cfg.POOLING_MODE,
                    'class_agnostic':
                    args.class_agnostic,
                }, save_name)
            print('save model: {}'.format(save_name))
    elif args.net == 'vgg16c':
        for epoch in range(args.start_epoch, args.max_epochs + 1):
            # setting to train mode
            fasterRCNN.train()
            loss_temp = 0
            start = time.time()

            if epoch % (args.lr_decay_step + 1) == 0:
                adjust_learning_rate(optimizer, args.lr_decay_gamma)
                lr *= args.lr_decay_gamma

            data_iter = iter(dataloader)
            for step in range(iters_per_epoch):
                data = next(data_iter)

                im_data1.resize_(data[0].size()).copy_(data[0])
                im_data2.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                num_boxes.resize_(data[4].size()).copy_(data[4])

                fasterRCNN.zero_grad()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls1, rpn_loss_box1, \
                rpn_loss_cls2, rpn_loss_box2, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes)

                loss = rpn_loss_cls1.mean() + rpn_loss_box1.mean() \
                        + rpn_loss_cls2.mean() + rpn_loss_box2.mean() \
                        + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
                loss_temp += loss.item()

                # backward
                optimizer.zero_grad()
                loss.backward()
                if args.net == "vgg16":
                    clip_gradient(fasterRCNN, 10.)
                optimizer.step()

                if step % args.disp_interval == 0:
                    end = time.time()
                    if step > 0:
                        loss_temp /= (args.disp_interval + 1)

                    if args.mGPUs:
                        loss_rpn_cls1 = rpn_loss_cls1.mean().item()
                        loss_rpn_cls2 = rpn_loss_cls2.mean().item()
                        loss_rpn_box1 = rpn_loss_box1.mean().item()
                        loss_rpn_box2 = rpn_loss_box2.mean().item()
                        loss_rcnn_cls = RCNN_loss_cls.mean().item()
                        loss_rcnn_box = RCNN_loss_bbox.mean().item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt
                    else:
                        loss_rpn_cls1 = rpn_loss_cls1.item()
                        loss_rpn_cls2 = rpn_loss_cls2.item()
                        loss_rpn_box1 = rpn_loss_box1.item()
                        loss_rpn_box2 = rpn_loss_box2.item()
                        loss_rcnn_cls = RCNN_loss_cls.item()
                        loss_rcnn_box = RCNN_loss_bbox.item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt

                    Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                             % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                    Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                             (fg_cnt, bg_cnt, end - start))
                    Log.info("\t\t\trpn_cls1: %.4f,rpn_cls2: %.4f, rpn_box1: %.4f, rpn_box2: %.4f,rcnn_cls: %.4f, rcnn_box %.4f" \
                             % (loss_rpn_cls1, loss_rpn_cls2,loss_rpn_box1,loss_rpn_box2, loss_rcnn_cls, loss_rcnn_box))
                    if args.use_tfboard:
                        info = {
                            'loss': loss_temp,
                            'loss_rpn_cls1': loss_rpn_cls1,
                            'loss_rpn_cls2': loss_rpn_cls2,
                            'loss_rpn_box1': loss_rpn_box1,
                            'loss_rpn_box2': loss_rpn_box2,
                            'loss_rcnn_cls': loss_rcnn_cls,
                            'loss_rcnn_box': loss_rcnn_box
                        }
                        logger.add_scalars(
                            "logs_s_{}/losses".format(args.session), info,
                            (epoch - 1) * iters_per_epoch + step)

                    loss_temp = 0
                    start = time.time()

            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))

            save_checkpoint(
                {
                    'session':
                    args.session,
                    'epoch':
                    epoch + 1,
                    'model':
                    fasterRCNN.module.state_dict()
                    if args.mGPUs else fasterRCNN.state_dict(),
                    'optimizer':
                    optimizer.state_dict(),
                    'pooling_mode':
                    cfg.POOLING_MODE,
                    'class_agnostic':
                    args.class_agnostic,
                }, save_name)
            print('save model: {}'.format(save_name))
    #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes))

    if args.use_tfboard:
        logger.close()