def read_cfgs():
    """Parse command-line arguments, resolve the dataset's imdb names and
    cfg overrides, load the matching cfg yml, and fill in any unset
    training options from the loaded cfg defaults.

    Returns the fully-populated args namespace.
    """
    args = parse_args()
    print('Called with args:')
    print(args)

    # dataset name -> (train imdb, test imdb, cfg key/value overrides)
    # vg train sizes: train, smalltrain, minitrain
    # vg train scales: 150-50-20, 150-50-50, 500-150-80, 750-250-150,
    #                  1750-700-450, 1600-400-20
    presets = {
        "pascal_voc": ("voc_2007_trainval", "voc_2007_test",
                       ['MAX_NUM_GT_BOXES', '20']),
        "pascal_voc_0712": ("voc_2007_trainval+voc_2012_trainval",
                            "voc_2007_test", ['MAX_NUM_GT_BOXES', '20']),
        "coco": ("coco_2014_train+coco_2014_valminusminival",
                 "coco_2014_minival", ['MAX_NUM_GT_BOXES', '50']),
        "imagenet": ("imagenet_train", "imagenet_val",
                     ['MAX_NUM_GT_BOXES', '30']),
        "vg": ("vg_150-50-50_minitrain", "vg_150-50-50_minival",
               ['MAX_NUM_GT_BOXES', '50']),
        "vmrdcompv1": ("vmrd_compv1_trainval", "vmrd_compv1_test",
                       ['MAX_NUM_GT_BOXES', '20']),
        "vg_vmrd": ("vmrd_compv1_trainval+vg_150-50-50_minitrain",
                    "vmrd_compv1_test", ['MAX_NUM_GT_BOXES', '50']),
        "bdds": ("bdds_trainval", "bdds_test", ['MAX_NUM_GT_BOXES', '20']),
    }

    if args.dataset in presets:
        args.imdb_name, args.imdbval_name, args.set_cfgs = presets[args.dataset]
    elif args.dataset[:7] == 'cornell':
        # grasp datasets encode their split parameters in the dataset name
        parts = args.dataset.split('_')
        args.imdb_name = 'cornell_{}_{}_trainval_{}'.format(parts[1], parts[2], parts[3])
        args.imdbval_name = 'cornell_{}_{}_test_{}'.format(parts[1], parts[2], parts[3])
        args.set_cfgs = ['MAX_NUM_GT_BOXES', '50']
    elif args.dataset[:8] == 'jacquard':
        parts = args.dataset.split('_')
        args.imdb_name = 'jacquard_{}_trainval_{}'.format(parts[1], parts[2])
        args.imdbval_name = 'jacquard_{}_test_{}'.format(parts[1], parts[2])
        args.set_cfgs = ['MAX_NUM_GT_GRASPS', '1000']

    # Pick the cfg yml; the grasp datasets use their own naming scheme that
    # omits the dataset's split parameters.
    if args.dataset[:7] == 'cornell':
        stem = "cfgs/cornell_{}_{}".format(args.frame, args.net)
    elif args.dataset[:8] == 'jacquard':
        stem = "cfgs/jacquard_{}_{}".format(args.frame, args.net)
    else:
        stem = "cfgs/{}_{}_{}".format(args.dataset, args.frame, args.net)
    args.cfg_file = stem + ("_ls.yml" if args.large_scale else ".yml")

    print("Using cfg file: " + args.cfg_file)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # Any option left falsy on the command line falls back to the cfg value.
    if not args.disp_interval:
        args.disp_interval = cfg.TRAIN.COMMON.DISPLAY
    if not args.batch_size:
        args.batch_size = cfg.TRAIN.COMMON.IMS_PER_BATCH
    if not args.lr_decay_step:
        args.lr_decay_step = cfg.TRAIN.COMMON.LR_DECAY_STEPSIZE[0]
    if not args.lr:
        args.lr = cfg.TRAIN.COMMON.LEARNING_RATE
    if not args.lr_decay_gamma:
        args.lr_decay_gamma = cfg.TRAIN.COMMON.GAMMA
    if not args.max_epochs:
        args.max_epochs = cfg.TRAIN.COMMON.MAX_EPOCH

    print('Using config:')
    # -- Note: flipped augmentation stays enabled for training.
    cfg.TRAIN.COMMON.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    pprint.pprint(cfg)
    if args.cuda:
        cfg.CUDA = True

    return args
# Example 2
def main(scene_img_path, query_img_path):
    """Run query-conditioned one-shot detection on a single scene image.

    Loads a query-conditioned Faster R-CNN checkpoint, runs it on the
    scene at ``scene_img_path`` conditioned on the crop at
    ``query_img_path``, applies NMS, shows the result, and writes a
    visualization to ./test_img/.

    NOTE(review): depends on module-level ``args`` and ``cfg`` being
    configured by the surrounding script before this function is called.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    # group-specific cfg overrides the plain per-net cfg
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initialize the network here.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # placeholder tensors; replaced with real data further down
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    max_per_image = 100  # cap on detections kept per image
    thresh = 0.05        # minimum score for a candidate detection

    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')
    all_weight = np.zeros((len(ratio_index_vu[0]), 1024))
    all_times = np.zeros((imdb_vu.num_classes))

    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu.num_classes,
                                training=False,
                                seen=args.seen)
    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)

    # BUGFIX: Python 3 removed xrange; use range.
    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    i = 0
    index = 0

    data = [0, 0, 0, 0, 0]
    im = imread(scene_img_path)
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im,
                          dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _im = np.copy(im)
    _query_im = np.copy(query_im)

    # make im_data: normalized blob -> NCHW tensor
    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)

    # make query data the same way, at the 128-pixel query scale
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)

    # NOTE(review): inputs are moved to GPU unconditionally here, so this
    # path requires CUDA even when --cuda was not given — confirm intent.
    im_data = data[0] = im_data.cuda()
    query = data[1] = query.cuda()
    # NOTE(review): im_info (height, width, scale) is hard-coded — verify it
    # matches prep_im_for_blob's output for inputs other than 640x480.
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care
    catgory = data[4] = torch.tensor([1])

    det_tic = time.time()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, _, RCNN_loss_bbox, \
    rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes, catgory)

    all_weight[i, :] = weight.view(-1).detach().cpu().numpy()
    all_times[data[4]] = all_times[data[4]] + 1

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # BUGFIX: pred_boxes was undefined when BBOX_REG is off; fall back
        # to the raw RPN proposals.
        pred_boxes = boxes

    # undo the image scaling so boxes are in original-image coordinates
    pred_boxes /= data[2][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    im2show = np.copy(_im)

    inds = torch.nonzero(scores > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        all_boxes[data[4]][index] = cls_dets.cpu().numpy()

        im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.8)
        _im2show = np.concatenate((im2show, _query_im), axis=1)
        plt.imshow(_im2show)
        plt.show()

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
        try:
            image_scores = all_boxes[data[4]][index][:, -1]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]

                keep = np.where(
                    all_boxes[data[4]][index][:, -1] >= image_thresh)[0]
                all_boxes[data[4]][index] = all_boxes[data[4]][index][keep, :]
        except (IndexError, TypeError):
            # BUGFIX: was a bare except; only the "no detections stored"
            # case (an empty list has no [:, -1]) is expected here.
            pass

    misc_toc = time.time()

    # de-normalize the query tensor back to a displayable RGB image
    o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
    o_query *= [0.229, 0.224, 0.225]
    o_query += [0.485, 0.456, 0.406]
    o_query *= 255
    o_query = o_query[:, :, ::-1]

    (h, w, c) = im2show.shape
    o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR)
    o_query = cv2.cvtColor(o_query, cv2.COLOR_BGR2RGB)

    im2show = np.concatenate((im2show, o_query), axis=1)
    im2show = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)

    cv2.imwrite('./test_img/%d.png' % (i), im2show)
# Example 3
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers, lr,
                   batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, resume, load_name, pretrained, eta, gamma,
                   ef, class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Train an HTCN domain-adaptive detector from a source to a target
    dataset, checkpointing after every epoch.

    Sacred-style experiment entry point: ``_run`` supplies the run id that
    suffixes ``output_dir``. Returns 0 on completion.
    """

    # Train-time args for the source/target dataset pair.
    args = Args(dataset=dataset_source,
                dataset_t=dataset_target,
                cfg_file=cfg_file,
                net=net)
    args = set_dataset_args(args)

    # Separate args namespace for the validation datasets (test=True).
    args_val = Args(dataset=dataset_source,
                    dataset_t=val_datasets,
                    imdb_name_target=[],
                    cfg_file=cfg_file,
                    net=net)
    args_val = set_dataset_args(args_val, test=True)

    # These backbones expect BGR channel order (Caffe-style pretraining).
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True

    logger = LoggerForSacred(None, ex, True)

    # Load cfg from file first, then apply per-dataset overrides.
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Each Sacred run gets its own checkpoint directory.
    output_dir = output_dir + "_{}".format(_run._id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One source and one target train dataloader, plus validation loaders.
    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr)
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(
        args_val, 1, num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT,
                                 MID_ATT,
                                 class_agnostic,
                                 device,
                                 gc,
                                 imdb,
                                 lc,
                                 load_name,
                                 net,
                                 pretrained=pretrained)
    # May overwrite lr/session/start_epoch when resuming from a checkpoint.
    lr, optimizer, session, start_epoch = init_optimizer(lr,
                                                         fasterRCNN,
                                                         optimizer,
                                                         resume,
                                                         load_name,
                                                         session,
                                                         start_epoch,
                                                         is_all_params=True)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    # NOTE(review): epoch length is hard-coded to 10000 samples — confirm
    # this matches the intended dataset size.
    iters_per_epoch = int(10000 / batch_size)

    # Focal loss for the domain classifiers (EFocalLoss is the 'ef' variant).
    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    total_step = 0

    for epoch in range(start_epoch, max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()

        # Step the LR schedule at the configured epochs.
        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        total_step = frcnn_utils.train_htcn_one_epoch(
            args, FL, total_step, dataloader_s, dataloader_t, iters_per_epoch,
            fasterRCNN, optimizer, device, eta, logger)
        # NOTE(review): uses args.eta, but Args() above was not given eta —
        # verify Args provides a default, else this raises AttributeError.
        save_name = os.path.join(
            output_dir,
            'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'
            .format(args.dataset_t, args.eta, lc, gc, gamma, session, epoch,
                    total_step))
        # Unwrap DataParallel before saving so the state dict keys are clean.
        save_checkpoint(
            {
                'session':
                session,
                'epoch':
                epoch + 1,
                'model':
                fasterRCNN.module.state_dict()
                if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(),
                'optimizer':
                optimizer.state_dict(),
                'pooling_mode':
                cfg.POOLING_MODE,
                'class_agnostic':
                class_agnostic,
            }, save_name)
    return 0
# Example 4
def prep_model(input_dir):
    """Build a ResNet-101 Faster R-CNN over the 80 COCO categories and
    restore its weights from the checkpoint file at ``input_dir``.

    Raises:
        Exception: if ``input_dir`` does not exist.

    Returns:
        The loaded Faster R-CNN network.
    """
    opts = parse_args()

    print('Called with args:')
    print(opts)
    opts.set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
    ]

    # Apply the cfg file first, then the anchor overrides above.
    if opts.cfg_file is not None:
        cfg_from_file(opts.cfg_file)
    if opts.set_cfgs is not None:
        cfg_from_list(opts.set_cfgs)

    cfg.USE_GPU_NMS = 1

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(input_dir)

    # 80 COCO categories (background at index 0), spelled with the
    # VOC-style aliases this codebase expects (e.g. 'sofa', 'tvmonitor').
    coco_classes = np.asarray([
        '__background__', "person", "bicycle", "car", "motorbike", "aeroplane",
        "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
        "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
        "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
        "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
        "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
        "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
        "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
        "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
        "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book",
        "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
    ])

    # Build the detector and restore its weights.
    detector = resnet(coco_classes,
                      101,
                      pretrained=False,
                      class_agnostic=opts.class_agnostic)
    detector.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    detector.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    return detector
# Example 5
    def __init__(self, baseFolder='models', filename='faster_rcnn_1_10_9999_mosaicCL3to5_CBAM_Gblur_class23_wOrgCW.pth',
                 threshold=0.9, att_type='CBAM'): # att_type=None
        """Load a 23-class household-object Faster R-CNN detector.

        Args:
            baseFolder: directory holding both the checkpoint and cfgs/.
            filename: checkpoint file name; if it contains 'FPN', the FPN
                variant of the backbone is used.
            threshold: detection score threshold stored on self.thresh.
            att_type: attention module type passed to the ResNet backbone.
        """
        super(DetectorAIR23, self).__init__()

        # Grab the shared config object from the 'model' package.
        self.cfg = __import__('model').utils.config.cfg

        def parse_args():
            """Build the argument parser (returns the parser itself, not
            parsed args; it is applied to the fixed cmd_args list below).
            """
            parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
            parser.add_argument('--cfg', dest='cfg_file',
                                help='optional config file',
                                default='cfgs/vgg16.yml', type=str)
            parser.add_argument('--net', dest='net',
                                help='vgg16, res50, res101, res152',
                                default='res101', type=str)
            parser.add_argument('--set', dest='set_cfgs',
                                help='set config keys', default=None,
                                nargs=argparse.REMAINDER)
            parser.add_argument('--cuda', dest='cuda',
                                help='whether use CUDA',
                                action='store_true')
            parser.add_argument('--mGPUs', dest='mGPUs',
                                help='whether use multiple GPUs',
                                action='store_true')
            parser.add_argument('--cag', dest='class_agnostic',
                                help='whether perform class_agnostic bbox regression',
                                action='store_true')
            parser.add_argument('--parallel_type', dest='parallel_type',
                                help='which part of model to parallel, 0: all, 1: model before roi pooling',
                                default=0, type=int)
            parser.add_argument('--ls', dest='large_scale',
                                help='whether use large imag scale',
                                action='store_true')
            parser.add_argument('--use_FPN', dest='use_FPN', action='store_true')

            return parser

        # Fixed argument list — this detector is configured in code, not
        # from sys.argv.
        cmd_args = [
            '--net', 'res101',
            '--ls',
            '--cuda',
        ]

        if 'FPN' in filename:
            cmd_args.append('--use_FPN')

        load_name = os.path.join(baseFolder, filename) # w/o bottle class

        self.thresh = threshold

        parser = parse_args()
        self.args = parser.parse_args(cmd_args)

        self.args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
        self.args.cfg_file = "{}/cfgs/{}_ls.yml".format(baseFolder, self.args.net) if self.args.large_scale else "{}/cfgs/{}.yml".format(baseFolder, self.args.net)

        print('Called with args:')
        print(self.args)

        if self.args.cfg_file is not None:
            # check cfg file and copy
            cfg_from_file(self.args.cfg_file)
        if self.args.set_cfgs is not None:
            cfg_from_list(self.args.set_cfgs)

        self.cfg.USE_GPU_NMS = self.args.cuda

        print('Using config:')
        pprint.pprint(self.cfg)
        np.random.seed(self.cfg.RNG_SEED)

        # The class order must match the checkpoint's training label order;
        # '__background__' is always index 0.
        self.classes = np.asarray([
            '__background__',  # always index 0
             'cane_stick',
             'mobile_phone',
             'pack',
             'cup',
             'glasses',
             'hat',
             'key',
             'medicine_case',
             'medicine_packet',
             'newspaper',
             'remote',
             'sock',
             'towel',
             'wallet',
             'pen',
             'sink',
             'table',
             'bed',
             'sofa_bed',
             'refrigerator',
             'television',
             'toilet',
             'mechanical_fan',
        ])

        # Korean display names for the UI; classes missing here are simply
        # not displayed.
        self.display_classes = {
            'cup': '컵',
            'pen': '펜',
            'hat': '모자',
            'mobile_phone': '핸드폰',
            'sock': '양말',
            'glasses': '안경',
            'towel': '수건',
            'newspaper': '신문',
            'remote': '리모컨',
            'key': '열쇠',
            'wallet': '지갑',
            'pack': '담배갑',
            'medicine_case': '약통',
            'medicine_packet': '약봉지',
            'sink': '싱크대',
            'table': '테이블',
            'bed': '침대',
            'sofa_bed': '소파',
            'refrigerator': '냉장고',
            'television': '티비',
            'toilet': '화장실',
            'mechanical_fan': '선풍기',
        }

        # Build the backbone; the FPN / plain variants are imported lazily
        # because only one of them exists depending on the checkpoint.
        if self.args.net == 'vgg16':
            from model.faster_rcnn.vgg16 import vgg16
            self.fasterRCNN = vgg16(self.classes, pretrained=False, class_agnostic=self.args.class_agnostic)
        elif 'res' in self.args.net:
            if self.args.use_FPN:
                from model.fpn.resnet_AIRvar_CBAM import resnet
            else:
                from model.faster_rcnn.resnet_AIRvar_CBAM import resnet
            if self.args.net == 'res101':
                self.fasterRCNN = resnet(self.classes, 101, pretrained=False, class_agnostic=self.args.class_agnostic, att_type=att_type)
            elif self.args.net == 'res50':
                self.fasterRCNN = resnet(self.classes, 50, pretrained=False, class_agnostic=self.args.class_agnostic, att_type=att_type)
            elif self.args.net == 'res152':
                self.fasterRCNN = resnet(self.classes, 152, pretrained=False, class_agnostic=self.args.class_agnostic, att_type=att_type)
        else:
            print("network is not defined")
            pdb.set_trace()

        self.fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        # map_location keeps CPU-only machines working when --cuda is off.
        if self.args.cuda > 0:
            checkpoint = torch.load(load_name)
        else:
            checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
        self.fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            self.cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('load model successfully!')

        # Placeholder tensors, resized with real data at inference time.
        self.im_data = torch.FloatTensor(1)
        self.im_info = torch.FloatTensor(1)
        self.num_boxes = torch.LongTensor(1)
        self.gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.args.cuda > 0:
            self.im_data = self.im_data.cuda()
            self.im_info = self.im_info.cuda()
            self.num_boxes = self.num_boxes.cuda()
            self.gt_boxes = self.gt_boxes.cuda()

        # NOTE(review): Variable is deprecated in modern PyTorch; kept for
        # compatibility with the original codebase.
        with torch.no_grad():
            self.im_data = Variable(self.im_data)
            self.im_info = Variable(self.im_info)
            self.num_boxes = Variable(self.num_boxes)
            self.gt_boxes = Variable(self.gt_boxes)

        if self.args.cuda > 0:
            self.cfg.CUDA = True

        if self.args.cuda > 0:
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        # cap on detections kept per image
        self.max_per_image = 100
# Example 6
    elif args.imdb_name == "val2014":
        imdb_name = args.dataset + "_2014_val"
    elif args.imdb_name == "test2014":
        imdb_name = args.dataset + "_2014_test"
    elif args.imdb_name == "test2015":
        imdb_name = args.dataset + "_2015_test"

    cfg.TRAIN.OBJECT_CLASSES = imdb_vg._classes
    cfg.TRAIN.ATTRIBUTE_CLASSES = imdb_vg._attributes
    cfg.TRAIN.RELATION_CLASSES = imdb_vg._relations

    cfg_file = "cfgs/{}.yml".format(args.net)
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))
# Example 7
        raise Exception("dataset is not defined")

    if args.more_scale:
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    else:
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/res50.yml"
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # prepare roidb
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    cwd = os.getcwd()
    if args.not_pure:
        support_dir = os.path.join(cwd, 'data/supports',
                                   args.dataset + '_random')
    else:
        support_dir = os.path.join(cwd, 'data/supports', args.dataset)

    # load dir
    input_dir = os.path.join(args.load_dir, "train/checkpoints")
    if not os.path.exists(input_dir):
# Example 8
def get_ready(query_img_path):
    """Load the query-conditioned detector and pre-process the query image.

    Returns ``(fasterRCNN, all_boxes, query, _query_im)`` where ``query``
    is the normalized NCHW query tensor and ``_query_im`` the raw resized
    query image kept for visualization.

    NOTE(review): depends on module-level ``args`` and ``cfg`` being
    configured by the surrounding script before this function is called.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    # group-specific cfg overrides the plain per-net cfg
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initialize the network here.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # placeholder tensors (kept for parity with the inference script)
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')

    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu.num_classes,
                                training=False,
                                seen=args.seen)
    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)

    # BUGFIX: Python 3 removed xrange; use range.
    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    # make query data: normalized blob -> NCHW tensor at the query scale
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im,
                          dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _query_im = np.copy(query_im)
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)
    # BUGFIX: was an unconditional .cuda(), which crashed CPU-only runs;
    # match the args.cuda guard used for the model above.
    if args.cuda:
        query = query.cuda()

    return fasterRCNN, all_boxes, query, _query_im
# Example 9
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers,
                   teacher_pth, student_pth, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, imitation_loss_weight, eta, gamma, ef,
                   class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Cross-evaluate teacher/student HTCN detectors with swapped RPN heads.

    Restores a res101 teacher from ``teacher_pth`` and two res50 student
    copies from ``student_pth``, grafts the teacher's RPN onto the first
    student, then for every validation dataloader logs mAP for:
    (a) the student carrying the teacher's RPN,
    (b) the unmodified teacher, and
    (c) the teacher carrying the second student's RPN.

    Many parameters (lr, epochs, optimizer, ...) are unused here; they
    exist so the signature matches the experiment framework's contract.
    """

    args_val = Args(dataset=dataset_source,
                    dataset_t=val_datasets,
                    imdb_name_target=[],
                    cfg_file=cfg_file,
                    net=net)
    args_val = set_dataset_args(args_val, test=True)

    logger = LoggerForSacred(None, ex, False)

    # Merge experiment config into the global cfg before any dataloader
    # or model construction reads it.
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(
        args_val, 1, num_workers)

    teacher = init_frcnn_utils.init_model_only(device,
                                               "res101",
                                               htcn_resnet,
                                               val_imdb_ts[0],
                                               teacher_pth,
                                               class_agnostic=class_agnostic,
                                               lc=lc,
                                               gc=gc,
                                               la_attention=LA_ATT,
                                               mid_attention=MID_ATT)
    fasterRCNN = init_frcnn_utils.init_model_only(
        device,
        "res50",
        htcn_resnet,
        val_imdb_ts[0],
        student_pth,
        class_agnostic=class_agnostic,
        lc=lc,
        gc=gc,
        la_attention=LA_ATT,
        mid_attention=MID_ATT)
    # Second, untouched copy of the student — its RPN is later swapped
    # into the teacher for ablation (c).
    fasterRCNN_2 = init_frcnn_utils.init_model_only(
        device,
        "res50",
        htcn_resnet,
        val_imdb_ts[0],
        student_pth,
        class_agnostic=class_agnostic,
        lc=lc,
        gc=gc,
        la_attention=LA_ATT,
        mid_attention=MID_ATT)

    # The first student inherits the teacher's region-proposal network.
    fasterRCNN.RCNN_rpn = teacher.RCNN_rpn

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    if isinstance(val_datasets, list):
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # (a) student body + teacher RPN. `mAP` avoids shadowing the
            # builtin `map` that the original used as a variable name.
            mAP = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  fasterRCNN, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "student with teacher rpn map on {}".format(val_datasets[i]),
                mAP, 0)
            # (b) teacher as loaded.
            mAP = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher original map on {}".format(val_datasets[i]), mAP, 0)
            # (c) teacher body + (untouched) student RPN.
            teacher.RCNN_rpn = fasterRCNN_2.RCNN_rpn
            mAP = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher with stu rpn map on {}".format(val_datasets[i]), mAP,
                0)
예제 #10
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, pretrained, eta, gamma, ef, class_agnostic, lc,
                   gc, LA_ATT, MID_ATT, debug, _run):
    """Fit a small per-pixel mask network (DTM) against a frozen HTCN
    detector restored from ``load_name``, and save the mask to disk.

    The mask is a 3->256->3 stack of 1x1 convolutions optimized with SGD
    via ``dtm_util.get_mask_for_target``; it is saved under ``output_dir``
    suffixed with the run id parsed out of the checkpoint file name.

    Returns:
        int: always 0 (experiment-framework convention).
    """

    args = Args(dataset=dataset_source,
                dataset_t=[],
                imdb_name_target=[],
                cfg_file=cfg_file,
                net=net)
    args = set_dataset_args(args)

    # ResNet/VGG backbones here were trained on BGR (caffe-style) inputs.
    is_bgr = net in ['res101', 'res50', 'res152', 'vgg16']

    logger = LoggerForSacred(None, ex)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Tag the output directory with the first number embedded in the
    # checkpoint name. Raw string fixes the invalid "\d" escape the
    # original pattern relied on; the matches are reused for the file
    # name suffix below instead of re-running the regex.
    checkpoint_ids = re.findall(r"\d+", load_name)
    load_id = checkpoint_ids[0]
    output_dir = output_dir + "_{}".format(load_id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataloader_s, _, imdb, _ = init_dataloaders_1s_mt(args, batch_size,
                                                      num_workers, is_bgr)

    fasterRCNN = init_htcn_model(LA_ATT,
                                 MID_ATT,
                                 class_agnostic,
                                 device,
                                 gc,
                                 imdb,
                                 lc,
                                 load_name,
                                 net,
                                 strict=False,
                                 pretrained=pretrained)

    # Domain-transfer mask: a per-pixel colour transform with one hidden
    # layer of 256 channels (all 1x1 convolutions).
    dtm = nn.Sequential(nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False),
                        nn.ReLU(), nn.Conv2d(256, 3, 1))

    dtm.to(device)
    # Note: intentionally rebinds the `optimizer` parameter — only the
    # mask's parameters are trained, the detector stays frozen.
    optimizer = torch.optim.SGD(dtm.parameters(), lr=lr, momentum=0.9)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(10000 / batch_size)

    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    dtm_util.get_mask_for_target(args, FL, 0, dataloader_s, iters_per_epoch,
                                 fasterRCNN, dtm, optimizer, device, logger)

    # Last number in the checkpoint name; the empty-match fallback is kept
    # for parity with the original intent, although load_id indexing above
    # would already have failed if the name carried no digits.
    find_id = checkpoint_ids[-1] if checkpoint_ids else 0
    torch.save(
        dtm,
        os.path.join(output_dir,
                     'dtm_target_cnn_{}_{}.p'.format(load_id, find_id)))

    return 0
예제 #11
0
파일: exp_eval.py 프로젝트: Natlem/M-HTCN
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets,
                    model_type, device, net, optimizer, num_workers, model_pth, class_agnostic, lc, gc, LA_ATT, MID_ATT,
                    debug, _run):
    """Evaluate one detector checkpoint on every validation dataset.

    Builds the backbone implied by ``net``/``model_type``, restores the
    weights from ``model_pth``, logs per-dataset mAP and per-class AP
    (averaged over datasets), and returns the mean mAP as a float.
    """

    args_val = Args(dataset=dataset_source, dataset_t=val_datasets,
                    imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    logger = LoggerForSacred(None, ex, True)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(
        args_val, 1, num_workers)

    # Backbone constructor keyed on (net, model_type); defaults to the
    # HTCN ResNet variant.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # NOTE(review): no SAITP VGG backbone is wired up — a None
            # here would make init_model_only fail; confirm upstream.
            backbone_fn = None

    model = init_frcnn_utils.init_model_only(
        device, net, backbone_fn, val_imdb_ts[0], model_pth,
        class_agnostic=class_agnostic, lc=lc, gc=gc,
        la_attention=LA_ATT, mid_attention=MID_ATT)

    avg_ap = 0.
    avg_ap_per_class = {}
    if isinstance(val_datasets, list):
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # `mAP` avoids shadowing the builtin `map`.
            mAP, ap_per_class = frcnn_utils.eval_one_dataloader(
                output_dir, val_dataloader_t, model, device, val_imdb_ts[i],
                return_ap_class=True)
            logger.log_scalar(" map on {}".format(val_datasets[i]), mAP, 0)
            for cls_name, ap in ap_per_class.items():
                avg_ap_per_class[cls_name] = avg_ap_per_class.get(cls_name,
                                                                  0) + ap
            avg_ap += mAP
        avg_ap /= len(val_dataloader_ts)
        for cls_name, ap in avg_ap_per_class.items():
            # Average per-class AP across datasets (logging only; the
            # accumulator dict itself is not rescaled).
            ap /= len(val_dataloader_ts)
            logger.log_scalar(" map of class {}".format(cls_name), ap, 0)
    logger.log_scalar("avp map", avg_ap, 0)

    # float() handles tensor/ndarray mAPs as well as the plain-0.0 case;
    # the original `.item()` raised AttributeError when `val_datasets`
    # was not a list and avg_ap remained a Python float.
    return float(avg_ap)
예제 #12
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers, lr,
                   batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, mask_load_p, resume, load_name, pretrained,
                   model_type, eta, gamma, ef, class_agnostic, lc, gc, LA_ATT,
                   MID_ATT, debug, _run):
    """Profile a detector's compute cost and exercise the source dataloader.

    Constructs the (weightless) backbone selected by ``net``/``model_type``,
    prints MACs/params via ``thop.profile`` for both the detector and a
    reference 1x1-conv mask network, then pulls ~1000/batch_size batches
    from the source dataloader (data loading only — no forward pass).
    """

    args = Args(dataset=dataset_source,
                dataset_t=dataset_target,
                cfg_file=cfg_file,
                net=net)
    args = set_dataset_args(args)
    # ResNet/VGG backbones here expect BGR (caffe-style) inputs.
    is_bgr = net in ['res101', 'res50', 'res152', 'vgg16']

    logger = LoggerForSacred(None, ex, False)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Backbone constructor keyed on (net, model_type); defaults to the
    # HTCN ResNet variant.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # NOTE(review): None would make init_model_only fail — confirm.
            backbone_fn = None
    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr, False)
    # Empty checkpoint path: architecture only, no trained weights needed
    # for FLOPs counting.
    model = init_frcnn_utils.init_model_only(device,
                                             net,
                                             backbone_fn,
                                             imdb_t,
                                             '',
                                             class_agnostic=class_agnostic,
                                             lc=lc,
                                             gc=gc,
                                             la_attention=LA_ATT,
                                             mid_attention=MID_ATT)
    model.eval()

    # Dummy inputs shaped like one resized detection batch.
    im_data = torch.randn(1, 3, 600, 1200).to(device)
    im_info = torch.FloatTensor([[600, 900, 2]]).to(device)
    gt_boxes = torch.zeros((1, 1, 5)).to(device)
    num_boxes = torch.zeros([1]).to(device)
    macs, params = profile(model,
                           inputs=(im_data, im_info, gt_boxes, num_boxes))
    macs, params = clever_format([macs, params], "%.3f")

    print("Model CFLOPS: {}".format(macs))
    print("Model Cparams: {}".format(params))

    # Reference cost of the 1x1-conv mask network, for comparison.
    random_mask = nn.Sequential(
        nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False), nn.ReLU(),
        nn.Conv2d(256, 3, 1)).to(device)

    macs, params = profile(random_mask, inputs=(im_data, ))
    macs, params = clever_format([macs, params], "%.3f")

    print("Mask CFLOPS: {}".format(macs))
    print("Mask Cparams: {}".format(params))

    # Iterate the source dataloader, restarting it when exhausted. The
    # batches are moved to the device but never forwarded.
    iters_per_epoch = int(1000 / batch_size)
    data_iter_s = iter(dataloader_s)

    for step in range(1, iters_per_epoch + 1):
        try:
            data_s = next(data_iter_s)
        except StopIteration:
            # Narrowed from a bare `except:` — only iterator exhaustion
            # should trigger a dataloader restart.
            data_iter_s = iter(dataloader_s)
            data_s = next(data_iter_s)
        im_data = data_s[0].to(device)
        im_info = data_s[1].to(device)
        gt_boxes = data_s[2].to(device)
        num_boxes = data_s[3].to(device)
예제 #13
0
파일: demo.py 프로젝트: yichi0911/gsnn_demo
def frcnn(train):
    """Run a COCO-trained Faster R-CNN over every image in args.image_dir.

    When ``train`` is falsy: each image is forwarded through the network,
    per-class detections above a 0.5 score threshold are NMS-filtered, and
    the best score per label known to the GSNN graph is written to
    ``annotation_dict.json``, keyed by the digits of the image filename.
    When ``train`` is truthy the function is a no-op.
    """

    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    # Re-import so the freshly merged config object is the one used below.
    from model.utils.config import cfg

    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # COCO label set; index 0 is background.
    # NOTE(review): '___background__' carries three leading underscores
    # (convention is two). Kept byte-identical since only indices >= 1 are
    # used below — confirm before relying on the string elsewhere.
    pascal_classes = np.asarray([
        '___background__', u'person', u'bicycle', u'car', u'motorcycle',
        u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light',
        u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird',
        u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear',
        u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie',
        u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball',
        u'kite', u'baseball bat', u'baseball glove', u'skateboard',
        u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup',
        u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple', u'sandwich',
        u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut',
        u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table',
        u'toilet', u'tv', u'laptop', u'mouse', u'remote', u'keyboard',
        u'cell phone', u'microwave', u'oven', u'toaster', u'sink',
        u'refrigerator', u'book', u'clock', u'vase', u'scissors',
        u'teddy bear', u'hair drier', u'toothbrush'
    ])
    # initilize the network here.
    #args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
    # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Map GPU-saved weights onto the CPU.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    # pdb.set_trace()

    print("load checkpoint %s" % (load_name))

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (inference only, so no gradients are tracked)
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()
    # Minimum class score for a detection to be kept.
    thresh = 0.5

    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))
    import json, re
    from tqdm import tqdm
    d = {}
    pbar = tqdm(imglist)
    if not train:
        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            # im = cv2.imread(im_file)
            im_name = i
            im_in = np.array(imread(im_file))
            # Promote grayscale images to 3 channels.
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            # NHWC -> NCHW for the network.
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            # Reuse the preallocated holders for each image.
            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                #Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # Undo the test-time image scaling.
            pred_boxes /= im_scales[0]
            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # Label vocabulary of the GSNN graph.
            lis = json.load(
                open(
                    '/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json',
                    'r'))

            sm_lis = np.zeros(len(lis))
            # Skip j == 0 (background).
            for j in xrange(1, len(pascal_classes)):

                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:

                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    #cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets,
                               cfg.TEST.NMS,
                               force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    # Highest-scoring detection for this class.
                    score = cls_dets[0][-1]
                    try:
                        sm_lis[lis.index(pascal_classes[j])] = score.numpy()
                    except Exception:
                        # Best effort (narrowed from a bare `except:`):
                        # label absent from the graph list, or score not
                        # convertible (e.g. still on the GPU) — skip it.
                        pass
            # Key the entry by the digits of the filename (raw-string
            # pattern fixes the invalid "\D" escape).
            d[re.sub(r"\D", "", im_name)] = sm_lis.tolist()
            # Rewritten after every image so partial progress survives.
            json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2)
    else:
        pass
예제 #14
0
    def load(self, path, use_cuda):
        """Load the detector and food classifier from *path* for inference.

        Args:
            path: directory holding the detector checkpoint
                (``fpn101_1_10_9999.pth``), classifier weights, and the
                ``{net}_ls.yml`` config file.
            use_cuda: truthy to place both models on the GPU.

        Side effects: mutates the global ``cfg`` (config file, set_cfgs,
        USE_GPU_NMS, POOLING_MODE, CUDA) and populates the instance's
        model/class-list/tensor-holder attributes.
        """
        # define options
        path_model_detector = os.path.join(
            path, 'fpn101_1_10_9999.pth'
        )  # model for detector (food, tableware, drink)

        dataset = 'CloudStatus_val'
        imdb_name2 = 'CloudTableThings_val'
        total_imdb_name = 'CloudStatusTableThings_val'
        load_name = path_model_detector

        self.use_share_regress = True
        self.use_progress = True

        net = 'resnet101'
        self.cuda = use_cuda
        self.class_agnostic = False
        self.att_type = 'None'

        self.vis = True  # generate debug images

        # Load food classifier
        # possible dbname='FoodX251', 'Food101', 'Kfood'
        # possible eval_crop_type='CenterCrop', 'TenCrop'
        self.food_classifier = FoodClassifier(net='senet154',
                                              dbname='Kfood',
                                              eval_crop_type='CenterCrop',
                                              ck_file_folder=path,
                                              use_cuda=use_cuda,
                                              pretrained=False)

        cfg_file = os.path.join(path, '{}_ls.yml'.format(net))
        set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]

        if cfg_file is not None:
            cfg_from_file(cfg_file)
        if set_cfgs is not None:
            cfg_from_list(set_cfgs)

        # FIX: the original wrote `USE_GPU_NMS = self.cuda`, which only
        # created a dead local; the NMS code reads the global config flag
        # (cf. `cfg.USE_GPU_NMS = args.cuda` in the sibling scripts).
        cfg.USE_GPU_NMS = self.cuda

        print('Using config:')
        pprint.pprint(cfg)

        # train set
        # -- Note: Use validation set and disable the flipped to enable faster loading.
        input_dir = load_name
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)

        self.list_box_color = [(0, 0, 0), (0, 0, 200), (0, 200, 0),
                               (200, 200, 0), (200, 0, 200), (0, 200, 200),
                               (200, 0, 200)]

        self.classes0 = self.get_class_list(dataset)
        self.classes1 = self.get_class_list(imdb_name2)
        self.classes_total = self.get_class_list(total_imdb_name)

        from model.fpn.resnet_multi_CBAM import resnet
        self.fasterRCNN = resnet(self.classes0,
                                 self.classes1,
                                 use_pretrained=False,
                                 num_layers=101,
                                 class_agnostic=self.class_agnostic,
                                 use_share_regress=self.use_share_regress,
                                 use_progress=self.use_progress,
                                 att_type=self.att_type)

        self.fasterRCNN.create_architecture()

        print("loading checkpoint %s..." % (load_name))
        if self.cuda > 0:
            checkpoint = torch.load(load_name)
        else:
            # Map GPU-saved weights onto the CPU.
            checkpoint = torch.load(
                load_name, map_location=(lambda storage, loc: storage))
        self.fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('succeeded')

        # initilize the tensor holder here.
        self.im_data = torch.FloatTensor(1)
        self.im_info = torch.FloatTensor(1)
        self.num_boxes = torch.LongTensor(1)
        self.gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.cuda > 0:
            self.im_data = self.im_data.cuda()
            self.im_info = self.im_info.cuda()
            self.num_boxes = self.num_boxes.cuda()
            self.gt_boxes = self.gt_boxes.cuda()

        # make variable (inference only, so no gradients are tracked)
        with torch.no_grad():
            self.im_data = Variable(self.im_data)
            self.im_info = Variable(self.im_info)
            self.num_boxes = Variable(self.num_boxes)
            self.gt_boxes = Variable(self.gt_boxes)

        if self.cuda > 0:
            cfg.CUDA = True

        if self.cuda > 0:
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        print('- models loaded from {}'.format(path))
예제 #15
0
if __name__ == '__main__':

  args = parse_args()

  print('Called with args:')
  print(args)
  args = set_dataset_args(args,test=True)
  if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
  np.random.seed(cfg.RNG_SEED)

  if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
  if args.set_cfgs_target is not None:
    cfg_from_list(args.set_cfgs_target)

  print('Using config:')
  pprint.pprint(cfg)

  cfg.TRAIN.USE_FLIPPED = False
  imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name_target, False)
  imdb.competition_mode(on=True)

  print('{:d} roidb entries'.format(len(roidb)))

  # initilize the network here.
  from model.faster_rcnn.vgg16_HTCN import vgg16
  from model.faster_rcnn.resnet_HTCN import resnet

  if args.net == 'vgg16':
예제 #16
0
def train(dataset="kaggle_pna",
          train_ds="train",
          arch="couplenet",
          net="res152",
          start_epoch=1,
          max_epochs=20,
          disp_interval=100,
          save_dir="save",
          num_workers=4,
          cuda=True,
          large_scale=False,
          mGPUs=True,
          batch_size=4,
          class_agnostic=False,
          anchor_scales=4,
          optimizer="sgd",
          lr_decay_step=10,
          lr_decay_gamma=.1,
          session=1,
          resume=False,
          checksession=1,
          checkepoch=1,
          checkpoint=0,
          use_tfboard=False,
          flip_prob=0.0,
          scale=0.0,
          scale_prob=0.0,
          translate=0.0,
          translate_prob=0.0,
          angle=0.0,
          dist="cont",
          rotate_prob=0.0,
          shear_factor=0.0,
          shear_prob=0.0,
          rpn_loss_cls_wt=1,
          rpn_loss_box_wt=1,
          RCNN_loss_cls_wt=1,
          RCNN_loss_bbox_wt=1,
          **kwargs):
    """Train an object detector (Faster R-CNN / R-FCN / CoupleNet) on the
    Kaggle pneumonia dataset.

    ``arch`` selects which network definition module is imported; ``net``
    selects the backbone depth.  The ``flip_prob`` .. ``shear_prob`` arguments
    parameterize on-the-fly augmentation, and the ``*_wt`` arguments weight
    the four loss terms.  ``kwargs`` may carry ``TRAIN`` / ``RESNET`` /
    ``MOBILENET`` dicts (plus scalar overrides) merged into the global ``cfg``.
    A checkpoint named ``{arch}_{session}_{epoch}_{step}.pth`` is written to
    ``cfg.MODEL_DIR`` after every epoch and stale ones are pruned.

    Fixes vs. the original:
      * ``delete_older_checkpoints`` referenced an undefined variable ``i``
        (NameError at the end of every epoch); the glob now matches the
        actual checkpoint naming scheme.
      * 0-dim tensor reads use ``.item()`` instead of the removed
        ``tensor.data[0]`` indexing (PyTorch >= 0.5).
      * ``kwargs`` is tested for truthiness; ``**kwargs`` is never ``None``.
    """
    print("Train Arguments: {}".format(locals()))

    # Import network definition for the selected architecture.
    if arch == 'rcnn':
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet

    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb

    print('Called with kwargs:')
    print(kwargs)

    # Set up logger
    if use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger('./logs')

    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32]
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]

    # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50
    if train_ds == "train":
        imdb_name = "pna_2018_train"
    elif train_ds == "trainval":
        imdb_name = "pna_2018_trainval"

    set_cfgs = [
        'ANCHOR_SCALES',
        str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'
    ]

    # Locate the repository root relative to the installed `model` package so
    # the YAML config can be found regardless of the current working directory.
    import model
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))

    cfg_file = "cfgs/{}_ls.yml".format(
        net) if large_scale else "cfgs/{}.yml".format(net)

    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    # Merge caller-supplied config sections into the global cfg.
    train_kwargs = kwargs.pop("TRAIN", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)

    if train_kwargs is not None:
        for key, value in train_kwargs.items():
            cfg["TRAIN"][key] = value

    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value

    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value

    # Remaining kwargs are top-level cfg overrides (kwargs is always a dict,
    # so test truthiness rather than `is not None`).
    if kwargs:
        for key, value in kwargs.items():
            cfg[key] = value

    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.TRAIN_DATA_CLEAN_PATH = os.path.abspath(cfg.TRAIN_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    print("LEARNING RATE: {}".format(cfg.TRAIN.LEARNING_RATE))

    # Warning to use cuda if available
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Train set
    # Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)

    print('{:d} roidb entries'.format(len(roidb)))

    # output_dir = os.path.join(save_dir, arch, net, dataset)
    output_dir = cfg.MODEL_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch = sampler(train_size, batch_size)

    # NOTE: rebinds the `dataset` parameter; from here on it is the loader.
    dataset = roibatchLoader(roidb,
                             ratio_list,
                             ratio_index,
                             batch_size,
                             imdb.num_classes,
                             training=True)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler_batch,
                                             num_workers=num_workers)

    # Initilize the tensor holder
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Copy tensors in CUDA memory
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    # Initilize the network:
    if net == 'vgg16':
        # model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
        print("Pretrained model is not downloaded and network is not used")
    elif net == 'res18':
        model = resnet(imdb.classes,
                       18,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res34':
        model = resnet(imdb.classes,
                       34,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res50':
        model = resnet(imdb.classes,
                       50,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res101':
        model = resnet(imdb.classes,
                       101,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes,
                       152,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    # Create network architecture
    model.create_architecture()

    # Update model parameters
    lr = cfg.TRAIN.LEARNING_RATE
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    # Per-parameter options: biases optionally get doubled LR and no decay.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    # Optimizer (rebinds the `optimizer` parameter string to the instance).
    if optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    # Resume training
    if resume:
        load_name = os.path.join(
            output_dir, '{}_{}_{}_{}.pth'.format(arch, checksession,
                                                 checkepoch, checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        session = checkpoint['session'] + 1
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # Train on Multiple GPUS
    if mGPUs:
        model = nn.DataParallel(model)

    # Copy network to CUDA memroy
    if cuda:
        model.cuda()

    # Training loop
    iters_per_epoch = int(train_size / batch_size)

    sys.stdout.flush()

    for epoch in range(start_epoch, max_epochs + 1):
        # remove batch re-sizing for augmentation or adjust?
        dataset.resize_batch()

        # Set model to train mode
        model.train()
        loss_temp = 0
        start = time.time()

        # Update learning rate as per decay step
        if epoch % (lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        # Get batch data and train
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            sys.stdout.flush()
            data = next(data_iter)

            # Apply augmentations
            aug_img_tensors, aug_bbox_tensors = apply_augmentations(
                data[0],
                data[2],
                flip_prob=flip_prob,
                scale=scale,
                scale_prob=scale_prob,
                translate=translate,
                translate_prob=translate_prob,
                angle=angle,
                dist=dist,
                rotate_prob=rotate_prob,
                shear_factor=shear_factor,
                shear_prob=shear_prob)

            # im_data.data.resize_(data[0].size()).copy_(data[0])
            im_data.data.resize_(aug_img_tensors.size()).copy_(aug_img_tensors)
            im_info.data.resize_(data[1].size()).copy_(data[1])
            # gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            gt_boxes.data.resize_(
                aug_bbox_tensors.size()).copy_(aug_bbox_tensors)
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            # Compute multi-task loss
            model.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)

            loss = rpn_loss_cls_wt * rpn_loss_cls.mean() + rpn_loss_box_wt * rpn_loss_box.mean() + \
                   RCNN_loss_cls_wt * RCNN_loss_cls.mean() + RCNN_loss_bbox_wt * RCNN_loss_bbox.mean()
            # .item() extracts the Python scalar (tensor.data[0] was removed
            # for 0-dim tensors in PyTorch >= 0.5).
            loss_temp += loss.item()

            # Backward pass to compute gradients and update weights
            optimizer.zero_grad()
            loss.backward()
            if net == "vgg16":
                clip_gradient(model, 10.)
            optimizer.step()

            # Display training stats on terminal
            if step % disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= disp_interval

                if mGPUs:
                    batch_loss = loss.item()
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    batch_loss = loss.item()
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\t batch_loss: %.4f, rpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (batch_loss, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                loss_temp = 0
                start = time.time()

        # Save model at the end of each epoch (unwrap DataParallel first).
        if mGPUs:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch,
                                                     step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch,
                                                     step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        end = time.time()
        # Prune stale checkpoints for this run.  Saved files are named
        # '{arch}_{session}_{epoch}_{step}.pth'; the original glob used an
        # undefined variable `i`, raising NameError at the end of every epoch.
        delete_older_checkpoints(
            os.path.join(cfg.MODEL_DIR, "{}_{}_*.pth".format(arch, session)))
        print("Run Time: ", end - start)
예제 #17
0
def run(args):
    """Evaluate a trained Faster R-CNN checkpoint on a detection test set.

    Builds the network selected by ``args.net``, restores weights from
    ``args.load_dir``, runs detection over every image of
    ``args.imdbval_name``, applies soft-NMS, optionally visualizes results,
    and finally calls ``imdb.evaluate_detections``.  Detections are also
    pickled to ``<output_dir>/detections.pkl``.
    """
    # NOTE(review): lr/momentum/weight_decay are read here but never used in
    # this evaluation loop.
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    # Py2/Py3 shim: bind `xrange` locally so the loops below work on both.
    try:
        xrange  # Python 2
    except NameError:
        xrange = range  # Python 3

    #args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    # Dataset-specific imdb names and anchor configuration.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # Evaluation uses the unflipped validation imdb in competition mode.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    # Cap on detections kept per image across all classes.
    max_per_image = 100

    vis = args.vis

    # Higher score threshold when visualizing; keep everything otherwise.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    #pdb.set_trace()
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                          imdb.num_classes, training=False, normalize = False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # Placeholder for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):

        data = next(data_iter)
        # Copy the batch into the pre-allocated holders in place.
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the image scaling applied by the blob loader.
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + soft-NMS (class 0 is background, skipped).
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                # cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # Soft-NMS returns the surviving detections directly.
                keep = softnms_cpu_torch(cls_dets)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = keep
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
예제 #18
0
def load_model(args):
    """Construct the detector selected by ``args.net``, restore its weights
    from a checkpoint under ``args.load_dir``, and publish the model and the
    class list as the module globals ``fasterRCNN`` and ``pascal_classes``.

    Raises ``Exception`` when the checkpoint directory does not exist; drops
    into ``pdb`` when ``args.net`` is unrecognized.
    """
    # Apply optional config overrides before anything else reads cfg.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda

    np.random.seed(cfg.RNG_SEED)

    # Checkpoint lives at <load_dir>/<net>/<dataset>/faster_rcnn_<s>_<e>_<p>.pth
    input_dir = "/".join([args.load_dir, args.net, args.dataset])
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    global pascal_classes
    pascal_classes = np.asarray([
        '__background__',  # always index 0
        'element in the Array-list',
        'root node in the Binary-tree',
        'internal node in the Binary-tree',
        'leaf node in the Binary-tree',
        'vertex in the Graph',
        'process in the Deadlock',
        'resource in the Deadlock',
        'head element in the Queue',
        'element in the Queue',
        'tail element in the Queue',
        'head node in the Queue',
        'pointer in the Queue',
        'node in the Non-binary-tree',
        'node in the Network_topology',
        'head element in the Linked_List',
        'element in the Linked_List',
        'tail element in the Linked_List',
        'insert element in the Linked_List',
        'head node in the Linked_List',
        'arrow',
        'edge',
        'top element in the Stack',
        'bottom element in the Stack',
        'push element in the Stack',
        'pop element in the Stack',
        'internal element in the Stack',
        'empty stack in the Stack',
        'terminal in the Flowchart',
        'process in the Flowchart',
        'decision in the Flowchart',
        'flowline in the Flowchart',
        'document in the Flowchart',
        'input in the Flowchart',
        'output in the Flowchart',
        'annotation in the Flowchart',
        'database in the Flowchart',
        'manual operation in the Flowchart',
        'predefined process in the Flowchart',
        'on-page connector in the Flowchart'
    ])

    # Build the network: VGG16 or a ResNet of the requested depth.
    resnet_depths = {'res101': 101, 'res50': 50, 'res152': 152}
    global fasterRCNN
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net in resnet_depths:
        fasterRCNN = resnet(pascal_classes,
                            resnet_depths[args.net],
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    # Without CUDA, remap GPU-saved tensors onto the CPU at load time.
    checkpoint = (torch.load(load_name) if args.cuda > 0 else
                  torch.load(load_name,
                             map_location=(lambda storage, loc: storage)))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    def __call__(self, *args, **kwargs):

        net = 'vgg16'
        checksession = 1
        checkepoch = 6
        checkpoint = 10021
        load_dir = './mydetector/model'
        cfgs = 'vgg16.vml'
        set_cfgs = None
        dataset = 'imagenet'
        image_dir = 'images'
        webcam_num = -1
        cfg_file = './mydetector/cfgs/vgg16.yml'
        vis = False
        cfg.CUDA = True

        cfg_from_file(cfg_file)
        if set_cfgs is not None:
            cfg_from_list(set_cfgs)

        print('Using config:')
        pprint.pprint(cfg)
        np.random.seed(1)

        # train set
        # -- Note: Use validation set and disable the flipped to enable faster loading.

        #加载预训练模型
        input_dir = load_dir + "/" + net + "/" + dataset
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)
        load_name = os.path.join(
            input_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch,
                                              checkpoint))

        pascal_classes = np.asarray([
            '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
            'tvmonitor'
        ])

        # initilize the network here.
        if net == 'vgg16':
            fasterRCNN = vgg16(pascal_classes,
                               pretrained=False,
                               class_agnostic=False)
        elif net == 'res101':
            fasterRCNN = resnet(pascal_classes,
                                101,
                                pretrained=False,
                                class_agnostic=False)
        elif net == 'res50':
            fasterRCNN = resnet(pascal_classes,
                                50,
                                pretrained=False,
                                class_agnostic=False)
        elif net == 'res152':
            fasterRCNN = resnet(pascal_classes,
                                152,
                                pretrained=False,
                                class_agnostic=False)
        else:
            # Unknown backbone name: drops into the debugger instead of raising,
            # then falls through with `fasterRCNN` undefined.
            print("network is not defined")
            pdb.set_trace()

        # Build the actual network graph (RPN + heads) for the chosen backbone.
        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        # NOTE(review): no map_location is given, so loading a GPU-saved
        # checkpoint on a CPU-only machine will fail -- confirm intended.
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            # Restore the RoI pooling mode the checkpoint was trained with.
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')

        # pdb.set_trace()

        print("load checkpoint %s" % (load_name))

        # initilize the tensor holder here.
        # Reusable placeholder tensors; each batch is copied in below via
        # resize_()/copy_() so no new GPU allocations happen per image.
        im_data = torch.FloatTensor(1)
        im_info = torch.FloatTensor(1)
        num_boxes = torch.LongTensor(1)
        gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

        # make variable
        # NOTE(review): Variable(..., volatile=True) is legacy PyTorch (<0.4)
        # inference mode; modern PyTorch removed `volatile` and relies on the
        # torch.no_grad() block used further down.
        im_data = Variable(im_data, volatile=True)
        im_info = Variable(im_info, volatile=True)
        num_boxes = Variable(num_boxes, volatile=True)
        gt_boxes = Variable(gt_boxes, volatile=True)

        fasterRCNN.cuda()

        fasterRCNN.eval()

        start = time.time()
        max_per_image = 100  # unused in this fragment
        thresh = 0.05        # minimum class score to consider a detection
        vis = True           # draw detections onto the output image

        imglist = os.listdir(image_dir)
        num_images = len(imglist)

        print('Loaded Photo: {} images.'.format(num_images))

        # NOTE(review): off-by-one -- the counter starts at len(imglist) and is
        # decremented BEFORE indexing, so the first iteration handles
        # imglist[-1+len] (the last image) and the final iteration decrements
        # 0 to -1 and processes imglist[-1] (the last image AGAIN). In webcam
        # mode (webcam_num != -1) the counter never decreases, yet im_file
        # still indexes imglist[num_images], which is out of range.
        while (num_images >= 0):
            total_tic = time.time()
            if webcam_num == -1:
                num_images -= 1

            im_file = os.path.join(image_dir, imglist[num_images])
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))
            if len(im_in.shape) == 2:
                # Grayscale image: replicate the single channel to 3 channels.
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            # Scale/normalize the image into the network's input blob.
            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            # NHWC -> NCHW for the network.
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            with torch.no_grad():
                im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
                im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
                # Dummy ground truth: inference needs no labels.
                gt_boxes.resize_(1, 1, 5).zero_()
                num_boxes.resize_(1).zero_()

            # pdb.set_trace()
            det_tic = time.time()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            # Columns 1:5 are (x1, y1, x2, y2); column 0 is the batch index.
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()

                    # Per-class regression (model was built class_agnostic=False).
                    box_deltas = box_deltas.view(1, -1,
                                                 4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                # NOTE(review): np.tile on a torch tensor -- presumably this
                # branch is never taken with BBOX_REG enabled; verify.
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # Undo the test-time scaling to get original-image coordinates.
            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            if vis:
                im2show = np.copy(im)
            # Per-class thresholding + NMS; class 0 (background) is skipped.
            for j in xrange(1, len(pascal_classes)):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                    # torchvision-style nms(boxes, scores, iou_threshold).
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        # Only draw detections scoring above 0.5.
                        im2show = vis_detections(im2show, pascal_classes[j],
                                                 cls_dets.cpu().numpy(), 0.5)

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            if webcam_num == -1:
                sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                                 .format(num_images + 1, len(imglist), detect_time, nms_time))
                sys.stdout.flush()

            if vis and webcam_num == -1:
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)
                # Save the annotated image next to the input as "<name>_det.jpg".
                result_path = os.path.join(
                    image_dir, imglist[num_images][:-4] + "_det.jpg")
                cv2.imwrite(result_path, im2show)
            else:
                # Webcam mode: display live and report the frame rate; 'q' quits.
                im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
                cv2.imshow("frame", im2showRGB)
                total_toc = time.time()
                total_time = total_toc - total_tic
                frame_rate = 1 / total_time
                print('Frame rate:', frame_rate)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
# --- Example #20 (scraped-example separator; score: 0) ---
def load_model(args):
    """Instantiate a Faster R-CNN backbone and restore pre-trained weights.

    The anchor configuration is chosen to match the dataset the checkpoint
    was trained on, the class vocabulary is read from
    ``<args.classes_dir>/objects_vocab.txt``, and the checkpoint file
    ``faster_rcnn_<net>_<dataset>.pth`` is loaded from ``args.load_dir``.

    Args:
        args: parsed CLI namespace. Fields used: ``dataset``, ``cfg_file``,
            ``set_cfgs``, ``cuda``, ``classes_dir``, ``load_dir``, ``net``
            and ``class_agnostic``.

    Returns:
        tuple: ``(classes, fasterRCNN)`` -- the class-name list (index 0 is
        ``"__background__"``) and the network with checkpoint weights loaded.

    Raises:
        Exception: if ``args.load_dir`` does not exist.
        ValueError: if ``args.net`` is not one of the supported backbones.
    """
    # Anchor scales/ratios must match those used when the checkpoint was
    # trained; an unknown dataset leaves args.set_cfgs untouched, exactly as
    # the original if/elif chain did.
    _anchor_cfgs = {
        "pascal_voc":
        ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "pascal_voc_0712":
        ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "coco":
        ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "imagenet":
        ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "vg":
        ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
    }
    if args.dataset in _anchor_cfgs:
        args.set_cfgs = _anchor_cfgs[args.dataset]

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda

    print("Using config:")
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Load the class vocabulary; index 0 is reserved for the background class.
    classes = ["__background__"]
    with open(os.path.join(args.classes_dir, "objects_vocab.txt")) as f:
        for line in f:  # iterate lazily; avoids shadowing the builtin `object`
            classes.append(line.split(",")[0].lower().strip())

    if not os.path.exists(args.load_dir):
        raise Exception(
            "There is no input directory for loading network from " +
            args.load_dir)
    load_name = os.path.join(
        args.load_dir, "faster_rcnn_{}_{}.pth".format(args.net, args.dataset))

    # Build the requested backbone. The three ResNet depths share one call;
    # the depth is parsed from the net name ("res50" -> 50, ...).
    if args.net == "vgg16":
        fasterRCNN = vgg16(classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net in ("res50", "res101", "res152"):
        fasterRCNN = resnet(classes,
                            int(args.net[3:]),
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        # Fail fast instead of dropping into pdb and then hitting a
        # NameError on the undefined `fasterRCNN` below.
        raise ValueError("network is not defined: {}".format(args.net))

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # CPU-only run: remap GPU-saved tensors onto the CPU.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint["model"])
    if "pooling_mode" in checkpoint:
        # Restore the RoI pooling mode the checkpoint was trained with.
        cfg.POOLING_MODE = checkpoint["pooling_mode"]

    print("load model successfully!")

    print("load model %s" % (load_name))

    return classes, fasterRCNN
# --- Example #21 (scraped-example separator; score: 0) ---
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']

    # Pick the per-backbone YAML config ("_ls" = large-scale variant).
    args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    # NOTE(review): despite the note above, horizontal flipping IS enabled here.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    train_size = len(roidb)
# --- Example #22 (scraped-example separator; score: 0) ---
def bld_train(args, ann_path=None, step=0):
    """One active-learning round: pseudo-label the target split with a frozen
    "expectation" model, merge it with the fully labeled source split, then
    fine-tune a Faster R-CNN on the union and checkpoint every epoch.

    Args:
        args: parsed CLI namespace (dataset/net/optimizer/batch settings, ...).
        ann_path: directory containing the 'source' and 'target' annotation
            sub-directories for this active-learning step.
        step: index of the current active-learning step; used in log and
            output directory names (but see the shadowing note in the
            training loop below).
    """

    # print('Train from annotaion {}'.format(ann_path))
    # print('Called with args:')
    # print(args)

    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        # NOTE(review): os.path.join discards every component that precedes an
        # absolute one, so the leading '/' in "/activestep..." makes the log
        # directory literally "/activestep<step>" -- the './.logs/<method>'
        # prefix is thrown away. The slash should probably be removed.
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))

    # Per-dataset imdb names plus anchor / max-GT-box settings.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # print('Using config:')
    # pprint.pprint(cfg)
    # np.random.seed(cfg.RNG_SEED)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # source train set, fully labeled
    #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json')
    #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json')
    # Source split is fully labeled; target split gets pseudo labels below.
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set

    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                             imdb_tg.num_classes, training=True)

    assert imdb.num_classes == imdb_tg.num_classes

    # NOTE(review): _rand_fn is CALLED here, so it presumably returns the
    # actual worker_init_fn -- confirm against its definition.
    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # initilize the tensor holder here.
    # Reusable holders; each batch is copied in via resize_()/copy_() below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # initialize the expectation network.
    # A frozen copy of the same architecture, used only to generate pseudo
    # labels for the target set.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    # lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    # Per-parameter options: biases may get doubled lr and (optionally) no
    # weight decay, per cfg.TRAIN.DOUBLE_BIAS / BIAS_DECAY.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # expectation model
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    # NOTE(review): self-assignment below is a no-op.
    fasterRCNN_val = fasterRCNN_val
    fasterRCNN_val.eval()

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
        #fasterRCNN_val = nn.DataParallel(fasterRCNN_val)

    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # Evaluation
    # data_iter = iter(dataloader_tg)
    # for target_k in range( int(train_size_tg / args.batch_size)):
    # Pseudo-labeling pass: run the frozen expectation model over the target
    # set and write its confident detections into each roidb entry.
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # expactation pass
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) )
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # TODO: data distalliation
            # Choose the confident samples
            for b_idx in range(b_size):
                # fill one confidence
                # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0)
                # resize prediction
                # Undo the test-time scale factor stored in im_info.
                pred_boxes[b_idx] /= data[1][b_idx][2]
                for j in xrange(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image label

                    # filtering box outside of the image
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # decease the number of pgts
                    # NOTE(review): halves the threshold until at least one box
                    # survives; if every kept score is exactly 0 this loop
                    # never terminates.
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :,
                                   j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)

                    # if there is no det, error
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue

                    # find missing ID
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0): continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j *
                                                              4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # NOTE(review): this nms call passes one (boxes+score)
                    # tensor, unlike the torchvision-style
                    # nms(boxes, scores, thresh) used elsewhere in this file --
                    # confirm which nms implementation is in scope here.
                    keep = nms(cls_dets, 0.2)  # Magic number ????
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label  \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0: continue

                    # Write at most 4 top-scoring detections into the free
                    # gt_boxes slots as pseudo ground truth.
                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        # Hard-coded cap matching MAX_NUM_GT_BOXES (20).
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k,
                                      4] = j  # class
                        #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]]
                        num_boxes[b_idx] = num_boxes[b_idx] + 1
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # DEBUG
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

        #with open(fname, 'w') as f:
        # pickle.dump(roidb_tg, f)
    else:
        pass
        # with open(fname) as f:  # Python 3: open(..., 'rb')
        # roidb_tg = pickle.load(f)

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")

    roidb.extend(roidb_tg)  # merge two datasets
    print('before filtering, there are %d images...' % (len(roidb)))
    # Drop entries that ended up with no boxes at all.
    # NOTE(review): both branches below are identical and `if True:` makes the
    # else branch dead code -- presumably a leftover experiment toggle.
    i = 0
    while i < len(roidb):
        if True:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        else:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        i += 1

    print('after filtering, there are %d images...' % (len(roidb)))
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()

        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        # one step
        data_iter = iter(dataloader)
        # NOTE(review): the loop variable below shadows the `step` function
        # parameter, so the checkpoint filenames at the end of the epoch use
        # the last iteration index, not the active-learning step.
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])

            #gt_boxes.data = \
            #    torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2)
            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()

            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)

            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            # NOTE(review): `.data[0]` scalar indexing is legacy PyTorch
            # (<0.4); newer versions require `.item()`.
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                          images, step)
                loss_temp = 0
                start = time.time()
                # NOTE(review): dead code -- this break can never execute.
                if False:
                    break

        if args.mGPUs:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')