Example #1
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers,
                   teacher_pth, student_pth, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, imitation_loss_weight, eta, gamma, ef,
                   class_agnostic, lc, gc, LA_ATT, MID_ATT, debug, _run):
    """Cross-evaluate teacher (res101) and student (res50) HTCN detectors
    with swapped RPN heads on each validation dataset.

    For every dataset in ``val_datasets`` three mAP scores are logged via
    Sacred: (1) the student body using the teacher's RPN, (2) the teacher,
    and (3) the teacher body using a freshly loaded student RPN.  Nothing
    is trained here; the function only evaluates and logs.

    NOTE(review): many parameters (optimizer, lr, batch_size, epochs,
    resume, imitation_loss_weight, eta, gamma, ef, debug, ...) are accepted
    for a uniform experiment signature but are unused in this body.
    """

    # Test-time dataset args; the source dataset only seeds the config.
    args_val = Args(dataset=dataset_source,
                    dataset_t=val_datasets,
                    imdb_name_target=[],
                    cfg_file=cfg_file,
                    net=net)
    args_val = set_dataset_args(args_val, test=True)

    logger = LoggerForSacred(None, ex, False)

    # Apply config overrides (mutates the global cfg).
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # One dataloader/imdb pair per validation dataset (batch size 1).
    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(
        args_val, 1, num_workers)

    session = 1
    # Teacher: res101 HTCN backbone restored from teacher_pth.
    teacher = init_frcnn_utils.init_model_only(device,
                                               "res101",
                                               htcn_resnet,
                                               val_imdb_ts[0],
                                               teacher_pth,
                                               class_agnostic=class_agnostic,
                                               lc=lc,
                                               gc=gc,
                                               la_attention=LA_ATT,
                                               mid_attention=MID_ATT)
    # Student: res50 HTCN backbone restored from student_pth.
    fasterRCNN = init_frcnn_utils.init_model_only(
        device,
        "res50",
        htcn_resnet,
        val_imdb_ts[0],
        student_pth,
        class_agnostic=class_agnostic,
        lc=lc,
        gc=gc,
        la_attention=LA_ATT,
        mid_attention=MID_ATT)
    # Second, untouched copy of the student: its pristine RPN is grafted
    # onto the teacher inside the loop below (fasterRCNN's own RPN gets
    # replaced with the teacher's first).
    fasterRCNN_2 = init_frcnn_utils.init_model_only(
        device,
        "res50",
        htcn_resnet,
        val_imdb_ts[0],
        student_pth,
        class_agnostic=class_agnostic,
        lc=lc,
        gc=gc,
        la_attention=LA_ATT,
        mid_attention=MID_ATT)

    # Graft the teacher's RPN into the student (shared module, not a copy).
    fasterRCNN.RCNN_rpn = teacher.RCNN_rpn

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    total_step = 0  # NOTE(review): unused in this body
    best_ap = 0.    # NOTE(review): unused in this body
    if isinstance(val_datasets, list):
        avg_ap = 0  # NOTE(review): never accumulated below — dead variable
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            # (1) student body + teacher RPN
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  fasterRCNN, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "student with teacher rpn map on {}".format(val_datasets[i]),
                map, 0)
            # (2) teacher as loaded (only truly "original" on the first
            # iteration — see the note below)
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher original map on {}".format(val_datasets[i]), map, 0)
            # (3) teacher body + student RPN.  NOTE(review): this mutation
            # persists across loop iterations, so the "teacher original"
            # score above is measured with the student RPN from the second
            # dataset onward — confirm this is intended.
            teacher.RCNN_rpn = fasterRCNN_2.RCNN_rpn
            map = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t,
                                                  teacher, device,
                                                  val_imdb_ts[i])
            logger.log_scalar(
                "teacher with stu rpn map on {}".format(val_datasets[i]), map,
                0)
Example #2
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets,
                    model_type, device, net, optimizer, num_workers, model_pth, class_agnostic, lc, gc, LA_ATT, MID_ATT,
                    debug, _run):
    """Evaluate one pre-trained detector on every validation dataset.

    Loads the checkpoint at ``model_pth`` with a backbone chosen from
    ``net``/``model_type``, runs evaluation over each dataloader built from
    ``val_datasets``, logs per-dataset and per-class mAP through Sacred,
    and returns the mAP averaged over all validation datasets.

    Fixes vs. the previous revision:
      * ``return avg_ap.item()`` raised AttributeError whenever ``avg_ap``
        was a plain float (e.g. when ``val_datasets`` is not a list) —
        ``.item()`` is now called only when available.
      * the loop variable no longer shadows the builtin ``map``.
    """
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    logger = LoggerForSacred(None, ex, True)

    # Apply config overrides (mutates the global cfg).
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args_val.set_cfgs is not None:
        cfg_from_list(args_val.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # One dataloader/imdb pair per validation dataset (batch size 1).
    val_dataloader_ts, val_imdb_ts = init_frcnn_utils.init_val_dataloaders_mt(args_val, 1, num_workers)

    # Pick the backbone constructor from the net family and model type.
    # 'res*' nets default to htcn_resnet; others default to htcn_resnet too
    # unless overridden below.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            # NOTE(review): init_model_only will fail on a None backbone —
            # confirm this combination is intentionally unsupported.
            backbone_fn = None

    model = init_frcnn_utils.init_model_only(device, net, backbone_fn, val_imdb_ts[0], model_pth, class_agnostic=class_agnostic, lc=lc,
                           gc=gc, la_attention=LA_ATT, mid_attention=MID_ATT)

    avg_ap = 0.
    avg_ap_per_class = {}
    if isinstance(val_datasets, list):
        for i, val_dataloader_t in enumerate(val_dataloader_ts):
            mean_ap, ap_per_class = frcnn_utils.eval_one_dataloader(output_dir, val_dataloader_t, model, device, val_imdb_ts[i], return_ap_class=True)
            logger.log_scalar(" map on {}".format(val_datasets[i]), mean_ap, 0)
            # Accumulate per-class AP across datasets.
            for cls, ap in ap_per_class.items():
                avg_ap_per_class[cls] = avg_ap_per_class.get(cls, 0) + ap
            avg_ap += mean_ap
        avg_ap /= len(val_dataloader_ts)
        for cls, ap in avg_ap_per_class.items():
            logger.log_scalar(" map of class {}".format(cls), ap / len(val_dataloader_ts), 0)
    logger.log_scalar("avp map", avg_ap, 0)

    # eval_one_dataloader may yield a tensor-like mAP; return a plain float
    # either way instead of crashing on float.item().
    return avg_ap.item() if hasattr(avg_ap, 'item') else avg_ap
Example #3
0
def get_ready(query_img_path):
    """Prepare one-shot detection: load the res50 query-conditioned Faster
    R-CNN checkpoint, build the unseen-split evaluation dataset, and encode
    the query image at *query_img_path* as a network-ready CUDA tensor.

    Returns (fasterRCNN, all_boxes, query, _query_im):
      fasterRCNN -- the loaded model in eval mode,
      all_boxes  -- empty [num_classes][num_images] detection accumulator,
      query      -- preprocessed query tensor (NCHW, on GPU),
      _query_im  -- the resized (640x480) query image as a numpy copy.

    Relies on the module-level ``args`` namespace for every setting.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)
    # Dataset-specific imdb names / anchor overrides (only coco handled).
    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    # Pick a group-specific cfg file when a non-zero group is requested.
    args.cfg_file = "cfgs/{}_{}.yml".format(
        args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(
            args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
    # Build the evaluation roidb; the seen/unseen split is driven by args.seen.
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    # imdb_vs.competition_mode(on=True)
    imdb_vu.competition_mode(on=True)

    # Checkpoint path: <load_dir>/<net>/<dataset>/faster_rcnn_S_E_P.pth
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # initilize the network here.
    # Only the res50 variant is supported by this script.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    # Honor the pooling mode the checkpoint was trained with, if recorded.
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
    # initilize the tensor holder here.
    # NOTE(review): these holders are created but never filled or returned
    # ('query' is rebound from the query image below) — likely vestigial.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')

    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu.num_classes,
                                training=False,
                                seen=args.seen)
    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)

    # Per-class, per-image detection accumulator (filled by the caller).
    # NOTE(review): xrange is Python 2 — assumes an xrange alias is imported
    # elsewhere in this file when run under Python 3; confirm.
    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu,
                            'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    # make query data
    # Read, resize to 640x480 (kept as _query_im for later display), then
    # prep a 128-target blob and reorder HWC -> NCHW via two transposes.
    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im,
                          dsize=(640, 480),
                          interpolation=cv2.INTER_LINEAR)
    _query_im = np.copy(query_im)
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)
    # NOTE(review): unconditional .cuda() — fails when args.cuda is False.
    query = query.cuda()

    return fasterRCNN, all_boxes, query, _query_im
Example #4
0
def extract_feature():
    """Run a pre-trained Faster R-CNN (ResNet-101 head, 1601 classes) over
    every image in ``args.image_dir`` and write the pooled region features
    (optionally with their boxes) to ``args.output_dir``, one ``.npy`` per
    image.
    """
    # Region-selection hyper-parameters.
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601
    CONF_THRESH = 0.2
    args = parse_args()

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Restore the detector weights and switch to inference mode.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Enumerate input images.
    image_names = os.listdir(args.image_dir)
    print('Number of images: {}.'.format(len(image_names)))

    # Feature extraction loop.
    for image_name in tqdm(image_names):
        frame = cv2.imread(os.path.join(args.image_dir, image_name))
        im_blob, scales = get_image_blob(frame)
        assert len(scales) == 1, 'Only single-image batch is implemented'

        # Network inputs: NCHW image, (H, W, scale) info, empty GT tensors.
        im_tensor = torch.from_numpy(im_blob).permute(0, 3, 1, 2).to(device)
        info_tensor = torch.tensor(
            [[im_blob.shape[1], im_blob.shape[2], scales[0]]]).to(device)
        dummy_gt = torch.zeros(1, 1, 5).to(device)
        dummy_count = torch.zeros(1).to(device)

        with torch.set_grad_enabled(False):
            rois, cls_prob, _, _, _, _, _, _, \
            pooled_feat = fasterRCNN(im_tensor, info_tensor, dummy_gt, dummy_count)

        # Back to numpy, boxes rescaled to original image coordinates.
        region_boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        region_boxes /= scales[0]
        class_scores = cls_prob.data.cpu().numpy().squeeze()
        region_feats = pooled_feat.data.cpu().numpy()

        # Per-region best foreground confidence after per-class NMS.
        best_conf = np.zeros(region_boxes.shape[0])
        for cls_idx in range(1, class_scores.shape[1]):
            scores_c = class_scores[:, cls_idx]
            dets = np.hstack(
                (region_boxes, scores_c[:, np.newaxis])).astype(np.float32)
            survivors = np.array(cpu_nms(dets, cfg.TEST.NMS))
            best_conf[survivors] = np.maximum(scores_c[survivors],
                                              best_conf[survivors])

        # Threshold, then clamp the region count into [MIN_BOXES, MAX_BOXES].
        chosen = np.where(best_conf >= CONF_THRESH)[0]
        if not MIN_BOXES <= len(chosen) <= MAX_BOXES:
            ranked = np.argsort(best_conf)[::-1]
            chosen = ranked[:MIN_BOXES] if len(chosen) < MIN_BOXES else ranked[:MAX_BOXES]

        feats_out = region_feats[chosen]
        boxes_out = region_boxes[chosen] if args.save_boxes else None

        out_path = os.path.join(args.output_dir,
                                image_name.split('.')[0] + '.npy')
        save_features(out_path, feats_out, boxes_out)
Example #5
0
  def __init__(self):
    """Load a VGG16-based Faster R-CNN trained on VRD and prepare it for
    single-image inference (eval mode, CUDA if requested).

    Builds the class list from data/vrd/obj.txt with '__background__'
    prepended, restores the checkpoint named by args.load_dir, and
    allocates reusable input tensor holders on the chosen device.
    """
    # Hard-coded runtime configuration (no CLI parsing in this wrapper).
    # NOTE(review): despite its name, load_dir is a checkpoint *file* path.
    self.args = EasyDict()
    self.args.dataset = 'vrd'
    self.args.cfg_file = 'faster-rcnn/cfgs/vgg16.yml'
    self.args.net = 'vgg16'
    self.args.load_dir = 'models/faster_rcnn_1_20_7559.pth'
    self.args.cuda = True
    self.args.mGPUs = False
    self.args.class_agnostic = False
    self.args.parallel_type = 0
    self.args.batch_size = 1
    print(self.args)

    if self.args.cfg_file is not None:
      cfg_from_file(self.args.cfg_file)
    
    cfg.USE_GPU_NMS = self.args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    load_name = self.args.load_dir  
    # VRD object vocabulary; index 0 is the background class.
    with open('data/vrd/obj.txt') as f:
      self.vrd_classes = [ x.strip() for x in f.readlines() ]
    self.vrd_classes = ['__background__'] + self.vrd_classes

    self.fasterRCNN = vgg16(self.vrd_classes, pretrained=False, class_agnostic=self.args.class_agnostic)
    self.fasterRCNN.create_architecture()

    # Remap GPU-saved tensors onto the CPU when CUDA is disabled.
    if self.args.cuda > 0:
      checkpoint = torch.load(load_name)
    else:
      checkpoint = torch.load(load_name, map_location=(lambda storage, loc: storage))
    self.fasterRCNN.load_state_dict(checkpoint['model'])

    # Honor the pooling mode recorded in the checkpoint, if any.
    if 'pooling_mode' in checkpoint.keys():
      cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    print("load checkpoint %s" % (load_name))

    # initilize the tensor holder here.
    # Reusable input holders, resized per image at inference time.
    self.im_data = torch.FloatTensor(1)
    self.im_info = torch.FloatTensor(1)
    self.num_boxes = torch.LongTensor(1)
    self.gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if self.args.cuda > 0:
      self.im_data = self.im_data.cuda()
      self.im_info = self.im_info.cuda()
      self.num_boxes = self.num_boxes.cuda()
      self.gt_boxes = self.gt_boxes.cuda()

    # make variable
    # NOTE(review): Variable(..., volatile=True) is the pre-0.4 PyTorch
    # no-grad idiom; on modern PyTorch this raises/warns — this code
    # appears written for PyTorch <= 0.3, confirm the target version.
    self.im_data = Variable(self.im_data, volatile=True)
    self.im_info = Variable(self.im_info, volatile=True)
    self.num_boxes = Variable(self.num_boxes, volatile=True)
    self.gt_boxes = Variable(self.gt_boxes, volatile=True)

    if self.args.cuda > 0:
      cfg.CUDA = True

    if self.args.cuda > 0:
      self.fasterRCNN.cuda()

    self.fasterRCNN.eval()       
Example #6
0
def load_model(args):
    """Construct a Faster R-CNN matching a pre-trained checkpoint and load it.

    Configures anchors for the dataset the checkpoint was trained on, reads
    the class vocabulary from ``args.classes_dir/objects_vocab.txt``, builds
    the backbone named by ``args.net``, and restores the weights from
    ``args.load_dir``.

    Returns:
        (classes, fasterRCNN): class-name list (background first) and the
        loaded network.
    """
    # Anchor configuration per training dataset; datasets not listed here
    # leave args.set_cfgs untouched.
    anchor_cfgs = {
        "pascal_voc": ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "pascal_voc_0712": ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "imagenet": ["ANCHOR_SCALES", "[8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "coco": ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
        "vg": ["ANCHOR_SCALES", "[4, 8, 16, 32]", "ANCHOR_RATIOS", "[0.5,1,2]"],
    }
    if args.dataset in anchor_cfgs:
        args.set_cfgs = anchor_cfgs[args.dataset]

    # Apply config overrides (mutates the global cfg).
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda

    print("Using config:")
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Class vocabulary; index 0 is the implicit background class.
    classes = ["__background__"]
    with open(os.path.join(args.classes_dir, "objects_vocab.txt")) as vocab:
        classes.extend(
            line.split(",")[0].lower().strip() for line in vocab.readlines())

    if not os.path.exists(args.load_dir):
        raise Exception(
            "There is no input directory for loading network from " +
            args.load_dir)
    load_name = os.path.join(
        args.load_dir, "faster_rcnn_{}_{}.pth".format(args.net, args.dataset))

    # Instantiate the backbone the checkpoint was trained with.
    resnet_depths = {"res101": 101, "res50": 50, "res152": 152}
    if args.net == "vgg16":
        fasterRCNN = vgg16(classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net in resnet_depths:
        fasterRCNN = resnet(classes,
                            resnet_depths[args.net],
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    # Remap GPU-saved tensors onto the CPU when CUDA is disabled.
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint["model"])
    # Honor the pooling mode recorded in the checkpoint, if any.
    if "pooling_mode" in checkpoint:
        cfg.POOLING_MODE = checkpoint["pooling_mode"]

    print("load model successfully!")

    print("load model %s" % (load_name))

    return classes, fasterRCNN
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
Example #8
0
def frcnn(train):
    """Run a COCO-class Faster R-CNN over every image in args.image_dir and
    dump a per-image class-score vector into annotation_dict.json.

    When ``train`` is falsy, each image is detected, per-class NMS is
    applied, and the top score of every detected class is written into a
    vector indexed by an external label list; the accumulating dict is
    serialized after every image.  When ``train`` is truthy the function
    does nothing.

    Relies on argparse settings from parse_args() and the global cfg.
    """

    args = parse_args()

    print('Called with args:')
    print(args)

    # Apply config overrides (mutates the global cfg).
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    from model.utils.config import cfg

    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    # Checkpoint path: <load_dir>/<net>/<dataset>/faster_rcnn_S_E_P.pth
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # 80 COCO categories plus background.
    # NOTE(review): '___background__' has three leading underscores (the
    # conventional label is '__background__') — harmless here since index 0
    # is never looked up, but confirm before reusing this list elsewhere.
    pascal_classes = np.asarray([
        '___background__', u'person', u'bicycle', u'car', u'motorcycle',
        u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light',
        u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird',
        u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear',
        u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie',
        u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball',
        u'kite', u'baseball bat', u'baseball glove', u'skateboard',
        u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup',
        u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple', u'sandwich',
        u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza', u'donut',
        u'cake', u'chair', u'couch', u'potted plant', u'bed', u'dining table',
        u'toilet', u'tv', u'laptop', u'mouse', u'remote', u'keyboard',
        u'cell phone', u'microwave', u'oven', u'toaster', u'sink',
        u'refrigerator', u'book', u'clock', u'vase', u'scissors',
        u'teddy bear', u'hair drier', u'toothbrush'
    ])
    # initilize the network here.
    #args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
    # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
    # NOTE(review): vgg16 uses pretrained=True while the resnets use False —
    # confirm this asymmetry is intentional.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    # Remap GPU-saved tensors onto the CPU when CUDA is disabled.
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    # pdb.set_trace()

    # NOTE(review): duplicate of the "load checkpoint" print above.
    print("load checkpoint %s" % (load_name))

    # initilize the tensor holder here.
    # Reusable input holders, resized per image inside the loop.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    # NOTE(review): no_grad only affects ops run inside this block; the
    # Variable wrapping itself records nothing, so the forward pass below
    # still runs with autograd enabled — confirm intended.
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()
    # Minimum class score for a detection to be recorded.
    thresh = 0.5

    webcam_num = args.webcam_num  # NOTE(review): unused in this body
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))
    import json, re
    from tqdm import tqdm
    # d maps the digits of each filename to its per-label score vector.
    d = {}
    pbar = tqdm(imglist)
    if not train:
        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            # im = cv2.imread(im_file)
            im_name = i
            im_in = np.array(imread(im_file))
            # Grayscale -> 3-channel.
            if len(im_in.shape) == 2:
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            # HWC blob -> NCHW tensor, copied into the reusable holders.
            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)

                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                       + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        # Per-class deltas: 4 values for every class.
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                #Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # Back to original-image coordinates.
            pred_boxes /= im_scales[0]
            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            # External label vocabulary indexed by the output score vector.
            # NOTE(review): hard-coded absolute path; re-read every image.
            lis = json.load(
                open(
                    '/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json',
                    'r'))

            sm_lis = np.zeros(len(lis))
            # NOTE(review): xrange is Python 2 — assumes an alias under Py3.
            for j in xrange(1, len(pascal_classes)):

                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:

                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    #cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets,
                               cfg.TEST.NMS,
                               force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    # Top surviving score for this class.
                    score = cls_dets[0][-1]
                    # NOTE(review): .numpy() raises on CUDA tensors and the
                    # bare except silently drops the score in that case (and
                    # when the class name is missing from lis) — confirm.
                    try:
                        sm_lis[lis.index(pascal_classes[j])] = score.numpy()
                    except:
                        pass
            # Key the result by the digits in the filename; re-serialize the
            # whole dict after every image (incremental checkpointing).
            d[re.sub("\D", "", im_name)] = sm_lis.tolist()
            json.dump(d, open('annotation_dict' + '.json', 'w'), indent=2)
    else:
        pass
        args.set_cfgs = ['ANCHOR_SCALES', '[2,4,5,6,8,9,10,12,14,16]', 'NUM_CLASSES', args.num_classes]
        #args.set_cfgs = ['ANCHOR_SCALES', '[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56]', 'NUM_CLASSES', args.num_classes]
    elif args.dataset == "activitynet":
        args.imdb_name = "train_data_25fps_flipped.pkl" #_192.pkl"
        args.imdbval_name = "val_data_25fps.pkl"
        args.num_classes = 201
        #args.set_cfgs = ['ANCHOR_SCALES', '[1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64]', 'NUM_CLASSES', args.num_classes] / stride
        args.set_cfgs = ['ANCHOR_SCALES', '[1,1.25, 1.5,1.75, 2,2.5, 3,3.5, 4,4.5, 5,5.5, 6,7, 8,9,10,11,12,14,16,18,20,22,24,28,32,36,40,44,52,60,68,76,84,92,100]', 'NUM_CLASSES', args.num_classes]  

    args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.dataset)

    cfg.CUDA = True 
    cfg.USE_GPU_NMS = True

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file) #根据配置文件中的参数更新配置信息
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # for reproduce
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(cfg.RNG_SEED)

    cudnn.benchmark = True #设置这个 flag 可以让内置的 cuDNN 的 auto-tuner 自动寻找最适合当前配置的高效算法,来达到优化运行效率的问题。

    # train set
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets,
                    device, net, optimizer, num_workers,
                    lr, batch_size, start_epoch, max_epochs, lr_decay_gamma, lr_decay_step,
                    resume, load_name, pretrained,
                    eta, gamma, ef, class_agnostic, lc, gc, LA_ATT, MID_ATT,
                    debug, _run):
    """Sacred experiment entry point: train an HTCN domain-adaptive Faster
    R-CNN on one labeled source dataset plus one target dataset.

    A checkpoint is written to ``output_dir + "_<sacred run id>"`` after
    every epoch.  Returns 0 on completion.

    Args:
        cfg_file: path to a YAML config consumed by ``cfg_from_file``.
        output_dir: base directory for checkpoints (run id is appended).
        dataset_source / dataset_target / val_datasets: dataset identifiers
            resolved by ``set_dataset_args``.
        device: 'cuda' or 'cpu'.
        net: backbone name ('res101', 'res50', 'res152', 'vgg16', ...).
        optimizer: optimizer name passed through to ``init_optimizer``.
        lr, batch_size, start_epoch, max_epochs: training schedule.
        lr_decay_gamma / lr_decay_step: LR decay factor and epoch list.
        resume / load_name / pretrained: checkpoint restore options.
        eta, gamma, ef: domain-adaptation loss weights / focal-loss options.
        class_agnostic, lc, gc, LA_ATT, MID_ATT: HTCN model switches.
        _run: Sacred run object (its ``_id`` names the output directory).
    """

    args = Args(dataset=dataset_source, dataset_t=dataset_target, imdb_name_target=[],
                cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)

    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[],
                    cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    # ResNet/VGG backbones here expect BGR (Caffe-pretrained) input.
    is_bgr = net in ['res101', 'res50', 'res152', 'vgg16']

    logger = LoggerForSacred(None, ex, True)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # One checkpoint directory per Sacred run.
    output_dir = output_dir + "_{}".format(_run._id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataloader_s, m_dataloader_t, imdb, m_imdb_t = init_dataloaders_1s_mt(
        args, batch_size, num_workers, is_bgr)
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(
        args_val, 1, num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc, imdb,
                                 lc, load_name, net, pretrained=pretrained)
    lr, optimizer, session, start_epoch = init_optimizer(
        lr, fasterRCNN, optimizer, resume, load_name, session, start_epoch,
        is_all_params=True)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    # Fixed-length epochs: 10000 samples regardless of dataset size.
    iters_per_epoch = int(10000 / batch_size)

    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    total_step = 0
    if resume:
        total_step = (start_epoch - 1) * 10000

    for epoch in range(start_epoch, max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()

        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        total_step = inc_frcnn_utils.train_htcn_one_epoch_inc_union(
            args, FL, total_step, dataloader_s, m_dataloader_t, iters_per_epoch,
            fasterRCNN, optimizer, device, eta, logger)

        # BUGFIX: the original formatted ``args.eta``, but Args() is never
        # constructed with an ``eta`` attribute in this function; use the
        # ``eta`` parameter directly so the save does not raise.
        save_name = os.path.join(output_dir,
                                 'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'.format(
                                     args.dataset_t, eta,
                                     lc, gc, gamma,
                                     session, epoch,
                                     total_step))
        save_checkpoint({
            'session': session,
            'epoch': epoch + 1,
            # Unwrap nn.DataParallel before saving so the checkpoint can be
            # reloaded on a single GPU.
            'model': fasterRCNN.module.state_dict() if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': class_agnostic,
        }, save_name)
    return 0
Exemple #11
0
    def load(self, path, use_cuda):
        """Load the multi-head tableware/food detector and the food
        classifier from *path* and switch everything to eval mode.

        Args:
            path: directory holding the detector checkpoint, the backbone
                config YAML and the classifier weights.
            use_cuda: truthy to run on GPU.
        """
        # Detector checkpoint (food, tableware, drink heads).
        ckpt_path = os.path.join(path, 'fpn101_1_10_9999.pth')

        status_db = 'CloudStatus_val'
        things_db = 'CloudTableThings_val'
        combined_db = 'CloudStatusTableThings_val'
        load_name = ckpt_path

        self.use_share_regress = True
        self.use_progress = True

        backbone = 'resnet101'
        self.cuda = use_cuda
        self.class_agnostic = False
        self.att_type = 'None'

        self.vis = True  # generate debug images

        # Food classifier.  Other dbname options: 'FoodX251', 'Food101',
        # 'Kfood'; eval_crop_type may be 'CenterCrop' or 'TenCrop'.
        self.food_classifier = FoodClassifier(net='senet154',
                                              dbname='Kfood',
                                              eval_crop_type='CenterCrop',
                                              ck_file_folder=path,
                                              use_cuda=use_cuda,
                                              pretrained=False)

        cfg_path = os.path.join(path, '{}_ls.yml'.format(backbone))
        cfg_overrides = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
        if cfg_path is not None:
            cfg_from_file(cfg_path)
        if cfg_overrides is not None:
            cfg_from_list(cfg_overrides)

        # NOTE(review): this binds a local, not cfg.USE_GPU_NMS — kept as-is
        # to preserve the original behavior; confirm whether the global was
        # intended.
        USE_GPU_NMS = self.cuda

        print('Using config:')
        pprint.pprint(cfg)

        # -- Note: Use validation set and disable the flipped to enable faster loading.
        input_dir = load_name
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)

        self.list_box_color = [(0, 0, 0), (0, 0, 200), (0, 200, 0),
                               (200, 200, 0), (200, 0, 200), (0, 200, 200),
                               (200, 0, 200)]

        self.classes0 = self.get_class_list(status_db)
        self.classes1 = self.get_class_list(things_db)
        self.classes_total = self.get_class_list(combined_db)

        from model.fpn.resnet_multi_CBAM import resnet
        self.fasterRCNN = resnet(self.classes0,
                                 self.classes1,
                                 use_pretrained=False,
                                 num_layers=101,
                                 class_agnostic=self.class_agnostic,
                                 use_share_regress=self.use_share_regress,
                                 use_progress=self.use_progress,
                                 att_type=self.att_type)
        self.fasterRCNN.create_architecture()

        print("loading checkpoint %s..." % (load_name))
        if self.cuda > 0:
            checkpoint = torch.load(load_name)
        else:
            # CPU-only: remap GPU tensors onto host memory.
            checkpoint = torch.load(
                load_name, map_location=(lambda storage, loc: storage))
        self.fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint:
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('succeeded')

        # Tensor holders reused across frames.
        self.im_data = torch.FloatTensor(1)
        self.im_info = torch.FloatTensor(1)
        self.num_boxes = torch.LongTensor(1)
        self.gt_boxes = torch.FloatTensor(1)
        if self.cuda > 0:
            self.im_data = self.im_data.cuda()
            self.im_info = self.im_info.cuda()
            self.num_boxes = self.num_boxes.cuda()
            self.gt_boxes = self.gt_boxes.cuda()

        # Wrap the holders as (grad-free) Variables.
        with torch.no_grad():
            self.im_data = Variable(self.im_data)
            self.im_info = Variable(self.im_info)
            self.num_boxes = Variable(self.num_boxes)
            self.gt_boxes = Variable(self.gt_boxes)

        if self.cuda > 0:
            cfg.CUDA = True
            self.fasterRCNN.cuda()

        self.fasterRCNN.eval()

        print('- models loaded from {}'.format(path))
def prep_model(input_dir):
    """Build a ResNet-101 Faster R-CNN over the 80 COCO categories and
    load its weights from the checkpoint at *input_dir*.

    Args:
        input_dir: path to the ``.pth`` checkpoint file.

    Returns:
        The network with weights loaded and architecture created.
    """
    opts = parse_args()

    print('Called with args:')
    print(opts)
    opts.set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
    ]

    if opts.cfg_file is not None:
        cfg_from_file(opts.cfg_file)
    if opts.set_cfgs is not None:
        cfg_from_list(opts.set_cfgs)

    cfg.USE_GPU_NMS = 1

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(input_dir)

    # 80 COCO categories plus background (VOC-style spellings such as
    # 'aeroplane'/'sofa' kept for cross-dataset naming compatibility).
    pascal_classes = np.asarray([
        '__background__', "person", "bicycle", "car", "motorbike", "aeroplane",
        "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
        "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
        "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
        "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
        "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard", "tennis racket", "bottle", "wine glass",
        "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
        "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
        "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
        "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book",
        "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
    ])

    # Instantiate and materialize the detector.
    fasterRCNN = resnet(pascal_classes,
                        101,
                        pretrained=False,
                        class_agnostic=opts.class_agnostic)
    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    return fasterRCNN
Exemple #13
0
def bld_train(args, ann_path=None, step=0):
    """Run one active-learning round: pseudo-label the weakly labeled
    target split with a frozen "expectation" detector, merge it with the
    fully labeled source split, then train the main Faster R-CNN on the
    union and checkpoint it every epoch.

    Args:
        args: parsed command-line options (dataset, net, cuda, lr,
            batch_size, mGPUs, use_tfboard, model_path, ...).
        ann_path: directory containing 'source' and 'target' annotation
            sub-directories.
        step: active-learning step index, used in log/output paths.
    """

    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger.
        # BUGFIX: the third component used to be "/activestep<N>" — with a
        # leading slash, os.path.join discards './.logs' and
        # args.active_method and logs to the filesystem root instead.
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "activestep" + str(step)))

    # Per-dataset imdb names and anchor configuration.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # Source set is fully labeled; target set only carries image-level labels
    # and will receive pseudo ground-truth boxes below.
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set

    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                             imdb_tg.num_classes, training=True)

    assert imdb.num_classes == imdb_tg.num_classes

    # NOTE(review): worker_init_fn is passed the *result* of _rand_fn();
    # confirm _rand_fn is a factory returning a per-worker init callable.
    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # Tensor holders, resized/copied per batch.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # Main (trained) network.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # Frozen "expectation" network used only to generate pseudo labels.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    lr = args.lr

    # Per-parameter LR / weight-decay groups (bias gets doubled LR and
    # optionally no decay, per the standard Faster R-CNN recipe).
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # Load the expectation model weights and freeze it in eval mode.
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    fasterRCNN_val.eval()

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # ---- Pseudo-labeling pass over the target set ----
    # For every image-level label present but missing from gt_boxes, take up
    # to 4 confident detections of that class as pseudo ground truth.
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # Expectation pass (no gradients needed; the model is in eval).
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas.
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev.
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class.
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # Choose the confident detections per image (data distillation).
            for b_idx in range(b_size):
                # Undo the image scale so boxes are in original coordinates.
                pred_boxes[b_idx] /= data[1][b_idx][2]
                # BUGFIX: xrange is Python 2 only; this file otherwise uses
                # Python 3 (print() as a function).
                for j in range(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image label

                    # Filter boxes collapsed to zero width or height.
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # Lower the threshold until at least one detection passes.
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :,
                                   j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)

                    # if there is no det, error
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue

                    # Find the first empty gt slot (class id 0 == unused).
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0): continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j *
                                                              4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # NOTE(review): 0.2 NMS overlap is an unexplained magic
                    # number inherited from the original code.
                    keep = nms(cls_dets, 0.2)
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label  \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0: continue

                    # Keep at most 4 highest-scoring boxes per class.
                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k,
                                      4] = j  # class
                        num_boxes[b_idx] = num_boxes[b_idx] + 1

                # Write the pseudo boxes back into the target roidb entry.
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # Sanity check: image-level labels must round-trip intact.
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")

    roidb.extend(roidb_tg)  # merge two datasets
    print('before filtering, there are %d images...' % (len(roidb)))
    # Drop images with no boxes at all.  (The original had an if True/else
    # here whose two branches were byte-identical.)
    roidb = [entry for entry in roidb if len(entry['boxes']) > 0]
    print('after filtering, there are %d images...' % (len(roidb)))
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))

    # ---- Training loop over the merged set ----
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()

        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])

            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()

            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            # NOTE(review): .data[0] is the pre-0.4 PyTorch scalar idiom;
            # on torch >= 0.5 this must become .item().  Kept for
            # compatibility with the rest of this (old-API) file.
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                    # Dump a few annotated training images to the board.
                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                          images, step)
                loss_temp = 0
                start = time.time()

        # Save a checkpoint; the two original branches differed only in
        # unwrapping nn.DataParallel, so they are merged here.
        save_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
        model_state = fasterRCNN.module.state_dict() if args.mGPUs \
            else fasterRCNN.state_dict()
        save_checkpoint(
            {
                'session': args.session,
                'epoch': epoch + 1,
                'model': model_state,
                'optimizer': optimizer.state_dict(),
                'pooling_mode': cfg.POOLING_MODE,
                'class_agnostic': args.class_agnostic,
            }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')
Exemple #14
0
    def __init__(self):
        """Load a pretrained grasp-detection Faster R-CNN (VGG16 backbone)
        from a hard-coded checkpoint and prepare it for CUDA inference.

        Side effects: mutates the global ``cfg`` (USE_GPU_NMS, values read
        from cfgs/vgg16.yml, POOLING_MODE, CUDA) and moves the network to
        the GPU in eval mode.
        """
        self.cuda = True
        cfg.USE_GPU_NMS = self.cuda
        cfg_file = 'cfgs/vgg16.yml'
        cfg_from_file(cfg_file)
        dataset = 'grasp'
        net = 'vgg16'
        load_dir = './models'
        self.class_agnostic = True
        # Fixed checkpoint identity: ./models/vgg16/grasp/faster_rcnn_1_5_899.pth
        checksession = 1
        checkepoch = 5
        checkpoint = 899

        input_dir = load_dir + "/" + net + "/" + dataset
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)

        load_name = os.path.join(
            input_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch,
                                              checkpoint))

        # Object-class and grasp-pose label sets; index 0 is background in both.
        self.grasp_classes = np.asarray(
            ['__background__', 'bolt', 'hammer', 'scissors', 'tape'])
        self.grasp_poses = np.asarray(['__background__',  # always index 0
                           'bin01', 'bin02', 'bin03', 'bin04', 'bin05', 'bin06', \
                           'bin07', 'bin08', 'bin09', 'bin10', 'bin11', 'bin12', \
                           'bin13', 'bin14', 'bin15', 'bin16', 'bin17', 'bin18', \
                           'binAll'])

        # initilize the network here.
        self.fasterRCNN = vgg16(self.grasp_classes,
                                self.grasp_poses,
                                pretrained=False,
                                class_agnostic=self.class_agnostic)
        self.fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        # NOTE(review): from here on `checkpoint` shadows the epoch-step int
        # above with the loaded state dict.
        if self.cuda > 0:
            checkpoint = torch.load(load_name)
        else:
            # CPU-only fallback: remap all storages onto the CPU.
            checkpoint = torch.load(
                load_name, map_location=(lambda storage, loc: storage))

        self.fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')
        # pdb.set_trace()
        print("load checkpoint %s" % (load_name))

        with torch.no_grad():
            # initilize the tensor holder here.
            # Placeholder tensors; resized/filled per image at inference time.
            self.im_data = torch.FloatTensor(1)
            self.im_info = torch.FloatTensor(1)
            self.num_boxes = torch.LongTensor(1)
            self.gt_boxes = torch.FloatTensor(1)

        # ship to cuda
        if self.cuda > 0:
            self.im_data = self.im_data.cuda()
            self.im_info = self.im_info.cuda()
            self.num_boxes = self.num_boxes.cuda()
            self.gt_boxes = self.gt_boxes.cuda()

        cfg.CUDA = self.cuda
        self.fasterRCNN.cuda()
        self.fasterRCNN.eval()

        # Detection score threshold and visualisation flag used downstream.
        self.thresh = 0.05
        self.vis = True
Exemple #15
0
def train(dataset="kaggle_pna",
          train_ds="train",
          arch="couplenet",
          net="res152",
          start_epoch=1,
          max_epochs=20,
          disp_interval=100,
          save_dir="save",
          num_workers=4,
          cuda=True,
          large_scale=False,
          mGPUs=True,
          batch_size=4,
          class_agnostic=False,
          anchor_scales=4,
          optimizer="sgd",
          lr_decay_step=10,
          lr_decay_gamma=.1,
          session=1,
          resume=False,
          checksession=1,
          checkepoch=1,
          checkpoint=0,
          use_tfboard=False,
          flip_prob=0.0,
          scale=0.0,
          scale_prob=0.0,
          translate=0.0,
          translate_prob=0.0,
          angle=0.0,
          dist="cont",
          rotate_prob=0.0,
          shear_factor=0.0,
          shear_prob=0.0,
          rpn_loss_cls_wt=1,
          rpn_loss_box_wt=1,
          RCNN_loss_cls_wt=1,
          RCNN_loss_bbox_wt=1,
          **kwargs):
    """Train a detector (``arch``: rcnn / rfcn / couplenet) on the PNA dataset.

    The backbone is selected by ``net`` (res18/34/50/101/152); augmentation
    knobs (``flip_prob`` .. ``shear_prob``) are forwarded to
    ``apply_augmentations`` and the four ``*_wt`` factors weight the
    individual loss terms.  ``kwargs`` may carry ``TRAIN`` / ``RESNET`` /
    ``MOBILENET`` dicts that are merged into the global ``cfg``; any other
    keys are set on ``cfg`` directly.

    One checkpoint named ``{arch}_{session}_{epoch}_{step}.pth`` is written
    to ``cfg.MODEL_DIR`` per epoch and older ones are pruned.  Progress is
    printed every ``disp_interval`` steps (and sent to TensorBoard when
    ``use_tfboard`` is set).  Returns ``None``.
    """
    print("Train Arguments: {}".format(locals()))

    # Import network definition for the requested architecture.
    if arch == 'rcnn':
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet

    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb

    print('Called with kwargs:')
    print(kwargs)

    # Set up logger
    if use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger('./logs')

    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32]
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]

    # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50
    if train_ds == "train":
        imdb_name = "pna_2018_train"
    elif train_ds == "trainval":
        imdb_name = "pna_2018_trainval"

    set_cfgs = [
        'ANCHOR_SCALES',
        str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'
    ]

    # Resolve the cfg file relative to the repository containing `model`.
    import model
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))

    cfg_file = "cfgs/{}_ls.yml".format(
        net) if large_scale else "cfgs/{}.yml".format(net)

    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    # Merge per-section overrides from kwargs into the global cfg.
    train_kwargs = kwargs.pop("TRAIN", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)

    if train_kwargs is not None:
        for key, value in train_kwargs.items():
            cfg["TRAIN"][key] = value

    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value

    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value

    # Remaining top-level keys go straight onto cfg.
    if kwargs:
        for key, value in kwargs.items():
            cfg[key] = value

    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.TRAIN_DATA_CLEAN_PATH = os.path.abspath(cfg.TRAIN_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    print("LEARNING RATE: {}".format(cfg.TRAIN.LEARNING_RATE))

    # Warning to use cuda if available
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Train set
    # Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)

    print('{:d} roidb entries'.format(len(roidb)))

    # output_dir = os.path.join(save_dir, arch, net, dataset)
    output_dir = cfg.MODEL_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch = sampler(train_size, batch_size)

    dataset = roibatchLoader(roidb,
                             ratio_list,
                             ratio_index,
                             batch_size,
                             imdb.num_classes,
                             training=True)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler_batch,
                                             num_workers=num_workers)

    # Initilize the tensor holder (resized per batch below).
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Copy tensors in CUDA memory
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    # Initilize the network:
    # NOTE(review): the 'vgg16' branch never binds `model`, so execution
    # would fail at model.create_architecture() below — confirm intent.
    if net == 'vgg16':
        # model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic)
        print("Pretrained model is not downloaded and network is not used")
    elif net == 'res18':
        model = resnet(imdb.classes,
                       18,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res34':
        model = resnet(imdb.classes,
                       34,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res50':
        model = resnet(imdb.classes,
                       50,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res101':
        model = resnet(imdb.classes,
                       101,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes,
                       152,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    # Create network architecture
    model.create_architecture()

    # Update model parameters
    lr = cfg.TRAIN.LEARNING_RATE
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    # Biases optionally get doubled LR and no weight decay (classic Caffe setup).
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    # Optimizer
    if optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    # Resume training
    if resume:
        load_name = os.path.join(
            output_dir, '{}_{}_{}_{}.pth'.format(arch, checksession,
                                                 checkepoch, checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        session = checkpoint['session'] + 1
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # Train on Multiple GPUS
    if mGPUs:
        model = nn.DataParallel(model)

    # Copy network to CUDA memroy
    if cuda:
        model.cuda()

    # Training loop
    iters_per_epoch = int(train_size / batch_size)

    sys.stdout.flush()

    for epoch in range(start_epoch, max_epochs + 1):
        # remove batch re-sizing for augmentation or adjust?
        dataset.resize_batch()

        # Set model to train mode
        model.train()
        loss_temp = 0
        start = time.time()

        # Update learning rate as per decay step
        if epoch % (lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        # Get batch data and train
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            sys.stdout.flush()
            data = next(data_iter)

            # Apply augmentations
            aug_img_tensors, aug_bbox_tensors = apply_augmentations(
                data[0],
                data[2],
                flip_prob=flip_prob,
                scale=scale,
                scale_prob=scale_prob,
                translate=translate,
                translate_prob=translate_prob,
                angle=angle,
                dist=dist,
                rotate_prob=rotate_prob,
                shear_factor=shear_factor,
                shear_prob=shear_prob)

            # im_data.data.resize_(data[0].size()).copy_(data[0])
            im_data.data.resize_(aug_img_tensors.size()).copy_(aug_img_tensors)
            im_info.data.resize_(data[1].size()).copy_(data[1])
            # gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            gt_boxes.data.resize_(
                aug_bbox_tensors.size()).copy_(aug_bbox_tensors)
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            # Compute multi-task loss
            model.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)

            # Weighted sum of the four loss components.
            loss = rpn_loss_cls_wt * rpn_loss_cls.mean() + rpn_loss_box_wt * rpn_loss_box.mean() + \
                   RCNN_loss_cls_wt * RCNN_loss_cls.mean() + RCNN_loss_bbox_wt * RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]

            # Backward pass to compute gradients and update weights
            optimizer.zero_grad()
            loss.backward()
            if net == "vgg16":
                clip_gradient(model, 10.)
            optimizer.step()

            # Display training stats on terminal
            if step % disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= disp_interval

                # Under DataParallel the per-GPU losses must be averaged.
                if mGPUs:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\t batch_loss: %.4f, rpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (batch_loss, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                loss_temp = 0
                start = time.time()

                # Save model at checkpoints
        if mGPUs:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch,
                                                     step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch,
                                                     step))
            save_checkpoint(
                {
                    'session': session,
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        end = time.time()
        # BUGFIX: this call previously used the undefined name `i`, raising a
        # NameError at the end of every epoch.  The checkpoints saved above
        # are named "{arch}_{session}_{epoch}_{step}.pth", so the session id
        # is the stable component to glob on.
        delete_older_checkpoints(
            os.path.join(cfg.MODEL_DIR,
                         "couplenet_{}_*.pth".format(session)))
        print("Run Time: ", end - start)
Exemple #16
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target,
                   val_datasets, device, net, optimizer, num_workers, lr,
                   batch_size, start_epoch, max_epochs, lr_decay_gamma,
                   lr_decay_step, mask_load_p, resume, load_name, pretrained,
                   model_type, eta, gamma, ef, class_agnostic, lc, gc, LA_ATT,
                   MID_ATT, debug, _run):
    """Profile model complexity (MACs / parameter counts) for an HTCN-style
    detector and its random-mask module, then iterate one pseudo-epoch of
    source data.

    The backbone constructor is chosen from ``net`` and ``model_type``
    (normal / htcn / saitp variants of ResNet or VGG16).  Complexity figures
    are printed via ``thop.profile`` + ``clever_format``.  Returns ``None``.
    """
    args = Args(dataset=dataset_source,
                dataset_t=dataset_target,
                cfg_file=cfg_file,
                net=net)
    args = set_dataset_args(args)
    # These torchvision-style backbones expect BGR input ordering.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True

    logger = LoggerForSacred(None, ex, False)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # Select the backbone-builder by family (res*/vgg) and model_type.
    backbone_fn = htcn_resnet
    if 'res' in net:
        if model_type == 'normal':
            backbone_fn = n_resnet
        elif model_type == 'saitp':
            backbone_fn = s_resnet
    else:
        if model_type == 'normal':
            backbone_fn = n_vgg16
        elif model_type == 'htcn':
            backbone_fn = htcn_vgg16
        elif model_type == 'saitp':
            backbone_fn = None
    dataloader_s, dataloader_t, imdb, imdb_t = init_dataloaders_1s_1t(
        args, batch_size, num_workers, is_bgr, False)
    model = init_frcnn_utils.init_model_only(device,
                                             net,
                                             backbone_fn,
                                             imdb_t,
                                             '',
                                             class_agnostic=class_agnostic,
                                             lc=lc,
                                             gc=gc,
                                             la_attention=LA_ATT,
                                             mid_attention=MID_ATT)
    model.eval()

    # Dummy input of a representative detection resolution for profiling.
    im_data = torch.randn(1, 3, 600, 1200).to(device)
    im_info = torch.FloatTensor([[600, 900, 2]]).to(device)
    gt_boxes = torch.zeros((1, 1, 5)).to(device)
    num_boxes = torch.zeros([1]).to(device)
    macs, params = profile(model,
                           inputs=(im_data, im_info, gt_boxes, num_boxes))
    macs, params = clever_format([macs, params], "%.3f")

    print("Model CFLOPS: {}".format(macs))
    print("Model Cparams: {}".format(params))

    # Small conv mask module, profiled for comparison.
    random_mask = nn.Sequential(
        nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False), nn.ReLU(),
        nn.Conv2d(256, 3, 1)).to(device)

    macs, params = profile(random_mask, inputs=(im_data, ))
    macs, params = clever_format([macs, params], "%.3f")

    print("Mask CFLOPS: {}".format(macs))
    print("Mask Cparams: {}".format(params))

    iters_per_epoch = int(1000 / batch_size)
    data_iter_s = iter(dataloader_s)

    # Drain one pseudo-epoch of source batches (data-loading throughput only;
    # the batch itself is not consumed further).
    for step in range(1, iters_per_epoch + 1):
        try:
            data_s = next(data_iter_s)
        except StopIteration:
            # BUGFIX: previously a bare `except:`, which also swallowed
            # KeyboardInterrupt/CUDA errors.  Only exhaustion of the loader
            # should restart the iterator.
            data_iter_s = iter(dataloader_s)
            data_s = next(data_iter_s)
        im_data = data_s[0].to(device)
        im_info = data_s[1].to(device)
        gt_boxes = data_s[2].to(device)
        num_boxes = data_s[3].to(device)

        pass
Exemple #17
0
def load_model(args):
    """Build the global Faster R-CNN detector and its class list from a saved
    checkpoint located via ``args`` (load_dir/net/dataset and the
    checksession/checkepoch/checkpoint triple).

    Mutates the module-level globals ``pascal_classes`` and ``fasterRCNN``,
    and may update ``cfg.POOLING_MODE`` from the checkpoint.
    """
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda

    np.random.seed(cfg.RNG_SEED)

    # Checkpoints are expected under <load_dir>/<net>/<dataset>/.
    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    ckpt_path = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # Class vocabulary for diagram-element detection; index 0 is background.
    global pascal_classes
    pascal_classes = np.asarray([
        '__background__',  # always index 0
        'element in the Array-list',
        'root node in the Binary-tree',
        'internal node in the Binary-tree',
        'leaf node in the Binary-tree',
        'vertex in the Graph',
        'process in the Deadlock',
        'resource in the Deadlock',
        'head element in the Queue',
        'element in the Queue',
        'tail element in the Queue',
        'head node in the Queue',
        'pointer in the Queue',
        'node in the Non-binary-tree',
        'node in the Network_topology',
        'head element in the Linked_List',
        'element in the Linked_List',
        'tail element in the Linked_List',
        'insert element in the Linked_List',
        'head node in the Linked_List',
        'arrow',
        'edge',
        'top element in the Stack',
        'bottom element in the Stack',
        'push element in the Stack',
        'pop element in the Stack',
        'internal element in the Stack',
        'empty stack in the Stack',
        'terminal in the Flowchart',
        'process in the Flowchart',
        'decision in the Flowchart',
        'flowline in the Flowchart',
        'document in the Flowchart',
        'input in the Flowchart',
        'output in the Flowchart',
        'annotation in the Flowchart',
        'database in the Flowchart',
        'manual operation in the Flowchart',
        'predefined process in the Flowchart',
        'on-page connector in the Flowchart'
    ])

    # Instantiate the backbone.  All residual variants share one constructor,
    # differing only in depth.
    global fasterRCNN
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net in ('res101', 'res50', 'res152'):
        fasterRCNN = resnet(pascal_classes,
                            int(args.net[3:]),
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    # On CPU-only runs remap every storage onto the CPU while loading.
    load_kwargs = {}
    if not args.cuda > 0:
        load_kwargs['map_location'] = lambda storage, loc: storage
    checkpoint = torch.load(ckpt_path, **load_kwargs)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')
Exemple #18
0
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, device, net,
                   optimizer, num_workers, lr, batch_size, start_epoch,
                   max_epochs, lr_decay_gamma, lr_decay_step, resume,
                   load_name, pretrained, eta, gamma, ef, class_agnostic, lc,
                   gc, LA_ATT, MID_ATT, debug, _run):
    """Train a small convolutional "domain transfer mask" (DTM) against a
    frozen HTCN detector loaded from ``load_name``, then save the mask to
    ``output_dir`` (suffixed with the checkpoint id parsed from the name).

    Only the DTM's parameters are optimized (SGD, momentum 0.9); the detector
    itself is loaded pretrained and optionally wrapped in DataParallel.
    Returns 0 on completion.
    """
    args = Args(dataset=dataset_source,
                dataset_t=[],
                imdb_name_target=[],
                cfg_file=cfg_file,
                net=net)
    args = set_dataset_args(args)

    # These torchvision-style backbones expect BGR input ordering.
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True

    logger = LoggerForSacred(None, ex)

    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # BUGFIX: regex patterns are now raw strings; "\d" in a plain literal is
    # an invalid escape (DeprecationWarning today, SyntaxError in future
    # Python versions).
    # NOTE(review): this indexes [0] without a guard — a load_name with no
    # digits raises IndexError here, unlike the guarded lookup below; confirm
    # whether digit-free names must be supported.
    load_id = re.findall(r"\d+", load_name)[0]
    output_dir = output_dir + "_{}".format(load_id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataloader_s, _, imdb, _ = init_dataloaders_1s_mt(args, batch_size,
                                                      num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT,
                                 MID_ATT,
                                 class_agnostic,
                                 device,
                                 gc,
                                 imdb,
                                 lc,
                                 load_name,
                                 net,
                                 strict=False,
                                 pretrained=pretrained)

    # The trainable mask: 1x1 conv bottleneck producing a 3-channel residual.
    dtm = nn.Sequential(nn.Conv2d(3, 256, 1, stride=1, padding=0, bias=False),
                        nn.ReLU(), nn.Conv2d(256, 3, 1))

    dtm.to(device)
    optimizer = torch.optim.SGD(dtm.parameters(), lr=lr, momentum=0.9)

    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(10000 / batch_size)

    # (E)FocalLoss over the 2-class domain discrimination problem.
    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    dtm_util.get_mask_for_target(args, FL, 0, dataloader_s, iters_per_epoch,
                                 fasterRCNN, dtm, optimizer, device, logger)

    # Use the LAST run of digits for the filename suffix, falling back to 0
    # when the checkpoint name contains none.
    find_id = re.findall(r"\d+", load_name)
    if len(find_id) == 0:
        find_id = 0
    else:
        find_id = re.findall(r"\d+", load_name)[-1]
    torch.save(
        dtm,
        os.path.join(output_dir,
                     'dtm_target_cnn_{}_{}.p'.format(load_id, find_id)))

    return 0
	  args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']
  elif args.dataset == "imagenet":
	  args.imdb_name = "imagenet_train"
	  args.imdbval_name = "imagenet_val"
	  args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']
  elif args.dataset == "vg":
	  # train sizes: train, smalltrain, minitrain
	  # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
	  args.imdb_name = "vg_150-50-50_minitrain"
	  args.imdbval_name = "vg_150-50-50_minival"
	  args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']
  # .yml: yet another markup language 其中的冒号表示键值对
  args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

  if args.cfg_file is not None:
	cfg_from_file(args.cfg_file) # 做某些配置操作
  if args.set_cfgs is not None:
	cfg_from_list(args.set_cfgs)

  print('Using config:')
  pprint.pprint(cfg) # 输出配置情况
  np.random.seed(cfg.RNG_SEED) # (For reproducibility) ??

  #torch.backends.cudnn.benchmark = True
  if torch.cuda.is_available() and not args.cuda:
	print("WARNING: You have a CUDA device, so you should probably run with --cuda")

  # train set
  # -- Note: Use validation set and disable the flipped to enable faster loading. 加速

  # cfg: configuration
Exemple #20
0
    return parser.parse_args()


if __name__ == '__main__':
    print(_init_paths.lib_path)
    args = parse_args()

    print('Called with args:')
    print(args)

    logger = SummaryWriter(
        os.path.join('logs', '{}_{}'.format(args.session, args.dataset)))

    args.imdb_name = "voc_{}_trainval".format(args.dataset)
    args.imdbval_name = "voc_{}_test".format(args.dataset)
    cfg_from_file("cfgs/{}{}.yml".format(args.net,
                                         "_ls" if args.large_scale else ""))
    if args.config_file:
        cfg_from_file(args.config_file)

    cfg_fix()

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    output_dir = os.path.join(args.save_dir, str(args.session), args.net,
                              args.dataset)
    os.makedirs(output_dir, exist_ok=True)
    cfg_to_json = deepcopy(cfg)
    del cfg_to_json["PIXEL_MEANS"]
    json.dump(cfg_to_json,
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30']
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '50']

    args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    #torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
        out.write(frame_array[i])
    out.release()


if __name__ == '__main__':

    imdb_name = "imagenet_vid_train"
    imdbval_name = "imagenet_vid_test"
    set_cfgs = [
        'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
        'MAX_NUM_GT_BOXES', '30'
    ]

    cfg_file = "cfgs/{}.yml".format(net)

    cfg_from_file(cfg_file)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    input_dir = load_dir + "/" + net + "/" + dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,