model = nn.DataParallel(model)

    if args.cuda:
        model.cuda()

    iters_per_epoch = int(train_size / args.batch_size)

    for epoch in range(args.start_epoch, args.max_epochs + 1):
        dataset.resize_batch()
        # setting to train mode
        model.train()
        loss_temp = 0
        start = time.time()

        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            model.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)
# --- Exemple #2 (0 votes) — snippet boundary marker from the scraped source ---
def train(dataset="kaggle_pna",
          train_ds="train",
          arch="couplenet",
          net="res152",
          start_epoch=1,
          max_epochs=20,
          disp_interval=100,
          save_dir="save",
          num_workers=4,
          cuda=True,
          large_scale=False,
          mGPUs=True,
          batch_size=4,
          class_agnostic=False,
          anchor_scales=4,
          optimizer="sgd",
          lr_decay_step=10,
          lr_decay_gamma=.1,
          session=1,
          resume=False,
          checksession=1,
          checkepoch=1,
          checkpoint=0,
          use_tfboard=False,
          flip_prob=0.0,
          scale=0.0,
          scale_prob=0.0,
          translate=0.0,
          translate_prob=0.0,
          angle=0.0,
          dist="cont",
          rotate_prob=0.0,
          shear_factor=0.0,
          shear_prob=0.0,
          rpn_loss_cls_wt=1,
          rpn_loss_box_wt=1,
          RCNN_loss_cls_wt=1,
          RCNN_loss_bbox_wt=1,
          **kwargs):
    """Train a detection network (rcnn / rfcn / couplenet) on the PNA dataset.

    The backbone is selected by ``arch``/``net``, configuration is loaded from
    the matching ``cfgs/*.yml`` file and then overridden in place by
    ``kwargs`` (optional ``TRAIN`` / ``RESNET`` / ``MOBILENET`` sub-dicts
    update the corresponding sections of the global ``cfg``; remaining keys go
    to its top level).  Each epoch iterates once over the roidb, applies the
    requested geometric augmentations to images and boxes, and saves a
    checkpoint named ``{arch}_{session}_{epoch}_{step}.pth`` under
    ``cfg.MODEL_DIR``; older checkpoints of the same session are pruned.

    Args:
        train_ds: imdb split to train on, "train" or "trainval".
        arch: One of "rcnn", "rfcn", "couplenet".
        net: Backbone — "res18"/"res34"/"res50"/"res101"/"res152".
            "vgg16" is not available and raises ``NotImplementedError``.
        anchor_scales: 3 -> [8, 16, 32]; 4 -> [4, 8, 16, 32].
        optimizer: "sgd" or "adam" (adam additionally scales lr by 0.1).
        rpn_loss_cls_wt, rpn_loss_box_wt, RCNN_loss_cls_wt, RCNN_loss_bbox_wt:
            Scalar weights for the four terms of the multi-task loss.
        flip_prob .. shear_prob: Augmentation magnitudes/probabilities passed
            through to ``apply_augmentations``.
        **kwargs: Extra config overrides copied into the global ``cfg``.

    Raises:
        ValueError: For an unsupported ``anchor_scales``, ``train_ds`` or
            ``optimizer`` value.
        NotImplementedError: If ``net == "vgg16"``.
    """
    print("Train Arguments: {}".format(locals()))

    # Import the network definition for the requested architecture.
    if arch == 'rcnn':
        from model.faster_rcnn.resnet import resnet
    elif arch == 'rfcn':
        from model.rfcn.resnet_atrous import resnet
    elif arch == 'couplenet':
        from model.couplenet.resnet_atrous import resnet

    from roi_data_layer.pnaRoiBatchLoader import roibatchLoader
    from roi_data_layer.pna_roidb import combined_roidb

    print('Called with kwargs:')
    print(kwargs)

    # Set up the tensorboard logger (optional dependency).
    if use_tfboard:
        from model.utils.logger import Logger
        logger = Logger('./logs')

    # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32]
    if anchor_scales == 3:
        scales = [8, 16, 32]
    elif anchor_scales == 4:
        scales = [4, 8, 16, 32]
    else:
        # Previously fell through and crashed later with a NameError on `scales`.
        raise ValueError(
            "anchor_scales must be 3 or 4, got {}".format(anchor_scales))

    # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50
    if train_ds == "train":
        imdb_name = "pna_2018_train"
    elif train_ds == "trainval":
        imdb_name = "pna_2018_trainval"
    else:
        # Previously fell through and crashed later with a NameError on `imdb_name`.
        raise ValueError(
            "train_ds must be 'train' or 'trainval', got {!r}".format(train_ds))

    set_cfgs = [
        'ANCHOR_SCALES',
        str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '30'
    ]

    # Resolve the repository root from the installed `model` package so the
    # cfgs/ directory is found regardless of the current working directory.
    import model
    model_repo_path = os.path.dirname(
        os.path.dirname(os.path.dirname(model.__file__)))

    cfg_file = "cfgs/{}_ls.yml".format(
        net) if large_scale else "cfgs/{}.yml".format(net)

    if cfg_file is not None:
        cfg_from_file(os.path.join(model_repo_path, cfg_file))
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)

    # Apply caller overrides on top of the yml config: section sub-dicts
    # first, then any remaining keys at the top level of `cfg`.
    train_kwargs = kwargs.pop("TRAIN", None)
    resnet_kwargs = kwargs.pop("RESNET", None)
    mobilenet_kwargs = kwargs.pop("MOBILENET", None)

    if train_kwargs is not None:
        for key, value in train_kwargs.items():
            cfg["TRAIN"][key] = value

    if resnet_kwargs is not None:
        for key, value in resnet_kwargs.items():
            cfg["RESNET"][key] = value

    if mobilenet_kwargs is not None:
        for key, value in mobilenet_kwargs.items():
            cfg["MOBILENET"][key] = value

    if kwargs is not None:
        for key, value in kwargs.items():
            cfg[key] = value

    print('Using config:')
    cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR)
    cfg.TRAIN_DATA_CLEAN_PATH = os.path.abspath(cfg.TRAIN_DATA_CLEAN_PATH)
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)
    print("LEARNING RATE: {}".format(cfg.TRAIN.LEARNING_RATE))

    # Warn when a CUDA device is present but unused.
    if torch.cuda.is_available() and not cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Train set.
    # Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = cuda
    imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
    train_size = len(roidb)

    print('{:d} roidb entries'.format(len(roidb)))

    output_dir = cfg.MODEL_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch = sampler(train_size, batch_size)

    dataset = roibatchLoader(roidb,
                             ratio_list,
                             ratio_index,
                             batch_size,
                             imdb.num_classes,
                             training=True)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler_batch,
                                             num_workers=num_workers)

    # Initialize the tensor holders; they are resized in place to each batch.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Move the holders to CUDA memory.
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Wrap in autograd Variables (pre-0.4 PyTorch style used file-wide).
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    # Initialize the network.
    if net == 'vgg16':
        print("Pretrained model is not downloaded and network is not used")
        # Previously fell through, leaving `model` unbound and crashing later
        # with a NameError at model.create_architecture(); fail fast instead.
        raise NotImplementedError("vgg16 backbone is not supported")
    elif net == 'res18':
        model = resnet(imdb.classes,
                       18,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res34':
        model = resnet(imdb.classes,
                       34,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res50':
        model = resnet(imdb.classes,
                       50,
                       pretrained=False,
                       class_agnostic=class_agnostic)  # TODO: Check dim error
    elif net == 'res101':
        model = resnet(imdb.classes,
                       101,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    elif net == 'res152':
        model = resnet(imdb.classes,
                       152,
                       pretrained=True,
                       class_agnostic=class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    # Create network architecture
    model.create_architecture()

    # Update model parameters
    lr = cfg.TRAIN.LEARNING_RATE

    # Per-parameter options: biases optionally get a doubled lr and (by
    # config) no weight decay, following the original Faster R-CNN recipe.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    # Optimizer (the name is rebound from the selector string to the object).
    if optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        # Previously left `optimizer` as a string and crashed later.
        raise ValueError("optimizer must be 'sgd' or 'adam', got {!r}".format(optimizer))

    # Resume training from a saved checkpoint.
    if resume:
        load_name = os.path.join(
            output_dir, '{}_{}_{}_{}.pth'.format(arch, checksession,
                                                 checkepoch, checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        session = checkpoint['session'] + 1
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # Train on multiple GPUs.
    if mGPUs:
        model = nn.DataParallel(model)

    # Copy network to CUDA memory.
    if cuda:
        model.cuda()

    # Training loop
    iters_per_epoch = int(train_size / batch_size)

    sys.stdout.flush()

    for epoch in range(start_epoch, max_epochs + 1):
        # remove batch re-sizing for augmentation or adjust?
        dataset.resize_batch()

        # Set model to train mode
        model.train()
        loss_temp = 0
        start = time.time()

        # Update learning rate as per decay step
        if epoch % (lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        # Get batch data and train
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            sys.stdout.flush()
            data = next(data_iter)

            # Apply geometric augmentations to images (data[0]) and the
            # matching ground-truth boxes (data[2]) jointly.
            aug_img_tensors, aug_bbox_tensors = apply_augmentations(
                data[0],
                data[2],
                flip_prob=flip_prob,
                scale=scale,
                scale_prob=scale_prob,
                translate=translate,
                translate_prob=translate_prob,
                angle=angle,
                dist=dist,
                rotate_prob=rotate_prob,
                shear_factor=shear_factor,
                shear_prob=shear_prob)

            im_data.data.resize_(aug_img_tensors.size()).copy_(aug_img_tensors)
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(
                aug_bbox_tensors.size()).copy_(aug_bbox_tensors)
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            # Compute multi-task loss
            model.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)

            # Weighted sum of the four loss terms; .mean() reduces over the
            # DataParallel replicas.
            loss = rpn_loss_cls_wt * rpn_loss_cls.mean() + rpn_loss_box_wt * rpn_loss_box.mean() + \
                   RCNN_loss_cls_wt * RCNN_loss_cls.mean() + RCNN_loss_bbox_wt * RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]  # pre-0.4 scalar access, kept file-wide

            # Backward pass to compute gradients and update weights
            optimizer.zero_grad()
            loss.backward()
            if net == "vgg16":
                clip_gradient(model, 10.)
            optimizer.step()

            # Display training stats on terminal
            if step % disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= disp_interval

                if mGPUs:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    batch_loss = loss.data[0]
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\t batch_loss: %.4f, rpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (batch_loss, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                loss_temp = 0
                start = time.time()

        # Save a checkpoint at the end of every epoch.  DataParallel wraps
        # the real model in `.module`, so unwrap it there.
        save_name = os.path.join(
            output_dir, '{}_{}_{}_{}.pth'.format(arch, session, epoch, step))
        save_checkpoint(
            {
                'session': session,
                'epoch': epoch + 1,
                'model': model.module.state_dict() if mGPUs else model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'pooling_mode': cfg.POOLING_MODE,
                'class_agnostic': class_agnostic,
            }, save_name)
        print('save model: {}'.format(save_name))

        end = time.time()
        # BUG FIX: the glob pattern previously used an undefined name `i`
        # (NameError) and hard-coded "couplenet"; build it from the same
        # fields used for the checkpoint file names above.
        delete_older_checkpoints(
            os.path.join(cfg.MODEL_DIR, "{}_{}_*.pth".format(arch, session)))
        print("Run Time: ", end - start)
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(train_size / args.batch_size)

    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")

    for epoch in range(args.start_epoch, args.max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()

        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            # st()
            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
# --- Exemple #4 (0 votes) — snippet boundary marker from the scraped source ---
    print(inst_domain_classifier)
    """
    if epoch % (args.lr_decay_step + 1) == 0:
        adjust_learning_rate(optimizer, args.lr_decay_gamma)
        adjust_learning_rate(img_domain_optimizer, args.lr_decay_gamma)
        adjust_learning_rate(inst_domain_optimizer, args.lr_decay_gamma)
        lr *= args.lr_decay_gamma
    """
    data_iter = iter(dataloader)

    for step in range(iters_per_epoch):
      
      data = next(data_iter)

      if total_steps==args.lr_decay_step:
        adjust_learning_rate(optimizer, args.lr_decay_gamma)
        adjust_learning_rate(img_domain_optimizer, args.lr_decay_gamma)
        adjust_learning_rate(inst_domain_optimizer, args.lr_decay_gamma)
        lr *= args.lr_decay_gamma 

      """
      @Add iter for target dataset...
      """
      total_steps+=1
      if total_steps%target_size==0:
          target_data_iter = iter(target_dataloader)


      target_data = next(target_data_iter)

# --- Exemple #5 (0 votes) — snippet boundary marker from the scraped source ---
        # Get weights from the previous group
        b_fasterRCNN.load_state_dict(
            (fasterRCNN.module if cfg.MGPU else fasterRCNN).state_dict())
        change_require_gradient(b_fasterRCNN, False)

        iters_per_epoch = train_size // cfg.TRAIN.BATCH_SIZE

        tot_step = 0

        # Here is the training loop
        for epoch in trange(cfg.TRAIN.MAX_EPOCH, desc="Epoch", leave=True):
            loss_temp = 0

            if epoch % cfg.TRAIN.LEARNING_RATE_DECAY_STEP == 0 and epoch > 0:
                adjust_learning_rate(optimizer,
                                     cfg.TRAIN.LEARNING_RATE_DECAY_GAMMA)
                lr *= cfg.TRAIN.LEARNING_RATE_DECAY_GAMMA

            data_iter = iter(dataloader)
            for _ in trange(iters_per_epoch, desc="Iter", leave=True):
                tot_step += 1
                data = next(data_iter)
                im_data.data.resize_(data[0].size()).copy_(data[0])
                im_info.data.resize_(data[1].size()).copy_(data[1])
                gt_boxes.data.resize_(data[2].size()).copy_(data[2])
                num_boxes.data.resize_(data[3].size()).copy_(data[3])
                im_path = list(data[4])

                fasterRCNN.zero_grad()
                rois, cls_prob, bbox_pred, bbox_raw, \
                rpn_label, rpn_feature, rpn_cls_score, \
                     meters['rcnn_cls_loss'].avg, meters['rcnn_box_loss'].avg,
                     meters['pos_recall'].avg, meters['neg_recall'].avg,
                     meters['acc'].avg, meters['pos_num'].avg,
                     meters['neg_num'].avg, meters['rpn_pos_recall'].avg,
                     meters['rpn_neg_recall'].avg, meters['rpn_acc'].avg,
                     meters['rpn_pos_num'].avg, meters['rpn_neg_num'].avg))
                for k in meters.keys():
                    meters[k].reset()

            if i != 0 and i % 1000 == 0:
                save_name = os.path.join('output',
                                         'nofix_{}_{}.pth'.format(epoch, i))
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'model': fasterRCNN.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, save_name)

        #if epoch % 1 == 0:
        adjust_learning_rate(optimizer, 0.1)
        lr *= 0.1

        save_name = os.path.join('output', 'nofix_{}.pth'.format(epoch))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': fasterRCNN.state_dict(),
                'optimizer': optimizer.state_dict()
            }, save_name)
def exp_htcn_mixed(cfg_file, output_dir, dataset_source, dataset_target, val_datasets,
                    device, net, optimizer, num_workers,
                    lr, batch_size, start_epoch, max_epochs, lr_decay_gamma, lr_decay_step,
                    resume, load_name, pretrained,
                    eta, gamma, ef, class_agnostic, lc, gc, LA_ATT, MID_ATT,
                    debug, _run):
    """Run one HTCN domain-adaptation training experiment (source -> target).

    Loads the yml config into the global ``cfg``, builds the source/target
    dataloaders and the HTCN model, trains for ``max_epochs`` epochs via
    ``inc_frcnn_utils.train_htcn_one_epoch_inc_union`` and writes a checkpoint
    after every epoch into ``output_dir + "_{_run._id}"``.  Returns 0
    (Sacred experiment convention; ``_run`` is the Sacred run object).

    NOTE(review): ``lr_decay_step`` is used as a collection below
    (``epoch - 1 in lr_decay_step``), unlike the scalar decay step used in
    other training functions in this file — confirm callers pass a list.
    """

    # Build source-domain args; target imdb names are filled by set_dataset_args.
    args = Args(dataset=dataset_source, dataset_t=dataset_target, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args = set_dataset_args(args)

    # Separate args for the validation datasets (test-mode settings).
    args_val = Args(dataset=dataset_source, dataset_t=val_datasets, imdb_name_target=[], cfg_file=cfg_file, net=net)
    args_val = set_dataset_args(args_val, test=True)

    # These backbones expect BGR channel order (Caffe-pretrained weights).
    is_bgr = False
    if net in ['res101', 'res50', 'res152', 'vgg16']:
        is_bgr = True


    logger = LoggerForSacred(None, ex, True)

    # Mutates the global cfg in place; everything below depends on this order.
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = True if device == 'cuda' else False
    device = torch.device(device)

    # One output directory per Sacred run id.
    output_dir = output_dir + "_{}".format(_run._id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One source dataloader, multiple target dataloaders (1s_mt = 1 source, m targets).
    dataloader_s, m_dataloader_t, imdb, m_imdb_t = init_dataloaders_1s_mt(args, batch_size, num_workers, is_bgr)
    val_dataloader_ts, val_imdb_ts = init_val_dataloaders_mt(args_val, 1, num_workers, is_bgr)

    session = 1
    fasterRCNN = init_htcn_model(LA_ATT, MID_ATT, class_agnostic, device, gc, imdb, lc, load_name, net, pretrained=pretrained)
    #fasterRCNN.re_init_da_layers(device)
    # init_optimizer also restores lr/session/start_epoch when resuming.
    lr, optimizer, session, start_epoch = init_optimizer(lr, fasterRCNN, optimizer, resume, load_name, session, start_epoch, is_all_params=True)
    # _, optimizer_unsup, _, _ = init_optimizer(lr, fasterRCNN, optimizer, resume, load_name, session,
    #                                                      start_epoch, is_all_params=True)


    if torch.cuda.device_count() > 1:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    # Epoch length is fixed at 10000 samples regardless of dataset size.
    iters_per_epoch = int(10000 / batch_size)

    # Focal loss for the domain classifiers; EFocalLoss is the "exponential" variant.
    if ef:
        FL = EFocalLoss(class_num=2, gamma=gamma)
    else:
        FL = FocalLoss(class_num=2, gamma=gamma)

    total_step = 0
    if resume:
        # Reconstruct the global step counter from the resumed epoch.
        total_step = (start_epoch - 1) * 10000




    for epoch in range(start_epoch, max_epochs + 1):
        # setting to train mode
        fasterRCNN.train()

        # lr_decay_step is a list of epochs at which to decay (see NOTE above).
        if epoch - 1 in lr_decay_step:
            adjust_learning_rate(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        total_step = inc_frcnn_utils.train_htcn_one_epoch_inc_union(args, FL, total_step, dataloader_s, m_dataloader_t, iters_per_epoch, fasterRCNN, optimizer, device, eta, logger)

        # NOTE(review): uses args.dataset_t / args.eta — assumes Args (or
        # set_dataset_args) exposes these attributes; confirm, since `eta` is
        # also available directly as a parameter here.
        save_name = os.path.join(output_dir,
                                 'target_{}_eta_{}_local_{}_global_{}_gamma_{}_session_{}_epoch_{}_total_step_{}.pth'.format(
                                     args.dataset_t, args.eta,
                                     lc, gc, gamma,
                                     session, epoch,
                                     total_step))
        save_checkpoint({
            'session': session,
            'epoch': epoch + 1,
            # Unwrap DataParallel's .module when running on multiple GPUs.
            'model': fasterRCNN.module.state_dict() if torch.cuda.device_count() > 1 else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': class_agnostic,
        }, save_name)
    return 0
# --- Exemple #8 (0 votes) — snippet boundary marker from the scraped source ---
def bld_train(args, ann_path=None, step=0):

    # print('Train from annotaion {}'.format(ann_path))
    # print('Called with args:')
    # print(args)

    if args.use_tfboard:
        from model.utils.logger import Logger
        # Set the logger
        logger = Logger(
            os.path.join('./.logs', args.active_method,
                         "/activestep" + str(step)))

    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '30'
        ]
    elif args.dataset == "vg":
        # train sizes: train, smalltrain, minitrain
        # train scale: ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '50'
        ]
    elif args.dataset == "voc_coco":
        args.imdb_name = "voc_coco_2007_train+voc_coco_2007_val"
        args.imdbval_name = "voc_coco_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]',
            'MAX_NUM_GT_BOXES', '20'
        ]
    else:
        raise NotImplementedError

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # print('Using config:')
    # pprint.pprint(cfg)
    # np.random.seed(cfg.RNG_SEED)

    # torch.backends.cudnn.benchmark = True
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # train set = source set + target set
    # -- Note: Use validation set and disable the flipped to enable faster loading.
    cfg.TRAIN.USE_FLIPPED = True
    cfg.USE_GPU_NMS = args.cuda
    # source train set, fully labeled
    #ann_path_source = os.path.join(ann_path, 'voc_coco_2007_train_f.json')
    #ann_path_target = os.path.join(ann_path, 'voc_coco_2007_train_l.json')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'source'))
    imdb_tg, roidb_tg, ratio_list_tg, ratio_index_tg = combined_roidb(
        args.imdb_name, ann_path=os.path.join(ann_path, 'target'))

    print('{:d} roidb entries for source set'.format(len(roidb)))
    print('{:d} roidb entries for target set'.format(len(roidb_tg)))

    output_dir = args.save_dir + "/" + args.net + "/" + args.dataset + "/" + args.active_method + "/activestep" + str(
        step)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    sampler_batch_tg = None  # do not sample target set

    bs_tg = 4
    dataset_tg = roibatchLoader(roidb_tg, ratio_list_tg, ratio_index_tg, bs_tg, \
                             imdb_tg.num_classes, training=True)

    assert imdb.num_classes == imdb_tg.num_classes

    dataloader_tg = torch.utils.data.DataLoader(dataset_tg,
                                                batch_size=bs_tg,
                                                sampler=sampler_batch_tg,
                                                num_workers=args.num_workers,
                                                worker_init_fn=_rand_fn())

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    image_label = torch.FloatTensor(1)
    confidence = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        image_label = image_label.cuda()
        confidence = confidence.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)
    image_label = Variable(image_label)
    confidence = Variable(confidence)

    if args.cuda:
        cfg.CUDA = True

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    # initialize the expectation network.
    if args.net == 'vgg16':
        fasterRCNN_val = vgg16(imdb.classes,
                               pretrained=True,
                               class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN_val = resnet(imdb.classes,
                                101,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN_val = resnet(imdb.classes,
                                50,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN_val = resnet(imdb.classes,
                                152,
                                pretrained=True,
                                class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        raise NotImplementedError

    fasterRCNN.create_architecture()
    fasterRCNN_val.create_architecture()

    # lr = cfg.TRAIN.LEARNING_RATE
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    else:
        raise NotImplementedError

    # Optionally resume the *trainable* model from a prior session checkpoint:
    # restores session id, start epoch, model weights, optimizer state, the
    # effective lr, and the pooling mode recorded in the checkpoint.
    if args.resume:
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    # expectation model
    # Load the frozen teacher ("expectation") weights from --model_path.
    # NOTE(review): this rebinds `checkpoint`, clobbering the resume checkpoint
    # loaded above, and may also overwrite cfg.POOLING_MODE set during resume.
    print("load checkpoint for expectation model: %s" % args.model_path)
    checkpoint = torch.load(args.model_path)
    fasterRCNN_val.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    # NOTE(review): self-assignment is a no-op (a DataParallel wrap here was
    # apparently removed; see the commented line below).
    fasterRCNN_val = fasterRCNN_val
    fasterRCNN_val.eval()

    # Only the trainable model is wrapped for multi-GPU; the teacher runs
    # single-GPU (its DataParallel wrap is deliberately commented out).
    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)
        #fasterRCNN_val = nn.DataParallel(fasterRCNN_val)

    if args.cuda:
        fasterRCNN.cuda()
        fasterRCNN_val.cuda()

    # Evaluation
    # data_iter = iter(dataloader_tg)
    # for target_k in range( int(train_size_tg / args.batch_size)):
    #
    # Pseudo-labelling stage: run the frozen teacher (fasterRCNN_val) over the
    # target-domain loader and write its confident detections into gt_boxes /
    # roidb_tg as "noisy annotations" for images that only carry image-level
    # labels. The pickle cache below is commented out, so currently this
    # always recomputes when the cache file is absent and does nothing when
    # it is present.
    fname = "noisy_annotations.pkl"
    if not os.path.isfile(fname):
        for batch_k, data in enumerate(dataloader_tg):
            # Copy the batch into the pre-allocated holder Variables
            # (im_data/im_info/gt_boxes/num_boxes/image_label are defined
            # earlier in the file, outside this chunk).
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])
            b_size = len(im_data)
            # expectation pass: teacher forward, losses discarded
            rois, cls_prob, bbox_pred, \
            _, _, _, _, _ = fasterRCNN_val(im_data, im_info, gt_boxes, num_boxes)
            scores = cls_prob.data
            # RoI columns 1:5 are the box coordinates (column 0 is batch idx).
            boxes = rois.data[:, :, 1:5]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(b_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        # print('DEBUG: Size of box_deltas is {}'.format(box_deltas.size()) )
                        box_deltas = box_deltas.view(b_size, -1,
                                                     4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # TODO: data distalliation
            # Choose the confident samples
            for b_idx in range(b_size):
                # fill one confidence
                # confidence.data[b_idx, :] = 1 - (gt_boxes.data[b_idx, :, 4] == 0)
                # resize prediction back to original image scale
                # (data[1][b_idx][2] is the im_info scale factor)
                pred_boxes[b_idx] /= data[1][b_idx][2]
                # NOTE(review): xrange is Python 2 only; use range under Py3.
                for j in xrange(1, imdb.num_classes):
                    if image_label.data[b_idx, j] != 1:
                        continue  # next if no image label

                    # filtering box outside of the image (degenerate boxes
                    # with zero width or zero height)
                    not_keep = (pred_boxes[b_idx][:, j * 4] == pred_boxes[b_idx][:, j * 4 + 2]) | \
                               (pred_boxes[b_idx][:, j * 4 + 1] == pred_boxes[b_idx][:, j * 4 + 3])
                    keep = torch.nonzero(not_keep == 0).view(-1)
                    # decrease the confidence threshold until at least one
                    # detection for class j survives.
                    # NOTE(review): if every kept score is exactly 0 this loop
                    # never terminates (thresh -> 0 but scores > 0 stays empty).
                    thresh = 0.5
                    while torch.nonzero(
                            scores[b_idx, :,
                                   j][keep] > thresh).view(-1).numel() <= 0:
                        thresh = thresh * 0.5
                    inds = torch.nonzero(
                        scores[b_idx, :, j][keep] > thresh).view(-1)

                    # if there is no det, error
                    if inds.numel() <= 0:
                        print('Warning!!!!!!! It should not appear!!')
                        continue

                    # find first empty gt slot (class column == 0 means unused)
                    missing_list = np.where(gt_boxes.data[b_idx, :, 4] == 0)[0]
                    if (len(missing_list) == 0): continue
                    missing_id = missing_list[0]
                    cls_scores = scores[b_idx, :, j][keep][inds]
                    cls_boxes = pred_boxes[b_idx][keep][inds][:, j *
                                                              4:(j + 1) * 4]
                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)),
                                         1)
                    # NOTE(review): `keep` is rebound here to the NMS survivor
                    # indices (shadowing the validity filter above); all later
                    # `[keep]` indexing refers to the NMS result.
                    keep = nms(cls_dets, 0.2)  # Magic number ????
                    keep = keep.view(-1).tolist()
                    sys.stdout.write(
                        'from {} predictions choose-> min({},4) as pseudo label  \r'
                        .format(len(cls_scores), len(keep)))
                    sys.stdout.flush()
                    # sort surviving detections by descending score
                    _, order = torch.sort(cls_scores[keep], 0, True)
                    if len(keep) == 0: continue

                    # keep at most 4 pseudo boxes per class, and never write
                    # past gt slot 20.
                    max_keep = 4
                    for pgt_k in range(max_keep):
                        if len(order) <= pgt_k: break
                        if missing_id + pgt_k >= 20: break
                        gt_boxes.data[b_idx, missing_id +
                                      pgt_k, :4] = cls_boxes[keep][order[
                                          len(order) - 1 - pgt_k]]
                        gt_boxes.data[b_idx, missing_id + pgt_k,
                                      4] = j  # class
                        #confidence[b_idx, missing_id + pgt_k] = cls_scores[keep][order[len(order) - 1 - pgt_k]]
                        num_boxes[b_idx] = num_boxes[b_idx] + 1
                # Write the pseudo ground-truth back into the target roidb
                # entry corresponding to this image.
                sample = roidb_tg[dataset_tg.ratio_index[batch_k * bs_tg +
                                                         b_idx]]
                pgt_boxes = np.array([
                    gt_boxes[b_idx, x, :4].cpu().data.numpy()
                    for x in range(int(num_boxes[b_idx]))
                ])
                pgt_classes = np.array([
                    gt_boxes[b_idx, x, 4].cpu().data[0]
                    for x in range(int(num_boxes[b_idx]))
                ])
                sample["boxes"] = pgt_boxes
                sample["gt_classes"] = pgt_classes
                # DEBUG: sanity-check that the roidb entry matches the batch.
                assert np.array_equal(sample["label"],image_label[b_idx].cpu().data.numpy()), \
                    "Image labels are not equal! {} vs {}".format(sample["label"],image_label[b_idx].cpu().data.numpy())

        #with open(fname, 'w') as f:
        # pickle.dump(roidb_tg, f)
    else:
        # Cache loading is disabled (commented out), so a pre-existing cache
        # file currently means "skip pseudo-labelling entirely".
        pass
        # with open(fname) as f:  # Python 3: open(..., 'rb')
        # roidb_tg = pickle.load(f)

    print("-- Optimization Stage --")
    # Optimization
    print("######################################################l")

    # Merge pseudo-labelled target images into the source roidb, then drop any
    # entry that ended up with zero boxes.
    roidb.extend(roidb_tg)  # merge two datasets
    print('before filtering, there are %d images...' % (len(roidb)))
    i = 0
    # Delete-while-iterating by index: after a `del`, step back one so the
    # shifted element is re-examined.
    # NOTE(review): the `if True:` / `else:` branches are identical — the else
    # arm is dead code left over from an experiment.
    while i < len(roidb):
        if True:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        else:
            if len(roidb[i]['boxes']) == 0:
                del roidb[i]
                i -= 1
        i += 1

    print('after filtering, there are %d images...' % (len(roidb)))
    # Re-rank by aspect ratio and rebuild the sampler/dataset/dataloader over
    # the merged roidb (function-local import kept as in the original).
    from roi_data_layer.roidb import rank_roidb_ratio
    ratio_list, ratio_index = rank_roidb_ratio(roidb)
    train_size = len(roidb)
    sampler_batch = sampler(train_size, args.batch_size)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                             imdb.num_classes, training=True)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             sampler=sampler_batch,
                                             num_workers=args.num_workers,
                                             worker_init_fn=_rand_fn())
    iters_per_epoch = int(train_size / args.batch_size)
    print("Training set size is {}".format(train_size))
    print("Training set size is {}".format(train_size))
    # Optimization-stage training loop over the merged (source + pseudo-label)
    # dataset. NOTE(review): `.data[0]` / `loss.data[0]` is the pre-PyTorch-0.4
    # scalar idiom; under >=0.4 use `.item()`.
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        fasterRCNN.train()

        loss_temp = 0
        start = time.time()
        epoch_start = start

        # adjust learning rate
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        # one step
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            # Copy the batch into the pre-allocated holder Variables.
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            image_label.data.resize_(data[4].size()).copy_(data[4])

            #gt_boxes.data = \
            #    torch.cat((gt_boxes.data, torch.zeros(gt_boxes.size(0), gt_boxes.size(1), 1).cuda()), dim=2)
            # Confidence buffer is zeroed every step; it is only consumed by
            # the commented-out confidence-weighted forward below.
            conf_data = torch.zeros(gt_boxes.size(0), gt_boxes.size(1)).cuda()
            confidence.data.resize_(conf_data.size()).copy_(conf_data)

            fasterRCNN.zero_grad()

            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, confidence)

            # Total loss = RPN cls + RPN box + head cls + head box
            # (.mean() collapses per-GPU losses under DataParallel).
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.data[0]

            # backward
            optimizer.zero_grad()
            loss.backward()
            # Gradient clipping only for the VGG16 backbone.
            if args.net == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            # Periodic console / TensorBoard logging.
            if step % args.disp_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= args.disp_interval

                # Under DataParallel the per-loss tensors still have a GPU
                # dimension, hence the extra .mean() before reading scalars.
                if args.mGPUs:
                    loss_rpn_cls = rpn_loss_cls.mean().data[0]
                    loss_rpn_box = rpn_loss_box.mean().data[0]
                    loss_rcnn_cls = RCNN_loss_cls.mean().data[0]
                    loss_rcnn_box = RCNN_loss_bbox.mean().data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.data[0]
                    loss_rpn_box = rpn_loss_box.data[0]
                    loss_rcnn_cls = RCNN_loss_cls.data[0]
                    loss_rcnn_box = RCNN_loss_bbox.data[0]
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                      % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                      (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    # NOTE(review): scalars are logged against the per-epoch
                    # `step`, not a global step, so curves overlap epochs.
                    for tag, value in info.items():
                        logger.scalar_summary(tag, value, step)

                    images = []
                    for k in range(args.batch_size):
                        image = draw_bounding_boxes(
                            im_data[k].data.cpu().numpy(),
                            gt_boxes[k].data.cpu().numpy(),
                            im_info[k].data.cpu().numpy(),
                            num_boxes[k].data.cpu().numpy())
                        images.append(image)
                    logger.image_summary("Train epoch %2d, iter %4d/%4d" % (epoch, step, iters_per_epoch), \
                                          images, step)
                loss_temp = 0
                start = time.time()
                # NOTE(review): dead debug toggle, never breaks.
                if False:
                    break

        # Checkpoint once per epoch. DataParallel stores the real model under
        # .module, hence the two branches. NOTE(review): the filename embeds
        # the last inner-loop `step` value (iters_per_epoch - 1).
        if args.mGPUs:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        else:
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))
            save_checkpoint(
                {
                    'session': args.session,
                    'epoch': epoch + 1,
                    'model': fasterRCNN.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'pooling_mode': cfg.POOLING_MODE,
                    'class_agnostic': args.class_agnostic,
                }, save_name)
        print('save model: {}'.format(save_name))

        epoch_end = time.time()
        print('Epoch time cost: {}'.format(epoch_end - epoch_start))

    print('finished!')
Exemple #9
0
def pgp_fasterRCNN(epochs, target_prune_rate, remove_ratio, criterion_func,
                   **kwargs):

    frcnn_extra = kwargs["frcnn_extra"]
    SHIFT_OPTI = 8
    FREEZE_FIRST_NUM = 10
    if frcnn_extra.net == "res101":
        SHIFT_OPTI = 8

    optimizer = kwargs["optimizer"]
    model = kwargs["model"]
    cuda = kwargs["cuda"]
    initializer_fn = kwargs["initializer_fn"]
    model_adapter = kwargs["model_adapter"]

    logger = kwargs["logger"]
    logger_id = ""
    if "logger_id" in kwargs:
        logger_id = kwargs["logger_id"]

    is_break = False
    if "is_break" in kwargs and kwargs["is_break"]:
        is_break = True

    till_break = False
    is_conservative = False
    if "is_conservative" in kwargs and kwargs["is_conservative"] is not None:
        is_conservative = kwargs["is_conservative"]
        till_break = True

    kwargs["train_loader"] = frcnn_extra.dataloader_train

    loss_acc = []
    type_list = []
    finished_list = False
    model_architecture = OrderedDict()
    removed_filters_total = 0
    forced_remove = False
    same_three = 0
    parameters_hard_removed_total = 0
    get_weak_fn = get_prune_index_target_with_reset
    lr = optimizer.param_groups[0]['lr']

    decay_rates_c = OrderedDict()
    original_c = OrderedDict()
    for name, parameters in model.named_parameters():
        param_type, tensor_index, layer_index, block_index = model_adapter.get_param_type_and_layer_index(
            name)
        if param_type == ParameterType.CNN_WEIGHTS or param_type == ParameterType.DOWNSAMPLE_WEIGHTS:
            decay_rates_c[
                name] = target_prune_rate * parameters.shape[0] / epochs
            original_c[name] = parameters.shape[0]
            model_architecture[name] = []

    for epoch in range(1, epochs + 1):

        start = time.clock()
        total_loss = train_frcnn(frcnn_extra, cuda, model, optimizer, is_break)
        end = time.clock()
        if logger is not None:
            logger.log_scalar(
                "pgp_target_frcnn_{}_epoch_time".format(logger_id),
                time.clock() - end, epoch)
            logger.log_scalar(
                "pgp_target_frcnn_{}_training_loss".format(logger_id),
                total_loss, epoch)

        if epoch % (frcnn_extra.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, frcnn_extra.lr_decay_gamma)
            lr *= frcnn_extra.lr_decay_gamma

        prune_index_dict, _ = criterion_func(**kwargs)

        out_channels_keep_indexes = []
        in_channels_keep_indexes = []
        reset_indexes = []
        original_out_channels = 0
        first_fc = False
        current_ids = OrderedDict()
        start_index = None
        last_start_conv = None
        last_keep_index = None
        removed_filters_total_epoch = 0
        reset_filters_total_epoch = 0
        parameters_hard_removed_per_epoch = 0
        parameters_reset_removed = 0

        # print(epoch)
        o_state_dict = optimizer.state_dict()
        for name, parameters in model.named_parameters():
            current_ids[name] = id(parameters)
            param_type, tensor_index, layer_index, block_index = model_adapter.get_param_type_and_layer_index(
                name)
            if not finished_list and parameters.requires_grad:
                type_list.append(param_type)

            if not parameters.requires_grad:
                continue

            if param_type is None:
                reset_indexes.append([])
                out_channels_keep_indexes.append([])
                in_channels_keep_indexes.append([])
                continue

            if layer_index == -1:
                # Handling CNN and BN before Resnet

                if tensor_index == model_adapter.last_layer_index:
                    if param_type == ParameterType.CNN_WEIGHTS:
                        original_out_channels = parameters.shape[0]
                        conv_tensor = model_adapter.get_layer(
                            model, param_type, tensor_index, layer_index,
                            block_index)

                        keep_index = torch.arange(
                            0, original_out_channels).long()
                        reset_index = []

                        new_conv_tensor = create_conv_tensor(
                            conv_tensor, out_channels_keep_indexes,
                            initializer_fn, keep_index, None).to(cuda)
                        model_adapter.set_layer(model, param_type,
                                                new_conv_tensor, tensor_index,
                                                layer_index, block_index)

                        in_c = parameters.shape[1]
                        if len(out_channels_keep_indexes) != 0:
                            in_c = out_channels_keep_indexes[-1].shape[0]

                        parameters_hard_removed_per_epoch += (original_out_channels - keep_index.shape[0]) * \
                                                             in_c * parameters.shape[2:].numel()
                        parameters_reset_removed += 0 if reset_index is None or len(
                            reset_index
                        ) == 0 else len(
                            reset_index) * in_c * parameters.shape[2:].numel()

                        reset_indexes.append(reset_index)
                        if out_channels_keep_indexes is not None and len(
                                out_channels_keep_indexes) != 0:
                            in_channels_keep_indexes.append(
                                out_channels_keep_indexes[-1].sort()[0])
                        else:
                            in_channels_keep_indexes.append(None)
                        out_channels_keep_indexes.append(keep_index.sort()[0])
                    elif param_type == ParameterType.CNN_BIAS:
                        reset_indexes.append(reset_indexes[-1])
                        in_channels_keep_indexes.append(
                            out_channels_keep_indexes[-1].sort()[0])
                        out_channels_keep_indexes.append(
                            out_channels_keep_indexes[-1])
                    continue

                if param_type == ParameterType.CNN_WEIGHTS:
                    original_out_channels = parameters.shape[0]
                    conv_tensor = model_adapter.get_layer(
                        model, param_type, tensor_index, layer_index,
                        block_index)

                    if name in prune_index_dict:
                        sorted_filters_index = prune_index_dict[name]
                        keep_index, reset_index = get_weak_fn(
                            original_out_channels,
                            0,
                            remove_ratio,
                            sorted_filters_index,
                            forced_remove,
                            original_c=original_c[name],
                            decay_rates_c=decay_rates_c[name],
                            epoch=epoch)
                        if reset_index is not None:
                            keep_index = torch.cat((keep_index, reset_index))
                    else:
                        keep_index = torch.arange(
                            0, original_out_channels).long()
                        reset_index = []

                    new_conv_tensor = create_conv_tensor(
                        conv_tensor, out_channels_keep_indexes, initializer_fn,
                        keep_index, reset_index).to(cuda)
                    model_adapter.set_layer(model, param_type, new_conv_tensor,
                                            tensor_index, layer_index,
                                            block_index)

                    if name not in model_architecture:
                        model_architecture[name] = []
                    model_architecture[name].append(keep_index.shape[0])

                    removed_filters_total_epoch += original_out_channels - keep_index.shape[
                        0]
                    reset_filters_total_epoch += len(reset_index)

                    in_c = 3
                    if len(out_channels_keep_indexes) != 0 and len(
                            out_channels_keep_indexes[-1]):
                        in_c = out_channels_keep_indexes[-1].shape[0]

                    parameters_hard_removed_per_epoch += (original_out_channels - keep_index.shape[0]) * \
                                                         in_c * parameters.shape[2:].numel()
                    parameters_reset_removed += 0 if reset_index is None or len(
                        reset_index) == 0 else len(
                            reset_index) * in_c * parameters.shape[2:].numel()

                    start_index = (keep_index.sort()[0], reset_index)

                    reset_indexes.append(reset_index)
                    if out_channels_keep_indexes is not None and len(
                            out_channels_keep_indexes) != 0 and len(
                                out_channels_keep_indexes[-1]):
                        in_channels_keep_indexes.append(
                            out_channels_keep_indexes[-1].sort()[0])
                    else:
                        in_channels_keep_indexes.append(None)
                    out_channels_keep_indexes.append(keep_index.sort()[0])
                elif param_type == ParameterType.CNN_BIAS:
                    reset_indexes.append(reset_indexes[-1])
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1])

                elif param_type == ParameterType.BN_WEIGHT:
                    bn_tensor = model_adapter.get_layer(
                        model, param_type, tensor_index, layer_index,
                        block_index)

                    keep_index = out_channels_keep_indexes[-1]
                    reset_index = reset_indexes[-1]

                    n_bn = create_new_bn(bn_tensor, keep_index, reset_index)
                    model_adapter.set_layer(model, param_type, n_bn,
                                            tensor_index, layer_index,
                                            block_index)
                    reset_indexes.append(reset_index)
                    if out_channels_keep_indexes is not None or len(
                            out_channels_keep_indexes) != 0:
                        in_channels_keep_indexes.append(
                            out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.BN_BIAS:
                    reset_indexes.append(reset_index)
                    if out_channels_keep_indexes is not None or len(
                            out_channels_keep_indexes) != 0:
                        in_channels_keep_indexes.append(
                            out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.FC_WEIGHTS and first_fc == False:
                    fc_tensor = model_adapter.get_layer(
                        model, param_type, tensor_index, layer_index,
                        block_index)
                    new_fc_weight = prune_fc_like(
                        fc_tensor.weight.data, out_channels_keep_indexes[-1],
                        original_out_channels)

                    new_fc_bias = None
                    if fc_tensor.bias is not None:
                        new_fc_bias = fc_tensor.bias.data
                    new_fc_tensor = nn.Linear(new_fc_weight.shape[1],
                                              new_fc_weight.shape[0],
                                              bias=new_fc_bias
                                              is not None).to(cuda)
                    new_fc_tensor.weight.data = new_fc_weight
                    if fc_tensor.bias is not None:
                        new_fc_tensor.bias.data = new_fc_bias
                    model_adapter.set_layer(model, param_type, new_fc_tensor,
                                            tensor_index, layer_index,
                                            block_index)
                    first_fc = True
                    finished_list = True

            else:

                if param_type == ParameterType.CNN_WEIGHTS:

                    if tensor_index == 1:
                        original_out_channels = parameters.shape[0]
                        conv_tensor = model_adapter.get_layer(
                            model, param_type, tensor_index, layer_index,
                            block_index)

                        if name in prune_index_dict:
                            sorted_filters_index = prune_index_dict[name]
                            keep_index, reset_index = get_weak_fn(
                                original_out_channels,
                                0,
                                remove_ratio,
                                sorted_filters_index,
                                forced_remove,
                                original_c=original_c[name],
                                decay_rates_c=decay_rates_c[name],
                                epoch=epoch)
                            if reset_index is not None:
                                keep_index = torch.cat(
                                    (keep_index, reset_index))
                        else:
                            keep_index = torch.arange(
                                0, original_out_channels).long()
                            reset_index = []

                        new_conv_tensor = create_conv_tensor(
                            conv_tensor, out_channels_keep_indexes,
                            initializer_fn, keep_index, reset_index).to(cuda)
                        model_adapter.set_layer(model, param_type,
                                                new_conv_tensor, tensor_index,
                                                layer_index, block_index)

                        if name not in model_architecture:
                            model_architecture[name] = []
                        model_architecture[name].append(keep_index.shape[0])

                        removed_filters_total_epoch += original_out_channels - keep_index.shape[
                            0]
                        reset_filters_total_epoch += len(reset_index)

                        in_c = conv_tensor.in_channels
                        if len(out_channels_keep_indexes) != 0:
                            in_c = out_channels_keep_indexes[-1].shape[0]

                        parameters_hard_removed_per_epoch += (original_out_channels - keep_index.shape[0]) * \
                                                             in_c * parameters.shape[
                                                                    2:].numel()
                        parameters_reset_removed += 0 if reset_index is None or len(
                            reset_index
                        ) == 0 else len(
                            reset_index) * in_c * parameters.shape[2:].numel()

                        reset_indexes.append(reset_index)
                        if out_channels_keep_indexes is not None or len(
                                out_channels_keep_indexes) != 0:
                            in_channels_keep_indexes.append(
                                out_channels_keep_indexes[-1].sort()[0])
                        out_channels_keep_indexes.append(keep_index.sort()[0])

                    elif tensor_index == 2:

                        downsample_cnn, d_name = model_adapter.get_downsample(
                            model, layer_index, block_index)
                        if downsample_cnn is not None:
                            original_out_channels = parameters.shape[0]
                            last_keep_index, _ = start_index
                            if d_name in prune_index_dict:
                                sorted_filters_index = prune_index_dict[d_name]
                                # conv_tensor.out_channels

                                keep_index, reset_index = get_weak_fn(
                                    original_out_channels,
                                    0,
                                    remove_ratio,
                                    sorted_filters_index,
                                    forced_remove,
                                    original_c=original_c[d_name],
                                    decay_rates_c=decay_rates_c[d_name],
                                    epoch=epoch)

                                if reset_index is not None:
                                    keep_index = torch.cat(
                                        (keep_index, reset_index))
                            else:
                                keep_index = torch.arange(
                                    0, original_out_channels).long()
                                reset_index = []

                            last_start_conv = create_conv_tensor(
                                downsample_cnn, [last_keep_index],
                                initializer_fn, keep_index,
                                reset_index).to(cuda)
                            last_start_conv = [
                                last_start_conv, 0, layer_index, block_index
                            ]

                            if d_name not in model_architecture:
                                model_architecture[d_name] = []
                            model_architecture[d_name].append(
                                keep_index.shape[0])

                            removed_filters_total_epoch += original_out_channels - keep_index.shape[
                                0]
                            reset_filters_total_epoch += len(reset_index)
                            parameters_hard_removed_per_epoch += (original_out_channels - keep_index.shape[0]) * \
                                                                 last_keep_index.shape[0] * parameters.shape[2:].numel()
                            parameters_reset_removed += 0 if reset_index is None or len(
                                reset_index) == 0 else len(
                                    reset_index) * last_keep_index.shape[
                                        0] * parameters.shape[2:].numel()
                            start_index = (keep_index.sort()[0], reset_index)

                        conv_tensor = model_adapter.get_layer(
                            model, param_type, tensor_index, layer_index,
                            block_index)
                        keep_index, reset_index = start_index

                        new_conv_tensor = create_conv_tensor(
                            conv_tensor, out_channels_keep_indexes,
                            initializer_fn, keep_index, reset_index).to(cuda)
                        model_adapter.set_layer(model, param_type,
                                                new_conv_tensor, tensor_index,
                                                layer_index, block_index)

                        reset_indexes.append(reset_index)
                        if out_channels_keep_indexes is not None or len(
                                out_channels_keep_indexes) != 0:
                            in_channels_keep_indexes.append(
                                out_channels_keep_indexes[-1].sort()[0])

                        removed_filters_total_epoch += original_out_channels - keep_index.shape[
                            0]
                        reset_filters_total_epoch += len(reset_index)
                        parameters_hard_removed_per_epoch += (original_out_channels - keep_index.shape[0]) * \
                                                             out_channels_keep_indexes[-1].shape[0] * parameters.shape[
                                                                                                      2:].numel()
                        parameters_reset_removed += 0 if reset_index is None or len(
                            reset_index
                        ) == 0 else len(
                            reset_index) * out_channels_keep_indexes[-1].shape[
                                0] * parameters.shape[2:].numel()

                        out_channels_keep_indexes.append(keep_index.sort()[0])
                        if name not in model_architecture:
                            model_architecture[name] = []
                        model_architecture[name].append(keep_index.shape[0])

                elif param_type == ParameterType.DOWNSAMPLE_WEIGHTS:

                    last_start_conv, tensor_index, layer_index, block_index = last_start_conv
                    model_adapter.set_layer(model,
                                            ParameterType.DOWNSAMPLE_WEIGHTS,
                                            last_start_conv, tensor_index,
                                            layer_index, block_index)

                    keep_index, reset_index = start_index
                    reset_indexes.append(reset_index)
                    in_channels_keep_indexes.append(last_keep_index.sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.BN_WEIGHT:
                    bn_tensor = model_adapter.get_layer(
                        model, param_type, tensor_index, layer_index,
                        block_index)

                    keep_index = out_channels_keep_indexes[-1]
                    reset_index = reset_indexes[-1]

                    n_bn = create_new_bn(bn_tensor, keep_index, reset_index)
                    model_adapter.set_layer(model, param_type, n_bn,
                                            tensor_index, layer_index,
                                            block_index)
                    reset_indexes.append(reset_index)
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.BN_BIAS:
                    reset_indexes.append(reset_indexes[-1])
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.DOWNSAMPLE_BN_W:

                    bn_tensor = model_adapter.get_layer(
                        model, param_type, tensor_index, layer_index,
                        block_index)

                    keep_index, reset_index = start_index

                    n_bn = create_new_bn(bn_tensor, keep_index, reset_index)
                    model_adapter.set_layer(model, param_type, n_bn,
                                            tensor_index, layer_index,
                                            block_index)
                    reset_indexes.append(reset_index)
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.DOWNSAMPLE_BN_B:
                    keep_index, reset_index = start_index
                    reset_indexes.append(reset_index)
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(keep_index.sort()[0])

                elif param_type == ParameterType.CNN_BIAS:
                    reset_indexes.append(reset_indexes[-1])
                    in_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1].sort()[0])
                    out_channels_keep_indexes.append(
                        out_channels_keep_indexes[-1])

        finished_list = True
        new_old_ids = OrderedDict()
        new_ids = OrderedDict()
        for k, v in model.named_parameters():
            if v.requires_grad:
                new_id = id(v)
                new_ids[k] = new_id
                new_old_ids[new_id] = current_ids[k]

        for layer in range(10):
            for p in model.RCNN_base[layer].parameters():
                p.requires_grad = False

        params = []
        for key, value in dict(model.named_parameters()).items():
            if value.requires_grad:
                if 'bias' in key:
                    params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                                'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
                else:
                    params += [{
                        'params': [value],
                        'lr': lr,
                        'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                    }]

        optimizer = optim.SGD(params,
                              lr=optimizer.param_groups[0]["lr"],
                              momentum=optimizer.param_groups[0]["momentum"])
        n_new_state_dict = optimizer.state_dict()

        for i, k in enumerate(n_new_state_dict["param_groups"]):

            old_id = new_old_ids[k['params'][0]]
            old_momentum = o_state_dict["state"][old_id]
            n_new_state_dict["state"][k['params'][0]] = old_momentum
        in_place_load_state_dict(optimizer, n_new_state_dict)

        index_op_dict = OrderedDict()
        first_fc = False
        #type_list = [x for x in type_list if x is not None]
        for i in range(len(type_list)):
            if type_list[i] == ParameterType.FC_WEIGHTS and first_fc == False:
                index_op_dict[optimizer.param_groups[i]['params'][0]] = (
                    type_list[i], out_channels_keep_indexes[i - 1], None, None)
                first_fc = True
            elif type_list[i] == ParameterType.FC_BIAS:
                continue
            elif type_list[i] == ParameterType.DOWNSAMPLE_BN_B or type_list[i] == ParameterType.DOWNSAMPLE_BN_W or \
                    type_list[i] == ParameterType.BN_BIAS or type_list[i] == ParameterType.BN_WEIGHT:
                index_op_dict[optimizer.param_groups[i]['params'][0]] = (
                    type_list[i], out_channels_keep_indexes[i],
                    reset_indexes[i], None)
            elif type_list[i] is None:
                continue
            elif type_list[i] == ParameterType.CNN_WEIGHTS or type_list[
                    i] == ParameterType.DOWNSAMPLE_WEIGHTS or type_list[
                        i] == ParameterType.CNN_BIAS or type_list == ParameterType.DOWNSAMPLE_BIAS:
                index_op_dict[optimizer.param_groups[i]['params'][0]] = (
                    type_list[i], out_channels_keep_indexes[i],
                    reset_indexes[i], in_channels_keep_indexes[i])

        j = 0
        for k, v in index_op_dict.items():

            if v[0] == ParameterType.CNN_WEIGHTS or v[
                    0] == ParameterType.DOWNSAMPLE_WEIGHTS:
                if v[3] is not None and len(v[3]):
                    optimizer.state[k]["momentum_buffer"] = optimizer.state[k][
                        "momentum_buffer"][:, v[3], :, :]
                    if v[2] is not None:
                        optimizer.state[k]["momentum_buffer"][
                            v[2]] = initializer_fn(
                                optimizer.state[k]["momentum_buffer"][v[2]])
                optimizer.state[k]['momentum_buffer'] = optimizer.state[k][
                    'momentum_buffer'][v[1], :, :, :]

            elif v[0] == ParameterType.CNN_BIAS or v[0] == ParameterType.BN_WEIGHT or v[0] == ParameterType.BN_BIAS \
                    or v[0] == ParameterType.DOWNSAMPLE_BN_W or v[0] == ParameterType.DOWNSAMPLE_BN_B:
                if v[2] is not None:
                    optimizer.state[k]["momentum_buffer"][
                        v[2]] = initializer_fn(
                            optimizer.state[k]["momentum_buffer"][v[2]])
                optimizer.state[k]['momentum_buffer'] = optimizer.state[k][
                    'momentum_buffer'][v[1]]
            else:
                optimizer.state[k]['momentum_buffer'] = \
                    prune_fc_like(optimizer.state[k]['momentum_buffer'], v[1], original_out_channels)
            j += 1
        removed_filters_total += removed_filters_total_epoch
        parameters_hard_removed_total += parameters_hard_removed_per_epoch

        map = eval_frcnn(frcnn_extra, cuda, model, is_break)
        if logger is not None:
            logger.log_scalar(
                "pgp_target_frcnn_{}_after_target_val_acc".format(logger_id),
                map, epoch)
            logger.log_scalar(
                "pgp_target_frcnn_{}_number of filter removed".format(
                    logger_id),
                removed_filters_total + reset_filters_total_epoch, epoch)
            logger.log_scalar(
                "pgp_target_frcnn_{}_acc_number of filter removed".format(
                    logger_id), map,
                removed_filters_total + reset_filters_total_epoch)
            logger.log_scalar(
                "pgp_target_frcnn_{}_acc_number of parameters removed".format(
                    logger_id), map,
                parameters_hard_removed_total + parameters_reset_removed)
        torch.cuda.empty_cache()

    return loss_acc, model_architecture
# Exemple #10
# 0
def training_fusion():
    """Train a two-stream ("fusion") Faster R-CNN detector.

    Builds the backbone selected by ``args.net`` (the ``vgg16f``/``vgg16c``
    fusion variants or a ResNet), configures per-parameter learning rates
    and weight decay, optionally resumes from a checkpoint, then runs the
    epoch/iteration training loop with periodic console logging, optional
    tensorboardX scalar logging, and a checkpoint saved after every epoch.

    NOTE(review): relies on module-level state — ``args``, ``cfg``,
    ``imdb``, ``dataloader``, ``train_size``, ``output_dir``, ``Log`` —
    which must be initialized before this function is called.
    """
    # Holder tensors; every batch is copied into them in-place below.
    im_data1 = torch.FloatTensor(1)
    im_data2 = torch.FloatTensor(1)

    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Ship the holders to the GPU before wrapping them in Variables.
    if args.cuda:
        im_data1 = im_data1.cuda()
        im_data2 = im_data2.cuda()

        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Wrap as autograd Variables (legacy pre-0.4 PyTorch style kept for
    # consistency with the rest of the file).
    im_data1 = Variable(im_data1)
    im_data2 = Variable(im_data2)

    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    # Initialize the network selected on the command line.
    if args.net == 'vgg16f':
        # Feature-level fusion variant: needs an explicit fusion mode.
        fasterRCNN = vgg16f(imdb.classes,
                            pretrained=True,
                            class_agnostic=args.class_agnostic,
                            fusion_mode=args.fusion_mode)

    elif args.net == 'vgg16c':
        # Two-RPN ("cascade"-style) fusion variant.
        fasterRCNN = vgg16c(imdb.classes,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)

    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=True,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    # args.lr always overrides cfg.TRAIN.LEARNING_RATE here (the previous
    # assignment from cfg was immediately overwritten — removed as dead code).
    lr = args.lr
    # tr_momentum = cfg.TRAIN.MOMENTUM
    # tr_momentum = args.momentum

    # Build per-parameter option groups: biases may get a doubled learning
    # rate (cfg.TRAIN.DOUBLE_BIAS) and optionally no weight decay.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': cfg.TRAIN.WEIGHT_DECAY
                }]

    if args.cuda:
        fasterRCNN.cuda()

    if args.optimizer == "adam":
        # Adam typically needs a smaller step than SGD for this model.
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    if args.resume:
        # Restore model/optimizer state and resume the epoch counter.
        load_name = os.path.join(
            output_dir,
            'faster_rcnn_{}_{}_{}.pth'.format(args.checksession,
                                              args.checkepoch,
                                              args.checkpoint))
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # Pick up the learning rate actually stored in the optimizer.
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print("loaded checkpoint %s" % (load_name))

    if args.mGPUs:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(train_size / args.batch_size)

    # tfboard
    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")
        #TODO
        #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes))
        #TODO
    # NOTE(review): the two branches below duplicate the same training loop
    # and differ only in the loss terms returned by the network (vgg16c has
    # two RPNs) and the corresponding log keys.
    if args.net == 'vgg16f':
        for epoch in range(args.start_epoch, args.max_epochs + 1):
            # setting to train mode
            fasterRCNN.train()
            loss_temp = 0
            start = time.time()

            # Decay LR every (lr_decay_step + 1) epochs (upstream quirk:
            # the "+ 1" means the first decay happens one epoch late).
            if epoch % (args.lr_decay_step + 1) == 0:
                adjust_learning_rate(optimizer, args.lr_decay_gamma)
                lr *= args.lr_decay_gamma

            data_iter = iter(dataloader)
            for step in range(iters_per_epoch):
                data = next(data_iter)

                # Copy the batch into the pre-allocated holder tensors.
                im_data1.resize_(data[0].size()).copy_(data[0])
                im_data2.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                num_boxes.resize_(data[4].size()).copy_(data[4])

                fasterRCNN.zero_grad()
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes)

                loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                       + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
                loss_temp += loss.item()

                # backward
                optimizer.zero_grad()
                loss.backward()
                # BUGFIX: the upstream check `args.net == "vgg16"` could never
                # be true inside this 'vgg16f' branch, so gradient clipping
                # intended for VGG backbones never ran. Clip for all vgg16
                # variants instead.
                if args.net.startswith("vgg16"):
                    clip_gradient(fasterRCNN, 10.)
                optimizer.step()

                if step % args.disp_interval == 0:
                    end = time.time()
                    if step > 0:
                        loss_temp /= (args.disp_interval + 1)

                    if args.mGPUs:
                        # Losses come back one-per-GPU; average them.
                        loss_rpn_cls = rpn_loss_cls.mean().item()
                        loss_rpn_box = rpn_loss_box.mean().item()
                        loss_rcnn_cls = RCNN_loss_cls.mean().item()
                        loss_rcnn_box = RCNN_loss_bbox.mean().item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt
                    else:
                        loss_rpn_cls = rpn_loss_cls.item()
                        loss_rpn_box = rpn_loss_box.item()
                        loss_rcnn_cls = RCNN_loss_cls.item()
                        loss_rcnn_box = RCNN_loss_bbox.item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt

                    Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                          % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                    Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                             (fg_cnt, bg_cnt, end - start))
                    Log.info("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                          % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                    if args.use_tfboard:
                        info = {
                            'loss': loss_temp,
                            'loss_rpn_cls': loss_rpn_cls,
                            'loss_rpn_box': loss_rpn_box,
                            'loss_rcnn_cls': loss_rcnn_cls,
                            'loss_rcnn_box': loss_rcnn_box
                        }
                        logger.add_scalars(
                            "logs_s_{}/losses".format(args.session), info,
                            (epoch - 1) * iters_per_epoch + step)

                    loss_temp = 0
                    start = time.time()

            # Save a checkpoint at the end of every epoch.
            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))

            save_checkpoint(
                {
                    'session':
                    args.session,
                    'epoch':
                    epoch + 1,
                    'model':
                    fasterRCNN.module.state_dict()
                    if args.mGPUs else fasterRCNN.state_dict(),
                    'optimizer':
                    optimizer.state_dict(),
                    'pooling_mode':
                    cfg.POOLING_MODE,
                    'class_agnostic':
                    args.class_agnostic,
                }, save_name)
            print('save model: {}'.format(save_name))
    elif args.net == 'vgg16c':
        for epoch in range(args.start_epoch, args.max_epochs + 1):
            # setting to train mode
            fasterRCNN.train()
            loss_temp = 0
            start = time.time()

            if epoch % (args.lr_decay_step + 1) == 0:
                adjust_learning_rate(optimizer, args.lr_decay_gamma)
                lr *= args.lr_decay_gamma

            data_iter = iter(dataloader)
            for step in range(iters_per_epoch):
                data = next(data_iter)

                im_data1.resize_(data[0].size()).copy_(data[0])
                im_data2.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                num_boxes.resize_(data[4].size()).copy_(data[4])

                fasterRCNN.zero_grad()
                # vgg16c has two RPNs (one per stream), hence two pairs of
                # RPN classification/box losses.
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls1, rpn_loss_box1, \
                rpn_loss_cls2, rpn_loss_box2, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data1, im_data2, im_info, gt_boxes, num_boxes)

                loss = rpn_loss_cls1.mean() + rpn_loss_box1.mean() \
                        + rpn_loss_cls2.mean() + rpn_loss_box2.mean() \
                        + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
                loss_temp += loss.item()

                # backward
                optimizer.zero_grad()
                loss.backward()
                # BUGFIX: same dead `args.net == "vgg16"` check as in the
                # vgg16f branch — clip for all vgg16 variants.
                if args.net.startswith("vgg16"):
                    clip_gradient(fasterRCNN, 10.)
                optimizer.step()

                if step % args.disp_interval == 0:
                    end = time.time()
                    if step > 0:
                        loss_temp /= (args.disp_interval + 1)

                    if args.mGPUs:
                        loss_rpn_cls1 = rpn_loss_cls1.mean().item()
                        loss_rpn_cls2 = rpn_loss_cls2.mean().item()
                        loss_rpn_box1 = rpn_loss_box1.mean().item()
                        loss_rpn_box2 = rpn_loss_box2.mean().item()
                        loss_rcnn_cls = RCNN_loss_cls.mean().item()
                        loss_rcnn_box = RCNN_loss_bbox.mean().item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt
                    else:
                        loss_rpn_cls1 = rpn_loss_cls1.item()
                        loss_rpn_cls2 = rpn_loss_cls2.item()
                        loss_rpn_box1 = rpn_loss_box1.item()
                        loss_rpn_box2 = rpn_loss_box2.item()
                        loss_rcnn_cls = RCNN_loss_cls.item()
                        loss_rcnn_box = RCNN_loss_bbox.item()
                        fg_cnt = torch.sum(rois_label.data.ne(0))
                        bg_cnt = rois_label.data.numel() - fg_cnt

                    Log.info("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                             % (args.session, epoch, step, iters_per_epoch, loss_temp, lr))
                    Log.info("\t\t\tfg/bg=(%d/%d), time cost: %f" %
                             (fg_cnt, bg_cnt, end - start))
                    Log.info("\t\t\trpn_cls1: %.4f,rpn_cls2: %.4f, rpn_box1: %.4f, rpn_box2: %.4f,rcnn_cls: %.4f, rcnn_box %.4f" \
                             % (loss_rpn_cls1, loss_rpn_cls2,loss_rpn_box1,loss_rpn_box2, loss_rcnn_cls, loss_rcnn_box))
                    if args.use_tfboard:
                        info = {
                            'loss': loss_temp,
                            'loss_rpn_cls1': loss_rpn_cls1,
                            'loss_rpn_cls2': loss_rpn_cls2,
                            'loss_rpn_box1': loss_rpn_box1,
                            'loss_rpn_box2': loss_rpn_box2,
                            'loss_rcnn_cls': loss_rcnn_cls,
                            'loss_rcnn_box': loss_rcnn_box
                        }
                        logger.add_scalars(
                            "logs_s_{}/losses".format(args.session), info,
                            (epoch - 1) * iters_per_epoch + step)

                    loss_temp = 0
                    start = time.time()

            save_name = os.path.join(
                output_dir,
                'faster_rcnn_{}_{}_{}.pth'.format(args.session, epoch, step))

            save_checkpoint(
                {
                    'session':
                    args.session,
                    'epoch':
                    epoch + 1,
                    'model':
                    fasterRCNN.module.state_dict()
                    if args.mGPUs else fasterRCNN.state_dict(),
                    'optimizer':
                    optimizer.state_dict(),
                    'pooling_mode':
                    cfg.POOLING_MODE,
                    'class_agnostic':
                    args.class_agnostic,
                }, save_name)
            print('save model: {}'.format(save_name))
    #logger.add_graph(fasterRCNN, (im_data1, im_data2, im_info, gt_boxes, num_boxes))

    if args.use_tfboard:
        logger.close()