weight_decay = cfg.TRAIN.WEIGHT_DECAY disp_interval = cfg.TRAIN.DISPLAY log_interval = cfg.TRAIN.LOG_IMAGE_ITERS # load data imdb = VisualGenome(split=0, num_im=50) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) network.load_net(pretrained_model, net) # network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy') net.cuda() net.train() params = list(net.parameters()) print("Params are {}".format( '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()])) ) # optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.mkdir(output_dir) # tensorboad use_tensorboard = use_tensorboard and CrayonClient is not None if use_tensorboard:
def train():
    """Train a Faster R-CNN model on a Pascal VOC split and save checkpoints.

    Configuration comes from ``parse_args()`` and the global ``cfg`` object.
    The function builds the dataset and model, creates an SGD optimizer with
    one parameter group per trainable tensor, runs the epoch/step loop, prints
    (and optionally logs to tensorboard) running losses every
    ``args.display_interval`` steps, and writes a checkpoint every
    ``args.save_interval`` epochs to ``args.output_dir``.

    Raises:
        ValueError: if ``args.batch_size`` is not 1.
        NotImplementedError: if ``args.dataset`` or ``args.net`` is not one of
            the values handled below.
    """

    def _accuracy(hits, total):
        # Ratio hits / total; NaN when the window contained no samples, so an
        # empty foreground/background count cannot abort training.
        total = float(total)
        return float(hits) / total if total else float('nan')

    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS

    cfg.USE_GPU_NMS = bool(args.use_cuda)

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if args.batch_size != 1:
        raise ValueError('Only support single batch')

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA

    # Validate the dataset / backbone choice before any side effects (log
    # directory, output directory) are created.
    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'
    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError

    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError

    args.pretrained_model = os.path.join('data', 'pretrained', fname)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Data.
    imdb, roidb = combined_roidb(args.imdb_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)

    # Model.
    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')

    # Optimizer: one parameter group per trainable tensor so that bias terms
    # can use their own learning rate and weight decay.
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        if 'bias' in key:
            params.append({
                'params': [value],
                'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                'weight_decay': weight_decay if cfg.TRAIN.BIAS_DECAY else 0,
            })
        else:
            params.append({
                'params': [value],
                'lr': lr,
                'weight_decay': weight_decay,
            })

    optimizer = SGD(params, momentum=momentum)

    if args.use_cuda:
        model = model.cuda()

    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # Tensorboard writer is created last so that a failure above does not
    # leave an empty run directory behind.
    writer = None
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    try:
        for epoch in range(args.start_epoch, args.max_epochs + 1):
            loss_temp = 0
            rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
            rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
            tic = time.time()
            train_data_iter = iter(train_dataloader)

            if epoch in args.decay_lrs:
                lr = lr * gamma
                adjust_learning_rate(optimizer, lr)
                print('adjust learning rate to {}'.format(lr))

            for step in range(iters_per_epoch):
                im_data, gt_boxes, im_info = next(train_data_iter)
                if args.use_cuda:
                    im_data = im_data.cuda()
                    gt_boxes = gt_boxes.cuda()
                    im_info = im_info.cuda()

                im_data_variable = Variable(im_data)

                output = model(im_data_variable, gt_boxes, im_info)
                rois, _, _, \
                    rcnn_cls_loss, rcnn_box_loss, \
                    rpn_cls_loss, rpn_box_loss, _train_info = output

                loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() + \
                    rpn_cls_loss.mean() + rpn_box_loss.mean()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                loss_temp += loss.item()

                if cfg.VERBOSE:
                    rpn_tp += _train_info['rpn_tp']
                    rpn_tn += _train_info['rpn_tn']
                    rpn_fg += _train_info['rpn_num_fg']
                    rpn_bg += _train_info['rpn_num_bg']
                    rcnn_tp += _train_info['rcnn_tp']
                    rcnn_tn += _train_info['rcnn_tn']
                    rcnn_fg += _train_info['rcnn_num_fg']
                    rcnn_bg += _train_info['rcnn_num_bg']

                if (step + 1) % args.display_interval != 0:
                    continue

                # ---- periodic report over the last display window ----
                toc = time.time()
                loss_temp /= args.display_interval
                # The per-component values below are those of the latest
                # step only; ``loss_temp`` is the window average.
                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()

                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs"
                      % (epoch, step + 1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f"
                      % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v, rcnn_box_loss_v))

                if cfg.VERBOSE:
                    rpn_fg_acc = _accuracy(rpn_tp, rpn_fg)
                    rpn_bg_acc = _accuracy(rpn_tn, rpn_bg)
                    rcnn_fg_acc = _accuracy(rcnn_tp, rcnn_fg)
                    rcnn_bg_acc = _accuracy(rcnn_tn, rcnn_bg)
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f'
                          % (rpn_fg, rpn_bg, rpn_fg_acc, rpn_bg_acc))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f'
                          % (rcnn_fg, rcnn_bg, rcnn_fg_acc, rcnn_bg_acc))

                if writer is not None:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v', rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v', rcnn_box_loss_v, n_iter)

                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc', rpn_fg_acc, n_iter)
                        writer.add_scalar('rpn/bg_acc', rpn_bg_acc, n_iter)
                        writer.add_scalar('rcnn/fg_acc', rcnn_fg_acc, n_iter)
                        writer.add_scalar('rcnn/bg_acc', rcnn_bg_acc, n_iter)

                # Reset the window accumulators and the timer.
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()

            if epoch % args.save_interval == 0:
                save_name = os.path.join(
                    output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
                torch.save({
                    'model': model.state_dict(),
                    'epoch': epoch,
                    'lr': lr
                }, save_name)
    finally:
        # Flush and release the event file even if training is interrupted.
        if writer is not None:
            writer.close()
# load net net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) # network.load_pretrained_npy(net, pretrained_model) network.load_pretrained_model(net, 'vgg16') # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5' # network.load_net(model_file, net) # exp_name = 'vgg16_02-19_13-24' # start_step = 60001 # lr /= 10. # network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01) net.cuda() net.train() # set model in train mode, has effect on Dropout and Batchnorm. Use eval() to set evaluation mode. params = list(net.parameters()) # optimizer = torch.optim.Adam(params[-8:], lr=lr) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.mkdir(output_dir) # tensorboad use_tensorboard = use_tensorboard and CrayonClient is not None if use_tensorboard: cc = CrayonClient(hostname='127.0.0.1') if remove_all_log: cc.remove_all_experiments() if exp_name is None:
def train():
    """Train a Faster R-CNN model on a Pascal VOC split and save checkpoints.

    Hyper-parameters and switches are read from ``parse_args()``; ``cfg`` is
    consulted for ``TRAIN.DOUBLE_BIAS`` and ``VERBOSE``. The function builds
    the dataset and model, creates an SGD optimizer with one parameter group
    per trainable tensor, runs ``args.epochs`` epochs, reports running losses
    every ``args.display_interval`` steps (optionally also to tensorboard),
    and writes a checkpoint to ``args.output_dir`` every
    ``args.save_interval`` epochs.

    Raises:
        NotImplementedError: if ``args.dataset`` is not one of the values
            handled below.
    """

    def _accuracy(hits, total):
        # Ratio hits / total; NaN when the window contained no samples, so an
        # empty foreground/background count cannot abort training.
        total = float(total)
        return float(hits) / total if total else float('nan')

    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs
    momentum = args.momentum
    weight_decay = args.weight_decay
    # NOTE: ``bais_decay`` is the (misspelled) attribute name produced by the
    # argument parser; it is kept so the command line stays compatible.
    bias_decay = args.bais_decay
    gamma = args.gamma

    cfg.USE_GPU_NMS = bool(args.use_gpu)

    # load data
    print('load data')
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError

    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)

    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)

    # One parameter group per trainable tensor so that bias terms can use
    # their own learning rate and weight decay.
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        if 'bias' in key:
            params.append({
                'params': [value],
                'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                'weight_decay': weight_decay if bias_decay else 0,
            })
        else:
            params.append({
                'params': [value],
                'lr': lr,
                'weight_decay': weight_decay,
            })

    if args.use_gpu:
        model = model.cuda()

    model.train()

    # define optimizer
    optimizer = SGD(params, momentum=momentum)

    # The checkpoint directory is loop-invariant: create it once up front.
    # ``makedirs`` also handles a nested output path.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Tensorboard writer is created last so that a failure above does not
    # leave an empty run directory behind.
    writer = SummaryWriter() if args.use_tfboard else None

    # training
    print('start training...')
    try:
        for epoch in range(args.epochs):
            start_time = time.time()
            train_data_iter = iter(train_dataloader)
            temp_loss = 0
            rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
            faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0

            if epoch in decay_lrs:
                lr = lr * gamma
                adjust_lr(optimizer, lr)
                print('adjusting learning rate to {}'.format(lr))

            for step in range(iter_per_epoch):
                im_data, gt_boxes, im_info = next(train_data_iter)

                if args.use_gpu:
                    im_data = im_data.cuda()
                    gt_boxes = gt_boxes.cuda()
                    im_info = im_info.cuda()

                im_data_variable = Variable(im_data)

                outputs = model(im_data_variable, gt_boxes, im_info)
                rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
                    rpn_cls_loss, rpn_reg_loss, _train_info = outputs

                loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                    rpn_cls_loss.mean() + rpn_reg_loss.mean()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                temp_loss += loss.item()

                if cfg.VERBOSE:
                    rpn_tp += _train_info['rpn_tp']
                    rpn_tn += _train_info['rpn_tn']
                    rpn_fg += _train_info['rpn_num_fg']
                    rpn_bg += _train_info['rpn_num_bg']
                    faster_rcnn_tp += _train_info['faster_rcnn_tp']
                    faster_rcnn_tn += _train_info['faster_rcnn_tn']
                    faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                    faster_rcnn_bg += _train_info['faster_rcnn_num_bg']

                if (step + 1) % args.display_interval != 0:
                    continue

                # ---- periodic report over the last display window ----
                end_time = time.time()
                temp_loss /= args.display_interval
                # The per-component values below are those of the latest
                # step only; ``temp_loss`` is the window average.
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()

                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f'
                      % (epoch, step + 1, iter_per_epoch, temp_loss, end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m, faster_rcnn_reg_loss_m))

                if writer is not None:
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m', faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m', faster_rcnn_reg_loss_m, n_iter)

                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc', _accuracy(rpn_tp, rpn_fg), n_iter)
                        writer.add_scalar('rpn/bg_acc', _accuracy(rpn_tn, rpn_bg), n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc', _accuracy(faster_rcnn_tp, faster_rcnn_fg), n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc', _accuracy(faster_rcnn_tn, faster_rcnn_bg), n_iter)

                # Reset the window accumulators and the timer.
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()

            if epoch % args.save_interval == 0:
                save_name = os.path.join(
                    args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
                torch.save({
                    'model': model.state_dict(),
                    'epoch': epoch,
                    'lr': lr
                }, save_name)
    finally:
        # Flush and release the event file even if training is interrupted.
        if writer is not None:
            writer.close()