def check_gradient_scaling(self):
    """Verify that the GradientScaling hook multiplies the gradient.

    With plain SGD at ``lr=1``, a hook that scales the gradient by
    ``scale`` must leave the parameter at ``w - scale * g``.
    """
    scale = 0.2
    param = self.target.param
    # Expected result, computed before the optimizer mutates the weights.
    expected = param.array - scale * param.grad

    optimizer = optimizers.SGD(lr=1)
    optimizer.setup(self.target)
    optimizer.add_hook(GradientScaling(scale))
    optimizer.update()

    # SGD updates the parameter array in place.
    testing.assert_allclose(expected, param.array)
def main():
    """Train LightHeadRCNN-ResNet101 on COCO 2014 with ChainerMN.

    One process per GPU ('pure_nccl' communicator); rank 0 owns the
    shuffle permutation, logging, snapshots and evaluation.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: LightHeadRCNN')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=1234)
    parser.add_argument('--batch-size', '-b', type=int, default=2)
    args = parser.parse_args()

    # chainermn: each process drives the GPU matching its intra-node rank.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)
    random.seed(args.seed)

    # model
    light_head_rcnn = LightHeadRCNNResNet101(
        pretrained_model='imagenet',
        n_fg_class=len(coco_bbox_label_names))
    light_head_rcnn.use_preset('evaluate')
    model = LightHeadRCNNTrainChain(light_head_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset: COCO2014 'train' plus 'valminusminival'.
    train_dataset = COCOBboxDataset(year='2014', split='train')
    vmml_dataset = COCOBboxDataset(year='2014', split='valminusminival')

    # filter non-annotated data: drop images with no bbox labels at all.
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array(
        [i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'bbox', 'label', 'scale'),
        Transform(model.light_head_rcnn))

    # Rank 0 builds the index array; scatter_dataset shuffles it and hands
    # every worker an equal shard.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batch_size)

    # Evaluation data lives on rank 0 only.
    if comm.rank == 0:
        test_dataset = COCOBboxDataset(
            year='2014', split='minival', use_crowded=True,
            return_crowded=True, return_area=True)
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)
    # Every parameter of the global context module gets a 3x gradient
    # multiplier.
    global_context_module = model.light_head_rcnn.head.global_context_module
    global_context_module.col_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    # Freeze batch-normalization parameters (beta/gamma) and the early
    # extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.light_head_rcnn.extractor.conv1.disable_update()
    model.light_head_rcnn.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, bboxes, labels, scales
        indices_concat=[0, 2, 3],  # img, _, labels, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(
        updater, (30, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Linear warm-up for the first 500 iterations, then step decay:
        # x0.1 from epoch 20, x0.01 from epoch 26.  The base lr scales
        # with the global batch size (batch-size x number of workers).
        base_lr = 0.0005 * 1.25 * args.batch_size * comm.size
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif epoch < 20:
            rate = 1
        elif epoch < 26:
            rate = 0.1
        else:
            rate = 0.01
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.light_head_rcnn.__class__.__name__
        trainer.extend(
            chainer.training.extensions.snapshot_object(
                model.light_head_rcnn,
                savefun=chainer.serializers.save_npz,
                filename='%s_model_iter_{.updater.iteration}.npz'
                         % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'], file_name='loss.png',
                    trigger=plot_interval),
                trigger=plot_interval)

        # COCO mAP evaluation at the lr-decay epochs.
        trainer.extend(
            DetectionCOCOEvaluator(
                test_iter, model.light_head_rcnn,
                label_names=coco_bbox_label_names),
            trigger=ManualScheduleTrigger([20, 26], 'epoch'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def handler(context):
    """ABEJA Platform training handler: train SSD on VOC 07+12.

    Loads trainval2007/trainval2012/test2007 from the dataset API, trains
    an SSD300 or SSD512 (selected by the module-level ``network_model``)
    and writes logs and snapshots under ``ABEJA_TRAINING_RESULT_DIR``.

    :param context: platform context carrying dataset aliases in
        ``context.datasets``.
    :raises ValueError: if ``network_model`` names an unknown architecture.
    """
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    else:
        # BUGFIX: previously fell through with ``model`` unbound, which
        # raised a confusing NameError below.  Fail fast instead.
        raise ValueError(
            'Invalid network_model: {}'.format(network_model))

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    # Keep 'difficult' annotations and return their flags for evaluation.
    test_2007 = DetectionDatasetFromAPI(
        test_2007_dataset, use_difficult=True, return_difficult=True)

    train = TransformDataset(
        ConcatenatedDataset(trainval_2007, trainval_2012),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(
        test_2007, BATCHSIZE, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(
        updater, (nb_iterations, 'iteration'),
        out=ABEJA_TRAINING_RESULT_DIR)
    # Decay lr x0.1 at 80k and 100k iterations.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map']

    report_entries = [
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map']

    trainer.extend(
        Statistics(report_entries, nb_iterations, obs_key='iteration'),
        trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(
        extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(nb_iterations, 'iteration'))

    trainer.run()
def main():
    """Train SSD300/SSD512 on VOC 07+12 trainval; evaluate on VOC07 test."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    # Keep 'difficult' annotations and return their flags for evaluation.
    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    # Decay lr x0.1 at each --step iteration.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration'))

    # Evaluate at each lr step and at the end of training.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=triggers.ManualScheduleTrigger(
            args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Trainer snapshots at each lr step / end; model-only snapshot at end.
    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD detector on the ISIC2018 Task 1 dataset.

    Keeps the snapshot with the best validation mAP during training and
    also saves the final trainer/model state when the run finishes.
    """
    args = parse_args()
    res = Resource(args, train=True)

    train, test, train_gt, test_gt = load_train_test(
        train_dir=const.PREPROCESSED_TRAIN_DIR, gt_dir=const.XML_DIR)
    res.log_info(f'Train: {len(train)}, test: {len(test)}')

    # Model architecture chosen via the ARCHS registry.
    model = ARCHS[args.model](n_fg_class=len(const.LABELS),
                              pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train_dataset = TransformDataset(
        ISIC2018Task1Dataset(train, train_gt),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(
        train_dataset, args.batchsize, n_threads=args.loaderjob)

    test_dataset = TransformDataset(
        ISIC2018Task1Dataset(test, test_gt),
        Transform(model.coder, model.insize, model.mean))
    test_iter = chainer.iterators.MultithreadIterator(
        test_dataset, args.batchsize, shuffle=False, repeat=False,
        n_threads=args.loaderjob)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=False,
            label_names=const.LABELS))

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map']))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Two separate triggers on the same key, one per snapshot extension
    # (a trigger instance is not shared between extensions).
    snapshot_trigger = triggers.MaxValueTrigger(key='validation/main/map')
    snapshot_object_trigger = triggers.MaxValueTrigger(
        key='validation/main/map')
    trainer.extend(extensions.snapshot(filename='snapshot_best.npz'),
                   trigger=snapshot_trigger)
    trainer.extend(extensions.snapshot_object(model, 'model_best.npz'),
                   trigger=snapshot_object_trigger)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save last model
    chainer.serializers.save_npz(
        os.path.join(args.out, 'snapshot_last.npz'), trainer)
    chainer.serializers.save_npz(
        os.path.join(args.out, 'model_last.npz'), model)
def main():
    """Train FCIS (PSROIAlign, ResNet101) on SBD with ChainerMN.

    Rank 0 handles logging, snapshots and VOC-style instance-segmentation
    evaluation.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--lr-cooldown-factor', '-lcf',
                        type=float, default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    # BUGFIX: was ``type=list``, which makes argparse split a command-line
    # value into single characters (e.g. '28' -> ['2', '8']).  Use
    # ``type=int`` with ``nargs='*'`` to accept a list of epoch numbers;
    # the default is unchanged.
    parser.add_argument('--cooldown-epoch', '-ce', type=int, nargs='*',
                        default=[28, 31])
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset: rank 0 builds the indices, scatter_dataset shuffles and
    # shards them across workers.
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer: base lr scales with the number of workers.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)
    # The head's first conv trains with a 3x gradient multiplier.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze batch-normalization parameters (beta/gamma) and the early
    # extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # lr scheduler: multiply by the cooldown factor at each cooldown epoch.
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', args.lr_cooldown_factor, init=args.lr * comm.size),
        trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__
        trainer.extend(
            extensions.snapshot_object(
                model.fcis,
                filename='%s_model_iter_{.updater.iteration}.npz'
                         % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'], file_name='loss.png',
                    trigger=plot_interval),
                trigger=plot_interval)

        # Evaluate at the cooldown epochs.
        trainer.extend(
            InstanceSegmentationVOCEvaluator(
                test_iter, model.fcis, iou_thresh=0.5,
                use_07_metric=True,
                label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
# NOTE(review): this script-level fragment is truncated — the final
# ``trainer.extend(`` call is cut off mid-expression in the original.
# It relies on module-level names (train_dataset, valid_dataset, model,
# train_chain, batchsize, gpu_id, training_epoch, out, lr settings).
chainer.cuda.set_max_workspace_size(1024 * 1024 * 1024)
chainer.config.autotune = True

"""Creating Iterators for training. The Transform function is used on train_dataset."""
transformed_train_dataset = TransformDataset(
    train_dataset, Transform(model.coder, model.insize, model.mean))

train_iter = chainer.iterators.MultiprocessIterator(
    transformed_train_dataset, batchsize)
valid_iter = chainer.iterators.SerialIterator(
    valid_dataset, batchsize, repeat=False, shuffle=False)

optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(train_chain)
# biases ('b') get doubled gradients instead of weight decay
for param in train_chain.params():
    if param.name == 'b':
        param.update_rule.add_hook(GradientScaling(2))
    else:
        param.update_rule.add_hook(WeightDecay(0.0005))

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(
    updater, (training_epoch, 'epoch'), out)
trainer.extend(
    extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
    trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))
trainer.extend(
def main():
    """Train FCIS (PSROIAlign, ResNet101) on COCO with ChainerMN.

    Rank 0 handles logging, snapshots and COCO-style evaluation.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--no-ohem', action='store_true')
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = {
        'nms_thresh': 0.7,
        'n_train_pre_nms': 12000,
        'n_train_post_nms': 2000,
        'n_test_pre_nms': 6000,
        'n_test_post_nms': 1000,
        'force_cpu_nms': False,
        'min_size': 0
    }

    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        min_size=800, max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model='imagenet', iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    if args.no_ohem:
        # --no-ohem: disable OHEM and sample 128 proposals per image.
        model = FCISTrainChain(
            fcis, n_ohem_sample=None,
            proposal_target_creator=ProposalTargetCreator(n_sample=128))
    else:
        model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset: COCO 'train' plus 'valminusminival'.
    train_dataset = TransformDataset(
        ConcatenatedDataset(
            COCOInstanceSegmentationDataset(split='train'),
            COCOInstanceSegmentationDataset(split='valminusminival')),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    test_dataset = COCOInstanceSegmentationDataset(
        split='minival', use_crowded=True,
        return_crowded=True, return_area=True)
    # Rank 0 builds the indices, scatter_dataset shuffles and shards them.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if comm.rank == 0:
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer: base lr scales with the number of workers.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)
    # The head's first conv trains with a 3x gradient multiplier.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze batch-normalization parameters (beta/gamma) and the early
    # extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, masks, labels, bboxes, scales
        indices_concat=[0, 1, 2, 4],  # img, masks, labels, _, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(
        updater, (18, 'epoch'), out=args.out)

    # lr scheduler: x0.1 at epochs 12 and 15.
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', 0.1, init=args.lr * comm.size),
        trigger=ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 10, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__
        trainer.extend(
            chainer.training.extensions.snapshot_object(
                model.fcis,
                savefun=chainer.serializers.save_npz,
                filename='%s_model_iter_{.updater.iteration}.npz'
                         % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'], file_name='loss.png',
                    trigger=plot_interval),
                trigger=plot_interval)

        # COCO mAP evaluation near the lr-decay points, expressed in
        # iterations over this worker's dataset shard.
        trainer.extend(
            InstanceSegmentationCOCOEvaluator(
                test_iter, model.fcis,
                label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * 12, len(train_dataset) * 15],
                'iteration'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train SSD300 on a pose bbox dataset with periodic evaluation."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--iteration', type=int, default=10000)
    parser.add_argument('--interval', type=int, default=1000)
    parser.add_argument('--resume')
    args = parser.parse_args()

    label_names = pose_bbox_label_names
    BboxDataset = PoseBboxDataset

    model = SSD300(n_fg_class=len(label_names),
                   pretrained_model='./models/imagenet.npz')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        BboxDataset(split='trainval'),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = BboxDataset(split='test')
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 2e-5 by the ExponentialShift below
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    # Decay lr x0.1 at 80% and 90% of the total iteration budget.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=2e-5),
        trigger=triggers.ManualScheduleTrigger(
            [int(args.iteration * 0.8), int(args.iteration * 0.9)],
            'iteration'))

    trainer.extend(
        DetectionEvaluator(
            test_iter, model, use_07_metric=True, label_names=label_names),
        trigger=(args.interval, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Model snapshot at the halfway point of training.
    trainer.extend(
        extensions.snapshot_object(
            model, 'pose_iter_{.updater.iteration}.npz'),
        trigger=(args.iteration // 2, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD road-damage detector (VGG16 or ResNet101 base)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-dir', type=str,
                        default=os.path.join("RoadDamageDataset", "All"))
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU')
    parser.add_argument('--base-network', choices=('vgg16', 'resnet101'),
                        default='vgg16', help='Base network')
    parser.add_argument('--pretrained-model', default=None,
                        help='Pretrained SSD model')
    parser.add_argument('--pretrained-extractor', default='auto',
                        help='Pretrained CNN model to extract feature maps')
    parser.add_argument('--out', default='result-detection',
                        help='Directory to output the result')
    parser.add_argument('--resume', default=None,
                        help='Initialize the trainer from given file')
    args = parser.parse_args()

    print("Data directory : {}".format(args.data_dir))
    print("Batchsize : {}".format(args.batchsize))
    print("GPU ID : {}".format(args.gpu))
    print("Base network : {}".format(args.base_network))
    print("Pretrained extractor : {}".format(args.pretrained_extractor))
    print("Pretrained model : {}".format(args.pretrained_model))
    print("Output directory : {}".format(args.out))
    print("Resume from : {}".format(args.resume))

    if args.base_network == 'vgg16':
        # pretrained_extractor is currently not available for this class
        model = chainercv.links.SSD300(
            n_fg_class=len(roaddamage_label_names),
            pretrained_model=args.pretrained_model)
        preprocessing = MeanSubtraction(model.mean)
    elif args.base_network == 'resnet101':
        model = ssd_resnet101.SSD224(
            n_fg_class=len(roaddamage_label_names),
            pretrained_extractor=args.pretrained_extractor,
            pretrained_model=args.pretrained_model)
        preprocessing = ResNetPreparation(model.insize)
    else:
        raise ValueError('Invalid base network')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Two-stage transform: SSD Transform, then base-network preprocessing.
    # NOTE(review): Transform already receives model.mean — confirm
    # MeanSubtraction does not subtract the mean a second time for vgg16.
    train = TransformDataset(
        RoadDamageDataset(args.data_dir, split='train'),
        Transform(model.coder, model.insize, model.mean)
    )
    train = TransformDataset(train, preprocessing)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = RoadDamageDataset(args.data_dir, split='val')
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 3e-4 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    # Decay lr x0.1 at 80k and 100k iterations.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=3e-4),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=roaddamage_label_names),
        trigger=(4000, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    # trainer.extend(extensions.ProgressBar())

    trainer.extend(extensions.snapshot(), trigger=(4000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(4000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    print("setup finished")
    trainer.run()

    # Export the final model on CPU.
    model.to_cpu()
    serializers.save_npz("model-detector.npz", model)
def main():
    """Train SSD300/SSD512 on a jsk_perception learning dataset.

    Reads train/val dataset directories (instance- or bbox-style
    annotations), trains for --max-epoch epochs and writes the final
    model snapshot into the output directory.
    """
    rospack = rospkg.RosPack()
    jsk_perception_datasets_path = osp.join(
        rospack.get_path('jsk_perception'), 'learning_datasets')

    parser = argparse.ArgumentParser()

    # Dataset directory
    parser.add_argument('--train-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'train'))
    parser.add_argument('--val-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'test'))
    parser.add_argument('--dataset-type', type=str,
                        default='instance')
    parser.add_argument(
        '--model-name', choices=('ssd300', 'ssd512'), default='ssd512')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--max-epoch', type=int, default=100)
    parser.add_argument('--out-dir', type=str, default=None)
    args = parser.parse_args()

    if (args.dataset_type == 'instance'):
        train_dataset = DetectionDataset(args.train_dataset_dir)
    elif (args.dataset_type == 'bbox'):
        train_dataset = BboxDetectionDataset(args.train_dataset_dir)
    else:
        # BUGFIX: message previously read 'unsuppported'.
        print('unsupported dataset type')
        return

    fg_label_names = train_dataset.fg_class_names

    if args.model_name == 'ssd300':
        model = SSD300(
            n_fg_class=len(fg_label_names),
            pretrained_model='imagenet')
    elif args.model_name == 'ssd512':
        model = SSD512(
            n_fg_class=len(fg_label_names),
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batch_size)

    # dataset-type validity was already checked above, so no else needed.
    if (args.dataset_type == 'instance'):
        test_dataset = DetectionDataset(args.val_dataset_dir)
    elif (args.dataset_type == 'bbox'):
        test_dataset = BboxDetectionDataset(args.val_dataset_dir)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, args.batch_size, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # biases ('b') get doubled gradients instead of weight decay
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d-%H%M%S')
    if args.out_dir is None:
        out_dir = osp.join(
            rospkg.get_ros_home(), 'learning_logs', timestamp)
    else:
        out_dir = args.out_dir

    # Decay lr x0.1 at 2/3 and 5/6 of the epoch budget.
    step_epoch = [args.max_epoch * 2 // 3, args.max_epoch * 5 // 6]
    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out_dir)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(step_epoch, 'epoch'))
    # Evaluate at each lr step and at the end of training.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=fg_label_names),
        trigger=triggers.ManualScheduleTrigger(
            step_epoch + [args.max_epoch], 'epoch'))

    log_interval = 10, 'iteration'
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(
        extensions.snapshot_object(
            model, 'model_snapshot.npz'),
        trigger=(args.max_epoch, 'epoch'))

    trainer.run()
def main():
    """Train SSD300 on the INRIA person dataset.

    Runs a fixed 120k-iteration schedule with step lr decay at 80k/100k,
    evaluating with an INRIA-specific evaluator and snapshotting periodically.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the --model choice is parsed but never used — the code
    # below unconditionally builds SSD300. Verify whether ssd512 support
    # was intended before relying on this flag.
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # Start from VGG16 ImageNet weights stored as a local npz file.
    model = SSD300(
        n_fg_class=len(inria_bbox_label_names),
        pretrained_model='./ssd_vgg16_imagenet_2017_06_09.npz')
    print("###n_fg_class= ", len(inria_bbox_label_names))
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # ConcatenatedDataset wraps a single dataset here; harmless but redundant.
    train = TransformDataset(
        ConcatenatedDataset(
            INRIABboxDataset(data_dir='../INRIAPerson', split='Train')
        ),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = INRIABboxDataset(data_dir='../INRIAPerson', split='Test')
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-4 by ExponentialShift below
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD recipe: scale bias gradients by 2, weight-decay everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-4),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    # NOTE(review): evaluating every single iteration is extremely expensive;
    # this looks like a debugging leftover — confirm before production use.
    trainer.extend(
        DetectionINRIAEvaluator(
            test_iter, model, use_07_metric=False,
            label_names=inria_bbox_label_names),
        trigger=(1, 'iteration'))

    # Per-iteration logging (also likely a debug setting).
    log_interval = 1, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    # trainer.extend(extensions.ProgressBar(update_interval=1))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-GPU FCIS training on the SBD instance-segmentation dataset.

    Uses ChainerMN (pure_nccl) for data-parallel training; rank 0 also runs
    periodic VOC-style instance-segmentation evaluation and logging.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # Workaround for MultiprocessIterator + MPI; see
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn: one process per GPU, communicator picks the local device.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model (iter2=False disables the second refinement pass)
    fcis = FCISResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset: rank 0 builds the index permutation, then it is scattered so
    # every worker trains on a disjoint shard.
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batchsize // comm.size)

    # Only rank 0 evaluates, so only rank 0 needs the test iterator.
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer: momentum SGD wrapped for multi-node allreduce.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)
    # Triple the gradient on the head's first conv (FCIS training recipe).
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze all BatchNorm scale/shift parameters and the early extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Linear-scaled base lr unless given explicitly; 10x decay after
        # the cooldown epoch.
        if args.lr is None:
            base_lr = 0.0005 * args.batchsize
        else:
            base_lr = args.lr
        epoch = trainer.updater.epoch
        if epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/roi_loc_loss', 'main/roi_cls_loss', 'main/roi_mask_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'],
                    file_name='loss.png', trigger=plot_interval),
                trigger=plot_interval)

        # Evaluate at the cooldown boundary and at the end of training.
        # NOTE(review): the triggers multiply dataset length (rank-0 shard)
        # by epochs to get iteration counts — assumes batch_size 1 per
        # iteration per epoch accounting; confirm against comm.size.
        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter, model.fcis,
            iou_thresh=0.5, use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger([
                len(train_dataset) * args.cooldown_epoch,
                len(train_dataset) * args.epoch
            ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train SSD300 on a custom VoTT-exported VOC-style dataset.

    Single-GPU script with hard-coded hyperparameters; saves periodic
    trainer snapshots and the final model as 'my_ssd_model.npz'.
    """
    # Enable cuDNN autotune (picks fastest conv algorithms, larger workspace).
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    # Hard-coded hyperparameters.
    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]

    # Model setup.
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup.
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup.
    # NOTE(review): these Windows paths contain unescaped backslashes
    # (e.g. '\T'); they happen to survive as literal characters but should
    # be raw strings — left unchanged here since they are runtime strings.
    train_dataset = MyVoTTVOCDataset(
        'C:\Python_Programs\chainer_practice\Telescope_corner',
        'train')
    valid_dataset = MyVoTTVOCDataset(
        'C:\Python_Programs\chainer_practice\Telescope_corner',
        'val')

    # Data augmentation.
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup.
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid_dataset, batchsize,
        repeat=False, shuffle=False)

    # Optimizer setup (SSD recipe: 2x bias gradients, weight decay elsewhere).
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=gpu_id)

    # Trainer setup: step lr decay at the configured epochs, per-epoch eval.
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)
    trainer.extend(
        extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
        trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(
            valid_iter, model, use_07_metric=False,
            label_names=voc_labels),
        trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map', 'elapsed_time'
    ]), trigger=log_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume interrupted training, load a snapshot into the trainer:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run training.
    trainer.run()

    # Save the trained model (on CPU for portability).
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
def main():
    """Multi-GPU FCIS training on COCO, configured from a YAML file.

    Uses ChainerMN ('hierarchical' communicator); rank 0 prepares the
    dataset and handles logging/snapshots. Evaluation extensions are
    intentionally disabled in this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', '-o', default=None)
    parser.add_argument('--config', default=None)
    args = parser.parse_args()

    # gpu communicator: one process per GPU.
    comm = chainermn.create_communicator('hierarchical')
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # out: user-specified or timestamped under <filepath>/out.
    out = args.out
    if out is None:
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        out = osp.join(filepath, 'out', timestamp)
    if not osp.exists(out):
        os.makedirs(out)

    # config: default to cfg/train.yaml next to this script.
    cfgpath = args.config
    if cfgpath is None:
        cfgpath = osp.join(filepath, 'cfg', 'train.yaml')
    with open(cfgpath, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is unsafe for
        # untrusted files and deprecated in PyYAML >= 5.1; prefer
        # yaml.safe_load for config files.
        config = easydict.EasyDict(yaml.load(f))
    if comm.rank == 0:
        shutil.copy(cfgpath, osp.join(out, 'train.yaml'))

    min_size = config.min_size
    max_size = config.max_size
    random_seed = config.random_seed
    max_epoch = config.max_epoch
    lr = config.lr
    warmup_iter = config.warmup_iter
    cooldown_epoch = config.cooldown_epoch
    lr = config.lr  # NOTE(review): duplicate assignment, redundant.
    lr_warmup_factor = config.lr_warmup_factor
    lr_cooldown_factor = config.lr_cooldown_factor

    # set random seed (both NumPy and CuPy).
    np.random.seed(random_seed)
    cp.random.seed(random_seed)

    # model
    n_class = len(coco_label_names)
    fcis_model = fcis.models.FCISResNet101(n_class)
    fcis_model.extractor.init_weight()
    model = fcis.models.FCISTrainChain(fcis_model)
    model.to_gpu()

    # optimizer: momentum SGD wrapped for multi-node allreduce.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=lr, momentum=0.9), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # disable update of early stages / BN (flag semantics defined by the
    # project's custom disable_update(two-bool) signature).
    model.fcis.extractor.res1.disable_update(True, True)
    model.fcis.extractor.res2.disable_update(True, True)
    model.fcis.extractor.res3.disable_update(False, True)
    model.fcis.extractor.res4.disable_update(False, True)
    model.fcis.extractor.res5.disable_update(False, True)

    # psroi_conv1: triple the gradient (FCIS training recipe).
    model.fcis.head.psroi_conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.psroi_conv1.b.update_rule.add_hook(GradientScaling(3.0))

    # dataset: rank 0 builds it, then it is scattered across workers.
    if comm.rank == 0:
        train_dataset = COCOInstanceSegmentationDataset(split='trainval2014')
        train_dataset = remove_zero_bbox(train_dataset, min_size, max_size)
        train_dataset = TransformDataset(
            train_dataset,
            Transform(model.fcis, min_size, max_size))
    else:
        train_dataset = None
    train_dataset = chainermn.scatter_dataset(train_dataset, comm,
                                              shuffle=True)

    # iterator
    train_iters = chainer.iterators.SerialIterator(train_dataset,
                                                   batch_size=1)

    updater = chainer.training.StandardUpdater(
        train_iters, optimizer,
        converter=fcis.dataset.concat_examples,
        device=device)

    trainer = chainer.training.Trainer(
        updater, (max_epoch, 'epoch'), out=out)

    # lr scheduler: warm up after warmup_iter, cool down after cooldown_iter.
    cooldown_iter = int(cooldown_epoch * len(train_dataset))
    trainer.extend(
        chainer.training.extensions.ExponentialShift('lr', lr_warmup_factor),
        trigger=chainer.training.triggers.ManualScheduleTrigger(
            [warmup_iter], 'iteration'))
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', lr_cooldown_factor * lr_warmup_factor),
        trigger=chainer.training.triggers.ManualScheduleTrigger(
            [cooldown_iter], 'iteration'))

    # interval
    save_interval = 1, 'epoch'
    log_interval = 100, 'iteration'
    print_interval = 20, 'iteration'

    # logging (rank 0 only)
    if comm.rank == 0:
        model_name = model.fcis.__class__.__name__

        trainer.extend(chainer.training.extensions.snapshot_object(
            model.fcis,
            savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
            trigger=save_interval)

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(
            chainer.training.extensions.LogReport(
                log_name='log.json', trigger=log_interval))
        trainer.extend(chainer.training.extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/fcis_loc_loss', 'main/fcis_cls_loss', 'main/fcis_mask_loss',
            'main/rpn_acc', 'main/fcis_cls_acc', 'main/fcis_fg_acc',
            'validation/main/rpn_acc',
            'validation/main/fcis_cls_acc',
            'validation/main/fcis_fg_acc',
        ]), trigger=print_interval)
        trainer.extend(
            chainer.training.extensions.ProgressBar(update_interval=10))

        trainer.extend(
            chainer.training.extensions.dump_graph('main/loss'))

    trainer.run()
    print('log is saved in {}'.format(out))
def handler(context):
    """ABEJA Platform training entry point: fine-tune SSD300 to one class.

    Loads the dataset referenced by ``context.datasets``, splits 70/30 into
    train/test, copies weights from a VOC-pretrained SSD300 into a 1-class
    model, and trains with the standard SSD recipe.

    Relies on module-level globals not visible in this block: USE_GPU,
    BATCHSIZE, nb_epochs, log_path, ABEJA_TRAINING_RESULT_DIR,
    copy_ssd, fix_ssd.
    """
    dataset_alias = context.datasets
    data = list(load_dataset_from_api(dataset_alias['train']))

    # Deterministic 70/30 train/test split.
    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    # Transfer VOC-pretrained weights into a fresh single-class SSD300.
    premodel = SSD300(n_fg_class=20, pretrained_model='voc0712')
    model = SSD300(n_fg_class=1)
    copy_ssd(model, premodel)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    # initial lr is set to 1e-3 by ExponentialShift below
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD recipe: 2x gradient on biases, weight decay on everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))
    # Freeze layers as defined by the project-level fix_ssd helper.
    fix_ssd(train_chain)

    train_data = DetectionDatasetFromAPI(train_data_raw)
    test_data = DetectionDatasetFromAPI(
        test_data_raw, use_difficult=True, return_difficult=True)
    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train_data, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(
        test_data, BATCHSIZE, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(
        updater, (nb_epochs, 'epoch'), out=ABEJA_TRAINING_RESULT_DIR)
    # NOTE(review): lr decay epochs (1200/1600) exceed typical nb_epochs —
    # effectively constant lr unless nb_epochs is very large; confirm intent.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger([1200, 1600], 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True, label_names=['cup']),
        trigger=(1, 'epoch'))

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))

    print_entries = [
        'epoch',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries, nb_epochs),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=log_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'),
        trigger=(nb_epochs, 'epoch'))
    trainer.run()
def main():
    """Train a 1-class SSD (sheep detector) with optional multi-GPU support.

    Supports splitting the dataset across several GPUs with a
    MultiprocessParallelUpdater, periodic VOC evaluation, and live bbox
    visualization sent over the network via BBOXPlotter.

    Bug fixed: ``backup_train_config`` previously compared the integer
    ``stats_cpu['iteration']`` against the *tuple* ``log_interval``
    (``(100, 'iteration')``), which is never equal, so the training
    configuration was never merged into the first log entry. It now
    compares against ``log_interval[0]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help=
        "path to dataset root if dataset file is not already in root folder of dataset"
    )
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr', type=float, default=0.001,
                        help="default learning rate")
    parser.add_argument('--port', type=int, default=1337,
                        help="port for bbox sending")
    parser.add_argument('--ip', default='127.0.0.1',
                        help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    # Build the chosen SSD variant (single foreground class: sheep).
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

    # For multi-GPU, split into equally-sized shards (the parallel updater
    # requires all shards to have the same length).
    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if not len(gpu_datasets[0]) == len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1],
                                                 len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root, args.test_dataset,
                        image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(
        test, args.batchsize, repeat=False, shuffle=False, n_threads=2)

    # initial lr is set via Adam alpha
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    # SSD recipe: 2x gradient on biases, weight decay on everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if len(args.gpu) > 0 else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)
    # NOTE(review): evaluator uses voc_bbox_label_names although the model
    # has n_fg_class=1 — confirm the intended label list.
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                              label_names=voc_bbox_label_names),
        trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }
    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things
    # get logged (i.e. at the first LogReport trigger).
    def backup_train_config(stats_cpu):
        # FIX: compare against the interval count, not the (count, unit)
        # tuple — the tuple comparison was always False, so the auxiliary
        # data was never written to the log.
        if stats_cpu['iteration'] == log_interval[0]:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval,
                             postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(5000, 'iteration'))

    # Pick the image shown in the bbox visualization: either a user-supplied
    # file or the first training example (de-normalized by adding the mean).
    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image,
                                               resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-GPU LightHeadRCNN training on COCO 2017 via ChainerMN.

    Bug fixed: in ``lr_scheduler`` the local ``rate`` was only assigned in
    the warm-up branch; once ``iteration >= warm_up_duration`` the code hit
    ``rate *= 0.1`` / ``return rate * base_lr`` with ``rate`` unbound and
    raised UnboundLocalError. ``rate`` is now initialized to 1 before the
    step-decay loop (matching the FCIS example's scheduler).
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: LightHeadRCNN')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=1234)
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--epoch', type=int, default=30)
    parser.add_argument('--step-epoch', type=int, nargs='*',
                        default=[19, 25])
    args = parser.parse_args()

    # Workaround for MultiprocessIterator + MPI; see
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn: one process per GPU.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)
    random.seed(args.seed)

    # model
    light_head_rcnn = LightHeadRCNNResNet101(
        pretrained_model='imagenet',
        n_fg_class=len(coco_bbox_label_names))
    light_head_rcnn.use_preset('evaluate')
    model = LightHeadRCNNTrainChain(light_head_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOBboxDataset(year='2017', split='train')

    # filter non-annotated data
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    train_dataset = TransformDataset(
        train_dataset,
        ('img', 'bbox', 'label', 'scale'),
        Transform(model.light_head_rcnn))

    # Rank 0 builds the index permutation; scatter shards it to all workers.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    # Triple the gradient on every global-context-module conv (paper recipe).
    global_context_module = model.light_head_rcnn.head.global_context_module
    global_context_module.col_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    # Freeze the stem, res2 and all BatchNorm layers.
    model.light_head_rcnn.extractor.conv1.disable_update()
    model.light_head_rcnn.extractor.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, bboxes, labels, scales
        indices_concat=[0, 2, 3],  # img, _, labels, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter,
        device=device)

    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Linear warm-up for the first 500 iterations, then step decay
        # (x0.1) after each epoch listed in --step-epoch.
        base_lr = 0.0005 * 1.25 * args.batchsize
        warm_up_duration = 500
        warm_up_rate = 1 / 3
        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        else:
            # FIX: 'rate' must be initialized here; previously it was
            # unbound after warm-up and the scheduler crashed.
            rate = 1
            for step in args.step_epoch:
                if epoch > step:
                    rate *= 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.light_head_rcnn.__class__.__name__
        trainer.extend(chainer.training.extensions.snapshot_object(
            model.light_head_rcnn,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/roi_loc_loss', 'main/roi_cls_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'],
                    file_name='loss.png', trigger=plot_interval),
                trigger=plot_interval)

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train a multi-task (detection + segmentation) SSD variant on VOC.

    Bug fixed: ``use_07`` was only assigned inside the
    ``args.dataset == 'voc'`` branch but is passed positionally to
    ``MultitaskEvaluator`` for any dataset, so running with
    ``--dataset coco --detection`` raised NameError. It is now initialized
    to False before the branch (VOC still overrides it to True).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step', type=int, nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    # in experiments for real experiment
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true',
                        default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    parser.add_argument(
        '--loss_split', type=float, default=0.5
    )  # in fact for detection, other task(segmentation) is 1-loss_split
    args = parser.parse_args()

    # Expand the single snapshot stride into the explicit list of iterations.
    snap_step = args.snap_step
    args.snap_step = []
    for step in range(snap_step, args.iteration + 1, snap_step):
        args.snap_step.append(step)

    # redefine the output path: result/<experiment>/<timestamp>
    import os
    import time
    args.out = os.path.join(
        args.out, args.experiment,
        time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    model.use_preset('evaluate')

    # At least one task must be enabled.
    if not (args.segmentation or args.detection):
        raise RuntimeError

    train_chain = MultiboxTrainChain(
        model, gpu=args.gpu >= 0,
        use_multi_task_loss=args.multitask_loss,
        loss_split=args.loss_split)
    train_chain.cleargrads()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(
            voc_experiments[args.experiment][args.experiment + '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)
    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(
        test_mask, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    # SSD recipe: 2x gradient on biases, weight decay on everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger(args.lr_step, 'iteration'))

    # FIX: initialize use_07 so non-VOC datasets don't hit a NameError when
    # it is passed to the detection evaluator below.
    use_07 = False
    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        label_names = coco_bbox_label_names

    if args.detection and not args.debug:
        trainer.extend(
            MultitaskEvaluator(test_iter, model, args.dataset, use_07,
                               label_names=label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.eval_step + [args.iteration], 'iteration'))

    if args.segmentation and not args.debug:
        trainer.extend(
            MultitaskEvaluator(test_mask_iter, model,
                               dataset=args.dataset,
                               label_names=label_names,
                               detection=False),
            trigger=triggers.ManualScheduleTrigger(
                args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(
            loss_split.LossSplit(
                trigger=(args.update_split_interval, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/mask', 'main/loss/loc', 'main/loss/conf',
        'main/loss/split'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=triggers.ManualScheduleTrigger(
            args.snap_step + [args.iteration], 'iteration'))

    # Resume either just the model weights or the whole trainer state,
    # depending on the snapshot filename.
    if args.resume:
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)

    print(args)
    trainer.run()
def train(train_data, val_data, label_names, iteration, lr, step_points,
          batchsize, gpu, out, val_iteration, log_iteration, loaderjob,
          resume):
    """Train an SSD300 detector and periodically evaluate it on `val_data`.

    The public signature is unchanged; `resume`, when truthy, restores a
    previously saved trainer snapshot before running.
    """
    model = SSD300(n_fg_class=len(label_names), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)  # alpha and k?

    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    # Augment/encode ground truth into multibox targets on the fly.
    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))

    if loaderjob <= 0:
        train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, batchsize)
        # , n_processes=min((loaderjob, batchsize)))

    val_iter = chainer.iterators.SerialIterator(
        val_data, batchsize, repeat=False, shuffle=False)  # think about repeat

    # initial lr is set to 1e-4 (default run_train) by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Biases get doubled gradients, everything else gets weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (iteration, 'iteration'), out)

    # Step-wise lr decay at the given iteration milestones.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=lr),
        trigger=triggers.ManualScheduleTrigger(step_points, 'iteration'))

    val_interval = (val_iteration, 'iteration')
    trainer.extend(
        DetectionVOCEvaluator(
            val_iter, model, use_07_metric=True, label_names=label_names),
        trigger=val_interval)

    # Logging
    log_interval = log_iteration, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full-trainer snapshot plus a weights-only snapshot of the model.
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)

    if resume:
        serializers.load_npz(resume, trainer)

    trainer.run()
def run(input_dir, test_dir, output, batch_size, iterator='SerialIterator',
        device=-1, pretrained_model='', save_trigger=10000, test_trigger=1000,
        parser_module='XMLParser', train_module='MultiboxTrainChain',
        model_module='chainercv.links.SSD300'):
    """Train a detector with dynamically imported parser/model/train-chain.

    Falls back to the 'imagenet' pretrained weights when the given
    `pretrained_model` path does not exist on disk.
    """
    pretrained_model = join(PROJECT_DIR, pretrained_model)
    if pretrained_model and os.path.isfile(pretrained_model):
        print('Pretrained model {} loaded.'.format(pretrained_model))
    else:
        print('Pretrained model file not found, ' +
              'using imagenet as default.')
        pretrained_model = 'imagenet'

    parser = _import_module('multibuildingdetector.parsers.{}', parser_module)
    model = _import_class(model_module)(
        n_fg_class=len(parser.LABEL_NAMES),
        pretrained_model=pretrained_model)
    model.use_preset('evaluate')
    train_chain = _import_class(
        'multibuildingdetector.trainchains.{}'.format(train_module))(model)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    train, test = load_train_test_set(input_dir, test_dir, parser)
    augmented_train = TransformDataset(
        train, ImageAugmentation(model.coder, model.insize, model.mean))
    # The iterator class itself is selected by name.
    train_iter = getattr(chainer.iterators, iterator)(
        augmented_train, batch_size)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # `device is None` means "let chainer decide"; negative means CPU.
    if device is None:
        updater = chainer.training.StandardUpdater(train_iter, optimizer)
    else:
        updater = chainer.training.StandardUpdater(
            train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (120000, 'iteration'), output)

    log_fields = ['main/' + x for x in train_chain.loss_labels]
    if train_module == 'MultiboxTrainChain':
        # Detection training: evaluate with the VOC protocol.
        test_iter = chainer.iterators.SerialIterator(
            test, batch_size, repeat=False, shuffle=False)
        trainer.extend(
            DetectionVOCEvaluator(
                test_iter, model, use_07_metric=True,
                label_names=parser.LABEL_NAMES),
            trigger=(test_trigger, 'iteration'))
        log_fields.append('validation/main/map')
    else:
        # Triplet training: evaluate without augmentation.
        triplet_test = TransformDataset(
            test,
            ImageAugmentation(model.coder, model.insize, model.mean,
                              augment=False))
        test_iter = chainer.iterators.SerialIterator(
            triplet_test, batch_size, repeat=False, shuffle=False)
        trainer.extend(
            TripletEvaluator(
                test_iter, model, label_names=parser.LABEL_NAMES,
                save_plt=True, save_path=output),
            trigger=(test_trigger, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr', *log_fields]))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(save_trigger, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def main():
    """Train SSD on a DeepFashion bbox dataset across MPI workers."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--image_label', '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test', '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')
    parser.add_argument('--image_root', '-TR', default='.',
                        help='Root directory path of image files')
    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(
            MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset

    # Only rank 0 loads the data; the other ranks receive scattered shards.
    if comm.rank == 0:
        train = DeepFashionBboxDataset(
            args.bbox, args.image_label, args.image_root)
        test = DeepFashionBboxDataset(
            args.bbox_test, args.image_label_test, args.image_root)
        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    checkpoint_interval = (1000, 'iteration')
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    # NOTE(review): the evaluator reports per-class scores with the PASCAL
    # VOC label list although the model was built with args.labelnum
    # DeepFashion classes -- the names likely do not match the data.
    # Verify against the dataset's actual label list.
    evaluator = DetectionVOCEvaluator(
        test_iter, model, use_07_metric=True,
        label_names=voc_bbox_label_names)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'iteration', 'lr',
                 'main/loss', 'main/loss/loc', 'main/loss/conf',
                 'validation/main/map']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def do():
    """Short SSD300 training run on the ssdd dataset (CPU by default)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    model = SSD300(n_fg_class=len(ssdd.labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # NOTE: GPU transfer was disabled in the original and stays disabled:
    # if args.gpu >= 0:
    #     chainer.cuda.get_device_from_id(args.gpu).use()
    #     model.to_gpu()

    train = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = test_dataset
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration'))

    # NOTE: the Evaluator extension was disabled in the original and the
    # LogReport/observe_lr/PrintReport extensions were commented out too;
    # only the progress bar is active.
    trainer.extend(extensions.ProgressBar(update_interval=1))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'], 'epoch', file_name='accuracy.png'))

    trainer.extend(
        extensions.snapshot(
            filename='snapshot_iter_{.updater.epoch}.npz'),
        trigger=(4, 'iteration'))

    trainer.run()
def main():
    """Train PSPNet (ResNet-50/101 backbone) with multi-node BatchNorm.

    Distributed over ChainerMN workers; rank 0 additionally runs
    validation and all logging extensions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-dir', default='auto')
    parser.add_argument('--dataset', choices=('ade20k', 'cityscapes'))
    parser.add_argument('--model',
                        choices=('pspnet_resnet101', 'pspnet_resnet50'))
    # FIX: type=float is required -- without it a value given on the
    # command line arrives as str and breaks MomentumSGD(args.lr, 0.9).
    parser.add_argument('--lr', default=1e-2, type=float)
    parser.add_argument('--batchsize', default=2, type=int)
    parser.add_argument('--out', default='result')
    parser.add_argument('--iteration', default=None, type=int)
    parser.add_argument('--communicator', default='hierarchical')
    args = parser.parse_args()

    # Per-dataset crop size, label set and default iteration budget.
    dataset_cfgs = {
        'ade20k': {
            'input_size': (473, 473),
            'label_names': ade20k_semantic_segmentation_label_names,
            'iteration': 150000
        },
        'cityscapes': {
            'input_size': (713, 713),
            'label_names': cityscapes_semantic_segmentation_label_names,
            'iteration': 90000
        }
    }
    dataset_cfg = dataset_cfgs[args.dataset]

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    n_class = len(dataset_cfg['label_names'])
    if args.model == 'pspnet_resnet101':
        model = PSPNetResNet101(
            n_class, pretrained_model='imagenet',
            input_size=dataset_cfg['input_size'])
    elif args.model == 'pspnet_resnet50':
        model = PSPNetResNet50(
            n_class, pretrained_model='imagenet',
            input_size=dataset_cfg['input_size'])
    # Wrap BatchNorm layers with multi-node variants.
    train_chain = create_mnbn_model(TrainChain(model), comm)
    model = train_chain.model
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        train_chain.to_gpu()

    if args.iteration is None:
        n_iter = dataset_cfg['iteration']
    else:
        n_iter = args.iteration

    # Only rank 0 needs the validation split.
    if args.dataset == 'ade20k':
        train = ADE20KSemanticSegmentationDataset(
            data_dir=args.data_dir, split='train')
        if comm.rank == 0:
            val = ADE20KSemanticSegmentationDataset(
                data_dir=args.data_dir, split='val')
        label_names = ade20k_semantic_segmentation_label_names
    elif args.dataset == 'cityscapes':
        train = CityscapesSemanticSegmentationDataset(
            args.data_dir, label_resolution='fine', split='train')
        if comm.rank == 0:
            val = CityscapesSemanticSegmentationDataset(
                args.data_dir, label_resolution='fine', split='val')
        label_names = cityscapes_semantic_segmentation_label_names

    train = TransformDataset(
        train, ('img', 'label'),
        Transform(model.mean, dataset_cfg['input_size']))

    # Scatter index shards so each worker trains on a disjoint subset.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(args.lr, 0.9), comm)
    optimizer.setup(train_chain)
    # No weight decay on BatchNorm scale/shift parameters.
    for param in train_chain.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(chainer.optimizer.WeightDecay(1e-4))
    # The PSPNet head and auxiliary branches get 10x gradients.
    for l in [
            model.ppm, model.head_conv1, model.head_conv2,
            train_chain.aux_conv1, train_chain.aux_conv2]:
        for param in l.params():
            param.update_rule.add_hook(GradientScaling(10))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (n_iter, 'iteration'), args.out)
    # Polynomial lr decay applied every iteration.
    trainer.extend(
        PolynomialShift('lr', 0.9, n_iter, optimizer=optimizer),
        trigger=(1, 'iteration'))

    log_interval = 10, 'iteration'

    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'iteration', 'elapsed_time', 'lr', 'main/loss',
                 'validation/main/miou',
                 'validation/main/mean_class_accuracy',
                 'validation/main/pixel_accuracy']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(
            extensions.snapshot_object(
                train_chain.model,
                'snapshot_model_{.updater.iteration}.npz'),
            trigger=(n_iter, 'iteration'))
        val_iter = chainer.iterators.SerialIterator(
            val, batch_size=1, repeat=False, shuffle=False)
        trainer.extend(
            SemanticSegmentationEvaluator(val_iter, model, label_names),
            trigger=(n_iter, 'iteration'))

    trainer.run()
def main():
    """Train FCIS ResNet-101 on SBD instance segmentation (single GPU)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=0.0005)
    parser.add_argument('--lr-cooldown-factor', '-lcf', type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # dataset
    train_dataset = SBDInstanceSegmentationDataset(split='train')
    test_dataset = SBDInstanceSegmentationDataset(split='val')

    # model
    fcis = FCISResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)

    # gpu
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # optimizer: head conv gets 3x gradients, global weight decay,
    # BatchNorm params frozen, early extractor stages frozen.
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    train_dataset = TransformDataset(train_dataset, Transform(model.fcis))

    # iterator
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=args.gpu)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # lr scheduler: single step-down at the cooldown epoch.
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', args.lr_cooldown_factor, init=args.lr),
        trigger=(args.cooldown_epoch, 'epoch'))

    # interval
    log_interval = 100, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    # training extensions
    trainer.extend(
        extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(
        extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss',
             'main/rpn_loc_loss', 'main/rpn_cls_loss',
             'main/roi_loc_loss', 'main/roi_cls_loss',
             'main/roi_mask_loss',
             'validation/main/map']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'], file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval)

    # Evaluate at the cooldown point and at the end of training.
    trainer.extend(
        InstanceSegmentationVOCEvaluator(
            test_iter, model.fcis, iou_thresh=0.5, use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
        trigger=ManualScheduleTrigger(
            [len(train_dataset) * args.cooldown_epoch,
             len(train_dataset) * args.epoch], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train FCIS ResNet-101 on COCO instance segmentation via ChainerMN."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate should be multiplied by the number of gpu')
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = FCISResNet101.proposal_creator_params
    proposal_creator_params['min_size'] = 2
    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet', iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(
        fcis, proposal_target_creator=proposal_target_creator)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset: train2014 plus valminusminival
    train_dataset = COCOInstanceSegmentationDataset(
        year='2014', split='train')
    vmml_dataset = COCOInstanceSegmentationDataset(
        year='2014', split='valminusminival')

    # filter non-annotated data
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array(
        [i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))

    # Shard the training set across workers.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=1)

    # test dataset: only rank 0 evaluates
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(
            year='2014', split='minival', use_crowded=True,
            return_crowded=True, return_area=True)
        indices = np.arange(len(test_dataset))
        test_dataset = test_dataset.slice[indices]
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    # Freeze BatchNorm params and the earliest extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # lr scheduler: warmup for the first 2000 global iterations, base lr
    # until the cooldown epoch, then one step-down.
    @make_shift('lr')
    def lr_scheduler(trainer):
        base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(
            extensions.snapshot_object(
                model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/roi_loc_loss', 'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(
                    ['main/loss'], file_name='loss.png',
                    trigger=plot_interval),
                trigger=plot_interval)

        trainer.extend(
            InstanceSegmentationCOCOEvaluator(
                test_iter, model.fcis,
                label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * args.cooldown_epoch,
                 len(train_dataset) * args.epoch], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train SSD on the EPIC-Kitchens 2018 bbox dataset via ChainerMN."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(epic_kitchens_bbox_category_names),
            pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(epic_kitchens_bbox_category_names),
            pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Rank 0 builds the full index list; the rest receive shards.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(
        train, ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'iteration', 'lr',
                 'main/loss', 'main/loss/loc', 'main/loss/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train FCIS on VOC/SBD instance segmentation, driven by a YAML config.

    Runs under ChainerMN ('hierarchical' communicator); rank 0 owns the
    output directory, dataset loading and all logging extensions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', '-o', default=None)
    parser.add_argument('--config', default=None)
    parser.add_argument('--resume', default=None)
    args = parser.parse_args()

    # gpu communicator
    comm = chainermn.create_communicator('hierarchical')
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # out
    out = args.out
    if out is None:
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        out = osp.join(filepath, 'out', timestamp)

    # config
    cfgpath = args.config
    if cfgpath is None:
        cfgpath = osp.join(filepath, 'cfg', 'train.yaml')
    with open(cfgpath, 'r') as f:
        # FIX: safe_load -- the config is plain data, and yaml.load
        # without an explicit Loader is deprecated and can construct
        # arbitrary Python objects from untrusted input.
        config = easydict.EasyDict(yaml.safe_load(f))

    if comm.rank == 0:
        os.makedirs(out)
        shutil.copy(cfgpath, osp.join(out, 'train.yaml'))

    min_size = config.min_size
    max_size = config.max_size
    random_seed = config.random_seed
    # Either an epoch budget or an iteration budget is given, not both.
    if 'max_epoch' in config:
        max_epoch = config.max_epoch
        max_iter = None
    else:
        max_epoch = None
        max_iter = config.max_iter
    if 'cooldown_epoch' in config:
        cooldown_epoch = config.cooldown_epoch
        cooldown_iter = None
    else:
        cooldown_epoch = None
        cooldown_iter = config.cooldown_iter
    # FIX: the original assigned lr = config.lr twice; once is enough.
    lr = config.lr
    lr_cooldown_factor = config.lr_cooldown_factor

    # set random seed
    np.random.seed(random_seed)
    cp.random.seed(random_seed)

    # model
    n_class = len(voc_label_names)
    fcis_model = fcis.models.FCISResNet101(
        n_class, ratios=(0.5, 1.0, 2.0), anchor_scales=(8, 16, 32),
        rpn_min_size=16)
    if args.resume is None:
        fcis_model.extractor.init_weight()
    else:
        chainer.serializers.load_npz(args.resume, fcis_model)
    model = fcis.models.FCISTrainChain(
        fcis_model, n_sample=128, bg_iou_thresh_lo=0.1)
    model.to_gpu()

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=lr, momentum=0.9), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # disable update
    model.fcis.extractor.res1.disable_update(True, True)
    model.fcis.extractor.res2.disable_update(True, True)
    model.fcis.extractor.res3.disable_update(False, True)
    model.fcis.extractor.res4.disable_update(False, True)
    model.fcis.extractor.res5.disable_update(False, True)

    # psroi_conv1 lr
    model.fcis.head.psroi_conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.psroi_conv1.b.update_rule.add_hook(GradientScaling(3.0))

    # dataset: rank 0 loads, then shards are scattered to all workers
    if comm.rank == 0:
        if config.use_sbd:
            dataset_class = SBDInstanceSegmentationDataset
        else:
            dataset_class = VOCInstanceSegmentationDataset
        train_dataset = dataset_class(split='train')
        test_dataset = dataset_class(split='val')
        train_dataset = TransformDataset(
            train_dataset, Transform(model.fcis, min_size, max_size))
        test_dataset = TransformDataset(
            test_dataset,
            Transform(model.fcis, min_size, max_size, flip=False))
    else:
        train_dataset = None
        test_dataset = None
    train_dataset = chainermn.scatter_dataset(
        train_dataset, comm, shuffle=True)
    test_dataset = chainermn.scatter_dataset(
        test_dataset, comm, shuffle=False)

    # iterator
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=fcis.dataset.concat_examples,
        device=device)

    # interval
    if max_epoch is not None:
        max_interval = max_epoch, 'epoch'
    else:
        max_interval = max_iter, 'iteration'
    if cooldown_epoch is not None:
        cooldown_interval = cooldown_epoch, 'epoch'
    else:
        cooldown_interval = cooldown_iter, 'iteration'
    save_interval = 1, 'epoch'
    log_interval = 100, 'iteration'
    print_interval = 20, 'iteration'
    test_interval = 8, 'epoch'

    # trainer
    trainer = chainer.training.Trainer(updater, max_interval, out=out)

    # lr scheduler
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', lr_cooldown_factor, init=lr),
        trigger=chainer.training.triggers.ManualScheduleTrigger(
            *cooldown_interval))

    # evaluator
    trainer.extend(
        chainermn.create_multi_node_evaluator(
            chainer.training.extensions.Evaluator(
                test_iter, model,
                converter=fcis.dataset.concat_examples,
                device=device),
            comm),
        trigger=test_interval)

    # logging
    if comm.rank == 0:
        snapshot_filename = '{}_model_iter_{{.updater.iteration}}.npz'.format(
            model.fcis.__class__.__name__)
        trainer.extend(
            chainer.training.extensions.snapshot_object(
                model.fcis, savefun=chainer.serializers.save_npz,
                filename=snapshot_filename),
            trigger=save_interval)
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval)
        trainer.extend(
            chainer.training.extensions.LogReport(
                log_name='log.json', trigger=log_interval))
        trainer.extend(chainer.training.extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/fcis_loc_loss', 'main/fcis_cls_loss',
            'main/fcis_mask_loss',
            'main/rpn_acc', 'main/fcis_cls_acc', 'main/fcis_fg_acc',
            'validation/main/rpn_acc',
            'validation/main/fcis_cls_acc',
            'validation/main/fcis_fg_acc',
        ]), trigger=print_interval)
        trainer.extend(
            chainer.training.extensions.ProgressBar(update_interval=10))
        trainer.extend(
            chainer.training.extensions.dump_graph('main/loss'))

    trainer.run()

    if comm.rank == 0:
        print('log is saved in {}'.format(out))
def main() -> None:
    """Entry point: multi-process (ChainerMN) SSD training on VOC.

    Trains an SSD300 or SSD512 detector on VOC2007+2012 trainval with one
    worker process per GPU (device id = intra-node rank).  Only rank 0
    builds the test iterator and the evaluation/logging/snapshot
    extensions; the other ranks just train.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    # Iterations at which the learning rate is dropped (used by
    # ExponentialShift / ManualScheduleTrigger below).
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # One worker per GPU: the intra-node rank doubles as the CUDA device id.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    # Select this rank's device before moving the model onto it.
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # VOC2007 + VOC2012 trainval, transformed into (img, mb_loc, mb_label)
    # triples matched to the model's multibox coder and input size.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Build the full index list on rank 0 only, then scatter shuffled,
    # disjoint index shards so each process trains on its own subset.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    # Use 'forkserver' so MultiprocessIterator workers are not forked from a
    # process that already holds communicator/GPU state (hasattr-guarded for
    # Pythons without set_start_method).
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # Per-process batch size = global batch size split across all workers.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    # Evaluation data is needed on rank 0 only.
    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Bias gradients are scaled by 2; weights get decay instead.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    # Multiply lr by 0.1 at each --step iteration.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    if comm.rank == 0:
        # Evaluate after every lr drop and at the final iteration.
        trainer.extend(
            DetectionVOCEvaluator(
                test_iter, model,
                use_07_metric=True, label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Full trainer snapshots at the lr-drop/final iterations; a bare
        # model snapshot at the very end.
        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main() -> None:
    """Entry point: single-GPU multibox detector training.

    Trains FPNSSD, SSD300, or SSD512 on either VOC (2007+2012 trainval,
    2007 test) or COCO (train / val) bounding-box datasets.  Pass
    ``--gpu N`` (N >= 0) to train on a GPU; the default -1 runs on CPU.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('fpn', 'ssd300', 'ssd512'), default='fpn')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--data_dir', type=str, default='auto')
    parser.add_argument('--dataset', choices=['voc', 'coco'], default='voc')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--init_scale', type=float, default=1e-2)
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.dataset == 'voc':
        # 'auto' lets the dataset classes locate/download the data
        # themselves; otherwise point each dataset at its year-specific
        # VOCdevkit subdirectory under --data_dir.
        train = ConcatenatedDataset(
            VOCBboxDataset(
                year='2007', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
                if args.data_dir != 'auto' else args.data_dir),
            VOCBboxDataset(
                year='2012', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2012')
                if args.data_dir != 'auto' else args.data_dir))
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True,
            data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
            if args.data_dir != 'auto' else args.data_dir)
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        # todo: use train+valminusminival(=coco2017train)
        # https://github.com/chainer/chainercv/issues/651
        train = COCOBboxDataset(data_dir=args.data_dir, split='train')
        test = COCOBboxDataset(data_dir=args.data_dir, split='val')
        label_names = coco_bbox_label_names

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'fpn':
        # init_scale only applies to the FPN variant.
        model = FPNSSD(
            n_fg_class=len(label_names), pretrained_model='imagenet',
            init_scale=args.init_scale)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        # Select the device before moving the model onto it.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Transform raw samples to match the model's coder, input size and mean.
    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set by --lr (default 1e-3) via ExponentialShift below
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Bias gradients are scaled by 2; weights get decay instead.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    # Multiply lr by 0.1 at iterations 80k and 100k.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    # NOTE(review): the VOC detection metric is used even when
    # --dataset coco is selected — confirm this is intended.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True, label_names=label_names),
        trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full trainer snapshot every 10k iterations; bare model snapshot at
    # the final iteration.
    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()