def main():
    """Evaluate a configured detector on a local VOC2007 test copy.

    Prints the 07-metric mAP followed by one AP line per VOC class.
    """
    config, args = parse_args()
    model = get_model(config["model"])
    if args.gpu != -1:
        model.to_gpu(args.gpu)

    test_data = VOCBboxDataset(
        data_dir="../dataset/VOC_test/VOC2007_test",
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    test_iter = iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)

    imgs, pred_values, gt_values = apply_prediction_to_iterator(
        model.predict, test_iter, hook=ProgressHook(len(test_data)))
    # The input images are not needed for scoring; free them right away.
    del imgs

    pred_bboxes, pred_labels, pred_scores = pred_values
    gt_bboxes, gt_labels, gt_difficults = gt_values
    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)

    print()
    print('mAP: {:f}'.format(result['map']))
    for idx, name in enumerate(voc_bbox_label_names):
        ap = result['ap'][idx]
        if ap:
            print('{:s}: {:f}'.format(name, ap))
        else:
            print('{:s}: -'.format(name))
def setup(dataset, model, pretrained_model, batchsize):
    """Resolve evaluation components from string arguments.

    Returns ``(dataset, eval_, model, batchsize)`` where ``eval_`` is a
    closure that scores prediction/ground-truth tuples and prints metrics.
    """
    dataset_name = dataset

    if dataset_name == 'voc':
        dataset = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True)
        label_names = voc_bbox_label_names

        def eval_(out_values, rest_values):
            # VOC scoring: 07-metric AP per class plus overall mAP.
            pred_bboxes, pred_labels, pred_scores = out_values
            gt_bboxes, gt_labels, gt_difficults = rest_values
            result = eval_detection_voc(
                pred_bboxes, pred_labels, pred_scores,
                gt_bboxes, gt_labels, gt_difficults,
                use_07_metric=True)
            print()
            print('mAP: {:f}'.format(result['map']))
            for idx, name in enumerate(voc_bbox_label_names):
                ap = result['ap'][idx]
                if ap:
                    print('{:s}: {:f}'.format(name, ap))
                else:
                    print('{:s}: -'.format(name))

    elif dataset_name == 'coco':
        dataset = COCOBboxDataset(
            year='2017', split='val',
            use_crowded=True, return_area=True, return_crowded=True)
        label_names = coco_bbox_label_names

        def eval_(out_values, rest_values):
            # COCO scoring: mmAP over IoU 0.50:0.95 for each area range.
            pred_bboxes, pred_labels, pred_scores = out_values
            gt_bboxes, gt_labels, gt_area, gt_crowded = rest_values
            result = eval_detection_coco(
                pred_bboxes, pred_labels, pred_scores,
                gt_bboxes, gt_labels, gt_area, gt_crowded)
            print()
            for area in ('all', 'large', 'medium', 'small'):
                print(
                    'mmAP ({}):'.format(area),
                    result['map/iou=0.50:0.95/area={}/max_dets=100'.format(area)])

    cls, pretrained_models, default_batchsize = models[model]
    if pretrained_model is None:
        # Fall back to the dataset name itself when no per-dataset
        # pretrained weight alias is registered.
        pretrained_model = pretrained_models.get(dataset_name, dataset_name)
    model = cls(n_fg_class=len(label_names), pretrained_model=pretrained_model)
    if batchsize is None:
        batchsize = default_batchsize

    return dataset, eval_, model, batchsize
def main():
    """Interactively browse a detection dataset with ground-truth boxes.

    Shows one image at a time via OpenCV; any key advances, 'q' quits.

    Fixes: the ``data_dir`` help text had a typo ("dirctory") and wrongly
    claimed the path was COCO-only even though VOC is also supported; the
    defensive ``ValueError`` now carries a message.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_dir', type=str,
                        help='Path to the directory of the dataset.')
    parser.add_argument('dataset_type', type=str, choices=['COCO', 'VOC'])
    parser.add_argument('--split', type=str, default='val',
                        choices=['train', 'val'])
    args = parser.parse_args()

    if args.dataset_type == 'COCO':
        dataset = COCOBboxDataset(args.data_dir, args.split)
    elif args.dataset_type == 'VOC':
        dataset = VOCBboxDataset(args.data_dir, split=args.split)
    else:
        # Unreachable in practice: argparse restricts the choices above.
        raise ValueError('unknown dataset type: {}'.format(args.dataset_type))

    visualizer = Visualizer(args.dataset_type)
    for img, bbox, label in dataset:
        # Wrap bbox/label in lists to match the visualizer's batch interface.
        result = visualizer.visualize(img, ([bbox], [label]))
        cv2.imshow('output', result)
        key = cv2.waitKey(0) & 0xff
        if key == ord('q'):
            break
    cv2.destroyAllWindows()
def main():
    """Render a grid comparing detections of three models on fixed VOC images.

    Rows are the selected test images, columns the models; the first row
    carries the model names as titles.

    Improvement: the image was previously re-read from the dataset inside the
    inner (per-model) loop although it only depends on the outer index; the
    load is hoisted so each image is read once per row.
    """
    dataset = VOCBboxDataset(year='2007', split='test')
    models = [
        ('Faster R-CNN', FasterRCNNVGG16(pretrained_model='voc07')),
        ('SSD300', SSD300(pretrained_model='voc0712')),
        ('SSD512', SSD512(pretrained_model='voc0712')),
    ]
    indices = [29, 301, 189, 229]

    fig = plt.figure(figsize=(30, 30))
    for i, idx in enumerate(indices):
        # Load once per row; ground-truth annotations are not needed here.
        img, _, _ = dataset[idx]
        for j, (name, model) in enumerate(models):
            bboxes, labels, scores = model.predict([img])
            bbox, label, score = bboxes[0], labels[0], scores[0]
            ax = fig.add_subplot(
                len(indices), len(models), i * len(models) + j + 1)
            vis_bbox(
                img, bbox, label, score,
                label_names=voc_bbox_label_names, ax=ax)

            # Set MatplotLib parameters
            ax.set_aspect('equal')
            if i == 0:
                font = FontProperties()
                font.set_family('serif')
                ax.set_title(name, fontsize=35, y=1.03, fontproperties=font)
            plt.axis('off')

    plt.tight_layout()
    plt.show()
def main():
    """Evaluate multi-label classification built from VOC07 test annotations.

    Prints the overall mAP and one AP line per VOC class.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--pretrained-model')
    args = parser.parse_args()

    model = ResNet50(
        pretrained_model=args.pretrained_model,
        n_class=len(voc_bbox_label_names), arch='he')
    model.pick = 'fc6'
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    test_data = VOCBboxDataset(split='test', year='2007', use_difficult=False)
    test_data = TransformDataset(test_data, ('img', 'bbox'), bbox_to_multi_label)
    test_iter = iterators.SerialIterator(test_data, 8, repeat=False, shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(
        PredictFunc(model, thresh=0), test_iter,
        hook=ProgressHook(len(test_data)))
    # Inputs are no longer needed once predictions exist.
    del in_values

    pred_labels, pred_scores = out_values
    gt_labels, = rest_values
    result = eval_multi_label_classification(
        pred_labels, pred_scores, gt_labels)

    print()
    print('mAP: {:f}'.format(result['map']))
    for idx, name in enumerate(voc_bbox_label_names):
        ap = result['ap'][idx]
        if ap:
            print('{:s}: {:f}'.format(name, ap))
        else:
            print('{:s}: -'.format(name))
if len(argv) > 2: continuous = bool(argv[2]) else: gpu_id = 0 print('gpu_id is {}'.format(gpu_id)) SAVE_PATH = 'ssd300_model_vocall_trval_lrdrop_shadow.npz' print('save path is {}'.format(SAVE_PATH)) iters = 800000 + 1 batch_size = 8 model = SSD300(n_fg_class=21, pretrained_model='imagenet') model.to_gpu(gpu_id) train07 = VOCBboxDataset(data_dir='auto', year='2007', split='trainval', use_difficult=True, return_difficult=False) train12 = VOCBboxDataset(data_dir='auto', year='2012', split='trainval', use_difficult=True, return_difficult=False) train07_right = train07[len(train07) // 2:] train12_right = train12[len(train12) // 2:] train_right = ConcatenatedDataset(train07_right, train12_right) train = TransformDataset(train_right, _Transform(model.coder, model.insize, model.mean)) train_iter = chainer.iterators.SerialIterator(train,
def main():
    """Train a multi-task (detection + segmentation) SSD variant on VOC.

    Bug fix: ``use_07`` was only assigned inside the ``--dataset voc``
    branch, so running with ``--dataset coco`` and ``--detection`` raised
    ``NameError`` when installing the detection evaluator. It is now
    initialized to ``False`` before the branch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step', type=int, nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    # in experiments for real experiment
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true', default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    # Weight of the detection loss; segmentation gets 1 - loss_split.
    parser.add_argument('--loss_split', type=float, default=0.5)
    args = parser.parse_args()

    # Expand the single snapshot period into the full snapshot schedule.
    snap_step = args.snap_step
    args.snap_step = list(range(snap_step, args.iteration + 1, snap_step))

    # redefine the output path: <out>/<experiment>/<timestamp>
    import os
    import time
    args.out = os.path.join(
        args.out, args.experiment,
        time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(
            n_fg_class=len(voc_bbox_label_names), pretrained_model='imagenet',
            detection=args.detection, segmentation=args.segmentation,
            attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(
            n_fg_class=len(voc_bbox_label_names), pretrained_model='imagenet',
            detection=args.detection, segmentation=args.segmentation,
            attention=args.attention)
    model.use_preset('evaluate')

    if not (args.segmentation or args.detection):
        raise RuntimeError(
            'at least one of --detection / --segmentation is required')

    train_chain = MultiboxTrainChain(
        model, gpu=args.gpu >= 0,
        use_multi_task_loss=args.multitask_loss,
        loss_split=args.loss_split)
    train_chain.cleargrads()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(voc_experiments[args.experiment][args.experiment + '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)

    test = VOCBboxDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)
    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(
        test_mask, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    for param in train_chain.params():
        if param.name == 'b':
            # Biases: doubled gradient, no weight decay (SSD convention).
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger(args.lr_step, 'iteration'))

    use_07 = False  # FIX: default so the coco branch does not leave it unset
    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        label_names = coco_bbox_label_names

    if args.detection and not args.debug:
        trainer.extend(
            MultitaskEvaluator(test_iter, model, args.dataset, use_07,
                               label_names=label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.eval_step + [args.iteration], 'iteration'))
    if args.segmentation and not args.debug:
        trainer.extend(
            MultitaskEvaluator(test_mask_iter, model, dataset=args.dataset,
                               label_names=label_names, detection=False),
            trigger=triggers.ManualScheduleTrigger(
                args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(loss_split.LossSplit(
            trigger=(args.update_split_interval, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/mask',
         'main/loss/loc', 'main/loss/conf', 'main/loss/split']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=triggers.ManualScheduleTrigger(
            args.snap_step + [args.iteration], 'iteration'))

    if args.resume:
        # A path containing 'model' is treated as bare model weights,
        # anything else as a full trainer snapshot.
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)

    print(args)
    trainer.run()
def main():
    """Train YOLOv3 on VOC, optionally warm-starting from Darknet53 weights.

    Supports single-GPU, multi-GPU (ParallelUpdater) and CPU training.
    Snapshots the best model by validation loss and runs a sample detection
    extension during training.
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    # Seed both Python and NumPy RNGs for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Optionally load a pretrained Darknet53 backbone (20 = VOC classes).
    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base

    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)

    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
        # With multiple GPUs the ParallelUpdater moves the model itself.
        if len(args.gpus) == 1:
            model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0), 'hook_grad_clip')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    # Wrap with YOLO-style augmentation (jitter/HSV) for training only.
    train = YOLOVOCDataset(train, classifier=False, jitter=0.3,
                           hue=0.1, sat=1.5, val=1.5)
    #train = train[np.arange(args.batchsize)]
    test = YOLOVOCDataset(test, classifier=False)
    # Evaluate on a random fixed-size subset to keep validation cheap.
    test = test[np.random.permutation(np.arange(len(test)))
                [:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        # First GPU is 'main'; the rest get named entries.
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(
        test_iter, model, converter=concat_yolo, device=device),
        trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'], 'iteration',
                display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss',
         'validation/main/loss', 'elapsed_time']), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Keep the best-so-far model by validation loss, plus a rolling 'final'.
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'), trigger=snapshot_interval)

    # Darknet-style LR schedule (burn-in + step drops near the end).
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=[args.iteration-10200, args.iteration-5200],
        scales=[0.1, 0.1]
    ))
    # Multi-scale training: input size cycles through 320..448.
    trainer.extend(CropSizeUpdater(train,
                                   [(10+i)*32 for i in range(0, 5)],
                                   args.iteration - 200))

    # Periodically run a sample detection for visual sanity checking.
    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector,
        ['./data/image/dog.jpg'],
        class_names, size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device
    ))
    trainer.run()
hm_mae = F.mean_absolute_error(hm, indata["hm"]) reporter.report( { 'loss': loss, 'hm_loss': hm_loss, 'hm_pos_loss': detail_losses['hm_pos_loss'], 'hm_neg_loss': detail_losses['hm_neg_loss'], 'hm_mae': hm_mae, 'wh_loss': wh_loss, 'offset_loss': offset_loss }, self) return loss if __name__ == '__main__': from centernet.datasets.transforms import CenterDetectionTransform from chainercv.datasets import VOCBboxDataset from chainer.datasets import TransformDataset from chainer.dataset import concat_examples from centernet.models.networks.hourglass import HourglassNet center_detection_transform = CenterDetectionTransform(512, 5, 4) train = VOCBboxDataset(year='2012', split='trainval') x = concat_examples([train[0]]) print(x[0].shape) detector = CenterDetector(HourglassNet, 512, 5) print(detector.predict(x[0]))
def main():
    """Train Faster R-CNN (VGG16) on VOC07 or VOC07+12.

    Follows the ChainerCV training recipe: batch size 1, momentum SGD with a
    single 0.1 LR drop, evaluation with the VOC 07 metric at the LR-drop and
    final iterations.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    # Always evaluate on the VOC2007 test set, difficult objects included.
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Save only the detector weights (not the train chain) at the end.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval)

    # Evaluate right after the LR drop and at the final iteration.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Evaluate a pretrained detector on the VOC2007 test set.

    Prints the 07-metric mAP and the per-class APs for the chosen model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        choices=('faster_rcnn', 'ssd300', 'ssd512', 'yolo_v3'),
        default='ssd300')
    parser.add_argument('--pretrained_model')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()

    # Model class and the weight alias used when no checkpoint is given.
    registry = {
        'faster_rcnn': (FasterRCNNVGG16, 'voc07'),
        'ssd300': (SSD300, 'voc0712'),
        'ssd512': (SSD512, 'voc0712'),
        'yolo_v3': (YOLOv3, 'voc0712'),
    }
    model_cls, default_weights = registry[args.model]
    if args.pretrained_model:
        model = model_cls(
            n_fg_class=len(voc_bbox_label_names),
            pretrained_model=args.pretrained_model)
    else:
        model = model_cls(pretrained_model=default_weights)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.use_preset('evaluate')

    test_data = VOCBboxDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    test_iter = iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)

    in_values, out_values, rest_values = apply_to_iterator(
        model.predict, test_iter, hook=ProgressHook(len(test_data)))
    # Input images are not needed for scoring; release them now.
    del in_values

    pred_bboxes, pred_labels, pred_scores = out_values
    gt_bboxes, gt_labels, gt_difficults = rest_values

    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=True)

    print()
    print('mAP: {:f}'.format(result['map']))
    for idx, name in enumerate(voc_bbox_label_names):
        ap = result['ap'][idx]
        if ap:
            print('{:s}: {:f}'.format(name, ap))
        else:
            print('{:s}: -'.format(name))
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from chainercv.datasets import VOCBboxDataset
from chainercv.datasets import voc_bbox_label_names
from chainercv.visualizations import vis_bbox
import torch
from torchnet.meter import AverageValueMeter, MovingAverageValueMeter
from model.faster_rcnn import faster_rcnn
from model.utils.transform_tools import image_normalize

# Module-level VOC2007 splits shared by training / evaluation code.
train_dataset = VOCBboxDataset(year='2007', split='train')
val_dataset = VOCBboxDataset(year='2007', split='val')
trainval_dataset = VOCBboxDataset(year='2007', split='trainval')
test_dataset = VOCBboxDataset(year='2007', split='test')


def adjust_learning_rate(optimizer, epoch, init_lr,
                         lr_decay_factor=0.1, lr_decay_epoch=10):
    """Step-decay the optimizer's learning rate.

    Every ``lr_decay_epoch`` epochs the LR is reset to
    ``init_lr * lr_decay_factor ** (epoch // lr_decay_epoch)``; calls on
    other epochs leave the optimizer untouched.
    """
    if epoch % lr_decay_epoch != 0:
        return
    lr = init_lr * (lr_decay_factor**(epoch // lr_decay_epoch))
    print('LR is set to {}'.format(lr))
    for group in optimizer.param_groups:
        group['lr'] = lr
def main():
    """Train a CenterNet (Hourglass backbone) detector on VOC07+12.

    ``--mini`` shrinks both splits to tiny subsets for smoke testing.
    Evaluates with the VOC 07 metric and snapshots the detector every epoch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    parser.add_argument('--input_size', type=int, default=512)
    args = parser.parse_args()

    dtype = np.float32
    num_class = len(voc_bbox_label_names)

    # Augmentation first, then conversion to CenterNet training targets
    # (heatmap downscale factor 4).
    data_augmentation_transform = DataAugmentationTransform(args.input_size)
    center_detection_transform = CenterDetectionTransform(
        args.input_size, num_class, 4, dtype=dtype)

    train = TransformDataset(
        ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval')
        ),
        data_augmentation_transform
    )
    train = TransformDataset(train, center_detection_transform)
    if args.mini:
        train = datasets.SubDataset(train, 0, 100)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    if args.mini:
        test = datasets.SubDataset(test, 0, 20)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    detector = CenterDetector(HourglassNet, args.input_size, num_class,
                              dtype=dtype)
    #detector = CenterDetector(SimpleCNN, args.input_size, num_class)

    # Loss weights: heatmap 1, size 0.1, offset 1.
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1)
    #train_chain = CenterDetectorTrain(detector, 1, 0, 0)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu(args.gpu)

    optimizer = Adam(alpha=1.25e-4)
    #optimizer = SGD()
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)

    log_interval = 1, 'epoch'
    log_interval_mini = 500, 'iteration'
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=f"result{args.gpu}")
    trainer.extend(extensions.LogReport(trigger=log_interval_mini))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
         'main/hm_mae', 'main/hm_pos_loss', 'main/hm_neg_loss',
         'validation/main/map',
         ]), trigger=log_interval_mini)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, detector, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        detector, 'detector{.updater.epoch:03}.npz'),
        trigger=(1, 'epoch'))

    trainer.run()
def main():
    """Evaluate a trained multi-task SSD checkpoint on detection and/or
    segmentation.

    Bug fix: ``use_07`` was only assigned in the ``--dataset voc`` branch,
    so ``--dataset coco`` raised ``NameError`` at evaluator construction;
    it now defaults to ``False``. The bare ``raise RuntimeError`` guards
    also carry messages now (same exception type).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--model_path', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--eval_seg', default=False, action='store_true')
    parser.add_argument('--eval_det', default=False, action='store_true')
    parser.add_argument('--batchsize', type=int, default=32)
    args = parser.parse_args()
    print(args)

    if not (args.segmentation or args.detection):
        raise RuntimeError(
            'at least one of --detection / --segmentation is required')
    if not args.model_path:
        raise RuntimeError('--model_path is required')

    if args.model == 'multi_task_300':
        model = Multi_task_300(
            n_fg_class=len(voc_bbox_label_names), pretrained_model='imagenet',
            detection=args.detection, segmentation=args.segmentation,
            attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(
            n_fg_class=len(voc_bbox_label_names), pretrained_model='imagenet',
            detection=args.detection, segmentation=args.segmentation,
            attention=args.attention)
    model.use_preset('evaluate')

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    use_07 = False  # FIX: default so the coco branch does not leave it unset
    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        label_names = coco_bbox_label_names

    if args.model_path:
        serializers.load_npz(args.model_path, model)

    if args.detection and args.eval_det:
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)
        det_evaluator = MultitaskEvaluator(
            test_iter, model, use_07_metric=use_07,
            label_names=label_names, detection=True)
        result = det_evaluator()
        print('detection result')
        print(result)

    if args.segmentation and args.eval_seg:
        test_mask = VOCSemanticSegmentationDataset(split='val')
        test_mask_iter = chainer.iterators.SerialIterator(
            test_mask, args.batchsize, repeat=False, shuffle=False)
        seg_evaluator = MultitaskEvaluator(
            test_mask_iter, model, use_07_metric=use_07,
            label_names=label_names, detection=False)
        result_mask = seg_evaluator()
        print('segmentation result')
        print(result_mask)
def main():
    """Train SSD on VOC07+12 with ChainerMN and adaptive loss scaling.

    Distributed over a pure-NCCL communicator; evaluation, logging and
    snapshots run only on rank 0. Optionally trains in reduced precision
    (``--dtype``) with AdaLoss-based loss scaling, exporting recorder /
    profiler / sanity-check CSVs at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--np', type=int, default=8)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype', type=str, choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir', default=None, type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir', default=None, type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval', default=None, type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale', default=1, type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method', default='approx_range',
                        type=str, help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound', default=16, type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound', default=1024, type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration', default=1, type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration', default=10000, type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False,
                        action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    # Set up workspace
    # 12 GB GPU RAM for workspace
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)
    chainer.global_config.cv_resize_backend = 'cv2'

    # Setup the data type
    # when initializing models as follows, their data types will be casted.
    # We have to forbid the usage of cudnn
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    # Setup model: apply the AdaLoss transform so every linear/conv link
    # carries an adaptive loss-scaling wrapper.
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
            # 'power_of_two': False,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:
        print(model)

    train_chain = MultiboxTrainChain(model_, comm=comm)
    chainer.cuda.get_device_from_id(device).use()

    # to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

    shared_mem = 100 * 1000 * 1000 * 4

    if args.dataset_dir is not None:
        chainer.dataset.set_dataset_root(args.dataset_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))

    # Rank 0 builds the index permutation; scatter_dataset shards it.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size,
        n_processes=8, n_prefetch=2, shared_mem=shared_mem)

    if comm.rank == 0:  # NOTE: only performed on the first device
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)

    for param in train_chain.params():
        if param.name == 'b':
            # Biases: doubled gradient, no weight decay (SSD convention).
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    # if args.dtype == 'mixed16':
    #     updater.loss_scale = 8
    iteration_interval = (args.iteration, 'iteration')

    trainer = training.Trainer(updater, iteration_interval, args.out)
    # trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
    #                trigger=triggers.ManualScheduleTrigger(
    #                    args.step, 'iteration'))
    if args.batchsize != 32:
        # Linear warmup then step schedule when the batch size deviates
        # from the reference 32.
        warmup_attr_ratio = 0.1
        # NOTE: this is confusing but it means n_iter
        warmup_n_epoch = 1000
        lr_shift = chainerlp.extensions.ExponentialShift(
            'lr', 0.1,
            init=args.lr * warmup_attr_ratio,
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            schedule=args.step)
        trainer.extend(lr_shift, trigger=(1, 'iteration'))

    if comm.rank == 0:
        if not args.profiling:
            trainer.extend(
                DetectionVOCEvaluator(
                    test_iter, model, use_07_metric=True,
                    label_names=voc_bbox_label_names),
                trigger=triggers.ManualScheduleTrigger(
                    args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
            trigger=log_interval)

        metrics = [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]
        if args.dynamic_interval is not None:
            metrics.insert(2, 'loss_scale')
        trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=(args.snapshot_per_n_iteration, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
        if sanity_checker:
            sanity_checker.export().to_csv(
                os.path.join(args.out, 'sanity_check.csv'))
        hook.export_history().to_csv(
            os.path.join(args.out, 'grad_stats.csv'))
def __init__(self, random=None):
    """Hold an image-only view of VOC2012 trainval and an optional RNG.

    Args:
        random: optional random state used by the caller; stored as-is.
    """
    self.random = random
    # Slice away annotations: only the 'img' field of each example is kept.
    self.voc = VOCBboxDataset(year='2012', split='trainval').slice[:, 'img']
def main():
    """Train CenterNet (Hourglass backbone) on VOC07+12 with ChainerMN.

    Rank 0 additionally runs evaluation, logging, and per-epoch snapshots.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=4)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    args = parser.parse_args()

    # Warm up the forkserver with a dummy process before NCCL is initialized,
    # so MultiprocessIterator workers do not fork a CUDA-initialized process.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    print(comm.size)
    device = comm.intra_rank

    num_class = len(voc_bbox_label_names)
    data_augmentation_transform = DataAugmentationTransform(512)
    center_detection_transform = CenterDetectionTransform(512, num_class, 4)

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        data_augmentation_transform)
    # Only rank 0 materializes the dataset; scatter_dataset distributes it.
    if comm.rank == 0:
        train = TransformDataset(train, center_detection_transform)
        if args.mini:
            train = datasets.SubDataset(train, 0, 100)
    else:
        train = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        if args.mini:
            test = datasets.SubDataset(test, 0, 20)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    detector = CenterDetector(HourglassNet, 512, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1, comm=comm)

    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(Adam(amsgrad=True), comm)
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=device)
    trainer = Trainer(updater, (args.epoch, 'epoch'))
    if comm.rank == 0:
        log_interval = 1, 'epoch'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
            'validation/main/map',
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(DetectionVOCEvaluator(test_iter, detector,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=(1, 'epoch'))
        # BUG FIX: the format string previously read '{.updator.epoch:03}';
        # Trainer has no 'updator' attribute, so snapshot_object raised
        # AttributeError when formatting the filename. Use '.updater'.
        trainer.extend(extensions.snapshot_object(
            detector, 'detector{.updater.epoch:03}.npz'),
            trigger=(1, 'epoch'))

    trainer.run()
def main():
    """Train CenterNet on VOC07+12 with single-node multi-GPU data parallelism."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpus', type=str, default="-1")
    parser.add_argument('--batchsize', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    args = parser.parse_args()

    # Parse the comma-separated GPU list, dropping negative (CPU) entries.
    # NOTE(review): with the default "-1" the list is empty and gpus[0]
    # below raises IndexError — this script appears to require >=1 GPU;
    # confirm intended usage.
    gpus = list(filter(lambda x: x >= 0, map(int, args.gpus.split(","))))

    num_class = len(voc_bbox_label_names)
    data_augmentation_transform = DataAugmentationTransform(512)
    center_detection_transform = CenterDetectionTransform(512, num_class, 4)

    train = TransformDataset(
        ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval')
        ),
        data_augmentation_transform
    )
    train = TransformDataset(train, center_detection_transform)
    if args.mini:
        # Tiny subset for smoke-testing the pipeline.
        train = datasets.SubDataset(train, 0, 100)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(
        year='2007', split='test', use_difficult=True, return_difficult=True)
    if args.mini:
        test = datasets.SubDataset(test, 0, 20)
    # Per-device test batch: total batchsize split across the GPUs.
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize // len(gpus), repeat=False, shuffle=False)

    detector = CenterDetector(HourglassNet, 512, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1)

    # Lowest GPU id hosts the master copy; the rest receive replicas.
    gpus.sort()
    first_gpu = gpus[0]
    remain_gpu = gpus[1:]
    train_chain.to_gpu(first_gpu)

    optimizer = Adam(amsgrad=True)
    optimizer.setup(train_chain)

    devices = {
        "main": first_gpu
    }

    # Secondary devices just need unique keys; "2", "3", ... are used here.
    for i, gpu in enumerate(remain_gpu):
        devices[f"{i + 2}"] = gpu

    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        devices=devices,
    )

    log_interval = 1, 'epoch'
    trainer = Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
            'validation/main/map',
        ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, detector, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=(1, 'epoch'))
    # Snapshot the raw detector (not the train chain) every epoch.
    trainer.extend(
        extensions.snapshot_object(detector, 'detector{.updater.epoch:03}.npz'),
        trigger=(1, 'epoch')
    )
    trainer.run()
def main():
    """Train a ResNet-50 multi-label classifier on VOC2007 bbox labels."""
    parser = argparse.ArgumentParser(
        description='Chainer Multi-label classification')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize', '-b', type=int, default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    model = get_resnet_50(len(voc_bbox_label_names))
    # Use the 'fc6' output as the classification head.
    model.pick = 'fc6'
    train_chain = MultiLabelClassifier(model,
                                       loss_scale=len(voc_bbox_label_names))

    # Bounding-box annotations are reduced to per-image multi-label targets.
    train = VOCBboxDataset(year='2007', split='trainval', use_difficult=False)
    train = TransformDataset(train, ('img', 'bbox'), bbox_to_multi_label)

    test = VOCBboxDataset(year='2007', split='test', use_difficult=False)
    test = TransformDataset(test, ('img', 'bbox'), bbox_to_multi_label)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.001)
    optimizer.setup(train_chain)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # 11 epochs total; lr decays x0.1 at epochs 8 and 10 (see below).
    stop_trigger = (11, 'epoch')
    log_interval = (20, 'iteration')

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu,
                                                converter=converter)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter, train_chain, device=args.gpu,
                             converter=converter))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=triggers.ManualScheduleTrigger([8, 10], 'epoch'))
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'lr', 'epoch', 'elapsed_time',
        'main/loss', 'main/recall', 'main/precision',
        'main/n_pred', 'main/n_pos',
        'validation/main/loss', 'validation/main/recall',
        'validation/main/precision', 'validation/main/n_pred',
        'validation/main/n_pos',
    ]), trigger=log_interval)
    # Snapshot the bare model (without the classifier wrapper).
    trainer.extend(extensions.snapshot_object(model, 'snapshot_model.npz'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def setUp(self):
    """Build the dataset under test and record the expected tuple length."""
    # Requesting the `difficult` flags adds a fourth element per example.
    if self.return_difficult:
        self.n_out = 4
    else:
        self.n_out = 3
    self.dataset = VOCBboxDataset(
        split=self.split, year=self.year,
        use_difficult=self.use_difficult,
        return_difficult=self.return_difficult)
def main():
    """Train SSD on VOC07+12 with adaptive loss scaling (AdaLoss) support.

    Records loss-scale / profiling / sanity-check traces to CSV under --out.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype', type=str, choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir', default=None, type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir', default=None, type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval', default=None, type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale', default=1, type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method', default='approx_range',
                        type=str, help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound', default=32800, type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound', default=32800, type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration', default=100, type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration', default=10000, type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False, action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Setting data types
    if args.dtype != 'float32':
        # cuDNN kernels are disabled for reduced-precision runs here.
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    # Initialize model
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    # Apply adaptive loss scaling
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    # Finalize the model
    train_chain = MultiboxTrainChain(model_)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cp.random.seed(0)
        # NOTE: we have to transfer modules explicitly to GPU
        model.coder.to_gpu()
        model.extractor.to_gpu()
        model.multibox.to_gpu()

    # Prepare dataset
    # NOTE(review): --dataset-dir is parsed but never used, while --model-dir
    # sets the Chainer dataset root below — confirm which is intended.
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))
    # train_iter = chainer.iterators.MultiprocessIterator(
    #     train, args.batchsize)  # , n_processes=8, n_prefetch=2)
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.test_batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update
        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    # SSD convention: double the lr for biases, weight-decay everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(args.step,
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    # Log the optimizer's internal loss scale alongside the usual metrics.
    trainer.extend(extensions.observe_value(
        'loss_scale',
        lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
        trigger=log_interval)

    metrics = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    if args.dynamic_interval is not None:
        metrics.insert(2, 'loss_scale')

    trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    with ExitStack() as stack:
        stack.enter_context(hook)
        trainer.run()

    # Export recorded traces for offline analysis.
    recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
    profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
    if sanity_checker:
        sanity_checker.export().to_csv(
            os.path.join(args.out, 'sanity_check.csv'))
    hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Train SSD300/SSD512 on VOC07+12 (single GPU)."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Transform encodes ground truth into multibox targets via model.coder.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD convention: double lr for biases, weight-decay everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(args.step,
                                                          'iteration'))

    # Evaluate at each lr step and at the final iteration.
    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a Darknet53 classifier backbone on VOC image crops."""
    parser = argparse.ArgumentParser(description='Chainer Darknet53 Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=100000)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='darknet53-voc-result')
    # BUG FIX: '--seed' previously had no type=, so a CLI-supplied value
    # arrived as a str and np.random.seed(str) raised TypeError. The default
    # is unchanged, so existing invocations behave identically.
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--validation_size', type=int, default=2048)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    # Seed every RNG in play for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)

    darknet53 = Darknet53(20)
    model = L.Classifier(darknet53)

    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        # Multi-GPU transfer is handled by ParallelUpdater below.
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0005),
                       'hook_decay')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=True, jitter=0.2,
                           hue=0.1, sat=.75, val=.75)
    test = YOLOVOCDataset(test, classifier=True, crop_size=(256, 256))
    # Evaluate on a fixed-size random subset to bound validation cost.
    test = test[np.random.permutation(np.arange(
        len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices=devices)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)
    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(test_iter, model, device=device),
                   trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'iteration', display_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'iteration', display_interval, file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))

    # Keep the best model (by validation loss) and a rolling final snapshot.
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_snapshot.npz'),
                   trigger=training.triggers.MinValueTrigger(
                       'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(darknet53,
                                              'darknet53_final.npz'),
                   trigger=snapshot_interval)

    trainer.extend(DarknetShift(optimizer, 'poly', args.iteration))
    trainer.extend(CropSizeUpdater(train,
                                   [(4 + i) * 32 for i in range(0, 11)]))

    trainer.run()
def main():
    """Train SSD300/SSD512 on VOC07+12 across multiple nodes via ChainerMN."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One process per GPU; intra_rank picks the local device.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Scatter only the index array; each worker slices its own shard.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.test_batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # SSD convention: double lr for biases, weight-decay everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(args.step,
                                                          'iteration'))

    # Evaluation, logging and snapshots run on rank 0 only.
    if comm.rank == 0:
        trainer.extend(DetectionVOCEvaluator(
            test_iter, model,
            use_07_metric=True,
            label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train FPN-SSD / SSD300 / SSD512 on VOC or COCO (single GPU)."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('fpn', 'ssd300', 'ssd512'), default='fpn')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--data_dir', type=str, default='auto')
    parser.add_argument('--dataset', choices=['voc', 'coco'], default='voc')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--init_scale', type=float, default=1e-2)
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.dataset == 'voc':
        # 'auto' defers path resolution to the dataset class itself.
        train = ConcatenatedDataset(
            VOCBboxDataset(
                year='2007', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
                if args.data_dir != 'auto' else args.data_dir),
            VOCBboxDataset(
                year='2012', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2012')
                if args.data_dir != 'auto' else args.data_dir))
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True,
            data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
            if args.data_dir != 'auto' else args.data_dir)
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        # todo: use train+valminusminival(=coco2017train)
        # https://github.com/chainer/chainercv/issues/651
        train = COCOBboxDataset(data_dir=args.data_dir, split='train')
        test = COCOBboxDataset(data_dir=args.data_dir, split='val')
        label_names = coco_bbox_label_names

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'fpn':
        model = FPNSSD(
            n_fg_class=len(label_names),
            pretrained_model='imagenet',
            init_scale=args.init_scale)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD convention: double lr for biases, weight-decay everything else.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    # NOTE(review): the VOC evaluator (07 metric) is used even when
    # --dataset coco is selected — confirm this is intended.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=label_names),
        trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()