def main():
    """Train an SSD300/SSD512/FPN-SSD detector on VOC or COCO with Chainer.

    Command-line driven: builds dataset, model, optimizer and a 120k-iteration
    Trainer with evaluation/logging/snapshot extensions, then runs training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('fpn', 'ssd300', 'ssd512'), default='fpn')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--data_dir', type=str, default='auto')
    parser.add_argument('--dataset', choices=['voc', 'coco'], default='voc')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--init_scale', type=float, default=1e-2)
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.dataset == 'voc':
        # VOC07+12 trainval for training; 'auto' delegates dataset location
        # (and download) to ChainerCV's default.
        train = ConcatenatedDataset(
            VOCBboxDataset(
                year='2007', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
                if args.data_dir != 'auto' else args.data_dir),
            VOCBboxDataset(
                year='2012', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2012')
                if args.data_dir != 'auto' else args.data_dir))
        # VOC07 test with difficult objects kept, as the 07-metric evaluation
        # protocol requires (use_difficult/return_difficult).
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True,
            data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
            if args.data_dir != 'auto' else args.data_dir)
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        # todo: use train+valminusminival(=coco2017train)
        # https://github.com/chainer/chainercv/issues/651
        train = COCOBboxDataset(data_dir=args.data_dir, split='train')
        test = COCOBboxDataset(data_dir=args.data_dir, split='val')
        label_names = coco_bbox_label_names

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(label_names),
            pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(label_names),
            pretrained_model='imagenet')
    elif args.model == 'fpn':
        model = FPNSSD(
            n_fg_class=len(label_names),
            pretrained_model='imagenet',
            init_scale=args.init_scale)

    # 'evaluate' preset sets the score/NMS thresholds used by the evaluator.
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Encode ground truth into multibox targets on the fly.
    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Biases: doubled gradient, no weight decay (standard SSD recipe).
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    # lr x0.1 at 80k and 100k iterations.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=label_names),
        trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full trainer snapshot every 10k iterations; final model at 120k.
    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN instance-segmentation model, then run inference.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must carry ``dataset``, ``class_names``, ``logs_dir``, ``min_size``,
        ``max_size``, ``anchor_scales`` and ``ratios`` (validated below), plus
        the other attributes read here (``multi_node``, ``gpu``, ``model``,
        ``pooling_func``, ``initializer``, ``batch_size_per_gpu``,
        ``max_epoch``, ``roi_size``, ``resume``). Several fields
        (``out``, ``lr``, ``seed``, ...) are written back onto ``args`` so
        they end up in the dumped params report.
    train_data, test_data : chainer datasets
        Raw datasets; wrapped with ``MaskRCNNTransform`` here.
    evaluator_type : str
        ``'voc'`` or ``'coco'``.
    """
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
        'ratios',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key))

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    # args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))
    args.out = ('../../../result/omg_instance_segmentation/mask_rnn_log/'
                + now.strftime('%Y%m%d_%H%M%S'))
    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # lr: 0.00125 * 8 = 0.01 in original
    # args.lr = 0.00125 * args.batch_size
    args.lr = 0.00125
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError(
            'Unsupported pooling_func: {}'.format(args.pooling_func))

    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError(
            'Unsupported initializer: {}'.format(args.initializer))

    # NOTE: n_fg_class is hard-coded to 1 (single foreground class) instead
    # of len(args.class_names) — intentional for this single-class dataset.
    if args.model == 'vgg16':
        mask_rcnn = cmr.models.MaskRCNNVGG16(
            n_fg_class=1,
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            ratios=args.ratios,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=1,
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Rank 0 holds the data; scatter shards to the other workers.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data,
        batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data,
        batch_size=args.batch_size_per_gpu,
        repeat=False,
        shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter,
        optimizer,
        device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater,
        (args.max_epoch, 'epoch'),
        out=args.out,
    )

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size,
            'epoch',
        ),
    )

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, model.mask_rcnn)

    eval_interval = 1000, 'iteration'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'
    snapshot_interval = 5000, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type))
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        # BUGFIX: the filename must be a template that the snapshot extension
        # formats with the trainer at trigger time.  The original code called
        # '...{}.npz'.format(trainer.updater.epoch) at setup time (epoch 0),
        # so every snapshot silently overwrote 'snapshot_model_0.npz' /
        # 'snapshot_trainer_iter-0.npz'.
        trainer.extend(
            extensions.snapshot_object(
                model.mask_rcnn,
                'snapshot_model_{.updater.epoch}.npz'),
            trigger=snapshot_interval,
        )
        trainer.extend(
            extensions.snapshot(
                filename='snapshot_trainer_iter-{.updater.iteration}.npz'),
            trigger=snapshot_interval,
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration',
                'epoch',
                'elapsed_time',
                'lr',
                'main/loss',
                'main/roi_loc_loss',
                'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss',
                'main/rpn_cls_loss',
                'validation/main/map',
            ], ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png',
                trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    # NOTE(review): training is deliberately disabled here; the function is
    # currently used as inference-only (weights come from --resume above).
    # Uncomment to actually train.
    # trainer.run()

    def visualize(imgs, bboxes, masks, labels, scores):
        """Draw detections above score_thresh on each image; return drawn images."""
        score_thresh = 0.7
        print(imgs.shape)
        dst = []
        for img, bbox, mask, label, score \
                in zip(imgs, bboxes, masks, labels, scores):
            keep = score >= score_thresh
            bbox = bbox[keep]
            label = label[keep]
            mask = mask[keep]
            score = score[keep]
            captions = []
            for p_score in score:
                caption = 'leaf {:.1%}'.format(p_score)
                captions.append(caption)
            # label + 1 / bg_class=0: shift fg labels so 0 is background.
            viz = cmr.utils.draw_instance_bboxes(
                img, bbox, label + 1, n_class=2, masks=mask,
                captions=captions, bg_class=0)
            dst.append(viz)
        return dst

    predict_dir = \
        'I:/ykato_git/datasets/omg_instance_segmentation/dataset_DIA/image'
    mask_dir = \
        'I:/ykato_git/datasets/omg_instance_segmentation/dataset_DIA/label'
    save_dir = './predict_DIA_noMask'
    # ROBUSTNESS: cv2.imwrite fails silently when the directory is missing.
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    fnames = os.listdir(predict_dir)
    for fname in fnames:
        print(osp.join(predict_dir, fname))
        img = cv2.imread(osp.join(predict_dir, fname))
        # mask = cv2.imread(osp.join(mask_dir, fname)) / 255
        # white = np.ones(img.shape) * 255
        # img = img * mask + white * (1-mask)
        # HWC -> NCHW uint8, batch of one.
        img = img.transpose(2, 0, 1)[None, :, :, :].astype(np.uint8)
        bboxes, masks, labels, scores = model.mask_rcnn.predict(img)
        output = visualize(
            img.transpose(0, 2, 3, 1), bboxes, masks, labels, scores)
        for p_output in output:
            cv2.imwrite(osp.join(save_dir, fname), p_output)
def main():
    """Train an FCN8s mirror-segmentation model (Chainer), initialized from VGG16."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--gpu', default=0, type=int, help='GPU id')
    parser.add_argument('-d', '--dataset', type=str, required=True,
                        help='Dataset class name')
    parser.add_argument('-m', '--model', type=str, required=True,
                        help='Model class name')
    args = parser.parse_args()

    gpu = args.gpu

    # 0. config
    # Timestamped output directory under <here>/logs.
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    out = timestamp
    out = osp.join(osp.dirname(here), 'logs', out)

    max_iter_epoch = 100, 'epoch'
    progress_bar_update_interval = 10  # iteration
    print_interval = 100, 'iteration'
    log_interval = 100, 'iteration'
    test_interval = 10, 'epoch'
    save_interval = 10, 'epoch'

    # 1. dataset
    if args.dataset == 'LabeledMirrorDataset':
        dataset_train = LabeledMirrorDataset(split='train', aug=True)
        dataset_valid = LabeledMirrorDataset(split='test', aug=False)
    else:
        print('Invalid dataset class.')
        exit(1)

    dataset_train_transformed = TransformDataset(dataset_train, transform)
    dataset_valid_transformed = TransformDataset(dataset_valid, transform)

    iter_train = chainer.iterators.MultiprocessIterator(
        dataset_train_transformed, batch_size=1, shared_mem=10**7)
    iter_valid = chainer.iterators.MultiprocessIterator(
        dataset_valid_transformed, batch_size=1, shared_mem=10**7,
        repeat=False, shuffle=False)

    # 2. model
    # Download pretrained VGG16 weights and use them to initialize the FCN.
    vgg = fcn.models.VGG16()
    vgg_path = vgg.download()
    chainer.serializers.load_npz(vgg_path, vgg)

    n_class = len(dataset_train.class_names)

    if args.model == 'FCN8sMirrorSegmentation':
        model = FCN8sMirrorSegmentation(n_class=n_class)
    else:
        print('Invalid model class.')
        exit(1)

    model.init_from_vgg16(vgg)

    if gpu >= 0:
        cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    # 3. optimizer
    optimizer = chainer.optimizers.Adam(alpha=1.0e-5)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    updater = chainer.training.updater.StandardUpdater(
        iter_train, optimizer, device=gpu)

    trainer = chainer.training.Trainer(updater, max_iter_epoch, out=out)

    # Slow exponential decay of Adam's alpha every iteration.
    trainer.extend(extensions.ExponentialShift("alpha", 0.99999))

    # Record the run configuration alongside the logs.
    if not osp.isdir(out):
        os.makedirs(out)
    with open(osp.join(out, 'dataset.txt'), 'w') as f:
        f.write(dataset_train.__class__.__name__)
    with open(osp.join(out, 'model.txt'), 'w') as f:
        f.write(model.__class__.__name__)
    with open(osp.join(out, 'n_class.txt'), 'w') as f:
        f.write(str(n_class))

    # trainer.extend(
    #     extensions.snapshot_object(
    #         model,
    #         savefun=chainer.serializers.save_npz,
    #         filename='iter_{.updater.iteration}.npz'),
    #     trigger=save_interval)
    # Keep only the weights that achieved the best validation mIoU.
    trainer.extend(extensions.snapshot_object(
        model,
        savefun=chainer.serializers.save_npz,
        filename='max_miou.npz'),
        trigger=chainer.training.triggers.MaxValueTrigger(
            'validation/main/miou', save_interval))
    trainer.extend(
        extensions.dump_graph(root_name='main/loss', out_name='graph.dot'))
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(chainer.training.extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/miou',
        'validation/main/miou',
    ]), trigger=print_interval)
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.ProgressBar(update_interval=progress_bar_update_interval))
    trainer.extend(extensions.Evaluator(iter_valid, model, device=gpu),
                   trigger=test_interval)

    trainer.run()
def main():
    """Train Faster R-CNN (VGG16) on PASCAL VOC — ChainerCV training example."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCDetectionDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCDetectionDataset(year='2007', split='trainval'),
            VOCDetectionDataset(year='2012', split='trainval'))
    # VOC07 test keeps 'difficult' objects, as the 07 metric requires.
    test_data = VOCDetectionDataset(split='test', year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(voc_detection_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    # Faster R-CNN trains with batch_size=1 (variable image sizes).
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Save only the detector (not the train chain) at the end of training.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'],
                                  file_name='loss.png',
                                  trigger=plot_interval),
            trigger=plot_interval)

    # Evaluate at the lr-drop point and at the end of training.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train an image classifier on a Food-101-style dataset with Chainer.

    Loads the model class dynamically from --model_file/--model_name, runs
    multi-GPU training via ParallelUpdater, and keeps the weights with the
    lowest validation loss.
    """
    args = parse_args()
    chainer.global_config.autotune = True
    #chainer.set_debug(True)
    # Set the random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Set up Devices
    devices = utils.setup_devices(args.gpus)

    # Load model
    # Convert a filesystem path (e.g. models/net.py) into a dotted module
    # path and import the model class from it by name.
    ext = os.path.splitext(args.model_file)[1]
    model_path = '.'.join(os.path.split(args.model_file)).replace(ext, '')
    model = import_module(model_path)
    model = getattr(model, args.model_name)(args.output_class)
    #model = L.Classifier(model)
    model.to_gpu()

    # create result dir
    # Copy the model source and dump the arguments for reproducibility.
    result_dir = create_result_dir(args.model_name)
    shutil.copy(args.model_file,
                os.path.join(result_dir, os.path.basename(args.model_file)))
    with open(os.path.join(result_dir, 'args'), 'w') as fp:
        fp.write(json.dumps(vars(args)))
    print(json.dumps(vars(args), sort_keys=True, indent=4))

    # Create Dataset
    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = ImagenetDataset(args.train_list, args.train_image)
    valid = ImagenetDataset(args.val_list, args.val_image)
    train_transform = partial(
        transform.food101_transform, mean=mean,
        random_angle=args.random_angle,
        expand_ratio=args.expand_ratio,
        crop_size=args.crop_size, train=True)
    valid_transform = partial(
        transform.food101_transform, mean=mean, train=False)
    train = TransformDataset(train, train_transform)
    valid = TransformDataset(valid, valid_transform)

    # Create Iterator
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=4)
    val_iter = chainer.iterators.MultiprocessIterator(
        valid, args.batchsize, shuffle=False, repeat=False, n_processes=4)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    #val_iter = chainer.iterators.SerialIterator(valid, args.batchsize, repeat=False, shuffle=False)

    # Set Optimizer
    optimizer = optimizers.MomentumSGD(lr=args.initial_lr, momentum=0.9)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    # optimizer.use_fp32_update()

    # Updater
    updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)

    # Trainer
    trainer = training.Trainer(
        updater, (args.training_epoch, 'epoch'), result_dir)

    # Trainer Extensions
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.Evaluator(val_iter, model, device=devices['main']),
        name='val')
    # Step lr decay every lr_decay_epoch epochs.
    trainer.extend(extensions.ExponentialShift('lr', args.lr_decay_rate),
                   trigger=(args.lr_decay_epoch, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy',
            'val/main/loss', 'val/main/accuracy', 'elapsed_time', 'lr'
        ]))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/main/loss'],
                                  x_key='epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                                  x_key='epoch', file_name='accuracy.png'))

    # Print progress bar
    trainer.extend(extensions.ProgressBar())

    # Save the model which minimizes validation loss
    trainer.extend(
        extensions.snapshot_object(model, filename='bestmodel.npz'),
        trigger=training.triggers.MinValueTrigger('val/main/loss'))

    trainer.run()
def main():
    """Two-stage training: (1) cluster classes, (2) train a hierarchical classifier.

    Stage 1 trains ``model`` to assign the many output classes to
    ``num_clusters`` clusters (or uses a random assignment with --random).
    Stage 2 wraps the clustering network in a HierarchicalNetwork and trains
    per-cluster classification with a second optimizer.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=256,
                        help='Number of images in each mini-batch')
    parser.add_argument('--batchsize2', '-b2', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--data_type', '-d', type=str, default='LSHTC1')
    parser.add_argument('--model_type', '-m', type=str, default='DocModel')
    parser.add_argument('--model_path', '-mp', type=str,
                        default='./models/ResNet50_model_500.npz')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--cluster', '-c', type=int, default=100)
    parser.add_argument('--weight_decay', '-w', type=float, default=0.0000)
    parser.add_argument('--unit', '-u', type=int, default=300)
    parser.add_argument('--alpha', '-a', type=float, default=0.005)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--epoch2', '-e2', type=int, default=10)
    parser.add_argument('--mu', '-mu', type=float, default=30.0)
    parser.add_argument('--out', '-o', type=str, default='results')
    parser.add_argument('--train_file', '-train_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.train')
    parser.add_argument('--test_file', '-test_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.test')
    parser.add_argument('--train_instance', '-train_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--train_label', '-train_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_instance', '-test_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_label', '-test_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--resume2', '-r2', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--optimizer', '-op', type=str, default='Adam')
    parser.add_argument('--optimizer2', '-op2', type=str, default='Adam')
    parser.add_argument('--initial_lr', type=float, default=0.05)
    parser.add_argument('--lr_decay_rate', type=float, default=0.5)
    parser.add_argument('--lr_decay_epoch', type=float, default=25)
    parser.add_argument('--random', action='store_true', default=False,
                        help='Use random assignment or not')
    parser.add_argument('--valid', '--v', action='store_true',
                        help='Use random assignment or not')
    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Unpack frequently-used arguments into locals.
    gpu = args.gpu
    data_type = args.data_type
    model_type = args.model_type
    num_clusters = args.cluster
    initial_lr = args.initial_lr
    lr_decay_rate = args.lr_decay_rate
    lr_decay_epoch = args.lr_decay_epoch
    opt1 = args.optimizer
    opt2 = args.optimizer2
    model_path = args.model_path
    rand_assign = args.random
    train_file = args.train_file
    test_file = args.test_file
    unit = args.unit
    alpha = args.alpha

    sparse = False
    ndim = 1
    n_in = None
    train_transform = None
    test_transform = None

    # Select the stage-1 (clustering) network per dataset/model type.
    # NOTE(review): for LSHTC1/Dmoz num_classes is None here and is filled in
    # by load_data below.
    if data_type == 'toy':
        model = network.LinearModel(2, 2)
        num_classes = 4
    elif data_type == 'mnist':
        num_classes = 10
        if model_type == 'linear':
            model = network.LinearModel(784, num_clusters)
        elif model_type == 'DNN':
            model = network.MLP(1000, num_clusters)
        elif model_type == 'CNN':
            ndim = 3
            model = network.CNN(num_clusters)
        else:
            raise ValueError
    elif data_type == 'cifar100':
        num_classes = 100
        train_transform = partial(
            dataset.transform, mean=0.0, std=1.0, train=True)
        test_transform = partial(
            dataset.transform, mean=0.0, std=1.0, train=False)
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
            n_in = 2048
            # Load pretrained weights except the final layer.
            load_npz(model_path, model, not_load_list=['fc7'])
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
            n_in = 1024
            load_npz(model_path, model, not_load_list=['fc6'])
        else:
            raise ValueError
    elif data_type == 'LSHTC1':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=1024, n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'DocModel2':
            model = network.DocModel2(n_in=1024, n_mid=unit,
                                      n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=92586, n_out=num_clusters)
        else:
            raise ValueError
    elif data_type == 'Dmoz':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=561127, n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=1024, n_out=num_clusters)
        else:
            raise ValueError
    else:
        num_classes = 10
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
        elif model_type == 'Resnet101':
            model = network.ResNet101(num_clusters)
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
        elif model_type == 'CNN':
            model = network.CNN(num_clusters)
        else:
            raise ValueError

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    (train_instances, train_labels), (test_instances, test_labels), num_classes \
        = load_data(data_type, ndim, train_file, test_file)

    if rand_assign:
        # Skip clustering training: assign classes to clusters randomly.
        assignment, count_classes = random_assignment(num_clusters,
                                                      num_classes)
    else:
        # Stage 1: train the clustering network.
        if opt1 == 'Adam':
            optimizer = chainer.optimizers.Adam(alpha=alpha)
        else:
            optimizer = chainer.optimizers.SGD(lr=alpha)
        optimizer.setup(model)

        train = Dataset(*(train_instances, train_labels), sparse)
        test = Dataset(*(test_instances, test_labels), sparse)

        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=args.batchsize)
        train_updater = Updater(model, train, train_iter, optimizer,
                                num_clusters=num_clusters,
                                device=gpu, mu=args.mu)

        trainer = training.Trainer(train_updater, (args.epoch, 'epoch'),
                                   out=args.out)
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'iteration', 'main/loss', 'main/loss_cc',
                'main/loss_mut_info', 'main/H_Y', 'main/H_YX', 'elapsed_time'
            ]))
        trainer.extend(extensions.snapshot(), trigger=(5, 'epoch'))
        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)

        trainer.run()
        """ end clustering """
        """
        res, ss = check_cluster(model, train, num_classes,
                                num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j]
                            for j in range(num_clusters))
        print(res, res_sum, ss)
        """
        """
        res, ss = check_cluster(model, test, num_classes,
                                num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j]
                            for j in range(num_clusters))
        """
        # Derive the class->cluster assignment from the trained network.
        cluster_label = separate.det_cluster(model, train, num_classes,
                                             batchsize=128, device=gpu,
                                             sparse=sparse)

        assignment, count_classes = separate.assign(cluster_label,
                                                    num_classes, num_clusters)

        # Free stage-1 objects before building the stage-2 pipeline.
        del optimizer
        del train_iter
        del train_updater
        del trainer
        del train
        del test

    print(count_classes)
    """ start classification """
    # Stage 2: hierarchical classification on top of the clustering network.
    model = h_net.HierarchicalNetwork(model, num_clusters, count_classes,
                                      n_in=n_in)
    if opt2 == 'Adam':
        optimizer2 = chainer.optimizers.Adam(alpha=initial_lr)
    elif opt2 == 'SGD':
        optimizer2 = chainer.optimizers.SGD(lr=initial_lr)
    else:
        optimizer2 = chainer.optimizers.MomentumSGD(lr=initial_lr)
    optimizer2.setup(model)
    if args.weight_decay > 0:
        optimizer2.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    train = dataset.Dataset(train_instances, train_labels, assignment,
                            _transform=train_transform, sparse=sparse)
    test = dataset.Dataset(test_instances, test_labels, assignment,
                           _transform=test_transform, sparse=sparse)

    train_iter = chainer.iterators.SerialIterator(
        train, batch_size=args.batchsize2)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size=1, repeat=False)
    train_updater = updater.Updater(model, train, train_iter, optimizer2,
                                    num_clusters, device=gpu)

    trainer = training.Trainer(train_updater, (args.epoch2, 'epoch'),
                               args.out)

    acc = accuracy.Accuracy(model, assignment, num_clusters)
    trainer.extend(extensions.Evaluator(test_iter, acc, device=gpu))

    """
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(20, 'epoch'))
    """

    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/loss_cluster', 'main/loss_class',
            'validation/main/accuracy', 'validation/main/cluster_accuracy',
            'validation/main/loss', 'elapsed_time'
        ]))
    if opt2 != 'Adam':
        # Step lr decay only for SGD-family optimizers.
        trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                       trigger=(lr_decay_epoch, 'epoch'))

    if args.resume2:
        chainer.serializers.load_npz(args.resume2, trainer)

    trainer.run()
def main():
    """Train VGG16 on CIFAR-10/100, optionally with variational dropout
    (VD), then benchmark dense vs. sparse inference speed.

    Three modes, selected by flags:
      * ``--pretrain``: plain VGG16 without VD (warm-up phase).
      * ``--resume``:   load a VD snapshot and fine-tune it with a small lr.
      * default:        train VGG16 with VD from scratch (slow KL warm-up).
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    # NOTE(review): no type=int here, so a command-line value of "0" is the
    # *string* '0' (truthy); only the default behaves as "off" — confirm.
    parser.add_argument('--pretrain', default=0,
                        help='Pretrain (w/o VD) or not (w/ VD).' +
                        ' default is not (0).')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--resume-opt', '-ro', default='',
                        help='Resume optimizer the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    print('# train:', len(train))
    print('# test :', len(test))

    if args.pretrain:
        # Plain VGG16: attach a custom loss function that also reports
        # accuracy, mirroring what L.Classifier would report.
        model = nets.VGG16(class_labels)

        def calc_loss(x, t):
            # Cache outputs on the model so extensions can inspect them.
            model.y = model(x)
            model.loss = F.softmax_cross_entropy(model.y, t)
            reporter.report({'loss': model.loss}, model)
            model.accuracy = F.accuracy(model.y, t)
            reporter.report({'accuracy': model.accuracy}, model)
            return model.loss
        model.calc_loss = calc_loss
        model.use_raw_dropout = True
    elif args.resume:
        # Fine-tuning a VD snapshot: full KL weight (warm_up=1.) from the
        # start; the forward pass fixes in_channels before loading weights.
        model = nets.VGG16VD(class_labels, warm_up=1.)
        # model = nets.VGG16VD(class_labels, warm_up=0.0001)
        model(train[0][0][None, ])  # for setting in_channels automatically
        model.to_variational_dropout()
        chainer.serializers.load_npz(args.resume, model)
    else:
        # From-scratch VD training with a slow KL warm-up.
        model = nets.VGG16VD(class_labels, warm_up=0.0001)
        model(train[0][0][None, ])  # for setting in_channels automatically
        model.to_variational_dropout()

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    if args.pretrain:
        # Original Torch code (http://torch.ch/blog/2015/07/30/cifar.html)
        # uses lr=1. However, it doesn't work well as people say in the post.
        # This follows a version of Chainer example using lr=0.1.
        optimizer = chainer.optimizers.MomentumSGD(0.1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
        # optimizer.add_hook(chainer.optimizer.GradientClipping(0.1))
    elif args.resume:
        optimizer = chainer.optimizers.Adam(1e-5)
        optimizer.setup(model)
    else:
        optimizer = chainer.optimizers.Adam(1e-4)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(10.))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    if args.resume:
        # Report the accuracy of the loaded snapshot before fine-tuning.
        classifier = L.Classifier(model.copy())
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=args.gpu)()['main/accuracy']
        print('test accuracy VD:', accuracy)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu,
                                       loss_func=model.calc_loss)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, L.Classifier(model),
                             device=args.gpu))

    if args.pretrain:
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(25, 'epoch'))
    elif not args.resume:
        # Linearly anneal Adam's alpha to zero over the whole run.
        trainer.extend(
            extensions.LinearShift(
                'alpha', (1e-4, 0.),
                (0, args.epoch * len(train) // args.batchsize)))

    # Take a snapshot at each epoch
    # trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    if args.pretrain:
        trainer.extend(extensions.snapshot_object(
            model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    # trainer.extend(extensions.LogReport())
    # Log at most every 1000 iterations, or twice per epoch if that is less.
    per = min(len(train) // args.batchsize // 2, 1000)
    trainer.extend(extensions.LogReport(trigger=(per, 'iteration')))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'main/class',
            'main/kl', 'main/mean_p', 'main/sparsity', 'main/W/Wnz',
            'main/kl_coef', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()

    print('Measure inference speeds for 1 sample inference...')
    # Re-create the test iterator with batch size 1 for per-sample timing.
    test_iter = chainer.iterators.SerialIterator(test, 1,
                                                 repeat=False, shuffle=False)
    if not args.pretrain:
        # Time a full pass over the test set in three configurations:
        # dense on GPU, dense on CPU, and sparse (pruned) on CPU.
        if args.gpu >= 0:
            classifier = L.Classifier(model.copy())
            start = time.time()
            accuracy = extensions.Evaluator(
                test_iter, classifier, device=args.gpu)()['main/accuracy']
            print('dense Gpu:', time.time() - start,
                  's/{} imgs'.format(len(test)))
        model.to_cpu()
        classifier = L.Classifier(model.copy())
        start = time.time()
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=-1)()['main/accuracy']
        print('dense Cpu:', time.time() - start,
              's/{} imgs'.format(len(test)))
        model.to_cpu_sparse()
        model.name = None
        # NOTE(review): deepcopy here instead of copy() — presumably the
        # sparse conversion shares buffers; confirm against nets.to_cpu_sparse.
        classifier = L.Classifier(copy.deepcopy(model))
        start = time.time()
        accuracy = extensions.Evaluator(
            test_iter, classifier, device=-1)()['main/accuracy']
        print('sparse Cpu:', time.time() - start,
              's/{} imgs'.format(len(test)))
def gan_training(args, train):
    """Set up and run the Flow GAN training loop.

    Args:
        args: parsed command-line namespace; reads batchsize, loaderjob,
            video_len, gpu, iteration, out, snapshot_interval,
            visualize_interval, log_interval, adam_decay_iteration, resume.
        train: training dataset of images.
    """
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    if args.loaderjob:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Prepare Flow GAN model, defined in net.py
    gen = net.Generator(video_len=args.video_len)
    dis = net.Discriminator()

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()
    # NOTE(review): xp is assigned but never used later in this function.
    xp = np if args.gpu < 0 else cuda.cupy

    opt_gen = make_optimizer(gen, args)
    opt_dis = make_optimizer(dis, args)

    # Updater
    updater = GAN_Updater(models=(gen, dis), iterator=train_iter,
                          optimizer={
                              'gen': opt_gen,
                              'dis': opt_dis
                          }, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Trigger tuples of the form (interval, 'iteration').
    snapshot_interval = (args.snapshot_interval), 'iteration'
    visualize_interval = (args.visualize_interval), 'iteration'
    log_interval = (args.log_interval), 'iteration'

    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(
        extensions.PlotReport(['gen/loss', 'dis/loss'], trigger=log_interval,
                              file_name='plot.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'gen/loss', 'dis/loss']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Periodic snapshots of the whole trainer state and of each model.
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iteration_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iteration_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    # Visualization extension built by the module-level `extension` factory.
    trainer.extend(extension(gen, args), trigger=visualize_interval)

    if args.adam_decay_iteration:
        # Halve Adam's alpha on both optimizers every adam_decay_iteration.
        trainer.extend(extensions.ExponentialShift("alpha", 0.5,
                                                   optimizer=opt_gen),
                       trigger=(args.adam_decay_iteration, 'iteration'))
        trainer.extend(extensions.ExponentialShift("alpha", 0.5,
                                                   optimizer=opt_dis),
                       trigger=(args.adam_decay_iteration, 'iteration'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Multi-node (ChainerMN) SSD training on the EpicKitchens 2018
    bounding-box dataset with a single foreground class."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # One process per GPU; intra_rank selects the GPU on this node.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=1,
            pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=1,
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Only rank 0 builds the index array; scatter_dataset broadcasts slices
    # of it to every worker.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(
        train, ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # SSD convention: biases get doubled gradients, other params get
    # weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (12, 'epoch'), args.out)
    # lr starts at args.lr and is divided by 10 at epochs 8 and 10.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([8, 10], 'epoch'))

    # Reporting and snapshots only on the root rank.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(
            log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'iteration', 'lr',
                 'main/loss', 'main/loss/loc', 'main/loss/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN (ResNet backbone) on the given datasets.

    Args:
        args: namespace of options; must contain the keys checked below and
            is mutated in place (out, lr, batch_size, step_size, ... are
            derived and stored back for params.yaml dumping).
        train_data: training dataset (instance segmentation annotations).
        test_data: evaluation dataset.
        evaluator_type: 'voc' or 'coco', selects the mAP evaluator.
    """
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key))

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    # Device selection: one GPU per process under ChainerMN, otherwise the
    # --gpu option is mandatory.
    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    # Output dir: <logs_dir>/<basename(dataset_dir)_MMDD>
    outdir = osp.basename(args.dataset_dir + now.strftime('_%m%d'))
    args.out = osp.join(args.logs_dir, outdir)

    # Scale lr linearly with the total batch size.
    args.batch_size = args.batch_size_per_gpu * args.n_gpu
    # lr: 0.00125 * 8 = 0.01 in original
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError('Unsupported pooling_func: {}'.format(
            args.pooling_func))

    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError('Unsupported initializer: {}'.format(
            args.initializer))

    if args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Rank 0 holds the data; scatter_dataset distributes shards.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=args.batch_size_per_gpu,
        repeat=False, shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out,
    )

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type))
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Reporting/snapshot extensions run only on the root rank.
    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map',
                eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration',
                'epoch',
                'elapsed_time',
                'lr',
                'main/loss',
                'main/roi_loc_loss',
                'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss',
                'main/rpn_cls_loss',
                'validation/main/map',
            ],
            ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png', trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png', trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
# Transfer-learning fine-tune: wrap the pretrained predictor in a
# Classifier and train with MomentumSGD, logging/snapshotting once per
# epoch (epoch_size iterations).
max_epoch = 15
model = L.Classifier(model)
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)
# NOTE(review): is_decay_lr is never read here; ExponentialShift below is
# applied unconditionally. Kept for compatibility with surrounding code.
is_decay_lr = False
updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                            device=gpu_id)
result_dir = '../results/transfer_hard_{}_{}_{}_depth{}_valid{}'.format(
    mode, n_topic, iteration, sum(depth)*2+1, args.valid)
trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'),
                           out=result_dir)

from chainer.training import extensions

# All per-epoch extensions share the same iteration trigger.
epoch_trigger = (epoch_size, 'iteration')
trainer.extend(extensions.LogReport(trigger=epoch_trigger))
trainer.extend(
    extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'),
    trigger=epoch_trigger)
trainer.extend(
    extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
    trigger=epoch_trigger)
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id),
               trigger=epoch_trigger)
trainer.extend(extensions.observe_lr(), trigger=epoch_trigger)
trainer.extend(
    extensions.PrintReport(['iteration', 'lr', 'main/accuracy',
                            'validation/main/accuracy', 'elapsed_time']),
    trigger=epoch_trigger)
trainer.extend(extensions.dump_graph('main/loss'))
# Halve the learning rate every three epochs.
trainer.extend(extensions.ExponentialShift('lr', 0.5),
               trigger=(epoch_size*3, 'iteration'))
trainer.extend(extensions.ProgressBar(update_interval=30))

print('running')
# Fixed typo in the log message ('reslut_dir' -> 'result_dir').
print('result_dir:{}'.format(result_dir))
trainer.run()
extensions.PlotReport(['main/loss', 'val/main/loss'], 'epoch', file_name='loss.png')) # 精度のプロットも毎エポック自動的に保存 trainer.extend( extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], 'epoch', file_name='accuracy.png')) # モデルのtrainプロパティをFalseに設定してvalidationするextension trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id), name='val') # 指定したエポックごとに学習率をlr_drop_ratio倍にする trainer.extend(extensions.ExponentialShift('lr', lr_drop_ratio), trigger=(lr_drop_epoch, 'epoch')) trainer.run() serializers.save_npz('classifier.model', model) chainer.config.train = False for _ in range(10): x, t = valid[np.random.randint(len(valid))] x = cuda.to_cpu(x) y = F.softmax(model.predictor(x[None, ...])) pred = os.path.basename(dnames[int(y.data.argmax())]) label = os.path.basename(dnames[t])
def main():
    """CycleGAN-style voice conversion training between two speakers.

    Trains two generators (a->b, b->a) and two discriminators on spectra
    extracted from the given wave files. GPU-only: exits early on CPU.
    """
    parser = argparse.ArgumentParser(description='Deep_VoiceChanger')
    parser.add_argument('--batchsize', '-b', type=int, default=32,
                        help='Number of images in each mini-batch')
    parser.add_argument('--iteration', '-i', type=int, default=100000,
                        help='Number of to train iteration')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID')
    parser.add_argument('--out', '-o', default='results',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--gene_ab', '-j', default='',
                        help='Resume generator a2b from file')
    parser.add_argument('--gene_ba', '-k', default='',
                        help='Resume generator b2a from file')
    parser.add_argument('--disc_a', '-m', default='',
                        help='Resume discriminator a from file')
    parser.add_argument('--disc_b', '-l', default='',
                        help='Resume discriminator b from file')
    # NOTE(review): 'foledr' is a typo for 'folder' in this help string.
    parser.add_argument('--folder', '-f', default='',
                        help='Resume all model from foledr')
    parser.add_argument('--voice_a', '-v', default='../src/KizunaAI_long.wav',
                        help='Path of train wave file of voice a')
    parser.add_argument('--voice_b', '-w', default='../src/nekomasu_long.wav',
                        help='Path of train wave file of voice b')
    parser.add_argument('--test_a', '-s', default='../src/KizunaAI_short.wav',
                        help='Path of test wave file of voice a')
    parser.add_argument('--test_b', '-u', default='../src/nekomasu_short.wav',
                        help='Path of test wave file of voice b')
    args = parser.parse_args()

    chainer.cuda.set_max_workspace_size(256 * 1024 * 1024)
    chainer.config.type_check = False
    chainer.config.autotune = True

    # Training requires a GPU.
    if args.gpu < 0:
        print('sorry, but CPU is not recommended')
        quit()
    cp.cuda.Device(args.gpu).use()

    # Fall back to the training waves when no test waves are given.
    if args.test_a == '':
        args.test_a = args.voice_a
    if args.test_b == '':
        args.test_b = args.voice_b

    # Two generators (a->b, b->a) and two discriminators, each paired
    # with its own optimizer.
    generator_ab, opt_g_a = init_gene(args.gpu)
    generator_ba, opt_g_b = init_gene(args.gpu)
    discriminator_a, opt_d_a = init_disc(args.gpu)
    discriminator_b, opt_d_b = init_disc(args.gpu)
    # Griffin-Lim reconstruction used by the preview extension.
    gla = GLA()

    train_iter_a = init_dataset(args.voice_a, 20000, False, args.batchsize)
    train_iter_b = init_dataset(args.voice_b, 20000, False, args.batchsize)
    test_iter_a = init_dataset(args.test_a, -1, True, 16)
    test_iter_b = init_dataset(args.test_b, -1, True, 16)

    updater = Updater(train_iter_a, train_iter_b,
                      opt_g_a, opt_g_b, opt_d_a, opt_d_b,
                      device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Snapshots of trainer state and every model, every 10 epochs.
    trainer.extend(extensions.snapshot(filename='snapshot.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(generator_ab,
                                              'generator_ab.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(generator_ba,
                                              'generator_ba.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(discriminator_a,
                                              'discriminator_a.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(discriminator_b,
                                              'discriminator_b.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'loss/g/recon', 'loss/g/ident', 'loss/g/gene',
            'loss/d/disc', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=5))

    # Decay Adam's alpha by 10x at 25k iterations for all four optimizers.
    trainer.extend(extensions.ExponentialShift("alpha", 0.1,
                                               optimizer=opt_g_a),
                   trigger=(25000, 'iteration'))
    trainer.extend(extensions.ExponentialShift("alpha", 0.1,
                                               optimizer=opt_g_b),
                   trigger=(25000, 'iteration'))
    trainer.extend(extensions.ExponentialShift("alpha", 0.1,
                                               optimizer=opt_d_a),
                   trigger=(25000, 'iteration'))
    trainer.extend(extensions.ExponentialShift("alpha", 0.1,
                                               optimizer=opt_d_b),
                   trigger=(25000, 'iteration'))

    # Write converted audio previews once per epoch.
    trainer.extend(preview_convert(test_iter_a, test_iter_b,
                                   generator_ab, generator_ba,
                                   args.gpu, gla, args.out),
                   trigger=(1, 'epoch'))

    # Load any requested snapshots (module-level `resume` helper).
    resume(args, trainer, generator_ab, generator_ba,
           discriminator_a, discriminator_b)

    trainer.run()
def main():
    """Train SSD300/512 on VOC 2007+2012 trainval with the original
    paper's schedule (120k iterations, lr/10 at the given steps)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # VOC2007 + VOC2012 trainval with SSD data augmentation.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD convention: biases get doubled gradients, other params get
    # weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    # Evaluate at every lr step and at the end of training.
    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def train(train_data, val_data, label_names,
          iteration, lr, step_points,
          batchsize, gpu, out, val_iteration,
          log_iteration, loaderjob, resume):
    """Fine-tune an SSD300 detector, warm-started from the voc0712 model.

    Args:
        train_data: training dataset of (img, bbox, label) samples.
        val_data: validation dataset.
        label_names: iterable of foreground class names.
        iteration: total number of training iterations.
        lr: initial learning rate (installed via ExponentialShift).
        step_points: iterations at which lr is multiplied by 0.1.
        batchsize: mini-batch size for both iterators.
        gpu: GPU id, or negative for CPU.
        out: output directory for logs and snapshots.
        val_iteration: interval (iterations) between evaluations/snapshots.
        log_iteration: interval (iterations) between log reports.
        loaderjob: number of loader processes (<= 0 for a serial iterator).
        resume: trainer snapshot path to resume from, or falsy to start fresh.
    """
    # Warm-start: copy the feature extractor and the localization branch
    # from the published voc0712 weights into a fresh head.
    donor = SSD300(pretrained_model='voc0712')
    model = SSD300(n_fg_class=len(label_names))
    model.extractor.copyparams(donor.extractor)
    model.multibox.loc.copyparams(donor.multibox.loc)
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()
        model.to_gpu()

    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    if loaderjob <= 0:
        train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, batchsize, n_processes=min(loaderjob, batchsize))
    val_iter = chainer.iterators.SerialIterator(
        val_data, batchsize, repeat=False, shuffle=False)

    # initial lr is set by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD convention: biases get doubled gradients, everything else gets
    # weight decay.
    for param in train_chain.params():
        hook = GradientScaling(2) if param.name == 'b' \
            else WeightDecay(0.0005)
        param.update_rule.add_hook(hook)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (iteration, 'iteration'), out)

    val_interval = (val_iteration, 'iteration')
    log_interval = log_iteration, 'iteration'

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=lr),
        trigger=triggers.ManualScheduleTrigger(step_points, 'iteration'))
    trainer.extend(
        DetectionVOCEvaluator(
            val_iter, model, use_07_metric=True, label_names=label_names),
        trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)

    if resume:
        serializers.load_npz(resume, trainer)

    trainer.run()
def main():
    """Train (and optionally predict with) a ResNet-based classifier/regressor
    on a CSV-indexed image dataset.

    Behaviour is controlled entirely by command-line flags.  With --predict,
    training is skipped and the (possibly pre-loaded) model is evaluated on
    the validation set, writing one line per sample to <outdir>/result.txt.
    """
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('train', help='Path to csv file')
    parser.add_argument('--root', '-R', default="betti",
                        help='Path to image files')
    parser.add_argument('--val', help='Path to validation csv file',
                        required=True)
    parser.add_argument('--regress', '-r', action='store_true',
                        help='set for regression, otherwise classification')
    parser.add_argument('--time_series', '-ts', action='store_true',
                        help='set for time series data')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--layer', '-l', type=str, choices=['res5', 'pool5'],
                        default='pool5',
                        help='output layer of the pretrained ResNet')
    parser.add_argument('--fch', type=int, nargs="*", default=[],
                        help='numbers of channels for the last fc layers')
    parser.add_argument('--cols', '-c', type=int, nargs="*", default=[1],
                        help='column indices in csv of target variables')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=100,
                        help='snapshot interval')
    parser.add_argument('--initmodel', '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--random', '-rt', type=int, default=1,
                        help='random translation')
    parser.add_argument('--gpu', '-g', type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--loaderjob', '-j', type=int, default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--outdir', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer', '-op', choices=optim.keys(),
                        default='Adam', help='optimizer')
    parser.add_argument('--resume', type=str, default=None,
                        help='Resume the training from snapshot')
    parser.add_argument('--predict', '-p', action='store_true',
                        help='prediction with a specified model')
    parser.add_argument('--tuning_rate', '-tr', type=float, default=0.1,
                        help='learning rate for pretrained layers')
    parser.add_argument('--dropout', '-dr', type=float, default=0,
                        help='dropout ratio for the FC layers')
    parser.add_argument('--cw', '-cw', type=int, default=128,
                        help='crop image width')
    parser.add_argument('--ch', '-ch', type=int, default=128,
                        help='crop image height')
    parser.add_argument('--weight_decay', '-w', type=float, default=1e-6,
                        help='weight decay for regularization')
    parser.add_argument('--wd_norm', '-wn', choices=['none', 'l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--dtype', '-dt', choices=dtypes.keys(),
                        default='fp32', help='floating point precision')
    args = parser.parse_args()
    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))

    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    # read csv file
    train = Dataset(args.root, args.train, cw=args.cw, ch=args.ch,
                    random=args.random, regression=args.regress,
                    time_series=args.time_series, cols=args.cols)
    test = Dataset(args.root, args.val, cw=args.cw, ch=args.ch,
                   regression=args.regress, time_series=args.time_series,
                   cols=args.cols)

    # BUG FIX: auto-select the device only when --gpu was NOT given.  The old
    # test `if not args.gpu:` also matched an explicit `--gpu 0` and could
    # silently override it with -1 on CPU-only hosts.
    if args.gpu is None:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1

    print(args)
    save_args(args, args.outdir)

    # Loss/accuracy functions and the number of output channels depend on
    # whether we regress (one output per target column) or classify.
    if args.regress:
        accfun = F.mean_absolute_error
        lossfun = F.mean_squared_error
        args.chs = len(args.cols)
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy
        args.chs = max(train.chs, test.chs)
        if len(args.cols) > 1:
            print("\n\nClassification only works with a single target.\n\n")
            exit()

    # Set up a neural network to train
    model = L.Classifier(Resnet(args), lossfun=lossfun, accfun=accfun)

    # Set up an optimizer
    optimizer = optim[args.optimizer]()
    optimizer.setup(model)
    if args.weight_decay > 0:
        if args.wd_norm == 'l2':
            optimizer.add_hook(
                chainer.optimizer.WeightDecay(args.weight_decay))
        elif args.wd_norm == 'l1':
            optimizer.add_hook(
                chainer.optimizer_hooks.Lasso(args.weight_decay))

    # slow update for pretrained layers
    if args.optimizer in ['Adam']:
        for func_name in model.predictor.base._children:
            for param in model.predictor.base[func_name].params():
                param.update_rule.hyperparam.alpha *= args.tuning_rate

    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np

    train_iter = iterators.MultithreadIterator(
        train, args.batchsize, shuffle=True, n_threads=args.loaderjob)
    test_iter = iterators.MultithreadIterator(
        test, args.batchsize, repeat=False, shuffle=False,
        n_threads=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.outdir)
    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = 20, 'epoch'

    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}'), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=val_interval)

    # Learning-rate decay; the attribute name depends on the optimizer family.
    if args.optimizer in ['Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.ExponentialShift("alpha", 0.5, optimizer=optimizer),
            trigger=(args.epoch / 5, 'epoch'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # BUG FIX: a second, duplicate LogReport with the same trigger used to be
    # registered here; Chainer renames it to 'LogReport_1' and it redundantly
    # wrote the same log, so it was removed.

    if not args.predict:
        trainer.run()

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test, args.batchsize,
                                         repeat=False, shuffle=False)
    converter = concat_examples
    idx = 0
    with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model.predictor(x).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        # de-normalise back to the original target scale
                        y = y * test.std + test.mean
                        t = t * test.std + test.mean
                    else:
                        y = F.softmax(model.predictor(x)).data
                        if args.gpu > -1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            # one output line per sample: id, target(s), prediction(s)
            for i in range(y.shape[0]):
                output.write(os.path.basename(test.ids[idx]))
                if (len(t.shape) > 1):
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    output.write(",{}".format(t[i]))
                    output.write(",{}".format(np.argmax(y[i, :])))
                    for yy in y[i]:
                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
def main():
    """Train an SSD detector on a jsk_perception learning dataset.

    Dataset directories, model variant, GPU id, batch size, epoch count and
    the output directory are all taken from command-line flags.
    """
    rospack = rospkg.RosPack()
    jsk_perception_datasets_path = osp.join(
        rospack.get_path('jsk_perception'), 'learning_datasets')

    parser = argparse.ArgumentParser()
    # Dataset directory
    parser.add_argument('--train-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'train'))
    parser.add_argument('--val-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'test'))
    parser.add_argument('--model-name', choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--max-epoch', type=int, default=100)
    parser.add_argument('--out-dir', type=str, default=None)
    args = parser.parse_args()

    train_dataset = DetectionDataset(args.train_dataset_dir)
    fg_label_names = train_dataset.fg_class_names

    if args.model_name == 'ssd300':
        model = SSD300(n_fg_class=len(fg_label_names),
                       pretrained_model='imagenet')
    elif args.model_name == 'ssd512':
        model = SSD512(n_fg_class=len(fg_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Encode ground truth into multibox targets on the fly.
    train = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batch_size)

    test_dataset = DetectionDataset(args.val_dataset_dir)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, args.batch_size, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Biases: doubled gradient, no weight decay (SSD training recipe).
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d-%H%M%S')
    # BUG FIX: out_dir used to be assigned only when --out-dir was omitted,
    # so passing --out-dir raised NameError when building the Trainer below.
    if args.out_dir is None:
        out_dir = osp.join(
            rospkg.get_ros_home(), 'learning_logs', timestamp)
    else:
        out_dir = args.out_dir

    # Decay lr at 2/3 and 5/6 of the training schedule.
    step_epoch = [args.max_epoch * 2 // 3, args.max_epoch * 5 // 6]
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'), out_dir)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(step_epoch, 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                              label_names=fg_label_names),
        trigger=triggers.ManualScheduleTrigger(
            step_epoch + [args.max_epoch], 'epoch'))

    log_interval = 10, 'iteration'
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.snapshot_object(model, 'model_snapshot.npz'),
                   trigger=(args.max_epoch, 'epoch'))
    trainer.run()
def main(args):
    """Train or evaluate an image classifier selected by ``args.arch``.

    In normal mode the model is trained and saved under a timestamped
    directory; with ``args.test`` set (and an ``initmodel``), only evaluation
    plus confusion-matrix export is performed.
    Returns the evaluator's result dict with ``'outputdir'`` added.
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model,
                                    args.ignore)
        #model.finetune = True
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime(
        "%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    # In test mode, reuse the directory of the model being evaluated.
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)

    # Load the datasets and mean file
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None
    train = ppds.PreprocessedDataset(args.train, args.root, mean,
                                     model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean,
                                   model.insize, False)
    # These iterators load the images with subprocesses running in parallel
    # to the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(
        val, args.val_batchsize, repeat=False, shuffle=False)

    # Set up an optimizer; lr/momentum are applied only when the chosen
    # optimizer actually exposes those attributes.
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    # Short intervals in test mode for quick turnaround.
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model,
                                             device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model,
                                            device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
                       trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # NOTE(review): the original formatting collapsed this region; the reading
    # applied here is that training and model/args export run only when NOT in
    # test mode (test mode = evaluation only) — confirm against upstream.
    if not args.test:
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    # Run the evaluator once more to obtain the final result dict.
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat,
                                  categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat,
                                  categories, mode="rate", saveFormat="eps")
    return results
def main():
    """Train an SRCNN super-resolution model on General100 / Set14.

    Optimizer, channel scale, filter sizes, batch size, epochs and GPU are
    taken from command-line flags; loss/PSNR curves and snapshots are written
    under ``results/SR/...``.
    """
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--ch_scale', '-c', type=int, default=2,
                        help='ch scale')
    parser.add_argument('--fil_sizes', '-f', type=int, nargs='+',
                        default=[9, 5, 5], help='filter(kernel) sizes')
    parser.add_argument('--iter_parallel', action='store_true', default=False,
                        help='filter(kernel) sizes')
    parser.add_argument('--opt', '-o', default='sgd',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    # print parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Number of Filter: {}".format(args.ch_scale))
    print("# Sizes of Filter: {}-{}-{}".format(*args.fil_sizes))
    print('# Train Dataet: General 100')
    print('# Test Dataet: Set 14')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory (parameters are recorded alongside the results)
    outdir = path.join(
        ROOT_PATH,
        'results/SR/SRCNN_Channel_opt_{}_Scale_{}_Filter_Size_{}{}{}'.format(
            args.opt, args.ch_scale, *args.fil_sizes))
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    print('# loading dataet(General100, Set14) ...')
    train, test = load_dataset()

    # prepare model
    model = N.GenEvaluator(
        N.SRCNN(ch_scale=args.ch_scale, fil_sizes=args.fil_sizes))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    if args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))
        optimizer.add_hook(chainer.optimizer.GradientClipping(0.1))
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)
    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(50, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/PSNR', 'validation/main/PSNR', 'lr', 'elapsed_time']))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # BUG FIX: --resume was parsed but never used, so training could never
    # actually be resumed from a snapshot.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train AEFINetConcat with a VGG16 content (perceptual) loss on the
    General100 sequence dataset.

    Optimizer, batch size, epochs and GPU come from command-line flags; loss
    and PSNR curves plus periodic snapshots are written under
    ``results/FI/AEFINet/...``.
    """
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='filter(kernel) sizes')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'),
                        default='adam')
    args = parser.parse_args()

    # print parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory
    outdir = path.join(
        ROOT_PATH,
        'results/FI/AEFINet/AEFINetConcat_ch4_fsize5_VGG_content_loss_opt_{}'.
        format(args.opt))
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # load dataset (in-memory variant unless parallel iterators are requested)
    print('# loading dataet(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = SequenceDataset(dataset='train')
        test = SequenceDataset(dataset='test')
    else:
        train = SequenceDatasetOnMem(dataset='train')
        test = SequenceDatasetOnMem(dataset='test')

    # prepare model: a pretrained VGG16 supplies the content loss
    vgg16 = N.VGG16()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        vgg16.to_gpu()
    chainer.serializers.load_npz(
        path.join(ROOT_PATH, 'models/VGG16.npz'), vgg16)
    model = N.VGG16Evaluator(N.AEFINetConcat(ch=4, f_size=5), vgg16)
    if args.gpu >= 0:
        model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)
    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    if args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/loss_mse',
            'main/loss_cont', 'main/PSNR', 'validation/main/PSNR', 'lr',
            'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # BUG FIX: --resume was parsed but never used; load the trainer snapshot
    # so training can actually be resumed (matches the sibling AEFINet
    # training script in this file).
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train AEFINet (frame interpolation) on two GPUs with ParallelUpdater.

    Flags select optimizer, filter size, channel scale, batch size, epochs
    and the two GPU ids; the final model and its hyper-parameters are saved
    under ``models/``.
    """
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='GPU1 ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=2,
                        help='GPU2 ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='loading dataset from disk')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'),
                        default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory
    model_dir_name = 'AEFINet_parallel_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # loading dataset (on-disk variant when parallel iterators are requested)
    print('# loading dataet(General100_train, General100_test) ...')
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset='train')
        test = ds.SequenceDataset(dataset='test')
    else:
        train = ds.SequenceDatasetOnMem(dataset='train')
        test = ds.SequenceDatasetOnMem(dataset='test')

    chainer.cuda.get_device_from_id(args.gpu0).use()

    # prepare model (ParallelUpdater moves copies to the configured devices)
    model = N.GenEvaluator(N.AEFINet(f_size=args.fsize, ch=args.ch))
    # model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer: data-parallel across the two GPUs
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save final model weights plus the hyper-parameters needed to rebuild it
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet_opt_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    model_parameter = {
        'name': 'AEFINet',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
device=args.gpus[0]) else: updater = MultiprocessParallelUpdater(train_iters, optimizer, devices=devices) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir) if args.cosine: trainer.extend( CosineAnnealing('lr', int(args.epoch), len(train) / args.batchsize, eta_min=args.eta_min, init=args.lr)) else: trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr), trigger=triggers.ManualScheduleTrigger( [int(args.epoch * 0.50), int(args.epoch * 0.75)], 'epoch')) test_interval = 1, 'epoch' snapshot_interval = 10, 'epoch' log_interval = 100, 'iteration' trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]), trigger=test_interval) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend( extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object(model,
def main():
    """Multi-process SSD training on VOC07+12 via ChainerMN.

    Each MPI process trains on a disjoint shard of the dataset on its own
    GPU; evaluation, logging and snapshots run only on rank 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    # Switch to 'forkserver' (and spawn a dummy process) before CUDA is
    # initialized so MultiprocessIterator workers start safely under MPI.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator()
    # One GPU per process, indexed by the intra-node rank.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    # Rank 0 shuffles the indices and scatters a disjoint shard to each rank.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]
    # The global batch size is split evenly across the communicator.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # Biases: doubled gradient, no weight decay (SSD training recipe).
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    # Evaluation, logging and snapshot extensions only run on rank 0.
    if comm.rank == 0:
        trainer.extend(
            DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                  label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)
    trainer.run()
def main():
    """Train Faster R-CNN (VGG-16) on VOC 2007 trainval and snapshot the
    detector; evaluates on VOC 2007 test at the lr-step and final iteration.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(split='test', year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    # Train chain wraps the detector and computes the four-part loss.
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
        chainer.cuda.get_device(args.gpu).use()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        # Rescale image and boxes to the network's preferred size, then
        # apply a random horizontal flip to both.
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        # `scale` is returned so the train chain can undo the resize when
        # mapping proposals back to the original image.
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (W, H), (o_W, o_H))

        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_W, o_H), params['x_flip'])

        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)

    # Faster R-CNN trains with batch size 1 (variable image sizes).
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Only the detector (not the whole train chain) is snapshotted.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # NOTE(review): `invoke_before_training` was removed in Chainer v2 —
    # confirm the pinned Chainer version still accepts it.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True),
        trigger=ManualScheduleTrigger(
            (args.step_size, args.iteration), 'iteration'),
        invoke_before_training=False)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def train(mode):
    """Train Multi_modal_GEINet on the nine OU-ISIR GEI modalities (Dt1-Dt9).

    Args:
        mode (bool): If True, train from scratch.  If False, resume from the
            previously saved trainer state before continuing training.

    Side effects: runs the Chainer trainer on GPU 0, then saves the trainer
    state and the bare model as npz files.
    """
    base_dir = ("/media/wutong/New Volume/reseach/Dataset/"
                "OU-ISIR_by_Setoguchi/Gallery/signed/128_3ch/")
    # Dt1 lives in a differently named directory than Dt2..Dt9.
    subdirs = ['CV01_(Gallery&Probe)_2nd'] + [
        'CV01_Dt{}_(Gallery&Probe)'.format(i) for i in range(2, 10)]

    # One GEI dataset per modality (deduplicates the former 9x copy-paste).
    train_sets = [load_GEI(path_dir=base_dir + d, mode=True) for d in subdirs]

    model = Multi_modal_GEINet()
    model.to_gpu()

    # One sequential iterator per modality; the updater consumes them in
    # lockstep, so shuffling is disabled to keep modalities aligned.
    train_iters = [
        iterators.SerialIterator(ds, batch_size=239, shuffle=False)
        for ds in train_sets]

    optimizer = chainer.optimizers.MomentumSGD(lr=0.02, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.01))

    # Positional layout expected by the updater:
    # (model, iter1..iter9, optimizer).
    updater = Multi_modal_Updater(
        model, *(train_iters + [optimizer]), device=0)

    epoch = 6250
    trainer = training.Trainer(
        updater, (epoch, 'epoch'),
        out='/home/wutong/Setoguchi/chainer_files/result')

    # Decay lr by ~1/sqrt(10) every 1250 epochs.
    trainer.extend(extensions.ExponentialShift(attr='lr', rate=0.56234),
                   trigger=(1250, 'epoch'))
    trainer.extend(extensions.LogReport(log_name='SFDEI_log',
                                        trigger=(20, "epoch")))
    # Fixed: the filename template used '{.update.epoch}' — Trainer has no
    # `update` attribute (it is `updater`), so formatting raised at runtime;
    # also corrected the 'shapshot' misspelling.
    trainer.extend(
        extensions.snapshot_object(
            model, filename='model_snapshot_{.updater.epoch}'),
        trigger=(1250, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(1250, 'epoch'))
    trainer.extend(extensions.PrintReport(['epoch', 'accuracy', 'loss']))
    trainer.extend(extensions.dump_graph(root_name="loss",
                                         out_name="multi_modal_3.dot"))
    trainer.extend(extensions.PlotReport(["loss"]), trigger=(50, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    snapshot_path = ("/home/wutong/Setoguchi/chainer_files/"
                     "SFDEINet_multi_modal/SFDEINet_multi_modal_model")
    if not mode:
        # Resume: restore the full trainer state saved by a previous run.
        serializers.load_npz(snapshot_path, trainer)
    trainer.run()

    # Fixed: the original saved trainer and model to the *same* path, so the
    # model overwrote the trainer state and later resumes would fail.  The
    # trainer keeps the original path (matching the resume load above); the
    # bare model gets a distinct suffix.
    serializers.save_npz(snapshot_path, trainer)
    serializers.save_npz(snapshot_path + '_net', model)
def main():
    """Train SSD300 on a custom VoTT-exported VOC dataset and save the model."""
    # Enable cuDNN autotuning with an enlarged workspace.
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    # Training hyper-parameters.
    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]

    # Model setup: SSD300 with an ImageNet-pretrained feature extractor.
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup.
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup.  Fixed: the original plain literals contained invalid
    # escape sequences ('\P', '\c', '\T'), which raise
    # DeprecationWarning/SyntaxWarning on modern Python; raw strings keep
    # the identical runtime value.
    train_dataset = MyVoTTVOCDataset(
        r'C:\Python_Programs\chainer_practice\Telescope_corner', 'train')
    valid_dataset = MyVoTTVOCDataset(
        r'C:\Python_Programs\chainer_practice\Telescope_corner', 'val')

    # Data augmentation: precompute multibox loc/label targets on the fly.
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup.
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid_dataset, batchsize, repeat=False, shuffle=False)

    # Optimizer setup: SSD convention — doubled gradient scale on biases,
    # weight decay on all other parameters.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=gpu_id)

    # Trainer setup: step-wise lr decay, per-epoch validation and reports.
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)
    trainer.extend(
        extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
        trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(
            valid_iter, model, use_07_metric=False, label_names=voc_labels),
        trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map', 'elapsed_time'
    ]), trigger=log_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'], 'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume interrupted training, load a snapshot into the trainer:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run training.
    trainer.run()

    # Save the trained model (moved to CPU) for inference.
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
# NOTE(review): this chunk begins mid-statement — the two arguments below
# close a call (presumably the updater/model construction) whose opening is
# above the visible region.
    sum(depth) * 2 + 1, args.valid)

# Trainer runs for epoch_size * max_epoch iterations in total.
trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'),
                           out=result_dir)

# Imported mid-script in the original; left in place.
from chainer.training import extensions

# Log, snapshot, evaluate and report once per epoch_size iterations.
trainer.extend(extensions.LogReport(trigger=(epoch_size, 'iteration')))
trainer.extend(
    extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'),
    trigger=(epoch_size, 'iteration'))
# Snapshot only the predictor network, not the whole classifier wrapper.
trainer.extend(extensions.snapshot_object(
    model.predictor, filename='model_iteration-{.updater.iteration}'),
    trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id),
               trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.observe_lr(), trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.PrintReport([
    'iteration', 'lr', 'main/accuracy', 'validation/main/accuracy',
    'elapsed_time'
]), trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.dump_graph('main/loss'))
# Halve the learning rate every 3 epochs' worth of iterations.
trainer.extend(extensions.ExponentialShift('lr', 0.5),
               trigger=(epoch_size * 3, 'iteration'))
trainer.extend(extensions.ProgressBar(update_interval=30))
print('running')
# NOTE(review): 'reslut_dir' typo is part of the runtime output string;
# left unchanged here (a doc-only edit must not alter runtime strings).
print('reslut_dir:{}'.format(result_dir))
trainer.run()
# NOTE(review): this chunk is truncated at both ends — the first line closes
# an updater construction begun above, and the final trainer.extend( is cut
# off mid-call.
    converter=concat_mols)

trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

# Multi-node evaluation: wrap the framework evaluator in a standard Chainer
# Evaluator, then in a ChainerMN multi-node evaluator over `comm`.
evaluator = FrameworEvaluater(g_stop, g_atom, g_pair, g_action)
evaluator = extensions.Evaluator(valid_iter, evaluator, device=device,
                                 converter=concat_mols)
evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
trainer.extend(evaluator)

# For each of the three optimizers, log its `alpha` (presumably Adam's
# step size — confirm the optimizer class) and decay it by 0.9 every
# `decay_iter` iterations.
trainer.extend(
    extensions.observe_value('opt_stop/alpha', lambda t: opt_stop.alpha))
trainer.extend(extensions.ExponentialShift('alpha', 0.9, optimizer=opt_stop),
               trigger=(args.decay_iter, 'iteration'))
trainer.extend(
    extensions.observe_value('opt_atom/alpha', lambda t: opt_atom.alpha))
trainer.extend(extensions.ExponentialShift('alpha', 0.9, optimizer=opt_atom),
               trigger=(args.decay_iter, 'iteration'))
trainer.extend(
    extensions.observe_value('opt_pair/alpha', lambda t: opt_pair.alpha))
trainer.extend(extensions.ExponentialShift('alpha', 0.9, optimizer=opt_pair),
               trigger=(args.decay_iter, 'iteration'))
# NOTE(review): truncated — this extend call continues past the visible chunk.
trainer.extend(
def main():
    """Train a VGG-style classifier on CIFAR-10 or CIFAR-100 with Chainer."""
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Dispatch table for dataset selection: banner text, number of
    # classes, loader.  The Classifier wrapper below reports softmax
    # cross-entropy loss and accuracy, consumed by PrintReport.
    choices = {
        'cifar10': ('Using CIFAR10 dataset.', 10, get_cifar10),
        'cifar100': ('Using CIFAR100 dataset.', 100, get_cifar100),
    }
    if args.dataset not in choices:
        raise RuntimeError('Invalid dataset choice.')
    banner, class_labels, load_dataset = choices[args.dataset]
    print(banner)
    train, test = load_dataset()

    classifier = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Select the GPU and move the model onto it.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        classifier.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(args.learnrate)
    sgd.setup(classifier)
    sgd.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    iter_train = chainer.iterators.SerialIterator(train, args.batchsize)
    iter_test = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Stop after a fixed epoch budget, or earlier if the watched metric
    # stops improving.
    stop = (args.epoch, 'epoch')
    if args.early_stopping:
        stop = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Assemble the training loop.
    upd = training.updaters.StandardUpdater(
        iter_train, sgd, device=args.gpu)
    trainer = training.Trainer(upd, stop, out=args.out)

    # Validate on the test split every epoch.
    trainer.extend(extensions.Evaluator(iter_test, classifier,
                                        device=args.gpu))

    # Halve the learning rate every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump the computational graph reachable from 'main/loss' once.
    trainer.extend(extensions.DumpGraph('main/loss'))

    # Per-epoch snapshot of the full trainer state.
    trainer.extend(
        extensions.snapshot(filename='snaphot_epoch_{.updater.epoch}'))

    # Log, print and show progress.  'main' entries come from the
    # classifier via the updater; 'validation' from the Evaluator.
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Restore a previously saved trainer state.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
        # Tail of __init__ (the class header and the earlier attribute
        # assignments are above the visible chunk): iteration threshold
        # that must be exceeded before any save happens.
        self.save_after = save_after

    def __call__(self, trainer):
        # Trainer extension: save the network every time the trigger fires,
        # once past `save_after` iterations.  The filename encodes the
        # absolute iteration count (offset from a previous run + current).
        curr_iter = trainer.updater.iteration + self.before_iter
        if curr_iter > self.save_after:
            # NOTE(review): this saves the module-level `model`, not an
            # object stored on self — confirm that is intentional.
            chainer.serializers.save_npz(
                self.saved_dir + self.save_name[:-4] + '_'
                + str(curr_iter) + '.npz', model,)


# Learning-rate schedule: decay at 200k and 400k iterations.
steps = [200000, 400000]
lr_trigger = triggers.ManualScheduleTrigger(steps, 'iteration')

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(
    updater, (iters, 'iteration'), 'ssd_result')
trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=lr_trigger)
# trainer.extend(evaluator, trigger=(50000, 'iteration'))
trainer.extend(training.extensions.LogReport(
    log_name='ssd_report' + SAVE_PATH, trigger=(1000, 'iteration')))
trainer.extend(extensions.observe_lr(), trigger=(1000, 'iteration'))
trainer.extend(training.extensions.PrintReport(
    ['iteration', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf']))
# Periodic model dump via the save_model extension defined above.
trainer.extend(save_model(model, SAVE_PATH, save_after=0),
               trigger=(50000, 'iteration'))
if continuous:
    # Resume the model weights (not the trainer state) from SAVE_PATH.
    chainer.serializers.load_npz(os.path.join(SAVE_PATH), model,)
trainer.run()
chainer.serializers.save_npz(os.path.join(SAVE_PATH), model,)

# In[12]: