def main():
    """Train an MLP classifier on MNIST, 5-fold cross-validated, on two GPUs."""
    batchsize = 32
    gpus = [0, 1]
    unit = 1000
    loaderjob = 2

    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which downstream reporting extensions can consume.
    model = L.Classifier(MLP(unit, 10))

    # Adam optimizer bound to the model parameters.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Full MNIST train/test split; the train side feeds cross-validation.
    train, test = chainer.datasets.get_mnist()

    devices = tuple(gpus)

    folds = chainer.datasets.get_cross_validation_datasets_random(
        train, n_fold=5)
    for fold_index, (fold_train, fold_val) in enumerate(folds):
        # One iterator per device, each fed a random shard of this fold's
        # training split; shards are loaded by worker processes.
        shards = chainer.datasets.split_dataset_n_random(
            fold_train, len(devices))
        train_iters = [
            chainer.iterators.MultiprocessIterator(
                shard, batchsize, n_processes=loaderjob)
            for shard in shards
        ]
        # Set up a trainer
        updater = updaters.MultiprocessParallelUpdater(
            train_iters, optimizer, devices=devices)
def main():
    """Train an ImageNet convnet with a data-parallel multiprocess updater."""
    # Registry of selectable architectures, keyed by --arch value.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpus', '-g', type=int, nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Build the requested architecture, optionally warm-starting its weights.
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)

    # Both datasets share one mean image; the validation dataset is built
    # with the extra False flag (presumably disabling random augmentation —
    # confirm against PreprocessedDataset).
    mean = np.load(args.mean)
    train = train_imagenet.PreprocessedDataset(
        args.train, args.root, mean, model.insize)
    val = train_imagenet.PreprocessedDataset(
        args.val, args.root, mean, model.insize, False)

    # One training iterator per device, each over a random shard of the
    # data; images are loaded by subprocesses running in parallel to the
    # training/validation work.
    devices = tuple(args.gpus)
    shards = chainer.datasets.split_dataset_n_random(train, len(devices))
    train_iters = [
        chainer.iterators.MultiprocessIterator(
            shard, args.batchsize, n_processes=args.loaderjob)
        for shard in shards]
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    updater = updaters.MultiprocessParallelUpdater(
        train_iters, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test mode shortens the intervals so a smoke run hits every extension.
    val_interval = (5, 'epoch') if args.test else (100000, 'iteration')
    log_interval = (1, 'epoch') if args.test else (1000, 'iteration')

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.DumpGraph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    report_entries = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(report_entries),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Entry point for gpat (Camelyon patch classifier) training.

    Parses the command line, builds train/test datasets and iterators,
    constructs the model and optimizer, then runs a manual training loop
    with periodic evaluation, logging, checkpointing, and LR decay.
    """
    parser = argparse.ArgumentParser(description='gpat train ')
    parser.add_argument("out")
    parser.add_argument('--resume', default=None)
    parser.add_argument('--log_dir', default='runs_16')
    parser.add_argument('--gpus', '-g', type=int, nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--iterations', default=10**5, type=int,
                        help='number of iterations to learn')
    parser.add_argument('--interval', default=1000, type=int,
                        help='number of iterations to evaluate')
    parser.add_argument('--batch_size', '-b', type=int, default=128,
                        help='learning minibatch size')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--loaderjob', type=int, default=8)
    parser.add_argument('--hed', dest='hed', action='store_true',
                        default=False)
    parser.add_argument('--no-texture', dest='texture', action='store_false',
                        default=True)
    parser.add_argument('--cbp', dest='cbp', action='store_true',
                        default=False)
    parser.add_argument('--no-color_aug', dest='color_aug',
                        action='store_false', default=True)
    parser.add_argument('--model_test', default='', type=str)
    parser.add_argument('--no-finetune', dest='finetune',
                        action='store_false', default=True)
    parser.add_argument('--arch', default='googlenet', choices=[
        'texturecnn', 'resnet50', 'resnet101', 'googlenet', 'vgg', 'alex',
        'trained', 'resume'
    ])
    parser.add_argument('--opt', default='adam', choices=['adam', 'momentum'])
    parser.add_argument('--train_path', default='train_extracted_dataset.pkl')
    parser.add_argument('--test_path', default='test_extracted_dataset.pkl')
    parser.add_argument('--data_size', type=float, default=1.)
    parser.add_argument('--new', action='store_true', default=False)
    args = parser.parse_args()
    devices = tuple(args.gpus)

    # log directory
    logger.init(args)

    # Load pickled datasets.
    # NOTE(review): np.load on .pkl requires allow_pickle=True on
    # numpy >= 1.16.3 — confirm the pinned numpy version.
    train_dataset = np.load(os.path.join(dataset_path, args.train_path))
    test_dataset = np.load(os.path.join(dataset_path, args.test_path))
    num_class = 2
    image_size = 256
    crop_size = 224

    # Optionally subsample the training set to a --data_size fraction.
    # FIX: the original tested `'extracted' in train_dataset` (membership in
    # the loaded data object); the dataset kind is encoded in the *path*
    # string, matching the branch below.
    if 'extracted' in args.train_path:
        perm = np.random.permutation(
            len(train_dataset))[:int(len(train_dataset) * args.data_size)]
        train_dataset = [train_dataset[p] for p in perm]

    # --hed overrides the per-architecture preprocessing.
    preprocess_type = args.arch if not args.hed else 'hed'
    if 'extracted' in args.train_path:
        train = CamelyonDatasetEx(train_dataset, original_size=image_size,
                                  crop_size=crop_size, aug=True,
                                  color_aug=args.color_aug,
                                  preprocess_type=preprocess_type)
    else:
        train = CamelyonDatasetFromTif(train_dataset,
                                       original_size=image_size,
                                       crop_size=crop_size, aug=True,
                                       color_aug=args.color_aug,
                                       preprocess_type=preprocess_type)

    # Multi-GPU: one iterator per device over a random shard of the data.
    if len(devices) > 1:
        train_iter = [
            chainer.iterators.MultiprocessIterator(
                subset, args.batch_size, n_processes=args.loaderjob)
            for subset in chainer.datasets.split_dataset_n_random(
                train, len(devices))
        ]
    else:
        train_iter = iterators.MultiprocessIterator(
            train, args.batch_size, n_processes=args.loaderjob)

    # Test data is never augmented.
    test = CamelyonDatasetEx(test_dataset, original_size=image_size,
                             crop_size=crop_size, aug=False, color_aug=False,
                             preprocess_type=preprocess_type)
    test_iter = iterators.MultiprocessIterator(
        test, args.batch_size, repeat=False, shuffle=False)

    # model construct
    if args.texture:
        model = BilinearCNN(base_cnn=args.arch, pretrained_model='auto',
                            num_class=num_class, texture_layer=None,
                            cbp=args.cbp, cbp_size=4096)
    else:
        model = TrainableCNN(base_cnn=args.arch, pretrained_model='auto',
                             num_class=num_class)

    if args.model_test:
        # Evaluation-only mode: load the latest snapshot of the named run.
        # FIX: model_path was referenced below but its construction had been
        # commented out, making this branch raise NameError; restored from
        # the commented code.
        models_dir = os.path.join('runs_16', args.model_test, 'models')
        model_path = os.path.join(models_dir,
                                  sorted(os.listdir(models_dir))[-1])
        print(model_path)
        chainer.serializers.load_npz(model_path, model)
        cuda.get_device_from_id(devices[0]).use()
        model.to_gpu()
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            evaluate_ex(model, test_iter, devices[0])
        logger.flush()
        exit()

    if args.resume is not None:
        # Resume from the latest snapshot of the named run.
        models_dir = os.path.join('runs_16', args.resume, 'models')
        model_path = os.path.join(models_dir,
                                  sorted(os.listdir(models_dir))[-1])
        print(model_path)
        chainer.serializers.load_npz(model_path, model)

    # set optimizer
    optimizer = make_optimizer(model, args.opt, args.lr)

    if len(devices) > 1:
        updater = updaters.MultiprocessParallelUpdater(train_iter, optimizer,
                                                       devices=devices)
    else:
        cuda.get_device_from_id(devices[0]).use()
        model.to_gpu()
        updater = chainer.training.StandardUpdater(train_iter, optimizer,
                                                   device=devices[0])

    # start training
    start = time.time()
    train_loss = 0
    train_accuracy = 0
    while updater.iteration < args.iterations:
        # train
        updater.update()
        progress_report(updater.iteration, start,
                        len(devices) * args.batch_size, len(train))
        train_loss += model.loss.data
        train_accuracy += model.accuracy.data

        if updater.iteration % args.interval == 0:
            # Plot averages over the last `interval` iterations, then reset.
            logger.plot('train_loss',
                        cuda.to_cpu(train_loss) / args.interval)
            logger.plot('train_accuracy',
                        cuda.to_cpu(train_accuracy) / args.interval)
            train_loss = 0
            train_accuracy = 0

            # test
            with chainer.using_config('train', False), \
                    chainer.no_backprop_mode():
                evaluate_ex(model, test_iter, devices[0])
            # logger
            logger.flush()
            # save updater state so training can be resumed
            serializers.save_npz(os.path.join(logger.out_dir, 'resume'),
                                 updater)

        # Decay the learning rate by 10x every 20000 iterations.
        if updater.iteration % 20000 == 0:
            if args.opt == 'adam':
                optimizer.alpha *= 0.1
            else:
                optimizer.lr *= 0.1
train_iters = [ iterators.MultiprocessIterator(i, args.batchsize) for i in chainer.datasets.split_dataset_n_random(train_dataset, len(devices))] test_iter = iterators.MultiprocessIterator( test_dataset, args.batchsize, repeat=False, shuffle=False) chainer.config.cudnn_deterministic = True # To make sure reproduction chainer.config.train = True chainer.config.enable_backprop = True chainer.config.type_check = False chainer.config.autotune = True chainer.config.use_cudnn = 'always' chainer.config.show() updater = updaters.MultiprocessParallelUpdater(train_iters, opt, devices=devices) interval = (args.snapshot, 'epoch') trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=result_dir) trainer.extend(extensions.dump_graph('main/loss')) # Save parameters and optimization state trainer.extend(extensions.snapshot_object( model, 'epoch-{.updater.epoch}.model'), trigger=interval) trainer.extend(extensions.snapshot_object( opt, 'epoch-{.updater.epoch}.state'), trigger=interval) trainer.extend(extensions.snapshot(), trigger=interval) if args.opt == 'MomentumSGD' or args.opt == 'AdaGrad': trainer.reporter.add_observer('lr', opt.lr) trainer.extend(IntervalShift(