Example #1
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', default=0, type=int,
                        help='First GPU ID')
    parser.add_argument('--gpu1', '-G', default=1, type=int,
                        help='Second GPU ID')
    parser.add_argument('--out', '-o', default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', default=1000, type=int,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # See train_mnist.py for the meaning of these lines

    model = L.Classifier(ParallelMLP(args.unit, 10, args.gpu0, args.gpu1))
    chainer.backends.cuda.get_device_from_id(args.gpu0).use()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu0)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
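
The ParallelMLP chain used above is not included in this listing. A minimal sketch of a two-GPU model-parallel MLP with the same constructor signature, assuming it splits the hidden units across the devices roughly like Chainer's train_mnist_model_parallel example, might look like this (names and layer layout are assumptions):

import chainer
import chainer.functions as F
import chainer.links as L


class ParallelMLP(chainer.Chain):
    """Sketch of a model-parallel MLP: half of the hidden units per GPU."""

    def __init__(self, n_units, n_out, gpu0, gpu1):
        super(ParallelMLP, self).__init__()
        self.gpu0 = gpu0
        self.gpu1 = gpu1
        with self.init_scope():
            # The input size is inferred on the first forward pass.
            self.first0 = L.Linear(None, n_units // 2).to_gpu(gpu0)
            self.first1 = L.Linear(None, n_units // 2).to_gpu(gpu1)
            self.out = L.Linear(None, n_out).to_gpu(gpu0)

    def __call__(self, x):
        # x arrives on gpu0 (the updater's device); copy it to gpu1 as well.
        x1 = F.copy(x, self.gpu1)
        h0 = F.relu(self.first0(x))
        h1 = F.relu(self.first1(x1))
        # Gather both halves back on gpu0 before the output layer.
        h = F.concat([h0, F.copy(h1, self.gpu0)])
        return self.out(h)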
Example #2
def main():
    # Load the dictionary
    dictionary = corpus.get_dictionary(create_flg=False)
    # Load the articles
    contents = corpus.get_contents()

    # Feature extraction
    data_train = []
    label_train = []
    for file_name, content in contents.items():
        data_train.append(corpus.get_vector(dictionary, content))
        label_train.append(corpus.get_class_id(file_name))

    data_train_s, data_test_s, label_train_s, label_test_s = train_test_split(data_train, label_train, test_size=0.5)

    N_test = len(data_test_s)         # test data size
    N = len(data_train_s)             # train data size
    in_units = len(data_train_s[0])  # number of input units (vocabulary size)

    n_units = 1000  # number of hidden-layer units
    n_label = 9     # number of output-layer units

    # Define the model
    model = L.Classifier(MLP(in_units, n_units, n_label))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    np_data_train_s = np.array(data_train_s, dtype=np.float32)
    np_label_train_s = np.array(label_train_s, dtype=np.int32)
    np_data_test_s = np.array(data_test_s, dtype=np.float32)
    np_label_test_s = np.array(label_test_s, dtype=np.int32)

    train_iter = chainer.iterators.SerialIterator(tuple_dataset.TupleDataset(np_data_train_s, np_label_train_s), 100)
    test_iter = chainer.iterators.SerialIterator(tuple_dataset.TupleDataset(np_data_test_s, np_label_test_s), 100, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')

    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
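
The MLP used by this document classifier is defined elsewhere; a plain three-layer perceptron with the same constructor signature (a sketch, assuming ReLU activations) would be:

import chainer
import chainer.functions as F
import chainer.links as L


class MLP(chainer.Chain):
    """Assumed three-layer perceptron: in_units -> n_units -> n_units -> n_label."""

    def __init__(self, n_in, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_units)
            self.l2 = L.Linear(n_units, n_units)
            self.l3 = L.Linear(n_units, n_out)

    def __call__(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        return self.l3(h2)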
Example #3
def main():
    parser = argparse.ArgumentParser(description="Chainer example: MNIST")
    parser.add_argument("--batchsize", "-b", type=int, default=100, help="Number of images in each mini batch")
    parser.add_argument("--epoch", "-e", default=20, type=int, help="Number of sweeps over the dataset to train")
    parser.add_argument("--gpu0", "-g", default=0, type=int, help="First GPU ID")
    parser.add_argument("--gpu1", "-G", default=1, type=int, help="Second GPU ID")
    parser.add_argument("--out", "-o", default="result_parallel", help="Directory to output the result")
    parser.add_argument("--resume", "-r", default="", help="Resume the training from snapshot")
    parser.add_argument("--unit", "-u", default=1000, type=int, help="Number of units")
    args = parser.parse_args()

    print("GPU: {}, {}".format(args.gpu0, args.gpu1))
    print("# unit: {}".format(args.unit))
    print("# Minibatch-size: {}".format(args.batchsize))
    print("# epoch: {}".format(args.epoch))
    print("")

    # See train_mnist.py for the meaning of these lines

    model = L.Classifier(ParallelMLP(784, args.unit, 10, args.gpu0, args.gpu1))
    chainer.cuda.get_device(args.gpu0).use()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu0)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph("main/loss"))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "main/accuracy", "validation/main/accuracy"]
        )
    )
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #4
def main():
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, 100)
    test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)

    model = L.Classifier(MLP(784, 10))
    optimizer = chainer.optimizers.SGD()
    optimizer.setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (500, 'epoch'), out='result_')

    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(100, 'iteration'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
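
Example #4 writes a trainer snapshot every 100 iterations but never loads one back. To continue an interrupted run, the most recent snapshot (extensions.snapshot() names them snapshot_iter_<iteration> by default) can be loaded into the trainer before trainer.run(); a sketch, assuming the 'result_' output directory used above and the trainer built in main():

import glob
import os

import chainer

snapshots = sorted(glob.glob(os.path.join('result_', 'snapshot_iter_*')),
                   key=os.path.getmtime)
if snapshots:
    # Restores the model, optimizer state and iteration counter in place.
    chainer.serializers.load_npz(snapshots[-1], trainer)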
Example #5
def run(datasetPath, resultPath, modelPath="", resumePath=""):
    # set dataset
    if isinstance(datasetPath, str):
        ds = datasetVOC(datasetPath, 32)
    elif isinstance(datasetPath, list):
        ds = datasetVOCs(datasetPath, 32)
    else:
        raise Exception("Invalid type for the dataset path.")
    train, test = ds.getDataset()

    # set model
    model = chainer.links.Classifier(Alex())
    if os.path.isfile(modelPath):
        chainer.serializers.load_npz(modelPath, model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # set evaluation model
    eval_model = model.copy()
    eval_model.train = False

    # train and test
    train_iter = chainer.iterators.SerialIterator(train, BATCH_SIZE)
    test_iter = chainer.iterators.SerialIterator(test, BATCH_SIZE, repeat=False, shuffle=False)
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"), out=resultPath)
    trainer.extend(extensions.Evaluator(test_iter, eval_model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "main/accuracy", "validation/main/accuracy"]
        )
    )
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(extensions.snapshot_object(model, filename="model_epoch_{.updater.epoch}"))
    trainer.extend(extensions.dump_graph("main/loss"))
    if os.path.isfile(resumePath):
        chainer.serializers.load_npz(resumePath, trainer)
    trainer.run()
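
Example #5 copies the model and sets eval_model.train = False, the Chainer v1 convention for switching to test mode. From Chainer v2 on, the same effect comes from the global 'train' configuration flag, which extensions.Evaluator already disables while it runs; done by hand it looks roughly like this (a sketch, with x standing for a validation batch):

import chainer

with chainer.using_config('train', False), chainer.no_backprop_mode():
    y = model.predictor(x)  # dropout/BN run in test mode and no graph is built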
Example #6
def main():
    parser = argparse.ArgumentParser(description='MNIST tutorial')
    parser.add_argument('--model', default='SLP', help='Model to use')
    parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train')
    parser.add_argument('--out', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU to use')
    args = parser.parse_args()

    model = L.Classifier(models[args.model]())

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example #7
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
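
The Transform wrapped around the training data above is defined elsewhere in the ChainerCV example. A sketch of what it typically does (rescale the image with the model's prepare(), resize the boxes to match, then apply a random horizontal flip), assuming ChainerCV's transforms module:

from chainercv import transforms


class Transform(object):
    """Assumed preprocessing for Faster R-CNN training."""

    def __init__(self, faster_rcnn):
        self.faster_rcnn = faster_rcnn

    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)   # rescale to the network's input size
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # Random horizontal flip of both the image and the boxes.
        img, params = transforms.random_flip(img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale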
Example #8
def main():
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='nin', help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpus', '-g', type=int, nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = train_imagenet.PreprocessedDataset(
        args.train, args.root, mean, model.insize)
    val = train_imagenet.PreprocessedDataset(
        args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    devices = tuple(args.gpus)

    train_iters = [
        chainer.iterators.MultiprocessIterator(i,
                                               args.batchsize,
                                               n_processes=args.loaderjob)
        for i in chainer.datasets.split_dataset_n_random(train, len(devices))]
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = updaters.MultiprocessParallelUpdater(train_iters, optimizer,
                                                   devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    if args.test:
        val_interval = 5, 'epoch'
        log_interval = 1, 'epoch'
    else:
        val_interval = 100000, 'iteration'
        log_interval = 1000, 'iteration'

    trainer.extend(train_imagenet.TestModeEvaluator(val_iter, model,
                                                    device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
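
train_imagenet.TestModeEvaluator, used above, is not part of this listing. In the Chainer ImageNet example it is essentially an Evaluator that switches off training mode for the duration of the evaluation; a minimal sketch:

import chainer
from chainer.training import extensions


class TestModeEvaluator(extensions.Evaluator):
    """Run the wrapped evaluation loop with the global 'train' flag disabled."""

    def evaluate(self):
        with chainer.using_config('train', False):
            return super(TestModeEvaluator, self).evaluate()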
Example #9
def main():
    # Introduce argparse for clarity and organization.
    # Starting to use higher capacity models, thus set up for GPU.
    parser = argparse.ArgumentParser(description='Chainer-Tutorial: MLP')
    parser.add_argument('--batch_size', '-b', type=int, default=128,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of times to train on data set')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID: -1 indicates CPU')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    # Load mnist data
    # http://docs.chainer.org/en/latest/reference/datasets.html
    train, test = chainer.datasets.get_mnist()

    # Define iterators.
    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Initialize model: Loss function defaults to softmax_cross_entropy.
    # 784 is the dimension of the inputs, 625 is the number of units in the
    # hidden layer, and 10 is the output dimension.
    model = L.Classifier(ModernMLP(625, 10))

    # Set up GPU usage if necessary. args.gpu serves both as the condition for
    # GPU use and as the device ID passed to get_device().
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Define optimizer (SGD, Adam, RMSprop, etc)
    # http://docs.chainer.org/en/latest/reference/optimizers.html
    # RMSprop default parameter setting:
    # lr=0.01, alpha=0.99, eps=1e-8
    optimizer = chainer.optimizers.RMSprop()
    optimizer.setup(model)

    # Set up trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))

    # Evaluate the model at end of each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Helper functions (extensions) to monitor progress on stdout.
    report_params = [
        'epoch',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'elapsed_time'
    ]
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_params))
    trainer.extend(extensions.ProgressBar())

    # Here we add a bit more boilerplate code to help output useful
    # information related to training. Very intuitive and great for
    # post-analysis.
    # source:
    # https://github.com/pfnet/chainer/blob/master/examples/mnist/train_mnist.py

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    if args.resume:
        # Resume from a snapshot (NumPy NPZ format and HDF5 format available)
        # http://docs.chainer.org/en/latest/reference/serializers.html
        chainer.serializers.load_npz(args.resume, trainer)

    # Run trainer
    trainer.run()
Example #10
def main():
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
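
PreprocessedDataset, used by both ImageNet examples, lives in the accompanying train_imagenet.py. A sketch along the lines of the official Chainer example (random crop and flip for training, center crop for validation, mean subtraction and scaling to [0, 1]); the name of the last constructor argument toggling the augmentation is an assumption here:

import random

import numpy as np
import chainer


class PreprocessedDataset(chainer.dataset.DatasetMixin):

    def __init__(self, path, root, mean, crop_size, random_crop=True):
        self.base = chainer.datasets.LabeledImageDataset(path, root)
        self.mean = mean.astype(np.float32)
        self.crop_size = crop_size
        self.random_crop = random_crop

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        crop_size = self.crop_size
        image, label = self.base[i]
        _, h, w = image.shape

        if self.random_crop:
            # Random crop and random horizontal flip for training.
            top = random.randint(0, h - crop_size - 1)
            left = random.randint(0, w - crop_size - 1)
            if random.randint(0, 1):
                image = image[:, :, ::-1]
        else:
            # Center crop for validation.
            top = (h - crop_size) // 2
            left = (w - crop_size) // 2

        image = image[:, top:top + crop_size, left:left + crop_size]
        image -= self.mean[:, top:top + crop_size, left:left + crop_size]
        image *= (1.0 / 255.0)  # scale to [0, 1]
        return image, label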
Example #11
def main():
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=2,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=6,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset',
                        '-i',
                        default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval',
                        type=int,
                        default=3,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset
    #model = "./model_paint"

    cnn = unet.UNET()
    #serializers.load_npz("result/model_iter_10000", cnn)

    dis = unet.DIS()
    #serializers.load_npz("result/model_dis_iter_20000", dis)

    l = lnet.LNET()
    #serializers.load_npz("models/liner_f", l)

    dataset = Image2ImageDataset("dat/images_color_train.dat",
                                 root + "line/",
                                 root + "color/",
                                 train=True)
    # dataset.set_img_dict(img_dict)
    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        cnn.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()  # Copy the model to the GPU
        l.to_gpu()

    # Setup optimizer parameters.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(cnn)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')

    opt_d = chainer.optimizers.Adam(alpha=0.0001)
    opt_d.setup(dis)
    opt_d.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_dec')

    # Set up a trainer
    updater = ganUpdater(
        models=(cnn, dis, l),
        iterator={
            'main': train_iter,
            #'test': test_iter
        },
        optimizer={
            'cnn': opt,
            'dis': opt_d
        },
        device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('cnn/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        cnn, 'cnn_128_iter_{.updater.iteration}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'cnn_128_dis_iter_{.updater.iteration}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt, 'optimizer_'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'cnn/loss', 'cnn/loss_rec', 'cnn/loss_adv',
            'cnn/loss_tag', 'cnn/loss_l', 'dis/loss'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=20))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained model
    out_dir = args.out
    chainer.serializers.save_npz(os.path.join(out_dir, 'model_final'), cnn)
    chainer.serializers.save_npz(os.path.join(out_dir, 'optimizer_final'), opt)
Example #12
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # input dimension (32x32 grayscale images)
    n_in = 32*32

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 3))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load dataset from CSV
    csv = pd.read_csv('csv/images-data.csv')

    dd = []

    for file, label in zip(csv['file'], csv['label']):

        print(file, label)

        # load a color image
        img = cv2.imread(file, cv2.IMREAD_COLOR)

        # color -> grayscale (OpenCV imread returns BGR, not RGB)
        imggray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # image -> array
        gray = []

        for y in range(len(imggray)):
            for x in range(len(imggray[y])):
                gray.append(imggray[y][x])

        imgdata = np.array(gray, dtype='f')
        imgdata = imgdata.reshape(1, 1, 32, 32)
        imgdata = imgdata / 255.0

        # set dataset
        x = imgdata
        y = np.array(label, dtype=np.int32)
        dataset = (x, y)

        dd.append(dataset)

    train, test = dd, dd

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Resume from a snapshot
    #chainer.serializers.load_npz(resume, trainer)

    # Run the training
    trainer.run()

    # Save Model
    serializers.save_npz('model/simple-3layer-perceptron.model', model)
    serializers.save_npz('model/simple-3layer-perceptron.state', optimizer)

    # Predictor
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        xx = Variable(np.array([dd[1][0], ]))
        y = model.predictor(xx)
    print(y.data)
    print(np.argmax(y.data))
Example #13
def main():
    # This script is almost identical to train_mnist.py. The only difference is
    # that this script uses data-parallel computation on two GPUs.
    # See train_mnist.py for more details.
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='First GPU ID')
    parser.add_argument('--gpu1', '-G', type=int, default=1,
                        help='Second GPU ID')
    parser.add_argument('--out', '-o', default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    chainer.cuda.get_device(args.gpu0).use()

    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # ParallelUpdater implements the data-parallel gradient computation on
    # multiple GPUs. It accepts "devices" argument that specifies which GPU to
    # use.
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        # The device of the name 'main' is used as a "master", while others are
        # used as slaves. Names other than 'main' are arbitrary.
        devices={'main': args.gpu0, 'second': args.gpu1},
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #14
def main():

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=32,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--centerloss',
                        '-c',
                        action='store_true',
                        default=False,
                        help='Use center loss')
    parser.add_argument('--alpha_ratio',
                        '-a',
                        type=float,
                        default=0.5,
                        help='alpha ratio')
    parser.add_argument('--lambda_ratio',
                        '-l',
                        type=float,
                        default=0.1,
                        help='lambda ratio')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    logger = setup_logger(__name__)
    logger.info("GPU: {}".format(args.gpu))
    logger.info("# Minibatch-size: {}".format(args.batchsize))
    logger.info("# epoch: {}".format(args.epoch))
    logger.info("Calculate center loss: {}".format(args.centerloss))
    if args.centerloss:
        logger.info('# alpha: {}'.format(args.alpha_ratio))
        logger.info('# lambda: {}'.format(args.lambda_ratio))

    NUM_CLASSES = 10

    model = LeNets(
        out_dim=NUM_CLASSES,
        alpha_ratio=args.alpha_ratio,
        lambda_ratio=args.lambda_ratio,
        is_center_loss=args.centerloss,
    )

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(ndim=3)

    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        n_processes=4)
    test_iter = chainer.iterators.MultiprocessIterator(test,
                                                       args.batchsize,
                                                       n_processes=4,
                                                       repeat=False,
                                                       shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))

    # Visualize Deep Features
    trainer.extend(VisualizeDeepFeature(train[:10000], NUM_CLASSES,
                                        args.centerloss),
                   trigger=(1, 'epoch'))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
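
The center loss that LeNets adds when --centerloss is given is, in the usual formulation, the squared distance between each deep feature and the (learnable) center of its class, weighted by lambda_ratio, with alpha_ratio controlling how fast the centers move. A sketch of just the loss term, assuming features of shape (N, D), one center per class, and int32 labels:

import chainer.functions as F


def center_loss(features, centers, labels):
    # features: (N, D) deep features, centers: (n_classes, D), labels: (N,) int32
    target_centers = F.embed_id(labels, centers)   # pick the center c_{y_i} per sample
    return 0.5 * F.sum((features - target_centers) ** 2) / features.shape[0]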
Example #15
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result_u100',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = mlp.MLP(args.unit, 10)
    classifier_model = L.Classifier(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, classifier_model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    #trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    if extensions.PlotReport.available():
        # Plot graph for loss for each epoch
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            x_key='epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            x_key='epoch',
            file_name='accuracy.png'))
        # Print a progress bar to stdout
    #trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
    serializers.save_npz('{}/mlp.model'.format(args.out), model)
    serializers.save_npz('{}/clf.model'.format(args.out), classifier_model)
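
Example #15 saves the bare MLP and the Classifier wrapper separately. Loading the bare model back for inference might look like this (a sketch, assuming the same mlp.MLP definition and the default --unit and --out values above):

import chainer
from chainer import serializers

import mlp  # assumed: the module that defines the MLP(n_units, n_out) used for training

model = mlp.MLP(100, 10)
serializers.load_npz('result_u100/mlp.model', model)

_, test = chainer.datasets.get_mnist()
x, t = test[0]
with chainer.using_config('train', False), chainer.no_backprop_mode():
    y = model(x[None, :])  # add a batch dimension
print('predicted:', int(y.data.argmax()), 'label:', int(t))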
Example #16
def main():
    """training script

    This loads specified configuration file from config/ directory.
    Multi-GPU is not supported. If you want, then resort to ChainerMN.
    """

    # commandline arguments
    parser = argparse.ArgumentParser()
    # configuration file
    parser.add_argument('config', type=str)
    # training
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--loader_threads', '-l', type=int, default=4)
    parser.add_argument('--out', '-o', default='./result/')
    # util
    parser.add_argument('--wait', type=int)
    args = parser.parse_args()

    if args.config.endswith('.py'):
        args.config = args.config[:-3]

    # setup output directory
    prefix = os.path.join(args.out, args.config)
    cnt = len(glob.glob(prefix + '-*'))
    while True:
        output_dir = prefix + '-' + str(cnt).rjust(2, '0')
        try:
            os.makedirs(output_dir)
        except FileExistsError:
            cnt += 1
        else:
            break

    # load config
    config = importlib.import_module('.'.join(args.config.split('/')))

    # save config
    with open(args.config + '.py', 'r') as f:
        with open(os.path.join(output_dir, 'config.py'), 'w') as wf:
            for line in f:
                wf.write(line)

    # check whether config has required information
    for name in ('batchsize', 'dataset', 'epoch', 'mode', 'model', 'optimizer'):
        assert hasattr(config, name), \
            'Configuration file does not have attribute {}!'.format(name)

    # wait until specified process finish
    # this works as a pseudo job scheduler
    # Linux only
    pid = args.wait
    if pid is not None:
        while os.path.exists('/proc/{}'.format(pid)):
            time.sleep(1)

    # set up GPU
    gpu = args.gpu
    if gpu >= 0:
        # if non negative GPU id is specified: use specified GPU
        # else (e.g. -1): use CPU
        chainer.cuda.get_device_from_id(gpu).use()
        chainer.cuda.set_max_workspace_size(1 * 1024 * 1024 * 1024)
    else:
        raise ValueError('currently, execution on CPU is not supported')
    chainer.global_config.autotune = True

    # set up model
    model = config.model
    if args.gpu >= 0:
        model.to_gpu()

    # get iterator of dataset
    train_dataset, val_dataset = config.dataset
    if args.loader_threads > 1:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_dataset, config.batchsize, n_processes=args.loader_threads)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_dataset, config.batchsize, repeat=False, n_processes=args.loader_threads)
    else:
        train_iter = chainer.iterators.SerialIterator(
            train_dataset, config.batchsize)
        val_iter = chainer.iterators.SerialIterator(
            val_dataset, config.batchsize, repeat=False)

    # set up optimizer
    # optimizer means SGD algorithms like momentum SGD
    optimizer = config.optimizer
    optimizer.setup(model)
    for hook in getattr(config, 'hook', []):
        # hook is called before optimizer's update
        # weight decay is one of the most common optimizer hook
        optimizer.add_hook(hook)

    # updater is a Chainer's training utility
    # this does the following at every iteration:
    # 1) prepare mini-batch from data iterator
    # 2) run forward and backward computation
    # 3) call optimizer (e.g. calculation of Adam)
    # 4) update parameter
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=gpu)

    # trainer is a manager class of training
    # this invokes updater every iteration
    # this also calls extensions added later at every specified interval
    trainer = chainer.training.Trainer(updater, (config.epoch, 'epoch'), output_dir)

    # evaluator calculates accuracy and loss with network on test mode
    # usually, validation data is used for val_iter
    # in this example, I just set test data for simplicity (not recommended)
    val_interval = (1, 'epoch')
    evaluator = extensions.Evaluator(val_iter, model, device=gpu)
    trainer.extend(evaluator, trigger=val_interval, name='val')
    trainer.extend(extensions.dump_graph('main/loss'))

    #
    # additional extensions
    # learning rate scheduling is set here
    for extension, trigger in getattr(config, 'extension', []):
        trainer.extend(extension, trigger=trigger)

    # log file will be added in a result directory
    log_report_ext = extensions.LogReport(trigger=val_interval)
    trainer.extend(log_report_ext)

    # write progress of training to standard output
    trainer.extend(extensions.PrintReport([
        'elapsed_time', 'epoch', 'main/loss', 'val/main/loss',
        'main/accuracy', 'val/main/accuracy'
    ]), trigger=val_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # keep snapshot of trained model for later use like evaluation against adversarial attacks
    trainer.extend(Snapshot(), trigger=(config.epoch, 'epoch'))

    # my implementation switches its behavior depending on chainer's config
    # for details on training modes, please read codes under src/ directory
    for mode in config.mode:
        setattr(chainer.config, mode, True)

    # this is a training loop
    trainer.run()

    # training is over
    print('Result: ', output_dir, flush=True)
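
The comments in Example #16 spell out what StandardUpdater does on every iteration. Written out by hand, one iteration amounts to the following (a simplified sketch, assuming a Classifier-style model that returns the loss, the train_iter and optimizer set up above, and the gpu id):

import chainer

batch = train_iter.next()
x, t = chainer.dataset.concat_examples(batch, device=gpu)  # 1) build the mini-batch
loss = model(x, t)                                         # 2) forward pass
model.cleargrads()
loss.backward()                                            # 2) backward pass
optimizer.update()                                         # 3) + 4) optimizer step updates the parameters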
Example #17
def main():
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel',
                        '-m',
                        default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch',
                        '-e',
                        default=100,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dim-hidden',
                        '-u',
                        default=500,
                        type=int,
                        help='dimension of hidden layers')
    parser.add_argument('--dimz',
                        '-z',
                        default=20,
                        type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='learning minibatch size')
    parser.add_argument('--test',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--vqvae', action='store_true', help='Use VQVAE')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    if args.vqvae:
        model = net.VQVAE(784, args.dimz, args.dim_hidden)
    else:
        model = net.VAE(784, args.dimz, args.dim_hidden)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(1e-4)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.))
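    # (GradientClipping rescales the gradients whenever their overall L2 norm
    #  exceeds the threshold of 5)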

    # Initialize
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # to use during training via the 'loss_func' option
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       loss_func=model.get_loss_func())
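    # (if loss_func were omitted, StandardUpdater would instead call the
    #  optimizer's target link itself to compute the loss)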

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter,
                             model,
                             device=args.gpu,
                             eval_func=model.get_loss_func(k=10)))
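    # (in the standard Chainer VAE example, k is the number of Monte Carlo
    #  samples used to estimate the reconstruction loss; presumably the same
    #  convention is used by get_loss_func in net.py here)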
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/rec_loss',
            'validation/main/rec_loss', 'main/other_loss',
            'validation/main/other_loss', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    @chainer.training.make_extension()
    def confirm_images(trainer):
        # Visualize the results
        def save_images(x, filename):
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
            for ai, xi in zip(ax.flatten(), x):
                ai.imshow(xi.reshape(28, 28))
            fig.savefig(filename)
            plt.close()

        model.to_cpu()
        train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
        x = chainer.Variable(np.asarray(train[train_ind]))
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x1 = model(x)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_train'.format(trainer)))
        save_images(
            x1.data,
            os.path.join(
                args.out,
                '{.updater.iteration}_train_reconstructed'.format(trainer)))

        test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
        x = chainer.Variable(np.asarray(test[test_ind]))
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x1 = model(x)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_test'.format(trainer)))
        save_images(
            x1.data,
            os.path.join(
                args.out,
                '{.updater.iteration}_test_reconstructed'.format(trainer)))

        # draw images from randomly sampled z
        if args.vqvae:
            z = model.sample(size=9)
        else:
            z = chainer.Variable(
                np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
        x = model.decode(z)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_sampled'.format(trainer)))

        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()

    trainer.extend(confirm_images, trigger=(max(1, args.epoch // 10), 'epoch'))

    # Run the training
    trainer.run()
Ejemplo n.º 18
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=5,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='output',
                        help='Directory to output the graph descriptor and sample test data')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    os.makedirs(args.out, exist_ok=True)

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=os.path.join(args.out, 'chainer_model'))

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # conversion

    print('Transpiling model to WebDNN graph descriptor')

    example_input = numpy.expand_dims(train[0][0], axis=0)  # example input (any value works; shape (batch_size, 784))
    x = chainer.Variable(example_input)
    y = model.predictor(x)  # run model (without softmax)
    graph = ChainerGraphConverter().convert_from_inout_vars([x], [y])  # convert graph to intermediate representation
    for backend in ["webgpu", "webassembly", "fallback"]:
        try:
            exec_info = generate_descriptor(backend, graph)
            exec_info.save(args.out)
        except Exception as ex:
            print(f"Failed generating descriptor for backend {backend}: {str(ex)}\n")
        else:
            print(f"Backend {backend} ok\n")

    print('Exporting test samples (for demo purpose)')
    test_samples_json = []
    for i in range(10):
        image, label = test[i]
        test_samples_json.append({'x': image.tolist(), 'y': int(label)})
    with open(os.path.join(args.out, 'test_samples.json'), 'w') as f:
        json.dump(test_samples_json, f)
Ejemplo n.º 19
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))
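    # (for example, with the default learnrate of 0.05 the lr becomes 0.025
    #  after 25 epochs, 0.0125 after 50 epochs, and so on)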

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=40)
    parser.add_argument('--looptimes', '-t', type=int, default=5)
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--batch', '-b', type=int, default=128)
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # Set up a neural network to train.
    model = L.Classifier(
        network.LocalPCN(class_labels=class_labels, LoopTimes=args.looptimes))

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.NesterovAG(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-3))

    num_train_samples = 45000
    train_iter = iterators.SerialIterator(train[:num_train_samples],
                                          batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(train[num_train_samples:],
                                         batch_size=args.batch,
                                         repeat=False,
                                         shuffle=False)

    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)

    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=(10, 'iteration'))

    # Schedule of a learning rate (LinearShift)
    trainer.extend(
        extensions.LinearShift('lr', (args.lr, args.lr * 0.1),
                               (args.epoch * 0.5, args.epoch * 0.5 + 1)),
        trigger=(1, 'epoch'))
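    # (with the defaults epoch=40 and lr=0.01, the lr is shifted linearly from
    #  0.01 down to 0.001 around the 20th epoch)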

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'lr', 'elapsed_time'
    ]),
                   trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Plot the computation graph
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)

    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)
Ejemplo n.º 21
def main():
    parser = argparse.ArgumentParser(description="Chainer CIFAR example:")
    parser.add_argument("--dataset", "-d", default="cifar10", help="The dataset to use: cifar10 or cifar100")
    parser.add_argument("--batchsize", "-b", type=int, default=128, help="Number of images in each mini-batch")
    parser.add_argument("--epoch", "-e", type=int, default=300, help="Number of sweeps over the dataset to train")
    parser.add_argument("--gpu", "-g", type=int, default=0, help="GPU ID (negative value indicates CPU)")
    parser.add_argument("--out", "-o", default="result", help="Directory to output the result")
    parser.add_argument("--resume", "-r", default="", help="Resume the training from snapshot")
    args = parser.parse_args()

    print("GPU: {}".format(args.gpu))
    print("# Minibatch-size: {}".format(args.batchsize))
    print("# epoch: {}".format(args.epoch))
    print("")

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == "cifar10":
        print("Using CIFAR10 dataset.")
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == "cifar100":
        print("Using CIFAR100 dataset.")
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError("Invalid dataset choice.")
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift("lr", 0.5), trigger=(25, "epoch"))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph("main/loss"))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, "epoch"))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "main/accuracy", "validation/main/accuracy", "elapsed_time"]
        )
    )

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 22
def main():
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch',
                        '-a',
                        choices=archs.keys(),
                        default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize',
                        '-B',
                        type=int,
                        default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        '-E',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean',
                        '-m',
                        default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize',
                        '-b',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(args.val, args.root, mean, model.insize,
                                  False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
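    # (forkserver starts workers from a clean helper process instead of
    #  fork()ing the current one, which avoids the crashes mentioned above)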
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 23
    # Setting up datasets
    train = VOC(args.train_img_dir, args.train_anno_dir, args.train_list_dir, args.train_list_suffix)
    valid = VOC(args.valid_img_dir, args.valid_anno_dir, args.valid_list_dir, args.valid_list_suffix)
    logging.info("train: {}, valid: {}".format(len(train), len(valid)))

    # Iterator
    train_iter = iterators.MultiprocessIterator(train, args.batchsize, shared_mem=10000000)
    valid_iter = iterators.SerialIterator(valid, args.valid_batchsize, repeat=False, shuffle=False)

    # Updater
    updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=result_dir)

    # Extensions
    trainer.extend(extensions.Evaluator(valid_iter, model, device=devices["main"]), trigger=(args.valid_freq, "epoch"))
    trainer.extend(extensions.dump_graph("main/rpn_loss_cls", out_name="rpn_loss_cls.dot"))
    trainer.extend(extensions.dump_graph("main/rpn_loss_bbox", out_name="rpn_loss_bbox.dot"))
    trainer.extend(extensions.dump_graph("main/loss_cls", out_name="loss_cls.dot"))
    trainer.extend(extensions.dump_graph("main/loss_bbox", out_name="loss_bbox.dot"))
    trainer.extend(extensions.snapshot(trigger=(args.snapshot_iter, "iteration")))
    trainer.extend(extensions.LogReport(trigger=(args.show_log_iter, "iteration")))
    trainer.extend(
        extensions.PrintReport(
            [
                "epoch",
                "iteration",
                "main/rpn_loss_cls",
                "main/rpn_loss_bbox",
                "main/loss_cls",
                "main/loss_bbox",
                "validation/main/rpn_loss_cls",
Ejemplo n.º 24
def main():
    parser = argparse.ArgumentParser(description='CNN Shogi:')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of data in each mini-batch')
    parser.add_argument('--alpha',
                        '-a',
                        type=float,
                        default=0.001,
                        help='Alpha parameter of Adam')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--datasize',
                        '-d',
                        type=int,
                        default=1000,
                        help='Number of data')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    train, test = get_data(args.datasize)
    model = net.Model()
    classifier = L.Classifier(model)

    # Use the GPU if one is specified
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        classifier.to_gpu()

    # Set up the trainer
    optimizer = chainer.optimizers.Adam(alpha=args.alpha)
    optimizer.setup(classifier)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(TestModeEvaluator(test_iter, classifier, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    # original version:
    #trainer.extend(extensions.snapshot_object(
    #    target=model, filename='snapshot', trigger=(args.epoch, 'epoch')))
    trainer.extend(
        extensions.snapshot_object(target=model, filename='snapshot'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 25
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr',
        '-l',
        type=float,
        default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate should be multiplied by the number of GPUs')
    parser.add_argument('--lr-cooldown-factor',
                        '-lcf',
                        type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISResNet101(n_fg_class=len(sbd_instance_segmentation_label_names),
                         pretrained_model='imagenet',
                         iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr),
                   trigger=(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter,
            model.fcis,
            iou_thresh=0.5,
            use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Ejemplo n.º 26
def main():
    parser = argparse.ArgumentParser(description='Chainer example: CIFAR10')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the mini_cifar dataset to train')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--dataset',
                        '-d',
                        default='mini_cifar/train',
                        help='Directory for train mini_cifar')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = CifarCNN(10)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the mini CIFAR-10 dataset
    # and split it into train and val sets
    train, val = chainer.datasets.split_dataset_random(
        MyCifarDataset(args.dataset), 1000)
    print('train data : {}'.format(len(train)))
    print('val data : {}'.format(len(val)))

    train_iter = chainer.iterators.SerialIterator(train,
                                                  args.batchsize,
                                                  repeat=True,
                                                  shuffle=True)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the validation split for each epoch
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.extend(extensions.snapshot(filename='snapshot_{.updater.epoch}'),
                   trigger=(20, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'model_{.updater.epoch}'),
                   trigger=(1, 'epoch'))

    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    trainer.extend(extensions.ProgressBar(update_interval=1))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    with chainer.using_config('train', True):
        trainer.run()
Ejemplo n.º 27
def main():
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    if comm.rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate dataset only on worker 0.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
Ejemplo n.º 28
def main(args):
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model, args.ignore)
        #model.finetune = True

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime("%Y%m%d-%H%M")  + '_bs' +  str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)
    # Load the datasets and mean file
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(
            val, args.val_batchsize, repeat=False, shuffle=False)

    # Set up an optimizer
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
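    # (Link.copy() shares the parameters by default, so eval_model always sees
    #  the trained weights while its own 'train' flag can be set independently)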
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model, device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model, device=args.gpu)
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
            trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.test:
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat, categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat, categories,
                                mode="rate", saveFormat="eps")
    return results
Ejemplo n.º 29
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--train',
                        default='train.txt',
                        type=str,
                        help='File name of train data')
    parser.add_argument('--test',
                        default='validation.txt',
                        type=str,
                        help='File name of validation data')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--mean',
                        default=None,
                        help='mean file (computed by compute_mean.py)')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    train = image_dataset.ImageDataset(args.train,
                                       args.root,
                                       max_size=128,
                                       mean=args.mean)
    test = image_dataset.ImageDataset(args.test,
                                      args.root,
                                      max_size=128,
                                      mean=args.mean)

    model = L.Classifier(alexnet.FromCaffeAlexnet(1),
                         lossfun=F.mean_squared_error)

    original_model = pickle.load(open('alexnet.pkl', 'rb'))
    copy_model(original_model, model.predictor)
    model.compute_accuracy = False

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
    optimizer.add_hook(
        DelGradient(["conv1", "conv2", "conv3", "conv4", "conv5"]))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 30
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset',
                        choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))

    comm = chainermn.create_communicator('hierarchical')
    device = comm.intra_rank

    n_node = comm.inter_size
    n_gpu = comm.size
    chainer.cuda.get_device_from_id(device).use()

    total_batch_size = n_gpu

    args.lr = args.lr * total_batch_size
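    # Linear learning-rate scaling: with one sample per process the effective
    # batch size equals the number of workers, so the base learning rate is
    # scaled by the same factor (a common heuristic for synchronous
    # multi-GPU SGD).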

    test_data = VOCBboxDataset(split='test',
                               year='2007',
                               use_difficult=True,
                               return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    if comm.rank != 0:
        train_data = None
        test_data = None
    train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
    test_data = chainermn.scatter_dataset(test_data, comm)
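    # Only rank 0 loads the full datasets; chainermn.scatter_dataset splits
    # them and distributes an equal shard to every worker, which is why the
    # other ranks pass None into the scatter.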

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)

    updater = chainer.training.updaters.StandardUpdater(train_iter,
                                                        optimizer,
                                                        device=device)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    evaluator = DetectionVOCEvaluator(test_iter,
                                      model,
                                      device=device,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)

    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator,
                   trigger=ManualScheduleTrigger(
                       [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    if comm.rank == 0:
        trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=(args.iteration, 'iteration'))

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'validation/main/map',
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 31
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
    parser.add_argument(
        'dataset',
        choices=['visible+occlusion', 'synthetic', 'occlusion'],
        help='The dataset.',
    )
    parser.add_argument('--model',
                        '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50',
                        help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func',
                        '-pf',
                        choices=['pooling', 'align', 'resize'],
                        default='align',
                        help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node',
                        '-mn',
                        action='store_true',
                        help='use multi node')
    parser.add_argument('--mask-loss',
                        default='softmax',
                        choices=contrib.models.MaskRCNN.mask_losses,
                        help='mask loss mode')
    default_max_epoch = (180e3 * 8) / 118287 * 3  # x3
    parser.add_argument('--max-epoch',
                        type=float,
                        default=default_max_epoch,
                        help='epoch')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank

        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(
        here,
        'logs/train_mrcnn_lbl',
        now.strftime('%Y%m%d_%H%M%S'),
    )

    # 0.00125 * 8 = 0.01  in original
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    args.min_size = 800
    args.max_size = 1333
    args.anchor_scales = (2, 4, 8, 16, 32)

    if args.dataset == 'visible+occlusion':
        train_data1 = contrib.datasets.ARC2017RealInstancesDataset(
            'train', aug='standard')
        train_data1 = MaskRcnnDataset(train_data1, zero_to_unlabeled=True)
        train_data2 = contrib.datasets.ARC2017RealInstancesDataset(
            'test', aug='standard')
        train_data2 = MaskRcnnDataset(train_data2, zero_to_unlabeled=True)
        train_data3 = contrib.datasets.ARC2017OcclusionDataset('train',
                                                               do_aug=True)
        train_data3 = MaskRcnnDataset(train_data3)
        train_data = chainer.datasets.ConcatenatedDataset(
            train_data1,
            train_data2,
            train_data3,
        )
    elif args.dataset == 'synthetic':
        train_data = contrib.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
        train_data = MaskRcnnDataset(train_data)
    elif args.dataset == 'occlusion':
        train_data = contrib.datasets.ARC2017OcclusionDataset('train',
                                                              do_aug=True)
        train_data = MaskRcnnDataset(train_data)
    else:
        raise ValueError
    test_data = contrib.datasets.ARC2017OcclusionDataset('test')
    instance_class_names = test_data.class_names[1:]
    test_data_list = test_data.get_video_datasets()
    del test_data
    test_data_list = [MaskRcnnDataset(td) for td in test_data_list]

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError

    if args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model[len('resnet'):])
        mask_rcnn = contrib.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(instance_class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_loss=args.mask_loss,
        )
    else:
        raise ValueError
    model = contrib.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    for link in mask_rcnn.links():
        if isinstance(link, cmr.links.AffineChannel2D):
            link.disable_update()
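    # The loop above freezes the AffineChannel2D links, which presumably act
    # as fixed per-channel affine replacements for batch normalization in the
    # pretrained ResNet backbone, so their statistics are not disturbed during
    # fine-tuning.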

    train_data = chainer.datasets.TransformDataset(
        train_data, cmr.datasets.MaskRCNNTransform(mask_rcnn))
    test_data_list = [
        chainer.datasets.TransformDataset(
            td, cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False))
        for td in test_data_list
    ]
    test_concat_data = chainer.datasets.ConcatenatedDataset(*test_data_list)
    if args.multi_node:
        # XXX: test_data is only used on device0
        if comm.rank != 0:
            train_data = None
            # test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        # test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iters = {
        i: chainer.iterators.SerialIterator(td,
                                            batch_size=1,
                                            repeat=False,
                                            shuffle=False)
        for i, td in enumerate(test_data_list)
    }
    test_concat_iter = chainer.iterators.SerialIterator(test_concat_data,
                                                        batch_size=1,
                                                        repeat=False,
                                                        shuffle=False)

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
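    # Custom batch converter: cmr.datasets.concat_examples is assumed to pad
    # variable-sized arrays with 0, concatenate only the fields listed in
    # indices_concat, and copy only the fields listed in indices_to_device
    # (image and bounding boxes) to the GPU.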
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=device,
                                                       converter=converter)

    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'),
                               out=args.out)

    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=training.triggers.ManualScheduleTrigger(
                       args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if not args.multi_node or comm.rank == 0:
        evaluator = contrib.extensions.InstanceSegmentationVOCEvaluator(
            test_iters,
            model.mask_rcnn,
            device=device,
            use_07_metric=False,
            label_names=instance_class_names)
        trainer.extend(evaluator, trigger=eval_interval)
        trainer.extend(extensions.snapshot_object(model.mask_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=training.triggers.MaxValueTrigger(
                           'validation/main/mpq', eval_interval))
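        # The snapshot above is only (over)written when the reported
        # 'validation/main/mpq' score reaches a new maximum, so
        # snapshot_model.npz always holds the best model seen so far.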
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(contrib.extensions.InstanceSegmentationVisReport(
            test_concat_iter,
            model.mask_rcnn,
            label_names=instance_class_names),
                       trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/roi_loc_loss', 'main/roi_cls_loss', 'main/roi_mask_loss',
                'main/rpn_loc_loss', 'main/rpn_cls_loss', 'validation/main/mpq'
            ]),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                    'main/roi_mask_loss', 'main/rpn_loc_loss',
                    'main/rpn_cls_loss'
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport([
                'validation/main/map', 'validation/main/msq',
                'validation/main/mdq', 'validation/main/mpq'
            ],
                                  file_name='accuracy.png',
                                  trigger=plot_interval),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example No. 32
def main():
    parser = argparse.ArgumentParser(
        description=
        'Fully Convolutional Dual Center Pose Proposal Network for Pose Estimation'
    )
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='results/dual_cp',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--train-resnet',
                        '--train_resnet',
                        dest='train_resnet',
                        action='store_true',
                        help='train resnet')
    parser.set_defaults(train_resnet=False)
    parser.add_argument('--no-accuracy',
                        dest='compute_acc',
                        action='store_false')
    parser.set_defaults(compute_acc=True)
    parser.add_argument('--no-pose-accuracy',
                        dest='compute_pose_acc',
                        action='store_false')
    parser.set_defaults(compute_pose_acc=True)

    args = parser.parse_args()

    compute_class_accuracy = args.compute_acc
    compute_pose_accuracy = args.compute_pose_acc and args.compute_acc

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# compute class accuracy: {}'.format(compute_class_accuracy))
    print('# compute pose accuracy: {}'.format(compute_pose_accuracy))
    print('')

    im_size = (640, 480)
    objs = np.arange(3) + 1
    n_class = len(objs) + 1

    train_path = os.path.join(os.getcwd(), root,
                              'train_data/JSK_Objects/train')
    bg_path = os.path.join(os.getcwd(), root, 'train_data/MS_COCO/train2017')
    # bg_path = os.path.join(os.getcwd(), root, 'train_data/VOCdevkit/VOC2012/JPEGImages')

    caffe_model = 'ResNet-50-model.caffemodel'

    distance_sanity = 0.05
    output_scale = 0.6
    eps = 0.05
    interval = 15

    # make cuDNN deterministic for the whole run (a bare chainer.using_config()
    # call returns a context manager and has no effect outside a `with` block)
    chainer.config.cudnn_deterministic = True
    model = DualCPNetClassifier(DualCenterProposalNetworkRes50_predict7(
        n_class=n_class, pretrained_model=not args.train_resnet),
                                basepath=train_path,
                                im_size=im_size,
                                distance_sanity=distance_sanity,
                                compute_class_accuracy=compute_class_accuracy,
                                compute_pose_accuracy=compute_pose_accuracy,
                                output_scale=output_scale)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # load train data
    train = JSKPoseEstimationAutoContextDataset(train_path,
                                                objs,
                                                bg_path,
                                                interval=interval,
                                                iteration_per_epoch=1000,
                                                mode='test',
                                                resize_rate=0.5,
                                                metric_filter=output_scale +
                                                eps)

    # load test data
    # test = JSKPoseEstimationAutoContextDataset(train_path, objs, bg_path,
    #                                             interval=interval,
    #                                             mode='train',
    #                                             resize_rate=0.5,
    #                                             metric_filter=output_scale + eps)
    test = JSKPoseEstimationDataset(train_path,
                                    objs,
                                    mode='train',
                                    interval=interval,
                                    resize_rate=0.5,
                                    metric_filter=output_scale + eps)

    print "number of train data : ", train.__len__()
    print "number of test data : ", test.__len__()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    # test_iter = chainer.iterators.MultiprocessIterator(test, args.batchsize,
    #                                                    repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    evaluator = extensions.Evaluator(test_iter, model, device=args.gpu)
    evaluator.default_name = 'val'
    trainer.extend(evaluator)

    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot and snapshot object for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
                   trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/l_cls', 'main/l_cp', 'main/l_ocp', 'main/cls_acc',
            'main/ocp_acc', 'main/rot_acc', 'val/main/l_cls', 'val/main/l_cp',
            'val/main/l_ocp', 'val/main/cls_acc', 'val/main/ocp_acc',
            'val/main/rot_acc', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)
    else:
        npz_name = 'DualCenterProposalNetworkRes50_jsk_class{}.npz'
        caffemodel_name = 'ResNet-50-model.caffemodel'
        path = os.path.join(root, 'trained_data/', npz_name.format(n_class))
        path_caffemodel = os.path.join(root, 'trained_data/', caffemodel_name)
        print('npz model path : ' + path)
        print('caffe model path : ' + path_caffemodel)
        download.cache_or_load_file(
            path, lambda path: _make_chainermodel_npz(path, path_caffemodel,
                                                      model, n_class),
            lambda path: serializers.load_npz(path, model))
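        # When not resuming, the pretrained weights are prepared instead:
        # download.cache_or_load_file converts the ResNet-50 caffemodel into a
        # Chainer .npz on the first run (via _make_chainermodel_npz) and simply
        # loads the cached .npz into the model on subsequent runs.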

    # Run the training
    trainer.run()
Example No. 33
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--class_weight', type=str, default='class_weight.npy')
    parser.add_argument('--out', type=str, default='result')
    args = parser.parse_args()

    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (16000, 'iteration')

    # Dataset
    train = CamVidDataset(split='train')
    train = TransformDataset(train, transform)
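    # 'transform' is defined elsewhere in this script; a typical augmentation
    # for this SegNet setup, for example random horizontal flipping of image
    # and label map, might look like:
    #
    #     def transform(in_data):
    #         img, label = in_data
    #         if np.random.rand() > 0.5:
    #             img = img[:, :, ::-1]   # flip the CHW image horizontally
    #             label = label[:, ::-1]  # flip the HW label map accordingly
    #         return img, label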
    val = CamVidDataset(split='val')

    # Iterator
    train_iter = iterators.MultiprocessIterator(train, args.batchsize)
    val_iter = iterators.MultiprocessIterator(
        val, args.batchsize, shuffle=False, repeat=False)

    # Model
    class_weight = np.load(args.class_weight)
    model = SegNetBasic(n_class=11)
    model = PixelwiseSoftmaxClassifier(
        model, class_weight=class_weight)
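    # The class weights loaded from class_weight.npy compensate for the
    # imbalanced label frequencies in CamVid, so rare classes still contribute
    # to the pixelwise softmax cross-entropy loss.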
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, out=args.out)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss'], x_key='iteration',
            file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['validation/main/miou'], x_key='iteration',
            file_name='miou.png'))

    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
        trigger=end_trigger)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time', 'lr',
         'main/loss', 'validation/main/miou',
         'validation/main/mean_class_accuracy',
         'validation/main/pixel_accuracy']),
        trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(
        SemanticSegmentationEvaluator(
            val_iter, model.predictor,
            camvid_label_names),
        trigger=validation_trigger)

    trainer.run()
test_iter = I.SerialIterator(test, batchsize, repeat=False, shuffle=False)

updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (epoch, 'epoch'))

eval_model = model.copy()
eval_nfp = eval_model.predictor


trainer.extend(E.LogReport(trigger=(2, 'iteration')))
trainer.extend(E.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy','elapsed_time']))
trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
trainer.extend(E.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
#trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
#trainer.extend(extensions.snapshot_object(model.predictor, filename='model_epoch-{.updater.epoch}'))
trainer.extend(E.Evaluator(val_iter, model),
               trigger=(2, 'iteration'))
trainer.extend(E.dump_graph('main/loss'))

trainer.run()

print('test')
evaluator = E.Evaluator(test_iter, eval_model)
result = evaluator()
print('test accuracy:', float(result['main/accuracy']))

# save model
chainer.serializers.save_npz('model.npz', model)


    warmup(model, train_iter)
    model.rpn_train = True

    # optimizer = optimizers.Adam()
    # optimizer.setup(model)
    optimizer = optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=0)
    trainer = training.Trainer(updater, (100, 'epoch'), out='tests/train_test')
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/RPN/rpn_loss',
        'main/RPN/rpn_loss_cls',
        'main/RPN/rpn_cls_accuracy',
        'main/RPN/rpn_loss_bbox',
        'elapsed_time',
    ]), trigger=(100, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'),
        trigger=(1000, 'iteration'))
    trainer.extend(extensions.PlotReport(['main/RPN/rpn_loss'],
                                         trigger=(100, 'iteration')))
    trainer.extend(
        extensions.dump_graph('main/RPN/rpn_loss', out_name='rpn_loss.dot'))

    trainer.run()
Example No. 36
    n_iteration = max(len(train) // config.batchsize, 1)
    test_interval = (max(len(train) // len(test), 1), 'iteration')
    save_interval = (5, 'epoch')
    log_interval = (max(n_iteration // 1, 1), 'iteration')
    progressbar_interval = 3
    imgview_face_interval = (5, 'iteration')
    imgview_weight_interval = (1, 'epoch')
    logger.info('Test interval : {}'.format(test_interval))
    logger.info('Save interval : {}'.format(save_interval))
    logger.info('Log interval :  {}'.format(log_interval))
    logger.info('ProgressBar interval :  {}'.format(progressbar_interval))
    logger.info('ImgView face interval :   {}'.format(imgview_face_interval))
    logger.info('ImgView weight interval : {}'.format(imgview_weight_interval))

    # Extensions
    trainer.extend(extensions.dump_graph('main/loss'), trigger=save_interval)
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=save_interval)
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=save_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'validation/main/loss']),
                   trigger=log_interval)
    trainer.extend(
        extensions.ProgressBar(update_interval=progressbar_interval))

    # My extensions
    # Sequential Evaluator
    model.rcnn_train = True

    # optimizer = optimizers.Adam()
    # optimizer.setup(model)
    optimizer = optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=0)
    trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn')
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss_cls',
        'main/cls_accuracy',
        'main/loss_bbox',
        'main/loss_rcnn',
        'elapsed_time',
    ]), trigger=(100, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'),
        trigger=(1000, 'iteration'))
    trainer.extend(extensions.PlotReport(['main/loss_rcnn'],
                                         trigger=(100, 'iteration')))
    trainer.extend(extensions.PlotReport(['main/cls_accuracy'],
                                         trigger=(100, 'iteration')))
    trainer.extend(
        extensions.dump_graph('main/loss_rcnn', out_name='loss_rcnn.dot'))

    trainer.run()
Example No. 38
def main(arg_list=None):
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        nargs='+',
                        default=[20],
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--optimizer',
                        '-o',
                        nargs='+',
                        default=['momentumsgd'],
                        help='Optimizer (sgd, momentumsgd, adam)')
    parser.add_argument('--batch-size',
                        '-b',
                        type=int,
                        nargs='+',
                        default=[128],
                        help='Number of training points in each mini-batch')
    parser.add_argument('--lr',
                        type=float,
                        nargs='+',
                        default=[1e-2, 1e-3, 1e-4, 1e-5],
                        help='Learning rate')
    parser.add_argument('--early-stopping',
                        type=str2bool,
                        nargs='+',
                        default=[True],
                        help="True if early stopping should be enabled")
    parser.add_argument(
        '--network',
        '-n',
        default='ff',
        help=
        'Neural network type, either "ff", "tdnn", "lstm", "zoneoutlstm", "peepholelstm" or "gru". Setting any recurrent network implies "--shuffle-sequences"'
    )
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--units',
                        '-u',
                        type=int,
                        nargs='+',
                        default=[1024],
                        help='Number of units')
    parser.add_argument('--layers',
                        '-l',
                        type=int,
                        default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation',
                        '-a',
                        default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize',
                        type=int,
                        nargs='+',
                        default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--bproplen',
                        type=int,
                        default=20,
                        help='Backpropagation length')
    parser.add_argument('--timedelay',
                        type=int,
                        default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--splice', type=int, default=0, help='Splicing size')
    parser.add_argument(
        '--dropout',
        '-d',
        type=float,
        nargs='+',
        default=[0],
        help=
        'Dropout rate (0 to disable). In case of Zoneout LSTM, this parameter has 2 arguments: c_ratio h_ratio'
    )
    parser.add_argument('--ft',
                        default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument(
        '--shuffle-sequences',
        action='store_true',
        help=
        'True if sequences should be shuffled as a whole, otherwise all frames will be shuffled independent of each other'
    )
    parser.add_argument(
        '--data-dir',
        default='data/fmllr',
        help=
        'Data directory, this will be prepended to data files and feature transform'
    )
    parser.add_argument(
        '--offset-dir',
        default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--target-dir',
        default='data/targets',
        help='Data directory, this will be prepended to target files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--data', default='data_{}.npy', help='Training data')
    parser.add_argument('--offsets',
                        default='offsets_{}.npy',
                        help='Training offsets')
    parser.add_argument('--targets',
                        default='targets_{}.npy',
                        help='Training targets')
    parser.add_argument('--ivectors',
                        default='ivectors_{}.npy',
                        help='Training ivectors')
    parser.add_argument('--no-validation',
                        dest='use_validation',
                        action='store_false',
                        help='Do not evaluate validation data while training')
    parser.add_argument('--train-fold',
                        type=int,
                        help='Train fold network with this ID')
    parser.add_argument('--train-rpl',
                        action='store_true',
                        help='Train RPL layer')
    parser.add_argument('--rpl-model',
                        default="result_rpl/model",
                        help='RPL layer model')
    parser.add_argument('--fold-data-dir',
                        default="fold_data",
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir',
                        default="fold_data_out",
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir',
                        default="fold_models",
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-data-pattern',
        default='data_{0}.npy',
        help=
        'Filename pattern of each fold data, {0} will be replaced by fold ID')
    parser.add_argument('--fold-offset-pattern',
                        default='offsets_{0}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument('--fold-target-pattern',
                        default='targets_{0}.npy',
                        help='Filename pattern of each fold targets')
    parser.add_argument(
        '--fold-ivector-pattern',
        default='ivectors_{}.npy',
        help=
        'Filename pattern of each fold i-vectors file, {} will be replaced by fold ID'
    )
    parser.add_argument('--fold-output-pattern',
                        default='data_{0}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern',
                        default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    parser.add_argument('--no-progress',
                        action='store_true',
                        help='Disable progress bar')
    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # set options implied by other options
    if is_nn_recurrent(args.network):
        args.shuffle_sequences = True

    # create output directories
    Path(args.out).mkdir(exist_ok=True, parents=True)
    if args.train_fold is not None:
        file_out = Path(args.fold_model_dir,
                        args.fold_network_pattern.format(args.train_fold))
        Path(file_out.parent).mkdir(exist_ok=True, parents=True)

    # print arguments to the file
    with open(args.out + "/args.txt", "w") as f:
        for attr in dir(args):
            if not attr.startswith('_'):
                f.write('# {}: {}\n'.format(attr, getattr(args, attr)))
        f.write(' '.join(
            map(lambda x: "'" + x + "'" if ' ' in x else x, sys.argv)) + '\n')

    # print arguments to stdout
    for attr in dir(args):
        if not attr.startswith('_'):
            print('# {}: {}'.format(attr, getattr(args, attr)))
    print('')

    # number of output classes (triphone or monophone targets)
    num_classes = 1909 if args.tri else 39

    # create model
    if args.train_rpl:
        model = RPL4(num_classes)
        model_cls = L.Classifier(model)
    else:
        if args.activation == "sigmoid":
            activation = F.sigmoid
        elif args.activation == "tanh":
            activation = F.tanh
        elif args.activation == "relu":
            activation = F.relu
        else:
            print("Wrong activation function specified")
            return
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)

        # classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        model_cls = L.Classifier(model)
    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    offsets = offsets_dev = None

    if args.train_rpl:
        # load training data
        fold = 0
        x = []
        y = []

        while True:
            x_file = Path(args.fold_output_dir,
                          args.fold_output_pattern.format(fold))
            y_file = Path(args.fold_data_dir,
                          args.fold_target_pattern.format(fold))
            if not x_file.is_file() or not y_file.is_file():
                break
            print("Loading fold {} data".format(fold))
            x_ = np.load(str(x_file))
            y_ = np.load(str(y_file))
            x.append(x_)
            y.append(y_)
            fold += 1

        if fold == 0:
            print("Error: No fold data found")
            return

        x = np.concatenate(x, axis=0)
        y = np.concatenate(y, axis=0)

        if args.use_validation:  #TODO: use args.data instead of args.dev_data
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            # offsets_dev = loadBin(str(Path(args.datadir, args.dev_offsets)), np.int32)
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
    else:
        # load training data
        ivectors = None
        ivectors_dev = None
        if args.train_fold is not None:
            x = []
            offsets = [0]
            y = []
            if args.ivector_dir is not None:
                ivectors = []
            num = 0
            fold = 0
            while True:
                if fold != args.train_fold:
                    x_file = Path(args.fold_data_dir,
                                  args.fold_data_pattern.format(fold))
                    if not x_file.is_file():
                        break
                    offsets_file = Path(args.fold_data_dir,
                                        args.fold_offset_pattern.format(fold))
                    y_file = Path(args.fold_data_dir,
                                  args.fold_target_pattern.format(fold))
                    if args.ivector_dir is not None:
                        ivectors_file = Path(
                            args.fold_data_dir,
                            args.fold_ivector_pattern.format(fold))
                        if not ivectors_file.is_file():
                            print("Error: missing ivectors for fold data {}".
                                  format(fold))
                            return

                    print("Loading fold {} data".format(fold))
                    x_fold = np.load(str(x_file))
                    x.append(x_fold)
                    if is_nn_recurrent(args.network):
                        offsets_fold = np.load(str(offsets_file))
                        offsets.extend(offsets_fold[1:] + num)
                    y_fold = np.load(str(y_file))
                    y.append(y_fold)
                    if args.ivector_dir is not None:
                        ivectors_fold = np.load(str(ivectors_file))
                        ivectors.append(ivectors_fold)
                    num += x_fold.shape[0]
                fold += 1

            if len(x) == 0:
                print("Error: No fold data found")
                return

            x = np.concatenate(x, axis=0)
            if is_nn_recurrent(args.network):
                offsets = np.array(offsets, dtype=np.int32)
            y = np.concatenate(y, axis=0)
            if args.ivector_dir is not None:
                ivectors = np.concatenate(ivectors, axis=0)
        else:
            x = np.load(str(Path(args.data_dir, args.data.format("train"))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.offset_dir, args.offsets.format("train"))))
            y = np.load(
                str(Path(args.target_dir, args.targets.format("train"))))
            if args.ivector_dir is not None:
                ivectors = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("train"))))

        if args.use_validation:
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            if is_nn_recurrent(args.network):
                offsets_dev = np.load(
                    str(Path(args.offset_dir, args.offsets.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
            if args.ivector_dir is not None:
                ivectors_dev = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("dev"))))

        # apply splicing
        if args.network == "tdnn":
            splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
        else:
            splice = args.splice
        if splice > 0:
            x = splicing(x, range(-splice, splice + 1))
            if args.use_validation:
                x_dev = splicing(x_dev, range(-splice, splice + 1))
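        # splicing() is assumed to stack each frame together with its
        # +/-splice neighbouring frames along the feature axis, growing the
        # input dimensionality by a factor of (2 * splice + 1); for the TDNN
        # the required context is derived from the kernel sizes rather than
        # from --splice.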

        # load feature transform
        if args.ft is not None and args.ft != '-':
            ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
            if is_nn_recurrent(
                    args.network
            ):  # select transform middle frame if the network is recurrent
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
                ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
                ft["shape"][0] = dim
                ft["shifts"] = [0]
            elif args.network == "tdnn":
                dim = ft["shape"][1]
                zi = ft["shifts"].index(0)
                winlen = 2 * splice + 1
                ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                        winlen)
                ft["addShift"] = np.tile(
                    ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
                ft["shape"][0] = dim * winlen
                ft["shifts"] = list(range(-splice, splice + 1))
            # apply feature transform
            x = applyKaldiFeatureTransform(x, ft)
            if args.use_validation:
                x_dev = applyKaldiFeatureTransform(x_dev, ft)

        if ivectors is not None:
            x = np.concatenate((x, ivectors), axis=1)
        if ivectors_dev is not None:
            x_dev = np.concatenate((x_dev, ivectors_dev), axis=1)

        # shift the input dataset according to time delay
        if is_nn_recurrent(args.network) and args.timedelay != 0:
            x, y, offsets = apply_time_delay(x, y, offsets, args.timedelay)
            if args.use_validation:
                x_dev, y_dev, offsets_dev = apply_time_delay(
                    x_dev, y_dev, offsets_dev, args.timedelay)

    # create chainer datasets
    train_dataset = chainer.datasets.TupleDataset(x, y)
    if args.use_validation:
        dev_dataset = chainer.datasets.TupleDataset(x_dev, y_dev)

    # prepare train stages
    train_stages_len = max([
        len(a) for a in [
            args.epoch, args.optimizer, args.batch_size, args.lr,
            args.early_stopping
        ]
    ])
    train_stages = [{
        'epoch': index_padded(args.epoch, i),
        'opt': index_padded(args.optimizer, i),
        'bs': index_padded(args.batch_size, i),
        'lr': index_padded(args.lr, i),
        'es': index_padded(args.early_stopping, i)
    } for i in range(train_stages_len)]
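    # Each training stage i takes the i-th entry of every hyperparameter list;
    # index_padded is assumed to repeat the last element of a shorter list, so
    # that e.g. a single --optimizer value applies to all stages.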

    for i, ts in enumerate(train_stages):
        if ts['opt'] == 'adam':  # learning rate not used, don't print it
            print(
                "=== Training stage {}: epoch = {}, batch size = {}, optimizer = {}, early stopping = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['es']))
        else:
            print(
                "=== Training stage {}: epoch = {}, batch size = {}, optimizer = {}, learning rate = {}, early stopping = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['lr'],
                        ts['es']))

        # reset state to allow training with different batch size in each stage
        if not args.train_rpl and is_nn_recurrent(args.network):
            model.reset_state()

        # setup an optimizer
        if ts['opt'] == "sgd":
            optimizer = chainer.optimizers.SGD(lr=ts['lr'])
        elif ts['opt'] == "momentumsgd":
            optimizer = chainer.optimizers.MomentumSGD(lr=ts['lr'])
        elif ts['opt'] == "adam":
            optimizer = chainer.optimizers.Adam()
        else:
            print("Wrong optimizer specified: {}".format(ts['opt']))
            exit(1)
        optimizer.setup(model_cls)

        if args.shuffle_sequences:
            train_iter = SequenceShuffleIterator(train_dataset, offsets,
                                                 ts['bs'])
            if args.use_validation:
                dev_iter = SequenceShuffleIterator(dev_dataset,
                                                   None,
                                                   ts['bs'],
                                                   repeat=False,
                                                   shuffle=False)
        else:
            train_iter = SerialIterator(train_dataset, ts['bs'])
            if args.use_validation:
                dev_iter = SerialIterator(dev_dataset,
                                          ts['bs'],
                                          repeat=False,
                                          shuffle=False)

        # set up a trainer
        if is_nn_recurrent(args.network):
            updater = BPTTUpdater(train_iter,
                                  optimizer,
                                  args.bproplen,
                                  device=args.gpu)
        else:
            updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
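        # Recurrent networks are trained with the BPTTUpdater above, which is
        # assumed to implement truncated backpropagation through time by
        # cutting the computational graph every args.bproplen steps.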
        if ts['es'] and args.use_validation:
            stop_trigger = EarlyStoppingTrigger(ts['epoch'],
                                                key='validation/main/loss',
                                                eps=-0.001)
        else:
            stop_trigger = (ts['epoch'], 'epoch')
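        # With early stopping enabled, training may end before ts['epoch']
        # epochs once the validation loss stops improving (eps=-0.001 is
        # assumed to be the minimum required improvement); otherwise a plain
        # epoch-count trigger is used.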
        trainer = training.Trainer(updater,
                                   stop_trigger,
                                   out="{}/{}".format(args.out, i))

        if ts['es']:
            trainer.extend(model_saver)
        else:
            trainer.extend(BestModelSaver(key="validation/main/loss"))

        # evaluate the model with the development dataset for each epoch
        if args.use_validation:
            trainer.extend(
                extensions.Evaluator(dev_iter, model_cls, device=args.gpu))

        # dump a computational graph from 'loss' variable at the first iteration
        # the "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # take a snapshot for each specified epoch
        frequency = ts['epoch'] if args.frequency == -1 else max(
            1, args.frequency)
        trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

        # write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            plot_vars_loss = ['main/loss']
            plot_vars_acc = ['main/accuracy']
            if args.use_validation:
                plot_vars_loss.append('validation/main/loss')
                plot_vars_acc.append('validation/main/accuracy')
            trainer.extend(
                extensions.PlotReport(plot_vars_loss,
                                      'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(plot_vars_acc,
                                      'epoch',
                                      file_name='accuracy.png'))

        # print selected entries of the log to stdout
        # here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        if args.use_validation:
            print_report_vars = [
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]
        else:
            print_report_vars = [
                'epoch', 'main/loss', 'main/accuracy', 'elapsed_time'
            ]
        trainer.extend(extensions.PrintReport(print_report_vars))

        # print a progress bar to stdout
        # trainer.extend(extensions.ProgressBar())

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # Run the training
        trainer.run()

        if ts['es']:
            # load the last model if the max epoch was not reached (that means early stopping trigger stopped training
            # because the validation loss increased)
            if updater.epoch_detail < ts['epoch']:
                chainer.serializers.load_npz(
                    "{}/{}/model_tmp".format(args.out, i), model_cls)
            # remove temporary model from this training stage
            os.remove("{}/{}/model_tmp".format(args.out, i))
        else:
            # load the best model from this training stage
            chainer.serializers.load_npz(
                "{}/{}/model_best".format(args.out, i), model_cls)
            # remove the best model from this training stage
            os.remove("{}/{}/model_best".format(args.out, i))

    # save the final model
    chainer.serializers.save_npz("{}/model".format(args.out), model_cls)
    if args.train_fold is not None:
        chainer.serializers.save_npz(
            str(
                Path(args.fold_model_dir,
                     args.fold_network_pattern.format(args.train_fold))),
            model_cls)
Example No. 39
def main():
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    model = net.VAE(784, args.dimz, 500)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Initialize
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # used in the training with 'loss_func' option
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        device=args.gpu, loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu,
                                        eval_func=model.get_loss_func(k=10)))
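    # The evaluator computes the VAE loss with k=10, which presumably draws 10
    # latent samples per input for a lower-variance estimate of the
    # reconstruction term than the single sample used during training.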
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/rec_loss', 'validation/main/rec_loss', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)

    model.to_cpu()
    train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.array, os.path.join(args.out, 'train'))
    save_images(x1.array, os.path.join(args.out, 'train_reconstructed'))

    test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.array, os.path.join(args.out, 'test'))
    save_images(x1.array, os.path.join(args.out, 'test_reconstructed'))

    # draw images from randomly sampled z
    z = chainer.Variable(
        np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
    x = model.decode(z)
    save_images(x.array, os.path.join(args.out, 'sampled'))
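# A minimal sketch, assuming net.VAE.get_loss_func follows Chainer's official VAE
# example: a method of the VAE chain (shown standalone here) that returns a closure
# computing the negative ELBO -- Bernoulli reconstruction loss averaged over k
# latent samples plus the Gaussian KL term. The real net.py used above may differ.
import chainer
import chainer.functions as F

def get_loss_func(self, beta=1.0, k=1):
    def lf(x):
        mu, ln_var = self.encode(x)
        batchsize = len(mu.array)
        rec_loss = 0
        for _ in range(k):
            z = F.gaussian(mu, ln_var)  # reparameterized sample z ~ N(mu, exp(ln_var))
            rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
                / (k * batchsize)
        self.rec_loss = rec_loss
        self.loss = rec_loss + beta * F.gaussian_kl_divergence(mu, ln_var) / batchsize
        # Reported values show up as main/loss, main/rec_loss, etc. in the logs above.
        chainer.report({'rec_loss': rec_loss, 'loss': self.loss}, observer=self)
        return self.loss
    return lf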
Ejemplo n.º 40
0
def main():
    parser = argparse.ArgumentParser(description="Learning from flowers data")
    parser.add_argument("--gpu",
                        "-g",
                        type=int,
                        default=-1,
                        help="GPU ID (negative value indicates CPU")
    parser.add_argument("--init", help="Initialize the model from given file")
    parser.add_argument('--job',
                        '-j',
                        type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument("--resume",
                        '-r',
                        default='',
                        help="Initialize the trainer from given file")
    args = parser.parse_args()

    batch = 32
    epoch = 50
    val_batch = 200
    model = models.ResNet50V1(data.ClassNumber)
    if args.init:
        print('Load model from', args.init)
        chainer.serializers.load_npz(args.init, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    if data.fetch_flowers() and data.fetch_labels():
        print("Flower images and labels have been fetched.")
    else:
        print("Failed to fetch flower images and labels")
        return

    data.pre_process_data(224)

    output_name = output.init_train(model.__class__)
    output_path = path.join(output.OutPath, output_name)

    train, validate = data.get_datasets()

    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        batch,
                                                        n_processes=args.job)
    val_iter = chainer.iterators.MultiprocessIterator(validate,
                                                      val_batch,
                                                      repeat=False,
                                                      n_processes=args.job)

    classifier = chainer.links.Classifier(model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier)
    model.base.disable_update()

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (epoch, 'epoch'), output_path)

    val_interval = 500, 'iteration'
    log_interval = 250, 'iteration'
    snapshot_interval = 5000, 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, classifier, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=snapshot_interval)

    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print("Start training")
    trainer.run()

    model.to_cpu()
    chainer.serializers.save_npz(path.join(output_path, "model.npz"), model)
    print("Uploading files")
    output.upload_result(output_name)
    print("Finish training")
Ejemplo n.º 41
0
    loss_config=config.loss,
    predictor=predictor,
    discriminator=discriminator,
    device=config.train.gpu,
    iterator=train_iter,
    optimizer=opts,
    converter=converter,
)

# trainer
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')

trainer = training.Trainer(updater, out=arguments.output)

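# Evaluate with the updater's forward pass on both the test iterator and a
# training-data iterator; results are reported under the names 'test' and 'train'.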
ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)

trainer.extend(extensions.dump_graph('predictor/loss'))

ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)

trainer.extend(extensions.LogReport(trigger=trigger_log))
trainer.extend(extensions.PrintReport(['predictor/loss']))

save_args(arguments, arguments.output)
trainer.run()
Ejemplo n.º 42
0
    def train_model(self, datasets):
        parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
        parser.add_argument('--dataset',
                            '-d',
                            default='cifar10',
                            help='The dataset to use: cifar10 or cifar100')
        parser.add_argument('--batchsize',
                            '-b',
                            type=int,
                            default=10,
                            help='Number of images in each mini-batch')
        parser.add_argument('--learnrate',
                            '-l',
                            type=float,
                            default=0.05,
                            help='Learning rate for SGD')
        parser.add_argument('--epoch',
                            '-e',
                            type=int,
                            default=300,
                            help='Number of sweeps over the dataset to train')
        parser.add_argument('--gpu',
                            '-g',
                            type=int,
                            default=-1,
                            help='GPU ID (negative value indicates CPU)')
        parser.add_argument('--out',
                            '-o',
                            default='result',
                            help='Directory to output the result')
        parser.add_argument('--resume',
                            '-r',
                            default='',
                            help='Resume the training from snapshot')
        parser.add_argument('--early-stopping',
                            type=str,
                            help='Metric to watch for early stopping')
        args = parser.parse_args()

        print('GPU: {}'.format(args.gpu))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))

        if args.gpu >= 0:
            chainer.backends.cuda.get_device_from_id(args.gpu).use()
            self.model.to_gpu()

        optimizer = chainer.optimizers.Adam(args.learnrate)
        optimizer.setup(self.model)
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

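        # split_dataset(datasets, 80) puts the first 80 examples into the
        # training set and the remaining examples into the test set.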
        train, test = split_dataset(datasets, 80)

        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        stop_trigger = (args.epoch, 'epoch')
        # Early stopping option
        if args.early_stopping:
            stop_trigger = triggers.EarlyStoppingTrigger(
                monitor=args.early_stopping,
                verbose=True,
                max_trigger=(args.epoch, 'epoch'))

        # Set up a trainer
        updater = training.updaters.StandardUpdater(
            train_iter,
            optimizer,
            device=args.gpu,
            loss_func=mean_squared_error)
        trainer = training.Trainer(updater, stop_trigger, out=args.out)

        # Evaluate the model with the test dataset for each epoch
        trainer.extend(
            extensions.Evaluator(test_iter, self.model, device=args.gpu))

        # Halve Adam's learning-rate coefficient every 25 epochs.
        # (Adam exposes no settable 'lr' attribute, so 'alpha' is shifted instead.)
        trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                       trigger=(25, 'epoch'))

        # Dump a computational graph from 'loss' variable at the first iteration
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # Take a snapshot at each epoch
        trainer.extend(
            extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # Entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        print(train[:1])

        # Run the training
        trainer.run()

        return self.model
Ejemplo n.º 43
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
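# A minimal sketch of the parse_device helper assumed above (its real definition is
# not shown in this example): it maps the --device / --gpu arguments onto a
# chainer.backend.Device via chainer.get_device().
import chainer

def parse_device(args):
    # The deprecated --gpu flag takes precedence when it is given.
    if args.gpu is not None:
        return chainer.get_device(args.gpu)
    # Plain integers behave like the old --gpu flag (negative means NumPy/CPU);
    # anything else is passed through as a device specifier such as 'native:0'.
    try:
        return chainer.get_device(int(args.device))
    except ValueError:
        return chainer.get_device(args.device)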
Ejemplo n.º 44
0
def main():
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch',
                        '-a',
                        choices=archs.keys(),
                        default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize',
                        '-B',
                        type=int,
                        default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        '-E',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean',
                        '-m',
                        default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize',
                        '-b',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
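# A minimal sketch, assuming PreprocessedDataset follows Chainer's ImageNet example:
# a DatasetMixin that crops each image to the model's input size, subtracts the mean
# image and scales pixel values to [0, 1). The real class used above may differ.
import random

import numpy as np
import chainer

class PreprocessedDataset(chainer.dataset.DatasetMixin):
    def __init__(self, path, root, mean, crop_size, random=True):
        self.base = chainer.datasets.LabeledImageDataset(path, root)
        self.mean = mean.astype(np.float32)
        self.crop_size = crop_size
        self.random = random

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        crop_size = self.crop_size
        image, label = self.base[i]
        _, h, w = image.shape
        if self.random:
            # Random crop and horizontal flip for training.
            top = random.randint(0, h - crop_size - 1)
            left = random.randint(0, w - crop_size - 1)
            if random.randint(0, 1):
                image = image[:, :, ::-1]
        else:
            # Center crop for validation.
            top = (h - crop_size) // 2
            left = (w - crop_size) // 2
        image = image[:, top:top + crop_size, left:left + crop_size]
        image -= self.mean[:, top:top + crop_size, left:left + crop_size]
        image *= (1.0 / 255.0)
        return image, label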
Ejemplo n.º 45
0
def main():

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # load a color image
    img = cv2.imread('images/blue.jpg', cv2.IMREAD_COLOR)
    # print img
    # print img.shape

    blue = []
    green = []
    red = []

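    # Flatten the pixels channel by channel (all blue values, then green, then red)
    # so that the array can be reshaped into a (1, 3, 8, 16) CHW tensor below.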
    for y in range(len(img)):
        for x in range(len(img[y])):
            blue.append(img[y][x][0])
            green.append(img[y][x][1])
            red.append(img[y][x][2])

    bgr = blue + green + red
    imgdata = np.array(bgr, dtype='f')
    imgdata = imgdata.reshape(1, 3, 8, 16)
    imgdata = imgdata / 255.0
    print(imgdata)

    n_in = 3 * 8 * 16

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 10))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load dataset
    x = imgdata
    y = np.array(5, dtype=np.int32)
    dd = [(x, y)]
    train, test = dd, dd

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Resume from a snapshot
    # chainer.serializers.load_npz(resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 46
0
def main():
    # This script is almost identical to train_mnist.py. The only difference is
    # that this script uses data-parallel computation on two GPUs.
    # See train_mnist.py for more details.
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=400,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0',
                        '-g',
                        type=int,
                        default=0,
                        help='First GPU ID')
    parser.add_argument('--gpu1',
                        '-G',
                        type=int,
                        default=1,
                        help='Second GPU ID')
    parser.add_argument('--out',
                        '-o',
                        default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    parser.add_argument('--train_imgs',
                        default='data/kmnist-train-imgs.npz',
                        help='Path to kmnist training images')
    parser.add_argument('--train_label',
                        default='data/kmnist-train-labels.npz',
                        help='Path to kmnist training labels')
    parser.add_argument('--test_imgs',
                        default='data/kmnist-test-imgs.npz',
                        help='Path to kmnist test images')
    parser.add_argument('--test_label',
                        default='data/kmnist-test-labels.npz',
                        help='Path to kmnist test labels')

    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    chainer.backends.cuda.get_device_from_id(args.gpu0).use()

    model = L.Classifier(train_kmnist.MLP(args.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load and prepare the KMNIST dataset
    train_data = np.load(args.train_imgs)['arr_0'].\
                 reshape((60000, 784)).astype(np.float32)/255.
    train_labels = [int(n) for n in np.load(args.train_label)['arr_0']]
    train = TupleDataset(train_data, train_labels)

    test_data = np.load(args.test_imgs)['arr_0'].\
                reshape((10000, 784)).astype(np.float32)/255.
    test_labels = [int(n) for n in np.load(args.test_label)['arr_0']]
    test = TupleDataset(test_data, test_labels)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # ParallelUpdater implements the data-parallel gradient computation on
    # multiple GPUs. It accepts "devices" argument that specifies which GPU to
    # use.
    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        # The device of the name 'main' is used as a "master", while others are
        # used as slaves. Names other than 'main' are arbitrary.
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 47
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # load a color image
    img1 = cv2.imread('images/zero.jpg', cv2.IMREAD_COLOR)
    img2 = cv2.imread('images/black.jpg', cv2.IMREAD_COLOR)
    img3 = cv2.imread('images/white.jpg', cv2.IMREAD_COLOR)

    # color -> grayscale (cv2.imread returns BGR, so convert with COLOR_BGR2GRAY)
    imggray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    imggray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    imggray3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)

    # image -> array
    gray = []

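    # Flatten each 32x32 grayscale image row by row, then reshape it into a
    # (1, 1, 32, 32) tensor and scale pixel values to [0, 1].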
    for y in range(len(imggray1)):
        for x in range(len(imggray1[y])):
            gray.append(imggray1[y][x])

    imgdata1 = np.array(gray, dtype='f')
    imgdata1 = imgdata1.reshape(1, 1, 32, 32)
    imgdata1 = imgdata1 / 255.0

    gray = []

    for y in range(len(imggray2)):
        for x in range(len(imggray2[y])):
            gray.append(imggray2[y][x])

    imgdata2 = np.array(gray, dtype='f')
    imgdata2 = imgdata2.reshape(1, 1, 32, 32)
    imgdata2 = imgdata2 / 255.0

    gray = []

    for y in range(len(imggray3)):
        for x in range(len(imggray3[y])):
            gray.append(imggray3[y][x])

    imgdata3 = np.array(gray, dtype='f')
    imgdata3 = imgdata3.reshape(1, 1, 32, 32)
    imgdata3 = imgdata3 / 255.0

    n_in = 32*32

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 3))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load dataset
    x1 = imgdata1
    x2 = imgdata2
    x3 = imgdata3
    y1 = np.array(0, dtype=np.int32)
    y2 = np.array(1, dtype=np.int32)
    y3 = np.array(2, dtype=np.int32)
    dd = [(x1, y1), (x2, y2), (x3, y3)]
    train, test = dd, dd

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Resume from a snapshot
    #chainer.serializers.load_npz(resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 48
0
def train():
    parser = argparse.ArgumentParser(description='DAGMM')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--cn_h_unit',
                        type=int,
                        default=10,
                        help='Number of Compression Network hidden units')
    parser.add_argument('--cn_z_unit',
                        type=int,
                        default=2,
                        help='Number of Compression Network z units')
    parser.add_argument('--en_h_unit',
                        type=int,
                        default=10,
                        help='Number of Estimation Network hidden units')
    parser.add_argument('--en_o_unit',
                        type=int,
                        default=2,
                        help='Number of Estimation Network output units')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=20,
                        help='Frequency of taking a snapshot')
    parser.add_argument(
        '--resume',
        '-r',
        type=int,
        help='Resume the training from the snapshot of the designated epoch number'
    )
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# Compression Network: Dim - {0} - {1} - {0} - Dim'.format(
        args.cn_h_unit, args.cn_z_unit))
    print('# Estimation Network: {} - {} - {}'.format(args.cn_z_unit + 2,
                                                      args.en_h_unit,
                                                      args.en_o_unit))
    print('# Output-directory: {}'.format(args.out))
    print('# Frequency-snapshot: {}'.format(args.frequency))
    if args.resume:
        print('# Resume-epochNumber: {}'.format(args.resume))
    print('')

    # Load the dataset
    x_data = np.loadtxt('./dataset_arrhythmia/ExplanatoryVariables.csv',
                        delimiter=',')
    y_label = np.loadtxt('./dataset_arrhythmia/CriterionVariables.csv',
                         delimiter=',')

    # Extract only the normal data
    HealthData = x_data[y_label[:] == 1]

    # Split the normal data into training and validation sets
    NumOfHealthData = len(HealthData)
    trainData = HealthData[:math.floor(NumOfHealthData * 0.9)]
    validData = HealthData[len(trainData):]

    # Type conversion
    trainData = trainData.astype(np.float32)
    validData = validData.astype(np.float32)

    train_iter = chainer.iterators.SerialIterator(trainData,
                                                  batch_size=args.batchsize,
                                                  repeat=True,
                                                  shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(validData,
                                                  batch_size=len(validData),
                                                  repeat=False,
                                                  shuffle=False)

    model = DAGMM(args.cn_h_unit, args.cn_z_unit, len(trainData[0]),
                  args.en_h_unit, args.en_o_unit)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    if args.resume:
        serializers.load_npz(
            args.out + '/model_snapshot_epoch_' + str(args.resume), model)

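    # model.lossFunc(...) returns a callable that the updater and the evaluator
    # below use as their loss / evaluation function.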
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       loss_func=model.lossFunc(gpu=args.gpu))
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.Evaluator(valid_iter,
                             model,
                             device=args.gpu,
                             eval_func=model.lossFunc(gpu=args.gpu)))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'),
        trigger=(args.frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, filename='model_snapshot_epoch_{.updater.epoch}'),
                   trigger=(args.frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        optimizer, filename='optimizer_snapshot_epoch_{.updater.epoch}'),
                   trigger=(args.frequency, 'epoch'))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss1.png'))
    trainer.extend(
        extensions.PlotReport(['main/loss'],
                              x_key='epoch',
                              file_name='loss2.png'))
    trainer.extend(extensions.LogReport(log_name="log", trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        serializers.load_npz(args.out + '/snapshot_epoch-' + str(args.resume),
                             trainer)

    trainer.run()
Ejemplo n.º 49
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
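# A minimal sketch, assuming the MLP(args.unit, 10) used by the MNIST examples above
# matches Chainer's standard train_mnist model: three fully connected layers with
# ReLU activations and the input size inferred from the first batch. (The
# MLP(n_in, n_units, n_out) variant used elsewhere on this page takes an explicit
# input size instead.)
import chainer
import chainer.functions as F
import chainer.links as L

class MLP(chainer.Chain):

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)  # input size inferred at the first call
            self.l2 = L.Linear(None, n_units)
            self.l3 = L.Linear(None, n_out)

    def forward(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        return self.l3(h2)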
Ejemplo n.º 50
0
def main():

    parser = argparse.ArgumentParser(description='Chainer example: cifar-VGG')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.05,
                        help='Learning rate for SGD')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

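    # get_data (defined elsewhere, not shown in this example) is assumed to build
    # a labeled dataset from the images under the given directory.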
    train = get_data(r"/home/notsuji/chainer_src/origin/mnist_data/Data/train")
    test = get_data(r"/home/notsuji/chainer_src/origin/mnist_data/Data/test")

    # Setup model
    model = L.Classifier(VGG(10))

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=(1, 'epoch'))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(10, 'epoch'))

    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 51
0
    else:
        train_iter = chainer.iterators.MultiprocessIterator(train, args.batch_size)
        test_iter = chainer.iterators.MultiprocessIterator(test, args.batch_size, repeat=False)
        updater = training.updater.StandardUpdater(train_iter, optimizer, device=device)

    stop_trigger = (args.epochs, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.output_data_dir)
    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
Ejemplo n.º 52
0
# trainer
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')

trainer = training.Trainer(updater, out=arguments.output)

ext = extensions.Evaluator(test_iter,
                           models,
                           converter,
                           device=config.train.gpu,
                           eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
ext = extensions.Evaluator(train_eval_iter,
                           models,
                           converter,
                           device=config.train.gpu,
                           eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)

trainer.extend(extensions.dump_graph('predictor/loss'))

ext = extensions.snapshot_object(predictor,
                                 filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)

trainer.extend(extensions.LogReport(trigger=trigger_log))

print(args)
save_args(args, args['output'])
trainer.run()
Ejemplo n.º 53
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='results',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dim-z', '-z', default=20, type=int,
                        help='dimension of the encoded vector')
    parser.add_argument('--dim-h', default=500, type=int,
                        help='dimension of the hidden layer')
    parser.add_argument('--beta', default=1.0, type=float,
                        help='Regularization coefficient for '
                             'the second term of ELBO bound')
    parser.add_argument('--k', '-k', default=1, type=int,
                        help='Number of Monte Carlo samples used in '
                             'encoded vector')
    parser.add_argument('--binary', action='store_true',
                        help='Use binarized MNIST')
    parser.add_argument('--batch-size', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dim_z))
    print('# Minibatch-size: {}'.format(args.batch_size))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    encoder = net.make_encoder(784, args.dim_z, args.dim_h)
    decoder = net.make_decoder(784, args.dim_z, args.dim_h,
                               binary_check=args.binary)
    prior = net.make_prior(args.dim_z)
    avg_elbo_loss = net.AvgELBOLoss(encoder, decoder, prior,
                                    beta=args.beta, k=args.k)
    if args.gpu >= 0:
        avg_elbo_loss.to_gpu(args.gpu)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(avg_elbo_loss)

    # Initialize
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, avg_elbo_loss)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)

    if args.binary:
        # Binarize dataset
        train = (train >= 0.5).astype(np.float32)
        test = (test >= 0.5).astype(np.float32)

    if args.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # used in the training with 'loss_func' option
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        device=args.gpu, loss_func=avg_elbo_loss)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(
        test_iter, avg_elbo_loss, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/reconstr', 'main/kl_penalty', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)

    avg_elbo_loss.to_cpu()
    train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = decoder(encoder(x).mean, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'train'))
    save_images(x1.array, os.path.join(args.out, 'train_reconstructed'))

    test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = decoder(encoder(x).mean, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'test'))
    save_images(x1.array, os.path.join(args.out, 'test_reconstructed'))

    # draw images from randomly sampled z
    z = prior().sample(9)
    x = decoder(z, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'sampled'))
Ejemplo n.º 54
0
# Build the neural network
## Use softmax cross entropy as the loss function, since this is a two-class classification problem
model = L.Classifier(Model(), lossfun=F.softmax_cross_entropy)

# Start training
run_training(
    model,
    train,
    epochs,
    chainer.optimizers.Adam(),  # optimizer: Adam
    batchsize=batchsize,
    validation=test,
    gpu_device=0,  # use the GPU
    extensions=[
        extensions.LogReport(),  # logging
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]),  # print training status
        extensions.dump_graph('main/loss'),  # network structure
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='01.loss.png'),  # loss plot
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              'epoch',
                              file_name='01.accuracy.png'),  # accuracy plot
    ])
# Visualize the network structure produced by dump_graph('main/loss')
## > conda install -c anaconda graphviz
## > dot -Tpng result/cg.dot -o result/cg.png
Ejemplo n.º 55
0
def main():
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(
            args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'
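    # (With --test these intervals shrink to one iteration, so the evaluation,
    # snapshot and logging extensions can all be exercised in a short run.)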

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
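    # The snapshot_object filename below is formatted with the trainer object,
    # so '{.updater.iteration}' expands to the current iteration count.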
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 56
0
def main():
    # define options
    parser = argparse.ArgumentParser(
        description='Training script of DenseNet on CIFAR-10 dataset')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of epochs to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='Minibatch size')
    parser.add_argument('--numlayers',
                        '-L',
                        type=int,
                        default=40,
                        help='Number of layers')
    parser.add_argument('--growth',
                        '-G',
                        type=int,
                        default=12,
                        help='Growth rate parameter')
    parser.add_argument('--dropout',
                        '-D',
                        type=float,
                        default=0.2,
                        help='Dropout ratio')
    parser.add_argument('--dataset',
                        type=str,
                        default='C10',
                        choices=('C10', 'C10+', 'C100', 'C100+'),
                        help='Dataset used for training (Default is C10)')
    args = parser.parse_args()

    # load dataset
    if args.dataset == 'C10':
        train, test = dataset.get_C10()
    elif args.dataset == 'C10+':
        train, test = dataset.get_C10_plus()
    elif args.dataset == 'C100':
        train, test = dataset.get_C100()
    elif args.dataset == 'C100+':
        train, test = dataset.get_C100_plus()

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(test,
                                                       args.batchsize,
                                                       repeat=False,
                                                       shuffle=False)

    # setup model
    model = L.Classifier(
        DenseNet(args.numlayers, args.growth, 16, args.dropout, 10))

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    optimizer = chainer.optimizers.NesterovAG(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.epoch}.npz'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]))
    trainer.extend(extensions.ProgressBar())

    # divide lr by 10 at 0.5 and 0.75 of the total number of training epochs
    iter_per_epoch = math.ceil(len(train) / args.batchsize)
    n_iter1 = int(args.epoch * 0.5 * iter_per_epoch)
    n_iter2 = int(args.epoch * 0.75 * iter_per_epoch)
    shifts = [(n_iter1, 0.01), (n_iter2, 0.001)]
    trainer.extend(StepShift('lr', shifts, optimizer))
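    # For example, with the standard 50,000-image CIFAR training split and the
    # default batchsize of 64, iter_per_epoch is 782, so over 300 epochs the lr
    # drops to 0.01 at iteration 117,300 and to 0.001 at iteration 175,950.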

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()
Ejemplo n.º 57
0
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))
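    # With the default learnrate of 0.05 this gives 0.05 -> 0.025 (epoch 25)
    # -> 0.0125 (epoch 50) -> ... over the 300-epoch run.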

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Ejemplo n.º 58
0
                       multibox_encoder)

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    snapshot_interval = 1000, 'iteration'
    log_interval = 10, 'iteration'

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
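    # 'main/loc' and 'main/conf' are the localization and confidence terms of
    # the multibox loss reported by the model.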
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 59
0
def main():
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    parser.add_argument('--dali', action='store_true')
    parser.set_defaults(dali=False)
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    model.to_device(device)
    device.use()

    # Load the mean file
    mean = np.load(args.mean)
    if args.dali:
        if not dali_util._dali_available:
            raise RuntimeError(
                'DALI does not seem to be available on your system.')
        num_threads = args.loaderjob
        if num_threads is None or num_threads <= 0:
            num_threads = 1
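        # The mean file is a per-pixel (3, H, W) array; averaging over the
        # spatial axes yields the per-channel means passed to the DALI pipelines.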
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # Setup DALI pipelines
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, model.insize, args.batchsize,
            num_threads, args.gpu, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, model.insize, args.val_batchsize,
            num_threads, args.gpu, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        # converter = dali_converter
        converter = dali_util.DaliConverter(mean=mean, crop_size=model.insize)
    else:
        # Load the dataset files
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(args.val, args.root, mean, model.insize,
                                  False)
        # These iterators load the images with subprocesses running in parallel
        # to the training/validation.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, converter=converter,
                                        device=device), trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    if not (chainerx.is_available() and isinstance(device, chainerx.Device)):
        trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Ejemplo n.º 60
0
def main():
    parser = argparse.ArgumentParser(description='''\
ChainerMN example: MNIST with automatic checkpoints enabled''')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator',
                        type=str,
                        default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true', help='Use GPU')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    parser.add_argument('--run-id',
                        type=str,
                        default='train-mnist-example',
                        help='ID of the task name')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.

    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
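        # intra_rank is this process's rank within its node, so with one
        # process per GPU it can be used directly as the GPU device ID.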
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)
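    # For example, with 4 MPI processes each worker ends up with roughly
    # 15,000 of the 60,000 MNIST training images and 2,500 of the 10,000
    # test images.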

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Enable checkpointer and recover from checkpoint if any checkpoint exists
    checkpointer = create_multi_node_checkpointer(name=args.run_id, comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    print("Rank", comm.rank, ": (Re)Starting from (epoch, iter) =",
          (trainer.updater.epoch, trainer.updater.iteration))
    trainer.extend(checkpointer, trigger=(1000, 'iteration'))

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()