Example #1
File: test_mnist.py Project: zu3st/chainer
def check_mnist(gpu, display_log=True):
    epoch = 5
    batchsize = 100
    n_units = 100

    comm = chainermn.create_communicator('naive')
    if gpu:
        device = comm.intra_rank
        chainer.cuda.get_device_from_id(device).use()
    else:
        device = -1

    model = L.Classifier(MLP(n_units, 10))
    if gpu:
        model.to_device(cupy.cuda.Device())

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)

    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Add a checkpointer. This is just to check that checkpointing runs
    # without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + '-tmp-')
    checkpointer = create_multi_node_checkpointer(name=__name__,
                                                  comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))
    trainer.run()

    accuracy = evaluator()['validation/main/accuracy']
    assert accuracy > 0.95

    # Check that the checkpointer successfully finalized the snapshot directory
    assert [] == os.listdir(path)
    os.removedirs(path)
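These MNIST examples call an MLP link that is defined elsewhere in the same file. A minimal sketch consistent with the official Chainer MNIST example (the two-hidden-layer shape is an assumption):

import chainer
import chainer.functions as F
import chainer.links as L

class MLP(chainer.Chain):
    """Hypothetical sketch of the MLP used above."""

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # the input size is inferred from the first batch (None)
            self.l1 = L.Linear(None, n_units)
            self.l2 = L.Linear(None, n_units)
            self.l3 = L.Linear(None, n_out)

    def forward(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        return self.l3(h2)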
Example #2
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--device',
                        '-d',
                        type=str,
                        default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu',
                       '-g',
                       dest='device',
                       type=int,
                       nargs='?',
                       const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.DumpGraph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume is not None:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
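For reference, chainer.get_device accepts both plain integers and ChainerX device specifiers, so the --device flag above can be used in several ways; a few hedged examples:

# CPU (NumPy arrays)
device = chainer.get_device(-1)
# GPU 0 (CuPy arrays)
device = chainer.get_device(0)
# ChainerX devices, e.g. CPU-native or CUDA
device = chainer.get_device('native:0')
device = chainer.get_device('cuda:0')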
Example #3
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {'classification': 'classifier.pkl',
                      'regression': 'regressor.pkl'}

    print('Using dataset: {}...'.format(dataset_name))

    # Determine the cache directory and the number of target labels.
    if args.label:
        labels = args.label
        cache_dir = os.path.join('input', '{}_{}_{}'.format(dataset_name,
                                                            method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join('input', '{}_{}_all'.format(dataset_name,
                                                             method))
        class_num = len(molnet_default_config[dataset_name]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]

    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name, num_data, labels,
                                                method, cache_dir)
    train, valid = dataset_parts[0], dataset_parts[1]

#    # Scale the label values, if necessary.
#    if args.scale == 'standardize':
#        if task_type == 'regression':
#            print('Applying standard scaling to the labels.')
#            datasets, scaler = standardize_dataset_labels(datasets)
#        else:
#            print('Label scaling is not available for classification tasks.')
#    else:
#        print('No label scaling was selected.')
#        scaler = None

    # Set up the predictor.
    predictor = set_up_predictor(method, n_unit, conv_layers, class_num)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize, repeat=False,
                                          shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=args.gpu)
        # TODO: Use standard scaler for regression task
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Save model-related output to this directory.
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(E.Evaluator(valid_iter, model, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # Report various metrics.
    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    for metric_name, metric_fun in metrics.items():
        if isinstance(metric_fun, types.FunctionType):
            print_report_targets.append('main/' + metric_name)
            print_report_targets.append('validation/main/' + metric_name)
        elif issubclass(metric_fun, BatchEvaluator):
            trainer.extend(metric_fun(valid_iter, model, device=args.gpu,
                                      eval_func=predictor,
                                      converter=concat_mols, name='val',
                                      raise_value_error=False))
            print_report_targets.append('val/main/' + metric_name)
        else:
            raise TypeError('{} is not a supported metric function.'
                            .format(type(metric_fun)))
    print_report_targets.append('elapsed_time')

    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
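To reuse the pickled model later, chainer-chemistry's Classifier and Regressor expose a matching load_pickle classmethod; a hedged sketch (the path is hypothetical and depends on the dataset/method arguments used above):

from chainer_chemistry.models.prediction import Classifier

# e.g. <out>/<cache_dir basename>/classifier.pkl as saved above
model = Classifier.load_pickle('result/tox21_nfp_all/classifier.pkl', device=-1)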
Example #4
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0',
                        '-g',
                        default=0,
                        type=int,
                        help='First GPU ID')
    parser.add_argument('--gpu1',
                        '-G',
                        default=1,
                        type=int,
                        help='Second GPU ID')
    parser.add_argument('--out',
                        '-o',
                        default='result_parallel',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        default=1000,
                        type=int,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}, {}'.format(args.gpu0, args.gpu1))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # See train_mnist.py for the meaning of these lines

    model = L.Classifier(ParallelMLP(args.unit, 10, args.gpu0, args.gpu1))
    chainer.backends.cuda.get_device_from_id(args.gpu0).use()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu0)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
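ParallelMLP is defined elsewhere; a sketch of the model-parallel idea in the spirit of Chainer's train_mnist_model_parallel example, where each half of the hidden units lives on a different GPU and F.copy moves activations between devices (the exact architecture is an assumption):

import chainer
import chainer.functions as F

class ParallelMLP(chainer.Chain):
    """Hypothetical sketch: hidden units split across two GPUs."""

    def __init__(self, n_units, n_out, gpu0, gpu1):
        super(ParallelMLP, self).__init__()
        self.gpu0, self.gpu1 = gpu0, gpu1
        with self.init_scope():
            # half of the units on each device (MLP as sketched after Example #1)
            self.first0 = MLP(n_units // 2, n_units).to_gpu(gpu0)
            self.first1 = MLP(n_units // 2, n_units).to_gpu(gpu1)
            self.second0 = MLP(n_units // 2, n_out).to_gpu(gpu0)
            self.second1 = MLP(n_units // 2, n_out).to_gpu(gpu1)

    def forward(self, x):
        # x is assumed to arrive on gpu0; copy it to gpu1 as well
        z0 = self.first0(x)
        z1 = self.first1(F.copy(x, self.gpu1))
        # exchange and merge the partial activations across the devices
        h0 = F.relu(z0 + F.copy(z1, self.gpu0))
        h1 = F.relu(z1 + F.copy(z0, self.gpu1))
        # final halves, summed on gpu0
        return self.second0(h0) + F.copy(self.second1(h1), self.gpu0)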
Example #5
def main():
    # make training data
    data_maker = DataMaker(steps_per_cycle=STEPS_PER_CYCLE,
                           number_of_cycles=NUMBER_OF_CYCLES)
    train_data = data_maker.make(LENGTH_OF_SEQUENCE)
    # Iterator
    batchsize = 100
    train_iter = iterators.SerialIterator(train_data, batchsize)

    # setup model
    model = LSTM(IN_UNITS, HIDDEN_UNITS, OUT_UNITS)

    # setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    start = time.time()

    updater = training.StandardUpdater(train_iter, optimizer,
                                       converter=MyConverter)
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time', 'lr']))
    trainer.run()

    end = time.time()

    print("{}[sec]".format(end - start))
Example #6
def main():
    parser = argparse.ArgumentParser(description='SLPolicyNetwork', formatter_class=RawTextHelpFormatter)
    parser.add_argument('CONFIG', default=None, type=str, help='path to config file')
    parser.add_argument('--gpu', type=int, default=-1, help='gpu numbers\nto specify')
    parser.add_argument('--debug', default=False, action='store_true', help='switch to debug mode')
    args = parser.parse_args()

    with open(args.CONFIG, "r") as f:
        config = json.load(f)

    path = 'debug' if args.debug else 'data'

    b = config["arguments"]["batch_size"]
    epoch = config["arguments"]["epoch"]

    print('*** making training data ***')
    train_data = load_data(config[path]["train"])  # (state, action) = ((3, 8, 8), (1))
    train_iter = iterators.SerialIterator(train_data, b)

    valid_data = load_data(config[path]["valid"])
    valid_iter = iterators.SerialIterator(valid_data, b, repeat=False, shuffle=False)

    print('*** preparing model ***')
    n_input_channel = config["arguments"]["n_input_channel"]
    n_output_channel = config["arguments"]["n_output_channel"]
    model = SLPolicyNetwork(n_input_channel=n_input_channel,
                            n_output_channel=n_output_channel)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)
    model.set_cache()

    optimizer = chainer.optimizers.Adam(alpha=config["arguments"]["learning_rate"])
    optimizer.setup(model)

    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(updater, (epoch, 'epoch'),
                               out=os.path.join('result', config["arguments"]["save_path"]))

    @chainer.training.make_extension()
    def predict_next_move(_):
        state, action = valid_data[np.random.choice(len(valid_data))]
        n_channel, row, column = state.shape
        if args.gpu >= 0:
            state = cuda.to_gpu(state)
        prediction = model.predict(state.reshape(1, n_channel, row, column))
        print_board(state)
        print(f'action : {translate(int(action))}')
        print(f'prediction : {translate(int(np.argmax(F.softmax(prediction).data, axis=1)))}')

    trainer.extend(predict_next_move, trigger=(1, 'epoch'))

    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy',
                                           'validation/main/loss', 'validation/main/accuracy', 'elapsed_time']))
    if args.debug is False:
        trainer.extend(extensions.snapshot_object(model, 'slpn.epoch{.updater.epoch}.npz'), trigger=(10, 'epoch'))
        save_trigger_for_accuracy = chainer.training.triggers.MaxValueTrigger(key='validation/main/accuracy',
                                                                              trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'slpn.best_accuracy.npz'), trigger=save_trigger_for_accuracy)
        save_trigger_for_loss = chainer.training.triggers.MinValueTrigger(key='validation/main/loss',
                                                                          trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'slpn.best_loss.npz'), trigger=save_trigger_for_loss)

    print('*** start training ***')
    trainer.run()
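The @chainer.training.make_extension() decorator used above turns a plain function into a trainer extension; the function receives the trainer object (ignored here as `_`). A minimal sketch of the same pattern:

@chainer.training.make_extension(trigger=(1, 'epoch'))
def log_epoch(trainer):
    # any per-epoch side effect works here; the trainer object is passed in
    print('finished epoch', trainer.updater.epoch)

# trainer.extend(log_epoch)  # or pass a trigger to extend(), as above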
Example #7
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=2,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=50,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    predictor = MLP(args.unit, 10)
    model = L.Classifier(predictor)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    #trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Save trained model
    serializers.save_npz('{}/mlp.model'.format(args.out), model)


    save_dir = 'store_model'
    predictor.save(save_dir)

    print('model args  : ', predictor._init_args)
    print('model kwargs: ', predictor._init_kwargs)
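To reuse the parameters saved with serializers.save_npz above, rebuild the same architecture and load the weights into it; a sketch assuming the default --unit 50:

# rebuild the architecture, then load the weights into it
model = L.Classifier(MLP(50, 10))
chainer.serializers.load_npz('result/mlp.model', model)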
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='config.ini')
    parser.add_argument('--resume')
    parser.add_argument('--plot_samples', type=int, default=0)
    args = parser.parse_args()

    config = configparser.ConfigParser()
    config.read(args.config_path, 'UTF-8')

    chainer.global_config.autotune = True
    # chainer.cuda.set_max_workspace_size(11388608)
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.cudnn_fast_batch_normalization = True

    # create result dir and copy file
    logger.info('> store file to result dir %s', config.get('result', 'dir'))
    save_files(config.get('result', 'dir'))

    logger.info('> set up devices')
    devices = setup_devices(config.get('training_param', 'gpus'))
    set_random_seed(devices, config.getint('training_param', 'seed'))

    logger.info('> get dataset')
    dataset_type = config.get('dataset', 'type')
    if dataset_type == 'coco':
        # force to set `use_cache = False`
        train_set = get_coco_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'train_images'),
            annotations=config.get(dataset_type, 'train_annotations'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=False,
            do_augmentation=True,
        )
        test_set = get_coco_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'val_images'),
            annotations=config.get(dataset_type, 'val_annotations'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=False,
        )
    elif dataset_type == 'mpii':
        train_set, test_set = get_mpii_dataset(
            insize=parse_size(config.get('model_param', 'insize')),
            image_root=config.get(dataset_type, 'images'),
            annotations=config.get(dataset_type, 'annotations'),
            train_size=config.getfloat(dataset_type, 'train_size'),
            min_num_keypoints=config.getint(dataset_type, 'min_num_keypoints'),
            use_cache=config.getboolean(dataset_type, 'use_cache'),
            seed=config.getint('training_param', 'seed'),
        )
    else:
        raise Exception('Unknown dataset {}'.format(dataset_type))
    logger.info('dataset type: %s', dataset_type)
    logger.info('training images: %d', len(train_set))
    logger.info('validation images: %d', len(test_set))

    if args.plot_samples > 0:
        for i in range(args.plot_samples):
            data = train_set[i]
            visualize.plot('train-{}.png'.format(i), data['image'],
                           data['keypoints'], data['bbox'], data['is_labeled'],
                           data['edges'])
            data = test_set[i]
            visualize.plot('val-{}.png'.format(i), data['image'],
                           data['keypoints'], data['bbox'], data['is_labeled'],
                           data['edges'])

    logger.info('> load model')
    model = create_model(config, train_set)

    logger.info('> transform dataset')
    train_set = TransformDataset(train_set, model.encode)
    test_set = TransformDataset(test_set, model.encode)

    logger.info('> create iterators')
    train_iter = chainer.iterators.MultiprocessIterator(
        train_set,
        config.getint('training_param', 'batchsize'),
        n_processes=config.getint('training_param', 'num_process'))
    test_iter = chainer.iterators.MultiprocessIterator(
        test_set,
        config.getint('training_param', 'batchsize'),
        repeat=False,
        shuffle=False,
        n_processes=config.getint('training_param', 'num_process'))

    logger.info('> setup optimizer')
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    logger.info('> setup trainer')
    updater = training.updaters.ParallelUpdater(train_iter,
                                                optimizer,
                                                devices=devices)
    trainer = training.Trainer(
        updater, (config.getint('training_param', 'train_iter'), 'iteration'),
        config.get('result', 'dir'))

    logger.info('> setup extensions')
    trainer.extend(extensions.LinearShift(
        'lr',
        value_range=(config.getfloat('training_param', 'learning_rate'), 0),
        time_range=(0, config.getint('training_param', 'train_iter'))),
                   trigger=(1, 'iteration'))

    trainer.extend(
        extensions.Evaluator(test_iter, model, device=devices['main']))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport([
                'main/loss',
                'validation/main/loss',
            ],
                                  'epoch',
                                  file_name='loss.png'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'validation/main/loss',
            'main/loss_resp',
            'validation/main/loss_resp',
            'main/loss_iou',
            'validation/main/loss_iou',
            'main/loss_coor',
            'validation/main/loss_coor',
            'main/loss_size',
            'validation/main/loss_size',
            'main/loss_limb',
            'validation/main/loss_limb',
        ]))
    trainer.extend(extensions.ProgressBar())

    trainer.extend(
        extensions.snapshot(filename='best_snapshot'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(
        extensions.snapshot_object(model, filename='bestmodel.npz'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    logger.info('> start training')
    trainer.run()
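setup_devices is project-specific; for ParallelUpdater it must return a dict that maps updater names to device ids and contains a 'main' entry. A hypothetical sketch that parses a comma-separated GPU list:

def setup_devices(gpus):
    """Hypothetical sketch: '0,1' -> {'main': 0, 'gpu1': 1}."""
    ids = [int(i) for i in gpus.split(',')]
    devices = {'main': ids[0]}
    for n, i in enumerate(ids[1:], start=1):
        devices['gpu{}'.format(n)] = i
    return devices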
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr',
                        '-l',
                        type=float,
                        default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = FCISResNet101.proposal_creator_params
    proposal_creator_params['min_size'] = 2
    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(fcis,
                           proposal_target_creator=proposal_target_creator)

    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOInstanceSegmentationDataset(year='2014', split='train')
    vmml_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                   split='valminusminival')

    # filter non-annotated data
    train_indices = np.array([
        i for i, label in enumerate(train_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                             dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array([
        i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
        if len(label[0]) > 0
    ],
                            dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'), Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  batch_size=args.batch_size //
                                                  comm.size)

    # test dataset
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(year='2014',
                                                       split='minival',
                                                       use_crowded=True,
                                                       return_crowded=True,
                                                       return_area=True)
        indices = np.arange(len(test_dataset))
        test_dataset = test_dataset.slice[indices]
        test_iter = chainer.iterators.SerialIterator(test_dataset,
                                                     batch_size=1,
                                                     repeat=False,
                                                     shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)

    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    @make_shift('lr')
    def lr_scheduler(trainer):
        if args.lr is None:
            base_lr = 0.0005 * args.batch_size
        else:
            base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(extensions.snapshot_object(
            model.fcis, filename='snapshot_model.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration',
            'epoch',
            'elapsed_time',
            'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.fcis,
            label_names=coco_instance_segmentation_label_names),
                       trigger=ManualScheduleTrigger([
                           len(train_dataset) * args.cooldown_epoch,
                           len(train_dataset) * args.epoch
                       ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
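For reference, with the defaults above (--batch-size 8, --lr unset, --cooldown-epoch 12, --epoch 18) the shift function resolves to:

# base_lr  = 0.0005 * 8 = 0.004          (no --lr given)
# warmup   : lr = 0.1 * base_lr = 0.0004 while iteration * comm.size < 2000
# main     : lr = base_lr = 0.004        until epoch 12 (--cooldown-epoch)
# cooldown : lr = 0.1 * base_lr = 0.0004 for the remaining epochs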
Example #10
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(description='Digraph Embedding')
    parser.add_argument('input', help='Path to the digraph description file')
    parser.add_argument(
        '--validation',
        '-val',
        default=None,
        help='Path to the digraph description file for validation')
    parser.add_argument('--coordinates',
                        '-c',
                        help='Path to the coordinate file for initialization')
    parser.add_argument('--batchsize_edge',
                        '-be',
                        type=int,
                        default=100,
                        help='Number of samples in each edge mini-batch')
    parser.add_argument('--batchsize_anchor',
                        '-ba',
                        type=int,
                        default=-1,
                        help='Number of samples in each anchor mini-batch')
    parser.add_argument(
        '--batchsize_vert',
        '-bv',
        type=int,
        default=-1,
        help=
        'Number of samples in each vertex mini-batch (used for sampling negative edges)'
    )
    parser.add_argument(
        '--batchsize_negative',
        '-bn',
        type=int,
        default=0,
        help=
        'Number of negative edges sampled for each vertex mini-batch (positive: exact negative edge sampling, negative: random sampling to approximate negative edges)'
    )
    parser.add_argument('--vertex_offset',
                        type=int,
                        default=0,
                        help='the smallest index of vertices')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dim',
                        '-d',
                        type=int,
                        default=2,
                        help='Embedding dimension')
    parser.add_argument('--dag',
                        type=float,
                        default=0,
                        help='0:non-acyclic, 1:acyclic')
    parser.add_argument('--margin',
                        '-m',
                        type=float,
                        default=0.01,
                        help='margin for the metric boundary')
    parser.add_argument('--weight_decay',
                        '-wd',
                        type=float,
                        default=0,
                        help='weight decay for regularization on coordinates')
    parser.add_argument('--wd_norm',
                        '-wn',
                        choices=['l1', 'l2'],
                        default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--learning_rate',
                        '-lr',
                        type=float,
                        default=5e-2,
                        help='learning rate')
    parser.add_argument('--learning_rate_drop',
                        '-ld',
                        type=int,
                        default=5,
                        help='how many times to halve the learning rate')
    #    parser.add_argument('--lambda_super_neg', '-lsn', type=float, default=0,
    #                        help='Super negative samples')
    parser.add_argument('--lambda_pos',
                        '-lp',
                        type=float,
                        default=1,
                        help='weight for loss for positive edges')
    parser.add_argument('--lambda_neg',
                        '-ln',
                        type=float,
                        default=1,
                        help='weight for loss for negative edges')
    parser.add_argument(
        '--lambda_anchor',
        '-la',
        type=float,
        default=1,
        help=
        'anchors should reside in the disk; if set to 0, anchors are fixed to the centre of the spheres'
    )
    parser.add_argument('--lambda_uniform_radius',
                        '-lur',
                        type=float,
                        default=0,
                        help='all radii should be similar')
    parser.add_argument('--outdir',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--optimizer',
                        '-op',
                        choices=optim.keys(),
                        default='Adam',
                        help='optimizer')
    parser.add_argument('--vis_freq',
                        '-vf',
                        type=int,
                        default=-1,
                        help='evaluation frequency in iterations')
    parser.add_argument('--mpi',
                        action='store_true',
                        help='parallelise with MPI')
    parser.add_argument('--reconstruct',
                        '-r',
                        action='store_true',
                        help='reconstruct graph during evaluation')
    parser.add_argument('--plot',
                        '-p',
                        action='store_true',
                        help='plot result (dim=2 only)')
    #    parser.add_argument('--training', '-t', action='store_false',help='reconstruct graph')
    args = parser.parse_args()

    # default batchsize
    if args.batchsize_anchor < 0:
        args.batchsize_anchor = 10 * args.batchsize_edge
    if args.batchsize_vert < 0:
        if args.batchsize_negative == 0:
            args.batchsize_vert = 10 * args.batchsize_edge
        else:
            args.batchsize_vert = args.batchsize_edge

    args.outdir = os.path.join(args.outdir, dt.now().strftime('%m%d_%H%M'))
    save_args(args, args.outdir)
    chainer.config.autotune = True

    vert, pos_edge = read_graph(args.input, args.vertex_offset)
    vnum = np.max(vert) + 1

    ## ChainerMN
    if args.mpi:
        import chainermn
        if args.gpu >= 0:
            comm = chainermn.create_communicator()
            chainer.cuda.get_device(comm.intra_rank).use()
        else:
            comm = chainermn.create_communicator('naive')
        if comm.rank == 0:
            primary = True
            print(args)
            chainer.print_runtime_info()
            print("#edges {}, #vertices {}".format(len(pos_edge), len(vert)))
        else:
            primary = False
        print("process {}".format(comm.rank))
    else:
        primary = True
        print(args)
        chainer.print_runtime_info()
        print("#edges {}, #vertices {}".format(len(pos_edge), len(vert)))
        if args.gpu >= 0:
            chainer.cuda.get_device(args.gpu).use()

    # data
    edge_iter = iterators.SerialIterator(datasets.TupleDataset(
        pos_edge[:, 0], pos_edge[:, 1]),
                                         args.batchsize_edge,
                                         shuffle=True)
    vert_iter = iterators.SerialIterator(datasets.TupleDataset(vert),
                                         args.batchsize_vert,
                                         shuffle=True)
    anchor_iter = iterators.SerialIterator(datasets.TupleDataset(vert),
                                           args.batchsize_anchor,
                                           shuffle=True)
    graph = nx.from_edgelist(pos_edge, nx.DiGraph())
    if args.validation and primary:
        val_vert, val_edge = read_graph(args.validation, args.vertex_offset)
        val_graph = nx.from_edgelist(val_edge, nx.DiGraph())
        print("validation #edges {}, #vertices {}".format(
            len(val_edge), len(val_vert)))
    else:
        val_graph = graph

    if args.vis_freq < 0:
        args.vis_freq = int(len(pos_edge) * args.epoch / 10)

    # initial embedding
    if args.coordinates:
        coords = np.loadtxt(args.coordinates, delimiter=",")
    else:
        coords = np.zeros((vnum, 1 + 2 * args.dim))
        # anchor = centre
        X = 2 * np.random.rand(vnum, args.dim) - 1
        coords[:, 1:args.dim + 1] = X
        coords[:, args.dim + 1:] = X
        # the first column holds the radius, initialised to r=0.1
        coords[:, 0] = 0.1
    coords = L.Parameter(coords)

    # set up an optimizer
    def make_optimizer(model):
        if args.optimizer in [
                'SGD', 'Momentum', 'CMomentum', 'AdaGrad', 'RMSprop',
                'NesterovAG', 'LBFGS'
        ]:
            optimizer = optim[args.optimizer](lr=args.learning_rate)
        elif args.optimizer in ['AdaDelta']:
            optimizer = optim[args.optimizer]()
        elif args.optimizer in ['Adam', 'AdaBound', 'Eve']:
            optimizer = optim[args.optimizer](
                alpha=args.learning_rate, weight_decay_rate=args.weight_decay)
        if args.mpi:
            optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
        optimizer.setup(model)
        return optimizer

    opt = make_optimizer(coords)
    if args.weight_decay > 0 and (args.optimizer
                                  not in ['Adam', 'AdaBound', 'Eve']):
        if args.wd_norm == 'l2':
            opt.add_hook(chainer.optimizer_hooks.WeightDecay(
                args.weight_decay))
        else:
            opt.add_hook(chainer.optimizer_hooks.Lasso(args.weight_decay))

    if args.gpu >= 0:
        coords.to_gpu()

    updater = Updater(
        models=coords,
        iterator={
            'main': edge_iter,
            'vertex': vert_iter,
            'anchor': anchor_iter
        },
        optimizer={'main': opt},
        device=args.gpu,
        #        converter=convert.ConcatWithAsyncTransfer(),
        params={
            'args': args,
            'graph': graph
        })
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    if primary:
        log_interval = 20, 'iteration'
        log_keys = [
            'iteration', 'lr', 'elapsed_time', 'main/loss_pos',
            'main/loss_neg', 'main/loss_anc'
        ]
        if args.validation:
            log_keys.extend(
                ['myval/prc', 'myval/rec', 'myval/f1', 'myval/anc'])
        if args.lambda_uniform_radius > 0:
            log_keys.append('main/loss_rad')
        trainer.extend(extensions.observe_lr('main'), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(keys=log_keys, trigger=log_interval))
        #        trainer.extend(extensions.LogReport(keys=log_keys, trigger=log_interval))
        trainer.extend(extensions.PrintReport(log_keys), trigger=log_interval)
        #        trainer.extend(extensions.PrintReport(log_keys), trigger=(1, 'iteration'))
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(log_keys[3:],
                                      'epoch',
                                      file_name='loss.png',
                                      postprocess=plot_log))
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot_object(opt,
                                                  'opt{.updater.epoch}.npz'),
                       trigger=(args.epoch, 'epoch'))
        if args.vis_freq > 0:
            trainer.extend(Evaluator({'main': edge_iter},
                                     coords,
                                     params={
                                         'args': args,
                                         'graph': val_graph
                                     },
                                     device=args.gpu),
                           trigger=(args.vis_freq, 'iteration'))
        # trainer.extend(extensions.ParameterStatistics(coords))

        # ChainerUI
        save_args(args, args.outdir)

    if args.optimizer in [
            'Momentum', 'CMomentum', 'AdaGrad', 'RMSprop', 'NesterovAG'
    ]:
        trainer.extend(extensions.ExponentialShift('lr', 0.5, optimizer=opt),
                       trigger=(args.epoch / args.learning_rate_drop, 'epoch'))
    elif args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        trainer.extend(extensions.ExponentialShift("alpha", 0.5,
                                                   optimizer=opt),
                       trigger=(args.epoch / args.learning_rate_drop, 'epoch'))


    # if args.training:
    trainer.run()

    # result
    if primary:
        # save DAG data file
        if (args.gpu > -1):
            dat = coords.xp.asnumpy(coords.W.data)
        else:
            dat = coords.W.data
        if args.lambda_anchor == 0:  # anchor = centre
            dat[:, 1:(args.dim + 1)] = dat[:, (args.dim + 1):]
        redge = reconstruct(dat, dag=args.dag)
        np.savetxt(os.path.join(args.outdir, "original.csv"),
                   pos_edge,
                   fmt='%i',
                   delimiter=",")
        np.savetxt(os.path.join(args.outdir, "reconstructed.csv"),
                   redge,
                   fmt='%i',
                   delimiter=",")
        np.savetxt(os.path.join(args.outdir, "coords.csv"),
                   dat,
                   fmt='%1.5f',
                   delimiter=",")
        f1, prc, rec, acc = compare_graph(
            val_graph, nx.from_edgelist(redge, nx.DiGraph()))
        if args.plot:
            plot_digraph(pos_edge, os.path.join(args.outdir, "original.png"))
            plot_digraph(redge, os.path.join(args.outdir, "reconstructed.png"))
            plot_disks(dat, os.path.join(args.outdir, "plot.png"))
        with open(os.path.join(args.outdir, "args.txt"), 'w') as fh:
            fh.write(" ".join(sys.argv))
            fh.write("\n")
            fh.write(
                f"f1: {f1}, precision: {prc}, recall: {rec}, accuracy: {acc}")
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--out',
                        type=str,
                        default='result',
                        help='Output directory')
    parser.add_argument('--mscoco-root',
                        type=str,
                        default='data',
                        help='MSCOCO dataset root directory')
    parser.add_argument('--max-iters',
                        type=int,
                        default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio',
                        type=float,
                        default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity',
                        type=int,
                        default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter',
                        type=int,
                        default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter',
                        type=int,
                        default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter',
                        type=int,
                        default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn',
                        type=str,
                        default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length',
                        type=int,
                        default=30,
                        help='Maximum caption length when using the LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has been downloaded
    # already using e.g. the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to address overfitting and to see how well
    # the model generalizes to yet unseen data. However, since the number of
    # these samples in MSCOCO is quite large (~200k) and evaluating them takes
    # time, you may choose to use only a fraction of the available samples.
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(vocab_size,
                              dropout_ratio=args.dropout_ratio,
                              rnn=args.rnn)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(train,
                                                args.batch_size,
                                                shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(val,
                                                      args.batch_size,
                                                      repeat=False,
                                                      shuffle=False,
                                                      shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on
        # whether we are using LSTM layers or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(batch,
                                  device,
                                  max_caption_length=max_caption_length)

    updater = training.updater.StandardUpdater(train_iter,
                                               optimizer=optimizer,
                                               device=args.gpu,
                                               converter=converter)

    trainer = training.Trainer(updater,
                               out=args.out,
                               stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(extensions.Evaluator(val_iter,
                                        target=model,
                                        converter=converter,
                                        device=args.gpu),
                   trigger=(args.val_iter, 'iteration'))
    trainer.extend(
        extensions.LogReport(['main/loss', 'validation/main/loss'],
                             trigger=(args.log_iter, 'iteration')))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              trigger=(args.log_iter, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'elapsed_time', 'epoch', 'iteration', 'main/loss',
        'validation/main/loss'
    ]),
                   trigger=(args.log_iter, 'iteration'))

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(extensions.snapshot_object(model,
                                              'model_{.updater.iteration}'),
                   trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
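
A note on the converter defined above: the 'lstm' branch pads every caption to a fixed max_caption_length so the batch can be stacked into one array, while the 'nsteplstm' branch keeps captions variable-length. Below is a minimal sketch of that alignment step, assuming hypothetical pad_id and eos_id token values; the real datasets.converter in this example also handles device transfer.

import numpy as np

def align_captions(captions, max_caption_length, pad_id=0, eos_id=1):
    # Truncate each caption to the maximum length, append an EOS token when
    # there is room, and pad the rest so np.stack() can build a single
    # (batch, max_caption_length) int32 array.
    aligned = []
    for caption in captions:
        caption = caption[:max_caption_length]
        padded = np.full(max_caption_length, pad_id, dtype=np.int32)
        padded[:len(caption)] = caption
        if len(caption) < max_caption_length:
            padded[len(caption)] = eos_id
        aligned.append(padded)
    return np.stack(aligned)

# Two captions of different lengths become one 2x5 array.
batch = align_captions([np.array([5, 7, 9]), np.array([3, 4, 6, 8])], 5)
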
Example #12
0
def train(args):
    """Train with the given args.

    Args:
        args (namespace): The program arguments.

    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning("cuda is not available")

    # get input and output dimension info
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]["input"][0]["shape"][-1])
    odim = int(valid_json[utts[0]]["output"][0]["shape"][-1])
    logging.info("#input dims : " + str(idim))
    logging.info("#output dims: " + str(odim))

    # Initialize with pre-trained ASR encoder and MT decoder
    if args.enc_init is not None or args.dec_init is not None:
        model = load_trained_modules(idim, odim, args, interface=STInterface)
    else:
        model_class = dynamic_import(args.model_module)
        model = model_class(idim, odim, args)
    assert isinstance(model, STInterface)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + "/model.json"
    with open(model_conf, "wb") as f:
        logging.info("writing a model config file to " + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode("utf_8"))
    for key in sorted(vars(args).keys()):
        logging.info("ARGS: " + key + ": " + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        if args.batch_size != 0:
            logging.warning(
                "batch size is automatically increased (%d -> %d)" %
                (args.batch_size, args.batch_size * args.ngpu))
            args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    if args.train_dtype in ("float16", "float32", "float64"):
        dtype = getattr(torch, args.train_dtype)
    else:
        dtype = torch.float32
    model = model.to(device=device, dtype=dtype)

    # Setup an optimizer
    if args.opt == "adadelta":
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps,
                                         weight_decay=args.weight_decay)
    elif args.opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.opt == "noam":
        from espnet.nets.pytorch_backend.transformer.optimizer import get_std_opt

        optimizer = get_std_opt(
            model.parameters(),
            args.adim,
            args.transformer_warmup_steps,
            args.transformer_lr,
        )
    else:
        raise NotImplementedError("unknown optimizer: " + args.opt)

    # setup apex.amp
    if args.train_dtype in ("O0", "O1", "O2", "O3"):
        try:
            from apex import amp
        except ImportError as e:
            logging.error(
                f"You need to install apex for --train-dtype {args.train_dtype}. "
                "See https://github.com/NVIDIA/apex#linux")
            raise e
        if args.opt == "noam":
            model, optimizer.optimizer = amp.initialize(
                model, optimizer.optimizer, opt_level=args.train_dtype)
        else:
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.train_dtype)
        use_apex = True
    else:
        use_apex = False

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(
        subsampling_factor=model.subsample[0],
        dtype=dtype,
        use_source_text=args.asr_weight > 0 or args.mt_weight > 0,
    )

    # read json data
    with open(args.train_json, "rb") as f:
        train_json = json.load(f)["utts"]
    with open(args.valid_json, "rb") as f:
        valid_json = json.load(f)["utts"]

    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    # make minibatch list (variable length)
    train = make_batchset(
        train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        shortest_first=use_sortagrad,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        iaxis=0,
        oaxis=0,
    )
    valid = make_batchset(
        valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1,
        count=args.batch_count,
        batch_bins=args.batch_bins,
        batch_frames_in=args.batch_frames_in,
        batch_frames_out=args.batch_frames_out,
        batch_frames_inout=args.batch_frames_inout,
        iaxis=0,
        oaxis=0,
    )

    load_tr = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": True},  # Switch the mode of preprocessing
    )
    load_cv = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        preprocess_conf=args.preprocess_conf,
        preprocess_args={"train": False},  # Switch the mode of preprocessing
    )
    # Hack to keep the DataLoader batch size at 1: the actual minibatch is
    # already encoded inside each dataset element. The default collate
    # function would convert numpy arrays to pytorch tensors, so we use a
    # collate function that simply unwraps the one-element list instead.
    train_iter = ChainerDataLoader(
        dataset=TransformDataset(train,
                                 lambda data: converter([load_tr(data)])),
        batch_size=1,
        num_workers=args.n_iter_processes,
        shuffle=not use_sortagrad,
        collate_fn=lambda x: x[0],
    )
    valid_iter = ChainerDataLoader(
        dataset=TransformDataset(valid,
                                 lambda data: converter([load_cv(data)])),
        batch_size=1,
        shuffle=False,
        collate_fn=lambda x: x[0],
        num_workers=args.n_iter_processes,
    )

    # Set up a trainer
    updater = CustomUpdater(
        model,
        args.grad_clip,
        {"main": train_iter},
        optimizer,
        device,
        args.ngpu,
        args.grad_noise,
        args.accum_grad,
        use_apex=use_apex,
    )
    trainer = training.Trainer(updater, (args.epochs, "epoch"),
                               out=args.outdir)

    if use_sortagrad:
        trainer.extend(
            ShufflingEnabler([train_iter]),
            trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs,
                     "epoch"),
        )

    # Resume from a snapshot
    if args.resume:
        logging.info("resumed from %s" % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the validation dataset for each epoch
    if args.save_interval_iters > 0:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(
            CustomEvaluator(model, {"main": valid_iter}, reporter, device,
                            args.ngpu))

    # Save attention weight at each epoch
    if args.num_save_attention > 0:
        data = sorted(
            list(valid_json.items())[:args.num_save_attention],
            key=lambda x: int(x[1]["input"][0]["shape"][1]),
            reverse=True,
        )
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
            plot_class = model.module.attention_plot_class
        else:
            att_vis_fn = model.calculate_all_attentions
            plot_class = model.attention_plot_class
        att_reporter = plot_class(
            att_vis_fn,
            data,
            args.outdir + "/att_ws",
            converter=converter,
            transform=load_cv,
            device=device,
        )
        trainer.extend(att_reporter, trigger=(1, "epoch"))
    else:
        att_reporter = None

    # Save CTC prob at each epoch
    if (args.asr_weight > 0 and args.mtlalpha > 0) and args.num_save_ctc > 0:
        # NOTE: sort it by output lengths
        data = sorted(
            list(valid_json.items())[:args.num_save_ctc],
            key=lambda x: int(x[1]["output"][0]["shape"][0]),
            reverse=True,
        )
        if hasattr(model, "module"):
            ctc_vis_fn = model.module.calculate_all_ctc_probs
            plot_class = model.module.ctc_plot_class
        else:
            ctc_vis_fn = model.calculate_all_ctc_probs
            plot_class = model.ctc_plot_class
        ctc_reporter = plot_class(
            ctc_vis_fn,
            data,
            args.outdir + "/ctc_prob",
            converter=converter,
            transform=load_cv,
            device=device,
            ikey="output",
            iaxis=1,
        )
        trainer.extend(ctc_reporter, trigger=(1, "epoch"))
    else:
        ctc_reporter = None

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport(
            [
                "main/loss",
                "validation/main/loss",
                "main/loss_asr",
                "validation/main/loss_asr",
                "main/loss_mt",
                "validation/main/loss_mt",
                "main/loss_st",
                "validation/main/loss_st",
            ],
            "epoch",
            file_name="loss.png",
        ))
    trainer.extend(
        extensions.PlotReport(
            [
                "main/acc",
                "validation/main/acc",
                "main/acc_asr",
                "validation/main/acc_asr",
                "main/acc_mt",
                "validation/main/acc_mt",
            ],
            "epoch",
            file_name="acc.png",
        ))
    trainer.extend(
        extensions.PlotReport(["main/bleu", "validation/main/bleu"],
                              "epoch",
                              file_name="bleu.png"))

    # Save best models
    trainer.extend(
        snapshot_object(model, "model.loss.best"),
        trigger=training.triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(
        snapshot_object(model, "model.acc.best"),
        trigger=training.triggers.MaxValueTrigger("validation/main/acc"),
    )

    # save snapshot which contains model and optimizer states
    if args.save_interval_iters > 0:
        trainer.extend(
            torch_snapshot(filename="snapshot.iter.{.updater.iteration}"),
            trigger=(args.save_interval_iters, "iteration"),
        )
    else:
        trainer.extend(torch_snapshot(), trigger=(1, "epoch"))

    # epsilon decay in the optimizer
    if args.opt == "adadelta":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
    elif args.opt == "adam":
        if args.criterion == "acc":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.acc.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/acc",
                    lambda best_value, current_value: best_value >
                    current_value,
                ),
            )
        elif args.criterion == "loss":
            trainer.extend(
                restore_snapshot(model,
                                 args.outdir + "/model.loss.best",
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )
            trainer.extend(
                adam_lr_decay(args.lr_decay),
                trigger=CompareValueTrigger(
                    "validation/main/loss",
                    lambda best_value, current_value: best_value <
                    current_value,
                ),
            )

    # Write a log of evaluation statistics for each epoch
    trainer.extend(
        extensions.LogReport(trigger=(args.report_interval_iters,
                                      "iteration")))
    report_keys = [
        "epoch",
        "iteration",
        "main/loss",
        "main/loss_st",
        "main/loss_asr",
        "validation/main/loss",
        "validation/main/loss_st",
        "validation/main/loss_asr",
        "main/acc",
        "validation/main/acc",
    ]
    if args.asr_weight > 0:
        report_keys.append("main/acc_asr")
        report_keys.append("validation/main/acc_asr")
    report_keys += ["elapsed_time"]
    if args.opt == "adadelta":
        trainer.extend(
            extensions.observe_value(
                "eps",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["eps"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("eps")
    elif args.opt in ["adam", "noam"]:
        trainer.extend(
            extensions.observe_value(
                "lr",
                lambda trainer: trainer.updater.get_optimizer("main").
                param_groups[0]["lr"],
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
        report_keys.append("lr")
    if args.asr_weight > 0:
        if args.mtlalpha > 0:
            report_keys.append("main/cer_ctc")
            report_keys.append("validation/main/cer_ctc")
        if args.mtlalpha < 1:
            if args.report_cer:
                report_keys.append("validation/main/cer")
            if args.report_wer:
                report_keys.append("validation/main/wer")
    if args.report_bleu:
        report_keys.append("main/bleu")
        report_keys.append("validation/main/bleu")
    trainer.extend(
        extensions.PrintReport(report_keys),
        trigger=(args.report_interval_iters, "iteration"),
    )

    trainer.extend(
        extensions.ProgressBar(update_interval=args.report_interval_iters))
    set_early_stop(trainer, args)

    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        trainer.extend(
            TensorboardLogger(
                SummaryWriter(args.tensorboard_dir),
                att_reporter=att_reporter,
                ctc_reporter=ctc_reporter,
            ),
            trigger=(args.report_interval_iters, "iteration"),
        )
    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
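
The adadelta_eps_decay extension used above shrinks Adadelta's eps whenever the validation metric stops improving (the CompareValueTrigger fires when the current value is worse than the best seen so far, at which point the best snapshot is also restored). A minimal sketch of the decay itself, assuming a plain torch.optim.Adadelta whose eps is read back through param_groups, as in the observe_value extension above:

import torch

def decay_adadelta_eps(optimizer, eps_decay):
    # Shrink eps in every parameter group; a smaller eps makes the
    # effective Adadelta update more conservative.
    for group in optimizer.param_groups:
        group['eps'] *= eps_decay

params = [torch.nn.Parameter(torch.zeros(3))]
opt = torch.optim.Adadelta(params, rho=0.95, eps=1e-8)
decay_adadelta_eps(opt, 0.01)
print(opt.param_groups[0]['eps'])  # ~1e-10
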
Example #13
0
File: asr_mix.py Project: yntcyjb/espnet
def train(args):
    """Train with the given args

    :param Namespace args: The program arguments
    """
    set_deterministic_pytorch(args)

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    model = E2E(idim, odim, args)
    subsampling_factor = model.subsample[0]

    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer,
                             rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        model.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)),
                       indent=4,
                       ensure_ascii=False,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps,
                                         weight_decay=args.weight_decay)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(subsampling_factor=subsampling_factor,
                                preprocess_conf=args.preprocess_conf)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # Hack to keep the iterator batch size at 1: the actual minibatch is
    # already encoded inside each dataset element
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device, args.ngpu)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the validation dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention weights at each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
        else:
            att_vis_fn = model.calculate_all_attentions
        att_reporter = PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=device)
        trainer.extend(att_reporter, trigger=(1, 'epoch'))
    else:
        att_reporter = None

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        snapshot_object(model, 'model.loss.best'),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    if mtl_mode != 'ctc':
        trainer.extend(
            snapshot_object(model, 'model.acc.best'),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    if args.report_cer:
        report_keys.append('validation/main/cer')
    if args.report_wer:
        report_keys.append('validation/main/wer')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))
    set_early_stop(trainer, args)

    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer, att_reporter),
                       trigger=(REPORT_INTERVAL, 'iteration'))
    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
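
Both ESPnet recipes above use the same pattern: make_batchset pre-builds the variable-length minibatches, so the iterator runs with batch_size=1 and each "sample" it yields is already a full minibatch. A toy sketch of the grouping, assuming the same utts JSON layout as the valid_json loaded above (input shapes are [length, dim]):

def toy_make_batchset(utts, batch_size):
    # Sort utterances longest-first so padding waste inside a batch stays
    # small, then slice the sorted list into fixed-size minibatches.
    items = sorted(utts.items(),
                   key=lambda kv: int(kv[1]['input'][0]['shape'][0]),
                   reverse=True)
    return [items[i:i + batch_size]
            for i in range(0, len(items), batch_size)]

utts = {
    'utt1': {'input': [{'shape': [120, 83]}]},
    'utt2': {'input': [{'shape': [400, 83]}]},
    'utt3': {'input': [{'shape': [250, 83]}]},
}
batches = toy_make_batchset(utts, batch_size=2)  # [[utt2, utt3], [utt1]]
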
Example #14
0
def train(**args):
    set_seed(42)
    args = EasyDict(args)
    logger.info(args)
    dataset_file = Path(args.dataset_file)

    data = json.loads(dataset_file.read_text())
    ladder = data['ladder']
    train_data, valid_data = data['train'], data['valid']

    counter = Counter()
    pokes = train_data + valid_data
    for poke in pokes:
        counter.update(poke)

    counts = [0] * (args.topk + 1)
    index2poke = ['<unk>']
    for i, (name, freq) in enumerate(counter.most_common()):
        if i < args.topk:
            counts[i + 1] = freq
            index2poke.append(name)
        else:
            counts[0] += freq
    vocab = {x: i for i, x in enumerate(index2poke)}
    n_vocab = len(vocab)
    logger.info('n_vocab = {}'.format(n_vocab))

    train_data = vectorize(train_data, vocab)
    valid_data = vectorize(valid_data, vocab)

    X_valid, y_valid = convert(valid_data)
    X_train, y_train = convert(train_data)

    train = TupleDataset(X_train, y_train)
    valid = TupleDataset(X_valid, y_valid)

    logger.info('train size = {}'.format(len(train)))
    logger.info('valid size = {}'.format(len(valid)))

    train_iter = chainer.iterators.SerialIterator(train, 32)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  32,
                                                  repeat=False,
                                                  shuffle=False)
    if args.loss_func == 'softmax':
        loss_func = SoftmaxCrossEntropyLoss(args.n_units, n_vocab)
    elif args.loss_func == 'ns':
        loss_func = L.NegativeSampling(args.n_units, counts,
                                       args.negative_size)
        loss_func.W.data[...] = 0
    else:
        raise ValueError('invalid loss_func: {}'.format(args.loss_func))

    prefix = '{}_{}_{}'.format(ladder, args.loss_func, args.n_units)

    model = ContinuousBoW(n_vocab, args.n_units, loss_func)
    optimizer = O.Adam()
    optimizer.setup(model)

    updater = training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (10, 'epoch'), out='results')
    trainer.extend(extensions.Evaluator(valid_iter, model))
    trainer.extend(extensions.LogReport(log_name='{}_log'.format(prefix)))
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    # Save the word2vec model
    Path('results').mkdir(exist_ok=True)
    poke2vec_file = 'results/{}_poke2vec.model'.format(prefix)
    with open(poke2vec_file, 'w') as f:
        f.write('%d %d\n' % (n_vocab, args.n_units))
        w = model.embed.W.data
        for i, wi in enumerate(w):
            v = ' '.join(map(str, wi))
            f.write('%s %s\n' % (index2poke[i], v))
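
The file written above follows the word2vec text format: a header line with the vocabulary size and dimensionality, then one "token v1 v2 ..." line per embedding. Assuming gensim is installed, the vectors can be loaded back for nearest-neighbour queries; the path below is a placeholder for the poke2vec_file written above.

from gensim.models import KeyedVectors

# binary=False because the file is plain text, not the binary word2vec format.
kv = KeyedVectors.load_word2vec_format('results/example_poke2vec.model',
                                       binary=False)
print(kv.most_similar('<unk>', topn=5))
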
Example #15
0
def main():

    # list of available GPUs
    devices = {'main':0, 'second':2, 'third':3, 'fourth':4, 'fifth':5}

    parser = argparse.ArgumentParser(
        description='Training of a fully connected network for indoor '
                    'acoustic localization.')
    parser.add_argument('config', type=str, help="The config file for the training, model, and data.")
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--gpu', default='main', choices=devices.keys(),
                        help='The GPU to use for the training')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    with open(args.config, 'r') as f:
        config = json.load(f)

    gpu = args.gpu
    epoch = config['training']['epoch']
    batchsize = config['training']['batchsize']
    out_dir = config['training']['out'] if 'out' in config['training'] else 'result'

    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('')

    chainer.cuda.get_device_from_id(devices[gpu]).use()

    # Set up a neural network to train
    # Classifier reports mean squared error
    nn = models[config['model']['name']](
            *config['model']['args'],
            **config['model']['kwargs'],
            )

    model = L.Classifier(nn, lossfun=F.mean_squared_error)
    # model = L.Classifier(nn, lossfun=F.mean_absolute_error)
    model.compute_accuracy = False

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Helper to load the dataset
    data_formatter, label_formatter, skip = get_formatters(**config['data']['format_kwargs'])

    # Load the dataset
    train, validate, test = get_data(config['data']['file'],
                                     data_formatter=data_formatter,
                                     label_formatter=label_formatter,
                                     skip=skip)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    validate_iter = chainer.iterators.SerialIterator(validate, batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    #updater = training.ParallelUpdater(train_iter, optimizer, devices=devices)
    updater = training.StandardUpdater(train_iter, optimizer, device=devices[gpu])
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(validate_iter, model, device=devices[gpu]))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # save the trained model
    chainer.serializers.save_npz(config['model']['file'], nn)

    return nn, train, test
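
For reference, a hypothetical config file covering the keys this script reads (training, model, data); the model name, arguments, and file names below are invented placeholders, not values from the original project.

import json

config = {
    'training': {'epoch': 100, 'batchsize': 100, 'out': 'result'},
    'model': {
        'name': 'fc_net',            # assumed key into the `models` registry
        'args': [12, 3],             # positional args for the model class
        'kwargs': {'n_units': 512},  # keyword args for the model class
        'file': 'result/model.npz',  # where the trained net is saved
    },
    'data': {
        'file': 'dataset.npz',
        'format_kwargs': {'normalize': True},  # forwarded to get_formatters()
    },
}
with open('config.json', 'w') as f:
    json.dump(config, f, indent=2)
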
Example #16
0
def main():
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50
    }
    archs.update(dpns)

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch',
                        '-a',
                        choices=sorted(archs.keys()),
                        default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize',
                        '-B',
                        type=int,
                        default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        '-E',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpus',
                        '-g',
                        type=int,
                        nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean',
                        '-m',
                        default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize',
                        '-b',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = train_imagenet.PreprocessedDataset(args.train, args.root, mean,
                                               model.insize)
    val = train_imagenet.PreprocessedDataset(args.val, args.root, mean,
                                             model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    devices = tuple(args.gpus)

    train_iters = [
        chainer.iterators.MultiprocessIterator(i,
                                               args.batchsize,
                                               n_processes=args.loaderjob)
        for i in chainer.datasets.split_dataset_n_random(train, len(devices))
    ]
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = updaters.MultiprocessParallelUpdater(train_iters,
                                                   optimizer,
                                                   devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    if args.test:
        val_interval = 5, 'epoch'
        log_interval = 1, 'epoch'
    else:
        val_interval = 100000, 'iteration'
        log_interval = 1000, 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
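
MultiprocessParallelUpdater needs one training iterator per GPU, which is why the dataset is split with split_dataset_n_random into as many disjoint random subsets as there are devices. A minimal sketch of that splitting step:

import chainer

dataset = list(range(10))
# Two disjoint random subsets that together cover the whole dataset,
# one per device.
subsets = chainer.datasets.split_dataset_n_random(dataset, 2)
print(len(subsets), [len(s) for s in subsets])  # 2 [5, 5]
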
Example #17
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='voc0712')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(via_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(BboxDataset(),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = BboxDataset(split='test', use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    # 120000->8000
    trainer = training.Trainer(updater, (500, 'iteration'), args.out)
    # 80000->5000,100000->7000
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([300, 400],
                                                          'iteration'))
    # 10000->700
    trainer.extend(DetectionEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=via_bbox_label_names),
                   trigger=(7, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # 10000->700
    trainer.extend(extensions.snapshot(), trigger=(50, 'iteration'))
    # 120000->8000
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(500, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()

    serializers.save_npz('via_model', model)
    serializers.save_npz('via_state', optimizer)
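
The ExponentialShift('lr', 0.1, init=1e-3) extension combined with ManualScheduleTrigger([300, 400], 'iteration') yields a step schedule: the learning rate starts at 1e-3 and is multiplied by 0.1 as each listed iteration is reached. A small sketch of the resulting schedule:

def lr_at(iteration, init=1e-3, rate=0.1, milestones=(300, 400)):
    # Every milestone already passed multiplies the rate in once.
    lr = init
    for m in milestones:
        if iteration >= m:
            lr *= rate
    return lr

print(lr_at(100), lr_at(350), lr_at(450))  # ~1e-3, ~1e-4, ~1e-5
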
Example #18
0
def main():
    parser = argparse.ArgumentParser(description='Train CycleGAN')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--base',
                        '-B',
                        default=os.path.dirname(os.path.abspath(__file__)),
                        help='base directory path of program files')
    parser.add_argument('--config_path',
                        type=str,
                        default='configs/training.yml',
                        help='path to config file')
    parser.add_argument('--out',
                        '-o',
                        default='results/training',
                        help='Directory to output the result')

    parser.add_argument('--model', '-m', default='', help='Load model data')

    parser.add_argument('--model2', '-m2', default='', help='Load model data')

    parser.add_argument('--resume',
                        '-res',
                        default='',
                        help='Resume the training from snapshot')

    parser.add_argument('--root',
                        '-R',
                        default=os.path.dirname(os.path.abspath(__file__)),
                        help='Root directory path of input image')

    args = parser.parse_args()

    with open(os.path.join(args.base, args.config_path)) as f:
        config = yaml_utils.Config(yaml.safe_load(f))

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(config.batchsize))
    print('# iteration: {}'.format(config.iteration))
    print('Learning Rate: {}'.format(config.adam['alpha']))
    print('')

    # Load the dataset
    print('----- Load dataset -----')
    train = CycleganDataset(args.root,
                            os.path.join(args.base,
                                         config.dataset['training_fn']),
                            config.patch['patchside'],
                            [config.patch['lrmin'], config.patch['lrmax']],
                            augmentation=True)
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=config.batchsize)

    print('----- Set up model ------')
    gen = Generator_SR()
    gen2 = Generator_SR()
    disY = Discriminator()
    # chainer.serializers.load_npz(args.model, gen)
    # chainer.serializers.load_npz(args.model2, gen2)

    if args.gpu >= 0:
        chainer.backends.cuda.set_max_workspace_size(1024 * 1024 * 1024)  # 1GB
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        gen2.to_gpu()
        disY.to_gpu()

    print('----- Make optimizer -----')

    def make_optimizer(model, alpha=0.00001, beta1=0.9, beta2=0.999):
        optimizer = chainer.optimizers.Adam(alpha=alpha,
                                            beta1=beta1,
                                            beta2=beta2)
        optimizer.setup(model)
        return optimizer

    gen_opt = make_optimizer(model=gen,
                             alpha=config.adam['alpha'],
                             beta1=config.adam['beta1'],
                             beta2=config.adam['beta2'])

    gen2_opt = make_optimizer(model=gen2,
                              alpha=config.adam['alpha'],
                              beta1=config.adam['beta1'],
                              beta2=config.adam['beta2'])

    disY_opt = make_optimizer(model=disY,
                              alpha=config.adam['alpha'],
                              beta1=config.adam['beta1'],
                              beta2=config.adam['beta2'])

    print('----- Make updater -----')
    updater = CinCGANUpdater(models=(gen, gen2, disY),
                             iterator=train_iter,
                             optimizer={
                                 'gen': gen_opt,
                                 'gen2': gen2_opt,
                                 'disY': disY_opt
                             },
                             device=args.gpu)

    print('----- Save configs -----')

    def create_result_dir(base_dir, output_dir, config_path, config):
        """https://github.com/pfnet-research/sngan_projection/blob/master/train.py"""
        result_dir = os.path.join(base_dir, output_dir)
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)
        if not os.path.exists('{}/init'.format(result_dir)):
            os.makedirs('{}/init'.format(result_dir))

        def copy_to_result_dir(fn, result_dir):
            bfn = os.path.basename(fn)
            shutil.copy(fn, '{}/{}'.format(result_dir, bfn))

        copy_to_result_dir(os.path.join(base_dir, config_path), result_dir)

        copy_to_result_dir(os.path.join(base_dir, config.network['fn']),
                           result_dir)
        copy_to_result_dir(os.path.join(base_dir, config.updater['fn']),
                           result_dir)
        copy_to_result_dir(
            os.path.join(base_dir, config.dataset['training_fn']), result_dir)

    create_result_dir(args.base, args.out, args.config_path, config)

    print('----- Make trainer -----')
    trainer = training.Trainer(updater, (config.iteration, 'iteration'),
                               out=os.path.join(args.base, args.out))

    # Set up logging
    snapshot_interval = (config.snapshot_interval, 'iteration')
    display_interval = (config.display_interval, 'iteration')
    evaluation_interval = (config.evaluation_interval, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen, filename='gen_iter_{.updater.iteration}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen2, filename='gen2_iter_{.updater.iteration}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        disY, filename='disY_iter_{.updater.iteration}.npz'),
                   trigger=snapshot_interval)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(reconstruct_hr_img(gen, gen2,
                                      os.path.join(args.base, args.out),
                                      train_iter, train),
                   trigger=evaluation_interval,
                   priority=extension.PRIORITY_WRITER)

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['gen/loss_gen1'],
                                  'iteration',
                                  file_name='gen_loss.png',
                                  trigger=display_interval))
        trainer.extend(
            extensions.PlotReport([
                'disY/loss_dis1_fake', 'disY/loss_dis1_real', 'disY/loss_dis1'
            ],
                                  'iteration',
                                  file_name='dis_loss.png',
                                  trigger=display_interval))
        trainer.extend(
            extensions.PlotReport(['gen/loss_gen', 'disY/loss_dis1'],
                                  'iteration',
                                  file_name='adv_loss.png',
                                  trigger=display_interval))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    print('----- Run the training -----')
    reset_seed(0)
    trainer.run()
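
reset_seed(0) is called right before trainer.run() but is not defined in this snippet. A plausible sketch, assuming it seeds the Python, NumPy, and (when a GPU is in use) CuPy generators so runs are repeatable:

import random
import numpy as np
import chainer

def reset_seed(seed=0):
    # Seed every RNG the training loop may touch.
    random.seed(seed)
    np.random.seed(seed)
    if chainer.backends.cuda.available:
        chainer.backends.cuda.cupy.random.seed(seed)
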
Example #19
0
File: train4copa.py Project: losyer/ABCNN
def main(args):
    abs_dest = "/work/sasaki.shota/"
    if args.snapshot:
        start_time = datetime.now().strftime('%Y%m%d_%H_%M_%S')
        dest = "../result/" + start_time
        os.makedirs(dest)
        abs_dest = os.path.abspath(dest)
        with open(os.path.join(dest, "settings.json"), "w") as fo:
            fo.write(json.dumps(vars(args), sort_keys=True, indent=4))

    # load data
    data_processor = CopaDataProcessor(args.data, args.vocab, args.test, args.gpu, args)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    copa_data = data_processor.copa_data

    # create model
    vocab_c = data_processor.vocab_c
    vocab_r = data_processor.vocab_r
    embed_dim = args.dim
    cnn = ABCNN_2(n_vocab_c=len(vocab_c), n_vocab_r=len(vocab_r),
                  n_layer=args.layer, embed_dim=embed_dim, input_channel=1,
                  output_channel=50, wordvec_unchain=args.wordvec_unchain)
    model = L.Classifier(cnn, lossfun=sigmoid_cross_entropy, accfun=binary_accuracy)
    if args.gpu >= 0:
        # cuda.get_device(str(args.gpu)).use()
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    if args.word2vec:
        cnn.load_word2vec_embeddings(args.word2vec_path, data_processor.vocab_c, data_processor.vocab_r)
    cnn.pad_vec2zero(data_processor.vocab_c, data_processor.vocab_r)
    
    # setup optimizer
    optimizer = O.AdaGrad(args.lr)
    optimizer.setup(model)
    # do not use weight decay for embeddings
    decay_params = {name: 1 for name, variable in model.namedparams() if "embed" not in name}
    optimizer.add_hook(SelectiveWeightDecay(rate=args.decay, decay_params=decay_params))

    # train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    train_iter = IteratorWithNS(train_data, args.batchsize)

    dev_train_iter = chainer.iterators.SerialIterator(
        train_data, args.batchsize, repeat=False)  # for SVM
    copa_iter = COPAIterator(copa_data)
    updater = training.StandardUpdater(train_iter, optimizer, converter=concat_examples, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=abs_dest)

    # setup evaluation
    # eval_predictor = model.copy().predictor.to_cpu()
    eval_predictor = model.copy().predictor
    eval_predictor.train = False
    iters = {"train": dev_train_iter, "test": copa_iter}
    trainer.extend(COPAEvaluator(iters, eval_predictor,
                                 converter=concat_examples, device=args.gpu),
                   trigger=(1000, 'iteration'))
    # trainer.extend(COPAEvaluator(iters, eval_predictor, converter=concat_examples, device=args.gpu))

    # extensions...
    trainer.extend(extensions.LogReport(trigger=(1000, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
         'copa_dev_acc', 'copa_test_acc']),
        trigger=(1000, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.snapshot:
        trainer.extend(extensions.snapshot_object(
            model, 'model_epoch_{.updater.epoch}',
            trigger=chainer.training.triggers.MaxValueTrigger('validation/map')))
    # trainer.extend(extensions.ExponentialShift("lr", 0.5, optimizer=optimizer),
    #                trigger=chainer.training.triggers.MinValueTrigger("validation/loss"))
    trainer.run()
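
SelectiveWeightDecay is a project-specific optimizer hook, not part of Chainer. A plausible minimal sketch, modeled on chainer.optimizer.WeightDecay and the decay_params dict built above (the actual implementation in losyer/ABCNN may differ):

class SelectiveWeightDecay(object):
    """Optimizer hook that decays only the parameters named in decay_params."""

    name = 'SelectiveWeightDecay'

    def __init__(self, rate, decay_params):
        self.rate = rate
        self.decay_params = decay_params

    def __call__(self, opt):
        for name, param in opt.target.namedparams():
            if name in self.decay_params and param.grad is not None:
                # Same update rule as chainer.optimizer.WeightDecay.
                param.grad += self.rate * param.data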
Example #20
0
def main():
    parser = argparse.ArgumentParser(
        description='Fully Convolutional Dual Center Pose Proposal Network '
                    'for Pose Estimation')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='results/dual_cp',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval',
                        type=int,
                        default=100,
                        help='Interval of displaying log to console')
    parser.add_argument('--frequency',
                        '-f',
                        type=int,
                        default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--train_resnet',
                        # argparse's type=bool would treat any non-empty
                        # string as True, so parse the value explicitly.
                        type=lambda s: s.lower() in ('true', '1', 'yes'),
                        default=True,
                        help='train resnet')

    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    n_class = 9
    train_path = os.path.join(os.getcwd(), root,
                              'train_data/OcclusionChallengeICCV2015')
    caffe_model = 'ResNet-50-model.caffemodel'

    distance_sanity = 0.05

    # `chainer.using_config` returns a context manager and has no effect
    # when called bare; set the global config flag instead.
    chainer.global_config.cudnn_deterministic = True

    model = DualCPNetClassifier(DualCenterProposalNetworkRes50FCN(
        n_class=n_class,
        output_scale=1.0,
        pretrained_model=not args.train_resnet),
                                method="RANSAC",
                                distance_sanity=distance_sanity)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # load train data
    train = DualCPNetDataset(train_path,
                             range(0, 1200)[0::2],
                             img_height=192,
                             img_width=256,
                             random=True,
                             random_crop=True)
    # load test data
    test = DualCPNetDataset(train_path,
                            range(0, 1200)[1::2],
                            img_height=192,
                            img_width=256)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    evaluator = extensions.Evaluator(test_iter, model, device=args.gpu)
    evaluator.default_name = 'val'
    trainer.extend(evaluator)

    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Plotting is disabled here; enable the following block to save loss and
    # accuracy curves to the result dir:
    # if extensions.PlotReport.available():
    #     trainer.extend(extensions.PlotReport(
    #         ['main/loss'], 'epoch', file_name='loss.png'))
    #     trainer.extend(extensions.PlotReport(
    #         ['main/accuracy'], 'epoch', file_name='accuracy.png'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/l_cls', 'main/l_cp', 'main/l_ocp', 'main/cls_acc',
            'main/cp_acc', 'main/ocp_acc', 'val/main/l_cls', 'val/main/l_cp',
            'val/main/l_ocp', 'val/main/cls_acc', 'val/main/cp_acc',
            'val/main/ocp_acc', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)
    else:
        npz_name = 'DualCenterProposalNetworkRes50FCN_occulusion_challenge.npz'
        caffemodel_name = 'ResNet-50-model.caffemodel'
        path = os.path.join(root, 'trained_data/', npz_name)
        path_caffemodel = os.path.join(root, 'trained_data/', caffemodel_name)
        print('npz model path : ' + path)
        print('caffe model path : ' + path_caffemodel)
        download.cache_or_load_file(
            path, lambda path: _make_chainermodel_npz(path, path_caffemodel,
                                                      model, n_class),
            lambda path: serializers.load_npz(path, model))

    # Run the training
    trainer.run()
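
A side note on the cudnn_deterministic flag used above: chainer.using_config returns a context manager, so the flag only takes effect as a global config assignment or inside a with block. A minimal illustration:

import chainer

# Option 1: process-wide setting.
chainer.global_config.cudnn_deterministic = True

# Option 2: scoped setting, restored automatically when the block exits.
with chainer.using_config('cudnn_deterministic', True):
    pass  # deterministic forward/backward passes go here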
Example #21
0
def train(args=None):
    save_args(args)

    dataset = FoodDataset(dataset_dir=args.dataset,
                          model_name=args.model_name,
                          train=True)
    train_dataset, valid_dataset = split_dataset_random(
        dataset, int(0.9 * len(dataset)), seed=args.seed)
    train_iter = MultiprocessIterator(train_dataset, args.batch_size)
    val_iter = MultiprocessIterator(valid_dataset,
                                    args.batch_size,
                                    repeat=False,
                                    shuffle=False)
    if args.model_name == 'mv2':
        model = MobilenetV2(num_classes=101, depth_multiplier=1.0)
    elif args.model_name == "vgg16":
        model = VGG16(num_classes=101)
    elif args.model_name == "resnet50":
        model = ResNet50(num_classes=101)
    else:
        raise ValueError("unknown model name: " + args.model_name)
    model = L.Classifier(model)
    if args.model_name == "mv2":
        optimizer = chainer.optimizers.SGD(lr=0.005)
    else:
        optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.model_name == "vgg16":
        model.predictor.disable_target_layers()
    if args.model_name == "resnet50":
        model.predictor.disable_target_layers()

    if args.device >= 0:
        chainer.backends.cuda.get_device_from_id(args.device).use()
        model.to_gpu()

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.destination)

    snapshot_interval = (1, 'epoch')

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.device),
                   trigger=snapshot_interval)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(
        extensions.LogReport(trigger=snapshot_interval, log_name='log.json'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch',
            file_name='loss.png'),
                       trigger=snapshot_interval)
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch',
            file_name='accuracy.png'),
                       trigger=snapshot_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()
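
disable_target_layers is a method of the custom VGG16/ResNet50 wrappers, not of Chainer itself. A minimal sketch of the usual freezing idiom it presumably wraps, assuming the predictor exposes its blocks as child links (the link names here are hypothetical):

def disable_target_layers(predictor, frozen=('conv1_1', 'conv1_2')):
    # disable_update() makes the optimizer skip a link's parameters,
    # which is the standard way to freeze pretrained layers in Chainer.
    for name in frozen:
        getattr(predictor, name).disable_update()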
Example #22
0
def train(args):
    # display torch version
    logging.info('torch version = ' + torch.__version__)

    # seed setting
    nseed = args.seed
    torch.manual_seed(nseed)
    logging.info('torch seed = ' + str(nseed))

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # considering reproducibility
    # (i.e., whether to use deterministic computation)
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda and cudnn availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get special label ids
    unk = args.char_list_dict['<unk>']
    eos = args.char_list_dict['<eos>']
    # read tokens as a sequence of sentences
    train = read_tokens(args.train_label, args.char_list_dict)
    val = read_tokens(args.valid_label, args.char_list_dict)
    # count tokens
    n_train_tokens, n_train_oovs = count_tokens(train, unk)
    n_val_tokens, n_val_oovs = count_tokens(val, unk)
    logging.info('#vocab = ' + str(args.n_vocab))
    logging.info('#sentences in the training data = ' + str(len(train)))
    logging.info('#tokens in the training data = ' + str(n_train_tokens))
    logging.info('oov rate in the training data = %.2f %%' %
                 (n_train_oovs / n_train_tokens * 100))
    logging.info('#sentences in the validation data = ' + str(len(val)))
    logging.info('#tokens in the validation data = ' + str(n_val_tokens))
    logging.info('oov rate in the validation data = %.2f %%' %
                 (n_val_oovs / n_val_tokens * 100))

    # Create the dataset iterators
    train_iter = ParallelSentenceIterator(train,
                                          args.batchsize,
                                          max_length=args.maxlen,
                                          sos=eos,
                                          eos=eos)
    val_iter = ParallelSentenceIterator(val,
                                        args.batchsize,
                                        max_length=args.maxlen,
                                        sos=eos,
                                        eos=eos,
                                        repeat=False)
    logging.info('#iterations per epoch = ' +
                 str(len(train_iter.batch_indices)))
    logging.info('#total iterations = ' +
                 str(args.epoch * len(train_iter.batch_indices)))
    # Prepare an RNNLM model
    rnn = RNNLM(args.n_vocab, args.layer, args.unit)
    model = ClassifierWithState(rnn)
    if args.ngpu > 1:
        logging.warning(
            'currently, multi-gpu is not supported. using a single gpu.')
    if args.ngpu > 0:
        # Make the specified GPU current
        gpu_id = 0
        model.cuda(gpu_id)
    else:
        gpu_id = -1

    # Save model conf to json
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps(vars(args), indent=4, sort_keys=True).encode('utf_8'))

    # Set up an optimizer
    if args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    else:
        raise ValueError('unsupported optimizer: ' + args.opt)

    # FIXME: TOO DIRTY HACK
    reporter = model.reporter
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    updater = BPTTUpdater(train_iter,
                          model,
                          optimizer,
                          gpu_id,
                          gradclip=args.gradclip)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)
    trainer.extend(LMEvaluator(val_iter, model, reporter, device=gpu_id))
    trainer.extend(
        extensions.LogReport(postprocess=compute_perplexity,
                             trigger=(REPORT_INTERVAL, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity',
         'elapsed_time']),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))
    # Save best models
    trainer.extend(torch_snapshot(filename='snapshot.ep.{.updater.epoch}'))
    trainer.extend(
        extensions.snapshot_object(model,
                                   'rnnlm.model.{.updater.epoch}',
                                   savefun=torch_save))
    # T.Hori: MinValueTrigger should be used, but it fails when resuming
    trainer.extend(
        MakeSymlinkToBestModel('validation/main/loss', 'rnnlm.model'))

    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    trainer.run()

    # compute perplexity for test set
    if args.test_label:
        logging.info('test the best model')
        torch_load(args.outdir + '/rnnlm.model.best', model)
        test = read_tokens(args.test_label, args.char_list_dict)
        n_test_tokens, n_test_oovs = count_tokens(test, unk)
        logging.info('#sentences in the test data = ' + str(len(test)))
        logging.info('#tokens in the test data = ' + str(n_test_tokens))
        logging.info('oov rate in the test data = %.2f %%' %
                     (n_test_oovs / n_test_tokens * 100))
        test_iter = ParallelSentenceIterator(test,
                                             args.batchsize,
                                             max_length=args.maxlen,
                                             sos=eos,
                                             eos=eos,
                                             repeat=False)
        evaluator = LMEvaluator(test_iter, model, reporter, device=gpu_id)
        result = evaluator()
        logging.info('test perplexity: ' +
                     str(np.exp(float(result['main/loss']))))
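
compute_perplexity above is a LogReport postprocess. A minimal sketch consistent with the reported keys perplexity and val_perplexity (treat the exact body as an assumption):

import numpy as np


def compute_perplexity(result):
    # LogReport passes the aggregated observation dict; convert the
    # averaged cross-entropy losses into perplexities in place.
    result['perplexity'] = np.exp(result['main/loss'])
    if 'validation/main/loss' in result:
        result['val_perplexity'] = np.exp(result['validation/main/loss'])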
Example #23
0
def setup():
    # Load the configuration file
    with open(CONFIG_FILE, "r") as f:
        config = yaml.safe_load(f)

    xp = np if not config["use_gpu"] else cuda.cupy
    
    # Set up the output directory for training results
    restart = config["restart_dir"] is not None
    if restart:
        result_children_dir = config["restart_dir"]
    else:
        result_children_dir = "result_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        
    result_dir = os.path.join(config["result_dir"], result_children_dir)
    result_dir_train = os.path.join(result_dir, MODEL_DIR)
    result_dir_val = os.path.join(result_dir, VALIDATE_DIR)

    # Load the training data
    train_scores = []
    with open(os.path.join(config["score_dir"], config["train_list"]), "r") as tr_f:
        train_info = list(map(lambda x: x.split("\n")[0], tr_f.readlines()))
        train_paths = list(map(lambda x: os.path.join(config["score_dir"], x.split("\t")[0]), train_info))
        train_score_lvs = list(map(lambda x: int(x.split("\t")[1])-1, train_info))

    for idx, npy_path in enumerate(train_paths):
        score = xp.load(npy_path)
        score[:, 8] /= 100.0
        # Split the score into measures
        score = score.reshape((-1, 1728))
        train_scores.append(score)
        sys.stdout.write("\rtrain score loaded: {0:4d}/{1}".format(idx+1, len(train_paths)))
    sys.stdout.write("\n")

    # Load the validation data
    val_scores = []
    val_score_names = []
    with open(os.path.join(config["score_dir"], config["validate_list"]), "r") as val_f:
        val_info = list(map(lambda x: x.split("\n")[0], val_f.readlines()))
        val_paths = list(map(lambda x: os.path.join(config["score_dir"], x.split("\t")[0]), val_info))
        val_score_lvs = list(map(lambda x: int(x.split("\t")[1])-1, val_info))

    for idx, npy_path in enumerate(val_paths):
        score = xp.load(npy_path)
        score[:, 8] /= 100.0
        # Split the score into measures
        score = score.reshape((-1, 1728))
        val_scores.append(score)
        score_name = os.path.basename(npy_path)
        val_score_names.append(score_name)
        sys.stdout.write("\rvalidate score loaded: {0:4d}/{1}".format(idx+1, len(val_paths)))
    sys.stdout.write("\n")

    # model and optimizer
    model = Estimator()
    if xp is not np:
        model.to_device("@cupy:0")    
    optimizer = Adam(float(config["lr"]))
    optimizer.setup(model)

    # iterator, updater, trainer, extension
    train_dataset = TupleDataset(train_scores, train_score_lvs)
    train_iterator = SerialIterator(train_dataset, int(config["batch_size"]))
    val_dataset = TupleDataset(val_scores, val_score_lvs, val_score_names)
    val_iterator = SerialIterator(val_dataset, int(config["batch_size"]), repeat=False, shuffle=False)

    updater = EstimatorUpdater(iterator=train_iterator, optimizer=optimizer)
    trainer = Trainer(updater, stop_trigger=(config["epochs"], "epoch"), out=result_dir_train)

    trainer.extend(Validator(val_iterator, result_dir_val), trigger=(1, "epoch"))
    trainer.extend(extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")), trigger=(1, "epoch"))
    trainer.extend(extensions.PrintReport(["epoch", "train/loss", "train/acc", "val/loss", "val/acc", "val/rough_acc"]))
    trainer.extend(extensions.ProgressBar(update_interval=5))

    if restart:
        # Identify the snapshot from which to resume training
        snapshot_path_format = os.path.join(result_dir_train, "snapshot_epoch_*")
        snapshots = [os.path.basename(fname) for fname in glob.glob(snapshot_path_format)]
        if len(snapshots) == 0:
            print("There is no snapshot to restart training from.")
            exit()
        else:
            pattern = re.compile("snapshot_epoch_([0-9]+)")
            snapshot_epochs = list(map(lambda x: int(pattern.search(x).group(1)), snapshots))
            prev_snapshot_idx = snapshot_epochs.index(max(snapshot_epochs))
            prev_snapshot = snapshots[prev_snapshot_idx]
            
        load_npz(os.path.join(result_dir_train, prev_snapshot), trainer)

    shutil.copy2(CONFIG_FILE, result_dir)

    return trainer
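
setup() configures the trainer but does not start it. A typical entry point for this module, assuming nothing else needs to run between setup and training:

if __name__ == "__main__":
    trainer = setup()
    trainer.run()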
Example #24
0
def main():
    parser = argparse.ArgumentParser(
        description='Chainer example: POS-tagging')
    parser.add_argument('--batchsize', '-b', type=int, default=30,
                        help='Number of sentences in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    if chainer.get_dtype() == numpy.float16:
        warnings.warn(
            'This example may cause NaN in FP16 mode.', RuntimeWarning)

    vocab = collections.defaultdict(lambda: len(vocab))
    pos_vocab = collections.defaultdict(lambda: len(pos_vocab))

    # Convert word sequences and pos sequences to integer sequences.
    nltk.download('brown')
    data = []
    for sentence in nltk.corpus.brown.tagged_sents():
        xs = numpy.array([vocab[lex] for lex, _ in sentence], numpy.int32)
        ys = numpy.array([pos_vocab[pos] for _, pos in sentence], numpy.int32)
        data.append((xs, ys))

    print('# of sentences: {}'.format(len(data)))
    print('# of words: {}'.format(len(vocab)))
    print('# of pos: {}'.format(len(pos_vocab)))

    device = chainer.get_device(args.device)
    device.use()

    model = CRF(len(vocab), len(pos_vocab))
    model.to_device(device)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    test_data, train_data = datasets.split_dataset_random(
        data, len(data) // 10, seed=0)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize,
                                                 repeat=False, shuffle=False)
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    evaluator = extensions.Evaluator(
        test_iter, model, device=device, converter=convert)
    # Validate only once every 1000 iterations
    trainer.extend(evaluator, trigger=(1000, 'iteration'))
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')),
                   trigger=(100, 'iteration'))

    trainer.extend(
        extensions.MicroAverage(
            'main/correct', 'main/total', 'main/accuracy'))
    trainer.extend(
        extensions.MicroAverage(
            'validation/main/correct', 'validation/main/total',
            'validation/main/accuracy'))

    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']),
        trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
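
convert is referenced above but not shown. A minimal sketch of a converter for this kind of variable-length input, assuming the CRF model accepts lists of per-sentence arrays as keyword arguments xs and ys (the argument names are assumptions):

import chainer


def convert(batch, device):
    # Sentences differ in length, so keep them as a list of arrays
    # rather than stacking them, moving each one to the target device.
    xs = [chainer.dataset.to_device(device, sentence) for sentence, _ in batch]
    ys = [chainer.dataset.to_device(device, tags) for _, tags in batch]
    return {'xs': xs, 'ys': ys}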
Example #25
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1024,
                        help='number of units')
    parser.add_argument('--layer',
                        '-l',
                        type=int,
                        default=3,
                        help='number of layers')
    parser.add_argument('--min-source-sentence',
                        type=int,
                        default=1,
                        help='minimum length of source sentence')
    parser.add_argument('--max-source-sentence',
                        type=int,
                        default=50,
                        help='maximum length of source sentence')
    parser.add_argument('--min-target-sentence',
                        type=int,
                        default=1,
                        help='minimum length of target sentence')
    parser.add_argument('--max-target-sentence',
                        type=int,
                        default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='directory to output the result')
    args = parser.parse_args()

    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)
    train_data = [
        (s, t) for s, t in six.moves.zip(train_source, train_target)
        if args.min_source_sentence <= len(s) <= args.max_source_sentence
        and args.min_target_sentence <= len(t) <= args.max_target_sentence
    ]
    train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio([t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(200, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/perp',
        'validation/main/perp', 'validation/main/bleu', 'elapsed_time'
    ]),
                   trigger=(200, 'iteration'))

    if args.validation_source and args.validation_target:
        test_source = load_data(source_ids, args.validation_source)
        test_target = load_data(target_ids, args.validation_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])

        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension(trigger=(200, 'iteration'))
        def translate(trainer):
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]

            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)

        trainer.extend(translate, trigger=(4000, 'iteration'))
        trainer.extend(CalculateBleu(model,
                                     test_data,
                                     'validation/main/bleu',
                                     device=args.gpu),
                       trigger=(4000, 'iteration'))

    print('start training')
    trainer.run()
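
calculate_unknown_ratio is not defined in this snippet. A minimal sketch consistent with its use above, assuming the UNK token id is 0:

UNK = 0  # assumed id reserved for out-of-vocabulary tokens


def calculate_unknown_ratio(data):
    # Fraction of all token ids across the dataset that map to <UNK>.
    unknown = sum((s == UNK).sum() for s in data)
    total = sum(s.size for s in data)
    return unknown / total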