Example #1
0
    def test_savefun_and_writer_exclusive(self):
        """savefun and writer must not be given at the same time."""
        def never_called(*args, **kwargs):
            assert False
        simple_writer = extensions.snapshot_writers.SimpleWriter()

        with pytest.raises(TypeError):
            extensions.snapshot(savefun=never_called, writer=simple_writer)

        fake_trainer = mock.MagicMock()
        with pytest.raises(TypeError):
            extensions.snapshot_object(
                fake_trainer, savefun=never_called, writer=simple_writer)
Example #2
0
    def test_save_file(self):
        """snapshot_object with an explicit writer creates the target file."""
        writer = extensions.snapshot_writers.SimpleWriter()
        ext = extensions.snapshot_object(
            self.trainer, 'myfile.dat', writer=writer)
        ext(self.trainer)

        self.assertTrue(os.path.exists('myfile.dat'))
Example #3
0
    def test_clean_up_tempdir(self):
        """No tmp* staging files remain once the snapshot is written."""
        ext = extensions.snapshot_object(self.trainer, 'myfile.dat')
        ext(self.trainer)

        leftovers = [name for name in os.listdir('.')
                     if name.startswith('tmpmyfile.dat')]
        self.assertEqual(len(leftovers), 0)
Example #4
0
def train():
    """Train the SuperResolution model, checkpointing every 500 epochs.

    Writes trainer snapshots and model-only snapshots under ``result`` and
    finally dumps the trained model to ``model.hdf5``.
    """
    model = SuperResolution()
    if DEVICE >= 0:
        chainer.cuda.get_device_from_id(DEVICE).use()
        chainer.cuda.check_cuda_available()
        print("USEDEVICE", DEVICE)
        model.to_gpu()

    images = collect_train_patch('train')

    train_iter = iterators.SerialIterator(images, BATCH_SIZE, shuffle=True)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    updater = SRUpdater(train_iter, optimizer, device=DEVICE)
    trainer = training.Trainer(updater, (10000, 'epoch'), out='result')

    snapshot_interval = (500, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(
        extensions.snapshot_object(
            model, 'model_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)

    if RESUME:
        # Resume from a snapshot
        chainer.serializers.load_npz('result/snapshot_epoch_25.npz', trainer)
    trainer.run()

    chainer.serializers.save_hdf5('model.hdf5', model)
Example #5
0
def train():
    """Train the GAN pair (Generator/Discriminator) on images in ``train/``.

    Snapshots the trainer and both models every 5000 epochs into ``result``.
    """
    gen = Generator()
    dis = Discriminator()

    if DEVICE >= 0:
        chainer.cuda.get_device_from_id(DEVICE).use()
        chainer.cuda.check_cuda_available()
        gen.to_gpu(DEVICE)
        dis.to_gpu(DEVICE)

    def load_patch(name):
        # Normalise one RGB training image to CHW float32 in [0, 1].
        img = Image.open(
            'train/' + name).convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
        pix = np.array(img, dtype=np.float32) / 255.0
        return pix.transpose(2, 0, 1)

    images = [load_patch(name) for name in os.listdir('train')]

    train_iter = iterators.SerialIterator(images, BATCH_SIZE, shuffle=True)

    opt_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_gen.setup(gen)
    opt_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_dis.setup(dis)

    updater = Updater(
        train_iter, {'opt_gen': opt_gen, 'opt_dis': opt_dis}, device=DEVICE)

    trainer = training.Trainer(updater, (100000, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())

    snapshot_interval = (5000, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(
            gen, 'model_gen_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(
        extensions.snapshot_object(
            dis, 'model_dis_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)

    if RESUME:
        chainer.serializers.load_npz('result/snapshot_epoch_26797.npz', trainer)
    trainer.run()
Example #6
0
def get_trainer(optimizer, iter_train, iter_valid, iter_valid_raw,
                class_names, args):
    """Assemble a chainer Trainer for FCN semantic segmentation training.

    Args:
        optimizer: Optimizer already set up; ``optimizer.target`` is the model.
        iter_train: Iterator over the training set.
        iter_valid: Iterator over the (transformed) validation set.
        iter_valid_raw: Iterator over raw validation images for visualization.
        class_names: Label names used by the evaluator/visualizer reports.
        args: Namespace; reads ``gpu``, ``out``, ``max_iteration``,
            ``interval_print`` and ``interval_eval`` (and its ``__dict__``
            for the params report).

    Returns:
        A ``chainer.training.Trainer`` with logging, evaluation,
        visualization, best-miou snapshotting and plots attached.
    """
    model = optimizer.target

    updater = chainer.training.StandardUpdater(
        iter_train, optimizer, device=args.gpu)

    trainer = chainer.training.Trainer(
        updater, (args.max_iteration, 'iteration'), out=args.out)

    trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

    trainer.extend(extensions.ProgressBar(update_interval=5))

    trainer.extend(extensions.LogReport(
        trigger=(args.interval_print, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time',
         'main/loss', 'validation/main/miou']))

    def pred_func(x):
        # Forward pass for its side effect; the score map is then read back
        # from the model attribute.
        model(x)
        return model.score

    trainer.extend(
        fcn.extensions.SemanticSegmentationVisReport(
            pred_func, iter_valid_raw,
            transform=fcn.datasets.transform_lsvrc2012_vgg16,
            class_names=class_names, device=args.gpu, shape=(4, 2)),
        trigger=(args.interval_eval, 'iteration'))

    trainer.extend(
        chainercv.extensions.SemanticSegmentationEvaluator(
            iter_valid, model, label_names=class_names),
        trigger=(args.interval_eval, 'iteration'))

    # Keep only the best model as judged by validation mean IoU.
    trainer.extend(extensions.snapshot_object(
        target=model, filename='model_best.npz'),
        trigger=chainer.training.triggers.MaxValueTrigger(
            key='validation/main/miou',
            trigger=(args.interval_eval, 'iteration')))

    # NOTE(review): assert is stripped under `python -O`; an explicit check
    # raising a real error would be more robust.
    assert extensions.PlotReport.available()
    trainer.extend(extensions.PlotReport(
        y_keys=['main/loss'], x_key='iteration',
        file_name='loss.png', trigger=(args.interval_print, 'iteration')))
    trainer.extend(extensions.PlotReport(
        y_keys=['validation/main/miou'], x_key='iteration',
        file_name='miou.png', trigger=(args.interval_print, 'iteration')))

    return trainer
Example #7
0
File: train.py Project: Pinafore/qb
def main(model):
    """Train the buzzer model, keeping the snapshot with the best
    validation accuracy.

    Args:
        model: A chainer model exposing ``model_dir`` where the trainer
            output and the best snapshot (``buzzer.npz``) are written.
    """
    train = read_data(BUZZER_TRAIN_FOLD)
    valid = read_data(BUZZER_DEV_FOLD)
    print('# train data: {}'.format(len(train)))
    print('# valid data: {}'.format(len(valid)))

    train_iter = chainer.iterators.SerialIterator(train, 64)
    valid_iter = chainer.iterators.SerialIterator(
            valid, 64, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        converter=convert_seq, device=0)
    trainer = training.Trainer(updater, (20, 'epoch'),
                               out=model.model_dir)

    trainer.extend(extensions.Evaluator(
        valid_iter, model,
        converter=convert_seq, device=0))

    # Snapshot only when validation accuracy reaches a new maximum.
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'buzzer.npz'),
        trigger=record_trigger)

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    trainer.extend(extensions.ProgressBar())

    # makedirs(exist_ok=True) replaces the race-prone isdir()+mkdir() pair
    # and also creates any missing parent directories of the output path.
    os.makedirs(model.model_dir, exist_ok=True)

    # Run the training
    trainer.run()
Example #8
0
    def test_on_error(self):
        """With snapshot_on_error=True a snapshot appears only after a crash."""

        class TheOnlyError(Exception):
            pass

        @training.make_extension(trigger=(1, 'iteration'), priority=100)
        def blow_up(trainer):
            raise TheOnlyError()

        self.trainer.extend(blow_up)
        self.trainer.extend(extensions.snapshot_object(
            self.trainer, self.filename, snapshot_on_error=True))

        # Nothing is written before the run...
        self.assertFalse(os.path.exists(self.filename))

        with self.assertRaises(TheOnlyError):
            self.trainer.run()

        # ...but the error-triggered snapshot exists afterwards.
        self.assertTrue(os.path.exists(self.filename))
Example #9
0
def train(num_loop):
    """Train CnnModel on MNIST for ``num_loop`` epochs on GPU 0.

    Saves a model-only snapshot (``cnn_<epoch>.npz``) into ``result``
    at every epoch.

    Args:
        num_loop (int): Number of training epochs.
    """
    chainer.cuda.get_device_from_id(0).use()
    model = CnnModel()
    model.to_gpu()

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    minibatch_size = 1000
    train, test = datasets.get_mnist(ndim=3)
    iterator = iterators.SerialIterator(train, minibatch_size)
    updater = training.StandardUpdater(iterator, optimizer, device=0)

    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists() followed by os.mkdir().
    os.makedirs('result', exist_ok=True)
    trainer = training.Trainer(updater, (num_loop, 'epoch'), out='result')
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.snapshot_object(
        model, 'cnn_{.updater.epoch}.npz'), trigger=(1, 'epoch'))

    print('start to train')
    trainer.run()
    print('finish to train')
Example #10
0
def run(datasetPath, resultPath, modelPath="", resumePath=""):
    """Train an Alex classifier on a VOC-style dataset.

    Args:
        datasetPath: A single dataset path (str) or a list of paths.
        resultPath: Output directory for trainer artifacts.
        modelPath: Optional path to pretrained model weights (npz).
        resumePath: Optional trainer snapshot to resume from.

    Raises:
        TypeError: If ``datasetPath`` is neither str nor list.
    """
    # set dataset
    if isinstance(datasetPath, str):
        ds = datasetVOC(datasetPath, 32)
    elif isinstance(datasetPath, list):
        ds = datasetVOCs(datasetPath, 32)
    else:
        # TypeError is the precise error for a wrongly-typed argument; as a
        # subclass of Exception, existing broad handlers still catch it.
        raise TypeError("データセットパスの型が不正です。")
    train, test = ds.getDataset()

    # set model
    model = chainer.links.Classifier(Alex())
    if os.path.isfile(modelPath):
        chainer.serializers.load_npz(modelPath, model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # set evaluation model (train flag off for evaluation-time behavior)
    eval_model = model.copy()
    eval_model.train = False

    # train and test
    train_iter = chainer.iterators.SerialIterator(train, BATCH_SIZE)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCH_SIZE, repeat=False, shuffle=False)
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"), out=resultPath)
    trainer.extend(extensions.Evaluator(test_iter, eval_model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport(
            ["epoch", "main/loss", "validation/main/loss", "main/accuracy", "validation/main/accuracy"]
        )
    )
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(extensions.snapshot_object(model, filename="model_epoch_{.updater.epoch}"))
    trainer.extend(extensions.dump_graph("main/loss"))
    if os.path.isfile(resumePath):
        chainer.serializers.load_npz(resumePath, trainer)
    trainer.run()
Example #11
0
    def test_save_file(self):
        """snapshot_object with the default writer creates the target file."""
        ext = extensions.snapshot_object(self.trainer, 'myfile.dat')
        ext(self.trainer)

        self.assertTrue(os.path.exists('myfile.dat'))
    model.rcnn_train = True

    # optimizer = optimizers.Adam()
    # optimizer.setup(model)
    optimizer = optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=0)
    trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn')
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss_cls',
        'main/cls_accuracy',
        'main/loss_bbox',
        'main/loss_rcnn',
        'elapsed_time',
    ]), trigger=(100, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'),
        trigger=(1000, 'iteration'))
    trainer.extend(extensions.PlotReport(['main/loss_rcnn'],
                                         trigger=(100, 'iteration')))
    trainer.extend(extensions.PlotReport(['main/cls_accuracy'],
                                         trigger=(100, 'iteration')))
    trainer.extend(
        extensions.dump_graph('main/loss_rcnn', out_name='loss_rcnn.dot'))

    trainer.run()
Example #13
0
def main():
    """ChainerMN DCGAN training entry point.

    Parses CLI options, sets up a multi-node communicator, builds the
    generator/discriminator pair with multi-node optimizers, scatters the
    dataset from rank 0, and runs the training loop.  Display and snapshot
    extensions are attached only on rank 0 to avoid duplicated output.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files.  Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--gen_model', '-r', default='',
                        help='Use pre-trained generator for training')
    parser.add_argument('--dis_model', '-d', default='',
                        help='Use pre-trained discriminator for training')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.

    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        # Device id is taken from the process's intra-node rank.
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Configuration banner, printed once (rank 0 only).
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num hidden unit: {}'.format(args.n_hidden))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    if device >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(device).use()
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()

    # Setup an optimizer
    def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
        # Create a multi node optimizer from a standard Chainer optimizer.
        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(alpha=alpha, beta1=beta1), comm)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen, comm)
    opt_dis = make_optimizer(dis, comm)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        if args.dataset == '':
            # Load the CIFAR10 dataset if args.dataset is not specified
            train, _ = chainer.datasets.get_cifar10(withlabel=False,
                                                    scale=255.)
        else:
            all_files = os.listdir(args.dataset)
            image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
            print('{} contains {} image files'
                  .format(args.dataset, len(image_files)))
            train = chainer.datasets\
                .ImageDataset(paths=image_files, root=args.dataset)
    else:
        train = None

    train = chainermn.scatter_dataset(train, comm)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Set up a trainer
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        snapshot_interval = (args.snapshot_interval, 'iteration')
        display_interval = (args.display_interval, 'iteration')
        # Save only model parameters.
        # `snapshot` extension will save all the trainer module's attribute,
        # including `train_iter`.
        # However, `train_iter` depends on scattered dataset, which means that
        # `train_iter` may be different in each process.
        # Here, instead of saving whole trainer module, only the network models
        # are saved.
        trainer.extend(extensions.snapshot_object(
            gen, 'gen_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.snapshot_object(
            dis, 'dis_iter_{.updater.iteration}.npz'),
            trigger=snapshot_interval)
        trainer.extend(extensions.LogReport(trigger=display_interval))
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time',
        ]), trigger=display_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(
            out_generated_image(
                gen, dis,
                10, 10, args.seed, args.out),
            trigger=snapshot_interval)

    # Start the training using pre-trained model, saved by snapshot_object
    if args.gen_model:
        chainer.serializers.load_npz(args.gen_model, gen)
    if args.dis_model:
        chainer.serializers.load_npz(args.dis_model, dis)

    # Run the training
    trainer.run()
Example #14
0
def train(args):
    '''Run E2E speech-recognition training.

    Builds the PyTorch model, wraps it into Chainer's training loop via a
    custom updater/evaluator, and extends the trainer with snapshotting,
    plotting, best-model saving and (for adadelta) epsilon decay driven by
    validation metrics.

    Args:
        args: Parsed namespace; reads ``seed``, ``debugmode``,
            ``train_label``/``valid_label``, ``train_feat``/``valid_feat``,
            ``outdir``, ``ngpu``, ``opt``, ``eps``, ``eps_decay``,
            ``criterion``, ``mtlalpha``, batch/length limits,
            ``grad_clip``, ``epochs`` and ``resume``.
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['idim'])
    odim = int(valid_json[utts[0]]['odim'])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    if args.ngpu > 1:
        logging.warn(
            "currently, pytorch does not support multi-gpu. use single gpu.")
    if args.ngpu > 0:
        gpu_id = 0
        # Make a specified GPU current
        model.cuda(gpu_id)  # Copy the model to the GPU
    else:
        gpu_id = -1

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK
    # Chainer's trainer expects optimizer.target / optimizer.serialize;
    # graft them onto the torch optimizer so the reporter is serialized.
    setattr(optimizer, "target", model.reporter)
    setattr(optimizer, "serialize", lambda s: model.reporter.serialize(s))

    # read json data
    with open(args.train_label, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_label, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size, args.maxlen_in,
                          args.maxlen_out, args.minibatches)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  1,
                                                  repeat=False,
                                                  shuffle=False)

    # prepare Kaldi reader
    train_reader = lazy_io.read_dict_scp(args.train_feat)
    valid_reader = lazy_io.read_dict_scp(args.valid_feat)

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(model, args.grad_clip, train_iter,
                                     optimizer, train_reader, gpu_id)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        PytorchSeqEvaluaterKaldi(model,
                                 valid_iter,
                                 model.reporter,
                                 valid_reader,
                                 device=gpu_id))

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    def torch_save(path, _):
        # Save both the raw state_dict and the whole pickled module.
        torch.save(model.state_dict(), path)
        torch.save(model, path + ".pkl")

    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    trainer.extend(
        extensions.snapshot_object(model, 'model.acc.best',
                                   savefun=torch_save),
        trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        # Restore weights into the live model; the trainer object is untouched.
        model.load_state_dict(torch.load(path))
        return obj

    if args.opt == 'adadelta':
        if args.criterion == 'acc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
Example #15
0
def main():
    """Train the embedding compressor (Gumbel-softmax codebook model).

    Parses CLI options, prepares dataset/model/optimizer, and runs the
    iteration-based training loop, snapshotting the model whenever the
    validation loss reaches a new minimum.
    """
    parser = argparse.ArgumentParser(description='Embedding Compressor',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of sentences in each mini-batch')
    parser.add_argument('--iter', '-i', dest='iteration', type=int, default=200000,
                        help='Number of iterations')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--optimizer', '-O', dest='optimizer', type=str, default='Adam',
                        choices=['Adam', 'SGD'], help='Type of optimizer')
    parser.add_argument('--learning-rate', '--lr', dest='learning_rate', type=float, default=0.0001,
                        help='learning rate')
    # BUG FIX: '--M' '-M' (and '--K' '-K') was implicit string concatenation,
    # registering a single bogus option '--M-M' / '--K-K' instead of the two
    # intended option strings; split into separate arguments.
    parser.add_argument('--M', '-M', dest='n_codebooks', type=int, default=32,
                        help='Number of Codebooks')
    parser.add_argument('--K', '-K', dest='n_centroids', type=int, default=16,
                        help='Number of Centroids (Number of vectors in each codebook)')
    parser.add_argument('--tau', dest='tau', type=float, default=1.0,
                        help='Tau value in Gumbel-softmax')

    # Arguments for the dataset / vocabulary path
    parser.add_argument('--input-matrix', dest='input_matrix', required=True,
                        help='path to the matrix (npy)')

    # Random Seed
    parser.add_argument('--seed', default=0, type=int, help='Seed for Random Module')

    # Arguments for directory
    parser.add_argument('--out', '-o', default='./result', type=os.path.abspath,
                        help='Directory to output the result')
    parser.add_argument('--dir-prefix', dest='dir_prefix', default='model', type=str, help='Prefix of the output dir')
    args = parser.parse_args()
    set_random_seed(args.seed, args.gpu)

    # Record the experiment environment for reproducibility.
    resource = Resource(args, train=True)
    resource.dump_git_info()
    resource.dump_command_info()
    resource.dump_python_info()
    resource.dump_chainer_info()
    resource.save_config_file()

    logger = resource.logger

    dataset = DataProcessor(resource.log_name)
    dataset.load_embed_matrix(args.input_matrix)
    train_data = dataset.load_data('train')
    valid_data = dataset.load_data('dev')
    model = EmbeddingCompressor(
        n_vocab=dataset.embed_matrix.shape[0],
        embed_dim=dataset.embed_matrix.shape[1],
        n_codebooks=args.n_codebooks,
        n_centroids=args.n_centroids,
        tau=args.tau,
        embed_mat=dataset.embed_matrix
    )

    if args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learning_rate)
    else:
        optimizer = chainer.optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(model)
    logger.info('Optimizer is set to [{}]'.format(args.optimizer))
    model.embed_mat.disable_update()  # call this after optimizer.setup()
    logger.info('Updating Embedding Layer is Disabled')

    # Send model to GPU (according to the arguments)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu(args.gpu)

    train_iter = SerialIterator(dataset=train_data, batch_size=args.batchsize, shuffle=True)

    updater = training.updater.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'), out=resource.output_dir)

    short_term = (1000, 'iteration')

    dev_iter = SerialIterator(valid_data, args.batchsize, repeat=False)
    trainer.extend(
        extensions.Evaluator(dev_iter, model, device=args.gpu), trigger=short_term)
    trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.extend(extensions.LogReport(trigger=short_term, log_name='chainer_report_iteration.log'),
                   trigger=short_term, name='iteration')
    trainer.extend(extensions.LogReport(trigger=short_term, log_name='chainer_report_epoch.log'), trigger=short_term,
                   name='epoch')
    # Keep only the model that achieves a new minimum validation loss.
    trainer.extend(extensions.snapshot_object(model, 'iter_{.updater.iteration}.npz', savefun=save_non_embed_npz),
                   trigger=MinValueTrigger('validation/main/loss', short_term))

    entries = ['epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/maxp', 'validation/main/maxp']
    trainer.extend(extensions.PrintReport(entries=entries, log_report='iteration'), trigger=short_term)
    trainer.extend(extensions.PrintReport(entries=entries, log_report='epoch'), trigger=short_term)

    logger.info('Start training...')
    trainer.run()
    logger.info('Training complete!!')
    resource.dump_duration()
Example #16
0
def main(fi):
    """Train a word2vec-initialised BLSTM classifier on Twitter data.

    NOTE(review): this function uses Python 2 builtins (`unicode`,
    `dict.iteritems`) and will not run unmodified on Python 3.
    The `fi` parameter is accepted but never used -- presumably a
    leftover file handle; TODO confirm with callers before removing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--w2v', default='../twitter_model.bin',
                        help='')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=300,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Build the vocabulary from the first tab-separated column of each
    # line; ids are assigned by enumeration of the (unordered) set, so
    # the word->id mapping is not stable across runs.
    with open('data/vocab_dict.txt', "r") as f_dict:
        vocab = set(unicode(l.split('\t')[0]) for l in f_dict)
        vocab_dict = {w: i for i, w in enumerate(vocab)}
        # vocab_dict['<EOS>'] = len(vocab_dict)
        # vocab_dict['<BOS>'] = len(vocab_dict)
    train = get_dataset('data/twitter.train.sort', vocab_dict)
    val = get_dataset('data/twitter.dev', vocab_dict)
    test = get_dataset('data/twitter.test', vocab_dict)

    # +1 reserves one extra id -- presumably padding/unknown; confirm
    # against the BLSTM embedding layer.
    n_vocab = len(vocab_dict) + 1
    print('#vocab =', n_vocab)

    # Pre-trained word2vec model used to seed the embedding matrix.
    w2v_model = word2vec.Word2Vec.load(args.w2v)

    train_iter = MyIterator(train, args.batchsize)
    val_iter = MyIterator(val, 1, repeat=False)
    test_iter = MyIterator(test, 1, repeat=False)

    # 2-class BLSTM classifier; copy word2vec vectors into the embedding
    # rows for every vocabulary word the w2v model knows about.
    blstm = BLSTM(n_vocab, args.unit, 2)
    model = L.Classifier(blstm)
    for key, index in vocab_dict.iteritems():
        key = unicode(key)
        if key in w2v_model:
            blstm.embed.W.data[index] = w2v_model[key]

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = MyUpdater(train_iter, optimizer, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluation copy run in test mode; NOTE(review): Chainer's
    # Link.copy() shares parameters with the original by default --
    # presumably intended so evaluation tracks training weights.
    eval_model = model.copy()
    eval_model.train = False
    trainer.extend(MyEvaluator(
        val_iter, eval_model, device=args.gpu,
        eval_hook=lambda _: eval_model.predictor.reset_state()), priority=100)

    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')), priority=90)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/accuracy', 'main/validation/accuracy']
    ), trigger=(1, 'epoch'), priority=80)
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10), priority=0)
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter' + '_{.updater.epoch}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Final evaluation on the test split; MyEvaluator reports under the
    # 'main/validation/' prefix (see the key used below).
    print('test')
    eval_model.predictor.reset_state()
    evaluator = MyEvaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test accuracy:', result['main/validation/accuracy'])
    def __init__(self):
        """Register the three fully-connected layers of the 784-100-100-10 MLP."""
        layers = {
            'l1': L.Linear(784, 100),
            'l2': L.Linear(100, 100),
            'l3': L.Linear(100, 10),
        }
        super(MnistModel, self).__init__(**layers)

    def __call__(self, x):
        """Forward pass: two ReLU-activated hidden layers, then raw logits."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)
def save_pkl(filename, obj):
    """Serialize *obj* to *filename* with pickle (binary mode)."""
    with open(filename, 'wb') as sink:
        pickle.dump(obj, sink)

# MNIST MLP training script (CPU).  The Classifier wrapper reports
# softmax cross entropy loss and accuracy for the 3-layer MLP.
model = L.Classifier(MnistModel())
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

train, test = chainer.datasets.get_mnist()
train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)

updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (100, 'epoch'), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar())
# BUG FIX: `trigger` is not a parameter of extensions.snapshot_object()
# (passing it there raises TypeError); the trigger belongs to
# Trainer.extend().  Snapshot the model (pickled via save_pkl) whenever
# the validation loss reaches a new minimum.
trainer.extend(
    extensions.snapshot_object(model, filename='model.pkl', savefun=save_pkl),
    trigger=MinValueTrigger(key='validation/main/loss'))

trainer.run()
Example #18
0
                                                      device=args.gpu,
                                                      converter=convert_seq)
    trigger = chainer.training.triggers.MaxValueTrigger(
        key='validation/main/accuracy', trigger=(1, 'epoch'))

    trainer.extend(evaluator, trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(
        log_name='log/domain-{0}_case-{1}.log'.format(domain, case)),
                   trigger=(1, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.snapshot_object(
        model,
        savefun=serializers.save_npz,
        filename='domain-{0}_case-{1}_epoch-{{.updater.epoch}}.npz'.format(
            domain, case)),
                   trigger=trigger)
    trainer.extend(
        extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            file_name='accuracy_domain-{0}_case-{1}.png'.format(domain, case),
            x_key='epoch'))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              file_name='loss_domain-{0}_case-{1}.png'.format(
                                  domain, case),
                              x_key='epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
Example #19
0
def train_model():
    """Train a UNet segmentation model on labeled image pairs.

    Reads train/val file lists and a mean image from the dataset
    directory, trains with Adam, and logs to both Chainer reports and
    TensorBoard.  Crop sizes must be multiples of 16 -- presumably to
    match UNet's down/up-sampling factor; confirm against the model.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('dataset', help='Path to directory containing train.txt, val.txt, and mean.npy')
    parser.add_argument('images',  help='Root directory of input images')
    parser.add_argument('labels',  help='Root directory of label images')

    parser.add_argument('--batchsize', '-b', type=int, default=16,
                        help='Number of images in each mini-batch')
    parser.add_argument('--test-batchsize', '-B', type=int, default=4,
                        help='Number of images in each test mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='logs',
                        help='Directory to output the result under "models" directory')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')

    parser.add_argument('--tcrop', type=int, default=400,
                        help='Crop size for train-set images')
    parser.add_argument('--vcrop', type=int, default=480,
                        help='Crop size for validation-set images')

    args = parser.parse_args()

    assert (args.tcrop % 16 == 0) and (args.vcrop % 16 == 0), "tcrop and vcrop must be divisible by 16."

    # Pick the TensorBoard logger variant matching the device.
    if args.gpu < 0:
        from tboard_logger_cpu import TensorboardLogger
    else:
        from tboard_logger import TensorboardLogger

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# Crop-size: {}'.format(args.tcrop))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Output directory resolves to <repo>/models/<args.out>.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.normpath(os.path.join(this_dir, "../../models"))
    log_dir = os.path.join(models_dir, args.out)
    writer = SummaryWriter(log_dir=log_dir)

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = UNet()
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load mean image
    mean = np.load(os.path.join(args.dataset, "mean.npy"))

    # Load the labeled image datasets (train and validation splits);
    # training crops differ from validation crops.
    train = LabeledImageDataset(os.path.join(args.dataset, "train.txt"), args.images, args.labels, 
                                mean=mean, crop_size=args.tcrop, test=False, distort=False)

    test = LabeledImageDataset (os.path.join(args.dataset, "val.txt"), args.images, args.labels, 
                                mean=mean, crop_size=args.vcrop, test=True, distort=False)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.test_batchsize, repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=log_dir)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Save the trained model every `frequency` epochs.
    # NOTE(review): the filename embeds the iteration count even though
    # the trigger fires on epochs -- confirm this naming is intended.
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Write training log to TensorBoard log file
    trainer.extend(TensorboardLogger(writer,
        ['main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Example #20
0
def main():
    """Train Faster R-CNN (VGG16 backbone) on VOC2007 trainval.

    Runs for a fixed number of iterations, decays the learning rate once
    at `step_size`, and saves only the final detector weights.
    Evaluation on the test split is present but commented out.
    """
    parser = argparse.ArgumentParser(
        description='Faster R-CNN Chainer version')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDataset(split='trainval', year='2007')
    # test_data = VOCDataset(split='test', year='2007',
    #                        use_difficult=True, return_difficult=True)

    # Detector with ImageNet-pretrained backbone; 'evaluate' preset sets
    # the suppression thresholds used at inference time.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_detection_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Apply the Faster R-CNN preprocessing (scaling/augmentation) lazily.
    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    # batch_size=1: Faster R-CNN training processes one image at a time.
    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    # test_iter = chainer.iterators.SerialIterator(
    #         test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Save only the detector (not the training chain) at the very end.
    trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                              'snapshot_model.npz'),
                   trigger=(args.iteration, 'iteration'))
    # Single 10x learning-rate decay at step_size iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 5, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.run()
Example #21
0
def main():
    """Train a CNN classifier on MNIST with Chainer and keep the best model."""
    # Read command-line arguments.
    cli = argparse.ArgumentParser(description='Chainer MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=20, help='Batch size')
    cli.add_argument('--epoch', '-e', type=int, default=20, help='Epoch')
    cli.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID')
    cli.add_argument('--out', '-o', default='result', help='output directory')
    opts = cli.parse_args()

    print('GPU: {}'.format(opts.gpu))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('')

    # Model: CNN wrapped in a Classifier reporting softmax cross entropy.
    net = L.Classifier(CNN(), lossfun=F.softmax_cross_entropy)
    if opts.gpu >= 0:
        chainer.cuda.get_device_from_id(opts.gpu).use()
        net.to_gpu()

    # Adam optimizer.
    adam = chainer.optimizers.Adam()
    adam.setup(net)

    # MNIST with 3-D samples (channel, height, width) for the CNN.
    train_set, test_set = chainer.datasets.get_mnist(ndim=3)

    # Mini-batch iterators over both splits.
    it_train = chainer.iterators.SerialIterator(train_set, opts.batchsize)
    it_test = chainer.iterators.SerialIterator(test_set, opts.batchsize, repeat=False, shuffle=False)

    # Updater / trainer pair.
    upd = training.StandardUpdater(it_train, adam, device=opts.gpu)
    loop = training.Trainer(upd, (opts.epoch, 'epoch'), out=opts.out)

    # Evaluate on the test split each epoch.
    loop.extend(extensions.Evaluator(it_test, net, device=opts.gpu))

    # Dump the computational graph rooted at the loss.
    loop.extend(extensions.dump_graph('main/loss'))

    # Reporting: log file first...
    loop.extend(extensions.LogReport())

    # ...then loss/accuracy plots when matplotlib is available...
    if extensions.PlotReport.available():
        loop.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        loop.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))

    # ...and a console table.
    loop.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Snapshot the model whenever validation accuracy improves.
    best_acc = triggers.MaxValueTrigger('validation/main/accuracy', trigger=(1, 'epoch'))
    loop.extend(extensions.snapshot_object(net, filename='mnist-cnn-best'), trigger=best_acc)

    # Progress bar.
    loop.extend(extensions.ProgressBar())

    # Run the training loop.
    loop.run()

    # Persist the final model weights.
    serializers.save_npz('mnist-cnn.npz', net)
Example #22
0
def create_trainer(
    config: TrainConfig,
    project_path: str,
    updater,
    model: typing.Dict,
    eval_func,
    iterator_test,
    iterator_train_eval,
    loss_names,
    converter=chainer.dataset.convert.concat_examples,
    log_name='log.txt',
):
    """Assemble a Trainer with periodic snapshots, graph dump, evaluation
    on both test and train-eval iterators, and logging/printing of every
    per-model loss."""
    trainer = chainer.training.Trainer(updater, out=project_path)

    log_trigger = (config.log_iteration, 'iteration')
    save_trigger = (config.save_iteration, 'iteration')

    eval_test_name = 'eval/test'
    eval_train_name = 'eval/train'

    # Snapshot each sub-model on the save interval.
    for part in ('encoder', 'generator', 'mismatch_discriminator'):
        trainer.extend(
            extensions.snapshot_object(
                model[part], part + '{.updater.iteration}.model'),
            trigger=save_trigger)

    # Dump one computation graph covering the first loss of each sub-model.
    graph_keys = [
        part + '/' + loss_names[0]
        for part in ('encoder', 'generator', 'mismatch_discriminator')
    ]
    trainer.extend(utility.chainer.dump_graph(graph_keys, out_name='main.dot'))

    def _evaluator_for(iterator):
        # One evaluator instance per iterator, all sharing the same targets.
        return utility.chainer.NoVariableEvaluator(
            iterator,
            target=model,
            converter=converter,
            eval_func=eval_func,
            device=config.gpu,
        )

    trainer.extend(_evaluator_for(iterator_test),
                   name=eval_test_name,
                   trigger=log_trigger)
    trainer.extend(_evaluator_for(iterator_train_eval),
                   name=eval_train_name,
                   trigger=log_trigger)

    # Report every (evaluator, model, loss) combination.
    report_target = [
        evaluator_prefix + model_prefix + loss_name
        for evaluator_prefix in ['', eval_test_name + '/', eval_train_name + '/']
        for model_prefix in [s + '/' for s in model.keys()]
        for loss_name in set(loss_names)
    ]

    trainer.extend(extensions.LogReport(trigger=log_trigger,
                                        log_name=log_name))
    trainer.extend(extensions.PrintReport(report_target))

    return trainer
Example #23
0
def train(args):
    '''Run training of the E2E ASR model described by ``args``
    (optionally with an adversarial output branch when ``args.adv`` is set).'''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))
    odim_adv = None
    if args.adv:
        odim_adv = int(valid_json[utts[0]]['output'][1]['shape'][1])
        logging.info('#output dims adversarial: ' + str(odim_adv))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args, odim_adv=odim_adv)
    model = Loss(e2e, args.mtlalpha)

    if args.rnnlm is not None:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer,
                             rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        e2e.rnnlm = rnnlm

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, odim_adv, vars(args)),
                       indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    # First distinguish between learning rates
    if args.ngpu > 1:
        param_grp = [{
            'params': model.module.predictor.enc.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.module.predictor.dec.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.module.predictor.adv.parameters(),
            'lr': args.adv_lr
        }]
    else:
        param_grp = [{
            'params': model.predictor.enc.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.predictor.dec.parameters(),
            'lr': args.asr_lr
        }, {
            'params': model.predictor.adv.parameters(),
            'lr': args.adv_lr
        }]
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(param_grp, rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(param_grp)

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(e2e.subsample[0])

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to make batch size argument as 1
    # actual batch size is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Prepare adversarial training schedule dictionary
    adv_schedule = get_advsched(args.adv, args.epochs)

    # Set up a trainer
    updater = CustomUpdater(model,
                            args.grad_clip,
                            train_iter,
                            optimizer,
                            converter,
                            device,
                            args.ngpu,
                            adv_schedule=adv_schedule,
                            max_grlalpha=args.grlalpha)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        #torch_resume(args.resume, trainer, weight_sharing=args.weight_sharing)
        torch_resume(args.resume,
                     trainer,
                     weight_sharing=args.weight_sharing,
                     reinit_adv=args.reinit_adv)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.predictor.calculate_all_attentions
        else:
            att_vis_fn = model.predictor.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + "/att_ws",
                                           converter=converter,
                                           device=device),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att', 'main/loss_adv',
            'validation/main/loss_adv'
        ],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport([
            'main/acc', 'validation/main/acc', 'main/acc_adv',
            'validation/main/acc_adv'
        ],
                              'epoch',
                              file_name='acc.png'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # BUG FIX: was `mtl_mode is not 'ctc'` -- identity comparison with a
    # string literal only works by accident of CPython interning and is a
    # SyntaxWarning on Python >= 3.8; use equality instead.
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model,
                                       'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        # BUG FIX: same `is not 'ctc'` identity comparison as above.
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    if args.report_cer:
        report_keys.append('validation/main/cer')
    if args.report_wer:
        report_keys.append('validation/main/wer')
    if args.adv:
        report_keys.extend([
            'main/loss_adv', 'main/acc_adv', 'validation/main/loss_adv',
            'validation/main/acc_adv'
        ])
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Example #24
0
                                        model,
                                        converter=convert_rsd_batch),
                   trigger=parse_trigger(args.log_trigger))
    trainer.extend(
        extensions.LogReport(trigger=parse_trigger(args.log_trigger)))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              file_name=args.plot_loss_file,
                              trigger=parse_trigger(args.log_trigger)))
    trainer.extend(
        extensions.PlotReport(['main/f1', 'validation/main/f1'],
                              file_name=args.plot_f1_file,
                              trigger=parse_trigger(args.log_trigger)))
    trainer.extend(
        extensions.PlotReport(['main/recall', 'validation/main/recall'],
                              file_name=args.plot_recall_file,
                              trigger=parse_trigger(args.log_trigger)))
    trainer.extend(
        extensions.PlotReport(['main/precision', 'validation/main/precision'],
                              file_name=args.plot_precision_file,
                              trigger=parse_trigger(args.log_trigger)))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss', 'main/f1',
        'main/precision', 'main/recall'
    ]),
                   trigger=parse_trigger(args.print_trigger))
    trainer.extend(extensions.snapshot_object(model, args.model_file),
                   trigger=parse_trigger(args.store_model_trigger))

    trainer.run()
Example #25
0
def main():
    """Train Faster R-CNN (VGG16 backbone) on PASCAL VOC with ChainerCV.

    Builds the VOC07 (or VOC07+12) training set, fine-tunes an
    ImageNet-pretrained FasterRCNNVGG16 with momentum SGD, and evaluates
    detection mAP on the VOC07 test set at the LR-drop point and at the
    end of training.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Select the training split; 'voc0712' concatenates both trainval sets.
    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    # Evaluation uses the VOC07 test split including "difficult" objects.
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Apply augmentation / preprocessing on the fly.
    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Save only the detector weights (not the whole trainer) once, at the
    # final iteration.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    # Decay the learning rate by 10x after step_size iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # Evaluate detection mAP right after the LR drop and at the end.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #26
0
def main():
    """Train the Blending GAN: encoder-decoder generator vs. DCGAN critic.

    Parses the full training configuration, builds G and D, loads the
    blending dataset split into train/validation folders, and runs a
    WGAN-style training loop with snapshots, loss plots and periodic
    blended sample images for both train and validation batches.
    """
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    # --- network architecture ---
    parser.add_argument('--nef',
                        type=int,
                        default=64,
                        help='# of base filters in encoder')
    parser.add_argument('--ngf',
                        type=int,
                        default=64,
                        help='# of base filters in decoder')
    parser.add_argument('--nc',
                        type=int,
                        default=3,
                        help='# of output channels in decoder')
    parser.add_argument('--nBottleneck',
                        type=int,
                        default=4000,
                        help='# of output channels in encoder')
    parser.add_argument('--ndf',
                        type=int,
                        default=64,
                        help='# of base filters in D')

    # --- optimization ---
    parser.add_argument('--lr_d',
                        type=float,
                        default=0.0002,
                        help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g',
                        type=float,
                        default=0.002,
                        help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight',
                        type=float,
                        default=0.999,
                        help='Weight for l2 loss, default=0.999')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--n_epoch',
                        type=int,
                        default=25,
                        help='# of epochs to train for')

    # --- data ---
    parser.add_argument('--data_root', help='Path to dataset')
    parser.add_argument('--load_size',
                        type=int,
                        default=64,
                        help='Scale image to load_size')
    parser.add_argument(
        '--image_size',
        type=int,
        default=64,
        help='The height / width of the input image to network')
    parser.add_argument('--ratio',
                        type=float,
                        default=0.5,
                        help='Ratio for center square size v.s. image_size')
    parser.add_argument('--val_ratio',
                        type=float,
                        default=0.05,
                        help='Ratio for validation set v.s. data set')

    # --- WGAN critic schedule / weight clipping ---
    parser.add_argument('--d_iters',
                        type=int,
                        default=5,
                        help='# of D iters per each G iter')
    parser.add_argument('--clamp_lower',
                        type=float,
                        default=-0.01,
                        help='Lower bound for clipping')
    parser.add_argument('--clamp_upper',
                        type=float,
                        default=0.01,
                        help='Upper bound for clipping')

    # --- output / loader / batch sizes ---
    parser.add_argument('--experiment',
                        default='encoder_decoder_blending_result',
                        help='Where to store samples and models')
    parser.add_argument('--test_folder',
                        default='samples',
                        help='Where to store test results')
    parser.add_argument('--workers',
                        type=int,
                        default=10,
                        help='# of data loading workers')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Input batch size')
    parser.add_argument('--test_size',
                        type=int,
                        default=64,
                        help='Batch size for testing')

    parser.add_argument('--train_samples',
                        type=int,
                        default=150000,
                        help='# of training examples')
    parser.add_argument('--test_samples',
                        type=int,
                        default=256,
                        help='# of testing examples')

    parser.add_argument('--manual_seed',
                        type=int,
                        default=5,
                        help='Manul seed')

    # --- snapshot / reporting intervals ---
    parser.add_argument('--resume',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=1,
                        help='Interval of snapshot (epochs)')
    parser.add_argument('--print_interval',
                        type=int,
                        default=1,
                        help='Interval of printing log to console (iteration)')
    parser.add_argument('--plot_interval',
                        type=int,
                        default=10,
                        help='Interval of plot (iteration)')
    args = parser.parse_args()

    random.seed(args.manual_seed)

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up G & D
    print('Create & Init models ...')
    print('\tInit G network ...')
    G = EncoderDecoder(args.nef,
                       args.ngf,
                       args.nc,
                       args.nBottleneck,
                       image_size=args.image_size,
                       conv_init=init_conv,
                       bn_init=init_bn)
    print('\tInit D network ...')
    D = DCGAN_D(args.image_size,
                args.ndf,
                conv_init=init_conv,
                bn_init=init_bn)
    if args.gpu >= 0:
        print('\tCopy models to gpu {} ...'.format(args.gpu))
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
        D.to_gpu()
    print('Init models done ...\n')
    # Setup an optimizer (one per network; G is registered as 'main').
    optimizer_d = make_optimizer(D, args.lr_d, args.beta1)
    optimizer_g = make_optimizer(G, args.lr_g, args.beta1)

    ########################################################################################################################
    # Setup dataset & iterator
    # The first val_ratio fraction of folders (sorted) becomes validation.
    print('Load images from {} ...'.format(args.data_root))
    folders = sorted([
        folder for folder in os.listdir(args.data_root)
        if os.path.isdir(os.path.join(args.data_root, folder))
    ])
    val_end = int(args.val_ratio * len(folders))
    print('\t{} folders in total, {} val folders ...'.format(
        len(folders), val_end))
    trainset = BlendingDataset(args.train_samples, folders[val_end:],
                               args.data_root, args.ratio, args.load_size,
                               args.image_size)
    valset = BlendingDataset(args.test_samples, folders[:val_end],
                             args.data_root, args.ratio, args.load_size,
                             args.image_size)
    print('\tTrainset contains {} image files'.format(len(trainset)))
    print('\tValset contains {} image files'.format(len(valset)))
    print('')
    train_iter = chainer.iterators.MultiprocessIterator(
        trainset,
        args.batch_size,
        n_processes=args.workers,
        n_prefetch=args.workers)
    ########################################################################################################################

    # Set up a trainer
    updater = EncoderDecoderBlendingUpdater(models=(G, D),
                                            args=args,
                                            iterator=train_iter,
                                            optimizer={
                                                'main': optimizer_g,
                                                'D': optimizer_d
                                            },
                                            device=args.gpu)
    trainer = training.Trainer(updater, (args.n_epoch, 'epoch'),
                               out=args.experiment)

    # Snapshot: full trainer state plus each network's weights per interval.
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(G,
                                              'g_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(D,
                                              'd_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    # Display
    print_interval = (args.print_interval, 'iteration')
    trainer.extend(extensions.LogReport(trigger=print_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'D/loss', 'main/l2_loss']),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=args.print_interval))

    trainer.extend(extensions.dump_graph('D/loss', out_name='TrainGraph.dot'))

    # Plot
    plot_interval = (args.plot_interval, 'iteration')

    trainer.extend(extensions.PlotReport(['main/loss'],
                                         'iteration',
                                         file_name='loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['D/loss'],
                                         'iteration',
                                         file_name='d_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['main/l2_loss'],
                                         'iteration',
                                         file_name='l2_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)

    # Eval: periodically write blended samples for fixed train/val batches.
    path = os.path.join(args.experiment, args.test_folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving samples to {} ...\n'.format(path))

    # NOTE(review): Variable(volatile=...) is the Chainer v1 API; it was
    # removed in v2+ — confirm the Chainer version this targets.
    train_batch = [trainset[idx][0] for idx in range(args.test_size)]
    train_v = Variable(chainer.dataset.concat_examples(train_batch, args.gpu),
                       volatile='on')
    trainer.extend(sampler(G, path, train_v, 'fake_samples_train_{}.png'),
                   trigger=plot_interval)

    val_batch = [valset[idx][0] for idx in range(args.test_size)]
    val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu),
                     volatile='on')
    trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'),
                   trigger=plot_interval)

    if args.resume:
        # Resume from a snapshot
        print('Resume from {} ... \n'.format(args.resume))
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Training start ...\n')
    trainer.run()
Example #27
0
)

# set dataset, model and optimizer
train, test = chainer.datasets.get_mnist()


# Classifier wraps the MLP with softmax cross-entropy loss and accuracy.
model = chainer.links.Classifier(MnistMLP())
if os.path.isfile(MODEL_PATH):
    # Warm-start from previously saved weights if they exist.
    chainer.serializers.load_npz(MODEL_PATH, model)
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

# set evaluation model
# NOTE(review): the `train` attribute is Chainer v1-style; newer Chainer
# uses chainer.using_config('train', False) — confirm the target version.
eval_model = model.copy()
eval_model.train = False

# train and test
train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100,repeat=False, shuffle=False)
updater = chainer.training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = chainer.training.Trainer(updater, (10, 'epoch'), out=DESKTOP_PATH + "/result")
trainer.extend(extensions.Evaluator(test_iter, eval_model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
trainer.extend(extensions.ProgressBar(update_interval=5))
# Snapshot the full trainer state and the model (default: every epoch).
trainer.extend(extensions.snapshot())
trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'))
trainer.extend(extensions.dump_graph('main/loss'))
# Resume the whole trainer (optimizer, iterators, ...) from a snapshot.
if os.path.isfile(RESUME_PATH):
    chainer.serializers.load_npz(RESUME_PATH, trainer)
trainer.run()
Example #28
0
def main():
    """Train the LEAM word-classification model.

    Loads pre-tokenized documents and pretrained word2vec embeddings,
    converts documents to fixed-length id sequences, holds out a
    validation split, and trains with Adam while snapshotting the model
    with the lowest validation loss.
    """
    # Command-line options.
    cli = argparse.ArgumentParser(
        description='Chainer example: WordClassification')
    cli.add_argument('--batchsize', '-b', type=int, default=128,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=10,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--unit', '-u', type=int, default=256,
                     help='Number of units')
    cli.add_argument('--window', '-w', type=int, default=20,
                     help='Window Size')
    cli.add_argument('--max-length', type=int, default=200,
                     help='Maximum sentence length')
    args = cli.parse_args()

    # Load the corpus and the pretrained word vectors.
    DATA_DIR = '/baobab/kiyomaru/2018-shinjin/jumanpp.midasi'
    PATH_TO_TRAIN = os.path.join(DATA_DIR, 'train.csv')
    PATH_TO_WE = '/share/data/word2vec/2016.08.02/w2v.midasi.256.100K.bin'
    train_x, train_y = load_data(PATH_TO_TRAIN)
    word_vectors = KeyedVectors.load_word2vec_format(PATH_TO_WE, binary=True)
    word2index = {
        word: index for index, word in enumerate(word_vectors.index2word)
    }

    # Turn each document into a fixed-length sequence of word ids.
    train_ids = assign_id_to_document(train_x, word2index, args.max_length)

    # Hold out the first VALIDATION_SIZE examples for validation.
    train_ids, valid_ids = (train_ids[VALIDATION_SIZE:],
                            train_ids[:VALIDATION_SIZE])
    train_y, valid_y = train_y[VALIDATION_SIZE:], train_y[:VALIDATION_SIZE]

    train = chainer.datasets.TupleDataset(train_ids, train_y)
    valid = chainer.datasets.TupleDataset(valid_ids, valid_y)

    # LEAM with frozen pretrained embeddings, wrapped in a classifier.
    model = LEAM(n_vocab=len(word2index),
                 n_embed=word_vectors.vector_size,
                 n_units=args.unit,
                 n_class=4,
                 n_window=args.window,
                 W=word_vectors.vectors)
    model.embed.disable_update()
    model = LeamClassifier(model)
    if args.gpu >= 0:
        # Make a specified GPU current and move the model onto it.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid, args.batchsize, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=args.gpu))

    # Keep only the model that achieves the lowest validation loss.
    trainer.extend(extensions.snapshot_object(model, 'best_model'),
                   trigger=chainer.training.triggers.MinValueTrigger(
                       'validation/main/loss'))

    trainer.run()
Example #29
0
def main():
    """Train an SSD detector on DeepFashion bounding boxes with ChainerMN.

    Each MPI process trains on its scattered shard of the dataset;
    rank 0 additionally handles console logging, progress reporting and
    snapshots. Evaluation runs as a multi-node evaluator every 10k
    iterations.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')

    parser.add_argument('--image_label',
                        '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test',
                        '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')

    parser.add_argument('--image_root',
                        '-TR',
                        default='.',
                        help='Root directory path of image files')

    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset

    # Only rank 0 loads the datasets; scatter_dataset then distributes
    # shards to every rank (other ranks pass None).
    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label,
                                       args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test,
                                      args.image_root)

        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    # SSD convention: double the gradient of biases, weight-decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    checkpoint_interval = (1000, 'iteration')

    # Multi-node checkpointer restores a previous run if one exists.
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Decay lr 10x at 80k and 100k iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    evaluator = DetectionVOCEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)

    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    # Reporting and snapshots happen on rank 0 only.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #30
0
def main():
    """Train the DRLSR super-resolution model.

    Trains on General-100 and evaluates on Set-14 with momentum SGD,
    writing snapshots, loss/PSNR plots and logs under
    ``results/deconv_res_Test``.

    Fixes over the previous version: the ``--resume`` option was parsed
    but never used (now restores the trainer when given), the
    ``--iter_parallel`` help text described the wrong option, and
    "Dataet"/"dataet" typos in console messages are corrected.
    """
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel',
                        action='store_true',
                        default=False,
                        help='load data with parallel (multiprocess) iterators')
    args = parser.parse_args()

    # Print the learning parameters.
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print('# Train Dataset: General 100')
    print('# Test Dataset: Set 14')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # Save directory; record this run's arguments for reproducibility.
    outdir = path.join(ROOT_PATH, 'results/deconv_res_Test')
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    print('# loading dataset(General100, Set14) ...')
    train, test = load_dataset()

    # Prepare the model (generator wrapped with PSNR evaluation).
    model = GenEvaluator(DRLSR())
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Momentum SGD with weight decay and gradient clipping.
    optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))
    optimizer.add_hook(chainer.optimizer.GradientClipping(0.1))

    # Data iterators; multiprocess loaders when --iter_parallel is set.
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # Set up the trainer.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # Evaluate on the test data every epoch.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # Dump the computational graph of the loss.
    trainer.extend(extensions.dump_graph('main/loss'))
    # Decay the learning rate 10x every 100 epochs.
    trainer.extend(extensions.ExponentialShift("lr", 0.1),
                   trigger=(100, 'epoch'))
    # Save trainer and model snapshots every 10 epochs.
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    # Logging and learning-rate observation.
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # Plot the loss curve.
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # Plot the PSNR curve.
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch',
                              file_name='PSNR.png'))
    # Console report.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # Progress bar.
    trainer.extend(extensions.ProgressBar())

    # Restore the full trainer state when --resume is given (previously
    # the option was accepted but silently ignored).
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
                               num_iterations=args.test_iterations,
                               converter=get_concat_and_pad_examples(
                                   args.blank_label)), (args.test_interval,
                                                        'iteration'))
    epoch_validation_iterator = copy.copy(validation_iterator)
    epoch_validation_iterator._repeat = False

    # 该评估器接受所有的验证图像,对每个图像进行评估,并报告所有验证图像的验证度量。
    epoch_evaluator = (chainer.training.extensions.Evaluator(
        epoch_validation_iterator,
        model,
        device=updater._devices[0],
        converter=get_concat_and_pad_examples(args.blank_label),
    ), (1, 'epoch'))

    model_snapshotter = (extensions.snapshot_object(
        net, 'model_{.updater.iteration}.npz'), (args.snapshot_interval,
                                                 'iteration'))

    # bbox plotter test
    if not args.test_image:
        test_image = validation_dataset.get_example(0)[0]
    else:
        test_image = train_dataset.load_image(args.test_image)

    bbox_plotter = (TextRecBBOXPlotter(
        test_image,
        os.path.join(log_dir, 'boxes'),
        target_shape,
        metrics,
        send_bboxes=args.send_bboxes,
        upstream_port=args.port,
Example #32
0
def train(args):
    """Train an end-to-end ASR model with the Chainer backend.

    Builds the E2E model described by ``args.model_module``, sets up the
    optimizer, (possibly multi-GPU) data iterators and updater, attaches
    evaluation / snapshot / reporting extensions to a
    ``chainer.training.Trainer`` and runs the training loop.

    :param Namespace args: The program arguments
    """
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    set_deterministic_chainer(args)

    # check cuda and cudnn availability
    if not chainer.cuda.available:
        logging.warning('cuda is not available')
    if not chainer.cuda.cudnn_enabled:
        logging.warning('cudnn is not available')

    # get input and output dimension info from the first validation utterance
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # check attention type
    if args.atype not in ['noatt', 'dot', 'location']:
        raise NotImplementedError('chainer supports only noatt, dot, and location attention.')

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    logging.info('import model module: ' + args.model_module)
    from importlib import import_module
    model_module = import_module(args.model_module)
    model = model_module.E2E(idim, odim, args, flag_return=False)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(json.dumps((idim, odim, vars(args)), indent=4, sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = 0
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()  # Copy the model to the GPU
        logging.info('single gpu calculation.')
    elif ngpu > 1:
        gpu_id = 0
        devices = {'main': gpu_id}
        for gid in six.moves.xrange(1, ngpu):
            devices['sub_%d' % gid] = gid
        logging.info('multi gpu calculation (#gpus = %d).' % ngpu)
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
    else:
        gpu_id = -1
        logging.info('cpu calculation')

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = chainer.optimizers.AdaDelta(eps=args.eps)
    elif args.opt == 'adam':
        optimizer = chainer.optimizers.Adam()
    elif args.opt == 'noam':
        # alpha=0 is a placeholder: the learning rate is driven by the
        # VaswaniRule extension registered below.
        optimizer = chainer.optimizers.Adam(alpha=0, beta1=0.9, beta2=0.98, eps=1e-9)
    else:
        # Fail fast with a clear message instead of a NameError at setup().
        raise NotImplementedError('unsupported optimizer: ' + str(args.opt))
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip))

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # set up training iterator and updater
    converter = CustomConverter(subsampling_factor=model.subsample[0],
                                preprocess_conf=args.preprocess_conf)
    use_sortagrad = args.sortagrad == -1 or args.sortagrad > 0
    accum_grad = args.accum_grad
    if ngpu <= 1:
        # make minibatch list (variable length)
        train = make_batchset(train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out, args.minibatches, shortest_first=use_sortagrad)
        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [ToggleableShufflingMultiprocessIterator(
                TransformDataset(train, converter.transform),
                batch_size=1, n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20,
                shuffle=not use_sortagrad)]
        else:
            train_iters = [ToggleableShufflingSerialIterator(
                TransformDataset(train, converter.transform),
                batch_size=1, shuffle=not use_sortagrad)]

        # set up updater
        updater = CustomUpdater(
            train_iters[0], optimizer, converter=converter, device=gpu_id, accum_grad=accum_grad)
    else:
        # set up minibatches
        train_subsets = []
        for gid in six.moves.xrange(ngpu):
            # make subset
            train_json_subset = {k: v for i, (k, v) in enumerate(train_json.items())
                                 if i % ngpu == gid}
            # make minibatch list (variable length)
            train_subsets += [make_batchset(train_json_subset, args.batch_size,
                                            args.maxlen_in, args.maxlen_out, args.minibatches)]

        # each subset must have same length for MultiprocessParallelUpdater
        maxlen = max([len(train_subset) for train_subset in train_subsets])
        for train_subset in train_subsets:
            if maxlen != len(train_subset):
                for i in six.moves.xrange(maxlen - len(train_subset)):
                    train_subset += [train_subset[i]]

        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        if args.n_iter_processes > 0:
            train_iters = [ToggleableShufflingMultiprocessIterator(
                TransformDataset(train_subsets[gid], converter.transform),
                batch_size=1, n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20,
                shuffle=not use_sortagrad)
                for gid in six.moves.xrange(ngpu)]
        else:
            train_iters = [ToggleableShufflingSerialIterator(
                TransformDataset(train_subsets[gid], converter.transform),
                batch_size=1, shuffle=not use_sortagrad)
                for gid in six.moves.xrange(ngpu)]

        # set up updater
        updater = CustomParallelUpdater(
            train_iters, optimizer, converter=converter, devices=devices)

    # Set up a trainer
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    if use_sortagrad:
        trainer.extend(ShufflingEnabler(train_iters),
                       trigger=(args.sortagrad if args.sortagrad != -1 else args.epochs, 'epoch'))
    if args.opt == 'noam':
        trainer.extend(model_module.VaswaniRule('alpha', d=args.adim, warmup_steps=args.transformer_warmup_steps,
                                                scale=args.transformer_lr), trigger=(1, 'iteration'))
    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # set up validation iterator
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    if args.n_iter_processes > 0:
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False,
            n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20)
    else:
        valid_iter = chainer.iterators.SerialIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1, repeat=False, shuffle=False)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(
        valid_iter, model, converter=converter, device=gpu_id))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True)
        if hasattr(model, "module"):
            att_vis_fn = model.module.calculate_all_attentions
        else:
            att_vis_fn = model.calculate_all_attentions
        try:
            PlotAttentionReport = model_module.PlotAttentionReport
            logging.info('Using custom PlotAttentionReport')
        except AttributeError:
            from espnet.asr.asr_utils import PlotAttentionReport
        att_reporter = PlotAttentionReport(
            att_vis_fn, data, args.outdir + "/att_ws",
            converter=converter, device=gpu_id)
        trainer.extend(att_reporter, trigger=(1, 'epoch'))
    else:
        att_reporter = None

    # Take a snapshot for each specified epoch
    trainer.extend(extensions.snapshot(filename='snapshot.ep.{.updater.epoch}'), trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss',
                                          'main/loss_ctc', 'validation/main/loss_ctc',
                                          'main/loss_att', 'validation/main/loss_att'],
                                         'epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/acc', 'validation/main/acc'],
                                         'epoch', file_name='acc.png'))

    # Save best models
    trainer.extend(extensions.snapshot_object(model, 'model.loss.best'),
                   trigger=training.triggers.MinValueTrigger('validation/main/loss'))
    # Use equality, not identity, for string comparison (the original
    # `is not 'ctc'` only worked via CPython string interning and raises
    # SyntaxWarning on Python >= 3.8).
    if mtl_mode != 'ctc':
        trainer.extend(extensions.snapshot_object(model, 'model.acc.best'),
                       trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(restore_snapshot(model, args.outdir + '/model.acc.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc',
                               lambda best_value, current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model, args.outdir + '/model.loss.best'),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss',
                               lambda best_value, current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL, 'iteration')))
    report_keys = ['epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
                   'validation/main/loss', 'validation/main/loss_ctc', 'validation/main/loss_att',
                   'main/acc', 'validation/main/acc', 'elapsed_time']
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').eps),
            trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(
        report_keys), trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    set_early_stop(trainer, args)
    if args.tensorboard_dir is not None and args.tensorboard_dir != "":
        writer = SummaryWriter(args.tensorboard_dir)
        trainer.extend(TensorboardLogger(writer, att_reporter))

    # Run the training
    trainer.run()
    check_early_stop(trainer, args.epochs)
Example #33
0
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN instance-segmentation model.

    Supports single-GPU and multi-node (ChainerMN) training; the effective
    batch size and learning rate are scaled by the number of GPUs.

    :param args: namespace of training options; must contain the keys
        listed in ``required_args`` below (checked at runtime).
    :param train_data: training dataset (examples compatible with
        ``cmr.datasets.MaskRCNNTransform``).
    :param test_data: evaluation dataset.
    :param str evaluator_type: either ``'voc'`` or ``'coco'``, selecting
        the evaluation metric implementation.
    """
    # Validate that the caller supplied every mandatory option up front.
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key))

    # NOTE(review): input validation via assert is stripped under `python -O`;
    # the same condition is re-checked with a ValueError further below.
    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    if args.multi_node:
        import chainermn

        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank

        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        # Single-node mode requires an explicit GPU id.
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))

    # Effective batch size grows with the number of GPUs.
    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # lr: 0.00125 * 8 = 0.01  in original
    # (linear LR scaling with the effective batch size)
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    # Expressed here as epoch fractions of max_epoch.
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Select the RoI feature-extraction function.
    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError('Unsupported pooling_func: {}'.format(
            args.pooling_func))

    # Initializer for the mask head weights.
    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError('Unsupported initializer: {}'.format(
            args.initializer))

    if args.model == 'vgg16':
        mask_rcnn = cmr.models.MaskRCNNVGG16(
            n_fg_class=len(args.class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    # Short-circuit: in multi-node mode args.gpu may be None, but the
    # first operand is then True, so args.gpu is never compared.
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Only rank 0 supplies the datasets; scatter_dataset distributes
        # shards to the other ranks (which pass None).
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data,
        batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data,
        batch_size=args.batch_size_per_gpu,
        repeat=False,
        shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter,
        optimizer,
        device=device,
        converter=converter,
    )

    trainer = training.Trainer(
        updater,
        (args.max_epoch, 'epoch'),
        out=args.out,
    )

    # Step-decay the learning rate at the epochs computed above.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size,
            'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            use_07_metric=True,
            label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter,
            model.mask_rcnn,
            device=device,
            label_names=args.class_names,
        )
    else:
        # Unreachable while the assert above is active; kept as a guard
        # for `python -O` runs.
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type))
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Reporting/snapshot extensions run only on the master process.
    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map',
                eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter,
                model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration',
                'epoch',
                'elapsed_time',
                'lr',
                'main/loss',
                'main/roi_loc_loss',
                'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss',
                'main/rpn_cls_loss',
                'validation/main/map',
            ], ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png',
                trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #34
0
def train(args):
    """Train a Tacotron2 TTS model with the PyTorch backend.

    Builds the model, wraps it in a :class:`Tacotron2Loss` reporter chain,
    prepares (multiprocess) Chainer iterators over the JSON datasets, and
    runs a ``chainer.training.Trainer`` whose updater performs the PyTorch
    optimization steps.

    :param Namespace args: The program arguments
    """
    # seed setting
    torch.manual_seed(args.seed)

    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())

    # reverse input and output dimension
    # (TTS consumes text ("output" in the ASR-style JSON) and predicts
    # acoustic features ("input"), hence the swap)
    idim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    if args.use_cbhg:
        args.spc_dim = int(valid_json[utts[0]]['input'][1]['shape'][1])
    if args.use_speaker_embedding:
        args.spk_embed_dim = int(valid_json[utts[0]]['input'][1]['shape'][0])
    else:
        args.spk_embed_dim = None
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        # fix: the original message lacked the separating space before the path
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, vars(args)), indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    tacotron2 = Tacotron2(idim, odim, args)
    logging.info(tacotron2)

    # check the use of multi-gpu
    if args.ngpu > 1:
        tacotron2 = torch.nn.DataParallel(tacotron2,
                                          device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    tacotron2 = tacotron2.to(device)

    # define loss
    model = Tacotron2Loss(tacotron2, args.use_masking, args.bce_pos_weight)
    reporter = model.reporter

    # Setup an optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 eps=args.eps,
                                 weight_decay=args.weight_decay)

    # FIXME: TOO DIRTY HACK
    # The Chainer trainer expects the optimizer to expose a serializable
    # `target`; point both at the reporter so snapshots work.
    setattr(optimizer, 'target', reporter)
    setattr(optimizer, 'serialize', lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter(True, args.use_speaker_embedding,
                                args.use_cbhg)

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train_batchset = make_batchset(
        train_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid_batchset = make_batchset(
        valid_json,
        args.batch_size,
        args.maxlen_in,
        args.maxlen_out,
        args.minibatches,
        args.batch_sort_key,
        min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train_batchset, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid_batchset, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train_batchset, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid_batchset, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Save snapshot for each epoch
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # Save best models
    trainer.extend(
        extensions.snapshot_object(tacotron2,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # Save attention figure for each epoch
    if args.num_save_attention > 0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        if hasattr(tacotron2, "module"):
            # DataParallel wraps the real model in `.module`
            att_vis_fn = tacotron2.module.calculate_all_attentions
        else:
            att_vis_fn = tacotron2.calculate_all_attentions
        trainer.extend(PlotAttentionReport(att_vis_fn,
                                           data,
                                           args.outdir + '/att_ws',
                                           converter=CustomConverter(
                                               False,
                                               args.use_speaker_embedding),
                                           device=device,
                                           reverse=True),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    plot_keys = [
        'main/loss', 'validation/main/loss', 'main/l1_loss',
        'validation/main/l1_loss', 'main/mse_loss', 'validation/main/mse_loss',
        'main/bce_loss', 'validation/main/bce_loss'
    ]
    trainer.extend(
        extensions.PlotReport(['main/l1_loss', 'validation/main/l1_loss'],
                              'epoch',
                              file_name='l1_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/mse_loss', 'validation/main/mse_loss'],
                              'epoch',
                              file_name='mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/bce_loss', 'validation/main/bce_loss'],
                              'epoch',
                              file_name='bce_loss.png'))
    if args.use_cbhg:
        plot_keys += [
            'main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss',
            'main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'
        ]
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_l1_loss', 'validation/main/cbhg_l1_loss'],
                'epoch',
                file_name='cbhg_l1_loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/cbhg_mse_loss', 'validation/main/cbhg_mse_loss'],
                'epoch',
                file_name='cbhg_mse_loss.png'))
    trainer.extend(
        extensions.PlotReport(plot_keys, 'epoch', file_name='loss.png'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = plot_keys[:]
    report_keys[0:0] = ['epoch', 'iteration', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
def main(args):
    """Train and/or evaluate a convnet selected by ``args.arch``.

    In training mode the model is trained under a timestamped output
    directory with periodic snapshots; in ``--test`` mode only the
    evaluator runs and a confusion matrix is saved next to
    ``args.initmodel``.  Returns the evaluator's result dict with an
    ``'outputdir'`` entry added.
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        # Load pretrained weights for fine-tuning, skipping params in args.ignore.
        utils.finetuning.load_param(model.finetuned_model_path, model, args.ignore)
        #model.finetune = True

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Output directory is timestamped per run; in test mode reuse the
    # directory of the model under evaluation instead.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime("%Y%m%d-%H%M")  + '_bs' +  str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)
    # Load the datasets and mean file
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    # NOTE(review): shuffle=False on the *training* iterator is unusual --
    # confirm the dataset is pre-shuffled upstream.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(
            val, args.val_batchsize, repeat=False, shuffle=False)

    # Set up an optimizer
    # ``optimizers`` is presumably a module-level name -> optimizer-factory
    # mapping (not the chainer.optimizers module) -- verify at file top.
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    # Short intervals in test mode so a dry run exercises every extension.
    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model, device=args.gpu)
    else:
        # EvaluatorPlus additionally accumulates a confusion matrix (see below).
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model, device=args.gpu)
        if 'googlenet' in args.arch:
            # GoogLeNet reports its main loss under the loss3 key.
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma),
            trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.test:
        # Save the initial weights too, so training deltas can be inspected.
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    # Run the evaluator once more (standalone) to collect final metrics.
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat, categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat, categories,
                                mode="rate", saveFormat="eps")
    return results
Example #36
0
def main():
    """Train the x2 line-drawing colorization U-Net.

    Loads a frozen 128px colorization model (``cnn_128``) as the
    conditioning network and trains ``cnn`` on the x2 dataset.
    Snapshots go to ``args.out``; the final model and optimizer state
    are saved there as well.
    """
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset',
                        '-i',
                        default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval',
                        type=int,
                        default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset
    #model = "./model_paint"

    cnn = unet.UNET()
    #serializers.load_npz("result/model_iter_10000", cnn)
    # Pretrained 128px model used as the low-res conditioning network.
    cnn_128 = unet.UNET()
    serializers.load_npz("models/model_cnn_128_dfl2_9", cnn_128)

    dataset = Image2ImageDatasetX2("dat/images_color_train.dat",
                                   root + "linex2/",
                                   root + "colorx2/",
                                   train=True)
    # dataset.set_img_dict(img_dict)
    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        cnn.to_gpu()  # Copy the model to the GPU
        cnn_128.to_gpu()  # Copy the model to the GPU

    # Setup optimizer parameters.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(cnn)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')

    # Set up a trainer
    updater = ganUpdater(models=(cnn, cnn_128),
                         iterator={
                             'main': train_iter,
                         },
                         optimizer={'cnn': opt},
                         device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('cnn/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        cnn, 'cnn_x2_iter_{.updater.iteration}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt, 'optimizer_'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(
        extensions.PrintReport(['epoch', 'cnn/loss', 'cnn/loss_rec']))
    trainer.extend(extensions.ProgressBar(update_interval=20))

    # BUG FIX: the snapshot must be loaded *before* trainer.run();
    # previously this block ran after training, so --resume had no effect.
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained model
    # BUG FIX: 'out_dir' was undefined (NameError); use args.out.
    chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), cnn)
    chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
Example #37
0
def main():
    """Fine-tune / evaluate a BERT classifier on a GLUE-style task.

    Driven entirely by the module-level ``FLAGS``: ``do_train`` runs the
    fine-tuning loop, ``do_eval`` evaluates on the dev set, and
    ``do_print_test`` prints pooled-output arrays for a few dev examples
    (debugging aid).  Raises ValueError on inconsistent configuration.
    """
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_print_test:
        raise ValueError("At least one of `do_train` or `do_eval` "
                         "or `do_print_test` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    if not os.path.isdir(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    # TODO: use special Adam from "optimization.py"
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Load the pretrained checkpoint, dropping the classifier head weights
    # (they are task-specific and trained from scratch).
    bert = modeling.BertModel(config=bert_config)
    model = modeling.BertClassifier(bert, num_labels=len(label_list))
    chainer.serializers.load_npz(
        FLAGS.init_checkpoint, model,
        ignore_names=['output/W', 'output/b'])

    if FLAGS.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(FLAGS.gpu).use()
        model.to_gpu()

    if FLAGS.do_train:
        # Adam with weight decay only for 2D matrices
        optimizer = optimization.WeightDecayForMatrixAdam(
            alpha=1.,  # ignore alpha. instead, use eta as actual lr
            eps=1e-6, weight_decay_rate=0.01)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(1.))

        train_iter = chainer.iterators.SerialIterator(
            train_examples, FLAGS.train_batch_size)
        converter = Converter(
            label_list, FLAGS.max_seq_length, tokenizer)
        updater = training.updaters.StandardUpdater(
            train_iter, optimizer,
            converter=converter,
            device=FLAGS.gpu)
        trainer = training.Trainer(
            updater, (num_train_steps, 'iteration'), out=FLAGS.output_dir)

        # learning rate (eta) scheduling in Adam:
        # linear warmup to FLAGS.learning_rate, then linear decay to 0.
        lr_decay_init = FLAGS.learning_rate * \
            (num_train_steps - num_warmup_steps) / num_train_steps
        trainer.extend(extensions.LinearShift(  # decay
            'eta', (lr_decay_init, 0.), (num_warmup_steps, num_train_steps)))
        trainer.extend(extensions.WarmupShift(  # warmup
            'eta', 0., num_warmup_steps, FLAGS.learning_rate))
        trainer.extend(extensions.observe_value(
            'eta', lambda trainer: trainer.updater.get_optimizer('main').eta),
            trigger=(50, 'iteration'))  # logging

        trainer.extend(extensions.snapshot_object(
            model, 'model_snapshot_iter_{.updater.iteration}.npz'),
            trigger=(num_train_steps, 'iteration'))
        trainer.extend(extensions.LogReport(
            trigger=(50, 'iteration')))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'main/loss',
             'main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.run()

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        test_iter = chainer.iterators.SerialIterator(
            eval_examples, FLAGS.train_batch_size * 2,
            repeat=False, shuffle=False)
        converter = Converter(
            label_list, FLAGS.max_seq_length, tokenizer)
        evaluator = extensions.Evaluator(
            test_iter, model, converter=converter, device=FLAGS.gpu)
        results = evaluator()
        print(results)

    # if you wanna see some output arrays for debugging
    if FLAGS.do_print_test:
        short_eval_examples = processor.get_dev_examples(FLAGS.data_dir)[:3]
        short_eval_examples = short_eval_examples[:FLAGS.eval_batch_size]
        short_test_iter = chainer.iterators.SerialIterator(
            short_eval_examples, FLAGS.eval_batch_size,
            repeat=False, shuffle=False)
        converter = Converter(
            label_list, FLAGS.max_seq_length, tokenizer)
        # BUG FIX: an unused Evaluator was built here from `test_iter`,
        # which is only defined in the do_eval branch -- running with
        # do_print_test but not do_eval raised NameError.  The evaluator
        # was never invoked, so it is simply removed.

        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                data = short_test_iter.__next__()
                out = model.bert.get_pooled_output(
                    *converter(data, FLAGS.gpu)[:-1])
                print(out)
                print(out.shape)
            print(converter(data, -1))
Example #38
0
def train():
    """Train a gated CNN (GCNN) classifier on segment features.

    Parses its own CLI arguments, splits the dataset 90/10 into
    train/validation, trains for ``--epoch`` epochs, and writes
    snapshots, logs, and loss/accuracy plots under ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=50,
                        help='number of epochs to learn')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='learning minibatch size')
    parser.add_argument('--optimizer',
                        type=str,
                        default='Adam',
                        help='optimizer to use for backprop')
    parser.add_argument(
        '--feature',
        '-f',
        type=str,
        default='mfcc',
        #choices=['mfcc', 'mfcc_delta', 'fbank', 'fbank_delta', 'plp', 'plp_delta'],
        help='feature type')
    parser.add_argument('--out',
                        '-o',
                        type=str,
                        default='GCNN/model/test',
                        help='path to the output directory')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    args = parser.parse_args()

    if not os.path.exists(args.out):
        os.makedirs(args.out)

    # Set seed
    # Seed both numpy and cupy so runs are reproducible on GPU too.
    np.random.seed(args.seed)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(args.seed)

    print(args)

    # NOTE(review): 6 is presumably the number of output classes -- confirm
    # against net.GCNN's signature.
    model = net.GCNN(6)

    if args.gpu >= 0:
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy

    # NOTE(review): args.optimizer is parsed but Adam is always used here.
    opt_model = chainer.optimizers.Adam()
    opt_model.setup(model)

    print('Preparing data...')
    dataset = SegmentDataset('train', args.feature, 512, normalized=True)
    # 90/10 train/validation split, deterministic via the seed.
    train_dat, val_dat = chainer.datasets.split_dataset_random(
        dataset, int(len(dataset) * 0.9), seed=args.seed)
    train_iter = chainer.iterators.SerialIterator(train_dat,
                                                  args.batchsize,
                                                  shuffle=True)
    val_iter = chainer.iterators.SerialIterator(val_dat,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)

    updater = GCNNUpdater(iterators={'main': train_iter},
                          models=(model),
                          optimizers={'model': opt_model},
                          device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    evaluator = GCNNEvaluator(iterators={
        'main': val_iter,
    },
                              models={'model': model},
                              device=args.gpu)
    trainer.extend(evaluator)

    # Snapshot only at the very end of training; report every epoch.
    snapshot_interval = (args.epoch, 'epoch')
    display_interval = (1, 'epoch')
    # snapshot
    trainer.extend(
        extensions.snapshot(filename='{.updater.epoch}_epoch_snapshot.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(model, '{.updater.epoch}.model'),
                   trigger=snapshot_interval)

    # Report
    log_keys = [
        'epoch', 'model/loss', 'model/acc', 'val/model/loss', 'val/model/acc'
    ]
    trainer.extend(
        extensions.LogReport(keys=log_keys, trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        extensions.PlotReport(['model/loss', 'val/model/loss'],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['model/acc', 'val/model/acc'],
                              'epoch',
                              file_name='acc.png'))
    trainer.run()
def main():
    """Train an ILSVRC2012 convnet data-parallel across multiple GPUs.

    The training set is split into one shard per GPU and consumed by a
    MultiprocessParallelUpdater; snapshots, logs, and an lr trace are
    written under ``--out``.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(),
                        default='nin', help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpus', '-g', type=int, nargs="*",
                        default=[0, 1, 2, 3])
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the datasets and mean file
    mean = np.load(args.mean)
    train = train_imagenet.PreprocessedDataset(
        args.train, args.root, mean, model.insize)
    val = train_imagenet.PreprocessedDataset(
        args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    devices = tuple(args.gpus)

    # One training-iterator shard per device for data-parallel updates.
    train_iters = [
        chainer.iterators.MultiprocessIterator(i,
                                               args.batchsize,
                                               n_processes=args.loaderjob)
        for i in chainer.datasets.split_dataset_n_random(train, len(devices))]
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = updaters.MultiprocessParallelUpdater(train_iters, optimizer,
                                                   devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # Tight intervals in --test mode so a dry run exercises all extensions.
    if args.test:
        val_interval = 5, 'epoch'
        log_interval = 1, 'epoch'
    else:
        val_interval = 100000, 'iteration'
        log_interval = 1000, 'iteration'

    # Validation runs on the first listed GPU only.
    trainer.extend(train_imagenet.TestModeEvaluator(val_iter, model,
                                                    device=args.gpus[0]),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #40
0
                                            optimizer,
                                            device=gpu_id)

# Trainer setup for the multi4 transfer experiment.  The names referenced
# here (mode, n_topic, iteration, depth, args, updater, epoch_size,
# max_epoch, model, test_iter, gpu_id) are defined earlier in the script.
# The directory name encodes the experiment configuration for bookkeeping.
result_dir = '../results/multi4_transfer_soft_{}_20_100_200_{}_{}_depth{}_valid{}'.format(
    mode, n_topic, iteration,
    sum(depth) * 2 + 1, args.valid)
trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'),
                           out=result_dir)

from chainer.training import extensions

# All periodic extensions fire once per (approximate) epoch.
trainer.extend(extensions.LogReport(trigger=(epoch_size, 'iteration')))
trainer.extend(
    extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'),
    trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.snapshot_object(
    model.predictor, filename='model_iteration-{.updater.iteration}'),
               trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id),
               trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.observe_lr(), trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.PrintReport([
    'iteration', 'lr', 'main/accuracy', 'validation/main/accuracy',
    'elapsed_time'
]),
               trigger=(epoch_size, 'iteration'))
trainer.extend(extensions.dump_graph('main/loss'))
# Halve the learning rate every 3 epochs.
trainer.extend(extensions.ExponentialShift('lr', 0.5),
               trigger=(epoch_size * 3, 'iteration'))
trainer.extend(extensions.ProgressBar(update_interval=30))

print('running')
Example #41
0
    loss_config=config.loss,
    predictor=predictor,
    discriminator=discriminator,
    device=config.train.gpu,
    iterator=train_iter,
    optimizer=opts,
    converter=converter,
)

# trainer
# Evaluation and logging fire every log_iteration; predictor snapshots
# every snapshot_iteration.
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')

# NOTE(review): no stop trigger is passed, so this trainer runs until
# interrupted or an extension raises -- confirm that is intended.
trainer = training.Trainer(updater, out=arguments.output)

# Evaluate on both the held-out test set and a fixed slice of the
# training set (to monitor overfitting), reusing the updater's forward.
ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)

trainer.extend(extensions.dump_graph('predictor/loss'))

# Only the predictor (not the discriminator) is snapshotted.
ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)

trainer.extend(extensions.LogReport(trigger=trigger_log))
trainer.extend(extensions.PrintReport(['predictor/loss']))

# Persist the CLI arguments next to the outputs for reproducibility.
save_args(arguments, arguments.output)
trainer.run()
Example #42
0
def main(config):
    """Run k-fold training of a metric-learning classifier with ChainerMN.

    ``config`` is a class whose instance (``opts``) carries every
    hyperparameter: dataset paths, backbone spec, optimizer choice,
    loss functions, fold count, etc.  Optionally augments the training
    set with pseudo-labels before splitting, then trains one model per
    fold; rank 0 handles all checkpointing and reporting.
    """
    opts = config()

    comm = chainermn.create_communicator(opts.communicator)
    # One GPU per process, indexed by intra-node rank.
    device = comm.intra_rank

    backborn_cfg = opts.backborn_cfg

    # sample(frac=1) shuffles the full dataframe.
    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    ################### pseudo labeling #########################
    # Add confidently-predicted test rows (max prob > 0.0013) to the
    # training pool, using the argmax class as the pseudo label.
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        # NOTE(review): labels are doubled -- presumably one copy per
        # imaging site; confirm against how the predictions were produced.
        labels = np.concatenate((labels, labels))
        count = 0
        valid_array = []
        valid_sirna = []
        for i, label in enumerate(labels):
            if label.max() > 0.0013:
                count = count + 1
                valid_array.append(i)
                valid_sirna.append(label.argmax())
        print(count)
        pseudo_df = test_df.iloc[valid_array, :]
        pseudo_df["sirna"] = valid_sirna
        # NOTE(review): self-assignment below is a no-op.
        pseudo_df = pseudo_df
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    ################### pseudo labeling #########################

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df,
                                    target=opts.fold_target,
                                    n_splits=opts.fold)):
        # Only rank 0 writes the per-fold CSVs to avoid clobbering.
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            print("Save a csvfile of fold_" + str(i))
        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]

        # copy(mode='init') re-initializes so each fold starts fresh.
        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        # NOTE(review): 'mean' is never used after this assignment.
        mean = opts.mean

        # NOTE(review): 'valid_trainsform' (sic) is the attribute name as
        # defined on the config object.
        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        # scatter_dataset requires the data on rank 0 and None elsewhere.
        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None

        train_data = chainermn.scatter_dataset(train_indices,
                                               comm,
                                               shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            opts.batchsize,
            shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data,
            opts.batchsize,
            repeat=False,
            shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        # NOTE(review): duplicate of the device setup done above -- harmless
        # but redundant.
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()
        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha,
                     weight_decay_rate=opts.weight_decay,
                     adabound=True,
                     final_lr=0.5), comm)

        optimizer.setup(model)
        # Weight decay on all params except BatchNorm's beta/gamma.
        if opts.optimizer == "CorrectedMomentumSGD":
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        # Classification-head loss selection.
        # NOTE(review): fc_lossfun stays undefined if opts.fc_lossfun matches
        # none of these branches -- later use would raise NameError.
        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            if opts.ls:
                focal_loss = FocalLoss(label_smoothing=True)
            else:
                focal_loss = FocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            if opts.ls:
                focal_loss = AutoFocalLoss(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        # Metric-learning loss selection (arcface/adacos).
        if opts.metric_lossfun == 'arcface':
            arcface = ArcFace()
            metric_lossfun = arcface.loss
        elif opts.metric_lossfun == 'adacos':
            adacos = AdaCos()
            metric_lossfun = adacos.loss

        updater = opts.updater(train_iter,
                               optimizer,
                               model,
                               device=device,
                               max_epoch=opts.max_epoch,
                               fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun,
                               fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w,
                               fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter,
                           model,
                           device=device,
                           max_epoch=opts.max_epoch,
                           fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun,
                           fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w,
                           fc_w=opts.fc_w), comm)

        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        # LR schedule: step decay at opts.lr_points epochs; Adam-family
        # optimizers scale 'alpha' instead of 'lr'.
        if opts.optimizer == "CorrectedMomentumSGD":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        elif opts.optimizer == "NesterovAG":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        else:
            trainer.extend(extensions.ExponentialShift('alpha', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))

        # Evaluate 10 times over the whole run.
        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        #         trainer.extend(evaluator, trigger=(int(1), 'epoch'))
        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        # Checkpointing and reporting happen on rank 0 only.
        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                           trigger=(opts.max_epoch / 10, 'epoch'))
            # Keep the best model by validation accuracy.
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                           trigger=chainer.training.triggers.MaxValueTrigger(
                               'validation/main/accuracy',
                               trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]),
                           trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
Example #43
0
                                                        device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    @make_shift('lr')
    def lr_schedule(trainer):
        max_lr = args.lr
        min_lr = 0

        epoch = trainer.updater.epoch_detail
        progress_ratio = epoch / args.epoch
        rate = 0.5 * (math.cos(math.pi * progress_ratio) + 1)
        return min_lr + max_lr * rate

    trainer.extend(lr_schedule)
    trainer.extend(extensions.LogReport(), trigger=(1, 'epoch'))

    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    trainer.extend(extensions.PrintReport([
        'epoch', 'lr', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.ProgressBar(update_interval=50))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.epoch_detail}'),
                   trigger=(50, 'epoch'))

    trainer.run()
def main():
    """Train a Faster R-CNN (VGG16 backbone) defect detector.

    Builds train/test datasets, the model and optimizer, registers
    trainer extensions (snapshots, LR decay, logging, plotting, VOC
    evaluation) and runs training for ``n_itrs`` iterations, writing
    output under ``result/``.
    """
    # BUGFIX: ('loop') is just the string 'loop'; label_names must be a
    # sequence of class names, and a bare string would be iterated
    # character-by-character. A one-element tuple needs a trailing comma.
    bbox_label_names = ('loop',)

    n_itrs = 70000   # total training iterations
    n_step = 50000   # iteration at which the learning rate is decayed
    np.random.seed(0)
    train_data = MultiDefectDetectionDataset(split='train')
    test_data = MultiDefectDetectionDataset(split='test')
    proposal_params = {'min_size': 8}
    faster_rcnn = FasterRCNNVGG16(n_fg_class=2,
                                  pretrained_model='imagenet',
                                  ratios=[0.5, 1, 2],
                                  anchor_scales=[0.5, 1, 4, 8, 16],
                                  min_size=1024,
                                  max_size=1024,
                                  proposal_creator_params=proposal_params)
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(0).use()
    model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.8)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    train_data = TransformDataset(train_data, Transform(faster_rcnn))
    train_iter = chainer.iterators.MultiprocessIterator(train_data,
                                                        batch_size=1,
                                                        n_processes=None,
                                                        shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=0)
    trainer = training.Trainer(updater, (n_itrs, 'iteration'), out='result')
    # Snapshot the detector (not the whole train chain) 5 times per run.
    # BUGFIX: use integer division; an interval trigger expects an int
    # iteration count, not the float that `/` produces.
    trainer.extend(extensions.snapshot_object(
        model.faster_rcnn, 'snapshot_model_{.updater.iteration}.npz'),
                   trigger=(n_itrs // 5, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(n_step, 'iteration'))
    log_interval = 50, 'iteration'
    plot_interval = 100, 'iteration'
    print_interval = 20, 'iteration'
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss'],
                                             file_name='loss.png',
                                             trigger=plot_interval),
                       trigger=plot_interval)
    # Evaluate mAP at a hand-picked, roughly logarithmic set of iterations
    # (cheap early sanity checks, then sparser as training stabilizes).
    trainer.extend(
        DetectionVOCEvaluator(test_iter,
                              model.faster_rcnn,
                              use_07_metric=True,
                              label_names=bbox_label_names),
        trigger=ManualScheduleTrigger(
            [100, 500, 1000, 5000, 10000, 20000, 40000, 60000, n_step, n_itrs],
            'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example #45
0
def main():
    """Train a text classifier (CNN/RNN/BoW encoder) on a chosen dataset.

    Parses command-line options, loads one of several text-classification
    datasets, trains a ``nets.TextClassifier`` keeping the best snapshot by
    validation accuracy, and saves the vocabulary plus run settings to the
    output directory.
    """
    current_datetime = '{}'.format(datetime.datetime.today())
    parser = argparse.ArgumentParser(
        description='Chainer example: Text Classification')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=300,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of layers of RNN or MLP following CNN')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--dataset', '-data', default='imdb.binary',
                        choices=['dbpedia', 'imdb.binary', 'imdb.fine',
                                 'TREC', 'stsa.binary', 'stsa.fine',
                                 'custrev', 'mpqa', 'rt-polarity', 'subj'],
                        help='Name of dataset.')
    parser.add_argument('--model', '-model', default='cnn',
                        choices=['cnn', 'rnn', 'bow'],
                        help='Name of encoder model type.')
    parser.add_argument('--char-based', action='store_true')
    parser.add_argument('--test', dest='test', action='store_true')
    parser.set_defaults(test=False)

    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=2))

    # Load a dataset
    if args.dataset == 'dbpedia':
        train, test, vocab = text_datasets.get_dbpedia(
            char_based=args.char_based)
    elif args.dataset.startswith('imdb.'):
        train, test, vocab = text_datasets.get_imdb(
            fine_grained=args.dataset.endswith('.fine'),
            char_based=args.char_based)
    elif args.dataset in ['TREC', 'stsa.binary', 'stsa.fine',
                          'custrev', 'mpqa', 'rt-polarity', 'subj']:
        train, test, vocab = text_datasets.get_other_text_dataset(
            args.dataset, char_based=args.char_based)

    # --test is a smoke-test mode: train on a tiny slice of the data.
    if args.test:
        train = train[:100]
        test = test[:100]

    print('# train data: {}'.format(len(train)))
    print('# test  data: {}'.format(len(test)))
    print('# vocab: {}'.format(len(vocab)))
    # Infer the number of classes from the labels actually present.
    n_class = len(set([int(d[1]) for d in train]))
    print('# class: {}'.format(n_class))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Setup a model
    if args.model == 'rnn':
        Encoder = nets.RNNEncoder
    elif args.model == 'cnn':
        Encoder = nets.CNNEncoder
    elif args.model == 'bow':
        Encoder = nets.BOWMLPEncoder
    encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab),
                      n_units=args.unit, dropout=args.dropout)
    model = nets.TextClassifier(encoder, n_class)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer,
        converter=convert_seq, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(
        test_iter, model,
        converter=convert_seq, device=args.gpu))

    # Take a best snapshot: only save when validation accuracy improves.
    record_trigger = training.triggers.MaxValueTrigger(
        'validation/main/accuracy', (1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'best_model.npz'),
        trigger=record_trigger)

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Save vocabulary and model's setting
    if not os.path.isdir(args.out):
        os.mkdir(args.out)
    vocab_path = os.path.join(args.out, 'vocab.json')
    with open(vocab_path, 'w') as f:
        json.dump(vocab, f)
    model_path = os.path.join(args.out, 'best_model.npz')
    # NOTE: model_setup aliases args.__dict__ (no copy), so the extra keys
    # set below are included in the args.json dump as well.
    model_setup = args.__dict__
    model_setup['vocab_path'] = vocab_path
    model_setup['model_path'] = model_path
    model_setup['n_class'] = n_class
    model_setup['datetime'] = current_datetime
    with open(os.path.join(args.out, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)

    # Run the training
    trainer.run()
Example #46
0
 def test_trigger(self):
     """snapshot_object must store the trigger it was constructed with."""
     dummy_target = mock.MagicMock()
     ext = extensions.snapshot_object(
         dummy_target, 'myfile.dat', trigger=self.trigger)
     self.assertEqual(ext.trigger, self.trigger)
Example #47
0
def train(args):
    '''Run training for an E2E model with an adversarial output branch.

    Reads input/output dimensions from the validation json, builds the
    model and optimizer, constructs (possibly multiprocess) minibatch
    iterators, registers trainer extensions (best-model snapshots by
    adversarial loss/accuracy, epsilon decay for adadelta, reporting) and
    runs training.

    :param args: parsed command-line namespace; the attributes read below
        (seed, debugmode, valid_json, train_json, outdir, ngpu, opt, ...)
        define the expected interface.
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())
    # shape is stored as (length, dim); index 1 is the feature dimension
    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))
    # the second output stream provides the adversarial targets
    odim_adv = int(valid_json[utts[0]]['output'][1]['shape'][1])
    logging.info('#output dims adversarial: ' + str(odim_adv))

    # specify model architecture
    e2e = E2E(idim, odim_adv, args)
    model = Loss(e2e)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.json'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to ' + model_conf)
        f.write(
            json.dumps((idim, odim, odim_adv, vars(args)),
                       indent=4,
                       sort_keys=True).encode('utf_8'))
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Log total number of parameters
    pytorch_total_params = sum(p.numel() for p in e2e.parameters())
    logging.info("Total parameters in e2e: " + str(pytorch_total_params))

    reporter = model.reporter

    # check the use of multi-gpu
    if args.ngpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu)))
        logging.info('batch size is automatically increased (%d -> %d)' %
                     (args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(),
                                         rho=0.95,
                                         eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())

    # FIXME: TOO DIRTY HACK
    # The chainer trainer expects the optimizer to expose a serializable
    # target; graft the reporter onto the torch optimizer to satisfy it.
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # Setup a converter
    converter = CustomConverter()

    # read json data
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    valid = make_batchset(valid_json,
                          args.batch_size,
                          args.maxlen_in,
                          args.maxlen_out,
                          args.minibatches,
                          min_batch_size=args.ngpu if args.ngpu > 1 else 1)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    if args.n_iter_processes > 0:
        train_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(train, converter.transform),
            batch_size=1,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
        valid_iter = chainer.iterators.MultiprocessIterator(
            TransformDataset(valid, converter.transform),
            batch_size=1,
            repeat=False,
            shuffle=False,
            n_processes=args.n_iter_processes,
            n_prefetch=8,
            maxtasksperchild=20)
    else:
        train_iter = chainer.iterators.SerialIterator(TransformDataset(
            train, converter.transform),
                                                      batch_size=1)
        valid_iter = chainer.iterators.SerialIterator(TransformDataset(
            valid, converter.transform),
                                                      batch_size=1,
                                                      repeat=False,
                                                      shuffle=False)

    # Set up a trainer
    updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer,
                            converter, device, args.ngpu)
    trainer = training.Trainer(updater, (args.epochs, 'epoch'),
                               out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        logging.info('resumed from %s' % args.resume)
        torch_resume(args.resume, trainer, weight_sharing=args.weight_sharing)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        CustomEvaluator(model, valid_iter, reporter, converter, device))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport(['main/loss_adv', 'validation/main/loss_adv'],
                              'epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc_adv', 'validation/main/acc_adv'],
                              'epoch',
                              file_name='acc.png'))

    # Save best models (by adversarial validation loss and accuracy)
    trainer.extend(
        extensions.snapshot_object(model,
                                   'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss_adv'))
    trainer.extend(
        extensions.snapshot_object(model, 'model.acc.best',
                                   savefun=torch_save),
        trigger=training.triggers.MaxValueTrigger('validation/main/acc_adv'))

    # save snapshot which contains model and optimizer states
    trainer.extend(torch_snapshot(), trigger=(1, 'epoch'))

    # epsilon decay in the optimizer: when the monitored metric degrades,
    # roll back to the best model so far and shrink adadelta's eps
    if args.opt == 'adadelta':
        if args.criterion == 'acc':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.acc.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/acc_adv', lambda best_value,
                               current_value: best_value > current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/acc_adv', lambda best_value,
                               current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(restore_snapshot(model,
                                            args.outdir + '/model.loss.best',
                                            load_fn=torch_load),
                           trigger=CompareValueTrigger(
                               'validation/main/loss_adv', lambda best_value,
                               current_value: best_value < current_value))
            trainer.extend(adadelta_eps_decay(args.eps_decay),
                           trigger=CompareValueTrigger(
                               'validation/main/loss_adv', lambda best_value,
                               current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL,
                                                 'iteration')))
    report_keys = ['epoch', 'iteration', 'elapsed_time']
    if args.opt == 'adadelta':
        trainer.extend(extensions.observe_value(
            'eps', lambda trainer: trainer.updater.get_optimizer('main').
            param_groups[0]["eps"]),
                       trigger=(REPORT_INTERVAL, 'iteration'))
        report_keys.append('eps')
    report_keys.extend([
        'main/loss_adv', 'main/acc_adv', 'validation/main/loss_adv',
        'validation/main/acc_adv'
    ])
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(REPORT_INTERVAL, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL))

    # Run the training
    trainer.run()
Example #48
0
def main():
    """Train an image-captioning model on MSCOCO.

    Loads the dataset, wraps it with image preprocessing, trains an
    ``ImageCaptionModel`` (LSTM or NStepLSTM language model) and saves
    periodic model snapshots for later caption generation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', type=str, default='result',
                        help='Output directory')
    parser.add_argument('--mscoco-root', type=str, default='data',
                        help='MSOCO dataset root directory')
    parser.add_argument('--max-iters', type=int, default=50000,
                        help='Maximum number of iterations to train')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='Minibatch size')
    parser.add_argument('--dropout-ratio', type=float, default=0.5,
                        help='Language model dropout ratio')
    parser.add_argument('--val-keep-quantity', type=int, default=100,
                        help='Keep every N-th validation image')
    parser.add_argument('--val-iter', type=int, default=100,
                        help='Run validation every N-th iteration')
    parser.add_argument('--log-iter', type=int, default=1,
                        help='Log every N-th iteration')
    parser.add_argument('--snapshot-iter', type=int, default=1000,
                        help='Model snapshot every N-th iteration')
    parser.add_argument('--rnn', type=str, default='nsteplstm',
                        choices=['nsteplstm', 'lstm'],
                        help='Language model layer type')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--max-caption-length', type=int, default=30,
                        help='Maxium caption length when using LSTM layer')
    args = parser.parse_args()

    # Load the MSCOCO dataset. Assumes that the dataset has been downloaded
    # already using e.g. the `download.py` script
    train, val = datasets.get_mscoco(args.mscoco_root)

    # Validation samples are used to address overfitting and see how well your
    # model generalizes to yet unseen data. However, since the number of these
    # samples in MSCOCO is quite large (~200k) and thus require time to
    # evaluate, you may choose to use only a fraction of the available samples
    val = val[::args.val_keep_quantity]

    # Number of unique words that are found in the dataset
    vocab_size = len(train.vocab)

    # Instantiate the model to be trained either with LSTM layers or with
    # NStepLSTM layers
    model = ImageCaptionModel(
        vocab_size, dropout_ratio=args.dropout_ratio, rnn=args.rnn)

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    def transform(in_data):
        # Called for each sample and applies necessary preprocessing to the
        # image such as resizing and normalizing
        img, caption = in_data
        img = model.prepare(img)
        return img, caption

    # We need to preprocess the images since their sizes may vary (and the
    # model requires that they have the exact same fixed size)
    train = TransformDataset(train, transform)
    val = TransformDataset(val, transform)

    train_iter = iterators.MultiprocessIterator(
        train, args.batch_size, shared_mem=700000)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batch_size, repeat=False, shuffle=False, shared_mem=700000)

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    def converter(batch, device):
        # The converter receives a batch of input samples and may modify it if
        # necessary. In our case, we need to align the captions depending on if
        # we are using LSTM layers or NStepLSTM layers in the model.
        if args.rnn == 'lstm':
            max_caption_length = args.max_caption_length
        elif args.rnn == 'nsteplstm':
            max_caption_length = None
        else:
            raise ValueError('Invalid RNN type.')
        return datasets.converter(
            batch, device, max_caption_length=max_caption_length)

    updater = training.updater.StandardUpdater(
        train_iter, optimizer=optimizer, device=args.gpu, converter=converter)

    trainer = training.Trainer(
        updater, out=args.out, stop_trigger=(args.max_iters, 'iteration'))
    trainer.extend(
        extensions.Evaluator(
            val_iter,
            target=model,
            converter=converter,
            device=args.gpu
        ),
        trigger=(args.val_iter, 'iteration')
    )
    trainer.extend(
        extensions.LogReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            trigger=(args.log_iter, 'iteration')
        )
    )
    trainer.extend(
        extensions.PrintReport(
            ['elapsed_time', 'epoch', 'iteration', 'main/loss',
             'validation/main/loss']
        ),
        trigger=(args.log_iter, 'iteration')
    )

    # Save model snapshots so that later on, we can load them and generate new
    # captions for any image. This can be done in the `predict.py` script
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.iteration}'),
        trigger=(args.snapshot_iter, 'iteration')
    )
    trainer.extend(extensions.ProgressBar())
    trainer.run()
Example #49
0
def main():
    parser = argparse.ArgumentParser(description='Train script')
    parser.add_argument('dataset_directory')
    parser.add_argument('--resize', type=int, default=32)
    parser.add_argument('--batchsize', '-b', type=int, default=16)
    parser.add_argument('--max_iter', '-m', type=int, default=4000000)
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default="result",
                        help='Directory to output the result')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=5000,
                        help='Interval of snapshot')
    parser.add_argument('--evaluation_interval',
                        type=int,
                        default=50000,
                        help='Interval of evaluation')
    parser.add_argument('--out_image_interval',
                        type=int,
                        default=5000,
                        help='Interval of evaluation')
    parser.add_argument('--stage_interval',
                        type=int,
                        default=400000,
                        help='Interval of stage progress')
    parser.add_argument('--display_interval',
                        type=int,
                        default=100,
                        help='Interval of displaying log to console')
    parser.add_argument(
        '--n_dis',
        type=int,
        default=1,
        help='number of discriminator update per generator update')
    parser.add_argument('--lam',
                        type=float,
                        default=10,
                        help='gradient penalty')
    parser.add_argument('--gamma',
                        type=float,
                        default=750,
                        help='gradient penalty')
    parser.add_argument('--pooling_comp',
                        type=float,
                        default=1.0,
                        help='compensation')
    parser.add_argument('--pretrained_generator', type=str, default="")
    parser.add_argument('--pretrained_discriminator', type=str, default="")
    parser.add_argument('--initial_stage', type=float, default=0.0)
    parser.add_argument('--generator_smoothing', type=float, default=0.999)
    args = parser.parse_args()

    result_directory_name = "_".join([
        "resize{}".format(args.resize),
        "stage{}".format(args.initial_stage),
        "batch{}".format(args.batchsize),
        "stginterval{}".format(args.stage_interval),
        str(int(time.time())),
    ])
    result_directory = os.path.join(args.out, result_directory_name)

    record_setting(result_directory)
    check_chainer_version()

    report_keys = [
        "stage", "loss_dis", "loss_gp", "loss_gen", "g", "inception_mean",
        "inception_std", "FID"
    ]
    max_iter = args.max_iter

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    if args.resize == 32:
        channel_evolution = (512, 512, 512, 256)
    elif args.resize == 128:
        channel_evolution = (512, 512, 512, 512, 256, 128)
    elif args.resize == 256:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64
                             )  # too much memory
        # channel_evolution = (512, 512, 512, 256, 128, 64, 32)
    elif args.resize == 512:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32)
    elif args.resize == 1024:
        channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32, 16)
    else:
        raise Exception()

    # generator = Generator()
    # generator_smooth = Generator()
    generator = chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
        channel_evolution=channel_evolution)
    generator_smooth = chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
        channel_evolution=channel_evolution)
    # discriminator = Discriminator(pooling_comp=args.pooling_comp)
    discriminator = chainer_progressive_gan.models.progressive_discriminator.ProgressiveDiscriminator(
        pooling_comp=args.pooling_comp, channel_evolution=channel_evolution)

    # select GPU
    if args.gpu >= 0:
        generator.to_gpu()
        generator_smooth.to_gpu()
        discriminator.to_gpu()
        print("use gpu {}".format(args.gpu))

    if args.pretrained_generator != "":
        chainer.serializers.load_npz(args.pretrained_generator, generator)
    if args.pretrained_discriminator != "":
        chainer.serializers.load_npz(args.pretrained_discriminator,
                                     discriminator)
    copy_param(generator_smooth, generator)

    opt_gen = make_optimizer(generator)
    opt_dis = make_optimizer(discriminator)

    if args.dataset_directory == 'cifar10':
        import chainer_gan_lib.common.dataset
        train_dataset = chainer_gan_lib.common.dataset.Cifar10Dataset()
    else:
        dataset_pathes = list(glob.glob("{}/*".format(args.dataset_directory)))
        print("use {} files".format(len(dataset_pathes)))
        train_dataset = datasets.ResizedImageDataset(dataset_pathes,
                                                     resize=(args.resize,
                                                             args.resize))
    train_iter = chainer.iterators.SerialIterator(train_dataset,
                                                  args.batchsize)

    # Set up a trainer
    updater = progressive_updater.ProgressiveUpdater(
        resolution=args.resize,
        models=(generator, discriminator, generator_smooth),
        iterator={'main': train_iter},
        optimizer={
            'opt_gen': opt_gen,
            'opt_dis': opt_dis
        },
        device=args.gpu,
        n_dis=args.n_dis,
        lam=args.lam,
        gamma=args.gamma,
        smoothing=args.generator_smoothing,
        initial_stage=args.initial_stage,
        stage_interval=args.stage_interval)
    trainer = training.Trainer(updater, (max_iter, 'iteration'),
                               out=result_directory)
    trainer.extend(extensions.snapshot_object(
        generator, 'generator_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        generator_smooth, 'generator_smooth_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        discriminator, 'discriminator_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))

    trainer.extend(
        extensions.LogReport(keys=report_keys,
                             trigger=(args.display_interval, 'iteration')))
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(args.display_interval, 'iteration'))
    trainer.extend(sample_generate(generator_smooth, result_directory),
                   trigger=(args.out_image_interval, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(sample_generate_light(generator_smooth, result_directory),
                   trigger=(args.evaluation_interval // 10, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    # trainer.extend(calc_inception(generator_smooth), trigger=(args.evaluation_interval, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    # trainer.extend(calc_FID(generator_smooth), trigger=(args.evaluation_interval, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Run the training
    trainer.run()
Example #50
0
def main():
    """Train an ImageNet (ILSVRC2012) convnet selected via ``--arch``.

    Parses command-line options, builds the model, optimizer and data
    iterators, registers evaluation/snapshot/report extensions, then runs
    the Chainer trainer loop.
    """
    # Mapping of --arch choices to model classes.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    # Fixed: help text was missing its closing parenthesis.
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train.
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(
            args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file.
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer.
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer.
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # With --test, fire every iteration so a quick run exercises everything.
    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Resume must be loaded after all extensions are registered so their
    # states are restored as well.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #51
0
def main():
    """Train a DCGAN on CIFAR-10 or a user-supplied image directory.

    Command-line driven: builds generator/discriminator, their Adam
    optimizers, a DCGANUpdater, and a Trainer with snapshot, logging and
    image-output extensions, then runs the training loop.
    """
    parser = argparse.ArgumentParser(description='Chainer example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files.  Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', type=str,
                        help='Resume the training from snapshot')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    # --gpu writes into the same dest as --device for backward compatibility.
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # Resolve the device specifier (ChainerX string or integer id).
    device = chainer.get_device(args.device)
    device.use()

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_hidden))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    gen.to_device(device)  # Copy the model to the device
    dis.to_device(device)

    # Setup an optimizer: Adam with the DCGAN-standard beta1=0.5 plus a
    # small weight decay hook.
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(
            chainer.optimizer_hooks.WeightDecay(0.0001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    if args.dataset == '':
        # Load the CIFAR10 dataset if args.dataset is not specified
        train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)
    else:
        # Otherwise train on every png/jpg file found in the directory.
        all_files = os.listdir(args.dataset)
        image_files = [f for f in all_files if ('png' in f or 'jpg' in f)]
        print('{} contains {} image files'
              .format(args.dataset, len(image_files)))
        train = chainer.datasets\
            .ImageDataset(paths=image_files, root=args.dataset)

    # Setup an iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Setup an updater
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=device)

    # Setup a trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    # Full-trainer snapshot (for resume) plus per-model weight snapshots.
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'gen/loss', 'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        out_generated_image(
            gen, dis,
            10, 10, args.seed, args.out),
        trigger=snapshot_interval)

    # Loaded after extensions are registered so their state is restored too.
    if args.resume is not None:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def train(args):
    """Train a phrase/image grounding model and checkpoint to ``checkpoint/``.

    Args:
        args: parsed command-line namespace; reads san_check, epoch, lr,
            b_size, device, w_decay, image_net, phrase_net, preload, resume.

    Side effects:
        Creates a timestamped output directory, writes ``settings.json``,
        periodic snapshots, plots, and the final model file.
    """
    san_check = args.san_check
    epoch = args.epoch
    lr = args.lr
    b_size = args.b_size
    device = args.device
    w_decay = args.w_decay
    image_net = args.image_net
    phrase_net = args.phrase_net
    preload = args.preload
    wo_image = (image_net is None)

    out_base = 'checkpoint/'
    time_stamp = dt.now().strftime("%Y%m%d-%H%M%S")
    saveto = out_base + '{}{}-{}_{}/'.format(
        'sc_' * san_check,
        phrase_net,
        image_net,
        time_stamp)
    os.makedirs(saveto)
    # Fixed: the original passed a bare open() to json.dump, leaking the
    # file handle; use a context manager so the file is closed/flushed.
    with open(saveto + 'settings.json', 'w') as f:
        json.dump(vars(args), f)

    print('setup dataset...')
    train, conv_f = get_dataset(phrase_net, image_net=image_net, split='train', preload=preload, san_check=args.san_check)
    val, _ = get_dataset(phrase_net, image_net=image_net, split='val', skip=10*4, preload=preload, san_check=args.san_check)

    train_iter = SampleManager(train, b_size, p_batch_ratio=.15)
    val_iter = SerialIterator(val, b_size, shuffle=False, repeat=False)

    print('setup a model ...')
    chainer.cuda.get_device_from_id(device).use()
    model = setup_model(phrase_net, image_net)
    model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(model)

    if hasattr(model, 'vis_cnn'):
        model.vis_cnn.disable_update()  # Protect VGG parameters from updates/weight decay.

    if w_decay:
        optimizer.add_hook(
            chainer.optimizer.WeightDecay(w_decay), 'hook_dec')

    updater = training.StandardUpdater(train_iter, optimizer, converter=conv_f, device=device)
    trainer = training.Trainer(updater, (epoch, 'epoch'), saveto)

    # Sanity-check mode fires everything every iteration/epoch for fast runs.
    val_interval = (1, 'epoch') if san_check else (1000, 'iteration')
    log_interval = (1, 'iteration') if san_check else (10, 'iteration')
    plot_interval = (1, 'iteration') if san_check else (10, 'iteration')
    dataset_interval = (1, 'iteration') if san_check else (1000, 'iteration')

    trainer.extend(extensions.Evaluator(val_iter, model, converter=conv_f, device=device),
                trigger=val_interval)

    if not san_check:
        # Halve Adam's alpha every epoch.
        trainer.extend(extensions.ExponentialShift(
            'alpha', 0.5), trigger=(1, 'epoch'))

    # # Comment out to enable visualization of a computational graph.
    # trainer.extend(extensions.dump_graph('main/loss'))

    if not san_check:
        # Full-trainer checkpoints (enable restart), per-interval model dumps,
        # and a "best so far" model tracked by validation F1.
        trainer.extend(extensions.snapshot(), trigger=(1, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'model_{.updater.iteration}'), trigger=val_interval)
        trainer.extend(extensions.snapshot_object(
            model, 'model'), trigger=training.triggers.MaxValueTrigger('validation/main/f1', trigger=val_interval))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/f1', 'validation/main/f1', 'pr', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], file_name='loss.png', trigger=plot_interval))
    trainer.extend(extensions.PlotReport(['main/f1', 'validation/main/f1'], file_name='f1.png', trigger=plot_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print('start training')
    trainer.run()

    chainer.serializers.save_npz(saveto + 'final_model', model)
def main():
    """Train a pix2pix encoder/decoder/discriminator on the facade dataset.

    Builds the three networks and their Adam optimizers, wires them into a
    FacadeUpdater, registers snapshot/log/image extensions, and runs the
    trainer.
    """
    parser = argparse.ArgumentParser(description='chainer implementation of pix2pix')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./facade/base',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    enc = Encoder(in_ch=12)
    dec = Decoder(out_ch=3)
    dis = Discriminator(in_ch=12, out_ch=3)
    
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        enc.to_gpu()  # Copy the model to the GPU
        dec.to_gpu()
        dis.to_gpu()

    # Setup an optimizer: Adam with beta1=0.5 plus a small weight decay hook.
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.00001), 'hook_dec')
        return optimizer
    opt_enc = make_optimizer(enc)
    opt_dec = make_optimizer(dec)
    opt_dis = make_optimizer(dis)

    # Train/test split by image index range within the facade dataset.
    train_d = FacadeDataset(args.dataset, data_range=(1,300))
    test_d = FacadeDataset(args.dataset, data_range=(300,379))
    #train_iter = chainer.iterators.MultiprocessIterator(train_d, args.batchsize, n_processes=4)
    #test_iter = chainer.iterators.MultiprocessIterator(test_d, args.batchsize, n_processes=4)
    train_iter = chainer.iterators.SerialIterator(train_d, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_d, args.batchsize)

    # Set up a trainer; the updater receives both iterators and all three
    # optimizers keyed by name.
    updater = FacadeUpdater(
        models=(enc, dec, dis),
        iterator={
            'main': train_iter,
            'test': test_iter},
        optimizer={
            'enc': opt_enc, 'dec': opt_dec, 
            'dis': opt_dis},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    # Full-trainer snapshot (for resume) plus per-network weight snapshots.
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.iteration}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        enc, 'enc_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dec, 'dec_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'enc/loss', 'dec/loss', 'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        out_image(
            updater, enc, dec,
            5, 5, args.seed, args.out),
        trigger=snapshot_interval)

    # Loaded after extensions are registered so their state is restored too.
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
Example #54
0
def main():
    """Train a rough-sketch-to-line-drawing generator (CNN simplification).

    Parses options, builds the generator and its Adam optimizer, wires a
    cnnUpdater into a Trainer with snapshot/log/test-sample extensions,
    optionally resumes from a snapshot, runs training, and saves the final
    model and optimizer state.
    """
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize', '-b', type=int, default=16,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=500,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='./result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=10,
                        help='Interval of displaying log to console')
    parser.add_argument('--test_visual_interval', type=int, default=1000,
                        help='Interval of drawing test images')
    parser.add_argument('--test_out', default='./test_result/',
                        help='DIrectory to output test samples')
    parser.add_argument('--test_image_path', default='./test_samples/test_sample3/',
                        help='Directory of image files for testing')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset

    gen = generator.GEN()
    #serializers.load_npz("result_cnn/gen_iter_2000", gen)
    #print('generator loaded')

    dataset = Rough2LineDatasetNote(
        "dat/paired_dataset.dat", root + "rough/", root + "line/", root + "note", train=True,size = 328)

    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        gen.to_gpu()  # Copy the model to the GPU

    # Setup optimizer parameters.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(gen)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_gen')

    # Set up a trainer
    updater = cnnUpdater(
        models=(gen),
        iterator={
            'main': train_iter,
            #'test': test_iter
        },
        optimizer={
            'gen': opt,},
        device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('gen/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iter_{.updater.iteration}'), trigger=snapshot_interval)
    # NOTE(review): the fixed filename means each snapshot overwrites the
    # previous optimizer state — confirm this is intended.
    trainer.extend(extensions.snapshot_object(
        opt, 'optimizer_'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'gen/loss', 'gen/loss_L']))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(test_samples_simplification(updater, gen, args.test_out, args.test_image_path),
                   trigger=(args.test_visual_interval, 'iteration'))

    # Fixed: the resume snapshot must be loaded BEFORE trainer.run();
    # the original loaded it after training finished, so --resume had
    # no effect.
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained model.
    # Fixed: the original referenced an undefined name `out` here,
    # raising NameError after training; use args.out.
    chainer.serializers.save_npz(os.path.join(args.out, 'model_final'), gen)
    chainer.serializers.save_npz(os.path.join(args.out, 'optimizer_final'), opt)
Example #55
0
def main():
    """Train an LSTM language model on the Penn Tree Bank dataset.

    Uses truncated BPTT (length --bproplen), reports perplexity during
    training, and evaluates the final model on the test split.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab =', n_vocab)

    if args.test:
        # Truncate all splits for a quick smoke run.
        train = train[:100]
        val = val[:100]
        test = test[:100]

    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Set up a trainer
    updater = BPTTUpdater(train_iter, optimizer, args.bproplen, args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    eval_model = model.copy()  # Model with shared params and distinct states
    eval_rnn = eval_model.predictor
    eval_rnn.train = False
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, device=args.gpu,
        # Reset the RNN state at the beginning of each evaluation
        eval_hook=lambda _: eval_rnn.reset_state()))

    interval = 10 if args.test else 500
    # The LogReport postprocess converts reported losses into perplexities.
    trainer.extend(extensions.LogReport(postprocess=compute_perplexity,
                                        trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'perplexity', 'val_perplexity']
    ), trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(
        update_interval=1 if args.test else 10))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Evaluate the final model
    print('test')
    eval_rnn.reset_state()
    evaluator = extensions.Evaluator(test_iter, eval_model, device=args.gpu)
    result = evaluator()
    print('test perplexity:', np.exp(float(result['main/loss'])))
Example #56
0
def main():
    """Train a DCGAN on MNIST or CIFAR-10, selected via --dataset.

    Imports the network/updater/visualizer modules lazily, builds the
    generator and discriminator with Adam optimizers, and runs a Trainer
    with snapshot, log, and image-visualization extensions.
    """
    parser = argparse.ArgumentParser(description="DCGAN")
    parser.add_argument("--batchsize", "-b", type=int, default=128)
    parser.add_argument("--epoch", "-e", type=int, default=100)
    parser.add_argument("--gpu", "-g", type=int, default=0)
    parser.add_argument("--snapshot_interval", "-s", type=int, default=10)
    parser.add_argument("--display_interval", "-d", type=int, default=1)
    parser.add_argument("--n_dimz", "-z", type=int, default=100)
    parser.add_argument("--dataset", "-ds", type=str, default="cifar10")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--out", "-o", type=str, default="result")
    parser.add_argument("--resume", '-r', default='')
    args = parser.parse_args()

    # Import project modules here; the Network module depends on --dataset.
    import Updater
    import Visualize
    if args.dataset == "mnist":
        import Network.mnist_net as Network
    else:
        import Network.cifar10_net as Network
    # Print the run settings.
    print("GPU:{}".format(args.gpu))
    print("epoch:{}".format(args.epoch))
    print("Minibatch_size:{}".format(args.batchsize))
    print("Dataset:{}".format(args.dataset))
    print('')
    # Results go to a per-dataset subdirectory of --out.
    out = os.path.join(args.out, args.dataset)
    #Set up NN
    gen = Network.Generator(n_hidden=args.n_dimz)
    dis = Network.Discriminator()

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()
    # Make an Adam optimizer (beta1=0.5) with a weight-decay hook.
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = optimizers.Adam(alpha=alpha, beta1=beta1)  #init_lr = alpha
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0001),
                           'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    #Get dataset (unlabeled images scaled to [0, 255])
    if args.dataset == "mnist":
        train, _ = mnist.get_mnist(withlabel=False, ndim=3, scale=255.)
    else:
        train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)
    #Setup iterator
    train_iter = iterators.SerialIterator(train, args.batchsize)
    #Setup updater
    updater = Updater.DCGANUpdater(models=(gen, dis),
                                   iterator=train_iter,
                                   optimizer={
                                       'gen': opt_gen,
                                       'dis': opt_dis
                                   },
                                   device=args.gpu)

    #Setup trainer
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=out)
    snapshot_interval = (args.snapshot_interval, 'epoch')
    display_interval = (args.display_interval, 'epoch')
    # The full-trainer snapshot only fires at the final epoch; the model
    # snapshots below fire every snapshot_interval epochs.
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'gen/loss', 'dis/loss', 'elapsed_time']),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar())
    # NOTE(review): the visualizer receives args.out while the trainer
    # writes to the joined path `out` — confirm Visualize joins the
    # dataset name itself, otherwise outputs land in different dirs.
    trainer.extend(Visualize.out_generated_image(gen, dis, 10, 10, args.seed,
                                                 args.out, args.dataset),
                   trigger=snapshot_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #57
0
def main():
    """Train an ILSVRC2012 convnet with optional NVIDIA DALI data loading.

    Selects the architecture via --arch, resolves a ChainerX/CuPy/NumPy
    device from --device (or the deprecated --gpu), builds either DALI
    pipelines or MultiprocessIterators, and runs the trainer.
    """
    # Mapping of --arch choices to model classes.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    parser.add_argument('--dali', action='store_true')
    parser.set_defaults(dali=False)
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # Resolve device from --device/--gpu (helper defined elsewhere in file).
    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Initialize the model to train
    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    model.to_device(device)
    device.use()

    # Load the mean file
    mean = np.load(args.mean)
    if args.dali:
        if not dali_util._dali_available:
            raise RuntimeError('DALI seems not available on your system.')
        num_threads = args.loaderjob
        if num_threads is None or num_threads <= 0:
            num_threads = 1
        # DALI normalizes with per-channel mean/std computed from the mean
        # image.
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # Setup DALI pipelines
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, model.insize, args.batchsize,
            num_threads, args.gpu, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, model.insize, args.val_batchsize,
            num_threads, args.gpu, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        # converter = dali_converter
        converter = dali_util.DaliConverter(mean=mean, crop_size=model.insize)
    else:
        # Load the dataset files
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(args.val, args.root, mean, model.insize,
                                  False)
        # These iterators load the images with subprocesses running in parallel
        # to the training/validation.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # With --test, fire every iteration so a quick run exercises everything.
    val_interval = (1 if args.test else 100000), 'iteration'
    log_interval = (1 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, converter=converter,
                                        device=device), trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    if not (chainerx.is_available() and isinstance(device, chainerx.Device)):
        trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Loaded after extensions are registered so their state is restored too.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #58
0
def main():
    """Train an ImageNet convnet on the ILSVRC2012 dataset.

    Parses command-line options, instantiates the selected architecture,
    sets up a Chainer trainer with evaluation, logging, and snapshot
    extensions, and runs the training loop. Intended to be invoked as a
    script; all configuration comes from ``sys.argv``.
    """
    # Architecture name -> model class; selectable via --arch.
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch',
                        '-a',
                        choices=archs.keys(),
                        default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize',
                        '-B',
                        type=int,
                        default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch',
                        '-E',
                        type=int,
                        default=10,
                        help='Number of epochs to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        # Fixed: closing parenthesis was missing in the help text.
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob',
                        '-j',
                        type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean',
                        '-m',
                        default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--root',
                        '-R',
                        default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize',
                        '-b',
                        type=int,
                        default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Initialize the model to train.
    model = archs[args.arch]()
    if args.initmodel:
        # Warm-start from a previously saved model file.
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()  # Make the GPU current
        model.to_gpu()

    # Load the datasets and mean file.
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # These iterators load the images with subprocesses running in parallel to
    # the training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Set up an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test shrinks the intervals so a full extension cycle runs quickly.
    val_interval = (10 if args.test else 100000), 'iteration'
    log_interval = (10 if args.test else 1000), 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        # Resume the full trainer state (iteration, optimizer, extensions).
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #59
0
def main():
    """Train a predictor on a MoleculeNet dataset.

    Loads (or downloads) the selected dataset, builds a graph-convolution
    predictor wrapped in a ``Regressor``/``Classifier`` depending on the
    task type, trains it with per-metric reporting and best-validation
    snapshots, and pickles the final model. Intended to be invoked as a
    script; all configuration comes from ``parse_arguments()``.
    """
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {'classification': 'classifier.pkl',
                      'regression': 'regressor.pkl'}

    print('Using dataset: {}...'.format(dataset_name))

    # Cache directory and output width depend on whether specific labels
    # were requested or all tasks of the dataset are trained at once.
    if args.label:
        labels = args.label
        cache_dir = os.path.join('input', '{}_{}_{}'.format(dataset_name,
                                                            method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join('input', '{}_{}_all'.format(dataset_name,
                                                             method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]

    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        # Cache miss: download and preprocess the whole dataset.
        dataset_parts = download_entire_dataset(dataset_name, num_data, labels,
                                                method, cache_dir)
    train, valid = dataset_parts[0], dataset_parts[1]

#    # Scale the label values, if necessary.
#    if args.scale == 'standardize':
#        if task_type == 'regression':
#            print('Applying standard scaling to the labels.')
#            datasets, scaler = standardize_dataset_labels(datasets)
#        else:
#            print('Label scaling is not available for classification tasks.')
#    else:
#        print('No label scaling was selected.')
#        scaler = None

    # Set up the predictor.
    predictor = set_up_predictor(method, n_unit, conv_layers, class_num)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize, repeat=False,
                                          shuffle=False)

    # Load metrics for the current dataset. Plain functions are passed to
    # the model as metrics_fun; evaluator classes are handled further below.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=args.gpu)
        # TODO: Use standard scaler for regression task
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Save model-related output to this directory.
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(E.Evaluator(valid_iter, model, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # Report various metrics.
    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    for metric_name, metric_fun in metrics.items():
        if isinstance(metric_fun, types.FunctionType):
            print_report_targets.append('main/' + metric_name)
            print_report_targets.append('validation/main/' + metric_name)
        elif issubclass(metric_fun, BatchEvaluator):
            trainer.extend(metric_fun(valid_iter, model, device=args.gpu,
                                      eval_func=predictor,
                                      converter=concat_mols, name='val',
                                      raise_value_error=False))
            print_report_targets.append('val/main/' + metric_name)
        else:
            # Fixed: report the offending entry (metric_fun), not the
            # whole metrics dict (metrics_fun), in the error message.
            raise TypeError('{} is not a supported metrics function.'
                            .format(type(metric_fun)))
    print_report_targets.append('elapsed_time')

    # Augmented by Ishiguro
    # ToDo: consider go/no-go of the following block
    # (i) more reporting for val/evalutaion
    # (ii) best validation score snapshot
    if task_type == 'regression':
        # Fixed: inspect *all* configured metric names instead of the
        # leftover loop variable `metric_name`, which only held the last
        # metric from the reporting loop above.
        metric_names = list(metrics.keys())
        if any('RMSE' in name for name in metric_names):
            trainer.extend(
                E.snapshot_object(
                    model, 'best_val_' + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/RMSE'))
        elif any('MAE' in name for name in metric_names):
            trainer.extend(
                E.snapshot_object(
                    model, 'best_val_' + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/MAE'))
        else:
            # Fixed: raise instead of `assert(False)`; asserts are
            # stripped when Python runs with -O.
            raise ValueError(
                'No supported validation metric (RMSE/MAE) defined for '
                'dataset ({}).'.format(dataset_name))

    elif task_type == 'classification':
        train_eval_iter = iterators.SerialIterator(
            train, args.batchsize, repeat=False, shuffle=False)
        trainer.extend(ROCAUCEvaluator(
            train_eval_iter, predictor, eval_func=predictor,
            device=args.gpu, converter=concat_mols, name='train',
            pos_labels=1, ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # instead extension name `val` is used.
        trainer.extend(ROCAUCEvaluator(
            valid_iter, predictor, eval_func=predictor,
            device=args.gpu, converter=concat_mols, name='val',
            pos_labels=1, ignore_labels=-1))
        print_report_targets.append('train/main/roc_auc')
        print_report_targets.append('validation/main/loss')
        print_report_targets.append('val/main/roc_auc')

        # Keep the checkpoint with the best (highest) validation ROC-AUC.
        trainer.extend(
            E.snapshot_object(
                model, 'best_val_' + model_filename[task_type]),
            trigger=training.triggers.MaxValueTrigger('val/main/roc_auc'))
    else:
        raise NotImplementedError(
            'Not implemented task_type = {}'.format(task_type))

    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir,  model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
Example #60
0
def main():
    """Train Mask R-CNN for instance/occlusion segmentation on ARC2017 data.

    Supports single-GPU and multi-node (ChainerMN) execution. Builds the
    dataset selected on the command line, a ResNet-based Mask R-CNN, and a
    trainer whose reporting/snapshot/plot extensions run only on rank 0.
    Intended to be invoked as a script; configuration comes from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
    parser.add_argument(
        'dataset',
        choices=['visible+occlusion', 'synthetic', 'occlusion'],
        help='The dataset.',
    )
    parser.add_argument('--model',
                        '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50',
                        help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func',
                        '-p',
                        choices=['pooling', 'align', 'resize'],
                        default='align',
                        help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node',
                        action='store_true',
                        help='use multi node')
    parser.add_argument('--mask-loss',
                        default='softmax',
                        choices=synthetic2d.models.MaskRCNN.mask_losses,
                        help='mask loss mode')
    # 180k iterations * batch 8 over the 118287-image COCO train split,
    # converted to epochs, then tripled.
    default_max_epoch = (180e3 * 8) / 118287 * 3  # x3
    parser.add_argument('--max-epoch',
                        type=float,
                        default=default_max_epoch,
                        help='epoch')
    args = parser.parse_args()

    if args.multi_node:
        # Each process drives the GPU matching its intra-node rank.
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank

        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        args.n_node = 1
        args.n_gpu = 1
        # NOTE(review): args.gpu defaults to None when -g is omitted; this
        # call (and `args.gpu >= 0` below) would then fail — confirm -g is
        # required for single-node runs.
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()

    # Only rank 0 decides the log directory; it is broadcast to the other
    # ranks so all processes write under the same path.
    if not args.multi_node or comm.rank == 0:
        out = osp.join(here, 'logs', now.strftime('%Y%m%d_%H%M%S.%f'))
    else:
        out = None
    if args.multi_node:
        args.out = comm.bcast_obj(out)
    else:
        args.out = out
    del out

    # 0.00125 * 8 = 0.01  in original
    # Linear LR scaling with the global batch size (one image per GPU).
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    # Scheduled as fractions of max_epoch to stay proportional to the
    # original 180k-iteration schedule.
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    # args.min_size = 800
    # args.max_size = 1333
    # args.anchor_scales = (2, 4, 8, 16, 32)
    # Reduced input resolution and anchor scales versus the defaults above.
    args.min_size = 600
    args.max_size = 1000
    args.anchor_scales = (4, 8, 16, 32)
    args.rpn_dim = 512

    # -------------------------------------------------------------------------
    # Dataset

    if args.dataset == 'visible+occlusion':
        # Visible-only annotations are converted to the with-occlusion
        # format and concatenated with the occlusion dataset.
        train_data1 = instance_occlsegm_lib.datasets.apc.\
            ARC2017InstanceSegmentationDataset('train', aug='standard')
        train_data1 = chainer.datasets.TransformDataset(
            train_data1, transform_visible_only_to_with_occlusion)
        train_data2 = instance_occlsegm_lib.datasets.apc.\
            ARC2017InstanceSegmentationDataset('test', aug='standard')
        train_data2 = chainer.datasets.TransformDataset(
            train_data2, transform_visible_only_to_with_occlusion)
        train_data3 = synthetic2d.datasets.ARC2017OcclusionDataset('train',
                                                                   do_aug=True)
        train_data = chainer.datasets.ConcatenatedDataset(
            train_data1,
            train_data2,
            train_data3,
        )
    elif args.dataset == 'synthetic':
        train_data = synthetic2d.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
    elif args.dataset == 'occlusion':
        train_data = synthetic2d.datasets.ARC2017OcclusionDataset('train',
                                                                  do_aug=True)
    else:
        raise ValueError
    test_data = synthetic2d.datasets.ARC2017OcclusionDataset('test')
    fg_class_names = test_data.class_names
    # Evaluation happens per video; keep only the per-video datasets.
    test_data_list = test_data.get_video_datasets()
    del test_data

    # -------------------------------------------------------------------------
    # Model + Optimizer.

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError

    # Only ResNet backbones are supported here despite the vgg16 choice
    # offered by the argument parser.
    assert args.model in ['resnet50', 'resnet101']
    n_layers = int(args.model.lstrip('resnet'))
    mask_rcnn = synthetic2d.models.MaskRCNNResNet(
        n_layers=n_layers,
        n_fg_class=len(fg_class_names),
        pooling_func=pooling_func,
        anchor_scales=args.anchor_scales,
        min_size=args.min_size,
        max_size=args.max_size,
        rpn_dim=args.rpn_dim,
        mask_loss=args.mask_loss,
    )
    mask_rcnn.nms_thresh = 0.3
    mask_rcnn.score_thresh = 0.05

    model = synthetic2d.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # Freeze the early backbone stages and (presumably pretrained)
    # affine-channel layers.
    mask_rcnn.extractor.conv1.disable_update()
    mask_rcnn.extractor.bn1.disable_update()
    mask_rcnn.extractor.res2.disable_update()
    for link in mask_rcnn.links():
        if isinstance(link, cmr.links.AffineChannel2D):
            link.disable_update()

    # -------------------------------------------------------------------------
    # Iterator.

    train_data = chainer.datasets.TransformDataset(
        train_data, cmr.datasets.MaskRCNNTransform(mask_rcnn))
    test_data_list = [
        chainer.datasets.TransformDataset(
            td, cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False))
        for td in test_data_list
    ]
    test_concat_data = chainer.datasets.ConcatenatedDataset(*test_data_list)
    if args.multi_node:
        # Rank 0 holds the full dataset; scatter distributes shards to all
        # ranks, so non-zero ranks drop their local copy first.
        if comm.rank != 0:
            train_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)

    # for training
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    # for evaluation
    test_iters = {
        i: chainer.iterators.SerialIterator(td,
                                            batch_size=1,
                                            repeat=False,
                                            shuffle=False)
        for i, td in enumerate(test_data_list)
    }
    # for visualization
    test_concat_iter = chainer.iterators.SerialIterator(test_concat_data,
                                                        batch_size=1,
                                                        repeat=False,
                                                        shuffle=False)

    # -------------------------------------------------------------------------

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(train_iter,
                                                       optimizer,
                                                       device=device,
                                                       converter=converter)

    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'),
                               out=args.out)

    # Step-decay the learning rate at the epochs computed above.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=training.triggers.ManualScheduleTrigger(
                       args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    # Evaluation, snapshotting, logging, and plotting run on rank 0 only.
    if not args.multi_node or comm.rank == 0:
        evaluator = synthetic2d.extensions.InstanceSegmentationVOCEvaluator(
            test_iters,
            model.mask_rcnn,
            device=device,
            use_07_metric=False,
            label_names=fg_class_names)
        trainer.extend(evaluator, trigger=eval_interval)
        # Keep the model snapshot with the best validation mPQ.
        trainer.extend(extensions.snapshot_object(model.mask_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=training.triggers.MaxValueTrigger(
                           'validation/main/mpq', eval_interval))
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(synthetic2d.extensions.InstanceSegmentationVisReport(
            test_concat_iter, model.mask_rcnn, label_names=fg_class_names),
                       trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/roi_loc_loss', 'main/roi_cls_loss', 'main/roi_mask_loss',
                'main/rpn_loc_loss', 'main/rpn_cls_loss', 'validation/main/mpq'
            ]),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                    'main/roi_mask_loss', 'main/rpn_loc_loss',
                    'main/rpn_cls_loss'
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport([
                'validation/main/map', 'validation/main/msq',
                'validation/main/mdq', 'validation/main/mpq'
            ],
                                  file_name='accuracy.png',
                                  trigger=plot_interval),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()