Example #1
    def train(self, nz=100, batch_size=64, epochs=10000, gpu=-1, **kwargs):

        # CIFAR-10 images in range [-1, 1] (tanh generator outputs)
        train, _ = datasets.get_cifar10(withlabel=False, ndim=3, scale=2)
        train -= 1.0
        train_iter = chainer.iterators.SerialIterator(train, batch_size)

        z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, nz),
                                     batch_size)

        optimizer_generator = optimizers.RMSprop(lr=0.00005)
        optimizer_critic = optimizers.RMSprop(lr=0.00005)
        optimizer_generator.setup(self.generator)
        optimizer_critic.setup(self.critic)

        updater = WassersteinGANUpdater(
            iterator=train_iter,
            noise_iterator=z_iter,
            optimizer_generator=optimizer_generator,
            optimizer_critic=optimizer_critic,
            device=gpu)

        trainer = training.Trainer(updater,
                                   stop_trigger=(epochs, 'epoch'),
                                   out=self.model_path)
        trainer.extend(training.extensions.ProgressBar())
        trainer.extend(training.extensions.LogReport(trigger=(1, 'iteration')))
        trainer.extend(GeneratorSample(), trigger=(1, 'epoch'))
        trainer.extend(
            training.extensions.PrintReport([
                'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
                'critic/loss/fake', 'generator/loss'
            ]))
        trainer.run()
Example #2
def train(args):
    nz = args.nz
    batch_size = args.batch_size
    epochs = args.epochs
    gpu = args.gpu

    # CIFAR-10 images in range [-1, 1] (tanh generator outputs)
    train, _ = datasets.get_cifar10(withlabel=False, ndim=3, scale=2)
    train -= 1.0

    train_iter = iterators.SerialIterator(train, batch_size)

    z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, args.nz),
                                 batch_size)

    optimizer_generator = optimizers.RMSprop(lr=0.00005)
    optimizer_critic = optimizers.RMSprop(lr=0.00005)
    generator = Generator()
    optimizer_generator.setup(generator)
    optimizer_critic.setup(Critic())

    updater = WassersteinGANUpdater(iterator=train_iter,
                                    noise_iterator=z_iter,
                                    optimizer_generator=optimizer_generator,
                                    optimizer_critic=optimizer_critic,
                                    device=gpu)

    trainer = training.Trainer(updater,
                               stop_trigger=(epochs, 'epoch'),
                               out=args.out)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(GeneratorSample(), trigger=(1, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
            'critic/loss/fake', 'generator/loss'
        ]))
    # Take a snapshot at each epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(generator,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(1, 'epoch'))

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #3
    def setup_optimizer(self,
                        optimizer_name,
                        gradient_clipping=3,
                        weight_decay=0.00001,
                        **kwargs):
        # set optimizer
        if optimizer_name == "Adam":
            self.opt = optimizers.Adam(**kwargs)
        elif optimizer_name == "AdaDelta":
            self.opt = optimizers.AdaDelta(**kwargs)
        elif optimizer_name == "AdaGrad":
            self.opt = optimizers.AdaGrad(**kwargs)
        elif optimizer_name == "RMSprop":
            self.opt = optimizers.RMSprop(**kwargs)
        elif optimizer_name == "RMSpropGraves":
            self.opt = optimizers.RMSpropGraves(**kwargs)
        elif optimizer_name == "SGD":
            self.opt = optimizers.SGD(**kwargs)
        elif optimizer_name == "MomentumSGD":
            self.opt = optimizers.MomentumSGD(**kwargs)

        # self.opt.use_cleargrads()
        self.opt.setup(self)
        self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
        self.opt.add_hook(optimizer.WeightDecay(weight_decay))

        self.opt_params = {
            "optimizer_name": optimizer_name,
            "gradient_clipping": gradient_clipping,
            "weight_decay": weight_decay
        }
Example #4
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
Example #5
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
Example #6
def get_optimizer(model,
                  opt,
                  lr=None,
                  adam_alpha=None,
                  adam_beta1=None,
                  adam_beta2=None,
                  adam_eps=None,
                  weight_decay=None):
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(alpha=adam_alpha,
                                    beta1=adam_beta1,
                                    beta2=adam_beta2,
                                    eps=adam_eps)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop(lr=lr)
    else:
        raise Exception('No optimizer is selected')

    # The first model as the master model
    optimizer.setup(model)
    if opt == 'MomentumSGD':
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    return optimizer
Example #7
    def __init__(self, model=None, lr=0.045, decay=0.9, eps=1.0, weight_decay=4.0e-5, clip=2.0):
        super(OptimizerGooglenetV3, self).__init__(model)
        optimizer = optimizers.RMSprop(lr, decay, eps)
        weight_decay = chainer.optimizer.WeightDecay(weight_decay)
        clip = chainer.optimizer.GradientClipping(clip)
        optimizer.setup(self.model)
        optimizer.add_hook(weight_decay)
        optimizer.add_hook(clip)
        self.optimizer = optimizer
Example #8
    def init_optimizer(self, clip, decay, lr=0.001, alpha=0.9, eps=1e-6):
        self.optimizer = optimizers.RMSprop(lr=lr, alpha=alpha, eps=eps)
        self.optimizer.setup(self.model)

        # Clip Gradient
        self.optimizer.add_hook(chainer.optimizer.GradientClipping(clip))

        # L2 Regularization
        self.optimizer.add_hook(chainer.optimizer.WeightDecay(decay))
Example #9
    def test_share_states(self):

        model = L.Linear(2, 2)
        opt_a = optimizers.RMSprop()
        opt_a.setup(model)
        arrays = async.share_states_as_shared_arrays(opt_a)
        opt_b = optimizers.RMSprop()
        opt_b.setup(model)
        opt_c = optimizers.RMSprop()
        opt_c.setup(model)

        def assert_different_pointers(a, b):
            self.assertTrue(a)
            for param_name in a:
                self.assertTrue(a[param_name])
                for state_name in a[param_name]:
                    self.assertTrue(
                        isinstance(a[param_name][state_name], np.ndarray))
                    self.assertTrue(
                        isinstance(b[param_name][state_name], np.ndarray))
                    self.assertNotEqual(a[param_name][state_name].ctypes.data,
                                        b[param_name][state_name].ctypes.data)

        assert_different_pointers(opt_a._states, opt_b._states)
        assert_different_pointers(opt_a._states, opt_c._states)

        async.set_shared_states(opt_b, arrays)
        async.set_shared_states(opt_c, arrays)

        def assert_same_pointers(a, b):
            self.assertTrue(a)
            for param_name in a:
                self.assertTrue(a[param_name])
                for state_name in a[param_name]:
                    self.assertTrue(
                        isinstance(a[param_name][state_name], np.ndarray))
                    self.assertTrue(
                        isinstance(b[param_name][state_name], np.ndarray))
                    self.assertEqual(a[param_name][state_name].ctypes.data,
                                     b[param_name][state_name].ctypes.data)

        assert_same_pointers(opt_a._states, opt_b._states)
        assert_same_pointers(opt_a._states, opt_c._states)
Example #10
    def test_share_states(self):

        model = L.Linear(2, 2)
        opt_a = optimizers.RMSprop()
        opt_a.setup(model)
        arrays = async_.share_states_as_shared_arrays(opt_a)
        opt_b = optimizers.RMSprop()
        opt_b.setup(copy.deepcopy(model))
        # In Chainer v2, a model cannot be set up by two optimizers or more.

        opt_c = optimizers.RMSprop()
        opt_c.setup(copy.deepcopy(model))

        """
        Removed the tests by assert_different_pointers
        since they are trivial now.
        """

        async_.set_shared_states(opt_b, arrays)
        async_.set_shared_states(opt_c, arrays)

        def assert_same_pointers(a, b):
            a = a.target
            b = b.target
            for param_name, param_a in a.namedparams():
                param_b = dict(b.namedparams())[param_name]
                state_a = param_a.update_rule.state
                state_b = param_b.update_rule.state
                self.assertTrue(state_a)
                self.assertTrue(state_b)
                for state_name, state_val_a in state_a.items():
                    state_val_b = state_b[state_name]
                    self.assertTrue(isinstance(
                        state_val_a, np.ndarray))
                    self.assertTrue(isinstance(
                        state_val_b, np.ndarray))
                    self.assertEqual(state_val_a.ctypes.data,
                                     state_val_b.ctypes.data)

        assert_same_pointers(opt_a, opt_b)
        assert_same_pointers(opt_a, opt_c)
Example #11
def get_optimizer(name, lr, momentum):
    if name == "sgd":
        return optimizers.SGD(lr=lr)
    if name == "msgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name == "nesterov":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    if name == "rmsprop":
        return optimizers.RMSprop(lr=lr, alpha=momentum)
    raise NotImplementedError()
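Most of the small factory helpers on this page, including the get_optimizer above, return a bare chainer optimizer that still has to be bound to a model before it can update anything. The snippet below is a minimal usage sketch rather than code from the example's repository: the toy linear model, the random batch, and the softmax cross-entropy loss are placeholder assumptions chosen only to show the usual cleargrads/backward/update cycle.

# Hypothetical usage sketch for the get_optimizer above (model, data and loss
# are illustrative assumptions, not part of the original repository).
import numpy as np
import chainer.functions as F
import chainer.links as L

model = L.Linear(4, 2)                        # toy model
opt = get_optimizer("rmsprop", lr=0.01, momentum=0.99)
opt.setup(model)                              # bind optimizer state to the model

x = np.random.rand(8, 4).astype(np.float32)   # dummy batch
t = np.random.randint(0, 2, size=8).astype(np.int32)

model.cleargrads()                            # clear any accumulated gradients
loss = F.softmax_cross_entropy(model(x), t)
loss.backward()                               # fill param.grad
opt.update()                                  # one RMSprop step on the current grads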
Example #12
    def init_optimizer(self):
        if self.optimizer == 'SGD':
            self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                    momentum=self.momentum)
        elif self.optimizer == 'AdaDelta':
            self.optimizer = optimizers.AdaDelta()
        elif self.optimizer == 'AdaGrad':
            self.optimizer = optimizers.AdaGrad()
        elif self.optimizer == 'Adam':
            self.optimizer = optimizers.Adam()
        elif self.optimizer == 'RMSprop':
            self.optimizer = optimizers.RMSprop()
Example #13
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    return copy.deepcopy(optimizer)
Example #14
    def _build_optimizer(self, trial, model):
        # option of optimizer function
        optimizer_name = trial.suggest_categorical(
            'optimizer', ['Adam', "AdaDelta", 'RMSprop'])
        if optimizer_name == 'Adam':
            adam_alpha = trial.suggest_loguniform('adam_alpha', 1e-5, 1e-1)
            optimizer = optimizers.Adam(alpha=adam_alpha)
        elif optimizer_name == "AdaDelta":
            optimizer = optimizers.AdaDelta()
        elif optimizer_name == "RMSprop":
            optimizer = optimizers.RMSprop()

        weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        return optimizer
Example #15
def select_optimizer(name):
    if name == "AdaGrad":
        optimizer = optimizers.AdaGrad(lr=0.001)
    elif name == "Adam":
        optimizer = chainer.optimizers.Adam(alpha=0.0001)
    elif name == "MomentumSGD":
        optimizer = optimizers.MomentumSGD(lr=0.01)
    elif name == "RMSprop":
        optimizer = optimizers.RMSprop(lr=0.01)
    elif name == "SGD":
        optimizer = optimizers.SGD(lr=0.01)
    elif name == "AdaDelta":
        optimizer = optimizers.AdaDelta(rho=0.9)
    else:
        raise Exception("Unknown network optimizer: " + args.optimizer)
    return optimizer
Example #16
    def get_optimizer(self, name, lr, momentum=0.9):
        if name.lower() == "adam":
            return optimizers.Adam(alpha=lr, beta1=momentum)
        if name.lower() == "smorms3":
            return optimizers.SMORMS3(lr=lr)
        if name.lower() == "adagrad":
            return optimizers.AdaGrad(lr=lr)
        if name.lower() == "adadelta":
            return optimizers.AdaDelta(rho=momentum)
        if name.lower() == "nesterov" or name.lower() == "nesterovag":
            return optimizers.NesterovAG(lr=lr, momentum=momentum)
        if name.lower() == "rmsprop":
            return optimizers.RMSprop(lr=lr, alpha=momentum)
        if name.lower() == "momentumsgd":
            return optimizers.MomentumSGD(lr=lr, momentum=momentum)
        if name.lower() == "sgd":
            return optimizers.SGD(lr=lr)
Example #17
def get_optimizer(name):
    """
    :type name: str
    :rtype: chainer.Optimizer
    """
    if name == "adadelta":
        opt = optimizers.AdaDelta()
    elif name == "adagrad":
        opt = optimizers.AdaGrad()
    elif name == "adam":
        opt = optimizers.Adam()
    elif name == "rmsprop":
        opt = optimizers.RMSprop()
    elif name == "smorms3":
        opt = optimizers.SMORMS3()
    else:
        raise ValueError("Unknown optimizer_name=%s" % name)
    return opt
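The longer examples on this page (#1, #2, #22, #27) do not call update() by hand; they hand the optimizer to a StandardUpdater and let a Trainer drive the loop. Below is a minimal sketch of that pattern wired to the get_optimizer above; the MNIST dataset, the one-layer classifier, and the single-epoch run are placeholder assumptions, not code from the example's repository.

# Hypothetical trainer-based sketch using the get_optimizer defined above.
# Dataset, model and epoch count are illustrative assumptions only.
import chainer.links as L
from chainer import datasets, iterators, training
from chainer.training import extensions

train, _ = datasets.get_mnist()                    # flattened 784-dim images with labels
train_iter = iterators.SerialIterator(train, batch_size=128)

model = L.Classifier(L.Linear(784, 10))            # toy classifier
optimizer = get_optimizer("rmsprop")
optimizer.setup(model)

updater = training.updaters.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (1, 'epoch'), out='result')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy']))
trainer.run()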
Example #18
def select_optimizer(opt_name, learning_rate):
    if opt_name == "Adam":
        return optimizers.Adam(alpha=learning_rate)
    elif opt_name == "SGD":
        return optimizers.SGD(lr=learning_rate)
    elif opt_name == "RMSpropGraves":
        return optimizers.RMSpropGraves(lr=learning_rate)
    elif opt_name == "RMSprop":
        return optimizers.RMSprop(lr=learning_rate)
    elif opt_name == "AdaDelta":
        return optimizers.AdaDelta()
    elif opt_name == "AdaGrad":
        return optimizers.AdaGrad(lr=learning_rate)
    elif opt_name == "MomentumSGD":
        return optimizers.MomentumSGD(lr=learning_rate)
    elif opt_name == "NesterovAG":
        return optimizers.NesterovAG(lr=learning_rate)
    else:
        print('please select correct optimizer')
        exit()
Example #19
def setOptimizer(model, method, params):
    learningRate = params.get('learningRate', 0.001)
    alpha = params.get('alpha', 0.001)
    if (method == 'adam'):
        optimizer = optimizers.Adam(alpha=alpha)
    elif (method == 'smorms3'):
        optimizer = optimizers.SMORMS3(lr=learningRate)
    elif (method == 'rmsprop'):
        optimizer = optimizers.RMSprop(lr=learningRate)
    elif (method == 'sgd'):
        optimizer = optimizers.SGD(lr=learningRate)
    elif (method == 'momentum'):
        optimizer = optimizers.MomentumSGD(lr=learningRate)
    elif (method == 'adagrad'):
        optimizer = optimizers.AdaGrad(lr=learningRate)
    elif (method == 'adadelta'):
        optimizer = optimizers.AdaDelta()
    optimizer.setup(model)
    return optimizer
Example #20
def train(max_epoch, train_size, valid_size):
    model = RNN()

    # 1000 samples for training, 1000 samples for testing
    x_train, x_test, y_train, y_test = dataset(train_size + valid_size, train_size)

    optimizer = optimizers.RMSprop(lr=0.03)
    optimizer.setup(model)

    early_stopping = 20
    min_valid_loss = 1e8
    min_epoch = 0

    train_loss, valid_loss = [], []

    for epoch in range(1, max_epoch):
        _y = model(x_test)
        y = _y.data
        y = np.array([1 - y, y], dtype='f').T[0]
        accuracy = F.accuracy(y, y_test.data.flatten()).data

        _train_loss = F.sigmoid_cross_entropy(model(x_train), y_train).data
        _valid_loss = F.sigmoid_cross_entropy(_y, y_test).data
        train_loss.append(_train_loss)
        valid_loss.append(_valid_loss)

        # stop training once valid_loss has not improved for 20 consecutive epochs
        if min_valid_loss >= _valid_loss:
            min_valid_loss = _valid_loss
            min_epoch = epoch

        elif epoch - min_epoch >= early_stopping:
            break

        optimizer.update(forward, x_train, y_train, model)
        print('epoch: {} acc: {} loss: {} valid_loss: {}'.format(epoch, accuracy, _train_loss, _valid_loss))

    loss_plot(train_loss, valid_loss)
    serializers.save_npz('model.npz', model)
Example #21
    def set_params(self, params):

        self.gpu = params.get('gpu', False)
        self.learning_rate = params.get('learning_rate', 0.00025)
        self.decay_rate = params.get('decay_rate', 0.95)
        self.discount = params.get('discount', 0.95)
        self.clip_err = params.get('clip_err', False)
        self.target_net_update = params.get('target_net_update', 10000)
        self.double_DQN = params.get('double_DQN', False)

        # setting up various possible gradient update algorithms
        opt = params.get('optim_name', 'ADAM')
        if opt == 'RMSprop':
            self.optimizer = optimizers.RMSprop(lr=self.learning_rate,
                                                alpha=self.decay_rate)

        elif opt == 'ADADELTA':
            print(
                "Supplied learning rate not used with ADADELTA gradient update method"
            )
            self.optimizer = optimizers.AdaDelta()

        elif opt == 'ADAM':
            self.optimizer = optimizers.Adam(alpha=self.learning_rate)

        elif opt == 'SGD':
            self.optimizer = optimizers.SGD(lr=self.learning_rate)

        else:
            print('The requested optimizer is not supported!!!')
            exit()

        if self.clip_err is not False:
            self.optimizer.add_hook(
                chainer.optimizer.GradientClipping(self.clip_err))

        self.optim_name = params['optim_name']
Example #22
def main():
    parser = argparse.ArgumentParser(description='Chainer: WGAN MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--n_hidden',
                        '-n',
                        type=int,
                        default=100,
                        help='Number of hidden dim of units (z)')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--display_interval',
                        type=int,
                        default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_hidden))
    print('# epoch: {}'.format(args.epoch))
    print('')

    train, _ = datasets.get_mnist(withlabel=False, ndim=3,
                                  scale=1.)  # ndim=3 : (ch,width,height)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    #z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, args.n_hidden), args.batchsize)
    z_iter = RandomNoiseIterator(UniformNoiseGenerator(-1, 1, args.n_hidden),
                                 args.batchsize)

    # make the model
    gen = Generator(n_hidden=args.n_hidden)
    critic = Critic()

    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()  # copy the model to the GPU
        critic.to_gpu()

    # make the optimizer
    optimizer_generator = optimizers.RMSprop(lr=0.00005)
    optimizer_critic = optimizers.RMSprop(lr=0.00005)
    optimizer_generator.setup(gen)
    optimizer_critic.setup(critic)

    updater = WGANUpdater(iterator=train_iter,
                          noise_iterator=z_iter,
                          optimizer_generator=optimizer_generator,
                          optimizer_critic=optimizer_critic,
                          device=args.gpu)

    epoch_interval = (10, 'epoch')
    display_interval = (10, 'iteration')

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=epoch_interval)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_epoch_{.updater.epoch}.npz'),
                   trigger=epoch_interval)
    trainer.extend(extensions.snapshot_object(
        critic, 'critic_epoch_{.updater.epoch}.npz'),
                   trigger=epoch_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
        'critic/loss/fake', 'generator/loss'
    ]),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(out_generated_image(gen, 10, 10, args.seed, args.out,
                                       args.n_hidden),
                   trigger=epoch_interval)

    trainer.run()
Example #23
    def create(self):
        kwargs = {'eps_inside_sqrt': self.eps_inside_sqrt}
        if self.dtype == numpy.float16:
            kwargs['eps'] = 1e-6
        return optimizers.RMSprop(0.1, **kwargs)
Example #24
def do_train(
        db_model,
        root_output_dir,
        filename,
        vocabulary,
        use_mecab=False,
        initmodel=None,
        resume=False,
        rnn_size=128,
        learning_rate=2e-3,
        learning_rate_decay=0.97,
        learning_rate_decay_after=10,
        decay_rate=0.95,
        dropout=0.0,
        seq_length=50,
        batchsize=50,  # minibatch size
        grad_clip=5,  # gradient norm threshold to clip
        interruptable=None
):
    logger.info('Start LSTM training. model_id: {0}, use_mecab: {1}, initmodel: {2}, gpu: {3}'
                .format(db_model.id, use_mecab, initmodel, db_model.gpu))
    n_epoch = db_model.epoch
    bprop_len = seq_length  # length of truncated BPTT
    grad_clip = grad_clip

    (model_dir, model_name) = os.path.split(db_model.network_path)
    model_name = re.sub(r"\.py$", "", model_name)
    model_module = load_module(model_dir, model_name)

    if db_model.trained_model_path is None:
        db_model.trained_model_path = os.path.join(root_output_dir, model_name)
    if not os.path.exists(db_model.trained_model_path):
        os.mkdir(db_model.trained_model_path)

    vocab = {}
    vocab_size = 0

    if vocabulary != '':
        vocab = pickle.load(open(vocabulary, 'rb'))
        vocab_size = len(vocab)

    if db_model.gpu >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if db_model.gpu >= 0 else np

    train_data, words, vocab = load_data(filename, use_mecab, vocab)
    pickle.dump(vocab, open('%s/vocab2.bin' % db_model.trained_model_path, 'wb'))

    # Prepare model
    lm = model_module.Network(len(vocab), rnn_size, dropout_ratio=dropout, train=False)
    model = L.Classifier(lm)
    model.compute_accuracy = False  # we only want the perplexity
    for param in model.params():
        data = param.data
        data[:] = np.random.uniform(-0.1, 0.1, data.shape)

    # Setup optimizer
    optimizer = optimizers.RMSprop(lr=learning_rate, alpha=decay_rate, eps=1e-8)
    optimizer.setup(model)

    # Load pretrained model
    if initmodel is not None and initmodel.find("model") > -1:
        if vocabulary == '':
            logger.info("Load model from : " + db_model.trained_model_path + os.sep + initmodel)
            serializers.load_npz(os.path.join(db_model.trained_model_path, initmodel), model)
        else:
            lm2 = model_module.Network(vocab_size, rnn_size, dropout_ratio=dropout, train=False)
            model2 = L.Classifier(lm2)
            model2.compute_accuracy = False  # we only want the perplexity
            logger.info("Load model from : " + db_model.trained_model_path + os.sep + initmodel)
            serializers.load_npz(os.path.join(db_model.trained_model_path, initmodel), model2)
            copy_model(model2, model)
        # delete old models
        try:
            shutil.copyfile(os.path.join(db_model.trained_model_path, initmodel),
                            os.path.join(db_model.trained_model_path, 'previous_' + initmodel))
        except Exception as e:
            logger.exception('Could not copy {0} to {1}. {2}'
                             .format(os.path.join(db_model.trained_model_path, initmodel),
                                     os.path.join(db_model.trained_model_path,
                                                  'previous_' + initmodel), e))
            raise e
        pretrained_models = sorted(os.listdir(db_model.trained_model_path), reverse=True)
        for m in pretrained_models:
            if m.startswith('model') and initmodel != m:
                try:
                    os.remove(os.path.join(db_model.trained_model_path, m))
                except Exception as e:
                    logger.exception('Could not remove old trained model: {0} {1}'
                                     .format(os.path.join(db_model.trained_model_path, m), e))
                    raise e

    if db_model.gpu >= 0:
        cuda.get_device(db_model.gpu).use()
        model.to_gpu()

    # Load pretrained optimizer
    resume_path = os.path.join(db_model.trained_model_path, 'resume')
    if resume:
        logger.info("Load optimizer state from : {}".format(os.path.join(db_model.trained_model_path, 'resume.state')))
        serializers.load_npz(os.path.join(resume_path, 'resume.model'), model)
        serializers.load_npz(os.path.join(resume_path, 'resume.state'), optimizer)

    db_model.is_trained = 1
    db_model.update_and_commit()

    # Learning loop
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    if resume:
        resume_data = json.load(open(os.path.join(resume_path, 'resume.json')))
        initmodel = resume_data['initmodel']
        cur_log_perp = xp.zeros(())
        cur_log_perp += resume_data['cur_log_perp']
        loss_for_graph = xp.zeros(())
        loss_for_graph += resume_data['loss_for_graph']
        iteration_from = resume_data['i']
        epoch = (iteration_from + 1) / jump
    else:
        cur_log_perp = xp.zeros(())
        loss_for_graph = xp.zeros(())
        iteration_from = 0
        epoch = 0

    start_at = time.time()
    cur_at = start_at
    accum_loss = 0
    batch_idxs = list(range(batchsize))

    graph_tsv_path = os.path.join(db_model.trained_model_path, 'line_graph.tsv')
    train_log_path = os.path.join(db_model.trained_model_path, 'train.log')
    if not resume:
        with open(graph_tsv_path, 'a') as fp:
            fp.write('count\tepoch\tperplexity\n')

        with open(train_log_path, 'a') as fp:
            fp.write(json.dumps({
                'type': 'text',
                'text': "going to train {} iterations".format(jump * n_epoch)
            }) + '\n')

    # delete layer visualization cache
    # This removes every directory under trained_model_path.
    for f in os.listdir(db_model.trained_model_path):
        if os.path.isdir(os.path.join(db_model.trained_model_path, f)):
            try:
                shutil.rmtree(os.path.join(db_model.trained_model_path, f))
            except Exception as e:
                logger.exception('Could not remove visualization cache: {0} {1}'
                                 .format(os.path.join(db_model.trained_model_path, f), e))
                raise e
    # Because of the above, deleting the resume file below is unnecessary.
    # remove_resume_file(db_model.trained_model_path)

    for i in six.moves.range(iteration_from, jump * n_epoch):
        # This is intended to run at the point where a batch has just finished.
        if interruptable.is_interrupting() and isinstance(accum_loss, int):
            os.mkdir(resume_path)
            serializers.save_npz(os.path.join(resume_path, 'resume.state'), optimizer)
            serializers.save_npz(os.path.join(resume_path, 'resume.model'), model)
            json.dump({
                'i': i,
                'initmodel': initmodel,
                'cur_log_perp': float(cur_log_perp),
                'loss_for_graph': float(loss_for_graph),
                'epoch': epoch
            }, open(os.path.join(resume_path, 'resume.json'), 'w'))
            interruptable.set_interruptable()
            while True:
                time.sleep(1)
        x = chainer.Variable(
            xp.asarray([train_data[(jump * j + i) % whole_len] for j in batch_idxs]))
        t = chainer.Variable(
            xp.asarray([train_data[(jump * j + i + 1) % whole_len] for j in batch_idxs]))
        loss_i = model(x, t)
        accum_loss += loss_i
        loss_for_graph += loss_i.data
        cur_log_perp += loss_i.data

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            accum_loss = 0
            optimizer.update()

        if (i + 1) % 100 == 0:
            now = time.time()
            throughput = 10000. / (now - cur_at)
            perp = math.exp(float(cur_log_perp) / 10000)
            with open(train_log_path, 'a') as fp:
                fp.write(json.dumps({
                    'type': 'log',
                    'log': 'iter {} training perplexity: {:.2f} ({:.2f} iters/sec)'.format(i + 1, perp, throughput),
                    'time_stamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'epoch': epoch
                }) + '\n')
            cur_at = now
            cur_log_perp.fill(0)

        if (i + 1) % 100 == 0:
            perp_for_graph = math.exp(float(loss_for_graph) / 100)
            with open(graph_tsv_path, 'a') as fp:
                fp.write('{}\t{}\t{:.2f}\n'.format(i + 1, epoch, perp_for_graph))
            loss_for_graph.fill(0)

        if (i + 1) % jump == 0:
            epoch += 1
            now = time.time()
            cur_at += time.time() - now  # skip time of evaluation

            with open(train_log_path, 'a') as fp:
                if epoch >= 6:
                    optimizer.lr /= 1.2
                    fp.write(json.dumps({
                        'type': 'data',
                        'text': 'learning rate = {:.10f}'.format(optimizer.lr),
                    }) + '\n')
                fp.write(json.dumps({
                    'type': 'text',
                    'text': '--- epoch: {} ------------------------'.format(epoch),
                }) + '\n')
            # Save the model and the optimizer
            serializers.save_npz(os.path.join(db_model.trained_model_path,
                                              'model%04d' % epoch), model)
            serializers.save_npz(os.path.join(db_model.trained_model_path,
                                              'rnnlm.state'), optimizer)

        sys.stdout.flush()
    if os.path.exists(os.path.join(db_model.trained_model_path, 'previous_' + initmodel)):
        # delete backup file
        try:
            os.remove(os.path.join(db_model.trained_model_path, 'previous_' + initmodel))
        except Exception as e:
            logger.exception('Could not remove backuped file: {0} {1}'
                             .format(os.path.join(db_model.trained_model_path,
                                                  'previous_' + initmodel), e))
            raise e
    with open(train_log_path, 'a') as fp:
        fp.write(json.dumps({
            'type': 'text',
            'text': '===== finish train. =====',
        }) + '\n')
    db_model.is_trained = 2
    db_model.pid = None
    db_model.gpu = None
    db_model.update_and_commit()
    interruptable.clear_interrupt()
    interruptable.terminate()
    logger.info('Finish LSTM train. model_id: {0}'.format(db_model.id))
Example #25
    def create(self):
        if self.dtype == numpy.float16:
            kwargs = {'eps': 1e-6}
        else:
            kwargs = {}
        return optimizers.RMSprop(0.1, **kwargs)
Example #26
bprop_len   = args.seq_length
grad_clip   = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin'%args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len    = train_data.shape[0]
jump         = whole_len // batchsize
epoch        = 0
start_at     = time.time()
cur_at       = start_at
state        = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss   = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()).astype(np.float32))
Example #27
                                          repeat=False,
                                          shuffle=False)
    test_iter = iterators.SerialIterator(test,
                                         batch_size,
                                         repeat=False,
                                         shuffle=False)

    # model = LeNet5()
    model = CNN()
    model = L.Classifier(model)
    device = -1

    if device >= 0:
        model.to_gpu(device)

    optimizer = optimizers.RMSprop()

    optimizer.setup(model)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)

    trainer = training.Trainer(updater, (max_epochs, 'epoch'), out=output_file)

    # add extensions to current trainer
    trainer.extend(extensions.LogReport())  # save log files automatically
    # automatically serialize the state periodically
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch--{.updater.epoch}'))
    # evaluate models on validation set
Example #28
def train(
        output_path,
        train_data,
        words,
        vocab,
        fine_tuning=True,
        pretrained_vocab_size=0,
        gpu=-1,
        n_epoch=100,
        rnn_size=128,
        learning_rate=2e-3,
        learning_rate_decay=0.97,
        learning_rate_decay_after=10,
        decay_rate=0.95,
        dropout=0.0,
        bprop_len=50,
        batchsize=50,  # minibatch size
        grad_clip=5  # gradient norm threshold to clip
):

    xp = cuda.cupy if gpu >= 0 else np

    # Prepare model
    lm = Network(len(vocab), rnn_size, dropout_ratio=dropout, train=True)
    model = L.Classifier(lm)
    model.compute_accuracy = False  # we only want the perplexity

    # load pre-trained model
    pretrained_model_path = os.path.join(output_path, 'model.npz')
    if fine_tuning and os.path.exists(pretrained_model_path):
        lm2 = Network(pretrained_vocab_size,
                      rnn_size,
                      dropout_ratio=dropout,
                      train=True)
        model2 = L.Classifier(lm2)
        model2.compute_accuracy = False
        serializers.load_npz(pretrained_model_path, model2)
        copy_model(model2, model)

    # Setup optimizer
    optimizer = optimizers.RMSprop(lr=learning_rate,
                                   alpha=decay_rate,
                                   eps=1e-8)
    optimizer.setup(model)

    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()

    # Learning loop
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    cur_log_perp = xp.zeros(())
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    accum_loss = 0
    batch_idxs = list(range(batchsize))
    print('Going to train {} iterations'.format(jump * n_epoch))

    for i in six.moves.range(jump * n_epoch):
        x = chainer.Variable(
            xp.asarray(
                [train_data[(jump * j + i) % whole_len] for j in batch_idxs]))
        t = chainer.Variable(
            xp.asarray([
                train_data[(jump * j + i + 1) % whole_len] for j in batch_idxs
            ]))
        loss_i = model(x, t)
        accum_loss += loss_i
        cur_log_perp += loss_i.data

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            accum_loss = 0
            optimizer.update()

        if (i + 1) % 500 == 0:
            now = time.time()
            throughput = 10000. / (now - cur_at)
            perp = math.exp(float(cur_log_perp) / 10000)
            print('epoch {} iter {} training perplexity: {:.2f} ({:.2f} iters/sec)'.format(
                epoch, i + 1, perp, throughput))
            cur_at = now
            cur_log_perp.fill(0)

        if (i + 1) % jump == 0:
            epoch += 1
            now = time.time()
            cur_at += time.time() - now  # skip time of evaluation

            if epoch >= 6:
                optimizer.lr /= 1.2
                print('learning rate = {:.10f}'.format(optimizer.lr))
            # Save the model and the optimizer
            serializers.save_npz('{}/model.npz'.format(output_path), model)
            serializers.save_npz('{}/rnnlm.state.npz'.format(output_path),
                                 optimizer)

    print('===== finish train. =====')
Example #29
    def train(self, x, t, train=True):
        h = self.__call__(x, train=train)
        loss = chainer.functions.mean_squared_error(h, t)
        return loss


print "Loading dataset"
X, y = loadDataset()
X, y = X.astype(np.float32), y.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1337)
y_train.shape, y_test.shape = (y_train.shape[0], 1), (y_test.shape[0], 1)

alex = Alex()
optimizer = optimizers.RMSprop(lr=0.001, alpha=0.9)
# optimizer = optimizers.RMSprop()
optimizer.setup(alex)
for i in range(150):
    optimizer.zero_grads()
    alex.zerograds()
    loss = alex.train(chainer.Variable(X_train), chainer.Variable(y_train))
    eval_loss = F.mean_squared_error(alex(chainer.Variable(X_test)),
                                     chainer.Variable(y_test))
    print "epoch: %d, eval, train loss: %f, eval loss: %f" % (i, loss.data,
                                                              eval_loss.data)
    loss.backward()
    optimizer.clip_grads(2.0)
    optimizer.update()
Example #30
    def create(self):
        return optimizers.RMSprop(0.1)