def train(self, nz=100, batch_size=64, epochs=10000, gpu=-1, **kwargs):
    # CIFAR-10 images in range [-1, 1] (tanh generator outputs)
    train, _ = datasets.get_cifar10(withlabel=False, ndim=3, scale=2)
    train -= 1.0

    train_iter = chainer.iterators.SerialIterator(train, batch_size)
    z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, nz), batch_size)

    optimizer_generator = optimizers.RMSprop(lr=0.00005)
    optimizer_critic = optimizers.RMSprop(lr=0.00005)
    optimizer_generator.setup(self.generator)
    optimizer_critic.setup(self.critic)

    updater = WassersteinGANUpdater(
        iterator=train_iter,
        noise_iterator=z_iter,
        optimizer_generator=optimizer_generator,
        optimizer_critic=optimizer_critic,
        device=gpu)

    trainer = training.Trainer(updater, stop_trigger=(epochs, 'epoch'),
                               out=self.model_path)
    trainer.extend(training.extensions.ProgressBar())
    trainer.extend(training.extensions.LogReport(trigger=(1, 'iteration')))
    trainer.extend(GeneratorSample(), trigger=(1, 'epoch'))
    trainer.extend(
        training.extensions.PrintReport([
            'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
            'critic/loss/fake', 'generator/loss'
        ]))
    trainer.run()
def train(args):
    nz = args.nz
    batch_size = args.batch_size
    epochs = args.epochs
    gpu = args.gpu

    # CIFAR-10 images in range [-1, 1] (tanh generator outputs);
    # scale=2 gives [0, 2], so drop the labels and shift by -1.
    train, _ = datasets.get_cifar10(withlabel=False, ndim=3, scale=2)
    train -= 1.0

    train_iter = iterators.SerialIterator(train, batch_size)
    z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, nz), batch_size)

    optimizer_generator = optimizers.RMSprop(lr=0.00005)
    optimizer_critic = optimizers.RMSprop(lr=0.00005)

    generator = Generator()
    optimizer_generator.setup(generator)
    optimizer_critic.setup(Critic())

    updater = WassersteinGANUpdater(
        iterator=train_iter,
        noise_iterator=z_iter,
        optimizer_generator=optimizer_generator,
        optimizer_critic=optimizer_critic,
        device=gpu)

    trainer = training.Trainer(updater, stop_trigger=(epochs, 'epoch'), out=args.out)
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(GeneratorSample(), trigger=(1, 'epoch'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
            'critic/loss/fake', 'generator/loss'
        ]))

    # Take a snapshot at each epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(1, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(generator, 'model_epoch_{.updater.epoch}'),
        trigger=(1, 'epoch'))

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def setup_optimizer(self, optimizer_name, gradient_clipping=3,
                    weight_decay=0.00001, **kwargs):
    # set optimizer
    if optimizer_name == "Adam":
        self.opt = optimizers.Adam(**kwargs)
    elif optimizer_name == "AdaDelta":
        self.opt = optimizers.AdaDelta(**kwargs)
    elif optimizer_name == "AdaGrad":
        self.opt = optimizers.AdaGrad(**kwargs)
    elif optimizer_name == "RMSprop":
        self.opt = optimizers.RMSprop(**kwargs)
    elif optimizer_name == "RMSpropGraves":
        self.opt = optimizers.RMSpropGraves(**kwargs)
    elif optimizer_name == "SGD":
        self.opt = optimizers.SGD(**kwargs)
    elif optimizer_name == "MomentumSGD":
        self.opt = optimizers.MomentumSGD(**kwargs)

    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))

    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay
    }
def optimizer(opt_str):
    """Infer the optimizer from the input string."""
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
def optimizer(opt_str):
    """Infer the optimizer from the input string."""
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0]))

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
def get_optimizer(model, opt, lr=None, adam_alpha=None, adam_beta1=None,
                  adam_beta2=None, adam_eps=None, weight_decay=None):
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(
            alpha=adam_alpha, beta1=adam_beta1, beta2=adam_beta2, eps=adam_eps)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop(lr=lr)
    else:
        raise Exception('No optimizer is selected')

    # The first model as the master model
    optimizer.setup(model)
    if opt == 'MomentumSGD':
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    return optimizer
def __init__(self, model=None, lr=0.045, decay=0.9, eps=1.0,
             weight_decay=4.0e-5, clip=2.0):
    super(OptimizerGooglenetV3, self).__init__(model)
    optimizer = optimizers.RMSprop(lr, decay, eps)
    weight_decay = chainer.optimizer.WeightDecay(weight_decay)
    clip = chainer.optimizer.GradientClipping(clip)
    optimizer.setup(self.model)
    optimizer.add_hook(weight_decay)
    optimizer.add_hook(clip)
    self.optimizer = optimizer
def init_optimizer(self, clip, decay, lr=0.001, alpha=0.9, eps=1e-6):
    self.optimizer = optimizers.RMSprop(lr=lr, alpha=alpha, eps=eps)
    self.optimizer.setup(self.model)
    # Clip gradient
    self.optimizer.add_hook(chainer.optimizer.GradientClipping(clip))
    # L2 regularization
    self.optimizer.add_hook(chainer.optimizer.WeightDecay(decay))
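# A minimal standalone sketch of the same pattern, assuming only Chainer itself;
# the L.Linear model and the hook values below are illustrative, not taken from
# the original code.
import chainer
import chainer.links as L
from chainer import optimizers

model = L.Linear(3, 2)
opt = optimizers.RMSprop(lr=0.001, alpha=0.9, eps=1e-6)
opt.setup(model)
opt.add_hook(chainer.optimizer.GradientClipping(5.0))  # clip the gradient norm
opt.add_hook(chainer.optimizer.WeightDecay(1e-5))      # L2 regularization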
def test_share_states(self):
    model = L.Linear(2, 2)

    opt_a = optimizers.RMSprop()
    opt_a.setup(model)
    arrays = async.share_states_as_shared_arrays(opt_a)

    opt_b = optimizers.RMSprop()
    opt_b.setup(model)

    opt_c = optimizers.RMSprop()
    opt_c.setup(model)

    def assert_different_pointers(a, b):
        self.assertTrue(a)
        for param_name in a:
            self.assertTrue(a[param_name])
            for state_name in a[param_name]:
                self.assertTrue(
                    isinstance(a[param_name][state_name], np.ndarray))
                self.assertTrue(
                    isinstance(b[param_name][state_name], np.ndarray))
                self.assertNotEqual(a[param_name][state_name].ctypes.data,
                                    b[param_name][state_name].ctypes.data)

    assert_different_pointers(opt_a._states, opt_b._states)
    assert_different_pointers(opt_a._states, opt_c._states)

    async.set_shared_states(opt_b, arrays)
    async.set_shared_states(opt_c, arrays)

    def assert_same_pointers(a, b):
        self.assertTrue(a)
        for param_name in a:
            self.assertTrue(a[param_name])
            for state_name in a[param_name]:
                self.assertTrue(
                    isinstance(a[param_name][state_name], np.ndarray))
                self.assertTrue(
                    isinstance(b[param_name][state_name], np.ndarray))
                self.assertEqual(a[param_name][state_name].ctypes.data,
                                 b[param_name][state_name].ctypes.data)

    assert_same_pointers(opt_a._states, opt_b._states)
    assert_same_pointers(opt_a._states, opt_c._states)
def test_share_states(self):
    model = L.Linear(2, 2)

    opt_a = optimizers.RMSprop()
    opt_a.setup(model)
    arrays = async_.share_states_as_shared_arrays(opt_a)

    opt_b = optimizers.RMSprop()
    # In Chainer v2, a model cannot be set up by two or more optimizers.
    opt_b.setup(copy.deepcopy(model))

    opt_c = optimizers.RMSprop()
    opt_c.setup(copy.deepcopy(model))

    # The assert_different_pointers checks were removed since they are
    # trivial now.

    async_.set_shared_states(opt_b, arrays)
    async_.set_shared_states(opt_c, arrays)

    def assert_same_pointers(a, b):
        a = a.target
        b = b.target
        for param_name, param_a in a.namedparams():
            param_b = dict(b.namedparams())[param_name]
            state_a = param_a.update_rule.state
            state_b = param_b.update_rule.state
            self.assertTrue(state_a)
            self.assertTrue(state_b)
            for state_name, state_val_a in state_a.items():
                state_val_b = state_b[state_name]
                self.assertTrue(isinstance(state_val_a, np.ndarray))
                self.assertTrue(isinstance(state_val_b, np.ndarray))
                self.assertEqual(state_val_a.ctypes.data,
                                 state_val_b.ctypes.data)

    assert_same_pointers(opt_a, opt_b)
    assert_same_pointers(opt_a, opt_c)
def get_optimizer(name, lr, momentum):
    if name == "sgd":
        return optimizers.SGD(lr=lr)
    if name == "msgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name == "nesterov":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    if name == "rmsprop":
        return optimizers.RMSprop(lr=lr, alpha=momentum)
    raise NotImplementedError()
def init_optimizer(self):
    if self.optimizer == 'SGD':
        self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                momentum=self.momentum)
    elif self.optimizer == 'AdaDelta':
        self.optimizer = optimizers.AdaDelta()
    elif self.optimizer == 'AdaGrad':
        self.optimizer = optimizers.AdaGrad()
    elif self.optimizer == 'Adam':
        self.optimizer = optimizers.Adam()
    elif self.optimizer == 'RMSprop':
        self.optimizer = optimizers.RMSprop()
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    return copy.deepcopy(optimizer)
def _build_optimizer(self, trial, model):
    # Optimizer choice as an Optuna hyperparameter. The categorical value must
    # match the branch names below exactly.
    optimizer_name = trial.suggest_categorical(
        'optimizer', ['Adam', 'AdaDelta', 'RMSprop'])
    if optimizer_name == 'Adam':
        adam_alpha = trial.suggest_loguniform('adam_alpha', 1e-5, 1e-1)
        optimizer = optimizers.Adam(alpha=adam_alpha)
    elif optimizer_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif optimizer_name == 'RMSprop':
        optimizer = optimizers.RMSprop()

    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    return optimizer
def select_optimizer(name):
    if name == "AdaGrad":
        optimizer = optimizers.AdaGrad(lr=0.001)
    elif name == "Adam":
        optimizer = chainer.optimizers.Adam(alpha=0.0001)
    elif name == "MomentumSGD":
        optimizer = optimizers.MomentumSGD(lr=0.01)
    elif name == "RMSprop":
        optimizer = optimizers.RMSprop(lr=0.01)
    elif name == "SGD":
        optimizer = optimizers.SGD(lr=0.01)
    elif name == "AdaDelta":
        optimizer = optimizers.AdaDelta(rho=0.9)
    else:
        raise Exception("Unknown network optimizer: " + name)
    return optimizer
def get_optimizer(self, name, lr, momentum=0.9):
    if name.lower() == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    if name.lower() == "smorms3":
        return optimizers.SMORMS3(lr=lr)
    if name.lower() == "adagrad":
        return optimizers.AdaGrad(lr=lr)
    if name.lower() == "adadelta":
        return optimizers.AdaDelta(rho=momentum)
    if name.lower() == "nesterov" or name.lower() == "nesterovag":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name.lower() == "rmsprop":
        return optimizers.RMSprop(lr=lr, alpha=momentum)
    if name.lower() == "momentumsgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name.lower() == "sgd":
        return optimizers.SGD(lr=lr)
def get_optimizer(name):
    """
    :type name: str
    :rtype: chainer.Optimizer
    """
    if name == "adadelta":
        opt = optimizers.AdaDelta()
    elif name == "adagrad":
        opt = optimizers.AdaGrad()
    elif name == "adam":
        opt = optimizers.Adam()
    elif name == "rmsprop":
        opt = optimizers.RMSprop()
    elif name == "smorms3":
        opt = optimizers.SMORMS3()
    else:
        raise ValueError("Unknown optimizer_name=%s" % name)
    return opt
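# Hedged usage sketch (not from the original source): wiring the optimizer
# returned by get_optimizer to a model; the Classifier/Linear model here is a
# hypothetical stand-in.
import chainer.links as L

model = L.Classifier(L.Linear(784, 10))
opt = get_optimizer("rmsprop")
opt.setup(model)  # the optimizer will now update this model's parameters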
def select_optimizer(opt_name, learning_rate):
    if opt_name == "Adam":
        return optimizers.Adam(alpha=learning_rate)
    elif opt_name == "SGD":
        return optimizers.SGD(lr=learning_rate)
    elif opt_name == "RMSpropGraves":
        return optimizers.RMSpropGraves(lr=learning_rate)
    elif opt_name == "RMSprop":
        return optimizers.RMSprop(lr=learning_rate)
    elif opt_name == "AdaDelta":
        return optimizers.AdaDelta()
    elif opt_name == "AdaGrad":
        return optimizers.AdaGrad(lr=learning_rate)
    elif opt_name == "MomentumSGD":
        return optimizers.MomentumSGD(lr=learning_rate)
    elif opt_name == "NesterovAG":
        return optimizers.NesterovAG(lr=learning_rate)
    else:
        print('please select correct optimizer')
        exit()
def setOptimizer(model, method, params):
    learningRate = params.get('learningRate', 0.001)
    alpha = params.get('alpha', 0.001)

    if method == 'adam':
        optimizer = optimizers.Adam(alpha=alpha)
    elif method == 'smorms3':
        optimizer = optimizers.SMORMS3(lr=learningRate)
    elif method == 'rmsprop':
        optimizer = optimizers.RMSprop(lr=learningRate)
    elif method == 'sgd':
        optimizer = optimizers.SGD(lr=learningRate)
    elif method == 'momentum':
        optimizer = optimizers.MomentumSGD(lr=learningRate)
    elif method == 'adagrad':
        optimizer = optimizers.AdaGrad(lr=learningRate)
    elif method == 'adadelta':
        optimizer = optimizers.AdaDelta()

    optimizer.setup(model)
    return optimizer
def train(max_epoch, train_size, valid_size):
    model = RNN()
    # Use 1000 samples for train and 1000 samples for test
    x_train, x_test, y_train, y_test = dataset(train_size + valid_size, train_size)
    optimizer = optimizers.RMSprop(lr=0.03)
    optimizer.setup(model)

    early_stopping = 20
    min_valid_loss = 1e8
    min_epoch = 0

    train_loss, valid_loss = [], []

    for epoch in range(1, max_epoch):
        _y = model(x_test)
        y = _y.data
        y = np.array([1 - y, y], dtype='f').T[0]
        accuracy = F.accuracy(y, y_test.data.flatten()).data
        _train_loss = F.sigmoid_cross_entropy(model(x_train), y_train).data
        _valid_loss = F.sigmoid_cross_entropy(_y, y_test).data
        train_loss.append(_train_loss)
        valid_loss.append(_valid_loss)

        # Stop training once valid_loss has not improved for 20 consecutive epochs
        if min_valid_loss >= _valid_loss:
            min_valid_loss = _valid_loss
            min_epoch = epoch
        elif epoch - min_epoch >= early_stopping:
            break

        optimizer.update(forward, x_train, y_train, model)
        print('epoch: {} acc: {} loss: {} valid_loss: {}'.format(
            epoch, accuracy, _train_loss, _valid_loss))

    loss_plot(train_loss, valid_loss)
    serializers.save_npz('model.npz', model)
def set_params(self, params):
    self.gpu = params.get('gpu', False)
    self.learning_rate = params.get('learning_rate', 0.00025)
    self.decay_rate = params.get('decay_rate', 0.95)
    self.discount = params.get('discount', 0.95)
    self.clip_err = params.get('clip_err', False)
    self.target_net_update = params.get('target_net_update', 10000)
    self.double_DQN = params.get('double_DQN', False)

    # setting up various possible gradient update algorithms
    opt = params.get('optim_name', 'ADAM')
    if opt == 'RMSprop':
        self.optimizer = optimizers.RMSprop(lr=self.learning_rate,
                                            alpha=self.decay_rate)
    elif opt == 'ADADELTA':
        print("Supplied learning rate not used with ADADELTA gradient update method")
        self.optimizer = optimizers.AdaDelta()
    elif opt == 'ADAM':
        self.optimizer = optimizers.Adam(alpha=self.learning_rate)
    elif opt == 'SGD':
        self.optimizer = optimizers.SGD(lr=self.learning_rate)
    else:
        print('The requested optimizer is not supported!!!')
        exit()

    if self.clip_err is not False:
        self.optimizer.add_hook(
            chainer.optimizer.GradientClipping(self.clip_err))

    self.optim_name = params['optim_name']
def main():
    parser = argparse.ArgumentParser(description='Chainer: WGAN MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden dim of units (z)')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_hidden))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # ndim=3 : (ch, width, height)
    train, _ = datasets.get_mnist(withlabel=False, ndim=3, scale=1.)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    # z_iter = RandomNoiseIterator(GaussianNoiseGenerator(0, 1, args.n_hidden),
    #                              args.batchsize)
    z_iter = RandomNoiseIterator(UniformNoiseGenerator(-1, 1, args.n_hidden),
                                 args.batchsize)

    # make the model
    gen = Generator(n_hidden=args.n_hidden)
    critic = Critic()
    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()  # copy the model to the GPU
        critic.to_gpu()

    # make the optimizer
    optimizer_generator = optimizers.RMSprop(lr=0.00005)
    optimizer_critic = optimizers.RMSprop(lr=0.00005)
    optimizer_generator.setup(gen)
    optimizer_critic.setup(critic)

    updater = WGANUpdater(iterator=train_iter,
                          noise_iterator=z_iter,
                          optimizer_generator=optimizer_generator,
                          optimizer_critic=optimizer_critic,
                          device=args.gpu)

    epoch_interval = (10, 'epoch')
    display_interval = (10, 'iteration')

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=epoch_interval)
    trainer.extend(
        extensions.snapshot_object(gen, 'gen_epoch_{.updater.epoch}.npz'),
        trigger=epoch_interval)
    trainer.extend(
        extensions.snapshot_object(critic, 'critic_epoch_{.updater.epoch}.npz'),
        trigger=epoch_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'critic/loss', 'critic/loss/real',
        'critic/loss/fake', 'generator/loss'
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        out_generated_image(gen, 10, 10, args.seed, args.out, args.n_hidden),
        trigger=epoch_interval)

    trainer.run()
def create(self):
    kwargs = {'eps_inside_sqrt': self.eps_inside_sqrt}
    if self.dtype == numpy.float16:
        kwargs['eps'] = 1e-6
    return optimizers.RMSprop(0.1, **kwargs)
def do_train(
        db_model, root_output_dir, filename, vocabulary, use_mecab=False,
        initmodel=None, resume=False,
        rnn_size=128, learning_rate=2e-3, learning_rate_decay=0.97,
        learning_rate_decay_after=10, decay_rate=0.95, dropout=0.0,
        seq_length=50,
        batchsize=50,  # minibatch size
        grad_clip=5,   # gradient norm threshold to clip
        interruptable=None):
    logger.info('Start LSTM training. model_id: {0}, use_mecab: {1}, initmodel: {2}, gpu: {3}'
                .format(db_model.id, use_mecab, initmodel, db_model.gpu))
    n_epoch = db_model.epoch
    bprop_len = seq_length  # length of truncated BPTT
    grad_clip = grad_clip

    (model_dir, model_name) = os.path.split(db_model.network_path)
    model_name = re.sub(r"\.py$", "", model_name)
    model_module = load_module(model_dir, model_name)

    if db_model.trained_model_path is None:
        db_model.trained_model_path = os.path.join(root_output_dir, model_name)
    if not os.path.exists(db_model.trained_model_path):
        os.mkdir(db_model.trained_model_path)

    vocab = {}
    vocab_size = 0
    if vocabulary != '':
        vocab = pickle.load(open(vocabulary, 'rb'))
        vocab_size = len(vocab)

    if db_model.gpu >= 0:
        cuda.check_cuda_available()
    xp = cuda.cupy if db_model.gpu >= 0 else np

    train_data, words, vocab = load_data(filename, use_mecab, vocab)
    pickle.dump(vocab, open('%s/vocab2.bin' % db_model.trained_model_path, 'wb'))

    # Prepare model
    lm = model_module.Network(len(vocab), rnn_size, dropout_ratio=dropout, train=False)
    model = L.Classifier(lm)
    model.compute_accuracy = False  # we only want the perplexity
    for param in model.params():
        data = param.data
        data[:] = np.random.uniform(-0.1, 0.1, data.shape)

    # Setup optimizer
    optimizer = optimizers.RMSprop(lr=learning_rate, alpha=decay_rate, eps=1e-8)
    optimizer.setup(model)

    # Load pretrained model
    if initmodel is not None and initmodel.find("model") > -1:
        if vocabulary == '':
            logger.info("Load model from : " + db_model.trained_model_path + os.sep + initmodel)
            serializers.load_npz(os.path.join(db_model.trained_model_path, initmodel), model)
        else:
            lm2 = model_module.Network(vocab_size, rnn_size, dropout_ratio=dropout, train=False)
            model2 = L.Classifier(lm2)
            model2.compute_accuracy = False  # we only want the perplexity
            logger.info("Load model from : " + db_model.trained_model_path + os.sep + initmodel)
            serializers.load_npz(os.path.join(db_model.trained_model_path, initmodel), model2)
            copy_model(model2, model)

        # delete old models
        try:
            shutil.copyfile(os.path.join(db_model.trained_model_path, initmodel),
                            os.path.join(db_model.trained_model_path, 'previous_' + initmodel))
        except Exception as e:
            logger.exception('Could not copy {0} to {1}. {2}'
                             .format(os.path.join(db_model.trained_model_path, initmodel),
                                     os.path.join(db_model.trained_model_path, 'previous_' + initmodel),
                                     e))
            raise e

        pretrained_models = sorted(os.listdir(db_model.trained_model_path), reverse=True)
        for m in pretrained_models:
            if m.startswith('model') and initmodel != m:
                try:
                    os.remove(os.path.join(db_model.trained_model_path, m))
                except Exception as e:
                    logger.exception('Could not remove old trained model: {0} {1}'
                                     .format(os.path.join(db_model.trained_model_path, m), e))
                    raise e

    if db_model.gpu >= 0:
        cuda.get_device(db_model.gpu).use()
        model.to_gpu()

    # Load pretrained optimizer
    resume_path = os.path.join(db_model.trained_model_path, 'resume')
    if resume:
        logger.info("Load optimizer state from : {}".format(
            os.path.join(db_model.trained_model_path, 'resume.state')))
        serializers.load_npz(os.path.join(resume_path, 'resume.model'), model)
        serializers.load_npz(os.path.join(resume_path, 'resume.state'), optimizer)

    db_model.is_trained = 1
    db_model.update_and_commit()

    # Learning loop
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    if resume:
        resume_data = json.load(open(os.path.join(resume_path, 'resume.json')))
        initmodel = resume_data['initmodel']
        cur_log_perp = xp.zeros(())
        cur_log_perp += resume_data['cur_log_perp']
        loss_for_graph = xp.zeros(())
        loss_for_graph += resume_data['loss_for_graph']
        iteration_from = resume_data['i']
        epoch = (iteration_from + 1) // jump
    else:
        cur_log_perp = xp.zeros(())
        loss_for_graph = xp.zeros(())
        iteration_from = 0
        epoch = 0
    start_at = time.time()
    cur_at = start_at
    accum_loss = 0
    batch_idxs = list(range(batchsize))

    graph_tsv_path = os.path.join(db_model.trained_model_path, 'line_graph.tsv')
    train_log_path = os.path.join(db_model.trained_model_path, 'train.log')
    if not resume:
        with open(graph_tsv_path, 'a') as fp:
            fp.write('count\tepoch\tperplexity\n')
        with open(train_log_path, 'a') as fp:
            fp.write(json.dumps({
                'type': 'text',
                'text': "going to train {} iterations".format(jump * n_epoch)
            }) + '\n')

    # delete layer visualization cache
    # This removes every directory under trained_model_path.
    for f in os.listdir(db_model.trained_model_path):
        if os.path.isdir(os.path.join(db_model.trained_model_path, f)):
            try:
                shutil.rmtree(os.path.join(db_model.trained_model_path, f))
            except Exception as e:
                logger.exception('Could not remove visualization cache: {0} {1}'
                                 .format(os.path.join(db_model.trained_model_path, f), e))
                raise e
    # Hence there is no need to delete the resume files below.
    # remove_resume_file(db_model.trained_model_path)

    for i in six.moves.range(iteration_from, jump * n_epoch):
        # Intended to run at the point where one batch has just finished.
        if interruptable.is_interrupting() and isinstance(accum_loss, int):
            os.mkdir(resume_path)
            serializers.save_npz(os.path.join(resume_path, 'resume.state'), optimizer)
            serializers.save_npz(os.path.join(resume_path, 'resume.model'), model)
            json.dump({
                'i': i,
                'initmodel': initmodel,
                'cur_log_perp': float(cur_log_perp),
                'loss_for_graph': float(loss_for_graph),
                'epoch': epoch
            }, open(os.path.join(resume_path, 'resume.json'), 'w'))
            interruptable.set_interruptable()
            while True:
                time.sleep(1)

        x = chainer.Variable(
            xp.asarray([train_data[(jump * j + i) % whole_len] for j in batch_idxs]))
        t = chainer.Variable(
            xp.asarray([train_data[(jump * j + i + 1) % whole_len] for j in batch_idxs]))
        loss_i = model(x, t)
        accum_loss += loss_i
        loss_for_graph += loss_i.data
        cur_log_perp += loss_i.data

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            accum_loss = 0
            optimizer.update()

        if (i + 1) % 100 == 0:
            now = time.time()
            throuput = 10000. / (now - cur_at)
            perp = math.exp(float(cur_log_perp) / 10000)
            with open(train_log_path, 'a') as fp:
                fp.write(json.dumps({
                    'type': 'log',
                    'log': 'iter {} training perplexity: {:.2f} ({:.2f} iters/sec)'.format(
                        i + 1, perp, throuput),
                    'time_stamp': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'epoch': epoch
                }) + '\n')
            cur_at = now
            cur_log_perp.fill(0)

        if (i + 1) % 100 == 0:
            perp_for_graph = math.exp(float(loss_for_graph) / 100)
            with open(graph_tsv_path, 'a') as fp:
                fp.write('{}\t{}\t{:.2f}\n'.format(i + 1, epoch, perp_for_graph))
            loss_for_graph.fill(0)

        if (i + 1) % jump == 0:
            epoch += 1
            now = time.time()
            cur_at += time.time() - now  # skip time of evaluation
            with open(train_log_path, 'a') as fp:
                if epoch >= 6:
                    optimizer.lr /= 1.2
                    fp.write(json.dumps({
                        'type': 'data',
                        'text': 'learning rate = {:.10f}'.format(optimizer.lr),
                    }) + '\n')
                fp.write(json.dumps({
                    'type': 'text',
                    'text': '--- epoch: {} ------------------------'.format(epoch),
                }) + '\n')

            # Save the model and the optimizer
            serializers.save_npz(os.path.join(db_model.trained_model_path, 'model%04d' % epoch), model)
            serializers.save_npz(os.path.join(db_model.trained_model_path, 'rnnlm.state'), optimizer)

        sys.stdout.flush()

    if os.path.exists(os.path.join(db_model.trained_model_path, 'previous_' + initmodel)):
        # delete backup file
        try:
            os.remove(os.path.join(db_model.trained_model_path, 'previous_' + initmodel))
        except Exception as e:
            logger.exception('Could not remove backuped file: {0} {1}'
                             .format(os.path.join(db_model.trained_model_path, 'previous_' + initmodel), e))
            raise e

    with open(train_log_path, 'a') as fp:
        fp.write(json.dumps({
            'type': 'text',
            'text': '===== finish train. =====',
        }) + '\n')

    db_model.is_trained = 2
    db_model.pid = None
    db_model.gpu = None
    db_model.update_and_commit()
    interruptable.clear_interrupt()
    interruptable.terminate()
    logger.info('Finish LSTM train. model_id: {0}'.format(db_model.id))
def create(self):
    if self.dtype == numpy.float16:
        kwargs = {'eps': 1e-6}
    else:
        kwargs = {}
    return optimizers.RMSprop(0.1, **kwargs)
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = whole_len / batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()).astype(np.float32))
                                     repeat=False, shuffle=False)
test_iter = iterators.SerialIterator(test, batch_size,
                                     repeat=False, shuffle=False)

# model = LeNet5()
model = CNN()
model = L.Classifier(model)

device = -1
if device >= 0:
    model.to_gpu(device)

optimizer = optimizers.RMSprop()
optimizer.setup(model)

updater = training.updaters.StandardUpdater(train_iter, optimizer, device=device)
trainer = training.Trainer(updater, (max_epochs, 'epoch'), out=output_file)

# add extensions to current trainer
trainer.extend(extensions.LogReport())  # save log files automatically
# automatically serialize the state periodically
trainer.extend(
    extensions.snapshot(filename='snapshot_epoch--{.updater.epoch}'))
# evaluate models on validation set
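# The snippet above ends just before the evaluation extension is added. A minimal
# sketch of the usual continuation, assuming the test_iter, model and device
# defined above (not part of the original source):
trainer.extend(extensions.Evaluator(test_iter, model, device=device))
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'main/accuracy',
     'validation/main/loss', 'validation/main/accuracy']))
trainer.run()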
def train(
        output_path, train_data, words, vocab,
        fine_tuning=True, pretrained_vocab_size=0, gpu=-1,
        n_epoch=100, rnn_size=128, learning_rate=2e-3,
        learning_rate_decay=0.97, learning_rate_decay_after=10,
        decay_rate=0.95, dropout=0.0, bprop_len=50,
        batchsize=50,  # minibatch size
        grad_clip=5):  # gradient norm threshold to clip
    xp = cuda.cupy if gpu >= 0 else np

    # Prepare model
    lm = Network(len(vocab), rnn_size, dropout_ratio=dropout, train=True)
    model = L.Classifier(lm)
    model.compute_accuracy = False  # we only want the perplexity

    # load pre-trained model
    pretrained_model_path = os.path.join(output_path, 'model.npz')
    if fine_tuning and os.path.exists(pretrained_model_path):
        lm2 = Network(pretrained_vocab_size, rnn_size, dropout_ratio=dropout, train=True)
        model2 = L.Classifier(lm2)
        model2.compute_accuracy = False
        serializers.load_npz(pretrained_model_path, model2)
        copy_model(model2, model)

    # Setup optimizer
    optimizer = optimizers.RMSprop(lr=learning_rate, alpha=decay_rate, eps=1e-8)
    optimizer.setup(model)

    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()

    # Learning loop
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    cur_log_perp = xp.zeros(())
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    accum_loss = 0
    batch_idxs = list(range(batchsize))
    print('Going to train {} iterations'.format(jump * n_epoch))

    for i in six.moves.range(jump * n_epoch):
        x = chainer.Variable(
            xp.asarray([train_data[(jump * j + i) % whole_len] for j in batch_idxs]))
        t = chainer.Variable(
            xp.asarray([train_data[(jump * j + i + 1) % whole_len] for j in batch_idxs]))
        loss_i = model(x, t)
        accum_loss += loss_i

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            model.zerograds()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            accum_loss = 0
            optimizer.update()

        if (i + 1) % 500 == 0:
            now = time.time()
            throuput = 10000. / (now - cur_at)
            perp = math.exp(float(cur_log_perp) / 10000)
            print('epoch {} iter {} training perplexity: {:.2f} ({:.2f} iters/sec)'.format(
                epoch, i + 1, perp, throuput))
            cur_at = now
            cur_log_perp.fill(0)

        if (i + 1) % jump == 0:
            epoch += 1
            now = time.time()
            cur_at += time.time() - now  # skip time of evaluation
            if epoch >= 6:
                optimizer.lr /= 1.2
                print('learning rate = {:.10f}'.format(optimizer.lr))

    # Save the model and the optimizer
    serializers.save_npz('{}/model.npz'.format(output_path), model)
    serializers.save_npz('{}/rnnlm.state.npz'.format(output_path), optimizer)
    print('===== finish train. =====')
def train(self, x, t, train=True):
    h = self.__call__(x, train=train)
    loss = chainer.functions.mean_squared_error(h, t)
    return loss


print("Loading dataset")
X, y = loadDataset()
X, y = X.astype(np.float32), y.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=1337)
y_train.shape, y_test.shape = (y_train.shape[0], 1), (y_test.shape[0], 1)

alex = Alex()
optimizer = optimizers.RMSprop(lr=0.001, alpha=0.9)
# optimizer = optimizers.RMSprop()
optimizer.setup(alex)

for i in range(150):
    optimizer.zero_grads()
    alex.zerograds()
    loss = alex.train(chainer.Variable(X_train), chainer.Variable(y_train))
    eval_loss = F.mean_squared_error(alex(chainer.Variable(X_test)),
                                     chainer.Variable(y_test))
    print("epoch: %d, eval, train loss: %f, eval loss: %f"
          % (i, loss.data, eval_loss.data))
    loss.backward()
    optimizer.clip_grads(2.0)
    optimizer.update()
def create(self):
    return optimizers.RMSprop(0.1)