def get_models():
    """Build the three RNN variants under comparison, each with its own optimizer.

    Returns:
        A ``(labels, rnns, optims)`` tuple of three parallel lists: a
        human-readable label, a ``Single_layer_RNN`` instance and a fresh
        ``Adam`` instance for every model.
    """
    # Constructor arguments shared by every variant.
    common_kwargs = dict(input_size=INPUT_SIZE,
                         hidden_size=HIDDEN_SIZE,
                         output_size=OUTPUT_SIZE)

    # Model 1: default activation (tanh per the original notes), full backward pass.
    rnn1 = Single_layer_RNN(**common_kwargs)
    optim1 = Adam()

    # Model 2: default activation, truncated backward pass.
    rnn2 = Single_layer_RNN(bptt_truncate=BPTT_TRUNCATE, **common_kwargs)
    optim2 = Adam()

    # Model 3: ReLU activation, truncated backward pass.
    rnn3 = Single_layer_RNN(bptt_truncate=BPTT_TRUNCATE,
                            activation_func='relu',
                            **common_kwargs)
    optim3 = Adam()

    labels = [
        'model1: tanh + backward',
        'model2: tanh + backward_truncate',
        # Fixed: this model is built with bptt_truncate, so the label must say so.
        'model3: relu + backward_truncate',
    ]
    rnns = [rnn1, rnn2, rnn3]
    optims = [optim1, optim2, optim3]
    return labels, rnns, optims
def setup_actor_optimizer(self):
    """Define the actor loss, its flattened gradient op and the actor optimizer.

    Sets ``self.actor_loss`` (negated mean of ``self.critic_with_actor_tf``),
    ``self.actor_grads`` (via ``U.flatgrad`` with ``self.clip_norm``) and
    ``self.actor_optimizer``, and logs the shapes and total parameter count
    of the actor's trainable variables.
    """
    logger.info('setting up actor optimizer')
    self.actor_loss = -tf.reduce_mean(self.critic_with_actor_tf)

    trainable = self.actor.trainable_vars
    actor_shapes = [var.get_shape().as_list() for var in trainable]
    # The initial value 1 keeps the product defined for scalar variables,
    # whose shape list is empty (reduce() would otherwise raise TypeError).
    actor_nb_params = sum(reduce(lambda x, y: x * y, shape, 1) for shape in actor_shapes)
    logger.info(' actor shapes: {}'.format(actor_shapes))
    logger.info(' actor params: {}'.format(actor_nb_params))

    self.actor_grads = U.flatgrad(self.actor_loss, trainable, clip_norm=self.clip_norm)
    self.actor_optimizer = Adam(var_list=trainable, beta1=0.9, beta2=0.999, epsilon=1e-08)
def train_eval(x_train, x_test, is_peeky):
    """Train a seq2seq model one epoch at a time and record test accuracy.

    Args:
        x_train: training inputs handed to ``trainer.fit``.
        x_test: test inputs, evaluated one sample at a time.
        is_peeky: when truthy a ``PeekySeq2seq`` is built, otherwise a ``Seq2seq``.

    Returns:
        A list holding the fraction of correctly answered test samples after
        each epoch.

    The targets (``t_train``, ``t_test``), ``id_to_char`` and all
    hyperparameters are read from the enclosing module scope.
    """
    model_cls = PeekySeq2seq if is_peeky else Seq2seq
    model = model_cls(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    accuracies = []
    for _ in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        n_correct = 0
        for idx in range(len(x_test)):
            # Index with a one-element list, exactly as the evaluation helper is fed.
            question = x_test[[idx]]
            correct = t_test[[idx]]
            # Only the first ten samples are evaluated verbosely.
            n_correct += eval_seq2seq(model, question, correct, id_to_char, idx < 10)

        accuracy = float(n_correct) / len(x_test)
        accuracies.append(accuracy)
        print('val acc %.3f%%' % (accuracy * 100))
    return accuracies
def main():
    """Train a CBOW model on the PTB training corpus and pickle the result.

    Writes ``cbow_params.pkl`` containing the learned word vectors (cast to
    float16) together with both vocabulary mappings.
    """
    # Hyperparameters.
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 10

    # Load the data.
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)

    # Build the model, optimizer and trainer.
    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    trainer = Trainer(model, Adam())

    # Train and plot.
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    # Persist what is needed to reuse the embeddings later.
    params = {
        'word_vecs': model.word_vecs.astype(np.float16),
        'word_to_id': word_to_id,
        'id_to_word': id_to_word,
    }
    with open('cbow_params.pkl', 'wb') as f:
        pickle.dump(params, f, -1)
def train(network, x_train, y_train, x_test, y_test, iter_times=10000, hidden_size=10, batch_size=100, lr=0.1):
    """Train ``network`` on random mini-batches and print the final accuracies.

    Args:
        network: object exposing ``grad``, ``params`` and ``accuracy``.
        x_train, y_train: training inputs and labels.
        x_test, y_test: held-out inputs and labels, used for the final report only.
        iter_times: number of mini-batch updates.
        hidden_size: accepted but not used by this function.
        batch_size: number of samples drawn (with ``np.random.choice``) per update.
        lr: learning rate given to SGD, Momentum, Nesterov and AdaGrad; the
            Adam instance that is actually used is built with 0.005.
    """
    optimizers = {
        'SGD': SGD(lr),
        'Momentum': Momentum(lr),
        'Nesterov': Nesterov(lr),
        'AdaGrad': AdaGrad(lr),
        # lr == 0.1 may make loss += ln(eps), eps == 1e-15
        'RMSProp': RMSProp(0.02),
        'Adam': Adam(0.005),
    }
    opt = optimizers['Adam']

    for step in range(iter_times):
        n_samples = x_train.shape[0]
        # Report progress roughly once per pass over the training set.
        if step % max(n_samples // batch_size, 1) == 0:
            print('{:.1%}'.format(step / iter_times))

        batch_mask = np.random.choice(n_samples, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]
        grads = network.grad(x_batch, y_batch)
        opt.update(network.params, grads)

    train_acc = network.accuracy(x_train, y_train)
    test_acc = network.accuracy(x_test, y_test)
    print('Train acc: {:.4} Test acc: {:.4}'.format(train_acc, test_acc))
def setup_critic_optimizer(self):
    """Define the critic loss, its flattened gradient op and the critic optimizer.

    The loss is the mean squared difference between ``self.normalized_critic_tf``
    and the normalized, range-clipped ``self.critic_target``. When
    ``self.critic_l2_reg`` is positive, an L2 penalty over the critic's
    non-output ``kernel`` variables is added. Sets ``self.critic_loss``,
    ``self.critic_grads`` and ``self.critic_optimizer``.
    """
    logger.info('setting up critic optimizer')
    low, high = self.return_range[0], self.return_range[1]
    normalized_critic_target_tf = tf.clip_by_value(
        normalize(self.critic_target, self.ret_rms), low, high)
    self.critic_loss = tf.reduce_mean(
        tf.square(self.normalized_critic_tf - normalized_critic_target_tf))

    if self.critic_l2_reg > 0.:
        # Regularize kernels only, and leave the output layer alone.
        critic_reg_vars = [
            var for var in self.critic.trainable_vars
            if 'kernel' in var.name and 'output' not in var.name
        ]
        for var in critic_reg_vars:
            logger.info(' regularizing: {}'.format(var.name))
        logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
        critic_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(self.critic_l2_reg),
            weights_list=critic_reg_vars
        )
        self.critic_loss += critic_reg

    trainable = self.critic.trainable_vars
    critic_shapes = [var.get_shape().as_list() for var in trainable]
    # The initial value 1 keeps the product defined for scalar variables,
    # whose shape list is empty (reduce() would otherwise raise TypeError).
    critic_nb_params = sum(reduce(lambda x, y: x * y, shape, 1) for shape in critic_shapes)
    logger.info(' critic shapes: {}'.format(critic_shapes))
    logger.info(' critic params: {}'.format(critic_nb_params))

    self.critic_grads = U.flatgrad(self.critic_loss, trainable, clip_norm=self.clip_norm)
    self.critic_optimizer = Adam(var_list=trainable, beta1=0.9, beta2=0.999, epsilon=1e-08)
def main():
    """Train ``PeekySeq2seq`` on ``addition.txt`` and print test accuracy per epoch."""
    # Load the dataset.
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reversing the input sequences reportedly improves seq2seq accuracy
    # (the original author noted scepticism about the reasoning).
    is_reverse = True
    if is_reverse:
        x_train = x_train[:, ::-1]
        x_test = x_test[:, ::-1]

    # Hyperparameters.
    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    # Build the model, optimizer and trainer.
    # model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    acc_list = []
    for _ in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        n_correct = 0
        for idx in range(len(x_test)):
            question = x_test[[idx]]
            correct = t_test[[idx]]
            # Only the first ten samples are evaluated verbosely.
            n_correct += eval_seq2seq(model, question, correct, id_to_char, idx < 10)

        acc = float(n_correct) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc * 100}')
def main():
    """Train ``AttentionSeq2seq`` on ``date.txt`` and print test accuracy per epoch."""
    # Load the data.
    (x_train, t_train), (x_test, t_test) = sequence.load_data('date.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reverse the input sequences.
    x_train = x_train[:, ::-1]
    x_test = x_test[:, ::-1]

    # Hyperparameters.
    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 256
    batch_size = 128
    max_epoch = 10
    max_grad = 5.0

    model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    acc_list = []
    for _ in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)

        n_correct = 0
        for idx in range(len(x_test)):
            question = x_test[[idx]]
            correct = t_test[[idx]]
            # Only the first ten samples are evaluated verbosely.
            n_correct += eval_seq2seq(model, question, correct, id_to_char,
                                      idx < 10, is_reverse=True)

        acc = float(n_correct) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
def main():
    """Train ``SimpleCBOW`` on a one-sentence corpus and plot the training curve."""
    # Hyperparameters.
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    # Build the corpus and the one-hot encoded training pairs.
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)

    contexts, target = create_contexts_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

    # Train and plot.
    model = SimpleCBOW(vocab_size, hidden_size)
    trainer = Trainer(model, Adam())
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()
def test_train_word2vec_model():
    """Train the word2vec (SimpleCBOW) model and print the learned word vectors."""
    window_size = 1
    hidden_size = 5  # dimensionality of the distributed word representation
    batch_size = 3
    max_epoch = 1000

    # Build the corpus.
    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)

    # Build the contexts and targets, then one-hot encode both.
    contexts, target = create_context_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)
    print("one-hot target: ", target)
    print("one-hot contexts: ", contexts)

    # CBOW model, optimizer and trainer.
    model = SimpleCBOW(vocab_size, hidden_size)
    trainer = Trainer(model, Adam())

    # Train and plot.
    trainer.fit(contexts, target, max_epoch=max_epoch, batch_size=batch_size)
    trainer.plot()

    # Read the word vectors exposed by the model and print one per word.
    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])
max_grad = 5.0

# Hold out 10% of the inputs and targets for testing.
x_test, x_train = preprocessing.divide_test_train(x_train, test_rate=0.1)
t_test, t_train = preprocessing.divide_test_train(t_train, test_rate=0.1)

model = Transformer(vocab_size, wordvec_size, head_size,
                    num_heads=8, num_encoders=1, num_decoders=1)

# Resume from previously saved parameters when the file exists.
if os.path.isfile("../pkl/myTransformer_params.pkl"):
    model.load_params("../pkl/myTransformer_params.pkl")

optimizer = Adam(lr=0.00001)
# optimizer = SGD(lr=0.00005)
# optimizer = RMSprop(lr=0.00005)
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size,
                max_grad=max_grad, eval_interval=10)
    # Save the parameters after every epoch.
    model.save_params('../pkl/myTransformer_params.pkl')
    correct_num = 0
def main():
    """Train a GNN with Adam, plot the learning curves and optionally predict.

    Command-line options select the feature dimension (``--D``), the number
    of aggregation steps (``--T``), the epoch count (``--epoch``), the batch
    size (``--batch``) and whether to write ``prediction.txt`` (``--flag``).
    The loss/accuracy figure is saved to ``src/graph/GNN_Adam.png``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8,
                        help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2,
                        help='Max step of aggregation')
    # Fixed help text: this option is the epoch count, not a dataset size.
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch', '-b', type=int, default=256,
                        help='batch size')
    parser.add_argument('--flag', '-f', action='store_true',
                        help='make prediction file')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()
    seed = 1996
    train_H, train_y, val_H, val_y, train_node_size, val_node_size = shuffle_split(
        train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    # feature dimension
    D = args.D
    # step size
    T = args.T
    # learning rate
    alpha = 0.015
    # epoch size
    max_epoch = args.epoch
    # batch size
    batch_size = args.batch
    # number of steps per epoch (the last batch may be smaller)
    train_size = len(train_H)
    iter_per_epoch = train_size//batch_size if (train_size%batch_size) == 0 else (train_size//batch_size)+1
    make_pred = args.flag

    # feature vectors for the training and validation sets
    train_x = get_feature(D, train_H, train_node_size)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = Adam(alpha=alpha, beta1=0.9, beta2=0.999, eps=1e-8)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        # Reshuffle the training set with an epoch-dependent, reproducible seed.
        np.random.seed(int(epoch*1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            # Slicing clamps at the end of the array, so the final (possibly
            # shorter) batch needs no special case.
            start, stop = num*batch_size, (num+1)*batch_size
            batch_H = train_H[start:stop]
            batch_x = train_x[start:stop]
            batch_y = train_y[start:stop]

            # Average the per-sample gradients over the batch, then update.
            batch_grads = None
            for idx in range(len(batch_H)):
                grad = model.get_gradient(batch_x[idx], batch_H[idx], batch_y[idx])
                if batch_grads is None:
                    batch_grads = {key: np.zeros_like(val) for key, val in grad.items()}
                for key in grad:
                    batch_grads[key] += (grad[key] / len(batch_H))
            optimizer.update(model.params, batch_grads)

        # train loss and average accuracy
        loss = 0
        train_pred = np.zeros((len(train_y), 1))
        for idx in range(len(train_H)):
            loss += model.loss(train_x[idx], train_H[idx], train_y[idx]) / len(train_H)
            predict = 0 if model.predict(train_x[idx], train_H[idx]) < 1/2 else 1
            train_pred[idx] = predict
        train_score = avg_acc(train_y, train_pred)

        # validation loss and average accuracy
        val_loss = 0
        val_pred = np.zeros((len(val_y), 1))
        for idx in range(len(val_H)):
            val_loss += model.loss(val_x[idx], val_H[idx], val_y[idx]) / len(val_H)
            predict = 0 if model.predict(val_x[idx], val_H[idx]) < 1/2 else 1
            val_pred[idx] = predict
        val_score = avg_acc(val_y, val_pred)

        print('epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'.format(epoch+1, loss, val_loss, train_score, val_score))
        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    # Learning curves: loss on the left, average accuracy on the right.
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10,4))
    x = np.arange(len(train_loss_list))
    ax1.plot(x, train_loss_list, label='train')
    x = np.arange(len(val_loss_list))
    ax1.plot(x, val_loss_list, label='validation')
    ax1.legend()
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    x = np.arange(len(train_acc_list))
    ax2.plot(x, train_acc_list, label='train')
    x = np.arange(len(val_acc_list))
    ax2.plot(x, val_acc_list, label='validation')
    ax2.legend()
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('average accuracy')
    fig.savefig('src/graph/GNN_Adam.png')
    plt.close()

    if make_pred:
        # predict the test data, one 0/1 label per line
        test_H, test_node_size = get_test()
        test_x = get_feature(D, test_H, test_node_size)
        with open('prediction.txt', mode='w') as f:
            for idx in range(len(test_node_size)):
                predict = 0 if model.predict(test_x[idx], test_H[idx]) < 1/2 else 1
                f.write('{}'.format(predict) + '\n')
def df(x, y):
    """Return the pair ``(x / 10.0, 2.0*y)`` used as the gradient in this demo."""
    grad_x = x / 10.0
    grad_y = 2.0*y
    return grad_x, grad_y


# Common starting point for every optimizer.
init_pos = (-7.0, 2.0)
params = {'x': init_pos[0], 'y': init_pos[1]}
grads = {'x': 0, 'y': 0}

optimizers = OrderedDict([
    ("SGD", SGD(lr=0.95)),
    ("Momentum", Momentum(lr=0.1)),
    ("AdaGrad", AdaGrad(lr=1.5)),
    ("Adam", Adam(lr=0.3)),
])

idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    # Restart from the same initial position for each optimizer.
    params['x'], params['y'] = init_pos[0], init_pos[1]

    # Record the position before each of 30 update steps.
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])

        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)
from dataset.mnist import load_mnist from common.util import smooth_curve from common.multi_layer_net import MultiLayerNet from common.optimizer import Adam (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True) train_size = x_train.shape[0] batch_size = 128 max_iterations = 2000 optimizers = {} optimizers['SGD'] = SGD() optimizers['Momentum'] = Momentum() optimizers['AdaGrad'] = AdaGrad() optimizers['Adam'] = Adam() networks = {} train_loss = {} for key in optimizers.keys(): networks[key] = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100], output_size=10) train_loss[key] = [] for i in range(max_iterations): batch_mask = np.random.choice(train_size, batch_size) x_batch = x_train[batch_mask] t_batch = t_train[batch_mask] for key in networks.keys():
[0 0 0 0 1 0 0] [0 1 0 0 0 0 0] [0 0 0 0 0 1 0]] contexts: [[[1 0 0 0 0 0 0] [0 0 1 0 0 0 0]] [[0 1 0 0 0 0 0] [0 0 0 1 0 0 0]] [[0 0 1 0 0 0 0] [0 0 0 0 1 0 0]] [[0 0 0 1 0 0 0] [0 1 0 0 0 0 0]] [[0 0 0 0 1 0 0] [0 0 0 0 0 1 0]] [[0 1 0 0 0 0 0] [0 0 0 0 0 0 1]]]""" model = SimpleCBOW(vocab_size, hidden_size) optimizier = Adam() trainer = Trainer(model, optimizier) trainer.fit(contexts, target, max_epoch, batch_size) trainer.plot()
class DDPG(object):
    """DDPG agent that wires an actor and a critic into a TensorFlow graph.

    The constructor creates the input placeholders, optional observation and
    return normalizers, target copies of both networks, the loss/gradient ops
    and the optimizers. A session must be attached with ``initialize`` before
    ``pi``, ``train`` or any other method that runs graph ops is called.
    """

    def __init__(self, actor, critic, memory, observation_shape, action_shape, param_noise=None, action_noise=None,
                 gamma=0.99, tau=0.001, normalize_returns=False, enable_popart=False, normalize_observations=True,
                 batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.), return_range=(-np.inf, np.inf),
                 critic_l2_reg=0., actor_lr=1e-4, critic_lr=1e-3, clip_norm=None, reward_scale=1.):
        """Store the configuration and build every graph component.

        Args:
            actor, critic: callable network objects; they are copied to
                create the target (and parameter-noise) networks.
            memory: replay buffer exposing ``append`` and ``sample``.
            observation_shape, action_shape: tuples appended to a leading
                ``None`` dimension to form the placeholder shapes.
            param_noise, action_noise: optional exploration noise objects.
            gamma: discount applied to the target critic's value.
            tau: rate handed to ``get_target_updates`` for the soft updates.
            normalize_returns, enable_popart, normalize_observations: toggles
                for running-statistics normalization and Pop-Art.
            batch_size: number of transitions sampled per training step.
            observation_range, action_range, return_range: clipping bounds.
            critic_l2_reg: L2 coefficient for the critic (0 disables it).
            actor_lr, critic_lr: step sizes passed to the optimizers' ``update``.
            clip_norm: gradient clipping norm forwarded to ``U.flatgrad``.
            reward_scale: factor applied to rewards before they are stored.
        """
        # Inputs.
        self.obs0 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs0')
        self.obs1 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs1')
        self.terminals1 = tf.placeholder(tf.float32, shape=(None, 1), name='terminals1')
        self.rewards = tf.placeholder(tf.float32, shape=(None, 1), name='rewards')
        self.actions = tf.placeholder(tf.float32, shape=(None,) + action_shape, name='actions')
        self.critic_target = tf.placeholder(tf.float32, shape=(None, 1), name='critic_target')
        self.param_noise_stddev = tf.placeholder(tf.float32, shape=(), name='param_noise_stddev')

        # Parameters.
        self.gamma = gamma
        self.tau = tau
        self.memory = memory
        self.normalize_observations = normalize_observations
        self.normalize_returns = normalize_returns
        self.action_noise = action_noise
        self.param_noise = param_noise
        self.action_range = action_range
        self.return_range = return_range
        self.observation_range = observation_range
        self.critic = critic
        self.actor = actor
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.clip_norm = clip_norm
        self.enable_popart = enable_popart
        self.reward_scale = reward_scale
        self.batch_size = batch_size
        self.stats_sample = None
        self.critic_l2_reg = critic_l2_reg

        # Observation normalization.
        if self.normalize_observations:
            with tf.variable_scope('obs_rms'):
                self.obs_rms = RunningMeanStd(shape=observation_shape)
        else:
            self.obs_rms = None
        normalized_obs0 = tf.clip_by_value(normalize(self.obs0, self.obs_rms),
                                           self.observation_range[0], self.observation_range[1])
        normalized_obs1 = tf.clip_by_value(normalize(self.obs1, self.obs_rms),
                                           self.observation_range[0], self.observation_range[1])

        # Return normalization.
        if self.normalize_returns:
            with tf.variable_scope('ret_rms'):
                self.ret_rms = RunningMeanStd()
        else:
            self.ret_rms = None

        # Create target networks.
        target_actor = copy(actor)
        target_actor.name = 'target_actor'
        self.target_actor = target_actor
        target_critic = copy(critic)
        target_critic.name = 'target_critic'
        self.target_critic = target_critic

        # Create networks and core TF parts that are shared across setup parts.
        self.actor_tf = actor(normalized_obs0)
        self.normalized_critic_tf = critic(normalized_obs0, self.actions)
        self.critic_tf = denormalize(
            tf.clip_by_value(self.normalized_critic_tf, self.return_range[0], self.return_range[1]),
            self.ret_rms)
        self.normalized_critic_with_actor_tf = critic(normalized_obs0, self.actor_tf, reuse=True)
        self.critic_with_actor_tf = denormalize(
            tf.clip_by_value(self.normalized_critic_with_actor_tf, self.return_range[0], self.return_range[1]),
            self.ret_rms)
        Q_obs1 = denormalize(target_critic(normalized_obs1, target_actor(normalized_obs1)), self.ret_rms)
        # The (1 - terminal) factor removes the bootstrap term for terminal transitions.
        self.target_Q = self.rewards + (1. - self.terminals1) * gamma * Q_obs1

        # Set up parts.
        if self.param_noise is not None:
            self.setup_param_noise(normalized_obs0)
        self.setup_actor_optimizer()
        self.setup_critic_optimizer()
        if self.normalize_returns and self.enable_popart:
            self.setup_popart()
        self.setup_stats()
        self.setup_target_network_updates()

    def setup_target_network_updates(self):
        """Create the init and soft update ops for both target networks."""
        actor_init_updates, actor_soft_updates = get_target_updates(
            self.actor.vars, self.target_actor.vars, self.tau)
        critic_init_updates, critic_soft_updates = get_target_updates(
            self.critic.vars, self.target_critic.vars, self.tau)
        self.target_init_updates = [actor_init_updates, critic_init_updates]
        self.target_soft_updates = [actor_soft_updates, critic_soft_updates]

    def setup_param_noise(self, normalized_obs0):
        """Build the perturbed actor copies used for parameter-space noise."""
        assert self.param_noise is not None

        # Configure perturbed actor.
        param_noise_actor = copy(self.actor)
        param_noise_actor.name = 'param_noise_actor'
        self.perturbed_actor_tf = param_noise_actor(normalized_obs0)
        logger.info('setting up param noise')
        self.perturb_policy_ops = get_perturbed_actor_updates(
            self.actor, param_noise_actor, self.param_noise_stddev)

        # Configure separate copy for stddev adoption.
        adaptive_param_noise_actor = copy(self.actor)
        adaptive_param_noise_actor.name = 'adaptive_param_noise_actor'
        adaptive_actor_tf = adaptive_param_noise_actor(normalized_obs0)
        self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(
            self.actor, adaptive_param_noise_actor, self.param_noise_stddev)
        # Root-mean-square difference between unperturbed and perturbed actions.
        self.adaptive_policy_distance = tf.sqrt(
            tf.reduce_mean(tf.square(self.actor_tf - adaptive_actor_tf)))

    def setup_actor_optimizer(self):
        """Define the actor loss, its flattened gradient op and the actor optimizer."""
        logger.info('setting up actor optimizer')
        self.actor_loss = -tf.reduce_mean(self.critic_with_actor_tf)
        actor_shapes = [var.get_shape().as_list() for var in self.actor.trainable_vars]
        # The initial value 1 keeps the product defined for scalar variables,
        # whose shape list is empty (reduce() would otherwise raise TypeError).
        actor_nb_params = sum(reduce(lambda x, y: x * y, shape, 1) for shape in actor_shapes)
        logger.info(' actor shapes: {}'.format(actor_shapes))
        logger.info(' actor params: {}'.format(actor_nb_params))
        self.actor_grads = U.flatgrad(self.actor_loss, self.actor.trainable_vars, clip_norm=self.clip_norm)
        self.actor_optimizer = Adam(var_list=self.actor.trainable_vars,
                                    beta1=0.9, beta2=0.999, epsilon=1e-08)

    def setup_critic_optimizer(self):
        """Define the critic loss (plus optional L2 term), gradients and optimizer."""
        logger.info('setting up critic optimizer')
        normalized_critic_target_tf = tf.clip_by_value(
            normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
        self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
        if self.critic_l2_reg > 0.:
            # Regularize kernels only, and leave the output layer alone.
            critic_reg_vars = [var for var in self.critic.trainable_vars
                               if 'kernel' in var.name and 'output' not in var.name]
            for var in critic_reg_vars:
                logger.info(' regularizing: {}'.format(var.name))
            logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
            critic_reg = tc.layers.apply_regularization(
                tc.layers.l2_regularizer(self.critic_l2_reg),
                weights_list=critic_reg_vars
            )
            self.critic_loss += critic_reg
        critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
        # Same scalar-safe product as in setup_actor_optimizer.
        critic_nb_params = sum(reduce(lambda x, y: x * y, shape, 1) for shape in critic_shapes)
        logger.info(' critic shapes: {}'.format(critic_shapes))
        logger.info(' critic params: {}'.format(critic_nb_params))
        self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
        self.critic_optimizer = Adam(var_list=self.critic.trainable_vars,
                                     beta1=0.9, beta2=0.999, epsilon=1e-08)

    def setup_popart(self):
        """Create the ops that rescale the critics' output layers for Pop-Art."""
        # See https://arxiv.org/pdf/1602.07714.pdf for details.
        self.old_std = tf.placeholder(tf.float32, shape=[1], name='old_std')
        new_std = self.ret_rms.std
        self.old_mean = tf.placeholder(tf.float32, shape=[1], name='old_mean')
        new_mean = self.ret_rms.mean

        self.renormalize_Q_outputs_op = []
        for vs in [self.critic.output_vars, self.target_critic.output_vars]:
            assert len(vs) == 2
            M, b = vs
            assert 'kernel' in M.name
            assert 'bias' in b.name
            assert M.get_shape()[-1] == 1
            assert b.get_shape()[-1] == 1
            self.renormalize_Q_outputs_op += [M.assign(M * self.old_std / new_std)]
            self.renormalize_Q_outputs_op += [b.assign((b * self.old_std + self.old_mean - new_mean) / new_std)]

    def setup_stats(self):
        """Collect the diagnostic ops and their names for ``get_stats``."""
        ops = []
        names = []

        if self.normalize_returns:
            ops += [self.ret_rms.mean, self.ret_rms.std]
            names += ['ret_rms_mean', 'ret_rms_std']

        if self.normalize_observations:
            ops += [tf.reduce_mean(self.obs_rms.mean), tf.reduce_mean(self.obs_rms.std)]
            names += ['obs_rms_mean', 'obs_rms_std']

        ops += [tf.reduce_mean(self.critic_tf)]
        names += ['reference_Q_mean']
        ops += [reduce_std(self.critic_tf)]
        names += ['reference_Q_std']

        ops += [tf.reduce_mean(self.critic_with_actor_tf)]
        names += ['reference_actor_Q_mean']
        ops += [reduce_std(self.critic_with_actor_tf)]
        names += ['reference_actor_Q_std']

        ops += [tf.reduce_mean(self.actor_tf)]
        names += ['reference_action_mean']
        ops += [reduce_std(self.actor_tf)]
        names += ['reference_action_std']

        # Same test that guards the creation of perturbed_actor_tf in __init__.
        if self.param_noise is not None:
            ops += [tf.reduce_mean(self.perturbed_actor_tf)]
            names += ['reference_perturbed_action_mean']
            ops += [reduce_std(self.perturbed_actor_tf)]
            names += ['reference_perturbed_action_std']

        self.stats_ops = ops
        self.stats_names = names

    def pi(self, obs, apply_noise=True, compute_Q=True):
        """Compute an action for a single observation.

        Args:
            obs: one observation; it is wrapped in a list before being fed.
            apply_noise: use the perturbed actor (when parameter noise is
                configured) and add action noise (when configured).
            compute_Q: also evaluate the critic on the chosen action.

        Returns:
            ``(action, q)`` where ``action`` is flattened and clipped to
            ``action_range`` and ``q`` is ``None`` unless ``compute_Q`` is set.
        """
        if self.param_noise is not None and apply_noise:
            actor_tf = self.perturbed_actor_tf
        else:
            actor_tf = self.actor_tf
        feed_dict = {self.obs0: [obs]}
        if compute_Q:
            action, q = self.sess.run([actor_tf, self.critic_with_actor_tf], feed_dict=feed_dict)
        else:
            action = self.sess.run(actor_tf, feed_dict=feed_dict)
            q = None
        action = action.flatten()
        if self.action_noise is not None and apply_noise:
            noise = self.action_noise()
            assert noise.shape == action.shape
            action += noise
        action = np.clip(action, self.action_range[0], self.action_range[1])
        return action, q

    def store_transition(self, obs0, action, reward, obs1, terminal1):
        """Scale the reward, append the transition and update observation stats."""
        reward *= self.reward_scale
        self.memory.append(obs0, action, reward, obs1, terminal1)
        if self.normalize_observations:
            self.obs_rms.update(np.array([obs0]))

    def train(self):
        """Run one training step on a sampled batch.

        Returns:
            ``(critic_loss, actor_loss)`` as evaluated on the batch.
        """
        # Get a batch.
        batch = self.memory.sample(batch_size=self.batch_size)

        if self.normalize_returns and self.enable_popart:
            old_mean, old_std, target_Q = self.sess.run(
                [self.ret_rms.mean, self.ret_rms.std, self.target_Q],
                feed_dict={
                    self.obs1: batch['obs1'],
                    self.rewards: batch['rewards'],
                    self.terminals1: batch['terminals1'].astype('float32'),
                })
            self.ret_rms.update(target_Q.flatten())
            self.sess.run(self.renormalize_Q_outputs_op, feed_dict={
                self.old_std: np.array([old_std]),
                self.old_mean: np.array([old_mean]),
            })
        else:
            target_Q = self.sess.run(self.target_Q, feed_dict={
                self.obs1: batch['obs1'],
                self.rewards: batch['rewards'],
                self.terminals1: batch['terminals1'].astype('float32'),
            })

        # Get all gradients and perform a synced update.
        ops = [self.actor_grads, self.actor_loss, self.critic_grads, self.critic_loss]
        actor_grads, actor_loss, critic_grads, critic_loss = self.sess.run(ops, feed_dict={
            self.obs0: batch['obs0'],
            self.actions: batch['actions'],
            self.critic_target: target_Q,
        })
        self.actor_optimizer.update(actor_grads, stepsize=self.actor_lr)
        self.critic_optimizer.update(critic_grads, stepsize=self.critic_lr)

        return critic_loss, actor_loss

    def initialize(self, sess):
        """Attach ``sess``, initialize all variables and run the target init ops."""
        self.sess = sess
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(self.target_init_updates)

    def update_target_net(self):
        """Run the soft update ops for both target networks."""
        self.sess.run(self.target_soft_updates)

    def get_stats(self):
        """Evaluate the diagnostic ops and return them as a name -> value dict."""
        if self.stats_sample is None:
            # Get a sample and keep that fixed for all further computations.
            # This allows us to estimate the change in value for the same set of inputs.
            self.stats_sample = self.memory.sample(batch_size=self.batch_size)
        values = self.sess.run(self.stats_ops, feed_dict={
            self.obs0: self.stats_sample['obs0'],
            self.actions: self.stats_sample['actions'],
        })

        names = self.stats_names[:]
        assert len(names) == len(values)
        stats = dict(zip(names, values))

        if self.param_noise is not None:
            stats = {**stats, **self.param_noise.get_stats()}

        return stats

    def adapt_param_noise(self):
        """Adapt the parameter-noise scale and return the measured distance.

        Returns ``0.`` without touching the session when no parameter noise
        is configured.
        """
        if self.param_noise is None:
            return 0.

        # Perturb a separate copy of the policy to adjust the scale for the next "real" perturbation.
        batch = self.memory.sample(batch_size=self.batch_size)
        self.sess.run(self.perturb_adaptive_policy_ops, feed_dict={
            self.param_noise_stddev: self.param_noise.current_stddev,
        })
        distance = self.sess.run(self.adaptive_policy_distance, feed_dict={
            self.obs0: batch['obs0'],
            self.param_noise_stddev: self.param_noise.current_stddev,
        })

        self.param_noise.adapt(distance)
        return distance

    def reset(self):
        """Reset the noise processes after an episode is complete."""
        if self.action_noise is not None:
            self.action_noise.reset()
        if self.param_noise is not None:
            # Re-perturb the actor copy with the current noise scale.
            self.sess.run(self.perturb_policy_ops, feed_dict={
                self.param_noise_stddev: self.param_noise.current_stddev,
            })
# Load the scaled features, the log10 labels and the submission features.
x, t, x_submission = hp_data.load(scale=True, label_log10=True, non_nan_ratio=0.8)
print('x.shape:', x.shape)
feature_count = x.shape[-1]

# The first train_num rows are used for training, the remainder for testing.
train_num = 1450
train_x = x[:train_num, :]
train_y = t[:train_num, :]
test_x = x[train_num:, :]
test_y = t[train_num:, :]

max_iterations = 30000
batch_size = 128

# initialize network optimizer
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
# optimizer = SGD(lr=0.01)
optimizer = Adam(lr=1e-3)

# Earlier configuration, kept for reference:
# network = MultiLayerRegression(input_size=feature_count, hidden_size_list=[100, 100, 100, 300], output_size=1,
#                                weight_init_std='relu', activation='relu',
#                                weight_decay_lambda=1e-4,
#                                use_dropout=True, dropout_ratio=0.2,
#                                use_batchnorm=True)
network = MultiLayerRegression(
    input_size=feature_count,
    hidden_size_list=[300, 200, 100, 10],
    output_size=1,
    weight_init_std='relu',
    activation='relu',
    weight_decay_lambda=1e-4,
    use_dropout=True,
    dropout_ratio=0.3,
    use_batchnorm=True,
)
print("y train: ", y_train.shape)
print("x_test: ", x_test.shape)
print("y_test: ", y_test.shape)

if run:
    # Input features:
    # MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latiture, Longitude
    network = MultiLayerNetRegression(
        input_size=8,
        hidden_size_list=[100, 1000, 100],
        output_size=1,
    )
    optimizer = Adam(lr=learning_rate)

    train_acc_list = []
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    # Training loop.
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]
        grads = network.gradient(x_batch, y_batch)
# The original began with a duplicated, garbled copy of this setup that bound
# the helper functions themselves to `contexts, target`; every name it set was
# reassigned below, so it has been removed.

# Hyperparameters.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

# Build the corpus and the one-hot encoded training pairs.
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

# Train and plot.
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Print the learned vector of every word.
word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
train_per_epoch = sample_num / batch_size  # 5000 / 100 = 50train/1epoch
input_dim = x_train[0].shape  # (1, 28, 28)

# Convolution layer settings.
conv_param = {
    "filter_num": 30,
    "filter_size": 5,
    "pad": 0,
    "stride": 1,
}

hidden_size = 100
output_size = 10
weight_init_std = 0.01
# NOTE(review): learning_rate is defined but not passed to Adam() below;
# confirm that the optimizer's default matches the intended value.
learning_rate = 0.001

train_loss_list = []
train_acc_list = []
test_acc_list = []

optimizer = Adam()

# Build the network.
network = SimpleConvNet(
    input_dim=input_dim,
    conv_param=conv_param,
    hidden_size=hidden_size,
    output_size=output_size,
    weight_init_std=weight_init_std,
)

"""
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=epoch_max, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()
"""
if config.GPU:
    # NOTE(review): the training corpus is stored under `corpus`, while the
    # code below keeps reading `corpus_train`; confirm this is intended.
    corpus = to_gpu(corpus_train)
    corpus_val = to_gpu(corpus_val)
    corpus_test = to_gpu(corpus_test)

vocab_size = len(preprocessing.word_to_id)

# Concatenate the per-item lists; the targets are the inputs shifted by one.
xs = sum(corpus_train, [])[:-1]
ts = sum(corpus_train, [])[1:]
corpus_val = sum(corpus_val, [])
corpus_test = sum(corpus_test, [])

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# optimizer = SGD(lr)
optimizer = Adam(lr=lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size,
                time_size=time_size, max_grad=max_grad)

    # Reset the model state before evaluating on the validation corpus.
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('검증 퍼플렉서티: ', ppl)
ts = corpus[1:]

# Hyperparameters.
vocab_size = len(word_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 1
max_epoch = 50
max_grad = 5.0
sample_size = 100
lr = 0.001
time_size = 35

# Build the model.
model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = RnnlmTrainer(model, optimizer)

# Train, saving the parameters whenever the perplexity improves.
best_ppl = float('inf')
t1 = time.time()
for epoch in range(max_epoch):
    trainer.fit(xs, ts, max_epoch=1, batch_size=batch_size, max_grad=max_grad)

    model.reset_state()
    ppl = eval_perplexity(model, corpus)
    print('valid perplexity: ', ppl)

    if best_ppl > ppl:
        best_ppl = ppl
        model.save_params()
def df(x, y):
    """Return the pair ``(x / 10.0, 2.0 * y)`` used as the gradient in this demo."""
    grad_x = x / 10.0
    grad_y = 2.0 * y
    return grad_x, grad_y


# Common starting point for every optimizer.
init_pos = (-7.0, 2.0)
params = {'x': init_pos[0], 'y': init_pos[1]}
grads = {'x': 0, 'y': 0}

optimizers = OrderedDict([
    ('SGD', SGD(lr=0.95)),
    ('Momentum', Momentum(lr=0.1)),
    ('AdaGrad', AdaGrad(lr=1.5)),
    ('Adam', Adam(lr=0.3)),
])

idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    # Restart from the same initial position for each optimizer.
    params['x'], params['y'] = init_pos[0], init_pos[1]

    # Record the position before each of 30 update steps.
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])

        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)