コード例 #1
0
def get_models():
    """Create the three RNN configurations to compare, each with its own optimizer.

    The variants differ only in the optional constructor arguments:

    1. no extra arguments (described in the original comments as
       "tanh + backward"),
    2. ``bptt_truncate=BPTT_TRUNCATE``,
    3. ``bptt_truncate=BPTT_TRUNCATE`` and ``activation_func='relu'``.

    Returns:
        tuple: ``(labels, rnns, optims)`` -- three lists of length 3 whose
        entries at the same index belong together.
    """
    # Constructor arguments shared by every variant.
    common = dict(input_size=INPUT_SIZE,
                  hidden_size=HIDDEN_SIZE,
                  output_size=OUTPUT_SIZE)

    # (label, extra constructor arguments) for each variant.
    # The third label previously read 'relu + backward' although the model is
    # built with bptt_truncate; it now matches the actual configuration.
    variants = [
        ('model1: tanh + backward', {}),
        ('model2: tanh + backward_truncate', {
            'bptt_truncate': BPTT_TRUNCATE
        }),
        ('model3: relu + backward_truncate', {
            'bptt_truncate': BPTT_TRUNCATE,
            'activation_func': 'relu'
        }),
    ]

    labels, rnns, optims = [], [], []
    for label, extra in variants:
        labels.append(label)
        rnns.append(Single_layer_RNN(**common, **extra))
        # Every model gets its own optimizer instance.
        optims.append(Adam())
    return labels, rnns, optims
コード例 #2
0
ファイル: ddpg.py プロジェクト: titi2338432/pytorch-gym
 def setup_actor_optimizer(self):
     """Define the actor loss, its flattened gradient and the actor's optimizer.

     Sets ``self.actor_loss``, ``self.actor_grads`` and
     ``self.actor_optimizer``, and logs the shapes and the total parameter
     count of the actor's trainable variables.
     """
     logger.info('setting up actor optimizer')
     # Negated mean of the critic's output for the actor's own actions.
     mean_q = tf.reduce_mean(self.critic_with_actor_tf)
     self.actor_loss = -mean_q

     actor_shapes = []
     for variable in self.actor.trainable_vars:
         actor_shapes.append(variable.get_shape().as_list())

     # Total parameter count: sum over variables of the product of their dims.
     actor_nb_params = 0
     for dims in actor_shapes:
         actor_nb_params += reduce(lambda x, y: x * y, dims)

     logger.info('  actor shapes: {}'.format(actor_shapes))
     logger.info('  actor params: {}'.format(actor_nb_params))

     self.actor_grads = U.flatgrad(
         self.actor_loss,
         self.actor.trainable_vars,
         clip_norm=self.clip_norm,
     )
     self.actor_optimizer = Adam(
         var_list=self.actor.trainable_vars,
         beta1=0.9,
         beta2=0.999,
         epsilon=1e-08,
     )
コード例 #3
0
ファイル: train_seq2seq.py プロジェクト: YaGiNA/DLfS2
def train_eval(x_train, x_test, is_peeky):
    """Train a seq2seq model and return its per-epoch accuracy on the test data.

    Args:
        x_train: Training inputs, passed to ``Trainer.fit``.
        x_test: Test inputs, evaluated one sample at a time.
        is_peeky: If truthy a ``PeekySeq2seq`` is trained, otherwise a
            plain ``Seq2seq``.

    Returns:
        list: One accuracy value (fraction of correct answers) per epoch.

    Targets and hyperparameters (``t_train``, ``t_test``, ``max_epoch``,
    ``batch_size``, ...) are read from the enclosing module scope.
    """
    model_cls = PeekySeq2seq if is_peeky else Seq2seq
    model = model_cls(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for _ in range(max_epoch):
        # One epoch of training per outer iteration so that accuracy can be
        # measured after every epoch.
        trainer.fit(x_train, t_train,
                    max_epoch=1, batch_size=batch_size, max_grad=max_grad)

        # Only the first 10 test samples are evaluated verbosely.
        correct_num = sum(
            eval_seq2seq(model, x_test[[i]], t_test[[i]], id_to_char, i < 10)
            for i in range(len(x_test)))

        accuracy = float(correct_num) / len(x_test)
        acc_list.append(accuracy)
        print('val acc %.3f%%' % (accuracy * 100))
    return acc_list
コード例 #4
0
def main():
    """Train a CBOW model on the PTB training corpus and pickle the result.

    The learned word vectors (cast to float16) and both vocabulary mappings
    are written to ``cbow_params.pkl``.
    """
    # Hyperparameters
    window_size, hidden_size = 5, 100
    batch_size, max_epoch = 100, 10

    # Load the data
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_contexts_target(corpus, window_size)

    # Build the model, optimizer and trainer
    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    trainer = Trainer(model, Adam())

    # Train and plot the training curve
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()

    # Save everything needed to reuse the embeddings later
    params = {
        'word_vecs': model.word_vecs.astype(np.float16),
        'word_to_id': word_to_id,
        'id_to_word': id_to_word,
    }
    with open('cbow_params.pkl', 'wb') as out_file:
        # A negative protocol selects the highest available pickle protocol.
        pickle.dump(params, out_file, -1)
コード例 #5
0
def train(network,
          x_train,
          y_train,
          x_test,
          y_test,
          iter_times=10000,
          hidden_size=10,
          batch_size=100,
          lr=0.1):
    """Train ``network`` on random mini-batches with Adam and print accuracy.

    Args:
        network: Object providing ``grad(x, y)``, ``params`` and
            ``accuracy(x, y)``.
        x_train, y_train: Training inputs and labels (indexable by an index
            array; ``x_train`` must expose ``shape``).
        x_test, y_test: Test inputs and labels.
        iter_times: Number of mini-batch updates.
        hidden_size: Not used by this function.
        batch_size: Number of samples drawn per update.
        lr: Learning rate for SGD, Momentum, Nesterov and AdaGrad (RMSProp
            and Adam use fixed rates).
    """
    # All optimizers are instantiated, but only Adam is selected below.
    optimizers = dict([
        ('SGD', SGD(lr)),
        ('Momentum', Momentum(lr)),
        ('Nesterov', Nesterov(lr)),
        ('AdaGrad', AdaGrad(lr)),
        # lr == 0.1 may make loss += ln(eps), eps == 1e-15
        ('RMSProp', RMSProp(0.02)),
        ('Adam', Adam(0.005)),
    ])
    chosen = optimizers['Adam']

    for step in range(iter_times):
        # Print progress roughly once per pass over the training data.
        steps_per_report = max(x_train.shape[0] // batch_size, 1)
        if step % steps_per_report == 0:
            print('{:.1%}'.format(step / iter_times))

        # Sample a mini-batch (with replacement) and take one update step.
        picked = np.random.choice(x_train.shape[0], batch_size)
        gradients = network.grad(x_train[picked], y_train[picked])
        chosen.update(network.params, gradients)

    train_acc = network.accuracy(x_train, y_train)
    test_acc = network.accuracy(x_test, y_test)
    print('Train acc: {:.4}  Test acc: {:.4}'.format(train_acc, test_acc))
コード例 #6
0
ファイル: ddpg.py プロジェクト: titi2338432/pytorch-gym
 def setup_critic_optimizer(self):
     """Define the critic loss, its flattened gradient and the critic's optimizer.

     Sets ``self.critic_loss``, ``self.critic_grads`` and
     ``self.critic_optimizer``. When ``self.critic_l2_reg`` is positive an
     L2 penalty over the selected kernel variables is added to the loss.
     """
     logger.info('setting up critic optimizer')

     # Mean squared error against the normalized, range-clipped target.
     normalized_target = normalize(self.critic_target, self.ret_rms)
     clipped_target = tf.clip_by_value(normalized_target,
                                       self.return_range[0], self.return_range[1])
     squared_error = tf.square(self.normalized_critic_tf - clipped_target)
     self.critic_loss = tf.reduce_mean(squared_error)

     if self.critic_l2_reg > 0.:
         # Regularize variables whose name contains 'kernel' but not 'output'.
         reg_vars = []
         for candidate in self.critic.trainable_vars:
             if 'kernel' in candidate.name and 'output' not in candidate.name:
                 reg_vars.append(candidate)
         for reg_var in reg_vars:
             logger.info('  regularizing: {}'.format(reg_var.name))
         logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
         regularizer = tc.layers.l2_regularizer(self.critic_l2_reg)
         penalty = tc.layers.apply_regularization(regularizer, weights_list=reg_vars)
         self.critic_loss += penalty

     critic_shapes = []
     for variable in self.critic.trainable_vars:
         critic_shapes.append(variable.get_shape().as_list())

     # Total parameter count: sum over variables of the product of their dims.
     critic_nb_params = 0
     for dims in critic_shapes:
         critic_nb_params += reduce(lambda x, y: x * y, dims)

     logger.info('  critic shapes: {}'.format(critic_shapes))
     logger.info('  critic params: {}'.format(critic_nb_params))

     self.critic_grads = U.flatgrad(
         self.critic_loss,
         self.critic.trainable_vars,
         clip_norm=self.clip_norm,
     )
     self.critic_optimizer = Adam(
         var_list=self.critic.trainable_vars,
         beta1=0.9,
         beta2=0.999,
         epsilon=1e-08,
     )
コード例 #7
0
def main():
    """Train a PeekySeq2seq model on the addition dataset, printing accuracy per epoch."""
    # Load the dataset
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reversing the input sequences reportedly improves seq2seq accuracy.
    is_reverse = True
    if is_reverse:
        x_train = x_train[:, ::-1]
        x_test = x_test[:, ::-1]

    # Hyperparameters
    vocab_size = len(char_to_id)
    wordvec_size, hidden_size = 16, 128
    batch_size, max_epoch = 128, 25
    max_grad = 5.0

    # Model / optimizer / trainer
    # (the original kept a plain Seq2seq(vocab_size, wordvec_size, hidden_size)
    # as a commented-out alternative)
    model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    acc_list = []
    for _ in range(max_epoch):
        # Train one epoch at a time so accuracy can be measured in between.
        trainer.fit(x_train, t_train,
                    max_epoch=1, batch_size=batch_size, max_grad=max_grad)

        # Only the first 10 test samples are evaluated verbosely.
        correct_num = sum(
            eval_seq2seq(model, x_test[[i]], t_test[[i]], id_to_char, i < 10)
            for i in range(len(x_test)))

        accuracy = float(correct_num) / len(x_test)
        acc_list.append(accuracy)
        print(f'val acc {accuracy * 100}')
コード例 #8
0
ファイル: train.py プロジェクト: MATOBAD/NLP
def main():
    """Train an AttentionSeq2seq model on the date dataset, printing accuracy per epoch."""
    # Load the data
    (x_train, t_train), (x_test, t_test) = sequence.load_data('date.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    # Reverse the input sentences
    x_train = x_train[:, ::-1]
    x_test = x_test[:, ::-1]

    # Hyperparameters
    vocab_size = len(char_to_id)
    wordvec_size, hidden_size = 16, 256
    batch_size, max_epoch = 128, 10
    max_grad = 5.0

    model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
    trainer = Trainer(model, Adam())

    acc_list = []
    for _ in range(max_epoch):
        # Train one epoch at a time so accuracy can be measured in between.
        trainer.fit(x_train, t_train,
                    max_epoch=1, batch_size=batch_size, max_grad=max_grad)

        # Only the first 10 test samples are evaluated verbosely.
        correct_num = sum(
            eval_seq2seq(model, x_test[[i]], t_test[[i]], id_to_char, i < 10,
                         is_reverse=True)
            for i in range(len(x_test)))

        accuracy = float(correct_num) / len(x_test)
        acc_list.append(accuracy)
        print('val acc %.3f%%' % (accuracy * 100))
コード例 #9
0
def main():
    """Train a SimpleCBOW model on a one-sentence toy corpus and plot the loss."""
    # Hyperparameters
    window_size, hidden_size = 1, 5
    batch_size, max_epoch = 3, 1000

    # Build the corpus and vocabulary from the sample sentence.
    corpus, word_to_id, id_to_word = preprocess('You say goodbye and I say hello.')
    vocab_size = len(word_to_id)

    # Create (contexts, target) pairs and one-hot encode both.
    contexts, target = create_contexts_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)

    trainer = Trainer(SimpleCBOW(vocab_size, hidden_size), Adam())
    trainer.fit(contexts, target, max_epoch, batch_size)
    trainer.plot()
コード例 #10
0
def test_train_word2vec_model():
    """Train a word2vec (SimpleCBOW) model on a toy sentence and print its word vectors."""
    window_size = 1
    hidden_size = 5  # dimensionality of the distributed word representation
    batch_size = 3
    max_epoch = 1000

    # Build the corpus
    corpus, word_to_id, id_to_word = preprocess('You say goodbye and I say hello.')
    vocab_size = len(word_to_id)

    # Build contexts and targets, then one-hot encode both
    contexts, target = create_context_target(corpus, window_size)
    target = convert_one_hot(target, vocab_size)
    contexts = convert_one_hot(contexts, vocab_size)
    print("one-hot target: ", target)
    print("one-hot contexts: ", contexts)

    # CBOW model, optimizer and trainer
    model = SimpleCBOW(vocab_size, hidden_size)
    trainer = Trainer(model, Adam())

    # Training
    trainer.fit(contexts, target, max_epoch=max_epoch, batch_size=batch_size)
    trainer.plot()

    # Print each word together with its learned vector (the CBOW input weights)
    learned_vecs = model.word_vecs
    for wid, token in id_to_word.items():
        print(token, learned_vecs[wid])
コード例 #11
0
# Gradient limit forwarded to Trainer.fit below.
max_grad = 5.0

# Split the data into test and train parts with test_rate=0.1.
# NOTE(review): inputs and targets are split by two separate calls; they only
# stay aligned if divide_test_train is deterministic -- confirm.
x_test, x_train = preprocessing.divide_test_train(x_train, test_rate=0.1)
t_test, t_train = preprocessing.divide_test_train(t_train, test_rate=0.1)

# Transformer with 8 heads and a single encoder and decoder.
model = Transformer(vocab_size,
                    wordvec_size,
                    head_size,
                    num_heads=8,
                    num_encoders=1,
                    num_decoders=1)

# Resume from previously saved parameters when a checkpoint file exists.
if os.path.isfile("../pkl/myTransformer_params.pkl"):
    model.load_params("../pkl/myTransformer_params.pkl")

optimizer = Adam(lr=0.00001)
# Alternative optimizers that were tried:
# optimizer = SGD(lr=0.00005)
# optimizer = RMSprop(lr=0.00005)
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    # Train one epoch per iteration and checkpoint the parameters afterwards.
    trainer.fit(x_train,
                t_train,
                max_epoch=1,
                batch_size=batch_size,
                max_grad=max_grad,
                eval_interval=10)
    model.save_params('../pkl/myTransformer_params.pkl')

    # Counter for the evaluation that follows (rest of the loop not shown here).
    correct_num = 0
コード例 #12
0
def _loss_and_accuracy(model, xs, Hs, ys):
    """Return ``(mean loss, average accuracy)`` of ``model`` over one dataset."""
    num_samples = len(Hs)
    loss = 0
    pred = np.zeros((len(ys), 1))
    for idx in range(num_samples):
        loss += model.loss(xs[idx], Hs[idx], ys[idx]) / num_samples
        # Threshold the model output at 1/2 to get a binary label.
        pred[idx] = 0 if model.predict(xs[idx], Hs[idx]) < 1/2 else 1
    return loss, avg_acc(ys, pred)


def main():
    """Train a GNN with Adam, plot the learning curves and optionally predict.

    Command-line options:
        --D / -d      dimension of the feature vector (default 8)
        --T / -t      maximum number of aggregation steps (default 2)
        --epoch / -e  number of training epochs (default 100)
        --batch / -b  mini-batch size (default 256)
        --flag / -f   also write predictions for the test data

    Side effects: prints per-epoch metrics, saves the loss/accuracy plot to
    ``src/graph/GNN_Adam.png`` and, with ``--flag``, writes one predicted
    label per line to ``prediction.txt``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--D', '-d', type=int, default=8, help='Dimension of feature vector')
    parser.add_argument('--T', '-t', type=int, default=2, help='Max step of aggregation')
    # The help text previously read 'Number of training dataset', which
    # does not describe this option.
    parser.add_argument('--epoch', '-e', type=int, default=100, help='Number of training epochs')
    parser.add_argument('--batch', '-b', type=int, default=256, help='batch size')
    parser.add_argument('--flag', '-f', action='store_true', help='make prediction file')
    args = parser.parse_args()

    train_H, train_y, train_node_size = get_train()

    # Fixed seed for the train/validation split (70% passed as split_size).
    seed = 1996
    train_H, train_y, val_H, val_y, train_node_size, val_node_size = shuffle_split(
        train_H, train_y, train_node_size, split_size=0.7, seed=seed)

    D = args.D                  # feature dimension
    T = args.T                  # number of aggregation steps
    alpha = 0.015               # learning rate
    max_epoch = args.epoch      # number of epochs
    batch_size = args.batch     # mini-batch size
    make_pred = args.flag

    # Number of mini-batches per epoch: ceil(train_size / batch_size).
    train_size = len(train_H)
    iter_per_epoch = -(-train_size // batch_size)

    # Feature vectors for the training and validation graphs.
    train_x = get_feature(D, train_H, train_node_size)
    val_x = get_feature(D, val_H, val_node_size)

    model = GNN(D, T)
    optimizer = Adam(alpha=alpha, beta1=0.9, beta2=0.999, eps=1e-8)

    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(max_epoch):
        # Reshuffle the training data with an epoch-dependent seed.
        np.random.seed(int(epoch*1234))
        shuffle_idx = np.random.permutation(train_H.shape[0])
        train_H = train_H[shuffle_idx]
        train_x = train_x[shuffle_idx]
        train_y = train_y[shuffle_idx]

        for num in range(iter_per_epoch):
            # A slice that runs past the end is truncated automatically, so
            # the last (possibly shorter) batch needs no special case.
            start = num * batch_size
            stop = start + batch_size
            batch_H = train_H[start:stop]
            batch_x = train_x[start:stop]
            batch_y = train_y[start:stop]
            n_batch = len(batch_H)

            # Average the per-sample gradients, then update the parameters.
            batch_grads = None
            for x, H, y in zip(batch_x, batch_H, batch_y):
                grad = model.get_gradient(x, H, y)
                if batch_grads is None:
                    batch_grads = {key: np.zeros_like(val) for key, val in grad.items()}
                for key in grad:
                    batch_grads[key] += (grad[key] / n_batch)
            optimizer.update(model.params, batch_grads)

        # Loss and average accuracy on the training and validation data.
        loss, train_score = _loss_and_accuracy(model, train_x, train_H, train_y)
        val_loss, val_score = _loss_and_accuracy(model, val_x, val_H, val_y)

        print('epoch:{} loss:{:.5f} val_loss:{:.5f} avg_acc:{:.5f} val_avg_acc:{:.5f}'.format(epoch+1, loss, val_loss, train_score, val_score))
        train_loss_list.append(loss)
        val_loss_list.append(val_loss)
        train_acc_list.append(train_score)
        val_acc_list.append(val_score)

    # Left panel: loss curves; right panel: average-accuracy curves.
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10,4))
    panels = (
        (ax1, train_loss_list, val_loss_list, 'loss'),
        (ax2, train_acc_list, val_acc_list, 'average accuracy'),
    )
    for ax, train_vals, val_vals, ylabel in panels:
        ax.plot(np.arange(len(train_vals)), train_vals, label='train')
        ax.plot(np.arange(len(val_vals)), val_vals, label='validation')
        ax.legend()
        ax.set_xlabel('epoch')
        ax.set_ylabel(ylabel)

    fig.savefig('src/graph/GNN_Adam.png')
    plt.close()

    if make_pred:
        # Predict the test data and write one label per line.
        test_H, test_node_size = get_test()
        test_x = get_feature(D, test_H, test_node_size)

        with open('prediction.txt', mode='w') as f:
            for idx in range(len(test_node_size)):
                predict = 0 if model.predict(test_x[idx], test_H[idx]) < 1/2 else 1
                f.write('{}'.format(predict) + '\n')
コード例 #13
0
def df(x, y):
    """Return the pair ``(x / 10.0, 2.0 * y)``.

    NOTE(review): this looks like the gradient of x**2 / 20 + y**2 -- confirm
    against the function being optimized.
    """
    grad_x = x / 10.0
    grad_y = 2.0 * y
    return grad_x, grad_y

# Starting point shared by every optimizer.
init_pos = (-7.0, 2.0)
# Parameters and gradients are kept in dicts keyed by 'x' and 'y', the form
# passed to optimizer.update() below.
params = {}
params['x'], params['y'] = init_pos[0], init_pos[1]
grads = {}
grads['x'], grads['y'] = 0, 0


# Optimizers to compare, each with its own learning rate; an OrderedDict
# keeps the insertion order when iterating.
optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.95)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

# Not used in the lines visible here -- presumably a counter for later code.
idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    # Trajectory of the parameters for this optimizer.
    x_history = []
    y_history = []
    # Restart from the common initial position.
    params['x'], params['y'] = init_pos[0], init_pos[1]
    
    # 30 update steps; the position is recorded before each update.
    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])
        
        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)
コード例 #14
0
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import Adam

# Load MNIST with normalize=True.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000

# One optimizer per method to compare, all with default arguments.
# NOTE(review): SGD, Momentum and AdaGrad (and np, used further down) are not
# imported in the lines visible here -- confirm they are imported elsewhere.
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()

# A separate, identically configured network and loss history per optimizer.
networks = {}
train_loss = {}
for key in optimizers.keys():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10)
    train_loss[key] = []

for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in networks.keys():
コード例 #15
0
 [0 0 0 0 1 0 0]
 [0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0]]
 
 contexts:
[[[1 0 0 0 0 0 0]
  [0 0 1 0 0 0 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 1 0 0 0]]

 [[0 0 1 0 0 0 0]
  [0 0 0 0 1 0 0]]

 [[0 0 0 1 0 0 0]
  [0 1 0 0 0 0 0]]

 [[0 0 0 0 1 0 0]
  [0 0 0 0 0 1 0]]

 [[0 1 0 0 0 0 0]
  [0 0 0 0 0 0 1]]]"""

# Build the SimpleCBOW model from the vocabulary size and hidden size.
model = SimpleCBOW(vocab_size, hidden_size)

# NOTE(review): 'optimizier' is a misspelling of 'optimizer'; the name is left
# unchanged because code outside this view may refer to it.
optimizier = Adam()
trainer = Trainer(model, optimizier)

# Train on the (contexts, target) pairs, then plot via the trainer.
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()
コード例 #16
0
ファイル: ddpg.py プロジェクト: titi2338432/pytorch-gym
class DDPG(object):
    def __init__(self, actor, critic, memory, observation_shape, action_shape, param_noise=None, action_noise=None,
                 gamma=0.99, tau=0.001, normalize_returns=False, enable_popart=False, normalize_observations=True,
                 batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.), return_range=(-np.inf, np.inf),
                 critic_l2_reg=0., actor_lr=1e-4, critic_lr=1e-3, clip_norm=None, reward_scale=1.):
        # Inputs.
        self.obs0 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs0')
        self.obs1 = tf.placeholder(tf.float32, shape=(None,) + observation_shape, name='obs1')
        self.terminals1 = tf.placeholder(tf.float32, shape=(None, 1), name='terminals1')
        self.rewards = tf.placeholder(tf.float32, shape=(None, 1), name='rewards')
        self.actions = tf.placeholder(tf.float32, shape=(None,) + action_shape, name='actions')
        self.critic_target = tf.placeholder(tf.float32, shape=(None, 1), name='critic_target')
        self.param_noise_stddev = tf.placeholder(tf.float32, shape=(), name='param_noise_stddev')

        # Parameters.
        self.gamma = gamma
        self.tau = tau
        self.memory = memory
        self.normalize_observations = normalize_observations
        self.normalize_returns = normalize_returns
        self.action_noise = action_noise
        self.param_noise = param_noise
        self.action_range = action_range
        self.return_range = return_range
        self.observation_range = observation_range
        self.critic = critic
        self.actor = actor
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.clip_norm = clip_norm
        self.enable_popart = enable_popart
        self.reward_scale = reward_scale
        self.batch_size = batch_size
        self.stats_sample = None
        self.critic_l2_reg = critic_l2_reg

        # Observation normalization.
        if self.normalize_observations:
            with tf.variable_scope('obs_rms'):
                self.obs_rms = RunningMeanStd(shape=observation_shape)
        else:
            self.obs_rms = None
        normalized_obs0 = tf.clip_by_value(normalize(self.obs0, self.obs_rms),
                                           self.observation_range[0], self.observation_range[1])
        normalized_obs1 = tf.clip_by_value(normalize(self.obs1, self.obs_rms),
                                           self.observation_range[0], self.observation_range[1])

        # Return normalization.
        if self.normalize_returns:
            with tf.variable_scope('ret_rms'):
                self.ret_rms = RunningMeanStd()
        else:
            self.ret_rms = None

        # Create target networks.
        target_actor = copy(actor)
        target_actor.name = 'target_actor'
        self.target_actor = target_actor
        target_critic = copy(critic)
        target_critic.name = 'target_critic'
        self.target_critic = target_critic

        # Create networks and core TF parts that are shared across setup parts.
        self.actor_tf = actor(normalized_obs0)
        self.normalized_critic_tf = critic(normalized_obs0, self.actions)
        self.critic_tf = denormalize(
            tf.clip_by_value(self.normalized_critic_tf, self.return_range[0], self.return_range[1]), self.ret_rms)
        self.normalized_critic_with_actor_tf = critic(normalized_obs0, self.actor_tf, reuse=True)
        self.critic_with_actor_tf = denormalize(
            tf.clip_by_value(self.normalized_critic_with_actor_tf, self.return_range[0], self.return_range[1]),
            self.ret_rms)
        Q_obs1 = denormalize(target_critic(normalized_obs1, target_actor(normalized_obs1)), self.ret_rms)
        self.target_Q = self.rewards + (1. - self.terminals1) * gamma * Q_obs1

        # Set up parts.
        if self.param_noise is not None:
            self.setup_param_noise(normalized_obs0)
        self.setup_actor_optimizer()
        self.setup_critic_optimizer()
        if self.normalize_returns and self.enable_popart:
            self.setup_popart()
        self.setup_stats()
        self.setup_target_network_updates()

    def setup_target_network_updates(self):
        actor_init_updates, actor_soft_updates = get_target_updates(self.actor.vars, self.target_actor.vars, self.tau)
        critic_init_updates, critic_soft_updates = get_target_updates(self.critic.vars, self.target_critic.vars,
                                                                      self.tau)
        self.target_init_updates = [actor_init_updates, critic_init_updates]
        self.target_soft_updates = [actor_soft_updates, critic_soft_updates]

    def setup_param_noise(self, normalized_obs0):
        assert self.param_noise is not None

        # Configure perturbed actor.
        param_noise_actor = copy(self.actor)
        param_noise_actor.name = 'param_noise_actor'
        self.perturbed_actor_tf = param_noise_actor(normalized_obs0)
        logger.info('setting up param noise')
        self.perturb_policy_ops = get_perturbed_actor_updates(self.actor, param_noise_actor, self.param_noise_stddev)

        # Configure separate copy for stddev adoption.
        adaptive_param_noise_actor = copy(self.actor)
        adaptive_param_noise_actor.name = 'adaptive_param_noise_actor'
        adaptive_actor_tf = adaptive_param_noise_actor(normalized_obs0)
        self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(self.actor, adaptive_param_noise_actor,
                                                                       self.param_noise_stddev)
        self.adaptive_policy_distance = tf.sqrt(tf.reduce_mean(tf.square(self.actor_tf - adaptive_actor_tf)))

    def setup_actor_optimizer(self):
        logger.info('setting up actor optimizer')
        self.actor_loss = -tf.reduce_mean(self.critic_with_actor_tf)
        actor_shapes = [var.get_shape().as_list() for var in self.actor.trainable_vars]
        actor_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in actor_shapes])
        logger.info('  actor shapes: {}'.format(actor_shapes))
        logger.info('  actor params: {}'.format(actor_nb_params))
        self.actor_grads = U.flatgrad(self.actor_loss, self.actor.trainable_vars, clip_norm=self.clip_norm)
        self.actor_optimizer = Adam(var_list=self.actor.trainable_vars,
                                    beta1=0.9, beta2=0.999, epsilon=1e-08)

    def setup_critic_optimizer(self):
        logger.info('setting up critic optimizer')
        normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms),
                                                       self.return_range[0], self.return_range[1])
        self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
        if self.critic_l2_reg > 0.:
            critic_reg_vars = [var for var in self.critic.trainable_vars if
                               'kernel' in var.name and 'output' not in var.name]
            for var in critic_reg_vars:
                logger.info('  regularizing: {}'.format(var.name))
            logger.info('  applying l2 regularization with {}'.format(self.critic_l2_reg))
            critic_reg = tc.layers.apply_regularization(
                tc.layers.l2_regularizer(self.critic_l2_reg),
                weights_list=critic_reg_vars
            )
            self.critic_loss += critic_reg
        critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
        critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
        logger.info('  critic shapes: {}'.format(critic_shapes))
        logger.info('  critic params: {}'.format(critic_nb_params))
        self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
        self.critic_optimizer = Adam(var_list=self.critic.trainable_vars,
                                     beta1=0.9, beta2=0.999, epsilon=1e-08)

    def setup_popart(self):
        # See https://arxiv.org/pdf/1602.07714.pdf for details.
        self.old_std = tf.placeholder(tf.float32, shape=[1], name='old_std')
        new_std = self.ret_rms.std
        self.old_mean = tf.placeholder(tf.float32, shape=[1], name='old_mean')
        new_mean = self.ret_rms.mean

        self.renormalize_Q_outputs_op = []
        for vs in [self.critic.output_vars, self.target_critic.output_vars]:
            assert len(vs) == 2
            M, b = vs
            assert 'kernel' in M.name
            assert 'bias' in b.name
            assert M.get_shape()[-1] == 1
            assert b.get_shape()[-1] == 1
            self.renormalize_Q_outputs_op += [M.assign(M * self.old_std / new_std)]
            self.renormalize_Q_outputs_op += [b.assign((b * self.old_std + self.old_mean - new_mean) / new_std)]

    def setup_stats(self):
        ops = []
        names = []

        if self.normalize_returns:
            ops += [self.ret_rms.mean, self.ret_rms.std]
            names += ['ret_rms_mean', 'ret_rms_std']

        if self.normalize_observations:
            ops += [tf.reduce_mean(self.obs_rms.mean), tf.reduce_mean(self.obs_rms.std)]
            names += ['obs_rms_mean', 'obs_rms_std']

        ops += [tf.reduce_mean(self.critic_tf)]
        names += ['reference_Q_mean']
        ops += [reduce_std(self.critic_tf)]
        names += ['reference_Q_std']

        ops += [tf.reduce_mean(self.critic_with_actor_tf)]
        names += ['reference_actor_Q_mean']
        ops += [reduce_std(self.critic_with_actor_tf)]
        names += ['reference_actor_Q_std']

        ops += [tf.reduce_mean(self.actor_tf)]
        names += ['reference_action_mean']
        ops += [reduce_std(self.actor_tf)]
        names += ['reference_action_std']

        if self.param_noise:
            ops += [tf.reduce_mean(self.perturbed_actor_tf)]
            names += ['reference_perturbed_action_mean']
            ops += [reduce_std(self.perturbed_actor_tf)]
            names += ['reference_perturbed_action_std']

        self.stats_ops = ops
        self.stats_names = names

    def pi(self, obs, apply_noise=True, compute_Q=True):
        if self.param_noise is not None and apply_noise:
            actor_tf = self.perturbed_actor_tf
        else:
            actor_tf = self.actor_tf
        feed_dict = {self.obs0: [obs]}
        if compute_Q:
            action, q = self.sess.run([actor_tf, self.critic_with_actor_tf], feed_dict=feed_dict)
        else:
            action = self.sess.run(actor_tf, feed_dict=feed_dict)
            q = None
        action = action.flatten()
        if self.action_noise is not None and apply_noise:
            noise = self.action_noise()
            assert noise.shape == action.shape
            action += noise
        action = np.clip(action, self.action_range[0], self.action_range[1])
        return action, q

    def store_transition(self, obs0, action, reward, obs1, terminal1):
        reward *= self.reward_scale
        self.memory.append(obs0, action, reward, obs1, terminal1)
        if self.normalize_observations:
            self.obs_rms.update(np.array([obs0]))

    def train(self):
        # Get a batch.
        batch = self.memory.sample(batch_size=self.batch_size)

        if self.normalize_returns and self.enable_popart:
            old_mean, old_std, target_Q = self.sess.run([self.ret_rms.mean, self.ret_rms.std, self.target_Q],
                                                        feed_dict={
                                                            self.obs1: batch['obs1'],
                                                            self.rewards: batch['rewards'],
                                                            self.terminals1: batch['terminals1'].astype('float32'),
                                                        })
            self.ret_rms.update(target_Q.flatten())
            self.sess.run(self.renormalize_Q_outputs_op, feed_dict={
                self.old_std: np.array([old_std]),
                self.old_mean: np.array([old_mean]),
            })
        else:
            target_Q = self.sess.run(self.target_Q, feed_dict={
                self.obs1: batch['obs1'],
                self.rewards: batch['rewards'],
                self.terminals1: batch['terminals1'].astype('float32'),
            })

        # Get all gradients and perform a synced update.
        ops = [self.actor_grads, self.actor_loss, self.critic_grads, self.critic_loss]
        actor_grads, actor_loss, critic_grads, critic_loss = self.sess.run(ops, feed_dict={
            self.obs0: batch['obs0'],
            self.actions: batch['actions'],
            self.critic_target: target_Q,
        })
        self.actor_optimizer.update(actor_grads, stepsize=self.actor_lr)
        self.critic_optimizer.update(critic_grads, stepsize=self.critic_lr)

        return critic_loss, actor_loss

    def initialize(self, sess):
        self.sess = sess
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(self.target_init_updates)

    def update_target_net(self):
        self.sess.run(self.target_soft_updates)

    def get_stats(self):
        """Evaluate ``self.stats_ops`` on a fixed sample and return named results.

        The first call draws ``self.batch_size`` transitions from
        ``self.memory`` and caches them in ``self.stats_sample``; later calls
        reuse that same sample so successive readings are comparable.

        Returns:
            dict mapping each entry of ``self.stats_names`` to the matching
            value returned by the session, merged with
            ``self.param_noise.get_stats()`` when parameter noise is set
            (the noise entries win on key clashes).
        """
        if self.stats_sample is None:
            # Drawn once and then frozen, so every call sees the same inputs.
            self.stats_sample = self.memory.sample(batch_size=self.batch_size)

        sample = self.stats_sample
        feed = {
            self.obs0: sample['obs0'],
            self.actions: sample['actions'],
        }
        values = self.sess.run(self.stats_ops, feed_dict=feed)

        names = self.stats_names[:]
        assert len(names) == len(values)
        stats = dict(zip(names, values))

        if self.param_noise is None:
            return stats
        return {**stats, **self.param_noise.get_stats()}

    def adapt_param_noise(self):
        """Measure the adaptive-policy distance and feed it to the noise object.

        Returns:
            ``0.`` when no parameter noise is configured; otherwise the value
            the session returned for ``self.adaptive_policy_distance``, which
            is also passed to ``self.param_noise.adapt``.
        """
        noise = self.param_noise
        if noise is None:
            return 0.

        # Perturb a separate copy of the policy to adjust the scale for the
        # next "real" perturbation.
        batch = self.memory.sample(batch_size=self.batch_size)
        stddev_feed = {self.param_noise_stddev: noise.current_stddev}
        self.sess.run(self.perturb_adaptive_policy_ops, feed_dict=stddev_feed)

        distance_feed = {
            self.obs0: batch['obs0'],
            self.param_noise_stddev: noise.current_stddev,
        }
        distance = self.sess.run(self.adaptive_policy_distance,
                                 feed_dict=distance_feed)

        noise.adapt(distance)
        return distance

    def reset(self):
        """Reset per-episode noise state once an episode has finished.

        Calls ``reset()`` on the action noise when one is configured, and,
        when parameter noise is configured, runs ``self.perturb_policy_ops``
        with the noise object's current standard deviation.
        """
        action_noise = self.action_noise
        if action_noise is not None:
            action_noise.reset()

        param_noise = self.param_noise
        if param_noise is None:
            return
        stddev_feed = {self.param_noise_stddev: param_noise.current_stddev}
        self.sess.run(self.perturb_policy_ops, feed_dict=stddev_feed)
コード例 #17
0
    # Load features, labels and the submission inputs; the keyword flags are
    # passed to hp_data.load as-is (their exact effect is defined there).
    x, t, x_submission = hp_data.load(scale=True,
                                      label_log10=True,
                                      non_nan_ratio=0.8)
    print('x.shape:', x.shape)
    # The size of the last axis of x becomes the network's input width.
    feature_count = x.shape[-1]

    # Hold-out split: the first train_num rows train, the remainder tests.
    train_num = 1450
    train_x, train_y, test_x, test_y = x[:train_num, :], t[:train_num, :], x[
        train_num:, :], t[train_num:, :]

    max_iterations = 30000
    batch_size = 128
    # initialize network optimizer
    # NOTE(review): weight_init_types is not referenced in the visible part of
    # this function - confirm it is still needed.
    weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
    # optimizer = SGD(lr=0.01)
    optimizer = Adam(lr=1e-3)

    # Earlier configuration, kept for reference:
    # network = MultiLayerRegression(input_size=feature_count, hidden_size_list=[100, 100, 100, 300], output_size=1,
    #                                weight_init_std='relu', activation='relu',
    #                                weight_decay_lambda=1e-4,
    #                                use_dropout=True, dropout_ratio=0.2,
    #                                use_batchnorm=True)
    # Current configuration: four hidden layers, single regression output,
    # with weight decay, dropout and batch norm all switched on.
    network = MultiLayerRegression(input_size=feature_count,
                                   hidden_size_list=[300, 200, 100, 10],
                                   output_size=1,
                                   weight_init_std='relu',
                                   activation='relu',
                                   weight_decay_lambda=1e-4,
                                   use_dropout=True,
                                   dropout_ratio=0.3,
                                   use_batchnorm=True)
コード例 #18
0
    # Report the shapes of the remaining data splits.
    print("y train: ",y_train.shape)
    print("x_test: ",x_test.shape)
    print("y_test: ",y_test.shape)

    # Everything below only happens when the `run` flag is truthy.
    if(run):
       
        # The 8 input features:
        # MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude
        network = MultiLayerNetRegression(
            input_size=8, 
            hidden_size_list=[
            100,1000,100,
            ], 
            output_size=1,
            )
        
        optimizer = Adam(lr=learning_rate)
        
        train_acc_list = []

        # Iterations per epoch, never below 1. Under Python 3 `/` is true
        # division, so this can be a float.
        iter_per_epoch = max(train_size / batch_size, 1)

        epoch_cnt = 0

        # Training loop
        # NOTE(review): the iteration bound is effectively "run forever"; the
        # stopping condition is presumably further down - confirm.
        for i in range(1000000000):
            # Draw batch_size row indices from range(train_size);
            # np.random.choice samples with replacement by default.
            batch_mask = np.random.choice(train_size, batch_size)

            x_batch = x_train[batch_mask]
            y_batch = y_train[batch_mask]

            grads = network.gradient(x_batch, y_batch)
コード例 #19
0
ファイル: ch03_.py プロジェクト: irohaui/deep-learning
# Train a SimpleCBOW model on a one-sentence corpus and print the learned
# word vectors.

# Hyperparameters for the run.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

# Turn the sample sentence into a corpus plus word <-> id lookup tables.
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

vocab_size = len(word_to_id)
# Build (contexts, target) from the corpus, then one-hot encode both. The
# helpers must be *called* here; binding the bare function objects to
# `contexts` / `target` would hand functions to the trainer.
contexts, target = create_contexts_target(corpus, window_size)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Print every vocabulary word next to its row of the learned word vectors.
word_vecs = model.word_vecs
for word_id, word in id_to_word.items():
    print(word, word_vecs[word_id])
コード例 #20
0
# Mini-batch iterations per epoch (true division under Python 3, so this may
# be a float), e.g. 5000 / 100 = 50 iterations per epoch.
train_per_epoch = sample_num / batch_size

# Shape of one training sample; (1, 28, 28) is the expected value.
input_dim = x_train[0].shape

# Convolution layer settings, passed to the network as one dict.
conv_param = {
    "filter_num": 30,
    "filter_size": 5,
    "pad": 0,
    "stride": 1,
}
hidden_size, output_size = 100, 10
weight_init_std = 0.01
# NOTE(review): learning_rate is defined here but Adam() below is built with
# its defaults - confirm that is intended.
learning_rate = 0.001

# Per-epoch histories, filled in by the training loop.
train_loss_list, train_acc_list, test_acc_list = [], [], []
optimizer = Adam()

# Build the network
network = SimpleConvNet(
    input_dim=input_dim,
    conv_param=conv_param,
    hidden_size=hidden_size,
    output_size=output_size,
    weight_init_std=weight_init_std,
)

# The string below is a disabled alternative that drives training through the
# Trainer helper; it is a bare string expression and has no effect.
"""
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=epoch_max, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()
"""
コード例 #21
0
ファイル: learn_lstm.py プロジェクト: pickybeak/chatbot
# Train a BetterRnnlm language model one epoch at a time, reporting the
# validation perplexity after each epoch.
if config.GPU:
    # NOTE(review): the GPU copy of the training corpus is stored as `corpus`,
    # while the code below keeps reading `corpus_train` - confirm which name
    # is intended.
    corpus = to_gpu(corpus_train)
    corpus_val = to_gpu(corpus_val)
    corpus_test = to_gpu(corpus_test)

vocab_size = len(preprocessing.word_to_id)

# Flatten each list of token sequences into one flat token list.
# `sum(seqs, [])` builds a new list per addition (quadratic in corpus size)
# and was evaluated twice for the training data; one comprehension pass gives
# the same result for lists of lists in linear time.
train_tokens = [token for sentence in corpus_train for token in sentence]
xs = train_tokens[:-1]  # inputs: every token except the last
ts = train_tokens[1:]   # targets: the same stream shifted by one token
corpus_val = [token for sentence in corpus_val for token in sentence]
corpus_test = [token for sentence in corpus_test for token in sentence]

model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
# optimizer = SGD(lr)
optimizer = Adam(lr=lr)
trainer = RnnlmTrainer(model, optimizer)

best_ppl = float('inf')
for epoch in range(max_epoch):
    # One pass over the training data per outer iteration, so perplexity can
    # be evaluated between epochs.
    trainer.fit(xs,
                ts,
                max_epoch=1,
                batch_size=batch_size,
                time_size=time_size,
                max_grad=max_grad)

    # Reset the model state before evaluating on the validation corpus.
    model.reset_state()
    ppl = eval_perplexity(model, corpus_val)
    print('검증 퍼플렉서티: ', ppl)
コード例 #22
0
# Targets: the corpus shifted by one position.
ts = corpus[1:]

# Hyperparameter settings
vocab_size = len(word_to_id)
wordvec_size, hidden_size = 16, 128
batch_size, max_epoch = 1, 50
max_grad = 5.0
sample_size = 100
# NOTE(review): lr is defined here but Adam() below is built with its
# defaults - confirm that is intended.
lr = 0.001
time_size = 35

# Build the model, optimizer and trainer
model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = RnnlmTrainer(model, optimizer)

# Training: one epoch per iteration, keeping the best parameters seen so far.
best_ppl = float('inf')
t1 = time.time()
for epoch in range(max_epoch):
    trainer.fit(
        xs,
        ts,
        max_epoch=1,
        batch_size=batch_size,
        max_grad=max_grad,
    )

    model.reset_state()
    ppl = eval_perplexity(model, corpus)
    print('valid perplexity: ', ppl)

    # Save parameters only when perplexity strictly improves.
    if ppl < best_ppl:
        best_ppl = ppl
        model.save_params()
コード例 #23
0
def df(x, y):
    """Return the pair ``(x / 10.0, 2.0 * y)``.

    Presumably the gradient of f(x, y) = x**2 / 20 + y**2 - confirm against
    the function being optimized.
    """
    grad_x = x / 10.0
    grad_y = 2.0 * y
    return grad_x, grad_y


# Starting point shared by every optimizer run.
init_pos = (-7.0, 2.0)
params = {'x': init_pos[0], 'y': init_pos[1]}
grads = {'x': 0, 'y': 0}

# Optimizers to compare, in insertion order, each with its own learning rate.
optimizers = OrderedDict([
    ('SGD', SGD(lr=0.95)),
    ('Momentum', Momentum(lr=0.1)),
    ('AdaGrad', AdaGrad(lr=1.5)),
    ('Adam', Adam(lr=0.3)),
])

idx = 1

for key in optimizers:
    optimizer = optimizers[key]
    x_history = []
    y_history = []
    params['x'], params['y'] = init_pos[0], init_pos[1]

    for i in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])

        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)