def main(_):
    config = AttrDict(default_config())
    # Define the agent that trains with the REINFORCE algorithm.
    agent = REINFORCE(config)

    train_results = []
    start_time = time.time()
    # Train for config.num_episodes episodes.
    for i, result in enumerate(agent.train(config.num_episodes)):
        # Average the policy's return over 5 evaluation rollouts.
        evals = [agent.policy.evaluate() for _ in range(5)]
        result = result._replace(eval=np.mean(evals))
        print('\rEpisode {}/{} policy loss ({}), value loss ({}), eval ({})'.format(
            i, config.num_episodes, result.policy_loss, result.val_loss, result.eval),
            end='', flush=True)
        train_results.append(result)
    end_time = time.time()
    duration = end_time - start_time
    # 100 episodes, Process duration: 59.979570150375366[s]
    print('\nProcess duration: {0}[s]'.format(duration))

    plot_agent_stats(train_results, 'REINFORCE algorithm.')
    plt.show()
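
# AttrDict(default_config()) appears in every entry point in this repo; it
# wraps a plain config dict so keys can be read as attributes. A minimal
# sketch of such a class, assuming the repository's version behaves like a
# dict with attribute access (the actual definition may differ):
class AttrDictSketch(dict):
    """Dict whose keys are also readable and writable as attributes."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value
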
def main(_):
    config = AttrDict(default_config())
    # Define the agent that trains with the Actor-Critic algorithm.
    agent = ActorCrtic(config)
    # Spawn 5 remote policies for parallel evaluation.
    remote_policies = [RemotePolicy.remote(config) for _ in range(5)]

    train_results = []
    start_time = time.time()
    # Train for config.num_episodes episodes.
    for i, result in enumerate(agent.train(config.num_episodes)):
        evals = agent.evaluate(remote_policies)
        result = result._replace(eval=np.mean(evals))
        print('\rEpisode {}/{} policy loss ({}), value loss ({}), eval ({})'.format(
            i, config.num_episodes, result.policy_loss, result.val_loss, result.eval),
            end='', flush=True)
        train_results.append(result)
    end_time = time.time()
    duration = end_time - start_time
    # 100 episodes, Process duration: 40.74085235595703[s] using GPU
    print('\nProcess duration: {0}[s]'.format(duration))

    plot_agent_stats(train_results, 'Actor Critic algorithm.')
    plt.show()
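
# RemotePolicy.remote(...) above follows Ray's actor-creation pattern, so
# evaluation is presumably distributed across Ray actors. A minimal sketch of
# what the actor and the agent's `evaluate` helper might look like; the class
# body and the `Policy` wiring are assumptions, not the repository's actual
# implementation:
import ray

@ray.remote
class RemotePolicySketch(object):
    """Hypothetical Ray actor wrapping a Policy for parallel rollouts."""

    def __init__(self, config):
        self._policy = Policy(config)

    def evaluate(self):
        # Run one evaluation episode and return its total reward.
        return self._policy.evaluate()

def evaluate_sketch(remote_policies):
    # Kick off one rollout per actor, then block for all the rewards.
    return ray.get([p.evaluate.remote() for p in remote_policies])
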
def main(_):
    config = AttrDict(default_config())
    policy = Policy(config)
    saver = tf.train.Saver()
    # Restore the trained policy weights from the checkpoint.
    saver.restore(policy._sess, './checkpoints/reinforce_debug')
    # Roll out the restored policy and collect rendered frames.
    frames = video_evaluate_policy(policy, config.env_name)
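
# `video_evaluate_policy` returns the episode's rendered frames, but this
# entry point never persists them. A minimal sketch of writing them out,
# assuming each frame is an HxWx3 uint8 array (the output path and the use
# of imageio are illustrative assumptions):
import imageio

def save_frames(frames, path='./videos/reinforce_eval.gif', fps=30):
    # Encode the rollout frames as an animated GIF.
    imageio.mimsave(path, frames, fps=fps)
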
def main(_):
    # Set configuration.
    config = AttrDict(default_config())
    # Build the one-hot MNIST model.
    model = benchmark_model.build_tf_one_hot_model(batch_size=config.batch_size,
                                                   use_bias=config.use_bias,
                                                   activation=config.activation)
    # Load one-hot MNIST data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_one_hot_data(
        dataset=config.dataset)

    # Check that the dataset has the expected shape.
    assert x_train.shape == (60000, 784)
    assert y_train.shape == (60000, 10)

    # Minimize the model's loss with the NMF optimizer.
    optimizer = NMFOptimizer(config=config)
    train_op = optimizer.minimize(model.frob_norm)

    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.cross_entropy)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # Pre-train with the autoencoder updates before the benchmark runs.
        for i in range(1000):
            x, y = benchmark_model.batch(x_train, y_train,
                                         batch_size=config.batch_size)
            loss, _ = sess.run([optimizer.autoencoder_loss,
                                optimizer.autoencoder_train_op],
                               feed_dict={model.inputs: x})
            print('\rloss {}'.format(loss), end='', flush=True)
        print()

        _train_and_test = functools.partial(train_and_test,
                                            sess=sess,
                                            model=model,
                                            x_train=x_train,
                                            y_train=y_train,
                                            x_test=x_test,
                                            y_test=y_test,
                                            batch_size=config.batch_size)

        print('NMF-optimizer')
        # Train with the NMF optimizer.
        _train_and_test(train_op, num_iters=config.num_mf_iters)

        print('Adam-optimizer')
        # Train with the Adam optimizer.
        _train_and_test(bp_train_op, num_iters=config.num_bp_iters)
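
# `benchmark_model.batch` draws the mini-batches used above. A minimal
# sketch, assuming it samples rows uniformly at random (sampling without
# replacement is an assumption):
def batch_sketch(x, y, batch_size):
    # Pick batch_size distinct row indices and return the paired slices.
    idx = np.random.choice(len(x), size=batch_size, replace=False)
    return x[idx], y[idx]
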
def main(_):
    # Set configuration.
    config = AttrDict(default_config())
    # Build the cross-entropy MNIST model.
    model = benchmark_model.build_tf_cross_entropy_model(
        batch_size=config.batch_size,
        use_bias=config.use_bias,
        activation=config.activation)
    # Load one-hot MNIST data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_one_hot_data(
        dataset=config.dataset)

    # Check that the dataset has the expected shape.
    assert x_train.shape == (60000, 784)
    assert y_train.shape == (60000, 10)

    # Minimize the model's loss with the NMF optimizer.
    optimizer = NMFOptimizer()
    train_op = optimizer.minimize(model.cross_entropy)

    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.cross_entropy)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        _train_and_test = functools.partial(train_and_test,
                                            sess=sess,
                                            model=model,
                                            x_train=x_train,
                                            y_train=y_train,
                                            x_test=x_test,
                                            y_test=y_test,
                                            batch_size=config.batch_size)

        print('NMF-optimizer')
        # Train with the NMF optimizer.
        _train_and_test(train_op, num_iters=config.num_mf_iters)

        print('Adam-optimizer')
        # Train with the Adam optimizer.
        _train_and_test(bp_train_op, num_iters=config.num_bp_iters)
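
# `train_and_test` is shared by these benchmark scripts via functools.partial.
# A minimal sketch of the shape such a helper could take, assuming the model
# exposes `inputs`, `labels`, `cross_entropy`, and `accuracy` tensors (the
# exact attribute names and the printed metrics are assumptions):
def train_and_test_sketch(train_op, num_iters, sess, model,
                          x_train, y_train, x_test, y_test, batch_size):
    for i in range(num_iters):
        x, y = benchmark_model.batch(x_train, y_train, batch_size=batch_size)
        # One optimization step on a training mini-batch.
        _, loss = sess.run([train_op, model.cross_entropy],
                           feed_dict={model.inputs: x, model.labels: y})
        # Measure held-out accuracy on the test split.
        acc = sess.run(model.accuracy,
                       feed_dict={model.inputs: x_test, model.labels: y_test})
        print('\riter {} loss {:.4f} test acc {:.4f}'.format(i, loss, acc),
              end='', flush=True)
    print()
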
def minimize(self, loss=None, pretrain=False):
    """Construct the NMF update operations for the network's weights.

    Args:
      loss: Optional loss tensor used to initialize the optimizer state.
      pretrain: If True, also build an autoencoder pre-training op.

    Returns:
      An AttrDict with `ae` (the pre-training op, or tf.no_op()) and
      `nmf` (the grouped weight-update op).
    """
    self._init(loss)
    # Pre-train with an autoencoder.
    pretrain_op = self._autoencoder() if pretrain else tf.no_op()
    a = self.labels
    updates = []
    # Walk the layers in reverse order (output to input).
    layers = self._layers[::-1]
    for i, layer in enumerate(layers):
        u = layer.output
        v = layer.kernel
        if layer.use_bias:
            # Append the bias as an extra row of v so it is factorized too.
            v = tf.concat((v, layer.bias[None, ...]), axis=0)
        if not layer.activation:
            # Linear layer (no activation): plain semi-NMF update.
            u, v = mf.semi_nmf(
                a=a, u=u, v=v,
                use_tf=True,
                use_bias=layer.use_bias,
                num_iters=1,
                first_nneg=True,
            )
        elif utility.get_op_name(layer.activation) == 'Relu':
            # ReLU activation: nonlinear semi-NMF update.
            u, v = mf.nonlin_semi_nmf(
                a=a, u=u, v=v,
                use_tf=True,
                use_bias=layer.use_bias,
                num_calc_v=1,
                num_calc_u=1,
                first_nneg=True,
            )
        elif utility.get_op_name(layer.activation) == 'Softmax':
            # Softmax output layer.
            u, v = mf.softmax_nmf(
                a=a, u=u, v=v,
                use_tf=True,
                use_bias=layer.use_bias,
            )
        if layer.use_bias:
            v, bias = utility.split_v_bias(v)
            updates.append(layer.bias.assign(bias))
        updates.append(layer.kernel.assign(v))
        # The factor u becomes the target activations for the next
        # (shallower) layer.
        a = tf.identity(u)
    return AttrDict(ae=pretrain_op, nmf=tf.group(*updates))
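
# `utility.split_v_bias` undoes the concat performed in `minimize`: the bias
# was appended as the final row of the stacked factor, so splitting off that
# row recovers kernel and bias. A minimal sketch of that inverse (the real
# helper may operate on tf tensors rather than plain slicing):
def split_v_bias_sketch(v):
    # v has shape (in_dim + 1, out_dim); the last row is the bias.
    return v[:-1], v[-1]
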
def main(_):
    print('use_bias', FLAGS.use_bias)
    LABEL_SIZE = 4
    # Set configuration.
    config = AttrDict(default_config())
    # Build the Hitachi benchmark model.
    model = benchmark_model.build_tf_hitachi_simple_model(config.batch_size,
                                                          use_bias=config.use_bias,
                                                          activation=config.activation,
                                                          label_size=LABEL_SIZE)
    # Load the Hitachi data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_hitachi_data(
        config.path, test_size=0.1)

    # Check that the dataset has the expected shape.
    # assert x_train.shape[1] == 3
    # assert y_train.shape[1] == 4

    # Minimize the model's loss with the NMF optimizer.
    optimizer = NMFOptimizer()
    train_op = optimizer.minimize(model.loss)

    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.loss)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        _train_and_test = functools.partial(train_and_test,
                                            sess=sess,
                                            model=model,
                                            x_train=x_train,
                                            y_train=y_train,
                                            x_test=x_test,
                                            y_test=y_test,
                                            batch_size=config.batch_size)

        print('Adam-optimizer')
        # Train with the Adam optimizer first.
        bp_train_losses, bp_test_losses = _train_and_test(
            bp_train_op, num_iters=config.num_bp_iters, add_loss_before_train=True)

        print('NMF-optimizer')
        # Then continue training with the NMF optimizer.
        mf_train_losses, mf_test_losses = _train_and_test(
            train_op, num_iters=config.num_mf_iters)

    length_bp = bp_train_losses.shape[1]
    # Prepend the last BP loss so the NMF curves start where the BP curves end.
    mf_train_losses = np.hstack([bp_train_losses[:, -1, np.newaxis],
                                 mf_train_losses])
    length_mf = mf_train_losses.shape[1]
    bp_x = np.arange(0, length_bp)
    mf_x = np.arange(length_bp - 1, length_bp + length_mf - 1)

    plt.rc('grid', linestyle='--', color='black')
    fig, ax = plt.subplots(2, 2, figsize=(6 * 2.5, 4 * 2.5))
    # One panel per label dimension (B1-B4).
    if config.num_bp_iters > 0:
        ax[0, 0].plot(bp_x, bp_train_losses[0], linestyle='-',
                      color='dodgerblue', label='BP-B1')
        ax[0, 1].plot(bp_x, bp_train_losses[1], linestyle='-',
                      color='dodgerblue', label='BP-B2')
        ax[1, 0].plot(bp_x, bp_train_losses[2], linestyle='-',
                      color='dodgerblue', label='BP-B3')
        ax[1, 1].plot(bp_x, bp_train_losses[3], linestyle='-',
                      color='dodgerblue', label='BP-B4')
    if config.num_mf_iters > 0:
        ax[0, 0].plot(mf_x, mf_train_losses[0], linestyle='-',
                      color='black', label='NMF-B1')
        ax[0, 1].plot(mf_x, mf_train_losses[1], linestyle='-',
                      color='black', label='NMF-B2')
        ax[1, 0].plot(mf_x, mf_train_losses[2], linestyle='-',
                      color='black', label='NMF-B3')
        ax[1, 1].plot(mf_x, mf_train_losses[3], linestyle='-',
                      color='black', label='NMF-B4')
    ax[0, 0].set_title('B1')
    ax[0, 1].set_title('B2')
    ax[1, 0].set_title('B3')
    ax[1, 1].set_title('B4')

    # plt.plot(bp_x, bp_test_losses, linestyle='--', color='dodgerblue', label='test-bp')
    # plt.plot(mf_x, mf_test_losses, linestyle='--', color='black', label='test-nmf')
    plt.xlabel('#Epoch')
    plt.grid(True)
    plt.show()
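
# `load_hitachi_data` reads a proprietary dataset from config.path; the
# test_size argument suggests an sklearn-style split. A minimal sketch under
# that assumption (the storage format, key names, and the use of
# train_test_split are all hypothetical):
import numpy as np
from sklearn.model_selection import train_test_split

def load_hitachi_data_sketch(path, test_size=0.1):
    data = np.load(path)  # hypothetical .npz with 'x' and 'y' arrays
    x_train, x_test, y_train, y_test = train_test_split(
        data['x'], data['y'], test_size=test_size)
    return (x_train, y_train), (x_test, y_test)
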