import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from util import get_log_dir
from dcgan import DCGAN  # NOTE: assumed import; the DCGAN class is defined elsewhere in this project


def discriminator(x):
    # NOTE: the original file is truncated above this point; the imports and this
    # function header are an assumed reconstruction. `x` is assumed to already hold
    # the 7x7x64 feature maps produced by the (truncated) convolutional layers.
    x = tf.reshape(x, [-1, 7 * 7 * 64], name='flatten')
    x = tf.layers.dense(
        x, units=1, activation=None, name='fc1'
    )  # sigmoid is applied implicitly in the computation of the loss
    return x


if __name__ == '__main__':
    _BATCH_SIZE = 128   # Size of the minibatches
    _CODE_SIZE = 10     # Size of the latent code
    _MAX_IT = 20000     # Number of iterations

    _LOG_BASE_DIR = 'log'
    _LOG_DIR_SUFFIX = 'mnist_gan'
    _LOG_DIR = get_log_dir(_LOG_BASE_DIR, _LOG_DIR_SUFFIX)
    _CHECKPOINT_FILE = '%s/model.ckpt' % _LOG_DIR

    # Load the MNIST data and create a function that returns image batches
    # of shape (n, 28, 28, 1)
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_batch = lambda n: mnist.train.next_batch(n)[0].reshape(n, 28, 28, 1)

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Create the network and initialise the parameters
        gan = DCGAN([28, 28, 1], _BATCH_SIZE, _CODE_SIZE)
        sess.run(tf.global_variables_initializer())

        # Create the summary op, the saver and the file writer
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()
        train_writer = tf.summary.FileWriter(_LOG_DIR, sess.graph)
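        # --- Training loop (sketch) ---------------------------------------
        # NOTE: the original file appears truncated here: _MAX_IT, _CHECKPOINT_FILE,
        # merged, mnist_batch and saver are defined above but never used. The loop
        # below is a minimal sketch of the usual alternating GAN updates; the DCGAN
        # attributes it references (gan.images, gan.code, gan.disc_train_op,
        # gan.gen_train_op) are ASSUMED names, not taken from the original source.
        for it in range(_MAX_IT):
            # Sample a minibatch of real images and a batch of latent codes
            images = mnist_batch(_BATCH_SIZE)
            code = np.random.uniform(-1.0, 1.0, size=(_BATCH_SIZE, _CODE_SIZE))
            feed = {gan.images: images, gan.code: code}

            # One discriminator step, then one generator step
            sess.run(gan.disc_train_op, feed_dict=feed)
            sess.run(gan.gen_train_op, feed_dict=feed)

            # Write summaries and save a checkpoint periodically
            if it % 100 == 0:
                train_writer.add_summary(sess.run(merged, feed_dict=feed), it)
            if it % 1000 == 0:
                saver.save(sess, _CHECKPOINT_FILE, global_step=it)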
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import object_seaquest_config
from util import get_log_dir

if __name__ == '__main__':
    config = object_seaquest_config
    # Name of the logging directory
    log_dir = get_log_dir('log', config['game'] + '_' + str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)

    # Restore the parameters of a previously trained agent from its checkpoint
    saver = tf.train.Saver(max_to_keep=None)
    saver.restore(
        agent.session,
        '%s/episode_%d.ckpt' % ('log/log/2017-12-09_23-40-34_SeaquestDeterministic-v4_True', 800)
    )

    # Run a greedy (epsilon = 0) validation episode and render it
    print('Validate....\n==============')
    scores = agent.validate_episode(epsilon=0, visualise=True)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import pong_config, breakout_config
from util import get_log_dir

if __name__ == '__main__':
    config = breakout_config
    log_dir = get_log_dir('log', config['game'] + '_' + str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver()

    for episode in range(config['episodes']):
        print('\n\nepisode: %d, step: %d, eps: %.4f\n\n---------------------'
              % (episode, agent.steps, agent.epsilon))

        # Run one training episode and store its reward
        agent._update_training_reward(agent.train_episode())

        # Periodically validate with a small exploration rate and store the mean score
        if episode % config['episodes_validate'] == 0:
            print('Validate....\n==============')
            scores = [agent.validate_episode(epsilon=0.05)
                      for _ in range(config['episodes_validate_runs'])]
            agent._update_validation_reward(np.mean(scores))
            print(scores)

        # Save a checkpoint every save interval
        if episode % config['episodes_save_interval'] == 0:
            saver.save(agent.session, '%s/episode_%d.ckpt' % (log_dir, episode))