Example #1
        x = tf.reshape(x, [-1, 7 * 7 * 64], name='flatten')
        x = tf.layers.dense(
            x, units=1, activation=None, name='fc1'
        )  # sigmoid is applied implicitly in the computation of the loss
        return x


if __name__ == '__main__':
    _BATCH_SIZE = 128  # Size of the minibatches
    _CODE_SIZE = 10  # Size of the latent code
    _MAX_IT = 20000  # Number of iterations

    _LOG_BASE_DIR = 'log'
    _LOG_DIR_SUFFIX = 'mnist_gan'
    _LOG_DIR = get_log_dir(_LOG_BASE_DIR, _LOG_DIR_SUFFIX)

    _CHECKPOINT_FILE = '%s/model.ckpt' % _LOG_DIR

    # Load the MNIST data and create a helper that returns image batches
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_batch = lambda n: mnist.train.next_batch(n)[0].reshape(-1, 28, 28, 1)

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Create the network and initialise the parameters
        gan = DCGAN([28, 28, 1], _BATCH_SIZE, _CODE_SIZE)
        sess.run(tf.global_variables_initializer())
        # Create file writers and summary
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()
        train_writer = tf.summary.FileWriter(_LOG_DIR, sess.graph)
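
The 'fc1' layer above returns raw logits, so the sigmoid has to be applied inside the loss, as the comment notes. The loss code is not part of this excerpt; the sketch below only illustrates the usual pattern with tf.nn.sigmoid_cross_entropy_with_logits, and the names d_logits_real / d_logits_fake are hypothetical placeholders rather than attributes of the DCGAN class shown here.

import tensorflow as tf

# Hypothetical discriminator logits (shape [batch, 1]); in the real model
# they would come from two passes through the network ending in 'fc1'.
d_logits_real = tf.placeholder(tf.float32, [None, 1])
d_logits_fake = tf.placeholder(tf.float32, [None, 1])

# The sigmoid is applied inside the cross-entropy op, so 'fc1' stays linear.
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_logits_real, labels=tf.ones_like(d_logits_real))
) + tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=d_logits_fake, labels=tf.zeros_like(d_logits_fake))
)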
Example #2
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import object_seaquest_config
from util import get_log_dir


if __name__ == '__main__':
    config = object_seaquest_config
    # Name the logging directory after the game and the double-Q setting
    log_dir = get_log_dir('log', config['game'] + '_' + str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver(max_to_keep=None)

    # Restore weights from an earlier training run's checkpoint
    saver.restore(
        agent.session,
        '%s/episode_%d.ckpt' % ('log/log/2017-12-09_23-40-34_SeaquestDeterministic-v4_True', 800),
    )

    print('Validate....\n==============')
    scores = agent.validate_episode(epsilon=0, visualise=True)
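
util.get_log_dir itself is not shown in these examples. Judging from the restored checkpoint path above (a timestamped directory ending in the game name and double-Q flag), it appears to create a fresh, timestamped run directory under the base directory; the function below is only a guess at that behaviour, not the actual implementation from util.

import os
from datetime import datetime

def get_log_dir(base_dir, suffix):
    # Assumed behaviour: build 'base_dir/<timestamp>_<suffix>', e.g.
    # 'log/2017-12-09_23-40-34_SeaquestDeterministic-v4_True', and create it.
    run_name = '%s_%s' % (datetime.now().strftime('%Y-%m-%d_%H-%M-%S'), suffix)
    log_dir = os.path.join(base_dir, run_name)
    os.makedirs(log_dir)
    return log_dir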
        
Example #3
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from agent import QAgent
from configs import pong_config, breakout_config
from util import get_log_dir

if __name__ == '__main__':
    config = breakout_config
    log_dir = get_log_dir('log', config['game']+'_'+str(config['double_q']))
    agent = QAgent(config=config, log_dir=log_dir)
    saver = tf.train.Saver()
    for episode in range(config['episodes']):
        print('\n\nepisode: %d, step: %d, eps: %.4f\n\n---------------------' % (episode, agent.steps, agent.epsilon))
        # Run one training episode and record the reward it earned
        agent._update_training_reward(agent.train_episode())

        if episode % config['episodes_validate'] == 0:
            print('Validate....\n==============')
            scores = [agent.validate_episode(epsilon=0.05) for i in range(config['episodes_validate_runs'])]
            agent._update_validation_reward(np.mean(scores))
            print(scores)
        # Save a checkpoint every config['episodes_save_interval'] episodes
        if episode % config['episodes_save_interval'] == 0:
            saver.save(agent.session, '%s/episode_%d.ckpt' % (log_dir, episode))
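
QAgent's internals are not included in these excerpts, but the agent.epsilon printed during training and the epsilon argument to validate_episode point to standard epsilon-greedy action selection. The helper below is a generic sketch of that rule with hypothetical inputs (q_values), not code from the agent module.

import numpy as np

def epsilon_greedy_action(q_values, epsilon, rng=np.random):
    # With probability epsilon take a uniformly random action,
    # otherwise take the action with the highest estimated Q-value.
    if rng.rand() < epsilon:
        return rng.randint(len(q_values))
    return int(np.argmax(q_values))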