Example #1
def _mlp_configured(input_, num_actions, hiddens, dueling=False, summaries=False):
    """ Constructs dueling architecture """

    if not dueling:
        # standard MLP: one Q-value per action
        output = _mlp(input_, num_actions, hiddens)
    else:
        # advantage stream A(s, a)
        A = _mlp(input_, num_actions, hiddens)

        # state-value function
        V = _mlp(input_, num_actions, hiddens)

        # log the mean state value
        if summaries:
            scalar_summary('state_value', tf.reduce_mean(V))

        # mean-center advantage values
        A_mean = tf.reduce_mean(A, axis=1)
        A_centered = A - tf.expand_dims(A_mean, axis=1)

        output = V + A_centered

    return output
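
These examples all use the scalar_summary helper from forkan.common.tf_utils (see the import in Example #7); its definition is not part of this listing. A minimal sketch of what such a wrapper might look like, assuming it simply namespaces tf.summary.scalar under an optional scope (an assumption, not the library's actual code):

import tensorflow as tf

def scalar_summary(name, tensor, scope=None):
    # hypothetical wrapper: optionally group the scalar summary under a variable scope
    if scope is not None:
        with tf.variable_scope(scope):
            return tf.summary.scalar(name, tensor)
    return tf.summary.scalar(name, tensor)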
Example #2
        def __init__(self, m, ovo):
            # placeholders for externally computed train/test metrics
            self.mse_ph = tf.placeholder(tf.float32, (), name='mse-train')
            self.mae_ph = tf.placeholder(tf.float32, (), name='mae-train')

            self.val_mse_ph = tf.placeholder(tf.float32, (), name='mse-test')
            self.val_mae_ph = tf.placeholder(tf.float32, (), name='mae-test')

            # image placeholder for the predicted-ball-position visualization
            self.im_ph = tf.placeholder(tf.uint8, (1, 210 * 3, 160 * 5, 3),
                                        name='pred-ball-pos-ph')

            tr_sum = []
            tr_sum.append(
                scalar_summary('mse-train', self.mse_ph, scope='train'))
            tr_sum.append(
                scalar_summary('mae-train', self.mae_ph, scope='train'))

            te_sum = []
            te_sum.append(
                scalar_summary('mse-test', self.val_mse_ph, scope='test'))
            te_sum.append(
                scalar_summary('mae-test', self.val_mae_ph, scope='test'))

            self.im_sum = tf.summary.image('pred-ball-pos', self.im_ph)
            self.mtr_sum = tf.summary.merge(tr_sum)
            self.mte_sum = tf.summary.merge(te_sum)

            self.fw = tf.summary.FileWriter(f'{home}/ball/{model_name}',
                                            graph=sess.graph)
            self.ovo = ovo
            self.step = 0
            self.m = m
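
A training loop could then feed these placeholders and write the merged summaries roughly as follows; this is a sketch, and the logger instance, metric values, and session are illustrative names rather than part of the original code:

# sketch: log the train metrics for the current step
summary_str = sess.run(logger.mtr_sum,
                       feed_dict={logger.mse_ph: train_mse,
                                  logger.mae_ph: train_mae})
logger.fw.add_summary(summary_str, logger.step)
logger.fw.flush()
logger.step += 1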
Example #3
    def _setup_tensorboard(self):
        """
        Adds all variables that might help debugging to Tensorboard.
        At the end, the FileWriter is constructed pointing to the specified directory.

        """

        # placeholders for values computed outside the graph, along with their summaries
        self.eps_ph = tf.placeholder(tf.float32, (), name='epsilon')
        self.rew_ph = tf.placeholder(tf.float32, (), name='rolling-reward')

        scalar_summary('epsilon', self.eps_ph)
        scalar_summary('reward', self.rew_ph)

        # log per-action Q-values during training
        for a_i in range(self.num_actions):
            scalar_summary('QTa_{}'.format(a_i + 1),
                           tf.reduce_mean(self.target_tp1[:, a_i]),
                           scope='Q-Values')
            scalar_summary('Qa_{}'.format(a_i + 1),
                           tf.reduce_mean(self.q_t[:, a_i]),
                           scope='Q-Values')

        # plot network weights
        with tf.variable_scope('weights'):
            for qv in self.q_net_vars:
                tf.summary.histogram('{}'.format(qv.name), qv)
            for tv in self.target_net_vars:
                tf.summary.histogram('{}'.format(tv.name), tv)

        # gradient histograms
        with tf.variable_scope('gradients'):
            for g in self.gradients:
                tf.summary.histogram('{}-grad'.format(g[1].name), g[0])
Example #4
    def _loss(self):
        """ Defines loss as layed out in the original Nature paper """

        with tf.variable_scope('loss'):

            # double Q-learning: the online net selects the action, the target net evaluates it;
            # otherwise simply take the maximum target Q-value
            if self.double_q:
                act_tp1_idxs = tf.stop_gradient(tf.argmax(self.q_tp1, axis=1))
                q_tp1 = tf.reduce_sum(
                    self.target_tp1 *
                    tf.one_hot(act_tp1_idxs, self.num_actions),
                    axis=1)
            else:
                q_tp1 = tf.reduce_max(self.target_tp1, axis=1)

            # Bellman target
            y = self._L_r + (self.gamma * (1.0 - self._L_d) * q_tp1)

            # select q value of taken action
            qj = tf.reduce_sum(self.q_t *
                               tf.one_hot(self._L_a, self.num_actions),
                               axis=1)

            # TD errors
            self._td_errors = qj - y

            # per-sample Huber loss, so importance sampling weights can be applied per transition
            loss = tf.losses.huber_loss(y, qj,
                                        reduction=tf.losses.Reduction.NONE)

        if self.use_tensorboard:
            scalar_summary('target', tf.reduce_mean(y))
            scalar_summary('huber-loss', tf.reduce_mean(loss))
            tf.summary.histogram('selected_Q', qj)

        # weight the loss with importance sampling weights when using prioritized replay
        if self.prioritized_replay:
            updates = tf.reduce_mean(self._is_weights * loss)
        else:
            updates = tf.reduce_mean(loss)

        return updates
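
When prioritized replay is enabled, the TD errors exposed here (self._td_errors) are typically used to refresh the priorities of the sampled transitions after each update. A sketch, assuming a replay buffer with an update_priorities method; the agent, buffer, and batch names below are illustrative:

import numpy as np

# sketch: recompute priorities from the absolute TD errors of the sampled batch
td_errors = sess.run(agent._td_errors, feed_dict=batch_feed)
new_priorities = np.abs(td_errors) + 1e-6  # small epsilon keeps every priority positive
replay_buffer.update_priorities(batch_idxs, new_priorities)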
Example #5
    def _setup_tensorboard(self):
        """
        Adds all variables that might help debugging to Tensorboard.
        At the end, the FileWriter is constructed pointing to the specified directory.

        """

        self.logger.info('Saving Tensorboard summaries to {}'.format(
            self.tensorboard_dir))

        self.ret_ph = tf.placeholder(tf.float32, (), name='mean-return')
        self.kl_ph = tf.placeholder(tf.float32, (), name='kl')
        self.pl_diff_ph = tf.placeholder(tf.float32, (), name='pl-diff')

        scalar_summary('mean-return', self.ret_ph)
        scalar_summary('kl', self.kl_ph)
        scalar_summary('pl-diff', self.pl_diff_ph)

        with tf.variable_scope('loss'):
            scalar_summary('value-loss', self.vf_loss)
            scalar_summary('policy-loss', self.policy_loss)
            # scalar_summary('policy-entropy', self.pi_entropy)

        with tf.variable_scope('value'):
            scalar_summary('value_target', tf.reduce_mean(self.d_rew_ph))
            scalar_summary('value', tf.reduce_mean(self.values))

        # plot network weights
        with tf.variable_scope('weights'):
            for pv in self.theta_vars:
                tf.summary.histogram('{}'.format(pv.name), pv)

        # gradient histograms
        with tf.variable_scope('gradients'):
            vector_summary('policy-gradient', self.pg)
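
vector_summary is another helper from the same module that is not shown in this listing. A plausible sketch, assuming it records a histogram together with the mean of the tensor (an assumption, not the library's actual implementation):

import tensorflow as tf

def vector_summary(name, tensor, scope='vector-summaries'):
    # hypothetical helper: summarize a vector-valued tensor via a histogram and its mean
    with tf.variable_scope(scope):
        tf.summary.histogram(name, tensor)
        return tf.summary.scalar('{}-mean'.format(name), tf.reduce_mean(tensor))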
Example #6
    def _tensorboard_setup(self):
        """ Tensorboard (TB) setup """

        with tf.variable_scope('{}-ph'.format(self.name)):
            self.bps_ph = tf.placeholder(tf.int32, (),
                                         name='batches-per-second')
            self.ep_ph = tf.placeholder(tf.int32, (), name='episode')

        scalar_summary('batches-per-second', self.bps_ph)
        scalar_summary('episode', self.ep_ph)

        self.v_loss = tf.placeholder(tf.float32, (), name='vae-loss')
        self.rel_ph = tf.placeholder(tf.float32, (), name='rec-loss')
        self.kll_ph = tf.placeholder(tf.float32, (), name='kl-loss')
        self.klls_ph = [
            tf.placeholder(tf.float32, (), name=f'z{i}-kl')
            for i in range(self.latent_dim)
        ]

        with tf.variable_scope('loss'):
            scalar_summary('reconstruction-loss', self.rel_ph)
            scalar_summary('total-loss', self.vae_loss)
            scalar_summary('kl-loss', self.kll_ph)

        for i in range(self.latent_dim):
            scalar_summary(f'z{i}-kl', self.klls_ph[i], scope='z-kl')

        self.merge_op = tf.summary.merge_all()

        # write summaries to ~/vae/<savename>
        import os
        home = os.environ['HOME']

        self.writer = tf.summary.FileWriter(f'{home}/vae/{self.savename}',
                                            graph=tf.get_default_graph())
Example #7
    def train(self, dataset, batch_size=155, num_episodes=50, print_freq=5):
        import numpy as np
        import time
        import datetime

        from tabulate import tabulate
        from forkan.common import CSVLogger
        from forkan.common.tf_utils import scalar_summary

        num_samples = len(dataset)

        assert np.max(dataset) <= 1, 'provide normalized dataset!'

        self.log.info('Training on {} samples for {} episodes.'.format(
            num_samples, num_episodes))
        tstart = time.time()
        nb = 1

        train_op = tf.train.AdamOptimizer().minimize(self.vae_loss)

        csv_header = ['date', '#episode', '#batch', 'rec-loss', 'kl-loss'] + \
                     ['z{}-kl'.format(i) for i in range(self.latent_dim)]
        csv = CSVLogger('{}/progress.csv'.format(self.savepath), *csv_header)

        rel_ph = tf.placeholder(tf.float32, (), name='rec-loss')
        kll_ph = tf.placeholder(tf.float32, (), name='kl-loss')
        klls_ph = [
            tf.placeholder(tf.float32, (), name=f'z{i}-kl')
            for i in range(self.latent_dim)
        ]

        scalar_summary('reconstruction-loss', rel_ph, scope='vae-loss')
        scalar_summary('kl-loss', kll_ph, scope='vae-loss')
        for i in range(self.latent_dim):
            scalar_summary(f'z{i}-kl', klls_ph[i], scope='z-kl')
        merged_ = tf.summary.merge_all()
        writer = tf.summary.FileWriter(f'{self.savepath}/board', self.s.graph)

        self.s.run(tf.global_variables_initializer())

        # fixed linear-gradient test pattern; its reconstruction (next to the original)
        # is logged as an image summary after every batch
        du = []

        for _ in range(5):
            a = np.linspace(0, 1, 64)
            ar = np.repeat(a, 64, 0).reshape([64, 64])
            du.append(ar)
        print(np.asarray(du).shape)
        du = np.reshape(du, [1, 5, 64, 64, 1])

        file_writer = tf.summary.FileWriter('/Users/llach/board_test')
        im_ph = tf.placeholder(tf.float32, shape=(1, 64, 128, 1))
        im_sum = tf.summary.image('img', im_ph)

        # rollout N episodes
        for ep in range(num_episodes):

            # shuffle dataset
            np.random.shuffle(dataset)

            for n, idx in enumerate(np.arange(0, num_samples, batch_size)):
                bps = max(int(nb / (time.time() - tstart)), 1)
                x = dataset[idx:min(idx + batch_size, num_samples), ...]

                _, loss, re_loss, kl_losses = self.s.run(
                    [train_op, self.vae_loss, self.re_loss, self.kl_loss],
                    feed_dict={self.X: x})

                # mean losses
                re_loss = np.mean(re_loss)
                kl_loss = self.beta * np.sum(kl_losses)

                fd = {
                    rel_ph: re_loss,
                    kll_ph: kl_loss,
                }

                for i, kph in enumerate(klls_ph):
                    fd.update({kph: kl_losses[i]})

                suma = self.s.run(merged_, feed_dict=fd)

                writer.add_summary(suma, nb)

                # increase batch counter
                nb += 1

                csv.writeline(datetime.datetime.now().isoformat(), ep, nb,
                              re_loss, kl_loss, *kl_losses)

                if n % print_freq == 0 and print_freq != -1:

                    total_batches = (num_samples // batch_size) * num_episodes

                    perc = (nb / total_batches) * 100
                    steps2go = total_batches - nb
                    secs2go = steps2go / bps
                    min2go = secs2go / 60

                    hrs = int(min2go // 60)
                    mins = int(min2go) % 60

                    tab = tabulate([
                        ['name', f'retrainvae-clean-b{self.beta}'],
                        ['episode', ep],
                        ['batch', n],
                        ['bps', bps],
                        ['rec-loss', re_loss],
                        ['kl-loss', kl_loss],
                        ['ETA', '{}h {}min'.format(hrs, mins)],
                        ['done', '{}%'.format(int(perc))],
                    ])

                    print('\n{}'.format(tab))

                reca = self.reconstruct_stacked(du)
                print(reca[0].shape, ar.shape)
                fin = np.concatenate((reca[0], np.expand_dims(ar, axis=-1)),
                                     axis=1)
                isu = self.s.run(
                    im_sum, feed_dict={im_ph: np.expand_dims(fin, axis=0)})
                file_writer.add_summary(isu, nb)
                file_writer.flush()
            self.save()
        file_writer.close()
        self.save()
        print('training done!')
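
A call to this method could look like the following; this is a sketch, and the vae instance and dataset path are placeholders rather than part of the original code:

import numpy as np

# hypothetical dataset of frames scaled to [0, 1]
dataset = np.load('frames.npy') / 255.0
vae.train(dataset, batch_size=128, num_episodes=50, print_freq=5)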