Example #1
def main():
    config = tf.ConfigProto()
    #  config.gpu_options.allow_growth = True
    #  config.log_device_placement = True
    with tf.Session(config=config) as sess:
        agent = DeepDeterministicPolicyGradientAgent(env=env)

        # setup saver util and either load latest ckpt or init variables
        saver_util = None
        if opts.ckpt_dir is not None:
            saver_util = util.SaverUtil(sess, opts.ckpt_dir, opts.ckpt_freq)
        else:
            sess.run(tf.global_variables_initializer())

        for v in tf.global_variables():
            print(v.name, util.shape_and_product_of(v), file=sys.stderr)

        # now that we've either init'd from scratch, or loaded up a checkpoint,
        # we can do any required post init work.
        agent.post_var_init_setup()

        #opts.num_eval = 100
        # run either eval or training
        if opts.num_eval > 0:
            agent.run_eval(opts.num_eval, opts.eval_action_noise)
        else:
            agent.run_training(opts.max_num_actions, opts.max_run_time,
                               opts.batch_size, opts.batches_per_step,
                               saver_util)
            if saver_util is not None:
                saver_util.force_save()

        env.reset()  # just to flush logging, clumsy :/
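SaverUtil here is project code; as a rough sketch, the restore-or-init branch it wraps looks like the following with the stock tf.train.Saver API (the restore_or_init name is illustrative):

import sys
import tensorflow as tf

def restore_or_init(sess, ckpt_dir):
    # a minimal sketch of the "load latest ckpt or init variables" branch above;
    # the real SaverUtil also handles periodic saving at ckpt_freq.
    saver = tf.train.Saver()
    latest = tf.train.latest_checkpoint(ckpt_dir) if ckpt_dir else None
    if latest is not None:
        saver.restore(sess, latest)
        print("restored from", latest, file=sys.stderr)
    else:
        sess.run(tf.global_variables_initializer())
    return saver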
Example #2
  def conv_net_on(self, input_layer, opts):
    # TODO: reinclude batch_norm config
#    if opts.use_batch_norm:
#      normalizer_fn = slim.batch_norm
#      normalizer_params = { 'is_training': IS_TRAINING }
#    else:
    normalizer_fn = None
    normalizer_params = None

    # whiten image, per channel, using batch_normalisation layer with
    # params calculated directly from batch.
    axis = list(range(input_layer.get_shape().ndims - 1))
    batch_mean, batch_var = tf.nn.moments(input_layer, axis)  # calcs moments per channel
    whitened_input_layer = tf.nn.batch_normalization(input_layer, batch_mean, batch_var,
                                                     scale=None, offset=None,
                                                     variance_epsilon=1e-6)

    # TODO: num_outputs here really depends on the incoming channels, which
    # depend on the #repeats & cameras, so it should be a param.
    model = slim.conv2d(whitened_input_layer, num_outputs=32, kernel_size=[7, 7],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv1')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
    self.pool1 = model
    print >>sys.stderr, "pool1", util.shape_and_product_of(model)

    model = slim.conv2d(model, num_outputs=32, kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv2')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
    self.pool2 = model
    print >>sys.stderr, "pool2", util.shape_and_product_of(model)

    model = slim.conv2d(model, num_outputs=16, kernel_size=[3, 3],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv3')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')
    self.pool3 = model
    print >>sys.stderr, "pool3", util.shape_and_product_of(model)

    return slim.flatten(model, scope='flat')
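The per-channel whitening used above is self-contained TF; a minimal standalone sketch (the input shape is illustrative) shows what the tf.nn.moments / tf.nn.batch_normalization pair computes:

import tensorflow as tf

# sketch: whiten a (batch, height, width, channels) tensor per channel, using
# statistics computed from the batch itself; the 120x160x3 shape is illustrative.
x = tf.placeholder(shape=[None, 120, 160, 3], dtype=tf.float32)
axis = list(range(x.get_shape().ndims - 1))  # [0, 1, 2]: reduce all but channels
mean, var = tf.nn.moments(x, axis)           # per-channel mean & variance, shape (3,)
whitened = tf.nn.batch_normalization(x, mean, var, offset=None, scale=None,
                                     variance_epsilon=1e-6)
# each channel of whitened now has roughly zero mean and unit variance per batch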
Example #3
    def __init__(self, opts):
        self.opts = opts

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        #config.log_device_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.5  #opts.gpu_mem_fraction
        self.sess = tf.Session(config=config)

        render_shape = (opts.height, opts.width, 3)
        self.replay_memory = replay_memory.ReplayMemory(
            opts=opts, state_shape=render_shape, action_dim=2, load_factor=1.2)
        if opts.event_log_in:
            self.replay_memory.reset_from_event_log(opts.event_log_in,
                                                    opts.event_log_in_num)

        # s1 and s2 placeholders
        batched_state_shape = [None] + list(render_shape)
        s1 = tf.placeholder(shape=batched_state_shape, dtype=tf.float32)
        s2 = tf.placeholder(shape=batched_state_shape, dtype=tf.float32)

        # initialise base models for the value & naf networks. the value
        # subportion of the net is explicitly created separately because it has
        # a target network. note: in the case of --share-input-state-representation
        # the input state network of the value_net will be reused by the
        # naf.l_value and naf.output_actions nets
        self.value_net = models.ValueNetwork("value", s1, opts)
        self.target_value_net = models.ValueNetwork("target_value", s2, opts)
        self.network = models.NafNetwork("naf",
                                         s1,
                                         s2,
                                         self.value_net,
                                         self.target_value_net,
                                         action_dim=2,
                                         opts=opts)

        with self.sess.as_default():
            # setup saver util and either load latest ckpt or init variables
            self.saver_util = None
            if opts.ckpt_dir is not None:
                self.saver_util = util.SaverUtil(self.sess, opts.ckpt_dir,
                                                 opts.ckpt_freq)
            else:
                self.sess.run(tf.initialize_all_variables())
            for v in tf.all_variables():
                print >> sys.stderr, v.name, util.shape_and_product_of(v)

            # setup target network
            self.target_value_net.set_as_target_network_for(
                self.value_net, 0.01)
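set_as_target_network_for lives in the project's models module; assuming the 0.01 is a soft-update coefficient, the standard Polyak-style update it presumably builds can be sketched with plain TF ops (the scope names and helper name are assumptions):

import tensorflow as tf

def make_soft_update_op(source_scope, target_scope, tau=0.01):
    # sketch: target <- tau * source + (1 - tau) * target, variable by variable.
    source_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=source_scope)
    target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_scope)
    assert len(source_vars) == len(target_vars)
    updates = [t.assign(tau * s + (1.0 - tau) * t)
               for s, t in zip(source_vars, target_vars)]
    return tf.group(*updates)

# e.g. built once, then run after each training batch:
# update_op = make_soft_update_op("value", "target_value", tau=0.01)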
Example #4
    def __init__(self, opts):
        #    self.opts = opts

        self.network = models.NafNetwork("naf", action_dim=2, opts=opts)

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        #config.log_device_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
        self.sess = tf.Session(config=config)

        with self.sess.as_default():
            # setup saver to load first set of ckpts. block until some are available
            self.loader = ckpt_util.AgentCkptLoader(self.sess, opts.ckpt_dir)
            self.loader.blocking_load_ckpt()
            # dump info on vars
            for v in tf.all_variables():
                if '/biases:' not in v.name:
                    print >> sys.stderr, v.name, util.shape_and_product_of(v)
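AgentCkptLoader is project code too; the blocking load can be sketched by polling tf.train.latest_checkpoint until a trainer has written something (the helper name and poll interval are assumptions):

import time
import tensorflow as tf

def blocking_load_latest(sess, ckpt_dir, poll_interval=10):
    # sketch: block until a checkpoint appears in ckpt_dir, then restore it.
    saver = tf.train.Saver()
    while True:
        latest = tf.train.latest_checkpoint(ckpt_dir)
        if latest is not None:
            saver.restore(sess, latest)
            return latest
        time.sleep(poll_interval)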
Example #5
    def simple_conv_net_on(self, input_layer, opts):
        if opts.use_batch_norm:
            normalizer_fn = slim.batch_norm
            normalizer_params = {'is_training': IS_TRAINING}
        else:
            normalizer_fn = None
            normalizer_params = None

        # optionally drop the blue channel; in a simple cart pole env we only need r/g
        #if opts.drop_blue_channel:
        #  input_layer = input_layer[:,:,:,0:2,:,:]

        # state is (batch, height, width, rgb, camera_idx, repeat)
        # rollup rgb, camera_idx and repeat into num_channels
        # i.e. (batch, height, width, rgb*camera_idx*repeat)
        height, width = map(int, input_layer.get_shape()[1:3])
        num_channels = input_layer.get_shape()[3:].num_elements()
        input_layer = tf.reshape(input_layer,
                                 [-1, height, width, num_channels])
        print("input_layer",
              util.shape_and_product_of(input_layer),
              file=sys.stderr)

        # whiten image, per channel, using batch_normalisation layer with
        # params calculated directly from batch.
        axis = list(range(input_layer.get_shape().ndims - 1))
        batch_mean, batch_var = tf.nn.moments(
            input_layer, axis)  # gives moments per channel
        whitened_input_layer = tf.nn.batch_normalization(input_layer,
                                                         batch_mean,
                                                         batch_var,
                                                         scale=None,
                                                         offset=None,
                                                         variance_epsilon=1e-6)

        # TODO: num_outputs here really depends on the incoming channels, which
        # depend on the #repeats & cameras, so it should be a param.
        model = slim.conv2d(whitened_input_layer,
                            num_outputs=10,
                            kernel_size=[5, 5],
                            normalizer_fn=normalizer_fn,
                            normalizer_params=normalizer_params,
                            scope='conv1')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
        self.pool1 = model
        print("pool1", util.shape_and_product_of(model), file=sys.stderr)

        model = slim.conv2d(model,
                            num_outputs=10,
                            kernel_size=[5, 5],
                            normalizer_fn=normalizer_fn,
                            normalizer_params=normalizer_params,
                            scope='conv2')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
        self.pool2 = model
        print("pool2", util.shape_and_product_of(model), file=sys.stderr)

        model = slim.conv2d(model,
                            num_outputs=10,
                            kernel_size=[3, 3],
                            normalizer_fn=normalizer_fn,
                            normalizer_params=normalizer_params,
                            scope='conv3')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')
        self.pool3 = model
        print("pool3", util.shape_and_product_of(model), file=sys.stderr)

        return model
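The (rgb, camera_idx, repeat) rollup above is plain shape arithmetic; with, say, two cameras and a repeat of three, the reshape yields 3*2*3 = 18 channels. A standalone sketch, with illustrative sizes:

import tensorflow as tf

# sketch: roll (batch, height, width, rgb, camera_idx, repeat) up into
# (batch, height, width, rgb * camera_idx * repeat); the 50x50, 2-camera,
# 3-repeat sizes are illustrative.
state = tf.placeholder(shape=[None, 50, 50, 3, 2, 3], dtype=tf.float32)
height, width = map(int, state.get_shape()[1:3])
num_channels = state.get_shape()[3:].num_elements()  # 3 * 2 * 3 = 18
rolled = tf.reshape(state, [-1, height, width, num_channels])
# rolled has shape (?, 50, 50, 18), ready for a standard conv2d stack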
Example #6
    def conv_net_on(self, input_layer, opts):
        # TODO: reinclude batch_norm config, hasn't been helping at all...

        # convert input_layer from uint8 (0, 255) to float32 (0.0, 1.0)
        input_layer = tf.to_float(input_layer) / 255

        # whiten image, per channel, using batch_normalisation layer with
        # params calculated directly from batch.
        axis = list(range(input_layer.get_shape().ndims - 1))
        batch_mean, batch_var = tf.nn.moments(
            input_layer, axis)  # calcs moments per channel
        whitened_input_layer = tf.nn.batch_normalization(input_layer,
                                                         batch_mean,
                                                         batch_var,
                                                         scale=None,
                                                         offset=None,
                                                         variance_epsilon=1e-6)

        model = slim.conv2d(whitened_input_layer,
                            num_outputs=8,
                            kernel_size=[5, 5],
                            scope='conv1a')
        #    model = slim.conv2d(whitened_input_layer, num_outputs=8, kernel_size=[5, 5], scope='conv1b')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
        self.pool1 = model
        print >> sys.stderr, "pool1", util.shape_and_product_of(model)

        model = slim.conv2d(model,
                            num_outputs=16,
                            kernel_size=[5, 5],
                            scope='conv2a')
        #    model = slim.conv2d(model, num_outputs=16, kernel_size=[5, 5], scope='conv2b')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
        self.pool2 = model
        print >> sys.stderr, "pool2", util.shape_and_product_of(model)

        model = slim.conv2d(model,
                            num_outputs=32,
                            kernel_size=[3, 3],
                            scope='conv3a')
        #    model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv3b')
        model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')
        self.pool3 = model
        print >> sys.stderr, "pool3", util.shape_and_product_of(model)

        # a final unpooled conv net just to drop params down. maybe pool here too actually?
        #    model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv4a')
        #    model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv3b')
        #    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool4')
        #    self.pool3 = model
        #    print >>sys.stderr, "pool4", util.shape_and_product_of(model)

        # do simple maxout on output to reduce dimensionality down for the upcoming
        # fully connected layers. see  https://arxiv.org/abs/1302.4389
        #    model = tf.reshape(model, (-1, 15, 20, 8, 4))      # (?, 15, 20, 32) -> (?, 15, 20, 8, 4)
        #    model = tf.reduce_max(model, reduction_indices=4)  # (?, 15, 20, 8)
        #    print >>sys.stderr, "maxout", util.shape_and_product_of(model)

        model = slim.flatten(model, scope='flat')
        if opts.use_dropout:
            model = slim.dropout(model,
                                 is_training=IS_TRAINING,
                                 scope="drop" % i)
        return model
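The commented-out maxout block (https://arxiv.org/abs/1302.4389) reduces channel count by taking a max over groups of feature maps; a minimal sketch of that reshape-and-reduce, with the same illustrative sizes as the comment:

import tensorflow as tf

# sketch: maxout over groups of 4 channels, (?, 15, 20, 32) -> (?, 15, 20, 8);
# sizes mirror the commented-out block above.
features = tf.placeholder(shape=[None, 15, 20, 32], dtype=tf.float32)
grouped = tf.reshape(features, [-1, 15, 20, 8, 4])  # 32 channels -> 8 groups of 4
maxout = tf.reduce_max(grouped, axis=4)             # keep the max within each group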
Example #7
def run_trainer(episodes, opts):
    # init replay memory
    render_shape = (opts.height, opts.width, 3)
    replay_memory = rm.ReplayMemory(opts=opts,
                                    state_shape=render_shape,
                                    action_dim=2,
                                    load_factor=1.1)
    if opts.event_log_in:
        replay_memory.reset_from_event_logs(opts.event_log_in,
                                            opts.event_log_in_num,
                                            opts.reset_smooth_reward_factor)

    # init network for training
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    sess = tf.Session(config=config)

    network = models.NafNetwork("naf", action_dim=2, opts=opts)

    with sess.as_default():
        # setup saver util and either load saved ckpt or init variables
        saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir,
                                           opts.ckpt_save_freq)
        for v in tf.all_variables():
            if '/biases:' not in v.name:
                print >> sys.stderr, v.name, util.shape_and_product_of(v)
        network.setup_target_network()

        # while true process episodes from run_agents
        print util.dts(), "waiting for episodes"
        while True:
            start_time = time.time()
            episode = episodes.get()
            wait_time = time.time() - start_time

            start_time = time.time()
            replay_memory.add_episode(
                episode, smooth_reward_factor=opts.smooth_reward_factor)
            losses = []
            if replay_memory.burnt_in():
                for _ in xrange(opts.batches_per_new_episode):
                    batch = replay_memory.batch(opts.batch_size)
                    batch_losses = network.train(batch).T[0]  # .T[0]: (B, 1) -> (B,)
                    replay_memory.update_priorities(batch.idxs, batch_losses)
                    network.target_value_net.update_target_weights()
                    losses.extend(batch_losses)
                saver.save_if_required()
            process_time = time.time() - start_time

            stats = {
                "wait_time": wait_time,
                "process_time": process_time,
                "pending": episodes.qsize(),
                "replay_memory": replay_memory.stats
            }
            if losses:
                stats['loss'] = {
                    "min": float(np.min(losses)),
                    "median": float(np.median(losses)),
                    "mean": float(np.mean(losses)),
                    "max": float(np.max(losses))
                }
            print "STATS\t%s\t%s" % (util.dts(), json.dumps(stats))