def main():
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.log_device_placement = True
    with tf.Session(config=config) as sess:
        agent = DeepDeterministicPolicyGradientAgent(env=env)

        # setup saver util and either load latest ckpt or init variables
        saver_util = None
        if opts.ckpt_dir is not None:
            saver_util = util.SaverUtil(sess, opts.ckpt_dir, opts.ckpt_freq)
        else:
            sess.run(tf.global_variables_initializer())
        for v in tf.global_variables():
            print(v.name, util.shape_and_product_of(v), file=sys.stderr)

        # now that we've either init'd from scratch, or loaded up a checkpoint,
        # we can do any required post init work.
        agent.post_var_init_setup()

        # run either eval or training
        if opts.num_eval > 0:
            agent.run_eval(opts.num_eval, opts.eval_action_noise)
        else:
            agent.run_training(opts.max_num_actions, opts.max_run_time,
                               opts.batch_size, opts.batches_per_step,
                               saver_util)
            if saver_util is not None:
                saver_util.force_save()

        env.reset()  # just to flush logging, clumsy :/
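# A minimal sketch of the argparse config main() above assumes. The flag names
# mirror the opts.* attributes main() reads; the defaults and help strings here
# are illustrative assumptions, not the repo's actual values.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--ckpt-dir', type=str, default=None,
                    help="if set, load/save checkpoints from this dir")
parser.add_argument('--ckpt-freq', type=int, default=300,
                    help="seconds between checkpoint saves")
parser.add_argument('--num-eval', type=int, default=0,
                    help="if >0, run this many eval episodes instead of training")
parser.add_argument('--eval-action-noise', action='store_true',
                    help="add action noise during eval")
parser.add_argument('--max-num-actions', type=int, default=0,
                    help="stop training after this many actions (0 => no limit)")
parser.add_argument('--max-run-time', type=int, default=0,
                    help="stop training after this many seconds (0 => no limit)")
parser.add_argument('--batch-size', type=int, default=128)
parser.add_argument('--batches-per-step', type=int, default=5)
opts = parser.parse_args()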
def conv_net_on(self, input_layer, opts):
    # TODO: reinclude batch_norm config
    # if opts.use_batch_norm:
    #     normalizer_fn = slim.batch_norm
    #     normalizer_params = {'is_training': IS_TRAINING}
    # else:
    normalizer_fn = None
    normalizer_params = None

    # whiten image, per channel, using batch_normalisation layer with
    # params calculated directly from batch.
    axis = list(range(input_layer.get_shape().ndims - 1))
    batch_mean, batch_var = tf.nn.moments(input_layer, axis)  # calcs moments per channel
    whitened_input_layer = tf.nn.batch_normalization(input_layer, batch_mean, batch_var,
                                                     scale=None, offset=None,
                                                     variance_epsilon=1e-6)

    # TODO: num_outputs here really depends on the incoming channels, which
    # depend on the #repeats & cameras, so they should be a param.
    model = slim.conv2d(whitened_input_layer, num_outputs=32, kernel_size=[7, 7],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv1')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
    self.pool1 = model
    print("pool1", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=32, kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv2')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
    self.pool2 = model
    print("pool2", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=16, kernel_size=[3, 3],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv3')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')  # was a duplicated 'pool2' scope
    self.pool3 = model
    print("pool3", util.shape_and_product_of(model), file=sys.stderr)

    return slim.flatten(model, scope='flat')
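# Standalone sketch of the per-channel batch whitening used above: taking
# moments over every axis except the last normalises each channel with
# statistics computed from the batch itself, with no learned scale/offset.
# Shapes and the random input are illustrative assumptions.
import numpy as np
import tensorflow as tf

x = tf.placeholder(shape=[None, 64, 64, 3], dtype=tf.float32)
axis = list(range(x.get_shape().ndims - 1))  # [0, 1, 2] for NHWC
mean, var = tf.nn.moments(x, axis)           # one moment per channel, shape (3,)
whitened = tf.nn.batch_normalization(x, mean, var, offset=None, scale=None,
                                     variance_epsilon=1e-6)

with tf.Session() as sess:
    batch = np.random.uniform(0, 255, (4, 64, 64, 3))
    out = sess.run(whitened, feed_dict={x: batch})
    print(out.mean(axis=(0, 1, 2)))  # ~0 per channel
    print(out.std(axis=(0, 1, 2)))   # ~1 per channel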
def __init__(self, opts):
    self.opts = opts

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # TODO: use opts.gpu_mem_fraction
    self.sess = tf.Session(config=config)

    render_shape = (opts.height, opts.width, 3)
    self.replay_memory = replay_memory.ReplayMemory(opts=opts,
                                                    state_shape=render_shape,
                                                    action_dim=2,
                                                    load_factor=1.2)
    if opts.event_log_in:
        self.replay_memory.reset_from_event_log(opts.event_log_in,
                                                opts.event_log_in_num)

    # s1 and s2 placeholders
    batched_state_shape = [None] + list(render_shape)
    s1 = tf.placeholder(shape=batched_state_shape, dtype=tf.float32)
    s2 = tf.placeholder(shape=batched_state_shape, dtype=tf.float32)

    # initialise base models for value & naf networks. the value subportion of the
    # net is created explicitly and separately because it has a target network.
    # note: with --share-input-state-representation the input state network of the
    # value_net will be reused by the naf.l_value and naf.output_actions nets.
    self.value_net = models.ValueNetwork("value", s1, opts)
    self.target_value_net = models.ValueNetwork("target_value", s2, opts)
    self.network = models.NafNetwork("naf", s1, s2,
                                     self.value_net, self.target_value_net,
                                     action_dim=2, opts=opts)

    with self.sess.as_default():
        # setup saver util and either load latest ckpt or init variables
        self.saver_util = None
        if opts.ckpt_dir is not None:
            self.saver_util = util.SaverUtil(self.sess, opts.ckpt_dir, opts.ckpt_freq)
        else:
            self.sess.run(tf.global_variables_initializer())
        for v in tf.global_variables():
            print(v.name, util.shape_and_product_of(v), file=sys.stderr)

        # setup target network
        self.target_value_net.set_as_target_network_for(self.value_net, 0.01)
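# set_as_target_network_for(source, tau) isn't shown here; a minimal sketch of
# the standard soft update it presumably implements (per the DDPG/NAF papers):
# target <- (1 - tau) * target + tau * source, built once as a group of assign
# ops and run after each training batch. The scope names in the usage comment
# are taken from the constructor above; pairing by collection is an assumption.
import tensorflow as tf

def make_soft_update_op(source_vars, target_vars, tau=0.01):
    # one assign op per (source, target) variable pair, grouped into a single op
    assert len(source_vars) == len(target_vars)
    updates = []
    for src, tgt in zip(source_vars, target_vars):
        updates.append(tf.assign(tgt, (1.0 - tau) * tgt + tau * src))
    return tf.group(*updates)

# usage sketch: pair variables by scope name, then run after each train step
# source_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='value')
# target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='target_value')
# update_op = make_soft_update_op(source_vars, target_vars, tau=0.01)
# sess.run(update_op)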
def __init__(self, opts):
    # self.opts = opts
    self.network = models.NafNetwork("naf", action_dim=2, opts=opts)

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    self.sess = tf.Session(config=config)

    with self.sess.as_default():
        # setup loader for first set of ckpts; block until some are available
        self.loader = ckpt_util.AgentCkptLoader(self.sess, opts.ckpt_dir)
        self.loader.blocking_load_ckpt()

        # dump info on vars
        for v in tf.global_variables():
            if '/biases:' not in v.name:
                print(v.name, util.shape_and_product_of(v), file=sys.stderr)
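# AgentCkptLoader is repo-specific; a minimal sketch of what blocking_load_ckpt()
# presumably does: poll the checkpoint dir until the trainer has written a first
# checkpoint, then restore it. tf.train.latest_checkpoint and Saver.restore are
# standard TF1 APIs; the polling interval is an assumption.
import time
import tensorflow as tf

def blocking_load_latest_ckpt(sess, ckpt_dir, poll_secs=10):
    saver = tf.train.Saver()
    while True:
        ckpt = tf.train.latest_checkpoint(ckpt_dir)
        if ckpt is not None:
            saver.restore(sess, ckpt)
            return ckpt
        print("no ckpt in %s yet; sleeping %ds" % (ckpt_dir, poll_secs))
        time.sleep(poll_secs)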
def simple_conv_net_on(self, input_layer, opts):
    if opts.use_batch_norm:
        normalizer_fn = slim.batch_norm
        normalizer_params = {'is_training': IS_TRAINING}
    else:
        normalizer_fn = None
        normalizer_params = None

    # optionally drop blue channel; in a simple cart pole env we only need r/g
    # if opts.drop_blue_channel:
    #     input_layer = input_layer[:, :, :, 0:2, :, :]

    # state is (batch, height, width, rgb, camera_idx, repeat)
    # roll up rgb, camera_idx and repeat into num_channels,
    # i.e. (batch, height, width, rgb*camera_idx*repeat)
    height, width = map(int, input_layer.get_shape()[1:3])
    num_channels = input_layer.get_shape()[3:].num_elements()
    input_layer = tf.reshape(input_layer, [-1, height, width, num_channels])
    print("input_layer", util.shape_and_product_of(input_layer), file=sys.stderr)

    # whiten image, per channel, using batch_normalisation layer with
    # params calculated directly from batch.
    axis = list(range(input_layer.get_shape().ndims - 1))
    batch_mean, batch_var = tf.nn.moments(input_layer, axis)  # gives moments per channel
    whitened_input_layer = tf.nn.batch_normalization(input_layer, batch_mean, batch_var,
                                                     scale=None, offset=None,
                                                     variance_epsilon=1e-6)

    # TODO: num_outputs here really depends on the incoming channels, which
    # depend on the #repeats & cameras, so they should be a param.
    model = slim.conv2d(whitened_input_layer, num_outputs=10, kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv1')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
    self.pool1 = model
    print("pool1", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=10, kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv2')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
    self.pool2 = model
    print("pool2", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=10, kernel_size=[3, 3],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params, scope='conv3')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')  # was a duplicated 'pool2' scope
    self.pool3 = model
    print("pool3", util.shape_and_product_of(model), file=sys.stderr)

    return model
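# Small worked example of the channel rollup above: a state of shape
# (batch, height, width, rgb, camera_idx, repeat) is reshaped so the trailing
# three axes become conv channels. The concrete sizes here are illustrative.
import numpy as np

batch, height, width = 4, 50, 50
rgb, num_cameras, num_repeats = 3, 2, 3
state = np.zeros((batch, height, width, rgb, num_cameras, num_repeats))

num_channels = rgb * num_cameras * num_repeats  # 18, i.e. get_shape()[3:].num_elements()
rolled = state.reshape((-1, height, width, num_channels))
print(rolled.shape)  # (4, 50, 50, 18)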
def conv_net_on(self, input_layer, opts):
    # TODO: reinclude batch_norm config; it hasn't been helping at all...

    # convert input_layer from uint8 (0, 255) to float32 (0.0, 1.0)
    input_layer = tf.to_float(input_layer) / 255

    # whiten image, per channel, using batch_normalisation layer with
    # params calculated directly from batch.
    axis = list(range(input_layer.get_shape().ndims - 1))
    batch_mean, batch_var = tf.nn.moments(input_layer, axis)  # calcs moments per channel
    whitened_input_layer = tf.nn.batch_normalization(input_layer, batch_mean, batch_var,
                                                     scale=None, offset=None,
                                                     variance_epsilon=1e-6)

    model = slim.conv2d(whitened_input_layer, num_outputs=8, kernel_size=[5, 5], scope='conv1a')
    # model = slim.conv2d(model, num_outputs=8, kernel_size=[5, 5], scope='conv1b')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
    self.pool1 = model
    print("pool1", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=16, kernel_size=[5, 5], scope='conv2a')
    # model = slim.conv2d(model, num_outputs=16, kernel_size=[5, 5], scope='conv2b')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
    self.pool2 = model
    print("pool2", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv3a')
    # model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv3b')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')
    self.pool3 = model
    print("pool3", util.shape_and_product_of(model), file=sys.stderr)

    # a final unpooled conv net just to drop params down. maybe pool here too?
    # model = slim.conv2d(model, num_outputs=32, kernel_size=[3, 3], scope='conv4a')
    # model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool4')
    # self.pool4 = model
    # print("pool4", util.shape_and_product_of(model), file=sys.stderr)

    # do simple maxout on output to reduce dimensionality down for the upcoming
    # fully connected layers. see https://arxiv.org/abs/1302.4389
    # model = tf.reshape(model, (-1, 15, 20, 8, 4))       # (?, 15, 20, 32) -> (?, 15, 20, 8, 4)
    # model = tf.reduce_max(model, reduction_indices=4)   # (?, 15, 20, 8)
    # print("maxout", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.flatten(model, scope='flat')
    if opts.use_dropout:
        model = slim.dropout(model, is_training=IS_TRAINING, scope='drop')  # was scope="drop" % i, a bug
    return model
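# The commented-out maxout above reduces channel dimensionality by taking a max
# over groups of feature maps (Goodfellow et al., https://arxiv.org/abs/1302.4389).
# A self-contained sketch with the sizes assumed in those comments: 32 channels
# grouped as 8 groups of 4.
import tensorflow as tf

x = tf.placeholder(shape=[None, 15, 20, 32], dtype=tf.float32)
grouped = tf.reshape(x, (-1, 15, 20, 8, 4))            # (?, 15, 20, 32) -> (?, 15, 20, 8, 4)
maxout = tf.reduce_max(grouped, reduction_indices=4)   # max over each group -> (?, 15, 20, 8)
print(maxout.get_shape())  # (?, 15, 20, 8)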
def run_trainer(episodes, opts):
    # init replay memory
    render_shape = (opts.height, opts.width, 3)
    replay_memory = rm.ReplayMemory(opts=opts,
                                    state_shape=render_shape,
                                    action_dim=2,
                                    load_factor=1.1)
    if opts.event_log_in:
        replay_memory.reset_from_event_logs(opts.event_log_in,
                                            opts.event_log_in_num,
                                            opts.reset_smooth_reward_factor)

    # init network for training
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    sess = tf.Session(config=config)

    network = models.NafNetwork("naf", action_dim=2, opts=opts)

    with sess.as_default():
        # setup saver util and either load saved ckpt or init variables
        saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir, opts.ckpt_save_freq)
        for v in tf.global_variables():
            if '/biases:' not in v.name:
                print(v.name, util.shape_and_product_of(v), file=sys.stderr)
        network.setup_target_network()

        # loop forever, processing episodes from run_agents
        print(util.dts(), "waiting for episodes")
        while True:
            start_time = time.time()
            episode = episodes.get()
            wait_time = time.time() - start_time

            start_time = time.time()
            replay_memory.add_episode(episode,
                                      smooth_reward_factor=opts.smooth_reward_factor)

            losses = []
            if replay_memory.burnt_in():
                for _ in range(opts.batches_per_new_episode):
                    batch = replay_memory.batch(opts.batch_size)
                    batch_losses = network.train(batch).T[0]  # .T[0] => (B, 1) -> (B,)
                    replay_memory.update_priorities(batch.idxs, batch_losses)
                    network.target_value_net.update_target_weights()
                    losses.extend(batch_losses)
                saver.save_if_required()
            process_time = time.time() - start_time

            stats = {"wait_time": wait_time,
                     "process_time": process_time,
                     "pending": episodes.qsize(),
                     "replay_memory": replay_memory.stats}
            if losses:
                stats['loss'] = {"min": float(np.min(losses)),
                                 "median": float(np.median(losses)),
                                 "mean": float(np.mean(losses)),
                                 "max": float(np.max(losses))}
            print("STATS\t%s\t%s" % (util.dts(), json.dumps(stats)))
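# run_trainer() consumes episodes from a queue-like object (it calls .get() and
# .qsize()). A minimal sketch of the wiring, assuming a multiprocessing setup in
# which agent processes push finished episodes onto a shared queue; run_agent is
# a hypothetical stand-in for the repo's agent entry point.
import multiprocessing

def start(opts, num_agents=2):
    episodes = multiprocessing.Queue()
    for _ in range(num_agents):
        p = multiprocessing.Process(target=run_agent, args=(episodes, opts))
        p.daemon = True  # agents die with the trainer
        p.start()
    run_trainer(episodes, opts)  # blocks forever, processing episodes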