def __init__(self, state_size, action_size, global_step, rlConfig, **kwargs):
    """Build an actor network and a critic network with matching hidden layouts.

    Both networks are ``tfl.Sequential`` stacks of affine layers with the
    hidden widths listed in ``self.layer_sizes``. The actor ends in an affine
    layer of width ``action_size`` that is given ``tf.nn.log_softmax``; the
    critic ends in a width-1 affine layer composed with a squeeze of the
    last axis.

    Args:
        state_size: Input width of the first hidden layer of both networks.
        action_size: Width of the actor's output layer.
        global_step: Accepted for interface compatibility; not used here.
        rlConfig: Stored unchanged on ``self.rlConfig``.
        **kwargs: Accepted and ignored.
    """
    self.action_size = action_size
    self.layer_sizes = [128, 128]
    self.layers = []

    self.actor = tfl.Sequential()
    self.critic = tfl.Sequential()
    towers = (('actor', self.actor), ('critic', self.critic))

    # Hidden layers are created depth by depth, actor first and then critic,
    # so both networks see the same input width at every depth.
    in_size = state_size
    for depth, out_size in enumerate(self.layer_sizes):
        for tower_name, tower in towers:
            with tf.variable_scope("%s/layer_%d" % (tower_name, depth)):
                hidden = tfl.makeAffineLayer(in_size, out_size, tfl.leaky_softplus())
                tower.append(hidden)
        in_size = out_size

    with tf.variable_scope('actor'):
        policy_head = tfl.makeAffineLayer(in_size, action_size, tf.nn.log_softmax)
        self.actor.append(policy_head)

    def drop_last_axis(x):
        return tf.squeeze(x, [-1])

    with tf.variable_scope('critic'):
        value_head = tfl.makeAffineLayer(in_size, 1)
        # presumably util.compose applies value_head first, then the squeeze
        # -- confirm against util.compose
        value_head = util.compose(drop_last_axis, value_head)
        self.critic.append(value_head)

    self.rlConfig = rlConfig
def __init__(self, state_size, action_size, global_step, rlConfig, epsilon=0.04, **kwargs):
    """Build actor and critic networks; the actor output is epsilon-smoothed.

    Both networks are ``tfl.Sequential`` stacks of affine layers with the
    hidden widths in ``self.layer_sizes`` and ``tfl.leaky_relu``. The actor
    ends in a softmax layer of width ``action_size`` whose output ``probs``
    is mapped to ``(1 - epsilon) * probs + epsilon / action_size``. The
    critic ends in a width-1 affine layer composed with a squeeze of the
    last axis.

    Args:
        state_size: Input width of the first hidden layer of both networks.
        action_size: Width of the actor's output layer.
        global_step: Accepted for interface compatibility; not used here.
        rlConfig: Stored unchanged on ``self.rlConfig``.
        epsilon: Mixing weight stored on ``self.epsilon``.
        **kwargs: Accepted and ignored.
    """
    self.action_size = action_size
    self.layer_sizes = [128, 128]
    self.layers = []

    with tf.name_scope('epsilon'):
        # Fixed value; a step-dependent decay term used to be added here
        # and is currently disabled.
        self.epsilon = epsilon

    self.actor = tfl.Sequential()
    self.critic = tfl.Sequential()
    towers = (('actor', self.actor), ('critic', self.critic))

    # Hidden layers are created depth by depth, actor first and then critic.
    in_size = state_size
    for depth, out_size in enumerate(self.layer_sizes):
        for tower_name, tower in towers:
            with tf.variable_scope("%s/layer_%d" % (tower_name, depth)):
                hidden = tfl.makeAffineLayer(in_size, out_size, tfl.leaky_relu)
                tower.append(hidden)
        in_size = out_size

    def smooth(probs):
        # Reads self.epsilon when called, so a later reassignment of the
        # attribute is picked up.
        return (1.0 - self.epsilon) * probs + self.epsilon / action_size

    with tf.variable_scope('actor'):
        policy_head = tfl.makeAffineLayer(in_size, action_size, tf.nn.softmax)
        # presumably util.compose applies policy_head first, then smooth
        # -- confirm against util.compose
        policy_head = util.compose(smooth, policy_head)
        self.actor.append(policy_head)

    def drop_last_axis(x):
        return tf.squeeze(x, [-1])

    with tf.variable_scope('critic'):
        value_head = tfl.makeAffineLayer(in_size, 1)
        value_head = util.compose(drop_last_axis, value_head)
        self.critic.append(value_head)

    self.rlConfig = rlConfig
def __init__(self, state_size, action_size, global_step, rlConfig, epsilon=0.04, temperature=0.01, **kwargs):
    """Build a Q-network plus the epsilon and temperature it is used with.

    ``self.q_net`` is a ``tfl.Sequential`` of ``tfl.FCLayer`` hidden layers
    (widths from ``self.layer_sizes``, ``tfl.leaky_relu``) followed by an
    output layer of width ``action_size`` with no non-linearity argument.

    Args:
        state_size: Input width of the first hidden layer.
        action_size: Width of the output layer.
        global_step: Step tensor/value; drives the epsilon decay and is
            stored on ``self.global_step``.
        rlConfig: Stored unchanged on ``self.rlConfig``.
        epsilon: Constant term of ``self.epsilon``.
        temperature: Stored unchanged on ``self.temperature``.
        **kwargs: Accepted and ignored.
    """
    self.action_size = action_size
    self.layer_sizes = [128, 128]

    self.q_net = tfl.Sequential()
    in_size = state_size
    for depth, width in enumerate(self.layer_sizes):
        with tf.variable_scope("layer_%d" % depth):
            hidden = tfl.FCLayer(in_size, width, tfl.leaky_relu)
            self.q_net.append(hidden)
        in_size = width

    with tf.variable_scope("q_out"):
        # The output layer is built without a non-linearity argument.
        q_head = tfl.FCLayer(in_size, action_size)
        self.q_net.append(q_head)

    self.rlConfig = rlConfig

    with tf.name_scope('epsilon'):
        # epsilon + 0.5 * exp(-global_step / 1000)
        step = tf.cast(global_step, tf.float32)
        decay = tf.exp(-step / 1000.0)
        self.epsilon = epsilon + 0.5 * decay

    with tf.name_scope('temperature'):
        # Fixed value; a step-dependent schedule used to be computed here
        # and is currently disabled.
        self.temperature = temperature

    self.global_step = global_step
def __init__(self, state_size, action_size, global_step, rlConfig, **kwargs):
    """Build a Q-network whose hidden widths come from ``self.q_layers``.

    ``self.q_net`` is a ``tfl.Sequential`` of ``tfl.FCLayer`` hidden layers,
    each given ``tfl.leaky_softplus()``, followed by an output layer of width
    ``action_size`` with no non-linearity argument.

    Args:
        state_size: Input width of the first hidden layer.
        action_size: Width of the output layer.
        global_step: Stored unchanged on ``self.global_step``.
        rlConfig: Stored unchanged on ``self.rlConfig``.
        **kwargs: Forwarded to ``Default.__init__``.
    """
    # self.q_layers is not assigned in this method; presumably
    # Default.__init__ sets it from kwargs -- confirm.
    Default.__init__(self, **kwargs)

    self.action_size = action_size

    self.q_net = tfl.Sequential()
    in_size = state_size
    for depth, width in enumerate(self.q_layers):
        with tf.variable_scope("layer_%d" % depth):
            hidden = tfl.FCLayer(in_size, width, tfl.leaky_softplus())
            self.q_net.append(hidden)
        in_size = width

    with tf.variable_scope("q_out"):
        # The output layer is built without a non-linearity argument.
        q_head = tfl.FCLayer(in_size, action_size)
        self.q_net.append(q_head)

    self.rlConfig = rlConfig
    self.global_step = global_step
def __init__(self, state_size, action_size, global_step, rlConfig, **kwargs):
    """Build independently sized actor and critic networks.

    Each network is a ``tfl.Sequential`` of affine layers whose hidden
    widths are read from ``self.actor_layers`` / ``self.critic_layers``,
    each given ``tfl.leaky_softplus()``. The actor ends in a softmax layer
    of width ``action_size`` followed by a mapping of its output ``p`` to
    ``(1 - epsilon) * p + epsilon / action_size``. The critic ends in a
    width-1 affine layer followed by a squeeze of the last axis.

    Args:
        state_size: Input width of the first hidden layer of both networks.
        action_size: Width of the actor's output layer.
        global_step: Accepted for interface compatibility; not used here.
        rlConfig: Stored unchanged on ``self.rlConfig``.
        **kwargs: Forwarded to ``Default.__init__``.
    """
    # self.actor_layers, self.critic_layers and self.epsilon are not assigned
    # in this method; presumably Default.__init__ sets them from kwargs
    # -- confirm.
    Default.__init__(self, **kwargs)

    self.action_size = action_size

    # Width of the last hidden layer of each network. It is tracked per
    # network so each output layer is sized from its own stack; reusing the
    # loop variable after the loop would give the actor head the critic's
    # final width whenever the two layer lists end in different sizes.
    head_inputs = {}

    for name in ['actor', 'critic']:
        net = tfl.Sequential()
        with tf.variable_scope(name):
            prev_size = state_size
            for i, next_size in enumerate(getattr(self, name + "_layers")):
                with tf.variable_scope("layer_%d" % i):
                    net.append(tfl.makeAffineLayer(prev_size, next_size, tfl.leaky_softplus()))
                prev_size = next_size
        head_inputs[name] = prev_size
        setattr(self, name, net)

    with tf.variable_scope('actor'):
        self.actor.append(tfl.makeAffineLayer(head_inputs['actor'], action_size, tf.nn.softmax))
        # self.epsilon is read when the layer is applied, not captured here.
        self.actor.append(lambda p: (1. - self.epsilon) * p + self.epsilon / action_size)

    with tf.variable_scope('critic'):
        self.critic.append(tfl.makeAffineLayer(head_inputs['critic'], 1))
        # Drop the trailing size-1 axis produced by the width-1 output layer.
        self.critic.append(lambda x: tf.squeeze(x, [-1]))

    self.rlConfig = rlConfig