    def __init__(self, name, learning_rate=0.001):
        with tf.variable_scope(name, 'connection'):
            self.name = name
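            # placeholders for per-timestep inputs, rewards, discounts and the bootstrap value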
            self.ind_inp = tf.placeholder(tf.float32, [None, 1],
                                          name='ind_inp')
            self.soc_inp = tf.placeholder(tf.float32, [None, 1],
                                          name='soc_inp')
            self.reward_ = tf.placeholder(tf.float32, [None, 1], name='reward')
            self.discount_ = tf.placeholder(tf.float32, [None, 1],
                                            name='discount')
            self.bootstrap_ = tf.placeholder(tf.float32, [None],
                                             name='bootstrap')

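            # combine the individual and social inputs along the feature axis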
            self.concat = tf.concat([self.ind_inp, self.soc_inp], 1)

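            # linear layer mapping the combined input to a single baseline value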
            self.con_layer = tf.contrib.layers.fully_connected(
                self.concat, 1, activation_fn=None)

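            # critic loss and advantage from trfl; lambda_ is assumed to be defined at module scope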
            self.con_return, self.advantage = trfl.sequence_advantage_critic_loss(
                self.con_layer,
                self.reward_,
                self.discount_,
                self.bootstrap_,
                lambda_=lambda_,
                baseline_cost=1)

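            # minimize the mean critic loss with Adam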
            self.critic_loss = tf.reduce_mean(self.con_return.loss)
            self.critic_optim = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(self.critic_loss)
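A minimal usage sketch for the snippet above, assuming the enclosing class is named ConnectionCritic (a hypothetical name), that lambda_ is defined at module scope, and that the file imports tensorflow as tf, trfl and numpy:

import numpy as np

lambda_ = 0.95  # assumed module-level constant read by __init__ above (value made up)

critic = ConnectionCritic('connection')  # ConnectionCritic is a hypothetical class name
T = 8  # example sequence length, batch size 1

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss, _ = sess.run(
        [critic.critic_loss, critic.critic_optim],
        feed_dict={critic.ind_inp: np.random.randn(T, 1),
                   critic.soc_inp: np.random.randn(T, 1),
                   critic.reward_: np.random.randn(T, 1),
                   critic.discount_: np.full((T, 1), 0.99),
                   critic.bootstrap_: np.zeros(1)})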
Example #2
    def __init__(self, name, socialism_learning_rate=0.01):
        with tf.variable_scope(name):
            self.name = name
            self.individual_input_ = tf.placeholder(tf.float32, [None, 1], name='i_input')
            self.social_input_ = tf.placeholder(tf.float32, [None, 1], name='s_input')
            self.reward_ = tf.placeholder(tf.float32, [None, 1], name='reward')
            self.discount_ = tf.placeholder(tf.float32, [None, 1], name='discount')
            self.bootstrap_ = tf.placeholder(tf.float32, [None], name='bootstrap')

            # pair each timestep's individual and social values into one row of shape [batch, 2]
            self.input_reshape = tf.reshape(
                tf.stack([self.individual_input_, self.social_input_], axis=1),
                [-1, 2])

            self.combined_baseline_ = tf.contrib.layers.fully_connected(
                self.input_reshape, 1, activation_fn=None)

            # lambda_ and baseline_cost are assumed to be defined at module scope
            self.combined_return, _ = trfl.sequence_advantage_critic_loss(
                self.combined_baseline_, self.reward_, self.discount_,
                self.bootstrap_, lambda_=lambda_, baseline_cost=baseline_cost)
            
            self.combined_loss_ = tf.reduce_mean(self.combined_return.loss)
            self.combined_optim = tf.train.AdamOptimizer(
                learning_rate=socialism_learning_rate).minimize(self.combined_loss_)
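For reference, the combined input above is meant to keep each timestep's individual and social values on the same row; a small numpy sketch of that pairing, with made-up values:

import numpy as np

individual = np.array([[1.0], [2.0], [3.0]])    # shape [3, 1]
social = np.array([[10.0], [20.0], [30.0]])     # shape [3, 1]

# stacking along axis 1 and reshaping to [-1, 2] keeps each timestep's
# individual/social pair on one row:
# [[ 1., 10.],
#  [ 2., 20.],
#  [ 3., 30.]]
combined = np.stack([individual, social], axis=1).reshape(-1, 2)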
Example #3
    def __init__(self,
                 name,
                 critic_hidden_size=32,
                 critic_learning_rate=0.0001):
        with tf.variable_scope(name, "Critic"):

            # define inputs for the critic network (obs_size is assumed to be defined at module scope)
            self.name = name
            self.input_ = tf.placeholder(tf.float32, [None, obs_size],
                                         name='inputs')
            self.reward_ = tf.placeholder(tf.float32, [None, 1], name='reward')
            self.discount_ = tf.placeholder(tf.float32, [None, 1],
                                            name='discount')
            self.bootstrap_ = tf.placeholder(tf.float32, [None],
                                             name='bootstrap')

            # set up critic network (hidden layers)
            self.fc1_critic_ = tf.contrib.layers.fully_connected(
                self.input_, critic_hidden_size, activation_fn=tf.nn.elu)
            self.fc2_critic_ = tf.contrib.layers.fully_connected(
                self.fc1_critic_, critic_hidden_size, activation_fn=tf.nn.elu)
            self.fc3_critic_ = tf.contrib.layers.fully_connected(
                self.fc2_critic_, critic_hidden_size, activation_fn=tf.nn.elu)

            # set up critic network (output layer)
            self.baseline_ = tf.contrib.layers.fully_connected(
                self.fc3_critic_, 1, activation_fn=None)

            # get critic loss (lambda_ and baseline_cost are assumed to be defined at module scope)
            self.Critic_return, self.advantage = trfl.sequence_advantage_critic_loss(
                self.baseline_,
                self.reward_,
                self.discount_,
                self.bootstrap_,
                lambda_=lambda_,
                baseline_cost=baseline_cost)

            # Optimize the loss
            self.critic_loss_ = tf.reduce_mean(self.Critic_return.loss)
            self.critic_optim = tf.train.AdamOptimizer(
                learning_rate=critic_learning_rate).minimize(self.critic_loss_)
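A minimal construction-and-update sketch for this critic, assuming hypothetical values for the module-level constants obs_size, lambda_ and baseline_cost, and a hypothetical class name Critic for the snippet above:

import numpy as np

# assumed module-level constants read by __init__ above (values are made up)
obs_size = 4
lambda_ = 0.95
baseline_cost = 0.5

critic = Critic('critic_0')  # Critic is a hypothetical name for the class above
T = 16  # example sequence length, batch size 1

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    advantage, _ = sess.run(
        [critic.advantage, critic.critic_optim],
        feed_dict={critic.input_: np.random.randn(T, obs_size),
                   critic.reward_: np.random.randn(T, 1),
                   critic.discount_: np.full((T, 1), 0.99),
                   critic.bootstrap_: np.zeros(1)})
    # the fetched advantage could then drive a separate actor update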