Example no. 1
import numpy as np
import tensorflow as tf

# EncoderModel, ActorModel, CriticModel and A2CGaeTrain are project-specific
# classes assumed to be importable from the surrounding project.


class ExplorerWorker:
    def __init__(self, name, actions):

        self.e_model = EncoderModel()
        self.act_model = ActorModel(actions)
        self.c_model = CriticModel()
        self.t_model = A2CGaeTrain(self.e_model,
                                   self.act_model,
                                   self.c_model,
                                   name,
                                   lr=2e-6)
        self.name = name
        self.steps = 0

        self.log_dir = 'logs/{}'.format(name)
        self.summary_writer = tf.summary.create_file_writer(self.log_dir)
        self.boards = {
            'reward': tf.keras.metrics.Mean('reward_board', dtype=tf.float32),
            'actor_loss': tf.keras.metrics.Mean('train_loss',
                                                dtype=tf.float32),
            'critic_loss': tf.keras.metrics.Mean('train_loss_c',
                                                 dtype=tf.float32),
        }

    def set_weights(self, model):
        self.e_model.update_model(model.e_model)
        self.act_model.update_model(model.act_model)
        self.c_model.update_model(model.c_model)

    def get_weights(self):
        return (self.e_model.trainable_variables +
                self.act_model.trainable_variables +
                self.c_model.trainable_variables)

    def set_board(self, name, value):
        self.boards[name](value)

    def get_board(self, name):
        return self.boards[name].result()

    def get_values(self, state):

        embedding = self.e_model(
            tf.convert_to_tensor(state[None, ...], dtype=tf.float32))
        action, p, log_policy = self.act_model.get_action(embedding)
        value = np.asarray(self.c_model(embedding))[0][0]

        return action, p, log_policy, value

    def train(self, states, actions, rewards, next_states, values,
              old_log_policies, R, adv, dones):
        return self.t_model.get_gradients(states, actions, rewards,
                                          next_states, values,
                                          old_log_policies, R, adv, dones)

    def update_model(self, cg, ag, eg):
        self.t_model.apply_grads(cg, ag, eg)
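
Below is a minimal usage sketch, not part of the original example: it assumes a classic Gym-style env (reset returning a state, step returning a 4-tuple) and that the project-specific model classes above are available. The worker samples an action and a value estimate for the current frame, steps the environment, and logs the reward through its TensorBoard metric.

# Hypothetical driver code; `env` and the worker name are assumptions for illustration.
worker = ExplorerWorker('worker_0', actions=env.action_space.n)

state = env.reset()
action, p, log_policy, value = worker.get_values(state)  # sampled action + value estimate
next_state, reward, done, info = env.step(action)

worker.set_board('reward', reward)                        # accumulate the reward metric
with worker.summary_writer.as_default():
    tf.summary.scalar('reward', worker.get_board('reward'), step=worker.steps)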
Example no. 3
    cv.waitKey(1)

size = (84, 84)          # frame size fed to the encoder
bs = 1024                # batch size
buffer_memory = 5000
t_steps = 1000
n_episodes = 100000
num_local_steps = 100    # steps collected per rollout before an update
num_epochs = 5
gamma = 0.9              # discount factor
tau = 1.0                # GAE parameter (lambda)

env = create_train_env(1, 1, RIGHT_ONLY, random=True)

e_model = EncoderModel()
act_model = ActorModel(env.action_space.n)
c_model = CriticModel()
t_model = A2CGaeTrain(e_model, act_model, c_model)

fig, axs = plt.subplots(1)
fig.suptitle('Vertically stacked subplots')

curr_states, state_ = env.reset()
curr_episode = 0
ct = 0
while True:
    curr_episode += 1
    actions = []
    values = []
    states = []
    _states = []
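
For reference, this is a sketch of how Generalized Advantage Estimation combines the gamma and tau hyper-parameters defined above. The real computation lives inside A2CGaeTrain, whose source is not shown here, so compute_gae below is a hypothetical stand-in rather than the author's implementation.

import numpy as np

def compute_gae(rewards, values, next_value, dones, gamma=0.9, tau=1.0):
    # Hypothetical helper: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    # and A_t = delta_t + gamma * tau * A_{t+1}, masked across episode ends.
    values = list(values) + [next_value]
    gae = 0.0
    advantages = []
    for t in reversed(range(len(rewards))):
        mask = 1.0 - float(dones[t])
        delta = rewards[t] + gamma * values[t + 1] * mask - values[t]
        gae = delta + gamma * tau * mask * gae
        advantages.insert(0, gae)
    returns = [a + v for a, v in zip(advantages, values[:-1])]
    return (np.asarray(advantages, dtype=np.float32),
            np.asarray(returns, dtype=np.float32))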