Example #1
import gym

# Note: ReplayBuffer is assumed to be defined elsewhere in this project.


class AntAgent:

    def __init__(self, render=False, model=None):
        # create an environment
        self.environment = gym.make('MountainCarContinuous-v0')
        # reset environment when an agent is initialized
        self.current_observation = self.reset_environment()
        self.render = render
        self.model = model

        self.buffer = ReplayBuffer()

    def reset_environment(self):
        current_observation = self.environment.reset()
        return current_observation

    def get_action(self, current_observation):
        """Fetch an action according to model policy"""
        if self.model is None:
            action = self.environment.action_space.sample()
        else:
            action = self.model.predict(current_observation)
        return action

    def get_transitions(self, action):
        """Take one step in the environment and return the observations"""
        next_observation, reward, done, _ = self.environment.step(action)
        if self.render:
            self.environment.render()
        return next_observation, reward, done

    def run_episode(self, num_episodes=1):
        """Run `num_episodes` episodes using the `model` policy."""
        for episode in range(num_episodes):
            self.current_observation = self.reset_environment()
            episode_id = self.buffer.create_episode()

            done = False

            while not done:
                # Build a fresh transition dict each step so earlier samples
                # are not overwritten by later ones.
                transition = dict()
                transition['current_observation'] = self.current_observation
                transition['action'] = self.get_action(self.current_observation)
                transition['next_observation'], transition['reward'], done = self.get_transitions(transition['action'])

                self.buffer.add_sample(episode_id, transition)

                # Advance to the newly observed state before the next step.
                self.current_observation = transition['next_observation']

            self.buffer.add_episode(episode_id)

    def learn(self, step=0, restore=False):
        """Train SAC model using transitions in replay buffer"""
        if self.model is None:
            raise Exception("This agent has no brain! Add a model that implements a fit() method to train.")

        # Sample array of transitions from replay buffer.
        transition_matrices = self.buffer.fetch_sample()

        if step != 0:
            restore = True

        # Fit the SAC model.
        self.model.fit(transition_matrices, restore=restore, global_step=step)
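
A minimal usage sketch (hypothetical wiring, assuming a ReplayBuffer implementation and an SAC model exposing the predict() and fit() interface referenced above):

# Hypothetical usage: with model=None the agent samples random actions,
# which is enough to fill the replay buffer before any training.
agent = AntAgent(render=False, model=None)
agent.run_episode(num_episodes=10)

# Once a model implementing predict()/fit() is attached, learn() trains it
# on transitions sampled from the buffer.
# agent.model = sac_model
# agent.learn(step=0)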
Example #2
            # Store transition in replay buffer
            replay.store(current_state, action, reward, next_state, end)

            # Update current state
            current_state = next_state

            step += 1
            global_step += 1

        # `step % 1 == 0` is always true, so an update runs on every
        # environment step once `global_step` passes the exploration phase.
        if (step % 1 == 0) and (global_step > args.start_steps):
            for epoch in range(args.epochs):

                # Randomly sample minibatch of transitions from replay buffer
                current_states, actions, rewards, next_states, ends = replay.fetch_sample(num_samples=args.batch_size)

                # Perform single step of gradient descent on Q and policy
                # network
                critic1_loss, critic2_loss, actor_loss, alpha_loss = sac.train(current_states, actions, rewards, next_states, ends)
                if args.verbose:
                    print(episode, global_step, epoch, critic1_loss.numpy(),
                          critic2_loss.numpy(), actor_loss.numpy(), episode_reward)

                with writer.as_default():
                    tf.summary.scalar("actor_loss", actor_loss, sac.epoch_step)
                    tf.summary.scalar("critic1_loss", critic1_loss, sac.epoch_step)
                    tf.summary.scalar("critic2_loss", critic2_loss, sac.epoch_step)
                    tf.summary.scalar("alpha_loss", alpha_loss, sac.epoch_step)