Example #1
    def train(self,
              Q: Agent,
              task: Task,
              epsilon: Epsilon,
              alpha: LearningRate,
              episodes,
              cache_train=True,
              test_times=1):
        # reset the agent and the exploration / learning-rate schedules
        Q.clear()
        epsilon.clear()
        alpha.clear()
        # buffers for per-episode training history
        rewards_history = np.zeros(episodes, dtype=np.float32)
        steps_history = np.zeros(episodes, dtype=np.float32)
        episode_epsilon_history = np.zeros(episodes, dtype=np.float32)
        epsilon_history = []
        # counts consecutive episodes that reach the 200-step cap
        conseq_200 = 0
        self.episode = 0
        for e in range(episodes):
            # run one episode of training with the current schedules
            steps, rewards, epsilons = self.run_episode(
                Q, task, epsilon, alpha)
            if cache_train:
                # score the episode with the rewards collected during training:
                # fold them backwards into the discounted return
                returns = 0.0
                for r in rewards[::-1]:
                    returns = r + self.gamma * returns
            else:
                # otherwise run separate evaluation episodes and average
                # their returns and step counts
                returns, steps = 0.0, 0.0
                for _ in range(test_times):
                    returns_, steps_ = self.evaluate(Q, task)
                    returns += returns_ / test_times
                    steps += steps_ / test_times
            # record per-episode statistics
            rewards_history[e] = returns
            steps_history[e] = steps
            episode_epsilon_history[e] = np.mean(epsilons)
            epsilon_history.append(epsilons)
            if e % 10 == 0:
                # periodic progress report: mean epsilon, return, and steps
                print('{} {} {}'.format(episode_epsilon_history[e], returns,
                                        steps))
            # advance the exploration and learning-rate schedules
            epsilon.update_end_of_episode(self.episode)
            alpha.update_end_of_episode(self.episode)
            self.episode += 1

            # track consecutive episodes that hit the 200-step cap
            # (the early-stop check below is disabled)
            if steps >= 199.99:
                conseq_200 += 1
            else:
                conseq_200 = 0
            # if conseq_200 >= 4:
            #    rewards_history[e:] = rewards_history[e]
            #    steps_history[e:] = steps_history[e]
            #    episode_epsilon_history[e:] = episode_epsilon_history[e]
            #    break

        return steps_history, rewards_history, episode_epsilon_history, \
            np.concatenate(epsilon_history, axis=0)
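The loop above accumulates the episode's rewards backwards through returns = r + self.gamma * returns, which produces the discounted return r_0 + gamma*r_1 + gamma^2*r_2 + ... for the episode. A minimal standalone sketch of that step; the discount factor and reward list below are made-up illustration values, not taken from the example:

    # discounted return via backward accumulation, mirroring the loop above
    gamma = 0.99                  # illustrative discount factor (assumed)
    rewards = [1.0, 0.0, 2.0]     # illustrative per-step rewards (assumed)

    returns = 0.0
    for r in rewards[::-1]:       # walk the episode from the last step back
        returns = r + gamma * returns

    # matches the explicit discounted sum over time steps
    expected = sum(gamma ** t * r_t for t, r_t in enumerate(rewards))
    assert abs(returns - expected) < 1e-9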
Example #2
    def train(self, Q: Agent, task: Task, policy: Policy, episodes):
        """ Trains the specified agent on the specified task using the specified
        exploration policy using the current implementation. A specified number of episodes
        is generated for training.
        
        inputs:
            Q - an Agent object storing the Q-values
            task - a Task object representing the task the agent is learning
            policy - a Policy object representing the exploration policy used to 
            balance exploration and exploitation
            episodes - the number of episodes of training to perform
        outputs:
            - a one-dimensional numpy array containing the lengths of each episode - this
            can be used to check the learning progress of the agent
            - a one-dimensional numpy array containing the sum of the discounted 
            rewards from the environment obtained on each episode - this can be used to check
            the learning progress of the agent 
        """

        # initialization
        self.clear()
        Q.clear()
        policy.clear()

        # for storing the per-episode history of the trial
        rewards_history = np.zeros(episodes, dtype=float)
        steps_history = np.zeros(episodes, dtype=int)

        # run episodes
        for e in range(episodes):

            # run an episode of training
            steps, rewards = self.run_episode(Q, task, policy)

            # compute the value of the backup and update the history
            R = 0.0
            for reward in rewards[::-1]:
                R = reward + self.gamma * R
            rewards_history[e] = R
            steps_history[e] = steps

            # finish episode
            policy.finish_episode(e)
            Q.finish_episode(e)

        return steps_history, rewards_history
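As the docstring notes, the two returned arrays are meant for checking learning progress. A hedged sketch of one way to inspect them; the synthetic history arrays and the moving_average helper below are illustrative assumptions, not part of the example:

    import numpy as np

    # stand-ins for the arrays returned by train() (assumed values)
    steps_history = np.array([200, 180, 150, 120, 100, 90, 80, 75], dtype=int)
    rewards_history = np.array([-20.0, -18.0, -15.0, -12.0, -10.0, -9.0, -8.0, -7.5])

    def moving_average(x, window=4):
        # simple running mean to smooth the noisy per-episode curves
        kernel = np.ones(window) / window
        return np.convolve(x, kernel, mode='valid')

    print(moving_average(steps_history))    # smoothed episode lengths
    print(moving_average(rewards_history))  # smoothed discounted returns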