Example #1
                window.append(next_state)
                next_phi = window()
            else:
                atlas.clear()
                next_phi = None
        else:
            next_phi = None

        # Store the transition in memory if valid, converting the reward
        # and action to torch tensors first.
        if phi is not None and next_phi is not None:

            if isinstance(reward, float) and isinstance(action, float):
                tensor_reward = torch.tensor([reward])
                tensor_action = torch.tensor([action])
                memory.push(phi, tensor_action, next_phi, tensor_reward)

        # Move to the next state.
        phi = next_phi

        # Don't update if this is a validation episode.
        # if (i_episode + 1) % 5 == 0:
        #     continue

        # Perform an optimization step once enough transitions are buffered.
        if len(memory) > 128:
            trainer.optimize()

        # Check the episode counter to end simulation.
        if counter > MAX_EPISODE_COUNTER:
            done = True
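
Example #1 is a fragment from the middle of a training loop: window, atlas, phi, memory, trainer, and MAX_EPISODE_COUNTER are all defined earlier in the original script. The memory.push(...) and len(memory) calls suggest a standard experience-replay buffer; below is a minimal sketch of one, assuming the usual deque-backed design (the Transition tuple and the default capacity are illustrative, not taken from the original code).

import random
from collections import deque, namedtuple

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory:
    """Fixed-size FIFO buffer of transitions with uniform random sampling."""

    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, next_state, reward):
        # The deque silently drops the oldest transition once full.
        self.buffer.append(Transition(state, action, next_state, reward))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)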
Example #2
        print('Action threshold met', frames)

        action = select_action(state)
        # .item() assumes select_action returns a 1x1 tensor, as in the PyTorch DQN tutorial.
        _, reward, done, info = env.step(action.item())
        reward = torch.tensor([reward], device=device)

        # Observe new state
        last_screen = current_screen
        current_screen = get_screen()
        if not done:
            next_state = current_screen - last_screen
        else:
            next_state = None

        # Store the transition in memory
        memory.push(state, action, next_state, reward)

        # Move to the next state
        state = next_state

        optimize_model()
        # print(len(memory))

        # Break once Pac-Man is caught and the episode ends.
        if done:
            break

        # print(done)  # bool
        # print(info)  # dict with remaining lives
        # time.sleep(0.1)
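
Example #2 is the body of a frame loop adapted from the PyTorch DQN tutorial, where the state is the difference between two consecutive rendered screens. As a sketch of how that body fits into an episode, the function below wraps the same logic, with every collaborator (env, get_screen, select_action, memory, optimize_model, device) passed in as a parameter because the original script defines them elsewhere; the function name itself is hypothetical.

from itertools import count

import torch


def run_episode(env, get_screen, select_action, memory, optimize_model, device):
    """Run one episode using the loop body shown above (all arguments assumed)."""
    env.reset()
    last_screen = current_screen = get_screen()
    state = current_screen - last_screen
    for frames in count():
        action = select_action(state)
        _, reward, done, info = env.step(action.item())
        reward = torch.tensor([reward], device=device)
        # Observe the new state as the difference of consecutive screens.
        last_screen, current_screen = current_screen, get_screen()
        next_state = current_screen - last_screen if not done else None
        memory.push(state, action, next_state, reward)
        state = next_state
        optimize_model()
        if done:
            return frames + 1  # episode length in frames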
Example #3
class DQNAgent(GymAgent):
    """
    An agent for running the DQN algorithm (Mnih et al., 2013).
    """
    def __init__(self, env, mode, pre_trained_model, tensorboard_writer=None):
        super(DQNAgent, self).__init__(env, mode, tensorboard_writer)
        self.agent_name = 'DQN' + str(self.agent_no)
        self.memory = ReplayMemory()

        self.network = DeepQNetwork(self.obs_space[0], self.action_space)

        if self.mode == 'play':
            self.network.load_params(pre_trained_model)
            self.network.eval()

        elif self.mode == 'train':

            self.eval_network = DeepQNetwork(self.obs_space[0],
                                             self.action_space)
            self.eval_network.eval()

            if pre_trained_model:
                self.eval_network.load_params(pre_trained_model)

            self.optimizer = optim.RMSprop(self.network.parameters(), lr=LR)
            self.loss_func = SmoothL1Loss()
        else:
            raise ValueError(
                'Please set a valid mode for the agent (play or train)')

    def interact(self, state, action):
        """
        Returns:
            state, reward, done, info
        """
        return self.env.step(action, state)

    def select_action(self, state):
        if self.mode == 'play':
            return self.network(prep_exploitation(state)).max(1)[1].view(1, 1)
        # Epsilon-greedy policy: decay epsilon exponentially, floored at EPS_END.
        eps_threshold = max(EPS_END,
                            EPS_START * EPS_DECAY**self.no_training_steps)

        self.no_training_steps += 1

        if random.random() > eps_threshold:
            with torch.no_grad():
                return self.network(prep_exploitation(state)).max(1)[1].view(
                    1, 1)
        else:
            return prep_exploration(self.action_space)

    def optimize(self):
        if len(self.memory) < BATCH_SIZE:
            batch_size = len(self.memory)
        else:
            batch_size = BATCH_SIZE

        s, a, _s, r = prep_mem_batch(self.memory.sample(batch_size))

        non_final_next = torch.cat([ns for ns in _s if ns is not None])
        non_final_mask = torch.tensor([ns is not None for ns in _s],
                                      dtype=torch.bool)
        state_action_values = self.network(s).gather(1, a.long().unsqueeze(1))

        next_state_values = torch.zeros(batch_size)
        next_state_values[non_final_mask] = self.eval_network(
            non_final_next).detach().max(1)[0]

        expected_q = prep_q(next_state_values, r)
        loss = self.loss_func(state_action_values, expected_q.unsqueeze(1))

        self.optimizer.zero_grad()
        loss.backward()

        self.optimizer.step()

        return loss.item()

    def train(self, num_episodes, render=False, lr_decay=False):

        end_state = np.zeros(self.obs_space)
        state = end_state

        for episode in range(1, num_episodes + 1):
            done = False
            timesteps = 0
            rewards = []
            sum_rewards = []
            loss = 0
            times_alive = []

            while not done:
                if state is end_state:
                    state = self.env.initialize()

                if render: self.env.render()
                action = self.select_action(state)
                _state, reward, done, _ = self.interact(state, action.item())
                rewards.append(reward)

                timesteps += 1

                if done:
                    _state = end_state

                    sum_reward = np.sum(rewards)
                    sum_rewards.append(sum_reward)

                    mean_loss = loss / timesteps
                    times_alive.append(timesteps)

                    # timesteps is reset at the start of the next episode,
                    # so log the episode statistics before moving on.
                    if self.writer:
                        self.writer.add_scalar(
                            self.agent_name + ' duration of episode',
                            timesteps, episode)
                        self.writer.add_scalar(
                            self.agent_name + ' sum reward of episode',
                            sum_reward, episode)
                        self.writer.add_scalar(
                            self.agent_name + ' mean loss of episode',
                            mean_loss, episode)

                self.memory.push(state, action,
                                 _state if _state is not None else end_state,
                                 reward)

                state = _state
                step_loss = self.optimize()
                loss += step_loss

            if lr_decay:
                for g in self.optimizer.param_groups:
                    g['lr'] = g['lr'] / (1 + (episode / LR_DECAY))

            if episode % TARGET_UPDATE == 0:
                if self.env.goal(times_alive):
                    print('goal reached your computer is smart :)')
                    self.eval_network.save_params(self.agent_name,
                                                  self.env.env_name)
                    break
                else:
                    times_alive = []

                self.eval_network.update_params(self.network)
                print('episode ', episode, 'loss ', mean_loss, 'reward ',
                      np.mean(sum_rewards))
                # Add your custom goal checks here.

    def play(self, num_episodes):
        for episode in range(1, num_episodes + 1):
            done = False
            state = self.env.initialize()
            while not done:
                self.env.render()
                action = self.select_action(state)
                _state, reward, done, _ = self.interact(state, action.item())
                # Advance to the next observation; the next episode re-initializes the env.
                state = _state
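
Example #3 shows only the agent class, so the snippet below is a hypothetical usage sketch: the environment wrapper, the SummaryWriter setup, the episode counts, and the checkpoint filename are all assumptions. The wrapper is expected to expose initialize(), step(action, state), render(), goal(times_alive), and an env_name attribute, since those are the calls DQNAgent makes.

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()
env = CartPoleWrapper()  # hypothetical wrapper with the interface listed above
agent = DQNAgent(env, mode='train', pre_trained_model=None,
                 tensorboard_writer=writer)
agent.train(num_episodes=500, render=False, lr_decay=True)

# Reload the saved parameters and watch the trained agent play.
player = DQNAgent(env, mode='play',
                  pre_trained_model='dqn_checkpoint.pt')  # filename is illustrative
player.play(num_episodes=5)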