Example #1
    def append(self, observation, action, reward, done, next_observation):
        # Append one transition to the current game's buffer; once the game
        # finishes, seal it and start a new per-game buffer
        if self.symbolic_env:
            self.buffer[self.game_idx]["obs"].append(observation.numpy())
        else:
            # Decentre and discretise visual observations (to save memory)
            self.buffer[self.game_idx]["obs"].append(
                postprocess_observation(observation.numpy(), self.bit_depth))
        self.buffer[self.game_idx]["action"].append(action.numpy())
        self.buffer[self.game_idx]["reward"].append(reward)
        self.buffer[self.game_idx]["nonterminal"].append(not done)
        self.num_steps += 1

        if done:
            # The game is finished: record the terminal observation, advance
            # to the next slot, and initialise a fresh per-game buffer
            if self.symbolic_env:
                self.buffer[self.game_idx]["terminal_obs"] = (
                    next_observation.numpy())
            else:
                self.buffer[self.game_idx]["terminal_obs"] = (
                    postprocess_observation(next_observation.numpy(),
                                            self.bit_depth))
            # When the buffer is full, filling begins again from the head
            self.game_idx = (self.game_idx + 1) % self.size
            self.full = self.full or self.game_idx == 0
            self.buffer[self.game_idx] = {
                "obs": [],
                "action": [],
                "reward": [],
                "nonterminal": []
            }
            self.num_game += 1
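
Example #1 stores whole games as growing Python lists inside a fixed-size list of slots. For context, here is a minimal sketch of the initialisation this append() implies; the attribute names come from the snippet, everything else is an assumption:

    def __init__(self, size, symbolic_env, bit_depth):
        # Assumed setup, not from the original source
        self.size = size                  # Maximum number of stored games
        self.symbolic_env = symbolic_env  # Symbolic states skip image postprocessing
        self.bit_depth = bit_depth
        self.game_idx = 0                 # Slot currently being filled
        self.full = False                 # Set once game_idx wraps back to 0
        self.num_steps, self.num_game = 0, 0
        self.buffer = [None] * size
        self.buffer[0] = {"obs": [], "action": [], "reward": [], "nonterminal": []}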
Example #2
 def append(self, observation, action, reward, done):
     self.observations[self.idx] = postprocess_observation(
         observation.numpy(), self.bit_depth
     )  # Decentre and discretise visual observations (to save memory)
     self.actions[self.idx] = action.numpy()
     self.rewards[self.idx] = reward
     self.nonterminals[self.idx] = not done
     self.idx = (self.idx + 1) % self.size
     self.full = self.full or self.idx == 0  # Wrapping back to 0 means the buffer is full
     self.steps = self.steps + 1
     self.episodes = self.episodes + (1 if done else 0)
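
Unlike Example #1, this variant writes into preallocated flat arrays driven by a single circular index. A minimal sketch of the initialisation it implies; shapes and dtypes are illustrative assumptions, not taken from the original source:

 import numpy as np

 def __init__(self, size, observation_shape, action_size):
     # Assumed preallocation for the flat ring buffer above
     self.size = size
     self.observations = np.empty((size, *observation_shape), dtype=np.uint8)
     self.actions = np.empty((size, action_size), dtype=np.float32)
     self.rewards = np.empty((size, ), dtype=np.float32)
     self.nonterminals = np.empty((size, ), dtype=bool)
     self.idx, self.full = 0, False
     self.steps, self.episodes = 0, 0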
Example #3
 def append(self, observation, action, reward, done):
     if self.symbolic_env:
         self.observations[self.idx] = observation.numpy()
     else:
         self.observations[self.idx] = postprocess_observation(
             observation.numpy(), self.bit_depth
         )  # Decentre and discretise visual observations (to save memory)
     # Accept both torch tensors and plain numpy arrays as actions
     self.actions[self.idx] = (action.numpy()
                               if isinstance(action, torch.Tensor) else action)
     self.rewards[self.idx] = reward
     self.nonterminals[self.idx] = not done
     if done:
         # Remember where episodes end so later sampling can respect boundaries
         self.ends_idx.append(self.idx)
     self.idx = (self.idx + 1) % self.size
     self.full = self.full or self.idx == 0
     self.steps = self.steps + 1
     self.episodes = self.episodes + (1 if done else 0)
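
Example #3 additionally records where each episode ends. One plausible use of ends_idx, sketched below, is to reject sampled sequence windows that straddle an episode boundary; the helper name and the no-wrap-around simplification are assumptions, not from the original source:

 def _valid_sequence(self, start, length):
     # A window [start, start + length) is acceptable when no terminal index
     # falls strictly inside it (a terminal as the final step is fine)
     return not any(start <= end < start + length - 1
                    for end in self.ends_idx)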
Example #4
 # Initialise dataset D with S human-played seed episodes (keyboard input
 # replaces random actions here)
 # Key-to-action mapping for the Tetris environment; Esc (27) aborts
 mapping = {
     ord("a"): 4,
     ord("d"): 3,
     ord("s"): 5,
     ord("q"): 2,
     ord("e"): 1,
     32: 0,
     ord("l"): 0
 }
 for s in range(1, args.seed_episodes + 1):
     observation, done, step = env.reset(), False, 0
     while not done:
         # Show the postprocessed frame so the human player can choose a key
         x = postprocess_observation(observation.numpy(), args.bit_depth)
         cv2.imshow("Tetris", cv2.resize(x[0].transpose(1, 2, 0),
                                         (512, 512)))
         key = cv2.waitKey()
         if key == 27:  # Esc aborts the episode
             break
         if key not in mapping:
             continue  # Ignore keys without a mapped action
         idx = mapping[key]
         action = np.zeros((6, ))  # One-hot encode the chosen action
         action[idx] = 1
         step += 1
         next_observation, reward, done = env.step(action)
         if step > 2000:
             done = True  # Cap the episode length
         # The buffer postprocesses observations internally (see the append()
         # variants above), so pass the raw observation rather than x
         D.append(observation, action, reward, done)
         observation = next_observation
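
All four examples rely on postprocess_observation to compress float observations back into uint8 before storage. Its body is not shown here; the sketch below assumes it follows the usual PlaNet convention of undoing the centring to [-0.5, 0.5] and quantising to bit_depth bits:

 import numpy as np

 def postprocess_observation(observation, bit_depth):
     # Shift from [-0.5, 0.5] back to [0, 1], quantise to 2**bit_depth levels,
     # then rescale to the full uint8 range (saves memory versus float32)
     quantised = np.floor((observation + 0.5) * 2 ** bit_depth)
     return np.clip(quantised * 2 ** (8 - bit_depth), 0,
                    2 ** 8 - 1).astype(np.uint8)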