Example #1
    def generate_buffer(self):
        print("Generating offline dataset...")
        while not self.buffer.full():
            obs, acs, rewards, next_obs, terminals, image_obs = utils.sample_trajectory(self.env, self.collect_policy, 200, True, render_mode=())
            self.buffer.add_trajectory(utils.Path(obs, image_obs, acs, rewards, next_obs, terminals))
        print("Offline dataset Generated")
        # Count how often each state appears in the collected transitions;
        # terminal next-states are counted too so absorbing states show up.
        state_freq = np.zeros(self.ob_dim)
        for i in range(len(self.buffer.obs)):
            state_freq[self.buffer.obs[i]] += 1
            if self.buffer.terminals[i]:
                state_freq[self.buffer.next_obs[i]] += 1
        # Arrange the counts on an 8x8 grid for the heatmap.
        state_freq = state_freq.reshape(8, 8)
        fig, ax = plt.subplots()

        im, cbar = heatmap(state_freq, np.arange(8), np.arange(8), ax=ax, cmap="YlGn")

        # Annotate each cell with its visit count.
        for i in range(8):
            for j in range(8):
                text = ax.text(j, i, "{:.0f}".format(state_freq[i, j]),
                               ha="center", va="center", color="black")

        # plt.show()
        ax.set_title("Offline Buffer State Frequency")
        plt.savefig('./state_freq_buffer_{}.png'.format(1))
        plt.close()
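
Example #1 assumes a `heatmap` helper that returns an image handle and a colorbar. The function below is only a minimal sketch of such a helper, modelled on the annotated-heatmap recipe from the matplotlib gallery; the actual helper in the original repository may differ in signature and styling.

    import numpy as np
    import matplotlib.pyplot as plt

    def heatmap(data, row_labels, col_labels, ax=None, **kwargs):
        # Minimal sketch: draw the matrix, attach a colorbar, and label both
        # axes with the supplied tick labels.
        if ax is None:
            ax = plt.gca()
        im = ax.imshow(data, **kwargs)
        cbar = ax.figure.colorbar(im, ax=ax)
        ax.set_xticks(np.arange(data.shape[1]))
        ax.set_yticks(np.arange(data.shape[0]))
        ax.set_xticklabels(col_labels)
        ax.set_yticklabels(row_labels)
        return im, cbar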
Example #2
    def test(self):
        self.agent.testing()
        ep_rewards = []
        state_freq = np.zeros(self.ob_dim)  # visit counts (unused in this variant)
        for itr in range(self.test_iter):
            path = utils.sample_trajectory(self.env, self.agent.actor, 200, True, render_mode=())
            rewards = path["reward"]
            ep_rewards.append(np.sum(rewards))

        print("Average Total Rewards: {}".format(np.mean(ep_rewards)))
        if self.save_model:
            expert_dir = os.path.join('.', 'Experts', 'Offline')
            self.agent.save(expert_dir)
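
Example #2 (and Example #4 below) index the value returned by `utils.sample_trajectory` as a dictionary, while Examples #1 and #6 unpack it as a tuple, so the exact return type depends on the repository version. A plausible sketch of the dictionary-style `utils.Path` container, reconstructed only from the keys these examples use, is shown below; the "image_obs" key name is a guess, the other keys are taken from the calling code.

    import numpy as np

    def Path(obs, image_obs, acs, rewards, next_obs, terminals):
        # Sketch of the trajectory container assumed by the dict-style
        # examples: each field is stacked into an array and stored under
        # the key the calling code indexes with.
        return {
            "observation": np.array(obs),
            "image_obs": np.array(image_obs),
            "action": np.array(acs),
            "reward": np.array(rewards),
            "next_observation": np.array(next_obs),
            "terminal": np.array(terminals),
        }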
Example #3
    def test(self):
        self.agent.testing()
        ep_rewards = []
        for itr in range(self.test_iter):
            paths = utils.sample_trajectory(self.env,
                                            self.agent.actor,
                                            200,
                                            True,
                                            render_mode=())
            # ep_rewards.append(np.sum(paths["reward"]))

        # print("Average Total Rewards: {}".format(np.mean(ep_rewards)))
        if self.save_model:
            expert_dir = os.path.join('.', 'Experts')
            self.agent.save(expert_dir)
Example #4
 def generate_buffer(self):
     print("Generating offline dataset...")
     counter = 0
     # data[s][a] collects the (reward, next_state, terminal) tuples observed
     # for each (state, action) pair.
     data = defaultdict(lambda: defaultdict(list))
     while counter < self.buffer_size:
         path = utils.sample_trajectory(self.env, self.collect_policy, 200, True, render_mode=())
         obs, acs, rewards, next_obs, terminals = path["observation"], path["action"], path["reward"], path["next_observation"], path["terminal"]
         # assert len(obs) == len(acs) == len(rewards) == len(terminals) == len(next_obs)
         # print(len(obs), len(acs), len(rewards), len(terminals), len(next_obs))
         for i in range(len(obs)):
             s = obs[i]
             a = acs[i]
             data[s][a].append((rewards[i], next_obs[i], terminals[i]))
         counter += len(obs)
     print("Offline dataset Generated")
     self.data = data
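
The nested dictionary built in Example #4 groups every observed (reward, next state, terminal) sample under its (state, action) pair, which is exactly what an empirical MDP model needs. As a hypothetical illustration (not part of the original code), those samples could be reduced to transition probabilities and mean rewards like this:

    from collections import defaultdict

    import numpy as np

    def estimate_model(data):
        # Hypothetical helper: reduce the per-(state, action) samples to an
        # empirical model. transition_probs[s][a] maps each next state to its
        # empirical probability; expected_reward[s][a] is the mean reward.
        transition_probs = defaultdict(dict)
        expected_reward = defaultdict(dict)
        for s, actions in data.items():
            for a, samples in actions.items():
                rewards = [r for (r, _, _) in samples]
                next_states = [ns for (_, ns, _) in samples]
                values, counts = np.unique(next_states, return_counts=True)
                transition_probs[s][a] = dict(zip(values, counts / counts.sum()))
                expected_reward[s][a] = float(np.mean(rewards))
        return transition_probs, expected_reward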
Example #5
    def test(self):
        self.agent.testing()
        ep_rewards = []
        state_freq = np.zeros(self.ob_dim)  # visit counts (unused in this variant)
        for itr in range(self.test_iter):
            path = utils.sample_trajectory(self.env,
                                           self.agent.actor,
                                           50,
                                           True,
                                           render_mode=())
            rewards = path["reward"]
            ep_rewards.append(np.sum(rewards))

        if self.save_model:
            expert_dir = os.path.join('.', 'Experts', 'MDP')
            if not os.path.exists(expert_dir):
                os.makedirs(expert_dir)
            self.agent.save(expert_dir)
Example #6
    def test(self):
        self.agent.testing()
        ep_rewards = []
        state_freq = np.zeros(self.ob_dim)  # per-state visit counts over all test rollouts
        for itr in range(self.test_iter):
            obs, acs, rewards, next_obs, terminals, image_obs = utils.sample_trajectory(self.env, self.agent.actor, 200, True, render_mode=())
            ep_rewards.append(np.sum(rewards))

            for i in range(len(obs)):
                state_freq[obs[i]] += 1
                if terminals[i]:
                    state_freq[next_obs[i]] += 1

        # Log-scale bar chart of how often each state was visited during testing.
        plt.bar(list(range(self.ob_dim)), state_freq, log=True)
        plt.savefig('./img2.png')
        plt.close()

        print("Average Total Rewards: {}".format(np.mean(ep_rewards)))
        if self.save_model:
            expert_dir = os.path.join('.', 'Experts', 'Offline')
            self.agent.save(expert_dir)