Example #1
                # Compute rolling statistics over the episode buffers, then
                # clear the buffers for the next reporting window.
                mavg_score = np.mean(reward_buffer)
                var_score = np.var(reward_buffer)
                mavg_food = np.mean(food_buffer)
                mavg_poison = np.mean(poison_buffer)
                mavg_loss = np.mean(loss_buffer)
                food_buffer = []
                poison_buffer = []
                reward_buffer = []
                loss_buffer = []

                # Append one space-separated line of rolling statistics.
                with open(SAVE_TO_FOLDER + "/dfp_stats.txt", "a+") as stats_file:
                    stats_file.write(
                        f"{GAME} {max_reward} {mavg_score} {mavg_loss} "
                        f"{var_score} {mavg_food} {mavg_poison}\n")

    env.close()
    end = time.time()
    time_elapsed = end - start
    with open(SAVE_TO_FOLDER + "/timing_info.txt", "w") as text_file:
        text_file.write(f"Time Elapsed: {time_elapsed}\n")

# TODO: test a full training run and verify the logged statistics.
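
Since each record in dfp_stats.txt is a single space-separated line, the log
can be loaded back for analysis; a minimal sketch (the column order is taken
from the writes above, while the path and the assumption that GAME is a
numeric episode index are mine):

import numpy as np

# Columns: game, max_reward, mavg_score, mavg_loss, var_score, mavg_food, mavg_poison
stats = np.loadtxt("results/dfp_stats.txt")  # substitute your SAVE_TO_FOLDER
games = stats[:, 0]
mavg_score = stats[:, 2]
mavg_loss = stats[:, 3]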
Example #2
import json
import pickle
import zlib

import gym
import numpy as np
# Assumes the ml-agents gym wrapper (or a fork of it that accepts the
# use_vector argument) provides UnityEnv.
from gym_unity.envs import UnityEnv


class UnityEnvWrapper:
    def __init__(self, env_config=None, use_eval=False, rpc_mode=False):
        self.env = None
        if not rpc_mode:
            assert env_config is not None
            self.launch(env_config, use_eval)

    def launch(self, env_config, use_eval=False):
        environment_path = (env_config["environment_path_eval"]
                            if use_eval else env_config["environment_path"])

        # Offset the port so an eval environment can run alongside the
        # training environment without colliding.
        port = env_config.get("port", 0)
        if use_eval and port:
            port += 2
        use_visual = env_config.get("use_visual", False)
        use_vector = env_config.get("use_vector", True)
        multiagent = env_config.get("multiagent", False)
        uint8_visual = env_config.get("uint8_visual", True)
        flatten_branched = env_config.get("flatten_branched", True)

        self.env = UnityEnv(
            environment_path,
            port,
            use_visual=use_visual,
            use_vector=use_vector,
            uint8_visual=uint8_visual,
            multiagent=multiagent,
            flatten_branched=flatten_branched,
        )
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        # agent name must be unique among **all** agents
        self.agent_name = [
            f'{port}_{i}' for i in range(self.env.number_agents)
        ]

    def _transform_list_to_dict(self, objs):
        return {name: obj for name, obj in zip(self.agent_name, objs)}

    def _transform_dict_to_list(self, objs):
        return [objs[name] for name in self.agent_name]

    def step(self, act, action_settings=None):
        # Convert the per-agent action dict into the list layout UnityEnv expects.
        action = np.stack(self._transform_dict_to_list(act)).tolist()
        observation, reward, done, info = self.env.step(action)
        transform = self._transform_list_to_dict
        # Keep each agent's true done flag in its info dict, but report every
        # agent as not done so no episode terminates early (for logging).
        info = list(map(json.loads, info['text_observation']))
        for i, x in enumerate(info):
            x['done'] = done[i]
        done_dict = transform([False] * len(self.agent_name))
        done_dict['__all__'] = False
        return (transform(observation), transform(reward), done_dict,
                transform(info))

    def reset(self, reset_settings=None):
        obs = self.env.reset()
        return self._transform_list_to_dict(obs)

    def get_env_spaces(self):
        spaces = self.action_space, self.observation_space, self.agent_name
        p = pickle.dumps(spaces)
        z = zlib.compress(p)
        return z

    def get_action_count(self):
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            return self.env.action_space.n
        elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete):
            return self.env.action_space.nvec.tolist()
        raise NotImplementedError

    def sample(self):
        return self.env.action_space.sample()

    def number_agents(self):
        return self.env.number_agents

    def env_close(self):
        if self.env:
            self.env.close()
            self.env = None

    def close(self):
        self.env_close()

    def hello(self):
        # Trivial method, e.g. as a connectivity check when used in rpc_mode.
        print('Hello World')
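
A minimal usage sketch (the build paths, port, and config values below are
assumptions; only the keys mirror those read in launch()):

# Hypothetical local Unity builds; adjust paths for your project.
env_config = {
    "environment_path": "./builds/MyEnv",
    "environment_path_eval": "./builds/MyEnvEval",
    "port": 5005,
    "use_visual": False,
    "use_vector": True,
    "multiagent": True,
}

env = UnityEnvWrapper(env_config=env_config)
obs = env.reset()  # dict keyed by agent name, e.g. '5005_0'
actions = {name: env.sample() for name in env.agent_name}
obs, reward, done, info = env.step(actions)

# A remote client can recover the spaces serialized by get_env_spaces():
action_space, observation_space, agent_names = pickle.loads(
    zlib.decompress(env.get_env_spaces()))

env.close()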