from animalai.envs.environment import UnityEnvironment  # animalai v1.x import path


class EnvWrapper(object):
    '''
    Wrapper around UnityEnvironment that resets an arena once its episode is done.
    It only works correctly when each environment runs a single arena.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Forward all arguments to the wrapped UnityEnvironment.
        '''
        self._env = UnityEnvironment(*args, **kwargs)
        self._arenas_configurations = None

    def __getattr__(self, attr):
        # Delegate anything not defined on the wrapper to the wrapped environment.
        return getattr(self._env, attr)

    def reset(self, arenas_configurations=None, train_mode=True):
        """
        Shuffle the arenas and reset the environment with the stored configuration.
        """
        if arenas_configurations is not None:
            self._arenas_configurations = arenas_configurations
        if self._arenas_configurations is not None:
            self._arenas_configurations.shuffle_arenas()
        return self._env.reset(self._arenas_configurations, train_mode)

    def step(self, *args, **kwargs):
        ret = self._env.step(*args, **kwargs)
        # If the episode just ended, reset and splice in the fresh first observation.
        if ret['Learner'].local_done[0]:
            new_ret = self.reset()
            ret['Learner'].visual_observations = new_ret['Learner'].visual_observations
        return ret
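# --- Usage sketch (illustrative, not from the original script) ---
# Shows EnvWrapper standing in for a bare UnityEnvironment: reset once with an
# ArenaConfig, then step; finished episodes are reset transparently inside
# step(). The executable path and YAML file below are placeholders.
if __name__ == '__main__':
    from animalai.envs.arena_config import ArenaConfig

    env = EnvWrapper(file_name='env/AnimalAI', worker_id=0, n_arenas=1)
    env.reset(arenas_configurations=ArenaConfig('configs/1-Food.yaml'))
    for _ in range(100):
        info = env.step([0, 0])  # a [forward, rotation] action pair per step
    env.close()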
import time

import numpy as np
from animalai.envs.environment import UnityEnvironment
from animalai.envs.arena_config import ArenaConfig

# Module-level settings read by main(); the values below are illustrative defaults.
env_path = 'env/AnimalAI'  # path to the AnimalAI executable (placeholder)
worker_id = 0
seed = 0
resolution = 84
docker_target_name = None  # set to a container name when training inside Docker


def main(args):
    docker_training = docker_target_name is not None
    env = UnityEnvironment(
        n_arenas=args.n_arenas,
        file_name=env_path,
        worker_id=worker_id,
        seed=seed,
        docker_training=docker_training,
        play=False,
        resolution=resolution,
    )
    arena_config_in = ArenaConfig('configs/3-Obstacles.yaml')
    env.reset(arenas_configurations=arena_config_in)

    start_time = time.time()
    for _ in range(args.frames):
        # Two discrete action branches per arena, sampled uniformly at random.
        env.step(np.random.randint(0, 3, size=2 * args.n_arenas))
    elapsed_time = time.time() - start_time

    fps = float(args.frames) / elapsed_time
    print("n_arenas={0}, fps={1:.3f}".format(args.n_arenas, fps))
    env.close()
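# --- Entry-point sketch (illustrative) ---
# A minimal argparse wrapper for main(), assuming only the two attributes the
# benchmark reads: n_arenas and frames. The flag names are assumptions, not
# taken from the original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='FPS benchmark for AnimalAI arenas')
    parser.add_argument('--n-arenas', dest='n_arenas', type=int, default=1)
    parser.add_argument('--frames', type=int, default=1000)
    main(parser.parse_args())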
# Main control loop (obs, reward, done, info are unpacked from brainInfo each
# iteration earlier in the full script).
while True:
    # let the agent generate an action based on the information
    action = agent.step(obs, reward, done, info)

    # visualization: visual frame, direction line, and path points
    image.set_data(obs[0])
    if agent.chaser.newest_path is not None:
        sca.set_offsets(np.array(agent.chaser.newest_path))
    else:
        sca.set_offsets(AgentConstants.standpoint[::-1])
    if agent.chaser.newest_end is not None:
        line.set_xdata([AgentConstants.standpoint[1], agent.chaser.newest_end[0]])
        line.set_ydata([AgentConstants.standpoint[0], agent.chaser.newest_end[1]])
    else:
        line.set_xdata([])
        line.set_ydata([])
    fig.canvas.draw()
    fig.canvas.flush_events()

    # go to the next test if the current one is finished
    if all(brainInfo['Learner'].local_done):
        break
    else:
        brainInfo = env.step(action)

# cleanup
plt.close(fig)
env.close()
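# --- Figure-setup sketch (illustrative) ---
# The loop above assumes interactive matplotlib handles named fig, image, sca
# and line. One way to create them, assuming 84x84 RGB visual observations
# (the AnimalAI default); sizes and colors are placeholders.
import numpy as np
import matplotlib.pyplot as plt

plt.ion()                                  # interactive mode so draw() refreshes live
fig, ax = plt.subplots()
image = ax.imshow(np.zeros((84, 84, 3)))   # visual observation frame
sca = ax.scatter([], [], s=4, c='red')     # path points from the chaser
line, = ax.plot([], [], c='yellow')        # direction line from standpoint to target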
import time

# step_time_length = 0.0595
try:
    while True:
        step = 0
        direction = input()
        if direction == "w":
            action = [1, 0]
        elif direction == "s":
            action = [2, 0]
        elif direction == "d":
            action = [0, 1]
        elif direction == "a":
            action = [0, 2]
        # Keep stepping until roughly one unit of distance has been covered.
        while step < 1.0:
            start = time.time()  # start a timer before taking a step
            res = env.step(action)  # send the chosen action to the environment
            if action == [0, 1] or action == [0, 2]:
                break  # rotations cover no distance, so take a single step only
            step_time_length = time.time() - start  # time the step actually took
            speed = res['Learner'].vector_observations
            delta_distance = step_time_length * speed[0, 2]  # distance covered in one step
            total_distance += delta_distance
            step += delta_distance  # advance the loop counter by the distance covered
            print("speed = {0:.4f}, delta_time = {1:.4f}, delta_distance = {2:.4f}, total_distance = {3:.4f}"
                  .format(speed[0, 2], step_time_length, delta_distance, total_distance))
            if speed[0, 2] == 0:
                break  # assumed: stop once the agent is no longer moving
except KeyboardInterrupt:  # assumed exit path: Ctrl-C ends the manual session
    env.close()
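# --- Preamble sketch (illustrative) ---
# The fragment above assumes an environment and a distance counter created
# earlier in the script. A minimal version under those assumptions; the
# executable path is a placeholder and the import path is for animalai v1.x.
from animalai.envs.environment import UnityEnvironment

env = UnityEnvironment(file_name='env/AnimalAI', worker_id=0, n_arenas=1, play=False)
env.reset(train_mode=True)
total_distance = 0.0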