def random_episodes(env_ctor, num_episodes, output_dir=None):
    """Collect episodes by acting with uniformly sampled actions.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
        num_episodes: Number of episodes to run.
        output_dir: Forwarded unchanged to ``wrappers.CollectGymDataset``
            (presumably where episodes get written; confirm in the wrapper).

    Returns:
        A list with one entry per episode: the ``info['episode']`` value
        reported by the step that ended the episode.
    """
    env = wrappers.CollectGymDataset(env_ctor(), output_dir)

    def _run_one_episode():
        # Step with random actions until the environment reports `done`;
        # the info dict of that final step carries the episode record.
        env.reset()
        while True:
            action = env.action_space.sample()
            _, _, done, info = env.step(action)
            if done:
                return info['episode']

    return [_run_one_episode() for _ in range(num_episodes)]
def random_episodes(env_ctor, num_episodes, outdir=None):
    """Run random-action episodes and optionally return them in memory.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
        num_episodes: Number of episodes to run.
        outdir: Forwarded unchanged to ``wrappers.CollectGymDataset``
            (presumably the directory episodes are written to; confirm in
            the wrapper). When it is ``None`` the episodes are collected
            and returned instead.

    Returns:
        A list. With ``outdir=None`` it holds one ``info['episode']`` entry
        per episode, taken from the step that ended the episode. With an
        ``outdir`` given, the list is empty.
    """
    env = wrappers.CollectGymDataset(env_ctor(), outdir)
    # Always start from a list: the previous `[] if outdir else None` left
    # `episodes` as None exactly in the branch that appends to it, so the
    # default call crashed with AttributeError.
    episodes = []
    try:
        for _ in range(num_episodes):
            done = False
            env.reset()
            while not done:
                action = env.action_space.sample()
                _, _, done, info = env.step(action)
            if outdir is None:
                episodes.append(info['episode'])
    finally:
        # Release the environment even when a rollout raises. Environments
        # without a `close` method are tolerated, as before.
        try:
            env.close()
        except AttributeError:
            pass
    return episodes
def random_episodes(env_ctor, num_episodes, output_dir=None):
    """Generate episodes by sampling every action from the action space.

    Args:
        env_ctor: Zero-argument callable that builds the environment.
        num_episodes: Number of episodes to run.
        output_dir: Forwarded unchanged to ``wrappers.CollectGymDataset``.

    Returns:
        A list containing, for each episode, the ``info['episode']`` value
        from the step on which the environment reported ``done``.
    """
    # NOTE(review): the original author notes that env_ctor() yields an
    # ExternalProcess object here; confirm against the caller.
    env = wrappers.CollectGymDataset(env_ctor(), output_dir)
    collected = []
    for _ in range(num_episodes):
        env.reset()
        finished = False
        while not finished:
            random_action = env.action_space.sample()
            _, _, finished, step_info = env.step(random_action)
        # Per the original author's note: once `done` is True, `info` holds
        # the 'episode' record, which the wrapper also writes to a file
        # (e.g. "~/planet/log_debug/00001/test_episodes"); verify in the
        # wrapper.
        collected.append(step_info['episode'])
    return collected