def __call__(self, frames, pre_process=False):
    """Reconstruct ``frames`` by a full pass through the VAE (encode, then decode).

    A single un-batched frame (3-D tensor) is promoted to a batch of one.
    When ``pre_process`` is True, the input is normalized before encoding and
    the reconstruction is denormalized before being returned.
    """
    # Promote a lone frame to a batch of one so encode/decode see 4-D input.
    batch = F.expand_dims(frames, 0) if len(frames.shape) == 3 else frames
    if pre_process:
        batch = pre_process_image_tensor(batch)
    latent = self.encode(batch, return_z=True)
    reconstruction = self.decode(latent)
    if pre_process:
        reconstruction = post_process_image_tensor(reconstruction)
    return reconstruction
def load_frames_worker(frames_file):
    """Load one gzipped ``.npy`` rollout file and return its pre-processed frames."""
    with gzip.GzipFile(frames_file, "r") as fh:
        raw_frames = np.load(fh)
    return pre_process_image_tensor(raw_frames)
def worker(worker_arg_tuple):
    """Run one rollout of the full pipeline (vision -> controller -> env -> model).

    ``worker_arg_tuple`` bundles everything needed so this can run under
    ``multiprocessing``: (rollout_num, args, vision, model, W_c, b_c, output_dir).
    Returns the cumulative reward of the episode, or 0. on any error
    (errors are printed, not raised, so a pool of workers keeps going).
    """
    try:
        rollout_num, args, vision, model, W_c, b_c, output_dir = worker_arg_tuple

        # Forked processes inherit the parent's RNG state; re-seed so each
        # rollout gets its own randomness.
        np.random.seed()

        # Clear the recurrent model's hidden state from any previous episode.
        model.reset_state()

        if args.game in DOOM_GAMES:
            env = ViZDoomWrapper(args.game)
        else:
            env = gym.make(args.game)

        # Controller inputs: the model's hidden and cell state, zeroed at start.
        h_t = np.zeros(args.hidden_dim).astype(np.float32)
        c_t = np.zeros(args.hidden_dim).astype(np.float32)

        t = 0
        cumulative_reward = 0
        if args.record:
            frames_array = []

        observation = env.reset()
        if args.record:
            frames_array.append(observation)

        start_time = time.time()
        while True:
            # Resize the raw frame and normalize it for the vision model.
            observation = imresize(observation, (args.frame_resize, args.frame_resize))
            observation = pre_process_image_tensor(
                np.expand_dims(observation, 0))

            # Encode the frame to a latent vector, pick an action from
            # (z_t, h_t, c_t) via the linear controller, then step the env.
            z_t = vision.encode(observation, return_z=True).data[0]
            a_t = action(args, W_c, b_c, z_t, h_t, c_t, None)
            observation, reward, done, _ = env.step(a_t)
            # Advance the recurrent model with the (z, a) pair so its hidden
            # state reflects this step before we read it back below.
            model(z_t, a_t, temperature=args.temperature)

            if args.record:
                frames_array.append(observation)

            cumulative_reward += reward

            # Refresh the controller's view of the model's recurrent state.
            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1

            if done:
                break

        log(
            ID,
            "> Rollout #{} finished after {} timesteps in {:.2f}s with cumulative reward {:.2f}"
            .format((rollout_num + 1), t, (time.time() - start_time),
                    cumulative_reward))

        env.close()

        if args.record:
            # Save the recorded episode as an animated GIF, denormalized
            # back to displayable pixel values.
            frames_array = np.asarray(frames_array)
            imageio.mimsave(os.path.join(output_dir, str(rollout_num + 1) + '.gif'),
                            post_process_image_tensor(frames_array),
                            fps=20)

        return cumulative_reward
    except Exception:
        # Best-effort: report the traceback and return a neutral reward so a
        # failed worker does not take down the whole pool.
        print(traceback.format_exc())
        return 0.
def rollout(rollout_arg_tuple):
    """Evaluate one controller mutation for one trial, in the real env or "in dream".

    ``rollout_arg_tuple`` bundles (generation, mutation_idx, trial, args, vision,
    model, gpu, W_c, b_c, max_timesteps, with_frames) so this can run under
    ``multiprocessing``. In dream mode the recurrent model itself generates the
    next latent state and a done signal instead of a real environment.

    Returns cumulative reward, or (cumulative reward, frames array) when
    ``with_frames`` is True; returns 0. on any error (printed, not raised).
    """
    try:
        global initial_z_t
        generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple

        # The same starting seed gets passed in multiprocessing, need to reset it for each process:
        np.random.seed()

        if not with_frames:
            log(
                ID, ">>> Starting generation #" + str(generation) +
                ", mutation #" + str(mutation_idx + 1) + ", trial #" +
                str(trial + 1))
        else:
            frames_array = []
        start_time = time.time()

        # Clear the recurrent model's state from any previous rollout.
        model.reset_state()

        if args.in_dream:
            # Dream mode: seed the latent state from a randomly chosen
            # recorded initial z rather than from a real environment reset.
            z_t, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))]
            z_t = z_t[0]
            if gpu is not None:
                z_t = cuda.to_gpu(z_t)
            if with_frames:
                # Decode the latent so we have a visible frame to record.
                observation = vision.decode(z_t).data
                if gpu is not None:
                    observation = cp.asnumpy(observation)
                observation = post_process_image_tensor(observation)[0]
            else:
                # free up precious GPU memory:
                if gpu is not None:
                    vision.to_cpu()
                vision = None
            if args.initial_z_noise > 0.:
                # Perturb the starting latent so trials don't all begin
                # from exactly the same dream state.
                if gpu is not None:
                    z_t += cp.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(cp.float32)
                else:
                    z_t += np.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(np.float32)
        else:
            # Real-environment mode.
            if args.game in DOOM_GAMES:
                env = ViZDoomWrapper(args.game)
            else:
                env = gym.make(args.game)
            observation = env.reset()
        if with_frames:
            frames_array.append(observation)

        # Controller inputs: model hidden/cell state, zeroed at episode start,
        # allocated on whichever device the rollout runs on.
        if gpu is not None:
            h_t = cp.zeros(args.hidden_dim).astype(cp.float32)
            c_t = cp.zeros(args.hidden_dim).astype(cp.float32)
        else:
            h_t = np.zeros(args.hidden_dim).astype(np.float32)
            c_t = np.zeros(args.hidden_dim).astype(np.float32)

        done = False
        cumulative_reward = 0
        t = 0
        while not done:
            if not args.in_dream:
                # Real env: encode the observed frame into the latent z_t.
                observation = imresize(observation,
                                       (args.frame_resize, args.frame_resize))
                observation = pre_process_image_tensor(
                    np.expand_dims(observation, 0))

                if gpu is not None:
                    observation = cuda.to_gpu(observation)
                z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu)

            if args.in_dream:
                # The model hallucinates the next latent state and a scalar
                # done signal; threshold the latter into a boolean.
                z_t, done = model(z_t, a_t, temperature=args.temperature)
                done = done.data[0]
                if with_frames:
                    observation = post_process_image_tensor(
                        vision.decode(z_t).data)[0]
                # NOTE(review): dream mode uses a constant survival reward of
                # 1 per step — presumably suited to survival-style games; the
                # model does not predict reward here.
                reward = 1
                if done >= args.done_threshold:
                    done = True
                else:
                    done = False
            else:
                observation, reward, done, _ = env.step(
                    a_t if gpu is None else cp.asnumpy(a_t))
                # Advance the model with (z, a) so its recurrent state is
                # current before we read h/c back below.
                model(z_t, a_t, temperature=args.temperature)

            if with_frames:
                frames_array.append(observation)

            cumulative_reward += reward

            # Refresh the controller's view of the model's recurrent state.
            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1

            if max_timesteps is not None and t == max_timesteps:
                break
            elif args.in_dream and t == args.dream_max_len:
                # Hard cap: dreams can fail to ever emit done, so cut them off.
                log(
                    ID,
                    ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!"
                    .format(generation, str(mutation_idx + 1), str(trial + 1),
                            t))
                break

        if not args.in_dream:
            env.close()

        if not with_frames:
            log(
                ID,
                ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}"
                .format(generation, str(mutation_idx + 1), str(trial + 1), t,
                        (time.time() - start_time), cumulative_reward))
            return cumulative_reward
        else:
            frames_array = np.asarray(frames_array)
            # Doom frames come straight from the wrapper normalized; real-env
            # recordings need denormalizing before display. Dream frames were
            # already post-processed per step above.
            if args.game in DOOM_GAMES and not args.in_dream:
                frames_array = post_process_image_tensor(frames_array)
            return cumulative_reward, np.asarray(frames_array)
    except Exception:
        # Best-effort: report and return a neutral reward so one failed
        # trial doesn't kill the whole evaluation pool.
        print(traceback.format_exc())
        return 0.