Example #1
def __call__(self, frames, pre_process=False):
    # Accept a single (C, H, W) frame by promoting it to a batch of one.
    if len(frames.shape) == 3:
        frames = F.expand_dims(frames, 0)
    if pre_process:
        frames = pre_process_image_tensor(frames)
    # Round-trip through the autoencoder: encode to the latent z, then decode.
    frames_variational = self.decode(self.encode(frames, return_z=True))
    if pre_process:
        frames_variational = post_process_image_tensor(frames_variational)
    return frames_variational
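A minimal usage sketch for the method above, assuming a trained autoencoder instance named vision and 64x64 RGB frames; both the name and the shape are illustrative assumptions, not taken from the example:

import numpy as np

# `vision` is an assumed trained instance of the class the method belongs to.
frame = np.random.rand(3, 64, 64).astype(np.float32)  # fake CHW frame in [0, 1]
reconstruction = vision(frame)  # batch dimension is added internally
print(reconstruction.shape)     # expected: (1, 3, 64, 64)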
Example #2
def load_frames_worker(frames_file):
    # Each rollout is stored as a gzip-compressed .npy archive of frames.
    with gzip.GzipFile(frames_file, "r") as file:
        rollout_frames = pre_process_image_tensor(np.load(file))
    return rollout_frames
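A hypothetical driver for the worker above; the rollouts/*.npy.gz path pattern is invented for illustration, but the per-file signature makes the function a natural fit for multiprocessing.Pool:

import glob
import multiprocessing

if __name__ == "__main__":
    files = sorted(glob.glob("rollouts/*.npy.gz"))  # assumed file layout
    with multiprocessing.Pool() as pool:
        all_frames = pool.map(load_frames_worker, files)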
Example #3
def worker(worker_arg_tuple):
    try:
        rollout_num, args, vision, model, W_c, b_c, output_dir = worker_arg_tuple

        # Each worker inherits the parent's RNG state; reseed so rollouts
        # are not identical across processes.
        np.random.seed()

        model.reset_state()

        if args.game in DOOM_GAMES:
            env = ViZDoomWrapper(args.game)
        else:
            env = gym.make(args.game)

        # Zero-initialize the recurrent state fed to the controller.
        h_t = np.zeros(args.hidden_dim).astype(np.float32)
        c_t = np.zeros(args.hidden_dim).astype(np.float32)

        t = 0
        cumulative_reward = 0
        if args.record:
            frames_array = []

        observation = env.reset()
        if args.record:
            frames_array.append(observation)

        start_time = time.time()
        while True:
            observation = imresize(observation,
                                   (args.frame_resize, args.frame_resize))
            observation = pre_process_image_tensor(
                np.expand_dims(observation, 0))

            # Encode the current frame into the latent vector z_t.
            z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, None)

            observation, reward, done, _ = env.step(a_t)
            # Advance the recurrent world model with the latent and action.
            model(z_t, a_t, temperature=args.temperature)

            if args.record:
                frames_array.append(observation)
            cumulative_reward += reward

            # Carry the updated recurrent state into the next controller step.
            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1

            if done:
                break

        log(
            ID,
            "> Rollout #{} finished after {} timesteps in {:.2f}s with cumulative reward {:.2f}"
            .format((rollout_num + 1), t, (time.time() - start_time),
                    cumulative_reward))

        env.close()

        if args.record:
            frames_array = np.asarray(frames_array)
            imageio.mimsave(os.path.join(output_dir,
                                         str(rollout_num + 1) + '.gif'),
                            post_process_image_tensor(frames_array),
                            fps=20)

        return cumulative_reward
    except Exception:
        print(traceback.format_exc())
        return 0.
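A hypothetical dispatch loop for the worker above; num_rollouts and the surrounding args/vision/model/W_c/b_c/output_dir objects are assumed to already exist and are not defined in the example:

import multiprocessing

import numpy as np

arg_tuples = [(i, args, vision, model, W_c, b_c, output_dir)
              for i in range(num_rollouts)]  # num_rollouts is assumed
with multiprocessing.Pool() as pool:
    rewards = pool.map(worker, arg_tuples)
print("mean cumulative reward over {} rollouts: {:.2f}"
      .format(num_rollouts, np.mean(rewards)))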
Example #4
def rollout(rollout_arg_tuple):
    try:
        global initial_z_t
        generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple

        # Each worker inherits the same RNG seed from the parent process, so
        # reseed here to decorrelate the rollouts:
        np.random.seed()

        if not with_frames:
            log(
                ID, ">>> Starting generation #" + str(generation) +
                ", mutation #" + str(mutation_idx + 1) + ", trial #" +
                str(trial + 1))
        else:
            frames_array = []
        start_time = time.time()

        model.reset_state()

        if args.in_dream:
            # Dream rollout: start from a randomly chosen recorded initial
            # latent instead of a real environment observation.
            z_t, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))]
            z_t = z_t[0]
            if gpu is not None:
                z_t = cuda.to_gpu(z_t)
            if with_frames:
                observation = vision.decode(z_t).data
                if gpu is not None:
                    observation = cp.asnumpy(observation)
                observation = post_process_image_tensor(observation)[0]
            else:
                # free up precious GPU memory:
                if gpu is not None:
                    vision.to_cpu()
                vision = None
            if args.initial_z_noise > 0.:
                if gpu is not None:
                    z_t += cp.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(cp.float32)
                else:
                    z_t += np.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(np.float32)
        else:
            if args.game in DOOM_GAMES:
                env = ViZDoomWrapper(args.game)
            else:
                env = gym.make(args.game)
            observation = env.reset()
        if with_frames:
            frames_array.append(observation)

        if gpu is not None:
            h_t = cp.zeros(args.hidden_dim).astype(cp.float32)
            c_t = cp.zeros(args.hidden_dim).astype(cp.float32)
        else:
            h_t = np.zeros(args.hidden_dim).astype(np.float32)
            c_t = np.zeros(args.hidden_dim).astype(np.float32)

        done = False
        cumulative_reward = 0
        t = 0
        while not done:
            if not args.in_dream:
                observation = imresize(observation,
                                       (args.frame_resize, args.frame_resize))
                observation = pre_process_image_tensor(
                    np.expand_dims(observation, 0))

                if gpu is not None:
                    observation = cuda.to_gpu(observation)
                z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu)

            if args.in_dream:
                # Step the world model itself: it predicts the next latent
                # and an episode-termination probability.
                z_t, done = model(z_t, a_t, temperature=args.temperature)
                done = done.data[0]
                if with_frames:
                    observation = post_process_image_tensor(
                        vision.decode(z_t).data)[0]
                reward = 1  # survival reward while dreaming
                done = done >= args.done_threshold
            else:
                observation, reward, done, _ = env.step(
                    a_t if gpu is None else cp.asnumpy(a_t))
                model(z_t, a_t, temperature=args.temperature)
            if with_frames:
                frames_array.append(observation)

            cumulative_reward += reward

            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1
            if max_timesteps is not None and t == max_timesteps:
                break
            elif args.in_dream and t == args.dream_max_len:
                log(
                    ID,
                    ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!"
                    .format(generation, str(mutation_idx + 1), str(trial + 1),
                            t))
                break

        if not args.in_dream:
            env.close()

        if not with_frames:
            log(
                ID,
                ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}"
                .format(generation, str(mutation_idx + 1), str(trial + 1), t,
                        (time.time() - start_time), cumulative_reward))
            return cumulative_reward
        else:
            frames_array = np.asarray(frames_array)
            if args.game in DOOM_GAMES and not args.in_dream:
                frames_array = post_process_image_tensor(frames_array)
            return cumulative_reward, frames_array
    except Exception:
        print(traceback.format_exc())
        return 0.
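A hypothetical evaluation sketch in the spirit of the evolution-strategy training loop this worker appears to come from; in a real loop each mutation would carry its own controller parameters rather than the single shared W_c/b_c used here, and every name below (generation, num_mutations, num_trials, args, vision, model) is assumed:

import multiprocessing

work = [(generation, m, t, args, vision, model, None, W_c, b_c, None, False)
        for m in range(num_mutations)
        for t in range(num_trials)]  # CPU-only (gpu=None), no frame recording
with multiprocessing.Pool() as pool:
    rewards = pool.map(rollout, work)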