Example #1
def __call__(self, trainer):
    # Sample on the CPU so this extension does not compete with training for GPU memory
    if self.args.gpu >= 0:
        self.model.to_cpu()
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        self.model.reset_state()
        z_t_plus_1s = []
        dones = []
        # Roll the MDN-RNN forward one step per stored (z_t, action) pair
        for i in range(self.z_t.shape[0]):
            output = self.model(self.z_t[i],
                                self.action[i],
                                temperature=self.args.sample_temperature)
            if self.args.predict_done:
                z_t_plus_1, done = output
                z_t_plus_1 = z_t_plus_1.data
                done = done.data
            else:
                z_t_plus_1 = output.data
            z_t_plus_1s.append(z_t_plus_1)
            if self.args.predict_done:
                dones.append(done[0])
        z_t_plus_1s = np.asarray(z_t_plus_1s)
        dones = np.asarray(dones).reshape(-1)
        # Decode the predicted latents back into images
        img_t_plus_1 = post_process_image_tensor(
            self.vision.decode(z_t_plus_1s).data)
        if self.args.predict_done:
            img_t_plus_1[np.where(
                dones >= 0.5), :, :, :] = 0  # Black out frames predicted as done
        save_images_collage(img_t_plus_1,
                            os.path.join(
                                self.output_dir,
                                'train_t_plus_1_{}.png'.format(
                                    trainer.updater.iteration)),
                            pre_processed=False)
    if self.args.gpu >= 0:
        self.model.to_gpu()
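
This __call__ lets the sampler run as a Chainer trainer extension. A minimal wiring sketch, mirroring how Example #4 registers it (ImageSampler and its constructor arguments are taken from there):

sampler = ImageSampler(model.copy(), vision, args, output_dir, sample_z_t, sample_action)
trainer.extend(sampler, trigger=(args.snapshot_interval, 'iteration'))
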
Example #2
def __call__(self, frames, pre_process=False):
    # Accept a single frame by promoting it to a batch of one
    if len(frames.shape) == 3:
        frames = F.expand_dims(frames, 0)
    if pre_process:
        frames = pre_process_image_tensor(frames)
    # Reconstruct through the VAE: encode to a latent z, then decode back to pixels
    frames_variational = self.decode(self.encode(frames, return_z=True))
    if pre_process:
        frames_variational = post_process_image_tensor(frames_variational)
    return frames_variational
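
The pre_process_image_tensor/post_process_image_tensor helpers used throughout these examples are not shown. A minimal sketch, assuming they convert between uint8 NHWC frames and the float32 NCHW tensors in [0, 1] that the networks consume (the actual helpers may use a different layout or range):

import numpy as np

def pre_process_image_tensor(frames):
    # Assumed: uint8 NHWC in [0, 255] -> float32 NCHW in [0, 1]
    return np.transpose(frames.astype(np.float32) / 255., (0, 3, 1, 2))

def post_process_image_tensor(frames):
    # Assumed: float32 NCHW in [0, 1] -> uint8 NHWC in [0, 255]
    return np.transpose(np.clip(frames * 255., 0, 255), (0, 2, 3, 1)).astype(np.uint8)
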
Example #3
def worker(worker_arg_tuple):
    try:
        rollout_num, args, vision, model, W_c, b_c, output_dir = worker_arg_tuple

        # The same seed is inherited by every process under multiprocessing, so reseed per worker
        np.random.seed()

        model.reset_state()

        if args.game in DOOM_GAMES:
            env = ViZDoomWrapper(args.game)
        else:
            env = gym.make(args.game)

        h_t = np.zeros(args.hidden_dim).astype(np.float32)
        c_t = np.zeros(args.hidden_dim).astype(np.float32)

        t = 0
        cumulative_reward = 0
        if args.record:
            frames_array = []

        observation = env.reset()
        if args.record:
            frames_array.append(observation)

        start_time = time.time()
        while True:
            observation = imresize(observation,
                                   (args.frame_resize, args.frame_resize))
            observation = pre_process_image_tensor(
                np.expand_dims(observation, 0))

            z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, None)

            observation, reward, done, _ = env.step(a_t)
            model(z_t, a_t, temperature=args.temperature)

            if args.record:
                frames_array.append(observation)
            cumulative_reward += reward

            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1

            if done:
                break

        log(
            ID,
            "> Rollout #{} finished after {} timesteps in {:.2f}s with cumulative reward {:.2f}"
            .format((rollout_num + 1), t, (time.time() - start_time),
                    cumulative_reward))

        env.close()

        if args.record:
            frames_array = np.asarray(frames_array)
            imageio.mimsave(os.path.join(output_dir,
                                         str(rollout_num + 1) + '.gif'),
                            post_process_image_tensor(frames_array),
                            fps=20)

        return cumulative_reward
    except Exception:
        print(traceback.format_exc())
        return 0.
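
worker takes a single argument tuple, which is the shape multiprocessing.Pool.map expects. A minimal driver sketch under that assumption (num_rollouts and cores are hypothetical parameters, not taken from the examples above):

from multiprocessing import Pool

def run_rollouts(num_rollouts, cores, args, vision, model, W_c, b_c, output_dir):
    worker_arg_tuples = [(rollout_num, args, vision, model, W_c, b_c, output_dir)
                         for rollout_num in range(num_rollouts)]
    # Fan the rollouts out over a process pool; each worker reseeds numpy itself
    with Pool(processes=cores) as pool:
        cumulative_rewards = pool.map(worker, worker_arg_tuples)
    return cumulative_rewards
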
Example #4
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir',
                        '-d',
                        default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument(
        '--game', default='CarRacing-v0',
        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name',
                        default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument(
        '--load_batch_size',
        default=100,
        type=int,
        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument(
        '--model',
        '-m',
        default='',
        help='Initialize the model from given file, or "default" for one in data folder')
    parser.add_argument('--no_resume',
                        action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument(
        '--resume_from',
        '-r',
        default='',
        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--snapshot_interval',
                        '-s',
                        default=200,
                        type=int,
                        help='snapshot every x iterations')
    parser.add_argument('--z_dim',
                        '-z',
                        default=32,
                        type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--hidden_dim',
                        default=256,
                        type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures',
                        default=5,
                        type=int,
                        help='number of gaussian mixtures for MDN')
    parser.add_argument('--no_progress_bar',
                        '-p',
                        action='store_true',
                        help='Do not display the progress bar during training')
    parser.add_argument('--predict_done',
                        action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature',
                        default=1.,
                        type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip',
                        default=0.,
                        type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length',
                        type=int,
                        default=128,
                        help='sequence length for LSTM for TBPTT')
    parser.add_argument('--in_dream',
                        action='store_true',
                        help='Whether to train in dream, or real environment')
    parser.add_argument(
        '--initial_z_noise',
        default=0.,
        type=float,
        help="Gaussian noise std for initial z for dream training")
    parser.add_argument('--done_threshold',
                        default=0.5,
                        type=float,
                        help='Done probability at or above which a frame counts as done')
    parser.add_argument('--temperature',
                        '-t',
                        default=1.0,
                        type=float,
                        help='Temperature (tau) for MDN-RNN (model)')
    parser.add_argument('--dream_max_len',
                        default=2100,
                        type=int,
                        help="Maximum timesteps for dream to avoid runaway")
    parser.add_argument(
        '--weights_type',
        default=1,
        type=int,
        help="1=action_dim*(z_dim+hidden_dim), 2=z_dim+2*hidden_dim")
    parser.add_argument(
        '--initial_z_size',
        default=10000,
        type=int,
        help="How many real initial frames to load for dream training")

    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    vision_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              'vision')

    log(ID, "Starting")

    max_iter = 0
    auto_resume_file = None
    files = os.listdir(output_dir)
    for file in files:
        if re.match(r'^snapshot_iter_', file):
            snapshot_iter = int(re.search(r'\d+', file).group())
            if snapshot_iter > max_iter:
                max_iter = snapshot_iter
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures,
                    args.predict_done)
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"),
                                 vision)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size,
                         verbose=False)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=1,
                                                  shuffle=False)

    env = gym.make(args.game)
    action_dim = len(env.action_space.low)
    args.action_dim = action_dim

    updater = TBPTTUpdater(train_iter, optimizer, model.get_loss_func(), args,
                           model)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(10 if args.gpu >= 0 else 1,
                                      'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss', 'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=10 if args.gpu >= 0 else 1))

    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        img_t_plus_1[np.where(
            done[0:sample_size] >= 0.5), :, :, :] = 0  # Black out frames flagged as done
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1,
                        os.path.join(output_dir, 'train_t_plus_1.png'))
    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler,
                   trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID,
            "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)

    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    # current_z_t = np.random.randn(64).astype(np.float32)  # Noise as starting frame
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[
        np.random.randint(len(train))]  # Pick a random real rollout
    current_z_t = rollout_z_t[0]  # Starting frame from the real rollout
    current_z_t += np.random.normal(0, 0.5, current_z_t.shape).astype(
        np.float32)  # Add some noise to the real rollout starting frame
    all_z_t = [current_z_t]
    # current_action = np.asarray([0., 1.]).astype(np.float32)
    for i in range(rollout_z_t.shape[0]):
        # if i != 0 and i % 200 == 0: current_action = 1 - current_action  # Flip actions every 200 frames
        current_action = np.expand_dims(
            rollout_action[i], 0)  # follow actions performed in a real rollout
        output = model(current_z_t,
                       current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
            # print(i, current_action, done)
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(np.asarray(all_z_t).astype(
        np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs,
                    fps=20)

    log(ID, "Done")
Example #5
def rollout(rollout_arg_tuple):
    try:
        global initial_z_t
        generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple

        # The same starting seed gets passed in multiprocessing, need to reset it for each process:
        np.random.seed()

        if not with_frames:
            log(
                ID, ">>> Starting generation #" + str(generation) +
                ", mutation #" + str(mutation_idx + 1) + ", trial #" +
                str(trial + 1))
        else:
            frames_array = []
        start_time = time.time()

        model.reset_state()

        if args.in_dream:
            z_t, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))]
            z_t = z_t[0]
            if gpu is not None:
                z_t = cuda.to_gpu(z_t)
            if with_frames:
                observation = vision.decode(z_t).data
                if gpu is not None:
                    observation = cp.asnumpy(observation)
                observation = post_process_image_tensor(observation)[0]
            else:
                # free up precious GPU memory:
                if gpu is not None:
                    vision.to_cpu()
                vision = None
            if args.initial_z_noise > 0.:
                if gpu is not None:
                    z_t += cp.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(cp.float32)
                else:
                    z_t += np.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(np.float32)
        else:
            if args.game in DOOM_GAMES:
                env = ViZDoomWrapper(args.game)
            else:
                env = gym.make(args.game)
            observation = env.reset()
        if with_frames:
            frames_array.append(observation)

        if gpu is not None:
            h_t = cp.zeros(args.hidden_dim).astype(cp.float32)
            c_t = cp.zeros(args.hidden_dim).astype(cp.float32)
        else:
            h_t = np.zeros(args.hidden_dim).astype(np.float32)
            c_t = np.zeros(args.hidden_dim).astype(np.float32)

        done = False
        cumulative_reward = 0
        t = 0
        while not done:
            if not args.in_dream:
                observation = imresize(observation,
                                       (args.frame_resize, args.frame_resize))
                observation = pre_process_image_tensor(
                    np.expand_dims(observation, 0))

                if gpu is not None:
                    observation = cuda.to_gpu(observation)
                z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu)

            if args.in_dream:
                z_t, done = model(z_t, a_t, temperature=args.temperature)
                done = done.data[0]
                if with_frames:
                    observation = post_process_image_tensor(
                        vision.decode(z_t).data)[0]
                # The dream provides no reward signal, so reward survival at each timestep
                reward = 1
                done = bool(done >= args.done_threshold)
            else:
                observation, reward, done, _ = env.step(
                    a_t if gpu is None else cp.asnumpy(a_t))
                model(z_t, a_t, temperature=args.temperature)
            if with_frames:
                frames_array.append(observation)

            cumulative_reward += reward

            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1
            if max_timesteps is not None and t == max_timesteps:
                break
            elif args.in_dream and t == args.dream_max_len:
                log(
                    ID,
                    ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!"
                    .format(generation, str(mutation_idx + 1), str(trial + 1),
                            t))
                break

        if not args.in_dream:
            env.close()

        if not with_frames:
            log(
                ID,
                ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}"
                .format(generation, str(mutation_idx + 1), str(trial + 1), t,
                        (time.time() - start_time), cumulative_reward))
            return cumulative_reward
        else:
            frames_array = np.asarray(frames_array)
            if args.game in DOOM_GAMES and not args.in_dream:
                frames_array = post_process_image_tensor(frames_array)
            return cumulative_reward, frames_array
    except Exception:
        print(traceback.format_exc())
        return 0.
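
Both worker (Example #3) and rollout call an action(...) controller helper that is not shown. Given the --weights_type help string in Example #4 ('1=action_dim*(z_dim+hidden_dim)'), a plausible CPU-only sketch of the weights_type == 1 case (an assumption, not the confirmed implementation):

import numpy as np

def action(args, W_c, b_c, z_t, h_t, c_t, gpu):
    # One linear layer over the concatenated [z_t, h_t] features, squashed to [-1, 1]
    features = np.concatenate((z_t, h_t))   # (z_dim + hidden_dim,)
    W = W_c.reshape(args.action_dim, -1)    # (action_dim, z_dim + hidden_dim)
    return np.tanh(W.dot(features) + b_c).astype(np.float32)
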