Example 1
    def __call__(self, trainer):
        if self.args.gpu >= 0:
            self.model.to_cpu()
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            self.model.reset_state()
            z_t_plus_1s = []
            dones = []
            for i in range(self.z_t.shape[0]):
                output = self.model(self.z_t[i],
                                    self.action[i],
                                    temperature=self.args.sample_temperature)
                if self.args.predict_done:
                    z_t_plus_1, done = output
                    z_t_plus_1 = z_t_plus_1.data
                    done = done.data
                else:
                    z_t_plus_1 = output.data
                z_t_plus_1s.append(z_t_plus_1)
                if self.args.predict_done:
                    dones.append(done[0])
            z_t_plus_1s = np.asarray(z_t_plus_1s)
            dones = np.asarray(dones).reshape(-1)
            img_t_plus_1 = post_process_image_tensor(
                self.vision.decode(z_t_plus_1s).data)
            if self.args.predict_done:
                img_t_plus_1[np.where(
                    dones >= 0.5), :, :, :] = 0  # Make all 'done' frames black
            save_images_collage(img_t_plus_1,
                                os.path.join(
                                    self.output_dir,
                                    'train_t_plus_1_{}.png'.format(
                                        trainer.updater.iteration)),
                                pre_processed=False)
        if self.args.gpu >= 0:
            self.model.to_gpu()
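The __call__ above relies on attributes set up when the extension is created: the MDN-RNN being trained (self.model), the vision CVAE used to decode latents (self.vision), the parsed arguments, an output directory, and fixed arrays of latents (self.z_t) and actions (self.action). Example 4 constructs it as ImageSampler(model.copy(), vision, args, output_dir, sample_z_t, sample_action), so a constructor consistent with that call could look like the minimal sketch below (an illustration based on that call, not the repository's exact code):

import chainer


class ImageSampler(chainer.training.Extension):
    """Trainer extension that decodes MDN-RNN predictions into an image collage."""

    def __init__(self, model, vision, args, output_dir, z_t, action):
        self.model = model            # MDN-RNN (a copy, so resetting its state is safe)
        self.vision = vision          # CVAE used to decode latents back into frames
        self.args = args
        self.output_dir = output_dir
        self.z_t = z_t                # latent vectors z_t from a sample rollout
        self.action = action          # actions taken at those timesteps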
Example 2
    def __call__(self, trainer):
        if self.args.gpu >= 0:
            self.model.to_cpu()

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            frames_variational = self.model(self.frames)
            save_images_collage(
                frames_variational.data,
                os.path.join(
                    self.output_dir, 'train_reconstructed_{}.png'.format(
                        trainer.updater.iteration)))

        with chainer.using_config('train', False), chainer.no_backprop_mode():
            frames_variational = self.model.decode(self.z)
            save_images_collage(
                frames_variational.data,
                os.path.join(
                    self.output_dir,
                    'sampled_{}.png'.format(trainer.updater.iteration)))

        if self.args.gpu >= 0:
            self.model.to_gpu()
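Example 3 creates this extension as Sampler(model, args, output_dir, sample_frames, sample_z), so the __call__ above presumably depends on a constructor along these lines (a sketch under that assumption):

import chainer


class Sampler(chainer.training.Extension):
    """Trainer extension that saves CVAE reconstructions and random-z decodings."""

    def __init__(self, model, args, output_dir, frames, z):
        self.model = model            # CVAE being trained
        self.args = args
        self.output_dir = output_dir
        self.frames = frames          # fixed batch of real frames to reconstruct
        self.z = z                    # fixed batch of random latent vectors to decode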
Example 3
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir',
                        '-d',
                        default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument(
        '--game', default='CarRacing-v0',
        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name',
                        default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument(
        '--load_batch_size',
        default=10,
        type=int,
        help='Load game frames in batches so as not to run out of memory')
    parser.add_argument(
        '--model',
        '-m',
        default='',
        help=
        'Initialize the model from given file, or "default" for one in data folder'
    )
    parser.add_argument('--no_resume',
                        action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument(
        '--resume_from',
        '-r',
        default='',
        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        default=1,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument(
        '--snapshot_interval',
        '-s',
        default=100,
        type=int,
        help='Snapshot every N iterations (i.e. every N * batch_size images processed)')
    parser.add_argument('--z_dim',
                        '-z',
                        default=32,
                        type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--batch_size',
                        '-b',
                        type=int,
                        default=100,
                        help='learning minibatch size')
    parser.add_argument('--no_progress_bar',
                        '-p',
                        action='store_true',
                        help='Do not display progress bar during training')
    parser.add_argument('--kl_tolerance',
                        type=float,
                        default=0.5,
                        help='KL tolerance passed to the VAE loss function')

    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    mkdir(output_dir)

    max_iter = 0
    auto_resume_file = None
    for file_name in os.listdir(output_dir):
        if re.match(r'^snapshot_iter_', file_name):
            snapshot_iter = int(re.search(r'\d+', file_name).group())
            if snapshot_iter > max_iter:
                max_iter = snapshot_iter
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = CVAE(args.z_dim)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    log(ID, "Loading training data")
    train = VisionDataset(dir=random_rollouts_dir,
                          load_batch_size=args.load_batch_size,
                          shuffle=True,
                          verbose=True)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  args.batch_size,
                                                  shuffle=False)

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       loss_func=model.get_loss_func(
                                           args.kl_tolerance))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(100 if args.gpu >= 0 else 10,
                                      'iteration')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/kl_loss', 'main/rec_loss',
            'elapsed_time'
        ]))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(
                update_interval=100 if args.gpu >= 0 else 10))

    sample_idx = np.random.choice(range(train.get_current_batch_size()),
                                  64,
                                  replace=False)
    sample_frames = chainer.Variable(np.asarray(train[sample_idx]))
    np.random.seed(31337)
    sample_z = chainer.Variable(
        np.random.normal(0, 1, (64, args.z_dim)).astype(np.float32))
    save_images_collage(sample_frames.data,
                        os.path.join(output_dir, 'train.png'))
    sampler = Sampler(model, args, output_dir, sample_frames, sample_z)
    trainer.extend(sampler, trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID,
            "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        sampler(trainer)

    if not args.test:
        log(ID, "Saving latent z's for all training data")
        train = VisionDataset(dir=random_rollouts_dir,
                              load_batch_size=args.load_batch_size,
                              shuffle=False,
                              verbose=True)
        total_batches = train.get_total_batches()
        for batch in range(total_batches):
            gc.collect()
            train.load_batch(batch)
            (batch_frames, batch_rollouts,
             batch_rollouts_counts) = train.get_current_batch()
            mu = None
            ln_var = None
            splits = batch_frames.shape[0] // args.batch_size
            if batch_frames.shape[0] % args.batch_size != 0:
                splits += 1
            for i in range(splits):
                start_idx = i * args.batch_size
                end_idx = (i + 1) * args.batch_size
                sample_frames = batch_frames[start_idx:end_idx]
                if args.gpu >= 0:
                    sample_frames = chainer.Variable(cp.asarray(sample_frames))
                else:
                    sample_frames = chainer.Variable(sample_frames)
                this_mu, this_ln_var = model.encode(sample_frames)
                this_mu = this_mu.data
                this_ln_var = this_ln_var.data
                if args.gpu >= 0:
                    this_mu = cp.asnumpy(this_mu)
                    this_ln_var = cp.asnumpy(this_ln_var)
                if mu is None:
                    mu = this_mu
                    ln_var = this_ln_var
                else:
                    mu = np.concatenate((mu, this_mu), axis=0)
                    ln_var = np.concatenate((ln_var, this_ln_var), axis=0)
            running_count = 0
            for rollout in batch_rollouts:
                rollout_dir = os.path.join(random_rollouts_dir, rollout)
                rollout_count = batch_rollouts_counts[rollout]
                start_idx = running_count
                end_idx = running_count + rollout_count
                this_mu = mu[start_idx:end_idx]
                this_ln_var = ln_var[start_idx:end_idx]
                np.savez_compressed(os.path.join(rollout_dir, "mu+ln_var.npz"),
                                    mu=this_mu,
                                    ln_var=this_ln_var)
                running_count = running_count + rollout_count
            log(ID, "> Processed z's for rollouts " + str(batch_rollouts))
            # Free up memory:
            batch_frames = None
            mu = None
            ln_var = None

    log(ID, "Done")
Example 4
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir',
                        '-d',
                        default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument(
        '--game', default='CarRacing-v0',
        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name',
                        default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument(
        '--load_batch_size',
        default=100,
        type=int,
        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument(
        '--model',
        '-m',
        default='',
        help=
        'Initialize the model from given file, or "default" for one in data folder'
    )
    parser.add_argument('--no_resume',
                        action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument(
        '--resume_from',
        '-r',
        default='',
        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test',
                        action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--snapshot_interval',
                        '-s',
                        default=200,
                        type=int,
                        help='snapshot every x games')
    parser.add_argument('--z_dim',
                        '-z',
                        default=32,
                        type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--hidden_dim',
                        default=256,
                        type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures',
                        default=5,
                        type=int,
                        help='number of gaussian mixtures for MDN')
    parser.add_argument('--no_progress_bar',
                        '-p',
                        action='store_true',
                        help='Do not display progress bar during training')
    parser.add_argument('--predict_done',
                        action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature',
                        default=1.,
                        type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip',
                        default=0.,
                        type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length',
                        type=int,
                        default=128,
                        help='sequence length for LSTM for TBPTT')
    parser.add_argument('--in_dream',
                        action='store_true',
                        help='Whether to train in dream, or real environment')
    parser.add_argument(
        '--initial_z_noise',
        default=0.,
        type=float,
        help="Gaussian noise std for initial z for dream training")
    parser.add_argument('--done_threshold',
                        default=0.5,
                        type=float,
                        help='Done probability threshold above which an episode is considered done')
    parser.add_argument('--temperature',
                        '-t',
                        default=1.0,
                        type=float,
                        help='Temperature (tau) for MDN-RNN (model)')
    parser.add_argument('--dream_max_len',
                        default=2100,
                        type=int,
                        help="Maximum timesteps for dream to avoid runaway")
    parser.add_argument(
        '--weights_type',
        default=1,
        type=int,
        help="1=action_dim*(z_dim+hidden_dim), 2=z_dim+2*hidden_dim")
    parser.add_argument(
        '--initial_z_size',
        default=10000,
        type=int,
        help="How many real initial frames to load for dream training")

    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    vision_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              'vision')

    log(ID, "Starting")

    max_iter = 0
    auto_resume_file = None
    for file_name in os.listdir(output_dir):
        if re.match(r'^snapshot_iter_', file_name):
            snapshot_iter = int(re.search(r'\d+', file_name).group())
            if snapshot_iter > max_iter:
                max_iter = snapshot_iter
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures,
                    args.predict_done)
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"),
                                 vision)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size,
                         verbose=False)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=1,
                                                  shuffle=False)

    env = gym.make(args.game)
    action_dim = len(env.action_space.low)
    args.action_dim = action_dim

    updater = TBPTTUpdater(train_iter, optimizer, model.get_loss_func(), args,
                           model)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(10 if args.gpu >= 0 else 1,
                                      'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss', 'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=10 if args.gpu >= 0 else 1))

    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        img_t_plus_1[np.where(
            done[0:sample_size] >= 0.5), :, :, :] = 0  # Make 'done' frames black
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1,
                        os.path.join(output_dir, 'train_t_plus_1.png'))
    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler,
                   trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID,
            "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)

    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    # current_z_t = np.random.randn(64).astype(np.float32)  # Noise as starting frame
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[
        np.random.randint(len(train))]  # Pick a random real rollout
    current_z_t = rollout_z_t[0]  # Starting frame from the real rollout
    current_z_t += np.random.normal(0, 0.5, current_z_t.shape).astype(
        np.float32)  # Add some noise to the real rollout starting frame
    all_z_t = [current_z_t]
    # current_action = np.asarray([0., 1.]).astype(np.float32)
    for i in range(rollout_z_t.shape[0]):
        # if i != 0 and i % 200 == 0: current_action = 1 - current_action  # Flip actions every 200 frames
        current_action = np.expand_dims(
            rollout_action[i], 0)  # follow actions performed in a real rollout
        output = model(current_z_t,
                       current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
            # print(i, current_action, done)
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(np.asarray(all_z_t).astype(
        np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs,
                    fps=20)

    log(ID, "Done")