def __call__(self, trainer):
    if self.args.gpu >= 0:
        self.model.to_cpu()
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        self.model.reset_state()
        z_t_plus_1s = []
        dones = []
        for i in range(self.z_t.shape[0]):
            output = self.model(self.z_t[i], self.action[i],
                                temperature=self.args.sample_temperature)
            if self.args.predict_done:
                z_t_plus_1, done = output
                z_t_plus_1 = z_t_plus_1.data
                done = done.data
            else:
                z_t_plus_1 = output.data
            z_t_plus_1s.append(z_t_plus_1)
            if self.args.predict_done:
                dones.append(done[0])
        z_t_plus_1s = np.asarray(z_t_plus_1s)
        dones = np.asarray(dones).reshape(-1)
        img_t_plus_1 = post_process_image_tensor(
            self.vision.decode(z_t_plus_1s).data)
        if self.args.predict_done:
            # Make all the done frames black
            img_t_plus_1[np.where(dones >= 0.5), :, :, :] = 0
        save_images_collage(
            img_t_plus_1,
            os.path.join(self.output_dir,
                         'train_t_plus_1_{}.png'.format(
                             trainer.updater.iteration)),
            pre_processed=False)
    if self.args.gpu >= 0:
        self.model.to_gpu()
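# --- Hypothetical sketch (not from this repo) --------------------------------
# The sampler above pipes decoded frames through post_process_image_tensor
# before saving. A minimal sketch of what such a helper could look like,
# assuming the CVAE decoder emits float images in [0, 1] with shape
# (N, C, H, W) and the image writers want uint8 (N, H, W, C); the actual
# helper in this repo may differ.
import numpy as np

def post_process_image_tensor_sketch(imgs):
    """Convert float NCHW images in [0, 1] to uint8 NHWC for saving."""
    imgs = np.clip(imgs, 0.0, 1.0)         # guard against decoder overshoot
    imgs = (imgs * 255).astype(np.uint8)   # [0, 1] floats -> [0, 255] uint8
    return imgs.transpose(0, 2, 3, 1)      # NCHW -> NHWC for image writers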
def __call__(self, trainer):
    if self.args.gpu >= 0:
        self.model.to_cpu()
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        frames_variational = self.model(self.frames)
        save_images_collage(
            frames_variational.data,
            os.path.join(self.output_dir,
                         'train_reconstructed_{}.png'.format(
                             trainer.updater.iteration)))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        frames_variational = self.model.decode(self.z)
        save_images_collage(
            frames_variational.data,
            os.path.join(self.output_dir,
                         'sampled_{}.png'.format(trainer.updater.iteration)))
    if self.args.gpu >= 0:
        self.model.to_gpu()
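# --- Hypothetical sketch (not from this repo) --------------------------------
# Both samplers save batches through save_images_collage. A sketch of the
# likely behavior, assuming a batch is tiled into a roughly square grid and
# written with imageio; the pre_processed flag mirrors how the callers use it
# (True = raw float decoder output that still needs conversion). Grid layout
# and names are assumptions.
import math
import imageio
import numpy as np

def save_images_collage_sketch(imgs, path, pre_processed=True):
    """Tile a batch of images into one grid image and save it to path."""
    if pre_processed:
        # Raw decoder output: float NCHW in [0, 1] -> uint8 NHWC
        imgs = (np.clip(imgs, 0.0, 1.0) * 255).astype(np.uint8)
        imgs = imgs.transpose(0, 2, 3, 1)
    n, h, w, c = imgs.shape
    side = int(math.ceil(math.sqrt(n)))  # e.g. an 8x8 grid for 64 frames
    collage = np.zeros((side * h, side * w, c), dtype=np.uint8)
    for idx in range(n):
        row, col = divmod(idx, side)
        collage[row * h:(row + 1) * h, col * w:(col + 1) * w] = imgs[idx]
    imageio.imwrite(path, collage)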
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir', '-d', default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument('--game', default='CarRacing-v0',
                        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name', default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument('--load_batch_size', default=10, type=int,
                        help='Load game frames in batches so as not to run out of memory')
    parser.add_argument('--model', '-m', default='',
                        help='Initialize the model from given file, or "default" for one in data folder')
    parser.add_argument('--no_resume', action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument('--resume_from', '-r', default='',
                        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', default=1, type=int,
                        help='Number of epochs to learn')
    parser.add_argument('--snapshot_interval', '-s', default=100, type=int,
                        help='Snapshot every N iterations (N * batch_size images processed)')
    parser.add_argument('--z_dim', '-z', default=32, type=int,
                        help='Dimension of encoded vector')
    parser.add_argument('--batch_size', '-b', type=int, default=100,
                        help='Learning minibatch size')
    parser.add_argument('--no_progress_bar', '-p', action='store_true',
                        help="Don't display progress bar during training")
    parser.add_argument('--kl_tolerance', type=float, default=0.5,
                        help='KL loss tolerance (free bits), as a fraction of z_dim')
    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    mkdir(output_dir)

    # Find the latest snapshot, if any, for auto-resume
    max_iter = 0
    auto_resume_file = None
    files = os.listdir(output_dir)
    for file in files:
        if re.match(r'^snapshot_iter_', file):
            iteration = int(re.search(r'\d+', file).group())
            if iteration > max_iter:
                max_iter = iteration
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = CVAE(args.z_dim)
    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)
    if args.gpu >= 0:
        # StandardUpdater's device argument only moves the data, not the model;
        # the model must be on the GPU for training and for the cp.asarray
        # encoding path below (the Sampler extension moves it back temporarily).
        model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    log(ID, "Loading training data")
    train = VisionDataset(dir=random_rollouts_dir,
                          load_batch_size=args.load_batch_size,
                          shuffle=True, verbose=True)
    train_iter = chainer.iterators.SerialIterator(train, args.batch_size,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu,
        loss_func=model.get_loss_func(args.kl_tolerance))

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.LogReport(
        trigger=(100 if args.gpu >= 0 else 10, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'main/kl_loss', 'main/rec_loss',
        'elapsed_time'
    ]))
    if not args.no_progress_bar:
        trainer.extend(extensions.ProgressBar(
            update_interval=100 if args.gpu >= 0 else 10))

    sample_idx = np.random.choice(range(train.get_current_batch_size()), 64,
                                  replace=False)
    sample_frames = chainer.Variable(np.asarray(train[sample_idx]))
    np.random.seed(31337)
    sample_z = chainer.Variable(
        np.random.normal(0, 1, (64, args.z_dim)).astype(np.float32))
    save_images_collage(sample_frames.data,
                        os.path.join(output_dir, 'train.png'))
    sampler = Sampler(model, args, output_dir, sample_frames, sample_z)
    trainer.extend(sampler, trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID, "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        sampler(trainer)

    if not args.test:
        log(ID, "Saving latent z's for all training data")
        train = VisionDataset(dir=random_rollouts_dir,
                              load_batch_size=args.load_batch_size,
                              shuffle=False, verbose=True)
        total_batches = train.get_total_batches()
        for batch in range(total_batches):
            gc.collect()
            train.load_batch(batch)
            batch_frames, batch_rollouts, batch_rollouts_counts = \
                train.get_current_batch()
            mu = None
            ln_var = None
            # Encode the loaded frames in minibatch-sized chunks
            splits = batch_frames.shape[0] // args.batch_size
            if batch_frames.shape[0] % args.batch_size != 0:
                splits += 1
            for i in range(splits):
                start_idx = i * args.batch_size
                end_idx = (i + 1) * args.batch_size
                sample_frames = batch_frames[start_idx:end_idx]
                if args.gpu >= 0:
                    sample_frames = chainer.Variable(cp.asarray(sample_frames))
                else:
                    sample_frames = chainer.Variable(sample_frames)
                this_mu, this_ln_var = model.encode(sample_frames)
                this_mu = this_mu.data
                this_ln_var = this_ln_var.data
                if args.gpu >= 0:
                    this_mu = cp.asnumpy(this_mu)
                    this_ln_var = cp.asnumpy(this_ln_var)
                if mu is None:
                    mu = this_mu
                    ln_var = this_ln_var
                else:
                    mu = np.concatenate((mu, this_mu), axis=0)
                    ln_var = np.concatenate((ln_var, this_ln_var), axis=0)
            # Split the concatenated latents back out per rollout and save
            running_count = 0
            for rollout in batch_rollouts:
                rollout_dir = os.path.join(random_rollouts_dir, rollout)
                rollout_count = batch_rollouts_counts[rollout]
                start_idx = running_count
                end_idx = running_count + rollout_count
                this_mu = mu[start_idx:end_idx]
                this_ln_var = ln_var[start_idx:end_idx]
                np.savez_compressed(os.path.join(rollout_dir, "mu+ln_var.npz"),
                                    mu=this_mu, ln_var=this_ln_var)
                running_count = running_count + rollout_count
            log(ID, "> Processed z's for rollouts " + str(batch_rollouts))
            # Free up memory:
            batch_frames = None
            mu = None
            ln_var = None

    log(ID, "Done")
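# --- Hypothetical sketch (not from this repo) --------------------------------
# The updater above trains with model.get_loss_func(args.kl_tolerance). A
# minimal sketch of the free-bits style VAE objective this is commonly wired
# to in World Models implementations: reconstruction L2 plus a KL term that is
# never optimized below kl_tolerance * z_dim. CVAE's encode/decode signatures
# and the reported keys are assumptions based on the PrintReport entries above.
import chainer
import chainer.functions as F
import numpy as np

def get_loss_func_sketch(self, kl_tolerance):
    # Would be a CVAE method; self.xp is numpy or cupy depending on device.
    def lf(frames):
        mu, ln_var = self.encode(frames)
        z = F.gaussian(mu, ln_var)  # reparameterization trick
        frames_hat = self.decode(z)
        # Sum squared error over pixels, mean over the batch
        rec_loss = F.mean(F.sum(F.squared_error(frames_hat, frames),
                                axis=(1, 2, 3)))
        # KL(q(z|x) || N(0, I)), summed over z dims, mean over the batch
        kl_loss = F.mean(F.sum(
            F.gaussian_kl_divergence(mu, ln_var, reduce='no'), axis=1))
        # Free bits: don't push the KL below kl_tolerance * z_dim
        kl_floor = self.xp.asarray(kl_tolerance * mu.shape[1],
                                   dtype=np.float32)
        kl_loss = F.maximum(kl_loss, kl_floor)
        loss = rec_loss + kl_loss
        chainer.report({'loss': loss, 'kl_loss': kl_loss,
                        'rec_loss': rec_loss}, observer=self)
        return loss
    return lf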
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir', '-d', default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument('--game', default='CarRacing-v0',
                        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name', default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument('--load_batch_size', default=100, type=int,
                        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument('--model', '-m', default='',
                        help='Initialize the model from given file, or "default" for one in data folder')
    parser.add_argument('--no_resume', action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument('--resume_from', '-r', default='',
                        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='Number of epochs to learn')
    parser.add_argument('--snapshot_interval', '-s', default=200, type=int,
                        help='Snapshot every x games (one game per iteration)')
    parser.add_argument('--z_dim', '-z', default=32, type=int,
                        help='Dimension of encoded vector')
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures', default=5, type=int,
                        help='Number of Gaussian mixtures for MDN')
    parser.add_argument('--no_progress_bar', '-p', action='store_true',
                        help="Don't display progress bar during training")
    parser.add_argument('--predict_done', action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature', default=1., type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip', default=0., type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length', type=int, default=128,
                        help='Sequence length for LSTM for TBPTT')
    parser.add_argument('--in_dream', action='store_true',
                        help='Whether to train in dream, or real environment')
    parser.add_argument('--initial_z_noise', default=0., type=float,
                        help="Gaussian noise std for initial z for dream training")
    parser.add_argument('--done_threshold', default=0.5, type=float,
                        help='What done probability really means done')
    parser.add_argument('--temperature', '-t', default=1.0, type=float,
                        help='Temperature (tau) for MDN-RNN (model)')
    parser.add_argument('--dream_max_len', default=2100, type=int,
                        help="Maximum timesteps for dream to avoid runaway")
    parser.add_argument('--weights_type', default=1, type=int,
                        help="1=action_dim*(z_dim+hidden_dim), 2=z_dim+2*hidden_dim")
    parser.add_argument('--initial_z_size', default=10000, type=int,
                        help="How many real initial frames to load for dream training")
    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    vision_dir = os.path.join(args.data_dir, args.game, args.experiment_name,
                              'vision')

    log(ID, "Starting")

    # Find the latest snapshot, if any, for auto-resume
    max_iter = 0
    auto_resume_file = None
    files = os.listdir(output_dir)
    for file in files:
        if re.match(r'^snapshot_iter_', file):
            iteration = int(re.search(r'\d+', file).group())
            if iteration > max_iter:
                max_iter = iteration
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures,
                    args.predict_done)
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"),
                                 vision)
    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size, verbose=False)
    train_iter = chainer.iterators.SerialIterator(train, batch_size=1,
                                                  shuffle=False)

    env = gym.make(args.game)
    action_dim = len(env.action_space.low)
    args.action_dim = action_dim

    updater = TBPTTUpdater(train_iter, optimizer, model.get_loss_func(), args,
                           model)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(extensions.LogReport(
        trigger=(10 if args.gpu >= 0 else 1, 'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss', 'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=10 if args.gpu >= 0 else 1))

    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        # Make all the done frames black
        img_t_plus_1[np.where(done[0:sample_size] >= 0.5), :, :, :] = 0
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1,
                        os.path.join(output_dir, 'train_t_plus_1.png'))
    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler,
                   trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID, "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"),
                                     model)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)

    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    # current_z_t = np.random.randn(64).astype(np.float32)  # Noise as starting frame
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[
        np.random.randint(len(train))]  # Pick a random real rollout
    current_z_t = rollout_z_t[0]  # Starting frame from the real rollout
    current_z_t += np.random.normal(0, 0.5, current_z_t.shape).astype(
        np.float32)  # Add some noise to the real rollout starting frame
    all_z_t = [current_z_t]
    # current_action = np.asarray([0., 1.]).astype(np.float32)
    for i in range(rollout_z_t.shape[0]):
        # if i != 0 and i % 200 == 0:
        #     current_action = 1 - current_action  # Flip actions every 200 frames
        current_action = np.expand_dims(
            rollout_action[i], 0)  # Follow actions performed in a real rollout
        output = model(current_z_t, current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
            # print(i, current_action, done)
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(
        np.asarray(all_z_t).astype(np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs, fps=20)

    log(ID, "Done")
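# --- Hypothetical sketch (not from this repo) --------------------------------
# The dream rollout above samples z_{t+1} via model(..., temperature=...). A
# minimal numpy sketch of temperature-adjusted sampling from an MDN output, as
# described in the World Models paper: logits are divided by tau before the
# softmax and the selected Gaussian's variance is scaled by tau. Shapes and
# names are assumptions.
import numpy as np

def sample_z_sketch(logits, mu, ln_var, temperature=1.0):
    """Sample a z vector from MDN parameters of shape (z_dim, mixtures)."""
    # Tempered, numerically stable softmax over mixture components
    scaled = (logits - logits.max(axis=1, keepdims=True)) / temperature
    pi = np.exp(scaled)
    pi /= pi.sum(axis=1, keepdims=True)
    z = np.empty(mu.shape[0], dtype=np.float32)
    for d in range(mu.shape[0]):
        k = np.random.choice(mu.shape[1], p=pi[d])      # pick a component
        std = np.exp(0.5 * ln_var[d, k]) * np.sqrt(temperature)
        z[d] = np.random.normal(mu[d, k], std)          # sample that Gaussian
    return z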