def __call__(self, trainer):
    if self.args.gpu >= 0:
        self.model.to_cpu()
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        self.model.reset_state()
        z_t_plus_1s = []
        dones = []
        for i in range(self.z_t.shape[0]):
            output = self.model(self.z_t[i], self.action[i],
                                temperature=self.args.sample_temperature)
            if self.args.predict_done:
                z_t_plus_1, done = output
                z_t_plus_1 = z_t_plus_1.data
                done = done.data
            else:
                z_t_plus_1 = output.data
            z_t_plus_1s.append(z_t_plus_1)
            if self.args.predict_done:
                dones.append(done[0])
        z_t_plus_1s = np.asarray(z_t_plus_1s)
        dones = np.asarray(dones).reshape(-1)
        img_t_plus_1 = post_process_image_tensor(
            self.vision.decode(z_t_plus_1s).data)
        if self.args.predict_done:
            # Make all the done's black
            img_t_plus_1[np.where(dones >= 0.5), :, :, :] = 0
        save_images_collage(img_t_plus_1,
                            os.path.join(
                                self.output_dir,
                                'train_t_plus_1_{}.png'.format(
                                    trainer.updater.iteration)),
                            pre_processed=False)
    if self.args.gpu >= 0:
        self.model.to_gpu()
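
# Usage sketch (mirrors the registration in main() below; model.copy() gives the
# sampler its own MDN-RNN link so sampling does not clobber the training model's
# LSTM state):
#
#   image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
#                                sample_z_t, sample_action)
#   trainer.extend(image_sampler, trigger=(args.snapshot_interval, 'iteration'))
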
def __call__(self, frames, pre_process=False):
    if len(frames.shape) == 3:
        frames = F.expand_dims(frames, 0)
    if pre_process:
        frames = pre_process_image_tensor(frames)
    frames_variational = self.decode(self.encode(frames, return_z=True))
    if pre_process:
        frames_variational = post_process_image_tensor(frames_variational)
    return frames_variational
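
# Reconstruction sketch (illustrative; the checkpoint path follows the defaults used
# in main(): <data_dir>/<game>/<experiment_name>/vision/vision.model):
#
#   vision = CVAE(32)
#   chainer.serializers.load_npz(
#       './data/wm/CarRacing-v0/experiment_1/vision/vision.model', vision)
#   recon = vision(frames, pre_process=True)  # frames: raw image tensor, pre/post-processed internally
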
def worker(worker_arg_tuple):
    try:
        rollout_num, args, vision, model, W_c, b_c, output_dir = worker_arg_tuple

        # The same starting seed gets passed in multiprocessing, so reset it per process
        np.random.seed()

        model.reset_state()

        if args.game in DOOM_GAMES:
            env = ViZDoomWrapper(args.game)
        else:
            env = gym.make(args.game)

        h_t = np.zeros(args.hidden_dim).astype(np.float32)
        c_t = np.zeros(args.hidden_dim).astype(np.float32)

        t = 0
        cumulative_reward = 0
        if args.record:
            frames_array = []
        observation = env.reset()
        if args.record:
            frames_array.append(observation)
        start_time = time.time()
        while True:
            # Encode the frame, pick an action with the controller, step the environment
            observation = imresize(observation,
                                   (args.frame_resize, args.frame_resize))
            observation = pre_process_image_tensor(
                np.expand_dims(observation, 0))
            z_t = vision.encode(observation, return_z=True).data[0]
            a_t = action(args, W_c, b_c, z_t, h_t, c_t, None)
            observation, reward, done, _ = env.step(a_t)
            model(z_t, a_t, temperature=args.temperature)
            if args.record:
                frames_array.append(observation)
            cumulative_reward += reward
            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]
            t += 1
            if done:
                break
        log(ID,
            "> Rollout #{} finished after {} timesteps in {:.2f}s with cumulative reward {:.2f}"
            .format((rollout_num + 1), t, (time.time() - start_time),
                    cumulative_reward))
        env.close()
        if args.record:
            frames_array = np.asarray(frames_array)
            imageio.mimsave(os.path.join(output_dir, str(rollout_num + 1) + '.gif'),
                            post_process_image_tensor(frames_array),
                            fps=20)
        return cumulative_reward
    except Exception:
        print(traceback.format_exc())
        return 0.
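
# Dispatch sketch (hypothetical; the single-tuple signature suggests this worker is
# mapped over a multiprocessing Pool -- the pool setup and the rollout count
# `num_rollouts` are assumptions, not part of this file):
#
#   worker_arg_tuples = [(i, args, vision, model, W_c, b_c, output_dir)
#                        for i in range(num_rollouts)]
#   with Pool() as pool:
#       cumulative_rewards = pool.map(worker, worker_arg_tuples)
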
def main():
    parser = argparse.ArgumentParser(description='World Models ' + ID)
    parser.add_argument('--data_dir', '-d', default="./data/wm",
                        help='The base data/output directory')
    parser.add_argument('--game', default='CarRacing-v0',
                        help='Game to use')  # https://gym.openai.com/envs/CarRacing-v0/
    parser.add_argument('--experiment_name', default='experiment_1',
                        help='To isolate its files from others')
    parser.add_argument('--load_batch_size', default=100, type=int,
                        help='Load rollouts in batches so as not to run out of memory')
    parser.add_argument('--model', '-m', default='',
                        help='Initialize the model from given file, or "default" for one in data folder')
    parser.add_argument('--no_resume', action='store_true',
                        help="Don't auto resume from the latest snapshot")
    parser.add_argument('--resume_from', '-r', default='',
                        help='Resume the optimization from a specific snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Generate samples only')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--epoch', '-e', default=20, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--snapshot_interval', '-s', default=200, type=int,
                        help='snapshot every x iterations')
    parser.add_argument('--z_dim', '-z', default=32, type=int,
                        help='dimension of encoded vector')
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help='LSTM hidden units')
    parser.add_argument('--mixtures', default=5, type=int,
                        help='number of gaussian mixtures for MDN')
    parser.add_argument('--no_progress_bar', '-p', action='store_true',
                        help='Do not display progress bar during training')
    parser.add_argument('--predict_done', action='store_true',
                        help='Whether MDN-RNN should also predict done state')
    parser.add_argument('--sample_temperature', default=1., type=float,
                        help='Temperature for generating samples')
    parser.add_argument('--gradient_clip', default=0., type=float,
                        help='Clip grads L2 norm threshold. 0 = no clip')
    parser.add_argument('--sequence_length', type=int, default=128,
                        help='sequence length for LSTM for TBPTT')
    parser.add_argument('--in_dream', action='store_true',
                        help='Whether to train in dream, or real environment')
    parser.add_argument('--initial_z_noise', default=0., type=float,
                        help="Gaussian noise std for initial z for dream training")
    parser.add_argument('--done_threshold', default=0.5, type=float,
                        help='What done probability really means done')
    parser.add_argument('--temperature', '-t', default=1.0, type=float,
                        help='Temperature (tau) for MDN-RNN (model)')
    parser.add_argument('--dream_max_len', default=2100, type=int,
                        help="Maximum timesteps for dream to avoid runaway")
    parser.add_argument('--weights_type', default=1, type=int,
                        help="1=action_dim*(z_dim+hidden_dim), 2=z_dim+2*hidden_dim")
    parser.add_argument('--initial_z_size', default=10000, type=int,
                        help="How many real initial frames to load for dream training")
    args = parser.parse_args()
    log(ID, "args =\n " + str(vars(args)).replace(",", ",\n "))

    output_dir = os.path.join(args.data_dir, args.game, args.experiment_name, ID)
    mkdir(output_dir)
    random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                       args.experiment_name, 'random_rollouts')
    vision_dir = os.path.join(args.data_dir, args.game,
                              args.experiment_name, 'vision')

    log(ID, "Starting")

    # Find the latest snapshot, if any, for auto-resume
    max_iter = 0
    auto_resume_file = None
    files = os.listdir(output_dir)
    for file in files:
        if re.match(r'^snapshot_iter_', file):
            snapshot_iter = int(re.search(r'\d+', file).group())
            if snapshot_iter > max_iter:
                max_iter = snapshot_iter
    if max_iter > 0:
        auto_resume_file = os.path.join(output_dir,
                                        "snapshot_iter_{}".format(max_iter))

    model = MDN_RNN(args.hidden_dim, args.z_dim, args.mixtures, args.predict_done)
    vision = CVAE(args.z_dim)
    chainer.serializers.load_npz(os.path.join(vision_dir, "vision.model"), vision)

    if args.model:
        if args.model == 'default':
            args.model = os.path.join(output_dir, ID + ".model")
        log(ID, "Loading saved model from: " + args.model)
        chainer.serializers.load_npz(args.model, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    if args.gradient_clip > 0.:
        optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(args.gradient_clip))

    log(ID, "Loading training data")
    train = ModelDataset(dir=random_rollouts_dir,
                         load_batch_size=args.load_batch_size,
                         verbose=False)
    train_iter = chainer.iterators.SerialIterator(train, batch_size=1,
                                                  shuffle=False)

    env = gym.make(args.game)
    action_dim = len(env.action_space.low)
    args.action_dim = action_dim

    updater = TBPTTUpdater(train_iter, optimizer, model.get_loss_func(), args, model)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(trigger=(10 if args.gpu >= 0 else 1, 'iteration')))
    trainer.extend(
        extensions.PrintReport(['epoch', 'iteration', 'loss', 'elapsed_time']))
    if not args.no_progress_bar:
        trainer.extend(
            extensions.ProgressBar(update_interval=10 if args.gpu >= 0 else 1))

    sample_size = 256
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[0]
    sample_z_t = rollout_z_t[0:sample_size]
    sample_z_t_plus_1 = rollout_z_t_plus_1[0:sample_size]
    sample_action = rollout_action[0:sample_size]
    img_t = vision.decode(sample_z_t).data
    img_t_plus_1 = vision.decode(sample_z_t_plus_1).data
    if args.predict_done:
        done = done.reshape(-1)
        # Make done frames black
        img_t_plus_1[np.where(done[0:sample_size] >= 0.5), :, :, :] = 0
    save_images_collage(img_t, os.path.join(output_dir, 'train_t.png'))
    save_images_collage(img_t_plus_1, os.path.join(output_dir, 'train_t_plus_1.png'))

    image_sampler = ImageSampler(model.copy(), vision, args, output_dir,
                                 sample_z_t, sample_action)
    trainer.extend(image_sampler, trigger=(args.snapshot_interval, 'iteration'))

    if args.resume_from:
        log(ID, "Resuming trainer manually from snapshot: " + args.resume_from)
        chainer.serializers.load_npz(args.resume_from, trainer)
    elif not args.no_resume and auto_resume_file is not None:
        log(ID, "Auto resuming trainer from last snapshot: " + auto_resume_file)
        chainer.serializers.load_npz(auto_resume_file, trainer)

    if not args.test:
        log(ID, "Starting training")
        trainer.run()
        log(ID, "Done training")
        log(ID, "Saving model")
        chainer.serializers.save_npz(os.path.join(output_dir, ID + ".model"), model)

    if args.test:
        log(ID, "Saving test samples")
        image_sampler(trainer)

    log(ID, "Generating gif for a rollout generated in dream")
    if args.gpu >= 0:
        model.to_cpu()
    model.reset_state()
    # current_z_t = np.random.randn(64).astype(np.float32)  # Noise as starting frame
    # Pick a random real rollout; the dataset yields the same 5-tuple unpacked above
    rollout_z_t, rollout_z_t_plus_1, rollout_action, _, done = train[
        np.random.randint(len(train))]
    current_z_t = rollout_z_t[0]  # Starting frame from the real rollout
    current_z_t += np.random.normal(0, 0.5, current_z_t.shape).astype(
        np.float32)  # Add some noise to the real rollout starting frame
    all_z_t = [current_z_t]
    # current_action = np.asarray([0., 1.]).astype(np.float32)
    for i in range(rollout_z_t.shape[0]):
        # if i != 0 and i % 200 == 0: current_action = 1 - current_action  # Flip actions every 200 frames
        current_action = np.expand_dims(
            rollout_action[i], 0)  # follow actions performed in a real rollout
        output = model(current_z_t, current_action,
                       temperature=args.sample_temperature)
        if args.predict_done:
            current_z_t, done = output
            done = done.data
            # print(i, current_action, done)
        else:
            current_z_t = output
        all_z_t.append(current_z_t.data)
        if args.predict_done and done[0] >= 0.5:
            break
    dream_rollout_imgs = vision.decode(
        np.asarray(all_z_t).astype(np.float32)).data
    dream_rollout_imgs = post_process_image_tensor(dream_rollout_imgs)
    imageio.mimsave(os.path.join(output_dir, 'dream_rollout.gif'),
                    dream_rollout_imgs, fps=20)

    log(ID, "Done")
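
# Example invocation (illustrative; the script name model.py is an assumption, the
# flags come from the argparse definitions above):
#
#   python model.py --game CarRacing-v0 --experiment_name experiment_1 \
#       --gpu 0 --epoch 20 --predict_done --gradient_clip 1.0
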
def rollout(rollout_arg_tuple):
    try:
        global initial_z_t
        generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple

        # The same starting seed gets passed in multiprocessing, need to reset it for each process:
        np.random.seed()

        if not with_frames:
            log(ID,
                ">>> Starting generation #" + str(generation) + ", mutation #" +
                str(mutation_idx + 1) + ", trial #" + str(trial + 1))
        else:
            frames_array = []
        start_time = time.time()

        model.reset_state()

        if args.in_dream:
            z_t, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))]
            z_t = z_t[0]
            if gpu is not None:
                z_t = cuda.to_gpu(z_t)
            if with_frames:
                observation = vision.decode(z_t).data
                if gpu is not None:
                    observation = cp.asnumpy(observation)
                observation = post_process_image_tensor(observation)[0]
            else:
                # free up precious GPU memory:
                if gpu is not None:
                    vision.to_cpu()
                vision = None
            if args.initial_z_noise > 0.:
                if gpu is not None:
                    z_t += cp.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(cp.float32)
                else:
                    z_t += np.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(np.float32)
        else:
            if args.game in DOOM_GAMES:
                env = ViZDoomWrapper(args.game)
            else:
                env = gym.make(args.game)
            observation = env.reset()
        if with_frames:
            frames_array.append(observation)

        if gpu is not None:
            h_t = cp.zeros(args.hidden_dim).astype(cp.float32)
            c_t = cp.zeros(args.hidden_dim).astype(cp.float32)
        else:
            h_t = np.zeros(args.hidden_dim).astype(np.float32)
            c_t = np.zeros(args.hidden_dim).astype(np.float32)

        done = False
        cumulative_reward = 0
        t = 0
        while not done:
            if not args.in_dream:
                observation = imresize(observation,
                                       (args.frame_resize, args.frame_resize))
                observation = pre_process_image_tensor(
                    np.expand_dims(observation, 0))
                if gpu is not None:
                    observation = cuda.to_gpu(observation)
                z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu)

            if args.in_dream:
                z_t, done = model(z_t, a_t, temperature=args.temperature)
                done = done.data[0]
                if with_frames:
                    observation = post_process_image_tensor(
                        vision.decode(z_t).data)[0]
                # No environment reward signal in a dream rollout: score one point
                # per surviving timestep
                reward = 1
                if done >= args.done_threshold:
                    done = True
                else:
                    done = False
            else:
                observation, reward, done, _ = env.step(
                    a_t if gpu is None else cp.asnumpy(a_t))
                model(z_t, a_t, temperature=args.temperature)

            if with_frames:
                frames_array.append(observation)

            cumulative_reward += reward

            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1

            if max_timesteps is not None and t == max_timesteps:
                break
            elif args.in_dream and t == args.dream_max_len:
                log(ID,
                    ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!"
                    .format(generation, str(mutation_idx + 1), str(trial + 1), t))
                break

        if not args.in_dream:
            env.close()

        if not with_frames:
            log(ID,
                ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}"
                .format(generation, str(mutation_idx + 1), str(trial + 1), t,
                        (time.time() - start_time), cumulative_reward))
            return cumulative_reward
        else:
            frames_array = np.asarray(frames_array)
            if args.game in DOOM_GAMES and not args.in_dream:
                frames_array = post_process_image_tensor(frames_array)
            return cumulative_reward, frames_array
    except Exception:
        print(traceback.format_exc())
        return 0.
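
# Controller-weight sketch (hypothetical helper, not part of this file; the split
# follows the --weights_type 1 help string, action_dim*(z_dim+hidden_dim), and a
# bias of length action_dim is assumed for b_c):
#
#   def unpack_controller_params(params, z_dim, hidden_dim, action_dim):
#       split = action_dim * (z_dim + hidden_dim)
#       W_c = params[:split].reshape(action_dim, z_dim + hidden_dim)
#       b_c = params[split:split + action_dim]
#       return W_c, b_c
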