def generate_demo_video():
    env = StarIntrudersEnvironment(screen_size=512)
    state = env.reset()
    done = False
    print('Playing sample game with environment {}'.format(env))
    filename = 'game-StarIntruders-{}.mp4'.format(int(time.time()))
    vid = imutil.Video(filename, framerate=8)
    rewards = []
    action = env.action_space.sample()
    for t in range(200):
        if done:
            print('Finished episode with {} total reward after {} timesteps'.format(
                sum(rewards), len(rewards)))
            rewards = []
            state = env.reset()
        # Repeat the previous action most of the time; resample on 20% of steps
        if np.random.random() < .2:
            action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        minimap_ftr, screen_ftr, minimap_rgb, screen_rgb = state
        caption = 't={} Reward: {:.2f}'.format(t, reward)
        vid(screen_rgb, normalize=False, caption=caption, resize_to=(512, 512))
        rewards.append(reward)
        print('Timestep t={} took action {} got reward {}'.format(
            len(rewards), action, reward))
    vid.finish()
    print('Finished episode with {} total reward after {} timesteps'.format(
        sum(rewards), len(rewards)))
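# A minimal, self-contained sketch of the imutil.Video lifecycle as used in
# these snippets: construct with a filename (and optional framerate), add
# frames either by calling the object directly or via write_frame(), then
# call finish() to encode the file. This reflects only the usage visible
# here, not official imutil documentation; the gradient frames are
# synthetic stand-ins.
import numpy as np
import imutil

def write_smoke_test_video(filename='imutil_smoke_test.mp4'):
    vid = imutil.Video(filename, framerate=10)
    for t in range(30):
        # Synthetic 64x64 RGB frame: a horizontal gradient scrolling over time
        frame = np.zeros((64, 64, 3))
        frame[:, :, 0] = np.linspace(0, 255, 64)
        frame = np.roll(frame, t * 2, axis=1)
        vid.write_frame(frame, resize_to=(256, 256), caption='t={}'.format(t))
    vid.finish()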
def __init__(self, sc2_env, game_number, env_name, action_names,
             tensor_reward_key, replay_dimension=256):
    time_string = "{}".format(int(time.time()))
    self.json_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + time_string + "_" + str(replay_dimension) + ".json")
    self.video_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + time_string + "_" + str(replay_dimension) + ".mp4")
    self.saliency_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + time_string + "_" + str(replay_dimension) + ".expl")
    self.sc2_env = sc2_env
    self.game_clock_tick = 0
    self.frames = []
    self.action_names = action_names
    self.video = imutil.Video(filename=self.video_pathname)
    self.decision_point_number = 1
    self.tensor_reward_key = tensor_reward_key
    self.explanation_points_array = []
def generate_trajectory_video(datasource):
    print("Writing example video of datasource {} to file".format(datasource))
    filename = 'example_trajectory.mp4'
    vid = imutil.Video(filename, framerate=10)
    states, rewards, dones, infos = datasource.get_trajectories(batch_size=1)
    for state in states[0]:
        img = state.transpose(1, 2, 0)  # CHW -> HWC for display
        vid.write_frame(img, resize_to=(256, 256))
    vid.finish()
def __init__(self, render=True, video_filename=None, verbose=False, num_players=2):
    if video_filename:
        render = True
    self.render = render
    self.num_players = num_players
    self.sc2env = make_sc2env(num_players, render=render)
    self.video = None
    if video_filename:
        self.video = imutil.Video(filename=video_filename)
    self.verbose = verbose
    self.action_space = self.get_action_space()
def __init__(self, sc2_env, game_number, env_name, action_component_names,
             replay_dimension=256):
    time_string = "{}".format(int(time.time()))
    self.game_number = game_number
    self.json_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + str(self.game_number) + "_" + time_string + "_" +
        str(replay_dimension) + ".json")
    self.video_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + str(self.game_number) + "_" + time_string + "_" +
        str(replay_dimension) + ".mp4")
    self.saliency_pathname = os.path.join(
        REPLAY_DIR_PATH,
        "game_" + str(self.game_number) + "_" + time_string + "_" +
        str(replay_dimension) + ".expl")
    self.sc2_env = sc2_env
    self.frames = []
    self.action_component_names = action_component_names
    self.video = imutil.Video(filename=self.video_pathname, framerate=25)
    self.decision_point_number = 1
    self.explanation_points_array = []
    self.current_wave_number = 0
    self.jpg_number = 0
def visualize_forward_simulation(datasource, encoder, decoder, transition,
                                 reward_pred, train_iter=0, timesteps=60,
                                 num_factors=16):
    start_time = time.time()
    print('Starting trajectory simulation for {} frames'.format(timesteps))
    states, rewards, dones, actions = datasource.get_trajectories(
        batch_size=1, timesteps=timesteps, random_start=False)
    states = torch.Tensor(states).cuda()
    num_actions = datasource.binary_input_channels
    num_rewards = datasource.scalar_output_channels
    # We begin *at* state t=2, then we simulate from t=2 until t=timesteps.
    # Encoder input is frames t=0,1,2 to produce the state at t=1; one
    # transition with the action at t=1 then yields the state at t=2.
    z = encoder(states[:, :3])
    z = transition(z, torch.eye(num_actions)[actions[:, 1]].cuda())
    z = z.detach()  # detach() is not in-place; the result must be reassigned
    ftr_vid = imutil.Video('simulation_ftr_iter_{:06d}.mp4'.format(train_iter),
                           framerate=3)
    # First: replay the true trajectory in simulation
    caption = 'Real'
    simulate_trajectory_from_actions(z.clone(), decoder, reward_pred, transition,
                                     states, rewards, dones, actions, ftr_vid,
                                     caption_tag=caption, num_rewards=num_rewards,
                                     num_actions=num_actions)
    ftr_vid.finish()
    print('Finished trajectory simulation in {:.02f}s'.format(
        time.time() - start_time))
def generate_demo_video():
    env = ZerglingDefenseEnvironment()
    state = env.reset()
    done = False
    print('Playing sample game with environment {}'.format(env))
    filename = 'game-ZerglingDefense-{}.mp4'.format(int(time.time()))
    vid = imutil.Video(filename, framerate=8)

    # This function will run on each *rendered frame* of the game, including
    # frames in-between the agent's actions
    def video_write_frame(state, reward, done, info):
        minimap_ftr, screen_ftr, minimap_rgb, screen_rgb = state
        vid(screen_rgb, normalize=False)

    rewards = []
    for i in range(100):
        if done:
            break
        action = env.action_space.sample()
        state, reward, done, info = env.step(
            action, animation_callback=video_write_frame)
        video_write_frame(state, reward, done, info)
        imutil.show(state[1], img_padding=8,
                    filename='ftr_{:06d}.png'.format(i), resize_to=(600, 800))
        imutil.show(state[1][10], filename='ftr10_{:06d}.png'.format(i),
                    resize_to=(512, 512))
        imutil.show(state[3], filename='rgb_{:06d}.png'.format(i),
                    resize_to=(512, 512))
        rewards.append(reward)
        print('Timestep t={} took action {} got reward {}'.format(
            len(rewards), action, reward))
    vid.finish()
    print('Finished episode with {} total reward after {} timesteps'.format(
        sum(rewards), len(rewards)))
def __init__(self, sc2_env, game_number, env_name, tensor_action_key,
             tensor_reward_key, replay_dimension=256):
    #self.json_filename = env_name + "_" + str(game_number) + ".json"
    #self.video_filename = env_name + "_" + str(game_number) + ".mp4"
    time_string = "{}".format(int(time.time()))
    self.json_filename = "game_" + time_string + "_" + str(replay_dimension) + ".json"
    self.video_filename = "game_" + time_string + "_" + str(replay_dimension) + ".mp4"
    self.sc2_env = sc2_env
    self.game_clock_tick = 0
    self.frames = []
    self.action_names = ['Top_Left', 'Top_Right', 'Bottom_Left', 'Bottom_Right']
    self.video = imutil.Video(filename=self.video_filename)
    self.decision_point_number = 1
    self.tensor_action_key = tensor_action_key
    self.tensor_reward_key = tensor_reward_key
def play_episode(env, agent, episode_num=0, video=False):
    start_time = time.time()
    print('Starting episode {}...'.format(episode_num))
    state = env.reset()
    done = False
    cumulative_reward = 0
    if video:
        vid = imutil.Video('training_episode_{:04d}.mp4'.format(episode_num))
    for t in range(MAX_STEPS):
        if done:
            break
        action = agent.step(state)
        state, reward, done, info = env.step(action)
        caption = 't={} reward={}'.format(t, reward)
        if video:
            vid.write_frame(state[3], normalize=False, caption=caption)
        agent.update(reward)
        cumulative_reward += reward
    if video:
        vid.finish()
    print('Finished episode ({} actions) in {:.3f} sec total reward {}'.format(
        t, time.time() - start_time, cumulative_reward))
    return cumulative_reward
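# play_episode assumes an agent exposing step(state) -> action and
# update(reward). A minimal random agent satisfying that interface, useful
# for smoke-testing the loop (a hypothetical helper, not part of the
# original codebase):
class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space

    def step(self, state):
        # Ignore the observation and sample a random action
        return self.action_space.sample()

    def update(self, reward):
        # A learning agent would consume the reward here; no-op for random play
        pass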
        t_states.append(new_states)
        for i in range(batch_size):
            states[i] = new_states[i]
        t_rewards.append(rewards)
        t_dones.append(dones)
        t_actions.append(actions)
    # Reshape to (batch_size, timesteps, ...)
    s, r, d, i = [np.swapaxes(t, 0, 1)
                  for t in (t_states, t_rewards, t_dones, t_actions)]
    return s, r, d, i


if __name__ == '__main__':
    import imutil
    batches = 10
    timesteps = 100
    batch_size = 1
    print('Simulation time benchmark: Centipede')
    vid = imutil.Video('centipede.mp4', framerate=5)
    start_time = time.time()
    for _ in range(batches):
        print('Simulating {} timesteps batch size {}...'.format(timesteps, batch_size))
        states, rewards, dones, actions = get_trajectories(batch_size, timesteps=timesteps)
        for state, action, reward in zip(states[0], actions[0], rewards[0]):
            caption = "Prev. Action {} Prev Reward {}".format(action, reward)
            vid.write_frame(state.transpose(1, 2, 0), img_padding=8,
                            resize_to=(512, 512), caption=caption)
    duration = time.time() - start_time
    print('Finished simulating {} games for {} timesteps in {:.3f} sec'.format(
        MAX_BATCH_SIZE, timesteps * batches, duration))
    vid.finish()
def visualize_bptt(z, transition, reward_predictor, decoder, rgb_decoder,
                   num_actions, vid=None):
    z.retain_grad()
    actions = []
    zees = []
    if vid is None:
        vid = imutil.Video(filename='excitation_bptt_{}.mp4'.format(int(time.time())),
                           framerate=10)
    for t in range(30):
        a = onehot(1) if t == 0 else onehot(3)
        a.requires_grad = True
        a.retain_grad()
        actions.append(a)  # Keep track of previous actions
        z = transition(z, a)
        z.retain_grad()
        zees.append(z)
        r, rmap = reward_predictor(z, visualize=True)
        r.retain_grad()
        caption = 'Neural Simulation: expected r = {:.2f} {:.2f}'.format(r[0, 0], r[0, 1])
        vid.write_frame(rgb_decoder(decoder(z))[0], resize_to=(512, 512), caption=caption)
        rewards = rmap[0].sum(dim=0)
        rewards = torch.clamp(rewards * 128 + 128, 0, 255)
        #imutil.show(rewards, resize_to=(256, 256), normalize=False, save=False)
        if r.sum().abs() > 0.8:
            print('Expected reward of {:.2f} at time t+{}'.format(r.sum(), t))
            for _ in range(20):
                vid.write_frame(rgb_decoder(decoder(z))[0], resize_to=(512, 512),
                                caption=caption)
            # Backpropagate from the single most salient reward location
            localized_expected_reward = (rmap * (rmap.abs() == rmap.abs().max()).type(
                torch.cuda.FloatTensor)).sum()
            localized_expected_reward.backward(retain_graph=True)
            print([at * at.grad for at in actions])
            '''
            for z in zees[::-1] + zees:
                caption = 'Plan for reward R={:.2f} at time t+{}'.format(r.sum(), t)
                mask = (z.grad.abs() / (.001 + z.grad.abs().max())) ** 0.5
                img = rgb_decoder(decoder(z * mask))[0]
                for _ in range(4):
                    vid.write_frame(img, resize_to=(512, 512), img_padding=8,
                                    caption=caption)
            '''
            for z in zees[::-1]:
                caption = 'Causal Backtrack, reward R={:.2f} at time t+{}'.format(
                    r.sum(), t)
                mask = z.grad.abs() / (.001 + z.grad.abs().max())
                img1 = decoder(z * mask)[0].sum(dim=0)
                for _ in range(4):
                    vid.write_frame(img1, resize_to=(512, 512), img_padding=8,
                                    caption=caption)
            break
    return True
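# visualize_bptt (above) and play (below) call onehot() without defining it.
# A plausible definition consistent with both call sites, onehot(index) and
# onehot(index, num_actions): a 1 x num_classes float one-hot row vector on
# the GPU. The default of 4 classes is an assumption.
import torch

def onehot(index, num_classes=4):
    vec = torch.zeros(1, num_classes)
    vec[0, index] = 1.0
    return vec.cuda()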
def get_trajectories(batch_size=32, timesteps=10, policy='random',
                     random_start=False, training=False):
    envs = MultiEnvironment([Env() for _ in range(batch_size)])
    t_states, t_rewards, t_dones, t_actions = [], [], [], []
    # Initial actions/stats
    actions = np.random.randint(envs.action_space.n, size=(batch_size,))
    for t in range(timesteps):
        states, rewards, dones, _ = envs.step(actions)
        rewards = [rewards]  # wrap in a list: adds a leading reward-channel dim
        actions = np.random.randint(envs.action_space.n, size=(batch_size,))
        t_states.append(states)
        t_rewards.append(rewards)
        t_dones.append(dones)
        t_actions.append(actions)
    # Reshape to (batch_size, timesteps, ...)
    states = np.swapaxes(t_states, 0, 1)
    rewards = np.swapaxes(t_rewards, 0, 1)
    dones = np.swapaxes(t_dones, 0, 1)
    actions = np.swapaxes(t_actions, 0, 1)
    return states, rewards, dones, actions


if __name__ == '__main__':
    states, rewards, dones, actions = get_trajectories(batch_size=1, timesteps=100)
    import imutil
    vid = imutil.Video('gameoflife.mp4', framerate=5)
    for state, action, reward in zip(states[0], actions[0], rewards[0]):
        pixels = np.transpose(state, (1, 2, 0))
        caption = "Prev. Action {} Prev Reward {}".format(action, reward)
        vid.write_frame(pixels, img_padding=8, resize_to=(512, 512), caption=caption)
    vid.finish()
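# The swapaxes(..., 0, 1) calls above convert a list of per-timestep batches,
# stacked as (timesteps, batch_size, ...), into (batch_size, timesteps, ...).
# A tiny self-contained check of that convention, with made-up shapes:
import numpy as np

t_states_demo = [np.zeros((4, 3, 32, 32)) for _ in range(10)]  # 10 steps, batch of 4
stacked = np.swapaxes(np.array(t_states_demo), 0, 1)
assert stacked.shape == (4, 10, 3, 32, 32)  # (batch, time, C, H, W)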
        if not ret:
            break
        yield frame[:, :, ::-1]  # reverse channel order (BGR -> RGB)


fig = plt.figure(figsize=(6.4, 6.4))
ax = fig.add_subplot(111, projection='3d')
ax.view_init(10, 0)
fig.tight_layout(rect=[0, 0.01, 1, 0.99])
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D)
vid = imutil.Video('output_{}.mp4'.format(int(time.time())))
for img in read_images():
    start_time = time.time()
    preds = fa.get_landmarks(img)[0]
    print('Timing: {:.02f} seconds for one frame'.format(time.time() - start_time))
    #ax.set_xlim3d(-500, 500)
    #ax.set_ylim3d(-500, 500)
    #ax.set_zlim3d(-200, 200)
    ax.scatter(preds[:, 2], preds[:, 0], -preds[:, 1])
    left = imutil.get_pixels(img, 640, 640)
    right = imutil.get_pixels(plt, 640, 640)
    pixels = np.concatenate([left, right], axis=1)
    vid.write_frame(pixels)
    imutil.show(pixels, save=False)
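# The loop above composites the camera frame (left) and the rasterized
# matplotlib scatter (right) into one wide video frame. The concatenation
# step in isolation, with synthetic panels:
import numpy as np

left_panel = np.zeros((640, 640, 3))
right_panel = np.ones((640, 640, 3)) * 255
combined = np.concatenate([left_panel, right_panel], axis=1)
assert combined.shape == (640, 1280, 3)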
def generate_images_batched(latents, max_batch_size=16):
    i = 0
    while i < len(latents):
        for img in generate_images(latents[i:i + max_batch_size]):
            yield img
        i += max_batch_size


# Generate latent vectors.
latents = np.random.RandomState(1000).randn(1000, *Gs.input_shapes[0][1:])  # 1000 random latents
latents = latents[[477, 56, 83, 887, 583, 391, 86, 340, 341, 415]]  # hand-picked top-10
images = generate_images(latents)

# Save images to disk (as .jpg, despite the RGB/PIL conversion).
for idx in range(images.shape[0]):
    PIL.Image.fromarray(images[idx], 'RGB').save('img%d.jpg' % idx)

# Generate a latent-interpolation video: theta ramps from 0 toward 1, so the
# video morphs from latent_end to latent_start.
latent_start = latents[6]
latent_end = latents[9]
FRAMES = 120
latent_interp = []
for i in range(FRAMES):
    theta = i / FRAMES
    latent_interp.append(theta * latent_start + (1 - theta) * latent_end)

vid = imutil.Video('interpolated_face.mp4')
for img in generate_images_batched(np.array(latent_interp)):
    vid.write_frame(img)
vid.finish()
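# For Gaussian latents, spherical linear interpolation (slerp) is a common
# alternative to the straight-line blend above; a sketch under that
# assumption (not part of the original script):
import numpy as np

def slerp(v0, v1, t):
    # Interpolate along the great circle between v0 and v1 at fraction t
    v0_unit = v0 / np.linalg.norm(v0)
    v1_unit = v1 / np.linalg.norm(v1)
    omega = np.arccos(np.clip(np.dot(v0_unit, v1_unit), -1.0, 1.0))
    if np.isclose(omega, 0):
        return (1 - t) * v0 + t * v1  # nearly parallel; fall back to lerp
    return (np.sin((1 - t) * omega) * v0 + np.sin(t * omega) * v1) / np.sin(omega)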
        if policy == 'random':
            actions = np.random.randint(envs.action_space.n, size=(batch_size,))
        if policy == 'repeat':
            actions = [i % envs.action_space.n for i in range(batch_size)]
        t_states.append(states)
        t_rewards.append(rewards)
        t_dones.append(dones)
        t_actions.append(actions)
    # Reshape to (batch_size, timesteps, ...)
    states = np.swapaxes(t_states, 0, 1)
    rewards = np.swapaxes(t_rewards, 0, 1)
    dones = np.swapaxes(t_dones, 0, 1)
    actions = np.swapaxes(t_actions, 0, 1)
    return states, rewards, dones, actions


if __name__ == '__main__':
    states, rewards, dones, actions = get_trajectories(batch_size=1, timesteps=100)
    import imutil
    vid = imutil.Video('gridworld.mp4', framerate=5)
    for state, action, reward in zip(states[0], actions[0], rewards[0]):
        pixels = np.transpose(state, (1, 2, 0))
        caption = "Prev. Action {} Prev Reward {}".format(action, reward)
        vid.write_frame(pixels, img_padding=8, resize_to=(512, 512), caption=caption)
    vid.finish()
        actions = np.random.randint(envs.action_space.n, size=(batch_size,))
        if policy == 'repeat':
            actions = [i % envs.action_space.n for i in range(batch_size)]
        states, rewards, dones, _ = envs.step(actions)
        t_states.append(states)
        t_rewards.append(rewards)
        t_dones.append(dones)
        t_actions.append(actions)
    # Reshape to (batch_size, timesteps, ...)
    states = np.swapaxes(t_states, 0, 1)
    rewards = np.swapaxes(t_rewards, 0, 1)
    dones = np.swapaxes(t_dones, 0, 1)
    actions = np.swapaxes(t_actions, 0, 1)
    return states, rewards, dones, actions


if __name__ == '__main__':
    states, rewards, dones, actions = get_trajectories(batch_size=1, timesteps=200)
    import imutil
    vid = imutil.Video('roomba1.mp4', framerate=10)
    for state, action, reward in zip(states[0], actions[0], rewards[0]):
        pixels = np.transpose(state, (1, 2, 0))
        caption = "Action {} Reward {}".format(action, reward)
        vid.write_frame(pixels, img_padding=8, resize_to=(512, 512), caption=caption)
    vid.finish()
        actions = np.random.randint(envs.action_space.n, size=(batch_size,))
        if policy == 'repeat':
            actions = [i % envs.action_space.n for i in range(batch_size)]
        states, rewards, dones, _ = envs.step(actions)
        t_states.append(states)
        t_rewards.append(rewards)
        t_dones.append(dones)
        t_actions.append(actions)
    # Reshape to (batch_size, timesteps, ...)
    states = np.swapaxes(t_states, 0, 1)
    rewards = np.swapaxes(t_rewards, 0, 1)
    dones = np.swapaxes(t_dones, 0, 1)
    actions = np.swapaxes(t_actions, 0, 1)
    return states, rewards, dones, actions


if __name__ == '__main__':
    states, rewards, dones, actions = get_trajectories(batch_size=1, timesteps=100)
    import imutil
    vid = imutil.Video('realpong.mp4', framerate=5)
    for state, action, reward in zip(states[0], actions[0], rewards[0]):
        pixels = np.transpose(state, (1, 2, 0))
        caption = "Prev. Action {} Prev Reward {}".format(action, reward)
        vid.write_frame(pixels, img_padding=8, resize_to=(512, 512), caption=caption)
    vid.finish()
def visualize_reconstruction(datasource, encoder, decoder, transition,
                             reward_predictor, train_iter=0):
    num_actions = datasource.binary_input_channels
    num_rewards = datasource.scalar_output_channels
    timesteps = 45
    batch_size = 1
    states, rewards, dones, actions = datasource.get_trajectories(
        batch_size, timesteps, random_start=False)
    states = torch.Tensor(states).cuda()
    rewards = torch.Tensor(rewards).cuda()
    actions = torch.LongTensor(actions).cuda()
    offsets = [1, 3]
    print('Generating videos for offsets {}'.format(offsets))
    for offset in offsets:
        vid_rgb = imutil.Video('prediction_{:02}_iter_{:06d}.mp4'.format(
            offset, train_iter), framerate=3)
        #vid_aleatoric = imutil.Video('anomaly_detection_{:02}_iter_{:06d}.mp4'.format(offset, train_iter), framerate=3)
        vid_reward = imutil.Video('reward_prediction_{:02}_iter_{:06d}.mp4'.format(
            offset, train_iter), framerate=3)
        for t in range(3, timesteps - offset):
            # Encode frames t-2, t-1, t to produce the state at t-1,
            # then step forward once to produce the state at t
            z = encoder(states[:, t - 2:t + 1])
            z = transition(z, torch.eye(num_actions)[actions[:, t - 1]].cuda())
            # Now step forward *offset* times to produce the state at t+offset
            for t_i in range(t, t + offset):
                onehot_a = torch.eye(num_actions)[actions[:, t_i]].cuda()
                z = transition(z, onehot_a)
            # Our prediction of the world from 'offset' steps back
            predicted_features = decoder(z)
            predicted_features = torch.sigmoid(predicted_features)
            predicted_rgb = predicted_features
            predicted_reward, reward_map = reward_predictor(z, visualize=True)
            # The ground truth
            actual_features = states[:, t + offset]
            actual_rgb = convert_ndim_image_to_rgb(actual_features)
            # Difference between actual and predicted outcomes is "surprise"
            surprise_map = torch.clamp(
                (actual_features - predicted_features) ** 2, 0, 1)
            #caption = "t={} surprise (aleatoric): {:.03f}".format(t, surprise_map.sum())
            #pixels = composite_aleatoric_surprise_image(actual_rgb, surprise_map, z)
            #vid_aleatoric.write_frame(pixels, normalize=False, img_padding=8, caption=caption)
            caption = "Left: True t={} Right: Predicted t+{}, Pred. R: {}".format(
                t, offset, format_reward_vector(predicted_reward[0]))
            pixels = composite_feature_rgb_image(actual_features, actual_rgb,
                                                 predicted_features, predicted_rgb)
            vid_rgb.write_frame(pixels, normalize=False, img_padding=8, caption=caption)
            caption = "t={} fwd={}, Pred. R: {}".format(
                t, offset, format_reward_vector(predicted_reward[0]))
            reward_pixels = composite_rgb_reward_factor_image(
                predicted_rgb, reward_map, z, num_rewards=num_rewards)
            vid_reward.write_frame(reward_pixels, normalize=False, caption=caption)
        vid_rgb.finish()
        #vid_aleatoric.finish()
        vid_reward.finish()
    print('Finished generating forward-prediction videos')
            dones_batch), np.array(actions_batch)


def convert_frame(state):
    return state.transpose((2, 0, 1)).copy()


if __name__ == '__main__':
    start_time = time.time()
    env = make_env()
    simulate_to_replay_buffer(1)
    env = make_env()
    batch_size = 8
    vid = imutil.Video('minipacman.mp4', framerate=5)
    states, rewards, dones, actions = get_trajectories(batch_size,
                                                       random_start=False,
                                                       timesteps=100)
    i = 0
    for state, reward, done, action in zip(states[0], rewards[0], dones[0], actions[0]):
        caption = "t={} Prev. Action {} Prev Reward {} Done {}".format(
            i, action, reward, done)
        vid.write_frame(state.transpose(1, 2, 0), img_padding=8,
                        resize_to=(512, 512), caption=caption)
        print('state {}, {}'.format(state.mean(), caption))
        i += 1
    duration = time.time() - start_time
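# convert_frame above moves HWC pixel arrays into the CHW layout that the
# torch-style code in these snippets expects. A quick self-contained check
# of that convention:
import numpy as np

hwc = np.zeros((64, 64, 3))           # height, width, channels
chw = hwc.transpose((2, 0, 1)).copy()
assert chw.shape == (3, 64, 64)       # channels, height, width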
def play(latent_dim, datasource, num_actions, num_rewards, encoder, decoder,
         reward_predictor, discriminator, transition):
    # Initialize environment
    env = datasource.make_env(screen_size=512)
    # No-op through the first 3 frames for initial state estimation
    state = env.reset()
    no_op = 3
    s_0, _ = datasource.convert_frame(state)
    state, reward, done, info = env.step(no_op)
    s_1, _ = datasource.convert_frame(state)
    state, reward, done, info = env.step(no_op)
    s_2, _ = datasource.convert_frame(state)
    state_list = [s_0, s_1, s_2]
    # Estimate the initial state (given t=0,1,2, estimate the state at t=2)
    states = torch.Tensor(state_list).cuda().unsqueeze(0)
    z = encoder(states)
    z = transition(z, onehot(no_op, num_actions))
    cumulative_reward = 0
    filename = 'SimpleRolloutAgent-{}.mp4'.format(int(time.time()))
    vid = imutil.Video(filename, framerate=10)
    t = 2
    cumulative_negative_reward = 0
    cumulative_positive_reward = 0
    while not done:
        z = z.detach()
        # In simulation, compute all possible futures to select the best action
        rewards = []
        for a in range(num_actions):
            z_a = transition(z, onehot(a, num_actions))
            # Estimate the future reward of this action with a 12-step
            # no-op rollout
            r_a = compute_rollout_reward(z_a, transition, reward_predictor,
                                         num_actions, a, rollout_depth=12,
                                         rollout_policy='noop')
            rewards.append(r_a)
            #print('Expected reward from taking action {} is {:.03f}'.format(a, r_a))
        max_r = max(rewards)
        max_a = int(np.argmax(rewards))
        # Take the best action, in real life
        new_state, new_reward, done, info = env.step(max_a)
        if len(info) > 1:
            positive_reward = sum(v for v in info.values() if v > 0)
            negative_reward = sum(v for v in info.values() if v < 0)
        else:
            positive_reward = max(0, new_reward)
            negative_reward = min(0, new_reward)
        cumulative_positive_reward += positive_reward
        cumulative_negative_reward -= negative_reward
        cumulative_reward += new_reward
        # Re-estimate the state
        ftr_state, rgb_state = datasource.convert_frame(new_state)
        print('t={} curr. r={:.02f} future r: {:.02f} {:.02f} {:.02f} {:.02f}'.format(
            t, cumulative_reward, rewards[0], rewards[1], rewards[2], rewards[3]))
        caption = 'Negative Reward: {} Positive Reward: {}'.format(
            int(cumulative_negative_reward), int(cumulative_positive_reward))
        print(caption)
        vid.write_frame(rgb_state, resize_to=(512, 512), caption=caption)
        state_list = state_list[1:] + [ftr_state]
        z = encoder(torch.Tensor(state_list).cuda().unsqueeze(0))
        z = transition(z, onehot(max_a, num_actions))
        t += 1
        if t > 300:
            print('Ending evaluation due to time limit')
            break
    vid.finish()
    msg = 'Finished at t={} with cumulative reward {}'.format(t, cumulative_reward)
    with open('evaluation_metrics_{}.txt'.format(int(time.time())), 'w') as fp:
        fp.write(msg + '\n')
    print(msg)
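# compute_rollout_reward is not shown in this snippet. A plausible sketch
# consistent with its call site above (score an action by simulating forward
# with a fixed no-op policy and accumulating predicted reward); explicitly
# an assumption, not the original implementation. The no-op action index of
# 3 is a guess taken from play(), and reward_predictor is assumed to return
# a reward tensor when called without visualize=True.
import torch

def compute_rollout_reward(z, transition, reward_predictor, num_actions,
                           first_action, rollout_depth=12,
                           rollout_policy='noop', noop_action=3):
    total_reward = 0.0
    with torch.no_grad():
        for _ in range(rollout_depth):
            z = transition(z, onehot(noop_action, num_actions))
            r = reward_predictor(z)  # assumed per-reward tensor output
            total_reward += r.sum().item()
    return total_reward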