# Plays back a GA individual: rebuilds its weights from the mutation seeds
# and renders a single rollout. `game` and `seeds` are module-level globals.
def main():
    print('Number of mutations:', len(seeds))
    env = gym_tensorflow.make(game, 1)
    model = Model()
    obs_op = env.observation()
    reset_op = env.reset()
    action_op = model.make_net(tf.expand_dims(obs_op, axis=1), env.action_space, batch_size=1)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    from gym.envs.classic_control import rendering
    viewer = rendering.SimpleImageViewer()
    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            im = im[0, 0, ...]
            viewer.imshow(im)
    else:
        def display_obs(im):
            # Take the most recent frame of the stacked observation and
            # upscale it for display.
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            im = np.array(Image.fromarray(im).resize((256, 256), resample=Image.BILINEAR), dtype=np.uint8)
            viewer.imshow(im)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)
        noise = SharedNoiseTable()
        weights = model.compute_weights_from_seeds(noise, seeds)
        model.load(sess, 0, weights, seeds)

        sess.run(reset_op)
        display_obs(sess.run(obs_op))
        total_rew = 0
        num_frames = 0
        while True:
            rew, done = sess.run([rew_op, done_op])
            num_frames += 1
            total_rew += rew[0]
            display_obs(sess.run(obs_op))
            time.sleep(4 / 60)  # ~15 fps playback
            if done[0]:
                print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                break
def main(env, num_timesteps=int(10e6), dueling=True, **kwargs):
    env_f = lambda batch_size: gym_tensorflow.make(env, batch_size=batch_size)
    model = models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[512],
        dueling=bool(dueling),
    )
    # Forward the dueling flag instead of hard-coding True, so the
    # parameter actually takes effect.
    act = learn(env_f, q_func=model, max_timesteps=int(num_timesteps), dueling=bool(dueling), **kwargs)
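# Hedged usage sketch (not part of the original source): invoking the DQN
# entry point above. Keyword arguments beyond the ones shown are forwarded
# verbatim to learn(); which names learn() accepts is an assumption here.
if __name__ == '__main__':
    main('pong', num_timesteps=int(2e6), dueling=True)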
def make_env(b):
    tlogger.info('GA: Creating environment for game: %s' % config["game"])
    return gym_tensorflow.make(game=config["game"], batch_size=b)
def make_env(b): return gym_tensorflow.make(game=exp["game"], batch_size=b)
def make_env_game1(b):
    return gym_tensorflow.make(game=exp['games'][1], batch_size=b)
def make_env(b):
    # Note: the batch-size argument is ignored here; the env is always
    # built with batch_size=1.
    return gym_tensorflow.make(game=game, batch_size=1)
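# Hedged sketch of how these make_env factories are consumed elsewhere in
# this repo: get_ref_batch builds an env via the factory and collects a
# batch of observations for models with requires_ref_batch set. The game
# name 'frostbite' is an arbitrary example.
import tensorflow as tf
import gym_tensorflow

def make_ref_env(b):
    return gym_tensorflow.make(game='frostbite', batch_size=b)

with tf.Session() as sess:
    ref_batch = gym_tensorflow.get_ref_batch(make_ref_env, sess, 128)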
def main(game, filename=None, outfile=None, model_name="LargeModel", no_video=False,
         add_text=False, num_runs=RUNS, graph=None):
    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb+') as file:
            state = pickle.load(file)
        #if hasattr(state, 'best_score'):
        #    seeds = state.best_score.seeds
        #    iteration = len(seeds)
        #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
        if hasattr(state, 'elite'):
            seeds = state.elite.seeds
            iteration = state.it
            print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
        else:
            seeds = None
            iteration = state.it
            print("Loading ES snapshot, iteration: {}".format(state.it))

    if outfile:
        fourcc = cv.VideoWriter_fourcc(*'MJPG')
        outvid = cv.VideoWriter(outfile, fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

    env = gym_tensorflow.make(game, 1)
    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    action_op = model.make_net(tf.expand_dims(obs_op, axis=1), env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    if not no_video:
        from gym.envs.classic_control import rendering
        viewer = rendering.SimpleImageViewer()

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            if im.shape[1] > 1:
                # Merge the two most recent frames to reduce Atari flicker.
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            handle_frame(im, outvid, viewer, game, iteration, add_text)
    else:
        def display_obs(im):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            handle_frame(im, outvid, viewer, game, iteration, add_text)

    rewards = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)
        if seeds:
            # GA snapshot: rebuild weights from the mutation seeds.
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            # ES snapshot: load the flat parameter vector directly.
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))
        if graph:
            saver = tf.train.Saver()
            saver.save(sess, graph)

        for i in range(num_runs):
            sess.run(reset_op)
            display_obs(sess.run(obs_op))
            total_rew = 0
            num_frames = 0
            while True:
                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(sess.run(obs_op))
                time.sleep(4 / 60)
                if done[0]:
                    break
            rewards += [total_rew]
            print('Final reward: ', total_rew, 'after', num_frames, 'steps')
        print(rewards)
        print("Mean: ", np.mean(rewards))
        print("Std: ", np.std(rewards))

    if outvid:
        outvid.release()
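# Hedged sketch (schema inferred from main() above, not guaranteed): the two
# snapshot layouts the playback scripts understand. GA snapshots expose
# `elite.seeds` and an iteration counter `it`; ES snapshots expose a flat
# parameter vector `theta` and `it`. The filename is hypothetical.
import pickle

with open('snapshot.pkl', 'rb') as f:
    state = pickle.load(f)
if hasattr(state, 'elite'):
    print('GA snapshot: {} mutations, iteration {}'.format(len(state.elite.seeds), state.it))
else:
    print('ES snapshot: {} parameters, iteration {}'.format(len(state.theta), state.it))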
def main():
    env = gym_tensorflow.make(game, 1)

    # Load the frozen model into a Lucid model wrapper.
    model = lucid_model()
    model.model_path = LOGDIR + file_name + '.pb'
    model.image_shape = image_shape
    model.input_scale = 1.0
    model.image_value_range = (0, 1)
    model.input_name = 'X_t'
    model.ph_type = 'float32'
    # Names here must match the frozen model's layer names.
    model.layers = layers = [
        {'type': 'conv',  'name': 'ga/Relu',      'size': 32},
        {'type': 'conv',  'name': 'ga/Relu_1',    'size': 64},
        {'type': 'conv',  'name': 'ga/Relu_2',    'size': 64},
        {'type': 'dense', 'name': 'ga/Relu_3',    'size': 512},
        {'type': 'dense', 'name': 'ga/Reshape_1', 'size': 18},
    ]
    model.load_graphdef()
    model.save(LOGDIR + file_name + "2.pb")

    obs_op = env.observation()
    reset_op = env.reset()
    T = import_model(model, obs_op, obs_op)
    action_op = T(model.layers[-1]['name'])

    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    # viewer = rendering.SimpleImageViewer()
    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            im = im[0, 0, ...]
            # viewer.imshow(im)
    else:
        def display_obs(im):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            im = np.array(Image.fromarray(im).resize((256, 256), resample=Image.BILINEAR), dtype=np.uint8)
            # viewer.imshow(im)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(reset_op)
        display_obs(sess.run(obs_op))
        # For debugging, write the frozen model's graph for TensorBoard:
        # train_writer = tf.summary.FileWriter(LOGDIR)
        # train_writer.add_graph(sess.graph.as_graph_def())
        total_rew = 0
        num_frames = 0

        # Intermediate-layer activations; the penultimate layer (not the
        # output layer) serves as the high-level representation.
        activations = [T(layer['name']) for layer in model.layers]
        print(activations)
        high_level_rep = activations[-2]

        sample_observations = []    # (84, 84, 4)
        sample_frames = []          # (210, 160, 3)
        sample_ram = []             # (128,)
        sample_representation = []  # (1, 512)
        sample_score = []
        rewards = []

        # Step through the episode, recording observations, frames, RAM, and
        # the high-level representation for each frame.
        while True:
            rew, done = sess.run([rew_op, done_op])
            num_frames += 1
            total_rew += rew[0]
            obs = sess.run(obs_op)
            display_obs(obs)

            # sample_observations
            wrapped = sess.run(env.observation())
            wrapped = np.reshape(wrapped, (wrapped.shape[1], wrapped.shape[2], wrapped.shape[3]))
            sample_observations.append(wrapped)

            # sample_frames
            frame = obs[0, 0, :, :, :]
            sample_frames.append(frame)

            # sample_ram
            # sample_ram.append(env.unwrapped._get_ram())

            # sample_representation
            representation = sess.run(high_level_rep)
            representation = np.reshape(representation, (representation.shape[1], representation.shape[2]))
            sample_representation.append(representation)

            time.sleep(1 / 240)

            # When the episode ends, write everything to disk.
            if done[0]:
                print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                results = {
                    'observations': sample_observations,
                    'frames': sample_frames,
                    'ram': sample_ram,
                    'representation': sample_representation,
                    'score': sample_score,
                    'ep_rewards': rewards,
                }
                np.savez_compressed(LOGDIR + file_name + "_rollout", **results)
                break
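# Hedged sketch: reading back the rollout archive written by main() above,
# reusing the LOGDIR and file_name globals from that script.
# np.savez_compressed appends '.npz', so the file on disk is
# LOGDIR + file_name + '_rollout.npz'; keys match the `results` dict.
import numpy as np

data = np.load(LOGDIR + file_name + '_rollout.npz', allow_pickle=True)
print(data['frames'].shape)          # per-frame rendered images, e.g. (T, 210, 160, 3)
print(data['representation'].shape)  # per-frame high-level features, e.g. (T, 1, 512)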
import gym_tensorflow
from pathlib import Path

gamelist = """
adventure.bin air_raid.bin alien.bin amidar.bin assault.bin asterix.bin
asteroids.bin atlantis.bin bank_heist.bin battle_zone.bin beam_rider.bin
berzerk.bin bowling.bin boxing.bin breakout.bin carnival.bin centipede.bin
chopper_command.bin crazy_climber.bin defender.bin demon_attack.bin
double_dunk.bin elevator_action.bin enduro.bin fishing_derby.bin freeway.bin
frostbite.bin gopher.bin gravitar.bin hero.bin ice_hockey.bin jamesbond.bin
journey_escape.bin kaboom.bin kangaroo.bin krull.bin kung_fu_master.bin
montezuma_revenge.bin ms_pacman.bin name_this_game.bin phoenix.bin pitfall.bin
pong.bin pooyan.bin private_eye.bin qbert.bin riverraid.bin road_runner.bin
robotank.bin seaquest.bin skiing.bin solaris.bin space_invaders.bin
star_gunner.bin tennis.bin time_pilot.bin tutankham.bin up_n_down.bin
venture.bin video_pinball.bin wizard_of_wor.bin yars_revenge.bin zaxxon.bin
"""

games = [g.replace('.bin', '') for g in gamelist.split()]
skip_games = ['adventure', 'defender', 'kaboom']
games = set(games) - set(skip_games)

# Sanity check: build each env and print its action space.
for game in games:
    env = gym_tensorflow.make(game, batch_size=8)
    print(game, env.action_space)

#env = gym.make('CartPole-v0')
#env.reset()
#for _ in range(1000):
#    env.render()
#    env.step(env.action_space.sample())  # take a random action
#env.close()
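# Hedged sketch: stepping one of these batched TF environments with random
# actions, using only the ops exercised above (reset/step/observation). This
# assumes env.action_space is the number of discrete actions, as suggested
# by its use as the output width of make_net elsewhere in this repo.
import tensorflow as tf

env = gym_tensorflow.make('pong', batch_size=8)
action_op = tf.random_uniform([8], maxval=env.action_space, dtype=tf.int32)
rew_op, done_op = env.step(action_op)
with tf.Session() as sess:
    sess.run(env.reset())
    rew, done = sess.run([rew_op, done_op])
    print('rewards:', rew, 'done:', done)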
def main(game, filename=None, out_dir=None, model_name='LargeModel', add_text=False,
         num_runs=RUNS, layer=None):
    seeds = default_seeds
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb+') as file:
            state = pickle.load(file)
        #if hasattr(state, 'best_score'):
        #    seeds = state.best_score.seeds
        #    iteration = len(seeds)
        #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
        if hasattr(state, 'elite'):
            seeds = state.elite.seeds
            iteration = state.it
            print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
        else:
            seeds = None
            iteration = state.it
            print("Loading ES snapshot, iteration: {}".format(state.it))

    fourcc = cv.VideoWriter_fourcc(*'H264')
    env = gym_tensorflow.make(game, 1)
    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    input_op = tf.expand_dims(obs_op, axis=1)
    action_op = model.make_net(input_op, env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    out_vids = {'all': cv.VideoWriter(os.path.join(out_dir, 'all.mp4'), fourcc, 16,
                                      (VIDEO_SIZE, VIDEO_SIZE))}

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im, viz):
            if im.shape[1] > 1:
                # Merge the two most recent frames to reduce Atari flicker.
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            for key in out_vids.keys():
                # Combine into a fresh frame so `im` is not clobbered
                # between iterations.
                frame = combine_viz(im, viz, key)
                handle_frame(frame, out_vids[key], viewer, game, iteration, add_text)
    else:
        def display_obs(im, viz):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            for key in out_vids.keys():
                frame = combine_viz(im, viz, key)
                handle_frame(frame, out_vids[key], viewer, game, iteration, add_text)

    rewards = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)
        if seeds:
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))

        # One output video per visualized layer, plus the combined 'all' video.
        success, images = get_nn_images(sess, input_op, model)
        for key in images.keys():
            out_vids[key] = cv.VideoWriter(
                os.path.join(out_dir, '{}.mp4'.format(key.replace('/', '-'))),
                fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

        for i in range(num_runs):
            sess.run(reset_op)
            total_rew = 0
            num_frames = 0
            while True:
                img = sess.run(obs_op)
                success, images = get_nn_images(sess, input_op, model)
                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(img, images)
                # time.sleep(4 / 60)
                if done[0] or num_frames == 50:
                    rewards += [total_rew]
                    print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                    break
        print(rewards)
        print("Mean: ", np.mean(rewards))
        print("Std: ", np.std(rewards))

    for key in out_vids:
        out_vids[key].release()