Example #1
# Assumes module-level game, seeds, Model, SharedNoiseTable, and tlogger
# from the surrounding script.
import time

import numpy as np
import tensorflow as tf
from PIL import Image

import gym_tensorflow


def main():
    print('Number of mutations:', len(seeds))

    env = gym_tensorflow.make(game, 1)

    model = Model()
    obs_op = env.observation()
    reset_op = env.reset()

    action_op = model.make_net(tf.expand_dims(obs_op, axis=1), env.action_space, batch_size=1)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    from gym.envs.classic_control import rendering
    viewer = rendering.SimpleImageViewer()
    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()
        def display_obs(im):
            im = im[0, 0, ...]

            viewer.imshow(im)
    else:
        def display_obs(im):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)

            im = np.array(Image.fromarray(im).resize((256, 256), resample=Image.BILINEAR), dtype=np.uint8)
            viewer.imshow(im)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)

        noise = SharedNoiseTable()

        weights = model.compute_weights_from_seeds(noise, seeds)
        model.load(sess, 0, weights, seeds)

        sess.run(reset_op)
        display_obs(sess.run(obs_op))

        total_rew = 0
        num_frames = 0
        while True:
            rew, done = sess.run([rew_op, done_op])
            num_frames += 1
            total_rew += rew[0]
            display_obs(sess.run(obs_op))
            time.sleep(4/60)
            if done[0]:
                print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                break
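
The examples above share one pattern: build the environment's reset, observation, and step ops once, then drive them from a single tf.Session. A minimal sketch of that loop with a random policy instead of a trained model ('pong' is a placeholder game name, and env.action_space is assumed to be the number of discrete actions, as the tf.argmax usage above suggests):

import tensorflow as tf

import gym_tensorflow

env = gym_tensorflow.make('pong', batch_size=1)
reset_op = env.reset()
# Pick a uniformly random discrete action for the single environment in the batch.
action_op = tf.random_uniform([1], maxval=env.action_space, dtype=tf.int32)
rew_op, done_op = env.step(action_op)

with tf.Session() as sess:
    sess.run(reset_op)
    total_rew, num_frames = 0.0, 0
    while True:
        rew, done = sess.run([rew_op, done_op])
        total_rew += rew[0]
        num_frames += 1
        if done[0]:
            break
    print('Random-policy reward:', total_rew, 'after', num_frames, 'steps')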
Example #2
# Assumes baselines-style helpers (models.cnn_to_mlp, learn) and gym_tensorflow
# from the surrounding script.
def main(env, num_timesteps=int(10e6), dueling=True, **kwargs):
    env_f = lambda batch_size: gym_tensorflow.make(env, batch_size=batch_size)
    model = models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[512],
        dueling=bool(dueling),
    )
    act = learn(env_f,
                q_func=model,
                max_timesteps=int(num_timesteps),
                dueling=bool(dueling),
                **kwargs)
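
This mirrors the baselines-style dueling DQN setup: three conv layers (32x8s4, 64x4s2, 64x3s1), a 512-unit hidden layer, and a dueling head. A hypothetical invocation (the game name and timestep budget are placeholders; extra keyword arguments are forwarded to learn):

main('breakout', num_timesteps=int(2e6), dueling=True)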
Example #3
def make_env(b):
    tlogger.info('GA: Creating environment for game: %s' % config["game"])
    return gym_tensorflow.make(game=config["game"], batch_size=b)
Example #4
def make_env(b):
    return gym_tensorflow.make(game=exp["game"], batch_size=b)
Example #5
def make_env_game1(b):
    return gym_tensorflow.make(game=exp['games'][1], batch_size=b)
Example #6
def make_env(b):
    # note: b is ignored here; the environment is always built with batch_size=1
    return gym_tensorflow.make(game=game, batch_size=1)
Example #7
# Assumes module-level helpers from the surrounding script: get_model, handle_frame,
# SharedNoiseTable, default_seeds, RUNS, VIDEO_SIZE, and tlogger.
import pickle
import time

import cv2 as cv
import numpy as np
import tensorflow as tf

import gym_tensorflow


def main(game,
         filename=None,
         outfile=None,
         model_name="LargeModel",
         no_video=False,
         add_text=False,
         num_runs=RUNS,
         graph=None):

    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb') as file:
            state = pickle.load(file)
            #if hasattr(state, 'best_score'):
            #    seeds = state.best_score.seeds
            #    iteration = len(seeds)
            #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
            if hasattr(state, 'elite'):
                seeds = state.elite.seeds
                iteration = state.it
                print("Loading GA snapshot from elite, iteration: {} / {}",
                      len(seeds), iteration)
            else:
                seeds = None
                iteration = state.it
                print("Loading ES snapshot, iteration: {}".format(state.it))

    if outfile:
        fourcc = cv.VideoWriter_fourcc(*'MJPG')
        outvid = cv.VideoWriter(outfile, fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

    env = gym_tensorflow.make(game, 1)

    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:

        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)

        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    action_op = model.make_net(tf.expand_dims(obs_op, axis=1),
                               env.action_space,
                               batch_size=1,
                               ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    if not no_video:
        from gym.envs.classic_control import rendering
        viewer = rendering.SimpleImageViewer()

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            # pdb.set_trace()
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            handle_frame(im, outvid, viewer, game, iteration, add_text)
    else:

        def display_obs(im):
            # pdb.set_trace()
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            handle_frame(im, outvid, viewer, game, iteration, add_text)

    rewards = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)

        # import pdb
        # pdb.set_trace()
        if seeds:
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))

        if graph:
            tf.train.Saver().save(sess, graph)

        for i in range(num_runs):
            sess.run(reset_op)
            sess.run(obs_op)
            #recorder.capture_frame()
            display_obs(sess.run(obs_op))

            total_rew = 0
            num_frames = 0
            while True:
                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(sess.run(obs_op))
                time.sleep(4 / 60)
                if done[0]:
                    break

            rewards += [total_rew]
            print('Final reward: ', total_rew, 'after', num_frames, 'steps')

    print(rewards)
    print("Mean: ", np.mean(rewards))
    print("Std: ", np.std(rewards))

    if outvid:
        outvid.release()
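
handle_frame is defined elsewhere, but the OpenCV plumbing around outvid is standard: create one VideoWriter up front, write one uint8 frame per step, and release it at the end. A minimal standalone sketch, assuming 256x256 RGB frames (note that OpenCV expects BGR channel order):

import cv2 as cv
import numpy as np

fourcc = cv.VideoWriter_fourcc(*'MJPG')
writer = cv.VideoWriter('rollout.avi', fourcc, 16, (256, 256))
for _ in range(100):
    frame = np.zeros((256, 256, 3), dtype=np.uint8)  # stand-in for a rendered frame
    writer.write(cv.cvtColor(frame, cv.COLOR_RGB2BGR))  # convert RGB to OpenCV's BGR
writer.release()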
Example #8
# Assumes module-level game, file_name, LOGDIR, image_shape, and a lucid_model
# wrapper (lucid-style vision model) from the surrounding script.
import time

import numpy as np
import tensorflow as tf
from lucid.optvis.render import import_model

import gym_tensorflow


def main():
    env = gym_tensorflow.make(game, 1)

    # loading frozen model
    model = lucid_model()
    model.model_path = LOGDIR + file_name + '.pb'
    model.image_shape = image_shape
    model.input_scale = 1.0
    model.image_value_range = (0, 1)
    model.input_name = 'X_t'
    model.ph_type = 'float32'
    model.layers = layers = [  # names here must match the frozen model's layer names
        {
            'type': 'conv',
            'name': 'ga/Relu',
            'size': 32
        }, {
            'type': 'conv',
            'name': 'ga/Relu_1',
            'size': 64
        }, {
            'type': 'conv',
            'name': 'ga/Relu_2',
            'size': 64
        }, {
            'type': 'dense',
            'name': 'ga/Relu_3',
            'size': 512
        }, {
            'type': 'dense',
            'name': 'ga/Reshape_1',
            'size': 18
        }
    ]
    model.load_graphdef()
    model.save(LOGDIR + file_name + "2.pb")
    # ----------------------------------------------------------------
    obs_op = env.observation()
    reset_op = env.reset()

    T = import_model(model, obs_op, obs_op)
    action_op = T(model.layers[-1]['name'])
    # ----------------------------------------------------------------

    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    # viewer = rendering.SimpleImageViewer()
    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im):
            im = im[0, 0, ...]
            # viewer.imshow(im)
    else:

        def display_obs(im):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)

            im = np.array(Image.fromarray(im).resize((256, 256),
                                                     resample=Image.BILINEAR),
                          dtype=np.uint8)
            # viewer.imshow(im)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        sess.run(reset_op)
        display_obs(sess.run(obs_op))

        # for debugging purposes create graph of frozen model
        # train_writer = tf.summary.FileWriter(LOGDIR)
        # train_writer.add_graph(sess.graph.as_graph_def())

        total_rew = 0
        num_frames = 0

        # get intermediate level representations
        activations = [T(layer['name']) for layer in model.layers]
        print(activations)
        high_level_rep = activations[-2]  # the layer just before the output layer

        sample_observations = []  #(84, 84, 4)
        sample_frames = []  #(210, 160, 3)
        sample_ram = []  #(128,)
        sample_representation = []  #(1, 512)
        sample_score = []
        rewards = []

        # run through each frame and record observations, frames, RAM, and representations
        # cache the preprocessed-observation op once instead of rebuilding it each step
        wrapped_obs_op = env.observation()
        while True:
            rew, done = sess.run([rew_op, done_op])
            num_frames += 1
            total_rew += rew[0]
            obs = sess.run(obs_op)
            display_obs(obs)

            # sample_observations
            wrapped = sess.run(wrapped_obs_op)
            wrapped = np.reshape(
                wrapped,
                (wrapped.shape[1], wrapped.shape[2], wrapped.shape[3]))
            sample_observations.append(wrapped)

            # sample_frames
            frame = obs[0, 0, :, :, :]
            sample_frames.append(frame)

            # sample_ram
            # sample_ram.append(env.unwrapped._get_ram())

            # sample_representation
            representation = sess.run(high_level_rep)
            representation = np.reshape(
                representation,
                (representation.shape[1], representation.shape[2]))
            sample_representation.append(representation)

            time.sleep(1 / 240)

            # when complete, write to file
            if done[0]:
                print('Final reward: ', total_rew, 'after', num_frames,
                      'steps')
                results = {
                    'observations': sample_observations,
                    'frames': sample_frames,
                    'ram': sample_ram,
                    'representation': sample_representation,
                    'score': sample_score,
                    'ep_rewards': rewards
                }
                np.savez_compressed(LOGDIR + file_name + "_rollout", **results)
                break
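
Reading the rollout back is symmetric: np.savez_compressed stacks each uniform list into one array under its keyword name. A minimal sketch of loading it (the path mirrors the snippet above; add allow_pickle=True only if the saved entries are ragged):

import numpy as np

data = np.load(LOGDIR + file_name + '_rollout.npz')
print(data.files)                        # ['observations', 'frames', 'ram', ...]
representation = data['representation']  # one 512-d activation per recorded frame
print(representation.shape)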
Example #9
import gym_tensorflow
from pathlib import Path

gamelist = """
adventure.bin air_raid.bin alien.bin amidar.bin assault.bin asterix.bin asteroids.bin atlantis.bin bank_heist.bin battle_zone.bin beam_rider.bin berzerk.bin bowling.bin boxing.bin breakout.bin carnival.bin centipede.bin chopper_command.bin crazy_climber.bin defender.bin demon_attack.bin double_dunk.bin elevator_action.bin enduro.bin fishing_derby.bin freeway.bin frostbite.bin gopher.bin gravitar.bin hero.bin ice_hockey.bin jamesbond.bin journey_escape.bin kaboom.bin kangaroo.bin krull.bin kung_fu_master.bin montezuma_revenge.bin ms_pacman.bin name_this_game.bin phoenix.bin pitfall.bin pong.bin pooyan.bin private_eye.bin qbert.bin riverraid.bin road_runner.bin robotank.bin seaquest.bin skiing.bin solaris.bin space_invaders.bin star_gunner.bin tennis.bin time_pilot.bin tutankham.bin up_n_down.bin venture.bin video_pinball.bin wizard_of_wor.bin yars_revenge.bin zaxxon.bin
"""

games = [g.replace('.bin', '') for g in gamelist.split()]
skip_games = ['adventure', 'defender', 'kaboom']
games = set(games) - set(skip_games)

for game in games:
    env = gym_tensorflow.make(game, batch_size=8)
    print(game, env.action_space)
#env = gym.make('CartPole-v0')
#env.reset()
#for _ in range(1000):
#    env.render()
#    env.step(env.action_space.sample()) # take a random action
#env.close()
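
The snippet hard-codes the ROM list and leaves the Path import unused. An alternative, assuming the Atari .bin ROMs live in a local directory (rom_dir below is a hypothetical path), is to derive the same list with pathlib:

from pathlib import Path

rom_dir = Path('atari-roms')  # hypothetical directory of .bin ROM files
games = sorted(p.stem for p in rom_dir.glob('*.bin'))
skip_games = {'adventure', 'defender', 'kaboom'}
games = [g for g in games if g not in skip_games]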
Example #10
# Assumes module-level helpers from the surrounding script: get_model, get_nn_images,
# combine_viz, handle_frame, SharedNoiseTable, default_seeds, RUNS, VIDEO_SIZE, and tlogger.
import os

import cv2 as cv
import numpy as np
import tensorflow as tf

import gym_tensorflow


def main(game, filename=None, out_dir=None, model_name='LargeModel',
         add_text=False, num_runs=RUNS, layer=None):

    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        with open(filename, 'rb') as file:
            state = pickle.load(file)
            #if hasattr(state, 'best_score'):
            #    seeds = state.best_score.seeds
            #    iteration = len(seeds)
            #    print("Loading GA snapshot from best_score, iteration: ", len(seeds))
            if hasattr(state, 'elite'):
                seeds = state.elite.seeds
                iteration = state.it
                print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
            else:
                seeds = None
                iteration = state.it
                print("Loading ES snapshot, iteration: {}", state.it)

    fourcc = cv.VideoWriter_fourcc(*'H264')

    env = gym_tensorflow.make(game, 1)

    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    input_op = tf.expand_dims(obs_op, axis=1)
    action_op = model.make_net(input_op, env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    out_vids = {'all': cv.VideoWriter(os.path.join(out_dir, 'all.mp4'),
                                      fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))}

    if hasattr(env.unwrapped, 'render'):
        obs_op = env.unwrapped.render()

        def display_obs(im, viz):
            # pdb.set_trace()
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            for key in out_vids.keys():
                frame = combine_viz(im, viz, key)  # keep im intact across keys
                handle_frame(frame, out_vids[key], viewer, game, iteration, add_text)
    else:
        def display_obs(im, viz):
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            for key in out_vids.keys():
                frame = combine_viz(im, viz, key)  # keep im intact across keys
                handle_frame(frame, out_vids[key], viewer, game, iteration, add_text)

    rewards = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.initialize()
        tlogger.info(model.description)

        if seeds:
            noise = SharedNoiseTable()
            weights = model.compute_weights_from_seeds(noise, seeds)
            model.load(sess, 0, weights, seeds)
        else:
            weights = state.theta
            model.load(sess, 0, weights, (weights, 0))

        success, images = get_nn_images(sess, input_op, model)

        for key in images.keys():
            out_vids[key] = cv.VideoWriter(
                os.path.join(out_dir, '{}.mp4'.format(key.replace('/', '-'))),
                fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

        for i in range(num_runs):
            sess.run(reset_op)
            # recorder.capture_frame()

            total_rew = 0
            num_frames = 0
            while True:
                img = sess.run(obs_op)
                success, images = get_nn_images(sess, input_op, model)

                rew, done = sess.run([rew_op, done_op])
                num_frames += 1
                total_rew += rew[0]
                display_obs(img, images)
                # time.sleep(4/60)
                if done[0] or num_frames == 50:
                    rewards += [total_rew]
                    print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                    break

    print(rewards)
    print("Mean: ", np.mean(rewards))
    print("Std: ", np.std(rewards))

    for key in out_vids:
        out_vids[key].release()