Code Example #1
def create_saliency(model_idx, sess):
    graph = tf.get_default_graph()
    env = utils.make_general_env(1)
    env = wrappers.add_final_wrappers(env)
    agent = create_act_model(sess, env, 1)
    action_selector = tf.placeholder(tf.int32)
    gradient_saliency = saliency.GradientSaliency(graph, sess, agent.pd.logits[0][action_selector], agent.X)
    sess.run(tf.compat.v1.global_variables_initializer())

    # setup_utils.restore_file(models[model_idx])
    try:
        loaded_params = utils.load_params_for_scope(sess, 'model')
        if not loaded_params:
            print('NO SAVED PARAMS LOADED')
    except AssertionError as e:
        models[model_idx] = None
        return [None] * 3
    return agent, gradient_saliency, action_selector
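
The gradient_saliency object above comes from the PAIR-code saliency library (TF1 API). A minimal usage sketch, assuming that API (GetMask, VisualizeImageGrayscale) and that obs_frame is a single observation array (H, W, C) taken from the same environment; the model index and action choice are placeholders:

# Usage sketch (assumptions: PAIR-code saliency TF1 API; obs_frame is one observation from the env).
agent, gradient_saliency, action_selector = create_saliency(0, sess)
chosen_action = 0  # index of the logit to attribute
mask = gradient_saliency.GetMask(obs_frame, feed_dict={action_selector: chosen_action})
grayscale_mask = saliency.VisualizeImageGrayscale(mask)  # collapse channels for display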
Code Example #2
File: test_agent.py Project: Desein-Yang/GARL
def test(sess, load_path, env, should_render=False, rep_count=Config.REP):
    rank = MPI.COMM_WORLD.Get_rank()
    size = MPI.COMM_WORLD.Get_size()

    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    if should_eval:
        #env = utils.make_general_env(Config.NUM_EVAL)
        should_render = False
    else:
        env = utils.make_general_env(1)

    env = wrappers.add_final_wrappers(env)

    if should_render:
        from gym.envs.classic_control import rendering

    nenvs = env.num_envs

    model = load_model(sess, load_path)

    agent = create_act_model(sess, env, nenvs)

    sess.run(tf.global_variables_initializer())
    loaded_params = utils.load_params_for_scope(sess, 'model')

    if not loaded_params:
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    t_step = 0

    if should_render:
        viewer = rendering.SimpleImageViewer()

    should_render_obs = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        if should_render and not should_render_obs:
            env.render()

    maybe_render()

    scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)
    curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    while should_continue():
        action, values, state, _ = agent.step(obs, state, done)
        obs, rew, done, info = env.step(action)

        if should_render and should_render_obs:
            if np.shape(obs)[-1] % 3 == 0:
                ob_frame = obs[0, :, :, -3:]
            else:
                ob_frame = obs[0, :, :, -1]
                ob_frame = np.stack([ob_frame] * 3, axis=2)
            viewer.imshow(ob_frame)

        curr_rews[:, 0] += rew

        for i, d in enumerate(done):
            if d:
                if score_counts[i] < rep_count:
                    score_counts[i] += 1

                    if 'episode' in info[i]:
                        scores[i] += info[i].get('episode')['r']

        if t_step % 100 == 0:
            mpi_print('t', t_step, values[0], done[0], rew[0], curr_rews[0],
                      np.shape(obs))

        maybe_render(info[0])

        t_step += 1

        if should_render:
            time.sleep(.02)

        if done[0]:
            if should_render:
                mpi_print('ep_rew', curr_rews)

            curr_rews[:] = 0

    steps_elapsed = t_step

    result = {
        'steps_elapsed': steps_elapsed,
    }

    if should_eval:
        testset_size = rep_count * nenvs
        mean_score = np.sum(scores) / testset_size
        succ_rate = np.sum(scores == 10.0) / testset_size
        max_idx = np.argmax(scores)
        mpi_print('max idx', max_idx)
        mpi_print('steps_elapsed', steps_elapsed)
        if size > 1:
            mean_score = utils.mpi_average([mean_score])
        mpi_print('mpi_mean', mean_score)
        wandb.log({'Test_Rew_mean': mean_score, 'Test_Succ_rate': succ_rate})
        result['scores'] = scores
        result['testset_size'] = testset_size
        result['test_rew_mean'] = mean_score
        result['test_succ_rate'] = succ_rate

    return result
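
A minimal driver sketch for calling test(), assuming the same helpers used inside it (utils.make_general_env, Config) plus a standard TF1 session; the checkpoint path is a hypothetical placeholder, not the project's real entry point:

# Hypothetical driver for test(); the path and session setup are assumptions.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True

with tf.compat.v1.Session(config=config) as sess:
    env = utils.make_general_env(Config.NUM_EVAL)
    result = test(sess, load_path='checkpoints/model.pkl', env=env, should_render=False)
    print(result)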
Code Example #3
def enjoy_env_sess(sess):
    should_render = True
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    rep_count = Config.REP

    if should_eval:
        env = utils.make_general_env(Config.NUM_EVAL)
        should_render = False
    else:
        env = utils.make_general_env(1)

    env = wrappers.add_final_wrappers(env)

    if should_render:
        from gym.envs.classic_control import rendering

    nenvs = env.num_envs

    agent = create_act_model(sess, env, nenvs)

    sess.run(tf.global_variables_initializer())
    loaded_params = utils.load_params_for_scope(sess, 'model')

    if not loaded_params:
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    t_step = 0

    if should_render:
        viewer = rendering.SimpleImageViewer()

    should_render_obs = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        if should_render and not should_render_obs:
            env.render()

    maybe_render()

    scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)
    curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    while should_continue():
        action, values, state, _ = agent.step(obs, state, done)
        obs, rew, done, info = env.step(action)

        if should_render and should_render_obs:
            if np.shape(obs)[-1] % 3 == 0:
                ob_frame = obs[0, :, :, -3:]
            else:
                ob_frame = obs[0, :, :, -1]
                ob_frame = np.stack([ob_frame] * 3, axis=2)
            viewer.imshow(ob_frame)

        curr_rews[:, 0] += rew

        for i, d in enumerate(done):
            if d:
                if score_counts[i] < rep_count:
                    score_counts[i] += 1

                    if 'episode' in info[i]:
                        scores[i] += info[i].get('episode')['r']

        if t_step % 100 == 0:
            mpi_print('t', t_step, values[0], done[0], rew[0], curr_rews[0],
                      np.shape(obs))

        maybe_render(info[0])

        t_step += 1

        if should_render:
            time.sleep(.02)

        if done[0]:
            if should_render:
                mpi_print('ep_rew', curr_rews)

            curr_rews[:] = 0

    result = 0

    if should_eval:
        mean_score = np.mean(scores) / rep_count
        max_idx = np.argmax(scores)
        mpi_print('scores', scores / rep_count)
        print('mean_score', mean_score)
        mpi_print('max idx', max_idx)

        mpi_mean_score = utils.mpi_average([mean_score])
        mpi_print('mpi_mean', mpi_mean_score)

        result = mean_score

    return result
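
Note that the evaluation arithmetic here, np.mean(scores) / rep_count, matches np.sum(scores) / (rep_count * nenvs) used in Code Example #2, since np.mean already divides by nenvs. A quick check with illustrative numbers:

# Sanity check that the two per-episode mean formulas agree (illustrative values only).
import numpy as np

nenvs, rep_count = 4, 3
scores = np.array([30.0, 20.0, 10.0, 0.0])  # summed episode rewards per env over rep_count episodes

mean_a = np.mean(scores) / rep_count             # formula used in Code Example #3
mean_b = np.sum(scores) / (rep_count * nenvs)    # formula used in Code Example #2
assert np.isclose(mean_a, mean_b)                # both equal 5.0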
Code Example #4
def enjoy_env_sess(sess, DIR_NAME):
    should_render = True
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    rep_count = Config.REP

    file_name = '%s/%s.txt' % (DIR_NAME, Config.RESTORE_ID)
    f_io = open(file_name, 'a')

    if should_eval:
        if Config.TEST_NUM_EVAL > -1:
            env = utils.make_general_env(Config.TEST_NUM_EVAL)
        else:
            env = utils.make_general_env(Config.NUM_EVAL)
        should_render = False
    else:
        env = utils.make_general_env(1)

    env = wrappers.add_final_wrappers(env)

    if should_render:
        from gym.envs.classic_control import rendering

    nenvs = env.num_envs

    vae = ConvVAE(z_size=Config.VAE_Z_SIZE,
                  batch_size=nenvs,
                  is_training=False,
                  reuse=False,
                  gpu_mode=True,
                  use_coord_conv=True)
    agent = create_act_model(sess, env, nenvs, Config.VAE_Z_SIZE)
    num_actions = env.action_space.n

    init_rand = tf.variables_initializer(
        [v for v in tf.global_variables() if 'randcnn' in v.name])
    sess.run(tf.compat.v1.global_variables_initializer())

    soft_numpy = tf.placeholder(tf.float32, [nenvs, num_actions],
                                name='soft_numpy')
    dist = tfp.distributions.Categorical(probs=soft_numpy)
    sampled_action = dist.sample()

    loaded_params = utils.load_params_for_scope(sess, 'model')
    vae.load_json_full(Config.VAE_PATH)

    if not loaded_params:
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    t_step = 0

    if should_render:
        viewer = rendering.SimpleImageViewer()

    should_render_obs = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        if should_render and not should_render_obs:
            env.render()

    maybe_render()

    scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)
    curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    actions = [env.action_space.sample() for _ in range(nenvs)]
    actions = np.array(actions)
    obs, _, _, _ = env.step(actions)

    sess.run(init_rand)
    while should_continue():

        #scipy.misc.imsave('raw_inputs.png', obs[0])
        encoder_in = obs.astype(np.float32) / 255.0
        batch_z = vae.encode(encoder_in)
        #reconstruct = vae.decode(batch_z)
        #scipy.misc.imsave('recon.png', reconstruct[0])

        action, values, state, _ = agent.step(batch_z, state, done)
        obs, rew, done, info = env.step(action)

        if should_render and should_render_obs:
            if np.shape(obs)[-1] % 3 == 0:
                ob_frame = obs[0, :, :, -3:]
            else:
                ob_frame = obs[0, :, :, -1]
                ob_frame = np.stack([ob_frame] * 3, axis=2)
            viewer.imshow(ob_frame)

        curr_rews[:, 0] += rew

        for i, d in enumerate(done):
            if d:
                if score_counts[i] < rep_count:
                    score_counts[i] += 1

                    if 'episode' in info[i]:
                        scores[i] += info[i].get('episode')['r']

        maybe_render(info[0])

        t_step += 1

        if should_render:
            time.sleep(.02)

        if done[0]:
            if should_render:
                mpi_print('ep_rew', curr_rews)

            curr_rews[:] = 0

    result = 0

    if should_eval:
        mean_score = np.mean(scores) / rep_count
        max_idx = np.argmax(scores)

        result = mean_score

        f_io.write("{}\n".format(result))
        f_io.close()

    return result
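
In this variant soft_numpy, dist, and sampled_action are built but never run; Code Example #7 shows the intended sampling path. A minimal sketch of that path, assuming agent.get_softmax returns an (nenvs, num_actions) array of action probabilities (its exact signature is an assumption):

# Sketch of sampling actions from the policy softmax via the Categorical distribution above,
# as done in Code Example #7.
total_soft = agent.get_softmax(obs, state, done)             # (nenvs, num_actions) probabilities
action = sess.run(sampled_action, {soft_numpy: total_soft})  # one sampled action per env
obs, rew, done, info = env.step(action)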
Code Example #5
def main(sess):

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    seed = int(time.time()) % 10000

    if Config.EXTRACT_SEED != -1:
        seed = Config.EXTRACT_SEED
    if Config.EXTRACT_RANK != -1:
        rank = Config.EXTRACT_RANK

    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True # pylint: disable=E1101

    use_policy = (Config.RESTORE_ID != '')

    nenvs = Config.NUM_ENVS
    total_timesteps = int(502e6)
    env = utils.make_general_env(nenvs, seed=rank)

    if use_policy:
        agent = create_act_model(sess, env, nenvs)
        sess.run(tf.compat.v1.global_variables_initializer())
        loaded_params = utils.load_params_for_scope(sess, 'model')
        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

    # make directory
    DIR_NAME = './VAE/records/'
    if not os.path.exists(DIR_NAME):
        os.makedirs(DIR_NAME, exist_ok=True)
    
    # set file name
    filename = DIR_NAME+"/"+Config.get_save_file()+"_"+str(seed * 100 + rank)+".npz"
    
    with tf.compat.v1.Session(config=config):
        env = wrappers.add_final_wrappers(env)
        nenv = env.num_envs if hasattr(env, 'num_envs') else 1
        obs = np.zeros((nenv,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
        obs[:] = env.reset()
        dones = [False for _ in range(nenv)]
        
        # remove noisy inputs
        actions = [env.action_space.sample() for _ in range(nenv)]
        actions = np.array(actions)
        obs[:], rewards, dones, _ = env.step(actions)
        state = agent.initial_state if use_policy else None
        
        mb_obs, mb_rewards, mb_actions, mb_next_obs, mb_dones = [],[],[],[],[]
        # For n in range number of steps
        for _ in range(400):
            # Given observations, get action values and neglogpacs
            # obs is already populated because env.reset() was called above
            if use_policy:
                actions, _, _, _ = agent.step(obs, state, dones)
            else:
                actions = [env.action_space.sample() for _ in range(nenv)]
            actions = np.array(actions)
            mb_obs.append(obs.copy())
            mb_actions.append(actions)
            mb_dones.append(dones)
            
            # Take actions in the env and look at the results
            # (the info dicts, discarded here, contain additional episode data)
            obs[:], rewards, dones, _ = env.step(actions)
            mb_next_obs.append(obs.copy())
            mb_rewards.append(rewards)
        #batch of steps to batch of rollouts
        mb_obs = np.asarray(mb_obs, dtype=obs.dtype)
        mb_next_obs = np.asarray(mb_next_obs, dtype=obs.dtype)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
        mb_actions = np.asarray(mb_actions)
        mb_dones = np.asarray(mb_dones, dtype=bool)
        
        #np.savez_compressed(filename, obs=mb_obs, action=mb_actions, next_obs=mb_next_obs, reward=mb_rewards, dones=mb_dones)
        np.savez_compressed(filename, obs=mb_obs)
        return filename
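
The rollout above saves only the observation tensor to the .npz file. A small sketch of reading it back, e.g. as a batch for VAE training, assuming the file returned by main():

# Load the saved rollout observations back (illustrative).
import numpy as np

data = np.load(filename)                              # filename as returned by main()
mb_obs = data['obs']                                  # shape: (400, nenv, H, W, C)
flat_obs = mb_obs.reshape((-1,) + mb_obs.shape[2:])   # merge the step and env axes into one batch
print(flat_obs.shape, flat_obs.dtype)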
Code Example #6
def enjoy_env_sess(sess, checkpoint, overlap):
    #base_name = str(8*checkpoint)  + 'M'
    #load_file = setup_utils.restore_file(Config.RESTORE_ID,base_name=base_name)
    should_eval = True
    mpi_print('test levels seed', Config.SET_SEED)
    mpi_print('test levels ', Config.NUM_LEVELS)
    rep_count = 50

    env = utils.make_general_env(20)
    env = wrappers.add_final_wrappers(env)
    nenvs = env.num_envs

    sess.run(tf.global_variables_initializer())
    args_now = Config.get_args_dict()
    #args_run = utils.load_args()
    agent = create_act_model(sess, env, nenvs)

    # load file is specified by Config.RESTORE_ID; loading returns True/False
    if checkpoint == 0:
        mean_score = 0.0
        succ_rate = 0.0
        steps_elapsed = 0
        wandb.log({
            'Rew_mean': mean_score,
            'Succ_rate': succ_rate,
            'Step_elapsed': steps_elapsed
        })
        return mean_score, succ_rate
    elif checkpoint != 32:
        base_name = str(8 * checkpoint) + 'M'
    else:
        base_name = None

    sess.run(tf.global_variables_initializer())
    # env init here
    load_file = setup_utils.restore_file(Config.RESTORE_ID,
                                         overlap_config=overlap,
                                         base_name=base_name)

    is_loaded = utils.load_params_for_scope(sess, 'model')
    if not is_loaded:
        mpi_print('NO SAVED PARAMS LOADED')
        return 0.0, 0.0

    obs = env.reset()
    t_step = 0

    scores = np.zeros((nenvs, rep_count))
    eplens = np.zeros((nenvs, rep_count))
    #scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)

    # curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    def rollout(obs, state, done):
        """rollout for rep * nenv times and return scores"""
        t = 0
        count = 0
        rews = np.zeros((nenvs, rep_count))
        while should_continue():
            action, values, state, _ = agent.step(obs, state, done)
            obs, rew, done, info = env.step(action)
            rews[:, count] += rew
            t += 1

            for i, d in enumerate(done):
                if d:
                    eplens[i][count] = t
                    if score_counts[i] < rep_count:
                        score_counts[i] += 1
                        count = score_counts[i] - 1
                        # aux score
                        if 'episode' in info[i]:
                            scores[i][count] = info[i].get('episode')['r']

        return scores, rews, eplens

    if is_loaded:
        mpi_print(load_file)
        scores, rews, eplens = rollout(obs, state, done)

    size = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    if size == 1:
        if rank == 0:
            testset_size = rep_count * nenvs
            utils.save_pickle(scores, Config.LOGDIR + 'scores')
            mean_score = np.sum(scores) / testset_size
            succ_rate = np.sum(scores == 10.0) / testset_size
            mpi_print('cpus ', size)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is rep_counts * nenvs
            # each one has a new seed(maybe counted)
            # mpi_print('score detail',scores.flatten())
            mpi_print('succ_rate', succ_rate)
            steps_elapsed = checkpoint * 8000000
            mpi_print('steps_elapsed:', steps_elapsed)
            mpi_print('mean score', mean_score)
            wandb.log({
                'Rew_mean': mean_score,
                'Succ_rate': succ_rate,
                'Step_elapsed': steps_elapsed
            })
            #mpi_print('mean score of each env',[np.mean(s) for s in scores])
    else:
        testset_size = rep_count * nenvs
        succ = np.sum(scores == 10.0) / testset_size
        succ_rate = utils.mpi_average([succ])
        mean_score_tmp = np.sum(scores) / testset_size
        mean_score = utils.mpi_average([mean_score_tmp])
        if rank == 0:
            mpi_print('testset size', rep_count * nenvs * size)
            mpi_print('load file name', load_file)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is rep_counts * nenvs
            # each one has a new seed(maybe counted)
            # mpi_print('score detail',scores.flatten())
            mpi_print('succ_rate', succ_rate)
            mpi_print('mean score', mean_score)
            wandb.log({'Rew_mean': mean_score, 'Succ_rate': succ_rate})

    return mean_score, succ_rate
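
utils.mpi_average is not shown in these snippets; a plausible sketch of such a helper with mpi4py, assuming it simply averages scalar values across all ranks (the project's actual implementation may differ):

# Illustrative stand-in for utils.mpi_average, not the project's implementation.
from mpi4py import MPI
import numpy as np

def mpi_average_sketch(values):
    comm = MPI.COMM_WORLD
    local = np.asarray(values, dtype=np.float64)
    total = np.zeros_like(local)
    comm.Allreduce(local, total, op=MPI.SUM)  # element-wise sum across ranks
    return (total / comm.Get_size()).tolist()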
Code Example #7
def enjoy_env_sess(sess, DIR_NAME):
    should_render = True
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    rep_count = Config.REP
    mpi_print = utils.mpi_print

    file_name = '%s/%s.txt' % (DIR_NAME, Config.RESTORE_ID)
    f_io = open(file_name, 'a')

    if should_eval:
        if Config.TEST_NUM_EVAL > -1:
            env = utils.make_general_env(Config.TEST_NUM_EVAL)
        else:
            env = utils.make_general_env(Config.NUM_EVAL)
        should_render = False
    else:
        env = utils.make_general_env(1)

    env = wrappers.add_final_wrappers(env)

    if should_render:
        from gym.envs.classic_control import rendering

    nenvs = env.num_envs

    agent = create_act_model(sess, env, nenvs)
    num_actions = env.action_space.n

    init_rand = tf.variables_initializer(
        [v for v in tf.global_variables() if 'randcnn' in v.name])
    sess.run(tf.compat.v1.global_variables_initializer())

    soft_numpy = tf.placeholder(tf.float32, [nenvs, num_actions],
                                name='soft_numpy')
    dist = tfp.distributions.Categorical(probs=soft_numpy)
    sampled_action = dist.sample()

    loaded_params = utils.load_params_for_scope(sess, 'model')

    if not loaded_params:
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    t_step = 0

    if should_render:
        viewer = rendering.SimpleImageViewer()

    should_render_obs = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        if should_render and not should_render_obs:
            env.render()

    maybe_render()

    scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)
    curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    sess.run(init_rand)
    while should_continue():
        if Config.USE_LSTM == 8425 or Config.USE_LSTM == 1081:
            q_actions, values, state, _ = agent.step(obs, state, done)
            # e-greedy
            greedy_flag = np.random.rand(q_actions.shape[0]) < 0.1
            greedy_flag = greedy_flag.astype(int)
            random_actions = np.random.randint(0,
                                               num_actions,
                                               size=q_actions.shape[0])
            action = random_actions * greedy_flag + (1 - greedy_flag) * q_actions
        else:
            total_soft = agent.get_softmax(obs, state, done)
            action = sess.run([sampled_action], {soft_numpy: total_soft})
            action = action[0]
            #action, values, state, _ = agent.step(obs, state, done)

        obs, rew, done, info = env.step(action)
        #scipy.misc.imsave('raw_inputs.png', obs[0])
        #print(dd)

        if should_render and should_render_obs:
            if np.shape(obs)[-1] % 3 == 0:
                ob_frame = obs[0, :, :, -3:]
            else:
                ob_frame = obs[0, :, :, -1]
                ob_frame = np.stack([ob_frame] * 3, axis=2)
            viewer.imshow(ob_frame)

        curr_rews[:, 0] += rew

        for i, d in enumerate(done):
            if d:
                if score_counts[i] < rep_count:
                    score_counts[i] += 1

                    if 'episode' in info[i]:
                        scores[i] += info[i].get('episode')['r']

        maybe_render(info[0])

        t_step += 1

        if should_render:
            time.sleep(.02)

        if done[0]:
            if should_render:
                mpi_print('ep_rew', curr_rews)

            curr_rews[:] = 0

    result = 0

    if should_eval:
        mean_score = np.mean(scores) / rep_count
        max_idx = np.argmax(scores)

        result = mean_score

        f_io.write("{}\n".format(result))
        f_io.close()

    return result
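
The e-greedy mixing in the LSTM branch above can be written more compactly with np.where; an equivalent sketch with epsilon = 0.1 as in the example:

# Equivalent vectorized epsilon-greedy selection (illustrative).
epsilon = 0.1
explore = np.random.rand(q_actions.shape[0]) < epsilon
random_actions = np.random.randint(0, num_actions, size=q_actions.shape[0])
action = np.where(explore, random_actions, q_actions)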