Example #1
def test(env_id, policy_name, seed, nstack=1, numAgents=2):
    iters = 100
    rwd = []
    percent_exp = []
    env = gym.make(env_id)
    env.seed(seed)
    print("logger dir: ", logger.get_dir())
    # Monitor records episode stats when a logger directory is configured;
    # os.path.join with a single argument was a no-op, so pass the dir directly
    env = bench.Monitor(env, logger.get_dir())
    continuous_actions = False  # must be set before its use just below
    if env_id == 'Pendulum-v0':
        if continuous_actions:
            env.action_space.n = env.action_space.shape[0]
        else:
            env.action_space.n = 10
    gym.logger.setLevel(logging.WARN)
    img_shape = (84, 84, 3)
    ob_space = spaces.Box(low=0, high=255, shape=img_shape)
    ac_space = env.action_space

    # def get_img(env):
    #     ax, img = env.get_img()
    #     return ax, img

    # def process_img(img):
    #     img = rgb2grey(copy.deepcopy(img))
    #     img = resize(img, img_shape)
    #     return img

    policy_fn = policy_fn_name(policy_name)

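    # A2C-style hyperparameters: RMSProp smoothing (alpha) and epsilon,
    # entropy bonus (ent_coef), and value-loss weight (vf_coef)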
    nsteps = 5
    total_timesteps = int(80e6)
    vf_coef = 0.5
    ent_coef = 0.01
    max_grad_norm = 0.5
    lr = 7e-4
    lrschedule = 'linear'
    epsilon = 1e-5
    alpha = 0.99
    debug = False
    if numAgents == 1:
        model = Model(policy=policy_fn,
                      ob_space=ob_space,
                      ac_space=ac_space,
                      nenvs=1,
                      nsteps=nsteps,
                      nstack=nstack,
                      num_procs=1,
                      ent_coef=ent_coef,
                      vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm,
                      lr=lr,
                      alpha=alpha,
                      epsilon=epsilon,
                      total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions,
                      debug=debug)
        m_name = 'test_model_Mar7_1mil.pkl'
        model.load(m_name)
    else:
        model = []
        for i in range(numAgents):
            model.append(
                Model(policy=policy_fn,
                      ob_space=ob_space,
                      ac_space=ac_space,
                      nenvs=1,
                      nsteps=nsteps,
                      nstack=nstack,
                      num_procs=1,
                      ent_coef=ent_coef,
                      vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm,
                      lr=lr,
                      alpha=alpha,
                      epsilon=epsilon,
                      total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions,
                      debug=debug,
                      itr=i))
        for i in range(numAgents):
            m_name = 'test_model_' + str(i) + '_300k.pkl'  # + '100k.pkl'
            model[i].load(m_name)
            print('---------------------------------------------')
            print("Successfully Loaded: ", m_name)
            print('---------------------------------------------')

    env.env, img = env.reset()
    rwd = [[] for _ in range(numAgents)]          # per-agent episode rewards
    percent_exp = [[] for _ in range(numAgents)]  # per-agent exploration stats
    for i in range(1, iters + 1):
        if i % 10 == 0:
            for j in range(numAgents):
                print('-----------------------------------')
                print('Agent ' + str(j))
                print('Iteration: ', i)
                # average over completed episodes; an episode only records
                # stats if it terminates within the 1000-step limit
                n_eps = max(len(rwd[j]), 1)
                avg_rwd = sum(rwd[j]) / n_eps
                avg_pct_exp = sum(percent_exp[j]) / n_eps
                med_pct_exp = (statistics.median(percent_exp[j])
                               if percent_exp[j] else 0.0)
                print('Average Reward: ', avg_rwd)
                print('Average Percent Explored: ', avg_pct_exp, '%')
                print('Median Percent Explored: ', med_pct_exp)
                print('-----------------------------------')
        # ax, img = get_img(env)
        img_hist = []
        frames_dir = []
        for j in range(numAgents):
            frames_dir.append('exp_frames' + str(j * 100 + i + 200))
            if os.path.exists(frames_dir[j]):
                # raise ValueError('Frames directory already exists.')
                shutil.rmtree(frames_dir[j])
            os.makedirs(frames_dir[j])
            # seed agent j's frame history with nstack copies of its frame
            img_hist.append(deque([img[j] for _ in range(nstack)],
                                  maxlen=nstack))
        action = 0
        total_rewards = [0] * numAgents
        for tidx in range(1000):
            # if tidx % nstack == 0:
            for j in range(numAgents):
                if tidx > 0:
                    # stack agent j's own frame history along the last axis
                    # (the original stacked the whole img_hist list, which
                    # fed both agents the first agent's frames), then add a
                    # batch dimension for the policy network
                    input_imgs = np.expand_dims(
                        np.squeeze(np.stack(img_hist[j], -1)), 0)
                    actions, values, states = model[j].step_model.step(
                        input_imgs)
                    action = actions[0]
                    value = values[0]

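                # this custom env advances one agent at a time via step(action, agent_idx)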
                img, reward, done, _ = env.step(action, j)
                total_rewards[j] += reward
                # img = get_img(env)
                img_hist[j].append(img[j])
                imsave(
                    os.path.join(frames_dir[j],
                                 'frame_{:04d}.png'.format(tidx)),
                    resize(img[j], (img_shape[0], img_shape[1], 3)))
            # print(tidx, '\tAction: ', action, '\tValues: ', value, '\tRewards: ', reward, '\tTotal rewards: ', total_rewards)#, flush=True)
            if done:
                # print('Faltered at tidx: ', tidx)
                for j in range(numAgents):
                    rwd[j].append(total_rewards[j])
                    percent_exp[j].append(env.env.percent_explored[j])
                env.env, img = env.reset()  # reset before the next episode
                break
    for i in range(numAgents):
        print('-----------------------------------')
        print('Agent ' + str(i))
        print('Iteration: ', iters)
        # average over completed episodes rather than the raw iteration count
        n_eps = max(len(rwd[i]), 1)
        avg_rwd = sum(rwd[i]) / n_eps
        avg_pct_exp = sum(percent_exp[i]) / n_eps
        med_pct_exp = (statistics.median(percent_exp[i])
                       if percent_exp[i] else 0.0)
        print('Average Reward: ', avg_rwd)
        print('Average Percent Explored: ', avg_pct_exp, '%')
        print('Median Percent Explored: ', med_pct_exp)
        print('-----------------------------------')
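
A minimal invocation sketch (hypothetical values: the env id and policy name below are illustrative placeholders, not taken from this repository, and test() assumes the matching test_model_*.pkl checkpoints already exist on disk):

if __name__ == '__main__':
    # evaluate two pretrained agents for 100 episodes each
    test('GridExplore-v0', 'CnnPolicy', seed=0, nstack=1, numAgents=2)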
Example #2
def test(env_id, policy_name, seed, nstack=1, numAgents=2, benchmark=False):
    iters = 100
    rwd = []
    percent_exp = []
    env = EnvVec(
        [make_env(env_id, benchmark=benchmark, rank=idx, seed=seed)
         for idx in range(1)],
        particleEnv=True,
        test=True)
    # print(env_id)
    # print("logger dir: ", logger.get_dir())
    # env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir()))
    continuous_actions = False  # must be set before its use just below
    if env_id == 'Pendulum-v0':
        if continuous_actions:
            env.action_space.n = env.action_space.shape[0]
        else:
            env.action_space.n = 10
    gym.logger.setLevel(logging.WARN)
    ob_space = env.observation_space
    ac_space = env.action_space

    # def get_img(env):
    #     ax, img = env.get_img()
    #     return ax, img

    # def process_img(img):
    #     img = rgb2grey(copy.deepcopy(img))
    #     img = resize(img, img_shape)
    #     return img

    policy_fn = policy_fn_name(policy_name)

    nsteps = 5
    total_timesteps = int(80e6)
    vf_coef = 0.9
    ent_coef = 0.01
    max_grad_norm = 0.5
    lr = 7e-4
    lrschedule = 'linear'
    epsilon = 1e-5
    alpha = 0.99
    debug = False
    if numAgents == 1:
        model = Model(policy=policy_fn,
                      ob_space=ob_space,
                      ac_space=ac_space,
                      nenvs=1,
                      nsteps=nsteps,
                      nstack=nstack,
                      num_procs=1,
                      ent_coef=ent_coef,
                      vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm,
                      lr=lr,
                      alpha=alpha,
                      epsilon=epsilon,
                      total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions,
                      debug=debug)
        m_name = 'test_model_Mar7_1mil.pkl'
        model.load(m_name)
    else:
        model = []
        for i in range(numAgents):
            model.append(
                Model(policy=policy_fn,
                      ob_space=ob_space,
                      ac_space=ac_space,
                      nenvs=1,
                      nsteps=nsteps,
                      nstack=nstack,
                      num_procs=1,
                      ent_coef=ent_coef,
                      vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm,
                      lr=lr,
                      alpha=alpha,
                      epsilon=epsilon,
                      total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions,
                      debug=debug,
                      itr=i,
                      particleEnv=True))
        for i in range(numAgents):
            m_name = 'partEnv_model_' + str(i) + '.pkl'
            model[i].load(m_name)
            print('---------------------------------------------')
            print("Successfully Loaded: ", m_name)
            print('---------------------------------------------')

    obs = env.reset()
    states = [[] for _ in range(numAgents)]
    dones = [False] * numAgents
    rwd = [[] for _ in range(numAgents)]
    percent_exp = [[] for _ in range(numAgents)]
    # per-agent observation sizes, computed once rather than on every step
    ob_shape = np.asarray(
        [env.observation_space[k].shape for k in range(env.n)]).flatten()
    for i in range(1, iters + 1):
        if i % 1 == 0:  # report running stats every iteration
            for j in range(numAgents):
                print('-----------------------------------')
                print('Agent ' + str(j))
                print('Iteration: ', i)
                # average over completed episodes only; percent-explored
                # stats are not tracked for the particle env
                avg_rwd = sum(rwd[j]) / max(len(rwd[j]), 1)
                print('Average Reward: ', avg_rwd)
                print('-----------------------------------')
        actions = [[] for _ in range(numAgents)]
        values = [[] for _ in range(numAgents)]
        # float array so '+=' accumulates element-wise instead of extending
        # a nested list (the original '[[0], [0]] += rewards' was flagged
        # as wrong in the source)
        total_rewards = np.zeros(numAgents)
        for tidx in range(1000):
            # if tidx % nstack == 0:
            for j in range(numAgents):
                # query agent j's recurrent policy with its own observation
                # slice; ob_shape is indexed by j (the source hard-coded
                # index 1, which only works when all agents share a size)
                actions[j], values[j], states[j] = model[j].step(
                    obs[:, j].reshape(1, ob_shape[j]), states[j], dones[j])

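            # the particle env steps all agents at once with a joint action list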
            obs, rewards, dones, _ = env.step(actions)
            dones = dones.flatten()
            total_rewards += np.asarray(rewards).flatten()  # per-agent sums
            # print(total_rewards)
            # print(dones)
            # img = get_img(env)
            # obs_hist[j].append(img[j])
            # imsave(os.path.join(frames_dir[j], 'frame_{:04d}.png'.format(tidx)), resize(img[j], (img_shape[0], img_shape[1], 3)))
            # print(tidx, '\tAction: ', action, '\tValues: ', value, '\tRewards: ', reward, '\tTotal rewards: ', total_rewards)#, flush=True)
            if any(dones):
                # print('Faultered at tidx: ', tidx)
                for j in range(numAgents):
                    rwd[j].append(total_rewards[j])
                    # percent_exp[j].append(env.env.percent_explored[j])
                obs = env.reset()
                break
    for i in range(numAgents):
        print('-----------------------------------')
        print('Agent ' + str(i))
        print('Iteration: ', iters)
        # average over completed episodes only
        avg_rwd = sum(rwd[i]) / max(len(rwd[i]), 1)
        print('Average Reward: ', avg_rwd)
        print('-----------------------------------')
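
A minimal invocation sketch for the particle-env variant (hypothetical values: 'simple_spread' is the classic multiagent-particle-envs scenario name, used here only as a placeholder, and make_env, EnvVec, and the partEnv_model_*.pkl checkpoints are assumed to come from the surrounding repository):

if __name__ == '__main__':
    # evaluate two pretrained particle-env agents for 100 episodes each
    test('simple_spread', 'MlpPolicy', seed=0, nstack=1, numAgents=2,
         benchmark=False)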