Example #1
def pybullet_example():
    # PyBullet: normalizing input features with VecNormalize
    import os

    import gym
    import pybullet_envs  # noqa: F401  # registers the PyBullet Gym environments

    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

    env = DummyVecEnv([lambda: gym.make("HalfCheetahBulletEnv-v0")])
    # Automatically normalize the input features and reward.
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

    model = PPO("MlpPolicy", env)
    model.learn(total_timesteps=2000)

    # Don't forget to save the VecNormalize statistics when saving the agent.
    log_dir = "/tmp/"
    model.save(log_dir + "ppo_halfcheetah")
    stats_path = os.path.join(log_dir, "vec_normalize.pkl")
    env.save(stats_path)

    # To demonstrate loading.
    del model, env

    # Load the saved statistics.
    env = DummyVecEnv([lambda: gym.make("HalfCheetahBulletEnv-v0")])
    env = VecNormalize.load(stats_path, env)
    # Do not update them at test time.
    env.training = False
    # reward normalization is not needed at test time.
    env.norm_reward = False

    # Load the agent.
    model = PPO.load(log_dir + "ppo_halfcheetah", env=env)
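
    # A minimal evaluation sketch (an addition, not in the original snippet): with
    # env.training = False the VecNormalize wrapper applies the saved observation
    # statistics without updating them, so the reloaded agent can be scored with
    # stable_baselines3's evaluate_policy helper.
    from stable_baselines3.common.evaluation import evaluate_policy

    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")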
Example #2
def test(seed, model_filename, vec_filename, train, test, body_info=0, render=False):
    print("Testing:")
    print(f" Seed {seed}, model {model_filename} vec {vec_filename}")
    print(f" Train on {train}, test on {test}, w/ bodyinfo {body_info}")
    eval_env = utils.make_env(render=render, robot_body=test, body_info=body_info)
    eval_env = DummyVecEnv([eval_env])
    eval_env = VecNormalize.load(vec_filename, eval_env)
    eval_env.norm_reward = False

    eval_env.seed(seed)
    model = PPO.load(model_filename)

    obs = eval_env.reset()
    if render:
        eval_env.env_method("set_view")
    distance_x = 0
    # print(obs)
    total_reward = 0
    for step in range(1000):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = eval_env.step(action)
        if done:
            break
        else:  # after done the env has been reset, so only record the position on non-terminal steps
            distance_x = eval_env.envs[0].robot.body_xyz[0]
        total_reward += reward[0]
        if render:
            time.sleep(0.01)

    eval_env.close()
    print(f"train {train}, test {test}, body_info {body_info}, step {step}, total_reward {total_reward}, distance_x {distance_x}")
    return total_reward, distance_x
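
# A hypothetical call site for the test() helper above; the file paths and body IDs are
# placeholders, not values from the original project. The point is that the VecNormalize
# statistics saved during training are reloaded alongside the PPO checkpoint.
if __name__ == "__main__":
    total_reward, distance_x = test(
        seed=0,
        model_filename="models/ppo_walker.zip",   # placeholder path
        vec_filename="models/vec_normalize.pkl",  # placeholder path
        train=0,
        test=0,
    )
    print(f"total_reward={total_reward}, distance_x={distance_x}")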
Example #3
def test(test_n, seed, model_filename, vec_filename, train, test, test_as_class=0, render=False, save_file="default.yml"):

    print("Testing:")
    total_rewards = []
    distance_xs = []
    for i in range(test_n):
        print(f" Seed {seed+i}, model {model_filename} vec {vec_filename}")
        print(f" Train on {train}, test on {test}, w/ bodyinfo {test_as_class}")
        eval_env = utils.make_env(render=render, wrapper=None, robot_body=test, body_info=test_as_class)
        eval_env = DummyVecEnv([eval_env])
        eval_env = VecNormalize.load(vec_filename, eval_env)
        eval_env.norm_reward = False

        eval_env.seed(seed+i)
        model = PPO.load(model_filename)

        obs = eval_env.reset()
        if render:
            eval_env.env_method("set_view")
        distance_x = 0
        # print(obs)
        total_reward = 0
        for step in range(1000):
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = eval_env.step(action)
            if done:
                break
            else:  # after done the env has been reset, so only record the position on non-terminal steps
                distance_x = eval_env.envs[0].robot.body_xyz[0]
            total_reward += reward[0]
            if render:
                time.sleep(0.01)

        eval_env.close()
        print(f"train {train}, test {test}, test_as_class {test_as_class}, step {step}, total_reward {total_reward}, distance_x {distance_x}")

        total_rewards.append(total_reward)
        distance_xs.append(distance_x)

    # convert numpy float64 values to plain Python floats so YAML serializes them as numbers
    total_rewards = [float(x) for x in total_rewards]
    distance_xs = [float(x) for x in distance_xs]

    data = {
        "title": "test",
        "train": train,
        "test": test,
        "total_reward": total_rewards,
        "distance_x": distance_xs,
    }
    with open(f"{save_file}", "w") as f:
        yaml.dump(data, f)
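
# A small follow-up sketch (assumes only the YAML layout written above): the results file
# can be read back with yaml.safe_load, since the lists were converted to plain floats.
import yaml
import numpy as np

with open("default.yml") as f:  # the default value of save_file
    results = yaml.safe_load(f)
print("mean total_reward:", np.mean(results["total_reward"]))
print("mean distance_x:", np.mean(results["distance_x"]))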
Example #4
def test(seed, model_filename, vec_filename, train, test, test_as_class=0, render=False, save_file="default.yml"):
    global g_step, g_obs_data
    print("Testing:")
    total_rewards = []
    distance_xs = []
    if True:
        os.makedirs(f"{folder}/obs_data_videos", exist_ok=True)
        g_step = 0

        print(f" Seed {seed}, model {model_filename} vec {vec_filename}")
        print(f" Train on {train}, test on {test}, w/ bodyinfo {test_as_class}")
        if test_as_class>=0:
            bodyinfo = test_as_class
        else:
            if args.with_bodyinfo:
                bodyinfo = test//100
            else:
                bodyinfo = 0
        # default_wrapper = wrapper.BodyinfoWrapper
        # if args.disable_wrapper:
        #     default_wrapper = None
        default_wrapper = wrapper.WalkerWrapper

        eval_env = utils.make_env(template=utils.template(test), render=render, robot_body=test, wrapper=default_wrapper, body_info=bodyinfo)
        eval_env = DummyVecEnv([eval_env])
        if args.vec_normalize:
            eval_env = VecNormalize.load(vec_filename, eval_env)
        eval_env.norm_reward = False

        eval_env.seed(seed)
        model = PPO.load(model_filename)

        obs = eval_env.reset()
        g_obs_data = np.zeros(shape=[args.test_steps, obs.shape[1]], dtype=np.float32)

        if render:
            # eval_env.env_method("set_view")
            import common.linux
            common.linux.fullscreen()
            print("\n\nWait for a while, so I have the time to press Ctrl+F11 to enter FullScreen Mode.\n\n")
            time.sleep(2) # Wait for a while, so I have the time to press Ctrl+F11 to enter FullScreen Mode.
        distance_x = 0
        # print(obs)
        total_reward = 0
        for step in tqdm(range(args.test_steps)):
            g_obs_data[step,:] = obs[0]
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = eval_env.step(action)
            if render:
                eval_env.envs[0].camera_adjust()
                if args.disable_saving_image:
                    time.sleep(0.01)
                else:
                    (width, height, rgbPixels, _, _) = eval_env.envs[0].pybullet.getCameraImage(1920,1080, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL)
                    image = rgbPixels[:,:,:3]
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(f"{folder}/obs_data_videos/getCameraImage_b{test}_s{seed}_{step:05}.png", image)
            if done:
                # the env auto-resets on done, which should not affect the result, so keep stepping
                # break
                pass
            else:  # after done the env has been reset, so only record the position on non-terminal steps
                distance_x = eval_env.envs[0].robot.body_xyz[0]
            total_reward += reward[0]
            # if render:
            #    time.sleep(0.01)

        eval_env.close()
        print(f"train {train}, test {test}, test_as_class {test_as_class}, step {step}, total_reward {total_reward}, distance_x {distance_x}")

        if args.save_obs_data:
            base_obs_data = None
            sorted_data = g_obs_data.copy()
            if test!=0 or seed!=0:
                # for non-baseline runs (test != 0 or seed != 0), reuse the ordering and baseline
                # data saved by the baseline run so the patterns of two experiments can be compared
                sorted_arg_obs_data = np.load(f"{folder}/sorted_arg_obs_data.npy")
                base_obs_data = np.load(f"{folder}/base_obs_data.npy")
            else:
                sorted_arg_obs_data = np.argsort(np.mean(sorted_data,axis=0))
                np.save(f"{folder}/sorted_arg_obs_data.npy", sorted_arg_obs_data)
                base_obs_data = g_obs_data.copy()
                np.save(f"{folder}/base_obs_data.npy", base_obs_data)

            # sorted_data = sorted_data[:,sorted_arg_obs_data]
            # base_obs_data = base_obs_data[:, sorted_arg_obs_data]

            for step in tqdm(range(args.test_steps)):
                plt.close()
                plt.figure(figsize=[10,4])
                if test!=0 or seed!=0:
                    x = sorted_data[step]
                    plt.bar(np.arange(len(x)), x, color=[0.1, 0.3, 0.7, 0.5])
                x = base_obs_data[step]
                plt.bar(np.arange(len(x)), x, color=[0.6, 0.6, 0.6, 0.5])
                plt.ylim(-2,2)
                plt.savefig(f"{folder}/obs_data_videos/barchart_b{test}_s{seed}_{step:05}.png")
                plt.close()

        total_rewards.append(total_reward)
        distance_xs.append(distance_x)

    # convert numpy float64 values to plain Python floats so YAML serializes them as numbers
    total_rewards = [float(x) for x in total_rewards]
    distance_xs = [float(x) for x in distance_xs]

    data = {
        "title": "test",
        "train": train,
        "test": test,
        "total_reward": total_rewards,
        "distance_x": distance_xs,
    }
    with open(f"{save_file}", "w") as f:
        yaml.dump(data, f)
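
# A possible post-processing sketch (not part of the original function): the camera frames
# saved above as getCameraImage_b{test}_s{seed}_{step:05}.png can be stitched into a video
# with OpenCV. The fps value and output file name are assumptions.
import glob
import cv2

def frames_to_video(folder, test, seed, fps=30):
    pattern = f"{folder}/obs_data_videos/getCameraImage_b{test}_s{seed}_*.png"
    frames = sorted(glob.glob(pattern))
    if not frames:
        return
    height, width = cv2.imread(frames[0]).shape[:2]
    writer = cv2.VideoWriter(
        f"{folder}/obs_data_videos/rollout_b{test}_s{seed}.mp4",
        cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height),
    )
    for frame_path in frames:
        writer.write(cv2.imread(frame_path))
    writer.release()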