Ejemplo n.º 1
0
from runner import Runner
from common.arguments import get_args
from common.utils import make_env
import numpy as np
import random
import torch

if __name__ == '__main__':
    # Parse the run parameters. make_env hands back the environment together
    # with an args object, which replaces the freshly parsed one.
    args = get_args()
    env, args = make_env(args)
    runner = Runner(args, env)

    # Run the runner's main loop unless evaluation was requested; in
    # evaluation mode, print whatever runner.evaluate() returns.
    if not args.evaluate:
        runner.run()
    else:
        returns = runner.evaluate()
        print('Average returns is', returns)
Ejemplo n.º 2
0
import numpy as np
from keras.utils import to_categorical
import copy
from common.utils import eligibility_traces, default_config, make_env, RunningMeanStd, str2bool, discount_rewards
from common.ppo_independant import PPOPolicyNetwork, ValueNetwork

# Script-level switches. `normalize_inputs` is forwarded to make_env below;
# `render` is not referenced in the lines shown here.
render = False
normalize_inputs = True

# Settings read from the 'agent' section of the default config and converted
# to float / bool / int.
config = default_config()
LAMBDA = float(config['agent']['lambda'])
lr_actor = float(config['agent']['lr_actor'])
meta_skip_etrace = str2bool(config['agent']['meta_skip_etrace'])
communication_round = int(config['agent']['fen_communication_round'])
# Build the environment, then call its toggle_compute_neighbors() once.
# NOTE(review): presumably this switches neighbor computation on — confirm
# against the environment implementation.
env=make_env(config, normalize_inputs)
env.toggle_compute_neighbors()

# Copy the environment's settings into module-level names.
n_agent=env.n_agent
T = env.T
GAMMA = env.GAMMA
n_episode = env.n_episode
max_steps = env.max_steps
n_actions = env.n_actions
n_signal = env.n_signal
max_u = env.max_u

i_episode = 0
# Per-agent networks: the loop below appends one PPOPolicyNetwork per agent to
# meta_Pi. meta_V is created empty and is not filled in the lines shown here.
meta_Pi=[]
meta_V=[]
for i in range(n_agent):
    meta_Pi.append(PPOPolicyNetwork(num_features=env.input_size+2, num_actions=n_signal,layer_size=128,epsilon=0.1,learning_rate=lr_actor))
Ejemplo n.º 3
0
    model_path = Path(models_dir).joinpath(
        "{}_{}".format("wave" if wave else "gym", label)
    )
    # e.g.: ./logs
    log_path = Path(log_dir)

    # create folders
    model_path.mkdir(parents=True, exist_ok=True)
    log_path.mkdir(exist_ok=True)

    # create the wave or gym environment, with or without multiprocessing
    id = "LunarLander" if wave else "LunarLander-v2"
    if num_cpu > 1:
        env = make_multi_env(num_cpu, id, wave, render_mode=False, reset_mode="random")
    else:
        env = make_env(id, wave, render_mode=False, reset_mode="random")

    # create A2C with Mlp policy, and the callback to save snapshots
    model = A2C(MlpPolicy, env, ent_coef=0.1, verbose=0, tensorboard_log=log_dir)
    callback = save_callback(
        model_path,
        "snapshot-",
        save_interval,
        call_interval=model.n_steps * num_cpu,
        order=order,
        order_str=order_str,
    )

    # save final model
    print("Training...")
    _t = time.time()
Ejemplo n.º 4
0
from stable_baselines import A2C  # noqa: E402

# Settings: `wave` selects the environment id below and is passed to make_env;
# `render_episodes` is the number of episodes to play back.
wave = True
render_episodes = 20

# The path to the model file is the single required command-line argument.
if len(sys.argv) < 2:
    print("USAGE: {} PATH-TO-MODEL-FILE".format(sys.argv[0]))
    exit(-1)

# Refuse to continue when the given path is not an existing regular file.
model_path = sys.argv[1]
if not os.path.isfile(model_path):
    print("Path '{}' does not exist.".format(model_path))
    exit(-1)

# NOTE(review): `id` shadows the Python builtin of the same name.
id = "LunarLander" if wave else "LunarLander-v2"
env = make_env(id, wave, port=4000, reset_mode="random")

model = A2C.load(model_path)

# Play back episodes with the loaded model: step, render, accumulate the
# reward, and print the summed reward each time an episode finishes.
episode = render_episodes
reward_sum = 0
obs = env.reset()
while episode > 0:
    action, _states = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    reward_sum += reward
    env.render()
    if done:
        # NOTE(review): no env.reset() is visible after `done` in the lines
        # shown — confirm the env auto-resets or that a reset follows.
        print("Points: {}".format(reward_sum))
        episode -= 1
        reward_sum = 0
Ejemplo n.º 5
0
def test(seed, model_filename, vec_filename, train, test, test_as_class=0, render=False, save_file="default.yml"):
    """Roll out a saved PPO model on one body and write the result to YAML.

    Builds a single-environment ``DummyVecEnv`` for body ``test``, optionally
    wraps it with previously saved ``VecNormalize`` statistics, runs
    ``args.test_steps`` deterministic steps, and stores every observation in
    the module-level ``g_obs_data`` array. The accumulated reward and the last
    x position read while the episode was not done are written to
    ``save_file``.

    The function relies on names defined outside of it (``args``, ``folder``,
    ``utils``, ``wrapper``, ``PPO``, ...).

    Args:
        seed: Seed passed to ``eval_env.seed``; also used in output file names.
        model_filename: Path handed to ``PPO.load``.
        vec_filename: Path handed to ``VecNormalize.load``; only read when
            ``args.vec_normalize`` is set.
        train: Identifier of the training setup; only printed and stored in
            the YAML output.
        test: Body id to evaluate on; passed to ``utils.template`` and as
            ``robot_body``, and used in output file names.
        test_as_class: When ``>= 0`` it is used directly as ``body_info``.
            Otherwise ``body_info`` is ``test // 100`` if
            ``args.with_bodyinfo`` is set, else ``0``.
        render: Passed to ``utils.make_env``. When true the camera is adjusted
            every step and, unless ``args.disable_saving_image`` is set, one
            PNG frame per step is written under ``{folder}/obs_data_videos``.
        save_file: Path of the YAML file to write.

    Returns:
        None. Results are written to ``save_file`` (and to image / ``.npy``
        files when the corresponding options are enabled).
    """
    global g_step, g_obs_data
    print("Testing:")
    os.makedirs(f"{folder}/obs_data_videos", exist_ok=True)
    g_step = 0

    print(f" Seed {seed}, model {model_filename} vec {vec_filename}")
    print(f" Train on {train}, test on {test}, w/ bodyinfo {test_as_class}")
    if test_as_class >= 0:
        bodyinfo = test_as_class
    elif args.with_bodyinfo:
        bodyinfo = test // 100
    else:
        bodyinfo = 0
    default_wrapper = wrapper.WalkerWrapper

    eval_env = utils.make_env(
        template=utils.template(test),
        render=render,
        robot_body=test,
        wrapper=default_wrapper,
        body_info=bodyinfo,
    )
    eval_env = DummyVecEnv([eval_env])
    if args.vec_normalize:
        eval_env = VecNormalize.load(vec_filename, eval_env)
        # Freeze the loaded running statistics: without this the wrapper keeps
        # updating them with test-time observations, so the normalization
        # drifts away from what the model was trained with.
        eval_env.training = False
    # Report raw (un-normalized) rewards.
    eval_env.norm_reward = False

    eval_env.seed(seed)
    model = PPO.load(model_filename)

    obs = eval_env.reset()
    # One row per step, one column per observation feature.
    g_obs_data = np.zeros(shape=[args.test_steps, obs.shape[1]], dtype=np.float32)

    if render:
        import common.linux
        common.linux.fullscreen()
        print("\n\nWait for a while, so I have the time to press Ctrl+F11 to enter FullScreen Mode.\n\n")
        time.sleep(2)  # leave time to press Ctrl+F11 and enter fullscreen mode

    distance_x = 0
    total_reward = 0
    for step in tqdm(range(args.test_steps)):
        g_obs_data[step, :] = obs[0]
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = eval_env.step(action)
        if render:
            eval_env.envs[0].camera_adjust()
            if args.disable_saving_image:
                time.sleep(0.01)
            else:
                _, _, rgb_pixels, _, _ = eval_env.envs[0].pybullet.getCameraImage(
                    1920, 1080, renderer=pybullet.ER_BULLET_HARDWARE_OPENGL
                )
                # Drop the alpha channel and convert to OpenCV's BGR order.
                image = cv2.cvtColor(rgb_pixels[:, :, :3], cv2.COLOR_RGB2BGR)
                cv2.imwrite(f"{folder}/obs_data_videos/getCameraImage_b{test}_s{seed}_{step:05}.png", image)
        # The rollout deliberately continues past `done`. On a done step the
        # observation comes from after the reset, so the position is only
        # sampled while the episode is still running.
        if not done:
            distance_x = eval_env.envs[0].robot.body_xyz[0]
        total_reward += reward[0]

    eval_env.close()
    print(f"train {train}, test {test}, test_as_class {test_as_class}, step {step}, total_reward {total_reward}, distance_x {distance_x}")

    if args.save_obs_data:
        is_baseline = test == 0 and seed == 0
        if is_baseline:
            # The (test=0, seed=0) run defines the reference data that all
            # other runs are plotted against.
            sorted_arg_obs_data = np.argsort(np.mean(g_obs_data, axis=0))
            np.save(f"{folder}/sorted_arg_obs_data.npy", sorted_arg_obs_data)
            base_obs_data = g_obs_data.copy()
            np.save(f"{folder}/base_obs_data.npy", base_obs_data)
        else:
            # Reuse the reference files so the patterns of two experiments can
            # be compared. The sort order is loaded but currently unused:
            # re-ordering the feature columns by it is disabled.
            sorted_arg_obs_data = np.load(f"{folder}/sorted_arg_obs_data.npy")
            base_obs_data = np.load(f"{folder}/base_obs_data.npy")

        # One bar chart per step: this run's observation (blue, non-baseline
        # runs only) over the reference observation (grey).
        for frame in tqdm(range(args.test_steps)):
            plt.close()
            plt.figure(figsize=[10, 4])
            if not is_baseline:
                x = g_obs_data[frame]
                plt.bar(np.arange(len(x)), x, color=[0.1, 0.3, 0.7, 0.5])
            x = base_obs_data[frame]
            plt.bar(np.arange(len(x)), x, color=[0.6, 0.6, 0.6, 0.5])
            plt.ylim(-2, 2)
            plt.savefig(f"{folder}/obs_data_videos/barchart_b{test}_s{seed}_{frame:05}.png")
            plt.close()

    # Cast to plain Python floats so yaml does not dump numpy scalar objects.
    total_rewards = [float(total_reward)]
    distance_xs = [float(distance_x)]

    data = {
        "title": "test",
        "train": train,
        "test": test,
        "total_reward": total_rewards,
        "distance_x": distance_xs,
    }
    with open(save_file, "w") as f:
        yaml.dump(data, f)
Ejemplo n.º 6
0
import gym
import pybullet_envs
from stable_baselines3.common.vec_env.vec_frame_stack import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv
import common.utils as utils

# Set the `folder` attribute on the shared utils module before building
# the environment.
utils.folder = "exp5"

# Build one environment factory for body 400 with no wrapper, put it in a
# DummyVecEnv, stack 4 frames, and print the shape of the first observation.
body = 400
env_fn = utils.make_env(template=utils.template(body), robot_body=body, wrapper=None)
venv = DummyVecEnv([env_fn])
venv = VecFrameStack(venv, 4)
obs = venv.reset()
print(obs.shape)
Ejemplo n.º 7
0
    str_ids = "-".join(str(x) for x in training_bodies)
    if args.test_bodies == "":
        test_bodies = []
    else:
        test_bodies = [int(x) for x in args.test_bodies.split(",")]

    default_wrapper = wrapper.BodyinfoWrapper
    if args.disable_wrapper:
        default_wrapper = None

    if with_bodyinfo:
        env = DummyVecEnv([
            utils.make_env(
                rank=i,
                seed=utils.seed,
                wrapper=default_wrapper,
                render=args.render,
                robot_body=training_bodies[i % len(training_bodies)],
                body_info=training_bodies[i % len(training_bodies)] // 100)
            for i in range(train_num_envs)
        ])
        save_filename = f"model-ant-{str_ids}-with-bodyinfo"
    else:
        env = DummyVecEnv([
            utils.make_env(rank=i,
                           seed=utils.seed,
                           wrapper=default_wrapper,
                           render=args.render,
                           robot_body=training_bodies[i %
                                                      len(training_bodies)],
                           body_info=0) for i in range(train_num_envs)
Ejemplo n.º 8
0
    training_bodies = [int(x) for x in args.train_bodies.split(",")]
    str_ids = "-".join(str(x) for x in training_bodies)
    if args.test_bodies=="":
        test_bodies = []
    else:
        test_bodies = [int(x) for x in args.test_bodies.split(",")]
    
    # default_wrapper = wrapper.BodyinfoWrapper
    # if args.disable_wrapper:
    #     default_wrapper = None
    default_wrapper = wrapper.WalkerWrapper
    # default_wrapper = None

    if with_bodyinfo:
        env = DummyVecEnv([utils.make_env(template=utils.template(training_bodies[i%len(training_bodies)]), rank=i, seed=utils.seed, wrapper=default_wrapper, render=args.render, robot_body=training_bodies[i%len(training_bodies)], body_info=training_bodies[i%len(training_bodies)]//100) for i in range(train_num_envs)])
        save_filename = f"model-ant-{str_ids}-with-bodyinfo"
    else:
        env = DummyVecEnv([utils.make_env(template=utils.template(training_bodies[i%len(training_bodies)]), rank=i, seed=utils.seed, wrapper=default_wrapper, render=args.render, robot_body=training_bodies[i%len(training_bodies)], body_info=0) for i in range(train_num_envs)])
        save_filename = f"model-ant-{str_ids}"

    if args.vec_normalize:
        env = VecNormalize(env, **normalize_kwargs)

    if args.stack_frames>1:
        env = VecFrameStack(env, args.stack_frames)


    keys_remove =["normalize", "n_envs", "n_timesteps", "policy"]
    for key in keys_remove:
        del hyperparams[key]