コード例 #1
0
    return targets


if __name__ == "__main__":
    # Script entry point: build the run configuration, create the HangPole
    # environment, then either train new networks or replay a saved agent.
    T.set_num_threads(1)

    # Three random characters (uppercase letters / digits) used as the run tag.
    ID = ''.join(random.choices(string.ascii_uppercase + string.digits, k=3))

    # Run configuration handed to train().
    params = {
        "iters": 500000,
        "batchsize": 20,
        "gamma": 0.99,
        "lambda_init": 1.,
        "lambda_min": 0.3,
        "policy_lr": 0.0005,
        "latent_predictor_lr": 0.0005,
        "weight_decay": 0.0001,
        "ppo": True,
        "ppo_update_iters": 6,
        "animate": False,
        "train": True,
        "note": "HP, active latent estimation",
        "ID": ID,
    }

    # On the host named "goedel" always train, with animation switched off.
    if socket.gethostname() == "goedel":
        params.update(animate=False, train=True)

    from src.envs.cartpole_pbt.hangpole import HangPoleBulletEnv
    env = HangPoleBulletEnv(
        animate=params["animate"],
        latent_input=False,
        action_input=False,
    )

    if not params["train"]:
        print("Testing")

        # Replay a previously saved agent in the environment.
        policy = T.load('agents/HangPoleBulletEnv_NN_PG_GO5_pg.p')
        env.test(policy)
    else:
        print("Training")
        # The policy's input size is the env observation size plus the latent size.
        policy = policies.NN_PG(
            env,
            16,
            obs_dim=env.obs_dim + env.latent_dim,
            tanh=False,
            std_fixed=True,
        )
        # The recurrent predictor takes obs_dim + act_dim inputs and emits
        # latent_dim outputs.
        latent_predictor = policies.RNN_PG(
            env,
            hid_dim=8,
            memory_dim=8,
            n_temp=2,
            obs_dim=env.obs_dim + env.act_dim,
            act_dim=env.latent_dim,
        )
        print(
            params,
            env.obs_dim,
            env.act_dim,
            env.__class__.__name__,
            policy.__class__.__name__,
        )
        train(env, policy, latent_predictor, params)
コード例 #2
0
    # perlin: P92

    # Current experts w/ orientation rew:
    # flat: KYH
    # holes: 2CW
    # tiles: YI7
    # triangles: M3X
    # Stairs: H1Y
    # pipe: W01
    # perlin: H03


    # Train a fresh policy or evaluate a stored one, depending on params["train"].
    if not params["train"]:
        print("Testing")
        policy_name = "MQ7" # LX3: joints + contacts + yaw
        policy_path = 'agents/{}_NN_PG_{}_pg.p'.format(
            env.__class__.__name__,
            policy_name,
        )
        # Build the network first, then restore its saved weights into it.
        policy = policies.NN_PG(env, 96)
        policy.load_state_dict(T.load(policy_path))

        env.test(policy, N=10)
        print(policy_path)
    else:
        print("Training")
        policy = policies.NN_PG(env, 96)
        print(
            params,
            env.obs_dim,
            env.act_dim,
            env.__class__.__name__,
            policy.__class__.__name__,
        )
        train(env, policy, params)



コード例 #3
0
        "weight_decay": 0.001,
        "ppo": True,
        "ppo_update_iters": 6,
        "animate": False,
        "train": False,
        "env_list": env_list,
        "note": "Score, 2E1",
        "ID": ID
    }

    # On the host named "goedel" switch animation off and set the train flag.
    # NOTE(review): nothing in the visible code below branches on
    # params["train"]; the loop trains unconditionally -- confirm this is intended.
    if socket.gethostname() == "goedel":
        params["animate"] = False
        params["train"] = True

    from src.envs.hexapod_trossen_terrain_all import hexapod_trossen_terrain_all as hex_env
    env = hex_env.Hexapod(env_list=env_list, max_n_envs=1)

    # Number of independent training runs; every run starts from a newly
    # constructed policy.
    n_runs = 5

    r_lists = []
    for i in range(n_runs):
        # Report progress as "current/total" (the total used to be printed as
        # the literal text "N").
        print("Training: {}/{}".format(i + 1, n_runs))
        policy = policies.NN_PG(env, 64, tanh=False, std_fixed=True)
        print(params, env.obs_dim, env.act_dim, env.__class__.__name__,
              policy.__class__.__name__)
        r_list = train(env, policy, params)
        r_lists.append(np.array(r_list))

    # Stack the per-run results along a new leading axis (index = run number).
    # np.stack raises if the runs returned arrays of different shapes.
    r_lists = np.stack(r_lists, 0)
    print(r_lists.shape)
    # NOTE(review): env_list is formatted into the file name as-is; if it is a
    # list, its repr (brackets, quotes) ends up in the name. Left unchanged so
    # existing consumers of these files keep working.
    np.save("R_{}_{}".format(env_list, params["ID"]), r_lists)
コード例 #4
0
        "animate": True,
        "train": False,
        "note": "Supervised model learning",
        "ID": ID
    }

    # On the host named "goedel" always train, with animation switched off.
    if socket.gethostname() == "goedel":
        params.update(animate=False, train=True)

    from src.envs.cartpole_pbt.hangpole import HangPoleBulletEnv
    env = HangPoleBulletEnv(animate=params["animate"], latent_input=True, action_input=False)

    # Train a new model or evaluate a stored agent, depending on params["train"].
    if not params["train"]:
        print("Testing")
        policy_path = 'agents/HangPoleBulletEnv_NN_PG_ETX_pg.p'
        policy = T.load(policy_path)
        # Animation doubles as the slow-playback switch; the seed is fixed.
        env.test(policy, slow=params["animate"], seed=1338)
        print(policy_path)
    else:
        print("Training")
        # Input and output sizes are both set to the environment's obs_dim.
        model = policies.NN_PG(env, 16, obs_dim=env.obs_dim, act_dim=env.obs_dim)
        print(
            params,
            env.obs_dim,
            env.act_dim,
            env.__class__.__name__,
            model.__class__.__name__,
        )
        train(env, model, params)