Example #1
0
def test_mujoco():
    """Check train/validation split sizes for three MuJoCo environments.

    For HalfCheetah-v3 and Walker2d-v3 the validation index count must
    equal a tenth of the train index count (rounded down). For Hopper-v3
    with ``train_num=0`` both that tenth and the validation index count
    must be zero.
    """
    # (environment name, train_num, data_type) triples sharing one check.
    split_cases = (
        ("HalfCheetah-v3", 100, "L"),
        ("Walker2d-v3", 10, "m"),
    )
    for env_name, requested, level in split_cases:
        env = neorl.make(env_name)
        train_data, val_data = env.get_dataset(train_num=requested,
                                               data_type=level,
                                               path=TEST_DATA_PATH)
        expected_val = int(len(train_data["index"]) * 0.1)
        assert expected_val == len(val_data["index"])

    # train_num=0: a tenth of the train index count rounds down to zero
    # and the validation split carries no index entries.
    env = neorl.make("Hopper-v3")
    train_data, val_data = env.get_dataset(train_num=0,
                                           data_type="e",
                                           path=TEST_DATA_PATH)
    assert int(len(train_data["index"]) * 0.1) == 0
    assert len(val_data["index"]) == 0
Example #2
0
def get_env(task):
    """Create the environment that matches a task name.

    Resolution order:

    1. A task starting with ``HalfCheetah-v3``, ``Hopper-v3`` or
       ``Walker2d-v3`` is created through ``neorl.make`` with exactly
       that environment name.
    2. A task starting with ``d4rl`` is created through ``gym.make``
       using the task string with its first five characters removed
       (presumably a ``d4rl-`` prefix -- confirm against callers).
    3. Anything else is created through ``neorl.make`` using the part
       of the stripped task string before the first ``-``.

    Args:
        task: Task identifier string.

    Returns:
        The environment object returned by ``neorl.make`` or ``gym.make``.

    Raises:
        NotImplementedError: If the environment cannot be created. The
            underlying error is attached as ``__cause__``.
    """
    try:
        for env_name in ("HalfCheetah-v3", "Hopper-v3", "Walker2d-v3"):
            if task.startswith(env_name):
                return neorl.make(env_name)

        if task.startswith('d4rl'):
            # Imported only for its import-time side effect (presumably
            # registering the D4RL environments with gym -- confirm).
            import d4rl  # noqa: F401
            return gym.make(task[5:])

        task_name = task.strip().split("-")[0]
        return neorl.make(task_name)
    except Exception as err:
        # Catch Exception, not everything: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # unsupported task. Chain the cause so the real failure is visible.
        raise NotImplementedError(
            f"cannot create an environment for task {task!r}") from err
Example #3
0
def load_data_from_neorl(task, task_data_type="low", task_train_num=99):
    """Load a NeoRL dataset and convert both splits into buffers.

    Args:
        task: Environment name passed to ``neorl.make``.
        task_data_type: Forwarded to ``get_dataset`` as ``data_type``.
        task_train_num: Forwarded to ``get_dataset`` as ``train_num``.

    Returns:
        A ``(train_buffer, val_buffer)`` tuple, each produced by
        ``load_neorl_buffer`` from the corresponding dataset split.
    """
    import neorl

    splits = neorl.make(task).get_dataset(data_type=task_data_type,
                                          train_num=task_train_num)
    train_split, val_split = splits

    # Convert the train split first, then the validation split.
    train_buffer = load_neorl_buffer(train_split)
    val_buffer = load_neorl_buffer(val_split)
    return train_buffer, val_buffer
Example #4
0
def test_finance():
    """Check that the finance validation split is a tenth of the train split.

    Requests ``train_num=100`` with ``data_type="H"`` and compares the
    validation index count with the train index count times 0.1, rounded
    down.
    """
    finance_env = neorl.make("finance")
    splits = finance_env.get_dataset(train_num=100,
                                     data_type="H",
                                     path=TEST_DATA_PATH)
    train_data, val_data = splits

    expected_val = int(len(train_data["index"]) * 0.1)
    assert expected_val == len(val_data["index"])
Example #5
0
def test_citylearn():
    """Exercise ``get_dataset`` options on citylearn, then a custom reward on ib.

    Covers, in order:

    * several ``train_num`` values, checking the length of every data
      field and of ``index``;
    * the ``"medium"`` and ``"low"`` data types;
    * ``need_val=False`` returning ``None`` as validation data;
    * a custom ``val_ratio``;
    * an ``ib`` environment built with a user reward function and
      ``use_data_reward=False``.
    """
    env = neorl.make("citylearn")

    # Fields expected to hold 1000 entries per requested train_num unit.
    sample_keys = ("obs", "next_obs", "action", "reward", "done")

    for requested in (9, 10, 99, 100, 999):
        train_data, val_data = env.get_dataset(train_num=requested,
                                               path=TEST_DATA_PATH)
        for key in sample_keys:
            assert len(train_data[key]) == requested * 1000
        assert len(train_data["index"]) == requested

    requested = 99

    # Explicit data types must still honour train_num.
    for level in ("medium", "low"):
        train_data, val_data = env.get_dataset(train_num=requested,
                                               data_type=level,
                                               path=TEST_DATA_PATH)
        assert len(train_data["index"]) == requested

    # With need_val=False no validation data is returned.
    train_data, val_data = env.get_dataset(train_num=requested,
                                           need_val=False,
                                           path=TEST_DATA_PATH)
    assert val_data is None

    # A custom val_ratio controls the validation split size.
    train_data, val_data = env.get_dataset(train_num=requested,
                                           val_ratio=0.3,
                                           path=TEST_DATA_PATH)
    expected_val = int(len(train_data["index"]) * 0.3)
    assert expected_val == len(val_data["index"])

    def customized_reward_func(data):
        # Constant reward of one for every observation, as a column.
        return np.ones((len(data["obs"]), 1))

    env = neorl.make("ib", reward_func=customized_reward_func)
    train_data, val_data = env.get_dataset(data_type="high",
                                           train_num=50,
                                           need_val=False,
                                           use_data_reward=False)
    assert len(train_data["index"]) == 50
    # Every reward must equal the value the custom function produces.
    rewards = train_data["reward"]
    assert np.all(rewards == np.ones_like(rewards))
    assert val_data is None
Example #6
0
import neorl

# Build the citylearn environment, fetch its "medium" dataset together
# with a validation split, and print both splits.
env = neorl.make("citylearn")
train_data, val_data = env.get_dataset(
    train_num=99,
    data_type="medium",
    need_val=True,
)
print("citylearn:", train_data, val_data)
Example #7
0
def test_ib():
    """Check that the ib validation split is a tenth of the train split.

    Requests ``train_num=99`` with ``data_type="M"`` and compares the
    validation index count with the train index count times 0.1, rounded
    down.
    """
    ib_env = neorl.make("ib")
    splits = ib_env.get_dataset(train_num=99,
                                data_type="M",
                                path=TEST_DATA_PATH)
    train_data, val_data = splits

    expected_val = int(len(train_data["index"]) * 0.1)
    assert expected_val == len(val_data["index"])
Example #8
0
import neorl

if __name__ == '__main__':
    # Call get_dataset for every domain / level / amount combination.
    domains = ('HalfCheetah-v3', 'Hopper-v3', 'Walker2d-v3', 'ib',
               'finance', 'citylearn')
    for domain in domains:
        # finance is only requested at the two smaller amounts.
        if domain == 'finance':
            amounts = [100, 1000]
        else:
            amounts = [100, 1000, 10000]

        for level in ('low', 'medium', 'high'):
            for amount in amounts:
                # A fresh environment is created for each request.
                env = neorl.make(domain)
                env.get_dataset(data_type=level, train_num=amount)