def ant(standup=True, short=False):
    """Build the MuJoCo Ant environment with this module's wrappers applied.

    The base ``AntEnv`` is wrapped, in order, by ``UseReward`` (reading the
    ``"reward_forward"`` info key), ``MjViewer`` (20 fps), optionally
    ``NeverDone``, and finally ``limit``.

    Args:
        standup: When true, wrap the environment in ``NeverDone`` with a
            bonus of ``data.qpos.flat[2] - 1.2``.
        short: When true, 300 is passed to ``limit``; otherwise 1000.

    Returns:
        The environment returned by the final ``limit`` wrapper.
    """
    # Import lazily so MuJoCo is only needed when this environment is built.
    from gym.envs import mujoco

    env = mujoco.AntEnv()
    env = UseReward(env, reward_info_key="reward_forward")
    env = MjViewer(env, fps=20)
    if standup:
        def bonus(a, data):
            # NOTE(review): qpos.flat[2] is presumably the torso height, so
            # the bonus is positive above 1.2 -- confirm against the model.
            return data.qpos.flat[2] - 1.2

        env = NeverDone(env, bonus)
    # NOTE(review): the second argument looks like a step cap -- confirm
    # against the definition of ``limit``.
    env = limit(env, 300 if short else 1000)
    return env
def ant(standup=True, short=False):
    """Return the MuJoCo Ant environment wrapped for use by this module.

    Wrapping order (innermost first): ``AntEnv`` -> ``UseReward`` keyed on
    ``"reward_forward"`` -> ``MjViewer`` at 20 fps -> ``NeverDone`` (only
    when ``standup`` is true) -> ``limit``.

    Args:
        standup: If true, add the ``NeverDone`` wrapper with a bonus equal to
            ``data.qpos.flat[2] - 1.2``.
        short: If true, ``limit`` receives 300; otherwise it receives 1000.

    Returns:
        The result of the outermost ``limit`` call.
    """
    # Deferred import: avoids forcing the user to have MuJoCo to run Teacher.
    from gym.envs import mujoco

    wrapped = MjViewer(
        UseReward(mujoco.AntEnv(), reward_info_key="reward_forward"),
        fps=20,
    )

    if standup:
        # NOTE(review): index 2 of qpos is presumably the torso height --
        # confirm against the Ant model before relying on this.
        wrapped = NeverDone(wrapped, lambda a, data: data.qpos.flat[2] - 1.2)

    if short:
        limit_value = 300
    else:
        limit_value = 1000
    return limit(wrapped, limit_value)