Beispiel #1
0
def build_gym_env(env_name, is_print=True):
    """Create the environment named `env_name` and collect its basic description.

    Three names get special handling: 'Pendulum-v0' (its spec's reward
    threshold is overwritten), 'CarRacing-v0' (wrapped by
    `fix_car_racing_v0`, state_dim gets a leading channel axis) and
    'MultiWalker' (built from `multiwalker_base` instead of `gym.make`).
    Every other name goes straight through `gym.make` and `get_env_info`.

    :param env_name: environment identifier; must not be None.
    :param is_print: forwarded unchanged to `get_env_info`.
    :return: (env, state_dim, action_dim, max_action, target_reward, is_discrete)
    """
    assert env_name is not None

    if env_name == 'MultiWalker':
        # Project-local environment, imported lazily so other envs do not need it.
        from multiwalker_base import MultiWalkerEnv, multi_to_single_walker_decorator
        env = multi_to_single_walker_decorator(MultiWalkerEnv())

        # The spaces are iterated as collections of boxes; their first
        # dimensions are summed into a single flat size.
        state_dim = sum(space.shape[0] for space in env.observation_space)
        action_dim = sum(space.shape[0] for space in env.action_space)
        # Hard-coded values for this env: max_action, target_reward, is_discrete.
        return env, state_dim, action_dim, 1.0, 50, False

    if env_name == 'CarRacing-v0':
        from AgentPixel import fix_car_racing_v0
        env = fix_car_racing_v0(gym.make(env_name))
    else:
        env = gym.make(env_name)
        if env_name == 'Pendulum-v0':
            # Must happen before get_env_info; presumably that is where
            # target_reward is read from the spec -- TODO confirm.
            env.spec.reward_threshold = -200.0

    env_info = get_env_info(env, is_print)
    state_dim, action_dim, max_action, target_reward, is_discrete = env_info

    if env_name == 'CarRacing-v0':
        assert len(state_dim)
        # Prepend a channel axis of size 1: a single frame per state.
        # (A two-frame variant would use 2 as the leading entry.)
        state_dim = (1, state_dim[0], state_dim[1])

    return env, state_dim, action_dim, max_action, target_reward, is_discrete
Beispiel #2
0
def build_gym_env(env_name, if_print=True, if_norm=True):
    """Create the environment `env_name`, pass it through the normalization wrapper, and describe it.

    The function works in two stages:

    1. Env compatibility -- 'Pendulum-v0', 'CarRacing-v0' and 'MultiWalker'
       need individual adjustments; every other name goes straight through
       `gym.make` and `get_env_info`.
    2. Env normalization -- the env is always handed to
       `decorator__normalization` together with `action_max` (per the
       original note, adjusting actions into [-1, +1] using action_max is
       necessary). Hard-coded state statistics are passed along only when
       `if_norm` is true and the env has an entry in the table below;
       otherwise avg and std stay None.

    :param env_name: environment identifier; must not be None.
    :param if_print: forwarded unchanged to `get_env_info`.
    :param if_norm: whether to look up the hard-coded state avg/std.
    :return: (env, state_dim, action_dim, target_reward, is_discrete)
    """
    assert env_name is not None

    # Level 40 hides "UserWarning: WARN: Box bound precision lowered by casting to float32"
    # (https://stackoverflow.com/questions/60149105/
    #  userwarning-warn-box-bound-precision-lowered-by-casting-to-float32). Non-essential.
    gym.logger.set_level(40)

    # ---- env compatibility: some envs need adjusting ----
    if env_name == 'MultiWalker':
        # Project-local environment, imported lazily so other envs do not need it.
        from multiwalker_base import MultiWalkerEnv, multi_to_single_walker_decorator
        env = multi_to_single_walker_decorator(MultiWalkerEnv())

        state_dim = sum(space.shape[0] for space in env.observation_space)
        action_dim = sum(space.shape[0] for space in env.action_space)
        action_max, target_reward, is_discrete = 1.0, 50, False
    else:
        if env_name == 'CarRacing-v0':
            from AgentPixel import fix_car_racing_v0
            env = fix_car_racing_v0(gym.make(env_name))
        else:
            env = gym.make(env_name)
            if env_name == 'Pendulum-v0':
                # Overwritten before get_env_info is called (original note: target_reward).
                env.spec.reward_threshold = -200.0

        env_info = get_env_info(env, if_print)
        state_dim, action_dim, action_max, target_reward, is_discrete = env_info

        if env_name == 'CarRacing-v0':
            assert len(state_dim)
            # Prepend a channel axis of size 1: one frame (96, 96) per state.
            state_dim = (1, state_dim[0], state_dim[1])

    # ---- env normalization ----
    avg = std = None
    if if_norm:
        # (avg, std) per env. Per the original notes these are "could be"
        # normalizations; Pendulum-v0 is marked as not needing one.
        bipedal_walker_stats = (
            [
                -0.695, 0.0155, 2.3846, 0.1053, 0.7689, 0.0000, -0.672, 0.0000,
                0.1490, 0.6277, 0.0000, -0.465, -0.111, 0.0000, 0.7132, 0.7129,
                0.7131, 0.7146, 0.7177, 0.7234, 0.7358, 0.7693, 0.8446, 0.1197,
            ],
            [
                0.1297, 0.0249, 0.1365, 0.0459, 0.3608, 0.8593, 0.2468, 0.7051,
                0.4938, 0.2846, 0.7836, 0.2022, 0.6874, 0.4859, 0.0180, 0.0185,
                0.0198, 0.0221, 0.0258, 0.0318, 0.0417, 0.0590, 0.0313, 0.0037,
            ],
        )
        stats_by_env = {
            'LunarLanderContinuous-v2': (
                [0.0000, -0.116, -0.057, -0.242, 0.0000, 0.0000, 0.9490, 0.9340],
                [0.2651, 0.4281, 0.1875, 0.1872, 0.1448, 0.0931, 0.4919, 0.4932],
            ),
            # Both walker variants share one set of statistics.
            "BipedalWalker-v3": bipedal_walker_stats,
            "BipedalWalkerHardcore-v3": bipedal_walker_stats,
            'AntBulletEnv-v0': (
                [
                    0.4838, -0.047, 0.3500, 1.3028, -0.249, 0.0000, -0.281, 0.0573,
                    -0.261, 0.0000, 0.0424, 0.0000, 0.2278, 0.0000, -0.072, 0.0000,
                    0.0000, 0.0000, -0.175, 0.0000, -0.319, 0.0000, 0.1387, 0.0000,
                    0.1949, 0.0000, -0.136, -0.060,
                ],
                [
                    0.0601, 0.2267, 0.0838, 0.2680, 0.1161, 0.0757, 0.1495, 0.1235,
                    0.6733, 0.4326, 0.6723, 0.3422, 0.7444, 0.5129, 0.6561, 0.2732,
                    0.6805, 0.4793, 0.5637, 0.2586, 0.5928, 0.3876, 0.6005, 0.2369,
                    0.4858, 0.4227, 0.4428, 0.4831,
                ],
            ),
            'MinitaurBulletEnv-v0': (
                [
                    1.47001359, 0.30830934, -0.77972377, 0.17442283, 0.37340564,
                    1.06691741, 0.60456939, 0.95682976, -0.39346971, 2.96403055,
                    -0.01850164, 2.30442994, 2.23973869, -0.88910996, 1.12977025,
                    -0.51102429, 7.5676575, 1.6353235, 2.1733776, -1.82636141,
                    0.29630162, 8.24371965, -3.42478495, 4.51330641, 0.64870541,
                    -0.79007218, -0.54545934, -1.05071308,
                ],
                [
                    0.23399817, 0.62358118, 0.52221307, 0.51214068, 0.58800798,
                    0.52835137, 0.64166488, 0.73870745, 8.54430725, 14.22404018,
                    11.0978438, 12.17012605, 12.29702763, 6.80255162, 10.03443105,
                    8.00069937, 2.65448714, 2.77295941, 2.12780892, 2.3609657,
                    3.0234977, 2.75095736, 3.06211432, 2.68766232, 0.08670024,
                    0.04757667, 0.1714999, 0.02645171,
                ],
            ),
        }
        if env_name in stats_by_env:
            avg_values, std_values = stats_by_env[env_name]
            avg = np.array(avg_values)
            std = np.array(std_values)

    env = decorator__normalization(env, action_max, avg, std)
    return env, state_dim, action_dim, target_reward, is_discrete
Beispiel #3
0
def build_gym_env(env_name, if_print=True, if_norm=True):
    """Create the environment `env_name`, pass it through the normalization wrapper, and describe it.

    Stage 1 (env compatibility): 'Pendulum-v0', 'CarRacing-v0',
    'MultiWalker' and 'MinitaurBulletEnv-v0' need individual adjustments;
    every other name goes straight through `gym.make` and `get_env_info`.

    Stage 2 (env normalization): the env is always handed to
    `decorator__normalization` together with `action_max` (per the original
    note, adjusting actions into [-1, +1] using action_max is necessary).
    Hard-coded state statistics are passed along only when `if_norm` is
    true and the env has an entry in the table below; otherwise avg and
    std stay None.

    :param env_name: environment identifier; must not be None.
    :param if_print: forwarded unchanged to `get_env_info`.
    :param if_norm: whether to look up the hard-coded state avg/std.
    :return: (env, state_dim, action_dim, target_reward, is_discrete)
    """
    assert env_name is not None

    # Level 40 hides "UserWarning: WARN: Box bound precision lowered by casting to float32"
    # (https://stackoverflow.com/questions/60149105/
    #  userwarning-warn-box-bound-precision-lowered-by-casting-to-float32). Non-essential.
    gym.logger.set_level(40)

    # ---- env compatibility: some envs need adjusting ----
    if env_name == 'MultiWalker':
        # Project-local environment, imported lazily so other envs do not need it.
        from multiwalker_base import MultiWalkerEnv, multi_to_single_walker_decorator
        env = multi_to_single_walker_decorator(MultiWalkerEnv())

        state_dim = sum(space.shape[0] for space in env.observation_space)
        action_dim = sum(space.shape[0] for space in env.action_space)
        action_max, target_reward, is_discrete = 1.0, 50, False
    else:
        if env_name == 'CarRacing-v0':
            from AgentPixel import fix_car_racing_v0
            env = fix_car_racing_v0(gym.make(env_name))
        else:
            env = gym.make(env_name)
            if env_name == 'Pendulum-v0':
                # Overwritten before get_env_info is called (original note: target_reward).
                env.spec.reward_threshold = -200.0

        env_info = get_env_info(env, if_print)
        state_dim, action_dim, action_max, target_reward, is_discrete = env_info

        if env_name == 'CarRacing-v0':
            assert len(state_dim)
            # Prepend a channel axis of size 1: one frame (96, 96) per state.
            state_dim = (1, state_dim[0], state_dim[1])
        elif env_name == "MinitaurBulletEnv-v0":
            # Replace env.step with a wrapper that scales each action by 1.1
            # and clips it back into [-1, 1] before delegating.
            original_step = env.step

            def scaled_step(action):
                state, reward, done, info = original_step(
                    (action * 1.1).clip(-1, 1))
                return state, reward, done, info

            env.step = scaled_step

    # ---- env normalization ----
    avg = std = None
    if if_norm:
        # (avg, std) per env. Per the original notes these are "could be"
        # normalizations; Pendulum-v0 is marked as not needing one and
        # BipedalWalkerHardcore-v3 has no statistics yet.
        stats_by_env = {
            'LunarLanderContinuous-v2': (
                [
                    1.65470898e-02, -1.29684399e-01, 4.26883133e-03,
                    -3.42124557e-02, -7.39076972e-03, -7.67103031e-04,
                    1.12640885e+00, 1.12409466e+00,
                ],
                [
                    0.15094465, 0.29366297, 0.23490797, 0.25931464, 0.21603736,
                    0.25886878, 0.277233, 0.27771219,
                ],
            ),
            "BipedalWalker-v3": (
                [
                    1.42211734e-01, -2.74547996e-03, 1.65104509e-01,
                    -1.33418152e-02, -2.43243194e-01, -1.73886203e-02,
                    4.24114229e-02, -6.57800099e-02, 4.53460692e-01,
                    6.08022244e-01, -8.64884810e-04, -2.08789053e-01,
                    -2.92092949e-02, 5.04791247e-01, 3.33571745e-01,
                    3.37325723e-01, 3.49106580e-01, 3.70363115e-01, 4.04074671e-01,
                    4.55838055e-01, 5.36685407e-01, 6.70771701e-01, 8.80356865e-01,
                    9.97987386e-01,
                ],
                [
                    0.84419678, 0.06317835, 0.16532085, 0.09356959, 0.486594,
                    0.55477525, 0.44076614, 0.85030824, 0.29159821, 0.48093035,
                    0.50323634, 0.48110776, 0.69684234, 0.29161077, 0.06962932,
                    0.0705558, 0.07322677, 0.07793258, 0.08624322, 0.09846895,
                    0.11752805, 0.14116005, 0.13839757, 0.07760469,
                ],
            ),
            'AntBulletEnv-v0': (
                [
                    0.4838, -0.047, 0.3500, 1.3028, -0.249, 0.0000, -0.281, 0.0573,
                    -0.261, 0.0000, 0.0424, 0.0000, 0.2278, 0.0000, -0.072, 0.0000,
                    0.0000, 0.0000, -0.175, 0.0000, -0.319, 0.0000, 0.1387, 0.0000,
                    0.1949, 0.0000, -0.136, -0.060,
                ],
                [
                    0.0601, 0.2267, 0.0838, 0.2680, 0.1161, 0.0757, 0.1495, 0.1235,
                    0.6733, 0.4326, 0.6723, 0.3422, 0.7444, 0.5129, 0.6561, 0.2732,
                    0.6805, 0.4793, 0.5637, 0.2586, 0.5928, 0.3876, 0.6005, 0.2369,
                    0.4858, 0.4227, 0.4428, 0.4831,
                ],
            ),
            'MinitaurBulletEnv-v0': (
                [
                    0.90172989, 1.54730119, 1.24560906, 1.97365306, 1.9413892,
                    1.03866835, 1.69646277, 1.18655352, -0.45842347, 0.17845232,
                    0.38784456, 0.58572877, 0.91414561, -0.45410697, 0.7591031,
                    -0.07008998, 3.43842258, 0.61032482, 0.86689961, -0.33910894,
                    0.47030415, 4.5623528, -2.39108079, 3.03559422, -0.36328256,
                    -0.20753499, -0.47758384, 0.86756409,
                ],
                [
                    0.34192648, 0.51169916, 0.39370621, 0.55568461, 0.46910769,
                    0.28387504, 0.51807949, 0.37723445, 13.16686185, 17.51240024,
                    14.80264211, 16.60461412, 15.72930229, 11.38926597,
                    15.40598346, 13.03124941, 2.47718145, 2.55088804, 2.35964651,
                    2.51025567, 2.66379017, 2.37224904, 2.55892521, 2.41716885,
                    0.07529733, 0.05903034, 0.1314812, 0.0221248,
                ],
            ),
        }
        if env_name in stats_by_env:
            avg_values, std_values = stats_by_env[env_name]
            avg = np.array(avg_values)
            std = np.array(std_values)

    env = decorator__normalization(env, action_max, avg, std)
    return env, state_dim, action_dim, target_reward, is_discrete
Beispiel #4
0
def build_gym_env(env_name, if_print=True, if_norm=True):
    """Create the environment `env_name`, pass it through the normalization wrapper, and describe it.

    Stage 1 (env compatibility): 'Pendulum-v0', 'CarRacing-v0' and
    'MultiWalker' need individual adjustments; every other name goes
    straight through `gym.make` and `get_env_info`.

    Stage 2 (env normalization): the env is always handed to
    `decorator__normalization` together with `action_max` (per the original
    note, adjusting actions into [-1, +1] using action_max is necessary).
    Hard-coded state statistics are passed along only when `if_norm` is
    true and the env has an entry in the table below; otherwise avg and
    std stay None.

    :param env_name: environment identifier; must not be None.
    :param if_print: forwarded unchanged to `get_env_info`.
    :param if_norm: whether to look up the hard-coded state avg/std.
    :return: (env, state_dim, action_dim, target_reward, is_discrete)
    """
    assert env_name is not None

    # ---- env compatibility: some envs need adjusting ----
    if env_name == 'MultiWalker':
        # Project-local environment, imported lazily so other envs do not need it.
        from multiwalker_base import MultiWalkerEnv, multi_to_single_walker_decorator
        env = multi_to_single_walker_decorator(MultiWalkerEnv())

        state_dim = sum(space.shape[0] for space in env.observation_space)
        action_dim = sum(space.shape[0] for space in env.action_space)
        action_max, target_reward, is_discrete = 1.0, 50, False
    else:
        if env_name == 'CarRacing-v0':
            from AgentPixel import fix_car_racing_v0
            env = fix_car_racing_v0(gym.make(env_name))
        else:
            env = gym.make(env_name)
            if env_name == 'Pendulum-v0':
                # Overwritten before get_env_info is called (original note: target_reward).
                env.spec.reward_threshold = -200.0

        env_info = get_env_info(env, if_print)
        state_dim, action_dim, action_max, target_reward, is_discrete = env_info

        if env_name == 'CarRacing-v0':
            assert len(state_dim)
            # Prepend a channel axis of size 1: one frame (96, 96) per state.
            state_dim = (1, state_dim[0], state_dim[1])

    # ---- env normalization ----
    avg = std = None
    if if_norm:
        # Note kept from the original on transferring normalization statistics
        # (x: old state dist, y: new state dist, a: mean, s: std):
        #     xs += 1e-5
        #     state_mean = xa * xs + ya
        #     std = xs * ys
        #
        # (avg, std) per env. Per the original notes these are "could be"
        # normalizations; Pendulum-v0 is marked as not needing one.
        stats_by_env = {
            'LunarLanderContinuous-v2': (
                [
                    -0.02058458, 0.24824196, -0.00663194, -0.08466694, 0.01788491,
                    0.00145454, 0.4105835, 0.41815186,
                ],
                [
                    0.2651723, 0.42812532, 0.18754327, 0.18728738, 0.14481373,
                    0.09316564, 0.49195474, 0.49327046,
                ],
            ),
            "BipedalWalker-v3": (
                [
                    0.15421079, -0.0019480261, 0.20461461, -0.010021029,
                    -0.054185472, -0.0066469274, 0.043834914, -0.0623244,
                    0.47021484, 0.55891204, -0.0014871443, -0.18538311,
                    -0.032906517, 0.4628296, 0.34264696, 0.3465399, 0.3586852,
                    0.3805626, 0.41525024, 0.46849185, 0.55162823, 0.68896055,
                    0.88635695, 0.997974,
                ],
                [
                    0.33242697, 0.04527563, 0.19229797, 0.0729273, 0.7084785,
                    0.6366427, 0.54090905, 0.6944477, 0.49912727, 0.6371604,
                    0.5867769, 0.56915027, 0.6196849, 0.49863166, 0.07042835,
                    0.07128556, 0.073920645, 0.078663535, 0.08622651, 0.09801551,
                    0.116571024, 0.14705327, 0.14093699, 0.019490194,
                ],
            ),
            'AntBulletEnv-v0': (
                [
                    -0.21634328, 0.08877027, 0.92127347, 0.19477099, 0.01834413,
                    -0.00399973, 0.05166896, -0.06077103, 0.30839303, 0.00338527,
                    0.0065377, 0.00814168, -0.08944025, -0.00331316, 0.29353178,
                    -0.00634391, 0.1048052, -0.00327279, 0.52993906, -0.00569263,
                    -0.14778288, -0.00101847, -0.2781167, 0.00479939, 0.64501953,
                    0.8638916, 0.8486328, 0.7150879,
                ],
                [
                    0.07903007, 0.35201055, 0.13954371, 0.21050458, 0.06752874,
                    0.06185101, 0.15283841, 0.16655168, 0.6452229, 0.26257575,
                    0.6666661, 0.14235465, 0.69359726, 0.32817268, 0.6647092,
                    0.16925392, 0.6878494, 0.3009345, 0.6294114, 0.15175952,
                    0.6949041, 0.27704775, 0.6775213, 0.18721068, 0.478522,
                    0.3429141, 0.35841736, 0.45138636,
                ],
            ),
            # TODO: error with norm (flagged in the original for this env).
            'MinitaurBulletEnv-v0': (
                [
                    1.25116920e+00, 2.35373068e+00, 1.77717030e+00, 2.72379971e+00,
                    2.27262020e+00, 1.12126017e+00, 2.80015516e+00, 1.72379172e+00,
                    -4.53610346e-02, 2.10091516e-01, -1.20424433e-03,
                    2.07291126e-01, 1.69130951e-01, -1.16945259e-01,
                    1.06861845e-01, -5.53673357e-02, 2.81922913e+00,
                    5.51327229e-01, 9.92989361e-01, -8.03717971e-01,
                    7.90598467e-02, 2.99980807e+00, -1.27279997e+00,
                    1.76894355e+00, 3.58282216e-02, 8.28480721e-02, 8.04320276e-02,
                    9.86465216e-01,
                ],
                [
                    2.0109391e-01, 3.5780826e-01, 2.2601920e-01, 5.0385582e-01,
                    3.8282552e-01, 1.9690999e-01, 3.9662227e-01, 2.3809761e-01,
                    8.9289074e+00, 1.4150095e+01, 1.0200104e+01, 1.1171419e+01,
                    1.3293057e+01, 7.7480621e+00, 1.0750853e+01, 9.1990738e+00,
                    2.7995987e+00, 2.9199743e+00, 2.3916528e+00, 2.6439502e+00,
                    3.1360087e+00, 2.7837939e+00, 2.7758663e+00, 2.5578094e+00,
                    4.1734818e-02, 3.2294787e-02, 7.8678936e-02, 1.0366816e-02,
                ],
            ),
        }
        if env_name in stats_by_env:
            avg_values, std_values = stats_by_env[env_name]
            avg = np.array(avg_values)
            std = np.array(std_values)

    env = decorator__normalization(env, action_max, avg, std)
    return env, state_dim, action_dim, target_reward, is_discrete
Beispiel #5
0
def build_gym_env(env_name, if_print=True, if_norm=True):
    """Create the environment `env_name`, pass it through the normalization wrapper, and describe it.

    Stage 1 (env compatibility): 'Pendulum-v0', 'CarRacing-v0' and
    'MultiWalker' need individual adjustments; every other name goes
    straight through `gym.make` and `get_env_info`.

    Stage 2 (env normalization): the env is always handed to
    `decorator__normalization` together with `action_max` (per the original
    note, adjusting actions into [-1, +1] using action_max is necessary).
    Hard-coded state statistics are passed along only when `if_norm` is
    true and the env has an entry in the table below; otherwise avg and
    std stay None.

    :param env_name: environment identifier; must not be None.
    :param if_print: forwarded unchanged to `get_env_info`.
    :param if_norm: whether to look up the hard-coded state avg/std.
    :return: (env, state_dim, action_dim, target_reward, is_discrete)
    """
    assert env_name is not None

    # Level 40 hides "UserWarning: WARN: Box bound precision lowered by casting to float32"
    # (https://stackoverflow.com/questions/60149105/
    #  userwarning-warn-box-bound-precision-lowered-by-casting-to-float32). Non-essential.
    gym.logger.set_level(40)

    # ---- env compatibility: some envs need adjusting ----
    if env_name == 'MultiWalker':
        # Project-local environment, imported lazily so other envs do not need it.
        from multiwalker_base import MultiWalkerEnv, multi_to_single_walker_decorator
        env = multi_to_single_walker_decorator(MultiWalkerEnv())

        state_dim = sum(space.shape[0] for space in env.observation_space)
        action_dim = sum(space.shape[0] for space in env.action_space)
        action_max, target_reward, is_discrete = 1.0, 50, False
    else:
        if env_name == 'CarRacing-v0':
            from AgentPixel import fix_car_racing_v0
            env = fix_car_racing_v0(gym.make(env_name))
        else:
            env = gym.make(env_name)
            if env_name == 'Pendulum-v0':
                # Overwritten before get_env_info is called (original note: target_reward).
                env.spec.reward_threshold = -200.0

        env_info = get_env_info(env, if_print)
        state_dim, action_dim, action_max, target_reward, is_discrete = env_info

        if env_name == 'CarRacing-v0':
            assert len(state_dim)
            # Prepend a channel axis of size 1: one frame (96, 96) per state.
            state_dim = (1, state_dim[0], state_dim[1])

    # ---- env normalization ----
    avg = std = None
    if if_norm:
        # (avg, std) per env. Per the original notes these are "could be"
        # normalizations; Pendulum-v0 is marked as not needing one.
        # MinitaurBulletEnv-v0 and BipedalWalkerHardcore-v3 have no active
        # entry in this version, so they run without state statistics.
        stats_by_env = {
            'LunarLanderContinuous-v2': (
                [
                    1.65470898e-02, -1.29684399e-01, 4.26883133e-03, -3.42124557e-02,
                    -7.39076972e-03, -7.67103031e-04, 1.12640885e+00, 1.12409466e+00,
                ],
                [
                    0.15094465, 0.29366297, 0.23490797, 0.25931464, 0.21603736,
                    0.25886878, 0.277233, 0.27771219,
                ],
            ),
            "BipedalWalker-v3": (
                [
                    1.42211734e-01, -2.74547996e-03, 1.65104509e-01, -1.33418152e-02,
                    -2.43243194e-01, -1.73886203e-02, 4.24114229e-02, -6.57800099e-02,
                    4.53460692e-01, 6.08022244e-01, -8.64884810e-04, -2.08789053e-01,
                    -2.92092949e-02, 5.04791247e-01, 3.33571745e-01, 3.37325723e-01,
                    3.49106580e-01, 3.70363115e-01, 4.04074671e-01, 4.55838055e-01,
                    5.36685407e-01, 6.70771701e-01, 8.80356865e-01, 9.97987386e-01,
                ],
                [
                    0.84419678, 0.06317835, 0.16532085, 0.09356959, 0.486594,
                    0.55477525, 0.44076614, 0.85030824, 0.29159821, 0.48093035,
                    0.50323634, 0.48110776, 0.69684234, 0.29161077, 0.06962932,
                    0.0705558, 0.07322677, 0.07793258, 0.08624322, 0.09846895,
                    0.11752805, 0.14116005, 0.13839757, 0.07760469,
                ],
            ),
            'AntBulletEnv-v0': (
                [
                    0.4838, -0.047, 0.3500, 1.3028, -0.249, 0.0000, -0.281, 0.0573,
                    -0.261, 0.0000, 0.0424, 0.0000, 0.2278, 0.0000, -0.072, 0.0000,
                    0.0000, 0.0000, -0.175, 0.0000, -0.319, 0.0000, 0.1387, 0.0000,
                    0.1949, 0.0000, -0.136, -0.060,
                ],
                [
                    0.0601, 0.2267, 0.0838, 0.2680, 0.1161, 0.0757, 0.1495, 0.1235,
                    0.6733, 0.4326, 0.6723, 0.3422, 0.7444, 0.5129, 0.6561, 0.2732,
                    0.6805, 0.4793, 0.5637, 0.2586, 0.5928, 0.3876, 0.6005, 0.2369,
                    0.4858, 0.4227, 0.4428, 0.4831,
                ],
            ),
        }
        if env_name in stats_by_env:
            avg_values, std_values = stats_by_env[env_name]
            avg = np.array(avg_values)
            std = np.array(std_values)

    env = decorator__normalization(env, action_max, avg, std)
    return env, state_dim, action_dim, target_reward, is_discrete