Example #1
0
def set_env():
    """Register the snake environment and wire it into the scoreboard."""
    # Environment registration
    register(id='SnakeEnv-v0',
             entry_point='gym.envs.gym_snake:SingleSnake')

    # Scoreboard group + task entries
    add_group(
        id='gym_snake',
        name='gym_snake',
        description='snake',
    )
    add_task(
        id='SnakeEnv-v0',
        group='gym_snake',
        summary="Multi snakes environment",
    )
Example #2
0
# Scoreboard group for the AI-safety environments.
add_group(
    id='safety',
    name='Safety',
    description='Environments to test various AI safety properties.',
)

# classic control

# Scoreboard copy for the CartPole task, kept out of the call for readability.
_CARTPOLE_DESCRIPTION = """\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
"""

_CARTPOLE_BACKGROUND = """\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.

.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
"""

add_task(
    id='CartPole-v0',
    group='classic_control',
    summary="Balance a pole on a cart.",
    description=_CARTPOLE_DESCRIPTION,
    background=_CARTPOLE_BACKGROUND,
)

add_task(
    id='Acrobot-v0',
    group='classic_control',
    summary="Swing up a two-link robot.",
    description="""\
Example #3
0
    name='Safety',
    description='Environments to test various AI safety properties.'
)

# classic control

# CartPole scoreboard entry, expressed as a spec dict and splatted into the call.
_cartpole_spec = dict(
    id='CartPole-v0',
    group='classic_control',
    summary="Balance a pole on a cart.",
    description="""\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
""",
    background="""\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.

.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
""",
)
add_task(**_cartpole_spec)


add_task(
    id='Acrobot-v0',
    group='classic_control',
    summary="Swing up a two-link robot.",
Example #4
0
)

# Scoreboard task for the meta map that chains all nine Doom missions.
# NOTE(review): the id contains a literal '{}' placeholder with no .format()
# applied — comparable registrations elsewhere build this id as
# '{}/meta-Doom-v0'.format(USERNAME). This looks like a missing format call;
# confirm whether USERNAME is in scope here before fixing.
add_task(
    id='{}/meta-Doom-v0',
    group='doom',
    summary='Mission #1 to #9 - Beat all 9 Doom missions.',
    description="""
This is a meta map that combines all 9 Doom levels.

Levels:
    - #0 Doom Basic
    - #1 Doom Corridor
    - #2 Doom DefendCenter
    - #3 Doom DefendLine
    - #4 Doom HealthGathering
    - #5 Doom MyWayHome
    - #6 Doom PredictPosition
    - #7 Doom TakeCover
    - #8 Doom Deathmatch

Goal: 9,000 points
    - Pass all levels

Scoring:
    - Each level score has been standardized on a scale of 0 to 1,000
    - The passing score for a level is 990 (99th percentile)
    - A bonus of 450 (50 * 9 levels) is given if all levels are passed
    - The score for a level is the average of the last 3 tries
"""
)

add_task(
Example #5
0
# Scoreboard group for the VizDoom-based environments.
add_group(
    id='doom',
    name='Doom',
    description='Doom environments based on VizDoom.',
)

# classic control

# Long-form scoreboard copy for CartPole, hoisted out of the registration call.
CARTPOLE_DESC = """\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
"""

CARTPOLE_BG = """\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.

.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
"""

add_task(id='CartPole-v0',
         group='classic_control',
         summary="Balance a pole on a cart.",
         description=CARTPOLE_DESC,
         background=CARTPOLE_BG)

add_task(
    id='Acrobot-v0',
    group='classic_control',
    summary="Swing up a two-link robot.",
    description="""\
Example #6
0
    id='LolAcrobot-v1',
    entry_point='{}_gym_test:AcrobotEnv'.format(USERNAME),
    timestep_limit=500,
)

# Scoreboard registration
# ==========================
# Scoreboard registration
# ==========================
# User-namespaced CartPole variant (short episode), text hoisted for clarity.
_short_cartpole_desc = """\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
"""

_short_cartpole_bg = """\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.
.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
"""

add_task(id='{}/CartPole-v0'.format(USERNAME),
         group='classic_control',
         summary="Balance a pole on a cart (for a short time).",
         description=_short_cartpole_desc,
         background=_short_cartpole_bg)

add_task(
    id='{}/CartPole-v1'.format(USERNAME),
    group='classic_control',
    summary="Balance a pole on a cart.",
    description="""\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
Example #7
0
          name='Doom',
          description='Doom environments based on VizDoom.')

# Long-form text for the meta map that chains all nine Doom missions.
_META_DOOM_DESC = """
This is a meta map that combines all 9 Doom levels.

Levels:
    - #0 Doom Basic
    - #1 Doom Corridor
    - #2 Doom DefendCenter
    - #3 Doom DefendLine
    - #4 Doom HealthGathering
    - #5 Doom MyWayHome
    - #6 Doom PredictPosition
    - #7 Doom TakeCover
    - #8 Doom Deathmatch

Goal: 9,000 points
    - Pass all levels

Scoring:
    - Each level score has been standardized on a scale of 0 to 1,000
    - The passing score for a level is 990 (99th percentile)
    - A bonus of 450 (50 * 9 levels) is given if all levels are passed
    - The score for a level is the average of the last 3 tries
"""

add_task(
    id='{}/meta-Doom-v0'.format(USERNAME),
    group='doom',
    summary='Mission #1 to #9 - Beat all 9 Doom missions.',
    description=_META_DOOM_DESC,
)

add_task(id='{}/DoomBasic-v0'.format(USERNAME),
         group='doom',
Example #8
0
    name='Doom',
    description='Doom environments based on VizDoom.'
)

# classic control

# CartPole scoreboard entry built as a keyword dict, then splatted in.
_cartpole_task = {
    'id': 'CartPole-v0',
    'group': 'classic_control',
    'summary': "Balance a pole on a cart.",
    'description': """\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
""",
    'background': """\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.

.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
""",
}
add_task(**_cartpole_task)


add_task(
    id='Acrobot-v0',
    group='classic_control',
    summary="Swing up a two-link robot.",
# change api_base / web_base.)
'''api_key = os.environ.get('OPENAI_GYM_API_KEY')
api_base = os.environ.get('OPENAI_GYM_API_BASE', 'https://gym-api.openai.com')
web_base = os.environ.get('OPENAI_GYM_WEB_BASE', 'https://gym.openai.com')'''

# The following controls how various tasks appear on the
# scoreboard. These registrations can differ from what's registered in
# this repository.

# groups

# Scoreboard group plus its three catvehicle test tasks.
add_group(id='gym-vehicle', name='gym-vehicle', description='TODO.')

for _task_id, _task_summary in [
    ('GazeboCircuitLargeCatvehicleLidar-v0', 'Test1.'),
    ('GazeboCircuitLargeCatvehicleLidarNn-v0', 'Test2.'),
    ('GazeboTrackCatvehicleLidar-v0', 'Test3.'),
]:
    add_task(id=_task_id, group='gym-vehicle', summary=_task_summary)
Example #10
0
from gym.envs.registration import register
from gym.scoreboard.registration import add_group
from gym.scoreboard.registration import add_task

# Register the first Super Mario Bros level and expose it on the scoreboard.
register(id='SuperMarioBros-1-1-v0',
         entry_point='gym.envs.ppaquette_gym_super_mario:MetaSuperMarioBrosEnv')

add_group(
    id='ppaquette_gym_super_mario',
    name='ppaquette_gym_super_mario',
    description='super_mario',
)

add_task(
    id='SuperMarioBros-1-1-v0',
    group='ppaquette_gym_super_mario',
    summary="SuperMarioBros-1-1-v0",
)

import random
from collections import deque

# Fix: `gym` itself was never imported (only submodules above), so the
# gym.make() call below raised NameError at import time.
import gym
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
from gym import wrappers

# NOTE(review): this id carries a 'ppaquette/' prefix while the register()
# call above registers plain 'SuperMarioBros-1-1-v0' — confirm the prefixed
# id is registered elsewhere (e.g. by the ppaquette package itself).
env = gym.make('ppaquette/SuperMarioBros-1-1-v0')

# Number of consecutive frames stacked into one network input.
frame_history_len = 4
# Constants defining our neural network
#input_size = env.observation_space.shape[0]*env.observation_space.shape[1]*3        #####change input_size - 224*256*3 acquired from ppaquette_gym_super_mario/nes_env.py
Example #11
0

# Scoreboard group for the N-armed bandit tasks.
add_group(id='bandits',
          name='Bandits',
          description='Various N-Armed Bandit environments')

# Deterministic two-armed bandit: one arm always pays, the other never does.
_TWO_ARMED_DETERMINISTIC_DESC = """
    Each bandit takes in a probability distribution, which is the likelihood of the action paying out,
    and a reward distribution, which is the value or distribution of what the agent will be rewarded
    the bandit does payout.

    p_dist = [1, 0]
    r_dist = [1, 1]
    """

add_task(
    id='BanditTwoArmedDeterministicFixed-v0',
    group='bandits',
    experimental=True,
    contributor='jkcooper2',
    summary="Simplest bandit where one action always pays, and the other never does.",
    description=_TWO_ARMED_DETERMINISTIC_DESC,
    background="",
)

add_task(
    id='BanditTwoArmedHighHighFixed-v0',
    group='bandits',
    experimental=True,
    contributor='jkcooper2',
    summary="Stochastic version with a small difference between which bandit pays where both are likely",
    description="""
Example #12
0
# Env registration
# ==========================
envs = [
    'BanditTenArmedRandomFixed', 'BanditTenArmedRandomRandom',
    'BanditTenArmedRandomStochastic', 'BanditTwoArmedDeterministicFixed',
    'BanditTwoArmedHighHighFixed', 'BanditTwoArmedHighLowFixed',
    'BanditTwoArmedHighLowFixedNegative', 'BanditTwoArmedLowLowFixed'
]

for env in envs:
    register(
        id='{}/{}-v0'.format(USERNAME, env),
        entry_point='{}_gym_bandits:{}'.format(USERNAME, env),
        timestep_limit=1,
        nondeterministic=True,
    )

# Scoreboard registration
# ==========================
add_group(id='bandits',
          name='Bandits',
          description='Various N-Armed Bandit environments')

for env in envs:
    add_task(
        id='{}/{}-v0'.format(USERNAME, env),
        group='bandits',
        summary='{}'.format(env),
    )
Example #13
0
# change api_base / web_base.)
'''api_key = os.environ.get('OPENAI_GYM_API_KEY')
api_base = os.environ.get('OPENAI_GYM_API_BASE', 'https://gym-api.openai.com')
web_base = os.environ.get('OPENAI_GYM_WEB_BASE', 'https://gym.openai.com')'''

# The following controls how various tasks appear on the
# scoreboard. These registrations can differ from what's registered in
# this repository.

# groups

# Gazebo scoreboard group and its Turtlebot obstacle-avoidance tasks.
add_group(id='gazebo', name='Gazebo', description='TODO.')

for _tid, _tsummary in [
    ('GazeboMazeTurtlebotLidar-v0', 'Obstacle avoidance in a Maze.'),
    ('GazeboCircuitTurtlebotLidar-v0', 'Obstacle avoidance in a Circuit.'),
    ('GazeboCircuit2TurtlebotLidar-v0', 'Obstacle avoidance in a Circuit 2.'),
]:
    add_task(id=_tid, group='gazebo', summary=_tsummary)
add_task(
    id='GazeboCircuit2TurtlebotLidarNn-v0',
    group='gazebo',
Example #14
0
# change api_base / web_base.)
'''api_key = os.environ.get('OPENAI_GYM_API_KEY')
api_base = os.environ.get('OPENAI_GYM_API_BASE', 'https://gym-api.openai.com')
web_base = os.environ.get('OPENAI_GYM_WEB_BASE', 'https://gym.openai.com')'''

# The following controls how various tasks appear on the
# scoreboard. These registrations can differ from what's registered in
# this repository.

# groups

# Gazebo scoreboard group and its quadcopter/Turtlebot tasks.
add_group(id='gazebo', name='Gazebo', description='TODO.')

for _env_id, _env_summary in [
    ('QuadCopter-v0', 'Obstacle avoidance in a Maze.'),
    ('GazeboMazeTurtlebotLidar-v0', 'Obstacle avoidance in a Maze.'),
    ('GazeboCircuitTurtlebotLidar-v0', 'Obstacle avoidance in a Circuit.'),
]:
    add_task(id=_env_id, group='gazebo', summary=_env_summary)
add_task(
    id='GazeboCircuit2TurtlebotLidar-v0',
Example #15
0
)

# Headless SSBM environment: fight the level-9 CPU on Battlefield.
register(
    id='vladfi1/SSBM-headless-v0',
    entry_point='dolphin:simpleSSBMEnv',
    reward_threshold=1,
    timestep_limit=9999999,
    nondeterministic=True,
    kwargs={'cpu': 9, 'stage': 'battlefield'},
)

# Scoreboard registration
# ==========================
add_group(id='ssbm',
          name='Super Smash Bros. Melee',
          description='Beat the in-game AIs at SSBM.')

"""
add_task(
    id='{}/meta-SuperMarioBros-v0'.format(USERNAME),
    group='ssbm',
    summary='Compilation of all 32 levels of Super Mario Bros. on Nintendo platform - Screen version.',
)
"""

Example #16
0
          description='Minecraft environments based on Malmo.')

# Long-form text for the default-world survival task, hoisted out of the call.
_MINECRAFT_DEFAULT_WORLD_DESC = """
The agent appears in a default Minecraft world, with all possible objects.
The agent appears at x="-204" y="81" z="217", which depending on the world
that is generated means that it's going to fall initially to touch ground.

Goal:
The task instance is considered complete if the agent finds (mines) any
special block (any of "gold_block diamond_block redstone_block").

Rewards:
Sparse rewards. Negative if dead (-10000), positive if the target block is
touched (+1000) and negative if the time is over (-1000).

End:
Task ends if the timeLimitMs=300000 (300 sec.) is reached or agent killed.

Observability:
Partial observability. Using VideoProducer (480x320).

Actions:
Possible actions given by "ContinuousMovementCommand" included in "survival
mode". Inventory empty initially. So actions are "move", "strafe", "pitch",
"turn", "jump", "crouch", "attack", "use" and "hotbar.X". Action "drop" not
included.
"""

add_task(
    id='MinecraftDefaultWorld1-v0',
    group='minecraft',
    summary='Survive and find gold, diamond or redstone!',
    description=_MINECRAFT_DEFAULT_WORLD_DESC,
)

add_task(id='MinecraftDefaultFlat1-v0',
Example #17
0
                kwargs={ 'draw_tiles': draw_tiles, 'level': level },
                # Seems to be non-deterministic about 5% of the time
                nondeterministic=True,
            )

    # Scoreboard registration
    # ==========================
    # Single scoreboard group holding every Super Mario Bros task.
    add_group(
        id='super-mario',
        name='SuperMario',
        description='32 levels of the original Super Mario Bros game.',
    )

    # Meta tasks: all 32 levels chained, screen and tiles variants.
    add_task(
        id='{}/meta-SuperMarioBros-v0'.format(USERNAME),
        group='super-mario',
        summary='Compilation of all 32 levels of Super Mario Bros. on Nintendo platform - Screen version.',
    )
    add_task(
        id='{}/meta-SuperMarioBros-Tiles-v0'.format(USERNAME),
        group='super-mario',
        summary='Compilation of all 32 levels of Super Mario Bros. on Nintendo platform - Tiles version.',
    )

    # One task per individual level: 8 worlds x 4 levels, numbered from 1.
    for world_no in range(1, 9):
        for level_no in range(1, 5):
            add_task(
                id='{}/SuperMarioBros-{}-{}-v0'.format(USERNAME, world_no, level_no),
                group='super-mario',
                summary='Level: {}-{} of Super Mario Bros. on Nintendo platform - Screen version.'.format(world_no, level_no),
            )
Example #18
0
# change api_base / web_base.)
'''api_key = os.environ.get('OPENAI_GYM_API_KEY')
api_base = os.environ.get('OPENAI_GYM_API_BASE', 'https://gym-api.openai.com')
web_base = os.environ.get('OPENAI_GYM_WEB_BASE', 'https://gym.openai.com')'''

# The following controls how various tasks appear on the
# scoreboard. These registrations can differ from what's registered in
# this repository.

# groups

# gym-vehicle scoreboard group plus its three test tasks.
add_group(id='gym-vehicle', name='gym-vehicle', description='TODO.')

_vehicle_tasks = (
    ('GazeboCircuitLargeCatvehicleLidar-v0', 'Test1.'),
    ('GazeboCircuitLargeCatvehicleLidarNn-v0', 'Test2.'),
    ('GazeboTrackCatvehicleLidar-v0', 'Test3.'),
)
for task_id, task_summary in _vehicle_tasks:
    add_task(id=task_id, group='gym-vehicle', summary=task_summary)
Example #19
0
from gym.scoreboard.registration import add_task, add_group

# Scoreboard group for the multi-armed bandit tasks.
add_group(
    id='bandits',
    name='Bandits',
    description='Various multi-armed Bandit environments',
)

# Fixed-reward bandit: each arm's reward is drawn once from N(5, 2).
add_task(
    id='multi_arm_bandit_gaussian_fixed-v0',
    group='bandits',
    experimental=True,
    contributor='bardofcodes',
    summary="multi-armed bandit mentioned with reward based on a Gaussian distribution",
    description="""
    Each bandit gives a fixed reward,'r', where 'r' is sampled from a Gaussian Distribution(5,2)
    """,
    background="",
)

# Stochastic bandit: each arm rewards N(r, 1) with r drawn from N(5, 2).
add_task(
    id='multi_arm_bandit_gaussian_gaussian-v0',
    group='bandits',
    experimental=True,
    contributor='bardofcodes',
    summary="multi-armed bandit with each bandit having a normal distribution for reward distribution",
    description="""
     Each bandit has a N(r,1) reward distribution, where 'r' is sampled from a Normal(5,2) distribution.
    """,
    background="",
)

add_task(
Example #20
0
from gym.scoreboard.registration import add_task, add_group

# Scoreboard group for the N-armed bandit tasks.
add_group(
    id='bandits',
    name='Bandits',
    description='Various N-Armed Bandit environments',
)

# Deterministic two-armed bandit: arm 0 always pays, arm 1 never does.
add_task(
    id='BanditTwoArmedDeterministicFixed-v0',
    group='bandits',
    experimental=True,
    contributor='jkcooper2',
    summary="Simplest bandit where one action always pays, and the other never does.",
    description="""
    Each bandit takes in a probability distribution, which is the likelihood of the action paying out,
    and a reward distribution, which is the value or distribution of what the agent will be rewarded
    the bandit does payout.

    p_dist = [1, 0]
    r_dist = [1, 1]
    """,
    background="",
)

add_task(
    id='BanditTwoArmedHighHighFixed-v0',
    group='bandits',
    experimental=True,
    contributor='jkcooper2',
    summary=
    "Stochastic version with a small difference between which bandit pays where both are likely",
    description="""
Example #21
0
    name='Toy text',
    description='Simple text environments to get you started.'
)

# classic control

# CartPole scoreboard entry with its long-form text pulled into constants.
_cp_description = """\
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
The system is controlled by applying a force of +1 or -1 to the cart.
The pendulum starts upright, and the goal is to prevent it from falling over.
A reward of +1 is provided for every timestep that the pole remains upright.
The episode ends when the pole is more than 15 degrees from vertical, or the
cart moves more than 2.4 units from the center.
"""

_cp_background = """\
This environment corresponds to the version of the cart-pole problem described by
Barto, Sutton, and Anderson [Barto83]_.

.. [Barto83] AG Barto, RS Sutton and CW Anderson, "Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem", IEEE Transactions on Systems, Man, and Cybernetics, 1983.
"""

add_task(id='CartPole-v0',
         group='classic_control',
         summary="Balance a pole on a cart.",
         description=_cp_description,
         background=_cp_background)


add_task(
    id='Acrobot-v0',
    group='classic_control',
    summary="Swing up a two-link robot.",
Example #22
0
# change api_base / web_base.)
'''api_key = os.environ.get('OPENAI_GYM_API_KEY')
api_base = os.environ.get('OPENAI_GYM_API_BASE', 'https://gym-api.openai.com')
web_base = os.environ.get('OPENAI_GYM_WEB_BASE', 'https://gym.openai.com')'''

# The following controls how various tasks appear on the
# scoreboard. These registrations can differ from what's registered in
# this repository.

# groups

# Gazebo scoreboard group and its camera/lidar obstacle-avoidance tasks.
add_group(id='gazebo', name='Gazebo', description='TODO.')

_gazebo_entries = (
    ('Circuit2TurtlebotLidar-v0', 'Obstacle avoidance in a Circuit 2.'),
    ('Circuit2cTurtlebotCameraNn-v0', 'Obstacle avoidance in a Circuit 2.c'),
    ('SimplemazeTurtlebotCameraNn-v0', 'Obstacle avoidance in a Simplemaze'),
)
for entry_id, entry_summary in _gazebo_entries:
    add_task(id=entry_id, group='gazebo', summary=entry_summary)
Example #23
0
import os

from gym.scoreboard.registration import registry, add_task, add_group

# Minimal scoreboard wiring: one group and one smoke-test task.
add_group(id='gazebo', name='Gazebo', description='TODO.')
add_task(id='test-v0',
         group='gazebo',
         summary='Obstacle avoidance in a Circuit.')

# Finalize the scoreboard registry.
registry.finalize()
Example #24
0
from gym.scoreboard.registration import add_task, add_group

# All four SpaceFortress variants share one group and contributor; the id and
# summary vary only in (mode, observation) so they are generated in a loop.
add_group(id='SpaceFortress',
          name='SpaceFortress',
          description='SpaceFortress games')

for _mode, _obs in (('explode', 'image'),
                    ('autoturn', 'image'),
                    ('explode', 'features'),
                    ('autoturn', 'features')):
    add_task(id='SpaceFortress-{}-{}-v0'.format(_mode, _obs),
             summary="2D frictionless space shooter ({}, {})".format(_mode, _obs),
             group='SpaceFortress',
             contributor='Ryan M. Hope')
Example #25
0
from gym.envs.registration import register
from gym.scoreboard.registration import add_task, add_group
from .package_info import USERNAME

# Environment registration

# Grid sizes offered; one environment (and one scoreboard task) per size.
GRID_DIMENSIONS = [4, 8, 16, 32, 64]

for d in GRID_DIMENSIONS:
    register(
        id='{}/gridworld-{}x{}'.format(USERNAME, d, d),
        entry_point='{}_gym_gridworld:GridWorldEnv'.format(USERNAME),
        max_episode_steps=1000,
        reward_threshold=9000.0,
        kwargs={'dimension': d},
    )

# Scoreboard registration
# ==========================
add_group(id='gridworld',
          name='GridWorld',
          description='Sutton & Barto classic Gridworld environment.')

for d in GRID_DIMENSIONS:
    # NOTE(review): the task id uses 'GridWorld-...' while register() above
    # uses 'gridworld-...' — confirm whether the case mismatch is intended.
    add_task(id='{}/GridWorld-{}x{}'.format(USERNAME, d, d),
             group='gridworld',
             # Bug fix: the summary format string has two '{}' placeholders
             # but only one argument was supplied, which raised IndexError.
             summary='Sutton and Barto Grid world, {}x{}.'.format(d, d),
             description="""
        """)
Example #26
0
from gym.scoreboard.registration import registry, add_task, add_group

# Scoreboard wiring for the Gazebo quadcopter environment.
add_group(
    id='gazebo',
    name='Gazebo',
    description='TODO.',
)

add_task(
    id='GazeboQuadEnv-v0',
    group='gazebo',
    summary='hover',
)

# Finalize the scoreboard registry.
registry.finalize()