Example #1
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True,
                 discrete_action_space=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = discrete_action_space
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even if the action is continuous, it will be discretized before being applied
        self.force_discrete_action = getattr(world, 'discrete_action', False)
        # if true, every agent has the same reward
        self.shared_reward = False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ))
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ))
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = spaces.MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(
                    low=-np.inf,
                    high=+np.inf,
                    shape=(obs_dim, ),  # shape must be a tuple, not a bare int
                ))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
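
A note on the MultiDiscrete usage above: spaces.MultiDiscrete([[low, high], ...]) is the legacy range-based gym API. Newer gym versions take a vector of category counts instead; a minimal sketch of the modern equivalent (the counts shown are illustrative, not taken from the source):

from gym import spaces

# legacy: spaces.MultiDiscrete([[0, 4], [0, 2]])
# modern: pass the number of categories per component
act_space = spaces.MultiDiscrete([5, 3])  # e.g. dim_p * 2 + 1 = 5 moves, dim_c = 3 messages
sample = act_space.sample()               # e.g. array([2, 0])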
Example #2
    def __init__(
        self,
        env: Union[EnvDataset, PolicyEnv] = None,
        dataset: Union[EnvDataset, PolicyEnv] = None,
        batch_size: int = None,
        num_workers: int = None,
        **kwargs,
    ):
        assert not (
            env is None and dataset is None
        ), "One of the `dataset` or `env` arguments must be passed."
        assert not (
            env is not None and dataset is not None
        ), "Only one of the `dataset` and `env` arguments can be used."

        if env is None:
            env = dataset  # the asserts above guarantee exactly one was given

        if not isinstance(env, IterableDataset):
            raise RuntimeError(
                f"The env {env} isn't an iterable dataset! (You can use the "
                f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
                f"from a gym environment.)")

        if isinstance(env.unwrapped, VectorEnv):
            if batch_size is not None and batch_size != env.num_envs:
                logger.warning(
                    UserWarning(
                        f"The provided batch size {batch_size} will be ignored, since "
                        f"the provided env is vectorized with a batch_size of "
                        f"{env.unwrapped.num_envs}."))
            batch_size = env.num_envs

        if isinstance(env.unwrapped, BatchedVectorEnv):
            num_workers = env.n_workers
        elif isinstance(env.unwrapped, AsyncVectorEnv):
            num_workers = env.num_envs
        else:
            num_workers = 0

        self.env = env
        # NOTE: The batch_size and num_workers attributes reflect the values from the
        # iterator (the VectorEnv), not those of the dataloader.
        # This is done in order to avoid pytorch workers being ever created, and also so
        # that pytorch-lightning stops warning us that the num_workers is too low.
        self._batch_size = batch_size
        self._num_workers = num_workers
        super().__init__(
            dataset=self.env,
            # The batch size is None, because the VecEnv takes care of
            # doing the batching for us.
            batch_size=None,
            num_workers=0,
            collate_fn=None,
            **kwargs,
        )
        Wrapper.__init__(self, env=self.env)
        assert not isinstance(
            self.env, GymDataLoader), "Something very wrong is happening."

        # self.max_epochs: int = max_epochs
        self.observation_space: gym.Space = self.env.observation_space
        self.action_space: gym.Space = self.env.action_space
        self.reward_space: gym.Space
        if isinstance(env.unwrapped, VectorEnv):
            env: VectorEnv
            batch_size = env.num_envs
            # TODO: Overwriting the action space to be the 'batched' version of
            # the single action space, rather than a Tuple(Discrete, ...) as is
            # done in the gym.vector.VectorEnv.
            self.action_space = batch_space(env.single_action_space,
                                            batch_size)

        if not hasattr(self.env, "reward_space"):
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
            )
            if isinstance(self.env.unwrapped, VectorEnv):
                # Same here, we use a 'batched' space rather than Tuple.
                self.reward_space = batch_space(self.reward_space, batch_size)
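
For reference, batch_space (from gym.vector.utils) is what turns a single-env space into its batched counterpart; a minimal sketch:

from gym import spaces
from gym.vector.utils import batch_space

single = spaces.Discrete(4)
batched = batch_space(single, n=8)  # MultiDiscrete with eight components of 4 categories each
box = batch_space(spaces.Box(low=0.0, high=1.0, shape=(3, )), n=8)  # Box of shape (8, 3)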
Example #3
    def __init__(self, env):
        gym.Wrapper.__init__(self, env)
        # dtype assumed uint8 for 8-bit pixel frames
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1),
                                            dtype=np.uint8)
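
A wrapper like this normally pairs the space change with a matching observation transform; a sketch of the usual grayscale-resize step, assuming cv2 is available (the method below is not part of the source snippet):

import cv2
import numpy as np

def observation(self, frame):
    # convert to grayscale and resize to 84x84 to match the declared space
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    frame = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)
    return frame[:, :, None].astype(np.uint8)  # shape (84, 84, 1)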
Example #4
world-map
0: stone
1: wood
2: house
3: water
4: stone (available ??)
5: wood (available ??)
6: wall

world-idx_map
0: house (mine: 1, others: player_idx + 2)
1: player's location (mine: 1, others: player_idx + 2)
'''
OBS_SPACE_AGENT = spaces.Dict({
    'world-map':
    spaces.Box(0, 1, shape=(7, 11, 11)),
    'world-idx_map':
    spaces.Box(0, 5, shape=(2, 11, 11)),
    'world-loc-row':
    spaces.Box(0, 1, shape=(1, )),
    'world-loc-col':
    spaces.Box(0, 1, shape=(1, )),
    'world-inventory-Coin':
    spaces.Box(0, np.inf, shape=(1, )),
    'world-inventory-Stone':
    spaces.Box(0, np.inf, shape=(1, )),
    'world-inventory-Wood':
    spaces.Box(0, np.inf, shape=(1, )),
    'time':
    spaces.Box(0, 1, shape=(1, 1)),
    'Build-build_payment':
Example #5
    def __init__(self, level, **kwargs):
        """
        Base class for the Gym interface for ViZDoom. Child classes are defined in vizdoom_env_definitions.py;
        they supply the level parameter and pass through any kwargs from gym.make().
        :param level: index of level in the CONFIGS list above
        :param kwargs: keyword arguments from gym.make(env_name_string, **kwargs) call. 'depth' will render the
        depth buffer and 'labels' will render the object labels and return it in the observation.
        Note that the observation will be a list with the screen buffer as the first element. If no kwargs are
        provided (or depth=False and labels=False) the observation will be of type np.ndarray.
        """

        # parse keyword arguments
        self.depth = kwargs.get("depth", False)
        self.labels = kwargs.get("labels", False)
        self.position = kwargs.get("position", False)
        self.health = kwargs.get("health", False)

        # init game
        self.game = vzd.DoomGame()
        self.game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), "scenarios")
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.set_depth_buffer_enabled(self.depth)
        self.game.set_labels_buffer_enabled(self.labels)
        self.game.clear_available_game_variables()
        if self.position:
            self.game.add_available_game_variable(vzd.GameVariable.POSITION_X)
            self.game.add_available_game_variable(vzd.GameVariable.POSITION_Y)
            self.game.add_available_game_variable(vzd.GameVariable.POSITION_Z)
            self.game.add_available_game_variable(vzd.GameVariable.ANGLE)
        if self.health:
            self.game.add_available_game_variable(vzd.GameVariable.HEALTH)
        self.game.init()
        self.state = None
        self.viewer = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])

        # specify observation space(s)
        list_spaces: List[gym.Space] = [
            spaces.Box(
                0,
                255,
                (
                    self.game.get_screen_height(),
                    self.game.get_screen_width(),
                    self.game.get_screen_channels(),
                ),
                dtype=np.uint8,
            )
        ]
        if self.depth:
            list_spaces.append(
                spaces.Box(
                    0,
                    255,
                    (
                        self.game.get_screen_height(),
                        self.game.get_screen_width(),
                    ),
                    dtype=np.uint8,
                ))
        if self.labels:
            list_spaces.append(
                spaces.Box(
                    0,
                    255,
                    (
                        self.game.get_screen_height(),
                        self.game.get_screen_width(),
                    ),
                    dtype=np.uint8,
                ))
        if self.position:
            list_spaces.append(spaces.Box(-np.inf, np.inf, (4, 1)))
        if self.health:
            list_spaces.append(spaces.Box(0, np.inf, (1, 1)))
        if len(list_spaces) == 1:
            self.observation_space = list_spaces[0]
        else:
            self.observation_space = spaces.Tuple(list_spaces)
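
Downstream code has to handle both branches of that last if/else; a hypothetical consumer that unpacks the Tuple observation when extra buffers are enabled:

obs = env.reset()
if isinstance(env.observation_space, spaces.Tuple):
    screen, extras = obs[0], obs[1:]  # screen buffer first, then depth/labels/position/health in order
else:
    screen = obs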
Example #6
    def reset(self):
        # reset the layout
        layout = np.random.choice(['4rooms', '3rooms', '3roomsh', 'open'])
        if (layout == '4rooms'):
            layout = """\
wwwwwwwwwwwww
w     w     w
w     w     w
w           w
w     w     w
w     w     w
ww wwww     w
w     www www
w     w     w
w     w     w
w           w
w     w     w
wwwwwwwwwwwww
"""
        elif (layout == '3rooms'):
            layout = """\
wwwwwwwwwwwww
w   w   w   w
w   w       w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w       w   w
w   w   w   w
w   w   w   w
wwwwwwwwwwwww
"""
        elif (layout == '3roomsh'):
            layout = """\
wwwwwwwwwwwww
w           w
w           w
wwwwwwwww www
w           w
w           w
w           w
w           w
ww wwwwwwwwww
w           w
w           w
w           w
wwwwwwwwwwwww
"""
        elif (layout == 'open'):
            layout = """\
wwwwwwwwwwwww
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
wwwwwwwwwwwww
"""
        else:
            raise ValueError(f"unknown layout: {layout}")

        # reset the occupancy array, the obs space and the observation space
        self.occupancy = np.array([
            list(map(lambda c: 1 if c == 'w' else 0, line))
            for line in layout.splitlines()
        ])
        self.obs_space = np.zeros(np.sum(self.occupancy == 0))
        if self.config <= 1:
            self.observation_space = spaces.Box(
                low=np.zeros(np.sum(self.occupancy == 0)),
                high=np.ones(np.sum(self.occupancy == 0)),
                dtype=np.uint8)
        elif self.config == 2:
            self.observation_space = spaces.Box(low=np.zeros([4, 169]),
                                                high=np.ones([4, 169]),
                                                dtype=np.uint8)

        # a dictionary that can convert state index (scalar) to state cell location (2-dimension vector)
        self.tostate = {}
        statenum = 0
        for i in range(13):
            for j in range(13):
                # first entry is the y index (vertical)
                # second entry is the x index (horizontal)
                if self.occupancy[i, j] == 0:
                    self.tostate[(i, j)] = statenum
                    statenum += 1
        self.tocell = {v: k for k, v in self.tostate.items()}

        # get a list of all available states
        self.area = list(range(self.obs_space.shape[0]))
        '''
        The following is the normal reset procedure, the same as in the regular fourrooms_collect
        '''

        # clear the update count
        self.update = 0

        # first put objects to the env (each object is at different places)
        self.objects = {}
        for _ in range(self.num_goals):
            while True:
                new_pos = self.random_pos()
                if new_pos not in self.objects:
                    self.objects[new_pos] = np.random.multinomial(
                        1, self.object_priors)
                    break

        # Then we put the agent to the env
        self.agent_pos = self.random_pos()
        # if the agent is put onto an object, re-locate the agent
        while self.agent_pos in self.objects:
            self.agent_pos = self.random_pos()

        # now we have already put the agent and the objects into the env properly

        return self.observation()
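
The occupancy parsing above is easiest to see on a tiny layout; a standalone worked example:

import numpy as np

layout = "www\nw w\nwww"
occupancy = np.array([
    list(map(lambda c: 1 if c == 'w' else 0, line))
    for line in layout.splitlines()
])
# occupancy == [[1, 1, 1],
#               [1, 0, 1],
#               [1, 1, 1]], and np.sum(occupancy == 0) == 1 free cell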
Example #7
    def __init__(self, env=None):
        super(NormalizeWrapper, self).__init__(env)
        self.obs_lo = self.observation_space.low[0, 0, 0]
        self.obs_hi = self.observation_space.high[0, 0, 0]
        obs_shape = self.observation_space.shape
        self.observation_space = spaces.Box(0.0, 1.0, obs_shape, dtype=np.float32)
Example #8
from ray.rllib.agents.pg import PGTrainer
from ray.rllib.agents.pg.pg_policy_graph import PGPolicyGraph
from ray.rllib.env import MultiAgentEnv
from ray.rllib.env.base_env import BaseEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.pytorch.fcnet import FullyConnectedNetwork
from ray.rllib.models.pytorch.model import TorchModel
from ray.rllib.rollout import rollout
from ray.rllib.tests.test_external_env import SimpleServing
from ray.tune.registry import register_env

DICT_SPACE = spaces.Dict({
    "sensors": spaces.Dict({
        "position": spaces.Box(low=-100, high=100, shape=(3, )),
        "velocity": spaces.Box(low=-1, high=1, shape=(3, )),
        "front_cam": spaces.Tuple(
            (spaces.Box(low=0, high=1, shape=(10, 10, 3)),
             spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam": spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state": spaces.Dict({
        "charge": spaces.Discrete(100),
        "job_status": spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        })
    })
})
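
Nested Dict spaces like DICT_SPACE sample to plain nested dicts, and can be flattened to a single vector for models that expect one; a minimal sketch:

from gym.spaces import flatten, flatdim

sample = DICT_SPACE.sample()
position = sample["sensors"]["position"]    # np.ndarray of shape (3, )
charge = sample["inner_state"]["charge"]    # integer in [0, 100)

flat = flatten(DICT_SPACE, sample)          # 1-D vector of length flatdim(DICT_SPACE)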
Example #9
    def __init__(self, layout_path="layout.yml"):
        # Define action and observation space
        super(WarehouseEnv, self).__init__()
        # They must be gym.spaces objects, e.g. spaces.Discrete for actions
        # and an image-shaped spaces.Box for observations.

        self.actions = [MOVE_LEFT, MOVE_DOWN, MOVE_RIGHT, MOVE_UP]

        with open(layout_path, "r") as f:
            try:
                self.layout = yaml.safe_load(f)
            except yaml.YAMLError as exc:
                print(exc)

        self.max_items_in_env = self.layout["bin-slot-size"] * len(
            self.layout["bins"])

        # actions are zero based so this is ok
        self.load_actions = [
            len(self.actions) + i for i in range(self.max_items_in_env)
        ]
        self.unload_actions = [
            len(self.actions) + len(self.load_actions) + i
            for i in range(self.max_items_in_env)
        ]

        self.actions += self.load_actions + self.unload_actions

        assert self.max_items_in_env * 2 + 4 == len(self.actions)

        self.action_space = spaces.Discrete(len(self.actions))

        self.n_channels = 3 + self.max_items_in_env * 2
        self.shape = (self.layout["height"], self.layout["width"],
                      self.n_channels)
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=self.shape,
                                            dtype=np.int8)  # int8: uint8 cannot represent the low bound of -1

        self.bins = self._create_bins(self.layout["bins"],
                                      self.layout["bin-slot-size"])
        self.staging_in = StagingIn(self.layout["staging-in"]["position"],
                                    self.layout["staging-in"]["loading"])
        self.staging_out = StagingOut(
            self.layout["staging-out"]["position"],
            self.layout["staging-out"]["loading"],
        )

        self.blocked_positions = []
        for b in self.bins:
            self.blocked_positions.append(b.pos)
        self.blocked_positions.append(self.staging_in.pos)
        self.blocked_positions.append(self.staging_out.pos)

        self.agent = Agent(
            self.layout["agent-start"]["position"],
            self.layout["height"],
            self.layout["width"],
            self.layout["bin-slot-size"],
            self.blocked_positions,
        )

        self.transaction = None

        self.item_counter = 0
        self.invalid_action_counter = 0

        self.gui = WarehouseGui([self.layout["height"], self.layout["width"]],
                                self.max_items_in_env)
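
A hypothetical helper for decoding the flat action index laid out above (4 move actions, then a load block and an unload block of max_items_in_env each; this helper is not in the source):

def decode_action(a, max_items):
    if a < 4:
        return ('move', a)
    a -= 4
    if a < max_items:
        return ('load', a)
    return ('unload', a - max_items)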
Example #10
    def __init__(self):
        """
        Initialise the environment
        """

        self.random_goal = True
        self.goal_oriented = True
        self.obs_type = 1
        self.reward_type = 1
        self.action_type = 1

        # Define action space
        self.action_space = spaces.Box(
            low=np.float32(
                np.array([-0.5, -0.25, -0.25, -0.25, -0.5, -0.005]) / 30),
            high=np.float32(
                np.array([0.5, 0.25, 0.25, 0.25, 0.5, 0.005]) / 30),
            dtype=np.float32)

        # Define observation space
        if self.obs_type == 1:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_END_EFF_COORDS, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_END_EFF_COORDS, JOINT_MAX), axis=0))

        elif self.obs_type == 2:
            self.obs_space_low = np.float32(
                np.concatenate((MIN_GOAL_COORDS, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate((MAX_GOAL_COORDS, JOINT_MAX), axis=0))

        elif self.obs_type == 3:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, JOINT_MAX), axis=0))

        elif self.obs_type == 4:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 3, JOINT_MIN), axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 3, JOINT_MAX), axis=0))

        elif self.obs_type == 5:
            self.obs_space_low = np.float32(
                np.concatenate(([-1.0] * 6, MIN_GOAL_COORDS, JOINT_MIN),
                               axis=0))
            self.obs_space_high = np.float32(
                np.concatenate(([1.0] * 6, MAX_GOAL_COORDS, JOINT_MAX),
                               axis=0))

        self.observation_space = spaces.Box(low=self.obs_space_low,
                                            high=self.obs_space_high,
                                            dtype=np.float32)

        if self.goal_oriented:
            self.observation_space = spaces.Dict(
                dict(desired_goal=spaces.Box(low=np.float32(MIN_GOAL_COORDS),
                                             high=np.float32(MAX_GOAL_COORDS),
                                             dtype=np.float32),
                     achieved_goal=spaces.Box(
                         low=np.float32(MIN_END_EFF_COORDS),
                         high=np.float32(MAX_END_EFF_COORDS),
                         dtype=np.float32),
                     observation=self.observation_space))

        # Initialise goal position
        if self.random_goal:
            self.goal = self.sample_random_goal()
        else:
            self.goal = FIXED_GOAL_COORDS

        # Connect to physics client. By default, do not render
        self.physics_client = p.connect(p.DIRECT)

        # Load URDFs
        self.create_world()

        # reset environment
        self.reset()
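
Goal-oriented Dict observation spaces like the one above are what HER-style replay buffers expect, together with a compute_reward method; a minimal sparse-reward sketch, assuming a distance-threshold attribute that is not shown in the source:

import numpy as np

def compute_reward(self, achieved_goal, desired_goal, info):
    # sparse reward: 0 within the (assumed) threshold, -1 otherwise
    d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
    return -(d > self.distance_threshold).astype(np.float32)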
Example #11
    def __init__(self,
                 gym_config,
                 robot_class=None,
                 env_sensors=None,
                 robot_sensors=None,
                 task=None,
                 env_randomizers=None):
        """Initializes the locomotion gym environment.

    Args:
      gym_config: An instance of LocomotionGymConfig.
      robot_class: A class of a robot. We provide a class rather than an
        instance due to hard_reset functionality. Parameters are expected to be
        configured with gin.
      env_sensors: A list of environmental sensors for observation.
      robot_sensors: A list of sensors attached to the robot.
      task: A callable function/class to calculate the reward and termination
        condition. Takes the gym env as the argument when calling.
      env_randomizers: A list of EnvRandomizer(s). An EnvRandomizer may
        randomize the physical property of minitaur, change the terrain during
        reset(), or add perturbation forces during step().

    Raises:
      ValueError: If the num_action_repeat is less than 1.

    """

        self.seed()
        self._gym_config = gym_config
        self._robot_class = robot_class
        self._robot_sensors = robot_sensors

        self._sensors = env_sensors if env_sensors is not None else list()
        if self._robot_class is None:
            raise ValueError('robot_class cannot be None.')

        # A dictionary containing the objects in the world other than the robot.
        self._world_dict = {}
        self._task = task

        self._env_randomizers = env_randomizers if env_randomizers else []

        # This is a workaround due to the issue in b/130128505#comment5
        if isinstance(self._task, sensor.Sensor):
            self._sensors.append(self._task)

        # Simulation related parameters.
        self._num_action_repeat = gym_config.simulation_parameters.num_action_repeat
        self._on_rack = gym_config.simulation_parameters.robot_on_rack
        if self._num_action_repeat < 1:
            raise ValueError('number of action repeats should be at least 1.')
        self._sim_time_step = gym_config.simulation_parameters.sim_time_step_s
        self._env_time_step = self._num_action_repeat * self._sim_time_step
        self._env_step_counter = 0

        self._num_bullet_solver_iterations = int(
            _NUM_SIMULATION_ITERATION_STEPS / self._num_action_repeat)
        self._is_render = gym_config.simulation_parameters.enable_rendering

        # The wall-clock time at which the last frame is rendered.
        self._last_frame_time = 0.0
        self._show_reference_id = -1

        if self._is_render:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.GUI)
            pybullet.configureDebugVisualizer(
                pybullet.COV_ENABLE_GUI,
                gym_config.simulation_parameters.enable_rendering_gui)
            self._show_reference_id = pybullet.addUserDebugParameter(
                "show reference", 0, 1, self._task._draw_ref_model_alpha)
            self._delay_id = pybullet.addUserDebugParameter("delay", 0, 0.3, 0)
        else:
            self._pybullet_client = bullet_client.BulletClient(
                connection_mode=pybullet.DIRECT)
        self._pybullet_client.setAdditionalSearchPath(pd.getDataPath())
        if gym_config.simulation_parameters.egl_rendering:
            self._pybullet_client.loadPlugin('eglRendererPlugin')

        # The action list contains the name of all actions.
        self._action_list = []
        action_upper_bound = []
        action_lower_bound = []
        action_config = robot_class.ACTION_CONFIG
        for action in action_config:
            self._action_list.append(action.name)
            action_upper_bound.append(action.upper_bound)
            action_lower_bound.append(action.lower_bound)

        self.action_space = spaces.Box(np.array(action_lower_bound),
                                       np.array(action_upper_bound),
                                       dtype=np.float32)

        # Set the default render options.
        self._camera_dist = gym_config.simulation_parameters.camera_distance
        self._camera_yaw = gym_config.simulation_parameters.camera_yaw
        self._camera_pitch = gym_config.simulation_parameters.camera_pitch
        self._render_width = gym_config.simulation_parameters.render_width
        self._render_height = gym_config.simulation_parameters.render_height

        self._hard_reset = True
        self.reset()

        self._hard_reset = gym_config.simulation_parameters.enable_hard_reset

        # Construct the observation space from the list of sensors. Note that we
        # will reconstruct the observation_space after the robot is created.
        self.observation_space = (
            space_utils.convert_sensors_to_gym_space_dictionary(
                self.all_sensors()))
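
A sketch of what a sensors-to-Dict-space conversion typically looks like, assuming each sensor exposes get_name() and per-dimension bounds (the actual space_utils implementation is not shown in the source):

from gym import spaces
import numpy as np

def sensors_to_space(sensors):
    # build a Dict space keyed by sensor name from per-sensor bounds
    return spaces.Dict({
        s.get_name(): spaces.Box(low=np.array(s.get_lower_bound()),
                                 high=np.array(s.get_upper_bound()),
                                 dtype=np.float32)
        for s in sensors
    })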
Example #12
    def __init__(self):
        super(CorrectivePsuedoSteeringEnv, self).__init__()
        self.observation_space = spaces.Box(low=-10, high=10, shape=(1, ))
        self.action_space = spaces.Box(low=-10, high=10, shape=(1, ))
        self.action_range = \
            (self.action_space.high - self.action_space.low)[0]
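
action_range is typically used to map a normalized policy output back onto the env's bounds; a hypothetical rescaling helper (not part of the source):

def rescale_action(self, a_norm):
    # a_norm in [-1, 1]  ->  [low, high]
    low = self.action_space.low[0]
    return low + (a_norm + 1.0) * 0.5 * self.action_range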
Example #13
    def __init__(self):
        self.observation_space = spaces.Box(low=-1, high=1, shape=(1, ))
        self.action_space = spaces.Box(low=-1, high=1, shape=(1, ))
        self.target = None
        self._max_episode_steps = 10**3
        self.step_num = 0
Example #14
    def __init__(self, train):
        # simulation timestep
        self.dt = 1
        # timestep counter
        self.t = 0
        # running mode: train or test
        self.train = train
        # simulation data: load, solar power, wind power, price
        self.data = (read_pickle_data()['train'] if train
                     else read_pickle_data()['test'])
        # days
        self.days = self.data['solar_0'].size // 24
        # network architecture
        self.net = net = case34_3ph()
        # how much history to store
        self.past_t = 24
        # seed
        self.seed()
        # list of agents
        DG_1 = DG('DG 1',
                  bus='Bus 848',
                  min_p_mw=0,
                  max_p_mw=0.66,
                  sn_mva=0.825,
                  control_q=True,
                  cost_curve_coefs=[100, 71.6, 0.4615])
        DG_2 = DG('DG 2',
                  bus='Bus 890',
                  min_p_mw=0,
                  max_p_mw=0.50,
                  sn_mva=0.625,
                  control_q=True,
                  cost_curve_coefs=[100, 62.4, 0.5011])
        PV_1 = RES('PV 1',
                   source='SOLAR',
                   bus='Bus 822',
                   sn_mva=0.2,
                   control_q=False)
        PV_2 = RES('PV 2',
                   source='SOLAR',
                   bus='Bus 856',
                   sn_mva=0.2,
                   control_q=False)
        PV_3 = RES('PV 3',
                   source='SOLAR',
                   bus='Bus 838',
                   sn_mva=0.2,
                   control_q=False)
        WP_1 = RES('WP_1',
                   source='WIND',
                   bus='Bus 822',
                   sn_mva=0.3,
                   control_q=False)
        WP_2 = RES('WP_2',
                   source='WIND',
                   bus='Bus 826',
                   sn_mva=0.3,
                   control_q=False)
        WP_3 = RES('WP_3',
                   source='WIND',
                   bus='Bus 838',
                   sn_mva=0.3,
                   control_q=False)
        SUB = Transformer('Substation',
                          type='TAP',
                          fbus='Bus0',
                          tbus='Bus 800',
                          sn_mva=2.5,
                          tap_max=2,
                          tap_min=-2)
        TF = Transformer('TF',
                         type='Trafo',
                         fbus='Bus 832',
                         tbus='Bus 888',
                         sn_mva=0.5)
        TAP_1 = Transformer('TAP 1',
                            type='TAP',
                            fbus='Bus 814',
                            tbus='Bus 850',
                            sn_mva=2.5,
                            tap_max=10,
                            tap_min=-16)
        TAP_2 = Transformer('TAP 2',
                            type='TAP',
                            fbus='Bus 852',
                            tbus='Bus 832',
                            sn_mva=2.5,
                            tap_max=10,
                            tap_min=-16)
        SCB_1 = Shunt('SCB 1', bus='Bus 840', q_mvar=0.12, max_step=4)
        SCB_2 = Shunt('SCB 2', bus='Bus 864', q_mvar=0.12, max_step=4)
        ESS_1 = ESS('Storage',
                    bus='Bus 810',
                    min_p_mw=-0.2,
                    max_p_mw=0.2,
                    max_e_mwh=1.0,
                    min_e_mwh=0.2)
        GRID = Grid('GRID', bus='Bus 800', sn_mva=2.5)
        self.agents = [
            DG_1, DG_2, PV_1, PV_2, PV_3, WP_1, WP_2, WP_3, TF, SUB, TAP_1,
            TAP_2, SCB_1, SCB_2, ESS_1, GRID
        ]
        # reset
        ob = self.reset()

        # configure spaces
        action_space, action_shape = [], 0
        for agent in self.policy_agents:
            total_action_space = []
            # continuous action space
            if agent.action.range is not None:
                low, high = agent.action.range
                u_action_space = spaces.Box(low=low,
                                            high=high,
                                            dtype=np.float32)
                total_action_space.append(u_action_space)
                action_shape += u_action_space.shape[-1]
            # discrete action space
            if agent.action.ncats is not None:
                if isinstance(agent.action.ncats, list):
                    u_action_space = spaces.MultiDiscrete(agent.action.ncats)
                    action_shape += u_action_space.nvec.shape[-1]
                elif isinstance(agent.action.ncats, int):
                    u_action_space = spaces.Discrete(agent.action.ncats)
                    action_shape += 1
                else:
                    raise NotImplementedError()
                total_action_space.append(u_action_space)

            action_space.extend(total_action_space)

        # total action space
        if len(action_space) > 1:
            # combine the per-agent action spaces into a single Tuple space
            self.action_space = spaces.Tuple(action_space)
            # attach the flattened shape for code that expects a Box-like shape
            self.action_space.shape = (action_shape, )
        else:
            self.action_space = action_space[0]
        # observation space
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=+np.inf,
                                            shape=(ob.shape[0], ),
                                            dtype=np.float32)

        # reward
        self.reward_range = (-200.0, 200.0)
Example #15
    def __init__(self,
                 num_agents=1,
                 num_targets=2,
                 map_name='empty',
                 is_training=True,
                 known_noise=True,
                 **kwargs):
        super().__init__(num_agents=num_agents,
                         num_targets=num_targets,
                         map_name=map_name,
                         is_training=is_training)

        self.id = 'setTracking-v1'
        self.nb_agents = num_agents  # only for init; will change with reset()
        self.nb_targets = num_targets  # only for init; will change with reset()
        self.agent_dim = 3
        self.target_dim = 4
        self.target_init_vel = METADATA['target_init_vel'] * np.ones((2, ))
        # LIMIT
        self.limit = {}  # 0: low, 1: high
        self.limit['agent'] = [
            np.concatenate((self.MAP.mapmin, [-np.pi])),
            np.concatenate((self.MAP.mapmax, [np.pi]))
        ]
        self.limit['target'] = [
            np.concatenate((self.MAP.mapmin, [
                -METADATA['target_vel_limit'], -METADATA['target_vel_limit']
            ])),
            np.concatenate(
                (self.MAP.mapmax,
                 [METADATA['target_vel_limit'], METADATA['target_vel_limit']]))
        ]
        # maximum relative speed
        rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][0]
        self.limit['state'] = [
            np.concatenate(
                ([0.0, -np.pi, -rel_vel_limit, -10 * np.pi, -50.0,
                  0.0], [0.0, -np.pi])),
            np.concatenate(
                ([600.0, np.pi, rel_vel_limit, 10 * np.pi, 50.0,
                  2.0], [self.sensor_r, np.pi]))
        ]
        self.observation_space = spaces.Box(self.limit['state'][0],
                                            self.limit['state'][1],
                                            dtype=np.float32)
        self.targetA = np.concatenate(
            (np.concatenate((np.eye(2), self.sampling_period * np.eye(2)),
                            axis=1), [[0, 0, 1, 0], [0, 0, 0, 1]]))
        self.target_noise_cov = METADATA['const_q'] * np.concatenate(
            (np.concatenate((self.sampling_period**3 / 3 * np.eye(2),
                             self.sampling_period**2 / 2 * np.eye(2)),
                            axis=1),
             np.concatenate((self.sampling_period**2 / 2 * np.eye(2),
                             self.sampling_period * np.eye(2)),
                            axis=1)))
        if known_noise:
            self.target_true_noise_sd = self.target_noise_cov
        else:
            self.target_true_noise_sd = METADATA[
                'const_q_true'] * np.concatenate(
                    (np.concatenate((self.sampling_period**2 / 2 * np.eye(2),
                                     self.sampling_period / 2 * np.eye(2)),
                                    axis=1),
                     np.concatenate((self.sampling_period / 2 * np.eye(2),
                                     self.sampling_period * np.eye(2)),
                                    axis=1)))

        # Build a robot
        self.setup_agents()
        # Build a target
        self.setup_targets()
        self.setup_belief_targets()
        # Use custom reward
        self.get_reward()
Example #16
    def __init__(
        self,
        setting_file='search_rr_plant78.json',
        reset_type='waypoint',  # testpoint, waypoint,
        augment_env=None,  #texture, target, light
        test=True,  # if True will use the test_xy as start point
        action_type='discrete',  # 'discrete', 'continuous'
        observation_type='rgbd',  # 'color', 'depth', 'rgbd'
        reward_type='bbox',  # distance, bbox, bbox_distance,
        docker=False,
        resolution=(320, 240)):

        setting = self.load_env_setting(setting_file)
        self.test = test
        self.docker = docker
        self.reset_type = reset_type
        self.augment_env = augment_env

        # start unreal env
        self.unreal = env_unreal.RunUnreal(ENV_BIN=setting['env_bin'])
        env_ip, env_port = self.unreal.start(docker, resolution)

        # connect UnrealCV
        self.unrealcv = Navigation(cam_id=self.cam_id,
                                   port=env_port,
                                   ip=env_ip,
                                   targets=self.target_list,
                                   env=self.unreal.path2env,
                                   resolution=resolution)
        self.unrealcv.pitch = self.pitch

        # define action
        self.action_type = action_type
        assert self.action_type in ('discrete', 'continuous')
        if self.action_type == 'discrete':
            self.action_space = spaces.Discrete(len(self.discrete_actions))
        elif self.action_type == 'continuous':
            self.action_space = spaces.Box(
                low=np.array(self.continous_actions['low']),
                high=np.array(self.continous_actions['high']))

        # define observation space,
        # color, depth, rgbd,...
        self.observation_type = observation_type
        assert self.observation_type in ('color', 'depth', 'rgbd', 'gray')
        self.observation_shape = self.unrealcv.define_observation(
            self.cam_id, self.observation_type)

        # define reward type
        # distance, bbox, bbox_distance,
        self.reward_type = reward_type
        self.reward_function = reward.Reward(setting)

        # set start position
        self.trigger_count = 0
        current_pose = self.unrealcv.get_pose(self.cam_id)
        current_pose[2] = self.height
        self.unrealcv.set_location(self.cam_id, current_pose[:3])

        self.count_steps = 0
        self.targets_pos = self.unrealcv.build_pose_dic(self.target_list)

        # for reset point generation and selection
        self.reset_module = reset_point.ResetPoint(setting, reset_type, test,
                                                   current_pose)

        self.rendering = False
Example #17
    def create_box_space(self):
        return spaces.Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
Example #18
    def __init__(self):
        # Launch the simulation with the given launchfile name
        gazebo_env.GazeboEnv.__init__(self, "iri_wam_HER.launch")
        # publishers for the motor commands (placeholders, assigned below)
        self.publishers = [None] * 7

        self.publishers[0] = rospy.Publisher(
            '/iri_wam/joint1_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[1] = rospy.Publisher(
            '/iri_wam/joint2_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[2] = rospy.Publisher(
            '/iri_wam/joint4_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[3] = rospy.Publisher(
            '/iri_wam/joint3_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[4] = rospy.Publisher(
            '/iri_wam/joint5_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[5] = rospy.Publisher(
            '/iri_wam/joint6_position_controller/command',
            Float64,
            queue_size=5)
        self.publishers[6] = rospy.Publisher(
            '/iri_wam/joint7_position_controller/command',
            Float64,
            queue_size=5)  # discretely publishing motor actions for now
        self.pubMarker = rospy.Publisher('/goalPose', Marker, queue_size=5)

        self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)
        self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
        self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation',
                                              Empty)

        #self.minDisplacement = 0.02 #minimum discrete distance by a single action
        #self.minDisplacementTolerance = 0.01
        #self.minDisplacementPose = self.minDisplacement + self.minDisplacementTolerance  # minimum distance to check for the goal reaching state
        self.distanceThreshold = 0.09
        self.baseFrame = 'iri_wam_link_base'
        self.homingTime = 0.6  # time given for homing
        self.lenGoal = 3  # goal position list length
        self._max_episode_steps = 100
        self.reward_type = 'sparse'

        self.home = np.zeros(4)  # the homing position
        # self.Xacrohigh = np.array([2.6, 2.0, 2.8, 3.1, 1.24, 1.57, 2.96])
        # self.Xacrolow = np.array([-2.6, -2.0, -2.8, -0.9, -4.76, -1.57, -2.96])
        # self.IKlow = np.array([-2.6, -1.94, -2.73, -0.88, -4.74, -1.55, -2.98])
        # self.IKhigh = np.array([2.6, 1.94, 2.73, 3.08, 1.22, 1.55, 2.98])

        #self.low = np.array([-2.6, -1.42, -2.73, -0.88, -4.74, -1.55, -2.98])
        #self.high = np.array([2.6, 1.42, 2.73, 3.08, 1.22, 1.55, 2.98])

        #self.lowConc = np.array([-2.6, -1.42, -0.88, -2.6, -1.42, -0.88]) #1, 2, 4, 1, 2, 4
        #self.highConc = np.array([2.6, 1.42, 3.08, 2.6, 1.42, 3.08])

        self.lowConcObs = np.array([-2.6, -1.42, -0.88])  #1, 2, 4
        self.highConcObs = np.array([2.6, 1.42, 3.08])

        self.samplelow = np.array(
            [-2.4, -1.4, -2.03, -0.68, -4.04, -1.05, -2.08])
        self.samplehigh = np.array([2.4, 1.4, 2.03, 2.78, 1.0, 1.05, 2.08])

        #self.samplelow = np.array([-1.5, -0.8, -2.03, -0.48, -4.04, -1.05, -2.08])
        #self.samplehigh = np.array([1.5, 0.8, 2.03, 2.48, 1.0, 1.05, 2.08])
        #self.high = np.array([5.2, 2.8, 5.4, 3.96, 6.96, 3.1, 5.96])
        self.lowAction = [-1, -1, -1]
        self.highAction = [1, 1, 1]
        self.n_actions = len(self.highAction)

        self.lastObservation = None
        self.lastObservationJS = None
        self.goal = None
        self.goalJS = None

        self.action_space = spaces.Box(-1.,
                                       1.,
                                       shape=(self.n_actions, ),
                                       dtype='float32')

        self.observation_space = spaces.Dict(
            dict(
                desired_goal=spaces.Box(-np.inf,
                                        np.inf,
                                        shape=(len(self.highConcObs), ),
                                        dtype='float32'),
                achieved_goal=spaces.Box(-np.inf,
                                         np.inf,
                                         shape=(len(self.highConcObs), ),
                                         dtype='float32'),
                observation=spaces.Box(-np.inf,
                                       np.inf,
                                       shape=(len(self.highConcObs), ),
                                       dtype='float32'),
            ))
Example #19
    def __init__(self,
                 num_goals=10,
                 num_types=2,
                 object_priors=[1, 1],
                 reward_vec=None,
                 horizon=50,
                 p=0,
                 config=2,
                 layout='open',
                 seed=1234,
                 cont=True):
        """
        config -> configuration of the state space
            0 - returns tabular index of the state
            1 - returns one hot encoded vector of the state
            2 - returns matrix form of the state
        """
        if (layout == '4rooms'):
            layout = """\
wwwwwwwwwwwww
w     w     w
w     w     w
w           w
w     w     w
w     w     w
ww wwww     w
w     www www
w     w     w
w     w     w
w           w
w     w     w
wwwwwwwwwwwww
"""
        elif (layout == '3rooms'):
            layout = """\
wwwwwwwwwwwww
w   w   w   w
w   w       w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w   w   w   w
w       w   w
w   w   w   w
w   w   w   w
wwwwwwwwwwwww
"""
        elif (layout == '3roomsh'):
            layout = """\
wwwwwwwwwwwww
w           w
w           w
wwwwwwwww www
w           w
w           w
w           w
w           w
ww wwwwwwwwww
w           w
w           w
w           w
wwwwwwwwwwwww
"""
        elif (layout == 'maze'):
            layout = """\
wwwwwwwwwwwww
w           w
w ww wwwwww w
w w       w w
w w wwwww w w
w w w   w w w
w w   w   www
w w w   w w w
w w wwwww w w
w w       w w
w ww wwwwww w
w           w
wwwwwwwwwwwww
"""
        elif (layout == 'open'):
            layout = """\
wwwwwwwwwwwww
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
w           w
wwwwwwwwwwwww
"""
        else:
            raise ValueError(f"unknown layout: {layout}")

        assert num_types == len(object_priors)

        self.p = p
        self.config = config
        self.num_goals = num_goals
        self.num_types = num_types
        self.horizon = horizon
        # object priors is the vector of priors of types of each object (so we have to normalize it first)
        self.object_priors = np.array(object_priors) / np.sum(object_priors)
        # reward_vec is the reward vector for different type of objects
        if reward_vec is None:
            self.reward_vec = np.ones(shape=(self.num_types, ))
        else:
            self.reward_vec = reward_vec
        '''
        Everything from here onward should be moved into reset()
        '''
        # store the number of updates
        self.update = 0

        # if we would like to create another object when one object is collected
        self.cont = cont

        # fix the random seed for reproducibility
        np.random.seed(seed)
        self.rng = np.random.RandomState(seed)

        # occupancy of the layout(1 -> walls, 0 -> elsewhere)
        self.occupancy = np.array([
            list(map(lambda c: 1 if c == 'w' else 0, line))
            for line in layout.splitlines()
        ])

        # action space of the env
        # 0: UP
        # 1: DOWN
        # 2: LEFT
        # 3: RIGHT
        self.a_space = np.array([0, 1, 2, 3])
        self.obs_space = np.zeros(np.sum(self.occupancy == 0))

        # observation space (Not used)
        # Setting the observation space based on the config
        # The observation space is for the network to know what shape of input it should expect
        if (config <= 1):
            self.observation_space = spaces.Box(
                low=np.zeros(np.sum(self.occupancy == 0)),
                high=np.ones(np.sum(self.occupancy == 0)),
                dtype=np.uint8)
        elif (config == 2):
            self.observation_space = spaces.Box(low=np.zeros([4, 169]),
                                                high=np.ones([4, 169]),
                                                dtype=np.uint8)

        # action space
        self.action_space = spaces.Discrete(4)

        # direction of actions
        self.directions = [
            np.array((-1, 0)),
            np.array((1, 0)),
            np.array((0, -1)),
            np.array((0, 1))
        ]

        # a dictionary that can convert state index (scalar) to state cell location (2-dimension vector)
        self.tostate = {}
        statenum = 0
        for i in range(13):
            for j in range(13):
                # first entry is the y index (vertical)
                # second entry is the x index (horizontal)
                if self.occupancy[i, j] == 0:
                    self.tostate[(i, j)] = statenum
                    statenum += 1
        self.tocell = {v: k for k, v in self.tostate.items()}

        # get a list of all available states
        self.area = list(range(self.obs_space.shape[0]))

        self.channels_all = len(self.reward_vec) + 2
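
The tostate/tocell mapping above enumerates free cells in row-major order; a standalone worked example on a 3x3 room:

import numpy as np

occupancy = np.array([[1, 1, 1],
                      [1, 0, 1],
                      [1, 1, 1]])
tostate, statenum = {}, 0
for i in range(occupancy.shape[0]):
    for j in range(occupancy.shape[1]):
        if occupancy[i, j] == 0:
            tostate[(i, j)] = statenum
            statenum += 1
tocell = {v: k for k, v in tostate.items()}
# tostate == {(1, 1): 0} and tocell[0] == (1, 1)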
Example #20
    def __init__(self):
        super(QtradeEnv, self).__init__()
        self.root_dir = '/Users/liuyehong/Dropbox/CICC/Algorithm_Trading/Platform2/OHLC/data/1Min/'
        # self.root_dir = '/Users/liuyehong/Dropbox/CICC/Algorithm_Trading/Platform/data/Stock/US/'

        self.list_dir = [d for d in os.listdir(self.root_dir) if '.csv' in d]
        self.df_dir = np.random.choice(self.list_dir)
        self.df = pd.read_csv(self.root_dir + self.df_dir)
        self.alpha = Alpha(self.df)
        self.cost = -0.00005
        self.interest_rate = 0 / 240 / 240  # internal interest rate (helps long-horizon training avoid getting stuck)
        self.window = 50
        self.cash = 1
        self.stock = 0
        self.t = self.window + 1
        self.i = 0
        self.T = len(self.df)
        self.total_steps = int(self.T / 3.)
        self.list_asset = np.ones(self.T)
        self.list_holding = np.ones(self.T)

        # alpha
        self.close = self.alpha.close
        self.high = self.alpha.high
        self.low = self.alpha.low
        self.open = self.alpha.open
        self.vol = self.alpha.vol
        self.close_diff = self.alpha.close_diff()
        self.high_diff = self.alpha.high_diff()
        self.low_diff = self.alpha.low_diff()
        self.open_diff = self.alpha.open_diff()

        self.ma = self.alpha.moving_average(window=self.window)
        self.ema = self.alpha.EMA(window=self.window)
        self.dema = self.alpha.DEMA(window=self.window)
        self.kama = self.alpha.KAMA(window=self.window)
        self.sma = self.alpha.SMA(window=self.window)
        self.tema = self.alpha.TEMA(window=self.window)
        self.trima = self.alpha.TRIMA(window=self.window)
        self.linearreg_slope = self.alpha.LINEARREG_SLOPE(window=self.window)

        self.mstd = self.alpha.moving_std(window=self.window)
        self.bollinger_lower_bound = self.alpha.bollinger_lower_bound(
            window=self.window, width=1)
        self.bollinger_upper_bound = self.alpha.bollinger_upper_bound(
            window=self.window, width=1)
        self.moving_max = self.alpha.moving_max(window=self.window)
        self.moving_min = self.alpha.moving_min(window=self.window)
        self.moving_med = self.alpha.moving_med(window=self.window)

        # Actions of the format Buy x%, Sell x%, Hold, etc.
        # Action space range must be symmetric, and the order matters.

        self.action_space = spaces.Box(low=np.array([-np.inf, -np.inf]),
                                       high=np.array([np.inf, np.inf]),
                                       dtype=np.float16)

        # Prices contains the OHCL values for the last five prices
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(1, self.window, 4),
                                            dtype=np.float16)
Example #21
class RubiksCubeClassicEnv(ParametrizedEnv):
    """
    An environment where the goal is to solve a Rubik's cube. Reward is 1.0 only when the cube is solved and 0.0 otherwise.
    This is a fixed 6x3x3 classic cube.

    Cube's faces are ordered in the following way:
      ---
      |1|
    ---------
    |0|2|4|5|
    ---------
      |3|
      ---

    shuffle_count is a parameter specifying how many times should random actions be played on the environment
    to initialize it "randomly"
    """

    # Set this in SOME subclasses
    metadata = {'render.modes': ['rgb_array']}
    spec = None

    colors = [
        # Taken from https://www.schemecolor.com/rubik-cube-colors.php
        (185, 0, 0),
        (0, 69, 173),
        (255, 213, 0),
        (0, 155, 72),
        (255, 89, 0),
        (255, 255, 255)
    ]

    # Where in canvas each face should be rendered
    canvas_coords = [(0, 33), (33, 0), (33, 33), (33, 66), (66, 33), (99, 33)]

    # Set these in ALL subclasses
    # 12 actions because there are 6 faces that can be turned clockwise or counterclockwise
    action_space = spaces.Discrete(12)
    observation_space = spaces.Box(low=0,
                                   high=5,
                                   shape=(6, 3, 3),
                                   dtype=np.uint8)

    def __init__(self, parameters=None, constants=None):
        if parameters is None:
            parameters = RubiksCubeParameters()
        elif isinstance(parameters, dict):
            parameters = RubiksCubeParameters(**parameters)

        if constants is None:
            constants = RubiksCubeConstants()
        elif isinstance(constants, dict):
            constants = RubiksCubeConstants(**constants)

        super().__init__(parameters, constants)

        self.reward_range = (min(self.constants.win_reward,
                                 self.constants.move_reward),
                             max(self.constants.win_reward,
                                 self.constants.move_reward))

        self._state = np.zeros(shape=(6, 3, 3), dtype=np.uint8)
        self._info = {'shuffle_count': self.parameters.shuffle_count}

        self._initialize_starting_cube()
        self._shuffle()

    def _initialize_from_current_parameters(self):
        """ Initialize internal state of the environment from a currently set set of parameters """
        pass

    def _initialize_starting_cube(self):
        """ Initialize cube to the initial 'solved' state """
        for i in range(6):
            self._state[i] = i

    def _shuffle(self):
        """ Shuffle the cube """
        actions = np.random.randint(0,
                                    self.action_space.n,
                                    size=self.parameters.shuffle_count)

        for action in actions:
            rubiks_cube_play(self._state, action)

    def is_solved(self):
        """ Check if given cube is solved """
        return cube_is_solved(self._state)

    def step(self, action: int):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the environment

        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return
                            undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        assert 0 <= action < 12, "Action must be within range"
        rubiks_cube_play(self._state, action)

        if self.is_solved():
            return self._state, self.constants.win_reward, True, self._info.copy()
        else:
            return self._state, self.constants.move_reward, False, self._info.copy()

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns: observation (object): the initial observation of the
            space.
        """
        self._initialize_starting_cube()
        self._shuffle()
        return self._state

    def render(self, mode='human'):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:

        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return a numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with

        Example:

        class MyEnv(Env):
            metadata = {'render.modes': ['human', 'rgb_array']}

            def render(self, mode='human'):
                if mode == 'rgb_array':
                    return np.array(...) # return RGB frame suitable for video
                elif mode == 'human':
                    ... # pop up a window and render
                else:
                    super(MyEnv, self).render(mode=mode) # just raise an exception
        """
        if mode == 'rgb_array':
            canvas = np.zeros(shape=(100, 133, 3), dtype=np.uint8)
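            # Each face is drawn as a 3x3 block of 11-px cells (a 10-px sticker
            # plus a 1-px gap); canvas_coords holds each face's top-left corner.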

            for face_idx in range(6):
                face_coords = self.canvas_coords[face_idx]

                for i in range(3):
                    for j in range(3):
                        color = self.colors[self._state[face_idx, i, j]]

                        start_x = face_coords[0] + j * 11 + 1
                        end_x = face_coords[0] + j * 11 + 10 + 1

                        start_y = face_coords[1] + i * 11 + 1
                        end_y = face_coords[1] + i * 11 + 10 + 1

                        canvas[start_y:end_y, start_x:end_x] = color

            return canvas
        else:
            super().render(mode=mode)

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        return

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        return
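
# Hedged usage sketch (not part of the original snippet): assumes the class
# above is exported as `RubiksCubeEnv`; parameters may be passed as a dict,
# as the constructor shows.
def demo_rubiks_cube():
    env = RubiksCubeEnv(parameters={'shuffle_count': 1})
    obs = env.reset()
    for _ in range(100):
        # random policy over the 12 quarter-turn actions
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            break
    return obs, info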
Beispiel #22
    def __init__(self):
        super(PlannerEnv, self).__init__()
        print('Planner environment created!')
        self.hist_size = 3
        self.simOn = False

        # For time step
        self.current_time = time.time()
        self.last_time = self.current_time
        self.time_step = []
        self.last_obs = np.array([])
        self.TIME_STEP = 0.05  # 50 milliseconds
        self.steps = 0
        self.total_reward = 0
        self.done = False
        # Define action and observation space
        # They must be gym.spaces objects
        # Continuous action: a 4-dimensional vector in [-1, 1]
        self.action_space = spaces.Box(low=np.array([-1] * 4),
                                       high=np.array([1] * 4),
                                       dtype=np.float16)
        # Observation: a flat feature vector (not an image)
        self.observation_space = spaces.Box(low=0,
                                            high=512,
                                            shape=(43657, ),
                                            dtype=np.uint8)  # note: high=512 exceeds the uint8 range (0-255)
        # Observation: for now two lists: one of entities and one of enemies
        self._obs = []
        # As a first step, actions can be only of 3 types: move, look, attack
        # For each type of action, there should be a list of pairs of {entity, goal}
        self._actions = {
            'MOVE_TO': [],
            'LOOK_AT': [],
            'ATTACK': [],
            'TAKE_PATH': []
        }

        # ROS2 Support
        self.entities = []
        self.enemies = []
        self.entities_queue = deque([])
        self.match_los = {}

        rclpy.init()
        self.node = rclpy.create_node("planner")

        # Subscribe to topics
        self.entityPoseSub = self.node.create_subscription(
            SGlobalPose, '/entity/global_pose', self.global_pose_callback, 10)
        self.entityDescriptionSub = self.node.create_subscription(
            SDiagnosticStatus, '/entity/description',
            self.entity_description_callback, 10)
        self.enemyDescriptionSub = self.node.create_subscription(
            EnemyReport, '/enemy/description', self.enemy_description_callback,
            10)
        self.entityImuSub = self.node.create_subscription(
            SImu, '/entity/imu', self.entity_imu_callback, 10)
        self.entityOverallHealthSub = self.node.create_subscription(
            SHealth, '/entity/overall_health',
            self.entity_overall_health_callback, 10)
        self.entityTwistSub = self.node.create_subscription(
            STwist, '/entity/twist', self.entity_twist_callback, 10)
        # Publish topics
        self.moveToPub = self.node.create_publisher(SGlobalPose,
                                                    '/entity/moveto/goal', 10)
        self.attackPub = self.node.create_publisher(SGlobalPose,
                                                    '/entity/attack/goal', 10)
        self.lookPub = self.node.create_publisher(SGlobalPose,
                                                  '/entity/look/goal', 10)
        self.takePathPub = self.node.create_publisher(SPath,
                                                      '/entity/takepath', 10)
        self.takeGoalPathPub = self.node.create_publisher(
            SGoalAndPath, '/entity/followpath/goal', 10)
        self.num_of_dead_enemies = 0

        # Alternative spin strategies, kept for reference:
        # self.node.create_rate(10.0)
        # rclpy.spin(self.node)
        # executor = MultiThreadedExecutor(num_threads=4)
        # executor.add_node(self.node)
        # executor.spin()
        x = threading.Thread(target=self.thread_ros, args=())
        print("Before running thread")
        x.start()
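
    # Hedged sketch (not in the original snippet): `thread_ros`, launched above,
    # presumably just spins the node so the subscription callbacks fire.
    def thread_ros(self):
        rclpy.spin(self.node)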
Beispiel #23
 def test_normalized_action_space(self):
   config = solo_env.Solo8VanillaConfig()
   env = solo_env.Solo8VanillaEnv(config=config, normalize_actions=True)
   self.assertEqual(env.action_space, spaces.Box(-1, 1, shape=(12,)))
 def __init__(self, p, collision_penalty= -2, trap_penalty= 0.5):
     """Initializes the lattice
         
         
         Parameters
         ----------
         p : str, must only consist of an array of integers.
         Sequence containing the maximum length of each interpolator.
         collision_penalty : int, must be a negative value
         Penalty incurred when the agent made an invalid action.
         Default is -2.
         trap_penalty : float, must be between 0 and 1
         Penalty incurred when the agent is trapped. Actual value is
         computed as :code:`floor(length_of_sequence * trap_penalty)`
         Default is -2.
         """
     
      try:
          if not isinstance(collision_penalty, int):
              raise ValueError("%r (%s) must be of type 'int'" %
                               (collision_penalty, type(collision_penalty)))
          if collision_penalty >= 0:
              raise ValueError("%r (%s) must be negative" %
                               (collision_penalty, type(collision_penalty)))
          self.collision_penalty = collision_penalty
      except TypeError:
          logger.error("%r (%s) must be of type 'int'" %
                       (collision_penalty, type(collision_penalty)))
          raise

      try:
          if not 0 < trap_penalty < 1:
              raise ValueError("%r (%s) must be between 0 and 1" %
                               (trap_penalty, type(trap_penalty)))
          self.trap_penalty = trap_penalty
      except TypeError:
          logger.error("%r (%s) must be of type 'float'" %
                       (trap_penalty, type(trap_penalty)))
          raise

      self.state = [(0, 0)]
      self.master_state = [(0, 0)]
      self.actions = []
      self.origin = (0, 0)
      self.op_counts = 0
      self.is_looped = False

      # here p is an array with the number of 'x' residues allowed for each operator
      self.p = p
     
     self.seq = ['x'] * p[self.op_counts]
     
     # Grid attributes
     self.grid_length = 2 * len(self.seq) + 1
     self.midpoint = (len(self.seq), len(self.seq))
      self.grid = np.zeros(shape=(self.grid_length, self.grid_length), dtype=int)

      # Automatically assign the first element into the grid
      self.grid[self.midpoint] = POLY_TO_INT[self.seq[0]]

     # Define action-observation spaces
     self.action_space = spaces.Discrete(5)
     
     self.observation_space = spaces.Box(low=-2, high=1,
                                         shape=(self.grid_length, self.grid_length),
                                         dtype=int)
     self.last_action = None
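
# Hedged usage sketch (not in the original snippet); `Lattice` is a guessed name
# for the class above, and POLY_TO_INT maps residue characters to grid values.
def demo_lattice_geometry():
    lattice = Lattice([5], collision_penalty=-2, trap_penalty=0.5)
    # a 5-residue sequence gets a (2*5 + 1) x (2*5 + 1) board centred on it
    assert lattice.grid.shape == (11, 11)
    assert lattice.midpoint == (5, 5)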
    def init(self,
             client_pool=None,
             start_minecraft=None,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=90,
             retry_sleep=10,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None):

        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command
        self.client_pool = client_pool

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowDiscreteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

        if start_minecraft:
            # start Minecraft process assigning port dynamically
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]
        """ 
        make client_pool usable for Malmo: change format of the client_pool to struct 
        """
        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))
        """
        initialize video parameters for video processing
        """
        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth))
        """
        dummy image just for the first observation
        """
        self.last_image1 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.last_image2 = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.float32)
        self.create_action_space()
        """ 
        mission recording 
        """
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)
        """ 
        game mode
        """
        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warning(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode
Beispiel #26
    def __init__(self):
        """
        Initializing the simulation environment.
        """

        # Fixing the random seed -- for reproducible experiments
        np.random.seed(1)

        # Miguel: Why is this needed?
        self.previous_action = 0

        # Bolus carb factor -- [g/U]
        self.bolus = 30

        ## Loading variable parameters -- used if environment is extended
        reward_flag, bg_init_flag = self._update_parameters()

        # Miguel: CamelCase is not used in rest of code

        # Initialize bolus history -- used for insulin on board
        self.bolusHistoryIndex = 0
        self.bolusHistoryValue = []
        self.bolusHistoryTime = []
        self.insulinOnBoard = np.zeros(1)

        # Initialize sensor model -- Miguel: do we need all of this?
        # self.CGMlambda = 15.96    # Johnson parameter of recalibrated and synchronized sensor error.
        # self.CGMepsilon = -5.471  # Johnson parameter of recalibrated and synchronized sensor error.
        # self.CGMdelta = 1.6898    # Johnson parameter of recalibrated and synchronized sensor error.
        # self.CGMgamma = -0.5444   # Johnson parameter of recalibrated and synchronized sensor error.
        # self.CGMerror = 0
        self.sensor_noise = np.random.randn(1)
        # self.CGMaux = []
        # self.sensorNoiseValue = 0.07 # Set a value

        # Model parameters
        P = hovorka_parameters(70)
        init_basal_optimal = 6.43
        self.P = P
        self.init_basal_optimal = init_basal_optimal

        # Initializing the action space
        self.action_space = spaces.Box(0,
                                       2 * self.init_basal_optimal, (1, ),
                                       dtype=np.float32)

        # Initial basal rate -- used for init and reset. Either randomly initialized or set to a fixed value.

        if bg_init_flag == 'random':
            self.init_basal = np.random.choice(
                np.linspace(init_basal_optimal - 2, init_basal_optimal, 10))
        elif bg_init_flag == 'fixed':
            self.init_basal = init_basal_optimal

        # Flag for manually resetting the init when the episode restarts
        self.reset_basal_manually = None

        self._seed()
        self.viewer = None

        # ==========================================
        # Setting up the Hovorka simulator
        # ==========================================

        # Initial values for parameters
        initial_pars = (self.init_basal, 0, P)

        # Initial value
        X0 = fsolve(hovorka_model_tuple, np.zeros(11), args=initial_pars)
        self.X0 = X0

        # Simulation setup
        self.integrator = ode(hovorka_model)
        self.integrator.set_integrator('vode', method='bdf', order=5)
        self.integrator.set_initial_value(X0, 0)

        # Simulation time in minutes -- the default is solving the simulator 30 minutes at a time
        # with a solver time of one minute.
        self.simulation_time = 30
        self.n_solver_steps = 1
        self.stepsize = int(self.simulation_time / self.n_solver_steps)

        # State space for the environment -- [30 min of BG, last 4 insulin actions, insulin on board, bolus (if given) during last 30 mins]
        self.observation_space = spaces.Box(0,
                                            500,
                                            (int(self.stepsize + 4 + 2), ),
                                            dtype=np.float32)

        # State is BG, simulation_state is parameters of hovorka model
        initial_bg = X0[-1] * 18
        initial_insulin = np.ones(4) * self.init_basal_optimal
        initial_iob = np.zeros(1)

        # Initial state
        self.state = np.concatenate([
            np.repeat(initial_bg, self.stepsize), initial_insulin, initial_iob,
            np.zeros(1)
        ])

        self.simulation_state = X0

        # Keeping track of entire blood glucose level and insulin for each episode
        self.bg_history = []
        self.insulin_history = initial_insulin

        # ====================
        # Meal setup
        # ====================

        # Default eating time is considered one minute -- empirically the simulations are not too sensitive to this choice.
        eating_time = 1

        # Meals are carb intake and meal_indicator is the counted carbs by the patient
        meals, meal_indicator = meal_generator(eating_time=eating_time,
                                               premeal_bolus_time=0)

        self.meals = meals
        self.meal_indicator = meal_indicator
        self.eating_time = eating_time

        # Counter for number of iterations
        self.num_iters = 0

        # If blood glucose is less than zero or above 500, the simulator is considered out of bounds.
        self.bg_threshold_low = 0
        self.bg_threshold_high = 500

        # The max episode length is 36 hours
        self.max_iter = 2160

        # Reward flag
        self.reward_flag = reward_flag

        self.steps_beyond_done = None
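
# Hedged sketch (not in the original snippet): the observation packs 30 minutes
# of BG, the last 4 insulin actions, insulin on board, and a bolus slot, which
# is exactly the (stepsize + 4 + 2,) Box declared above.
def check_state_layout(env):
    assert env.observation_space.shape == (env.stepsize + 4 + 2,)
    assert env.state.shape == (36,)  # 30 BG + 4 insulin + 1 IOB + 1 bolus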
 def __init__(self, env=None):
     super(ProcessFrame84, self).__init__(env)
     self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))
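
 # Hedged sketch (not in the original snippet): ProcessFrame84 conventionally
 # grayscales and resizes frames to match the (84, 84, 1) Box above; assumes
 # OpenCV is available, as in the classic Atari wrappers.
 def observation(self, obs):
     import cv2
     frame = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
     frame = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)
     return frame[:, :, None]  # add the channel axis -> (84, 84, 1)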
 def __init__(self, env):
     gym.ObservationWrapper.__init__(self, env)
     self.observation_space = spaces.Box(low=0,
                                         high=1.0,
                                         shape=env.observation_space.shape,
                                         dtype=np.float32)
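
 # Hedged sketch (not in the original snippet): a wrapper with the [0, 1] Box
 # above typically rescales byte-valued frames from the wrapped environment.
 def observation(self, obs):
     return np.asarray(obs, dtype=np.float32) / 255.0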
Beispiel #29
    def __init__(self,
                 x,
                 y,
                 z,
                 gamma,
                 turnspc,
                 policy,
                 rg_prob='rg',
                 rendermode='off'):

        self.rendermode = rendermode  # on/off display block model in matplotlib
        # self.cutoffpenaltyscalar=penaltyscalar #scaling parameter for changing the penalty for taking no action (cutoff).
        self.rg_prob = rg_prob  # 'rg' for randomly generated, 'loadenv' for loading premade environments
        # self.savepath=savepath
        envpath = './environments'
        self.savedgeo = '%s/geology' % envpath
        # self.savedtruth='%s/truth' % envpath
        self.savedenv = '%s/environment' % envpath
        self.saveddepdic = '%s/depdict' % envpath
        self.savedeffdic = '%s/effdict' % envpath
        self.policy = policy

        #initiating values
        self.framecounter = 0
        self.actionslist = list()
        self.reward = 0
        self.discountedmined = 0
        self.turncounter = 1
        self.i = -1
        self.j = -1
        self.terminal = False
        self.gamma = gamma  #discount factor exponential (reward*turn^discount factor)
        self.Imin = 0
        self.Imax = x
        self.Jmin = 0
        self.Jmax = y
        self.RLmin = 0
        self.RLmax = z
        self.mined = -1
        self.callnumber = 1
        self.savenumber = 0
        self.episodecounter = 0

        try:
            self.maxloadid = len([
                name for name in os.listdir(self.savedgeo)
                if os.path.isfile(os.path.join(self.savedgeo, name))
            ])
        except OSError:  # geology directory may not exist yet
            self.maxloadid = 1

        #sizing the block model environment
        self.Ilen = self.Imax - self.Imin
        self.Jlen = self.Jmax - self.Jmin
        self.RLlen = self.RLmax - self.RLmin  #RL (z coordinate) counts up as depth increases
        self.channels = 2  # H2O mean and mined state (a standard-deviation channel is not used)
        self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels

        #initiating block dependency dictionaries
        #self.block_dic={}
        self.block_dic_init = {}
        self.dep_dic = {}
        self.dep_dic_init = {}
        self.eff_dic_init = {}

        #create block model
        self.model = automodel(self.Ilen, self.Jlen, self.RLlen)
        self.build()

        self.startingturnspc = 0.04
        self.turns = round(
            len(self.dep_dic) * self.startingturnspc, 0
        )  #set max number of turns (actions) in each episode based on percentage of block model size.
        self.maxturnspc = turnspc - self.startingturnspc

        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        #actions are taken on a 2D checkerboard-style view of the environment; progress is made downwards in 3D over time.

        self.action_space = spaces.Discrete(
            self.Ilen * self.Jlen)  # (+1 would add an action for choosing the terminal state)

        if self.policy == 'CnnPolicy':

            #observations are made of the entire environment (3D model with 2 channels; one channel represents the mined state)
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(self.Ilen, self.Jlen,
                                                       self.RLlen,
                                                       self.channels),
                                                dtype=np.float64)

        elif self.policy == 'MlpPolicy':
            #observations are made of the entire environment, flattened to 1D (same 2 channels as above)
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(self.flatlen, ),
                                                dtype=np.float64)
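
    # Hedged sketch (not in the original snippet): the Discrete(Ilen * Jlen)
    # action space above maps onto the 2D checkerboard view via divmod.
    def action_to_coords(self, action):
        i, j = divmod(action, self.Jlen)
        return self.Imin + i, self.Jmin + j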
 def response_space(cls):
     return spaces.Dict({
         'reward':
         spaces.Box(low=-10.0, high=5.0, shape=tuple(), dtype=np.float32)
     })
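
 # Hedged usage sketch (not in the original snippet): Dict spaces sample to a
 # plain dict, so the scalar reward Box above can be read back by key.
 @classmethod
 def sample_response(cls):
     sample = cls.response_space().sample()
     return float(sample['reward'])  # 0-d Box sample -> Python float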