def __init__(self,
             world,
             reset_callback=None,
             reward_callback=None,
             observation_callback=None,
             info_callback=None,
             done_callback=None,
             shared_viewer=True,
             discrete_action_space=True):
    self.world = world
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = discrete_action_space
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(
        world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = False
    self.time = 0

    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range,
                                        high=+agent.u_range,
                                        shape=(world.dim_p, ))
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0,
                                        high=1.0,
                                        shape=(world.dim_c, ))
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([
                    isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space
            ]):
                act_space = spaces.MultiDiscrete(
                    [[0, act_space.n - 1] for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, )))
        agent.action.c = np.zeros(self.world.dim_c)

    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
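# A minimal sketch (not part of the original file) illustrating the size of the
# physical action space built above: with dim_p physical dimensions there is one
# no-op plus a +/- move per dimension, hence Discrete(2 * dim_p + 1). The exact
# index-to-direction mapping shown in the comment is an assumption for
# illustration only.
from gym import spaces

dim_p = 2                                 # planar world, assumed for the example
u_space = spaces.Discrete(dim_p * 2 + 1)  # -> Discrete(5)
assert u_space.n == 5                     # [no-op, -x, +x, -y, +y] (assumed ordering)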
def __init__(
    self,
    env: Union[EnvDataset, PolicyEnv] = None,
    dataset: Union[EnvDataset, PolicyEnv] = None,
    batch_size: int = None,
    num_workers: int = None,
    **kwargs,
):
    assert not (
        env is None and dataset is None
    ), "One of the `dataset` or `env` arguments must be passed."
    assert not (
        env is not None and dataset is not None
    ), "Only one of the `dataset` and `env` arguments can be used."

    if not isinstance(env, IterableDataset):
        raise RuntimeError(
            f"The env {env} isn't an iterable dataset! (You can use the "
            f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
            f"from a gym environment.)")

    if isinstance(env.unwrapped, VectorEnv):
        if batch_size is not None and batch_size != env.num_envs:
            logger.warning(
                UserWarning(
                    f"The provided batch size {batch_size} will be ignored, since "
                    f"the provided env is vectorized with a batch_size of "
                    f"{env.unwrapped.num_envs}."))
        batch_size = env.num_envs

    if isinstance(env.unwrapped, BatchedVectorEnv):
        num_workers = env.n_workers
    elif isinstance(env.unwrapped, AsyncVectorEnv):
        num_workers = env.num_envs
    else:
        num_workers = 0

    self.env = env
    # NOTE: The batch_size and num_workers attributes reflect the values from the
    # iterator (the VectorEnv), not those of the dataloader.
    # This is done in order to avoid pytorch workers ever being created, and also so
    # that pytorch-lightning stops warning us that the num_workers is too low.
    self._batch_size = batch_size
    self._num_workers = num_workers

    super().__init__(
        dataset=self.env,
        # The batch size is None, because the VecEnv takes care of
        # doing the batching for us.
        batch_size=None,
        num_workers=0,
        collate_fn=None,
        **kwargs,
    )
    Wrapper.__init__(self, env=self.env)
    assert not isinstance(
        self.env, GymDataLoader), "Something very wrong is happening."

    # self.max_epochs: int = max_epochs
    self.observation_space: gym.Space = self.env.observation_space
    self.action_space: gym.Space = self.env.action_space
    self.reward_space: gym.Space

    if isinstance(env.unwrapped, VectorEnv):
        env: VectorEnv
        batch_size = env.num_envs
        # TODO: Overwriting the action space to be the 'batched' version of
        # the single action space, rather than a Tuple(Discrete, ...) as is
        # done in the gym.vector.VectorEnv.
        self.action_space = batch_space(env.single_action_space, batch_size)

    if not hasattr(self.env, "reward_space"):
        self.reward_space = spaces.Box(
            low=self.env.reward_range[0],
            high=self.env.reward_range[1],
            shape=(),
        )
        if isinstance(self.env.unwrapped, VectorEnv):
            # Same here, we use a 'batched' space rather than Tuple.
            self.reward_space = batch_space(self.reward_space, batch_size)
def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))
world-map
    0: stone
    1: wood
    2: house
    3: water
    4: stone (available ??)
    5: wood (available ??)
    6: wall
world-idx_map
    0: house (mine: 1, others: player_idx + 2)
    1: player's location (mine: 1, others: player_idx + 2)
'''
OBS_SPACE_AGENT = spaces.Dict({
    'world-map': spaces.Box(0, 1, shape=(7, 11, 11)),
    'world-idx_map': spaces.Box(0, 5, shape=(2, 11, 11)),
    'world-loc-row': spaces.Box(0, 1, shape=(1, )),
    'world-loc-col': spaces.Box(0, 1, shape=(1, )),
    'world-inventory-Coin': spaces.Box(0, np.inf, shape=(1, )),
    'world-inventory-Stone': spaces.Box(0, np.inf, shape=(1, )),
    'world-inventory-Wood': spaces.Box(0, np.inf, shape=(1, )),
    'time': spaces.Box(0, 1, shape=(1, 1)),
    'Build-build_payment':
def __init__(self, level, **kwargs): """ Base class for Gym interface for ViZDoom. Child classes are defined in vizdoom_env_definitions.py, that contain the level parameter and pass through any kwargs from gym.make() :param level: index of level in the CONFIGS list above :param kwargs: keyword arguments from gym.make(env_name_string, **kwargs) call. 'depth' will render the depth buffer and 'labels' will render the object labels and return it in the observation. Note that the observation will be a list with the screen buffer as the first element. If no kwargs are provided (or depth=False and labels=False) the observation will be of type np.ndarray. """ # parse keyword arguments self.depth = kwargs.get("depth", False) self.labels = kwargs.get("labels", False) self.position = kwargs.get("position", False) self.health = kwargs.get("health", False) # init game self.game = vzd.DoomGame() self.game.set_screen_resolution(vzd.ScreenResolution.RES_640X480) scenarios_dir = os.path.join(os.path.dirname(__file__), "scenarios") self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0])) self.game.set_window_visible(False) self.game.set_depth_buffer_enabled(self.depth) self.game.set_labels_buffer_enabled(self.labels) self.game.clear_available_game_variables() if self.position: self.game.add_available_game_variable(vzd.GameVariable.POSITION_X) self.game.add_available_game_variable(vzd.GameVariable.POSITION_Y) self.game.add_available_game_variable(vzd.GameVariable.POSITION_Z) self.game.add_available_game_variable(vzd.GameVariable.ANGLE) if self.health: self.game.add_available_game_variable(vzd.GameVariable.HEALTH) self.game.init() self.state = None self.viewer = None self.action_space = spaces.Discrete(CONFIGS[level][1]) # specify observation space(s) list_spaces: List[gym.Space] = [ spaces.Box( 0, 255, ( self.game.get_screen_height(), self.game.get_screen_width(), self.game.get_screen_channels(), ), dtype=np.uint8, ) ] if self.depth: list_spaces.append( spaces.Box( 0, 255, ( self.game.get_screen_height(), self.game.get_screen_width(), ), dtype=np.uint8, )) if self.labels: list_spaces.append( spaces.Box( 0, 255, ( self.game.get_screen_height(), self.game.get_screen_width(), ), dtype=np.uint8, )) if self.position: list_spaces.append(spaces.Box(-np.Inf, np.Inf, (4, 1))) if self.health: list_spaces.append(spaces.Box(0, np.Inf, (1, 1))) if len(list_spaces) == 1: self.observation_space = list_spaces[0] else: self.observation_space = spaces.Tuple(list_spaces)
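# Hedged usage sketch for the kwargs described in the docstring above. The
# environment id "VizdoomBasic-v0" and the registering package name are
# assumptions for illustration; any id registered by this package would do.
# With depth=True and labels=True the observation becomes a list of buffers
# (screen first) rather than a single np.ndarray.
import gym
import vizdoomgym  # registers the Vizdoom* envs (package name assumed)

env = gym.make("VizdoomBasic-v0", depth=True, labels=True)  # id assumed
obs = env.reset()
screen, depth_buf, labels_buf = obs[0], obs[1], obs[2]
obs, reward, done, info = env.step(env.action_space.sample())
env.close()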
def reset(self): # reset the layout layout = np.random.choice(['4rooms', '3rooms', '3roomsh', 'open']) if (layout == '4rooms'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w ww wwww w w www www w w w w w w w w w w w wwwwwwwwwwwww """ elif (layout == '3rooms'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w wwwwwwwwwwwww """ elif (layout == '3roomsh'): layout = """\ wwwwwwwwwwwww w w w w wwwwwwwww www w w w w w w w w ww wwwwwwwwww w w w w w w wwwwwwwwwwwww """ elif (layout == 'open'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w w w w w w w w w wwwwwwwwwwwww """ else: raise # reset the occupancy array, the obs space and the observation space self.occupancy = np.array([ list(map(lambda c: 1 if c == 'w' else 0, line)) for line in layout.splitlines() ]) self.obs_space = np.zeros(np.sum(self.occupancy == 0)) if (config <= 1): self.observation_space = spaces.Box( low=np.zeros(np.sum(self.occupancy == 0)), high=np.ones(np.sum(self.occupancy == 0)), dtype=np.uint8) elif (config == 2): self.observation_space = spaces.Box(low=np.zeros([4, 169]), high=np.ones([4, 169]), dtype=np.uint8) # a dictionary that can convert state index (scalar) to state cell location (2-dimension vector) self.tostate = {} statenum = 0 for i in range(13): for j in range(13): # first entry is the y index (vertival) # second entry is the x index (horizontal) if self.occupancy[i, j] == 0: self.tostate[(i, j)] = statenum statenum += 1 self.tocell = {v: k for k, v in self.tostate.items()} # get a list of all available states self.area = list(range(self.obs_space.shape[0])) ''' The following is the normal reset procedure, the same as in the regular fourrooms_collect ''' # clear the update count self.update = 0 # first put objects to the env (each object is at different places) self.objects = {} for _ in range(self.num_goals): while True: new_pos = self.random_pos() if new_pos not in self.objects: self.objects[new_pos] = np.random.multinomial( 1, self.object_priors) break # Then we put the agent to the env self.agent_pos = self.random_pos() # if the agent is put onto an object, re-locate the agent while self.agent_pos in self.objects: self.agent_pos = self.random_pos() # now we have already put the agent and the objects into the env properly return self.observation()
def __init__(self, env=None):
    super(NormalizeWrapper, self).__init__(env)
    self.obs_lo = self.observation_space.low[0, 0, 0]
    self.obs_hi = self.observation_space.high[0, 0, 0]
    obs_shape = self.observation_space.shape
    self.observation_space = spaces.Box(0.0, 1.0, obs_shape, dtype=np.float32)
from ray.rllib.agents.pg import PGTrainer from ray.rllib.agents.pg.pg_policy_graph import PGPolicyGraph from ray.rllib.env import MultiAgentEnv from ray.rllib.env.base_env import BaseEnv from ray.rllib.env.vector_env import VectorEnv from ray.rllib.models import ModelCatalog from ray.rllib.models.model import Model from ray.rllib.models.pytorch.fcnet import FullyConnectedNetwork from ray.rllib.models.pytorch.model import TorchModel from ray.rllib.rollout import rollout from ray.rllib.tests.test_external_env import SimpleServing from ray.tune.registry import register_env DICT_SPACE = spaces.Dict({ "sensors": spaces.Dict({ "position": spaces.Box(low=-100, high=100, shape=(3, )), "velocity": spaces.Box(low=-1, high=1, shape=(3, )), "front_cam": spaces.Tuple( (spaces.Box(low=0, high=1, shape=(10, 10, 3)), spaces.Box(low=0, high=1, shape=(10, 10, 3)))), "rear_cam": spaces.Box(low=0, high=1, shape=(10, 10, 3)), }), "inner_state": spaces.Dict({ "charge": spaces.Discrete(100), "job_status": spaces.Dict({ "task": spaces.Discrete(5), "progress": spaces.Box(low=0, high=100, shape=()), }) }) })
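# A small sketch (assumption: run in the same module, so DICT_SPACE is in scope)
# showing that nested Dict/Tuple spaces sample to plain nested dicts/tuples and
# that membership can be checked with contains().
sample = DICT_SPACE.sample()
assert DICT_SPACE.contains(sample)
position = sample["sensors"]["position"]                   # np.ndarray of shape (3,)
front_left, front_right = sample["sensors"]["front_cam"]   # two (10, 10, 3) arrays
task_id = sample["inner_state"]["job_status"]["task"]      # int in [0, 5)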
def __init__(self, layout_path="layout.yml"):
    # Define action and observation space
    super(WarehouseEnv, self).__init__()
    # They must be gym.spaces objects
    # Example when using discrete actions:
    # Example for using image as input:
    self.actions = [MOVE_LEFT, MOVE_DOWN, MOVE_RIGHT, MOVE_UP]

    with open(layout_path, "r") as f:
        try:
            self.layout = yaml.safe_load(f)
        except yaml.YAMLError as exc:
            print(exc)

    self.max_items_in_env = self.layout["bin-slot-size"] * len(
        self.layout["bins"])
    # actions are zero-based, so this is ok
    self.load_actions = [
        len(self.actions) + i for i in range(self.max_items_in_env)
    ]
    self.unload_actions = [
        len(self.actions) + len(self.load_actions) + i
        for i in range(self.max_items_in_env)
    ]
    self.actions += self.load_actions + self.unload_actions
    assert self.max_items_in_env * 2 + 4 == len(self.actions)
    self.action_space = spaces.Discrete(len(self.actions))

    self.n_channels = 3 + self.max_items_in_env * 2
    self.shape = (self.layout["height"], self.layout["width"],
                  self.n_channels)
    # np.int8 instead of np.uint8: the observation can contain -1, which an
    # unsigned dtype cannot represent.
    self.observation_space = spaces.Box(low=-1,
                                        high=1,
                                        shape=self.shape,
                                        dtype=np.int8)

    self.bins = self._create_bins(self.layout["bins"],
                                  self.layout["bin-slot-size"])
    self.staging_in = StagingIn(self.layout["staging-in"]["position"],
                                self.layout["staging-in"]["loading"])
    self.staging_out = StagingOut(
        self.layout["staging-out"]["position"],
        self.layout["staging-out"]["loading"],
    )

    self.blocked_positions = []
    for b in self.bins:
        self.blocked_positions.append(b.pos)
    self.blocked_positions.append(self.staging_in.pos)
    self.blocked_positions.append(self.staging_out.pos)

    self.agent = Agent(
        self.layout["agent-start"]["position"],
        self.layout["height"],
        self.layout["width"],
        self.layout["bin-slot-size"],
        self.blocked_positions,
    )

    self.transaction = None
    self.item_counter = 0
    self.invalid_action_counter = 0
    self.gui = WarehouseGui([self.layout["height"], self.layout["width"]],
                            self.max_items_in_env)
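# Worked example of the action-id layout built above (numbers are hypothetical:
# bin-slot-size=2 and 3 bins). Movement actions keep ids 0-3, load actions come
# next, then unload actions, giving 4 + 2 * max_items ids in total.
max_items = 2 * 3                                            # bin-slot-size * number of bins
move_ids = list(range(4))                                    # [0, 1, 2, 3]
load_ids = [4 + i for i in range(max_items)]                 # [4, ..., 9]
unload_ids = [4 + max_items + i for i in range(max_items)]   # [10, ..., 15]
assert len(move_ids) + len(load_ids) + len(unload_ids) == 4 + 2 * max_items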
def __init__(self): """ Initialise the environment """ self.random_goal = True self.goal_oriented = True self.obs_type = 1 self.reward_type = 1 self.action_type = 1 # Define action space self.action_space = spaces.Box( low=np.float32( np.array([-0.5, -0.25, -0.25, -0.25, -0.5, -0.005]) / 30), high=np.float32( np.array([0.5, 0.25, 0.25, 0.25, 0.5, 0.005]) / 30), dtype=np.float32) # Define observation space if self.obs_type == 1: self.obs_space_low = np.float32( np.concatenate((MIN_END_EFF_COORDS, JOINT_MIN), axis=0)) self.obs_space_high = np.float32( np.concatenate((MAX_END_EFF_COORDS, JOINT_MAX), axis=0)) elif self.obs_type == 2: self.obs_space_low = np.float32( np.concatenate((MIN_GOAL_COORDS, JOINT_MIN), axis=0)) self.obs_space_high = np.float32( np.concatenate((MAX_GOAL_COORDS, JOINT_MAX), axis=0)) elif self.obs_type == 3: self.obs_space_low = np.float32( np.concatenate(([-1.0] * 6, JOINT_MIN), axis=0)) self.obs_space_high = np.float32( np.concatenate(([1.0] * 6, JOINT_MAX), axis=0)) elif self.obs_type == 4: self.obs_space_low = np.float32( np.concatenate(([-1.0] * 3, JOINT_MIN), axis=0)) self.obs_space_high = np.float32( np.concatenate(([1.0] * 3, JOINT_MAX), axis=0)) elif self.obs_type == 5: self.obs_space_low = np.float32( np.concatenate(([-1.0] * 6, MIN_GOAL_COORDS, JOINT_MIN), axis=0)) self.obs_space_high = np.float32( np.concatenate(([1.0] * 6, MAX_GOAL_COORDS, JOINT_MAX), axis=0)) self.observation_space = spaces.Box(low=self.obs_space_low, high=self.obs_space_high, dtype=np.float32) if self.goal_oriented: self.observation_space = spaces.Dict( dict(desired_goal=spaces.Box(low=np.float32(MIN_GOAL_COORDS), high=np.float32(MAX_GOAL_COORDS), dtype=np.float32), achieved_goal=spaces.Box( low=np.float32(MIN_END_EFF_COORDS), high=np.float32(MAX_END_EFF_COORDS), dtype=np.float32), observation=self.observation_space)) # Initialise goal position if self.random_goal: self.goal = self.sample_random_goal() else: self.goal = FIXED_GOAL_COORDS # Connect to physics client. By default, do not render self.physics_client = p.connect(p.DIRECT) # Load URDFs self.create_world() # reset environment self.reset()
def __init__(self, gym_config, robot_class=None, env_sensors=None, robot_sensors=None, task=None, env_randomizers=None): """Initializes the locomotion gym environment. Args: gym_config: An instance of LocomotionGymConfig. robot_class: A class of a robot. We provide a class rather than an instance due to hard_reset functionality. Parameters are expected to be configured with gin. sensors: A list of environmental sensors for observation. task: A callable function/class to calculate the reward and termination condition. Takes the gym env as the argument when calling. env_randomizers: A list of EnvRandomizer(s). An EnvRandomizer may randomize the physical property of minitaur, change the terrrain during reset(), or add perturbation forces during step(). Raises: ValueError: If the num_action_repeat is less than 1. """ self.seed() self._gym_config = gym_config self._robot_class = robot_class self._robot_sensors = robot_sensors self._sensors = env_sensors if env_sensors is not None else list() if self._robot_class is None: raise ValueError('robot_class cannot be None.') # A dictionary containing the objects in the world other than the robot. self._world_dict = {} self._task = task self._env_randomizers = env_randomizers if env_randomizers else [] # This is a workaround due to the issue in b/130128505#comment5 if isinstance(self._task, sensor.Sensor): self._sensors.append(self._task) # Simulation related parameters. self._num_action_repeat = gym_config.simulation_parameters.num_action_repeat self._on_rack = gym_config.simulation_parameters.robot_on_rack if self._num_action_repeat < 1: raise ValueError('number of action repeats should be at least 1.') self._sim_time_step = gym_config.simulation_parameters.sim_time_step_s self._env_time_step = self._num_action_repeat * self._sim_time_step self._env_step_counter = 0 self._num_bullet_solver_iterations = int( _NUM_SIMULATION_ITERATION_STEPS / self._num_action_repeat) self._is_render = gym_config.simulation_parameters.enable_rendering # The wall-clock time at which the last frame is rendered. self._last_frame_time = 0.0 self._show_reference_id = -1 if self._is_render: self._pybullet_client = bullet_client.BulletClient( connection_mode=pybullet.GUI) pybullet.configureDebugVisualizer( pybullet.COV_ENABLE_GUI, gym_config.simulation_parameters.enable_rendering_gui) self._show_reference_id = pybullet.addUserDebugParameter( "show reference", 0, 1, self._task._draw_ref_model_alpha) self._delay_id = pybullet.addUserDebugParameter("delay", 0, 0.3, 0) else: self._pybullet_client = bullet_client.BulletClient( connection_mode=pybullet.DIRECT) self._pybullet_client.setAdditionalSearchPath(pd.getDataPath()) if gym_config.simulation_parameters.egl_rendering: self._pybullet_client.loadPlugin('eglRendererPlugin') # The action list contains the name of all actions. self._action_list = [] action_upper_bound = [] action_lower_bound = [] action_config = robot_class.ACTION_CONFIG for action in action_config: self._action_list.append(action.name) action_upper_bound.append(action.upper_bound) action_lower_bound.append(action.lower_bound) self.action_space = spaces.Box(np.array(action_lower_bound), np.array(action_upper_bound), dtype=np.float32) # Set the default render options. 
self._camera_dist = gym_config.simulation_parameters.camera_distance self._camera_yaw = gym_config.simulation_parameters.camera_yaw self._camera_pitch = gym_config.simulation_parameters.camera_pitch self._render_width = gym_config.simulation_parameters.render_width self._render_height = gym_config.simulation_parameters.render_height self._hard_reset = True self.reset() self._hard_reset = gym_config.simulation_parameters.enable_hard_reset # Construct the observation space from the list of sensors. Note that we # will reconstruct the observation_space after the robot is created. self.observation_space = ( space_utils.convert_sensors_to_gym_space_dictionary( self.all_sensors()))
def __init__(self):
    super(CorrectivePsuedoSteeringEnv, self).__init__()
    self.observation_space = spaces.Box(low=-10, high=10, shape=(1, ))
    self.action_space = spaces.Box(low=-10, high=10, shape=(1, ))
    self.action_range = \
        (self.action_space.high - self.action_space.low)[0]
def __init__(self):
    self.observation_space = spaces.Box(low=-1, high=1, shape=(1, ))
    self.action_space = spaces.Box(low=-1, high=1, shape=(1, ))
    self.target = None
    self._max_episode_steps = 10**3
    self.step_num = 0
def __init__(self, train): # simulation timestep self.dt = 1 # timestep counter self.t = 0 # running mode: train or test self.train = train # simulation data: load, solar power, wind pwoer, price self.data = read_pickle_data()['train'] if train else read_pickle_data( )['test'] # days self.days = self.data['solar_0'].size // 24 # network architecture self.net = net = case34_3ph() # how much history to store self.past_t = 24 # seed self.seed() # list of agents DG_1 = DG('DG 1', bus='Bus 848', min_p_mw=0, max_p_mw=0.66, sn_mva=0.825, control_q=True, cost_curve_coefs=[100, 71.6, 0.4615]) DG_2 = DG('DG 2', bus='Bus 890', min_p_mw=0, max_p_mw=0.50, sn_mva=0.625, control_q=True, cost_curve_coefs=[100, 62.4, 0.5011]) PV_1 = RES('PV 1', source='SOLAR', bus='Bus 822', sn_mva=0.2, control_q=False) PV_2 = RES('PV 2', source='SOLAR', bus='Bus 856', sn_mva=0.2, control_q=False) PV_3 = RES('PV 3', source='SOLAR', bus='Bus 838', sn_mva=0.2, control_q=False) WP_1 = RES('WP_1', source='WIND', bus='Bus 822', sn_mva=0.3, control_q=False) WP_2 = RES('WP_2', source='WIND', bus='Bus 826', sn_mva=0.3, control_q=False) WP_3 = RES('WP_3', source='WIND', bus='Bus 838', sn_mva=0.3, control_q=False) SUB = Transformer('Substation', type='TAP', fbus='Bus0', tbus='Bus 800', sn_mva=2.5, tap_max=2, tap_min=-2) TF = Transformer('TF', type='Trafo', fbus='Bus 832', tbus='Bus 888', sn_mva=0.5) TAP_1 = Transformer('TAP 1', type='TAP', fbus='Bus 814', tbus='Bus 850', sn_mva=2.5, tap_max=10, tap_min=-16) TAP_2 = Transformer('TAP 2', type='TAP', fbus='Bus 852', tbus='Bus 832', sn_mva=2.5, tap_max=10, tap_min=-16) SCB_1 = Shunt('SCB 1', bus='Bus 840', q_mvar=0.12, max_step=4) SCB_2 = Shunt('SCB 2', bus='Bus 864', q_mvar=0.12, max_step=4) ESS_1 = ESS('Storage', bus='Bus 810', min_p_mw=-0.2, max_p_mw=0.2, max_e_mwh=1.0, min_e_mwh=0.2) GRID = Grid('GRID', bus='Bus 800', sn_mva=2.5) self.agents = [ DG_1, DG_2, PV_1, PV_2, PV_3, WP_1, WP_2, WP_3, TF, SUB, TAP_1, TAP_2, SCB_1, SCB_2, ESS_1, GRID ] # reset ob = self.reset() # configure spaces action_space, action_shape = [], 0 for agent in self.policy_agents: total_action_space = [] # continuous action space if agent.action.range is not None: low, high = agent.action.range u_action_space = spaces.Box(low=low, high=high, dtype=np.float32) total_action_space.append(u_action_space) action_shape += u_action_space.shape[-1] # discrete action space if agent.action.ncats is not None: if isinstance(agent.action.ncats, list): u_action_space = spaces.MultiDiscrete(agent.action.ncats) action_shape += u_action_space.nvec.shape[-1] elif isinstance(agent.action.ncats, int): u_action_space = spaces.Discrete(agent.action.ncats) action_shape += 1 else: raise NotImplementedError() total_action_space.append(u_action_space) action_space.extend(total_action_space) # total action space if len(action_space) > 1: # all action spaces are discrete, so simplify to MultiDiscrete action space self.action_space = spaces.Tuple(action_space) self.action_space.shape = (action_shape, ) else: self.action_space = action_space[0] # observation space self.observation_space = spaces.Box(low=-np.inf, high=+np.inf, shape=(ob.shape[0], ), dtype=np.float32) # reward self.reward_range = (-200.0, 200.0)
def __init__(self, num_agents=1, num_targets=2, map_name='empty', is_training=True, known_noise=True, **kwargs): super().__init__(num_agents=num_agents, num_targets=num_targets, map_name=map_name, is_training=is_training) self.id = 'setTracking-v1' self.nb_agents = num_agents #only for init, will change with reset() self.nb_targets = num_targets #only for init, will change with reset() self.agent_dim = 3 self.target_dim = 4 self.target_init_vel = METADATA['target_init_vel'] * np.ones((2, )) # LIMIT self.limit = {} # 0: low, 1:highs self.limit['agent'] = [ np.concatenate((self.MAP.mapmin, [-np.pi])), np.concatenate((self.MAP.mapmax, [np.pi])) ] self.limit['target'] = [ np.concatenate((self.MAP.mapmin, [ -METADATA['target_vel_limit'], -METADATA['target_vel_limit'] ])), np.concatenate( (self.MAP.mapmax, [METADATA['target_vel_limit'], METADATA['target_vel_limit']])) ] rel_vel_limit = METADATA['target_vel_limit'] + METADATA['action_v'][ 0] # Maximum relative speed self.limit['state'] = [ np.concatenate( ([0.0, -np.pi, -rel_vel_limit, -10 * np.pi, -50.0, 0.0], [0.0, -np.pi])), np.concatenate( ([600.0, np.pi, rel_vel_limit, 10 * np.pi, 50.0, 2.0], [self.sensor_r, np.pi])) ] self.observation_space = spaces.Box(self.limit['state'][0], self.limit['state'][1], dtype=np.float32) self.targetA = np.concatenate( (np.concatenate((np.eye(2), self.sampling_period * np.eye(2)), axis=1), [[0, 0, 1, 0], [0, 0, 0, 1]])) self.target_noise_cov = METADATA['const_q'] * np.concatenate( (np.concatenate((self.sampling_period**3 / 3 * np.eye(2), self.sampling_period**2 / 2 * np.eye(2)), axis=1), np.concatenate((self.sampling_period**2 / 2 * np.eye(2), self.sampling_period * np.eye(2)), axis=1))) if known_noise: self.target_true_noise_sd = self.target_noise_cov else: self.target_true_noise_sd = METADATA[ 'const_q_true'] * np.concatenate( (np.concatenate((self.sampling_period**2 / 2 * np.eye(2), self.sampling_period / 2 * np.eye(2)), axis=1), np.concatenate((self.sampling_period / 2 * np.eye(2), self.sampling_period * np.eye(2)), axis=1))) # Build a robot self.setup_agents() # Build a target self.setup_targets() self.setup_belief_targets() # Use custom reward self.get_reward()
def __init__( self, setting_file='search_rr_plant78.json', reset_type='waypoint', # testpoint, waypoint, augment_env=None, #texture, target, light test=True, # if True will use the test_xy as start point action_type='discrete', # 'discrete', 'continuous' observation_type='rgbd', # 'color', 'depth', 'rgbd' reward_type='bbox', # distance, bbox, bbox_distance, docker=False, resolution=(320, 240)): setting = self.load_env_setting(setting_file) self.test = test self.docker = docker self.reset_type = reset_type self.augment_env = augment_env # start unreal env self.unreal = env_unreal.RunUnreal(ENV_BIN=setting['env_bin']) env_ip, env_port = self.unreal.start(docker, resolution) # connect UnrealCV self.unrealcv = Navigation(cam_id=self.cam_id, port=env_port, ip=env_ip, targets=self.target_list, env=self.unreal.path2env, resolution=resolution) self.unrealcv.pitch = self.pitch # define action self.action_type = action_type assert self.action_type == 'discrete' or self.action_type == 'continuous' if self.action_type == 'discrete': self.action_space = spaces.Discrete(len(self.discrete_actions)) elif self.action_type == 'continuous': self.action_space = spaces.Box( low=np.array(self.continous_actions['low']), high=np.array(self.continous_actions['high'])) # define observation space, # color, depth, rgbd,... self.observation_type = observation_type assert self.observation_type == 'color' or self.observation_type == 'depth' or self.observation_type == 'rgbd' or self.observation_type == 'gray' self.observation_shape = self.unrealcv.define_observation( self.cam_id, self.observation_type) # define reward type # distance, bbox, bbox_distance, self.reward_type = reward_type self.reward_function = reward.Reward(setting) # set start position self.trigger_count = 0 current_pose = self.unrealcv.get_pose(self.cam_id) current_pose[2] = self.height self.unrealcv.set_location(self.cam_id, current_pose[:3]) self.count_steps = 0 self.targets_pos = self.unrealcv.build_pose_dic(self.target_list) # for reset point generation and selection self.reset_module = reset_point.ResetPoint(setting, reset_type, test, current_pose) self.rendering = False
def create_box_space(self):
    return spaces.Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
def __init__(self): # Launch the simulation with the given launchfile name gazebo_env.GazeboEnv.__init__(self, "iri_wam_HER.launch") self.publishers = [ 'pub1', 'pub2', 'pub4', 'pub3', 'pub5', 'pub6', 'pub7' ] #publishers for the motor commands self.publishers[0] = rospy.Publisher( '/iri_wam/joint1_position_controller/command', Float64, queue_size=5) self.publishers[1] = rospy.Publisher( '/iri_wam/joint2_position_controller/command', Float64, queue_size=5) self.publishers[2] = rospy.Publisher( '/iri_wam/joint4_position_controller/command', Float64, queue_size=5) self.publishers[3] = rospy.Publisher( '/iri_wam/joint3_position_controller/command', Float64, queue_size=5) self.publishers[4] = rospy.Publisher( '/iri_wam/joint5_position_controller/command', Float64, queue_size=5) self.publishers[5] = rospy.Publisher( '/iri_wam/joint6_position_controller/command', Float64, queue_size=5) self.publishers[6] = rospy.Publisher( '/iri_wam/joint7_position_controller/command', Float64, queue_size=5) # discretely publishing motor actions for now self.pubMarker = rospy.Publisher('/goalPose', Marker, queue_size=5) self.unpause = rospy.ServiceProxy('/gazebo/unpause_physics', Empty) self.pause = rospy.ServiceProxy('/gazebo/pause_physics', Empty) self.reset_proxy = rospy.ServiceProxy('/gazebo/reset_simulation', Empty) #self.minDisplacement = 0.02 #minimum discrete distance by a single action #self.minDisplacementTolerance = 0.01 #self.minDisplacementPose = self.minDisplacement + self.minDisplacementTolerance # minimum distance to check for the goal reaching state self.distanceThreshold = 0.09 self.baseFrame = 'iri_wam_link_base' self.homingTime = 0.6 # time given for homing self.lenGoal = 3 # goal position list length self._max_episode_steps = 100 self.reward_type = 'sparse' self.home = np.zeros(4) # what position is the homing # self.Xacrohigh = np.array([2.6, 2.0, 2.8, 3.1, 1.24, 1.57, 2.96]) # self.Xacrolow = np.array([-2.6, -2.0, -2.8, -0.9, -4.76, -1.57, -2.96]) # self.IKlow = np.array([-2.6, -1.94, -2.73, -0.88, -4.74, -1.55, -2.98]) # self.IKhigh = np.array([2.6, 1.94, 2.73, 3.08, 1.22, 1.55, 2.98]) #self.low = np.array([-2.6, -1.42, -2.73, -0.88, -4.74, -1.55, -2.98]) #self.high = np.array([2.6, 1.42, 2.73, 3.08, 1.22, 1.55, 2.98]) #self.lowConc = np.array([-2.6, -1.42, -0.88, -2.6, -1.42, -0.88]) #1, 2, 4, 1, 2, 4 #self.highConc = np.array([2.6, 1.42, 3.08, 2.6, 1.42, 3.08]) self.lowConcObs = np.array([-2.6, -1.42, -0.88]) #1, 2, 4 self.highConcObs = np.array([2.6, 1.42, 3.08]) self.samplelow = np.array( [-2.4, -1.4, -2.03, -0.68, -4.04, -1.05, -2.08]) self.samplehigh = np.array([2.4, 1.4, 2.03, 2.78, 1.0, 1.05, 2.08]) #self.samplelow = np.array([-1.5, -0.8, -2.03, -0.48, -4.04, -1.05, -2.08]) #self.samplehigh = np.array([1.5, 0.8, 2.03, 2.48, 1.0, 1.05, 2.08]) #self.high = np.array([5.2, 2.8, 5.4, 3.96, 6.96, 3.1, 5.96]) self.lowAction = [-1, -1, -1] self.highAction = [1, 1, 1] self.n_actions = len(self.highAction) self.lastObservation = None self.lastObservationJS = None self.goal = None self.goalJS = None self.action_space = spaces.Box(-1., 1., shape=(self.n_actions, ), dtype='float32') self.observation_space = spaces.Dict( dict( desired_goal=spaces.Box(-np.inf, np.inf, shape=(len(self.highConcObs), ), dtype='float32'), achieved_goal=spaces.Box(-np.inf, np.inf, shape=(len(self.highConcObs), ), dtype='float32'), observation=spaces.Box(-np.inf, np.inf, shape=(len(self.highConcObs), ), dtype='float32'), ))
def __init__(self, num_goals=10, num_types=2, object_priors=[1, 1], reward_vec=None, horizon=50, p=0, config=2, layout='open', seed=1234, cont=True): """ config -> configouration of the state space 0 - returns tabular index of the state 1 - returns one hot encoded vector of the state 2 - returns matrix form of the state """ if (layout == '4rooms'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w ww wwww w w www www w w w w w w w w w w w wwwwwwwwwwwww """ elif (layout == '3rooms'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w w wwwwwwwwwwwww """ elif (layout == '3roomsh'): layout = """\ wwwwwwwwwwwww w w w w wwwwwwwww www w w w w w w w w ww wwwwwwwwww w w w w w w wwwwwwwwwwwww """ elif (layout == 'maze'): layout = """\ wwwwwwwwwwwww w w w ww wwwwww w w w w w w w wwwww w w w w w w w w w w w www w w w w w w w w wwwww w w w w w w w ww wwwwww w w w wwwwwwwwwwwww """ elif (layout == 'open'): layout = """\ wwwwwwwwwwwww w w w w w w w w w w w w w w w w w w w w w w wwwwwwwwwwwww """ else: raise assert num_types == len(object_priors) self.p = p self.config = config self.num_goals = num_goals self.num_types = num_types self.horizon = horizon # object priors is the vector of priors of types of each object (so we have to normalize it first) self.object_priors = np.array(object_priors) / np.sum(object_priors) # reward_vec is the reward vector for different type of objects if reward_vec is None: self.reward_vec = np.ones(shape=(self.num_types, )) else: self.reward_vec = reward_vec ''' 从这里开始都要放到reset里去 ''' # store the number of updates self.update = 0 # if we would like to create another object when one object is collected self.cont = False # fix the random seed for reproducibility np.random.seed(seed) self.rng = np.random.RandomState(seed) # occupancy of the layout(1 -> walls, 0 -> elsewhere) self.occupancy = np.array([ list(map(lambda c: 1 if c == 'w' else 0, line)) for line in layout.splitlines() ]) # action space of the env # 0: UP # 1: DOWN # 2: LEFT # 3: RIGHT self.a_space = np.array([0, 1, 2, 3]) self.obs_space = np.zeros(np.sum(self.occupancy == 0)) # observation space (Not used) # Setting the observation space based on the config # The observation space is for the network to know what shape of input it should expect if (config <= 1): self.observation_space = spaces.Box( low=np.zeros(np.sum(self.occupancy == 0)), high=np.ones(np.sum(self.occupancy == 0)), dtype=np.uint8) elif (config == 2): self.observation_space = spaces.Box(low=np.zeros([4, 169]), high=np.ones([4, 169]), dtype=np.uint8) # action space self.action_space = spaces.Discrete(4) # direction of actions self.directions = [ np.array((-1, 0)), np.array((1, 0)), np.array((0, -1)), np.array((0, 1)) ] # a dictionary that can convert state index (scalar) to state cell location (2-dimension vector) self.tostate = {} statenum = 0 for i in range(13): for j in range(13): # first entry is the y index (vertival) # second entry is the x index (horizontal) if self.occupancy[i, j] == 0: self.tostate[(i, j)] = statenum statenum += 1 self.tocell = {v: k for k, v in self.tostate.items()} # get a list of all available states self.area = list(range(self.obs_space.shape[0])) self.channels_all = len(self.reward_vec) + 2
def __init__(self):
    super(QtradeEnv, self).__init__()

    self.root_dir = '/Users/liuyehong/Dropbox/CICC/Algorithm_Trading/Platform2/OHLC/data/1Min/'
    # self.root_dir = '/Users/liuyehong/Dropbox/CICC/Algorithm_Trading/Platform/data/Stock/US/'
    self.list_dir = [d for d in os.listdir(self.root_dir) if '.csv' in d]
    self.df_dir = np.random.choice(self.list_dir)
    self.df = pd.read_csv(self.root_dir + self.df_dir)
    self.alpha = Alpha(self.df)
    self.cost = -0.00005
    # internal interest rate (needed to keep long-term training from getting stuck)
    self.interest_rate = 0 / 240 / 240
    self.window = 50
    self.cash = 1
    self.stock = 0
    self.t = self.window + 1
    self.i = 0
    self.T = len(self.df)
    self.total_steps = int(self.T / 3.)
    self.list_asset = np.ones(self.T)
    self.list_holding = np.ones(self.T)

    # alpha
    self.close = self.alpha.close
    self.high = self.alpha.high
    self.low = self.alpha.low
    self.open = self.alpha.open
    self.vol = self.alpha.vol
    self.close_diff = self.alpha.close_diff()
    self.high_diff = self.alpha.high_diff()
    self.low_diff = self.alpha.low_diff()
    self.open_diff = self.alpha.open_diff()
    self.ma = self.alpha.moving_average(window=self.window)
    self.ema = self.alpha.EMA(window=self.window)
    self.dema = self.alpha.DEMA(window=self.window)
    self.kama = self.alpha.KAMA(window=self.window)
    self.sma = self.alpha.SMA(window=self.window)
    self.tema = self.alpha.TEMA(window=self.window)
    self.trima = self.alpha.TRIMA(window=self.window)
    self.linearreg_slope = self.alpha.LINEARREG_SLOPE(window=self.window)
    self.mstd = self.alpha.moving_std(window=self.window)
    self.bollinger_lower_bound = self.alpha.bollinger_lower_bound(
        window=self.window, width=1)
    self.bollinger_upper_bound = self.alpha.bollinger_upper_bound(
        window=self.window, width=1)
    self.moving_max = self.alpha.moving_max(window=self.window)
    self.moving_min = self.alpha.moving_min(window=self.window)
    self.moving_med = self.alpha.moving_med(window=self.window)

    # Actions of the format Buy x%, Sell x%, Hold, etc.
    # The action space range must be symmetric, and the order matters.
    self.action_space = spaces.Box(low=np.array([-np.inf, -np.inf]),
                                   high=np.array([np.inf, np.inf]),
                                   dtype=np.float16)

    # Prices contain the OHLC values for the last `self.window` prices.
    self.observation_space = spaces.Box(low=-np.inf,
                                        high=np.inf,
                                        shape=(1, self.window, 4),
                                        dtype=np.float16)
class RubiksCubeClassicEnv(ParametrizedEnv): """ An environment where the goal is to solve Rubiks cube. Reward is 1.0 only when the cube is solved and 0.0 otherwise. This is a fixed 6x3x3 classic cube. Cube's faces are ordered in the following way: --- |1| --------- |0|2|4|5| --------- |3| --- shuffle_count is a parameter specifying how many times should random actions be played on the environment to initialize it "randomly" """ # Set this in SOME subclasses metadata = {'render.modes': ['rgb_array']} spec = None colors = [ # Taken from https://www.schemecolor.com/rubik-cube-colors.php (185, 0, 0), (0, 69, 173), (255, 213, 0), (0, 155, 72), (255, 89, 0), (255, 255, 255) ] # Where in canvas each face should be rendered canvas_coords = [(0, 33), (33, 0), (33, 33), (33, 66), (66, 33), (99, 33)] # Set these in ALL subclasses # 12 actions because there are 6 faces that can be turned clockwise or counterclockwise action_space = spaces.Discrete(12) observation_space = spaces.Box(low=0, high=5, shape=(6, 3, 3), dtype=np.uint8) def __init__(self, parameters=None, constants=None): if parameters is None: parameters = RubiksCubeParameters() elif isinstance(parameters, dict): parameters = RubiksCubeParameters(**parameters) if constants is None: constants = RubiksCubeConstants() elif isinstance(constants, dict): constants = RubiksCubeConstants(**constants) super().__init__(parameters, constants) self.reward_range = (min(self.constants.win_reward, self.constants.move_reward), max(self.constants.win_reward, self.constants.move_reward)) self._state = np.zeros(shape=(6, 3, 3), dtype=np.uint8) self._info = {'shuffle_count': self.parameters.shuffle_count} self._initialize_starting_cube() self._shuffle() def _initialize_from_current_parameters(self): """ Initialize internal state of the environment from a currently set set of parameters """ pass def _initialize_starting_cube(self): """ Initialize cube to the initial 'solved' state """ for i in range(6): self._state[i] = i def _shuffle(self): """ Shuffle the cube """ actions = np.random.randint(0, self.action_space.n, size=self.parameters.shuffle_count) for action in actions: rubiks_cube_play(self._state, action) def is_solved(self): """ Check if given cube is solved """ return cube_is_solved(self._state) def step(self, action: int): """Run one timestep of the environment's dynamics. When end of episode is reached, you are responsible for calling `reset()` to reset this environment's state. Accepts an action and returns a tuple (observation, reward, done, info). Args: action (object): an action provided by the environment Returns: observation (object): agent's observation of the current environment reward (float) : amount of reward returned after previous action done (boolean): whether the episode has ended, in which case further step() calls will return undefined results info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning) """ assert 0 <= action < 12, "Action must be within range" rubiks_cube_play(self._state, action) if self.is_solved(): return self._state, self.constants.win_reward, True, self._info.copy( ) else: return self._state, self.constants.move_reward, False, self._info.copy( ) def reset(self): """Resets the state of the environment and returns an initial observation. Returns: observation (object): the initial observation of the space. """ self._initialize_starting_cube() self._shuffle() return self._state def render(self, mode='human'): """Renders the environment. 
The set of supported modes varies per environment. (And some environments do not support rendering at all.) By convention, if mode is: - human: render to the current display or terminal and return nothing. Usually for human consumption. - rgb_array: Return an numpy.ndarray with shape (x, y, 3), representing RGB values for an x-by-y pixel image, suitable for turning into a video. - ansi: Return a string (str) or StringIO.StringIO containing a terminal-style text representation. The text can include newlines and ANSI escape sequences (e.g. for colors). Note: Make sure that your class's metadata 'render.modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method. Args: mode (str): the mode to render with Example: class MyEnv(Env): metadata = {'render.modes': ['human', 'rgb_array']} def render(self, mode='human'): if mode == 'rgb_array': return np.array(...) # return RGB frame suitable for video elif mode is 'human': ... # pop up a window and render else: super(MyEnv, self).render(mode=mode) # just raise an exception """ if mode == 'rgb_array': canvas = np.zeros(shape=(100, 133, 3), dtype=np.uint8) for face_idx in range(6): face_coords = self.canvas_coords[face_idx] for i in range(3): for j in range(3): color = self.colors[self._state[face_idx, i, j]] start_x = face_coords[0] + j * 11 + 1 end_x = face_coords[0] + j * 11 + 10 + 1 start_y = face_coords[1] + i * 11 + 1 end_y = face_coords[1] + i * 11 + 10 + 1 canvas[start_y:end_y, start_x:end_x] = color return canvas else: super().render(mode=mode) def close(self): """Override _close in your subclass to perform any necessary cleanup. Environments will automatically close() themselves when garbage collected or when the program exits. """ return def seed(self, seed=None): """Sets the seed for this env's random number generator(s). Note: Some environments use multiple pseudorandom number generators. We want to capture all such seeds used in order to ensure that there aren't accidental correlations between multiple generators. Returns: list<bigint>: Returns the list of seeds used in this env's random number generators. The first value in the list should be the "main" seed, or the value which a reproducer should pass to 'seed'. Often, the main seed equals the provided 'seed', but this won't be true if seed=None, for example. """ return
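# Hedged usage sketch for the environment above, assuming the class can be
# constructed with its defaults (parameters=None, constants=None). It runs a
# random policy for a bounded number of steps; solving the cube this way is
# unlikely, and the loop is for illustration only.
env = RubiksCubeClassicEnv()
obs = env.reset()
for _ in range(100):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
frame = env.render(mode='rgb_array')  # (100, 133, 3) uint8 canvas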
def __init__(self):
    super(PlannerEnv, self).__init__()
    print('Planner environment created!')
    self.hist_size = 3
    self.simOn = False

    # For time step
    self.current_time = time.time()
    self.last_time = self.current_time
    self.time_step = []
    self.last_obs = np.array([])
    self.TIME_STEP = 0.05  # 50 milliseconds
    self.steps = 0
    self.total_reward = 0
    self.done = False

    # Define action and observation space
    # They must be gym.spaces objects
    # Example when using discrete actions:
    self.action_space = spaces.Box(low=np.array([-1] * 4),
                                   high=np.array([1] * 4),
                                   dtype=np.float16)
    # spaces.Discrete(N_DISCRETE_ACTIONS)
    # Example for using image as input:
    # np.float32 instead of np.uint8: an unsigned 8-bit dtype cannot represent
    # the upper bound of 512.
    self.observation_space = spaces.Box(low=0,
                                        high=512,
                                        shape=(43657, ),
                                        dtype=np.float32)

    # Observation: for now two lists: one of entities and one of enemies
    self._obs = []
    # As a first step, actions can be only of 3 types: move, look, attack
    # For each type of action, there should be a list of pairs of {entity, goal}
    self._actions = {
        'MOVE_TO': [],
        'LOOK_AT': [],
        'ATTACK': [],
        'TAKE_PATH': []
    }

    # ROS2 Support
    self.entities = []
    self.enemies = []
    self.entities_queue = deque([])
    self.match_los = {}
    rclpy.init()
    self.node = rclpy.create_node("planner")

    # Subscribe to topics
    self.entityPoseSub = self.node.create_subscription(
        SGlobalPose, '/entity/global_pose', self.global_pose_callback, 10)
    self.entityDescriptionSub = self.node.create_subscription(
        SDiagnosticStatus, '/entity/description',
        self.entity_description_callback, 10)
    self.enemyDescriptionSub = self.node.create_subscription(
        EnemyReport, '/enemy/description', self.enemy_description_callback,
        10)
    self.entityImuSub = self.node.create_subscription(
        SImu, '/entity/imu', self.entity_imu_callback, 10)
    self.entityOverallHealthSub = self.node.create_subscription(
        SHealth, '/entity/overall_health',
        self.entity_overall_health_callback, 10)
    self.entityTwistSub = self.node.create_subscription(
        STwist, '/entity/twist', self.entity_twist_callback, 10)

    # Publish topics
    self.moveToPub = self.node.create_publisher(SGlobalPose,
                                                '/entity/moveto/goal', 10)
    self.attackPub = self.node.create_publisher(SGlobalPose,
                                                '/entity/attack/goal', 10)
    self.lookPub = self.node.create_publisher(SGlobalPose,
                                              '/entity/look/goal', 10)
    self.takePathPub = self.node.create_publisher(SPath, '/entity/takepath',
                                                  10)
    self.takeGoalPathPub = self.node.create_publisher(
        SGoalAndPath, '/entity/followpath/goal', 10)

    self.num_of_dead_enemies = 0

    # self.node.create_rate(10.0)
    # rclpy.spin(self.node)
    # executor = MultiThreadedExecutor(num_threads=4)
    # executor.add_node(self.node)
    # executor.spin()
    x = threading.Thread(target=self.thread_ros, args=())
    print("Before running thread")
    x.start()
def test_normalized_action_space(self):
    config = solo_env.Solo8VanillaConfig()
    env = solo_env.Solo8VanillaEnv(config=config, normalize_actions=True)

    self.assertEqual(env.action_space, spaces.Box(-1, 1, shape=(12,)))
def __init__(self, p, collision_penalty=-2, trap_penalty=0.5):
    """Initializes the lattice

    Parameters
    ----------
    p : list of int
        Sequence containing the maximum length of each interpolator.
    collision_penalty : int, must be a negative value
        Penalty incurred when the agent made an invalid action.
        Default is -2.
    trap_penalty : float, must be between 0 and 1
        Penalty incurred when the agent is trapped. Actual value is
        computed as :code:`floor(length_of_sequence * trap_penalty)`
        Default is 0.5.
    """
    try:
        if collision_penalty >= 0:
            raise ValueError("%r (%s) must be negative" %
                             (collision_penalty, type(collision_penalty)))
        if not isinstance(collision_penalty, int):
            raise ValueError("%r (%s) must be of type 'int'" %
                             (collision_penalty, type(collision_penalty)))
        self.collision_penalty = collision_penalty
    except TypeError:
        logger.error("%r (%s) must be of type 'int'" %
                     (collision_penalty, type(collision_penalty)))
        raise

    try:
        if not 0 < trap_penalty < 1:
            raise ValueError("%r (%s) must be between 0 and 1" %
                             (trap_penalty, type(trap_penalty)))
        self.trap_penalty = trap_penalty
    except TypeError:
        logger.error("%r (%s) must be of type 'float'" %
                     (trap_penalty, type(trap_penalty)))
        raise

    self.state = [(0, 0)]
    self.master_state = [(0, 0)]
    self.actions = []
    self.origin = (0, 0)
    self.op_counts = 0
    self.is_looped = False
    # here p is an array with the number of Xs allowed for each operator
    self.p = p
    self.seq = ['x'] * p[self.op_counts]

    # Grid attributes
    self.grid_length = 2 * len(self.seq) + 1
    self.midpoint = (len(self.seq), len(self.seq))
    self.grid = np.zeros(shape=(self.grid_length, self.grid_length),
                         dtype=int)

    # Automatically assign first element into grid
    self.grid[self.midpoint] = POLY_TO_INT[self.seq[0]]

    # Define action-observation spaces
    self.action_space = spaces.Discrete(5)
    self.observation_space = spaces.Box(low=-2,
                                        high=1,
                                        shape=(self.grid_length,
                                               self.grid_length),
                                        dtype=int)
    self.last_action = None
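# Hedged instantiation sketch; the enclosing class name `Lattice2DEnv` is an
# assumption made only for this example. With p=[5, 3] the initial sequence has
# length p[0] = 5, so the square grid has side 2 * 5 + 1 = 11.
env = Lattice2DEnv(p=[5, 3], collision_penalty=-2, trap_penalty=0.5)
assert env.grid_length == 11
assert env.observation_space.shape == (11, 11)
assert env.action_space.n == 5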
def init(self, client_pool=None, start_minecraft=None, continuous_discrete=True, add_noop_command=None, max_retries=90, retry_sleep=10, step_sleep=0.001, skip_steps=0, videoResolution=None, videoWithDepth=None, observeRecentCommands=None, observeHotBar=None, observeFullInventory=None, observeGrid=None, observeDistance=None, observeChat=None, allowContinuousMovement=None, allowDiscreteMovement=None, allowAbsoluteMovement=None, recordDestination=None, recordObservations=None, recordRewards=None, recordCommands=None, recordMP4=None, gameMode=None, forceWorldReset=None): self.max_retries = max_retries self.retry_sleep = retry_sleep self.step_sleep = step_sleep self.skip_steps = skip_steps self.forceWorldReset = forceWorldReset self.continuous_discrete = continuous_discrete self.add_noop_command = add_noop_command self.client_pool = client_pool if videoResolution: if videoWithDepth: self.mission_spec.requestVideoWithDepth(*videoResolution) else: self.mission_spec.requestVideo(*videoResolution) if observeRecentCommands: self.mission_spec.observeRecentCommands() if observeHotBar: self.mission_spec.observeHotBar() if observeFullInventory: self.mission_spec.observeFullInventory() if observeGrid: self.mission_spec.observeGrid(*(observeGrid + ["grid"])) if observeDistance: self.mission_spec.observeDistance(*(observeDistance + ["dist"])) if observeChat: self.mission_spec.observeChat() if allowDiscreteMovement: # if there are any parameters, remove current command handlers first self.mission_spec.removeAllCommandHandlers() if allowDiscreteMovement is True: self.mission_spec.allowAllDiscreteMovementCommands() elif isinstance(allowDiscreteMovement, list): for cmd in allowDiscreteMovement: self.mission_spec.allowDiscreteMovementCommand(cmd) if start_minecraft: # start Minecraft process assigning port dynamically self.mc_process, port = minecraft_py.start() logger.info( "Started Minecraft on port %d, overriding client_pool.", port) client_pool = [('127.0.0.1', port)] """ make client_pool usable for Malmo: change format of the client_pool to struct """ if client_pool: if not isinstance(client_pool, list): raise ValueError( "client_pool must be list of tuples of (IP-address, port)") self.client_pool = MalmoPython.ClientPool() for client in client_pool: self.client_pool.add(MalmoPython.ClientInfo(*client)) """ initialize video parameters for video processing """ self.video_height = self.mission_spec.getVideoHeight(0) self.video_width = self.mission_spec.getVideoWidth(0) self.video_depth = self.mission_spec.getVideoChannels(0) self.observation_space = spaces.Box(low=0, high=255, shape=(self.video_height, self.video_width, self.video_depth)) """ dummy image just for the first observation """ self.last_image1 = np.zeros( (self.video_height, self.video_width, self.video_depth), dtype=np.float32) self.last_image2 = np.zeros( (self.video_height, self.video_width, self.video_depth), dtype=np.float32) self.create_action_space() """ mission recording """ self.mission_record_spec = MalmoPython.MissionRecordSpec( ) # record nothing if recordDestination: self.mission_record_spec.setDestination(recordDestination) if recordRewards: self.mission_record_spec.recordRewards() if recordCommands: self.mission_record_spec.recordCommands() if recordMP4: self.mission_record_spec.recordMP4(*recordMP4) """ game mode """ if gameMode: if gameMode == "spectator": self.mission_spec.setModeToSpectator() elif gameMode == "creative": self.mission_spec.setModeToCreative() elif gameMode == "survival": logger.warn( "Cannot force survival mode, 
assuming it is the default.") else: assert False, "Unknown game mode: " + gameMode
def __init__(self): """ Initializing the simulation environment. """ # Fixing the random seed -- for reproducible experiments np.random.seed(1) # Miguel: Why is this needed? self.previous_action = 0 # Bolus carb factor -- [g/U] self.bolus = 30 ## Loading variable parameters -- used if environment is extended reward_flag, bg_init_flag = self._update_parameters() # Miguel: CamelCase is not used in rest of code # Initialize bolus history -- used for insulin on board self.bolusHistoryIndex = 0 self.bolusHistoryValue = [] self.bolusHistoryTime = [] self.insulinOnBoard = np.zeros(1) # Initialize sensor model -- Miguel: do we need all of this? # self.CGMlambda = 15.96 # Johnson parameter of recalibrated and synchronized sensor error. # self.CGMepsilon = -5.471 # Johnson parameter of recalibrated and synchronized sensor error. # self.CGMdelta = 1.6898 # Johnson parameter of recalibrated and synchronized sensor error. # self.CGMgamma = -0.5444 # Johnson parameter of recalibrated and synchronized sensor error. # self.CGMerror = 0 self.sensor_noise = np.random.randn(1) # self.CGMaux = [] # self.sensorNoiseValue = 0.07 # Set a value # Model parameters P = hovorka_parameters(70) init_basal_optimal = 6.43 self.P = P self.init_basal_optimal = init_basal_optimal # Initializing the action space self.action_space = spaces.Box(0, 2 * self.init_basal_optimal, (1, ), dtype=np.float32) # Initial basal rate -- used for init and reset. Either randomly initialized or by a fixed value. if bg_init_flag == 'random': self.init_basal = np.random.choice( np.linspace(init_basal_optimal - 2, init_basal_optimal, 10)) elif bg_init_flag == 'fixed': self.init_basal = init_basal_optimal # Flag for manually resetting the init when the episode restarts self.reset_basal_manually = None self._seed() self.viewer = None # ========================================== # Setting up the Hovorka simulator # ========================================== # Initial values for parameters initial_pars = (self.init_basal, 0, P) # Initial value X0 = fsolve(hovorka_model_tuple, np.zeros(11), args=initial_pars) self.X0 = X0 # Simulation setup self.integrator = ode(hovorka_model) self.integrator.set_integrator('vode', method='bdf', order=5) self.integrator.set_initial_value(X0, 0) # Simulation time in minutes -- the default is solving the simulator 30 minutes at a time # with a solver time of one minute. self.simulation_time = 30 self.n_solver_steps = 1 self.stepsize = int(self.simulation_time / self.n_solver_steps) # State space for the environment -- [30 min of BG, last 4 insulin action, bolus (if given) during last 30 mins] self.observation_space = spaces.Box(0, 500, (int(self.stepsize + 4 + 2), ), dtype=np.float32) # State is BG, simulation_state is parameters of hovorka model initial_bg = X0[-1] * 18 initial_insulin = np.ones(4) * self.init_basal_optimal initial_iob = np.zeros(1) # Initial state self.state = np.concatenate([ np.repeat(initial_bg, self.stepsize), initial_insulin, initial_iob, np.zeros(1) ]) self.simulation_state = X0 # Keeping track of entire blood glucose level and insulin for each episode self.bg_history = [] self.insulin_history = initial_insulin # ==================== # Meal setup # ==================== # Default eating time is considered one minute -- empirically the simulations are not too sensitive to this choice. 
eating_time = 1 # Meals are carb intake and meal_indicator is the counted carbs by the patient meals, meal_indicator = meal_generator(eating_time=eating_time, premeal_bolus_time=0) self.meals = meals self.meal_indicator = meal_indicator self.eating_time = eating_time # Counter for number of iterations self.num_iters = 0 # If blood glucose is less than zero or above 500, the simulator is considered out of bounds. self.bg_threshold_low = 0 self.bg_threshold_high = 500 # The max episode lenght is 36 hours self.max_iter = 2160 # Reward flag self.reward_flag = reward_flag self.steps_beyond_done = None
def __init__(self, env=None):
    super(ProcessFrame84, self).__init__(env)
    self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))
def __init__(self, env):
    gym.ObservationWrapper.__init__(self, env)
    self.observation_space = spaces.Box(low=0,
                                        high=1.0,
                                        shape=env.observation_space.shape,
                                        dtype=np.float32)
def __init__(self, x, y, z, gamma, turnspc, policy, rg_prob='rg', rendermode='off'): self.rendermode = rendermode # on/off display block model in matplotlib # self.cutoffpenaltyscalar=penaltyscalar #scaling parameter for changing the penalty for taking no action (cutoff). self.rg_prob = rg_prob #rg for randomly generated, loadenv for loading premade envionments # self.savepath=savepath envpath = './environments' self.savedgeo = '%s/geology' % envpath # self.savedtruth='%s/truth' % envpath self.savedenv = '%s/environment' % envpath self.saveddepdic = '%s/depdict' % envpath self.savedeffdic = '%s/effdict' % envpath self.policy = policy #initiating values self.framecounter = 0 self.actionslist = list() self.reward = 0 self.discountedmined = 0 self.turncounter = 1 self.i = -1 self.j = -1 self.terminal = False self.gamma = gamma #discount factor exponential (reward*turn^discount factor) self.Imin = 0 self.Imax = x self.Jmin = 0 self.Jmax = y self.RLmin = 0 self.RLmax = z self.mined = -1 self.callnumber = 1 self.savenumber = 0 self.episodecounter = 0 try: self.maxloadid = len([ name for name in os.listdir(self.savedgeo) if os.path.isfile(os.path.join(self.savedgeo, name)) ]) except: self.maxloadid = 1 #sizing the block model environment self.Ilen = self.Imax - self.Imin self.Jlen = self.Jmax - self.Jmin self.RLlen = self.RLmax - self.RLmin #RL (z coordinate) counts up as depth increases self.channels = 2 #H2O mean, mined state, Standard deviation self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels #initiating block dependency dictionaries #self.block_dic={} self.block_dic_init = {} self.dep_dic = {} self.dep_dic_init = {} self.eff_dic_init = {} #create block model self.model = automodel(self.Ilen, self.Jlen, self.RLlen) self.build() self.startingturnspc = 0.04 self.turns = round( len(self.dep_dic) * self.startingturnspc, 0 ) #set max number of turns (actions) in each episode based on percentage of block model size. self.maxturnspc = turnspc - self.startingturnspc # Define action and observation space # They must be gym.spaces objects # Example when using discrete actions: #actions are taken on a 2D checkerboard style view of the environement. progress will be made downwards in 3D over time. self.action_space = spaces.Discrete( (self.Ilen) * (self.Jlen)) #+1) #+1 action for choosing terminal state. if self.policy == 'CnnPolicy': #observations are made of the entire environment (3D model with 3 channels, 1 channel represents mined state) self.observation_space = spaces.Box(low=-1, high=1, shape=(self.Ilen, self.Jlen, self.RLlen, self.channels), dtype=np.float64) elif self.policy == 'MlpPolicy': #observations are made of the entire environment (3D model with 3 channels, 1 channel represents mined state) self.observation_space = spaces.Box(low=-1, high=1, shape=(self.flatlen, ), dtype=np.float64)
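# Small numeric check of the observation sizes used above (values hypothetical:
# x=10, y=8, z=5). The MlpPolicy branch flattens the same tensor that the
# CnnPolicy branch keeps as a 4-D block of shape (Ilen, Jlen, RLlen, channels).
Ilen, Jlen, RLlen, channels = 10, 8, 5, 2
cnn_shape = (Ilen, Jlen, RLlen, channels)   # CnnPolicy observation shape
flatlen = Ilen * Jlen * RLlen * channels    # MlpPolicy observation length
assert flatlen == 800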
def response_space(cls):
    return spaces.Dict({
        'reward':
        spaces.Box(low=-10.0, high=5.0, shape=tuple(), dtype=np.float32)
    })
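# Hedged sketch of what the response space defined above looks like when
# sampled: a plain dict with a scalar float32 'reward' in [-10, 5]. The space is
# rebuilt directly here so the example does not depend on the (unnamed)
# enclosing class.
from gym import spaces
import numpy as np

response_space = spaces.Dict({
    'reward': spaces.Box(low=-10.0, high=5.0, shape=tuple(), dtype=np.float32)
})
sample = response_space.sample()
assert response_space.contains(sample)
assert -10.0 <= float(sample['reward']) <= 5.0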