def __init__(
    self,
    initial_wealth=25.0,
    edge_prior_alpha=7,
    edge_prior_beta=3,
    max_wealth_alpha=5.0,
    max_wealth_m=200.0,
    max_rounds_mean=300.0,
    max_rounds_sd=25.0,
    reseed=True,
    clip_distributions=False,
):
    # clip_distributions=True asserts that state and action space are not modified at reset()

    # store the hyper-parameters for passing back into __init__() during resets so
    # the same hyper-parameters govern the next game's parameters, as the user
    # expects:
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initial_wealth = float(initial_wealth)
    self.edge_prior_alpha = edge_prior_alpha
    self.edge_prior_beta = edge_prior_beta
    self.max_wealth_alpha = max_wealth_alpha
    self.max_wealth_m = max_wealth_m
    self.max_rounds_mean = max_rounds_mean
    self.max_rounds_sd = max_rounds_sd
    self.clip_distributions = clip_distributions

    if reseed or not hasattr(self, "np_random"):
        self.seed()

    # draw this game's set of parameters:
    edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
    if self.clip_distributions:
        # (clip/resample some parameters to be able to fix obs/action space sizes/bounds)
        max_wealth_bound = round(
            genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m)
        )
        max_wealth = max_wealth_bound + 1.0
        while max_wealth > max_wealth_bound:
            max_wealth = round(
                genpareto.rvs(
                    max_wealth_alpha, max_wealth_m, random_state=self.np_random
                )
            )
        max_rounds_bound = int(
            round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd))
        )
        max_rounds = max_rounds_bound + 1
        while max_rounds > max_rounds_bound:
            max_rounds = int(
                round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
            )
    else:
        max_wealth = round(
            genpareto.rvs(
                max_wealth_alpha, max_wealth_m, random_state=self.np_random
            )
        )
        max_wealth_bound = max_wealth
        max_rounds = int(
            round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
        )
        max_rounds_bound = max_rounds

    # add an additional global variable which is the sufficient statistic for the
    # Pareto distribution on wealth cap; alpha doesn't update, but x_m does, and
    # simply is the highest wealth count we've seen to date:
    self.max_ever_wealth = float(self.initial_wealth)
    # for the coinflip edge, it is total wins/losses:
    self.wins = 0
    self.losses = 0
    # for the number of rounds, we need to remember how many rounds we've played:
    self.rounds_elapsed = 0

    # the rest proceeds as before:
    self.action_space = spaces.Discrete(int(max_wealth_bound * 100))
    self.observation_space = spaces.Tuple(
        (
            spaces.Box(0, max_wealth_bound, shape=[1], dtype=np.float32),  # current wealth
            spaces.Discrete(max_rounds_bound + 1),  # rounds elapsed
            spaces.Discrete(max_rounds_bound + 1),  # wins
            spaces.Discrete(max_rounds_bound + 1),  # losses
            spaces.Box(0, max_wealth_bound, [1], dtype=np.float32),  # maximum observed wealth
        )
    )
    self.reward_range = (0, max_wealth)
    self.edge = edge
    self.wealth = self.initial_wealth
    self.max_rounds = max_rounds
    self.rounds = self.max_rounds
    self.max_wealth = max_wealth
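# --- Usage sketch (not from the source above): sampling the Tuple observation
# space built in that __init__. The bounds are illustrative (wealth cap 200,
# up to 300 rounds); each component of a spaces.Tuple is sampled independently.
import numpy as np
from gym import spaces

obs_space = spaces.Tuple((
    spaces.Box(0, 200.0, shape=[1], dtype=np.float32),  # current wealth
    spaces.Discrete(301),                               # rounds elapsed
    spaces.Discrete(301),                               # wins
    spaces.Discrete(301),                               # losses
    spaces.Box(0, 200.0, [1], dtype=np.float32),        # max observed wealth
))
sample = obs_space.sample()          # a plain Python tuple of 5 components
assert obs_space.contains(sample)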
def __init__(
    self,
    unity_env: BaseEnv,
    uint8_visual: bool = False,
    flatten_branched: bool = False,
    allow_multiple_obs: bool = False,
):
    """
    Environment initialization
    :param unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed
        when the UnityToGymWrapper closes.
    :param uint8_visual: Return visual observations as uint8 (0-255) matrices
        instead of float (0.0-1.0).
    :param flatten_branched: If True, turn branched discrete action spaces into
        a Discrete space rather than MultiDiscrete.
    :param allow_multiple_obs: If True, return a list of np.ndarrays as
        observations with the first elements containing the visual observations
        and the last element containing the array of vector observations.
        If False, returns a single np.ndarray containing either only a single
        visual observation or the array of vector observations.
    """
    self._env = unity_env
    # Take a single step so that the brain information will be sent over
    if not self._env.behavior_specs:
        self._env.step()
    self.visual_obs = None
    # Save the step result from the last time all Agents requested decisions.
    self._previous_decision_step: DecisionSteps = None
    self._flattener = None
    # Hidden flag used by Atari environments to determine if the game is over
    self.game_over = False
    self._allow_multiple_obs = allow_multiple_obs

    # Check brain configuration
    if len(self._env.behavior_specs) != 1:
        raise UnityGymException(
            "There can only be one behavior in a UnityEnvironment "
            "if it is wrapped in a gym."
        )

    self.name = list(self._env.behavior_specs.keys())[0]
    self.group_spec = self._env.behavior_specs[self.name]

    if self._get_n_vis_obs() == 0 and self._get_vec_obs_size() == 0:
        raise UnityGymException(
            "There are no observations provided by the environment."
        )

    if not self._get_n_vis_obs() >= 1 and uint8_visual:
        logger.warning(
            "uint8_visual was set to true, but visual observations are not in use. "
            "This setting will not have any effect."
        )
    else:
        self.uint8_visual = uint8_visual

    if (
        self._get_n_vis_obs() + self._get_vec_obs_size() >= 2
        and not self._allow_multiple_obs
    ):
        logger.warning(
            "The environment contains multiple observations. "
            "You must define allow_multiple_obs=True to receive them all. "
            "Otherwise, only the first visual observation (or vector observation if "
            "there are no visual observations) will be provided in the observation."
        )

    # Check for number of agents in scene.
    self._env.reset()
    decision_steps, _ = self._env.get_steps(self.name)
    self._check_agents(len(decision_steps))
    self._previous_decision_step = decision_steps

    # Set action spaces
    if self.group_spec.is_action_discrete():
        branches = self.group_spec.discrete_action_branches
        if self.group_spec.action_shape == 1:
            self._action_space = spaces.Discrete(branches[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(branches)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(branches)
    else:
        if flatten_branched:
            logger.warning(
                "The environment has a non-discrete action space. It will "
                "not be flattened."
            )
        high = np.array([1] * self.group_spec.action_shape)
        self._action_space = spaces.Box(-high, high, dtype=np.float32)

    # Set observations space
    list_spaces: List[gym.Space] = []
    shapes = self._get_vis_obs_shape()
    for shape in shapes:
        if uint8_visual:
            list_spaces.append(spaces.Box(0, 255, dtype=np.uint8, shape=shape))
        else:
            list_spaces.append(spaces.Box(0, 1, dtype=np.float32, shape=shape))
    if self._get_vec_obs_size() > 0:
        # vector observation is last
        high = np.array([np.inf] * self._get_vec_obs_size())
        list_spaces.append(spaces.Box(-high, high, dtype=np.float32))
    if self._allow_multiple_obs:
        self._observation_space = spaces.Tuple(list_spaces)
    else:
        self._observation_space = list_spaces[0]  # only return the first one
def __init__(self,
             block_random=0.3,
             camera_random=0,
             simple_observations=False,
             continuous=False,
             remove_height_hack=False,
             urdf_list=None,
             render_mode='GUI',
             num_objects=5,
             dv=0.06,
             target=False,
             target_filenames=None,
             non_target_filenames=None,
             num_resets_per_setup=1,
             render_width=128,
             render_height=128,
             downsample_width=64,
             downsample_height=64,
             test=False,
             allow_duplicate_objects=True,
             max_num_training_models=900,
             max_num_test_models=100):
    """Creates a KukaGraspingEnv.

    Args:
      block_random: How much randomness to use in positioning blocks.
      camera_random: How much randomness to use in positioning camera.
      simple_observations: If True, observations are the position and
        orientation of end-effector and closest block, rather than images.
      continuous: If True, actions are continuous, else discrete.
      remove_height_hack: If True and continuous is True, add a dz component
        to action space.
      urdf_list: List of objects to populate the bin with.
      render_mode: GUI, DIRECT, or TCP.
      num_objects: The number of random objects to load.
      dv: Velocity magnitude of cartesian dx, dy, dz actions per time step.
      target: If True, then we receive reward only for grasping one "target"
        object.
      target_filenames: Objects that we want to grasp.
      non_target_filenames: Objects that we don't want to grasp.
      num_resets_per_setup: How many env resets before calling setup again.
      render_width: Width of camera image to render with.
      render_height: Height of camera image to render with.
      downsample_width: Width of image observation.
      downsample_height: Height of image observation.
      test: If True, uses test split of objects.
      allow_duplicate_objects: If True, samples URDFs with replacement.
      max_num_training_models: The number of distinct models to choose from
        when selecting the num_objects placed in the tray for training.
      max_num_test_models: The number of distinct models to choose from when
        selecting the num_objects placed in the tray for testing.
    """
    self._time_step = 1. / 200.
    self._max_steps = 15

    # Open-source search paths.
    self._urdf_root = OSS_DATA_ROOT
    self._models_dir = os.path.join(self._urdf_root, 'random_urdfs')

    self._action_repeat = 200
    self._env_step = 0
    self._renders = render_mode in ['GUI', 'TCP']
    # Size we render at.
    self._width = render_width
    self._height = render_height
    # Size we downsample to.
    self._downsample_width = downsample_width
    self._downsample_height = downsample_height
    self._target = target
    self._num_objects = num_objects
    self._dv = dv
    self._urdf_list = urdf_list
    if target_filenames:
        target_filenames = [self._get_urdf_path(f) for f in target_filenames]
    if non_target_filenames:
        non_target_filenames = [
            self._get_urdf_path(f) for f in non_target_filenames
        ]
    self._object_filenames = (target_filenames or []) + (non_target_filenames or [])
    self._target_filenames = target_filenames or []
    self._block_random = block_random
    self._cam_random = camera_random
    self._simple_obs = simple_observations
    self._continuous = continuous
    self._remove_height_hack = remove_height_hack
    self._resets = 0
    self._num_resets_per_setup = num_resets_per_setup
    self._test = test
    self._allow_duplicate_objects = allow_duplicate_objects
    self._max_num_training_models = max_num_training_models
    self._max_num_test_models = max_num_test_models

    if render_mode == 'GUI':
        self.cid = pybullet.connect(pybullet.GUI)
        pybullet.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
    elif render_mode == 'DIRECT':
        self.cid = pybullet.connect(pybullet.DIRECT)
    elif render_mode == 'TCP':
        self.cid = pybullet.connect(pybullet.TCP, 'localhost', 6667)

    self.setup()
    if self._continuous:
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,))
        if self._remove_height_hack:
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(5,))  # dx, dy, dz, da, close
    else:
        self.action_space = spaces.Discrete(8)
        if self._remove_height_hack:
            self.action_space = spaces.Discrete(10)

    if self._simple_obs:
        # (3 pos + 4 quat) x 2
        self.observation_space = spaces.Box(low=-100, high=100, shape=(14,))
    else:
        # image (self._height, self._width, 3) x position of the gripper (3,)
        img_space = spaces.Box(
            low=0, high=255,
            shape=(self._downsample_height, self._downsample_width, 3))
        pos_space = spaces.Box(low=-5, high=5, shape=(3,))
        self.observation_space = spaces.Tuple((img_space, pos_space))
    self.viewer = None
def __init__(self, spec):
    self.spec = spec
    self.space = spaces.Tuple([conv.space for _, conv in spec])
def __init__(self):
    #
    # environment definition
    #
    self.descriptions = {
        "living": [
            "This room has a couch, chairs and TV.",
            "You have entered the living room. You can watch TV here.",
            "This room has two sofas, chairs and a chandelier."
        ],
        "garden": [
            "This space has a swing, flowers and trees.",
            "You have arrived at the garden. You can exercise here.",
            "This area has plants, grass and rabbits."
        ],
        "kitchen": [
            "This room has a fridge, oven, and a sink.",
            "You have arrived in the kitchen. You can find food and drinks here.",
            "This living area has pizza, coke, and icecream."
        ],
        "bedroom": [
            "This area has a bed, desk and a dresser.",
            "You have arrived in the bedroom. You can rest here.",
            "You see a wooden cot and a mattress on top of it."
        ],
        "pantry": [
            "A small room for storing food and other kinds of goods.",
            "This area is usually used for preparing cold foods.",
        ],
        "hall": [
            "This seems to be the entrance room of the house.",
        ],
    }
    self.rooms = self.descriptions.keys()
    self.env_objects = {
        "tv": "A huge television that is great for watching games.",
        "apple": "A red juicy fruit.",
        "cheese": "A good old emmentaler.",
        "pizza": "A delicious pizza margherita.",
        "rbutton": "A red button.",
        "gbutton": "A green button.",
        "bbutton": "A blue button.",
        "red": "A red fluid",
        "green": "A green fluid",
        "blue": "A blue fluid",
        "recipe_book": "A book full of recipes.",
    }
    self.definitions = {
        ("eat apple"): [
            {"conds": {"room": "kitchen", "quest": "hungry", "poisoned": "apple"},
             "effs": {"dead": True}},
            {"conds": {"room": "kitchen", "quest": "hungry", "old": "apple"},
             "effs": {"info": "old_food"}},
            {"conds": {"room": "kitchen", "quest": "hungry"},
             "effs": {"quest": ""}},
        ],
        ("eat cheese"): [
            {"conds": {"room": "kitchen", "quest": "hungry", "poisoned": "cheese"},
             "effs": {"dead": True}},
            {"conds": {"room": "kitchen", "quest": "hungry", "old": "cheese"},
             "effs": {"info": "old_food"}},
            {"conds": {"room": "kitchen", "quest": "hungry"},
             "effs": {"quest": ""}},
        ],
        ("eat pizza"): [
            {"conds": {"room": "kitchen", "quest": "hungry", "poisoned": "pizza"},
             "effs": {"dead": True}},
            {"conds": {"room": "kitchen", "quest": "hungry", "old": "pizza"},
             "effs": {"info": "old_food"}},
            {"conds": {"room": "kitchen", "quest": "hungry"},
             "effs": {"quest": ""}},
        ],
        ("watch tv"): [
            {"conds": {"room": "living", "quest": "bored", "energy": True},
             "effs": {"quest": ""}},
            {"conds": {"room": "living", "quest": "bored", "energy": False},
             "effs": {"info": "energy_error"}},
        ],
        ("press rbutton"): [
            {"conds": {"room": "pantry", "energy_btn": "rbutton"},
             "effs": {"energy": True}},
            {"conds": {"room": "pantry", "shock_btn": "rbutton"},
             "effs": {"dead": True}},
            {"conds": {"room": "pantry"}, "effs": {}},
        ],
        ("press gbutton"): [
            {"conds": {"room": "pantry", "energy_btn": "gbutton"},
             "effs": {"energy": True}},
            {"conds": {"room": "pantry", "shock_btn": "gbutton"},
             "effs": {"dead": True}},
            {"conds": {"room": "pantry"}, "effs": {}},
        ],
        ("press bbutton"): [
            {"conds": {"room": "pantry", "energy_btn": "bbutton"},
             "effs": {"energy": True}},
            {"conds": {"room": "pantry", "shock_btn": "bbutton"},
             "effs": {"dead": True}},
            {"conds": {"room": "pantry"}, "effs": {}},
        ],
        ("read recipe_book"): [
            {"conds": {"room": "garden"}, "effs": {"info": "recipe_info"}},
        ],
        #
        # Ingredients
        #
        ("read red"): [{"conds": {"room": "bedroom"}, "effs": {}}],
        ("read green"): [{"conds": {"room": "bedroom"}, "effs": {}}],
        ("read blue"): [{"conds": {"room": "bedroom"}, "effs": {}}],
        ####################################################################
        ("drink red"): [
            {"conds": {"room": "bedroom", "quest": "sleepy", "recipe_good": "red"},
             "effs": {"quest": ""}},
            {"conds": {"room": "bedroom", "recipe_bad": "red"},
             "effs": {"quest": "", "dead": True}},
        ],
        ("drink green"): [
            {"conds": {"room": "bedroom", "quest": "sleepy", "recipe_good": "green"},
             "effs": {"quest": ""}},
            {"conds": {"room": "bedroom", "recipe_bad": "green"},
             "effs": {"quest": "", "dead": True}},
        ],
        ("drink blue"): [
            {"conds": {"room": "bedroom", "quest": "sleepy", "recipe_good": "blue"},
             "effs": {"quest": ""}},
            {"conds": {"room": "bedroom", "recipe_bad": "blue"},
             "effs": {"quest": "", "dead": True}},
        ],
        #
        # Move in direction
        #
        ("go north"): [
            {"conds": {"room": "bedroom"}, "effs": {"room": "living"}},
            {"conds": {"room": "kitchen"}, "effs": {"room": "garden"}},
            {"conds": {"room": "pantry"}, "effs": {"room": "kitchen"}},
        ],
        ("go south"): [
            {"conds": {"room": "living"}, "effs": {"room": "bedroom"}},
            {"conds": {"room": "garden"}, "effs": {"room": "kitchen"}},
            {"conds": {"room": "kitchen"}, "effs": {"room": "pantry"}},
        ],
        ("go east"): [
            {"conds": {"room": "living"}, "effs": {"room": "garden"}},
            {"conds": {"room": "bedroom"}, "effs": {"room": "kitchen"}},
            {"conds": {"room": "hall"}, "effs": {"room": "living"}},
        ],
        ("go west"): [
            {"conds": {"room": "garden"}, "effs": {"room": "living"}},
            {"conds": {"room": "kitchen"}, "effs": {"room": "bedroom"}},
            {"conds": {"room": "living"}, "effs": {"room": "hall"}},
        ],
    }
    self.text = {
        "quest": {
            "hungry": "You are hungry",
            "sleepy": "You are sleepy",
            "bored": "You are bored",
            "fat": "You are getting fat",
        },
        "mislead": {
            "hungry": "You are not hungry",
            "sleepy": "You are not sleepy",
            "bored": "You are not bored",
            "fat": "You are not getting fat",
        },
        "info": {
            "energy_error": "Seems the tv does not work because of missing energy. "
                            "Press the {} in the pantry.",
            "old_food": "The food does not seem good anymore.",
            "food_warning": "You cannot enjoy the {} anymore, it is old! "
                            "Attention: do not eat the poisoned {}.",
            "recipe_wrong": "The recipe seems to have the wrong effect."
        },
        "recipies": {
            0: "To get {0} you should take the {1} drink.",
            1: "Effect {0}: One needs to use a {1} sweet drink.",
            2: "Take a drink which is {1} to get {0}.",
        }
    }

    HomeWorld.__init__(self)

    self.actions = list({a.split(" ")[0] for a in self.definitions})
    self.objects = list({a.split(" ")[1] for a in self.definitions})
    self.num_actions = len(self.actions)
    self.num_objects = len(self.objects)

    self.quests = ['hungry', 'sleepy', 'bored']
    self.quest_actions = ['eat', 'sleep', 'watch']
    self.extra_vocab = ['nothing', 'happend', 'not', 'but', 'now']

    self.state = {
        "room": "",
        "description": "",
        "info": "",
        "quest": "",
        "mislead": "",
        "old": "",
        "poisoned": "",
        "energy": "",
        "shock_btn": "",
        "energy_btn": "",
        "recipe_good": "",
        "recipe_bad": "",
        "dead": False
    }

    self.init_vocab()
    self.vocab_space = self.get_vocab_size()
    self.action_space = spaces.Tuple((spaces.Discrete(self.num_actions),
                                      spaces.Discrete(self.num_objects)))
    self.observation_space = None
    self.seq_length = 50
def __init__(self, env_config):
    self.multi_goal = env_config.get("multi_goal", False)
    self.generalize = env_config.get("generalize", False)
    num_valid = env_config.get("num_valid", 50)
    self.specs_save = env_config.get("save_specs", False)
    self.valid = env_config.get("run_valid", False)

    self.env_steps = 0
    with open(TwoStageAmp.CIR_YAML, 'r') as f:
        yaml_data = yaml.load(f, OrderedDictYAMLLoader)

    # design specs
    if self.generalize == False:
        specs = yaml_data['target_specs']
    else:
        load_specs_path = TwoStageAmp.path + "/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp"
        with open(load_specs_path, 'rb') as f:
            specs = pickle.load(f)

    self.specs = OrderedDict(sorted(specs.items(), key=lambda k: k[0]))
    if self.specs_save:
        with open("specs_" + str(num_valid) + str(random.randint(1, 100000)), 'wb') as f:
            pickle.dump(self.specs, f)

    self.specs_ideal = []
    self.specs_id = list(self.specs.keys())
    self.fixed_goal_idx = -1
    self.num_os = len(list(self.specs.values())[0])

    # param array
    params = yaml_data['params']
    self.params = []
    self.params_id = list(params.keys())
    for value in params.values():
        param_vec = np.arange(value[0], value[1], value[2])
        self.params.append(param_vec)

    # initialize sim environment
    self.sim_env = TwoStageClass(yaml_path=TwoStageAmp.CIR_YAML,
                                 num_process=1,
                                 path=TwoStageAmp.path)
    self.action_meaning = [-1, 0, 2]
    self.action_space = spaces.Tuple(
        [spaces.Discrete(len(self.action_meaning))] * len(self.params_id))
    # self.action_space = spaces.Discrete(len(self.action_meaning)**len(self.params_id))
    self.observation_space = spaces.Box(
        low=np.array([TwoStageAmp.PERF_LOW] * 2 * len(self.specs_id) +
                     len(self.params_id) * [1]),
        high=np.array([TwoStageAmp.PERF_HIGH] * 2 * len(self.specs_id) +
                      len(self.params_id) * [1]))

    # initialize current param/spec observations
    self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
    self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)

    # Get the g* (overall design spec) you want to reach
    self.global_g = []
    for spec in list(self.specs.values()):
        self.global_g.append(float(spec[self.fixed_goal_idx]))
    self.g_star = np.array(self.global_g)
    self.global_g = np.array(yaml_data['normalize'])

    # objective number (used for validation)
    self.obj_idx = 0
def _set_action_space(self):
    self.action_space = spaces.Tuple(
        tuple([spaces.Discrete(6)] +
              [spaces.Discrete(self._radio_vocab_size)] * self._radio_num_words))
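# --- Illustrative sketch (values assumed, not from the source): with
# _radio_vocab_size=10 and _radio_num_words=2, the Tuple above samples
# actions of the form (movement, word_0, word_1).
from gym import spaces

action_space = spaces.Tuple(
    tuple([spaces.Discrete(6)] + [spaces.Discrete(10)] * 2))
move, word_0, word_1 = action_space.sample()
assert 0 <= move < 6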
from gym import spaces

from ray.rllib.env.async_vector_env import AsyncVectorEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.test.test_external_env import SimpleServing
from ray.tune.registry import register_env

DICT_SPACE = spaces.Dict({
    "sensors": spaces.Dict({
        "position": spaces.Box(low=-100, high=100, shape=(3,)),
        "velocity": spaces.Box(low=-1, high=1, shape=(3,)),
        "front_cam": spaces.Tuple(
            (spaces.Box(low=0, high=1, shape=(10, 10, 3)),
             spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam": spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state": spaces.Dict({
        "charge": spaces.Discrete(100),
        "job_status": spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        })
    })
})
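# --- Usage sketch (assumption, not part of the source file): nested
# Dict/Tuple spaces sample to plain dicts/tuples, so components are
# addressed by key and index.
sample = DICT_SPACE.sample()
front_left, front_right = sample["sensors"]["front_cam"]
assert DICT_SPACE.contains(sample)
assert front_left.shape == (10, 10, 3)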
def __init__(self,
             image_paths,
             true_bboxes,
             playout_episode=False,
             premasking=True,
             mode='train',
             max_steps_per_image=200,
             seed=None,
             bbox_scaling_w=0.05,
             bbox_scaling_h=0.1,
             bbox_transformer='base',
             has_termination_action=True,
             has_intermediate_reward=False,
             ior_marker_type='cross',
             history_length=10,
             assessor_model=None,
             train_assessor=False,
             grayscale=False,
             use_cut_area=False):
    """
    :param image_paths: The paths to the individual images
    :param true_bboxes: The true bounding boxes for each image
    :type image_paths: String or list
    :type true_bboxes: numpy.ndarray
    """
    # Determines whether the agent is training or testing.
    # Optimizations can be applied during training that are not allowed for testing.
    self.mode = mode
    # Factors for scaling all bounding boxes relative to their size
    self.bbox_scaling_w = bbox_scaling_w
    self.bbox_scaling_h = bbox_scaling_h
    # Whether IoR markers will be placed upfront after loading the image
    self.premasking = premasking
    # Whether an episode terminates after a single trigger or is played out until the end
    self.playout_episode = playout_episode
    # Episodes will be terminated automatically after reaching max steps
    self.max_steps_per_image = max_steps_per_image
    # Whether a termination action should be provided in the action set
    self.has_termination_action = has_termination_action
    # Whether a reward will be given for each non-trigger action based on the best gt iou
    self.has_intermediate_reward = has_intermediate_reward
    # The type of IoR marker to be used when masking trigger regions
    self.ior_marker_type = ior_marker_type
    # Length of history in state & agent model
    self.history_length = history_length
    # Whether to return grayscale, 1-channel environment images
    self.grayscale = grayscale
    # Use tightness-aware IoU for reward (incorporating cut gt)
    self.use_cut_area = use_cut_area

    # Initialize action space
    self.bbox_transformer = create_bbox_transformer(bbox_transformer)
    self.action_space = spaces.Discrete(len(self.action_set))
    if self.grayscale:
        # 450*450*1 (grayscale image) + history_length * |action_set| (one-hot-encoded history)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(450, 450, 1)),
            spaces.Box(low=0, high=1,
                       shape=(self.history_length, len(self.action_set)))
        ])
    else:
        # 450*450*3 (RGB image) + history_length * |action_set| (one-hot-encoded history)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(450, 450, 3)),
            spaces.Box(low=0, high=1,
                       shape=(self.history_length, len(self.action_set)))
        ])

    # Initialize dataset
    if type(image_paths) is not list:
        image_paths = [image_paths]
    self.image_paths = image_paths
    self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                        for bboxes in true_bboxes]

    # For registering a handler that will be executed once after a step
    self.post_step_handler = None

    # Episode-specific
    # Image for the current episode
    self.episode_image = None
    self.current_image_index = 0
    # Ground truth bounding boxes for the current episode image
    self.episode_true_bboxes = None
    # Predicted bounding boxes for the current episode image
    self.episode_pred_bboxes = None
    # IoU values for each trigger in the current episode
    self.episode_trigger_ious = None
    # List of indices of masked bounding boxes for the current episode image
    self.episode_masked_indices = []
    # Number of trigger actions used so far
    self.num_triggers_used = 0
    # Number of episodes rolled out so far
    self.episode_count = 0
    # ID of last action taken
    self.last_action_taken = -1

    # For rendering
    self.viewer = None

    # Assessor (weak supervision)
    self.assessor = assessor_model
    self.train_assessor = train_assessor

    self.resize = Resize((450, 450), interpolation=InterpolationMode.NEAREST)

    self.seed(seed=seed)
    self.reset()
def __init__(self, env=None):
    gym.ObservationWrapper.__init__(self, env)
    self.observation_space = spaces.Tuple([self.observation_space])
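# --- Completion sketch (assumption: the intent is to wrap each observation
# in a 1-tuple matching the Tuple space declared above). The class name
# TupleObsWrapper is hypothetical; gym.ObservationWrapper routes every
# observation through observation().
import gym
from gym import spaces

class TupleObsWrapper(gym.ObservationWrapper):
    def __init__(self, env=None):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = spaces.Tuple([self.observation_space])

    def observation(self, observation):
        return (observation,)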
def __init__(self,
             bodies: SystemScope = SystemScope.ALL,
             start_body: SolarSystemPlanet = None,
             target_bodies: List[SolarSystemPlanet] = None,
             start_time: Time = None,
             action_step: TimeDelta = TimeDelta(1 * u.minute),
             simulation_step: TimeDelta = TimeDelta(1 * u.second),
             spaceship_name: SpaceShipName = SpaceShipName.DEFAULT,
             spaceship_initial_altitude: u.km = 400 * u.km,
             spaceship_mass: u.kg = None,
             spaceship_propellant_mass: u.kg = None,
             spaceship_isp: u.s = None,
             spaceship_engine_thrust: u.N = None):
    super(SolarSystemGrav, self).__init__()

    if start_body is None:
        start_body = Earth
    if target_bodies is None:
        target_bodies = [Mars]
    if start_time is None:
        start_time = Time(datetime.now()).tdb

    # todo: enforce action_step/simulation_step is an integer?
    self.start_body = start_body
    self.target_bodies = target_bodies
    self.spaceship_initial_altitude = spaceship_initial_altitude
    self.start_time = start_time
    self.current_time = None
    self.time_step = action_step
    self.simulation_step = simulation_step
    self.done = False
    self.reward = 0
    self.spaceship_name = spaceship_name
    self.spaceship_mass = spaceship_mass
    self.spaceship_propellant_mass = spaceship_propellant_mass
    self.spaceship_isp = spaceship_isp
    self.spaceship_engine_thrust = spaceship_engine_thrust

    # set up solar system
    solar_system_ephemeris.set("jpl")  # Download & use JPL Ephem

    # define bodies to model
    # poliastro.bodies.SolarSystemPlanet =
    #   Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn, Uranus, Neptune, Pluto
    # could also add versions for: only inner solar system, only 'major' bodies,
    # Jovian moons, Saturn's moons?
    body_dict = {
        SystemScope.EARTH: [Earth, Moon],
        SystemScope.ALL: [
            Sun, Earth, Moon, Mercury, Venus, Mars,
            Jupiter, Saturn, Uranus, Neptune, Pluto
        ]
    }
    try:
        self.body_list = body_dict[bodies]
    except KeyError:
        raise KeyError(f"bodies must be one of {body_dict.keys()}")

    # set up spacecraft
    self.spaceship = self._init_spaceship()
    self.current_ephem = None

    # init:
    # * which bodies are modelled
    # * what time it is
    # * what time_step to use
    # * target body
    # * spaceship pos/vel (orbit?) /fuel/thrust
    # init must define action & observation space

    # Define action and observation space; they must be gym.spaces objects.
    # observation: time_step, craft position, craft velocity, craft fuel,
    # craft engine power; bodies: position, velocity, mass
    # [time_step, [craft position, velocity, fuel, engine power],
    #  [body_1_is_target, body_1_position, body_1_velocity, body_1_mass],
    #  ...
    #  [body_n_is_target, body_n_position, body_n_velocity, body_n_mass]]
    self.observation_space = spaces.Space()

    # action: tuple [[x,y,z], burn duration]
    self.action_space = spaces.Tuple((
        spaces.Box(low=-1.0, high=1.0, shape=(3,)),  # x,y,z direction vector
        spaces.Box(low=0.0, high=1.0, shape=(1,))    # burn duration as percent of time_step
    ))
def __init__(self,
             urdfRoot=pybullet_data.getDataPath(),
             actionRepeat=1,
             isEnableSelfCollision=True,
             renders=False,
             isDiscrete=False):
    self._timeStep = 1. / 240.
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._isEnableSelfCollision = isEnableSelfCollision
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    # self._cam_dist = 1.3
    # self._cam_yaw = 180
    # self._cam_pitch = -40
    self._cam_dist = 0.3
    # self._cam_yaw = 45
    self._cam_roll = 0
    self._cam_yaw = 90
    self._cam_pitch = -40
    # self._width = 341
    # self._height = 256
    self._kinect_rgb_width = 1920
    self._kinect_rgb_height = 1080
    self._kinect_d_width = 512
    self._kinect_d_height = 424
    self._handcamera_width = 640
    self._handcamera_height = 480
    self._isDiscrete = isDiscrete
    # self._isBox = isBox
    self.terminated = 0
    self._p = p
    if self._renders:
        cid = p.connect(p.SHARED_MEMORY)
        if (cid < 0):
            p.connect(p.GUI)
        p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
    else:
        p.connect(p.DIRECT)
    # timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
    self.seed()
    self.reset()
    observationDim = len(self.getExtendedObservation())
    # print("observationDim")
    # print(observationDim)

    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
    if (self._isDiscrete):
        self.action_space = spaces.Discrete(7)
    else:
        action_dim = 12
        self._action_bound = 1
        action_high = np.array([self._action_bound] * action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)

    self._proximity_low = np.array([0] * 3)
    self._proximity_high = np.array([1] * 3)
    self._force_low = np.array([0] * 3)
    self._force_high = np.array([10] * 3)
    self.observation_space = spaces.Tuple(
        (spaces.Box(low=0, high=255,
                    shape=(self._kinect_rgb_height, self._kinect_rgb_width, 4),
                    dtype=np.uint8),
         spaces.Box(self._proximity_low, self._proximity_high, dtype=np.float32),
         spaces.Box(self._force_low, self._force_high, dtype=np.float32)))
    self.viewer = None
def response_space(self):
    res_space = self._response_model_ctor.response_space()
    return spaces.Tuple(tuple([res_space] * self._slate_size))
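# --- Illustrative sketch (not from the source): for a slate of size 3 whose
# per-item response model exposes a Dict response space, the Tuple above
# simply repeats that space once per slate position.
from gym import spaces

item_response = spaces.Dict({"click": spaces.Discrete(2)})
slate_response = spaces.Tuple((item_response,) * 3)
assert len(slate_response.spaces) == 3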
def __init__(self, spaces):
    super().__init__(gym_space=gym_spaces.Tuple(spaces))
def __init__(self,
             environment_filename=None,
             docker_training=False,
             worker_id=0,
             retro=True,
             timeout_wait=30,
             realtime_mode=False,
             config=None,
             greyscale=False):
    """
    Arguments:
      environment_filename: The file path to the Unity executable. Does not
        require the extension.
      docker_training: Whether this is running within a docker environment and
        should use a virtual frame buffer (xvfb).
      worker_id: The index of the worker in the case where multiple
        environments are running. Each environment reserves port
        (5005 + worker_id) for communication with the Unity executable.
      retro: Resize visual observation to 84x84 (int8) and flattens action space.
      timeout_wait: Time for python interface to wait for environment to connect.
      realtime_mode: Whether to render the environment window image and run
        environment at realtime.
    """
    self._env = UnityEnvironment(environment_filename,
                                 worker_id,
                                 docker_training=docker_training,
                                 timeout_wait=timeout_wait)

    split_name = self._env.academy_name.split('-v')
    if len(split_name) == 2 and split_name[0] == "ObstacleTower":
        self.name, self.version = split_name
    else:
        raise UnityGymException(
            "Attempting to launch non-Obstacle Tower environment")

    if self.version not in self.ALLOWED_VERSIONS:
        raise UnityGymException(
            "Invalid Obstacle Tower version. Your build is v" + self.version +
            " but only the following versions are compatible with this gym: " +
            str(self.ALLOWED_VERSIONS))

    self.visual_obs = None
    self._current_state = None
    self._n_agents = None
    self._flattener = None
    self._greyscale = greyscale

    # Environment reset parameters
    self._seed = None
    self._floor = None

    self.realtime_mode = realtime_mode
    self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
    self.retro = retro
    if config is not None:
        self.config = config
    else:
        self.config = None
    flatten_branched = self.retro
    uint8_visual = self.retro

    # Check brain configuration
    if len(self._env.brains) != 1:
        raise UnityGymException(
            "There can only be one brain in a UnityEnvironment "
            "if it is wrapped in a gym.")
    self.brain_name = self._env.external_brain_names[0]
    brain = self._env.brains[self.brain_name]

    if brain.number_visual_observations == 0:
        raise UnityGymException("Environment provides no visual observations.")

    self.uint8_visual = uint8_visual

    if brain.number_visual_observations > 1:
        logger.warning(
            "The environment contains more than one visual observation. "
            "Please note that only the first will be provided in the observation.")

    # Check for number of agents in scene.
    initial_info = self._env.reset(train_mode=not self.realtime_mode)[self.brain_name]
    self._check_agents(len(initial_info.agents))

    # Set observation and action spaces
    if len(brain.vector_action_space_size) == 1:
        self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
    else:
        if flatten_branched:
            self._flattener = ActionFlattener(brain.vector_action_space_size)
            self._action_space = self._flattener.action_space
        else:
            self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)
    high = np.array([np.inf] * brain.vector_observation_space_size)
    self.action_meanings = brain.vector_action_descriptions

    if self._greyscale:
        depth = 1
    else:
        depth = 3

    image_space_max = 1.0
    image_space_dtype = np.float32
    camera_height = brain.camera_resolutions[0]["height"]
    camera_width = brain.camera_resolutions[0]["width"]
    if self.retro:
        image_space_max = 255
        image_space_dtype = np.uint8
        camera_height = 84
        camera_width = 84

    image_space = spaces.Box(
        0, image_space_max,
        dtype=image_space_dtype,
        shape=(camera_height, camera_width, depth))

    if self.retro:
        self._observation_space = image_space
    else:
        max_float = np.finfo(np.float32).max
        keys_space = spaces.Discrete(5)
        time_remaining_space = spaces.Box(
            low=0.0, high=max_float, shape=(1,), dtype=np.float32)
        floor_space = spaces.Discrete(9999)
        self._observation_space = spaces.Tuple(
            (image_space, keys_space, time_remaining_space, floor_space))
def test_split_batch_fn():
    # from continuum.datasets import MNIST
    batch_size = 5
    max_batches = 10

    def split_batch_fn(
        batch: Tuple[Tensor, Tensor, Tensor]
    ) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
        x, y, t = batch
        return (x, t), y

    # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]))
    from continuum import ClassIncremental
    from continuum.datasets import MNIST
    from continuum.tasks import split_train_val

    scenario = ClassIncremental(
        MNIST("data", download=True, train=True),
        increment=2,
        transformations=Compose([Transforms.to_tensor, Transforms.three_channels]),
    )

    classes_per_task = scenario.nb_classes // scenario.nb_tasks
    print(f"Number of classes per task {classes_per_task}.")

    for i, task_dataset in enumerate(scenario):
        env = PassiveEnvironment(
            task_dataset,
            n_classes=classes_per_task,
            batch_size=batch_size,
            split_batch_fn=split_batch_fn,
            # Need to pass the observation space, in this case.
            observation_space=spaces.Tuple([
                spaces.Box(low=0, high=1, shape=(3, 28, 28)),
                spaces.Discrete(scenario.nb_tasks),  # task label
            ]),
            action_space=spaces.Box(
                low=np.array([i * classes_per_task]),
                high=np.array([(i + 1) * classes_per_task]),
                dtype=int,
            ),
        )
        assert spaces.Box(
            low=np.array([i * classes_per_task]),
            high=np.array([(i + 1) * classes_per_task]),
            dtype=int,
        ).shape == (1,)

        assert isinstance(env.observation_space[0], spaces.Box)
        assert env.observation_space[0].shape == (batch_size, 3, 28, 28)
        assert env.observation_space[1].shape == (batch_size,)
        assert env.action_space.shape == (batch_size, 1)
        assert env.reward_space.shape == (batch_size, 1)

        env.seed(123)

        obs = env.reset()
        assert len(obs) == 2
        x, t = obs
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)

        obs, reward, done, info = env.step(env.action_space.sample())
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)
        assert reward.shape == (batch_size,)
        assert not done

        env.close()
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None,
             done_callback=None, shared_viewer=True):
    self.world = world
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even continuous actions will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(
        world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(
        world, 'collaborative') else False
    self.time = 0

    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p,), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c,), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete(
                    [[0, act_space.n - 1] for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)

    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
def __init__(self,
             image_paths,
             true_bboxes,
             playout_episode=False,
             premasking=True,
             mode='train',
             max_steps_per_image=200,
             seed=None,
             bbox_scaling=0.125,
             bbox_transformer='base',
             has_termination_action=True,
             ior_marker_type='cross',
             history_length=10):
    """
    :param image_paths: The paths to the individual images
    :param true_bboxes: The true bounding boxes for each image
    :type image_paths: String or list
    :type true_bboxes: numpy.ndarray
    """
    # Determines whether the agent is training or testing.
    # Optimizations can be applied during training that are not allowed for testing.
    self.mode = mode
    # Factor for scaling all bounding boxes relative to their size
    self.bbox_scaling = bbox_scaling
    # Whether IoR markers will be placed upfront after loading the image
    self.premasking = premasking
    # Whether an episode terminates after a single trigger or is played out until the end
    self.playout_episode = playout_episode
    # Episodes will be terminated automatically after reaching max steps
    self.max_steps_per_image = max_steps_per_image
    # Whether a termination action should be provided in the action set
    self.has_termination_action = has_termination_action
    # The type of IoR marker to be used when masking trigger regions
    self.ior_marker_type = ior_marker_type
    # Length of history in state & agent model
    self.history_length = history_length

    # Initialize action space
    self.bbox_transformer = create_bbox_transformer(bbox_transformer)
    self.action_space = spaces.Discrete(len(self.action_set))
    # 224*224*3 (RGB image) + 9 * 10 (one-hot-encoded history) = 150618
    self.observation_space = spaces.Tuple([
        spaces.Box(low=0, high=256, shape=(224, 224, 3)),
        spaces.Box(low=0, high=1,
                   shape=(self.history_length, len(self.action_set)))
    ])

    # Initialize dataset
    if type(image_paths) is not list:
        image_paths = [image_paths]
    self.image_paths = image_paths
    self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                        for bboxes in true_bboxes]

    # For registering a handler that will be executed once after a step
    self.post_step_handler = None

    # Episode-specific
    # Image for the current episode
    self.episode_image = None
    # Ground truth bounding boxes for the current episode image
    self.episode_true_bboxes = None
    # Predicted bounding boxes for the current episode image
    self.episode_pred_bboxes = None
    # IoU values for each trigger in the current episode
    self.episode_trigger_ious = None
    # List of indices of masked bounding boxes for the current episode image
    self.episode_masked_indices = []
    # Number of trigger actions used so far
    self.num_triggers_used = 0

    self.seed(seed=seed)
    self.reset()
def __init__(
    self,
    environment_filename=None,
    worker_id=0,
    retro=True,
    timeout_wait=30,
    realtime_mode=False,
    config=None,
    greyscale=False,
):
    """
    Arguments:
      environment_filename: The file path to the Unity executable. Does not
        require the extension.
      worker_id: The index of the worker in the case where multiple
        environments are running. Each environment reserves port
        (5005 + worker_id) for communication with the Unity executable.
      retro: Resize visual observation to 84x84 (int8) and flattens action space.
      timeout_wait: Time for python interface to wait for environment to connect.
      realtime_mode: Whether to render the environment window image and run
        environment at realtime.
    """
    self.reset_parameters = EnvironmentParametersChannel()
    self.engine_config = EngineConfigurationChannel()
    if environment_filename is None:
        registry = UnityEnvRegistry()
        registry.register_from_yaml(self._REGISTRY_YAML)
        self._env = registry["ObstacleTower"].make(
            worker_id=worker_id,
            timeout_wait=timeout_wait,
            side_channels=[self.reset_parameters, self.engine_config])
    else:
        self._env = UnityEnvironment(
            environment_filename,
            worker_id,
            timeout_wait=timeout_wait,
            side_channels=[self.reset_parameters, self.engine_config],
        )

    if realtime_mode:
        self.engine_config.set_configuration_parameters(time_scale=1.0)
        self.reset_parameters.set_float_parameter("train-mode", 0.0)
    else:
        self.engine_config.set_configuration_parameters(time_scale=20.0)
        self.reset_parameters.set_float_parameter("train-mode", 1.0)
    self._env.reset()

    behavior_name = list(self._env.behavior_specs)[0]
    split_name = behavior_name.split("-v")
    if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
        self.name, self.version = split_name
    else:
        raise UnityGymException(
            "Attempting to launch non-Obstacle Tower environment")

    if self.version not in self.ALLOWED_VERSIONS:
        raise UnityGymException(
            "Invalid Obstacle Tower version. Your build is v" + self.version +
            " but only the following versions are compatible with this gym: " +
            str(self.ALLOWED_VERSIONS))

    self.visual_obs = None
    self._n_agents = None
    self._flattener = None
    self._greyscale = greyscale

    # Environment reset parameters
    self._seed = None
    self._floor = None

    self.realtime_mode = realtime_mode
    self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
    self.retro = retro
    if config is not None:
        self.config = config
    else:
        self.config = None
    flatten_branched = self.retro
    uint8_visual = self.retro

    # Check behavior configuration
    if len(self._env.behavior_specs) != 1:
        raise UnityGymException(
            "There can only be one agent in this environment "
            "if it is wrapped in a gym.")
    self.behavior_name = behavior_name
    behavior_spec = self._env.behavior_specs[behavior_name]

    if len(behavior_spec) < 2:
        raise UnityGymException("Environment provides too few observations.")

    self.uint8_visual = uint8_visual

    # Check for number of agents in scene.
    initial_info, terminal_info = self._env.get_steps(behavior_name)
    self._check_agents(len(initial_info))

    # Set observation and action spaces
    if len(behavior_spec.action_shape) == 1:
        self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
    else:
        if flatten_branched:
            self._flattener = ActionFlattener(behavior_spec.action_shape)
            self._action_space = self._flattener.action_space
        else:
            self._action_space = spaces.MultiDiscrete(behavior_spec.action_shape)

    if self._greyscale:
        depth = 1
    else:
        depth = 3

    image_space_max = 1.0
    image_space_dtype = np.float32
    camera_height = behavior_spec.observation_shapes[0][0]
    camera_width = behavior_spec.observation_shapes[0][1]
    if self.retro:
        image_space_max = 255
        image_space_dtype = np.uint8
        camera_height = 84
        camera_width = 84

    image_space = spaces.Box(
        0, image_space_max,
        dtype=image_space_dtype,
        shape=(camera_height, camera_width, depth),
    )
    if self.retro:
        self._observation_space = image_space
    else:
        max_float = np.finfo(np.float32).max
        keys_space = spaces.Discrete(5)
        time_remaining_space = spaces.Box(
            low=0.0, high=max_float, shape=(1,), dtype=np.float32)
        floor_space = spaces.Discrete(9999)
        self._observation_space = spaces.Tuple(
            (image_space, keys_space, time_remaining_space, floor_space))
def __init__(self):
    '''
    general property
    '''
    # self.tmp_pos = np.array([None, None, None, None])
    self.episodes = 0
    self.fps = 100
    self.iteration, self.max_iteration = 0, 60 * self.fps
    self.width = 256
    self.height = 144
    self.gravity = 9.8
    self.max_absolute_thrust = 16  # 2 * self.gravity
    self.min_absolute_thrust = 4
    self.thrust_sensity = (self.max_absolute_thrust - self.min_absolute_thrust) / 2
    self.min_absolute_x, self.max_absolute_x = 0, self.width
    self.min_absolute_y, self.max_absolute_y = 0, self.height
    self.min_initial_distance, self.max_initial_distance = 5, 30
    self.min_detect_distance, self.max_detect_distance = 1, 30
    self.max_absolute_angle = 180
    self.max_roll_angle = 40
    self.max_pitch_angle = 40
    self.max_yaw_angle = 180
    # self.max_absolute_thrust = 2 * self.mass_hunter * self.gravity

    '''
    state related property
    '''
    # configuration of relative position in view
    self.queue_length = 8
    self.coordinate_queue = None
    self.distance_queue = None
    self.in_fov_queue = None
    # threshold for orientation
    self.min_angle, self.max_angle = -1, 1  # -180 -> 180
    self.min_roll, self.max_roll = self.min_angle, self.max_angle
    self.min_pitch, self.max_pitch = self.min_angle, self.max_angle
    self.min_yaw, self.max_yaw = self.min_angle, self.max_angle
    # threshold for thrust
    # thrust = self.min_absolute_thrust + self.thrust_sensity * (x + 1)
    self.min_thrust, self.max_thrust = -1, 1
    # threshold for relative position in view
    self.min_relative_x, self.max_relative_x = -1, 1  # 1 - 256
    self.min_relative_y, self.max_relative_y = -1, 1  # 1 - 144
    # threshold for distance within target and hunter
    self.min_distance, self.max_distance = 0, 1  # 0 -> 30
    # threshold for state
    self.low_state = np.array(
        [self.min_roll, self.min_pitch, self.min_yaw, self.min_thrust] +
        self.queue_length * [self.min_relative_x, self.min_relative_y] +
        self.queue_length * [self.min_distance]
    )
    self.high_state = np.array(
        [self.max_roll, self.max_pitch, self.max_yaw, self.max_thrust] +
        self.queue_length * [self.max_relative_x, self.max_relative_y] +
        self.queue_length * [self.max_distance]
    )

    '''
    action related property
    assume symmetric actions
    '''
    # threshold for orientation
    self.min_roll_action, self.max_roll_action = -1, 1    # -180 -> 180
    self.min_pitch_action, self.max_pitch_action = -1, 1  # -180 -> 180
    self.min_yaw_action, self.max_yaw_action = -1, 1      # -180 -> 180
    # threshold for thrust
    # -2 * self.mass_hunter * self.gravity -> 2 * self.mass_hunter * self.gravity
    self.min_thrust_action, self.max_thrust_action = -1, 1
    # threshold for action
    self.low_action = np.array([self.min_roll_action, self.min_pitch_action,
                                self.min_yaw_action, self.min_thrust_action])
    self.high_action = np.array([self.max_roll_action, self.max_pitch_action,
                                 self.max_yaw_action, self.max_thrust_action])

    '''
    define action space and observation space
    '''
    self.action_space = spaces.Box(low=self.low_action, high=self.high_action)
    # self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
    self.observation_space = spaces.Tuple((
        spaces.Box(low=self.low_state, high=self.high_state),
        # if the coordinates are in the FOV, we can also measure the distance
        spaces.MultiBinary(self.queue_length)
    ))

    self.seed()
    self.reset()
# Assumed imports for this snippet (the file uses `spaces` and `ssbm`
# without showing where they come from):
from gym import spaces

from .menu_manager import *
from .state import *
from .pad import Pad
from .dolphin import DolphinRunner
from . import ssbm
from . import ctype_util as ctutil
from .reward import computeRewards

buttons = ['A', 'B', 'Y', 'L', 'Z']
button_space = spaces.Discrete(len(buttons) + 1)
main_stick_space = spaces.Box(0, 1, [2])  # X and Y axes

c_directions = [(0.5, 0.5), (0.5, 1), (0.5, 0), (0, 0.5), (1, 0.5)]
c_stick_space = spaces.Discrete(len(c_directions))

controller_space = spaces.Tuple(
    (button_space, main_stick_space, c_stick_space))


def realController(control):
    button, main, c = control
    controller = ssbm.RealControllerState()
    if button < len(buttons):
        setattr(controller, 'button_' + buttons[button], True)
    controller.stick_MAIN = tuple(main)
    controller.stick_C = c_directions[c]
    return controller


class BoolConv:
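# --- Usage sketch (assumes the ssbm module above is importable): a random
# sample from controller_space round-trips through realController.
control = controller_space.sample()   # (button index, (x, y) array, c-stick index)
controller = realController(control)  # -> ssbm.RealControllerState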
def _set_action_space(self):
    self.action_space = spaces.Tuple(
        [self.agents[i].action_space for i in range(self.num_agents)])
def __init__(self, conv, permutation):
    self.conv = conv
    self.permutation = permutation
    self.space = spaces.Tuple([conv.space for _ in permutation])
def _set_observation_space(self):
    self.observation_space = spaces.Tuple(
        [self.agents[i].observation_space for i in range(self.num_agents)])
def __init__(
    self,
    n_agents,
    f_init=1,
    growth_rate=1,
    S_eq=1,
    max_steps=1000,
    signal_size=2,
    signal_duration=1,
    threshold=1e-4,
    h_fn=def_h_fn,
    f_fn=def_f_fn,
    p_fn=def_p_fn,
    c_fn=def_c_fn,
    s_fn=def_s_fn
):
    """
    Parameters
    ----------
    n_agents : int
        Number of agents in the environment
    f_init : double
        Initial resource stock
    growth_rate : double
        Intrinsic growth rate
    S_eq : double
        Equilibrium population
    max_steps : int
        Maximum number of steps
    signal_size : int
        Signal size (cardinality)
    signal_duration : int
        Duration between signal shifts
    threshold : double
        Minimum stock threshold
    h_fn : lambda
        Total consumed resources function
    f_fn : lambda
        Spawner-recruit function
    p_fn : lambda
        Price function
    c_fn : lambda
        Cost function
    s_fn : lambda
        Signal function
    """
    super(FishermanEnv, self).__init__()
    self.n_agents = n_agents
    self.max_steps = max_steps
    self.f_init = f_init
    self.growth_rate = growth_rate
    self.S_eq = S_eq
    self.signal_size = signal_size
    self.signal_duration = signal_duration
    self.signal_stochastic_offset = random.randint(1, signal_size)
    self.done = True
    self.cur_stock = None
    self.cur_step = None
    self.last_actions = None
    self.threshold = threshold
    self.seed()
    self.l_agents = ["agent{}".format(i) for i in range(self.n_agents)]

    # Environment specific customs
    self.h_fn = h_fn
    self.f_fn = f_fn
    self.p_fn = p_fn
    self.c_fn = c_fn
    self.s_fn = s_fn

    self.observation_space = spaces.Tuple((
        spaces.MultiBinary(self.signal_size),
        spaces.Box(low=0, high=1, shape=(1,), dtype=float),
        spaces.Box(low=0, high=np.inf, shape=(1,), dtype=float)))
    self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=float)
def space(self) -> spaces.Space:
    return spaces.Tuple(
        [action_type.space() for action_type in self.agents_action_types])
import pytest
import numpy as np
import gym
from gym import spaces

from gym_utils import env_wrappers

flat_box_test_spaces = [
    (spaces.Box(0, 1, ()), 1),
    (spaces.Box(0, 1, (0,)), 0),
    (spaces.Box(0, 1, (4,)), 4),
    (spaces.Box(0, 1, (2, 3)), 6),
    (spaces.Discrete(5), 5),
    (spaces.Tuple((spaces.Box(0, 1, (2,)), spaces.Discrete(3))), 5),
    (spaces.Tuple(()), 0),
]


class TestFlatBoxView():
    @pytest.mark.parametrize('space,n', flat_box_test_spaces)
    def test_shape(self, space, n):
        low = np.zeros(n)
        high = np.ones(n)
        flat_space = env_wrappers.FlatBoxView(space)
        assert flat_space.shape == (n,)
        assert np.array_equal(flat_space.low, low)
        assert np.array_equal(flat_space.high, high)

    @pytest.mark.parametrize('space,n', flat_box_test_spaces)
    def test_sample(self, space, n):
        # The original snippet is truncated here; a plausible body
        # (an assumption, mirroring test_shape) checks that samples
        # come back with the flattened shape:
        flat_space = env_wrappers.FlatBoxView(space)
        sample = flat_space.sample()
        assert sample.shape == (n,)
def __init__(self, level, no_reward=False, **kwargs):
    """
    Base class for Gym interface for ViZDoom. Child classes are defined in
    vizdoom_env_definitions.py, that contain the level parameter and pass
    through any kwargs from gym.make().

    :param level: index of level in the CONFIGS list above
    :param kwargs: keyword arguments from gym.make(env_name_string, **kwargs) call.
        'depth' will render the depth buffer and 'labels' will render the object
        labels and return it in the observation. Note that the observation will
        be a list with the screen buffer as the first element. If no kwargs are
        provided (or depth=False and labels=False), the observation will be of
        type np.ndarray.
    """
    # parse keyword arguments
    self.depth = kwargs.get("depth", False)
    self.labels = kwargs.get("labels", False)
    self.position = kwargs.get("position", False)
    self.health = kwargs.get("health", False)
    self.no_reward = no_reward

    # init game
    self.game = vzd.DoomGame()
    self.game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)
    scenarios_dir = os.path.join(os.path.dirname(__file__), "scenarios")
    self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
    self.game.set_window_visible(False)
    self.game.set_depth_buffer_enabled(self.depth)
    self.game.set_labels_buffer_enabled(self.labels)
    self.game.clear_available_game_variables()
    # if self.position:
    #     import pdb; pdb.set_trace()
    self.game.add_available_game_variable(vzd.GameVariable.POSITION_X)
    self.game.add_available_game_variable(vzd.GameVariable.POSITION_Y)
    self.game.add_available_game_variable(vzd.GameVariable.POSITION_Z)
    self.game.add_available_game_variable(vzd.GameVariable.ANGLE)
    # if self.health:
    self.game.add_available_game_variable(vzd.GameVariable.HEALTH)
    self.info_str = ["position_x", "position_y", "position_z", "angle", "health"]
    self.game.init()
    self.state = None
    self.viewer = None

    self.action_space = spaces.Discrete(CONFIGS[level][1])

    # specify observation space(s)
    list_spaces: List[gym.Space] = [
        spaces.Box(
            0, 255,
            (
                self.game.get_screen_height(),
                self.game.get_screen_width(),
                self.game.get_screen_channels(),
            ),
            dtype=np.uint8,
        )
    ]
    if self.depth:
        list_spaces.append(
            spaces.Box(
                0, 255,
                (self.game.get_screen_height(), self.game.get_screen_width()),
                dtype=np.uint8,
            ))
    if self.labels:
        list_spaces.append(
            spaces.Box(
                0, 255,
                (self.game.get_screen_height(), self.game.get_screen_width()),
                dtype=np.uint8,
            ))
    if self.position:
        list_spaces.append(spaces.Box(-np.Inf, np.Inf, (4, 1)))
    if self.health:
        list_spaces.append(spaces.Box(0, np.Inf, (1, 1)))

    if len(list_spaces) == 1:
        self.observation_space = list_spaces[0]
    else:
        self.observation_space = spaces.Tuple(list_spaces)
def __init__(
    self,
    num_players: int,
    num_streets: int,
    blinds: Union[int, List[int]],
    antes: Union[int, List[int]],
    raise_sizes: Union[float, str, List[Union[float, str]]],
    num_raises: Union[float, List[float]],
    num_suits: int,
    num_ranks: int,
    num_hole_cards: int,
    num_community_cards: Union[int, List[int]],
    num_cards_for_hand: int,
    mandatory_num_hole_cards: int,
    start_stack: int,
    low_end_straight: bool = True,
    order: Optional[List[str]] = None,
) -> None:
    self.dealer = clubs.Dealer(
        num_players,
        num_streets,
        blinds,
        antes,
        raise_sizes,
        num_raises,
        num_suits,
        num_ranks,
        num_hole_cards,
        num_community_cards,
        num_cards_for_hand,
        mandatory_num_hole_cards,
        start_stack,
        low_end_straight,
        order,
    )

    max_bet = start_stack * num_players
    if isinstance(num_community_cards, list):
        comm_card_numb = sum(num_community_cards)
    else:
        comm_card_numb = num_community_cards

    self.action_space = spaces.Discrete(max_bet)
    card_space = spaces.Tuple(
        (spaces.Discrete(num_ranks), spaces.Discrete(num_suits)))
    hole_card_space = spaces.Tuple((card_space,) * num_hole_cards)
    self.observation_space = spaces.Dict({
        "action": spaces.Discrete(num_players),
        "active": spaces.MultiBinary(num_players),
        "button": spaces.Discrete(num_players),
        "call": spaces.Discrete(max_bet),
        "community_cards": spaces.Tuple((card_space,) * comm_card_numb),
        "hole_cards": spaces.Tuple((hole_card_space,) * num_players),
        "max_raise": spaces.Discrete(max_bet),
        "min_raise": spaces.Discrete(max_bet),
        "pot": spaces.Discrete(max_bet),
        "stacks": spaces.Tuple((spaces.Discrete(max_bet),) * num_players),
        "street_commits": spaces.Tuple(
            (spaces.Discrete(max_bet),) * num_players),
    })

    self.agents: Optional[Dict[int, agent.BaseAgent]] = None
    self.prev_obs: Optional[Dict] = None
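# --- Illustrative sketch (assumed values: 13 ranks, 4 suits, 2 hole cards;
# not from the source): a card is modelled as a (rank, suit) pair, so hole
# cards sample as nested tuples of ints.
from gym import spaces

card_space = spaces.Tuple((spaces.Discrete(13), spaces.Discrete(4)))
hole_card_space = spaces.Tuple((card_space,) * 2)
rank, suit = hole_card_space.sample()[0]
assert 0 <= rank < 13 and 0 <= suit < 4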
def __init__(self):
    self.reset()
    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Tuple(
        (spaces.Discrete(2), spaces.Discrete(2)))
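# --- Usage sketch (not from the source): the observation space above
# samples pairs of binary values, e.g. (0, 1).
from gym import spaces

obs_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(2)))
a, b = obs_space.sample()
assert a in (0, 1) and b in (0, 1)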