Example #1
    def __init__(
        self,
        initial_wealth=25.0,
        edge_prior_alpha=7,
        edge_prior_beta=3,
        max_wealth_alpha=5.0,
        max_wealth_m=200.0,
        max_rounds_mean=300.0,
        max_rounds_sd=25.0,
        reseed=True,
        clip_distributions=False,
    ):
        # clip_distributions=True asserts that state and action space are not modified at reset()

        # store the hyper-parameters for passing back into __init__() during resets so
        # the same hyper-parameters govern the next game's parameters, as the user
        # expects:
        # TODO: this is boilerplate, is there any more elegant way to do this?
        self.initial_wealth = float(initial_wealth)
        self.edge_prior_alpha = edge_prior_alpha
        self.edge_prior_beta = edge_prior_beta
        self.max_wealth_alpha = max_wealth_alpha
        self.max_wealth_m = max_wealth_m
        self.max_rounds_mean = max_rounds_mean
        self.max_rounds_sd = max_rounds_sd
        self.clip_distributions = clip_distributions

        if reseed or not hasattr(self, "np_random"):
            self.seed()

        # draw this game's set of parameters:
        edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
        if self.clip_distributions:
            # (clip/resample some parameters to be able to fix obs/action space sizes/bounds)
            max_wealth_bound = round(
                genpareto.ppf(0.85, max_wealth_alpha, max_wealth_m)
            )
            max_wealth = max_wealth_bound + 1.0
            while max_wealth > max_wealth_bound:
                max_wealth = round(
                    genpareto.rvs(
                        max_wealth_alpha, max_wealth_m, random_state=self.np_random
                    )
                )
            max_rounds_bound = int(
                round(norm.ppf(0.99, max_rounds_mean, max_rounds_sd))
            )
            max_rounds = max_rounds_bound + 1
            while max_rounds > max_rounds_bound:
                max_rounds = int(
                    round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
                )

        else:
            max_wealth = round(
                genpareto.rvs(
                    max_wealth_alpha, max_wealth_m, random_state=self.np_random
                )
            )
            max_wealth_bound = max_wealth
            max_rounds = int(
                round(self.np_random.normal(max_rounds_mean, max_rounds_sd))
            )
            max_rounds_bound = max_rounds

        # add an additional global variable which is the sufficient statistic for the
        # Pareto distribution on wealth cap; alpha doesn't update, but x_m does, and
        # simply is the highest wealth count we've seen to date:
        self.max_ever_wealth = float(self.initial_wealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.rounds_elapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(max_wealth_bound * 100))
        self.observation_space = spaces.Tuple(
            (
                spaces.Box(
                    0, max_wealth_bound, shape=[1], dtype=np.float32
                ),  # current wealth
                spaces.Discrete(max_rounds_bound + 1),  # rounds elapsed
                spaces.Discrete(max_rounds_bound + 1),  # wins
                spaces.Discrete(max_rounds_bound + 1),  # losses
                spaces.Box(0, max_wealth_bound, [1], dtype=np.float32),
            )
        )  # maximum observed wealth
        self.reward_range = (0, max_wealth)
        self.edge = edge
        self.wealth = self.initial_wealth
        self.max_rounds = max_rounds
        self.rounds = self.max_rounds
        self.max_wealth = max_wealth
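The snippet above composes its observation space from Box and Discrete sub-spaces via spaces.Tuple. A minimal, self-contained sketch of how such a composite space behaves, using placeholder bounds rather than the environment's sampled parameters:

import numpy as np
from gym import spaces

# Placeholder bounds standing in for max_wealth_bound / max_rounds_bound above.
observation_space = spaces.Tuple((
    spaces.Box(0, 250.0, shape=(1,), dtype=np.float32),  # current wealth
    spaces.Discrete(301),                                 # rounds elapsed
    spaces.Discrete(301),                                 # wins
    spaces.Discrete(301),                                 # losses
    spaces.Box(0, 250.0, shape=(1,), dtype=np.float32),   # maximum observed wealth
))

sample = observation_space.sample()        # one entry per sub-space, as a tuple
assert observation_space.contains(sample)  # composite membership check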
Example #2
    def __init__(
        self,
        unity_env: BaseEnv,
        uint8_visual: bool = False,
        flatten_branched: bool = False,
        allow_multiple_obs: bool = False,
    ):
        """
        Environment initialization
        :param unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed when the UnityToGymWrapper closes.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param allow_multiple_obs: If True, return a list of np.ndarrays as observations with the first elements
            containing the visual observations and the last element containing the array of vector observations.
            If False, returns a single np.ndarray containing either only a single visual observation or the array of
            vector observations.
        """
        self._env = unity_env

        # Take a single step so that the brain information will be sent over
        if not self._env.behavior_specs:
            self._env.step()

        self.visual_obs = None

        # Save the step result from the last time all Agents requested decisions.
        self._previous_decision_step: DecisionSteps = None
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_obs = allow_multiple_obs

        # Check brain configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one behavior in a UnityEnvironment "
                "if it is wrapped in a gym."
            )

        self.name = list(self._env.behavior_specs.keys())[0]
        self.group_spec = self._env.behavior_specs[self.name]

        if self._get_n_vis_obs() == 0 and self._get_vec_obs_size() == 0:
            raise UnityGymException(
                "There are no observations provided by the environment."
            )

        if not self._get_n_vis_obs() >= 1 and uint8_visual:
            logger.warning(
                "uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect."
            )
        else:
            self.uint8_visual = uint8_visual
        if (
            self._get_n_vis_obs() + self._get_vec_obs_size() >= 2
            and not self._allow_multiple_obs
        ):
            logger.warning(
                "The environment contains multiple observations. "
                "You must define allow_multiple_obs=True to receive them all. "
                "Otherwise, only the first visual observation (or vector observation if"
                "there are no visual observations) will be provided in the observation."
            )

        # Check for number of agents in scene.
        self._env.reset()
        decision_steps, _ = self._env.get_steps(self.name)
        self._check_agents(len(decision_steps))
        self._previous_decision_step = decision_steps

        # Set action spaces
        if self.group_spec.is_action_discrete():
            branches = self.group_spec.discrete_action_branches
            if self.group_spec.action_shape == 1:
                self._action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(branches)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened."
                )
            high = np.array([1] * self.group_spec.action_shape)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)

        # Set observations space
        list_spaces: List[gym.Space] = []
        shapes = self._get_vis_obs_shape()
        for shape in shapes:
            if uint8_visual:
                list_spaces.append(spaces.Box(0, 255, dtype=np.uint8, shape=shape))
            else:
                list_spaces.append(spaces.Box(0, 1, dtype=np.float32, shape=shape))
        if self._get_vec_obs_size() > 0:
            # vector observation is last
            high = np.array([np.inf] * self._get_vec_obs_size())
            list_spaces.append(spaces.Box(-high, high, dtype=np.float32))
        if self._allow_multiple_obs:
            self._observation_space = spaces.Tuple(list_spaces)
        else:
            self._observation_space = list_spaces[0]  # only return the first one
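A hedged usage sketch for the wrapper above; the import paths follow the ml-agents packages this class ships with and may differ between releases, and the build path is a placeholder:

from mlagents_envs.environment import UnityEnvironment  # assumed module path
from gym_unity.envs import UnityToGymWrapper             # assumed module path

unity_env = UnityEnvironment("path/to/UnityBuild")        # placeholder executable path
env = UnityToGymWrapper(unity_env, uint8_visual=True, allow_multiple_obs=True)

obs = env.reset()                                         # list of observations when allow_multiple_obs=True
obs, reward, done, info = env.step(env.action_space.sample())
env.close()                                               # also closes the wrapped UnityEnvironment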
Example #3
    def __init__(self,
                 block_random=0.3,
                 camera_random=0,
                 simple_observations=False,
                 continuous=False,
                 remove_height_hack=False,
                 urdf_list=None,
                 render_mode='GUI',
                 num_objects=5,
                 dv=0.06,
                 target=False,
                 target_filenames=None,
                 non_target_filenames=None,
                 num_resets_per_setup=1,
                 render_width=128,
                 render_height=128,
                 downsample_width=64,
                 downsample_height=64,
                 test=False,
                 allow_duplicate_objects=True,
                 max_num_training_models=900,
                 max_num_test_models=100):
        """Creates a KukaGraspingEnv.

    Args:
      block_random: How much randomness to use in positioning blocks.
      camera_random: How much randomness to use in positioning camera.
      simple_observations: If True, observations are the position and
        orientation of end-effector and closest block, rather than images.
      continuous: If True, actions are continuous, else discrete.
      remove_height_hack: If True and continuous is True, add a dz
                          component to action space.
      urdf_list: List of objects to populate the bin with.
      render_mode: GUI, DIRECT, or TCP.
      num_objects: The number of random objects to load.
      dv: Velocity magnitude of cartesian dx, dy, dz actions per time step.
      target: If True, then we receive reward only for grasping one "target"
        object.
      target_filenames: Objects that we want to grasp.
      non_target_filenames: Objects that we don't want to grasp.
      num_resets_per_setup: How many env resets before calling setup again.
      render_width: Width of camera image to render with.
      render_height: Height of camera image to render with.
      downsample_width: Width of image observation.
      downsample_height: Height of image observation.
      test: If True, uses test split of objects.
      allow_duplicate_objects: If True, samples URDFs with replacement.
      max_num_training_models: The number of distinct models to choose from when
        selecting the num_objects placed in the tray for training.
      max_num_test_models: The number of distinct models to choose from when
        selecting the num_objects placed in the tray for testing.
    """
        self._time_step = 1. / 200.
        self._max_steps = 15

        # Open-source search paths.
        self._urdf_root = OSS_DATA_ROOT
        self._models_dir = os.path.join(self._urdf_root, 'random_urdfs')

        self._action_repeat = 200
        self._env_step = 0
        self._renders = render_mode in ['GUI', 'TCP']
        # Size we render at.
        self._width = render_width
        self._height = render_height
        # Size we downsample to.
        self._downsample_width = downsample_width
        self._downsample_height = downsample_height
        self._target = target
        self._num_objects = num_objects
        self._dv = dv
        self._urdf_list = urdf_list
        if target_filenames:
            target_filenames = [
                self._get_urdf_path(f) for f in target_filenames
            ]
        if non_target_filenames:
            non_target_filenames = [
                self._get_urdf_path(f) for f in non_target_filenames
            ]
        self._object_filenames = (target_filenames
                                  or []) + (non_target_filenames or [])
        self._target_filenames = target_filenames or []
        self._block_random = block_random
        self._cam_random = camera_random
        self._simple_obs = simple_observations
        self._continuous = continuous
        self._remove_height_hack = remove_height_hack
        self._resets = 0
        self._num_resets_per_setup = num_resets_per_setup
        self._test = test
        self._allow_duplicate_objects = allow_duplicate_objects
        self._max_num_training_models = max_num_training_models
        self._max_num_test_models = max_num_test_models

        if render_mode == 'GUI':
            self.cid = pybullet.connect(pybullet.GUI)
            pybullet.resetDebugVisualizerCamera(1.3, 180, -41,
                                                [0.52, -0.2, -0.33])
        elif render_mode == 'DIRECT':
            self.cid = pybullet.connect(pybullet.DIRECT)
        elif render_mode == 'TCP':
            self.cid = pybullet.connect(pybullet.TCP, 'localhost', 6667)

        self.setup()
        if self._continuous:
            self.action_space = spaces.Box(low=-1, high=1, shape=(4, ))
            if self._remove_height_hack:
                self.action_space = spaces.Box(
                    low=-1, high=1, shape=(5, ))  # dx, dy, dz, da, close
        else:
            self.action_space = spaces.Discrete(8)
            if self._remove_height_hack:
                self.action_space = spaces.Discrete(10)

        if self._simple_obs:
            # (3 pos + 4 quat) x 2
            self.observation_space = spaces.Box(low=-100,
                                                high=100,
                                                shape=(14, ))
        else:
            # image (self._height, self._width, 3) x position of the gripper (3,)
            img_space = spaces.Box(low=0,
                                   high=255,
                                   shape=(self._downsample_height,
                                          self._downsample_width, 3))
            pos_space = spaces.Box(low=-5, high=5, shape=(3, ))
            self.observation_space = spaces.Tuple((img_space, pos_space))
        self.viewer = None
Example #4
  def __init__(self, spec):
    self.spec = spec

    self.space = spaces.Tuple([conv.space for _, conv in spec])
Example #5
    def __init__(self):
        #
        # environment definition
        #
        self.descriptions = {
            "living": [
                "This room has a couch, chairs and TV.",
                "You have entered the living room. You can watch TV here.",
                "This room has two sofas, chairs and a chandelier."
            ],
            "garden": [
                "This space has a swing, flowers and trees.",
                "You have arrived at the garden. You can exercise here.",
                "This area has plants, grass and rabbits."
            ],
            "kitchen": [
                "This room has a fridge, oven, and a sink.",
                "You have arrived in the kitchen. You can find food and drinks here.",
                "This living area has pizza, coke, and icecream."
            ],
            "bedroom": [
                "This area has a bed, desk and a dresser.",
                "You have arrived in the bedroom. You can rest here.",
                "You see a wooden cot and a mattress on top of it."
            ],
            "pantry": [
                "A small room for storing food and other kinds of goods.",
                "This area is usually used for preparing cold foods.",
            ],
            "hall": [
                "This seems to be the entrance room of the house.",
            ],
        }

        self.rooms = self.descriptions.keys()

        self.env_objects = {
            "tv": "A huge television that is great for watching games.",
            "apple": "A red juicy fruit.",
            "cheese": "A good old emmentaler.",
            "pizza": "A delicious pizza margherita.",
            "rbutton": "A red button.",
            "gbutton": "A green button.",
            "bbutton": "A blue button.",
            "red": "A red fluid",
            "green": "A green fluid",
            "blue": "A blue fluid",
            "recipe_book": "A book full of recipes.",
        }

        self.definitions = {
            ("eat apple"): [{
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "poisoned": "apple"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "old": "apple"
                },
                "effs": {
                    "info": "old_food"
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry"
                },
                "effs": {
                    "quest": ""
                }
            }],
            ("eat cheese"): [{
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "poisoned": "cheese"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "old": "cheese"
                },
                "effs": {
                    "info": "old_food"
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry"
                },
                "effs": {
                    "quest": ""
                }
            }],
            ("eat pizza"): [{
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "poisoned": "pizza"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry",
                    "old": "pizza"
                },
                "effs": {
                    "info": "old_food"
                }
            }, {
                "conds": {
                    "room": "kitchen",
                    "quest": "hungry"
                },
                "effs": {
                    "quest": ""
                }
            }],
            ("watch tv"): [{
                "conds": {
                    "room": "living",
                    "quest": "bored",
                    "energy": True
                },
                "effs": {
                    "quest": ""
                }
            }, {
                "conds": {
                    "room": "living",
                    "quest": "bored",
                    "energy": False
                },
                "effs": {
                    "info": "energy_error"
                }
            }],
            ("press rbutton"): [{
                "conds": {
                    "room": "pantry",
                    "energy_btn": "rbutton"
                },
                "effs": {
                    "energy": True
                }
            }, {
                "conds": {
                    "room": "pantry",
                    "shock_btn": "rbutton"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "pantry"
                },
                "effs": {}
            }],
            ("press gbutton"): [{
                "conds": {
                    "room": "pantry",
                    "energy_btn": "gbutton"
                },
                "effs": {
                    "energy": True
                }
            }, {
                "conds": {
                    "room": "pantry",
                    "shock_btn": "gbutton"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "pantry"
                },
                "effs": {}
            }],
            ("press bbutton"): [{
                "conds": {
                    "room": "pantry",
                    "energy_btn": "bbutton"
                },
                "effs": {
                    "energy": True
                }
            }, {
                "conds": {
                    "room": "pantry",
                    "shock_btn": "bbutton"
                },
                "effs": {
                    "dead": True
                }
            }, {
                "conds": {
                    "room": "pantry"
                },
                "effs": {}
            }],
            ("read recipe_book"): [{
                "conds": {
                    "room": "garden",
                },
                "effs": {
                    "info": "recipe_info"
                }
            }],
            #
            # Ingredients
            #
            ("read red"): [{
                "conds": {
                    "room": "bedroom"
                },
                "effs": {}
            }],
            ("read green"): [{
                "conds": {
                    "room": "bedroom"
                },
                "effs": {}
            }],
            ("read blue"): [{
                "conds": {
                    "room": "bedroom"
                },
                "effs": {}
            }],
            ####################################################################
            ("drink red"): [{
                "conds": {
                    "room": "bedroom",
                    "quest": "sleepy",
                    "recipe_good": "red"
                },
                "effs": {
                    "quest": ""
                }
            }, {
                "conds": {
                    "room": "bedroom",
                    "recipe_bad": "red"
                },
                "effs": {
                    "quest": "",
                    "dead": True
                }
            }],
            ("drink green"): [{
                "conds": {
                    "room": "bedroom",
                    "quest": "sleepy",
                    "recipe_good": "green"
                },
                "effs": {
                    "quest": ""
                }
            }, {
                "conds": {
                    "room": "bedroom",
                    "recipe_bad": "green"
                },
                "effs": {
                    "quest": "",
                    "dead": True
                }
            }],
            ("drink blue"): [{
                "conds": {
                    "room": "bedroom",
                    "quest": "sleepy",
                    "recipe_good": "blue"
                },
                "effs": {
                    "quest": ""
                }
            }, {
                "conds": {
                    "room": "bedroom",
                    "recipe_bad": "blue"
                },
                "effs": {
                    "quest": "",
                    "dead": True
                }
            }],
            #
            # Move in direction
            #
            ("go north"): [
                {
                    "conds": {
                        "room": "bedroom"
                    },
                    "effs": {
                        "room": "living"
                    }
                },
                {
                    "conds": {
                        "room": "kitchen"
                    },
                    "effs": {
                        "room": "garden"
                    }
                },
                {
                    "conds": {
                        "room": "pantry"
                    },
                    "effs": {
                        "room": "kitchen"
                    }
                },
            ],
            ("go south"): [
                {
                    "conds": {
                        "room": "living"
                    },
                    "effs": {
                        "room": "bedroom"
                    }
                },
                {
                    "conds": {
                        "room": "garden"
                    },
                    "effs": {
                        "room": "kitchen"
                    }
                },
                {
                    "conds": {
                        "room": "kitchen"
                    },
                    "effs": {
                        "room": "pantry"
                    }
                },
            ],
            ("go east"): [
                {
                    "conds": {
                        "room": "living"
                    },
                    "effs": {
                        "room": "garden"
                    }
                },
                {
                    "conds": {
                        "room": "bedroom"
                    },
                    "effs": {
                        "room": "kitchen"
                    }
                },
                {
                    "conds": {
                        "room": "hall"
                    },
                    "effs": {
                        "room": "living"
                    }
                },
            ],
            ("go west"): [
                {
                    "conds": {
                        "room": "garden"
                    },
                    "effs": {
                        "room": "living"
                    }
                },
                {
                    "conds": {
                        "room": "kitchen"
                    },
                    "effs": {
                        "room": "bedroom"
                    }
                },
                {
                    "conds": {
                        "room": "living"
                    },
                    "effs": {
                        "room": "hall"
                    }
                },
            ],
        }

        self.text = {
            "quest": {
                "hungry": "You are hungry",
                "sleepy": "You are sleepy",
                "bored": "You are bored",
                "fat": "You are getting fat",
            },
            "mislead": {
                "hungry": "You are not hungry",
                "sleepy": "You are not sleepy",
                "bored": "You are not bored",
                "fat": "You are not getting fat",
            },
            "info": {
                "energy_error":
                "Seems the tv does not work because of missing energy. Press the {} in the pantry.",
                "old_food": "The food does not seem good anymore.",
                "food_warning":
                "You cannot enjoy the {} anymore, it is old! Attention: do not eat the poisoned {}.",
                "recipe_wrong": "The recipe seems to have the wrong effect."
            },
            "recipies": {
                0: "To get {0} you should take the {1} drink.",
                1: "Effect {0}: One needs to use a {1} sweet drink.",
                2: "Take a drink which is {1} to get {0}.",
            }
        }
        HomeWorld.__init__(self)

        self.actions = list({a.split(" ")[0] for a in self.definitions})
        self.objects = list({a.split(" ")[1] for a in self.definitions})

        self.num_actions = len(self.actions)
        self.num_objects = len(self.objects)

        self.quests = ['hungry', 'sleepy', 'bored']
        self.quest_actions = ['eat', 'sleep', 'watch']
        self.extra_vocab = ['nothing', 'happend', 'not', 'but', 'now']

        self.state = {
            "room": "",
            "description": "",
            "info": "",
            "quest": "",
            "mislead": "",
            "old": "",
            "poisoned": "",
            "energy": "",
            "shock_btn": "",
            "energy_btn": "",
            "recipe_good": "",
            "recipe_bad": "",
            "dead": False
        }

        self.init_vocab()

        self.vocab_space = self.get_vocab_size()
        self.action_space = spaces.Tuple((spaces.Discrete(self.num_actions),
                                          spaces.Discrete(self.num_objects)))
        self.observation_space = None
        self.seq_length = 50
Example #6
    def __init__(self, env_config):
        self.multi_goal = env_config.get("multi_goal", False)
        self.generalize = env_config.get("generalize", False)
        num_valid = env_config.get("num_valid", 50)
        self.specs_save = env_config.get("save_specs", False)
        self.valid = env_config.get("run_valid", False)

        self.env_steps = 0
        with open(TwoStageAmp.CIR_YAML, 'r') as f:
            yaml_data = yaml.load(f, OrderedDictYAMLLoader)

        # design specs
        if self.generalize == False:
            specs = yaml_data['target_specs']
        else:
            load_specs_path = TwoStageAmp.path + "/autockt/gen_specs/ngspice_specs_gen_two_stage_opamp"
            with open(load_specs_path, 'rb') as f:
                specs = pickle.load(f)

        self.specs = OrderedDict(sorted(specs.items(), key=lambda k: k[0]))
        if self.specs_save:
            with open(
                    "specs_" + str(num_valid) + str(random.randint(1, 100000)),
                    'wb') as f:
                pickle.dump(self.specs, f)

        self.specs_ideal = []
        self.specs_id = list(self.specs.keys())
        self.fixed_goal_idx = -1
        self.num_os = len(list(self.specs.values())[0])

        # param array
        params = yaml_data['params']
        self.params = []
        self.params_id = list(params.keys())

        for value in params.values():
            param_vec = np.arange(value[0], value[1], value[2])
            self.params.append(param_vec)

        #initialize sim environment
        self.sim_env = TwoStageClass(yaml_path=TwoStageAmp.CIR_YAML,
                                     num_process=1,
                                     path=TwoStageAmp.path)
        self.action_meaning = [-1, 0, 2]
        self.action_space = spaces.Tuple(
            [spaces.Discrete(len(self.action_meaning))] * len(self.params_id))
        #self.action_space = spaces.Discrete(len(self.action_meaning)**len(self.params_id))
        self.observation_space = spaces.Box(
            low=np.array([TwoStageAmp.PERF_LOW] * 2 * len(self.specs_id) +
                         len(self.params_id) * [1]),
            high=np.array([TwoStageAmp.PERF_HIGH] * 2 * len(self.specs_id) +
                          len(self.params_id) * [1]))

        #initialize current param/spec observations
        self.cur_specs = np.zeros(len(self.specs_id), dtype=np.float32)
        self.cur_params_idx = np.zeros(len(self.params_id), dtype=np.int32)

        #Get the g* (overall design spec) you want to reach
        self.global_g = []
        for spec in list(self.specs.values()):
            self.global_g.append(float(spec[self.fixed_goal_idx]))
        self.g_star = np.array(self.global_g)
        self.global_g = np.array(yaml_data['normalize'])

        #objective number (used for validation)
        self.obj_idx = 0
Example #7
 def _set_action_space(self):
     self.action_space = spaces.Tuple(
         tuple([spaces.Discrete(6)] +
               [spaces.Discrete(self._radio_vocab_size)] *
               self._radio_num_words))
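A sample from a Tuple of Discrete spaces like the one above is simply a tuple of integers, one per sub-space. A small sketch with placeholder values standing in for self._radio_vocab_size and self._radio_num_words:

from gym import spaces

radio_vocab_size, radio_num_words = 8, 2  # placeholders, not the env's real settings
action_space = spaces.Tuple(
    tuple([spaces.Discrete(6)] +
          [spaces.Discrete(radio_vocab_size)] * radio_num_words))

action = action_space.sample()        # e.g. (4, 7, 1): movement index plus two radio words
assert action_space.contains(action)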
Example #8
from ray.rllib.env.async_vector_env import AsyncVectorEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.test.test_external_env import SimpleServing
from ray.tune.registry import register_env

DICT_SPACE = spaces.Dict({
    "sensors":
    spaces.Dict({
        "position":
        spaces.Box(low=-100, high=100, shape=(3, )),
        "velocity":
        spaces.Box(low=-1, high=1, shape=(3, )),
        "front_cam":
        spaces.Tuple((spaces.Box(low=0, high=1, shape=(10, 10, 3)),
                      spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam":
        spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state":
    spaces.Dict({
        "charge":
        spaces.Discrete(100),
        "job_status":
        spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        })
    })
})
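The nested Dict above (which itself contains a Tuple of image Boxes) can be exercised directly; a minimal sketch using the DICT_SPACE defined in this example:

sample = DICT_SPACE.sample()
assert DICT_SPACE.contains(sample)

# Nested entries come back as plain dicts/tuples of numpy values.
front_cam_left, front_cam_right = sample["sensors"]["front_cam"]
assert front_cam_left.shape == (10, 10, 3)
assert 0 <= sample["inner_state"]["charge"] < 100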
Example #9
    def __init__(self,
                 image_paths,
                 true_bboxes,
                 playout_episode=False,
                 premasking=True,
                 mode='train',
                 max_steps_per_image=200,
                 seed=None,
                 bbox_scaling_w=0.05,
                 bbox_scaling_h=0.1,
                 bbox_transformer='base',
                 has_termination_action=True,
                 has_intermediate_reward=False,
                 ior_marker_type='cross',
                 history_length=10,
                 assessor_model=None,
                 train_assessor=False,
                 grayscale=False,
                 use_cut_area=False):
        """
        :param image_paths: The paths to the individual images
        :param true_bboxes: The true bounding boxes for each image
        :type image_paths: String or list
        :type true_bboxes: numpy.ndarray
        """
        # Determines whether the agent is training or testing
        # Optimizations can be applied during training that are not allowed for testing
        self.mode = mode
        # Factor for scaling all bounding boxes relative to their size
        self.bbox_scaling_w = bbox_scaling_w
        self.bbox_scaling_h = bbox_scaling_h
        # Whether IoR markers will be placed upfront after loading the image
        self.premasking = premasking
        # Whether an episode terminates after a single trigger or is played out until the end
        self.playout_episode = playout_episode
        # Episodes will be terminated automatically after reaching max steps
        self.max_steps_per_image = max_steps_per_image
        # Whether a termination action should be provided in the action set
        self.has_termination_action = has_termination_action
        # Whether a reward will be given for each non-trigger action based on the best gt iou
        self.has_intermediate_reward = has_intermediate_reward
        # The type of IoR marker to be used when masking trigger regions
        self.ior_marker_type = ior_marker_type
        # Length of history in state & agent model
        self.history_length = history_length
        # Whether to return grayscale, 1-channel environment images
        self.grayscale = grayscale
        # Use tightness-aware IoU for reward (incorporating cut gt)
        self.use_cut_area = use_cut_area

        # Initialize action space
        self.bbox_transformer = create_bbox_transformer(bbox_transformer)
        self.action_space = spaces.Discrete(len(self.action_set))
        if self.grayscale:
            # 450*450*1 (grayscale image) + history_length * len(action_set) (one-hot-encoded history)
            self.observation_space = spaces.Tuple([
                spaces.Box(low=0, high=256, shape=(450, 450, 1)),
                spaces.Box(low=0,
                           high=1,
                           shape=(self.history_length, len(self.action_set)))
            ])
        else:
            # 450*450*3 (RGB image) + history_length * len(action_set) (one-hot-encoded history)
            self.observation_space = spaces.Tuple([
                spaces.Box(low=0, high=256, shape=(450, 450, 3)),
                spaces.Box(low=0,
                           high=1,
                           shape=(self.history_length, len(self.action_set)))
            ])

        # Initialize dataset
        if type(image_paths) is not list:
            image_paths = [image_paths]
        self.image_paths = image_paths
        self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                            for bboxes in true_bboxes]

        # For registering a handler that will be executed once after a step
        self.post_step_handler = None

        # Episode-specific

        # Image for the current episode
        self.episode_image = None
        self.current_image_index = 0
        # Ground truth bounding boxes for the current episode image
        self.episode_true_bboxes = None
        # Predicted bounding boxes for the current episode image
        self.episode_pred_bboxes = None
        # IoU values for each trigger in the current episode
        self.episode_trigger_ious = None
        # List of indices of masked bounding boxes for the current episode image
        self.episode_masked_indices = []
        # Number of trigger actions used so far
        self.num_triggers_used = 0
        # Number of episodes rolled out so far
        self.episode_count = 0
        # ID of last action taken
        self.last_action_taken = -1

        # For rendering
        self.viewer = None

        # Assessor (weak-supervision)
        self.assessor = assessor_model
        self.train_assessor = train_assessor

        self.resize = Resize((450, 450),
                             interpolation=InterpolationMode.NEAREST)

        self.seed(seed=seed)
        self.reset()
Example #10
    def __init__(self, env=None):
        gym.ObservationWrapper.__init__(self, env)

        self.observation_space = spaces.Tuple([self.observation_space])
Example #11
    def __init__(self,
                 bodies: SystemScope = SystemScope.ALL,
                 start_body: SolarSystemPlanet = None,
                 target_bodies: List[SolarSystemPlanet] = None,
                 start_time: Time = None,
                 action_step: TimeDelta = TimeDelta(1 * u.minute),
                 simulation_step: TimeDelta = TimeDelta(1 * u.second),
                 spaceship_name: SpaceShipName = SpaceShipName.DEFAULT,
                 spaceship_initial_altitude: u.km = 400 * u.km,
                 spaceship_mass: u.kg = None,
                 spaceship_propellant_mass: u.kg = None,
                 spaceship_isp: u.s = None,
                 spaceship_engine_thrust: u.N = None):
        super(SolarSystemGrav, self).__init__()

        if start_body is None:
            start_body = Earth
        if target_bodies is None:
            target_bodies = [Mars]
        if start_time is None:
            start_time = Time(datetime.now()).tdb

        # todo: enforce action_step/simulation_step is an integer?

        self.start_body = start_body
        self.target_bodies = target_bodies
        self.spaceship_initial_altitude = spaceship_initial_altitude
        self.start_time = start_time
        self.current_time = None
        self.time_step = action_step
        self.simulation_step = simulation_step
        self.done = False
        self.reward = 0
        self.done = False

        self.spaceship_name = spaceship_name
        self.spaceship_mass = spaceship_mass
        self.spaceship_propellant_mass = spaceship_propellant_mass
        self.spaceship_isp = spaceship_isp
        self.spaceship_engine_thrust = spaceship_engine_thrust

        # set up solar system
        solar_system_ephemeris.set("jpl")
        # Download & use JPL Ephem

        body_dict = {
            SystemScope.EARTH: [Earth, Moon],
            SystemScope.ALL: [
                Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn,
                Uranus, Neptune, Pluto
            ]
        }
        # define bodies to model
        # poliastro.bodies.SolarSystemPlanet =
        #   Sun, Earth, Moon, Mercury, Venus, Mars, Jupiter, Saturn, Uranus, Neptune, Pluto
        # could also add versions for: only inner solar system, only 'major' bodies jovan moons, saturn's moons?

        try:
            self.body_list = body_dict[bodies]
        except KeyError:
            raise KeyError(f"bodies must be one of {body_dict.keys()}")

        # set up spacecraft

        self.spaceship = self._init_spaceship()

        self.current_ephem = None

        # init:
        # * which bodies are modelled
        # * what time it is
        # * what time_step to use
        # * target body
        # * spaceship pos/vel (orbit?) /fuel/thrust
        # *

        # init must define action & observation space
        # initialize model solar system
        #
        # Define action and observation space
        # They must be gym.spaces objects

        # observation ~~time~~, time_step, craft position, craft velocity, craft fuel, craft engine power,
        # bodies: position, velocity, mass

        # [time_step, [craft position, velocity, fuel, engine power],
        # [body_1_is_target, body_1_position, body_1_velocity, body_1_mass],
        # ...
        # [body_n_is_target, body_n_position, body_n_velocity, body_n_mass]]
        self.observation_space = spaces.Space()

        # action:
        # tuple [[x,y,z], burn duration]
        self.action_space = spaces.Tuple((
            spaces.Box(low=-1.0, high=1.0,
                       shape=(3, )),  # x,y,z direction vector
            spaces.Box(low=0.0, high=1.0,
                       shape=(1, ))  # burn duration as percent of time_step
        ))
Example #12
    def __init__(self,
                 urdfRoot=pybullet_data.getDataPath(),
                 actionRepeat=1,
                 isEnableSelfCollision=True,
                 renders=False,
                 isDiscrete=False):
        self._timeStep = 1. / 240.
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._isEnableSelfCollision = isEnableSelfCollision
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        #self._cam_dist = 1.3
        #self._cam_yaw = 180
        #self._cam_pitch = -40
        self._cam_dist = 0.3
        #self._cam_yaw = 45
        self._cam_roll = 0
        self._cam_yaw = 90
        self._cam_pitch = -40
        #self._width = 341
        #self._height = 256
        self._kinect_rgb_width = 1920
        self._kinect_rgb_height = 1080
        self._kinect_d_width = 512
        self._kinect_d_height = 424

        self._handcamera_width = 640
        self._handcamera_height = 480

        self._isDiscrete = isDiscrete
        #self._isBox = isBox
        self.terminated = 0
        self._p = p
        if self._renders:
            cid = p.connect(p.SHARED_MEMORY)
            if (cid < 0):
                p.connect(p.GUI)
            p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
        else:
            p.connect(p.DIRECT)
        #timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
        self.seed()
        self.reset()
        observationDim = len(self.getExtendedObservation())
        #print("observationDim")
        #print(observationDim)

        observation_high = np.array([np.finfo(np.float32).max] *
                                    observationDim)
        if (self._isDiscrete):
            self.action_space = spaces.Discrete(7)
        else:
            action_dim = 12
            self._action_bound = 1
            action_high = np.array([self._action_bound] * action_dim)
            self.action_space = spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
            self._proximity_low = np.array([0] * 3)
            self._proximity_high = np.array([1] * 3)
            self._force_low = np.array([0] * 3)
            self._force_high = np.array([10] * 3)

        self.observation_space = spaces.Tuple(
            (spaces.Box(low=0,
                        high=255,
                        shape=(self._kinect_rgb_height, self._kinect_rgb_width,
                               4),
                        dtype=np.uint8),
             spaces.Box(self._proximity_low,
                        self._proximity_high,
                        dtype=np.float32),
             spaces.Box(self._force_low, self._force_high, dtype=np.float32)))

        self.viewer = None
Example #13
 def response_space(self):
     res_space = self._response_model_ctor.response_space()
     return spaces.Tuple(tuple([
         res_space,
     ] * self._slate_size))
Example #14
 def __init__(self, spaces):
     super().__init__(gym_space=gym_spaces.Tuple(spaces))
Example #15
    def __init__(self, environment_filename=None, docker_training=False, worker_id=0, retro=True,
                 timeout_wait=30, realtime_mode=False, config=None, greyscale=False):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual 
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each 
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resize visual observation to 84x84 (int8) and flattens action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     docker_training=docker_training,
                                     timeout_wait=timeout_wait)

        split_name = self._env.academy_name.split('-v')
        if len(split_name) == 2 and split_name[0] == "ObstacleTower":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment"
            )

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" + self.version +
                " but only the following versions are compatible with this gym: " +
                str(self.ALLOWED_VERSIONS)
            )

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        if config != None:
            self.config = config
        else:
            self.config = None

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException("Environment provides no visual observations.")

        self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1:
            logger.warning("The environment contains more than one visual observation. "
                           "Please note that only the first will be provided in the observation.")

        # Check for number of agents in scene.
        initial_info = self._env.reset(train_mode=not self.realtime_mode)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)

        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = brain.camera_resolutions[0]["height"]
        camera_width = brain.camera_resolutions[0]["width"]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0, image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth)
        )
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0, high=max_float, shape=(1,), dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space)
            )
Example #16
def test_split_batch_fn():
    # from continuum.datasets import MNIST
    batch_size = 5
    max_batches = 10

    def split_batch_fn(
        batch: Tuple[Tensor, Tensor, Tensor]
    ) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
        x, y, t = batch
        return (x, t), y

    # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]))
    from continuum import ClassIncremental
    from continuum.datasets import MNIST
    from continuum.tasks import split_train_val

    scenario = ClassIncremental(
        MNIST("data", download=True, train=True),
        increment=2,
        transformations=Compose([Transforms.to_tensor, Transforms.three_channels]),
    )

    classes_per_task = scenario.nb_classes // scenario.nb_tasks
    print(f"Number of classes per task {classes_per_task}.")
    for i, task_dataset in enumerate(scenario):
        env = PassiveEnvironment(
            task_dataset,
            n_classes=classes_per_task,
            batch_size=batch_size,
            split_batch_fn=split_batch_fn,
            # Need to pass the observation space, in this case.
            observation_space=spaces.Tuple(
                [
                    spaces.Box(low=0, high=1, shape=(3, 28, 28)),
                    spaces.Discrete(scenario.nb_tasks),  # task label
                ]
            ),
            action_space=spaces.Box(
                low=np.array([i * classes_per_task]),
                high=np.array([(i + 1) * classes_per_task]),
                dtype=int,
            ),
        )
        assert spaces.Box(
            low=np.array([i * classes_per_task]),
            high=np.array([(i + 1) * classes_per_task]),
            dtype=int,
        ).shape == (1,)
        assert isinstance(env.observation_space[0], spaces.Box)
        assert env.observation_space[0].shape == (batch_size, 3, 28, 28)
        assert env.observation_space[1].shape == (batch_size,)
        assert env.action_space.shape == (batch_size, 1)
        assert env.reward_space.shape == (batch_size, 1)

        env.seed(123)

        obs = env.reset()
        assert len(obs) == 2
        x, t = obs
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)

        obs, reward, done, info = env.step(env.action_space.sample())
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)
        assert reward.shape == (batch_size,)
        assert not done

        env.close()
Example #17
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
Example #18
    def __init__(self,
                 image_paths,
                 true_bboxes,
                 playout_episode=False,
                 premasking=True,
                 mode='train',
                 max_steps_per_image=200,
                 seed=None,
                 bbox_scaling=0.125,
                 bbox_transformer='base',
                 has_termination_action=True,
                 ior_marker_type='cross',
                 history_length=10):
        """
        :param image_paths: The paths to the individual images
        :param true_bboxes: The true bounding boxes for each image
        :type image_paths: String or list
        :type true_bboxes: numpy.ndarray
        """
        # Determines whether the agent is training or testing
        # Optimizations can be applied during training that are not allowed for testing
        self.mode = mode
        # Factor for scaling all bounding boxes relative to their size
        self.bbox_scaling = bbox_scaling
        # Whether IoR markers will be placed upfront after loading the image
        self.premasking = premasking
        # Whether an episode terminates after a single trigger or is played out until the end
        self.playout_episode = playout_episode
        # Episodes will be terminated automatically after reaching max steps
        self.max_steps_per_image = max_steps_per_image
        # Whether a termination action should be provided in the action set
        self.has_termination_action = has_termination_action
        # The type of IoR marker to be used when masking trigger regions
        self.ior_marker_type = ior_marker_type
        # Length of history in state & agent model
        self.history_length = history_length

        # Initialize action space
        self.bbox_transformer = create_bbox_transformer(bbox_transformer)
        self.action_space = spaces.Discrete(len(self.action_set))
        # 224*224*3 (RGB image) + 9 * 10 (one-hot-encoded history) = 150618
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=256, shape=(224, 224, 3)),
            spaces.Box(low=0,
                       high=1,
                       shape=(self.history_length, len(self.action_set)))
        ])

        # Initialize dataset
        if type(image_paths) is not list:
            image_paths = [image_paths]
        self.image_paths = image_paths
        self.true_bboxes = [[TextLocEnv.to_standard_box(b) for b in bboxes]
                            for bboxes in true_bboxes]

        # For registering a handler that will be executed once after a step
        self.post_step_handler = None

        # Episode-specific

        # Image for the current episode
        self.episode_image = None
        # Ground truth bounding boxes for the current episode image
        self.episode_true_bboxes = None
        # Predicted bounding boxes for the current episode image
        self.episode_pred_bboxes = None
        # IoU values for each trigger in the current episode
        self.episode_trigger_ious = None
        # List of indices of masked bounding boxes for the current episode image
        self.episode_masked_indices = []
        # Number of trigger actions used so far
        self.num_triggers_used = 0

        self.seed(seed=seed)
        self.reset()
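
The Tuple observation above pairs a raw image with a one-hot action history, so its flattened size is 224*224*3 + history_length*len(action_set). A small sketch (assuming a 9-action set and uint8 pixel bounds) of how a sample could be unpacked and flattened:

import numpy as np
from gym import spaces

history_length, num_actions = 10, 9  # assumed action-set size

obs_space = spaces.Tuple([
    spaces.Box(low=0, high=255, shape=(224, 224, 3), dtype=np.uint8),
    spaces.Box(low=0, high=1, shape=(history_length, num_actions), dtype=np.float32),
])

image, history = obs_space.sample()
flat = np.concatenate([image.reshape(-1), history.reshape(-1)])
assert flat.size == 224 * 224 * 3 + history_length * num_actions  # 150618 for 9 actions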
Beispiel #19
0
    def __init__(
        self,
        environment_filename=None,
        worker_id=0,
        retro=True,
        timeout_wait=30,
        realtime_mode=False,
        config=None,
        greyscale=False,
    ):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          worker_id: The index of the worker in the case where multiple environments are running.  Each 
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resizes visual observations to 84x84 (uint8) and flattens the action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        if environment_filename is None:
            registry = UnityEnvRegistry()
            registry.register_from_yaml(self._REGISTRY_YAML)
            self._env = registry["ObstacleTower"].make(
                worker_id=worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config])
        else:
            self._env = UnityEnvironment(
                environment_filename,
                worker_id,
                timeout_wait=timeout_wait,
                side_channels=[self.reset_parameters, self.engine_config],
            )

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)
        self._env.reset()
        behavior_name = list(self._env.behavior_specs)[0]
        split_name = behavior_name.split("-v")
        if len(split_name) == 2 and split_name[0] == "ObstacleTowerAgent":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" +
                self.version +
                " but only the following versions are compatible with this gym: "
                + str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        self.config = config

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check behavior configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one agent in this environment "
                "if it is wrapped in a gym.")
        self.behavior_name = behavior_name
        behavior_spec = self._env.behavior_specs[behavior_name]

        if len(behavior_spec) < 2:
            raise UnityGymException(
                "Environment provides too few observations.")

        self.uint8_visual = uint8_visual

        # Check for number of agents in scene.
        initial_info, terminal_info = self._env.get_steps(behavior_name)
        self._check_agents(len(initial_info))

        # Set observation and action spaces
        if len(behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(behavior_spec.action_shape[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(behavior_spec.action_shape)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    behavior_spec.action_shape)
        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = behavior_spec.observation_shapes[0][0]
        camera_width = behavior_spec.observation_shapes[0][1]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0,
            image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth),
        )
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))
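
Depending on retro, downstream code sees either a single image Box or a Tuple of (image, keys, time remaining, floor). A hedged sketch of branching on that layout with placeholder shapes, without a live Unity build:

import numpy as np
from gym import spaces

def unpack_observation(observation_space, obs):
    # Returns (image, extras) for both the retro (image-only) and full Tuple layouts.
    if isinstance(observation_space, spaces.Tuple):
        image, keys, time_remaining, floor = obs
        return image, {"keys": keys, "time_remaining": time_remaining, "floor": floor}
    return obs, {}

# Illustrative non-retro layout; the camera resolution here is a placeholder.
obs_space = spaces.Tuple((
    spaces.Box(0, 1.0, shape=(168, 168, 3), dtype=np.float32),
    spaces.Discrete(5),
    spaces.Box(0.0, np.finfo(np.float32).max, shape=(1,), dtype=np.float32),
    spaces.Discrete(9999),
))
image, extras = unpack_observation(obs_space, obs_space.sample())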
    def __init__(self):
        '''
        general property
        '''
        # self.tmp_pos = np.array([None,None,None,None])
        self.episodes = 0
        self.fps = 100
        self.iteration, self.max_iteration = 0, 60 * self.fps

        self.width = 256
        self.height = 144

        self.gravity = 9.8
        
        self.max_absolute_thrust = 16 # 2 * self.gravity
        self.min_absolute_thrust = 4
        self.thrust_sensity = (self.max_absolute_thrust - self.min_absolute_thrust) / 2

        self.min_absolute_x, self.max_absolute_x = 0, self.width
        self.min_absolute_y, self.max_absolute_y = 0, self.height

        self.min_initial_distance, self.max_initial_distance = 5, 30
        self.min_detect_distance, self.max_detect_distance = 1, 30

        self.max_absolute_angle = 180
        self.max_roll_angle = 40
        self.max_pitch_angle = 40
        self.max_yaw_angle = 180

        #self.max_absolute_thrust = 2 * self.mass_hunter * self.gravity

        '''
        state related property
        '''
        # configuration of relative position in view
        self.queue_length = 8
        self.coordinate_queue = None
        self.distance_queue = None
        self.in_fov_queue = None

        # threshold for orientation
        self.min_angle, self.max_angle = -1, 1 # -180 -> 180
        self.min_roll, self.max_roll = self.min_angle, self.max_angle
        self.min_pitch, self.max_pitch = self.min_angle, self.max_angle
        self.min_yaw, self.max_yaw = self.min_angle, self.max_angle

        # threshold for thrust
        self.min_thrust, self.max_thrust = -1, 1 # thrust = self.min_absolute_thrust + self.thrust_sensity * (x + 1)

        # threshold for relative position in view
        self.min_relative_x, self.max_relative_x = -1, 1 # 1 - 256
        self.min_relative_y, self.max_relative_y = -1, 1 # 1 - 144

        # threshold for distance within target and hunter
        self.min_distance, self.max_distance = 0, 1 # 0 -> 30

        # threshold for state
        self.low_state = np.array(
            [self.min_roll, self.min_pitch, self.min_yaw, self.min_thrust] 
            + self.queue_length * [self.min_relative_x, self.min_relative_y]
            + self.queue_length * [self.min_distance]
            )
        self.high_state = np.array(
            [self.max_roll, self.max_pitch, self.max_yaw, self.max_thrust] 
            + self.queue_length * [self.max_relative_x, self.max_relative_y]
            + self.queue_length * [self.max_distance]
            )

        '''
        action related property
        assume symmetric actions
        '''
        # threshold for orientation
        self.min_roll_action, self.max_roll_action = -1, 1 # -180 -> 180
        self.min_pitch_action, self.max_pitch_action = -1, 1 # -180 -> 180
        self.min_yaw_action, self.max_yaw_action = -1, 1 # -180 -> 180

        # threshold for thrust
        self.min_thrust_action, self.max_thrust_action = -1, 1 # -2 * self.mass_hunter * self.gravity -> 2 * self.mass_hunter * self.gravity

        # threshold for action
        self.low_action = np.array([self.min_roll_action, self.min_pitch_action, self.min_yaw_action, self.min_thrust_action])
        self.high_action = np.array([self.max_roll_action, self.max_pitch_action, self.max_yaw_action, self.max_thrust_action])

        '''
        define action space and observation space
        '''
        self.action_space = spaces.Box(low=self.low_action, high=self.high_action)
        # self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)

        self.observation_space = spaces.Tuple((
            spaces.Box(low=self.low_state, high=self.high_state),
            spaces.MultiBinary(self.queue_length)  # whether the coordinates are in the FOV, i.e. whether the distance can be measured
            ))

        self.seed()
        self.reset()
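
Actions are normalised to [-1, 1] and mapped back to physical ranges; following the comment above, thrust = min_absolute_thrust + thrust_sensity * (x + 1). A quick standalone check of that mapping with the constants defined above:

# Standalone check of the thrust mapping (constants copied from the class above).
min_absolute_thrust = 4
max_absolute_thrust = 16
thrust_sensity = (max_absolute_thrust - min_absolute_thrust) / 2  # 6.0

def thrust_from_action(x):
    # Maps a normalised action x in [-1, 1] onto [min_absolute_thrust, max_absolute_thrust].
    return min_absolute_thrust + thrust_sensity * (x + 1)

assert thrust_from_action(-1) == 4.0
assert thrust_from_action(0) == 10.0
assert thrust_from_action(1) == 16.0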
Beispiel #21
0
from gym import spaces

from .menu_manager import *
from .state import *
from .pad import Pad
from .dolphin import DolphinRunner
from . import ctype_util as ctutil
from . import ssbm  # assumed sibling module providing RealControllerState (used below)
from .reward import computeRewards

buttons = ['A', 'B', 'Y', 'L', 'Z']

button_space = spaces.Discrete(len(buttons) + 1)
main_stick_space = spaces.Box(0, 1, [2]) # X and Y axes

c_directions = [(0.5, 0.5), (0.5, 1), (0.5, 0), (0, 0.5), (1, 0.5)]
c_stick_space = spaces.Discrete(len(c_directions))

controller_space = spaces.Tuple((button_space, main_stick_space, c_stick_space))

def realController(control):
  button, main, c = control

  controller = ssbm.RealControllerState()

  if button < len(buttons):
    setattr(controller, 'button_' + buttons[button], True)

  controller.stick_MAIN = tuple(main)
  controller.stick_C = c_directions[c]

  return controller
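
controller_space is a Tuple of (button index, main-stick X/Y, C-stick direction), so a sample is an ordinary Python tuple. A self-contained sampling sketch; in the original project the sampled tuple would then be passed to realController, which depends on ssbm:

from gym import spaces

buttons = ['A', 'B', 'Y', 'L', 'Z']
c_directions = [(0.5, 0.5), (0.5, 1), (0.5, 0), (0, 0.5), (1, 0.5)]

controller_space = spaces.Tuple((
    spaces.Discrete(len(buttons) + 1),   # button index; the last value means "no button"
    spaces.Box(0, 1, [2]),               # main stick X/Y
    spaces.Discrete(len(c_directions)),  # C-stick direction index
))

button, main, c = controller_space.sample()
print(buttons[button] if button < len(buttons) else "no button",
      tuple(main), c_directions[c])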

class BoolConv:
 def _set_action_space(self):
     self.action_space = spaces.Tuple(
         [self.agents[i].action_space for i in range(self.num_agents)])
Beispiel #23
0
  def __init__(self, conv, permutation):
    self.conv = conv
    self.permutation = permutation

    self.space = spaces.Tuple([conv.space for _ in permutation])
 def _set_observation_space(self):
     self.observation_space = spaces.Tuple(
         [self.agents[i].observation_space for i in range(self.num_agents)])
    def __init__(
            self, 
            n_agents, 
            f_init=1,
            growth_rate=1,
            S_eq=1, 
            max_steps=1000, 
            signal_size=2, 
            signal_duration=1,
            threshold=1e-4,
            h_fn=def_h_fn,
            f_fn=def_f_fn,
            p_fn=def_p_fn,
            c_fn=def_c_fn,
            s_fn=def_s_fn
            ):
        """
        Parameters
        ----------
        n_agents : int
            Number of agents in the environment
        f_init : double
            Initial resource stock 
        growth_rate : double
            Intrinsic growth rate 
        S_eq : double
            Equilibrium population
        max_steps : int
            Maximum number of steps
        signal_size : int
            Signal size (cardinality)
        signal_duration : int
            Duration between signal shifts
        threshold : double
            Minimum stock threshold
        h_fn : lambda
            Total consumed resources function
        f_fn : lambda
            Spawner-recruit function
        p_fn : lambda
            Price function
        c_fn : lambda
            Cost function
        s_fn : lambda
            Signal function

        """
        super(FishermanEnv, self).__init__()

        self.n_agents = n_agents
        self.max_steps = max_steps
        self.f_init = f_init
        self.growth_rate = growth_rate

        self.S_eq = S_eq

        self.signal_size = signal_size
        self.signal_duration = signal_duration
        self.signal_stochastic_offset = random.randint(1,signal_size)

        self.done = True
        self.cur_stock = None
        self.cur_step = None
        self.last_actions = None

        self.threshold = threshold

        self.seed()

        self.l_agents = ["agent{}".format(i) for i in range(self.n_agents)]

        # Environment specific customs
        self.h_fn = h_fn
        self.f_fn = f_fn
        self.p_fn = p_fn
        self.c_fn = c_fn
        self.s_fn = s_fn

        self.observation_space = spaces.Tuple((
            spaces.MultiBinary(self.signal_size),
            spaces.Box(low=0, high=1, shape=(1,), dtype=float),
            spaces.Box(low=0, high=np.inf, shape=(1,), dtype=float),
        ))
        self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=float)
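
Each agent's observation is a (signal, normalised stock, unbounded scalar) triple drawn from the Tuple space above. A minimal sketch with illustrative values; the real quantities come from the stock dynamics and the s_fn/p_fn callbacks:

import numpy as np
from gym import spaces

signal_size = 2  # matches the default above
observation_space = spaces.Tuple((
    spaces.MultiBinary(signal_size),                          # current signal
    spaces.Box(low=0, high=1, shape=(1,), dtype=float),       # normalised stock level
    spaces.Box(low=0, high=np.inf, shape=(1,), dtype=float),  # unbounded scalar (e.g. price)
))

signal, stock, price = observation_space.sample()
assert observation_space.contains((signal, stock, price))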
Beispiel #26
0
 def space(self) -> spaces.Space:
     return spaces.Tuple(
         [action_type.space() for action_type in self.agents_action_types])
Beispiel #27
0
import pytest

import numpy as np
import gym
from gym import spaces

from gym_utils import env_wrappers

flat_box_test_spaces = [
    (spaces.Box(0, 1, ()), 1),
    (spaces.Box(0, 1, (0, )), 0),
    (spaces.Box(0, 1, (4, )), 4),
    (spaces.Box(0, 1, (2, 3)), 6),
    (spaces.Discrete(5), 5),
    (spaces.Tuple((spaces.Box(0, 1, (2, )), spaces.Discrete(3))), 5),
    (spaces.Tuple(()), 0),
]
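
The expected sizes suggest Box entries flatten to prod(shape), Discrete entries to a one-hot vector of length n, and Tuple entries to the concatenation of their members. Recent gym versions expose flatdim with the same convention, which gives an (assumed-equivalent) sanity check of the list above:

from gym import spaces

# Sanity check of the expected sizes above: Discrete as one-hot, Tuple as concatenation.
for space, n in flat_box_test_spaces:
    assert spaces.flatdim(space) == n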


class TestFlatBoxView():
    @pytest.mark.parametrize('space,n', flat_box_test_spaces)
    def test_shape(self, space, n):
        low = np.zeros(n)
        high = np.ones(n)
        flat_space = env_wrappers.FlatBoxView(space)
        assert flat_space.shape == (n, )
        assert np.array_equal(flat_space.low, low)
        assert np.array_equal(flat_space.high, high)

    @pytest.mark.parametrize('space,n', flat_box_test_spaces)
    def test_sample(self, space, n):
    def __init__(self, level, no_reward=False, **kwargs):
        """
        Base class for the Gym interface for ViZDoom. Child classes are defined in vizdoom_env_definitions.py;
        they contain the level parameter and pass through any kwargs from gym.make().
        :param level: index of level in the CONFIGS list above
        :param kwargs: keyword arguments from the gym.make(env_name_string, **kwargs) call. 'depth' will render the
        depth buffer and 'labels' will render the object labels; both are returned in the observation.
        Note that the observation will be a list with the screen buffer as the first element. If no kwargs are
        provided (or depth=False and labels=False) the observation will be of type np.ndarray.
        """

        # parse keyword arguments
        self.depth = kwargs.get("depth", False)
        self.labels = kwargs.get("labels", False)
        self.position = kwargs.get("position", False)
        self.health = kwargs.get("health", False)
        self.no_reward = no_reward

        # init game
        self.game = vzd.DoomGame()
        self.game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), "scenarios")
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.set_depth_buffer_enabled(self.depth)
        self.game.set_labels_buffer_enabled(self.labels)
        self.game.clear_available_game_variables()
        # if self.position:
        # import pdb; pdb.set_trace()
        self.game.add_available_game_variable(vzd.GameVariable.POSITION_X)
        self.game.add_available_game_variable(vzd.GameVariable.POSITION_Y)
        self.game.add_available_game_variable(vzd.GameVariable.POSITION_Z)
        self.game.add_available_game_variable(vzd.GameVariable.ANGLE)
        # if self.health:
        self.game.add_available_game_variable(vzd.GameVariable.HEALTH)
        self.info_str = [
            "position_x", "position_y", "position_z", "angle", "health"
        ]
        self.game.init()
        self.state = None
        self.viewer = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])

        # specify observation space(s)
        list_spaces: List[gym.Space] = [
            spaces.Box(
                0,
                255,
                (
                    self.game.get_screen_height(),
                    self.game.get_screen_width(),
                    self.game.get_screen_channels(),
                ),
                dtype=np.uint8,
            )
        ]
        if self.depth:
            list_spaces.append(
                spaces.Box(
                    0,
                    255,
                    (
                        self.game.get_screen_height(),
                        self.game.get_screen_width(),
                    ),
                    dtype=np.uint8,
                ))
        if self.labels:
            list_spaces.append(
                spaces.Box(
                    0,
                    255,
                    (
                        self.game.get_screen_height(),
                        self.game.get_screen_width(),
                    ),
                    dtype=np.uint8,
                ))
        if self.position:
            list_spaces.append(spaces.Box(-np.Inf, np.Inf, (4, 1)))
        if self.health:
            list_spaces.append(spaces.Box(0, np.Inf, (1, 1)))
        if len(list_spaces) == 1:
            self.observation_space = list_spaces[0]
        else:
            self.observation_space = spaces.Tuple(list_spaces)
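
The extra buffers are appended in a fixed order (screen, depth, labels, position, health), and the result only becomes a Tuple when more than one space is present. A hedged sketch of the same assembly with placeholder screen dimensions and no running DoomGame:

from typing import List

import gym
import numpy as np
from gym import spaces

def build_observation_space(h, w, ch, depth=False, labels=False,
                            position=False, health=False) -> gym.Space:
    # Mirrors the ordering above; h/w/ch are placeholders for the DoomGame screen size.
    obs_spaces: List[gym.Space] = [spaces.Box(0, 255, (h, w, ch), dtype=np.uint8)]
    if depth:
        obs_spaces.append(spaces.Box(0, 255, (h, w), dtype=np.uint8))
    if labels:
        obs_spaces.append(spaces.Box(0, 255, (h, w), dtype=np.uint8))
    if position:
        obs_spaces.append(spaces.Box(-np.inf, np.inf, (4, 1)))
    if health:
        obs_spaces.append(spaces.Box(0, np.inf, (1, 1)))
    return obs_spaces[0] if len(obs_spaces) == 1 else spaces.Tuple(obs_spaces)

obs_space = build_observation_space(480, 640, 3, depth=True, labels=True)
assert isinstance(obs_space, spaces.Tuple) and len(obs_space.spaces) == 3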
Beispiel #29
0
    def __init__(
        self,
        num_players: int,
        num_streets: int,
        blinds: Union[int, List[int]],
        antes: Union[int, List[int]],
        raise_sizes: Union[float, str, List[Union[float, str]]],
        num_raises: Union[float, List[float]],
        num_suits: int,
        num_ranks: int,
        num_hole_cards: int,
        num_community_cards: Union[int, List[int]],
        num_cards_for_hand: int,
        mandatory_num_hole_cards: int,
        start_stack: int,
        low_end_straight: bool = True,
        order: Optional[List[str]] = None,
    ) -> None:

        self.dealer = clubs.Dealer(
            num_players,
            num_streets,
            blinds,
            antes,
            raise_sizes,
            num_raises,
            num_suits,
            num_ranks,
            num_hole_cards,
            num_community_cards,
            num_cards_for_hand,
            mandatory_num_hole_cards,
            start_stack,
            low_end_straight,
            order,
        )

        max_bet = start_stack * num_players
        if isinstance(num_community_cards, list):
            comm_card_numb = sum(num_community_cards)
        else:
            comm_card_numb = num_community_cards
        self.action_space = spaces.Discrete(max_bet)
        card_space = spaces.Tuple(
            (spaces.Discrete(num_ranks), spaces.Discrete(num_suits)))
        hole_card_space = spaces.Tuple((card_space, ) * num_hole_cards)
        self.observation_space = spaces.Dict({
            "action":
            spaces.Discrete(num_players),
            "active":
            spaces.MultiBinary(num_players),
            "button":
            spaces.Discrete(num_players),
            "call":
            spaces.Discrete(max_bet),
            "community_cards":
            spaces.Tuple((card_space, ) * comm_card_numb),
            "hole_cards":
            spaces.Tuple((hole_card_space, ) * num_players),
            "max_raise":
            spaces.Discrete(max_bet),
            "min_raise":
            spaces.Discrete(max_bet),
            "pot":
            spaces.Discrete(max_bet),
            "stacks":
            spaces.Tuple((spaces.Discrete(max_bet), ) * num_players),
            "street_commits":
            spaces.Tuple((spaces.Discrete(max_bet), ) * num_players),
        })

        self.agents: Optional[Dict[int, agent.BaseAgent]] = None
        self.prev_obs: Optional[Dict] = None
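
Cards are encoded as (rank, suit) index pairs nested inside Tuple spaces. A small sketch with standard-deck values (hypothetical here; in the class above they are constructor arguments):

from gym import spaces

# Hypothetical standard-deck parameters; in the class above they are constructor arguments.
num_ranks, num_suits, num_hole_cards, num_players = 13, 4, 2, 2

card_space = spaces.Tuple((spaces.Discrete(num_ranks), spaces.Discrete(num_suits)))
hole_card_space = spaces.Tuple((card_space, ) * num_hole_cards)
hole_cards = spaces.Tuple((hole_card_space, ) * num_players)

rank, suit = hole_cards.sample()[0][0]  # first hole card of player 0, as (rank, suit) indices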
Beispiel #30
0
 def __init__(self):
     self.reset()
     self.action_space = spaces.Discrete(2)
     self.observation_space = spaces.Tuple(
         (spaces.Discrete(2), spaces.Discrete(2)))
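
The last snippet only defines the spaces; a minimal complete environment built around the same Tuple((Discrete(2), Discrete(2))) observation could look like the sketch below, with purely illustrative reward and termination logic:

import gym
from gym import spaces

class TwoBitEnv(gym.Env):
    # Minimal illustrative environment with a Tuple((Discrete(2), Discrete(2))) observation.

    def __init__(self):
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple(
            (spaces.Discrete(2), spaces.Discrete(2)))
        self.state = (0, 0)

    def reset(self):
        self.state = (0, 0)
        return self.state

    def step(self, action):
        # Illustrative dynamics: the action becomes the first bit; reward when both bits match.
        first, second = int(action), self.state[0]
        self.state = (first, second)
        reward = float(first == second)
        return self.state, reward, True, {}

env = TwoBitEnv()
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())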