Example #1
    def setup(self, dbars):
        print("Setup")
        # data
        #train_data = np.around(get_data(dbars))
        train_data = get_data(dbars)
        self.stock_price_history = train_data  # rounding to integers (which would shrink the state space) is disabled above
        self.n_stock, self.n_step = self.stock_price_history.shape
        print(self.n_stock, self.n_step)

        # instance attributes
        self.init_invest = START_MONEY
        self.cur_step = None
        self.stock_owned = None
        self.stock_price = None
        self.cash_in_hand = None

        # action space
        self.action_space = spaces.Discrete(3**self.n_stock)
        self.action_combo = [
            *map(list, itertools.product([0, 1, 2], repeat=self.n_stock))
        ]

        # observation space: give estimates in order to sample and build scaler
        stock_max_price = self.stock_price_history.max(axis=1)
        #stock_range = [[0, self.init_invest * 2 // mx] for mx in stock_max_price]
        stock_range = [[0, 1000], [0, 1000], [0, 1000], [0, 1000], [0, 1000]]  # hard-coded for five stocks; the commented-out line above is the general form
        price_range = [[0, mx * 100] for mx in stock_max_price]
        cash_in_hand_range = [[0, self.init_invest * 2]]
        print(stock_range + price_range + cash_in_hand_range)

        self.observation_space = spaces.MultiDiscrete(stock_range +
                                                      price_range +
                                                      cash_in_hand_range)

        # seed and start
        self.seed()
        self.reset()

        state_size = self.observation_space.shape
        action_size = self.action_space.n
        self.agent = QAgent(state_size, action_size)
        self.scaler = get_scaler(self.stock_price_history, self.init_invest,
                                 self.n_stock)

        # parameters
        self.batch_size = 500

        # here we could have a variable called 'train'. If it is true we train, otherwise we load from weight file.

        # here we train =]
        state = self.reset()
        state = self.scaler.transform([state])
        for time in range(self.n_step):
            print("time:", time, "/", self.n_step)
            action = self.agent.act(state)
            next_state, reward, done = self.train_step(action)
            next_state = self.scaler.transform([next_state])
            self.agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
            if len(self.agent.memory) > self.batch_size:  # train faster with this
                self.agent.replay(self.batch_size)

        self.agent.save('./weights/dqn')

        self.last_state = self.reset()
        self.last_state = self.scaler.transform([self.last_state])
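
get_scaler is referenced above but not shown. A minimal sketch of what such a helper could do, assuming scikit-learn's StandardScaler and random sampling of plausible states (the name and signature come from the call above; the body is an assumption, not this project's actual code):

import numpy as np
from sklearn.preprocessing import StandardScaler


def get_scaler(stock_price_history, init_invest, n_stock, n_samples=1000):
    # Sample random but plausible states (shares owned, prices, cash in hand)
    # and fit a StandardScaler so observations can be normalised for the agent.
    assert stock_price_history.shape[0] == n_stock  # sanity check
    max_price = stock_price_history.max(axis=1)
    max_shares = (init_invest * 2 // max_price).astype(int)
    samples = []
    for _ in range(n_samples):
        owned = np.random.randint(0, max_shares + 1)
        prices = np.random.uniform(0, max_price)
        cash = np.random.uniform(0, init_invest * 2)
        samples.append(np.concatenate([owned, prices, [cash]]))
    scaler = StandardScaler()
    scaler.fit(samples)
    return scaler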
Example #2
    def __init__(self, world, reset_callback=None, reward_callback=None,
                 observation_callback=None, info_callback=None,
                 done_callback=None, shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even if the action is continuous, it will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
                    act_space = spaces.MultiDiscrete([[0,act_space.n-1] for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = observation_callback(agent, self.world).shape
            self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=obs_dim, dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
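
Note that this snippet targets the older Gym MultiDiscrete API, which took a list of [min, max] ranges per dimension. In current Gym/Gymnasium, MultiDiscrete takes the number of options per dimension instead, so an equivalent declaration would look like this (a sketch with example branch sizes):

from gym import spaces

# Current Gym API: pass option counts per dimension rather than [min, max] ranges,
# e.g. a movement branch with 5 options and a communication branch with 10 options.
act_space = spaces.MultiDiscrete([5, 10])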
Example #3
    def _create_action_space(self):
        # collect different actions based on allowed commands
        continuous_actions = []
        discrete_actions = []
        multidiscrete_actions = []
        multidiscrete_action_ranges = []
        if self.add_noop_command:
            # add NOOP command
            discrete_actions.append("move 0\nturn 0")
        chs = self.mission_spec.getListOfCommandHandlers(0)
        for ch in chs:

            #cmds = self.mission_spec.getAllowedCommands(0, ch)
            cmds = ["move", "turn", "use"]
            for cmd in cmds:
                logger.debug(ch + ":" + cmd)
                if ch == "ContinuousMovement":
                    if cmd in ["move", "strafe", "pitch", "turn"]:
                        if self.continuous_discrete:
                            discrete_actions.append(cmd + " 1")
                            discrete_actions.append(cmd + " -1")
                        else:
                            continuous_actions.append(cmd)
                    elif cmd in ["crouch", "jump", "attack", "use"]:
                        if self.continuous_discrete:
                            discrete_actions.append(cmd + " 1")
                            discrete_actions.append(cmd + " 0")
                        else:
                            multidiscrete_actions.append(cmd)
                            multidiscrete_action_ranges.append([0, 1])
                    else:
                        raise ValueError("Unknown continuous action " + cmd)
                elif ch == "DiscreteMovement":
                    if cmd in SINGLE_DIRECTION_DISCRETE_MOVEMENTS:
                        discrete_actions.append(cmd + " 1")
                    elif cmd in MULTIPLE_DIRECTION_DISCRETE_MOVEMENTS:
                        discrete_actions.append(cmd + " 1")
                        discrete_actions.append(cmd + " -1")
                    else:
                        raise ValueError("Unknown discrete action " + cmd)
                elif ch == "AbsoluteMovement":
                    # TODO: support for AbsoluteMovement
                    logger.warn("Absolute movement not supported, ignoring.")
                elif ch == "Inventory":
                    # TODO: support for Inventory
                    logger.warn(
                        "Inventory management not supported, ignoring.")
                else:
                    logger.warn("Unknown commandhandler " + ch)

        # turn action lists into action spaces
        self.action_names = []
        self.action_spaces = []
        if len(discrete_actions) > 0:
            self.action_spaces.append(spaces.Discrete(len(discrete_actions)))
            self.action_names.append(discrete_actions)
        if len(continuous_actions) > 0:
            self.action_spaces.append(
                spaces.Box(-1, 1, (len(continuous_actions), )))
            self.action_names.append(continuous_actions)
        if len(multidiscrete_actions) > 0:
            self.action_spaces.append(
                spaces.MultiDiscrete(multidiscrete_action_ranges))
            self.action_names.append(multidiscrete_actions)

        # if there is only one action space, don't wrap it in Tuple
        if len(self.action_spaces) == 1:
            self.action_space = self.action_spaces[0]
        else:
            self.action_space = spaces.Tuple(self.action_spaces)
        logger.debug(self.action_space)
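
The parallel action_names lists built above make it straightforward to turn a sampled action back into Malmo command strings. A small illustrative helper (hypothetical, not part of the original environment), covering only the Discrete case:

def discrete_action_to_command(discrete_actions, action_index):
    # The Discrete sub-space simply indexes into the parallel list of command strings.
    return discrete_actions[action_index]


# e.g. discrete_action_to_command(["move 0\nturn 0", "move 1", "turn -1"], 1) -> "move 1"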
Example #4
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True,
                 reward_type=0):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        self.reward_type = reward_type
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even if the action is continuous, it will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        # custom parameters for uav begin-------------------------------------------------------------------------------
        self.uav = getattr(FLAGS, 'num_uav')
        self.size = getattr(FLAGS, 'size_map')
        self.radius = getattr(FLAGS, 'radius')**2
        self.max_epoch = getattr(FLAGS, 'max_epoch')
        self.map_scale_rate = getattr(FLAGS, 'map_scale_rate')
        self.threshold = getattr(FLAGS, 'map_threshold')

        self.map = np.zeros((self.size, self.size))
        self.coverage = np.zeros((self.size, self.size))
        self.normalized_fair = np.ones((self.size, self.size))
        self.fair = np.zeros((self.size, self.size))
        self.PoI = []
        base = -(self.size - 1) / 2
        for i in range(self.size):
            for j in range(self.size):
                self.PoI.append([base + i, base + j])
        self.poi_array = np.array(self.PoI)  # [size * size, 2]
        self.M = np.zeros((self.size, self.size))
        self.final = np.zeros((self.size, self.size), dtype=np.int64)
        self.state = []

        for a in world.agents:
            location_tem = [
                a.state.p_pos[0] * self.map_scale_rate,
                a.state.p_pos[1] * self.map_scale_rate
            ]
            self.state.append(location_tem)
        # energy
        self.energy = np.zeros(self.uav)
        self.jain_index = 0
        # cost per speed
        self.cost = 1
        self.honor = FLAGS.factor * self.cost
        self.last_r = 0
        # single uav total
        self.SUE_ENERGY = (self.cost * FLAGS.max_speed + self.honor)
        self.SUT_ENERGY = self.SUE_ENERGY * FLAGS.max_epoch
        self.dis_flag = False
        self.agent_index_for_greedy = 0
        # custom parameters for uav end---------------------------------------------------------------------------------
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ))
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ))
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = spaces.MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(
                observation_callback(self.agents, self.world, self.poi_array,
                                     self.M)[0])
            self.observation_space.append(
                spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, )))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
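
The nested loop above builds a grid of points of interest centred on the origin. An equivalent vectorised construction (a sketch, with size standing in for FLAGS.size_map):

import numpy as np

size = 10  # stands in for getattr(FLAGS, 'size_map')
base = -(size - 1) / 2
# Same PoI coordinates and ordering as the nested loop, built with meshgrid.
ii, jj = np.meshgrid(np.arange(size), np.arange(size), indexing="ij")
poi_array = np.stack([base + ii, base + jj], axis=-1).reshape(-1, 2)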
Example #5
    def __init__(self,
                 environment_filename: str,
                 worker_id=0,
                 docker_training=False,
                 n_arenas=1,
                 seed=0,
                 arenas_configurations=None,
                 greyscale=False,
                 retro=True,
                 inference=False,
                 resolution=None):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param docker_training: Whether this is running within a docker environment and should use a virtual
            frame buffer (xvfb).
        :param n_arenas: number of arenas to create in the environment (one agent per arena)
        :param arenas_configurations: an ArenaConfig to configure the items present in each arena, will spawn random
            objects randomly if not provided
        :param greyscale: whether the visual observations should be grayscaled or not
        :param retro: Resize visual observations to 84x84 (uint8) and flatten the action space.
        """
        self._env = UnityEnvironment(file_name=environment_filename,
                                     worker_id=worker_id,
                                     seed=seed,
                                     docker_training=docker_training,
                                     n_arenas=n_arenas,
                                     arenas_configurations=arenas_configurations,
                                     inference=inference,
                                     resolution=resolution)
        self.name = 'aaio'
        self.vector_obs = None
        self.inference = inference
        self.resolution = resolution
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale or retro
        # self._seed = None
        self.retro = retro
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.arenas_configurations = arenas_configurations

        self.flatten_branched = self.retro
        self.uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException("Environment provides no visual observations.")

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException("Environment provides no vector observations.")

        # Check for number of agents in scene.
        initial_info = self._env.reset(arenas_configurations=arenas_configurations)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        if self.retro and self._n_agents > 1:
            raise UnityGymException("Only one agent is allowed in retro mode, set n_agents to 1.")

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
        else:
            if self.flatten_branched:
                self._flattener = ActionFlattener(brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(brain.vector_action_space_size)

        # high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        # if self.visual_obs:
        if self._greyscale:
            depth = 1
        else:
            depth = 3

        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(camera_height, camera_width, depth)
            )

            self._observation_space = image_space
        else:
            image_space_max = 1.0
            image_space_dtype = np.float32
            camera_height = brain.camera_resolutions[0]["height"]
            camera_width = brain.camera_resolutions[0]["width"]
            max_float = np.finfo(np.float32).max

            image_space = spaces.Box(
                0, image_space_max,
                dtype=image_space_dtype,
                shape=(self._n_agents, camera_height, camera_width, depth)
            )
            vector_space = spaces.Box(-max_float, max_float,
                                      shape=(self._n_agents, brain.vector_observation_space_size))
            self._observation_space = spaces.Tuple((image_space, vector_space))
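
ActionFlattener comes from the wrapping library and is not shown here. The usual idea is to enumerate every combination of the branched action sizes and expose them as a single Discrete space; a minimal sketch of that idea (an assumption about its internals, not the exact implementation):

import itertools

from gym import spaces


class ActionFlattenerSketch:
    """Flatten branched (MultiDiscrete-style) action sizes into one Discrete space."""

    def __init__(self, branched_action_size):
        possible_vals = [range(n) for n in branched_action_size]
        # Every combination of branch choices gets its own flat index.
        self.action_lookup = {
            i: list(combo)
            for i, combo in enumerate(itertools.product(*possible_vals))
        }
        self.action_space = spaces.Discrete(len(self.action_lookup))

    def lookup_action(self, action):
        # Map a flat Discrete action back to the per-branch action vector.
        return self.action_lookup[action]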
Example #6
    def __init__(
        self,
        unity_env: BaseEnv,
        uint8_visual: bool = False,
        flatten_branched: bool = False,
        allow_multiple_obs: bool = False,
        termination_mode: str = TerminationMode.ANY,
    ):
        """

        Parameters:
            unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed when the UnityToGymWrapper closes.
            uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
            flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
                MultiDiscrete.
            allow_multiple_obs: If True, return a list of np.ndarrays as observations with the first elements
                containing the visual observations and the last element containing the array of vector observations.
                If False, returns a single np.ndarray containing either only a single visual observation or the array of
                vector observations.
            termination_mode: A string (enum) suggesting when to end an episode. Supports "any", "majority" and "all"
                which are attributes on `TerminationMode`.
        """
        self._env = unity_env

        # Take a single step so that the brain information will be sent over
        if not self._env.behavior_specs:
            self._env.step()

        self.visual_obs = None

        # Save the step result from the last time all Agents requested decisions.
        self._previous_decision_step: DecisionSteps = None
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_obs = allow_multiple_obs

        # When to stop the game. Only `any` supported currently but it should have options for `all` and `majority`.
        assert termination_mode in TerminationMode.__dict__
        self.termination_mode = termination_mode

        agent_name = list(self._env.behavior_specs.keys())[0]
        self.name = list(
            self._env.behavior_specs.keys())[0]  # TODO: no need for self.name
        self.agent_prefix = agent_name[:agent_name.index('=') + 1]
        self.group_spec = self._env.behavior_specs[agent_name]

        if self._get_n_vis_obs() == 0 and self._get_vec_obs_size() == 0:
            raise ValueError(
                "There are no observations provided by the environment.")

        if not self._get_n_vis_obs() >= 1 and uint8_visual:
            self.logger.warning(
                "uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")
        else:
            self.uint8_visual = uint8_visual
        if (self._get_n_vis_obs() + self._get_vec_obs_size() >= 2
                and not self._allow_multiple_obs):
            self.logger.warning(
                "The environment contains multiple observations. "
                "You must define allow_multiple_obs=True to receive them all. "
                "Otherwise, only the first visual observation (or vector observation if"
                "there are no visual observations) will be provided in the observation."
            )

        # Check for number of agents in scene.
        self._env.reset()
        decision_steps, _ = self._env.get_steps(agent_name)
        # self.num_agents = len(decision_steps)  # NOTE: Worked with FoodCollector
        self.num_agents = len(self._env.behavior_specs)
        self._previous_decision_step = decision_steps

        # Set action spaces
        if self.group_spec.is_action_discrete():
            branches = self.group_spec.discrete_action_branches
            if self.group_spec.action_size == 1:
                self._action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(branches)

        else:
            if flatten_branched:
                self.logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.ones(self.group_spec.action_shape)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)

        # Set observations space
        list_spaces: List[gym.Space] = []
        shapes = self._get_vis_obs_shape()
        for shape in shapes:
            if uint8_visual:
                list_spaces.append(
                    spaces.Box(0, 255, dtype=np.uint8, shape=shape))
            else:
                list_spaces.append(
                    spaces.Box(0, 1, dtype=np.float32, shape=shape))
        if self._get_vec_obs_size() > 0:
            # vector observation is last
            high = np.array([np.inf] * self._get_vec_obs_size())
            list_spaces.append(spaces.Box(-high, high, dtype=np.float32))
        if self._allow_multiple_obs:
            self._observation_space = spaces.Tuple(list_spaces)
        else:
            self._observation_space = list_spaces[
                0]  # only return the first one
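
TerminationMode is referenced but not defined in this snippet. Given the assert against TerminationMode.__dict__ and the docstring above, a minimal sketch of what it could look like (an assumption):

class TerminationMode:
    # Attribute names are what the assert checks against; values mirror the names
    # so that the default TerminationMode.ANY passes the membership test.
    ANY = "ANY"
    MAJORITY = "MAJORITY"
    ALL = "ALL"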
Example #7
class QwixxOneHotEnv(Env):
    """
    Action Space:
    Whites:
    - No take
    - Take as red
    - Take as yellow
    - Take as blue
    - Take as green
    Colors :
    - Take None
    - Take white 1 red
    - Take white 1 yellow
    - Take white 1 blue
    - Take white 1 green
    - Take white 2 red
    - Take white 2 yellow
    - Take white 2 blue
    - Take white 2 green
    """
    action_space = spaces.MultiDiscrete([5, 9])
    action_space.n = 45  # 5 white-die options x 9 colour options (flat size)
    dice: Dice
    previous_dice: Dice
    current_player: int
    progress: PlayerProgress
    env = object
    score: int

    def __init__(self, num_players=3, bot_player=0):
        self.bot_player = bot_player
        self.num_players = num_players
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(1, 48), dtype='float32')
        self.current_player = 0
        self.num_turns = 0
        self.last_actions = []
        self.last_reward = None
        self.invalid_move_reward = INVALID_MOVE_REWARD
        self.win_reward = WIN_REWARD
        self.lose_reward = LOSE_REWARD
        self.skip_bias = SKIP_BIAS

        self.reset()

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        Args:
            action (object): an action provided by the agent
        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        if not self.action_space.contains(action):
            # noinspection PyTypeChecker
            action = np.array([action % 5, action // 5])
        changed_values = []
        current_score = self._calculate_score()
        next_player = (self.current_player + 1) % self.num_players
        white_action, color_action = action
        self.last_actions = (WHITE_ACTION_COLOR[white_action], COLOR_ACTION[color_action])
        if not self._is_bots_roll() and color_action != 0:
            # non-roller players can't take the color die
            self.current_player = next_player
            return self._serialize_state(), self.invalid_move_reward, True, {}
            # {"error": "took color, did not roll", "scores": scores}
        # If it chooses not to take white or color it adds a strike
        if self._is_bots_roll() and white_action == 0 and color_action == 0:
            self.progress.strikes += 1
            self.current_player = next_player
            self._roll_dice()
            return (self._serialize_state(), self.calculate_skip_reward(),
                    self._is_done(), {"score": self._calculate_score()})
            # {"action": "took strike", "scores": scores}
        self.current_player = next_player
        # take white action
        if white_action != 0:
            white_color = WHITE_ACTION_COLOR[white_action]
            latest = self.progress.latest_num.__dict__[white_color]
            wdv = self._white_dice_value()
            # Checks validity of move
            if not COMPARE_FUNCTION[white_color](wdv, latest):
                return self._serialize_state(), self.invalid_move_reward, True, {"score": self._calculate_score()}
                # {"error": "took white die invalid", "latest": latest,
                #  "value": wdv, "color": white_color, "scores": scores})
            if wdv == COLORS_MAX[white_color] and not self._can_lock_color(white_color):
                return self._serialize_state(), self.invalid_move_reward, True, {"score": self._calculate_score()}
                # {"error": "tried to lock without having 5",
                #  "latest": self.progress[self.current_player].counts.__dict__[white_color],
                #  "color": white_color, "scores": scores})

            self.progress.counts.__dict__[white_color] += 1
            self.progress.latest_num.__dict__[white_color] = wdv
            changed_values.append((latest, wdv))

        # take color action
        if color_action != 0:
            white_die, color = COLOR_ACTION[color_action]
            latest = self.progress.latest_num.__dict__[color]
            cdv = self._color_dice_value(white_die, color)
            # Checks validity of move
            if not COMPARE_FUNCTION[color](cdv, latest):
                return self._serialize_state(), self.invalid_move_reward, True, {"score": self._calculate_score()}
                # {"error": "took color die invalid", "latest": latest,
                #  "value": cdv, "color": color, "white": white_die, "scores": scores})

            if cdv == COLORS_MAX[color] and not self._can_lock_color(color):
                self.current_player = next_player
                return self._serialize_state(), self.invalid_move_reward, True, {"score": self._calculate_score()}
                # {"error": "tried to lock without having 5",
                #  "latest": self.progress[self.current_player].counts.__dict__[color],
                #  "color": color, "scores": scores})

            self.progress.counts.__dict__[color] += 1
            self.progress.latest_num.__dict__[color] = cdv
            changed_values.append((latest, cdv))
        self.current_player = next_player
        reward = self.calculate_reward(changed_values, current_score)
        self._roll_dice()
        return (self._serialize_state(), reward,
                self._is_done(), {"score": self._calculate_score()})  # {"scores": scores}

    def calculate_skip_reward(self):
        wdv = self._white_dice_value()
        skipped = []
        for color, latest in self.progress.latest_num.__dict__.items():
            cmv = COLORS_MAX[color]
            # check if we have passed the white value
            if cmv >= latest and latest >= wdv:
                continue
            if wdv >= latest and latest >= cmv:
                continue
            mi = min([latest, wdv])
            ma = max([latest, wdv])
            if mi < ma:
                skipped.append(len(SKIP_WEIGHT[mi + 1: ma]))
        if skipped:
            r = min(skipped)
        else:
            r = 10
        self.last_reward = r
        if not self._is_bots_roll():
            return self.last_reward
        # if bot rolled we should see is colors were skipped too
        skipped = []
        for color, latest in self.progress.latest_num.__dict__.items():
            min_skip_distance = self.get_skipped_values(color, latest)
            if min_skip_distance is not None:
                skipped.append(min_skip_distance)
        if skipped:
            r = min(skipped)
        else:
            r = 10
        self.last_reward = min([self.last_reward, r])
        return self.last_reward - self.skip_bias

    def get_skipped_values(self, color, latest):
        cmv = COLORS_MAX[color]
        cval1 = self._color_dice_value('white1', color)
        cval2 = self._color_dice_value('white2', color)
        if cmv == latest:
            return None
        if cmv > latest:
            vals = []
            if cval1 - latest > 0:
                vals.append(cval1)
            if cval2 - latest > 0:
                vals.append(cval2)
            if not vals:
                return None
            return len(SKIP_WEIGHT[latest + 1: min(vals)])
        vals = []
        if latest - cval1 > 0:
            vals.append(cval1)
        if latest - cval2 > 0:
            vals.append(cval2)
        if not vals:
            return None
        return len(SKIP_WEIGHT[max(vals): latest - 1])

    def calculate_reward(self, changed_values, current_score):
        if not changed_values:
            return self.calculate_skip_reward()
        skipped = []
        for before, after in changed_values:
            if after - before >= 0:
                skipped.append(sum(SKIP_WEIGHT[before + 1: after]))
            else:
                skipped.append(sum(SKIP_WEIGHT[after + 1: before]))
        divisor = sum(skipped) + 1
        r = (self._calculate_score() - current_score) / divisor * 100
        self.last_reward = r
        return r

    def reset(self):
        """Resets internal state to beginning of game and starts a new game"""
        self.dice = Dice()
        self.last_actions = []
        self.current_player = 0
        self.progress = PlayerProgress()
        self._roll_dice()
        self.num_turns = 0
        self.previous_dice = None
        self.last_reward = None
        self.score = 0
        return self._serialize_state()

    def render(self, mode='human'):
        if self.previous_dice is not None:
            print("Previous dice: ", self.previous_dice)
        if len(self.last_actions) == 2:
            print("Last action: Whites: {}, Colors: {}".format(self.last_actions[0], self.last_actions[1]))
            print("Last reward: ", self.last_reward)
        print("\tLatest:", self.progress.latest_num)
        print("\tCounts:", self.progress.counts)
        print("\tStrikes:", self.progress.strikes)
        print("\tScore:", self._calculate_score())
        print("Num turns:", self.num_turns)
        print("Rolling player:", self.current_player)
        print("New dice:", self.dice)
        print("")

    def _calculate_score(self):
        scores = []
        for color, count in self.progress.counts.__dict__.items():
            color_score = SCORE[count]
            if self.progress.latest_num.__dict__[color] == COLORS_MAX[color]:
                color_score += 1
            scores.append(color_score)
        scores.append(self.progress.strikes * -5)
        self.score = sum(scores)
        return self.score

    def _color_is_locked(self, color):
        max_value = COLORS_MAX[color]
        return self.progress.latest_num.__dict__[color] == max_value

    def _can_lock_color(self, color):
        return self.progress.counts.__dict__[color] == 5

    def _is_done(self):
        if self.num_turns > 50:
            return True
        self.locked_colors = []
        for color, max_value in COLORS_MAX.items():
            if self.progress.strikes == 4:
                return True
            if self.progress.latest_num.__dict__[color] == max_value:
                self.locked_colors.append(color)
        return len(self.locked_colors) >= 2

    def _roll_dice(self):
        self.num_turns += 1
        self.previous_dice = Dice(**self.dice.__dict__)
        self.dice.white1 = np.random.choice(DIE_ROLLS)
        self.dice.white2 = np.random.choice(DIE_ROLLS)

        if not self._color_is_locked("yellow"):
            self.dice.yellow = np.random.choice(DIE_ROLLS)
        if not self._color_is_locked("red"):
            self.dice.red = np.random.choice(DIE_ROLLS)
        if not self._color_is_locked("blue"):
            self.dice.blue = np.random.choice(DIE_ROLLS)
        if not self._color_is_locked("green"):
            self.dice.green = np.random.choice(DIE_ROLLS)

    def _white_dice_value(self):
        return self.dice.white1 + self.dice.white2

    def _color_dice_value(self, white_die, color):
        dice = self.dice.__dict__
        return dice[white_die] + dice[color]

    def _is_bots_roll(self):
        return self.bot_player == self.current_player

    def _serialize_state(self):
        y = self._is_bots_roll()
        a = float(y)
        return np.array([
            a,
            self.current_player,
            self.bot_player,
            *self._serialize_dice(),
            *self._serialize_player(),
        ], dtype=np.float64)

    def _serialize_player(self) -> List[float]:
        """
        Serializes a players board. There are only 11 possible crosses you can do,
        this excludes the lock and the number 1. Since we store the latest numbers
        as their actual number, to get it to a 0.0-1.0 scale, we subtract one from
        red and yellow (can't roll a 1) and subtract 2 from green and blue (can't
        roll a 1 and not counting the lock)
        """
        return [
            float(self.progress.counts.red),
            float(self.progress.counts.yellow),
            float(self.progress.counts.green),
            float(self.progress.counts.blue),
            float((self.progress.latest_num.red - 1) / 11.0),
            float((self.progress.latest_num.yellow - 1) / 11.0),
            1 - float((self.progress.latest_num.green - 2) / 11.0),
            1 - float((self.progress.latest_num.blue - 2) / 11.0),
            float(self.progress.strikes / 4.0),
        ]

    def _serialize_dice(self):
        return [
            *[1. if x == self.dice.white1 - 1 else 0.0 for x in range(6)],
            *[1. if x == self.dice.white2 - 1 else 0.0 for x in range(6)],
            *[1. if x == self.dice.red - 1 else 0.0 for x in range(6)],
            *[1. if x == self.dice.yellow - 1 else 0.0 for x in range(6)],
            *[1. if x == self.dice.green - 1 else 0.0 for x in range(6)],
            *[1. if x == self.dice.blue - 1 else 0.0 for x in range(6)],
        ]
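
A short illustration of the action encoding used above: step() accepts either a MultiDiscrete pair [white_action, color_action] or a flat index in range(45), which it splits with modulo and integer division:

# Flat index -> [white_action, color_action], exactly as done at the top of step().
for flat in (0, 7, 44):
    white_action, color_action = flat % 5, flat // 5
    print(flat, "->", [white_action, color_action])
# prints: 0 -> [0, 0], 7 -> [2, 1], 44 -> [4, 8]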
Example #8
 def test_bundle_and_unbundle_trivial(self):
     action_space = spaces.MultiDiscrete(np.ones((1, )))
     agent = random_agent.RandomAgent(action_space, random_seed=0)
     self.assertFalse(agent.unbundle('', 0, {}))
     self.assertEqual({'episode_num': 0},
                      agent.bundle_and_checkpoint('', 0))
Example #9
    def __init__(self,
                 fractional_power_levels=[0.25, 0.0],
                 eavesdropping=True,
                 num_agents=40,
                 initialization="Random",
                 aoi_reward=True,
                 episode_length=500.0,
                 comm_model="tw",
                 min_sinr=1.0,
                 last_comms=True):
        super(MultiAgentEnv, self).__init__()

        # Problem parameters
        self.last_comms = last_comms
        self.n_agents = num_agents
        self.n_nodes = self.n_agents * self.n_agents

        self.r_max = 500.0
        self.n_features = N_NODE_FEAT  # (TransTime, Parent Agent, PosX, PosY, VelX, VelY)
        self.n_edges = self.n_agents * self.n_agents

        self.carrier_frequency_ghz = 2.4
        self.min_SINR_dbm = min_sinr  # 10-15 is considered unreliable, cited paper uses -4
        self.gaussian_noise_dBm = -50

        self.gaussian_noise_mW = 10**(self.gaussian_noise_dBm / 10)
        self.path_loss_exponent = 2
        self.aoi_reward = aoi_reward
        self.distance_scale = self.r_max * 2

        self.fraction_of_rmax = fractional_power_levels
        self.power_levels = self.find_power_levels()

        self.r_max *= np.sqrt(self.n_agents / 40)

        # initialize state matrices
        self.edge_features = np.zeros((self.n_nodes, 1))
        self.episode_length = episode_length
        self.penalty = 0.0
        self.x = np.zeros((self.n_agents, self.n_features))
        self.network_buffer = np.zeros(
            (self.n_agents, self.n_agents, self.n_features))
        self.old_buffer = np.zeros(
            (self.n_agents, self.n_agents, self.n_features))
        self.relative_buffer = np.zeros(
            (self.n_agents, self.n_agents, self.n_features))
        self.diag = np.eye(self.n_agents, dtype=bool).reshape(
            self.n_agents, self.n_agents, 1)

        # each agent has their own action space of a n_agent vector of weights
        self.action_space = spaces.MultiDiscrete(
            [self.n_agents * len(self.power_levels)] * self.n_agents)

        self.observation_space = spaces.Dict([
            # (nxn) by (features-1) we maintain parent references by edges
            ("nodes",
             spaces.Box(shape=(self.n_agents * self.n_agents, N_NODE_FEAT),
                        low=-np.Inf,
                        high=np.Inf,
                        dtype=np.float32)),
            # upper bound: n fully connected trees with (n-1) edges each
            # TODO: ensure these bounds don't affect anything
            ("edges",
             spaces.Box(shape=(self.n_edges, N_EDGE_FEAT),
                        low=-np.Inf,
                        high=np.Inf,
                        dtype=np.float32)),
            # senders and receivers will each be one endpoint of an edge, and thus should be same size as edges
            ("senders",
             spaces.Box(shape=(self.n_edges, 1),
                        low=0,
                        high=self.n_agents,
                        dtype=np.float32)),
            ("receivers",
             spaces.Box(shape=(self.n_edges, 1),
                        low=0,
                        high=self.n_agents,
                        dtype=np.float32)),
            ("globals",
             spaces.Box(shape=(1, 1),
                        low=0,
                        high=self.episode_length,
                        dtype=np.float32)),
        ])

        # Plotting placeholders
        self.fig = None
        self.agent_markers = None
        self.np_random = None
        self.ax = None
        self.agent0_marker = None
        self._plot_text = None
        self.arrows = None
        self.current_arrow = None

        self.diff = None
        self.r2 = None

        self.timestep = 0
        self.avg_transmit_distance = 0

        self.symmetric_comms = True
        self.is_interference = True
        self.mst_action = None

        self.network_connected = False
        self.recompute_solution = False
        self.mobile_agents = False

        self.flocking = False
        self.biased_velocities = False

        self.known_initial_positions = False

        self.tx_power = None
        self.eavesdroppers = None
        self.eavesdroppers_response = None
        self.attempted_transmissions = None
        self.successful_transmissions = None
        self.eavesdropping = eavesdropping
        self.initial_formation = initialization
        # 'push' : At each time step, agent selects which agent they want to 'push' their buffer to
        # 'tw': An agent requests/pushes their buffer to an agent, hoping to get their information back

        self.comm_model = comm_model

        if self.flocking:
            self.render_radius = 2 * self.r_max
        else:
            self.render_radius = self.r_max

        # Packing and unpacking information
        self.keys = ['nodes', 'edges', 'senders', 'receivers', 'globals']
        self.save_plots = False
        self.seed()
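
The Dict observation space above is sampled and unpacked key by key (see self.keys). A small self-contained illustration of the same layout with shrunken sizes (not this environment's actual dimensions):

import numpy as np
from gym import spaces

n_agents, n_node_feat, n_edge_feat = 4, 6, 2
obs_space = spaces.Dict([
    ("nodes", spaces.Box(shape=(n_agents * n_agents, n_node_feat), low=-np.inf, high=np.inf, dtype=np.float32)),
    ("edges", spaces.Box(shape=(n_agents * n_agents, n_edge_feat), low=-np.inf, high=np.inf, dtype=np.float32)),
])
sample = obs_space.sample()
for key, value in sample.items():
    print(key, value.shape)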
Example #10
 def _get_action_space(self):
     screen_shape = self.observation_spec["screen"][1:]
     return spaces.MultiDiscrete([(0, 2)] + [(0, s - 1)
                                             for s in screen_shape])
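
This helper also uses the older range-based MultiDiscrete constructor. With the current Gym API the equivalent passes option counts directly (a sketch, with a placeholder screen shape):

from gym import spaces

screen_shape = (84, 84)  # stands in for observation_spec["screen"][1:]
# 3 options for the first dimension (0..2), then one dimension per screen axis (0..s-1).
action_space = spaces.MultiDiscrete([3] + [s for s in screen_shape])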
Example #11
    def __init__(self, env_path, worker_id = 1, no_graphis = False, realtime_mode = False, config = None):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
        
        Keyword Arguments:
            worker_id {int} -- Port of the environment's instance (default: {1})
            no_graphis {bool} -- Whether to run the executable without rendering (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphis, side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]
        
        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = (height, width, depth),
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observatoin_space = (vec_obs_length, )
        else:
            self._vector_observatoin_space = None
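
The config dict described in the docstring is stored in self._config, but how it is applied is outside this snippet. With ml-agents' EnvironmentParametersChannel, numeric reset parameters are typically forwarded on reset roughly like this (a hypothetical helper, assuming float-valued entries):

def apply_reset_parameters(reset_parameters_channel, config):
    # Forward each numeric reset parameter through the side channel before env.reset().
    if config is not None:
        for key, value in config.items():
            reset_parameters_channel.set_float_parameter(key, float(value))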
Example #12
def test_check_iterate_and_step(dataset: str,
                                expected_obs_shape: Tuple[int, ...],
                                batch_size: int):
    setting = IncrementalRLSetting(dataset=dataset, nb_tasks=5)
    assert len(setting.train_task_schedule) == 5
    assert not setting.smooth_task_boundaries
    assert setting.task_labels_at_train_time

    # TODO: Should we have the task label space in this case?
    assert setting.task_labels_at_train_time
    assert not setting.task_labels_at_test_time

    if batch_size is None:
        expected_obs_batch_shape = expected_obs_shape
    else:
        expected_obs_batch_shape = (batch_size, *expected_obs_shape)

    with setting.train_dataloader(batch_size=batch_size) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[0] == spaces.Box(0.0,
                                          1.0,
                                          expected_obs_batch_shape,
                                          dtype=np.float32)
        assert obs_space[1] == (spaces.MultiDiscrete([5] * batch_size)
                                if batch_size else spaces.Discrete(5))

    with setting.val_dataloader(batch_size=batch_size) as temp_env:
        # No task labels:
        obs_space = temp_env.observation_space

        assert obs_space[0] == spaces.Box(0.0,
                                          1.0,
                                          expected_obs_batch_shape,
                                          dtype=np.float32)
        if batch_size:
            assert str(obs_space[1]) == str(
                spaces.MultiDiscrete([5] * batch_size))
            # assert str(obs_space[1]) == str(spaces.Tuple([Sparse(spaces.Discrete(5), sparsity=1.) for _ in range(batch_size)]))
        else:
            # TODO: Should the task labels be given in the valid dataloader if they aren't during testing?
            assert obs_space[1] == spaces.Discrete(5)
            # assert obs_space[1] == Sparse(spaces.Discrete(5), sparsity=1.)

    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works atm.

    with setting.test_dataloader(batch_size=None) as temp_env:
        obs_space = temp_env.observation_space
        assert obs_space[1] == Sparse(spaces.Discrete(5), sparsity=1.0)
        # No task labels:
        # if batch_size:
        #     assert str(obs_space[1]) == str(spaces.Tuple([Sparse(spaces.Discrete(5), sparsity=1.) for _ in range(batch_size)]))

    def check_obs(obs, task_label: int = None):
        if batch_size is None:
            assert obs[1] == task_label
        else:
            assert isinstance(obs,
                              IncrementalRLSetting.Observations), obs[0].shape
            assert obs.task_labels is task_label or all(
                label == task_label for label in obs.task_labels)

    env = setting.train_dataloader(batch_size=batch_size)
    reset_obs = env.reset()
    check_obs(reset_obs, task_label=0)

    for i in range(5):
        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs, task_label=0)

    for iter_obs in take(env, 3):
        check_obs(iter_obs, task_label=0)
        reward = env.send(env.action_space.sample())
        env.render("human")

    env.close()
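
take is imported from elsewhere in the test suite; a common definition (an assumption, shown only so the loop above reads cleanly) is a thin wrapper around itertools.islice:

from itertools import islice


def take(iterable, n):
    # Yield at most n items from the iterable.
    return islice(iterable, n)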
Example #13
    def __init__(self, params):

        environment_filename = params['path']
        worker_id = params['worker_id']
        seed = params['seed']
        use_visual = params['visual_mode']
        multiagent = params['multiagent_mode']

        self._env = UnityEnvironment(environment_filename, seed=seed)
        self.name = self._env.academy_name
        self.visual_obs = None
        self._action_space_size = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if brain.number_visual_observations > 1:
            logger.warning(
                "The environment contains more than one visual observation. "
                "Please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(
                    brain.vector_action_space_size[0])
            else:
                self._action_space = spaces.MultiDiscrete(
                    brain.vector_action_space_size)
        else:
            self._action_space_size = brain.vector_action_space_size
            high = np.array([1] * brain.vector_action_space_size)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)

        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            if brain.camera_resolutions[0]["blackAndWhite"]:
                depth = 1
            else:
                depth = 3
            self._observation_space = spaces.Box(
                0,
                1,
                dtype=np.float32,
                shape=(brain.camera_resolutions[0]["height"],
                       brain.camera_resolutions[0]["width"], depth))
        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
Example #14
 def action_space(self):
     return spaces.MultiDiscrete([self.num_actions] * self.num_players)
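
Sampling from this property returns one action per player. A tiny self-contained illustration (example sizes, not this game's actual values):

from gym import spaces

num_actions, num_players = 4, 3
action_space = spaces.MultiDiscrete([num_actions] * num_players)
print(action_space.nvec)      # array([4, 4, 4])
print(action_space.sample())  # e.g. array([2, 0, 3])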
Example #15
    def __init__(
        self,
        unity_env: BaseEnv,
        uint8_visual: bool = False,
        flatten_branched: bool = False,
        allow_multiple_obs: bool = False,
    ):
        """
        Environment initialization
        :param unity_env: The Unity BaseEnv to be wrapped in the gym. Will be closed when the UnityToGymWrapper closes.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param allow_multiple_obs: If True, return a list of np.ndarrays as observations with the first elements
            containing the visual observations and the last element containing the array of vector observations.
            If False, returns a single np.ndarray containing either only a single visual observation or the array of
            vector observations.
        """
        self._env = unity_env

        # Take a single step so that the brain information will be sent over
        if not self._env.behavior_specs:
            self._env.step()

        self.visual_obs = None

        # Save the step result from the last time all Agents requested decisions.
        self._previous_decision_step: DecisionSteps = None
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_obs = allow_multiple_obs

        # Check brain configuration
        if len(self._env.behavior_specs) != 1:
            raise UnityGymException(
                "There can only be one behavior in a UnityEnvironment "
                "if it is wrapped in a gym.")

        self.name = list(self._env.behavior_specs.keys())[0]
        self.group_spec = self._env.behavior_specs[self.name]

        if self._get_n_vis_obs() == 0 and self._get_vec_obs_size() == 0:
            raise UnityGymException(
                "There are no observations provided by the environment.")

        if not self._get_n_vis_obs() >= 1 and uint8_visual:
            logger.warning(
                "uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")
        else:
            self.uint8_visual = uint8_visual
        if (self._get_n_vis_obs() + self._get_vec_obs_size() >= 2
                and not self._allow_multiple_obs):
            logger.warning(
                "The environment contains multiple observations. "
                "You must define allow_multiple_obs=True to receive them all. "
                "Otherwise, only the first visual observation (or vector observation if"
                "there are no visual observations) will be provided in the observation."
            )

        # Check for number of agents in scene.
        self._env.reset()
        decision_steps, _ = self._env.get_steps(self.name)
        self._check_agents(len(decision_steps))
        self._previous_decision_step = decision_steps

        # Set action spaces
        if self.group_spec.action_spec.is_discrete():
            self.action_size = self.group_spec.action_spec.discrete_size
            branches = self.group_spec.action_spec.discrete_branches
            if self.group_spec.action_spec.discrete_size == 1:
                self._action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(branches)

        elif self.group_spec.action_spec.is_continuous():
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")

            self.action_size = self.group_spec.action_spec.continuous_size
            high = np.array([1] * self.group_spec.action_spec.continuous_size)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        else:
            raise UnityGymException(
                "The gym wrapper does not provide explicit support for both discrete "
                "and continuous actions.")

        # Set observations space
        list_spaces: List[gym.Space] = []
        shapes = self._get_vis_obs_shape()
        for shape in shapes:
            if uint8_visual:
                list_spaces.append(
                    spaces.Box(0, 255, dtype=np.uint8, shape=shape))
            else:
                list_spaces.append(
                    spaces.Box(0, 1, dtype=np.float32, shape=shape))
        if self._get_vec_obs_size() > 0:
            # vector observation is last
            high = np.array([np.inf] * self._get_vec_obs_size())
            list_spaces.append(spaces.Box(-high, high, dtype=np.float32))
        if self._allow_multiple_obs:
            self._observation_space = spaces.Tuple(list_spaces)
        else:
            self._observation_space = list_spaces[
                0]  # only return the first one
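
For reference, the `flatten_branched` path above hands action selection to an ActionFlattener-style lookup table. Below is a minimal, self-contained sketch of that idea under the assumption that flattening simply enumerates every branch combination; the class name `BranchedActionFlattener` and its `lookup_action` method are illustrative, not the ml-agents implementation.

import itertools
from gym import spaces

class BranchedActionFlattener:
    """Illustrative sketch: expose branched discrete actions as one flat Discrete space."""

    def __init__(self, branch_sizes):
        # Enumerate every combination of branch values, e.g. branches (2, 3) -> 6 flat actions.
        self._lookup = list(itertools.product(*[range(n) for n in branch_sizes]))
        self.action_space = spaces.Discrete(len(self._lookup))

    def lookup_action(self, flat_index):
        # Map a flat Discrete index back to one action per branch.
        return self._lookup[flat_index]

# Usage: branches (2, 3) give Discrete(6); flat index 4 decodes to (1, 1).
flattener = BranchedActionFlattener((2, 3))
print(flattener.action_space, flattener.lookup_action(4))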
예제 #16
0
    def __init__(self,
                 environment_filename=None,
                 docker_training=False,
                 worker_id=0,
                 retro=True):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual 
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each 
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resizes visual observations to 84x84 (uint8) and flattens the action space.
        """
        if self.is_grading():
            environment_filename = None

        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     docker_training=docker_training)

        split_name = self._env.academy_name.split('-v')
        if len(split_name) == 2 and split_name[0] == "ObstacleTower":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" + self.version + \
                " but only the following versions are compatible with this gym: " + \
                str(self.ALLOWED_VERSIONS)
            )

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._done_grading = False
        self._flattener = None
        self._seed = None
        self._floor = None
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException(
                "Environment provides no visual observations.")

        self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1:
            logger.warning(
                "The environment contains more than one visual observation. "
                "Please note that only the first will be provided in the observation."
            )

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(
                brain.vector_action_space_size[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(
                    brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    brain.vector_action_space_size)

        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = brain.camera_resolutions[0]["height"]
        camera_width = brain.camera_resolutions[0]["width"]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(0,
                                 image_space_max,
                                 dtype=image_space_dtype,
                                 shape=(camera_height, camera_width, depth))
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space))
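
The retro branch above fixes the image observation at 84x84 uint8. As a rough sketch of the conversion that implies (OpenCV is an assumption here, since the snippet does not show how the resize is performed, and `to_retro_observation` is a hypothetical helper name):

import cv2
import numpy as np

def to_retro_observation(visual_obs):
    """Turn a float (0.0-1.0) HxWx3 frame into an 84x84x3 uint8 frame (illustrative only)."""
    frame = (np.asarray(visual_obs) * 255.0).astype(np.uint8)
    return cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)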
예제 #17
0
    def __init__(self, debug=False, number_of_player=2, goal_end=False):

        self.debug = debug
        self.number_of_player = number_of_player
        self.goal_end = goal_end

        # action space
        # 1) Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
        # 2) Action Keys: Discrete 5  - noop[0], dash[1], shoot[2], press[3], pass[4] - params: min: 0, max: 4
        self.action_space = spaces.MultiDiscrete([5, 5] *
                                                 self.number_of_player)

        # observation space (normalized)
        # [0] x position
        # [1] y position
        # [2] x velocity
        # [3] y velocity
        self.obs_low = np.array(
            [-3, 0, -self.BALL_MAX_VELOCITY, -self.BALL_MAX_VELOCITY] +
            [-3, 0, -self.PLAYER_MAX_VELOCITY, -self.PLAYER_MAX_VELOCITY] *
            (self.number_of_player * 2),
            dtype=np.float32)
        self.obs_high = np.array([
            self.WIDTH + 3, self.HEIGHT, self.BALL_MAX_VELOCITY,
            self.BALL_MAX_VELOCITY
        ] + [
            self.WIDTH + 3, self.HEIGHT, self.PLAYER_MAX_VELOCITY,
            self.PLAYER_MAX_VELOCITY
        ] * (self.number_of_player * 2),
                                 dtype=np.float32)

        self.observation_space = spaces.Box(low=self.obs_low,
                                            high=self.obs_high,
                                            dtype=np.float32)

        # create space
        self.space = pymunk.Space()
        self.space.gravity = 0, 0

        # Amount of simple damping to apply to the space.
        # A value of 0.95 means that each body will lose 5% of its velocity per second.
        self.space.damping = 0.95

        # create walls
        self.space, self.static, self.static_goal = setup_walls(
            self.space, self.WIDTH, self.HEIGHT, self.GOAL_SIZE)

        # Teams
        self.team_left = Team(
            self.space,
            self.WIDTH,
            self.HEIGHT,
            player_weight=self.PLAYER_WEIGHT,
            player_max_velocity=self.PLAYER_MAX_VELOCITY,
            color=(1, 0, 0, 1),  # red
            side=Side.left,
            player_number=self.number_of_player)

        self.team_right = Team(
            self.space,
            self.WIDTH,
            self.HEIGHT,
            player_weight=self.PLAYER_WEIGHT,
            player_max_velocity=self.PLAYER_MAX_VELOCITY,
            color=(0, 0, 1, 1),  # blue
            side=Side.right,
            player_number=self.number_of_player)

        # Agents
        self.team_left_agent = BaseAgent(self, Side.left)
        self.team_right_agent = BaseAgent(self, Side.right)

        self.player_arr = self.team_left.player_array + self.team_right.player_array

        # Ball
        self.ball = Ball(self.space,
                         self.WIDTH * 0.5,
                         self.HEIGHT * 0.5,
                         mass=self.BALL_WEIGHT,
                         max_velocity=self.BALL_MAX_VELOCITY,
                         elasticity=0.2)

        self.has_ball_player = None

        self.current_time = 0
        self.observation = self.reset()
        self.reward_class = Reward(self)
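
Because the action space is MultiDiscrete([5, 5] * number_of_player), a sampled action is a flat integer array with two entries per player. A small sketch of splitting such a sample into (arrow_key, action_key) pairs, assuming number_of_player = 2; the helper name is illustrative:

import numpy as np
from gym import spaces

number_of_player = 2
action_space = spaces.MultiDiscrete([5, 5] * number_of_player)

def split_player_actions(action):
    # Reshape the flat MultiDiscrete sample into one (arrow_key, action_key) row per player.
    return np.asarray(action).reshape(number_of_player, 2)

sample = action_space.sample()        # e.g. array([3, 0, 1, 4])
print(split_player_actions(sample))   # e.g. [[3 0], [1 4]]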
예제 #18
0
    def __init__(self, players, human_mode=False):
        '''
            Parameters
            ----------
            players : List
                List containing player instances

            Returns
            -------
            None.
            Initializes the Skyjo environment

            '''
        self.players = players  # List of players in the game

        # not_done is a Boolean; a player's turn can be composed of 2 actions.
        # It is True while the player has not completed all of their actions, False otherwise.
        self.not_done = False
        self.drew = False
        self.num_players = len(players)  # The number of players
        self.action_space = spaces.MultiDiscrete([2, 13])
        self.take = 0  # Global integer id of the 'take' action
        self.draw = 1  # Global integer id of the 'draw' action
        self.throw = 2  # Global integer id of the 'throw' action
        self.defausse = []  # The discard pile, list containing discarded cards
        self.deck_card = Card(5)  # First deck_card
        self.reward = 0  # The reward
        self.state = 0
        self.unfinished = True
        self.cards_thrown = []
        self.human_mode = human_mode
        self.columns_made = []
        self.reward2 = 0

        # Deck card initialized as the real game
        de = [-2] * 5 + [-1] * 10 + [0] * 15 + [
            i for i in range(1, 13) for j in range(10)
        ]
        self.deck = []  # The deck, list of cards composing the deck
        for u in de:
            self.deck.append(Card(u))
        self.deck_copy = self.deck.copy()

        self.setup()  # Call set up, initialize the env
        L = [-2, 0, -2] + [-2] * 12
        H = [12, 100, 12] + [12] * 12
        self.observation_space = spaces.Box(low=np.array(L), high=np.array(H))
        # The observation space, defined following the Gym env interface
        if self.num_players == 1:
            L = [-2, 0, -2] + [-2] * 12
            H = [12, 100, 12] + [12] * 12
            self.observation_space = spaces.Box(low=np.array(L),
                                                high=np.array(H))
            board_int, board_bool = self.players[0].get_board_as_int(
                self.mean_value_deck())
            self.observation = np.concatenate((np.array(
                [self.defausse[-1].value, self.state,
                 self.deck_card.value]), board_int))
            #self.observation = np.concatenate((self.observation,board_bool))
        elif self.num_players == 2:
            #L = [-2,0,-2]+[-2]*12+[0]*12+[-2]*12+[0]*12   # boolean boards  and board of the opponent
            #H = [12,100,12]+[12]*12+[1]*12+[12]*12+[1]*12
            #L = [-2,0,-2]+[-2]*12+[-20,0]                 # only score of the opponent
            #H = [12,100,12]+[12]*12+[150,10]
            L = [
                -2, 0, -2
            ] + [-2] * 12 + [-2] * 12  # board of the opponent without booleans
            H = [12, 100, 12] + [12] * 12 + [12] * 12

            self.observation_space = spaces.Box(low=np.array(L),
                                                high=np.array(H))
            board_int, board_bool = self.players[0].get_board_as_int(
                self.mean_value_deck())
            self.observation = np.concatenate((np.array(
                [self.defausse[-1].value, self.state,
                 self.deck_card.value]), board_int))
            #self.observation = np.concatenate((self.observation,board_bool))
            board_int, board_bool = self.players[1].get_board_as_int(
                self.mean_value_deck())
            self.observation = np.concatenate((self.observation, board_int))
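
Since the observation here is a plain concatenated vector, the matching Box bounds can be sanity-checked with contains(); a tiny standalone sketch of that pattern (the bound lists mirror the single-player case above, nothing else is taken from the snippet):

import numpy as np
from gym import spaces

L = [-2, 0, -2] + [-2] * 12
H = [12, 100, 12] + [12] * 12
observation_space = spaces.Box(low=np.array(L), high=np.array(H), dtype=np.float32)

# A concatenated observation must stay within the per-component bounds.
obs = np.zeros(len(L), dtype=np.float32)
print(observation_space.contains(obs))  # True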
예제 #19
0
    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(
            environment_filename, worker_id, no_graphics=no_graphics
        )

        # Take a single step so that the brain information will be sent over
        if not self._env.get_agent_groups():
            self._env.step()

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        # Hidden flag used by Atari environments to determine if the game is over
        self.game_over = False
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.get_agent_groups()) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym."
            )

        self.brain_name = self._env.get_agent_groups()[0]
        self.name = self.brain_name
        self.group_spec = self._env.get_agent_group_spec(self.brain_name)

        if use_visual and self._get_n_vis_obs() == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment."
            )
        self.use_visual = self._get_n_vis_obs() >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect."
            )
        else:
            self.uint8_visual = uint8_visual

        if self._get_n_vis_obs() > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        # Check for number of agents in scene.
        self._env.reset()
        step_result = self._env.get_step_result(self.brain_name)
        self._check_agents(step_result.n_agents())

        # Set observation and action spaces
        if self.group_spec.is_action_discrete():
            branches = self.group_spec.discrete_action_branches
            if self.group_spec.action_shape == 1:
                self._action_space = spaces.Discrete(branches[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(branches)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(branches)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened."
                )
            high = np.array([1] * self.group_spec.action_shape)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * self._get_vec_obs_size())
        if self.use_visual:
            shape = self._get_vis_obs_shape()
            if uint8_visual:
                self._observation_space = spaces.Box(
                    0, 255, dtype=np.uint8, shape=shape
                )
            else:
                self._observation_space = spaces.Box(
                    0, 1, dtype=np.float32, shape=shape
                )

        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
예제 #20
0
    def __init__(self,
                 sumo_cmd,
                 vehicle_generator_config,
                 junctions,
                 traffic_movements,
                 traffic_lights_phases,
                 light_duration,
                 clusters,
                 max_steps=1500,
                 env_name=None):
        super().__init__(sumo_cmd,
                         vehicle_generator_config,
                         max_steps,
                         env_name=env_name)

        if not clusters:
            clusters = {}

        self.junctions = junctions
        self.cluster_map = clusters
        self.traffic_lights_phases = traffic_lights_phases

        self.observation_space = spaces.Space(shape=(len(junctions),
                                                     traffic_movements + 1))
        self.action_space = spaces.MultiDiscrete([traffic_lights_phases] *
                                                 len(junctions))

        self.light_duration = light_duration

        self.previous_actions = {}
        self.clustered_juncions = {}
        for junction in self.junctions:
            cluster = self.cluster_map.get(junction)
            if cluster:
                for jun, _ in cluster["tls_to_phases"].items():
                    self.clustered_juncions[jun] = junction
                    self.previous_actions[jun] = (0, 1, 2, 3)
            else:
                self.previous_actions[junction] = (0, 1, 2, 3)

        self.traveling_cars = {}

        self.travel_time = 0
        self.throughput = 0

        self.green_dur = self.light_duration
        self.connection.trafficlight.setPhase(self.junctions[0], 1)
        self.yellow_dur = self.connection.trafficlight.getPhaseDuration(
            self.junctions[0])
        self.connection.trafficlight.setPhase(self.junctions[0], 2)
        self.red_dur = self.connection.trafficlight.getPhaseDuration(
            self.junctions[0])
        self.connection.trafficlight.setPhase(self.junctions[0], 0)

        self.curr_phases = [-1] * len(junctions)
        self.prev_phases = [-1] * len(junctions)

        self.events = []
        self.ret_state = [True] * len(junctions)

        self.restarted = True
예제 #21
0
for _ in range(2):
    print(box.sample())

### 3. MultiBinary
print("==================")
mb = spaces.MultiBinary(5)
print(mb)
print(mb.shape)
mb.seed(4)
for _ in range(2):
    print(mb.sample())

### 4. MultiDiscrete
# a sample is a vector of values, each taken from {0, 1, ..., n - 1}
print("==================")
md = spaces.MultiDiscrete([3, 4])  # specify the number of values for each discrete dimension
print(md)
print(md.shape)
md.seed(4)
for _ in range(10):
    print(md.sample())

### 5. Tuple
# self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
tup = spaces.Tuple([spaces.Discrete(3), spaces.MultiDiscrete([3, 2])])
print(tup)
print(tup.shape)
tup.seed(4)
for _ in range(3):
    print(tup.sample())
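
As a small extension of the sampling demo above, gym's space utilities can also flatten a MultiDiscrete sample into a one-hot vector. This assumes a gym version that exposes flatten/flatdim from gym.spaces; md2 is just a throwaway name:

from gym import spaces
from gym.spaces import flatten, flatdim

md2 = spaces.MultiDiscrete([3, 4])
x = md2.sample()
print(flatdim(md2))      # 7: a one-hot block of size 3 plus one of size 4
print(flatten(md2, x))   # the sample encoded as a concatenated one-hot vector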
예제 #22
0
    def __init__(self, n_seats, max_limit=20000, debug=False):
        n_suits = 4  # s,h,d,c
        n_ranks = 13  # 2,3,4,5,6,7,8,9,T,J,Q,K,A
        n_community_cards = 5  # flop, turn, river
        n_pocket_cards = 2
        n_stud = 5

        self.n_seats = n_seats
        self._deck = Deck()
        self._evaluator = Evaluator()
        self._debug = debug

        self.init()

        # fill seats with dummy players
        self._seats = [
            Player(i, stack=0, emptyplayer=True) for i in range(n_seats)
        ]
        self.emptyseats = n_seats
        self._player_dict = {}

        self.observation_space = spaces.Tuple([
            spaces.Tuple([  # # **players info**
                spaces.MultiDiscrete([
                    1,  #  (boolean) is_emptyplayer
                    n_seats - 1,  #  (numbers) number of seat
                    max_limit,  #  (numbers) stack
                    1,  #  (boolean) is_playing_hand
                    max_limit,  #  (numbers) handrank (0 means no rank / no error_msg needed)
                    1,  #  (boolean) is_playedthisround
                    max_limit,  #  (numbers) is_betting
                    1,  #  (boolean) isallin
                    max_limit,  #  (numbers) last side pot
                ]),
                spaces.Tuple([
                    spaces.MultiDiscrete([  # # **players hand**
                        1,  # (boolean) is_available
                        n_suits,  #  (category) suit
                        n_ranks,  #  (category) rank
                    ])
                ] * n_pocket_cards)
            ] * n_seats),
            spaces.Tuple([
                spaces.Discrete(n_seats - 1),  # big blind location
                spaces.Discrete(max_limit),  # small blind
                spaces.Discrete(max_limit),  # big blind
                spaces.Discrete(max_limit * n_seats),  # pot amount
                spaces.Discrete(max_limit),  # last raise
                spaces.Discrete(max_limit),  # minimum amount to raise
                spaces.Discrete(
                    max_limit),  # how much needed to call by current player.
                spaces.Discrete(n_seats - 1),  # current player seat location.
                spaces.MultiDiscrete([  # community cards
                    1,  # (boolean) is_available
                    n_suits,  # (category) suit
                    n_ranks,  # (category) rank
                    1,  # (boolean) is_flopped
                ]),
            ] * n_stud),
        ])

        self.action_space = spaces.Tuple([
            spaces.MultiDiscrete([
                3,  # action_id
                max_limit,  # raise_amount
            ]),
        ] * n_seats)

        self.logger = logging.getLogger('TexasHoldemEnv')
        #self.logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
        # create formatter
        formatter = logging.Formatter('%(asctime)s: %(message)s')
        # add formatter to ch
        ch.setFormatter(formatter)
        # add ch to logger
        self.logger.addHandler(ch)
예제 #23
0
#for getting the observation/action/reward

#local = 'remote'
forced = True

env_config = {}
env_config["observation_space"] = spaces.Tuple((
    spaces.Discrete(9),  # final position * (if not 0 means game is over!)
    spaces.Discrete(101),  # health *
    spaces.Discrete(100),  # gold
    spaces.Discrete(11),  # level *
    spaces.Discrete(99),  # remaining EXP to level up
    spaces.Discrete(50),  # round
    spaces.Discrete(2),  # locked in
    spaces.Discrete(6),  # gamePhase *
    spaces.MultiDiscrete([250, 3]),  # heroToMove: heroLocalID, isUnderlord
    spaces.Discrete(250),  # itemToMove: localID*,
    spaces.Discrete(3),  # reRoll cost
    spaces.Discrete(2),  # rerolled (item)
    spaces.Discrete(35),  # current round timer
    # below are the store heros
    spaces.MultiDiscrete([71, 71, 71, 71, 71]),
    # below are the bench heroes
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
    spaces.MultiDiscrete([71, 250, 4, 6, 14, 9, 9, 3]),
예제 #24
0
    def __init__(self,
                 lights,
                 netfile,
                 routefile,
                 guifile,
                 addfile,
                 loops=[],
                 lanes=[],
                 exitloops=[],
                 tmpfile="tmp.rou.xml",
                 pngfile="tmp.png",
                 mode="gui",
                 detector="detector0",
                 simulation_end=3600,
                 sleep_between_restart=1):
        # "--end", str(simulation_end),
        self.simulation_end = simulation_end
        self.sleep_between_restart = sleep_between_restart
        self.mode = mode
        self._seed()
        self.loops = loops
        self.exitloops = exitloops
        self.loop_variables = [
            tc.LAST_STEP_MEAN_SPEED, tc.LAST_STEP_TIME_SINCE_DETECTION,
            tc.LAST_STEP_VEHICLE_NUMBER
        ]
        self.lanes = lanes
        self.detector = detector
        args = [
            "--net-file", netfile, "--route-files", tmpfile,
            "--additional-files", addfile
        ]
        if mode == "gui":
            binary = "sumo-gui"
            args += ["-S", "-Q", "--gui-settings-file", guifile]
        else:
            binary = "sumo"
            args += ["--no-step-log"]

        with open(routefile) as f:
            self.route = f.read()
        self.tmpfile = tmpfile
        self.pngfile = pngfile
        self.sumo_cmd = [binary] + args
        self.sumo_step = 0
        self.lights = lights
        self.action_space = spaces.DiscreteToMultiDiscrete(
            spaces.MultiDiscrete([[0, len(light.actions) - 1]
                                  for light in self.lights]), 'all')

        trafficspace = spaces.Box(low=float('-inf'),
                                  high=float('inf'),
                                  shape=(len(self.loops) *
                                         len(self.loop_variables), ))
        lightspaces = [
            spaces.Discrete(len(light.actions)) for light in self.lights
        ]
        self.observation_space = spaces.Tuple([trafficspace] + lightspaces)

        self.sumo_running = False
        self.viewer = None
예제 #25
0
def discretize(space, steps):
    """
    Creates a discretized version of `space` and returns
    a `Transform` that contains the conversion functions.
    If the space is already discrete, the identity
    is returned. The steps are distributed such that the old
    minimum and maximum value can still be reached in the new
    domain.
    :param gym.Space space: The space to be discretized.
    :param int|Iterable steps: The number of discrete steps to produce
                  for each continuous dimension. Can be an
                  Integer or a list.
    :raises ValueError: If fewer than two steps are supplied.
    :return Transform: A `Transform` to the discretized space.
    """

    # there are two possible ways how we could handle already
    # discrete spaces.
    #  1) throw an error because (unless
    #     steps is configured to fit) we would try to convert
    #     an already discrete space to one with a different number
    #     of states.
    #  2) keep the space as is.
    # here, we implement the second. This allows scripts that
    # train a discrete agent to just apply discretize, only
    # changing envs that are not already discrete.

    if is_discrete(space):
        return Transform(space, space, _identity, _identity)

    # check that step number is valid and convert steps into a np array
    if not isinstance(steps, numbers.Integral):
        steps = np.array(steps, dtype=int)
        if (steps < 2).any():
            raise ValueError("Need at least two steps to discretize, got {}".format(steps))
    elif steps < 2:
        raise ValueError("Need at least two steps to discretize, got {}".format(steps))

    if isinstance(space, spaces.Box):
        if len(space.shape) == 1 and space.shape[0] == 1:
            discrete_space = spaces.Discrete(steps)
            lo = space.low[0]
            hi = space.high[0]

            convert = _LinearTransform(lo, (hi-lo) / (steps - 1.0))
            back = _LinearTransform(-lo * (steps-1) / (hi - lo), (steps - 1.0) / (hi-lo), int)
            return Transform(original=space, target=discrete_space, convert_from=convert, convert_to=back)
        else:
            if isinstance(steps, numbers.Integral):
                steps = np.full(space.low.shape, steps)
            if steps.shape != space.shape:
                raise ValueError("Supplied steps {} have invalid shape, expected {}".format(steps, steps.shape,
                                                                                            space.shape))

            discrete_space = spaces.MultiDiscrete(steps.flatten())
            lo = space.low.flatten()
            hi = space.high.flatten()

            convert = _LinearTransformArray(lo, (hi - lo) / (steps - 1.0), space.shape)
            back = _LinearTransformArray(-lo * (steps - 1) / (hi - lo), (steps - 1.0) / (hi - lo), (len(steps),), int)

            return Transform(original=space, target=discrete_space, convert_from=convert, convert_to=back)

    raise NotImplementedError("Unknown space {} of type {} supplied".format(space, type(space)))  # pragma: no cover
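
To make the linear mapping in `discretize` concrete, here is a standalone numeric sketch (independent of the Transform helpers above) of a 1-D Box over [-1, 1] split into 5 steps. The inverse map below rounds to the nearest index, whereas the snippet's back-transform uses a truncating int cast, and the helper names are illustrative:

lo, hi, steps = -1.0, 1.0, 5

def index_to_value(i):
    # Forward map: discrete index 0..steps-1 -> evenly spaced value, endpoints included.
    return lo + i * (hi - lo) / (steps - 1.0)

def value_to_index(x):
    # Inverse map: continuous value -> nearest discrete index.
    return int(round((x - lo) * (steps - 1.0) / (hi - lo)))

print([index_to_value(i) for i in range(steps)])  # [-1.0, -0.5, 0.0, 0.5, 1.0]
print(value_to_index(0.4))                        # 3, whose grid value is 0.5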
예제 #26
0
 def __init__(self, env):
     super().__init__(env)
     self.action_space = spaces.MultiDiscrete(
         np.array([space.n for space in env.action_space.spaces])
     )
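
The __init__ above only swaps the action space; to actually route actions through, a wrapper like this also needs to convert each incoming MultiDiscrete sample back into the tuple of integers the wrapped env expects. A hedged sketch using gym.ActionWrapper (the class name TupleToMultiDiscrete is illustrative, and it assumes the wrapped env's action_space is a Tuple of Discrete spaces):

import gym
import numpy as np
from gym import spaces

class TupleToMultiDiscrete(gym.ActionWrapper):
    """Illustrative sketch: expose a Tuple-of-Discrete action space as MultiDiscrete."""

    def __init__(self, env):
        super().__init__(env)
        self.action_space = spaces.MultiDiscrete(
            np.array([space.n for space in env.action_space.spaces])
        )

    def action(self, action):
        # Hand the wrapped env a plain tuple of Python ints, one per original Discrete space.
        return tuple(int(a) for a in action)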
예제 #27
0
 def create_multidiscrete_space(self):
     return spaces.MultiDiscrete([4, 4, 4])
예제 #28
0
    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(
            environment_filename, worker_id, no_graphics=no_graphics
        )
        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        self.game_over = (
            False
        )  # Hidden flag used by Atari environments to determine if the game is over
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym."
            )
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There are not any external brain in the UnityEnvironment"
            )

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment."
            )
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect."
            )
        else:
            self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym."
            )

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size
                    )

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened."
                )
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            if brain.camera_resolutions[0]["blackAndWhite"]:
                depth = 1
            else:
                depth = 3
            self._observation_space = spaces.Box(
                0,
                1,
                dtype=np.float32,
                shape=(
                    brain.camera_resolutions[0]["height"],
                    brain.camera_resolutions[0]["width"],
                    depth,
                ),
            )
        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
예제 #29
0
 def action_space(self):
     """Returns the shape of the action space of the agent."""
     if self._flat_action_space:
         return spaces.Discrete(10)
     else:
         return spaces.MultiDiscrete((2, 3, 2))
예제 #30
0
  def __init__(self, n_seats, max_limit=100000, debug=False):
    n_suits = 4                     # s,h,d,c
    n_ranks = 13                    # 2,3,4,5,6,7,8,9,T,J,Q,K,A
    n_community_cards = 5           # flop, turn, river
    n_pocket_cards = 2
    n_stud = 5

    self.n_seats = n_seats

    self._blind_index = 0
    [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
    self._deck = Deck()
    self._evaluator = Evaluator()

    self.community = []
    self._round = 0
    self._button = 0
    self._discard = []

    self._side_pots = [0] * n_seats
    self._current_sidepot = 0 # index of _side_pots
    self._totalpot = 0
    self._tocall = 0
    self._lastraise = 0
    self._number_of_hands = 0

    # fill seats with dummy players
    self._seats = [Player(i, stack=0, emptyplayer=True) for i in range(n_seats)]
    self.emptyseats = n_seats
    self._player_dict = {}
    self._current_player = None
    self._debug = debug
    self._last_player = None
    self._last_actions = None

    self.observation_space = spaces.Tuple([
      spaces.Tuple([                # players
        spaces.MultiDiscrete([
          1,                   # emptyplayer
          n_seats - 1,         # seat
          max_limit,           # stack
          1,                   # is_playing_hand
          max_limit,           # handrank
          1,                   # playedthisround
          1,                   # is_betting
          1,                   # isallin
          max_limit,           # last side pot
        ]),
        spaces.Tuple([
          spaces.MultiDiscrete([    # hand
            n_suits,          # suit, can be negative one if it's not available.
            n_ranks,          # rank, can be negative one if it's not available.
          ])
        ] * n_pocket_cards)
      ] * n_seats),
      spaces.Tuple([
        spaces.Discrete(n_seats - 1), # big blind location
        spaces.Discrete(max_limit),   # small blind
        spaces.Discrete(max_limit),   # big blind
        spaces.Discrete(max_limit),   # pot amount
        spaces.Discrete(max_limit),   # last raise
        spaces.Discrete(max_limit),   # minimum amount to raise
        spaces.Discrete(max_limit),   # how much needed to call by current player.
        spaces.Discrete(n_seats - 1), # current player seat location.
        spaces.MultiDiscrete([        # community cards
          n_suits - 1,          # suit
          n_ranks - 1,          # rank
          1,                     # is_flopped
        ]),
      ] * n_stud),
    ])

    self.action_space = spaces.Tuple([
      spaces.MultiDiscrete([
        3,                     # action_id
        max_limit,             # raise_amount
      ]),
    ] * n_seats)