def build_q_model_and_distribution_comp(policy, obs_space, action_space,
                                        config):
    # Keys of the observation space that must be used at train and test time
    policy.train_obs_keys = config["train_obs_keys"]
    policy.test_obs_keys = config["test_obs_keys"]

    # Check whether the policy's action space is wrapped in a single-element Tuple space
    policy.requires_tupling = False
    if isinstance(action_space, Tuple) and len(action_space.spaces) == 1:
        policy.action_space = action_space.spaces[0]
        action_space = action_space.spaces[0]
        policy.requires_tupling = True
    if not isinstance(action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DQN.".format(action_space))

    # Get real observation space
    if isinstance(obs_space, Box):
        assert hasattr(obs_space,
                       "original_space"), "Invalid observation space"
        obs_space = obs_space.original_space
        if isinstance(obs_space, Tuple):
            obs_space = obs_space.spaces[0]
    assert isinstance(obs_space, Dict), "Invalid observation space"
    policy.has_action_mask = "action_mask" in obs_space.spaces
    assert all([k in obs_space.spaces for k in policy.train_obs_keys
                ]), "Invalid train keys specification"
    assert all([k in obs_space.spaces for k in policy.test_obs_keys
                ]), "Invalid test keys specification"

    # Get observation space used for training
    if config["train_obs_space"] is None:
        train_obs_space = obs_space
    else:
        train_obs_space = config["train_obs_space"]
        if isinstance(train_obs_space, Box):
            assert hasattr(train_obs_space,
                           "original_space"), "Invalid observation space"
            train_obs_space = train_obs_space.original_space
            if isinstance(train_obs_space, Tuple):
                train_obs_space = train_obs_space.spaces[0]

    # Obs spaces used for training and testing
    sp = Dict({k: obs_space.spaces[k] for k in policy.test_obs_keys})
    policy.real_test_obs_space = flatten_space(sp)
    policy.real_test_obs_space.original_space = sp

    sp = Dict({k: train_obs_space.spaces[k] for k in policy.train_obs_keys})
    policy.real_train_obs_space = flatten_space(sp)
    policy.real_train_obs_space.original_space = sp
    policy.n_actions = action_space.n

    model_space = Dict({
        k: obs_space.spaces[k]
        for k in policy.test_obs_keys if k != "action_mask" and k != "signal"
    })
    return build_q_models(policy, flatten_space(model_space), action_space, config), \
           TorchCategorical
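
# Note: a quick, self-contained sketch of what flatten_space does to a Dict
# space like model_space above (the sub-spaces here are made up for
# illustration; gym's flatten_space concatenates the components into one Box).
from gym.spaces import Box, Dict, flatten, flatten_space
import numpy as np

demo_space = Dict({
    "obs": Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32),
    "state": Box(low=0.0, high=1.0, shape=(3,), dtype=np.float32),
})
flat_space = flatten_space(demo_space)                   # Box with shape (7,) = 4 + 3
flat_sample = flatten(demo_space, demo_space.sample())   # 1-D array of length 7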
Example 2
    def __init__(self, env: Env, flatten_obs=True, flatten_actions=True):
        super(Flatten, self).__init__(env)
        self._flatten_obs = flatten_obs
        self._flatten_actions = flatten_actions
        if flatten_obs:
            self.observation_space = spaces.flatten_space(
                env.observation_space)
        if flatten_actions:
            # Flatten the action space, then re-bound it to [-1, 1]
            self.action_space = spaces.flatten_space(env.action_space)
            self.action_space = Box(low=-1.0,
                                    high=1.0,
                                    shape=self.action_space.shape)
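
# The constructor only rewrites the spaces; the snippet does not show how data
# is converted at runtime. A minimal sketch of what the matching hooks could
# look like, assuming the class derives from gym.Wrapper (the method bodies
# below are an assumption, not the original code, and they ignore the [-1, 1]
# action rescaling done in the constructor).
import numpy as np
from gym import Wrapper, spaces

class FlattenSketch(Wrapper):
    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        return spaces.flatten(self.env.observation_space, obs)

    def step(self, action):
        # Map the flat action back into the wrapped env's structured space.
        action = spaces.unflatten(self.env.action_space, np.asarray(action))
        obs, reward, done, info = self.env.step(action)
        return spaces.flatten(self.env.observation_space, obs), reward, done, info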
Example 3
    def test_flatten(self):
        # We flatten Discrete to 1 value
        assert su.flatdim(self.space) == 25
        # gym flattens Discrete to one-hot
        assert gyms.flatdim(self.space) == 35

        asample = su.torch_point(self.space, self.space.sample())
        flattened = su.flatten(self.space, asample)
        unflattened = su.unflatten(self.space, flattened)
        assert self.same(asample, unflattened)

        # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            flattened_space = su.flatten_space(self.space)
            assert flattened_space.shape == (25, )
            # The maximum comes from Discrete(11)
            assert flattened_space.high.max() == 11.0
            assert flattened_space.low.min() == -10.0

            gym_flattened_space = gyms.flatten_space(self.space)
            assert gym_flattened_space.shape == (35, )
            # The maximum comes from Box(-10, 10, (3, 4))
            assert gym_flattened_space.high.max() == 10.0
            assert gym_flattened_space.low.min() == -10.0
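
# The 25 vs. 35 difference above comes entirely from how Discrete sub-spaces
# are flattened; gym's one-hot behaviour can be checked in isolation:
from gym import spaces

d = spaces.Discrete(11)
print(spaces.flatdim(d))        # 11: gym one-hot encodes Discrete
print(spaces.flatten(d, 3))     # [0. 0. 0. 1. 0. ...] -> 11 entries, one set to 1
print(spaces.flatten_space(d))  # Box(11,) with low=0 and high=1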
Example 4
    def __init__(self, env: gym.Env):
        """Flattens the observations of an environment.

        Args:
            env: The environment to apply the wrapper
        """
        super().__init__(env)
        self.observation_space = spaces.flatten_space(env.observation_space)
Example 5
    def __init__(self, env, continuous=True):
        super().__init__(env)
        self.CONTINUOUS = continuous

        # Workarounds for continuous and discrete action spaces
        if not self.CONTINUOUS:
            self.action_space = MultiDiscrete(
                [space.n for _, space in env.action_space.spaces.items()])
            self.labels = env.action_space.spaces.keys()
        else:
            self.action_space = flatten_space(env.action_space)
Example 6
    def __init__(self):
        self.__version__ = "0.1.0"
        self.viewer = None
        self.template = None
        self.canvas = None
        self.template_radial_map = None
        self.cur_state = {}
        self.state_history = []
        self.action_history = []
        self.painter = Painter()
        self.renderer = None
        self._prev_loss = 0
        self._configure_environment()
        logger.info(f"PaintingEnv - Version {self.__version__}")

        self.cur_step = 0

        # -- ACTION SPACE -- #
        # ------------------ #

        color_space = spaces.Box(
            np.array([-0.3, -0.3, -0.3]), np.array([0.3, 0.3, 0.3])
        )  # (hue, saturation, value)
        motion_space = spaces.Box(
            np.array([-math.pi/4, 1, -3, 0]), np.array([math.pi/4, 10, 3, 1])
        )  # (direction, distance, radius, pendown)
        # brush_space = spaces.Box(np.array([0]),np.array([1]))  # (pen up, pen down)
        self.action_space = spaces.Dict(
            {"color": color_space, "motion": motion_space}
        )

        self.action_space = spaces.flatten_space(self.action_space)

        # -- OBSERVATION SPACE -- #
        # ----------------------- #

        img_patch_space = spaces.Box(low=0, high=1, shape=OBS_FRAME_SHAPE)
        color_space = spaces.Box(np.array([0, 0, 0]), np.array([1, 1, 1]), dtype=np.float32)
        motion_space = spaces.Box(np.array([0, 0, 0]), np.array([2 * math.pi, 15, 1]), dtype=np.float32)
        # brush_space = spaces.Box(np.array([0]),np.array([1]), dtype=np.float32)

        self.observation_space = spaces.Dict(
             {
                 "patch": img_patch_space,
                 "color": color_space,
                 "motion": motion_space,
             }
        )

        # self.observation_space = spaces.flatten_space(self.observation_space)
        
        self.seed()
        self.reset()
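
# Because the Dict action space is flattened in the constructor above, a flat
# action vector has to be decoded back into its "color" and "motion" parts
# before it can be executed. A self-contained sketch of that inverse mapping
# (the original step() is not shown, so this is only an assumption):
import math
import numpy as np
from gym import spaces

color_space = spaces.Box(np.array([-0.3, -0.3, -0.3]), np.array([0.3, 0.3, 0.3]))
motion_space = spaces.Box(np.array([-math.pi / 4, 1, -3, 0]),
                          np.array([math.pi / 4, 10, 3, 1]))
dict_action_space = spaces.Dict({"color": color_space, "motion": motion_space})

flat_space = spaces.flatten_space(dict_action_space)      # Box(7,): 3 color + 4 motion
flat_action = flat_space.sample()
structured = spaces.unflatten(dict_action_space, flat_action)
hue_d, sat_d, val_d = structured["color"]
direction, distance, radius, pendown = structured["motion"]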
Example 7
    def __init__(self, env):
        super(GoalEnvFlattenObservation, self).__init__(env)

        wrapped_observation_space = env.observation_space
        assert isinstance(wrapped_observation_space, spaces.Dict), (
            "GoalEnvFlattenObservation is only usable with dict observations.")

        unwrapped_observation_space = wrapped_observation_space['observation']

        self.observation_space = spaces.Dict([
            ('observation',
             copy.deepcopy(spaces.flatten_space(unwrapped_observation_space))),
            ('achieved_goal',
             copy.deepcopy(wrapped_observation_space.spaces['achieved_goal'])),
            ('desired_goal',
             copy.deepcopy(wrapped_observation_space.spaces['desired_goal']))
        ])
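
# Only the space is rebuilt in the constructor; the per-step conversion is not
# part of the snippet. A sketch of the matching observation hook, assuming the
# wrapper derives from gym.ObservationWrapper (an assumption, not the original):
from gym import ObservationWrapper, spaces

class GoalEnvFlattenObservationSketch(ObservationWrapper):
    def observation(self, observation):
        # Flatten only the 'observation' entry; the goal entries pass through.
        return {
            "observation": spaces.flatten(
                self.env.observation_space.spaces["observation"],
                observation["observation"]),
            "achieved_goal": observation["achieved_goal"],
            "desired_goal": observation["desired_goal"],
        }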
Example 8
    def __init__(self, env):
        super(FlattenObservation, self).__init__(env)
        self.observation_space = spaces.flatten_space(env.observation_space)
Example 9
def make_model_and_action_dist(policy, obs_space, action_space, config):
    """create model neural network"""
    policy.device = (torch.device("cuda")
                       if torch.cuda.is_available() else torch.device("cpu"))
    policy.log_stats = config["log_stats"]  # flag to log statistics
    if policy.log_stats:
        policy.stats_dict = {}
        policy.stats_fn = config["stats_fn"]

    # Keys of the observation space that must be used at train and test time
    # ('signal' and 'action_mask' are excluded from the model's obs space)
    policy.train_obs_keys = config["train_obs_keys"]
    policy.test_obs_keys = config["test_obs_keys"]

    # Check whether the policy's action space is wrapped in a single-element Tuple space
    policy.requires_tupling = False
    if isinstance(action_space, Tuple) and len(action_space.spaces) == 1:
        policy.action_space = action_space.spaces[0]
        action_space = action_space.spaces[0]
        policy.requires_tupling = True
    if not isinstance(action_space, Discrete):
        raise UnsupportedSpaceException(
            "Action space {} is not supported for DQN.".format(action_space))

    # Get real observation space
    if isinstance(obs_space, Box):
        assert hasattr(obs_space, "original_space"), "Invalid observation space"
        obs_space = obs_space.original_space
        if isinstance(obs_space, Tuple):
            obs_space = obs_space.spaces[0]
    assert isinstance(obs_space, Dict), "Invalid observation space"
    policy.has_action_mask = "action_mask" in obs_space.spaces
    assert all([k in obs_space.spaces for k in policy.train_obs_keys]), "Invalid train keys specification"
    assert all([k in obs_space.spaces for k in policy.test_obs_keys]), "Invalid test keys specification"

    # Get observation space used for training
    if config["train_obs_space"] is None:
        train_obs_space = obs_space
    else:
        train_obs_space = config["train_obs_space"]
        if isinstance(train_obs_space, Box):
            assert hasattr(train_obs_space, "original_space"), "Invalid observation space"
            train_obs_space = train_obs_space.original_space
            if isinstance(train_obs_space, Tuple):
                train_obs_space = train_obs_space.spaces[0]

    # Obs spaces used for training and testing
    sp = Dict({
        k: obs_space.spaces[k]
        for k in policy.test_obs_keys
    })

    policy.real_test_obs_space = flatten_space(sp)
    policy.real_test_obs_space.original_space = sp
    model_space = Dict({
        k: obs_space.spaces[k]
        for k in policy.test_obs_keys if k != "signal" and k != "action_mask"
    })


    sp = Dict({
        k: train_obs_space.spaces[k]
        for k in policy.train_obs_keys
    })
    policy.real_train_obs_space = flatten_space(sp)
    policy.real_train_obs_space.original_space = sp
    policy.n_actions = action_space.n

    # No target network is used in this setup, so expose a no-op hook.
    def update_target():
        pass

    policy.update_target = update_target
    model = FullyConnectedNetwork(flatten_space(model_space), action_space, action_space.n, name="FcNet",
                                 model_config=config['model']).to(policy.device)
    return model, ModelCatalog.get_action_dist(action_space, config, framework='torch')
Example 10
    def __init__(self, obs_space, action_space, config):
        """ Only Dict observation spaces are allowed"""
        super().__init__(obs_space, action_space, config)

        # General configs
        self.framework = "torch"
        self.n_agents = len(obs_space.original_space.spaces)
        assert self.n_agents == 2, "Only teams of two agents are currently supported; {} is not a valid number"\
            .format(self.n_agents)  # TODO (fede): support an arbitrary number of team agents
        self.n_actions = action_space.spaces[0].n
        self.device = (torch.device("cuda")
                       if torch.cuda.is_available() else torch.device("cpu"))
        self.beta = config["beta"]
        self.gamma = self.beta * config['factor_ent']
        self.n_train_signals = config['n_train_signals']

        # Flag that regulates whether to log statistics
        self.log_stats = config["log_stats"]
        self.eval_fn = config["stats_fn"]

        # Keys from the observation space that must be used at training and test time
        self.train_obs_keys = config["train_obs_keys"]
        self.test_obs_keys = config["test_obs_keys"]

        # Get and validate real observation space
        # (Assumed uniform observation and action spaces for the players)
        agent_obs_space = obs_space.original_space.spaces[0]
        assert isinstance(agent_obs_space, Dict), "Invalid observation space"
        assert "signal" in agent_obs_space.spaces, "Observation space must contain field 'signal'" + \
                                                   str(agent_obs_space.spaces)

        self.real_test_obs_space = flatten_space(
            Tuple([agent_obs_space] * self.n_agents))
        self.real_test_obs_space.original_space = Tuple([agent_obs_space] *
                                                        self.n_agents)
        self.test_obs_size = _get_size(self.real_test_obs_space)
        self.signal_size = _get_size(agent_obs_space.spaces["signal"])
        if "action_mask" in agent_obs_space.spaces:
            mask_shape = tuple(agent_obs_space.spaces["action_mask"].shape)
            assert mask_shape == (
                self.n_actions, ), "Invalid shape for action mask"

        # Get and validate train observation space
        # (Assumed uniform observation and action spaces for the players)
        if config["train_obs_space"] is None:
            train_obs_space = agent_obs_space
        else:
            train_obs_space = config["train_obs_space"]
            if isinstance(train_obs_space, Tuple):
                train_obs_space = train_obs_space.spaces[0]

        self.real_train_obs_space = flatten_space(
            Tuple([train_obs_space] * self.n_agents))
        self.real_train_obs_space.original_space = Tuple([train_obs_space] *
                                                         self.n_agents)
        agent_obs_space_signaled = Tuple([
            Dict({
                **{
                    k: agent_obs_space.spaces[k]
                    for k in self.test_obs_keys if k != "signal" and k != "action_mask"
                },
                **{
                    "signal": MultiDiscrete([2] * self.n_train_signals)
                }
            })
        ] * self.n_agents)

        # training signaler
        self.signaler = Signaler()

        # Models
        self.model = ModelCatalog.get_model_v2(
            agent_obs_space_signaled,
            action_space,
            self.n_actions,
            config["model"],
            framework="torch",
            name="SignaledFCNet",
            default_model=MultiAgentFullyConnectedNetwork)

        self.signaler_model = ModelCatalog.get_model_v2(
            MultiDiscrete([2]),
            Discrete(self.n_train_signals),
            self.n_train_signals,
            config['sig_model'],
            framework="torch",
            name="SignalerNet",
            default_model=FullyConnectedNetwork)

        # exploration
        self.exploration = self._create_exploration()

        # Set up the optimizers and loss. TODO (fede): allow a custom optimiser to be configured
        self.model_optimiser = config["model_optimiser"]["type"](
            self.model.parameters(), lr=config["model_optimiser"]["lr"])
        self.signaler_optimiser = config["sig_model_optimiser"]["type"](
            self.signaler_model.parameters(),
            lr=config["sig_model_optimiser"]["lr"])

        # lr/beta scheduling algorithm (experimental)
        # self._curr_ts = 0
        # self._prev_lr_update = 0
        # self._prev_beta_update = 0

        self.classification_loss = nn.CrossEntropyLoss()

        def EntropyLoss(dist, reduce=True):
            # Returns the negative entropy, sum_a p(a) * log p(a), of the
            # given logits; minimizing it therefore encourages high entropy.
            S = nn.Softmax(dim=-1)
            LS = nn.LogSoftmax(dim=-1)
            b = S(dist) * LS(dist)
            b = torch.sum(b, 1)
            if reduce:
                b = torch.mean(b)
            return b

        self.entropy_loss = EntropyLoss
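
# Quick numeric check of the helper above: for a uniform distribution over 4
# actions the (negative) entropy is -ln(4), so minimizing this term pushes the
# policy towards higher entropy.
import torch

logits = torch.zeros(1, 4)                 # uniform over 4 actions
p = torch.softmax(logits, dim=-1)
log_p = torch.log_softmax(logits, dim=-1)
print(torch.sum(p * log_p, 1).mean())      # tensor(-1.3863) == -ln(4)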