Beispiel #1
0
    def get_configs_and_output_sizes(cls, config: ConcatenatedBrainLSTMCfg, input_space: Space, output_space: Space):
        """Resolve sub-network configs and the LSTM's effective input/output sizes.

        Each sub-config may arrive as a ready config object or as a raw dict;
        dicts are converted in place.  Returns a tuple
        (front_cfg, back_cfg, lstm_cfg, lstm_input_size, lstm_output_size).
        """
        # Default: the LSTM spans the full flattened input/output dimensions.
        lstm_in = flatdim(input_space)
        lstm_out = flatdim(output_space)

        raw_lstm = config.lstm
        lstm_config = raw_lstm if isinstance(raw_lstm, LstmLayeredCfg) else LstmLayeredCfg(**raw_lstm)

        feed_forward_front_cfg = None
        if config.feed_forward_front:
            raw_front = config.feed_forward_front
            feed_forward_front_cfg = (raw_front if isinstance(raw_front, FeedForwardCfg)
                                      else FeedForwardCfg(**raw_front))
            # The LSTM consumes the front network's last hidden layer.
            lstm_in = feed_forward_front_cfg.hidden_layers[-1]

        feed_forward_back_cfg = None
        if config.feed_forward_back:
            raw_back = config.feed_forward_back
            feed_forward_back_cfg = (raw_back if isinstance(raw_back, FeedForwardCfg)
                                     else FeedForwardCfg(**raw_back))
            # The LSTM feeds the back network's first hidden layer.
            lstm_out = feed_forward_back_cfg.hidden_layers[0]

        return feed_forward_front_cfg, feed_forward_back_cfg, lstm_config, lstm_in, lstm_out
Beispiel #2
0
 def __init__(self, config, env, device, **kwargs):
     """FC feature trunk with two linear heads (mean and log-std) over the
     flattened action dimension."""
     super().__init__(**kwargs)
     self.config = config
     self.env = env
     self.device = device

     obs_dim = flatdim(env.observation_space)
     act_dim = flatdim(env.action_space)
     self.feature_layers = make_fc(obs_dim, [256, 256])
     self.mean_head = nn.Linear(256, act_dim)
     self.logstd_head = nn.Linear(256, act_dim)

     self.to(device)
Beispiel #3
0
    def __init__(self, input_observation_space, stack_size=1):
        """Derive the stacked observation shape from the 'user' and 'doc' sub-spaces."""
        self._input_observation_space = input_observation_space
        sub_spaces = input_observation_space.spaces
        user_space = sub_spaces['user']
        doc_space = sub_spaces['doc']
        self._num_candidates = len(doc_space.spaces)

        # Width is taken from the first doc sub-space — assumes all doc
        # sub-spaces share one flat size; TODO confirm.
        doc_values = list(doc_space.spaces.values())
        doc_space_shape = spaces.flatdim(doc_values[0])
        # Use the longer of user_space and doc_space as the shape of each row.
        obs_shape = (np.max([spaces.flatdim(user_space), doc_space_shape]), )
        # One extra row for the user observation itself.
        self._observation_shape = (self._num_candidates + 1, ) + obs_shape
        self._observation_dtype = user_space.dtype
        self._stack_size = stack_size
Beispiel #4
0
 def __init__(self, config, env, device, **kwargs):
     """Twin Q-networks over the concatenated observation+action vector."""
     super().__init__(**kwargs)
     self.config = config
     self.env = env
     self.device = device

     in_dim = flatdim(env.observation_space) + flatdim(env.action_space)

     # Q1
     self.first_feature_layers = make_fc(in_dim, [256, 256])
     self.first_Q_head = nn.Linear(256, 1)

     # Q2
     self.second_feature_layers = make_fc(in_dim, [256, 256])
     self.second_Q_head = nn.Linear(256, 1)

     self.to(self.device)
    def __init__(self, input_space: Space, output_space: Space,
                 individual: np.ndarray, config: FeedForwardCfg):
        """Validate the genome length and cache sizes derived from the spaces."""
        super().__init__(input_space, output_space, individual, config)

        # The genome must exactly match the parameter count of this topology.
        expected_size = self.get_individual_size(
            config=config, input_space=input_space, output_space=output_space)
        assert len(individual) == expected_size

        self.input_size: int = flatdim(input_space)
        self.output_size: int = flatdim(output_space)
        self.config = config
        self.use_bias = config.use_bias

        # check_hidden_layers aborts the program when the structure is invalid.
        self.hidden_layers: List[int] = self.check_hidden_layers(
            config.hidden_layers)
Beispiel #6
0
    def __init__(self, config, env, device, **kwargs):
        """Actor-critic setup: feature MLP, action head, value head, optimizer."""
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)

        action_space = env.action_space
        if isinstance(action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, action_space.n,
                                               device, **kwargs)
        elif isinstance(action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(action_space), device,
                                                config['agent.std0'], **kwargs)

        value_head = nn.Linear(feature_dim, 1)
        ortho_init(value_head, weight_scale=1.0, constant_bias=0.0)
        # Initialize before moving to the device: ortho_init behaves
        # differently on CPU vs GPU, so this order keeps runs reproducible.
        self.V_head = value_head.to(device)

        # Buffer so the step counter is saved/loaded with the module state.
        self.register_buffer('total_timestep', torch.tensor(0))

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)

        self.gamma = config['agent.gamma']
        self.clip_rho = config['agent.clip_rho']
        self.clip_pg_rho = config['agent.clip_pg_rho']
Beispiel #7
0
def get_last_layers(space, last_dim):
    """Build output layer(s) mirroring the structure of *space*.

    Returns a single module for Discrete spaces and (possibly nested) lists
    of modules for Box/Tuple/Dict/MultiBinary/MultiDiscrete spaces; each
    leaf maps a last_dim-sized feature vector to its output.

    Raises NotImplementedError for unsupported space types.
    """
    if isinstance(space, Box):
        # One scalar regression head per element of the box's shape.
        def per_element(dims):
            if isinstance(dims, list):
                return [per_element(d) for d in dims]
            return nn.Linear(last_dim, 1)

        return per_element(np.empty(space.shape).tolist())

    if isinstance(space, Discrete):
        # Single linear layer producing one logit per discrete action.
        return nn.Sequential(nn.Linear(last_dim, flatdim(space)))

    if isinstance(space, Tuple):
        return [get_last_layers(sub, last_dim) for sub in space]

    if isinstance(space, Dict):
        return [get_last_layers(sub, last_dim) for sub in space.spaces.values()]

    if isinstance(space, MultiBinary):
        # One sigmoid-squashed scalar per binary entry.
        def per_binary(dims):
            if isinstance(dims, list):
                return [per_binary(d) for d in dims]
            return nn.Sequential(nn.Linear(last_dim, 1), nn.Sigmoid())

        return per_binary(np.empty(space.n).tolist())

    if isinstance(space, MultiDiscrete):
        # One categorical layer per component, sized by its nvec entry.
        def per_discrete(n):
            if isinstance(n, list):
                return [per_discrete(x) for x in n]
            return nn.Sequential(nn.Linear(last_dim, n))

        return per_discrete(space.nvec.tolist())

    raise NotImplementedError
Beispiel #8
0
def test_step(env: GameWrapperEnvironment):
    """Two steps as player 0 each yield per-agent obs/reward/terminal lists."""
    env.reset()

    # Round 1: only the first agent acts.
    assert env.next_player == 0

    bet_action_int = __action_int(env, ActionInstance(acn.ActionName.BET, 3))

    # Player 1 bet
    obs, reward, terminal, info = env.step([bet_action_int, None])
    for part in (obs, reward, terminal):
        assert len(part) == AGENT_COUNT
    # TODO: reward not set
    # assert all([r >= 0 for r in reward]), f"not contain negative {reward}"

    # The flattened observation must match the space's flat dimension.
    obs_space = env.observation_space
    flatten_data = obs[0]
    assert flatten_data.size == spaces.flatdim(obs_space)  # type: ignore

    # Round 2: still player 0's turn.
    assert env.next_player == 0
    hit_action_int = __action_int(env, ActionInstance(acn.ActionName.HIT,
                                                      True))

    obs, reward, terminal, info = env.step([hit_action_int, None])
    for part in (obs, reward, terminal):
        assert len(part) == AGENT_COUNT
Beispiel #9
0
    def __init__(self, config, env, device, **kwargs):
        """Deterministic actor: FC trunk plus a linear action head.

        Requires an action space whose bounds are identical on every
        dimension and symmetric around zero (low == -high).
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        obs_dim = flatdim(env.observation_space)
        self.feature_layers = make_fc(obs_dim, [400, 300])
        self.action_head = nn.Linear(300, flatdim(env.action_space))

        # All action dimensions share one bound, symmetric around zero.
        unique_high = np.unique(env.action_space.high)
        unique_low = np.unique(env.action_space.low)
        assert unique_high.size == 1
        assert -unique_low.item() == unique_high.item()
        self.max_action = env.action_space.high[0]

        self.to(self.device)
Beispiel #10
0
    def test_flatten(self):
        """su flattens Discrete to one value; gym flattens it to a one-hot."""
        # We flatten Discrete to 1 value
        assert su.flatdim(self.space) == 25
        # gym flattens Discrete to one-hot
        assert gyms.flatdim(self.space) == 35

        sample = su.torch_point(self.space, self.space.sample())
        # flatten/unflatten must round-trip the sample exactly.
        roundtrip = su.unflatten(self.space, su.flatten(self.space, sample))
        assert self.same(sample, roundtrip)

        # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            su_flat = su.flatten_space(self.space)
            assert su_flat.shape == (25, )
            # The maximum comes from Discrete(11)
            assert su_flat.high.max() == 11.0
            assert su_flat.low.min() == -10.0

            gym_flat = gyms.flatten_space(self.space)
            assert gym_flat.shape == (35, )
            # The maximum comes from Box(-10, 10, (3, 4))
            assert gym_flat.high.max() == 10.0
            assert gym_flat.low.min() == -10.0
Beispiel #11
0
    def __init__(self, env, capacity, device):
        """Pre-allocate flat float32 storage for a fixed-capacity replay buffer."""
        self.env = env
        self.capacity = capacity
        self.device = device

        obs_dim = flatdim(env.observation_space)
        act_dim = flatdim(env.action_space)
        self.observations = np.zeros([capacity, obs_dim], dtype=np.float32)
        self.actions = np.zeros([capacity, act_dim], dtype=np.float32)
        self.rewards = np.zeros([capacity, 1], dtype=np.float32)
        self.next_observations = np.zeros([capacity, obs_dim],
                                          dtype=np.float32)
        self.masks = np.zeros([capacity, 1], dtype=np.float32)

        # Ring-buffer bookkeeping: current fill level and next write index.
        self.size = 0
        self.pointer = 0
Beispiel #12
0
    def __init__(self, env, hidden_layers=None):
        """Fully-connected network for a Discrete-action environment.

        Builds input -> hidden... -> output linear layers with ReLU between
        them (no activation before the first or after the last layer).

        :param env: environment with OpenAI Gym spaces.
        :param hidden_layers: sizes of the hidden layers; defaults to none,
            i.e. a single linear layer from input to output.
        """
        # Action space and observation spaces should be OpenAI gym spaces.
        # BUGFIX: the original built `(isinstance(...), 'msg')` tuples without
        # the `assert` keyword, so these checks never ran.
        assert isinstance(
            env.observation_space,
            spaces.Space), 'Observation space should be an OpenAI Gym space'
        assert isinstance(env.action_space, spaces.Discrete
                          ), 'Action space should be an OpenAI Gym "Discrete" space'

        # Create network
        super().__init__()  # Initialize module
        self.env = env  # Save environment

        self.input_size = spaces.flatdim(self.env.observation_space)
        self.output_size = self.env.action_space.n
        # Copy to a fresh list: avoids the mutable-default-argument pitfall
        # and accidental aliasing of the caller's list.
        self.hidden_layers = list(hidden_layers) if hidden_layers else []

        self.network = nn.Sequential()
        layer_sizes = self.hidden_layers + [self.output_size]
        for i, out_features in enumerate(layer_sizes):
            in_features = self.input_size if i == 0 else layer_sizes[i - 1]
            layer = nn.Linear(in_features, out_features)

            # Add activation before every layer except the first.
            if i > 0:
                self.network.add_module('dense_act_{}'.format(i), nn.ReLU())
            self.network.add_module('dense_{}'.format(i + 1), layer)

        # Move network to GPU if available
        if torch.cuda.is_available():
            self.network.cuda()
Beispiel #13
0
    def __init__(self, env):
        """Expose a flat, unbounded float32 Box as the observation space."""
        super(FlattenObservation, self).__init__(env)

        flat_size = spaces.flatdim(env.observation_space)
        inf = float("inf")
        self.observation_space = spaces.Box(
            low=-inf, high=inf, shape=(flat_size, ), dtype=numpy.float32)
Beispiel #14
0
    def __init__(self, env):
        """Expose a flat [0, 1] float32 Box as the observation space."""
        super(FlattenScaleSwapAxisObservation, self).__init__(env)

        flat_size = spaces.flatdim(env.observation_space)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(flat_size, ), dtype=np.float32)
Beispiel #15
0
 def __init__(self, config, env, device, **kwargs):
     """Single-layer layer-norm LSTM over the flattened observation."""
     super().__init__(**kwargs)
     self.config = config
     self.env = env
     self.device = device

     obs_dim = spaces.flatdim(env.observation_space)
     self.lstm = make_lnlstm(obs_dim, config['rnn.size'], num_layers=1)

     self.to(self.device)
def state_dims(space: gym.Space) -> int:
    """Number of network input dimensions needed to encode *space*.

    Unlike flatdim, a Discrete space counts as a single dimension: e.g.
    time_remaining is discrete but occupies only one input. Tuple spaces
    are summed recursively; everything else falls back to flatdim.
    """
    if isinstance(space, Discrete):
        return 1
    if isinstance(space, GymTuple):
        return sum(state_dims(inner) for inner in space)
    return flatdim(space)
Beispiel #17
0
    def __init__(self, observation_space, action_space):
        """Build a C51-style distributional network.

        A shared two-layer MLP trunk (flat observation -> 64 -> 64) feeds
        one softmax head of NUM_ATOMS atoms per action (Discrete) or per
        discrete sub-choice (MultiDiscrete).

        :param observation_space: flattenable observation space
        :param action_space: Discrete or MultiDiscrete action space
        :raises TypeError: if action_space is neither Discrete nor
            MultiDiscrete
        """
        if not isinstance(action_space, (Discrete, MultiDiscrete)):
            raise TypeError(
                "action_space need to be instance of Discrete or MultiDiscrete, not :"
                + str(type(action_space)))

        super().__init__(observation_space=observation_space,
                         action_space=action_space)

        # Number of atoms per categorical return distribution (C51).
        self.NUM_ATOMS = 51

        # Shared feature trunk.
        self.network = nn.Sequential()
        self.network.add_module(
            "C51_Linear_Input",
            nn.Linear(np.prod(flatdim(self.observation_space)), 64))
        self.network.add_module("C51_LeakyReLU_Input", nn.LeakyReLU())
        self.network.add_module("C51_Linear_1", nn.Linear(64, 64))
        self.network.add_module("C51_LeakyReLU_1", nn.LeakyReLU())

        self.distributional_list = []
        if isinstance(self.action_space, Discrete):
            self.len_distributional = self.action_space.n

            # One Linear+Softmax head per action; add_module registers each
            # head's parameters on this module, and the list keeps direct
            # references for forward passes.
            for i in range(self.len_distributional):
                distributional = nn.Sequential()
                distributional.add_module(
                    "C51_Distributional_" + str(i) + "_Linear",
                    nn.Linear(64, self.NUM_ATOMS))
                distributional.add_module(
                    "C51_Distributional_" + str(i) + "_Softmax",
                    nn.Softmax(dim=1))

                self.add_module("C51_Distributional_" + str(i) + "_Sequential",
                                distributional)
                self.distributional_list.append(distributional)

        elif isinstance(self.action_space, MultiDiscrete):

            # Mirror the (possibly nested) nvec structure with nested lists
            # of heads, one per discrete choice.
            # NOTE(review): unlike the Discrete branch, these heads are kept
            # only in plain lists and never registered via add_module, so
            # their parameters are invisible to this module — verify intended.
            def gen_outputs(nvec):
                dis = []
                for nspace in nvec:
                    if isinstance(nspace, (list, np.ndarray)):
                        dis.append(gen_outputs(nspace))
                    else:
                        dis.append([
                            nn.Sequential(nn.Linear(64, self.NUM_ATOMS),
                                          nn.Softmax(dim=1))
                            for i in range(nspace)
                        ])
                return dis

            self.distributional_list = gen_outputs(self.action_space.nvec)
Beispiel #18
0
    def __init__(self, env, keys=None):
        """Expose a flat, unbounded float32 Box as the observation space.

        :param keys: currently unused; reserved for selecting observation
            subsets (see todo below).
        """
        super().__init__(env)

        # todo: allow selecting subsets using keys
        flat_size = spaces.flatdim(env.observation_space)
        inf = float('inf')
        self.observation_space = spaces.Box(
            low=-inf, high=inf, shape=(flat_size, ), dtype=np.float32)
Beispiel #19
0
    def __init__(self, env):
        """Flatten each per-agent observation space into an unbounded Box."""
        super(FlattenSAObservation, self).__init__(env)

        flat_boxes = []
        for sa_obs in env.observation_space:
            dim = spaces.flatdim(sa_obs)
            flat_boxes.append(
                spaces.Box(low=-float('inf'), high=float('inf'),
                           shape=(dim, ), dtype=np.float32))

        self.observation_space = spaces.Tuple(tuple(flat_boxes))
    def get_free_parameter_usage(cls, config: FeedForwardConfigClass,
                                 input_space: Space, output_space: Space):
        """Count the weights (and optional biases) of the feed-forward net.

        Returns a dict with the total under 'individual_size'.
        """
        in_size = flatdim(input_space)
        out_size = flatdim(output_space)
        hidden_layers = cls.check_hidden_layers(config.hidden_layers)

        # Weight matrices along the chain input -> hidden... -> output:
        # sum of products of consecutive layer sizes.
        dims = [in_size] + list(hidden_layers) + [out_size]
        individual_size = sum(a * b for a, b in zip(dims, dims[1:]))

        # One bias per hidden neuron and per output neuron.
        if config.use_bias:
            individual_size += sum(hidden_layers) + out_size

        return {'individual_size': individual_size}
Beispiel #21
0
    def __init__(self, config, env, device, **kwargs):
        """FC feature layers with one LayerNorm per hidden layer."""
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        sizes = config['nn.sizes']
        self.feature_layers = make_fc(spaces.flatdim(env.observation_space),
                                      sizes)
        self.layer_norms = nn.ModuleList([nn.LayerNorm(s) for s in sizes])
        self.to(self.device)
    def get_free_parameter_usage(cls, config: ILayerBasedBrainCfg,
                                 input_space: Space, output_space: Space):
        """Count the evolvable parameters of the layered recurrent brain.

        Per layer this sums: gate weights on the incoming signal (the
        network input for layer 0, the previous layer's output otherwise),
        gate weights on the recurrent state (full matrix or diagonal),
        optional per-gate biases, and optionally the initial neuron state.
        Finally adds the projection from the last layer to the outputs.
        Returns the total under the key "all".
        """
        number_gates = cls.get_number_gates()
        input_size = flatdim(input_space)
        output_size = flatdim(output_space)
        hidden_size = cls.get_number_hidden_values()
        structure = config.hidden_layer_structure

        total = 0
        previous_size = input_size
        for layer_size in structure:
            # Gate weights on the layer's incoming signal.
            total += number_gates * previous_size * layer_size

            # Gate weights on the recurrent state.
            if config.diagonal_hidden_to_hidden:
                total += number_gates * layer_size
            else:
                total += number_gates * layer_size * layer_size

            # One bias per gate per neuron.
            if config.use_bias:
                total += number_gates * layer_size

            # Optionally evolve the initial hidden values too.
            if config.optimize_initial_neuron_state:
                total += layer_size * hidden_size

            previous_size = layer_size

        # Projection of the last layer's output onto the output neurons.
        total += structure[-1] * output_size
        # TODO better usage of the dict
        return {"all": total}
Beispiel #23
0
 def output_observation_space(self):
     """The output observation space of the adapter.

     A Box with one padded row for the user observation followed by one
     padded row per candidate document, each spanning (-inf, inf).
     """
     user_space = self._input_observation_space.spaces['user']
     doc_space = self._input_observation_space.spaces['doc']
     user_dim = spaces.flatdim(user_space)
     doc_dims = [spaces.flatdim(d) for d in doc_space.spaces.values()]

     def stacked_rows(fill):
         # User row first, then one row per document, all padded to width.
         rows = [self._pad_with_zeros(np.ones(user_dim) * fill).reshape(1, -1)]
         rows.extend(
             self._pad_with_zeros(np.ones(dim) * fill).reshape(1, -1)
             for dim in doc_dims)
         return np.concatenate(rows)

     return spaces.Box(low=stacked_rows(-np.inf),
                       high=stacked_rows(np.inf),
                       dtype=np.float32)
    def generate_and_set_class_state(cls, config: ContinuousTimeRNNCfg,
                                     input_space: Space, output_space: Space):
        """Generate the V/W/T connectivity masks and store them on the class."""
        input_size = flatdim(input_space)
        output_size = flatdim(output_space)

        already_present = (hasattr(cls, "v_mask") or hasattr(cls, "w_mask")
                           or hasattr(cls, "t_mask"))
        if already_present:
            logging.warning("Masks are already present in class")

        # todo: also store masks in checkpoints and hof.
        # V: input -> neurons.
        v_mask = cls._generate_mask(config.v_mask, config.number_neurons,
                                    input_size, config.v_mask_param)
        if config.use_bias:
            # Append an always-connected bias column.
            v_mask = np.c_[v_mask, np.ones(config.number_neurons, dtype=bool)]

        # W: neuron -> neuron recurrence.
        w_mask = cls._generate_mask(config.w_mask, config.number_neurons,
                                    config.number_neurons, config.w_mask_param)

        # T: neurons -> outputs.
        # TODO the mask was flipped on the diagonal for mathematically
        # correct structure — check that no error remains.
        t_mask = cls._generate_mask(config.t_mask, output_size,
                                    config.number_neurons, config.t_mask_param)

        cls.set_class_state(v_mask=v_mask, w_mask=w_mask, t_mask=t_mask)
Beispiel #25
0
    def get_free_parameter_usage(cls, config: IPytorchBrainCfg,
                                 input_space: Space, output_space: Space):
        """Count the learnable parameters of the stacked recurrent brain.

        Sums input-to-hidden weights (observation for layer 0, hidden size
        for later layers), hidden-to-hidden weights for every layer,
        optional biases (two bias vectors per layer), and the final
        hidden-to-output projection.  Returns the total under "all".
        """
        num_layers = config.num_layers
        number_gates = cls.get_number_gates()
        hidden_size = config.hidden_size

        # Input-to-hidden weights.
        total = hidden_size * flatdim(input_space) * number_gates
        if num_layers > 1:
            total += hidden_size * hidden_size * (num_layers -
                                                  1) * number_gates

        # Hidden-to-hidden (recurrent) weights.
        total += hidden_size * hidden_size * num_layers * number_gates

        # Two bias vectors per layer per gate.
        if config.use_bias:
            total += 2 * hidden_size * num_layers * number_gates

        # Final linear readout onto the outputs.
        total += flatdim(output_space) * hidden_size
        # TODO better usage of the dict
        return {"all": total}
Beispiel #26
0
 def __init__(self, config, env, device, **kwargs):
     """FC trunk with orthogonal ReLU init and one LayerNorm per layer."""
     super().__init__(**kwargs)
     self.config = config
     self.env = env
     self.device = device

     sizes = config['nn.sizes']
     self.feature_layers = make_fc(spaces.flatdim(env.observation_space), sizes)
     for layer in self.feature_layers:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
     self.layer_norms = nn.ModuleList([nn.LayerNorm(s) for s in sizes])

     self.to(self.device)
Beispiel #27
0
    def __init__(self, config, env, device, **kwargs):
        """Policy network: orthogonally-initialized tanh FC trunk plus an
        action head matched to the action space type (Discrete or Box)."""
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        self.feature_layers = make_fc(spaces.flatdim(env.observation_space),
                                      config['nn.sizes'])
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        feature_dim = config['nn.sizes'][-1]
        action_space = env.action_space
        if isinstance(action_space, spaces.Discrete):
            self.action_head = CategoricalHead(feature_dim, action_space.n,
                                               device, **kwargs)
        elif isinstance(action_space, spaces.Box):
            self.action_head = DiagGaussianHead(
                feature_dim, spaces.flatdim(action_space), device,
                config['agent.std0'], **kwargs)

        self.to(self.device)
Beispiel #28
0
    def _transform_action_space(self, space):
        """Normalize a Gym action space into the project's *Shaped variants.

        Discrete/MultiBinary/MultiDiscrete become DiscreteShaped, Box is
        rebuilt flattened, and Tuple spaces are recursively flattened into
        at most one continuous and one discrete component (combined into a
        TupleShaped when both are present).
        """
        if isinstance(space, spaces.Discrete):
            space = DiscreteShaped(space.n)
        elif isinstance(space, spaces.Box):
            # Rebuild with flattened bounds but the original shape/dtype.
            space = spaces.Box(space.low.flatten(),
                               space.high.flatten(),
                               shape=space.shape,
                               dtype=space.dtype)
        elif (isinstance(space, spaces.MultiBinary)
              or isinstance(space, spaces.MultiDiscrete)):
            space = DiscreteShaped(spaces.flatdim(space))
        elif isinstance(space, spaces.Tuple):

            conts = []
            discretes = []

            # Breadth-first flattening: nested Tuples are appended to the
            # list *while it is being iterated* — deliberate, so their
            # children are visited in later iterations.
            space_iter = list(space.spaces)
            for sp in space_iter:
                if isinstance(sp, spaces.Tuple):
                    space_iter += sp.spaces
                else:
                    sp = self._transform_action_space(sp)

                    # Partition leaves into continuous and discrete buckets.
                    if isinstance(sp, spaces.Box):
                        conts.append(sp)
                    elif isinstance(sp, spaces.Discrete):
                        discretes.append(sp)

            cont_space = None
            discrete_space = None

            # A single space stands alone; several are merged.
            if len(conts) == 1:
                cont_space = conts[0]
            elif len(conts) > 1:
                cont_space = FlattenedTupleShaped(conts)

            if len(discretes) == 1:
                discrete_space = discretes[0]
            if len(discretes) > 1:
                discrete_space = FlattenedTupleShaped(discretes)

            # Combine whichever components exist; discrete comes first.
            if cont_space is None:
                space = discrete_space
            elif discrete_space is None:
                space = cont_space
            else:
                space = TupleShaped([discrete_space, cont_space])

            # NOTE(review): this guard looks unreachable — every branch
            # above yields a *Shaped or None, not a plain spaces.Tuple.
            if isinstance(space, spaces.Tuple):
                space = TupleShaped(space.spaces)

        return space
Beispiel #29
0
    def __init__(self, feature_size, batch_size, timesteps, num_players, num_time, obs_space, obj_obs_space, reco_desc,
                 action_num, loc_feature_num):
        """Two DynEnv feature extractors (player and object observations),
        a linear location predictor, a reconstruction net, and an MSE loss,
        all sharing one feature size."""
        super().__init__()

        # Player-observation embedder, extended by action_num extra features.
        player_feat_sizes = [flatdim(s) for s in obs_space.spaces]
        self.embedder = DynEnvFeatureExtractor(player_feat_sizes, feature_size, batch_size,
                                               timesteps, num_players, len(player_feat_sizes),
                                               num_time, extended_feature_cnt=action_num)

        # Maps embedded features to location features.
        self.predictor = nn.Linear(feature_size, loc_feature_num)

        # Object-observation embedder, extended by loc_feature_num extras.
        obj_feat_sizes = [flatdim(s) for s in obj_obs_space.spaces]
        self.objEmbedder = DynEnvFeatureExtractor(obj_feat_sizes, feature_size, batch_size,
                                                  timesteps, num_players, len(obj_feat_sizes),
                                                  num_time, extended_feature_cnt=loc_feature_num)
        self.reconstructor = ReconNet(feature_size, reco_desc)

        self.mse = nn.MSELoss()
Beispiel #30
0
    def __init__(self, config, env, device, **kwargs):
        """Feature MLP plus an action head matched to the action space type."""
        super().__init__(config, env, device, **kwargs)

        self.feature_network = MLP(config, env, device, **kwargs)
        feature_dim = config['nn.sizes'][-1]

        action_space = env.action_space
        if isinstance(action_space, spaces.Discrete):
            self.action_head = CategoricalHead(feature_dim, action_space.n,
                                               device, **kwargs)
        elif isinstance(action_space, spaces.Box):
            self.action_head = DiagGaussianHead(
                feature_dim, spaces.flatdim(action_space), device,
                config['agent.std0'], **kwargs)

        self.total_timestep = 0