def get_configs_and_output_sizes(cls, config: ConcatenatedBrainLSTMCfg, input_space: Space, output_space: Space):
    """Resolve the sub-network configs and the LSTM boundary sizes.

    Returns (front_cfg, back_cfg, lstm_cfg, lstm_input_size, lstm_output_size).
    When a feed-forward front/back net is absent, the LSTM spans the full
    flattened input/output dimension and the corresponding cfg is None.
    """
    in_dim = flatdim(input_space)
    out_dim = flatdim(output_space)
    # Defaults when no feed-forward nets wrap the LSTM.
    lstm_in, lstm_out = in_dim, out_dim
    front_cfg = None
    back_cfg = None
    lstm_cfg = (config.lstm
                if isinstance(config.lstm, LstmLayeredCfg)
                else LstmLayeredCfg(**config.lstm))
    if config.feed_forward_front:
        raw_front = config.feed_forward_front
        front_cfg = (raw_front
                     if isinstance(raw_front, FeedForwardCfg)
                     else FeedForwardCfg(**raw_front))
        # The front net's last hidden layer feeds the LSTM.
        lstm_in = front_cfg.hidden_layers[-1]
    if config.feed_forward_back:
        raw_back = config.feed_forward_back
        back_cfg = (raw_back
                    if isinstance(raw_back, FeedForwardCfg)
                    else FeedForwardCfg(**raw_back))
        # The LSTM must produce what the back net's first layer expects.
        lstm_out = back_cfg.hidden_layers[0]
    return front_cfg, back_cfg, lstm_cfg, lstm_in, lstm_out
def __init__(self, config, env, device, **kwargs):
    """Stochastic policy net: shared 256x256 trunk with mean and log-std heads."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    obs_dim = flatdim(env.observation_space)
    act_dim = flatdim(env.action_space)
    self.feature_layers = make_fc(obs_dim, [256, 256])
    self.mean_head = nn.Linear(256, act_dim)
    self.logstd_head = nn.Linear(256, act_dim)
    self.to(device)
def __init__(self, input_observation_space, stack_size=1):
    """Adapter laying out 'user' + per-'doc' observations as rows of one padded matrix."""
    self._input_observation_space = input_observation_space
    user_space = input_observation_space.spaces['user']
    doc_space = input_observation_space.spaces['doc']
    self._num_candidates = len(doc_space.spaces)
    first_doc = next(iter(doc_space.spaces.values()))
    doc_dim = spaces.flatdim(first_doc)
    # Use the longer of user_space and doc_space as the shape of each row.
    row_len = np.max([spaces.flatdim(user_space), doc_dim])
    # One row for the user plus one per candidate document.
    self._observation_shape = (self._num_candidates + 1,) + (row_len,)
    self._observation_dtype = user_space.dtype
    self._stack_size = stack_size
def __init__(self, config, env, device, **kwargs):
    """Twin Q critics: two independent 256x256 networks over (obs, action)."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    # Both critics consume the concatenated observation + action vector.
    in_dim = flatdim(env.observation_space) + flatdim(env.action_space)
    # Q1
    self.first_feature_layers = make_fc(in_dim, [256, 256])
    self.first_Q_head = nn.Linear(256, 1)
    # Q2
    self.second_feature_layers = make_fc(in_dim, [256, 256])
    self.second_Q_head = nn.Linear(256, 1)
    self.to(self.device)
def __init__(self, input_space: Space, output_space: Space, individual: np.ndarray, config: FeedForwardCfg):
    """Feed-forward brain whose parameters are decoded from a flat genome.

    The genome length must exactly match what get_individual_size reports
    for this config and space pair.
    """
    super().__init__(input_space, output_space, individual, config)
    expected_size = self.get_individual_size(
        config=config, input_space=input_space, output_space=output_space)
    assert len(individual) == expected_size
    self.input_size: int = flatdim(input_space)
    self.output_size: int = flatdim(output_space)
    self.config = config
    self.use_bias = config.use_bias
    # If the check fails the program aborts
    self.hidden_layers: List[int] = self.check_hidden_layers(config.hidden_layers)
def __init__(self, config, env, device, **kwargs):
    """Actor-critic agent: MLP features, action head per space type, value head, Adam."""
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)
    action_space = env.action_space
    if isinstance(action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n,
                                           device, **kwargs)
    elif isinstance(action_space, Box):
        self.action_head = DiagGaussianHead(feature_dim,
                                            flatdim(action_space), device,
                                            config['agent.std0'], **kwargs)
    self.V_head = nn.Linear(feature_dim, 1)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    # reproducible between CPU/GPU, ortho_init behaves differently
    self.V_head = self.V_head.to(device)
    # Buffer so the step counter is saved/restored with the state dict.
    self.register_buffer('total_timestep', torch.tensor(0))
    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                config['train.timestep'],
                                                min_lr=1e-8)
    self.gamma = config['agent.gamma']
    self.clip_rho = config['agent.clip_rho']
    self.clip_pg_rho = config['agent.clip_pg_rho']
def get_last_layers(space, last_dim):
    """Build output head(s) mapping a `last_dim` feature vector onto `space`.

    Composite spaces (Tuple/Dict and multi-element Box/MultiBinary/
    MultiDiscrete) yield nested lists mirroring the space structure.
    Raises NotImplementedError for unsupported space types.
    """
    if isinstance(space, Box):
        # One scalar linear head per Box element, nested to match space.shape.
        def build_box(dim, template):
            if isinstance(template, list):
                return [build_box(dim, item) for item in template]
            return nn.Linear(dim, 1)
        return build_box(last_dim, np.empty(space.shape).tolist())
    if isinstance(space, Discrete):
        # Single linear head with one logit per action.
        return nn.Sequential(*[nn.Linear(last_dim, flatdim(space))])
    if isinstance(space, Tuple):
        return [get_last_layers(sub, last_dim) for sub in space]
    if isinstance(space, Dict):
        return [get_last_layers(sub, last_dim) for sub in space.spaces.values()]
    if isinstance(space, MultiBinary):
        # One sigmoid-activated scalar head per binary entry.
        def build_binary(dim, template):
            if isinstance(template, list):
                return [build_binary(dim, item) for item in template]
            return nn.Sequential(*[nn.Linear(dim, 1), nn.Sigmoid()])
        return build_binary(last_dim, np.empty(space.n).tolist())
    if isinstance(space, MultiDiscrete):
        # One head with `n` logits per sub-action, nested to match nvec.
        def build_multidiscrete(dim, template):
            if isinstance(template, list):
                return [build_multidiscrete(dim, item) for item in template]
            return nn.Sequential(*[nn.Linear(dim, template)])
        return build_multidiscrete(last_dim, space.nvec.tolist())
    raise NotImplementedError
def test_step(env: GameWrapperEnvironment):
    """Drive one BET and one HIT action for player 0, checking per-agent vectors."""

    def assert_agent_lengths(obs, reward, terminal):
        # Every per-agent vector must have one entry per agent.
        assert len(obs) == AGENT_COUNT
        assert len(reward) == AGENT_COUNT
        assert len(terminal) == AGENT_COUNT

    env.reset()
    # Note round 1: only one agent we care about!
    assert env.next_player == 0
    # Player 1 bet
    bet_action_int = __action_int(env, ActionInstance(acn.ActionName.BET, 3))
    obs, reward, terminal, info = env.step([bet_action_int, None])
    assert_agent_lengths(obs, reward, terminal)
    # TODO: reward not set
    # assert all([r >= 0 for r in reward]), f"not contain negative {reward}"
    # given the observation, we should be able to flatten it
    # and obtain reasonable result
    obs_space = env.observation_space
    flatten_data = obs[0]
    assert flatten_data.size == spaces.flatdim(obs_space)  # type: ignore
    # Now we need to action again
    assert env.next_player == 0
    hit_action_int = __action_int(env, ActionInstance(acn.ActionName.HIT, True))
    obs, reward, terminal, info = env.step([hit_action_int, None])
    assert_agent_lengths(obs, reward, terminal)
def __init__(self, config, env, device, **kwargs):
    """Deterministic actor: 400-300 feature trunk plus a linear action head.

    Requires a symmetric, uniform action range (every dimension has the
    same bound and low == -high); the bound is kept in self.max_action.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    self.feature_layers = make_fc(flatdim(env.observation_space), [400, 300])
    self.action_head = nn.Linear(300, flatdim(env.action_space))
    highs = np.unique(env.action_space.high)
    lows = np.unique(env.action_space.low)
    assert highs.size == 1
    assert -lows.item() == highs.item()
    self.max_action = env.action_space.high[0]
    self.to(self.device)
def test_flatten(self):
    """su flattens Discrete to one scalar (25 total); gym one-hot-expands it (35)."""
    # We flatten Discrete to 1 value
    assert su.flatdim(self.space) == 25
    # gym flattens Discrete to one-hot
    assert gyms.flatdim(self.space) == 35
    sample = su.torch_point(self.space, self.space.sample())
    # Round-tripping through flatten/unflatten must reproduce the sample.
    roundtripped = su.unflatten(self.space, su.flatten(self.space, sample))
    assert self.same(sample, roundtripped)
    # suppress `UserWarning: WARN: Box bound precision lowered by casting to float32`
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        flat_space = su.flatten_space(self.space)
    assert flat_space.shape == (25, )
    # The maximum comes from Discrete(11)
    assert flat_space.high.max() == 11.0
    assert flat_space.low.min() == -10.0
    gym_flat_space = gyms.flatten_space(self.space)
    assert gym_flat_space.shape == (35, )
    # The maximum comes from Box(-10, 10, (3, 4))
    assert gym_flat_space.high.max() == 10.0
    assert gym_flat_space.low.min() == -10.0
def __init__(self, env, capacity, device):
    """Preallocated flat replay buffer (ring buffer via size/pointer)."""
    self.env = env
    self.capacity = capacity
    self.device = device
    obs_dim = flatdim(env.observation_space)
    act_dim = flatdim(env.action_space)
    self.observations = np.zeros([capacity, obs_dim], dtype=np.float32)
    self.actions = np.zeros([capacity, act_dim], dtype=np.float32)
    self.rewards = np.zeros([capacity, 1], dtype=np.float32)
    self.next_observations = np.zeros([capacity, obs_dim], dtype=np.float32)
    # Continuation masks (0/1) stored alongside each transition.
    self.masks = np.zeros([capacity, 1], dtype=np.float32)
    self.size = 0
    self.pointer = 0
def __init__(self, env, hidden_layers=[]):
    """MLP Q-network over a flattened observation, one output per discrete action.

    :param env: environment providing an observation_space and a Discrete
        action_space (both OpenAI Gym spaces)
    :param hidden_layers: sizes of the hidden fully-connected layers
        (never mutated, so the shared default list is safe)
    :raises AssertionError: if the env spaces are not Gym spaces of the
        expected kind
    """
    # Action space and observation spaces should be OpenAI Gym spaces.
    # BUG FIX: the original called isinstance() without the `assert`
    # keyword, so both checks were no-op expressions and never enforced.
    assert isinstance(env.observation_space, spaces.Space), \
        'Observation space should be an OpenAI Gym space'
    assert isinstance(env.action_space, spaces.Discrete), \
        'Action space should be an OpenAI Gym "Discrete" space'
    # Create network
    super().__init__()  # Initialize module
    self.env = env  # Save environment
    self.input_size = spaces.flatdim(self.env.observation_space)
    self.output_size = self.env.action_space.n
    self.hidden_layers = hidden_layers
    self.network = nn.Sequential()
    # Layer widths along the chain hidden... -> output.
    layer_sizes = hidden_layers + [self.output_size]
    for i, out_features in enumerate(layer_sizes):
        in_features = self.input_size if i == 0 else layer_sizes[i - 1]
        layer = nn.Linear(in_features, out_features)
        # ReLU between layers (none before the first linear layer).
        if i > 0:
            self.network.add_module('dense_act_{}'.format(i), nn.ReLU())
        self.network.add_module('dense_{}'.format(i + 1), layer)
    # Move network to GPU if available
    if torch.cuda.is_available():
        self.network.cuda()
def __init__(self, env):
    """Wrapper advertising a flat, unbounded Box observation space."""
    super(FlattenObservation, self).__init__(env)
    flat_size = spaces.flatdim(env.observation_space)
    self.observation_space = spaces.Box(low=-float("inf"),
                                        high=float("inf"),
                                        shape=(flat_size, ),
                                        dtype=numpy.float32)
def __init__(self, env):
    """Wrapper advertising a flat observation space with bounds [0, 1]."""
    super(FlattenScaleSwapAxisObservation, self).__init__(env)
    flat_size = spaces.flatdim(env.observation_space)
    self.observation_space = spaces.Box(low=0,
                                        high=1,
                                        shape=(flat_size, ),
                                        dtype=np.float32)
def __init__(self, config, env, device, **kwargs):
    """Single-layer recurrent net (make_lnlstm) over the flattened observation."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    obs_dim = spaces.flatdim(env.observation_space)
    self.lstm = make_lnlstm(obs_dim, config['rnn.size'], num_layers=1)
    self.to(self.device)
def state_dims(space: gym.Space) -> int:
    """Number of NN input dimensions needed to represent `space`.

    Unlike flatdim, a Discrete space counts as a single scalar input
    rather than a one-hot vector — e.g. time_remaining is discrete but
    occupies only one input dimension.
    """
    if isinstance(space, Discrete):
        return 1
    if isinstance(space, GymTuple):
        # Sum the contributions of each member space recursively.
        return sum(state_dims(member) for member in space)
    return flatdim(space)
def __init__(self, observation_space, action_space):
    """C51-style distributional Q-network construction.

    Builds a shared trunk (flattened observation -> 64 -> 64, LeakyReLU)
    and one softmax head of NUM_ATOMS logits per action.

    :param observation_space: observation space; its flat dimension sizes
        the input layer
    :param action_space: Discrete or MultiDiscrete action space
    :raises TypeError: if action_space is neither Discrete nor MultiDiscrete
    """
    if not isinstance(action_space, (Discrete, MultiDiscrete)):
        raise TypeError(
            "action_space need to be instance of Discrete or MultiDiscrete, not :"
            + str(type(action_space)))
    super().__init__(observation_space=observation_space,
                     action_space=action_space)
    # Number of atoms per categorical value distribution (C51).
    self.NUM_ATOMS = 51
    # Shared feature trunk.
    self.network = nn.Sequential()
    self.network.add_module(
        "C51_Linear_Input",
        nn.Linear(np.prod(flatdim(self.observation_space)), 64))
    self.network.add_module("C51_LeakyReLU_Input", nn.LeakyReLU())
    self.network.add_module("C51_Linear_1", nn.Linear(64, 64))
    self.network.add_module("C51_LeakyReLU_1", nn.LeakyReLU())
    self.distributional_list = []
    if isinstance(self.action_space, Discrete):
        # One softmax head per action; each head is registered via
        # add_module so its parameters are tracked by the module.
        self.len_distributional = self.action_space.n
        for i in range(self.len_distributional):
            distributional = nn.Sequential()
            distributional.add_module(
                "C51_Distributional_" + str(i) + "_Linear",
                nn.Linear(64, self.NUM_ATOMS))
            distributional.add_module(
                "C51_Distributional_" + str(i) + "_Softmax",
                nn.Softmax(dim=1))
            self.add_module("C51_Distributional_" + str(i) + "_Sequential",
                            distributional)
            self.distributional_list.append(distributional)
    elif isinstance(self.action_space, MultiDiscrete):

        def gen_outputs(nvec):
            # Recursively mirror the (possibly nested) nvec structure,
            # producing one softmax head per possible sub-action value.
            # NOTE(review): unlike the Discrete branch, these heads are
            # NOT registered with add_module, so their parameters are not
            # tracked by this module — confirm they are registered (or
            # optimized) elsewhere.
            dis = []
            for nspace in nvec:
                if isinstance(nspace, (list, np.ndarray)):
                    dis.append(gen_outputs(nspace))
                else:
                    dis.append([
                        nn.Sequential(nn.Linear(64, self.NUM_ATOMS),
                                      nn.Softmax(dim=1))
                        for i in range(nspace)
                    ])
            return dis

        self.distributional_list = gen_outputs(self.action_space.nvec)
def __init__(self, env, keys=None):
    """Wrapper exposing the observation as a flat, unbounded Box space."""
    super().__init__(env)
    # todo: allow selecting subsets using keys
    flat_size = spaces.flatdim(env.observation_space)
    self.observation_space = spaces.Box(low=-float('inf'),
                                        high=float('inf'),
                                        shape=(flat_size, ),
                                        dtype=np.float32)
def __init__(self, env):
    """Flatten every single-agent observation space into its own unbounded Box."""
    super(FlattenSAObservation, self).__init__(env)
    flat_boxes = []
    for sa_obs in env.observation_space:
        dim = spaces.flatdim(sa_obs)
        flat_boxes.append(
            spaces.Box(low=-float('inf'),
                       high=float('inf'),
                       shape=(dim,),
                       dtype=np.float32))
    self.observation_space = spaces.Tuple(tuple(flat_boxes))
def get_free_parameter_usage(cls, config: FeedForwardConfigClass, input_space: Space, output_space: Space):
    """Count the genome entries a feed-forward net of this shape consumes.

    Weights are the products of adjacent layer widths along the chain
    input -> hidden layers -> output; with use_bias, one bias is added
    per non-input neuron.
    """
    in_dim = flatdim(input_space)
    out_dim = flatdim(output_space)
    hidden = cls.check_hidden_layers(config.hidden_layers)
    layer_sizes = [in_dim] + list(hidden) + [out_dim]
    total = sum(a * b for a, b in zip(layer_sizes, layer_sizes[1:]))
    if config.use_bias:
        total += sum(hidden) + out_dim
    return {'individual_size': total}
def __init__(self, config, env, device, **kwargs):
    """MLP feature extractor with one LayerNorm per hidden layer."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    sizes = config['nn.sizes']
    self.feature_layers = make_fc(spaces.flatdim(env.observation_space), sizes)
    self.layer_norms = nn.ModuleList([nn.LayerNorm(width) for width in sizes])
    self.to(self.device)
def get_free_parameter_usage(cls, config: ILayerBasedBrainCfg, input_space: Space, output_space: Space):
    """Count genome entries needed by a gated, layered recurrent brain.

    Per layer: gated input weights, gated recurrent weights (optionally
    diagonal-only), optional biases, and optional learned initial hidden
    state; plus a final projection from the last layer to the outputs.
    """
    number_gates = cls.get_number_gates()
    input_size = flatdim(input_space)
    output_size = flatdim(output_space)
    hidden_size = cls.get_number_hidden_values()
    structure = config.hidden_layer_structure
    total = 0
    for idx, width in enumerate(structure):
        # The first layer reads the network input; every later layer
        # reads the previous layer's output.
        fan_in = input_size if idx == 0 else structure[idx - 1]
        total += number_gates * fan_in * width
        # Recurrent (state) weights, optionally restricted to a diagonal.
        if config.diagonal_hidden_to_hidden:
            total += number_gates * width
        else:
            total += number_gates * width * width
        # Biases
        if config.use_bias:
            total += width * number_gates
        # Optional learned initial neuron state.
        if config.optimize_initial_neuron_state:
            total += width * hidden_size
    # Projection of the last layer's output onto the output neurons.
    total += structure[-1] * output_size
    # TODO better usage of the dict
    return {"all": total}
def output_observation_space(self):
    """The output observation space of the adapter.

    Rows are [user] followed by one row per document, each padded with
    zeros to a common width; bounds are +/-inf over the real entries.
    """
    user_space = self._input_observation_space.spaces['user']
    doc_space = self._input_observation_space.spaces['doc']
    user_dim = spaces.flatdim(user_space)

    def padded_row(dim, value):
        # A 1xN row of `value` over `dim` entries, zero-padded to full width.
        return self._pad_with_zeros(np.ones(dim) * value).reshape(1, -1)

    low_rows = [padded_row(user_dim, -np.inf)]
    high_rows = [padded_row(user_dim, np.inf)]
    for d in doc_space.spaces.values():
        doc_dim = spaces.flatdim(d)
        low_rows.append(padded_row(doc_dim, -np.inf))
        high_rows.append(padded_row(doc_dim, np.inf))
    return spaces.Box(low=np.concatenate(low_rows),
                      high=np.concatenate(high_rows),
                      dtype=np.float32)
def generate_and_set_class_state(cls, config: ContinuousTimeRNNCfg, input_space: Space, output_space: Space):
    """Generate the sparsity masks (v, w, t) and store them on the class."""
    input_size = flatdim(input_space)
    output_size = flatdim(output_space)
    already_masked = (hasattr(cls, "v_mask") or hasattr(cls, "w_mask")
                      or hasattr(cls, "t_mask"))
    if already_masked:
        logging.warning("Masks are already present in class")
    # todo: also store masks in checkpoints and hof.
    # Input mask: neurons x inputs (+1 all-ones column when a bias is used).
    v_mask = cls._generate_mask(config.v_mask, config.number_neurons,
                                input_size, config.v_mask_param)
    if config.use_bias:
        v_mask = np.c_[v_mask, np.ones(config.number_neurons, dtype=bool)]
    # Recurrent mask: neurons x neurons.
    w_mask = cls._generate_mask(config.w_mask, config.number_neurons,
                                config.number_neurons, config.w_mask_param)
    # TODO The mask was flipped on the diagonal for mathematically correct
    # structure — check that no error exists.
    t_mask = cls._generate_mask(config.t_mask, output_size,
                                config.number_neurons, config.t_mask_param)
    cls.set_class_state(v_mask=v_mask, w_mask=w_mask, t_mask=t_mask)
def get_free_parameter_usage(cls, config: IPytorchBrainCfg, input_space: Space, output_space: Space):
    """Count the trainable parameters of a stacked gated recurrent brain."""
    num_layers = config.num_layers
    number_gates = cls.get_number_gates()
    hidden = config.hidden_size
    total = 0
    # Input-to-hidden weights: the first layer reads the flattened input,
    # each deeper layer reads the previous layer's hidden state.
    total += hidden * flatdim(input_space) * number_gates
    if num_layers > 1:
        total += hidden * hidden * (num_layers - 1) * number_gates
    # Hidden-to-hidden (recurrent) weights for every layer.
    total += hidden * hidden * num_layers * number_gates
    if config.use_bias:
        # Two bias vectors per layer — presumably mirroring PyTorch's
        # b_ih/b_hh layout; confirm against the brain implementation.
        total += 2 * hidden * num_layers * number_gates
    # Final linear readout onto the flattened output space.
    total += flatdim(output_space) * hidden
    # TODO better usage of the dict
    return {"all": total}
def __init__(self, config, env, device, **kwargs):
    """MLP feature extractor: relu-gain orthogonal init plus per-layer LayerNorms."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    sizes = config['nn.sizes']
    self.feature_layers = make_fc(spaces.flatdim(env.observation_space), sizes)
    for fc in self.feature_layers:
        ortho_init(fc, nonlinearity='relu', constant_bias=0.0)
    self.layer_norms = nn.ModuleList([nn.LayerNorm(width) for width in sizes])
    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Policy net: tanh-gain orthogonal MLP trunk plus an action head per space type."""
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    sizes = config['nn.sizes']
    self.feature_layers = make_fc(spaces.flatdim(env.observation_space), sizes)
    for fc in self.feature_layers:
        ortho_init(fc, nonlinearity='tanh', constant_bias=0.0)
    feature_dim = sizes[-1]
    action_space = env.action_space
    if isinstance(action_space, spaces.Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n,
                                           device, **kwargs)
    elif isinstance(action_space, spaces.Box):
        self.action_head = DiagGaussianHead(feature_dim,
                                            spaces.flatdim(action_space),
                                            device, config['agent.std0'],
                                            **kwargs)
    self.to(self.device)
def _transform_action_space(self, space):
    """Normalize an action space into the shaped variants this wrapper uses.

    Discrete -> DiscreteShaped; Box -> Box with flattened bound arrays;
    MultiBinary/MultiDiscrete -> DiscreteShaped over the flat dimension;
    Tuple -> members transformed recursively, then grouped into at most
    one continuous and one discrete part (wrapped in FlattenedTupleShaped
    when there are several of a kind).
    """
    if isinstance(space, spaces.Discrete):
        space = DiscreteShaped(space.n)
    elif isinstance(space, spaces.Box):
        # Flatten the bound arrays while passing the original shape through.
        space = spaces.Box(space.low.flatten(), space.high.flatten(),
                           shape=space.shape, dtype=space.dtype)
    elif (isinstance(space, spaces.MultiBinary)
          or isinstance(space, spaces.MultiDiscrete)):
        space = DiscreteShaped(spaces.flatdim(space))
    elif isinstance(space, spaces.Tuple):
        conts = []
        discretes = []
        # Deliberately appends to the list being iterated: nested Tuple
        # members are pushed onto space_iter so arbitrarily nested Tuples
        # are flattened in a single pass.
        space_iter = list(space.spaces)
        for sp in space_iter:
            if isinstance(sp, spaces.Tuple):
                space_iter += sp.spaces
            else:
                sp = self._transform_action_space(sp)
                # After transformation only Box (continuous) and Discrete
                # (incl. DiscreteShaped) members are collected.
                if isinstance(sp, spaces.Box):
                    conts.append(sp)
                elif isinstance(sp, spaces.Discrete):
                    discretes.append(sp)
        cont_space = None
        discrete_space = None
        if len(conts) == 1:
            cont_space = conts[0]
        elif len(conts) > 1:
            cont_space = FlattenedTupleShaped(conts)
        if len(discretes) == 1:
            discrete_space = discretes[0]
        if len(discretes) > 1:
            discrete_space = FlattenedTupleShaped(discretes)
        # Combine: discrete-only, continuous-only, or a [discrete, cont] pair.
        if cont_space is None:
            space = discrete_space
        elif discrete_space is None:
            space = cont_space
        else:
            space = TupleShaped([discrete_space, cont_space])
    if isinstance(space, spaces.Tuple):
        space = TupleShaped(space.spaces)
    return space
def __init__(self, feature_size, batch_size, timesteps, num_players, num_time, obs_space, obj_obs_space, reco_desc, action_num, loc_feature_num):
    """Two-branch model: one feature extractor per observation space.

    The first branch embeds obs_space features (augmented with the action
    count) and predicts loc_feature_num values; the second embeds
    obj_obs_space features augmented with those location features; a
    ReconNet and an MSE loss complete the model.
    """
    super().__init__()
    # First branch: one flat feature count per object type in obs_space.
    branch_feats = [flatdim(s) for s in obs_space.spaces]
    self.embedder = DynEnvFeatureExtractor(branch_feats, feature_size,
                                           batch_size, timesteps,
                                           num_players, len(branch_feats),
                                           num_time,
                                           extended_feature_cnt=action_num)
    self.predictor = nn.Linear(feature_size, loc_feature_num)
    # Second branch: same extractor over obj_obs_space, extended with the
    # predicted location features instead of the action.
    obj_feats = [flatdim(s) for s in obj_obs_space.spaces]
    self.objEmbedder = DynEnvFeatureExtractor(obj_feats, feature_size,
                                              batch_size, timesteps,
                                              num_players, len(obj_feats),
                                              num_time,
                                              extended_feature_cnt=loc_feature_num)
    self.reconstructor = ReconNet(feature_size, reco_desc)
    self.mse = nn.MSELoss()
def __init__(self, config, env, device, **kwargs):
    """Actor: MLP feature network plus a categorical or diagonal-Gaussian head."""
    super().__init__(config, env, device, **kwargs)
    self.feature_network = MLP(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    action_space = env.action_space
    if isinstance(action_space, spaces.Discrete):
        self.action_head = CategoricalHead(feature_dim, action_space.n,
                                           device, **kwargs)
    elif isinstance(action_space, spaces.Box):
        self.action_head = DiagGaussianHead(feature_dim,
                                            spaces.flatdim(action_space),
                                            device, config['agent.std0'],
                                            **kwargs)
    self.total_timestep = 0