def __init__(
    self,
    # state_net: StateNet,
    head_net: ValueHead,
):
    super().__init__()
    # self.state_net = state_net
    self.observation_net = nn.Sequential(
        nn.Conv2d(12, 16, kernel_size=3),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(16, 32, kernel_size=3, groups=4),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(32, 64, kernel_size=3, groups=4),
        # Flatten()
    )
    self.observation_net.apply(create_optimal_inner_init(nn.LeakyReLU))

    self.aggregation_net = nn.Sequential(
        Flatten(),
        nn.Linear(64, 64),
        nn.LayerNorm(64),
        nn.Dropout(p=0.1),
        nn.LeakyReLU(),
    )
    self.aggregation_net.apply(create_optimal_inner_init(nn.LeakyReLU))

    self.head_net = head_net

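# Standalone shape-check sketch, assuming a 7x7 observation with 12 stacked
# channels (the actual input size is not shown here): three valid 3x3
# convolutions each shrink the spatial size by 2, so 7x7 collapses to a
# 1x1 map with 64 channels, which is why the aggregation net starts with
# Linear(64, 64). nn.Flatten stands in for the repo's custom Flatten module.
import torch
import torch.nn as nn

encoder = nn.Sequential(
    nn.Conv2d(12, 16, kernel_size=3),
    nn.LeakyReLU(),
    nn.Conv2d(16, 32, kernel_size=3, groups=4),
    nn.LeakyReLU(),
    nn.Conv2d(32, 64, kernel_size=3, groups=4),
    nn.Flatten(),
)
observations = torch.randn(8, 12, 7, 7)  # assumed (batch, channels, H, W)
print(encoder(observations).shape)  # torch.Size([8, 64])
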
def __init__(
    self,
    # state_net: StateNet,
    head_net: PolicyHead,
):
    super().__init__()
    # self.state_net = state_net
    self.observation_net = nn.Sequential(
        nn.Conv2d(4, 64, kernel_size=4, stride=4),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=4, stride=4, groups=4),
        nn.Dropout2d(p=0.1),
        nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, groups=4),
        # Flatten()
    )
    self.observation_net.apply(create_optimal_inner_init(nn.LeakyReLU))

    self.aggregation_net = nn.Sequential(
        Flatten(),
        nn.Linear(576, 512),
        nn.LayerNorm(512),
        nn.Dropout(p=0.1),
        nn.LeakyReLU(),
    )
    self.aggregation_net.apply(create_optimal_inner_init(nn.LeakyReLU))

    self.head_net = head_net

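# Standalone shape-check sketch, assuming 4 stacked 84x84 frames (an
# Atari-style input; the actual size is not shown here): spatially
# 84 -> 21 -> 5 -> 3, so flattening gives 64 * 3 * 3 = 576 features,
# which is where Linear(576, 512) in the aggregation net comes from.
# nn.Flatten stands in for the repo's custom Flatten module.
import torch
import torch.nn as nn

encoder = nn.Sequential(
    nn.Conv2d(4, 64, kernel_size=4, stride=4),
    nn.LeakyReLU(),
    nn.Conv2d(64, 64, kernel_size=4, stride=4, groups=4),
    nn.LeakyReLU(),
    nn.Conv2d(64, 64, kernel_size=3, stride=1, groups=4),
    nn.Flatten(),
)
frames = torch.randn(8, 4, 84, 84)  # assumed (batch, frames, H, W)
print(encoder(frames).shape)  # torch.Size([8, 576])
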
def __init__(self, state_shape, action_size, hiddens, layer_fn,
             activation_fn=nn.ReLU, norm_fn=None, bias=True,
             out_activation=nn.Tanh):
    super().__init__()
    # hack to prevent cycle imports
    from catalyst.modules.modules import name2nn

    layer_fn = name2nn(layer_fn)
    activation_fn = name2nn(activation_fn)
    norm_fn = name2nn(norm_fn)
    out_activation = name2nn(out_activation)

    state_size = reduce(lambda x, y: x * y, state_shape)

    self.feature_net = SequentialNet(
        hiddens=[state_size] + hiddens,
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=norm_fn,
        bias=bias,
    )
    self.policy_net = SequentialNet(
        hiddens=[hiddens[-1], action_size],
        layer_fn=nn.Linear,
        activation_fn=out_activation,
        norm_fn=None,
        bias=True,
    )

    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)
    self.feature_net.apply(inner_init)
    self.policy_net.apply(outer_init)

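# Plain-torch sketch of the actor this constructor assembles, assuming
# state_shape=(8,), hiddens=[64, 64], layer_fn=nn.Linear and action_size=2
# (all values are illustrative): an MLP feature extractor followed by a
# linear policy head squashed with Tanh.
import torch
import torch.nn as nn
from functools import reduce

state_shape, hiddens, action_size = (8,), [64, 64], 2
state_size = reduce(lambda x, y: x * y, state_shape)  # flatten the state shape
feature_net = nn.Sequential(
    nn.Linear(state_size, hiddens[0]), nn.ReLU(),
    nn.Linear(hiddens[0], hiddens[1]), nn.ReLU(),
)
policy_net = nn.Sequential(nn.Linear(hiddens[-1], action_size), nn.Tanh())

actions = policy_net(feature_net(torch.randn(4, state_size)))
print(actions.shape)  # torch.Size([4, 2]), values in (-1, 1)
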
def __init__(self, hiddens, layer_fn=nn.Linear, bias=True, norm_fn=None,
             activation_fn=nn.ReLU, dropout=None, layer_order=None,
             residual=False):
    super().__init__()
    assert len(hiddens) > 1, "No sequence found"

    layer_fn = MODULES.get_if_str(layer_fn)
    activation_fn = MODULES.get_if_str(activation_fn)
    norm_fn = MODULES.get_if_str(norm_fn)
    dropout = MODULES.get_if_str(dropout)
    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)

    layer_order = layer_order or ["layer", "norm", "drop", "act"]

    if isinstance(dropout, float):
        dropout_fn = lambda: nn.Dropout(dropout)
    else:
        dropout_fn = dropout

    def _layer_fn(f_in, f_out, bias):
        return layer_fn(f_in, f_out, bias=bias)

    def _normalize_fn(f_in, f_out, bias):
        return norm_fn(f_out) if norm_fn is not None else None

    def _dropout_fn(f_in, f_out, bias):
        return dropout_fn() if dropout_fn is not None else None

    def _activation_fn(f_in, f_out, bias):
        return activation_fn() if activation_fn is not None else None

    name2fn = {
        "layer": _layer_fn,
        "norm": _normalize_fn,
        "drop": _dropout_fn,
        "act": _activation_fn,
    }

    net = []
    for i, (f_in, f_out) in enumerate(pairwise(hiddens)):
        block = []
        for key in layer_order:
            fn = name2fn[key](f_in, f_out, bias)
            if fn is not None:
                block.append((f"{key}", fn))
        block = torch.nn.Sequential(OrderedDict(block))
        if residual:
            block = ResidualWrapper(net=block)
        net.append((f"block_{i}", block))

    self.net = torch.nn.Sequential(OrderedDict(net))
    self.net.apply(inner_init)

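# Minimal re-creation of the block-building loop above, assuming
# hiddens=[16, 32, 32], norm_fn=nn.LayerNorm, dropout=0.1 and the default
# layer_order: every (f_in, f_out) pair from pairwise(hiddens) becomes a
# named "layer -> norm -> drop -> act" block inside one big nn.Sequential.
import torch
import torch.nn as nn
from collections import OrderedDict
from itertools import tee

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), ... (standard itertools recipe)
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

hiddens = [16, 32, 32]
blocks = []
for i, (f_in, f_out) in enumerate(pairwise(hiddens)):
    block = nn.Sequential(OrderedDict([
        ("layer", nn.Linear(f_in, f_out)),
        ("norm", nn.LayerNorm(f_out)),
        ("drop", nn.Dropout(0.1)),
        ("act", nn.ReLU()),
    ]))
    blocks.append((f"block_{i}", block))
net = nn.Sequential(OrderedDict(blocks))
print(net(torch.randn(4, 16)).shape)  # torch.Size([4, 32])
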
def __init__(self, state_shape, action_size, hiddens, layer_fn,
             concat_at=1, n_atoms=1, activation_fn=nn.ReLU, norm_fn=None,
             bias=True, out_activation=None):
    super().__init__()
    # hack to prevent cycle imports
    from catalyst.modules.modules import name2nn

    layer_fn = name2nn(layer_fn)
    activation_fn = name2nn(activation_fn)
    norm_fn = name2nn(norm_fn)
    out_activation = name2nn(out_activation)

    self.n_atoms = n_atoms
    state_size = reduce(lambda x, y: x * y, state_shape)

    if concat_at > 0:
        hiddens_ = [state_size] + hiddens[0:concat_at]
        self.observation_net = SequentialNet(
            hiddens=hiddens_,
            layer_fn=layer_fn,
            activation_fn=activation_fn,
            norm_fn=norm_fn,
            bias=bias,
        )
        hiddens_ = \
            [hiddens[concat_at - 1] + action_size] + hiddens[concat_at:]
        self.feature_net = SequentialNet(
            hiddens=hiddens_,
            layer_fn=layer_fn,
            activation_fn=activation_fn,
            norm_fn=norm_fn,
            bias=bias,
        )
    else:
        self.observation_net = None
        hiddens_ = [state_size + action_size] + hiddens
        self.feature_net = SequentialNet(
            hiddens=hiddens_,
            layer_fn=layer_fn,
            activation_fn=activation_fn,
            norm_fn=norm_fn,
            bias=bias,
        )

    self.value_net = SequentialNet(
        hiddens=[hiddens[-1], n_atoms],
        layer_fn=nn.Linear,
        activation_fn=out_activation,
        norm_fn=None,
        bias=True,
    )

    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)
    if self.observation_net is not None:
        self.observation_net.apply(inner_init)
    self.feature_net.apply(inner_init)
    self.value_net.apply(outer_init)

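# Plain-torch sketch of the concat_at=1 data flow, assuming state_size=8,
# action_size=2, hiddens=[64, 64], n_atoms=1 and that the forward pass
# concatenates the action after observation_net (the forward method itself
# is not shown here): a DDPG-style critic that outputs n_atoms values.
import torch
import torch.nn as nn

observation_net = nn.Sequential(nn.Linear(8, 64), nn.ReLU())   # [state_size] + hiddens[:1]
feature_net = nn.Sequential(nn.Linear(64 + 2, 64), nn.ReLU())  # [hiddens[0] + action_size] + hiddens[1:]
value_net = nn.Linear(64, 1)                                   # [hiddens[-1], n_atoms]

state, action = torch.randn(4, 8), torch.randn(4, 2)
features = feature_net(torch.cat([observation_net(state), action], dim=1))
print(value_net(features).shape)  # torch.Size([4, 1])
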
def __init__(self, state_shape, action_size, hiddens, layer_fn,
             activation_fn=nn.ReLU, norm_fn=None, bias=True,
             out_activation=nn.Sigmoid):
    super().__init__()
    # hack to prevent cycle imports
    from catalyst.modules.modules import name2nn

    self.n_action = action_size

    layer_fn = name2nn(layer_fn)
    activation_fn = name2nn(activation_fn)
    norm_fn = name2nn(norm_fn)
    out_activation = name2nn(out_activation)

    state_size = reduce(lambda x, y: x * y, state_shape)

    self.feature_net = SequentialNet(
        hiddens=[state_size] + hiddens,
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=norm_fn,
        bias=bias,
    )
    self.embedding_net = SequentialNet(
        hiddens=[hiddens[-1], action_size * 2],
        layer_fn=layer_fn,
        activation_fn=None,
        norm_fn=norm_fn,
        bias=bias,
    )

    self.coupling1 = CouplingLayer(
        action_size=action_size,
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=None,
        bias=bias,
        parity="odd",
    )
    self.coupling2 = CouplingLayer(
        action_size=action_size,
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=None,
        bias=bias,
        parity="even",
    )

    self.squasher = SquashingLayer(out_activation)

    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)
    self.feature_net.apply(inner_init)
    self.embedding_net.apply(inner_init)

def __init__(self, state_shape, action_size, hiddens, layer_fn,
             activation_fn=nn.ReLU, norm_fn=None, bias=True,
             out_activation=nn.Tanh):
    super().__init__()
    # hack to prevent cycle imports
    from catalyst.modules.modules import name2nn

    layer_fn = name2nn(layer_fn)
    activation_fn = name2nn(activation_fn)
    norm_fn = name2nn(norm_fn)
    out_activation = name2nn(out_activation)

    state_size = state_shape[-1]

    self.feature_net = SequentialNet(
        hiddens=[state_size] + hiddens,
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=norm_fn,
        bias=bias,
    )
    self.attn = nn.Sequential(
        nn.Conv1d(
            in_channels=hiddens[-1],
            out_channels=1,
            kernel_size=1,
            bias=True,
        ),
        nn.Softmax(dim=1),
    )
    self.feature_net2 = SequentialNet(
        hiddens=[hiddens[-1] * 4, hiddens[-1]],
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=norm_fn,
        bias=bias,
    )
    self.policy_net = SequentialNet(
        hiddens=[hiddens[-1], action_size],
        layer_fn=nn.Linear,
        activation_fn=out_activation,
        norm_fn=None,
        bias=True,
    )

    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)
    self.feature_net.apply(inner_init)
    self.attn.apply(outer_init)
    self.feature_net2.apply(inner_init)
    self.policy_net.apply(outer_init)

def __init__(
    self,
    action_size,
    layer_fn,
    activation_fn=nn.ReLU,
    norm_fn=None,
    bias=True,
    parity="odd",
):
    """
    Conditional affine coupling layer used in Real NVP Bijector.
    Original paper: https://arxiv.org/abs/1605.08803
    Adaptation to RL: https://arxiv.org/abs/1804.02808

    Important notes
    ---------------
    1. State embeddings are supposed to have size (action_size * 2).
    2. Scale and translation networks used in the Real NVP Bijector
       both have one hidden layer of (action_size) (activation_fn) units.
    3. Parity ("odd" or "even") determines which part of the input
       is being copied and which is being transformed.
    """
    super().__init__()
    # hack to prevent cycle imports
    from catalyst.modules.modules import name2nn

    layer_fn = name2nn(layer_fn)
    activation_fn = name2nn(activation_fn)
    norm_fn = name2nn(norm_fn)

    self.parity = parity
    if self.parity == "odd":
        self.copy_size = action_size // 2
    else:
        self.copy_size = action_size - action_size // 2

    self.scale_prenet = SequentialNet(
        hiddens=[action_size * 2 + self.copy_size, action_size],
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=None,
        bias=bias,
    )
    self.scale_net = SequentialNet(
        hiddens=[action_size, action_size - self.copy_size],
        layer_fn=layer_fn,
        activation_fn=None,
        norm_fn=None,
        bias=True,
    )

    self.translation_prenet = SequentialNet(
        hiddens=[action_size * 2 + self.copy_size, action_size],
        layer_fn=layer_fn,
        activation_fn=activation_fn,
        norm_fn=None,
        bias=bias,
    )
    self.translation_net = SequentialNet(
        hiddens=[action_size, action_size - self.copy_size],
        layer_fn=layer_fn,
        activation_fn=None,
        norm_fn=None,
        bias=True,
    )

    inner_init = create_optimal_inner_init(nonlinearity=activation_fn)
    self.scale_prenet.apply(inner_init)
    self.scale_net.apply(outer_init)
    self.translation_prenet.apply(inner_init)
    self.translation_net.apply(outer_init)

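# Dimensional sketch of one affine coupling step, assuming action_size=6 and
# parity="odd" (so copy_size = 3), and using the standard Real NVP transform
# y = x * exp(s) + t from the paper cited in the docstring; the exact forward
# pass of this layer is not shown here. The scale/translation nets condition
# on the state embedding (action_size * 2 = 12) plus the copied half (3) and
# emit parameters for the transformed half (6 - 3 = 3).
import torch
import torch.nn as nn

action_size = 6
copy_size = action_size // 2
scale_net = nn.Sequential(
    nn.Linear(action_size * 2 + copy_size, action_size), nn.ReLU(),
    nn.Linear(action_size, action_size - copy_size),
)
translation_net = nn.Sequential(
    nn.Linear(action_size * 2 + copy_size, action_size), nn.ReLU(),
    nn.Linear(action_size, action_size - copy_size),
)

embedding = torch.randn(4, action_size * 2)
action = torch.randn(4, action_size)
copied, transformed = action[:, :copy_size], action[:, copy_size:]
context = torch.cat([embedding, copied], dim=1)
s, t = scale_net(context), translation_net(context)
out = torch.cat([copied, transformed * torch.exp(s) + t], dim=1)
print(out.shape)  # torch.Size([4, 6])
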
def __init__(
    self,
    hiddens,
    layer_fn: Union[str, Dict, List],
    norm_fn: Union[str, Dict, List] = None,
    dropout_fn: Union[str, Dict, List] = None,
    activation_fn: Union[str, Dict, List] = None,
    residual: Union[bool, str] = False,
    layer_order: List = None,
):
    super().__init__()
    assert len(hiddens) > 1, "No sequence found"

    # layer params
    layer_fn = _process_additional_params(layer_fn, hiddens[1:])
    # normalization params
    norm_fn = _process_additional_params(norm_fn, hiddens[1:])
    # dropout params
    dropout_fn = _process_additional_params(dropout_fn, hiddens[1:])
    # activation params
    activation_fn = _process_additional_params(activation_fn, hiddens[1:])

    if isinstance(residual, bool) and residual:
        residual = "hard"
        residual = _process_additional_params(residual, hiddens[1:])

    layer_order = layer_order or ["layer", "norm", "drop", "act"]

    def _layer_fn(layer_fn, f_in, f_out, **kwargs):
        layer_fn = MODULES.get_if_str(layer_fn)
        layer_fn = layer_fn(f_in, f_out, **kwargs)
        return layer_fn

    def _normalization_fn(normalization_fn, f_in, f_out, **kwargs):
        normalization_fn = MODULES.get_if_str(normalization_fn)
        normalization_fn = (
            normalization_fn(f_out, **kwargs)
            if normalization_fn is not None
            else None
        )
        return normalization_fn

    def _dropout_fn(dropout_fn, f_in, f_out, **kwargs):
        dropout_fn = MODULES.get_if_str(dropout_fn)
        dropout_fn = dropout_fn(**kwargs) if dropout_fn is not None else None
        return dropout_fn

    def _activation_fn(activation_fn, f_in, f_out, **kwargs):
        activation_fn = MODULES.get_if_str(activation_fn)
        activation_fn = (
            activation_fn(**kwargs) if activation_fn is not None else None
        )
        return activation_fn

    name2fn = {
        "layer": _layer_fn,
        "norm": _normalization_fn,
        "drop": _dropout_fn,
        "act": _activation_fn,
    }
    name2params = {
        "layer": layer_fn,
        "norm": norm_fn,
        "drop": dropout_fn,
        "act": activation_fn,
    }

    net = []
    for i, (f_in, f_out) in enumerate(pairwise(hiddens)):
        block = []
        for key in layer_order:
            sub_fn = name2fn[key]
            sub_params = deepcopy(name2params[key][i])

            if isinstance(sub_params, Dict):
                sub_module = sub_params.pop("module")
            else:
                sub_module = sub_params
                sub_params = {}

            sub_block = sub_fn(sub_module, f_in, f_out, **sub_params)
            if sub_block is not None:
                block.append((f"{key}", sub_block))

        block_ = OrderedDict(block)
        block = torch.nn.Sequential(block_)

        if block_.get("act", None) is not None:
            activation = block_["act"]
            activation_init = \
                create_optimal_inner_init(nonlinearity=activation)
            block.apply(activation_init)

        if residual == "hard" or (residual == "soft" and f_in == f_out):
            block = ResidualWrapper(net=block)
        net.append((f"block_{i}", block))

    self.net = torch.nn.Sequential(OrderedDict(net))

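# Standalone illustration of the per-entry parameter handling above: each
# entry produced by _process_additional_params may be either a module (or its
# string name) or a dict that carries a "module" key plus constructor kwargs,
# which are popped off before the factory call. Values here are illustrative.
import torch.nn as nn
from copy import deepcopy

per_layer_dropout = [
    {"module": nn.Dropout, "p": 0.1},  # dict form: module plus kwargs
    nn.Dropout,                        # plain form: module, default kwargs
]
for sub_params in per_layer_dropout:
    sub_params = deepcopy(sub_params)
    if isinstance(sub_params, dict):
        sub_module = sub_params.pop("module")
    else:
        sub_module, sub_params = sub_params, {}
    print(sub_module(**sub_params))
# -> Dropout(p=0.1, inplace=False)
# -> Dropout(p=0.5, inplace=False)
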