def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
    super().__init__()
    self._policy_specs = specs
    state_encoder_settings = NetworkSettings(
        normalize=False,
        hidden_units=settings.encoding_size,
        num_layers=2,
        vis_encode_type=EncoderType.SIMPLE,
        memory=None,
    )
    self._state_encoder = NetworkBody(
        specs.observation_shapes, state_encoder_settings
    )
    self._action_flattener = ModelUtils.ActionFlattener(specs)
    # Inverse model: predicts the action from two consecutive state encodings.
    self.inverse_model_action_prediction = torch.nn.Sequential(
        linear_layer(2 * settings.encoding_size, 256),
        Swish(),
        linear_layer(256, self._action_flattener.flattened_size),
    )
    # Forward model: predicts the next state encoding from the current
    # encoding and the (flattened) action.
    self.forward_model_next_state_prediction = torch.nn.Sequential(
        linear_layer(
            settings.encoding_size + self._action_flattener.flattened_size, 256
        ),
        Swish(),
        linear_layer(256, settings.encoding_size),
    )
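# Hedged sketch (not taken from the source above): how an ICM-style curiosity
# module typically uses the two heads. The helper name `curiosity_losses`, the
# argument names, and the 0.5 reward scaling are illustrative assumptions.
def curiosity_losses(curiosity_module, state_enc, next_state_enc, flat_actions):
    # Inverse model: recover the action from consecutive state encodings.
    predicted_action = curiosity_module.inverse_model_action_prediction(
        torch.cat([state_enc, next_state_enc], dim=1)
    )
    # Forward model: predict the next encoding from encoding + action.
    predicted_next = curiosity_module.forward_model_next_state_prediction(
        torch.cat([state_enc, flat_actions], dim=1)
    )
    # The forward-model error doubles as the intrinsic (curiosity) reward.
    intrinsic_reward = 0.5 * (predicted_next - next_state_enc).pow(2).sum(dim=1)
    return predicted_action, intrinsic_reward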
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    normalize: bool = False,
):
    super().__init__()
    self.normalizer: Optional[Normalizer] = None
    if normalize:
        self.normalizer = Normalizer(input_size)
    self.layers = [
        linear_layer(
            input_size,
            hidden_size,
            kernel_init=Initialization.KaimingHeNormal,
            kernel_gain=1.0,
        ),
        Swish(),
    ]
    for _ in range(num_layers - 1):
        self.layers.append(
            linear_layer(
                hidden_size,
                hidden_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=1.0,
            )
        )
        self.layers.append(Swish())
    self.seq_layers = nn.Sequential(*self.layers)
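# Minimal usage sketch, assuming the class above is named LinearEncoder and
# that its forward pass runs `self.seq_layers` (the name defined in __init__).
encoder = LinearEncoder(input_size=8, hidden_size=64, num_layers=2)
out = encoder.seq_layers(torch.rand(32, 8))  # -> shape (32, 64)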
def __init__(self, channel: int):
    """
    Creates a ResNet Block.
    :param channel: The number of channels in the input (and output) tensors of
    the convolutions
    """
    super().__init__()
    self.layers = nn.Sequential(
        Swish(),
        nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
        Swish(),
        nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
    )
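# Hedged sketch of the residual forward pass this block implies: the output of
# the convolution stack is added back to the input (the skip connection). The
# helper name is an assumption.
def resnet_block_forward(block, x):
    return x + block.layers(x)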
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
    super().__init__()
    self._policy_specs = specs
    self._use_vail = settings.use_vail
    self._settings = settings

    state_encoder_settings = NetworkSettings(
        normalize=False,
        hidden_units=settings.encoding_size,
        num_layers=2,
        vis_encode_type=EncoderType.SIMPLE,
        memory=None,
    )
    self._state_encoder = NetworkBody(
        specs.observation_shapes, state_encoder_settings
    )
    self._action_flattener = ModelUtils.ActionFlattener(specs)

    encoder_input_size = settings.encoding_size
    if settings.use_actions:
        # + 1 is for the done signal concatenated with the flattened actions.
        encoder_input_size += self._action_flattener.flattened_size + 1

    self.encoder = torch.nn.Sequential(
        linear_layer(encoder_input_size, settings.encoding_size),
        Swish(),
        linear_layer(settings.encoding_size, settings.encoding_size),
        Swish(),
    )

    estimator_input_size = settings.encoding_size
    if settings.use_vail:
        # VAIL: the estimator sees a sampled latent z instead of the encoding.
        estimator_input_size = self.z_size
        self._z_sigma = torch.nn.Parameter(
            torch.ones(self.z_size, dtype=torch.float), requires_grad=True
        )
        self._z_mu_layer = linear_layer(
            settings.encoding_size,
            self.z_size,
            kernel_init=Initialization.KaimingHeNormal,
            kernel_gain=0.1,
        )
        self._beta = torch.nn.Parameter(
            torch.tensor(self.initial_beta, dtype=torch.float),
            requires_grad=False,
        )

    self._estimator = torch.nn.Sequential(
        linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
    )
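# Hedged sketch (not the source's own estimate method): how the VAIL pieces
# above are typically combined via the reparameterization trick. The helper
# name and the `use_noise` flag are assumptions for illustration.
def vail_estimate(discriminator, encoding, use_noise=True):
    z_mu = discriminator._z_mu_layer(encoding)
    noise = torch.randn_like(z_mu) if use_noise else torch.zeros_like(z_mu)
    z = z_mu + discriminator._z_sigma * noise  # sample z ~ N(z_mu, sigma)
    return discriminator._estimator(z), z_mu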
def __init__(self, input_size, output_size, hyper_input_size, layer_size, num_layers):
    """
    Hyper Network module. This module will use the hyper_input tensor to generate
    the weights of the main network. The main network is a single fully connected
    layer.
    :param input_size: The size of the input of the main network
    :param output_size: The size of the output of the main network
    :param hyper_input_size: The size of the input of the hypernetwork that will
    generate the main network.
    :param layer_size: The number of hidden units in the layers of the hypernetwork
    :param num_layers: The number of layers of the hypernetwork
    """
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    layer_in_size = hyper_input_size
    layers = []
    for _ in range(num_layers):
        layers.append(
            linear_layer(
                layer_in_size,
                layer_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=1.0,
                bias_init=Initialization.Zero,
            )
        )
        layers.append(Swish())
        layer_in_size = layer_size
    flat_output = linear_layer(
        layer_size,
        input_size * output_size,
        kernel_init=Initialization.KaimingHeNormal,
        kernel_gain=0.1,
        bias_init=Initialization.Zero,
    )
    # Re-initializing the weights of the last layer of the hypernetwork
    bound = math.sqrt(1 / (layer_size * self.input_size))
    flat_output.weight.data.uniform_(-bound, bound)
    self.hypernet = torch.nn.Sequential(*layers, LayerNorm(), flat_output)
    # The hypernetwork will not generate the bias of the main network layer
    self.bias = torch.nn.Parameter(torch.zeros(output_size))
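# Hedged sketch of the forward pass this module implies: the hypernetwork
# output is reshaped into one weight matrix per batch element and applied
# with a batched matrix multiply. The function name is an assumption.
def hypernetwork_forward(hn, main_input, hyper_input):
    flat_weights = hn.hypernet(hyper_input)  # (B, input_size * output_size)
    weights = flat_weights.view(-1, hn.input_size, hn.output_size)
    # (B, 1, in) @ (B, in, out) -> (B, 1, out), then drop the middle dim.
    return torch.bmm(main_input.unsqueeze(1), weights).squeeze(1) + hn.bias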
def __init__(
    self,
    input_size: int,
    goal_size: int,
    hidden_size: int,
    num_layers: int,
    num_conditional_layers: int,
    kernel_init: Initialization = Initialization.KaimingHeNormal,
    kernel_gain: float = 1.0,
):
    """
    ConditionalEncoder module. A fully connected network of which some of the
    weights are generated by a goal conditioning. Uses the HyperNetwork module to
    generate the weights of the network. Only the weights of the last
    "num_conditional_layers" layers will be generated by HyperNetworks, the
    others will use regular parameters.
    :param input_size: The size of the input of the encoder
    :param goal_size: The size of the goal tensor that will condition the encoder
    :param hidden_size: The number of hidden units in the encoder
    :param num_layers: The total number of layers of the encoder (both regular
    and generated by HyperNetwork)
    :param num_conditional_layers: The number of layers generated with
    hypernetworks
    :param kernel_init: The Initialization to use for the weights of the layer
    :param kernel_gain: The multiplier for the weights of the kernel.
    """
    super().__init__()
    layers: List[torch.nn.Module] = []
    prev_size = input_size + goal_size
    for i in range(num_layers):
        if num_layers - i <= num_conditional_layers:
            # Layer i is a conditional layer, since the conditional layers are
            # the last num_conditional_layers of the encoder.
            layers.append(
                HyperNetwork(prev_size, hidden_size, goal_size, hidden_size, 2)
            )
        else:
            layers.append(
                linear_layer(
                    prev_size,
                    hidden_size,
                    kernel_init=kernel_init,
                    kernel_gain=kernel_gain,
                )
            )
        layers.append(Swish())
        prev_size = hidden_size
    self.layers = torch.nn.ModuleList(layers)
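# Hedged sketch of the forward pass the ModuleList implies: regular layers
# receive only the running activation, while HyperNetwork layers also get the
# goal tensor that generates their weights. The helper name is an assumption.
def conditional_encoder_forward(encoder, input_tensor, goal_tensor):
    activation = torch.cat([input_tensor, goal_tensor], dim=-1)
    for layer in encoder.layers:
        if isinstance(layer, HyperNetwork):
            activation = layer(activation, goal_tensor)
        else:
            activation = layer(activation)
    return activation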
def __init__(self, height, width, initial_channels, final_hidden):
    super().__init__()
    n_channels = [16, 32, 32]  # channel for each stack
    n_blocks = 2  # number of residual blocks
    layers = []
    last_channel = initial_channels
    for channel in n_channels:
        layers.append(nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1))
        layers.append(nn.MaxPool2d([3, 3], [2, 2]))
        height, width = pool_out_shape((height, width), 3)
        for _ in range(n_blocks):
            layers.append(ResNetBlock(channel))
        last_channel = channel
    layers.append(Swish())
    # Use a ModuleList so the layers are registered as submodules; a plain
    # Python list would hide their parameters from .parameters() and the
    # optimizer.
    self.layers = nn.ModuleList(layers)
    self.dense = linear_layer(
        n_channels[-1] * height * width,
        final_hidden,
        kernel_init=Initialization.KaimingHeNormal,
        kernel_gain=1.0,
    )
def __init__(self, height: int, width: int, initial_channels: int, output_size: int):
    super().__init__()
    n_channels = [16, 32, 32]  # channel for each stack
    n_blocks = 2  # number of residual blocks
    layers = []
    last_channel = initial_channels
    for channel in n_channels:
        layers.append(nn.Conv2d(last_channel, channel, [3, 3], [1, 1], padding=1))
        layers.append(nn.MaxPool2d([3, 3], [2, 2]))
        height, width = pool_out_shape((height, width), 3)
        for _ in range(n_blocks):
            layers.append(ResNetBlock(channel))
        last_channel = channel
    layers.append(Swish())
    self.dense = linear_layer(
        n_channels[-1] * height * width,
        output_size,
        kernel_init=Initialization.KaimingHeNormal,
        kernel_gain=1.41,  # Use ReLU gain
    )
    self.sequential = nn.Sequential(*layers)
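# Minimal usage sketch, assuming the class above is named ResNetVisualEncoder
# and that its forward pass runs `self.sequential`, flattens, then applies
# `self.dense`. The NCHW input layout and all sizes are illustrative
# assumptions.
encoder = ResNetVisualEncoder(height=84, width=84, initial_channels=3, output_size=256)
images = torch.rand(4, 3, 84, 84)
hidden = encoder.sequential(images)
out = encoder.dense(hidden.reshape(hidden.shape[0], -1))  # -> shape (4, 256)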
def test_swish():
    layer = Swish()
    input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
    target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
    assert torch.all(torch.eq(layer(input_tensor), target_tensor))