def __init__(self,
             input_channels,
             input_size,
             n,
             conv_layer_params,
             same_padding=False,
             activation=torch.relu_,
             kernel_initializer=None,
             flatten_output=False,
             name="ParallelImageEncodingNetwork"):
    """Build a stack of ``ParallelConv2D`` layers for ``n`` parallel encoders.

    Args:
        input_channels (int): number of channels in the input image
        input_size (int or tuple): the input image size (height, width)
        n (int): number of parallel networks
        conv_layer_params (tuple[tuple]): a non-empty tuple of tuples
            ``(num_filters, kernel_size, strides, padding)``, where
            ``padding`` is optional
        same_padding (bool): similar to TF's conv2d ``same`` padding mode.
            If True, the user provided paddings in ``conv_layer_params``
            will be replaced by automatically calculated ones; if False,
            it corresponds to TF's ``valid`` padding mode (the user can
            still provide custom paddings though)
        activation (torch.nn.functional): activation for all the layers
        kernel_initializer (Callable): initializer for all the layers.
        flatten_output (bool): If False, the output will be an image
            structure of shape ``(B, n, C, H, W)``; otherwise the output
            will be flattened into a feature of shape ``(B, n, C*H*W)``.
        name (str): name passed on to the parent constructor
    """
    image_hw = common.tuplify2d(input_size)
    super().__init__(
        input_tensor_spec=TensorSpec((input_channels, ) + image_hw),
        name=name)

    assert isinstance(conv_layer_params, tuple)
    assert len(conv_layer_params) > 0

    self._flatten_output = flatten_output
    self._conv_layer_params = conv_layer_params
    self._conv_layers = nn.ModuleList()

    # Each layer consumes the number of filters produced by the previous one.
    in_channels = input_channels
    for layer_spec in conv_layer_params:
        out_channels, ksize, stride = layer_spec[:3]
        if same_padding:
            # "half padding": ignore any user-provided padding entry
            ksize = common.tuplify2d(ksize)
            pad = ((ksize[0] - 1) // 2, (ksize[1] - 1) // 2)
        elif len(layer_spec) > 3:
            pad = layer_spec[3]
        else:
            pad = 0

        conv = layers.ParallelConv2D(
            in_channels,
            out_channels,
            ksize,
            n,
            activation=activation,
            kernel_initializer=kernel_initializer,
            strides=stride,
            padding=pad)
        self._conv_layers.append(conv)
        in_channels = out_channels
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             activation=torch.relu_,
             strides=1,
             pooling_kernel=None,
             padding=0,
             use_bias=False):
    """A 2D conv layer that does not maintain its own weight and bias,
    but accepts both from users.

    If the given parameter (weight and bias) tensor has an extra batch
    dimension (first dimension), the layer operates in parallel
    (the original note calls this a "parallel FC operation"; presumably
    one convolution per parameter set -- TODO confirm against ``forward``).

    The constructor seeds the layer with a random weight of shape
    ``(1, out_channels * in_channels * kH * kW)`` via ``set_weight`` and,
    when ``use_bias`` is set, a random bias of shape ``(1, out_channels)``
    via ``set_bias``.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple): converted with ``common.tuplify2d``
        activation (torch.nn.functional): stored as ``_activation``
        strides (int or tuple): stored as ``_strides``
        pooling_kernel (int or tuple): stored as ``_pooling_kernel``
        padding (int or tuple): stored as ``_padding``
        use_bias (bool): whether use bias
    """
    super(ParamConv2D, self).__init__()

    kernel_hw = common.tuplify2d(kernel_size)

    self._in_channels = in_channels
    self._out_channels = out_channels
    self._activation = activation
    self._kernel_size = kernel_hw
    self._kH, self._kW = kernel_hw
    self._strides = strides
    self._pooling_kernel = pooling_kernel
    self._padding = padding
    self._use_bias = use_bias

    # Flattened length of one full kernel tensor.
    self._weight_length = out_channels * in_channels * self._kH * self._kW
    self.set_weight(torch.randn(1, self._weight_length))

    if not use_bias:
        self._bias_length = 0
        self._bias = None
    else:
        self._bias_length = out_channels
        self.set_bias(torch.randn(1, self._bias_length))
def __init__(self,
             input_channels,
             input_size,
             conv_layer_params,
             same_padding=False,
             activation=torch.relu_,
             kernel_initializer=None,
             flatten_output=False,
             name="ImageEncodingNetwork"):
    """Initialize the layers for encoding an image into a latent vector.

    Currently there seems no need for this class to handle nested inputs;
    if necessary, extend the argument list to support it in the future.

    How to calculate the output size:
    `<https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d>`_::

        H = (H1 - HF + 2P) // strides + 1

    where H = output size, H1 = input size, HF = size of kernel,
    P = padding.

    Regarding padding: in the previous TF version, we have two padding
    modes: ``valid`` and ``same``. For the former, we always have no
    padding (P=0). The latter is also called "half padding"
    (P=(HF-1)//2): when strides=1 and HF is an odd number, the output has
    the same size as the input. Currently, PyTorch doesn't support
    different left and right paddings and P is always (HF-1)//2, so if HF
    is an even number, the output size will decrease by 1 when strides=1.

    Args:
        input_channels (int): number of channels in the input image
        input_size (int or tuple): the input image size (height, width)
        conv_layer_params (tuple[tuple]): a non-empty tuple of tuples
            ``(num_filters, kernel_size, strides, padding)``, where
            ``padding`` is optional
        same_padding (bool): similar to TF's conv2d ``same`` padding mode.
            If True, the user provided paddings in ``conv_layer_params``
            will be replaced by automatically calculated ones; if False,
            it corresponds to TF's ``valid`` padding mode (the user can
            still provide custom paddings though)
        activation (torch.nn.functional): activation for all the layers
        kernel_initializer (Callable): initializer for all the layers.
        flatten_output (bool): If False, the output will be an image
            structure of shape ``BxCxHxW``; otherwise the output will be
            flattened into a feature of shape ``BxN``.
        name (str): name passed on to the parent constructor
    """
    image_hw = common.tuplify2d(input_size)
    super().__init__(
        input_tensor_spec=TensorSpec((input_channels, ) + image_hw),
        name=name)

    assert isinstance(conv_layer_params, tuple)
    assert len(conv_layer_params) > 0

    self._flatten_output = flatten_output
    self._conv_layer_params = conv_layer_params
    self._conv_layers = nn.ModuleList()

    # Each layer consumes the number of filters produced by the previous one.
    in_channels = input_channels
    for layer_spec in conv_layer_params:
        out_channels, ksize, stride = layer_spec[:3]
        if same_padding:
            # "half padding": ignore any user-provided padding entry
            ksize = common.tuplify2d(ksize)
            pad = ((ksize[0] - 1) // 2, (ksize[1] - 1) // 2)
        elif len(layer_spec) > 3:
            pad = layer_spec[3]
        else:
            pad = 0

        conv = layers.Conv2D(
            in_channels,
            out_channels,
            ksize,
            activation=activation,
            kernel_initializer=kernel_initializer,
            strides=stride,
            padding=pad)
        self._conv_layers.append(conv)
        in_channels = out_channels
def __init__(self,
             input_size,
             n,
             transconv_layer_params,
             start_decoding_size,
             start_decoding_channels,
             same_padding=False,
             preprocess_fc_layer_params=None,
             activation=torch.relu_,
             kernel_initializer=None,
             output_activation=torch.tanh,
             name="ImageDecodingNetwork"):
    """Build ``n`` parallel decoders: ``ParallelFC`` preprocessing layers
    followed by ``ParallelConvTranspose2D`` layers.

    Args:
        input_size (int): the size of the input latent vector
        n (int): number of parallel networks
        transconv_layer_params (tuple[tuple]): a non-empty tuple of tuples
            ``(num_filters, kernel_size, strides, padding)``, where
            ``padding`` is optional.
        start_decoding_size (int or tuple): the initial height and width
            we'd like to have for the feature map
        start_decoding_channels (int): the initial number of channels we'd
            like to have for the feature map. Note that we always first
            project an input latent vector into a vector of an appropriate
            length so that it can be reshaped into
            (``start_decoding_channels``, ``start_decoding_height``,
            ``start_decoding_width``).
        same_padding (bool): similar to TF's conv2d ``same`` padding mode.
            If True, the user provided paddings in
            ``transconv_layer_params`` will be replaced by automatically
            calculated ones; if False, it corresponds to TF's ``valid``
            padding mode (the user can still provide custom paddings
            though).
        preprocess_fc_layer_params (tuple[int]): a tuple of fc layer
            units. These fc layers are used for preprocessing the latent
            vector before transposed convolutions.
        activation (nn.functional): activation for hidden layers
        kernel_initializer (Callable): initializer for all the layers.
        output_activation (nn.functional): activation for the output
            layer. Usually our image inputs are normalized to [0, 1] or
            [-1, 1], so this function should be ``torch.sigmoid`` or
            ``torch.tanh``.
        name (str): name passed on to the parent constructor.
            NOTE(review): the default is "ImageDecodingNetwork" even
            though this variant builds parallel layers -- confirm that
            this is intentional.
    """
    super().__init__(
        input_tensor_spec=TensorSpec((input_size, )), name=name)

    assert isinstance(transconv_layer_params, tuple)
    assert len(transconv_layer_params) > 0

    # --- fully connected preprocessing of the latent vector ---
    self._preprocess_fc_layers = nn.ModuleList()
    fc_in = input_size
    if preprocess_fc_layer_params is not None:
        for fc_out in preprocess_fc_layer_params:
            fc = layers.ParallelFC(
                fc_in,
                fc_out,
                n,
                activation=activation,
                kernel_initializer=kernel_initializer)
            self._preprocess_fc_layers.append(fc)
            fc_in = fc_out

    decoding_hw = common.tuplify2d(start_decoding_size)
    # pytorch assumes "channels_first" !
    decoding_shape = [
        start_decoding_channels, decoding_hw[0], decoding_hw[1]
    ]
    self._start_decoding_shape = decoding_shape
    # Final projection to a vector that can be reshaped to decoding_shape.
    projection = layers.ParallelFC(
        fc_in,
        np.prod(decoding_shape),
        n,
        activation=activation,
        kernel_initializer=kernel_initializer)
    self._preprocess_fc_layers.append(projection)

    # --- transposed convolutions ---
    self._transconv_layer_params = transconv_layer_params
    self._transconv_layers = nn.ModuleList()
    last_index = len(transconv_layer_params) - 1
    in_channels = start_decoding_channels
    for index, layer_spec in enumerate(transconv_layer_params):
        out_channels, ksize, stride = layer_spec[:3]
        if same_padding:
            # "half padding": ignore any user-provided padding entry
            ksize = common.tuplify2d(ksize)
            pad = ((ksize[0] - 1) // 2, (ksize[1] - 1) // 2)
        elif len(layer_spec) > 3:
            pad = layer_spec[3]
        else:
            pad = 0

        # Only the last layer uses the output activation.
        layer_act = output_activation if index == last_index else activation
        transconv = layers.ParallelConvTranspose2D(
            in_channels,
            out_channels,
            ksize,
            n,
            activation=layer_act,
            kernel_initializer=kernel_initializer,
            strides=stride,
            padding=pad)
        self._transconv_layers.append(transconv)
        in_channels = out_channels

    self._n = n
def __init__(self,
             input_size,
             transconv_layer_params,
             start_decoding_size,
             start_decoding_channels,
             same_padding=False,
             preprocess_fc_layer_params=None,
             activation=torch.relu_,
             kernel_initializer=None,
             output_activation=torch.tanh,
             name="ImageDecodingNetwork"):
    """Initialize the layers for decoding a latent vector into an image.

    Currently there seems no need for this class to handle nested inputs;
    if necessary, extend the argument list to support it in the future.

    How to calculate the output size:
    `<https://pytorch.org/docs/stable/nn.html#torch.nn.ConvTranspose2d>`_::

        H = (H1-1) * strides + HF - 2P + OP

    where H = output size, H1 = input size, HF = size of kernel,
    P = padding, OP = output_padding (currently hardcoded to be 0 for
    this class).

    Regarding padding: in the previous TF version, we have two padding
    modes: ``valid`` and ``same``. For the former, we always have no
    padding (P=0). The latter is also called ``half padding``
    (P=(HF-1)//2): when strides=1 and HF is an odd number, the output has
    the same size as the input. Currently, PyTorch doesn't support
    different left and right paddings and P is always (HF-1)//2, so if HF
    is an even number, the output size will increase by 1 when strides=1.

    Args:
        input_size (int): the size of the input latent vector
        transconv_layer_params (tuple[tuple]): a non-empty tuple of tuples
            ``(num_filters, kernel_size, strides, padding)``, where
            ``padding`` is optional.
        start_decoding_size (int or tuple): the initial height and width
            we'd like to have for the feature map
        start_decoding_channels (int): the initial number of channels we'd
            like to have for the feature map. Note that we always first
            project an input latent vector into a vector of an appropriate
            length so that it can be reshaped into
            (``start_decoding_channels``, ``start_decoding_height``,
            ``start_decoding_width``).
        same_padding (bool): similar to TF's conv2d ``same`` padding mode.
            If True, the user provided paddings in
            ``transconv_layer_params`` will be replaced by automatically
            calculated ones; if False, it corresponds to TF's ``valid``
            padding mode (the user can still provide custom paddings
            though).
        preprocess_fc_layer_params (tuple[int]): a tuple of fc layer
            units. These fc layers are used for preprocessing the latent
            vector before transposed convolutions.
        activation (nn.functional): activation for hidden layers
        kernel_initializer (Callable): initializer for all the layers.
        output_activation (nn.functional): activation for the output
            layer. Usually our image inputs are normalized to [0, 1] or
            [-1, 1], so this function should be ``torch.sigmoid`` or
            ``torch.tanh``.
        name (str): name passed on to the parent constructor
    """
    super().__init__(
        input_tensor_spec=TensorSpec((input_size, )), name=name)

    assert isinstance(transconv_layer_params, tuple)
    assert len(transconv_layer_params) > 0

    # --- fully connected preprocessing of the latent vector ---
    self._preprocess_fc_layers = nn.ModuleList()
    fc_in = input_size
    if preprocess_fc_layer_params is not None:
        for fc_out in preprocess_fc_layer_params:
            fc = layers.FC(
                fc_in,
                fc_out,
                activation=activation,
                kernel_initializer=kernel_initializer)
            self._preprocess_fc_layers.append(fc)
            fc_in = fc_out

    decoding_hw = common.tuplify2d(start_decoding_size)
    # pytorch assumes "channels_first" !
    decoding_shape = [
        start_decoding_channels, decoding_hw[0], decoding_hw[1]
    ]
    self._start_decoding_shape = decoding_shape
    # Final projection to a vector that can be reshaped to decoding_shape.
    projection = layers.FC(
        fc_in,
        np.prod(decoding_shape),
        activation=activation,
        kernel_initializer=kernel_initializer)
    self._preprocess_fc_layers.append(projection)

    # --- transposed convolutions ---
    self._transconv_layer_params = transconv_layer_params
    self._transconv_layers = nn.ModuleList()
    last_index = len(transconv_layer_params) - 1
    in_channels = start_decoding_channels
    for index, layer_spec in enumerate(transconv_layer_params):
        out_channels, ksize, stride = layer_spec[:3]
        if same_padding:
            # "half padding": ignore any user-provided padding entry
            ksize = common.tuplify2d(ksize)
            pad = ((ksize[0] - 1) // 2, (ksize[1] - 1) // 2)
        elif len(layer_spec) > 3:
            pad = layer_spec[3]
        else:
            pad = 0

        # Only the last layer uses the output activation.
        layer_act = output_activation if index == last_index else activation
        transconv = layers.ConvTranspose2D(
            in_channels,
            out_channels,
            ksize,
            activation=layer_act,
            kernel_initializer=kernel_initializer,
            strides=stride,
            padding=pad)
        self._transconv_layers.append(transconv)
        in_channels = out_channels
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             n,
             activation=torch.relu_,
             strides=1,
             padding=0,
             use_bias=None,
             use_bn=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """A parallel ConvTranspose2D layer that can be used to perform n
    independent 2D transposed convolutions in parallel.

    The ``n`` layers are backed by one grouped ``nn.ConvTranspose2d``
    with ``groups=n``; each group's weight slice is initialized
    separately.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple):
        n (int): n independent ``ConvTranspose2D`` layers
        activation (torch.nn.functional):
        strides (int or tuple):
        padding (int or tuple):
        use_bias (bool|None): If None, will use ``not use_bn``
        use_bn (bool): if True, a ``nn.BatchNorm2d(n * out_channels)``
            is created
        kernel_initializer (Callable): initializer for the conv_trans
            layer. If None is provided a ``variance_scaling_initializer``
            with gain as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the
            std of kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant
    """
    super(ParallelConvTranspose2D, self).__init__()

    if use_bias is None:
        use_bias = not use_bn

    self._activation = activation
    self._n = n
    self._in_channels = in_channels
    self._out_channels = out_channels
    self._kernel_size = common.tuplify2d(kernel_size)

    # One grouped transposed conv holds all n independent layers.
    self._conv_trans2d = nn.ConvTranspose2d(
        in_channels * n,
        out_channels * n,
        kernel_size,
        groups=n,
        stride=strides,
        padding=padding,
        bias=use_bias)

    # Choose the per-group initializer once rather than on every iteration.
    if kernel_initializer is None:

        def init_group(weight_slice):
            variance_scaling_init(
                weight_slice,
                gain=kernel_init_gain,
                nonlinearity=self._activation)
    else:
        init_group = kernel_initializer

    for group in range(n):
        lo = group * in_channels
        hi = (group + 1) * in_channels
        init_group(self._conv_trans2d.weight.data[lo:hi])

    # [n*C, C', kernel_size, kernel_size]->[n, C, C', kernel_size, kernel_size]
    kernel_h, kernel_w = self._kernel_size[0], self._kernel_size[1]
    self._weight = self._conv_trans2d.weight.view(
        self._n, self._in_channels, self._out_channels, kernel_h, kernel_w)

    if use_bias:
        nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
        # [n*C']->[n, C']
        self._bias = self._conv_trans2d.bias.view(self._n,
                                                  self._out_channels)
    else:
        self._bias = None

    self._bn = nn.BatchNorm2d(n * out_channels) if use_bn else None
def __init__(self,
             input_channels,
             input_size,
             conv_layer_params,
             same_padding=False,
             activation=torch.relu_,
             use_bias=False,
             flatten_output=False,
             name="ParamConvNet"):
    """A fully 2D conv network that does not maintain its own network
    parameters, but accepts them from users.

    If the given parameter tensor has an extra batch dimension (first
    dimension), it performs parallel operations.

    Args:
        input_channels (int): number of channels in the input image
        input_size (int or tuple): the input image size (height, width)
        conv_layer_params (tuple[tuple]): a non-empty tuple of tuples
            where each tuple takes a format
            ``(filters, kernel_size, strides, padding, pooling_kernel)``,
            where ``padding`` and ``pooling_kernel`` are optional.
        same_padding (bool): similar to TF's conv2d ``same`` padding mode.
            If True, the user provided paddings in ``conv_layer_params``
            will be replaced by automatically calculated ones; if False,
            it corresponds to TF's ``valid`` padding mode (the user can
            still provide custom paddings though)
        activation (torch.nn.functional): activation for all the layers
        use_bias (bool): whether use bias
        flatten_output (bool): If False, the output will be an image
            structure of shape ``(B, n, C, H, W)``; otherwise the output
            will be flattened into a feature of shape ``(B, n, C*H*W)``.
        name (str): name passed on to the parent constructor
    """
    image_hw = common.tuplify2d(input_size)
    super().__init__(
        input_tensor_spec=TensorSpec((input_channels, ) + image_hw),
        name=name)

    assert isinstance(conv_layer_params, tuple)
    assert len(conv_layer_params) > 0

    self._flatten_output = flatten_output
    self._conv_layer_params = conv_layer_params
    self._conv_layers = nn.ModuleList()
    self._param_length = None

    # Each layer consumes the number of filters produced by the previous one.
    in_channels = input_channels
    for layer_spec in conv_layer_params:
        out_channels, ksize, stride = layer_spec[:3]
        spec_len = len(layer_spec)
        pool = layer_spec[4] if spec_len > 4 else None
        if same_padding:
            # "half padding": ignore any user-provided padding entry
            ksize = common.tuplify2d(ksize)
            pad = ((ksize[0] - 1) // 2, (ksize[1] - 1) // 2)
        elif spec_len > 3:
            pad = layer_spec[3]
        else:
            pad = 0

        conv = ParamConv2D(
            in_channels,
            out_channels,
            ksize,
            activation=activation,
            strides=stride,
            pooling_kernel=pool,
            padding=pad,
            use_bias=use_bias)
        self._conv_layers.append(conv)
        in_channels = out_channels