Example #1
    def __init__(self,
                 in_planes,
                 planes,
                 stride=1,
                 batch_norm=True,
                 nl="relu",
                 no_fnl=False):
        super(Bottleneck, self).__init__()

        # normal block-layers
        self.block_layer1 = nn.Sequential(
            nn.Conv2d(in_planes,
                      planes,
                      kernel_size=1,
                      bias=False if batch_norm else True),
            nn.BatchNorm2d(planes) if batch_norm else modules.Identity(),
            nn.ReLU() if nl == "relu" else nn.LeakyReLU())
        self.block_layer2 = nn.Sequential(
            nn.Conv2d(planes,
                      planes,
                      kernel_size=3,
                      stride=stride,
                      padding=1,
                      bias=False if batch_norm else True),
            nn.BatchNorm2d(planes) if batch_norm else modules.Identity(),
            nn.ReLU() if nl == "relu" else nn.LeakyReLU())
        self.block_layer3 = nn.Sequential(
            nn.Conv2d(planes,
                      self.expansion * planes,
                      kernel_size=1,
                      bias=False if batch_norm else True),
            nn.BatchNorm2d(self.expansion *
                           planes) if batch_norm else modules.Identity())

        # shortcut block-layer
        self.shortcut = modules.Identity()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes,
                          self.expansion * planes,
                          kernel_size=1,
                          stride=stride,
                          bias=False if batch_norm else True),
                nn.BatchNorm2d(self.expansion *
                               planes) if batch_norm else modules.Identity())

        # final non-linearity
        self.nl = (nn.ReLU() if nl == "relu" else
                   nn.LeakyReLU()) if not no_fnl else modules.Identity()
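
    # The forward pass is not included in this excerpt; below is a minimal sketch
    # of how the sub-modules defined above are typically combined in a residual
    # bottleneck block (method name and ordering are assumptions, not source code).
    def forward(self, x):
        out = self.block_layer3(self.block_layer2(self.block_layer1(x)))
        out = out + self.shortcut(x)  # add the (possibly projected) shortcut
        return self.nl(out)           # final non-linearity (Identity if no_fnl)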
Example #2
    def __init__(self,
                 in_planes,
                 out_planes,
                 block=BasicBlock,
                 num_blocks=2,
                 stride=1,
                 drop=0,
                 batch_norm=True,
                 nl="relu",
                 no_fnl=False):

        ## NOTE: should [no_fnl] be changed so that no batch_norm is applied either?? ##

        # Set configurations
        super().__init__()
        self.num_blocks = num_blocks
        self.in_planes = in_planes
        self.out_planes = out_planes * block.expansion

        # Create layer
        self.dropout = nn.Dropout2d(drop)
        for block_id in range(num_blocks):
            # -first block has given stride, later blocks have stride 1
            new_block = block(in_planes,
                              out_planes,
                              stride=stride if block_id == 0 else 1,
                              batch_norm=batch_norm,
                              nl=nl,
                              no_fnl=(block_id == num_blocks - 1))
            setattr(self, "block{}".format(block_id + 1), new_block)
            in_planes = out_planes * block.expansion
        # self.bn = nn.BatchNorm2d(out_planes * block.expansion) if batch_norm else utils.Identity()
        self.nl = (nn.ReLU() if nl == "relu" else
                   nn.LeakyReLU()) if not no_fnl else modules.Identity()
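
    # A plausible forward pass for this layer (a sketch, assuming dropout is applied
    # to the input of every block; the source's own forward is not shown here).
    def forward(self, x):
        for block_id in range(self.num_blocks):
            x = getattr(self, "block{}".format(block_id + 1))(self.dropout(x))
        return self.nl(x)  # final non-linearity of the layer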
 def __init__(self,
              in_size,
              out_size,
              nl=nn.ReLU(),
              drop=0.,
              bias=True,
              excitability=False,
              excit_buffer=False,
              batch_norm=False,
              gated=False):
     super().__init__()
     if drop > 0:
         self.dropout = nn.Dropout(drop)
     self.linear = em.LinearExcitability(in_size,
                                         out_size,
                                         bias=False if batch_norm else bias,
                                         excitability=excitability,
                                         excit_buffer=excit_buffer)
     if batch_norm:
         self.bn = nn.BatchNorm1d(out_size)
     if gated:
         self.gate = nn.Linear(in_size, out_size)
         self.sigmoid = nn.Sigmoid()
     if isinstance(nl, nn.Module):
         self.nl = nl
     elif not nl == "none":
         self.nl = nn.ReLU() if nl == "relu" else (
             nn.LeakyReLU() if nl == "leakyrelu" else modules.Identity())
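
 # The optional sub-modules above are only created when requested, so a forward
 # pass has to guard on their presence; a minimal sketch (the ordering
 # dropout -> linear -> batch-norm -> gate -> non-linearity is an assumption).
 def forward(self, x):
     inp = self.dropout(x) if hasattr(self, "dropout") else x
     pre = self.linear(inp)
     pre = self.bn(pre) if hasattr(self, "bn") else pre
     if hasattr(self, "gate"):
         pre = pre * self.sigmoid(self.gate(x))  # element-wise gating
     return self.nl(pre) if hasattr(self, "nl") else pre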
Example #4
 def __init__(self,
              in_planes,
              out_planes,
              kernel_size=3,
              stride=1,
              padding=1,
              drop=0,
              batch_norm=False,
              nl=nn.ReLU(),
              bias=True,
              gated=False):
     super().__init__()
     if drop > 0:
         self.dropout = nn.Dropout2d(drop)
     self.conv = nn.Conv2d(in_planes,
                           out_planes,
                           stride=stride,
                           kernel_size=kernel_size,
                           padding=padding,
                           bias=bias)
     if batch_norm:
         self.bn = nn.BatchNorm2d(out_planes)
     if gated:
         self.gate = nn.Conv2d(in_planes,
                               out_planes,
                               stride=stride,
                               kernel_size=kernel_size,
                               padding=padding,
                               bias=False)
         self.sigmoid = nn.Sigmoid()
     if isinstance(nl, nn.Module):
         self.nl = nl
     elif not nl == "none":
         self.nl = nn.ReLU() if nl == "relu" else (
             nn.LeakyReLU() if nl == "leakyrelu" else modules.Identity())
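
# Standalone sketch of the gating mechanism configured above (illustrative shapes;
# the combination rule "conv output times sigmoid of gate output" is an assumption):
import torch
import torch.nn as nn
conv = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1)
gate = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
x = torch.randn(8, 3, 32, 32)            # batch of 8 RGB images of 32x32
y = conv(x) * torch.sigmoid(gate(x))     # gated output, shape (8, 16, 16, 16)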
 def __init__(self,
              in_size,
              out_size,
              nl=nn.ReLU(),
              drop=0.,
              bias=True,
              excitability=False,
              excit_buffer=False,
              batch_norm=False,
              gate_size=0,
              gating_prop=0.8,
              device='cuda'):
     super().__init__()
     if drop > 0:
         self.dropout = nn.Dropout(drop)
     self.linear = em.LinearExcitability(in_size,
                                         out_size,
                                         bias=False if batch_norm else bias,
                                         excitability=excitability,
                                         excit_buffer=excit_buffer)
     if batch_norm:
         self.bn = nn.BatchNorm1d(out_size)
     if gate_size > 0:
         self.gate_mask = torch.tensor(np.random.choice(
             [0., 1.],
             size=(gate_size, out_size),
             p=[gating_prop, 1. - gating_prop]),
                                       dtype=torch.float,
                                       device=device)
     if isinstance(nl, nn.Module):
         self.nl = nl
     elif not nl == "none":
         self.nl = nn.ReLU() if nl == "relu" else (
             nn.LeakyReLU() if nl == "leakyrelu" else modules.Identity())
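
 # How the fixed random [gate_mask] is applied is not shown in this excerpt; a
 # hedged sketch, assuming [gate_input] is a (multi-)hot tensor of shape
 # (batch, gate_size) whose rows select which mask rows modulate the pre-activation.
 def forward(self, x, gate_input=None):
     inp = self.dropout(x) if hasattr(self, "dropout") else x
     pre = self.linear(inp)
     pre = self.bn(pre) if hasattr(self, "bn") else pre
     if hasattr(self, "gate_mask") and gate_input is not None:
         pre = pre * torch.matmul(gate_input, self.gate_mask)  # (batch, out_size)
     return self.nl(pre) if hasattr(self, "nl") else pre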
    def __init__(self,
                 in_planes,
                 planes,
                 stride=1,
                 batch_norm=True,
                 nl="relu",
                 no_fnl=False,
                 smaller_kernel=False):
        super(DeconvBlock, self).__init__()

        # normal block-layers
        self.block_layer1 = nn.Sequential(
            nn.ConvTranspose2d(
                in_planes,
                planes,
                stride=stride,
                bias=False if batch_norm else True,
                kernel_size=(2 if smaller_kernel else 4) if stride == 2 else 3,
                padding=0 if (stride == 2 and smaller_kernel) else 1),
            nn.BatchNorm2d(planes) if batch_norm else modules.Identity(),
            nn.ReLU() if nl == "relu" else nn.LeakyReLU())
        self.block_layer2 = nn.Sequential(
            nn.ConvTranspose2d(planes,
                               self.expansion * planes,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False if batch_norm else True),
            nn.BatchNorm2d(self.expansion *
                           planes) if batch_norm else modules.Identity())

        # shortcut block-layer
        self.shortcut = modules.Identity()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.ConvTranspose2d(in_planes,
                                   self.expansion * planes,
                                   kernel_size=1,
                                   stride=stride,
                                   output_padding=0 if stride == 1 else 1,
                                   bias=False if batch_norm else True),
                nn.BatchNorm2d(self.expansion *
                               planes) if batch_norm else modules.Identity())

        # final non-linearity
        self.nl = (nn.ReLU() if nl == "relu" else
                   nn.LeakyReLU()) if not no_fnl else modules.Identity()
    def __init__(self,
                 in_planes,
                 out_planes,
                 block=DeconvBlock,
                 num_blocks=2,
                 stride=1,
                 drop=0,
                 batch_norm=True,
                 nl="relu",
                 smaller_kernel=False,
                 output="normal"):

        ## NOTE: should [output=="none"] be changed so that no batch_norm is applied either?? ##

        # Set configurations
        super().__init__()
        self.num_blocks = num_blocks
        self.in_planes = in_planes
        self.out_planes = out_planes * block.expansion

        # Create layer
        self.dropout = nn.Dropout2d(drop)
        for block_id in range(num_blocks):
            # -first block has given stride, later blocks have stride 1
            new_block = block(in_planes,
                              out_planes,
                              stride=stride if block_id == 0 else 1,
                              batch_norm=batch_norm,
                              nl=nl,
                              no_fnl=(block_id == num_blocks - 1),
                              smaller_kernel=smaller_kernel)
            setattr(self, "block{}".format(block_id + 1), new_block)
            in_planes = out_planes * block.expansion
        # self.bn = nn.BatchNorm2d(out_planes * block.expansion) if batch_norm else utils.Identity()
        if output == "sigmoid":
            self.nl = nn.Sigmoid()
        elif output == "normal":
            self.nl = nn.ReLU() if nl == "relu" else nn.LeakyReLU()
        elif output == "none":
            self.nl = modules.Identity()
        else:
            raise NotImplementedError(
                "Ouptut '{}' not implemented for deconvolutional ResNet layer."
                .format(output))
 def __init__(self,
              input_channels,
              output_channels,
              stride=1,
              drop=0,
              batch_norm=True,
              nl="relu",
              bias=True,
              gated=False,
              smaller_kernel=False):
     super().__init__()
     if drop > 0:
         self.dropout = nn.Dropout2d(drop)
     self.deconv = nn.ConvTranspose2d(
         input_channels,
         output_channels,
         bias=bias,
         stride=stride,
         kernel_size=(2 if smaller_kernel else 4) if stride == 2 else 3,
         padding=0 if (stride == 2 and smaller_kernel) else 1)
     if batch_norm:
         self.bn = nn.BatchNorm2d(output_channels)
     if gated:
         self.gate = nn.ConvTranspose2d(
             input_channels,
             output_channels,
             bias=False,
             stride=stride,
             kernel_size=(2 if smaller_kernel else 4) if stride == 2 else 3,
             padding=0 if (stride == 2 and smaller_kernel) else 1)
         self.sigmoid = nn.Sigmoid()
     if isinstance(nl, nn.Module):
         self.nl = nl
     elif nl in ("sigmoid", "hardtanh"):
         self.nl = nn.Sigmoid() if nl == "sigmoid" else nn.Hardtanh(
             min_val=-4.5, max_val=0)
     elif not nl == "none":
         self.nl = nn.ReLU() if nl == "relu" else (
             nn.LeakyReLU() if nl == "leakyrelu" else modules.Identity())
    def __init__(self,
                 conv_type="standard",
                 block_type="basic",
                 num_blocks=2,
                 image_channels=3,
                 depth=5,
                 start_channels=16,
                 reducing_layers=None,
                 batch_norm=True,
                 nl="relu",
                 output="normal",
                 global_pooling=False,
                 gated=False):
        '''Initialize stacked convolutional layers (either "standard" or "res-net" ones--1st layer is always standard).

        [conv_type]         <str> type of conv-layers to be used: [standard|resNet]
        [block_type]        <str> block-type to be used: [basic|bottleneck] (only relevant if [conv_type]=resNet)
        [num_blocks]        <int> or <list> (with len=[depth]-1) of # blocks in each layer
        [image_channels]    <int> # channels of input image to encode
        [depth]             <int> # layers
        [start_channels]    <int> # channels in 1st layer, doubled in every "rl" (=reducing layer)
        [reducing_layers]   <int> # layers in which image-size is halved & # channels doubled (default=[depth]-1)
                                      ("rl"'s are the last conv-layers; in 1st layer # channels cannot double)
        [batch_norm]        <bool> whether to use batch-norm after each convolution-operation
        [nl]                <str> non-linearity to be used: [relu|leakyrelu]
        [output]            <str>  if - "normal", final layer is same as all others
                                      - "none", final layer has no batchnorm or non-linearity
        [global_pooling]    <bool> whether to include global average pooling layer at very end
        [gated]             <bool> whether conv-layers should be gated (not implemented for ResNet-layers)'''

        # Process type and number of blocks
        conv_type = "standard" if depth < 2 else conv_type
        if conv_type == "resNet":
            num_blocks = [num_blocks] * (depth - 1) if type(
                num_blocks) == int else num_blocks
            assert len(num_blocks) == (depth - 1)
            block = conv_layers.Bottleneck if block_type == "bottleneck" else conv_layers.BasicBlock

        # Prepare label
        type_label = "C" if conv_type == "standard" else "R{}".format(
            "b" if block_type == "bottleneck" else "")
        channel_label = "{}-{}x{}".format(image_channels, depth,
                                          start_channels)
        block_label = "-{}".format(num_blocks) if conv_type == "resNet" else ""
        nd_label = "{bn}{nl}{gp}{gate}{out}".format(
            bn="b" if batch_norm else "",
            nl="l" if nl == "leakyrelu" else "",
            gp="p" if global_pooling else "",
            gate="g" if gated else "",
            out="n" if output == "none" else "")
        nd_label = "" if nd_label == "" else "-{}".format(nd_label)

        # Set configurations
        super().__init__()
        self.depth = depth
        self.rl = depth - 1 if (reducing_layers is None) else (
            reducing_layers if (depth + 1) > reducing_layers else depth)
        rl_label = "" if self.rl == (self.depth -
                                     1) else "-rl{}".format(self.rl)
        self.label = "{}{}{}{}{}".format(type_label, channel_label,
                                         block_label, rl_label, nd_label)
        self.block_expansion = block.expansion if conv_type == "resNet" else 1
        # -> constant by which # of output channels of each block is multiplied (if >1, it creates "bottleneck"-effect)
        double_factor = self.rl if self.rl < depth else depth - 1  # -> how often # start-channels is doubled
        self.out_channels = (
            start_channels * 2**double_factor
        ) * self.block_expansion if depth > 0 else image_channels
        # -> number channels in last layer (as seen from image)
        self.start_channels = start_channels  # -> number channels in 1st layer (doubled in every "reducing layer")
        self.global_pooling = global_pooling  # -> whether or not average global pooling layer should be added at end

        # Conv-layers
        output_channels = start_channels
        for layer_id in range(1, depth + 1):
            # should this layer down-sample? --> last [self.rl] layers should be down-sample layers
            reducing = layer_id > (depth - self.rl)
            # calculate number of this layer's input and output channels
            input_channels = image_channels if layer_id == 1 else output_channels * self.block_expansion
            output_channels = output_channels * 2 if (
                reducing and not layer_id == 1) else output_channels
            # define and set the convolutional-layer
            if conv_type == "standard" or layer_id == 1:
                conv_layer = conv_layers.conv_layer(
                    input_channels,
                    output_channels,
                    stride=2 if reducing else 1,
                    drop=0,
                    nl="no" if output == "none" and layer_id == depth else nl,
                    batch_norm=False
                    if output == "none" and layer_id == depth else batch_norm,
                    gated=False
                    if output == "none" and layer_id == depth else gated)
            else:
                conv_layer = conv_layers.res_layer(
                    input_channels,
                    output_channels,
                    block=block,
                    num_blocks=num_blocks[layer_id - 2],
                    stride=2 if reducing else 1,
                    drop=0,
                    batch_norm=batch_norm,
                    nl=nl,
                    no_fnl=True
                    if output == "none" and layer_id == depth else False)
            setattr(self, 'convLayer{}'.format(layer_id), conv_layer)
        # Perform pooling (if requested)
        self.pooling = nn.AdaptiveAvgPool2d(
            (1, 1)) if global_pooling else modules.Identity()
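
# Worked example of the channel bookkeeping above, assuming conv_type="standard",
# depth=5, start_channels=16, reducing_layers=4 (so block_expansion == 1):
depth, start_channels, reducing_layers = 5, 16, 4
rl = reducing_layers if (depth + 1) > reducing_layers else depth    # -> 4
double_factor = rl if rl < depth else depth - 1                     # -> 4
out_channels = start_channels * 2 ** double_factor                  # -> 256
# With conv_type="resNet" and a bottleneck block, out_channels is additionally
# multiplied by block.expansion (that constant is not shown in this excerpt).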
    def __init__(self,
                 input_size=1000,
                 output_size=10,
                 layers=2,
                 hid_size=1000,
                 hid_smooth=None,
                 size_per_layer=None,
                 drop=0,
                 batch_norm=True,
                 nl="relu",
                 bias=True,
                 excitability=False,
                 excit_buffer=False,
                 gate_size=0,
                 gating_prop=0.,
                 final_gate=False,
                 output='normal',
                 device='cuda'):
        '''sizes: 0th=[input], 1st=[hid_size], ..., 1st-to-last=[hid_smooth], last=[output].
        [input_size]       # of inputs
        [output_size]      # of units in final layer
        [layers]           # of layers
        [hid_size]         # of units in each hidden layer
        [hid_smooth]       if None, all hidden layers have [hid_size] units, else # of units linearly in-/decreases s.t.
                             final hidden layer has [hid_smooth] units (if only 1 hidden layer, it has [hid_size] units)
        [size_per_layer]   None or <list> with for each layer number of units (1st element = number of inputs)
                                --> overwrites [input_size], [output_size], [layers], [hid_size] and [hid_smooth]
        [drop]             % of each layer's inputs that is randomly set to zero during training
        [batch_norm]       <bool>; if True, batch-normalization is applied to each layer
        [nl]               <str>; type of non-linearity to be used (options: "relu", "leakyrelu", "none")
        [gate_size]        <int>; if>0, each linear layer has gate controlled by separate inputs of size [gate_size]
        [gating_prop]      <float>; probability for each unit to be gated
        [final_gate]       <bool>; whether final layer is allowed to have a gate
        [output]           <str>; if - "normal", final layer is same as all others
                                     - "none", final layer has no non-linearity
                                     - "sigmoid", final layer has sigmoid non-linearity'''

        super().__init__()
        self.output = output

        # get sizes of all layers
        if size_per_layer is None:
            hidden_sizes = []
            if layers > 1:
                if (hid_smooth is not None):
                    hidden_sizes = [
                        int(x) for x in np.linspace(
                            hid_size, hid_smooth, num=layers - 1)
                    ]
                else:
                    hidden_sizes = [
                        int(x) for x in np.repeat(hid_size, layers - 1)
                    ]
            size_per_layer = [input_size] + hidden_sizes + [
                output_size
            ] if layers > 0 else [input_size]
        self.layers = len(size_per_layer) - 1

        # set label for this module
        # -determine "non-default options"-label
        nd_label = "{drop}{bias}{exc}{bn}{nl}{gate}".format(
            drop="" if drop == 0 else "d{}".format(drop),
            bias="" if bias else "n",
            exc="e" if excitability else "",
            bn="b" if batch_norm else "",
            nl="l" if nl == "leakyrelu" else "",
            gate="g{}m{}".format(gate_size, gating_prop) if
            (gate_size > 0 and gating_prop > 0.) else "",
        )
        nd_label = "{}{}".format(
            "" if nd_label == "" else "-{}".format(nd_label),
            "" if output == "normal" else "-{}".format(output))
        # -set label
        size_statement = ""
        for i in size_per_layer:
            size_statement += "{}{}".format(
                "-" if size_statement == "" else "x", i)
        self.label = "F{}{}".format(size_statement,
                                    nd_label) if self.layers > 0 else ""

        # set layers
        for lay_id in range(1, self.layers + 1):
            # number of units of this layer's input and output
            in_size = size_per_layer[lay_id - 1]
            out_size = size_per_layer[lay_id]
            # define and set the fully connected layer
            if (not gate_size > 0.) or (not gating_prop > 0.) or (
                    lay_id == self.layers and not final_gate):
                layer = fc_layer(
                    in_size,
                    out_size,
                    bias=bias,
                    excitability=excitability,
                    excit_buffer=excit_buffer,
                    batch_norm=False if
                    (lay_id == self.layers
                     and not output == "normal") else batch_norm,
                    nl=("none" if output == "none" else nn.Sigmoid()) if
                    (lay_id == self.layers and not output == "normal") else nl,
                    drop=drop if lay_id > 1 else 0.,
                )
            else:
                layer = fc_layer_fixed_gates(
                    in_size,
                    out_size,
                    bias=bias,
                    excitability=excitability,
                    excit_buffer=excit_buffer,
                    batch_norm=False if
                    (lay_id == self.layers
                     and not output == "normal") else batch_norm,
                    gate_size=gate_size,
                    gating_prop=gating_prop,
                    device=device,
                    nl=("none" if output == "none" else nn.Sigmoid()) if
                    (lay_id == self.layers and not output == "normal") else nl,
                    drop=drop if lay_id > 1 else 0.,
                )
            setattr(self, 'fcLayer{}'.format(lay_id), layer)

        # if no layers, add "identity"-module to indicate in this module's representation nothing happens
        if self.layers < 1:
            self.noLayers = modules.Identity()
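
# Worked example of the size bookkeeping above (illustrative numbers): with
# input_size=1000, output_size=10, layers=4, hid_size=800, hid_smooth=200 the
# hidden sizes are interpolated linearly between hid_size and hid_smooth.
import numpy as np
hidden_sizes = [int(x) for x in np.linspace(800, 200, num=4 - 1)]  # [800, 500, 200]
size_per_layer = [1000] + hidden_sizes + [10]                      # [1000, 800, 500, 200, 10]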
Example #11
    def __init__(
            self,
            image_size,
            image_channels,
            classes,
            target_name,
            only_active=False,
            # -conv-layers
            conv_type="standard",
            depth=5,
            start_channels=16,
            reducing_layers=4,
            conv_bn=True,
            conv_nl="relu",
            num_blocks=2,
            global_pooling=False,
            no_fnl=True,
            conv_gated=False,
            # -fc-layers
            fc_layers=3,
            fc_units=2000,
            h_dim=2000,
            fc_drop=0,
            fc_bn=False,
            fc_nl="relu",
            excit_buffer=False,
            fc_gated=False,
            # -prior
            prior="GMM",
            z_dim=100,
            per_class=True,
            n_modes=1,
            # -decoder
            recon_loss='MSEnorm',
            dg_gates=True,
            dg_prop=0.5,
            device='cpu',
            # -training-specific settings (can be changed after setting up model)
            lamda_pl=1.,
            lamda_rcl=1.,
            lamda_vl=1.,
            **kwargs):

        # Set configurations for setting up the model
        super().__init__()
        self.target_name = target_name
        self.label = "BIR"
        self.image_size = image_size
        self.image_channels = image_channels
        self.classes = classes
        self.fc_layers = fc_layers
        self.z_dim = z_dim
        self.h_dim = h_dim
        self.fc_units = fc_units
        self.fc_drop = fc_drop
        self.depth = depth

        # whether always all classes can be predicted or only those seen so far
        self.only_active = only_active
        self.active_classes = []

        # -type of loss to be used for reconstruction
        self.recon_loss = recon_loss  # options: BCE|MSE|MSEnorm
        self.network_output = "sigmoid" if self.recon_loss in (
            "MSE", "BCE") else "none"
        # -settings for class-specific gates in fully-connected hidden layers of decoder
        self.dg_prop = dg_prop
        self.dg_gates = dg_gates if dg_prop > 0. else False
        self.gate_size = classes if self.dg_gates else 0

        # Prior-related parameters
        self.prior = prior
        self.per_class = per_class
        self.n_modes = n_modes * classes if self.per_class else n_modes
        self.modes_per_class = n_modes if self.per_class else None

        # Components deciding how to train / run the model (i.e., these can be changed after setting up the model)
        # -options for prediction loss
        self.lamda_pl = lamda_pl  # weight of classification-loss
        # -how to compute the loss function?
        self.lamda_rcl = lamda_rcl  # weight of reconstruction-loss
        self.lamda_vl = lamda_vl  # weight of variational loss

        # Check whether there is at least 1 fc-layer
        if fc_layers < 1:
            raise ValueError("VAE cannot have 0 fully-connected layers!")

        ######------SPECIFY MODEL------######

        ##>----Encoder (= q[z|x])----<##
        self.convE = ConvLayers(conv_type=conv_type,
                                block_type="basic",
                                num_blocks=num_blocks,
                                image_channels=image_channels,
                                depth=self.depth,
                                start_channels=start_channels,
                                reducing_layers=reducing_layers,
                                batch_norm=conv_bn,
                                nl=conv_nl,
                                output="none" if no_fnl else "normal",
                                global_pooling=global_pooling,
                                gated=conv_gated)
        self.flatten = modules.Flatten()
        #------------------------------calculate input/output-sizes--------------------------------#
        self.conv_out_units = self.convE.out_units(image_size)
        self.conv_out_size = self.convE.out_size(image_size)
        self.conv_out_channels = self.convE.out_channels
        if fc_layers < 2:
            self.fc_layer_sizes = [
                self.conv_out_units
            ]  #--> this results in self.fcE = modules.Identity()
        elif fc_layers == 2:
            self.fc_layer_sizes = [self.conv_out_units, h_dim]
        else:
            self.fc_layer_sizes = [self.conv_out_units] + [
                int(x) for x in np.linspace(fc_units, h_dim, num=fc_layers - 1)
            ]
        real_h_dim = h_dim if fc_layers > 1 else self.conv_out_units
        #------------------------------------------------------------------------------------------#
        self.fcE = MLP(size_per_layer=self.fc_layer_sizes,
                       drop=fc_drop,
                       batch_norm=fc_bn,
                       nl=fc_nl,
                       excit_buffer=excit_buffer,
                       gated=fc_gated)
        # to z
        self.toZ = fc_layer_split(real_h_dim,
                                  z_dim,
                                  nl_mean='none',
                                  nl_logvar='none')  #, drop=fc_drop)

        ##>----Classifier----<##
        self.units_before_classifier = real_h_dim
        self.classifier = fc_layer(self.units_before_classifier,
                                   classes,
                                   excit_buffer=True,
                                   nl='none')

        ##>----Decoder (= p[x|z])----<##
        out_nl = (fc_layers > 1) or (self.depth > 0 and not no_fnl)
        real_h_dim_down = h_dim if fc_layers > 1 else self.convE.out_units(
            image_size, ignore_gp=True)
        if self.dg_gates:
            self.fromZ = fc_layer_fixed_gates(
                z_dim,
                real_h_dim_down,
                batch_norm=(out_nl and fc_bn),
                nl=fc_nl if out_nl else "none",
                gate_size=self.gate_size,
                gating_prop=dg_prop,
            )
        else:
            self.fromZ = fc_layer(z_dim,
                                  real_h_dim_down,
                                  batch_norm=(out_nl and fc_bn),
                                  nl=fc_nl if out_nl else "none")
        fc_layer_sizes_down = list(self.fc_layer_sizes)  # copy, so the encoder's size-list is not mutated
        fc_layer_sizes_down[0] = self.convE.out_units(image_size,
                                                      ignore_gp=True)
        # -> if 'gp' is used in forward pass, size of first/final hidden layer differs between forward and backward pass
        if self.dg_gates:
            self.fcD = MLP_gates(
                size_per_layer=[x for x in reversed(fc_layer_sizes_down)],
                drop=fc_drop,
                batch_norm=fc_bn,
                nl=fc_nl,
                gate_size=self.gate_size,
                gating_prop=dg_prop,
                device=device,
                output=self.network_output,
            )
        else:
            self.fcD = MLP(
                size_per_layer=[x for x in reversed(fc_layer_sizes_down)],
                drop=fc_drop,
                batch_norm=fc_bn,
                nl=fc_nl,
                gated=fc_gated,
                output=self.network_output,
            )
        # to image-shape
        self.to_image = modules.Reshape(image_channels=self.convE.out_channels
                                        if self.depth > 0 else image_channels)
        # through deconv-layers
        self.convD = modules.Identity()

        ##>----Prior----<##
        # -if using the GMM-prior, add its parameters
        if self.prior == "GMM":
            # -create
            self.z_class_means = nn.Parameter(
                torch.Tensor(self.n_modes, self.z_dim))
            self.z_class_logvars = nn.Parameter(
                torch.Tensor(self.n_modes, self.z_dim))
            # -initialize
            self.z_class_means.data.normal_()
            self.z_class_logvars.data.normal_()
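
    # The GMM-prior parameters above are only created and randomly initialized in
    # this excerpt; below is a hedged sketch of drawing one sample per requested
    # mode via the reparameterization trick. It is an illustration, not the model's
    # actual sampling routine; [mode_ids] (a LongTensor of mode indices) is a
    # hypothetical argument.
    def sample_from_prior_modes(self, mode_ids):
        means = self.z_class_means[mode_ids]        # (n_selected, z_dim)
        logvars = self.z_class_logvars[mode_ids]    # (n_selected, z_dim)
        eps = torch.randn_like(means)
        return means + eps * (0.5 * logvars).exp()  # z = mu + sigma * eps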