def __init__(self, use_bias, alg, **kwargs):
     """Conv2D (64ch, 3x3) followed by the activation selected by ``alg``.

     ``alg`` names either a special-cased activation layer ("relu6",
     "leakyrelu", "gelu") or any activation string accepted by
     ``nn.Activation``.
     """
     super(ConvAct, self).__init__(**kwargs)
     self.conv0 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1, use_bias=use_bias)
     # Activations that need a dedicated layer class; anything else falls
     # through to the generic nn.Activation wrapper.
     builders = {
         "relu6": RELU6,
         "leakyrelu": lambda: nn.LeakyReLU(0.25),
         "gelu": nn.GELU,
     }
     make_act = builders.get(alg)
     self.act = make_act() if make_act is not None else nn.Activation(activation=alg)
 def __init__(self, alg, use_bias, **kwargs):
     super(ConvBNAct, self).__init__(**kwargs)
     self.conv0 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1, use_bias=use_bias)
     self.bn = nn.BatchNorm()
     if alg == "relu6":
       self.act = RELU6()
     elif alg == "leakyrelu":
       self.act = nn.LeakyReLU(0.25)
     elif alg == "gelu":
       self.act = nn.GELU()
     elif alg == "gelu_tanh":
       self.act = nn.GELU(approximation='tanh')
     else:
       self.act = nn.Activation(activation = alg)
 def __init__(self, alg, use_bias, **kwargs):
     super(ConvBNSumAct, self).__init__(**kwargs)
     self.conv0 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1, use_bias=use_bias)
     self.conv1 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1)
     self.conv1.share_parameters(self.conv0.collect_params())
     self.bn = nn.BatchNorm()
     if alg == "relu6":
       self.act = RELU6()
     elif alg == "leakyrelu":
       self.act = nn.LeakyReLU(0.25)
     elif alg == "gelu":
       self.act = nn.GELU()
     else:
       self.act = nn.Activation(activation = alg)
 def __init__(self, use_bias, alg, **kwargs):
     super(ConvActAdd, self).__init__(**kwargs)
     self.conv0 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1, use_bias=use_bias,
                            weight_initializer=mx.init.Xavier(magnitude=2.24))
     if alg == "relu6":
       self.act = RELU6()
     elif alg == "leakyrelu":
       self.act = nn.LeakyReLU(0.25)
     elif alg == "gelu":
       self.act = nn.GELU()
     else:
       self.act = nn.Activation(activation = alg)
     self.conv1 = nn.Conv2D(channels=64, kernel_size=(3, 3), strides=1, use_bias=use_bias)
     self.conv1.share_parameters(self.conv0.collect_params())