def __init__(self, c_in, c_out, k_size, stride, pad, initializer='kaiming'):
    """Create a convolution whose weight is rescaled to unit RMS.

    The wrapped ``nn.Conv2d`` weight is initialised with the selected
    scheme, its root-mean-square value is stored in ``self.scale`` and the
    weight is then divided by that value.

    Args:
        c_in: number of input channels.
        c_out: number of output channels.
        k_size: kernel size passed to ``nn.Conv2d``.
        stride: stride passed to ``nn.Conv2d``.
        pad: padding passed to ``nn.Conv2d``.
        initializer: ``'kaiming'`` or ``'xavier'``; any other value keeps
            the default ``nn.Conv2d`` initialisation.
    """
    super(equalized_conv2d, self).__init__()
    # The bias is kept; it has little influence.
    self.conv = nn.Conv2d(c_in, c_out, k_size, stride, pad, bias=True)
    weight = self.conv.weight

    if initializer == 'kaiming':
        # ``a`` is the negative slope of the rectifier used after this layer.
        torch.nn.init.kaiming_normal_(weight, a=calculate_gain('conv2d'))
    elif initializer == 'xavier':
        torch.nn.init.xavier_normal_(weight)

    # Split the scale between the weights and the input to speed up
    # convergence: remember the RMS of the freshly initialised weight ...
    rms = torch.mean(weight.data ** 2) ** 0.5
    self.scale = rms
    # ... and normalise the stored weight by it.
    weight.data.copy_(weight.data / rms)
def reset_parameters(self):
    r"""

    Description
    -----------
    Reinitialize learnable parameters.

    Notes
    -----
    The ``fc`` weight (and the ``res_fc`` weight when ``res_fc`` is an
    ``nn.Linear``) is drawn with Xavier normal initialization using the
    ReLU gain. ``mu`` is drawn from a normal distribution with mean 0 and
    standard deviation 0.1, ``inv_sigma`` is filled with the constant 1
    and the bias, when present, is set to zero.
    """
    relu_gain = init.calculate_gain('relu')

    linear_layers = [self.fc]
    if isinstance(self.res_fc, nn.Linear):
        linear_layers.append(self.res_fc)
    for layer in linear_layers:
        init.xavier_normal_(layer.weight, gain=relu_gain)

    init.normal_(self.mu.data, 0, 0.1)
    init.constant_(self.inv_sigma.data, 1)

    if self.bias is None:
        return
    init.zeros_(self.bias.data)
def __init__(self):
    """Build the layer list and register every layer as a sub-module.

    The layers are kept in the plain list ``self.net`` and additionally
    registered under their list index (as a string) via ``add_module``.
    Every ``nn.Conv2d`` weight is initialised orthogonally with the ReLU
    gain.
    """
    super(Net, self).__init__()
    nf = 64
    self.net = [
        nn.Conv2d(3, nf, kernel_size=3, padding=1),
        nn.ELU(),
        nn.Conv2d(nf, nf, kernel_size=3, padding=1),
        nn.ELU(),
        nn.Conv2d(nf, nf, kernel_size=4, stride=2, padding=1),
        nn.ELU(),
        nn.Conv2d(nf, nf, kernel_size=3, padding=1),
        nn.ELU(),
        nn.Conv2d(nf, nf, kernel_size=3, padding=1),
        nn.ELU(),
        # NOTE(review): the previous layer outputs ``nf`` channels while
        # this one expects ``nf * 2`` inputs. Applied strictly in sequence
        # this would fail; confirm against ``forward`` before changing it.
        nn.Conv2d(nf * 2, nf * 2, kernel_size=4, stride=2, padding=1),
        nn.ELU(),
        nn.Conv2d(nf * 2, nf * 2, kernel_size=3, padding=1),
        nn.ELU(),
        nn.Conv2d(nf * 2, nf * 2, kernel_size=3, padding=1),
        nn.ELU(),
        nn.Conv2d(nf * 2, 3 * 4**2, kernel_size=3, padding=1),
        nn.ELU(),
        nn.PixelShuffle(4),
        nn.Sigmoid(),
    ]

    relu_gain = init.calculate_gain('relu')
    for idx, module in enumerate(self.net):
        # Initialise convolutional layers; ``orthogonal_`` is the
        # non-deprecated in-place spelling of ``init.orthogonal``.
        if isinstance(module, nn.Conv2d):
            init.orthogonal_(module.weight, relu_gain)
        self.add_module(str(idx), module)
def reset_parameters(self) -> None:
    """Reinitialize every learnable component of the model.

    Embeddings and encoders are reset through their own
    ``reset_parameters``; composer weights are made orthogonal and their
    biases zeroed; the ReLU-fed linear layers use Xavier uniform weights
    with the ReLU gain and bias 1; the action scorer uses plain Xavier
    uniform weights and bias 0; the guards are filled with zeros.
    """
    # Embeddings
    for prefix in ('word', 'pos', 'nt', 'action'):
        getattr(self, f'{prefix}_embedding').reset_parameters()

    # Encoders
    for prefix in ('stack', 'buffer', 'history'):
        getattr(self, f'{prefix}_encoder').reset_parameters()

    # Compositions
    for direction in ('fwd', 'bwd'):
        composer = getattr(self, f'{direction}_composer')
        for param_name, param in composer.named_parameters():
            if not param_name.startswith('weight'):
                assert param_name.startswith('bias')
                init.constant_(param, 0.)
                continue
            init.orthogonal_(param)

    # Transformations
    relu_gain = init.calculate_gain('relu')
    relu_fed = (
        ('word2encoder', 0),
        ('nt2encoder', 0),
        ('action2encoder', 0),
        ('fwdbwd2composed', 0),
        ('encoders2summary', 1),
    )
    for attr_name, position in relu_fed:
        linear = getattr(self, attr_name)[position]
        init.xavier_uniform_(linear.weight, gain=relu_gain)
        init.constant_(linear.bias, 1.)

    scorer = self.summary2actionlogprobs
    init.xavier_uniform_(scorer.weight)
    init.constant_(scorer.bias, 0.)

    # Guards
    for prefix in ('stack', 'buffer', 'history'):
        init.constant_(getattr(self, f'{prefix}_guard'), 0.)
def __init__(self, inplanes=3, outplanes=1, use_logits=False,
             logits_per_output=12, debug=False):
    """Assemble the convolutional blocks and initialise their weights.

    Args:
        inplanes: number of input channels given to the first block.
        outplanes: number of outputs; with ``use_logits`` one head is
            created per output.
        use_logits: when true, ``conv6`` is an ``nn.ModuleList`` of
            ``outplanes`` heads with ``logits_per_output`` channels each,
            otherwise a single ``ConvBlock`` with ``outplanes`` channels.
        logits_per_output: channel count of each head in logits mode.
        debug: stored on ``self.debug``.
    """
    super(ModelCountception_v2, self).__init__()

    # params
    self.inplanes = inplanes
    self.outplanes = outplanes
    self.activation = nn.LeakyReLU(0.01)
    self.final_activation = nn.LeakyReLU(0.3)
    self.patch_size = 40
    self.use_logits = use_logits
    self.logits_per_output = logits_per_output
    self.debug = debug

    self.conv1 = ConvBlock(self.inplanes, 64, ksize=3, pad=self.patch_size,
                           activation=self.activation)
    self.simple1 = SimpleBlock(64, 16, 16, activation=self.activation)
    self.simple2 = SimpleBlock(48, 16, 32, activation=self.activation)
    self.conv2 = ConvBlock(80, 16, ksize=14, activation=self.activation)
    self.simple3 = SimpleBlock(16, 112, 48, activation=self.activation)
    self.simple4 = SimpleBlock(208, 64, 32, activation=self.activation)
    self.simple5 = SimpleBlock(128, 40, 40, activation=self.activation)
    self.simple6 = SimpleBlock(120, 32, 96, activation=self.activation)
    self.conv3 = ConvBlock(224, 32, ksize=20, activation=self.activation)
    self.conv4 = ConvBlock(32, 64, ksize=10, activation=self.activation)
    self.conv5 = ConvBlock(64, 32, ksize=9, activation=self.activation)

    if use_logits:
        # NOTE(review): these heads take 64 input channels while ``conv5``
        # is built with 32 output channels - confirm which tensor feeds
        # them in ``forward``.
        self.conv6 = nn.ModuleList([
            ConvBlock(64, logits_per_output, ksize=1,
                      activation=self.final_activation)
            for _ in range(outplanes)
        ])
    else:
        self.conv6 = ConvBlock(32, self.outplanes, ksize=20, pad=1,
                               activation=self.final_activation)

    # Weight initialization
    leaky_gain = init.calculate_gain('leaky_relu', param=0.01)
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            init.xavier_uniform_(m.weight, gain=leaky_gain)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def create_fnet(widths, orthoinit, llbias, bnidx=-1):
    """ Creates feature-generating network, a multi-layer perceptron.

    Parameters:
    widths: list of widths of layers (including input and output widths)
    orthoinit: whether to use orthogonal weight initialization
    llbias: whether to use bias in the last layer
    bnidx: index of batch normalization (-1 if not used)

    Returns an ``nn.Sequential``: every hidden ``Linear`` is followed by an
    in-place ``ReLU`` (with a ``BatchNorm1d`` in between when its index
    equals ``bnidx``); the last ``Linear`` has no activation.
    """
    layers = []

    # Hidden layers: consecutive width pairs up to, but excluding, the last.
    hidden_shapes = zip(widths[:-2], widths[1:-1])
    for position, (n_in, n_out) in enumerate(hidden_shapes):
        hidden = nn.Linear(n_in, n_out)
        layers.append(hidden)
        if orthoinit:
            init.orthogonal_(hidden.weight, gain=init.calculate_gain('relu'))
        if position == bnidx:
            layers.append(nn.BatchNorm1d(n_out))
        layers.append(nn.ReLU(True))

    # Output layer, optionally without bias.
    head = nn.Linear(widths[-2], widths[-1], bias=llbias)
    layers.append(head)
    if orthoinit:
        init.orthogonal_(head.weight)
    if bnidx == len(widths) - 1:
        layers.append(nn.BatchNorm1d(head.weight.size(0)))

    return nn.Sequential(*layers)
def __init__(self, c_in, c_out, k_size, stride, pad, initializer='kaiming'):
    """Create a transposed convolution whose weight is rescaled to unit RMS.

    The bias-free ``nn.ConvTranspose2d`` weight is initialised with the
    selected scheme, its root-mean-square value is stored in ``self.scale``
    and the weight is divided by that value. A separate zero-filled bias
    parameter with ``c_out`` entries is stored in ``self.bias``.

    Args:
        c_in: number of input channels.
        c_out: number of output channels.
        k_size: kernel size passed to ``nn.ConvTranspose2d``.
        stride: stride passed to ``nn.ConvTranspose2d``.
        pad: padding passed to ``nn.ConvTranspose2d``.
        initializer: ``'kaiming'`` or ``'xavier'``; any other value keeps
            the default initialisation.
    """
    super(equalized_deconv2d, self).__init__()
    self.deconv = nn.ConvTranspose2d(c_in, c_out, k_size, stride, pad,
                                     bias=False)
    # Use the in-place initialisers; the un-suffixed names are deprecated.
    if initializer == 'kaiming':
        torch.nn.init.kaiming_normal_(self.deconv.weight,
                                      a=calculate_gain('conv2d'))
    elif initializer == 'xavier':
        torch.nn.init.xavier_normal_(self.deconv.weight)

    self.bias = torch.nn.Parameter(torch.FloatTensor(c_out).fill_(0))
    # Remember the RMS of the initialised weight, then normalise by it.
    self.scale = (torch.mean(self.deconv.weight.data ** 2)) ** 0.5
    self.deconv.weight.data.copy_(self.deconv.weight.data / self.scale)
def create_fnet(widths, nfeat, nfeato, orthoinit, llbias, dropout=None,
                batchnorm=False):
    """Build a multi-layer perceptron ending in ``nfeat * nfeato`` outputs.

    Args:
        widths: layer widths; each consecutive pair becomes one
            ``Linear`` + ``ReLU`` stage.
        nfeat: first factor of the output size.
        nfeato: second factor of the output size.
        orthoinit: whether to initialise the linear weights orthogonally
            (ReLU gain for hidden stages, default gain for the last layer).
        llbias: whether the last linear layer has a bias.
        dropout: dropout probability appended to every hidden stage;
            ``None`` or ``0`` disables dropout.
        batchnorm: whether to insert ``BatchNorm1d`` before each ``ReLU``.

    Returns:
        ``torch.nn.Sequential`` containing the assembled layers.
    """
    fnet_modules = []
    # Hidden stages: Linear (+ BatchNorm) + ReLU (+ Dropout).
    for n_in, n_out in zip(widths[:-1], widths[1:]):
        hidden = torch.nn.Linear(n_in, n_out)
        fnet_modules.append(hidden)
        if orthoinit:
            init.orthogonal_(hidden.weight, gain=init.calculate_gain('relu'))
        if batchnorm:
            fnet_modules.append(torch.nn.BatchNorm1d(n_out))
        fnet_modules.append(torch.nn.ReLU(True))
        if dropout is not None and dropout != 0:
            fnet_modules.append(torch.nn.Dropout(dropout, inplace=False))

    # Final projection to nfeat * nfeato values.
    head = torch.nn.Linear(widths[-1], nfeat * nfeato, bias=llbias)
    fnet_modules.append(head)
    if orthoinit:
        init.orthogonal_(head.weight)
    return torch.nn.Sequential(*fnet_modules)
def __init__(self, in_channels, kernel_size, filter_type,
             nonlinearity='linear', running_std=False, running_mean=False):
    """Store the configuration and create the optional learnable statistics.

    Args:
        in_channels: channel count, must be at least 1.
        kernel_size: kernel side length; the base standard deviation is
            ``calculate_gain(nonlinearity) / kernel_size``.
        filter_type: ``'spatial'`` or ``'channel'``.
        nonlinearity: name forwarded to ``calculate_gain``.
        running_std: when true ``self.std`` is a learnable parameter with
            ``in_channels * kernel_size**2`` entries, otherwise it is the
            scalar base standard deviation.
        running_mean: when true a learnable ``self.mean`` of the same size
            is created; otherwise ``self.mean`` is not set here.
    """
    assert filter_type in ('spatial', 'channel')
    assert in_channels >= 1
    super(FilterNorm, self).__init__()

    self.in_channels = in_channels
    self.filter_type = filter_type
    # The ``runing_*`` spelling is kept exactly as the attribute is named.
    self.runing_std = running_std
    self.runing_mean = running_mean

    base_std = calculate_gain(nonlinearity) / kernel_size
    if not running_std:
        self.std = base_std
    else:
        noise = torch.randn(in_channels * kernel_size ** 2)
        self.std = nn.Parameter(noise * base_std, requires_grad=True)

    if running_mean:
        self.mean = nn.Parameter(
            torch.randn(in_channels * kernel_size ** 2), requires_grad=True)
def init_weights(self):
    """Initialise the embedding, recurrent, output and mixing parameters.

    * The embedding matrix is copied from ``self._emb_vector``; rows
      ``1 .. 1 + nr_unk + var_size`` are re-drawn from N(0, 1) and
      L2-normalised per row; row 0 is zeroed.
    * When ``self._emb_trainable`` is false the embedding weight is frozen.
    * Recurrent parameters: 1-D tensors are drawn from
      N(0, ``self._gru_init_std``), all others are made orthogonal with
      the tanh gain.
    * The output weight and the mix matrix are uniform in
      ``[-init_range, init_range]``; the output bias is zero.
    """
    init_range = self._init_range
    init_std = self._gru_init_std

    embedding_weight = self._embedding_layer.weight
    embedding_weight.data.copy_(torch.from_numpy(self._emb_vector))

    # Slice is a view, so the in-place operations below write through to
    # the embedding matrix.
    unk_n_var = embedding_weight.data[1:2 + self._nr_unk + self._var_size, :]
    init.normal_(unk_n_var, 0, 1)
    # normalise randomly initialised embeddings
    unk_n_var /= torch.norm(unk_n_var, p=2, dim=1).unsqueeze(1)
    embedding_weight.data[0, :] = 0

    if not self._emb_trainable:
        # The attribute used to be misspelled ``requires_gard``, which only
        # created an unused attribute and left the embedding trainable.
        embedding_weight.requires_grad = False

    gain = init.calculate_gain('tanh')
    for recurrent in (self._recurrent_layer, self._question_recurrent_layer):
        for p in recurrent.parameters():
            if p.dim() == 1:
                p.data.normal_(0, init_std)
            else:
                init.orthogonal_(p.data, gain)

    self._output_layer.weight.data.uniform_(-init_range, init_range)
    self._output_layer.bias.data.fill_(0)
    self._mix_matrix.data.uniform_(-init_range, init_range)
def __init__(self):
    """Define the layers of the keypoint-regression CNN.

    Four convolution stages (each with batch norm and dropout, sharing one
    2x2 max-pool layer) are followed by three fully-connected layers; the
    last one outputs 136 values (2 for each of the 68 keypoint (x, y)
    pairs). Convolution and linear weights use Xavier uniform
    initialisation, with the ReLU gain for all but the last layer.
    """
    super(Net, self).__init__()

    # 1 input image channel (grayscale), 32 output channels/feature maps,
    # 5x5 square convolution kernel
    self.conv1 = nn.Conv2d(1, 32, 5)
    self.conv1_bn = nn.BatchNorm2d(32)
    self.pool = nn.MaxPool2d(2, 2)
    self.drop1 = nn.Dropout(p=0.1)

    self.conv2 = nn.Conv2d(32, 48, 5)
    self.conv2_bn = nn.BatchNorm2d(48)
    self.drop2 = nn.Dropout(p=0.1)

    self.conv3 = nn.Conv2d(48, 64, 5)
    self.conv3_bn = nn.BatchNorm2d(64)
    self.drop3 = nn.Dropout(p=0.2)

    self.conv4 = nn.Conv2d(64, 96, 5)
    self.conv4_bn = nn.BatchNorm2d(96)
    self.drop4 = nn.Dropout(p=0.2)

    self.fc1 = nn.Linear(9600, 2400)
    self.fc1_bn = nn.BatchNorm1d(2400)
    self.fc1_drop = nn.Dropout(p=0.4)

    self.fc2 = nn.Linear(2400, 612)
    # A linear layer produces (batch, features) activations, so this must
    # be BatchNorm1d like ``fc1_bn``; BatchNorm2d rejects non-4D input.
    # Parameter and buffer names/shapes are the same for both classes.
    self.fc2_bn = nn.BatchNorm1d(612)
    self.fc2_drop = nn.Dropout(p=0.2)

    self.fc3 = nn.Linear(612, 136)

    # ``xavier_uniform_`` is the non-deprecated in-place initialiser.
    relu_gain = I.calculate_gain('relu')
    for layer in (self.conv1, self.conv2, self.conv3, self.conv4,
                  self.fc1, self.fc2):
        I.xavier_uniform_(layer.weight, gain=relu_gain)
    I.xavier_uniform_(self.fc3.weight)
def _initialize_weights(self):
    """Initialise the eight convolution weights according to ``self.initWay``.

    ``'kaiming'`` uses ``kaiming_normal_`` (``fan_out``/ReLU for every
    layer except ``conv4`` and ``conv44``, which use the defaults);
    ``'ortho'`` uses ``orthogonal_`` (ReLU gain for every layer except
    ``conv4`` and ``conv44``, which use gain 1).

    Raises:
        ValueError: if ``self.initWay`` is neither ``'kaiming'`` nor
            ``'ortho'``. (Previously the message was printed and the
            interpreter exited with status 0.)
    """
    # (attribute name, followed-by-ReLU) in the original call order.
    schedule = (
        ('conv1', True), ('conv2', True), ('conv3', True), ('conv4', False),
        ('conv11', True), ('conv22', True), ('conv33', True),
        ('conv44', False),
    )

    if self.initWay == 'kaiming':
        for attr_name, relu_follows in schedule:
            weight = getattr(self, attr_name).weight
            if relu_follows:
                init.kaiming_normal_(weight, mode='fan_out',
                                     nonlinearity='relu')
            else:
                init.kaiming_normal_(weight)
    elif self.initWay == 'ortho':
        relu_gain = init.calculate_gain('relu')
        for attr_name, relu_follows in schedule:
            weight = getattr(self, attr_name).weight
            if relu_follows:
                init.orthogonal_(weight, relu_gain)
            else:
                init.orthogonal_(weight)
    else:
        raise ValueError(
            'Only Kaiming or Orthogonal initializer can be used!')
help='visualize generated adversarial examples') args = parser.parse_args() args.cuda = torch.cuda.is_available() and args.cuda torch.manual_seed(args.seed) kwargs = {'num_workers': 4} if args.cuda else {} if args.cuda: torch.cuda.set_device(args.gpu) torch.cuda.manual_seed(args.seed) init_functions = [{ 'xavier_normal': init.xavier_normal_, 'kwargs': { 'gain': init.calculate_gain('relu') } }, { 'xavier_uniform_': init.xavier_uniform_, 'kwargs': { 'gain': init.calculate_gain('relu') } }, { 'He_normal': init.kaiming_normal_, 'kwargs': { 'a': 0, 'mode': 'fan_in', 'nonlinearity': 'relu' } }, { 'He_uniform': init.kaiming_uniform_,
def reset_parameters(self):
    """Reinitialize learnable parameters.

    Delegates to ``self.gru.reset_parameters()``; no other parameter is
    touched here.
    """
    # The ReLU gain that used to be computed here was never used.
    self.gru.reset_parameters()
def weights_init(m):
    """Initialise one module; intended for use with ``Module.apply``.

    ``nn.Linear`` and ``nn.Conv2d`` weights are drawn with Xavier normal
    initialisation using the ReLU gain and their bias, when the layer has
    one, is set to zero. Every other module (including
    ``nn.BatchNorm1d``) is left untouched.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    # In-place initialisers replace the deprecated un-suffixed aliases.
    init.xavier_normal_(m.weight.data, gain=init.calculate_gain('relu'))
    # Layers built with ``bias=False`` expose ``bias`` as None.
    if m.bias is not None:
        init.constant_(m.bias.data, 0)
def _weight_init(m):
    """Orthogonally initialise ``nn.Conv2d`` weights with the ReLU gain.

    Modules of any other type are left unchanged.
    """
    if not isinstance(m, nn.Conv2d):
        return
    relu_gain = init.calculate_gain('relu')
    init.orthogonal_(m.weight, relu_gain)
def build_disc_in_sequence(self):
    """Assemble the input stage as a ``Sequential`` of five steps.

    Steps, in order: ``Permute([0, 2, 1])``, a ``Reshape`` that inserts a
    singleton axis at position 1, a weight-scaled ``nn.Conv2d`` from 1 to
    ``self.n_filters`` channels with kernel ``(1, self.n_channels)``
    (gain for leaky ReLU), a ``Reshape`` that keeps the first three axes,
    and ``nn.LeakyReLU(0.2)``.
    """
    stages = [
        Permute([0, 2, 1]),
        Reshape([[0], 1, [1], [2]]),
    ]

    channel_conv = nn.Conv2d(1, self.n_filters, (1, self.n_channels))
    leaky_gain = calculate_gain('leaky_relu')
    stages.append(weight_scale(channel_conv, gain=leaky_gain))

    stages.append(Reshape([[0], [1], [2]]))
    stages.append(nn.LeakyReLU(0.2))
    return Sequential(*stages)
def reset_parameters(self):
    """Reinitialize learnable parameters.

    The GRU is reset through its own ``reset_parameters``; the
    ``edge_embed`` weight is then drawn with Xavier normal initialisation
    using the ReLU gain.
    """
    self.gru.reset_parameters()
    init.xavier_normal_(
        self.edge_embed.weight,
        gain=init.calculate_gain('relu'),
    )
def _init_fc_layer(self):
    """Re-draw ``self.fc1.weight`` uniformly, Kaiming-uniform style.

    The bound is ``sqrt(3) * gain / sqrt(self.current_prototypes_number)``
    with the leaky-ReLU gain for a negative slope of ``sqrt(5)``.
    """
    import torch  # local import: only needed for the no_grad guard below

    std = calculate_gain('leaky_relu', math.sqrt(5)) / math.sqrt(
        self.current_prototypes_number)
    bound = math.sqrt(3.0) * std
    # An in-place update of a leaf tensor that requires grad raises a
    # RuntimeError while autograd is recording, so disable it here. This
    # is a no-op when the caller already runs under ``torch.no_grad()``.
    with torch.no_grad():
        self.fc1.weight.uniform_(-bound, bound)  # kaiming_uniform_
def _initialize_weights(self):
    """Orthogonally initialise ``self.conv2d.weight`` with the ReLU gain."""
    relu_gain = init.calculate_gain('relu')
    kernel = self.conv2d.weight
    init.orthogonal_(kernel, relu_gain)
def __init__(self, width, height, spec_conv_layers, spec_max_pooling,
             spec_linear, spec_dropout_rates, useBatchNorm=False,
             useAffineTransformInBatchNorm=False):
    '''
    Build a network made of a number of convolutional layers, intermittent
    max-pooling and dropout layers, and a number of linear layers followed
    by a 10-unit output layer. Max-pooling and dropout layers are recorded
    together with the positions at which they are to be inserted.

    :param width: input image width
    :param height: input image height
    :param spec_conv_layers: list of tuples with (numFilters, width, height)
        (one tuple for each layer)
    :param spec_max_pooling: list of tuples with (posToInsert, width, height)
        of max-pooling layers
    :param spec_linear: list with numNeurons for each layer
        (i.e. [100, 200, 300] creates 3 layers)
    :param spec_dropout_rates: list of tuples with
        (posToInsert, rate of dropout) (applied after max-pooling)
    :param useBatchNorm: stored on ``self.useBatchNorm``
    :param useAffineTransformInBatchNorm: ``affine`` flag of every
        batch-normalization layer created here
    '''
    super(ConvolForwardNet, self).__init__()
    self.width = width
    self.height = height
    self.useBatchNorm = useBatchNorm

    conv_stack = []
    norm_stack = []

    # Convolutional layers (the input is assumed to have 3 channels).
    channels = 3
    for spec in spec_conv_layers:
        n_filters = spec[0]
        kernel = (spec[1], spec[2])
        # Padding chosen so that width and height are unchanged after
        # each convolution.
        same_padding = ((kernel[0] - 1) // 2, (kernel[1] - 1) // 2)
        conv = nn.Conv2d(in_channels=channels, out_channels=n_filters,
                         kernel_size=kernel, padding=same_padding)
        # Glorot weight initialization.
        nn.init.xavier_uniform_(conv.weight, calculate_gain('conv2d'))
        conv_stack.append(conv)
        norm_stack.append(
            nn.BatchNorm2d(n_filters, affine=useAffineTransformInBatchNorm))
        channels = n_filters

    # Max-pooling layers and the positions they belong to.
    pool_stack = []
    self.max_pooling_positions = []
    for spec in spec_max_pooling:
        pool_stack.append(nn.MaxPool2d((spec[1], spec[2])))
        self.max_pooling_positions.append(spec[0])

    # Dropout layers: Dropout2d for positions inside the convolutional
    # part, ordinary Dropout otherwise.
    drop_stack = []
    self.dropout_positions = []
    for spec in spec_dropout_rates:
        position, rate = spec[0], spec[1]
        if position >= len(conv_stack):
            drop_stack.append(nn.Dropout(rate))
        else:
            drop_stack.append(nn.Dropout2d(rate))
        self.dropout_positions.append(position)

    # Linear layers; every pooling layer is counted as halving both
    # spatial dimensions.
    features = channels * width * height // 2**(2 * len(pool_stack))
    fc_stack = []
    for n_out in spec_linear:
        fc = nn.Linear(in_features=features, out_features=n_out)
        # Glorot weight initialization.
        nn.init.xavier_uniform_(fc.weight, calculate_gain('linear'))
        fc_stack.append(fc)
        norm_stack.append(
            nn.BatchNorm1d(n_out, affine=useAffineTransformInBatchNorm))
        features = n_out

    # Final output layer (registered before the module lists, as before).
    self.out_layer = nn.Linear(in_features=features, out_features=10)
    nn.init.xavier_uniform_(self.out_layer.weight, calculate_gain('linear'))

    self.conv_layers = nn.ModuleList(conv_stack)
    self.max_pooling_layers = nn.ModuleList(pool_stack)
    self.dropout_layers = nn.ModuleList(drop_stack)
    self.linear_layers = nn.ModuleList(fc_stack)
    self.batchNormalizationLayers = nn.ModuleList(norm_stack)

    self.num_conv_layers = len(self.conv_layers)
    self.total_num_layers = self.num_conv_layers + len(self.linear_layers)
def _initialize_weights(self):
    """Orthogonally initialise the stem convolutions and all output branches.

    Layers marked ``True`` below use the ReLU gain, the others the default
    gain of 1. The calls are issued in exactly the listed order.
    """
    relu_gain = init.calculate_gain('relu')

    for stem in (self.conv1, self.conv2):
        init.orthogonal_(stem.weight, relu_gain)

    branches = ('01', '02', '03', '04', '05', '06', '07', '08', '8A',
                '09', '10', '11', '12')

    # First layer of every branch: ReLU gain.
    for tag in branches:
        first = getattr(self, 'layers_out_' + tag)[0]
        init.orthogonal_(first.weight, relu_gain)

    # Remaining layers: (branch, index inside the branch, use ReLU gain).
    later_layers = (
        ('01', 3, False), ('02', 2, False), ('03', 2, False),
        ('04', 2, False), ('05', 3, True), ('06', 3, True),
        ('07', 3, True), ('08', 2, False), ('8A', 3, True),
        ('09', 3, False), ('10', 3, False), ('11', 3, True),
        ('12', 3, True),
        ('05', 5, False), ('06', 5, False), ('07', 5, False),
        ('8A', 5, False), ('11', 5, False), ('12', 5, False),
    )
    for tag, position, relu_follows in later_layers:
        layer = getattr(self, 'layers_out_' + tag)[position]
        if relu_follows:
            init.orthogonal_(layer.weight, relu_gain)
        else:
            init.orthogonal_(layer.weight)
def init_weights(self):
    """Xavier-uniform initialise the four parameter tensors with the tanh gain.

    Affects ``positions``, ``relation_unbinder``, ``relation_classifier``
    and ``argument_classifier``, in that order.
    """
    tanh_gain = calculate_gain('tanh')
    for attr_name in ('positions', 'relation_unbinder',
                      'relation_classifier', 'argument_classifier'):
        xavier_uniform_(getattr(self, attr_name).data, tanh_gain)
def init_weights(self):
    """Xavier-uniform initialise ``self.zeroth_tuple`` with the tanh gain."""
    tanh_gain = calculate_gain('tanh')
    target = self.zeroth_tuple.data
    xavier_uniform_(target, tanh_gain)
y = self.conv2(y) if self.norm2: y = self.norm2(y) return self.a2(x + y) latent_dim = 128 batch_size = 64 use_gpu = True # encoder network h = 128 resample = nn.AvgPool2d norm = nn.BatchNorm2d #None a, g = nn.ReLU, init.calculate_gain('relu') groups = 1 #h//8 E = nn.Sequential( nn.Conv2d(3, h, 5, 1, 2), resample(2), a(), ResBlock(h, activation=a, norm=norm, init_gain=g, groups=groups), resample(2), ResBlock(h, activation=a, norm=norm, init_gain=g, groups=groups), resample(2), ResBlock(h, activation=a, norm=norm, init_gain=g, groups=groups), ChannelsToLinear(h * 16, latent_dim)) for layer in (0, 8): init.xavier_normal(E[layer].weight, g) t = Variable(torch.randn(batch_size, 3, 32, 32)) assert E(t).size() == (batch_size, latent_dim)
def _initialize_weights(self):
    """Orthogonally initialise the four convolution weights.

    ``conv1`` to ``conv3`` use the ReLU gain; ``conv4`` uses the default
    gain of 1.
    """
    # ``init.orthogonal`` is a deprecated alias; ``orthogonal_`` is the
    # supported in-place initialiser and produces the same result.
    relu_gain = init.calculate_gain('relu')
    init.orthogonal_(self.conv1.weight, relu_gain)
    init.orthogonal_(self.conv2.weight, relu_gain)
    init.orthogonal_(self.conv3.weight, relu_gain)
    init.orthogonal_(self.conv4.weight)
def __init__(self, args):
    """Build the convolutional front-end and the linear head.

    Reads ``args.capacity``, ``args.hcnn_undertones``,
    ``args.hcnn_overtones`` and ``args.hcnn_onlyinput``. When
    ``hcnn_onlyinput`` is set, harmonic stacking is applied to the input
    only and the later stacking steps become ``nn.Identity``.
    """
    super().__init__()

    # these parameters are set to what lasagne.layers.BatchNorm implements
    bn_param = {
        'eps': 1e-4,        # just like in lasagne
        'momentum': 0.1,    # 'alpha' in lasagne
        'affine': True,     # we learn a translation, called 'beta' in the paper and lasagne
        'track_running_stats': True,
    }

    n_under, n_over = args.hcnn_undertones, args.hcnn_overtones
    hcnn_mult = n_under + n_over
    # input capacity for ordinary conv layers
    conv_in_cap = args.capacity
    # input capacity for conv layers after harmonic stacking
    if args.hcnn_onlyinput:
        hcnn_conv_in_cap = conv_in_cap
    else:
        hcnn_conv_in_cap = conv_in_cap * hcnn_mult
    conv_out_cap = args.capacity

    def restack(first_arg):
        # Harmonic stacking between conv stages, unless input-only.
        if args.hcnn_onlyinput:
            return nn.Identity()
        return HarmonicStacking(first_arg, n_under, n_over)

    conv_layers = [
        HarmonicStacking(48, n_under, n_over),
        nn.Conv2d(1 * hcnn_mult, conv_out_cap, (3, 3), padding=(1, 1),
                  bias=False),
        nn.BatchNorm2d(conv_out_cap, **bn_param),
        nn.ReLU(),
        restack(48),
        nn.Conv2d(hcnn_conv_in_cap, conv_out_cap, (3, 3), padding=(0, 0),
                  bias=False),
        nn.BatchNorm2d(conv_out_cap, **bn_param),
        nn.ReLU(),
        nn.MaxPool2d((1, 2)),
        nn.Dropout2d(0.25),
        restack(24),
        nn.Conv2d(hcnn_conv_in_cap, conv_out_cap * 2, (3, 3),
                  padding=(0, 0), bias=False),
        nn.BatchNorm2d(conv_out_cap * 2, **bn_param),
        nn.ReLU(),
        nn.MaxPool2d((1, 2)),
        nn.Dropout2d(0.25),
    ]
    self.conv = nn.Sequential(*conv_layers)

    self.n_flat = conv_out_cap * 2 * 1 * 55
    self.linear = nn.Sequential(
        nn.Linear(self.n_flat, 512, bias=False),
        nn.BatchNorm1d(512, **bn_param),
        nn.ReLU(),
        nn.Dropout(0.5),
        # the sigmoid nonlinearity is not missing!
        # during training we do not want it to be applied, only during prediction!
        nn.Linear(512, 88),
    )

    relu_gain = init.calculate_gain('relu')
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            init.xavier_uniform_(m.weight, relu_gain)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # The output layer is re-drawn with the sigmoid gain.
    init.xavier_uniform_(self.linear[-1].weight,
                         init.calculate_gain('sigmoid'))
def reset_parameters(self) -> None:
    """Reset the embedding and, when present, the projection layer.

    After the parent class reset, if ``self.pretrained_embedding`` is not
    ``None`` the weight of ``self.embedding_projection[0]`` is drawn with
    Xavier uniform initialisation using the tanh gain.
    """
    super(EmbeddingWithPretrained, self).reset_parameters()
    if self.pretrained_embedding is not None:
        # ``xavier_uniform_`` replaces the deprecated ``xavier_uniform``.
        init.xavier_uniform_(
            self.embedding_projection[0].weight,
            gain=init.calculate_gain('tanh'))
def _initialize_weights(self):
    """Orthogonally initialise ``stump``, ``body1``, ``body2`` and ``head``.

    The first three use the ReLU gain; ``head`` uses the default gain of 1.
    """
    relu_gain = init.calculate_gain('relu')
    for relu_layer in (self.stump, self.body1, self.body2):
        init.orthogonal_(relu_layer.weight, relu_gain)
    init.orthogonal_(self.head.weight)
def _initialize_weights(self):
    """Orthogonally initialise ``conv1`` to ``conv4``.

    ``conv1``, ``conv2`` and ``conv3`` use the ReLU gain; ``conv4`` uses
    the default gain of 1.
    """
    relu_gain = init.calculate_gain("relu")
    for relu_conv in (self.conv1, self.conv2, self.conv3):
        init.orthogonal_(relu_conv.weight, relu_gain)
    init.orthogonal_(self.conv4.weight)
def create_out_sequence(n_chans, in_filters):
    """Assemble the output stage as an ``nn.Sequential`` of three steps.

    Steps, in order: a weight-scaled kernel-size-1 ``nn.Conv1d`` from
    ``in_filters`` to ``n_chans`` channels (linear gain), a ``Reshape``
    that appends a singleton axis, and ``PixelShuffle2d([1, n_chans])``.
    """
    pointwise_conv = nn.Conv1d(in_filters, n_chans, 1)
    scaled_conv = weight_scale(pointwise_conv, gain=calculate_gain('linear'))
    add_axis = Reshape([[0], [1], [2], 1])
    shuffle = PixelShuffle2d([1, n_chans])
    return nn.Sequential(scaled_conv, add_axis, shuffle)