def __init__(self, version=1.0, num_classes=1000):
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        # parentheses ensure .format applies to the whole concatenated message,
        # not just the second literal
        raise ValueError(("Unsupported SqueezeNet version {version}: "
                          "1.0 or 1.1 expected").format(version=version))
    self.num_classes = num_classes
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256),
        )
    else:
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool2d((1, 1)),
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal_(m.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
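# Aside (illustration, not from the source): the parentheses in the ValueError
# above matter because .format binds tighter than implicit string
# concatenation, so without them the placeholder is never substituted.
buggy = "Unsupported SqueezeNet version {version}: " "1.0 or 1.1 expected".format(version=1.2)
# -> 'Unsupported SqueezeNet version {version}: 1.0 or 1.1 expected'
fixed = ("Unsupported SqueezeNet version {version}: "
         "1.0 or 1.1 expected").format(version=1.2)
# -> 'Unsupported SqueezeNet version 1.2: 1.0 or 1.1 expected'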
def reset_params(self):
    # use the in-place init functions (constant_, normal_); the un-suffixed
    # versions were deprecated and later removed from PyTorch
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_uniform_(m.weight, mode='fan_out')
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=0.001)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def __init__(self, input_num, output_num):
    super(CrossPoolingDir, self).__init__()
    self.input_num = input_num
    self.output_num = output_num
    self.featK = nn.Linear(self.input_num, self.output_num)
    self.featK_bn = nn.BatchNorm1d(self.output_num)
    # Softmax; an explicit dim is required in recent PyTorch (the original
    # passed none, which is deprecated) -- dim=-1 matches SelfPoolingDir
    self.softmax = nn.Softmax(dim=-1)
    init.kaiming_uniform_(self.featK.weight, mode='fan_out')
    init.constant_(self.featK.bias, 0)
    init.constant_(self.featK_bn.weight, 1)
    init.constant_(self.featK_bn.bias, 0)
def __init__(self, depth, pretrained=True, cut_at_pooling=False,
             num_features=0, dropout=0):
    super(ResNet, self).__init__()
    self.depth = depth
    self.pretrained = pretrained
    self.cut_at_pooling = cut_at_pooling
    # Construct the base (pretrained) ResNet
    if depth not in ResNet.__factory:
        raise KeyError("Unsupported depth: {}".format(depth))
    conv0 = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3, bias=False)
    init.kaiming_uniform_(conv0.weight, mode='fan_out')
    self.conv0 = conv0
    self.base = ResNet.__factory[depth](pretrained=pretrained)
    if not self.cut_at_pooling:
        self.num_features = num_features
        self.dropout = dropout
        self.has_embedding = num_features > 0
        out_planes = self.base.fc.in_features
        # Append new layers
        if self.has_embedding:
            self.feat = nn.Linear(out_planes, self.num_features)
            self.feat_bn = nn.BatchNorm1d(self.num_features)
            init.kaiming_uniform_(self.feat.weight, mode='fan_out')
            init.constant_(self.feat.bias, 0)
            init.constant_(self.feat_bn.weight, 1)
            init.constant_(self.feat_bn.bias, 0)
        else:
            self.num_features = out_planes
        if self.dropout > 0:
            self.drop = nn.Dropout(self.dropout)
    if not self.pretrained:
        self.reset_params()
def __init__(self, input_num, output_num):
    super(SelfPoolingDir, self).__init__()
    self.input_num = input_num
    self.output_num = output_num
    # TODO: LSTM
    self.lstm = nn.LSTM(input_size=self.input_num, hidden_size=self.output_num,
                        num_layers=1, batch_first=True, dropout=0)
    self.bilstm = nn.LSTM(input_size=self.input_num, hidden_size=self.output_num,
                          num_layers=1, batch_first=True, dropout=0,
                          bidirectional=True)
    self.lstm_bn = nn.BatchNorm1d(self.output_num)
    # Linear K
    self.featK = nn.Linear(self.input_num, self.output_num)
    self.featK_bn = nn.BatchNorm1d(self.output_num)
    # Linear Q
    self.featQ = nn.Linear(self.input_num, self.output_num)
    self.featQ_bn = nn.BatchNorm1d(self.output_num)
    # Softmax
    self.softmax = nn.Softmax(dim=-1)
    init.kaiming_uniform_(self.featK.weight, mode='fan_out')
    init.constant_(self.featK.bias, 0)
    init.constant_(self.featK_bn.weight, 1)
    init.constant_(self.featK_bn.bias, 0)
    init.kaiming_uniform_(self.featQ.weight, mode='fan_out')
    init.constant_(self.featQ.bias, 0)
    init.constant_(self.featQ_bn.weight, 1)
    init.constant_(self.featQ_bn.bias, 0)
    init.constant_(self.lstm_bn.weight, 1)
    init.constant_(self.lstm_bn.bias, 0)
def reset_params(self):
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
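# Sketch (not from the source) showing that the reset above reproduces
# PyTorch's default nn.Linear initialization: kaiming_uniform_ with a=sqrt(5)
# for the weight, then a uniform bias in [-1/sqrt(fan_in), 1/sqrt(fan_in)].
# Layer sizes are arbitrary; _calculate_fan_in_and_fan_out is a private helper.
import math
import torch.nn as nn
from torch.nn import init

layer = nn.Linear(64, 32)
init.kaiming_uniform_(layer.weight, a=math.sqrt(5))
fan_in, _ = init._calculate_fan_in_and_fan_out(layer.weight)  # fan_in == 64
bound = 1 / math.sqrt(fan_in)
init.uniform_(layer.bias, -bound, bound)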
def __init__(self, num_categories, add_intermediate_layers, num_outputs=1,
             version=1.0, num_classes=1000):
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError(("Unsupported SqueezeNet version {version}: "
                          "1.0 or 1.1 expected").format(version=version))
    # nn.ModuleList rather than a plain Python list, so the intermediate
    # classifiers are registered as submodules (and thus appear in
    # state_dict and move with .to()/.cuda()); note this also lets the
    # init loop below visit their Conv2d layers
    self.intermediate_CLF = nn.ModuleList()
    self.add_intermediate_layers = add_intermediate_layers
    self.num_categories = num_categories
    self.num_outputs = num_outputs
    self.num_classes = num_classes
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256),
        )
        if self.add_intermediate_layers == 2:
            # one early-exit classifier per Fire output: (spatial size, channels)
            for size, channels in [(54, 128), (54, 128), (54, 256), (27, 256),
                                   (27, 384), (27, 384), (27, 512)]:
                self.intermediate_CLF.append(
                    IntermediateClassifier(size, channels, self.num_categories))
                self.num_outputs += 1
    else:
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AvgPool2d(13, stride=1),
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal_(m.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def reset_parameters(self):
    kaiming_uniform_(self.weights, a=math.sqrt(5))
    if self.deformable:
        nn.init.zeros_(self.offset_bias)
def reset_parameters(self):
    init.kaiming_uniform_(self.a_vals, a=math.sqrt(5))
    init.kaiming_uniform_(self.p_vals, a=math.sqrt(5))
def __init__(self, prior, coupling, coupling_type, in_out_dim, hidden_dim,
             hidden_layers, device):
    """Initialize a NICE model.

    Args:
        prior: prior distribution over the latent space.
        coupling: number of coupling layers.
        coupling_type: 'additive' or 'affine'.
        in_out_dim: input/output dimensions.
        hidden_dim: number of units in a hidden layer.
        hidden_layers: number of hidden layers.
        device: run on cpu or gpu.
    """
    super(NICE, self).__init__()
    self.device = device
    self.prior = prior
    self.in_out_dim = in_out_dim
    self.coupling = coupling
    self.coupling_type = coupling_type
    half_dim = in_out_dim // 2
    odd, even = 1, 0
    # the four layers and their init loops were duplicated verbatim in both
    # branches; factor them out without changing behavior
    if coupling_type == 'additive':
        def make_coupling(mask_config):
            return AdditiveCoupling(
                in_out_dim, mask_config,
                _build_relu_network(half_dim, hidden_dim, hidden_layers))
    elif coupling_type == 'affine':
        def make_coupling(mask_config):
            return AffineCoupling(
                in_out_dim, mask_config,
                _build_relu_network(half_dim, hidden_dim, hidden_layers, True))
    else:
        raise ValueError("Unsupported coupling type: {}".format(coupling_type))
    self.layer1 = make_coupling(odd)
    self.layer2 = make_coupling(even)
    self.layer3 = make_coupling(odd)
    self.layer4 = make_coupling(even)
    self.scaling_diag = Scaling(in_out_dim)
    # randomly initialize weights: Kaiming-uniform for matrices,
    # small normal for bias vectors
    for layer in (self.layer1, self.layer2, self.layer3, self.layer4):
        for p in layer.parameters():
            if p.dim() > 1:
                init.kaiming_uniform_(p, nonlinearity='relu')
            else:
                init.normal_(p, mean=0., std=0.001)
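# Self-contained sketch (assumed sizes, not from the source) of the
# initialization pattern used above: matrix-shaped parameters get
# Kaiming-uniform, vector-shaped ones (biases) get a small normal. The plain
# Sequential stands in for _build_relu_network.
import torch.nn as nn
from torch.nn import init

net = nn.Sequential(nn.Linear(392, 1000), nn.ReLU(), nn.Linear(1000, 392))
for p in net.parameters():
    if p.dim() > 1:
        init.kaiming_uniform_(p, nonlinearity='relu')
    else:
        init.normal_(p, mean=0., std=0.001)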
def reset(self):
    kaiming_uniform_(self.classifier[0].weight)
def reset_parameters(self):
    init.kaiming_uniform_(self.conv0_kernel, a=math.sqrt(5))
def weight_init(m):
    if isinstance(m, (nn.Linear, nn.Conv2d, nn.Conv3d)):
        init.kaiming_uniform_(m.weight)
        if m.bias is not None:  # guard: these layers may be built with bias=False
            init.zeros_(m.bias)
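# Typical usage of an init function in this style (the model here is a
# hypothetical stand-in): Module.apply visits every submodule recursively.
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3, bias=False), nn.ReLU(), nn.Linear(16, 10))
model.apply(weight_init)  # the Conv2d has no bias here, hence the guard above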
def __init__(self, version='1_1', num_classes=5):
    super(SqueezeNet, self).__init__()
    self.num_classes = num_classes
    self.version = version
    final_conv = None
    if version == '1_0':
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256))
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.masks = nn.Sequential(nn.Dropout(p=0.5), final_conv)
        self.attention = nn.Sigmoid()
        self.head = nn.Sequential(nn.ReLU(inplace=True),
                                  nn.AdaptiveAvgPool2d((1, 1)),
                                  nn.LogSoftmax(dim=1))
    elif version == '1_1':
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256))
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.masks = nn.Sequential(nn.Dropout(p=0.5), final_conv)
        self.attention = nn.Sigmoid()
        self.head = nn.Sequential(nn.ReLU(inplace=True),
                                  nn.AdaptiveAvgPool2d((1, 1)),
                                  nn.LogSoftmax(dim=1))
    elif version == 'FC':
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256))
        # Fully connected head instead of a final convolution
        final_fc = nn.Linear(512 * 13 * 13, self.num_classes)
        self.head = nn.Sequential(nn.Dropout(p=0.5), final_fc,
                                  nn.LogSoftmax(dim=1))
    else:
        raise ValueError(("Unsupported SqueezeNet version {version}: "
                          "1_0/1_1/FC expected").format(version=version))
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:  # stays None for the 'FC' variant
                init.normal_(m.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def reset_parameters(self):
    for p in self.parameters():
        if p.dim() > 1:
            kaiming_uniform_(p)
def reset_parameters(self):
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        # for a linear weight, fan_in == self.in_features
        bound = 1 / math.sqrt(self.in_features)
        init.uniform_(self.bias, -bound, bound)
def conv_init(self, layer, lower=-1, upper=1):
    # note: lower/upper are currently unused; the weight gets Kaiming-uniform init
    kaiming_uniform_(layer.weight)
def reset_parameters(self):
    init.kaiming_uniform_(self.weight)
def reset_parameters(self):
    # check for None, not just presence: a bias registered as None would
    # make init.constant_ fail under a bare hasattr check
    if getattr(self, 'bias', None) is not None:
        init.constant_(self.bias, 0.)
    init.kaiming_uniform_(self.weight, nonlinearity='sigmoid')
def init_weights(m):
    if type(m) == nn.Conv3d:
        init.kaiming_uniform_(m.weight, nonlinearity='leaky_relu')
def reset_parameters(self):
    init.kaiming_uniform_(self.importance, a=math.sqrt(5))
    init.zeros_(self.scale)
    if getattr(self, 'bias', None) is not None:  # guard against bias=None
        init.zeros_(self.bias)
def reset_parameters(self):
    init.kaiming_uniform_(self.weight)
    if self.use_bias:
        init.zeros_(self.bias)
def init_weight(self):
    init.kaiming_uniform_(self.affine_a.weight, mode='fan_in')
    init.kaiming_uniform_(self.affine_b.weight, mode='fan_in')
    self.affine_a.bias.data.fill_(0)
    self.affine_b.bias.data.fill_(0)
def weight_init(m):
    if m.__class__.__name__ == 'Linear':
        # kaiming_uniform_ works in place; the original copy_ round-trip
        # (m.weight.data.copy_(kaiming_uniform_(m.weight.data))) was redundant
        kaiming_uniform_(m.weight.data)
        m.bias.data.fill_(0)
def reset_parameters(self):
    init.kaiming_uniform_(self.weight)
    init.kaiming_uniform_(self.weight_classifier)
def init_weights(self): """Initialize the weights.""" for m in self.classifier.modules(): if isinstance(m, nn.Linear): init.kaiming_uniform_(m.weight, mode='fan_in') m.bias.data.fill_(0)
def _init_params(self):
    for name, module in self.named_modules():
        if isinstance(module, nn.Conv2d):
            init.kaiming_uniform_(module.weight)
            if module.bias is not None:
                init.constant_(module.bias, 0)
def init_params(self):
    init.kaiming_uniform_(self.weight)
    # was `if self.use_bias is not None:`, which is True even when
    # use_bias=False and would then crash on a missing bias
    if self.use_bias:
        _out_feats_bias = self.bias.size(0)
        stdv_b = 1. / np.sqrt(_out_feats_bias)
        init.uniform_(self.bias, -stdv_b, stdv_b)
def weight_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data)
def reset_parameters(self):
    init.kaiming_uniform_(self.theta, a=math.sqrt(5))
    fan_in, _ = init._calculate_fan_in_and_fan_out(self.theta)
    bound = 1 / math.sqrt(fan_in)
    init.uniform_(self.b, -bound, bound)
def reset_parameters(self):
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.padding_idx is not None:
        with torch.no_grad():
            self.weight[self.padding_idx].fill_(0)
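# Runnable sketch (assumed shapes, not from the source) of the pattern above:
# after re-initializing an embedding, the padding row is re-zeroed under
# no_grad and stays zero.
import math
import torch
import torch.nn as nn
from torch.nn import init

emb = nn.Embedding(100, 16, padding_idx=0)
init.kaiming_uniform_(emb.weight, a=math.sqrt(5))
with torch.no_grad():
    emb.weight[emb.padding_idx].fill_(0)
assert emb.weight[0].abs().sum().item() == 0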
def reset_parameters(self) -> None:
    init.kaiming_uniform_(self.weight, a=np.sqrt(15))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / np.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
def __init__(
    self,
    config,
    alpha,
    init_emb=None,
    bg_init=None,
    device="cpu",
    classify_from_covars=True,
    classify_from_topics=True,
    classify_from_doc_reps=True,
):
    super(torchScholar, self).__init__()

    # load the configuration
    self.vocab_size = config["vocab_size"]
    self.words_emb_dim = config["embedding_dim"]
    self.zero_out_embeddings = config["zero_out_embeddings"]
    self.reconstruct_bow = config["reconstruct_bow"]
    self.doc_reps_dim = config["doc_reps_dim"]
    self.attend_over_doc_reps = config["attend_over_doc_reps"]
    self.use_doc_layer = config["use_doc_layer"]
    self.doc_reconstruction_weight = config["doc_reconstruction_weight"]
    self.doc_reconstruction_temp = config["doc_reconstruction_temp"]
    self.doc_reconstruction_min_count = config["doc_reconstruction_min_count"]
    self.n_topics = config["n_topics"]
    self.n_labels = config["n_labels"]
    self.n_prior_covars = config["n_prior_covars"]
    self.n_topic_covars = config["n_topic_covars"]
    self.classifier_layers = config["classifier_layers"]
    self.classifier_loss_weight = config["classifier_loss_weight"]
    self.use_interactions = config["use_interactions"]
    self.l1_beta_reg = config["l1_beta_reg"]
    self.l1_beta_c_reg = config["l1_beta_c_reg"]
    self.l1_beta_ci_reg = config["l1_beta_ci_reg"]
    self.l2_prior_reg = config["l2_prior_reg"]
    self.device = device
    self.classify_from_covars = classify_from_covars
    self.classify_from_topics = classify_from_topics
    self.classify_from_doc_reps = classify_from_doc_reps

    # create a layer for prior covariates to influence the document prior
    if self.n_prior_covars > 0:
        self.prior_covar_weights = nn.Linear(
            self.n_prior_covars, self.n_topics, bias=False
        )
    else:
        self.prior_covar_weights = None

    # create the encoder
    emb_size = self.words_emb_dim
    classifier_input_dim = 0
    if self.classify_from_topics:
        classifier_input_dim = self.n_topics
    if self.n_prior_covars > 0:
        emb_size += self.n_prior_covars
        if self.classify_from_covars:
            classifier_input_dim += self.n_prior_covars
    if self.n_topic_covars > 0:
        emb_size += self.n_topic_covars
        if self.classify_from_covars:
            classifier_input_dim += self.n_topic_covars
    if self.doc_reps_dim is not None:
        if self.attend_over_doc_reps:
            # allocate directly on the target device; Parameter(...).to(device)
            # can return a plain Tensor and silently skip parameter registration
            self.attention_vec = torch.nn.Parameter(
                torch.rand(self.doc_reps_dim, device=self.device)
            )
        if self.use_doc_layer:
            emb_size += self.words_emb_dim
            self.doc_layer = nn.Linear(
                self.doc_reps_dim, self.words_emb_dim
            ).to(self.device)
        else:
            emb_size += self.doc_reps_dim
        if self.classify_from_doc_reps:
            classifier_input_dim += self.doc_reps_dim
    if self.n_labels > 0:
        emb_size += self.n_labels

    self.encoder_dropout_layer = nn.Dropout(p=0.2)

    self.embeddings_x = torch.nn.ParameterDict()
    # initialize each embedding
    for emb_name, (emb_data, update) in init_emb.items():
        self.embeddings_x[emb_name] = torch.nn.Parameter(
            torch.zeros(
                size=(self.words_emb_dim, self.vocab_size)
            ).to(self.device),
            requires_grad=update,
        )
        if emb_data is not None:
            self.embeddings_x[emb_name].data.copy_(torch.from_numpy(emb_data))
        else:
            # Xavier is the effective initializer here; the original also
            # called kaiming_uniform_ first, which was immediately overwritten
            xavier_uniform_(self.embeddings_x[emb_name])

    # create the mean and variance components of the VAE
    self.mean_layer = nn.Linear(emb_size, self.n_topics)
    self.logvar_layer = nn.Linear(emb_size, self.n_topics)

    self.mean_bn_layer = nn.BatchNorm1d(
        self.n_topics, eps=0.001, momentum=0.001, affine=True
    )
    self.mean_bn_layer.weight.data.copy_(
        torch.from_numpy(np.ones(self.n_topics))
    )
    self.mean_bn_layer.weight.requires_grad = False
    self.logvar_bn_layer = nn.BatchNorm1d(
        self.n_topics, eps=0.001, momentum=0.001, affine=True
    )
    self.logvar_bn_layer.weight.data.copy_(
        torch.from_numpy(np.ones(self.n_topics))
    )
    self.logvar_bn_layer.weight.requires_grad = False

    self.z_dropout_layer = nn.Dropout(p=0.2)

    # create the decoder
    self.beta_layer = nn.Linear(self.n_topics, self.vocab_size)
    xavier_uniform_(self.beta_layer.weight)
    if bg_init is not None:
        self.beta_layer.bias.data.copy_(torch.from_numpy(bg_init))
        self.beta_layer.bias.requires_grad = False
    self.beta_layer = self.beta_layer.to(self.device)

    if self.n_topic_covars > 0:
        self.beta_c_layer = nn.Linear(
            self.n_topic_covars, self.vocab_size, bias=False
        ).to(self.device)
        if self.use_interactions:
            self.beta_ci_layer = nn.Linear(
                self.n_topics * self.n_topic_covars, self.vocab_size, bias=False
            ).to(self.device)

    # create the classifier
    if self.n_labels > 0:
        if self.classifier_layers == 0:
            self.classifier_layer_0 = nn.Linear(
                classifier_input_dim, self.n_labels
            ).to(self.device)
        else:
            self.classifier_layer_0 = nn.Linear(
                classifier_input_dim, classifier_input_dim
            ).to(self.device)
            self.classifier_layer_1 = nn.Linear(
                classifier_input_dim, self.n_labels
            ).to(self.device)

    # create a final batchnorm layer
    self.eta_bn_layer = nn.BatchNorm1d(
        self.vocab_size, eps=0.001, momentum=0.001, affine=True
    ).to(self.device)
    self.eta_bn_layer.weight.data.copy_(
        torch.from_numpy(np.ones(self.vocab_size)).to(self.device)
    )
    self.eta_bn_layer.weight.requires_grad = False

    # create the document prior terms
    prior_mean = (np.log(alpha).T - np.mean(np.log(alpha), 1)).T
    prior_var = (
        ((1.0 / alpha) * (1 - (2.0 / self.n_topics))).T
        + (1.0 / (self.n_topics * self.n_topics)) * np.sum(1.0 / alpha, 1)
    ).T
    prior_mean = np.array(prior_mean).reshape((1, self.n_topics))
    prior_logvar = np.array(np.log(prior_var)).reshape((1, self.n_topics))
    self.prior_mean = torch.from_numpy(prior_mean).to(self.device)
    self.prior_mean.requires_grad = False
    self.prior_logvar = torch.from_numpy(prior_logvar).to(self.device)
    self.prior_logvar.requires_grad = False
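# Sketch (hypothetical module, not from the source) of the registration
# pitfall fixed above: calling .to(device) on a freshly built Parameter can
# return a plain Tensor, so the attribute would never appear in .parameters().
# Allocating on the target device inside the Parameter keeps it registered.
import torch
import torch.nn as nn

class Attn(nn.Module):
    def __init__(self, dim, device):
        super().__init__()
        self.attention_vec = nn.Parameter(torch.rand(dim, device=device))

m = Attn(16, torch.device("cpu"))
assert sum(p.numel() for p in m.parameters()) == 16  # the vector is registered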
def weights_init(m):
    if isinstance(m, (nn.Conv2d, nn.Conv3d)):
        # "Kaiming" is the given name of Kaiming He, after whom the init is named
        init.kaiming_uniform_(m.weight)
def reset_parameters(self) -> None:
    for layer in self[1::2]:
        init.kaiming_uniform_(layer.weight)
        if getattr(layer, 'bias', None) is not None:
            init.constant_(layer.bias, 0.)
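# Assumed usage context (not from the source): the module is an nn.Sequential
# subclass whose odd indices hold the weighted layers, so self[1::2] selects
# exactly those; a plain Sequential illustrates the slicing.
import torch.nn as nn
from torch.nn import init

seq = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
for layer in seq[1::2]:  # indices 1 and 3: the two Linear layers
    init.kaiming_uniform_(layer.weight)
    if getattr(layer, 'bias', None) is not None:
        init.constant_(layer.bias, 0.)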