Example #1
    def __init__(self, version=1.0, num_classes=1000):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            final_conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1))
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
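The initialization loop above is the part that uses kaiming_uniform_: every nn.Conv2d except the designated final convolution gets He-uniform weights, while the final convolution gets a small normal init. A minimal, self-contained sketch of the same pattern (TinyNet is a hypothetical stand-in, not the original SqueezeNet):

import torch
import torch.nn as nn
from torch.nn import init

class TinyNet(nn.Module):
    def __init__(self, num_classes=10):
        super(TinyNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, stride=2),
            nn.ReLU(inplace=True))
        final_conv = nn.Conv2d(8, num_classes, kernel_size=1)
        self.classifier = nn.Sequential(final_conv, nn.AdaptiveAvgPool2d((1, 1)))
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)  # classifier conv: small normal
                else:
                    init.kaiming_uniform_(m.weight)             # all other convs: He uniform
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        return self.classifier(self.features(x))

print(TinyNet()(torch.randn(2, 3, 32, 32)).shape)  # torch.Size([2, 10, 1, 1])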
Example #2
 def reset_params(self):
     for m in self.modules():
         if isinstance(m, nn.Conv2d):
             init.kaiming_uniform_(m.weight, mode='fan_out')
             if m.bias is not None:
                 init.constant_(m.bias, 0)
         elif isinstance(m, nn.BatchNorm2d):
             init.constant_(m.weight, 1)
             init.constant_(m.bias, 0)
         elif isinstance(m, nn.Linear):
             init.normal_(m.weight, std=0.001)
             if m.bias is not None:
                 init.constant_(m.bias, 0)
Example #3
    def __init__(self, input_num, output_num):
        super(CrossPoolingDir, self).__init__()
        self.input_num = input_num
        self.output_num = output_num

        self.featK = nn.Linear(self.input_num, self.output_num)
        self.featK_bn = nn.BatchNorm1d(self.output_num)

        # Softmax
        self.softmax = nn.Softmax(dim=-1)

        init.kaiming_uniform_(self.featK.weight, mode='fan_out')
        init.constant_(self.featK.bias, 0)
        init.constant_(self.featK_bn.weight, 1)
        init.constant_(self.featK_bn.bias, 0)
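Here kaiming_uniform_ is called with mode='fan_out', which scales the sampling bound by the number of output units rather than inputs. A quick standalone comparison of the two modes (assuming only torch is available):

import torch.nn as nn
from torch.nn import init

layer = nn.Linear(256, 64)   # weight shape (64, 256): fan_in=256, fan_out=64

init.kaiming_uniform_(layer.weight, mode='fan_in')   # bound = sqrt(6 / 256)
print(layer.weight.std().item())                     # roughly sqrt(2/256) ~= 0.088

init.kaiming_uniform_(layer.weight, mode='fan_out')  # bound = sqrt(6 / 64)
print(layer.weight.std().item())                     # roughly sqrt(2/64) ~= 0.177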
Example #4
    def __init__(self, depth, pretrained=True, cut_at_pooling=False,
                 num_features=0, dropout=0):
        super(ResNet, self).__init__()

        self.depth = depth
        self.pretrained = pretrained
        self.cut_at_pooling = cut_at_pooling

        # Construct the base (pretrained) ResNet
        if depth not in ResNet.__factory:
            raise KeyError("Unsupported depth:", depth)

        conv0 = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3, bias=False)
        init.kaiming_uniform_(conv0.weight, mode='fan_out')

        self.conv0 = conv0
        self.base = ResNet.__factory[depth](pretrained=pretrained)

        if not self.cut_at_pooling:
            self.num_features = num_features
            self.dropout = dropout
            self.has_embedding = num_features > 0

            out_planes = self.base.fc.in_features

            # Append new layers
            if self.has_embedding:
                self.feat = nn.Linear(out_planes, self.num_features)
                self.feat_bn = nn.BatchNorm1d(self.num_features)
                init.kaiming_uniform_(self.feat.weight, mode='fan_out')
                init.constant_(self.feat.bias, 0)
                init.constant_(self.feat_bn.weight, 1)
                init.constant_(self.feat_bn.bias, 0)
            else:
                self.num_features = out_planes

            if self.dropout > 0:
                self.drop = nn.Dropout(self.dropout)

        if not self.pretrained:
            self.reset_params()
Example #5
    def __init__(self, input_num, output_num):
        super(SelfPoolingDir, self).__init__()
        self.input_num = input_num
        self.output_num = output_num

        # todo: LSTM
        self.lstm = nn.LSTM(input_size=self.input_num,
                            hidden_size=self.output_num, num_layers=1, batch_first=True, dropout=0)
        self.bilstm = nn.LSTM(input_size=self.input_num, hidden_size=self.output_num,
                              num_layers=1, batch_first=True, dropout=0, bidirectional=True)
        self.lstm_bn = nn.BatchNorm1d(self.output_num)

        ## Linear K
        self.featK = nn.Linear(self.input_num, self.output_num)
        self.featK_bn = nn.BatchNorm1d(self.output_num)

        ## Linear_Q
        self.featQ = nn.Linear(self.input_num, self.output_num)
        self.featQ_bn = nn.BatchNorm1d(self.output_num)


        ## Softmax
        self.softmax = nn.Softmax(dim=-1)

        init.kaiming_uniform_(self.featK.weight, mode='fan_out')
        init.constant_(self.featK.bias, 0)

        init.constant_(self.featK_bn.weight, 1)
        init.constant_(self.featK_bn.bias, 0)

        init.kaiming_uniform_(self.featQ.weight, mode='fan_out')
        init.constant_(self.featQ.bias, 0)
        init.constant_(self.featQ_bn.weight, 1)
        init.constant_(self.featQ_bn.bias, 0)
        init.constant_(self.lstm_bn.weight, 1)
        init.constant_(self.lstm_bn.bias, 0)
Example #6
	def reset_params(self):
		init.kaiming_uniform_(self.weight, a=math.sqrt(5))
		if self.bias is not None:
			fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
			bound = 1 / math.sqrt(fan_in)
			init.uniform_(self.bias, -bound, bound)
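This reset is the same recipe PyTorch itself applies by default to nn.Linear and nn.Conv2d: with a=sqrt(5) the He-uniform bound works out to 1/sqrt(fan_in), matching the bias bound. A small standalone check (assuming only torch):

import math
import torch
from torch.nn import init

w = torch.empty(16, 32)                    # (out_features, in_features), so fan_in = 32
init.kaiming_uniform_(w, a=math.sqrt(5))   # gain = sqrt(2/(1+5)) -> bound = 1/sqrt(fan_in)

fan_in, _ = init._calculate_fan_in_and_fan_out(w)
bound = 1 / math.sqrt(fan_in)
b = torch.empty(16)
init.uniform_(b, -bound, bound)

print(w.abs().max().item() <= bound)       # True: weights share the bias bound 1/sqrt(fan_in)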
Example #7
    def __init__(self,
                 num_categories,
                 add_intermediate_layers,
                 num_outputs=1,
                 version=1.0,
                 num_classes=1000):
        super(SqueezeNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.intermediate_CLF = []
        self.add_intermediate_layers = add_intermediate_layers
        self.num_categories = num_categories
        self.num_outputs = num_outputs

        self.num_classes = num_classes
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256),
            )
            if self.add_intermediate_layers == 2:
                self.intermediate_CLF.append(
                    IntermediateClassifier(54, 128, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(54, 128, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(54, 256, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(27, 256, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(27, 384, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(27, 384, self.num_categories))
                self.num_outputs += 1

                self.intermediate_CLF.append(
                    IntermediateClassifier(27, 512, self.num_categories))
                self.num_outputs += 1

        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
        # Final convolution is initialized differently from the rest
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(nn.Dropout(p=0.5), final_conv,
                                        nn.ReLU(inplace=True),
                                        nn.AvgPool2d(13, stride=1))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
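One detail worth noting in the snippet above: self.intermediate_CLF is a plain Python list, so modules stored in it are not registered as submodules and their parameters do not appear in model.parameters(). A minimal sketch of the registered variant using nn.ModuleList (TinyHead is a hypothetical placeholder, not the original IntermediateClassifier):

import torch
import torch.nn as nn

class TinyHead(nn.Module):
    def __init__(self, channels, num_categories):
        super(TinyHead, self).__init__()
        self.fc = nn.Linear(channels, num_categories)
    def forward(self, x):
        return self.fc(x.mean(dim=(2, 3)))   # global average pool, then classify

class Backbone(nn.Module):
    def __init__(self):
        super(Backbone, self).__init__()
        # nn.ModuleList registers each head, so its parameters are trained and saved
        self.intermediate_CLF = nn.ModuleList(
            [TinyHead(128, 10), TinyHead(256, 10)])

print(sum(p.numel() for p in Backbone().parameters()))  # non-zero: heads are registered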
Example #8
 def reset_parameters(self):
     kaiming_uniform_(self.weights, a=math.sqrt(5))
     if self.deformable:
         nn.init.zeros_(self.offset_bias)
     return
Example #9
 def reset_parameters(self):
     init.kaiming_uniform_(self.a_vals, a=math.sqrt(5))
     init.kaiming_uniform_(self.p_vals, a=math.sqrt(5))
Example #10
    def __init__(self, prior, coupling, coupling_type, in_out_dim, hidden_dim,
                 hidden_layers, device):
        """Initialize a NICE.

        Args:
            coupling_type: 'additive' or 'affine'
            coupling: number of coupling layers.
            in_out_dim: input/output dimensions.
            hidden_dim: number of units in a hidden layer.
            hidden_layers: number of hidden layers.
            device: run on cpu or gpu
        """
        super(NICE, self).__init__()
        self.device = device
        self.prior = prior
        self.in_out_dim = in_out_dim
        self.coupling = coupling
        self.coupling_type = coupling_type
        half_dim = int(in_out_dim / 2)
        if coupling_type == 'additive':
            odd = 1
            even = 0
            self.layer1 = AdditiveCoupling(
                in_out_dim, odd,
                _build_relu_network(half_dim, hidden_dim, hidden_layers))
            self.layer2 = AdditiveCoupling(
                in_out_dim, even,
                _build_relu_network(half_dim, hidden_dim, hidden_layers))
            self.layer3 = AdditiveCoupling(
                in_out_dim, odd,
                _build_relu_network(half_dim, hidden_dim, hidden_layers))
            self.layer4 = AdditiveCoupling(
                in_out_dim, even,
                _build_relu_network(half_dim, hidden_dim, hidden_layers))
            self.scaling_diag = Scaling(in_out_dim)

            # randomly initialize weights:
            for p in self.layer1.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer2.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer3.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer4.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)

        elif coupling_type == 'affine':
            odd = 1
            even = 0
            affineBool = True
            self.layer1 = AffineCoupling(
                in_out_dim, odd,
                _build_relu_network(half_dim, hidden_dim, hidden_layers,
                                    affineBool))
            self.layer2 = AffineCoupling(
                in_out_dim, even,
                _build_relu_network(half_dim, hidden_dim, hidden_layers,
                                    affineBool))
            self.layer3 = AffineCoupling(
                in_out_dim, odd,
                _build_relu_network(half_dim, hidden_dim, hidden_layers,
                                    affineBool))
            self.layer4 = AffineCoupling(
                in_out_dim, even,
                _build_relu_network(half_dim, hidden_dim, hidden_layers,
                                    affineBool))
            self.scaling_diag = Scaling(in_out_dim)

            # randomly initialize weights:
            for p in self.layer1.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer2.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer3.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
            for p in self.layer4.parameters():
                if len(p.shape) > 1:
                    init.kaiming_uniform_(p, nonlinearity='relu')
                else:
                    init.normal_(p, mean=0., std=0.001)
        else:
            raise ValueError('Coupling Type Error.')
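The four per-layer loops above apply the same rule each time; a hedged sketch of a helper that factors it out (the helper name is mine, not from the original project): kaiming_uniform_ for weight matrices, a small normal init for 1-D parameters such as biases.

from torch.nn import init

def _init_coupling_params(*modules):
    # same rule as the loops above, applied to any number of coupling layers
    for module in modules:
        for p in module.parameters():
            if p.dim() > 1:
                init.kaiming_uniform_(p, nonlinearity='relu')  # weight matrices
            else:
                init.normal_(p, mean=0., std=0.001)            # biases / 1-D params

# inside __init__ this would replace the repeated loops:
# _init_coupling_params(self.layer1, self.layer2, self.layer3, self.layer4)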
Example #11
 def reset(self):
     kaiming_uniform_(self.classifier[0].weight)
Example #12
 def reset_parameters(self):
     init.kaiming_uniform_(self.conv0_kernel, a=math.sqrt(5))
Example #13
 def weight_init(m):
     if isinstance(m, (nn.Linear, nn.Conv2d, nn.Conv3d)):
         init.kaiming_uniform_(m.weight)
         if m.bias is not None:
             init.zeros_(m.bias)
Example #14
    def __init__(self, version='1_1', num_classes=5):
        super(SqueezeNet, self).__init__()
        self.num_classes = num_classes
        self.version = version
        final_conv = None
        if version == '1_0':
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 16, 64, 64), Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 32, 128, 128), Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192), Fire(384, 64, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 64, 256, 256))
            # Final convolution is initialized differently from the rest
            final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
            self.masks = nn.Sequential(nn.Dropout(p=0.5), final_conv)
            self.attention = nn.Sigmoid()
            self.head = nn.Sequential(nn.ReLU(inplace=True),
                                      nn.AdaptiveAvgPool2d((1, 1)),
                                      nn.LogSoftmax(dim=1))
        elif version == '1_1':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64), Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128), Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192), Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256), Fire(512, 64, 256, 256))
            # Final convolution is initialized differently from the rest
            final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
            self.masks = nn.Sequential(nn.Dropout(p=0.5), final_conv)
            self.attention = nn.Sigmoid()
            self.head = nn.Sequential(nn.ReLU(inplace=True),
                                      nn.AdaptiveAvgPool2d((1, 1)),
                                      nn.LogSoftmax(dim=1))
        elif version == 'FC':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64), Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128), Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192), Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256), Fire(512, 64, 256, 256))
            # Final convolution is initialized differently from the rest
            final_fc = nn.Linear(512 * 13 * 13, self.num_classes)
            self.head = nn.Sequential(nn.Dropout(p=0.5), final_fc,
                                      nn.LogSoftmax(dim=1))
        else:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1_0/1_1/FC expected".format(version=version))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
Example #15
 def reset_parameters(self):
     for p in self.parameters():
         if p.dim() > 1:
             kaiming_uniform_(p)
Example #16
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.bias is not None:
         bound = 1 / math.sqrt(self.in_features)
         init.uniform_(self.bias, -bound, bound)
Example #17
 def conv_init(self, layer, lower=-1, upper=1):
     kaiming_uniform_(layer.weight)
Example #18
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight)
Example #19
 def reset_parameters(self):
     if hasattr(self, 'bias'):
         init.constant_(self.bias, 0.)
     init.kaiming_uniform_(self.weight, nonlinearity='sigmoid')
Example #20
def init_weights(m):
    if type(m) == nn.Conv3d:
        init.kaiming_uniform_(m.weight, nonlinearity='leaky_relu')
Example #21
 def reset_parameters(self):
     init.kaiming_uniform_(self.importance, a=math.sqrt(5))
     init.zeros_(self.scale)
     if hasattr(self, 'bias'):
         init.zeros_(self.bias)
Example #22
File: model.py  Project: luanshiyinyang/GNN
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight)
     if self.use_bias:
         init.zeros_(self.bias)
Example #23
 def init_weight(self):
     init.kaiming_uniform_(self.affine_a.weight, mode='fan_in')
     init.kaiming_uniform_(self.affine_b.weight, mode='fan_in')
     self.affine_a.bias.data.fill_(0)
     self.affine_b.bias.data.fill_(0)
Example #24
def weight_init(m):
    if m.__class__.__name__ == 'Linear':
        kaiming_uniform_(m.weight.data)
        m.bias.data.fill_(0)
Example #25
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight)
     init.kaiming_uniform_(self.weight_classifier)
Example #26
 def init_weights(self):
     """Initialize the weights."""
     for m in self.classifier.modules():
         if isinstance(m, nn.Linear):
             init.kaiming_uniform_(m.weight, mode='fan_in')
             m.bias.data.fill_(0)
Example #27
 def _init_params(self):
     for name, module in self.named_modules():
         if isinstance(module, nn.Conv2d):
             init.kaiming_uniform_(module.weight)
             if module.bias is not None:
                 init.constant_(module.bias, 0)
Example #28
 def init_params(self):
     init.kaiming_uniform_(self.weight)
     if self.use_bias:
         _out_feats_bias = self.bias.size(0)
         stdv_b = 1. / np.sqrt(_out_feats_bias)
         init.uniform_(self.bias, -stdv_b, stdv_b)
Example #29
def weight_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight.data)
Example #30
 def reset_parameters(self):
     init.kaiming_uniform_(self.theta, a=math.sqrt(5))
     fan_in, _ = init._calculate_fan_in_and_fan_out(self.theta)
     bound = 1 / math.sqrt(fan_in)
     init.uniform_(self.b, -bound, bound)
Example #31
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.padding_idx is not None:
         with torch.no_grad():
             self.weight[self.padding_idx].fill_(0)
Example #32
 def reset_parameters(self) -> None:
     init.kaiming_uniform_(self.weight, a=np.sqrt(15))
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / np.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #33
    def __init__(
        self,
        config,
        alpha,
        init_emb=None,
        bg_init=None,
        device="cpu",
        classify_from_covars=True,
        classify_from_topics=True,
        classify_from_doc_reps=True,
    ):
        super(torchScholar, self).__init__()

        # load the configuration
        self.vocab_size = config["vocab_size"]
        self.words_emb_dim = config["embedding_dim"]
        self.zero_out_embeddings = config["zero_out_embeddings"]        
        self.reconstruct_bow = config["reconstruct_bow"]
        self.doc_reps_dim = config["doc_reps_dim"]
        self.attend_over_doc_reps = config["attend_over_doc_reps"]
        self.use_doc_layer = config["use_doc_layer"]
        self.doc_reconstruction_weight = config["doc_reconstruction_weight"]
        self.doc_reconstruction_temp = config["doc_reconstruction_temp"]
        self.doc_reconstruction_min_count = config["doc_reconstruction_min_count"]
        self.n_topics = config["n_topics"]
        self.n_labels = config["n_labels"]
        self.n_prior_covars = config["n_prior_covars"]
        self.n_topic_covars = config["n_topic_covars"]
        self.classifier_layers = config["classifier_layers"]
        self.classifier_loss_weight = config["classifier_loss_weight"]
        self.use_interactions = config["use_interactions"]
        self.l1_beta_reg = config["l1_beta_reg"]
        self.l1_beta_c_reg = config["l1_beta_c_reg"]
        self.l1_beta_ci_reg = config["l1_beta_ci_reg"]
        self.l2_prior_reg = config["l2_prior_reg"]
        self.device = device
        self.classify_from_covars = classify_from_covars
        self.classify_from_topics = classify_from_topics
        self.classify_from_doc_reps = classify_from_doc_reps

        # create a layer for prior covariates to influence the document prior
        if self.n_prior_covars > 0:
            self.prior_covar_weights = nn.Linear(
                self.n_prior_covars, self.n_topics, bias=False
            )
        else:
            self.prior_covar_weights = None

        # create the encoder    
        emb_size = self.words_emb_dim
        classifier_input_dim = 0
        if self.classify_from_topics:
            classifier_input_dim = self.n_topics
        if self.n_prior_covars > 0:
            emb_size += self.n_prior_covars
            if self.classify_from_covars:
                classifier_input_dim += self.n_prior_covars
        if self.n_topic_covars > 0:
            emb_size += self.n_topic_covars
            if self.classify_from_covars:
                classifier_input_dim += self.n_topic_covars
        if self.doc_reps_dim is not None:
            if self.attend_over_doc_reps:
                self.attention_vec = torch.nn.Parameter(
                    torch.rand(self.doc_reps_dim).to(self.device)
                )
            if self.use_doc_layer:
                emb_size += self.words_emb_dim
                self.doc_layer = nn.Linear(
                    self.doc_reps_dim, self.words_emb_dim
                ).to(self.device)
            else:
                emb_size += self.doc_reps_dim
            if self.classify_from_doc_reps:
                classifier_input_dim += self.doc_reps_dim
        if self.n_labels > 0:
            emb_size += self.n_labels

        self.encoder_dropout_layer = nn.Dropout(p=0.2)
        
        self.embeddings_x = torch.nn.ParameterDict()
        # initialize each embedding
        for emb_name, (emb_data, update) in init_emb.items():
            self.embeddings_x[emb_name] = torch.nn.Parameter(
                torch.zeros(
                    size=(self.words_emb_dim, self.vocab_size)
                ).to(self.device),
                requires_grad=update,
            )
            if emb_data is not None:
                (self.embeddings_x[emb_name]
                     .data.copy_(torch.from_numpy(emb_data)).to(self.device)
                )
            else:
                kaiming_uniform_(self.embeddings_x[emb_name], a=np.sqrt(5))         
                xavier_uniform_(self.embeddings_x[emb_name])
        
        # create the mean and variance components of the VAE
        self.mean_layer = nn.Linear(emb_size, self.n_topics)
        self.logvar_layer = nn.Linear(emb_size, self.n_topics)

        self.mean_bn_layer = nn.BatchNorm1d(
            self.n_topics, eps=0.001, momentum=0.001, affine=True
        )
        self.mean_bn_layer.weight.data.copy_(
            torch.from_numpy(np.ones(self.n_topics))
        ).to(self.device)
        self.mean_bn_layer.weight.requires_grad = False
        self.logvar_bn_layer = nn.BatchNorm1d(
            self.n_topics, eps=0.001, momentum=0.001, affine=True
        )
        self.logvar_bn_layer.weight.data.copy_(
            torch.from_numpy(np.ones(self.n_topics))
        ).to(self.device)
        self.logvar_bn_layer.weight.requires_grad = False

        self.z_dropout_layer = nn.Dropout(p=0.2)

        # create the decoder
        self.beta_layer = nn.Linear(self.n_topics, self.vocab_size)

        xavier_uniform_(self.beta_layer.weight)
        if bg_init is not None:
            self.beta_layer.bias.data.copy_(torch.from_numpy(bg_init))
            self.beta_layer.bias.requires_grad = False
        self.beta_layer = self.beta_layer.to(self.device)

        if self.n_topic_covars > 0:
            self.beta_c_layer = nn.Linear(
                self.n_topic_covars, self.vocab_size, bias=False
            ).to(self.device)
            if self.use_interactions:
                self.beta_ci_layer = nn.Linear(
                    self.n_topics * self.n_topic_covars, self.vocab_size, bias=False
                ).to(self.device)

        # create the classifier
        if self.n_labels > 0:
            if self.classifier_layers == 0:
                self.classifier_layer_0 = nn.Linear(
                    classifier_input_dim, self.n_labels
                ).to(self.device)
            else:
                self.classifier_layer_0 = nn.Linear(
                    classifier_input_dim, classifier_input_dim
                ).to(self.device)
                self.classifier_layer_1 = nn.Linear(
                    classifier_input_dim, self.n_labels
                ).to(self.device)

        # create a final batchnorm layer
        self.eta_bn_layer = nn.BatchNorm1d(
            self.vocab_size, eps=0.001, momentum=0.001, affine=True
        ).to(self.device)
        self.eta_bn_layer.weight.data.copy_(
            torch.from_numpy(np.ones(self.vocab_size)).to(self.device)
        )
        self.eta_bn_layer.weight.requires_grad = False

        # create the document prior terms
        prior_mean = (np.log(alpha).T - np.mean(np.log(alpha), 1)).T
        prior_var = (
            ((1.0 / alpha) * (1 - (2.0 / self.n_topics))).T
            + (1.0 / (self.n_topics * self.n_topics)) * np.sum(1.0 / alpha, 1)
        ).T

        prior_mean = np.array(prior_mean).reshape((1, self.n_topics))
        prior_logvar = np.array(np.log(prior_var)).reshape((1, self.n_topics))
        self.prior_mean = torch.from_numpy(prior_mean).to(self.device)
        self.prior_mean.requires_grad = False
        self.prior_logvar = torch.from_numpy(prior_logvar).to(self.device)
        self.prior_logvar.requires_grad = False
Example #34
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv3d):
        # kaiming is first name of author whose last name is 'He' lol
        init.kaiming_uniform_(m.weight)
Example #35
 def reset_parameters(self) -> None:
     for layer in self[1::2]:
         init.kaiming_uniform_(layer.weight)
         if getattr(layer, 'bias', None) is not None:
             init.constant_(layer.bias, 0.)
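The self[1::2] indexing implies the class above subclasses nn.Sequential with a weight-carrying layer at every other position. A minimal hedged sketch of such a container (the MLP class here is hypothetical, not taken from the source project):

import torch
import torch.nn as nn
from torch.nn import init

class MLP(nn.Sequential):
    def __init__(self, dims):
        layers = []
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            layers += [nn.ReLU(), nn.Linear(d_in, d_out)]
        super(MLP, self).__init__(*layers)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        for layer in self[1::2]:                # every second module is a Linear
            init.kaiming_uniform_(layer.weight)
            if getattr(layer, 'bias', None) is not None:
                init.constant_(layer.bias, 0.)

print(MLP([16, 32, 8])(torch.randn(4, 16)).shape)   # torch.Size([4, 8])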