Example #1
    def forward(self, x):
        x = self.linear1(x)
        x = torch.tanh(x)
        x = self.linear2(x)
        x = torch.tanh(x)
        x = self.linear3(x)
        return x
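PyTorch exposes the hyperbolic tangent as the nn.Tanh module and as the torch.tanh function (there is no nn.tanh); the two are interchangeable, as this minimal sketch with an arbitrary input shape shows:

import torch
from torch import nn

x = torch.randn(8, 16)   # arbitrary batch of features
tanh_module = nn.Tanh()  # module form, convenient inside nn.Sequential
assert torch.allclose(tanh_module(x), torch.tanh(x))  # functional form, convenient inside forward()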
Example #2
    def __init__(self,
                 attention_dim,
                 embed_dim,
                 decoder_dim,
                 vocab_size,
                 encoder_dim=2048,
                 dropout=0.5):
        super(DecoderWithAttention, self).__init__()

        self.encoder_dim = encoder_dim
        self.attention_dim = attention_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab_size = vocab_size
        self.dropout = dropout
        self.tanh = nn.Tanh()

        self.attention = Attention(encoder_dim, decoder_dim, attention_dim)

        self.embedding = create_word_embedding()
        self.dropout = nn.Dropout(p=self.dropout)
        self.decode_step = nn.LSTMCell(embed_dim + encoder_dim,
                                       decoder_dim,
                                       bias=True)
        self.init_h = nn.Linear(encoder_dim, decoder_dim)
        self.init_c = nn.Linear(encoder_dim, decoder_dim)
        #self.f_beta = nn.Linear(decoder_dim, encoder_dim)
        self.attention_learner_1 = nn.Linear(decoder_dim, 1024)
        self.leaky_relu = nn.LeakyReLU(0.01)
        self.attention_learner_2 = nn.Linear(1024, encoder_dim)
        self.sigmoid = nn.Sigmoid()
        self.fc_1 = nn.Linear(decoder_dim, 1000)
        self.fc_2 = nn.Linear(1000, vocab_size)
        self.init_weights()
Example #3
    def __init__(self, input_size, hidden_dim, output_size):
        '''
        This is the constructor for the generator.

        Arguments:
        - input_size : The size of the latent sample vector
        - hidden_dim : The number of neurons in the first hidden layer
        - output_size : The number of neurons in the output layer
        '''
        super().__init__()

        # Define the class variables
        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.output_size = output_size

        # Define the modules required by this class
        self.fc1 = nn.Linear(self.input_size, self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim * 2)
        self.fc3 = nn.Linear(self.hidden_dim * 2, self.hidden_dim * 4)

        self.fc4 = nn.Linear(hidden_dim * 4, output_size)

        self.dropout = nn.Dropout(0.3)

        self.tanh = nn.Tanh()
Example #4
    def __init__(self):
        super(EncDec, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.Conv2d(16, 32, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.Conv2d(32, 64, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 128, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(128),  # must match the 128 output channels of the preceding conv
            nn.ReLU(True),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(64), nn.ReLU(True),
            nn.ConvTranspose2d(64, 32, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(32), nn.ReLU(True),
            nn.ConvTranspose2d(32, 16, stride=2, padding=1, kernel_size=3),
            nn.BatchNorm2d(16), nn.ReLU(True),
            nn.ConvTranspose2d(16, 1, stride=2, padding=1, kernel_size=3),
            nn.Tanh())
Example #5
    def forward(self, state):
        h = state
        for layer in self.policy:
            h = layer(h)
            h = torch.relu(h)

        mean = torch.tanh(self.mean(h)) * self.action_scale + self.action_bias
        return mean
Example #6
    def forward(self, x, res):
        x = self.unpool3(x)
        x = self.conv3(torch.cat([x, res[2]], dim=1))
        x = self.unpool2(x)
        x = self.conv2(torch.cat([x, res[1]], dim=1))
        x = self.unpool1(x)
        x = self.conv1(torch.cat([x, res[0]], dim=1))
        if self.use_tanh:
            x = torch.tanh(x)
        return x
Example #7
    def forward(self, H):
        size = H.size()  # expected: [batch_size, 19, 3, 512]
        print("Size:")
        print(size)
        # flatten the two middle dimensions so H is (batch, 19*3, 512) for the batched products
        H = H.view(size[0], size[1] * size[2], size[3])
        # assumes self.W1 has shape (512, d_a) and self.W2 has shape (d_a, r)
        x = torch.tanh(torch.matmul(H, self.W1))
        x = torch.matmul(x, self.W2)
        A = torch.softmax(x, dim=1)  # attention weights over the 19*3 positions
        E = torch.bmm(torch.transpose(A, 1, 2), H)

        return E
Example #8
	def __init__(self, args):
		super(MLP, self).__init__()

		self.hidden_sizes = args.classifier_hidden_sizes
		self.actv_fun = args.classifier_actv_fun
		self.dropout_rate = args.classifier_dropout_rate
		self.batch_norm = args.classifier_batch_norm
		self.num_classes = args.num_classes
		self.encoder_name = args.encoder

		assert self.encoder_name in ["BoW", "LSTM", "backwardLSTM", "biLSTM", "biLSTM_maxp", "biLSTM_minmax"]
		assert self.actv_fun in ["ReLU", "tanh", "linear"]

		if self.encoder_name == "BoW":
			self.n_dim = 4 * args.emb_dim
			self.encoder = BoW()

		elif self.encoder_name == "LSTM":
			self.n_dim = 4 * args.lstm_hidden_size
			self.encoder = LSTM_encoder(False, args.emb_dim, args.lstm_hidden_size, args.lstm_num_layers, args.lstm_dropout_rate)

		elif self.encoder_name == "biLSTM":
			self.n_dim = 4 * 2 * args.lstm_hidden_size
			self.encoder = LSTM_encoder(True, args.emb_dim, args.lstm_hidden_size, args.lstm_num_layers, args.lstm_dropout_rate)

		elif self.encoder_name == "biLSTM_maxp":
			self.n_dim = 4 * 2 * args.lstm_hidden_size
			self.encoder = biLSTM_maxp_encoder(args.lstm_hidden_size, args.batch_size, args.emb_dim, args.lstm_num_layers, args.lstm_dropout_rate)

		modules = []

		self.hidden_sizes = [self.n_dim] + self.hidden_sizes
		n_layers = len(self.hidden_sizes)

		for i in range(n_layers - 1):

			modules.append(nn.Linear(self.hidden_sizes[i], self.hidden_sizes[i + 1]))

			# Activation layer
			if self.actv_fun == "ReLU":
				modules.append(nn.ReLU())
			elif self.actv_fun == "tanh":
				modules.append(nn.Tanh())

			if self.dropout_rate:
				modules.append(nn.Dropout(p = self.dropout_rate))

			if self.batch_norm:
				modules.append(nn.BatchNorm1d(self.hidden_sizes[i + 1]))

		modules.append(nn.Linear(self.hidden_sizes[-1], self.num_classes))

		self.layers = nn.Sequential(*modules)
Example #9
    def forward(self, x):
        # ndocs is the batch size, i.e., number of documents in a batch
        ndocs = x.size(0)
        doc_len = x.size(1)
        sent_len = x.size(2)
        word_len = x.size(3)

        # x will have shape (ndocs, doc_len, sent_len,word_len)
        # Get the embeddings of the words; embeddings will be of shape (ndocs, doc_len, sent_len, word_len, emb_dim)
        x = self.emb_layer(x)
        char_dim = x.size(-1)
        x = x.reshape((-1, 1, word_len, char_dim))

        print('shape before conv', x.shape)

        x1 = self.char_conv1(x)
        x1 = self.char_pool1(x1).squeeze()
        x2 = self.char_pool2(self.char_conv2(x)).squeeze()
        x3 = self.char_pool3(self.char_conv3(x)).squeeze().reshape(
            1200000, 32, 1)
        x4 = self.char_pool4(self.char_conv4(x)).squeeze().reshape(
            1200000, 32, 1)
        x5 = self.char_pool5(self.char_conv5(x)).squeeze().reshape(
            1200000, 32, 1)
        x6 = self.char_pool6(self.char_conv6(x)).squeeze().reshape(
            1200000, 32, 1)

        x = torch.cat((x1, x2, x3, x4, x5, x6), 2)
        #x=x.squeeze()
        x = x.reshape(-1, 1, 1, word_len)
        #Pass through the word GRU. It expects input in the form (batch, seq_len, input_size)
        print('shape for word  conv', x.shape)
        p1 = self.word_conv1(x)
        print('p1 shape', p1.shape)
        p1 = self.word_pool1(p1)
        p2 = self.word_pool2(self.word_conv2(x))

        print('after word shapes are', p1.shape, p2.shape)
        x = torch.cat((p1, p2), 2)
        x = x.squeeze()  #word_rep

        x = x.reshape(-1, sent_len, 1)
        #Pass through the sentence GRU. It expects input in the form (batch, seq_len, input_size)
        x, _ = self.sent_GRU(x)

        #Average pool to get Document representation
        doc_rep = x.reshape(-1, 1, doc_len, 1)

        #Pass the doc_rep through a linear layer and tanh non-linearity
        doc_rep = torch.tanh(self.doc_linear(doc_rep))
        return doc_rep
Example #10
 def __init__(self):
     super(G, self).__init__()
     self.linear1 = nn.Linear(100, 4*4*512)
     
     self.deconvs = nn.Sequential()
     layer1 = nn.ConvTranspose2d(512, 256, 5, padding=2, stride=2)
     layer2 = nn.ConvTranspose2d(256, 128, 5, padding=2, stride=2)
     layer3 = nn.ConvTranspose2d(128, 3, 5, padding=2, stride=2)
     nl = nn.LeakyReLU(negative_slope=0.2)
     self.deconvs.append(layer1)
     self.deconvs.append(nl)
     self.deconvs.append(layer2)
     self.deconvs.append(nl)
     self.deconvs.append(layer3)
     self.deconvs.append(nn.Tanh())
     return
Example #11
    def __init__(self,
                 input_size=784,
                 hidden_size=500,
                 encoding_size=2,
                 activation='relu'):
        super(VAE, self).__init__()
        # ENCODER
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc21 = nn.Linear(hidden_size, encoding_size)  # mean of z|x
        self.fc22 = nn.Linear(hidden_size, encoding_size)  # std of z|x

        # DECODER
        self.fc3 = nn.Linear(encoding_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, input_size)

        self.encoding_size = encoding_size

        self.sigmoid = nn.Sigmoid()
        if activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            self.activation = nn.ReLU()
Example #12
def define_PRSNet(input_nc,
                  output_nc,
                  conv_layers,
                  num_plane,
                  num_quat,
                  biasTerms,
                  useBn,
                  activation,
                  init_gain=0.02,
                  gpu_ids=[]):
    if activation == 'relu':
        ac_fun = nn.ReLU()
    elif activation == 'tanh':
        ac_fun = nn.Tanh()
    elif activation == 'lrelu':
        ac_fun = nn.LeakyReLU(0.2, True)
    else:
        raise ValueError('unsupported activation function: %s' % activation)
    if useBn:
        print('using batch normalization')

    net = PRSNet(input_nc, output_nc, conv_layers, num_plane, num_quat,
                 biasTerms, useBn, ac_fun)
    return init_net(net, init_gain, gpu_ids)
Example #13
 def __init__(self):
     super(SelectionModel, self).__init__()
     self.layer = nn.Sequential(
         nn.Conv2d(18, 64, 3, stride=1, dilation=2),
         nn.ELU(),
         nn.BatchNorm2d(64),
         nn.Conv2d(64, 128, 3, stride=1, dilation=4),
         nn.ELU(),
         nn.BatchNorm2d(128),  # must match the 128 output channels of the preceding conv
         nn.Conv2d(128, 128, 3, stride=1, dilation=8),
         nn.ELU(),
         nn.BatchNorm2d(128),
         nn.Conv2d(128, 128, 3, stride=1, dilation=16),
         nn.ELU(),
         nn.BatchNorm2d(128),
         nn.Conv2d(128, 64, 3, stride=1),
         nn.ELU(),
         nn.BatchNorm2d(64),
         nn.Conv2d(64, 32, 3, stride=1),
         nn.ELU(),
         nn.BatchNorm2d(32),
         nn.Conv2d(32, 4, 3, stride=1),
         nn.Tanh()
     )
Example #14
    def __init__(self, code_size, img_size, kernel_size=4, num_input_channels=3, num_feature_maps=64, batch_norm=True):
        super(Encoder, self).__init__()

        if isinstance(img_size, tuple):
            # use the power-of-two side as the reference dimension when scaling down to the final feature map
            if is_power2(max(img_size)):
                stable_dim = max(img_size)
            else:
                stable_dim = min(img_size)
            self.img_size = img_size
            self.final_size = tuple(4 * x // stable_dim for x in self.img_size)
        else:
            self.img_size = (img_size, img_size)
            self.final_size = (4, 4)

        self.code_size = code_size
        self.num_feature_maps = num_feature_maps
        self.cl = nn.ModuleList()
        # assumption: the channel bookkeeping starts at num_feature_maps
        # (its initialisation is not shown in this excerpt)
        self.channels = [num_feature_maps]
        self.num_layers = int(np.log2(max(self.img_size))) - 2

        stride = 2

        padding = calcualte_padding(kernel_size, stride)

        if batch_norm:
            self.cl.append(nn.Sequential(
                nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                          stride=stride, padding=padding // 2, bias=False),
                nn.BatchNorm2d(self.channels[-1] * 2),
                nn.ReLU(True)
            ))
        else:
            self.cl.append(nn.Sequential(
                nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                          stride=stride, padding=padding // 2, bias=False),
                nn.ReLU(True)
            ))

        self.channels.append(2 * self.channels[-1])
        for i in range(self.num_layers - 1):

            if batch_norm:
                self.cl.append(nn.Sequential(
                    nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                              stride=stride, padding=padding // 2, bias=False),
                    nn.BatchNorm2d(self.channels[-1] * 2),
                    nn.ReLU(True)
                ))
            else:
                self.cl.append(nn.Sequential(
                    nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                              stride=stride, padding=padding // 2, bias=False),
                    nn.ReLU(True)
                ))
            self.channels.append(2 * self.channels[-1])

        self.cl.append(nn.Sequential(
            nn.Conv2d(self.channels[-1], code_size, self.final_size,
                      stride=1, padding=0, bias=False),
            nn.Tanh()
        ))
    def forward(self, x, retain_intermediate=False):
        if retain_intermediate:
            h = [x]
            for conv_layer in self.cl:
                h.append(conv_layer(h[-1]))
            return h[-1].view(-1, self.code_size), h[1:-1]
        else:
            for conv_layer in self.cl:
                x = conv_layer(x)

            return x.view(-1, self.code_size)
Example #15
    def forward(self, x):
        value = self.critic(x)
        mu = torch.tanh(self.actor(x))
        std = self.log_std.exp().expand_as(mu)
        dist = Normal(mu, std)
        return dist, value
Example #16
File: nnlm.py  Project: funny0619/HKIME
    def __init__(self, n, vocab_size, dim, h):
        super(Net, self).__init__()
        self.embedding = nn.Embedding(vocab_size, dim)
        self.linear = nn.Linear(dim, vocab_size)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=-1)
Example #17
    def forward(self, x):
        # Mish activation: x * tanh(softplus(x))
        return x * torch.tanh(nn.functional.softplus(x))
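Recent PyTorch ships this activation as nn.Mish / F.mish (1.9+); a quick sanity check, assuming such a version is available, that the hand-rolled form matches the built-in:

import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
hand_rolled = x * torch.tanh(F.softplus(x))  # same formula as the forward() above
assert torch.allclose(hand_rolled, F.mish(x), atol=1e-6)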
Example #18
    def __init__(self, opt):
        """Initialize model."""
        super(Seq2SeqModel, self).__init__()

        self.vocab_size = opt.vocab_size
        self.emb_dim = opt.word_vec_size
        self.num_directions = 2 if opt.bidirectional else 1
        self.encoder_size = opt.encoder_size
        self.decoder_size = opt.decoder_size
        #self.ctx_hidden_dim = opt.rnn_size
        self.batch_size = opt.batch_size
        self.bidirectional = opt.bidirectional
        self.enc_layers = opt.enc_layers
        self.dec_layers = opt.dec_layers
        self.dropout = opt.dropout

        self.bridge = opt.bridge
        self.one2many_mode = opt.one2many_mode
        self.one2many = opt.one2many

        self.coverage_attn = opt.coverage_attn
        self.copy_attn = opt.copy_attention

        self.pad_idx_src = opt.word2idx[pykp.io.PAD_WORD]
        self.pad_idx_trg = opt.word2idx[pykp.io.PAD_WORD]
        self.bos_idx = opt.word2idx[pykp.io.BOS_WORD]
        self.eos_idx = opt.word2idx[pykp.io.EOS_WORD]
        self.unk_idx = opt.word2idx[pykp.io.UNK_WORD]
        self.sep_idx = opt.word2idx[pykp.io.SEP_WORD]
        self.orthogonal_loss = opt.orthogonal_loss

        self.share_embeddings = opt.share_embeddings
        self.review_attn = opt.review_attn

        self.attn_mode = opt.attn_mode

        self.use_target_encoder = opt.use_target_encoder
        self.target_encoder_size = opt.target_encoder_size

        self.device = opt.device

        self.separate_present_absent = opt.separate_present_absent
        self.goal_vector_mode = opt.goal_vector_mode
        self.goal_vector_size = opt.goal_vector_size
        self.manager_mode = opt.manager_mode
        self.title_guided = opt.title_guided

        if self.separate_present_absent:
            self.peos_idx = opt.word2idx[pykp.io.PEOS_WORD]

        '''
        self.attention_mode = opt.attention_mode    # 'dot', 'general', 'concat'
        self.input_feeding = opt.input_feeding

        self.copy_attention = opt.copy_attention    # bool, enable copy attention or not
        self.copy_mode = opt.copy_mode         # same to `attention_mode`
        self.copy_input_feeding = opt.copy_input_feeding
        self.reuse_copy_attn = opt.reuse_copy_attn
        self.copy_gate = opt.copy_gate

        self.must_teacher_forcing = opt.must_teacher_forcing
        self.teacher_forcing_ratio = opt.teacher_forcing_ratio
        self.scheduled_sampling = opt.scheduled_sampling
        self.scheduled_sampling_batches = opt.scheduled_sampling_batches
        self.scheduled_sampling_type = 'inverse_sigmoid'  # decay curve type: linear or inverse_sigmoid
        self.current_batch = 0  # for scheduled sampling

        self.device = opt.device

        if self.scheduled_sampling:
            logging.info("Applying scheduled sampling with %s decay for the first %d batches" % (self.scheduled_sampling_type, self.scheduled_sampling_batches))
        if self.must_teacher_forcing or self.teacher_forcing_ratio >= 1:
            logging.info("Training with All Teacher Forcing")
        elif self.teacher_forcing_ratio <= 0:
            logging.info("Training with All Sampling")
        else:
            logging.info("Training with Teacher Forcing with static rate=%f" % self.teacher_forcing_ratio)

        self.get_mask = GetMask(self.pad_idx_src)
        '''
        '''
        self.embedding = nn.Embedding(
            self.vocab_size,
            self.emb_dim,
            self.pad_idx_src
        )
        '''
        if self.title_guided:
            self.encoder = RNNEncoderTG(
                vocab_size=self.vocab_size,
                embed_size=self.emb_dim,
                hidden_size=self.encoder_size,
                num_layers=self.enc_layers,
                bidirectional=self.bidirectional,
                pad_token=self.pad_idx_src,
                dropout=self.dropout
            )
        else:
            self.encoder = RNNEncoderBasic(
                vocab_size=self.vocab_size,
                embed_size=self.emb_dim,
                hidden_size=self.encoder_size,
                num_layers=self.enc_layers,
                bidirectional=self.bidirectional,
                pad_token=self.pad_idx_src,
                dropout=self.dropout
            )

        self.decoder = RNNDecoder(
            vocab_size=self.vocab_size,
            embed_size=self.emb_dim,
            hidden_size=self.decoder_size,
            num_layers=self.dec_layers,
            memory_bank_size=self.num_directions * self.encoder_size,
            coverage_attn=self.coverage_attn,
            copy_attn=self.copy_attn,
            review_attn=self.review_attn,
            pad_idx=self.pad_idx_trg,
            attn_mode=self.attn_mode,
            dropout=self.dropout,
            use_target_encoder=self.use_target_encoder,
            target_encoder_size=self.target_encoder_size,
            goal_vector_mode=self.goal_vector_mode,
            goal_vector_size=self.goal_vector_size
        )

        if self.use_target_encoder:
            self.target_encoder = TargetEncoder(
                embed_size=self.emb_dim,
                hidden_size=self.target_encoder_size,
                vocab_size=self.vocab_size,
                pad_idx=self.pad_idx_trg
            )
            # use the same embedding layer as that in the decoder
            self.target_encoder.embedding.weight = self.decoder.embedding.weight
            self.target_encoder_attention = Attention(
                self.target_encoder_size,
                memory_bank_size=self.num_directions * self.encoder_size,
                coverage_attn=False,
                attn_mode="general"
            )

        if self.bridge == 'dense':
            self.bridge_layer = nn.Linear(self.encoder_size * self.num_directions, self.decoder_size)
        elif opt.bridge == 'dense_nonlinear':
            # dense bridge followed by a tanh nonlinearity
            self.bridge_layer = nn.Sequential(
                nn.Linear(self.encoder_size * self.num_directions, self.decoder_size),
                nn.Tanh()
            )
        else:
            self.bridge_layer = None

        if self.bridge == 'copy':
            assert self.encoder_size * self.num_directions == self.decoder_size, 'encoder hidden size and decoder hidden size do not match, please use a bridge layer'

        if self.separate_present_absent and self.goal_vector_mode > 0:
            if self.manager_mode == 2:  # use GRU as a manager
                self.manager = nn.GRU(input_size=self.decoder_size, hidden_size=self.goal_vector_size, num_layers=1, bidirectional=False, batch_first=False, dropout=self.dropout)
                self.bridge_manager = opt.bridge_manager
                if self.bridge_manager:
                    self.manager_bridge_layer = nn.Linear(self.encoder_size * self.num_directions, self.goal_vector_size)
                else:
                    self.manager_bridge_layer = None
            elif self.manager_mode == 1:  # use two trainable vectors only
                self.manager = ManagerBasic(self.goal_vector_size)

        if self.share_embeddings:
            self.encoder.embedding.weight = self.decoder.embedding.weight

        self.init_weights()
Example #19
 def __init__(self, fsdh_input_dim, fsdh_hidden_1, fsdh_hidden_2, fsdh_out_dim, layers, beta=0.3, gamma=1e-3,
              alpha=1.0, mu=0.1, nbits=32, batch_size=100):
     super(My_model, self).__init__()
     # FX_matrix is the original data; Y_matrix is the labels
     self.layers = layers
     self.beta = beta
     self.gamma = gamma
     self.alpha = alpha
     self.mu = mu
     self.nbits = nbits
     self.batch_size = batch_size
     self.layer1 = nn.Sequential(nn.Linear(fsdh_input_dim, fsdh_hidden_1), nn.BatchNorm1d(fsdh_hidden_1), nn.ReLU())
     self.layer2 = nn.Sequential(nn.Linear(fsdh_hidden_1, fsdh_hidden_2), nn.BatchNorm1d(fsdh_hidden_2), nn.ReLU())
     self.layer3 = nn.Linear(fsdh_hidden_2, fsdh_out_dim)
     # self.W = nn.ParameterList()
     # self.b = nn.ParameterList()
     # for k in range(self.layers):
     # self.W = nn.Parameter(torch.eye(self.nbits,fsdh_out_dim,dtype=torch.float32))
     # self.b = nn.Parameter(torch.eye(self.nbits, self.batch_size, dtype=torch.float32))
     
     #
     self.layer_init = nn.Sequential(nn.Linear(fsdh_input_dim, self.nbits), nn.BatchNorm1d(self.nbits), nn.Tanh())
Example #20
    def forward(self, x, squash=False):
        if squash:
            x = torch.tanh(x)
        return self.out(x)
Example #21
    def __init__(self, in_dim, hidden, num_action):
        super().__init__()
        self.model = nn.Sequential(nn.Linear(in_dim, hidden), nn.Tanh(),
                                   nn.Linear(hidden, num_action))
        self.softmax = nn.Softmax(dim=-1)
Example #22
# Code for conditional attention (Bahdanau attention).

import torch
from torch import nn
import torch.nn.functional as F

self._weight = nn.Parameter(torch.FloatTensor(100, 1)) #VARIABLE
self._act = nn.Tanh() #VARIABLE
self.lin_1 = nn.Linear(var_1_feat_size, 100, bias=False) #VARIABLE
self.lin_2 = nn.Linear(var_2_feat_size, 100, bias=False) #VARIABLE

def context(self, var_2, var_1):
    '''
    var_1 - on which attention is conditioned.
    var_2 - on which conditional attention is applied.
    '''
    attn_wt = F.softmax((self._act(self.lin_2(var_2) + self.lin_1(var_1))).matmul(self._weight), dim=1)
    return torch.mul(attn_wt, var_2) #VARIABLE
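As a sanity check, the excerpt above can be dropped into a small module and run on dummy tensors. The class name, feature sizes, batch size, and sequence length below are illustrative assumptions, not values from the source:

import torch
from torch import nn
import torch.nn.functional as F

class ConditionalAttention(nn.Module):  # hypothetical wrapper around the excerpt above
    def __init__(self, var_1_feat_size, var_2_feat_size):
        super().__init__()
        self._weight = nn.Parameter(torch.empty(100, 1))
        nn.init.xavier_uniform_(self._weight)
        self._act = nn.Tanh()
        self.lin_1 = nn.Linear(var_1_feat_size, 100, bias=False)
        self.lin_2 = nn.Linear(var_2_feat_size, 100, bias=False)

    def context(self, var_2, var_1):
        # score each position of var_2 conditioned on var_1, then softmax over the sequence dimension
        attn_wt = F.softmax(self._act(self.lin_2(var_2) + self.lin_1(var_1)).matmul(self._weight), dim=1)
        return torch.mul(attn_wt, var_2)

attn = ConditionalAttention(var_1_feat_size=32, var_2_feat_size=64)
var_1 = torch.randn(8, 1, 32)    # conditioning vector, broadcast over the sequence
var_2 = torch.randn(8, 10, 64)   # sequence to attend over
print(attn.context(var_2, var_1).shape)  # torch.Size([8, 10, 64])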
Example #23
def gelu(features: torch.Tensor, approximate: bool = False):
    if approximate:
        return 0.5 * features * (1.0 + torch.tanh(0.7978845608028654 * (features + 0.044715 * (features ** 3))))
    else:
        return 0.5 * features * (1.0 + torch.erf(features / 1.4142135623730951))
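For reference, the built-in F.gelu covers both branches of this function; a minimal check, assuming PyTorch >= 1.12 for the approximate= keyword:

import torch
import torch.nn.functional as F

x = torch.randn(1000)
# tanh approximation vs. the built-in tanh variant
assert torch.allclose(gelu(x, approximate=True), F.gelu(x, approximate='tanh'), atol=1e-6)
# exact erf form vs. the built-in default
assert torch.allclose(gelu(x, approximate=False), F.gelu(x), atol=1e-6)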