def __init__(self, word_embed_size: int, word_hidden_size: int,
             sent_hidden_size: int, bidirectional: bool = True):
    super().__init__()
    self.word_embed_size = word_embed_size
    self.word_hidden_size = word_hidden_size
    self.sent_hidden_size = sent_hidden_size
    self.bidirectional = bidirectional
    # Fixed to True for now; supporting batch_first=False needs further changes.
    self.batch_first = True

    ## Word-level Attention
    self.word_gru = nn.GRU(word_embed_size, word_hidden_size, num_layers=1,
                           bidirectional=self.bidirectional, bias=True,
                           batch_first=self.batch_first)
    word_dim = word_hidden_size * (2 if self.bidirectional else 1)
    self.word_att = Attention(word_dim)

    ## Sentence-level Attention
    self.sent_gru = nn.GRU(word_dim, sent_hidden_size, num_layers=1,
                           bidirectional=self.bidirectional, bias=True,
                           batch_first=self.batch_first)
    sent_dim = sent_hidden_size * (2 if self.bidirectional else 1)
    self.sent_att = Attention(sent_dim)
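
# A minimal, self-contained sketch (toy sizes, not from the original repo) of how
# the word-level half of this hierarchical encoder is typically driven: the GRU
# encodes the words and torchnlp's Attention (query, context) -> (output, weights)
# pools them into a sentence vector, here using the last timestep as the query.
import torch
import torch.nn as nn
from torchnlp.nn import Attention

batch, seq_len, embed, hidden = 4, 12, 100, 50
word_gru = nn.GRU(embed, hidden, bidirectional=True, batch_first=True)
word_att = Attention(hidden * 2)

words = torch.randn(batch, seq_len, embed)        # already-embedded words
outputs, _ = word_gru(words)                      # (batch, seq_len, 2 * hidden)
query = outputs[:, -1:, :]                        # last timestep as the query
sentence_vec, weights = word_att(query, outputs)  # attended sentence vector
print(sentence_vec.shape)                         # torch.Size([4, 1, 100])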
def _attentions(self, attention_type=['general', 'dot']):
    """Generate all possible instantiations of `Attention` to test."""
    possible_params = {}
    if attention_type:
        possible_params['attention_type'] = attention_type
    for kwargs in kwargs_product(possible_params):
        attention = Attention(self.dimensions, **kwargs)
        for param in attention.parameters():
            param.data.uniform_(-.1, .1)
        yield attention, kwargs
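
# A sketch (hypothetical test name; assumes the same unittest-style fixture with
# self.dimensions defined and torch imported) of consuming the generator above
# to shape-check every Attention variant it yields:
def test_forward_shapes(self):
    query = torch.randn(2, 1, self.dimensions)
    context = torch.randn(2, 5, self.dimensions)
    for attention, kwargs in self._attentions():
        output, weights = attention(query, context)
        self.assertEqual(output.shape, (2, 1, self.dimensions))
        self.assertEqual(weights.shape, (2, 1, 5))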
def __init__(self, batch_size, hidden_dims=128, num_classes=10, num_layers=1,
             dropout=0.2, useBigram=False, attentionOutput=False):
    super().__init__()
    self.embedding, embed_dims = create_emb_layer(non_trainable=False,
                                                  useBigram=useBigram)
    self.embed_dims = embed_dims
    kwargs = {
        "input_size": embed_dims,
        "hidden_size": hidden_dims,
        "num_layers": num_layers,
        "dropout": dropout,
        "batch_first": True,
    }
    self.lstm = torch.nn.LSTM(**kwargs)
    self.attention = Attention(hidden_dims, attention_type='general')
    self.drop = nn.Dropout(p=0.3)
    self.fc = nn.Linear(hidden_dims, num_classes)
    self.device = torch.device("cuda" if IS_CUDA else "cpu")
    hidden = torch.zeros((num_layers, batch_size, hidden_dims), device=self.device)
    cell = torch.zeros((num_layers, batch_size, hidden_dims), device=self.device)
    self.hidden = (hidden, cell)
    self.batch_size = batch_size
    self.attentionOutput = attentionOutput
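
# A minimal sketch (hypothetical; the original forward is not shown) of how an
# LSTM + torchnlp Attention classifier like the one above is usually wired:
# the final hidden state attends over all timestep outputs before the
# classification head.
import torch
import torch.nn as nn
from torchnlp.nn import Attention

batch, seq_len, embed_dims, hidden_dims, num_classes = 8, 20, 100, 128, 10
lstm = nn.LSTM(embed_dims, hidden_dims, batch_first=True)
attention = Attention(hidden_dims, attention_type='general')
fc = nn.Linear(hidden_dims, num_classes)

x = torch.randn(batch, seq_len, embed_dims)    # already-embedded input
outputs, (h_n, _) = lstm(x)                    # outputs: (batch, seq_len, hidden)
query = h_n[-1].unsqueeze(1)                   # (batch, 1, hidden)
attended, _ = attention(query, outputs)        # (batch, 1, hidden)
logits = fc(attended.squeeze(1))               # (batch, num_classes)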
def __init__(self, config):
    super(MRGCN, self).__init__()
    self.num_features = config.num_features
    self.num_relations = config.num_relations
    self.num_classes = config.nclass
    self.num_layers = config.num_layers  # number of RGCN conv layers
    self.hidden_dim = config.hidden_dim
    self.layer_spec = None if config.layer_spec is None else list(
        map(int, config.layer_spec.split(',')))
    self.lstm_dim1 = config.lstm_input_dim
    self.lstm_dim2 = config.lstm_output_dim
    self.rgcn_func = FastRGCNConv if config.conv_type == "FastRGCNConv" else RGCNConv
    self.activation = F.relu if config.activation == 'relu' else F.leaky_relu
    self.pooling_type = config.pooling_type
    self.readout_type = config.readout_type
    self.temporal_type = config.temporal_type
    self.dropout = config.dropout
    # nn.ModuleList (rather than a plain Python list) so the conv layers'
    # parameters are registered with the module and seen by the optimizer.
    self.conv = nn.ModuleList()
    total_dim = 0
    if self.layer_spec is None:
        if self.num_layers > 0:
            self.conv.append(self.rgcn_func(self.num_features, self.hidden_dim,
                                            self.num_relations).to(config.device))
            total_dim += self.hidden_dim
            for i in range(1, self.num_layers):
                self.conv.append(self.rgcn_func(self.hidden_dim, self.hidden_dim,
                                                self.num_relations).to(config.device))
                total_dim += self.hidden_dim
        else:
            self.fc0_5 = Linear(self.num_features, self.hidden_dim)
            total_dim += self.hidden_dim  # mirrors the layer_spec branch below; otherwise total_dim stays 0
    else:
        if self.num_layers > 0:
            print("using layer specification and ignoring hidden_dim parameter.")
            print("layer_spec: " + str(self.layer_spec))
            self.conv.append(self.rgcn_func(self.num_features, self.layer_spec[0],
                                            self.num_relations).to(config.device))
            total_dim += self.layer_spec[0]
            for i in range(1, self.num_layers):
                self.conv.append(self.rgcn_func(self.layer_spec[i - 1], self.layer_spec[i],
                                                self.num_relations).to(config.device))
                total_dim += self.layer_spec[i]
        else:
            self.fc0_5 = Linear(self.num_features, self.hidden_dim)
            total_dim += self.hidden_dim

    if self.pooling_type == "sagpool":
        self.pool1 = RGCNSAGPooling(total_dim, self.num_relations,
                                    ratio=config.pooling_ratio,
                                    rgcn_func=config.conv_type)
    elif self.pooling_type == "topk":
        self.pool1 = TopKPooling(total_dim, ratio=config.pooling_ratio)

    self.fc1 = Linear(total_dim, self.lstm_dim1)

    if "lstm" in self.temporal_type:
        self.lstm = LSTM(self.lstm_dim1, self.lstm_dim2, batch_first=True)
        self.attn = Attention(self.lstm_dim2)
        self.lstm_decoder = LSTM(self.lstm_dim2, self.lstm_dim2, batch_first=True)
    else:
        self.fc1_5 = Linear(self.lstm_dim1, self.lstm_dim2)

    self.fc2 = Linear(self.lstm_dim2, self.num_classes)
def __init__(self, in_features):
    """
    :param in_features: size of the MLP input latent (here 100). The output
        size (the number of classes, i.e. neg + 1) is fixed to 2 here, which
        is what the hand-built weight/bias are sized for.
    """
    super(Recommendation, self).__init__()
    self.weight = torch.nn.Parameter(torch.Tensor(2, in_features))
    self.bias = torch.nn.Parameter(torch.Tensor(2))
    self.in_features = in_features
    self.attention1 = Attention(self.in_features)
    self.attention2 = Attention(self.in_features)
    stdv = 1. / math.sqrt(self.weight.size(1))
    self.weight.data.uniform_(-stdv, stdv)
    if self.bias is not None:
        self.bias.data.uniform_(-stdv, stdv)
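
# A sketch (toy sizes; the original forward is not shown) of the design choice
# above: the hand-initialized weight/bias pair is applied with F.linear and
# behaves like nn.Linear(in_features, 2), but exposes the init explicitly.
import math
import torch
import torch.nn.functional as F

in_features = 100
weight = torch.nn.Parameter(torch.Tensor(2, in_features))
bias = torch.nn.Parameter(torch.Tensor(2))
stdv = 1. / math.sqrt(weight.size(1))
weight.data.uniform_(-stdv, stdv)
bias.data.uniform_(-stdv, stdv)

latent = torch.randn(4, in_features)
logits = F.linear(latent, weight, bias)  # (4, 2), same as nn.Linear(100, 2)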
def __init__(self, hparams):
    super().__init__(hparams)
    self.news_attentions = clones(
        AttLayer(hparams.head_dim, hparams.attention_hidden_dim),
        hparams.head_num)
    self.user_attentions = clones(
        AttLayer(hparams.head_dim, hparams.attention_hidden_dim),
        hparams.head_num)
    self.title_body_att = Attention(hparams.head_dim)
def __init__(self, vocab_size, num_layers, residual_layers, inp_size, hid_size,
             dropout=0.0, num_attn_layers=1, residual_n=1):
    super().__init__()
    self.is_res = np.zeros(num_layers, dtype=bool)
    if residual_layers:
        for layer_id in residual_layers:
            self.is_res[layer_id] = True
    self.residual_n = residual_n
    self.num_layers = num_layers
    self.dropout = nn.Dropout(p=dropout)
    self.embeddings = nn.Embedding(num_embeddings=vocab_size,
                                   embedding_dim=inp_size)
    if num_attn_layers == 1:
        self.attn_layers = Attention(hid_size)
    elif num_attn_layers == num_layers:
        self.attn_layers = nn.ModuleList(
            [Attention(hid_size) for _ in range(num_layers)])
    elif num_attn_layers == 0:
        self.attn_layers = None
    else:
        raise ValueError(
            f"Valid options for 'num_attn_layers': 0 or 1 or {num_layers} (= num. LSTM layers)"
        )
    self.num_attn_layers = num_attn_layers
    # LSTMs will get 2 things as input (IF using attention):
    # [attended encoder states, embedded decoder input/hidden state] (concatenated)
    self.layers = nn.ModuleList([
        nn.LSTM(input_size=(inp_size + hid_size) if num_attn_layers > 0 else inp_size,
                hidden_size=hid_size,
                batch_first=True) for _ in range(num_layers)
    ])
    self.fc = nn.Linear(hid_size, vocab_size)
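
# A minimal sketch (hypothetical names and sizes; the original forward is not
# shown) of the input-feeding pattern the comment above describes: attend over
# the encoder states, concatenate the attended vector with the embedded decoder
# token, and feed the result to the first LSTM layer.
import torch
import torch.nn as nn
from torchnlp.nn import Attention

batch, src_len, inp_size, hid_size = 2, 7, 32, 64
attn = Attention(hid_size)
lstm = nn.LSTM(inp_size + hid_size, hid_size, batch_first=True)

enc_states = torch.randn(batch, src_len, hid_size)
dec_hidden = torch.randn(batch, 1, hid_size)        # previous decoder state as query
embedded = torch.randn(batch, 1, inp_size)          # embedded decoder input token
attended, _ = attn(dec_hidden, enc_states)          # (batch, 1, hid_size)
step_in = torch.cat([attended, embedded], dim=-1)   # (batch, 1, inp_size + hid_size)
out, _ = lstm(step_in)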
def forward(self, x):
    cnn_x = self.embed(x)  # (N, W, D), static embedding
    cnn_x = cnn_x.unsqueeze(1)  # (N, Ci, W, D)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x]  # [(N, Co), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 1)
    cnn_out = self.dropout(cnn_x)  # (N, len(Ks) * Co)

    bilstm_x = self.embed(x)
    states, hidden = self.encoder(bilstm_x.permute([1, 0, 2]))
    bilstm_x = torch.cat([states[0], states[-1]], dim=1)
    # chain the linear layers instead of repeatedly applying each to the raw input
    bilstm_out = bilstm_x
    for layer in self.linear_layers:
        bilstm_out = layer(bilstm_out)

    # NOTE: building Attention inside forward() re-initializes its weights on
    # every call, so these attention parameters are never trained (see the
    # sketch after this function).
    # use the CNN output to attend over itself
    cnn_att = Attention(cnn_out.shape[1])
    if self.args.use_gpu:
        cnn_att.cuda()
    cnn_query, cnn_context = cnn_out.unsqueeze(1), cnn_out.unsqueeze(1)
    cnn_att_out, cnn_att_weights = cnn_att(cnn_query, cnn_context)

    # use the BiLSTM output to attend over itself
    bi_att = Attention(bilstm_out.shape[1])
    if self.args.use_gpu:
        bi_att.cuda()
    bilstm_query, bilstm_context = bilstm_out.unsqueeze(1), bilstm_out.unsqueeze(1)
    bilstm_att_out, bilstm_att_weights = bi_att(bilstm_query, bilstm_context)

    # concatenate the attended outputs
    cnn_bilstm_out = torch.cat(
        (cnn_att_out.squeeze(1), bilstm_att_out.squeeze(1)), dim=1)
    cnn_bilstm_feature = self.dimcast(cnn_bilstm_out)
    logit = self.decoder(cnn_bilstm_feature)
    return logit
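
# Design note: a sketch (hypothetical module and dims, not the original repo's
# code) of the usual fix for the issue flagged above: construct the Attention
# modules once in __init__ so their weights are registered and trained.
import torch
import torch.nn as nn
from torchnlp.nn import Attention

class SelfAttentiveHead(nn.Module):
    def __init__(self, cnn_dim, bilstm_dim):
        super().__init__()
        self.cnn_att = Attention(cnn_dim)    # built once; trains normally
        self.bi_att = Attention(bilstm_dim)

    def forward(self, cnn_out, bilstm_out):
        # each feature vector attends over itself, as in the forward above
        cnn_att_out, _ = self.cnn_att(cnn_out.unsqueeze(1), cnn_out.unsqueeze(1))
        bi_att_out, _ = self.bi_att(bilstm_out.unsqueeze(1), bilstm_out.unsqueeze(1))
        return torch.cat((cnn_att_out.squeeze(1), bi_att_out.squeeze(1)), dim=1)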
def __init__(self, config, d, cuda_on):
    super(MemoryLayer, self).__init__()
    self.cuda_on = cuda_on
    self.q_transform = nn.RNN(
        input_size=(int(config['transform_input_size']) * d),
        hidden_size=int(config['hidden_size']),
        nonlinearity=config['nonlinearity'])
    self.p_transform = nn.RNN(
        input_size=(int(config['transform_input_size']) * d),
        hidden_size=int(config['hidden_size']),
        nonlinearity=config['nonlinearity'])
    self.attention = Attention(int(config['hidden_size']),
                               attention_type='dot')
    self.self_attention = Attention(int(config['attention_input_size']) * d)
    self.bi_lstm = OneLayerBRNN(
        input_size=(int(config['bi_lstm_input_size']) * d),
        hidden_size=int(config['hidden_size']))
    self.output_size = int(config['hidden_size'])
    self.dropout = nn.Dropout(float(config['dropout']))
def __init__(self, input_size, hidden_size, vocab, fasttext_model, device='cpu'):
    super(LSTMModel, self).__init__()
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.vocab = vocab
    self.embedding = ModelEmbeddings(input_size, vocab, fasttext_model, device)
    self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True)
    self.linear = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=True)
    self.linear2 = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
    self.attention = Attention(self.hidden_size)
def __init__(self, max_dist, hidden_size=64):
    super().__init__()
    self.hidden_size = hidden_size
    with open("./vocab.pkl", "rb") as f:
        self.vocab = pickle.load(f)
    pre_trained_emb = torch.Tensor(self.vocab.vectors)
    self.word_embedding = nn.Embedding.from_pretrained(pre_trained_emb)
    self.distance_embedding = nn.Embedding(max_dist, 14)
    # LSTM input is 114-dim: the pre-trained word embedding (100-dim here)
    # concatenated with the 14-dim distance embedding
    self.lstm = nn.LSTM(114, hidden_size, batch_first=True, bidirectional=True)
    self.selective = nn.Linear(2 * hidden_size, 1)
    self.attention = Attention(2 * hidden_size)
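
# A sketch (toy vocab sizes; the original forward is not shown) of how the word
# and distance embeddings above would be concatenated to form the LSTM's
# 114-dim input:
import torch
import torch.nn as nn

batch, seq_len = 2, 9
word_embedding = nn.Embedding(5000, 100)
distance_embedding = nn.Embedding(50, 14)
lstm = nn.LSTM(114, 64, batch_first=True, bidirectional=True)

words = torch.randint(0, 5000, (batch, seq_len))
dists = torch.randint(0, 50, (batch, seq_len))
x = torch.cat([word_embedding(words), distance_embedding(dists)], dim=-1)  # (2, 9, 114)
outputs, _ = lstm(x)  # (2, 9, 128)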
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
             bidirectional, dropout, pad_idx):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
    self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                       bidirectional=bidirectional, dropout=dropout)
    self.att = Attention(hidden_dim)
    self.fc = nn.Linear(hidden_dim * 2, output_dim)
    self.dropout = nn.Dropout(dropout)
def __init__(self, params):
    super(Net, self).__init__()
    # parameters
    self.attention_size = 512
    # the embedding takes as input the vocab_size and the embedding_dim
    self.embedding = nn.Embedding(params.vocab_size, params.embedding_dim)
    self.lstm_hidden_dim = params.lstm_hidden_dim
    self.n_layers = params.n_layers
    # the LSTM takes as input its input size (embedding_dim) and its hidden size;
    # for more details on how to use it, check out the documentation
    self.lstm = nn.LSTM(params.embedding_dim, params.lstm_hidden_dim,
                        num_layers=params.n_layers, bidirectional=True,
                        dropout=params.dropout)
    self.dropout = nn.Dropout(params.dropout)
    self.attention = Attention(self.attention_size)
    # the fully connected layer transforms the output to give the final output layer
    self.fc = nn.Linear(params.lstm_hidden_dim * 2, params.number_of_classes)
    self.w_omega = Variable(
        torch.zeros(params.lstm_hidden_dim * params.n_layers,
                    self.attention_size).cuda())
    self.u_omega = Variable(torch.zeros(self.attention_size).cuda())
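
# A sketch (hypothetical; the original forward is not shown) of the additive
# soft attention that w_omega / u_omega tensors like the ones above usually
# implement: score_t = u_omega . tanh(h_t W_omega), alpha = softmax(score),
# out = sum_t alpha_t h_t.
import torch

seq_len, batch, dim, attn_size = 20, 4, 512, 512
lstm_out = torch.randn(seq_len, batch, dim)
w_omega = torch.zeros(dim, attn_size).uniform_(-0.1, 0.1)
u_omega = torch.zeros(attn_size).uniform_(-0.1, 0.1)

u = torch.tanh(lstm_out @ w_omega)             # (seq_len, batch, attn_size)
scores = u @ u_omega                           # (seq_len, batch)
alpha = torch.softmax(scores, dim=0)           # attention over timesteps
out = (alpha.unsqueeze(-1) * lstm_out).sum(0)  # (batch, dim)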
def __init__(self, n_inputs, n_outputs, embeddings_size, attention,
             bidirectional, hidden_sizes, layers, dropout, input_vocab,
             output_vocab, device):
    super(EncoderDecoder, self).__init__()
    self.input_vocab = input_vocab
    self.output_vocab = output_vocab
    self.padding_value = output_vocab["$PAD"]
    self.bidirectional = bidirectional
    self.encoder_embeddings = nn.Embedding(n_inputs, embeddings_size,
                                           padding_idx=input_vocab["$PAD"])
    self.encoder = nn.LSTM(embeddings_size, hidden_sizes, layers,
                           dropout=dropout, batch_first=True,
                           bidirectional=bidirectional)
    self.decoder_embeddings = nn.Embedding(n_inputs, embeddings_size,
                                           padding_idx=output_vocab["$PAD"])
    if bidirectional:
        hidden_sizes *= 2
    self.decoder = nn.LSTM(embeddings_size, hidden_sizes, layers,
                           dropout=dropout, batch_first=True)
    if attention is not None:
        from torchnlp.nn import Attention
        self.attention = Attention(hidden_sizes, attention_type=attention)
    self.hidden_to_output = nn.Linear(hidden_sizes, n_outputs)
    self.device = device
    self.to(device)
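
# A sketch (toy vocabularies and sizes, and assuming the __init__ above belongs
# to a class named EncoderDecoder) of constructing the seq2seq model with
# torchnlp's 'dot' attention:
import torch

input_vocab = {"$PAD": 0, "a": 1, "b": 2}
output_vocab = {"$PAD": 0, "x": 1, "y": 2}
model = EncoderDecoder(
    n_inputs=len(input_vocab), n_outputs=len(output_vocab),
    embeddings_size=32, attention='dot', bidirectional=True,
    hidden_sizes=64, layers=2, dropout=0.1,
    input_vocab=input_vocab, output_vocab=output_vocab,
    device=torch.device('cpu'))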
def __init__(self, params):
    super().__init__()
    row_vocab_size = len(row.vocab)
    col_vocab_size = len(column.vocab)
    mentions_vocab_size = len(mentions.vocab)
    self.row_encoder = nn.Embedding(row_vocab_size, params['emb_dim'])
    # encode the mentions with an LSTM:
    self.mention_col_encoder = LSTMEncoder(
        mentions_vocab_size, params['emb_dim'], params['lstm_hid'])
    # TODO: if there are no mentions as query, this can be a simple table:
    self.query_col_encoder = LSTMEncoder(
        col_vocab_size, params['emb_dim'], params['lstm_hid'])
    # self.query_col_encoder = nn.Embedding(col_vocab_size, params['lstm_hid'])
    if params.get('pooling', None) == 'attention':
        # TODO: use weight tying for the attention encoder
        self.attention_col_encoder = LSTMEncoder(
            mentions_vocab_size, params['emb_dim'], params['lstm_hid'])
        self.attention = Attention(params['lstm_hid'], attention_type='dot')
def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
    super().__init__()
    self.emb_dim = emb_dim
    self.hid_dim = hid_dim
    self.output_dim = output_dim
    self.n_layers = n_layers
    self.dropout = dropout
    self.embedding = nn.Embedding(num_embeddings=output_dim,
                                  embedding_dim=emb_dim)
    self.rnn = nn.LSTM(input_size=emb_dim, hidden_size=hid_dim,
                       num_layers=n_layers, dropout=dropout)
    # dot attention: query and context must share hid_dim
    self.attention = Attention(hid_dim, attention_type='dot')
    # 2 * hid_dim: presumably the attended vector concatenated with the decoder state
    self.out = nn.Linear(in_features=2 * hid_dim, out_features=output_dim)
    self.dropout = nn.Dropout(p=dropout)
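
# A sketch (hypothetical; the original forward is not shown) of a decoding step
# consistent with the layer sizes above: dot attention over the encoder states,
# then the attended vector concatenated with the decoder output before the
# 2 * hid_dim -> output_dim projection.
import torch
import torch.nn as nn
from torchnlp.nn import Attention

hid_dim, output_dim, src_len, batch = 64, 100, 7, 2
attention = Attention(hid_dim, attention_type='dot')
out = nn.Linear(2 * hid_dim, output_dim)

enc_states = torch.randn(batch, src_len, hid_dim)
dec_out = torch.randn(batch, 1, hid_dim)                # one decoder timestep
attended, _ = attention(dec_out, enc_states)            # (batch, 1, hid_dim)
logits = out(torch.cat([attended, dec_out], dim=-1).squeeze(1))  # (batch, output_dim)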
def __init__(self, vocab_size, class_size, embedding_dim, embedding_matrix, device):
    super(WordSentenceEncoding, self).__init__()
    self.device = device
    self.emb = nn.Embedding(vocab_size, embedding_dim)
    if embedding_matrix is not None:
        weight = torch.from_numpy(embedding_matrix).type(
            torch.FloatTensor).to(device)
        self.emb = nn.Embedding.from_pretrained(weight)
        self.emb.weight.requires_grad = True
    self.hidden_dim = embedding_dim
    self.layer_dim = 2
    self.bilstm = nn.LSTM(input_size=embedding_dim,
                          hidden_size=self.hidden_dim // 2,
                          bidirectional=True, batch_first=True)
    self.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(self.hidden_dim, 10),
        nn.Tanh(),
        nn.Linear(10, class_size),
    )
    self.fc_f = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(AppConf.sentenceEncoding_r + 1, class_size))
    self.d_a = AppConf.sentenceEncoding_r
    self.tanh = nn.Tanh()
    self.softmax = nn.Softmax(dim=2)
    self.sigmoid = nn.Sigmoid()
    self.loss = BCEPLoss(self.device)
    self.attention = Attention(self.hidden_dim, attention_type='general')
    self.w_s1 = nn.Parameter(torch.randn(self.d_a, self.hidden_dim))
    self.w_s2 = nn.Parameter(
        torch.randn(AppConf.sentenceEncoding_r, self.d_a))
def test_init(self):
    Attention(self.dimensions)
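
# A sketch (hypothetical test name; assumes the same fixture with
# self.dimensions set and torch imported) of a companion test that exercises
# the forward pass rather than just construction:
def test_forward(self):
    attention = Attention(self.dimensions)
    query = torch.randn(1, 1, self.dimensions)
    context = torch.randn(1, 4, self.dimensions)
    output, weights = attention(query, context)
    self.assertEqual(output.shape, (1, 1, self.dimensions))
    # the weights over the context should form a probability distribution
    self.assertTrue(torch.allclose(weights.sum(dim=-1), torch.ones(1, 1)))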