def __init__(self, z_dim, input_size, dist_type, image_features_type="global"):
    assert dist_type in ["normal", "logistic_normal"], \
        "Distribution not supported: %s" % str(dist_type)
    assert image_features_type in ["global", "posterior", "local"], \
        "Image features type not supported: %s" % str(image_features_type)
    super(GlobalFullInferenceNetwork, self).__init__(z_dim, input_size, dist_type)
    self.image_features_type = image_features_type

    if image_features_type == 'local':
        attn_type = 'general'
        coverage_attn = False
        # TODO: remove hardcoded hyperparameters
        hidden_size = 500

        # linear layer to project the 2048-dim local image features into the
        # RNN hidden state space
        self.image_proj = nn.Linear(2048, hidden_size)

        # attention mechanisms over the local image features, one for the
        # source side and one for the target side
        self.src_image_attn = GlobalAttention(hidden_size,
                                              coverage=coverage_attn,
                                              attn_type=attn_type)
        self.tgt_image_attn = GlobalAttention(hidden_size,
                                              coverage=coverage_attn,
                                              attn_type=attn_type)
        self.src_dropout = nn.Dropout(0.5)
        self.tgt_dropout = nn.Dropout(0.5)
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None, copy_attn=False,
             dropout=0.0, embeddings=None, reuse_copy_attn=False,
             copy_attn_type="general"):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # Build the RNN (e.g. an LSTM).
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size
        )

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(
            hidden_size, coverage=coverage_attn,
            attn_type=attn_type, attn_func=attn_func
        )

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(
            hidden_size, attn_type=copy_attn_type, attn_func=attn_func
        )
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
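# For reference, a minimal usage sketch of an attention module built as above,
# assuming OpenNMT-py's onmt.modules.GlobalAttention interface (batch-first
# queries and memory bank). The sizes are illustrative only and are not taken
# from the snippet above.
import torch
from onmt.modules import GlobalAttention

batch_size, src_len, tgt_len, hidden_size = 4, 11, 7, 256
attn = GlobalAttention(hidden_size, attn_type="general", attn_func="softmax")

decoder_states = torch.randn(batch_size, tgt_len, hidden_size)  # query vectors
memory_bank = torch.randn(batch_size, src_len, hidden_size)     # encoder states
memory_lengths = torch.full((batch_size,), src_len, dtype=torch.long)

# attn_h: attentional hidden states; align: attention weights over source positions
attn_h, align = attn(decoder_states, memory_bank, memory_lengths=memory_lengths)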
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None, copy_attn=False,
             dropout=0.0, embeddings=None, reuse_copy_attn=False):
    super(RNNDecoderBase, self).__init__()

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(context_gate,
                                                 self._input_size,
                                                 hidden_size,
                                                 hidden_size,
                                                 hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    self.attn = GlobalAttention(hidden_size,
                                coverage=coverage_attn,
                                attn_type=attn_type,
                                attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
def __init__(self, num_layers, d_model, heads, d_ff, attn_type,
             copy_attn, self_attn_type, dropout, embeddings,
             max_relative_positions):
    super(TransformerDecoder, self).__init__()

    self.embeddings = embeddings

    # Decoder State
    self.state = {}

    self.transformer_layers = nn.ModuleList([
        TransformerDecoderLayer(d_model, heads, d_ff, dropout,
                                self_attn_type=self_attn_type,
                                max_relative_positions=max_relative_positions)
        for i in range(num_layers)
    ])

    # previously, there was a GlobalAttention module here for copy
    # attention. But it was never actually used -- the "copy" attention
    # just reuses the context attention.
    self._copy = copy_attn
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    # NOTE: the copy attention dimension is hardcoded to 768 here rather
    # than using d_model.
    self.copy_attn = GlobalAttention(768, attn_type='general',
                                     attn_func='softmax')
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             dropout=0.0, embeddings=None):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    self.attn = GlobalAttention(hidden_size,
                                attn_type=attn_type,
                                attn_func=attn_func)
def __init__(self, num_layers, hidden_size, attn_type, copy_attn,
             cnn_kernel_width, dropout, embeddings, copy_attn_type):
    super(CNNDecoder, self).__init__()

    self.cnn_kernel_width = cnn_kernel_width
    self.embeddings = embeddings

    # Decoder State
    self.state = {}

    input_size = self.embeddings.embedding_size
    self.linear = nn.Linear(input_size, hidden_size)
    self.conv_layers = nn.ModuleList([
        GatedConv(hidden_size, cnn_kernel_width, dropout, True)
        for i in range(num_layers)
    ])
    self.attn_layers = nn.ModuleList([
        ConvMultiStepAttention(hidden_size) for i in range(num_layers)
    ])

    # CNNDecoder has its own attention mechanism.
    # Set up a separate copy attention layer if needed.
    assert not copy_attn, "Copy mechanism not yet tested in conv2conv"
    if copy_attn:
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=copy_attn_type)
    else:
        self.copy_attn = None
def __init__(self, opt, mode):
    super(DNC, self).__init__()

    self.input_feed = opt.input_feed if mode == 'decode' else 0
    opt.rnn_size = opt.word_vec_size if mode == 'diag_encode' else opt.rnn_size
    self.layers = opt.layers
    self.rnn_sz = (opt.word_vec_size, None) if self.layers == 1 \
        else (opt.rnn_size, opt.word_vec_size)

    use_cuda = len(opt.gpus) > 0
    self.memory = Memory(opt.mem_slots, opt.mem_size, opt.read_heads,
                         opt.batch_size, use_cuda)

    input_sz = 2 * opt.word_vec_size if self.input_feed else opt.word_vec_size
    self.controller = Controller(input_sz, opt.word_vec_size, opt.read_heads,
                                 opt.rnn_size, opt.mem_size, opt.batch_size,
                                 opt.dropout, self.layers)
    self.dropout = nn.Dropout(opt.dropout)
    self.attn = GlobalAttention(opt.word_vec_size) \
        if opt.attn and mode == 'decode' else None
def __init__(self, opt, mode, memory, controller):
    super(DNC, self).__init__()

    self.input_feed = opt.input_feed if mode == 'decode' else 0
    output_size = opt.word_vec_size // 2 if mode == 'bla_diag_encode' \
        else opt.word_vec_size
    opt.rnn_size = opt.word_vec_size if mode == 'diag_encode' else opt.rnn_size
    self.rnn_sz = (opt.word_vec_size, None) if opt.layers == 1 \
        else (opt.rnn_size, output_size)
    self.layers = opt.layers
    self.net_data = [] if opt.gather_net_data else None

    use_cuda = len(opt.gpus) > 0
    self.memory = memory
    self.controller = controller
    self.dropout = nn.Dropout(opt.dropout)
    self.attn = GlobalAttention(opt.word_vec_size) \
        if opt.attn and mode == 'context_decode' else None
def __init__(self, opt):
    super(DNC, self).__init__()

    self.input_feed = opt.input_feed if opt.seq == 'decoder' else 0
    self.rnn_sz = (opt.word_vec_size, None) if opt.layers == 1 \
        else (opt.rnn_size, opt.word_vec_size)
    self.layers = opt.layers
    self.net_data = [] if opt.gather_net_data else None

    use_cuda = len(opt.gpus) > 0
    self.memory = Memory(opt.mem_slots, opt.mem_size, opt.read_heads,
                         opt.batch_size, use_cuda)

    input_sz = 2 * opt.word_vec_size if self.input_feed else opt.word_vec_size
    self.controller = Controller(input_sz, opt.word_vec_size, opt.read_heads,
                                 opt.rnn_size, opt.mem_size, opt.batch_size,
                                 opt.dropout, opt.layers)
    self.attn = GlobalAttention(opt.word_vec_size) \
        if opt.attn and opt.seq == 'decoder' else None
def __init__(self, distinct_tokens, encoder_count, sos, eos, max_len=70):
    super(Speller, self).__init__()

    self.vocab_to_embedding = nn.Embedding(distinct_tokens + 1, 8)  # +1 for padding
    self.to_tokens = nn.Linear(encoder_count * 512, distinct_tokens + 1)
    self.initial_hiddens = nn.Linear(encoder_count * 512, 512)
    self.gru = nn.GRU(input_size=8,
                      hidden_size=512,
                      num_layers=1,
                      batch_first=True,
                      bidirectional=False,
                      dropout=0.15)

    # One attention head per encoder; each is registered on the module via
    # setattr so its parameters are tracked.
    self.attns = [
        GlobalAttention(512, attn_type="general")
        for _ in range(0, encoder_count)
    ]
    for i in range(0, encoder_count):
        setattr(self, "attn_{}".format(i + 1), self.attns[i])

    self.distinct_tokens = distinct_tokens
    self.max_len = max_len
    self.sos = sos
    self.eos = eos
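# A hypothetical instantiation of the constructor above, only to show how one
# GlobalAttention head per encoder is exposed as attn_1, attn_2, ...; the
# vocabulary size, encoder count, and SOS/EOS ids are made up for illustration
# and assume the Speller class and its imports are available.
speller = Speller(distinct_tokens=64, encoder_count=2, sos=1, eos=2)
print(speller.attn_1, speller.attn_2)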
def __init__(
    self,
    embeddings=None,
    hidden=None,
    encoder_hidden=None,
    num_layers=1,
    teacher_forcing_p=0.0,
    attention=None,
    dropout=None,
    char_encoder_hidden=None,
    char_attention=None,
    word_dropout=None,
    encoder_feeding=False,
    variational=False,
    decoder="LSTM",
    latent_size=None,
):
    super(LSTM_Decoder, self).__init__()
    # TODO Use Fairseq attention
    self.embeddings = embeddings
    self.hidden = hidden
    self.embedding_dropout = nn.Dropout(dropout.input)
    self.hidden_state_dropout = nn.Dropout(dropout.output)
    self.word_dropout_p = word_dropout if word_dropout is not None else 0.0

    self.variational = variational
    if self.variational:
        encoder_feeding = True
        self.encoder_hidden_proj = lambda x: x
    else:
        self.encoder_hidden_proj = (
            nn.Linear(encoder_hidden, hidden, bias=False)
            if (encoder_hidden != hidden or encoder_hidden is None)
            else lambda x: x
        )

    self.lstm_decoder = nn.ModuleList()
    self.num_layers = num_layers
    self.input_feeding_size = 0  # latent_size if self.variational else 0

    if decoder == "LSTM":
        decoder_cell = [nn.LSTMCell] * self.num_layers
    elif decoder == "VDM":
        if self.num_layers == 1:
            decoder_cell = [VDM_LSTMCell]
        else:
            decoder_cell = [nn.LSTMCell] * (self.num_layers - 1) + [VDM_LSTMCell]

    for i in range(self.num_layers):
        if i == 0:
            self.lstm_decoder.append(
                decoder_cell[i](
                    self.embeddings.embedding_dim + self.input_feeding_size,
                    self.hidden,
                )
            )
        else:
            self.lstm_decoder.append(decoder_cell[i](self.hidden, self.hidden))

    self.teacher_forcing_p = teacher_forcing_p
    self.state = {
        "hidden": [None] * self.num_layers,
        "cell": [None] * self.num_layers,
        "latent": [None] * self.num_layers,
    }

    self.attention = attention
    self.proj_layer = nn.Linear(self.hidden, self.embeddings.num_embeddings)
    if attention is not None:
        self.enc_hidden_att_komp = (
            nn.Linear(encoder_hidden, hidden, bias=False)
            if (encoder_hidden != hidden or encoder_hidden is None)
            else lambda x: x
        )
        self.attention = Attention(
            self.hidden,
            attn_type=attention,
            attn_func="softmax",
        )

    self.char_attention = char_attention
    if char_attention is not None:
        self.char_hidden_att_komp = (
            nn.Linear(char_encoder_hidden, hidden, bias=False)
            if (char_encoder_hidden != hidden or char_encoder_hidden is None)
            else lambda x: x
        )
        self.char_attention = GlobalAttention(
            self.hidden, attn_type=attention, attn_func="softmax"
        )
        self.proj_layer = nn.Linear(self.hidden * 2, self.embeddings.num_embeddings)
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None, copy_attn=False,
             dropout=0.0, embeddings=None, reuse_copy_attn=False,
             copy_attn_type="general", num_emotion_classes=0,
             emotion_emb_size=0, generic_vocab_indices=None,
             emotion_vocab_indices=None, eds_type=0, no_clf_loss=False,
             no_eds_attention=False):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)
    self.embedding_size = self.embeddings.embedding_size
    self.vocab_size = self.embeddings.word_vocab_size
    self.eds_type = eds_type
    self.no_clf_loss = no_clf_loss
    self.no_eds_attention = no_eds_attention

    # Emotion embedding
    self.num_emotion_classes = num_emotion_classes
    self.emotion_emb_size = emotion_emb_size
    rnn_input_size = self._input_size
    if num_emotion_classes != 0 and emotion_emb_size != 0:
        self.emo_embedding = nn.Embedding(num_emotion_classes, emotion_emb_size)
        rnn_input_size += emotion_emb_size

    # EDS model
    self.generic_vocab_indices = None  # a 1D list
    self.emotion_vocab_indices = None  # a 2D list
    if generic_vocab_indices is not None:
        if not self.no_eds_attention:
            # one input from the word embedding and another from the emotion embedding
            rnn_input_size *= 2
        self.all_vocab_indices = nn.Parameter(
            torch.arange(0, self.vocab_size, dtype=torch.long),
            requires_grad=False)
        self.generic_vocab_indices = nn.Parameter(
            torch.LongTensor(generic_vocab_indices), requires_grad=False)
        self.emotion_vocab_indices = nn.Parameter(
            torch.LongTensor(emotion_vocab_indices), requires_grad=False)
        self.generic_vocab_size = self.generic_vocab_indices.size(0)
        self.emotion_vocab_size = self.emotion_vocab_indices.size(1)
        self.num_emotions = self.emotion_vocab_indices.size(0)
        self.alpha = nn.Parameter(torch.zeros(hidden_size))
        self.beta = nn.Parameter(torch.zeros(hidden_size))
        self.gamma = nn.Parameter(torch.zeros(self.embedding_size))
        self.emotion_classifier = nn.Linear(self.embedding_size, self.num_emotions)

        self.generic_mask = nn.Parameter(torch.zeros(self.vocab_size),
                                         requires_grad=False)
        self.generic_mask[self.generic_vocab_indices] = 1

        other_emotion_indices = []
        flattened_emotion_vocab_indices = [i for e in emotion_vocab_indices for i in e]
        for i in range(len(emotion_vocab_indices)):
            other_emotion_indices.append(list(
                set(flattened_emotion_vocab_indices).difference(
                    set(emotion_vocab_indices[i]))))
        self.other_emotion_indices = nn.Parameter(
            torch.LongTensor(other_emotion_indices), requires_grad=False)
        self.all_emotion_indices = nn.Parameter(
            torch.LongTensor(list(set(flattened_emotion_vocab_indices))),
            requires_grad=False)
        self.vocab_embedding = nn.Parameter(
            self.embeddings(self.all_vocab_indices.unsqueeze(0).unsqueeze(-1)).squeeze(0),
            requires_grad=False)  # (vocab, emb_size)

    # Decoder state
    self.state = {}

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=rnn_input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(
            context_gate, self._input_size,
            hidden_size, hidden_size, hidden_size
        )

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(
            hidden_size, coverage=coverage_attn,
            attn_type=attn_type, attn_func=attn_func
        )

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(
            hidden_size, attn_type=copy_attn_type, attn_func=attn_func
        )
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
def __init__(self, rnn_type, bidirectional_encoder, num_layers,
             hidden_size, attn_type="general", attn_func="softmax",
             coverage_attn=False, context_gate=None,
             teacher_forcing="teacher", copy_attn=False, dropout=0.0,
             embeddings=None, reuse_copy_attn=False,
             copy_attn_type="general"):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)
    self.teacher_forcing = teacher_forcing

    # Decoder state
    self.state = {}

    self.lin = nn.Linear(self.hidden_size, 100)  # This line!

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self._input_size,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout=dropout)
    self.eval_status = False

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(context_gate,
                                                 self._input_size,
                                                 hidden_size,
                                                 hidden_size,
                                                 hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(hidden_size,
                                    coverage=coverage_attn,
                                    attn_type=attn_type,
                                    attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=copy_attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self.vocab_size = 0   # Only used by student-forcing, rand, and dist
    self.generator = None  # Only used by student-forcing, rand, and dist

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")
def __init__(
    self,
    rnn_type,
    bidirectional_encoder,
    num_layers,
    hidden_size,
    attn_type="general",
    attn_func="softmax",
    coverage_attn=False,
    context_gate=None,
    copy_attn=False,
    dropout=0.0,
    embeddings=None,
    reuse_copy_attn=False,
    copy_attn_type="general",
    target_encoder_type=None,
    detach_target_encoder=False,
):
    super(RNNDecoderBase, self).__init__(
        attentional=attn_type != "none" and attn_type is not None)

    self.bidirectional_encoder = bidirectional_encoder
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embeddings = embeddings
    self.dropout = nn.Dropout(dropout)

    # Decoder state
    self.state = {}

    # @memray: hack to change size for target encoding
    self.input_size = self._input_size
    if target_encoder_type == 'none':
        target_encoder_type = None
    if target_encoder_type is not None:
        self.input_size += self.hidden_size

    # Build the RNN.
    self.rnn = self._build_rnn(rnn_type,
                               input_size=self.input_size,
                               hidden_size=hidden_size,
                               num_layers=1,
                               dropout=dropout)

    # Set up the context gate.
    self.context_gate = None
    if context_gate is not None:
        self.context_gate = context_gate_factory(context_gate,
                                                 self.input_size,
                                                 hidden_size,
                                                 hidden_size,
                                                 hidden_size)

    # Set up the standard attention.
    self._coverage = coverage_attn
    if not self.attentional:
        if self._coverage:
            raise ValueError("Cannot use coverage term with no attention.")
        self.attn = None
    else:
        self.attn = GlobalAttention(hidden_size,
                                    coverage=coverage_attn,
                                    attn_type=attn_type,
                                    attn_func=attn_func)

    if copy_attn and not reuse_copy_attn:
        if copy_attn_type == "none" or copy_attn_type is None:
            raise ValueError(
                "Cannot use copy_attn with copy_attn_type none")
        self.copy_attn = GlobalAttention(hidden_size,
                                         attn_type=copy_attn_type,
                                         attn_func=attn_func)
    else:
        self.copy_attn = None

    self._reuse_copy_attn = reuse_copy_attn and copy_attn
    if self._reuse_copy_attn and not self.attentional:
        raise ValueError("Cannot reuse copy attention with no attention.")

    # @memray
    # Build the target encoder; its output is fed to the decoder as auxiliary input.
    self.target_encoder_type = target_encoder_type
    self.target_encoder = None
    if target_encoder_type == 'rnn':
        self.target_encoder = self._build_rnn(
            "GRU",
            input_size=self.embeddings.embedding_size,
            hidden_size=hidden_size,
            num_layers=1,
            dropout=dropout)
    self.detach_target_encoder = detach_target_encoder
    self.bilinear_layer = nn.Bilinear(in1_features=hidden_size,
                                      in2_features=hidden_size,
                                      out_features=1)