def __init__(self, rnn_type, bidirectional_encoder, num_layers,
                 hidden_size, attn_type="general", attn_func="softmax",
                 coverage_attn=False, context_gate=None,
                 copy_attn=False, dropout=0.0, embeddings=None,
                 reuse_copy_attn=False, copy_attn_type="general"):
        super(RNNDecoderBase, self).__init__(
            attentional=attn_type != "none" and attn_type is not None)

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)

        # Decoder state
        self.state = {}

        # Build the RNN (an LSTM or GRU, depending on rnn_type).
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=self._input_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)

        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(
                context_gate, self._input_size,
                hidden_size, hidden_size, hidden_size
            )

        # Set up the standard attention.
        self._coverage = coverage_attn
        if not self.attentional:
            if self._coverage:
                raise ValueError("Cannot use coverage term with no attention.")
            self.attn = None
        else:
            self.attn = GlobalAttention(
                hidden_size, coverage=coverage_attn,
                attn_type=attn_type, attn_func=attn_func
            )

        if copy_attn and not reuse_copy_attn:
            if copy_attn_type == "none" or copy_attn_type is None:
                raise ValueError(
                    "Cannot use copy_attn with copy_attn_type none")
            self.copy_attn = GlobalAttention(
                hidden_size, attn_type=copy_attn_type, attn_func=attn_func
            )
        else:
            self.copy_attn = None

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
        if self._reuse_copy_attn and not self.attentional:
            raise ValueError("Cannot reuse copy attention with no attention.")
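The `_build_rnn` helper called above is not part of this snippet; in OpenNMT-py-style decoders it instantiates the `torch.nn` recurrent module named by `rnn_type`. A minimal sketch of such a helper, with an assumed name and behavior rather than the library's exact implementation:

import torch.nn as nn

def build_rnn_sketch(rnn_type, input_size, hidden_size, num_layers, dropout=0.0):
    """Hypothetical stand-in for `_build_rnn`: pick nn.LSTM / nn.GRU / nn.RNN by name."""
    assert rnn_type in ("LSTM", "GRU", "RNN"), "unsupported rnn_type"
    rnn_cls = getattr(nn, rnn_type)  # nn.LSTM, nn.GRU, or nn.RNN
    # PyTorch warns if dropout is set on a single-layer RNN, so zero it out there.
    return rnn_cls(input_size=input_size,
                   hidden_size=hidden_size,
                   num_layers=num_layers,
                   dropout=dropout if num_layers > 1 else 0.0)

# e.g. a 2-layer LSTM over 500-dim inputs
rnn = build_rnn_sketch("LSTM", input_size=500, hidden_size=500, num_layers=2, dropout=0.3)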
Example 2
    def __init__(self,
                 rnn_type,
                 bidirectional_encoder,
                 num_layers,
                 hidden_size,
                 attn_type="general",
                 attn_func="softmax",
                 coverage_attn=False,
                 context_gate=None,
                 copy_attn=False,
                 dropout=0.0,
                 embeddings=None,
                 reuse_copy_attn=False):
        super(RNNDecoderBase, self).__init__()

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)

        # Decoder state
        self.state = {}

        # Build the RNN.
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=self._input_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)

        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(context_gate,
                                                     self._input_size,
                                                     hidden_size, hidden_size,
                                                     hidden_size)

        # Set up the standard attention.
        self._coverage = coverage_attn
        self.attn = GlobalAttention(hidden_size,
                                    coverage=coverage_attn,
                                    attn_type=attn_type,
                                    attn_func=attn_func)

        if copy_attn and not reuse_copy_attn:
            self.copy_attn = GlobalAttention(hidden_size,
                                             attn_type=attn_type,
                                             attn_func=attn_func)
        else:
            self.copy_attn = None

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
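All of these constructors build a GlobalAttention with attn_type="general" and attn_func="softmax". As a reminder of what that combination computes, here is a self-contained sketch of Luong-style "general" (bilinear) attention; it illustrates the scoring rule, not the library's GlobalAttention class:

import torch
import torch.nn as nn
import torch.nn.functional as F

class GeneralAttentionSketch(nn.Module):
    """Toy Luong-style "general" attention: score(h_t, h_s) = h_t^T W h_s, softmax over sources."""

    def __init__(self, hidden_size):
        super().__init__()
        self.linear_in = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, query, memory):
        # query:  (batch, hidden)          current decoder state
        # memory: (batch, src_len, hidden) encoder outputs
        scores = torch.bmm(memory, self.linear_in(query).unsqueeze(-1)).squeeze(-1)
        align = F.softmax(scores, dim=-1)                           # (batch, src_len)
        context = torch.bmm(align.unsqueeze(1), memory).squeeze(1)  # (batch, hidden)
        return context, align

# attn = GeneralAttentionSketch(500)
# context, align = attn(torch.randn(4, 500), torch.randn(4, 17, 500))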
Example 3
    def __init__(self, rnn_type, bidirectional_encoder, num_layers,
                 hidden_size, attn_type="general", attn_func="softmax",
                 coverage_attn=False, context_gate=None,
                 copy_attn=False, dropout=0.0, embeddings=None,
                 reuse_copy_attn=False, copy_attn_type="general",
                 num_emotion_classes=0, emotion_emb_size=0, 
                 generic_vocab_indices=None, emotion_vocab_indices=None, 
                 eds_type=0, no_clf_loss=False, no_eds_attention=False):
        super(RNNDecoderBase, self).__init__(
            attentional=attn_type != "none" and attn_type is not None)

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)
        self.embedding_size = self.embeddings.embedding_size
        self.vocab_size = self.embeddings.word_vocab_size
        self.eds_type = eds_type
        self.no_clf_loss = no_clf_loss
        self.no_eds_attention = no_eds_attention

        # Emotion embedding
        self.num_emotion_classes = num_emotion_classes
        self.emotion_emb_size = emotion_emb_size
        rnn_input_size = self._input_size
        if num_emotion_classes != 0 and emotion_emb_size != 0:
            self.emo_embedding = nn.Embedding(num_emotion_classes, emotion_emb_size)
            rnn_input_size += emotion_emb_size

        # EDS model
        self.generic_vocab_indices = None # a 1D list
        self.emotion_vocab_indices = None # a 2D list
        if generic_vocab_indices is not None:
            if not self.no_eds_attention:
                rnn_input_size *= 2 # one from word embedding and another from emotion embedding
            
            self.all_vocab_indices = nn.Parameter(torch.arange(0, self.vocab_size, dtype=torch.long), requires_grad=False)
            self.generic_vocab_indices = nn.Parameter(torch.LongTensor(generic_vocab_indices), requires_grad=False)
            self.emotion_vocab_indices = nn.Parameter(torch.LongTensor(emotion_vocab_indices), requires_grad=False)
            self.generic_vocab_size = self.generic_vocab_indices.size(0) 
            self.emotion_vocab_size = self.emotion_vocab_indices.size(1)
            self.num_emotions = self.emotion_vocab_indices.size(0)
            self.alpha = nn.Parameter(torch.zeros(hidden_size))
            self.beta = nn.Parameter(torch.zeros(hidden_size))
            self.gamma = nn.Parameter(torch.zeros(self.embedding_size))
            self.emotion_classifier = nn.Linear(self.embedding_size, self.num_emotions)
            self.generic_mask = nn.Parameter(torch.zeros(self.vocab_size), requires_grad=False)
            self.generic_mask[self.generic_vocab_indices] = 1
            
            other_emotion_indices = []
            flattened_emotion_vocab_indices = [i for e in emotion_vocab_indices for i in e]
            for i in range(len(emotion_vocab_indices)):
                other_emotion_indices.append(list(set(flattened_emotion_vocab_indices).difference(set(emotion_vocab_indices[i]))))
            self.other_emotion_indices = nn.Parameter(torch.LongTensor(other_emotion_indices), requires_grad=False)
            self.all_emotion_indices = nn.Parameter(torch.LongTensor(list(set(flattened_emotion_vocab_indices))), requires_grad=False)
            
            self.vocab_embedding = nn.Parameter(self.embeddings(self.all_vocab_indices.unsqueeze(0).unsqueeze(-1)).squeeze(0), requires_grad=False) # (vocab, emb_size)

        # Decoder state
        self.state = {}

        # Build the RNN.
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=rnn_input_size, # input_size=self._input_size
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)

        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(
                context_gate, self._input_size,
                hidden_size, hidden_size, hidden_size
            )

        # Set up the standard attention.
        self._coverage = coverage_attn
        if not self.attentional:
            if self._coverage:
                raise ValueError("Cannot use coverage term with no attention.")
            self.attn = None
        else:
            self.attn = GlobalAttention(
                hidden_size, coverage=coverage_attn,
                attn_type=attn_type, attn_func=attn_func
            )

        if copy_attn and not reuse_copy_attn:
            if copy_attn_type == "none" or copy_attn_type is None:
                raise ValueError(
                    "Cannot use copy_attn with copy_attn_type none")
            self.copy_attn = GlobalAttention(
                hidden_size, attn_type=copy_attn_type, attn_func=attn_func
            )
        else:
            self.copy_attn = None

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
        if self._reuse_copy_attn and not self.attentional:
            raise ValueError("Cannot reuse copy attention with no attention.")
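The generic/emotion vocabulary setup in this variant reduces to index tensors plus a 0/1 mask over the full vocabulary. A small standalone illustration of the same bookkeeping, using made-up toy index lists:

import torch

vocab_size = 10
generic_vocab_indices = [0, 1, 2, 3]              # toy 1D list of generic word ids
emotion_vocab_indices = [[4, 5], [6, 7], [8, 9]]  # toy 2D list: one row per emotion

generic_mask = torch.zeros(vocab_size)
generic_mask[torch.LongTensor(generic_vocab_indices)] = 1  # 1 at generic word ids

# For each emotion, collect the ids that belong to *other* emotions,
# mirroring the loop over emotion_vocab_indices above.
flat = [i for row in emotion_vocab_indices for i in row]
other_emotion_indices = [sorted(set(flat) - set(row)) for row in emotion_vocab_indices]

print(generic_mask)           # tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0.])
print(other_emotion_indices)  # [[6, 7, 8, 9], [4, 5, 8, 9], [4, 5, 6, 7]]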
Example 4
    def __init__(self,
                 rnn_type,
                 bidirectional_encoder,
                 num_layers,
                 hidden_size,
                 attn_type="general",
                 attn_func="softmax",
                 coverage_attn=False,
                 context_gate=None,
                 teacher_forcing="teacher",
                 copy_attn=False,
                 dropout=0.0,
                 embeddings=None,
                 reuse_copy_attn=False,
                 copy_attn_type="general"):
        super(RNNDecoderBase, self).__init__(
            attentional=attn_type != "none" and attn_type is not None)

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)
        self.teacher_forcing = teacher_forcing
        # Decoder state
        self.state = {}
        self.lin = nn.Linear(self.hidden_size, 100)  # This line!
        # Build the RNN.
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=self._input_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)
        self.eval_status = False
        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(context_gate,
                                                     self._input_size,
                                                     hidden_size, hidden_size,
                                                     hidden_size)

        # Set up the standard attention.
        self._coverage = coverage_attn
        if not self.attentional:
            if self._coverage:
                raise ValueError("Cannot use coverage term with no attention.")
            self.attn = None
        else:
            self.attn = GlobalAttention(hidden_size,
                                        coverage=coverage_attn,
                                        attn_type=attn_type,
                                        attn_func=attn_func)

        if copy_attn and not reuse_copy_attn:
            if copy_attn_type == "none" or copy_attn_type is None:
                raise ValueError(
                    "Cannot use copy_attn with copy_attn_type none")
            self.copy_attn = GlobalAttention(hidden_size,
                                             attn_type=copy_attn_type,
                                             attn_func=attn_func)
        else:
            self.copy_attn = None

        self.vocab_size = 0  # Only used by student-forcing, rand, and dist
        self.generator = None  # Only used by student-forcing, rand, and dist

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
        if self._reuse_copy_attn and not self.attentional:
            raise ValueError("Cannot reuse copy attention with no attention.")
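This variant only stores the teacher_forcing mode string here; the decode loop (not shown) would consult it when choosing the next input token. A rough, hypothetical sketch of that choice, covering a teacher mode that feeds the gold token, a student mode that feeds the model's own prediction, and a sampling mode; the helper name and exact mode strings are assumptions:

import torch

def next_input_token(mode, gold_token, logits):
    """Hypothetical helper for the decode loop (name and modes are assumptions).

    gold_token: (batch,) reference tokens at this step
    logits:     (batch, vocab) unnormalized scores from the generator
    """
    if mode == "teacher":                      # feed the reference token
        return gold_token
    if mode == "student":                      # feed the model's own prediction
        return logits.argmax(dim=-1)
    if mode == "rand":                         # sample from the predicted distribution
        probs = torch.softmax(logits, dim=-1)
        return torch.multinomial(probs, num_samples=1).squeeze(-1)
    raise ValueError(f"unknown teacher_forcing mode: {mode}")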
Example 5
    def __init__(
        self,
        rnn_type,
        bidirectional_encoder,
        num_layers,
        hidden_size,
        attn_type="general",
        attn_func="softmax",
        coverage_attn=False,
        context_gate=None,
        copy_attn=False,
        dropout=0.0,
        embeddings=None,
        reuse_copy_attn=False,
        copy_attn_type="general",
        target_encoder_type=None,
        detach_target_encoder=False,
    ):
        super(RNNDecoderBase, self).__init__(
            attentional=attn_type != "none" and attn_type is not None)

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)

        # Decoder state
        self.state = {}

        # @memray: hack to change size for target encoding
        self.input_size = self._input_size
        if target_encoder_type == 'none':
            target_encoder_type = None
        if target_encoder_type is not None:
            self.input_size += self.hidden_size

        # Build the RNN.
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=self.input_size,
                                   hidden_size=hidden_size,
                                   num_layers=1,
                                   dropout=dropout)

        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(context_gate,
                                                     self.input_size,
                                                     hidden_size, hidden_size,
                                                     hidden_size)

        # Set up the standard attention.
        self._coverage = coverage_attn
        if not self.attentional:
            if self._coverage:
                raise ValueError("Cannot use coverage term with no attention.")
            self.attn = None
        else:
            self.attn = GlobalAttention(hidden_size,
                                        coverage=coverage_attn,
                                        attn_type=attn_type,
                                        attn_func=attn_func)

        if copy_attn and not reuse_copy_attn:
            if copy_attn_type == "none" or copy_attn_type is None:
                raise ValueError(
                    "Cannot use copy_attn with copy_attn_type none")
            self.copy_attn = GlobalAttention(hidden_size,
                                             attn_type=copy_attn_type,
                                             attn_func=attn_func)
        else:
            self.copy_attn = None

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
        if self._reuse_copy_attn and not self.attentional:
            raise ValueError("Cannot reuse copy attention with no attention.")

        # @memray
        # Build the Target Encoder. Feed its output to the decoder as auxiliary input
        self.target_encoder_type = target_encoder_type
        self.target_encoder = None
        if target_encoder_type == 'rnn':
            self.target_encoder = self._build_rnn(
                "GRU",
                input_size=self.embeddings.embedding_size,
                hidden_size=hidden_size,
                num_layers=1,
                dropout=dropout)
        self.detach_target_encoder = detach_target_encoder
        self.bilinear_layer = nn.Bilinear(in1_features=hidden_size,
                                          in2_features=hidden_size,
                                          out_features=1)
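The nn.Bilinear layer created at the end scores a pair of equal-sized vectors as x1^T W x2 + b, presumably matching a decoder state against the target-encoder state. A tiny standalone usage example with made-up tensors:

import torch
import torch.nn as nn

hidden_size = 256
bilinear = nn.Bilinear(in1_features=hidden_size, in2_features=hidden_size, out_features=1)

dec_state = torch.randn(8, hidden_size)      # e.g. a batch of decoder states
tgt_enc_state = torch.randn(8, hidden_size)  # e.g. the matching target-encoder states
score = bilinear(dec_state, tgt_enc_state)   # (8, 1) compatibility scores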
Example 6
    def __init__(
        self,
        rnn_type,
        bidirectional_encoder,
        num_layers,
        hidden_size,
        attn_type="general",
        attn_func="softmax",
        coverage_attn=False,
        context_gate=None,
        copy_attn=False,
        dropout=0.0,
        embeddings=None,
        reuse_copy_attn=False,
        copy_attn_type="general",
        num_srcs: int = 1,
    ):
        super().__init__(
            attentional=attn_type != "none" and attn_type is not None)

        self.bidirectional_encoder = bidirectional_encoder
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embeddings = embeddings
        self.dropout = nn.Dropout(dropout)
        self.num_srcs = num_srcs

        # Decoder state
        self.state = {}

        # Build the RNN.
        self.rnn = self._build_rnn(rnn_type,
                                   input_size=self._input_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)

        # Hidden state merging
        self.hidden_active_0 = nn.LeakyReLU()
        self.hidden_merge_0 = nn.Linear(
            in_features=self.hidden_size * self.num_srcs,
            out_features=self.hidden_size,
        )
        if rnn_type == "LSTM":
            self.hidden_active_1 = nn.LeakyReLU()
            self.hidden_merge_1 = nn.Linear(
                in_features=self.hidden_size * self.num_srcs,
                out_features=self.hidden_size,
            )
        # end if

        # Set up the context gate.
        self.context_gate = None
        if context_gate is not None:
            self.context_gate = context_gate_factory(context_gate,
                                                     self._input_size,
                                                     hidden_size, hidden_size,
                                                     hidden_size)
        # end if

        # Set up the standard attention.
        assert not coverage_attn, "Coverage attention is not supported"
        self._coverage: bool = coverage_attn
        if not self.attentional:
            if self._coverage:
                raise ValueError("Cannot use coverage term with no attention.")
            self.ms_attn = None
        else:
            self.ms_attn = MultiSourceAPGlobalAttention(hidden_size,
                                                        coverage=coverage_attn,
                                                        attn_type=attn_type,
                                                        attn_func=attn_func)
        # end if

        # Copy attention
        if copy_attn and not reuse_copy_attn:
            if copy_attn_type == "none" or copy_attn_type is None:
                raise ValueError(
                    "Cannot use copy_attn with copy_attn_type none")
            self.copy_ms_attn = MultiSourceAPGlobalAttention(
                hidden_size, attn_type=copy_attn_type, attn_func=attn_func)
        else:
            self.copy_ms_attn = None
        # end if

        self._reuse_copy_attn = reuse_copy_attn and copy_attn
        if self._reuse_copy_attn and not self.attentional:
            raise ValueError("Cannot reuse copy attention with no attention.")
        return
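The hidden_merge_* layers in this last variant suggest that the per-source encoder final states are concatenated on the feature dimension and projected back to hidden_size, with a LeakyReLU applied along the way. A minimal sketch of such a merge under that assumption; the function name and the activation placement are guesses, not this repository's actual code:

import torch
import torch.nn as nn

def merge_source_states(states, merge, activation):
    """Hypothetical merge of per-source states, each of shape (num_layers, batch, hidden)."""
    concatenated = torch.cat(states, dim=-1)   # (num_layers, batch, hidden * num_srcs)
    return activation(merge(concatenated))     # (num_layers, batch, hidden)

num_srcs, hidden_size = 2, 256
merge = nn.Linear(hidden_size * num_srcs, hidden_size)
states = [torch.randn(2, 8, hidden_size) for _ in range(num_srcs)]
merged = merge_source_states(states, merge, nn.LeakyReLU())  # torch.Size([2, 8, 256])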