Example #1
    def __init__(
            self,
            input_size: int,
            hidden_size: int,
            num_layers: int,
            recurrent_dropout_probability: float = 0.0,
            layer_dropout_probability: float = 0.0,
            use_highway: bool = True,
    ) -> None:
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = True
        self.layer_dropout_probability = layer_dropout_probability

        layers = []
        lstm_input_size = input_size
        for layer_index in range(num_layers):
            forward_layer = AugmentedLstm(
                lstm_input_size,
                hidden_size,
                go_forward=True,
                recurrent_dropout_probability=recurrent_dropout_probability,
                use_highway=use_highway,
                use_input_projection_bias=False,
            )
            lstm_input_size = hidden_size
            self.add_module("forward_layer_{}".format(layer_index), forward_layer)
            layers.append(forward_layer)

        self.lstm_layers = layers
        self.layer_dropout = InputVariationalDropout(layer_dropout_probability)
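
A note on what InputVariationalDropout does here: it samples one dropout mask per sequence and reuses it at every timestep of a (batch, time, dim) tensor, instead of resampling the mask per position. A minimal plain-PyTorch sketch of the idea (an illustration, not the AllenNLP implementation):

import torch

def variational_dropout(x: torch.Tensor, p: float, training: bool = True) -> torch.Tensor:
    """Drop the same feature positions at every timestep of x: (batch, time, dim)."""
    if not training or p == 0.0:
        return x
    # Sample one mask per sequence and broadcast it over the time axis.
    mask = x.new_ones(x.size(0), 1, x.size(2))
    mask = torch.nn.functional.dropout(mask, p=p, training=True)
    return x * mask

# Usage: every timestep of a sequence sees the identical mask.
out = variational_dropout(torch.randn(4, 10, 8), p=0.5)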
Example #2
    def __init__(self, hidden_size, drop_prob=0.):
        super(Co_Attention, self).__init__()

        self.drop_prob = drop_prob

        # For get_similarity_matrix_slqa
        self.shared_weight = nn.Parameter(torch.zeros(hidden_size,
                                                      hidden_size))

        nn.init.xavier_uniform_(self.shared_weight)

        # dropout mar 13 YL
        self._variational_dropout = InputVariationalDropout(drop_prob)

        # fuse
        self.linear_fuse_pq = nn.Linear(hidden_size * 4,
                                        hidden_size,
                                        bias=True)
        self.linear_fuse_qp = nn.Linear(hidden_size * 4,
                                        hidden_size,
                                        bias=True)
        self.tanh = nn.Tanh()

        # gate
        self.linear_gate_pq = nn.Linear(hidden_size * 4, 1, bias=True)
        self.linear_gate_qp = nn.Linear(hidden_size * 4, 1, bias=True)
        self.sigmoid = nn.Sigmoid()
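
The fuse/gate modules above follow the SLQA-style fusion pattern: concatenate the aligned pair with its elementwise product and difference, project it with tanh, and mix it with the original input through a sigmoid gate. A hypothetical sketch of how these layers could be combined in the forward pass (the tensor names p and q_att are assumptions, not from the original):

import torch

def fuse_with_gate(p, q_att, linear_fuse, linear_gate):
    # p, q_att: (batch, seq_len, hidden); q_att is the co-attended counterpart of p.
    combined = torch.cat([p, q_att, p * q_att, p - q_att], dim=-1)  # (..., hidden * 4)
    fused = torch.tanh(linear_fuse(combined))    # candidate representation
    gate = torch.sigmoid(linear_gate(combined))  # per-position mixing weight in (0, 1)
    return gate * fused + (1 - gate) * p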
Example #3
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        qdep_henc_rnn: Seq2SeqEncoder,
        senc_self_attn: GatedMultifactorSelfAttnEnc,
        attnpool: AttnPooling,
        output_ffl: FeedForward,
        initializer: InitializerApplicator,
        dropout: float = 0.3,
    ) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder

        self._qdep_henc_rnn = qdep_henc_rnn
        self._senc_self_attn = senc_self_attn

        self._variational_dropout = InputVariationalDropout(dropout)
        self._attn_pool = attnpool
        self._output_ffl = output_ffl

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        self._accuracy = CategoricalAccuracy()
        self._mae = MeanAbsoluteError()
        self._loss = torch.nn.MSELoss()
        self._softmax = torch.nn.Softmax(dim=1)
        initializer(self)
Example #4
    def __init__(self, inp_dim: int,
                 hidden_dim: int,
                 dropout: float = 0.0):
        super(ProjectedEncoder, self).__init__()
        self.encoder = torch.nn.Linear(inp_dim, hidden_dim, bias=False)
        self.dropout = InputVariationalDropout(dropout)
        self.hidden_dim = hidden_dim
        self.reset_parameters()
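
The forward pass is not shown; presumably the linear projection is followed by the variational dropout. A sketch of what that might look like, inferred from this constructor alone:

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        # Assumed shapes: (batch, seq_len, inp_dim) -> (batch, seq_len, hidden_dim).
        return self.dropout(self.encoder(inputs))

    def get_output_dim(self) -> int:
        return self.hidden_dim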
Example #5
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 num_context_answers: int = 0,
                 marker_embedding_dim: int = 10,
                 max_span_length: int = 30) -> None:
        super().__init__(vocab)
        self._num_context_answers = num_context_answers
        self._max_span_length = max_span_length
        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._marker_embedding_dim = marker_embedding_dim
        self._encoding_dim = phrase_layer.get_output_dim()
        max_turn_length = 12

        self._matrix_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
        self._merge_atten = TimeDistributed(torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))

        self._residual_encoder = residual_encoder

        if num_context_answers > 0:
            self._question_num_marker = torch.nn.Embedding(max_turn_length,
                                                           marker_embedding_dim * num_context_answers)
            self._prev_ans_marker = torch.nn.Embedding((num_context_answers * 4) + 1, marker_embedding_dim)

        self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')

        self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
        self._merge_self_attention = TimeDistributed(torch.nn.Linear(self._encoding_dim * 3,
                                                                     self._encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 1))
        self._span_yesno_predictor = TimeDistributed(torch.nn.Linear(self._encoding_dim, 3))
        self._span_followup_predictor = TimeDistributed(self._followup_lin)

        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_yesno_accuracy = CategoricalAccuracy()
        self._span_followup_accuracy = CategoricalAccuracy()

        self._span_gt_yesno_accuracy = CategoricalAccuracy()
        self._span_gt_followup_accuracy = CategoricalAccuracy()

        self._span_accuracy = BooleanAccuracy()
        self._official_f1 = Average()
        self._variational_dropout = InputVariationalDropout(dropout)
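
For reference, the 'x,y,x*y' combination string used by LinearMatrixAttention means each attention score is a learned linear function of the two vectors and their elementwise product, roughly score(x, y) = w^T [x; y; x*y] + b. A small plain-PyTorch illustration of that scoring rule (illustrative only, not the AllenNLP class):

import torch

def linear_attention_scores(X, Y, w, b):
    # X: (batch, m, d), Y: (batch, n, d), w: (3 * d,), b: scalar.
    Xe = X.unsqueeze(2).expand(-1, -1, Y.size(1), -1)  # (batch, m, n, d)
    Ye = Y.unsqueeze(1).expand(-1, X.size(1), -1, -1)  # (batch, m, n, d)
    combined = torch.cat([Xe, Ye, Xe * Ye], dim=-1)    # the 'x,y,x*y' combination
    return torch.matmul(combined, w) + b               # (batch, m, n)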
Example #6
    def __init__(
            self,
            vocab: Vocabulary,
            elmo_embedder: TextFieldEmbedder,
            tokens_embedder: TextFieldEmbedder,
            features_embedder: TextFieldEmbedder,
            phrase_layer: Seq2SeqEncoder,
            projected_layer: Seq2SeqEncoder,
            contextual_passage: Seq2SeqEncoder,
            contextual_question: Seq2SeqEncoder,
            dropout: float = 0.2,
            regularizer: Optional[RegularizerApplicator] = None,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):

        super(MultiGranularityHierarchicalAttentionFusionNetworks,
              self).__init__(vocab, regularizer)
        self.elmo_embedder = elmo_embedder
        self.tokens_embedder = tokens_embedder
        self.features_embedder = features_embedder
        self._phrase_layer = phrase_layer
        self._encoding_dim = self._phrase_layer.get_output_dim()
        self.projected_layer = torch.nn.Linear(self._encoding_dim + 1024,
                                               self._encoding_dim)
        self.fuse_p = FusionLayer(self._encoding_dim)
        self.fuse_q = FusionLayer(self._encoding_dim)
        self.fuse_s = FusionLayer(self._encoding_dim)
        self.projected_lstm = projected_layer
        self.contextual_layer_p = contextual_passage
        self.contextual_layer_q = contextual_question
        self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)
        # self._self_attention = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
        self._self_attention = BilinearMatrixAttention(self._encoding_dim,
                                                       self._encoding_dim)
        self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.yesno_predictor = FeedForward(self._encoding_dim,
                                           self._encoding_dim, 3)
        self.relu = torch.nn.ReLU()

        self._max_span_length = 30

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._span_yesno_accuracy = CategoricalAccuracy()
        self._official_f1 = Average()
        self._variational_dropout = InputVariationalDropout(dropout)

        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #7
    def __init__(
            self,
            vocab: Vocabulary,
            embedder: PretrainedBertEmbedder,
            passage_BiLSTM: Seq2SeqEncoder,
            columns_BiLSTM: Seq2SeqEncoder,
            sentence_BiLSTM: Seq2SeqEncoder,
            passage_contextual: Seq2SeqEncoder,
            columns_contextual: Seq2SeqEncoder,
            sentence_contextual: Seq2SeqEncoder,
            dropout: float = 0.2,
            regularizer: Optional[RegularizerApplicator] = None,
            initializer: InitializerApplicator = InitializerApplicator(),
    ):
        super(NumSeq2SQL, self).__init__(vocab, regularizer)

        # BERT Embedding
        self._embedder = embedder
        self._baidu_embedder = self.load_word_emb()

        self._passage_BiLSTM = passage_BiLSTM
        self._columns_BiLSTM = columns_BiLSTM
        self._sentence_BiLSTM = sentence_BiLSTM

        self._passage_contextual = passage_contextual
        self._columns_contextual = columns_contextual
        self._sentence_contextual = sentence_contextual

        self._encoding_dim = self._passage_BiLSTM.get_output_dim()
        self.projected_layer = torch.nn.Linear(self._encoding_dim,
                                               self._encoding_dim)
        self.fuse_p = FusionLayer(self._encoding_dim)
        self.fuse_c = FusionLayer(self._encoding_dim)
        self.fuse_s = FusionLayer(self._encoding_dim)

        self.linear_self_align = torch.nn.Linear(self._encoding_dim, 1)

        self.bilinear_layer_s = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.bilinear_layer_e = BilinearSeqAtt(self._encoding_dim,
                                               self._encoding_dim)
        self.yesno_predictor = torch.nn.Linear(self._encoding_dim, 3)
        self.relu = torch.nn.ReLU()

        self._max_span_length = 30

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._variational_dropout = InputVariationalDropout(dropout)
        self._span_yesno_accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #8
    def __init__(self,
                 char_to_index,
                 char_embed_size,
                 hidden_size,
                 output_size,
                 dropout,
                 cuda_flag,
                 batch_first=True):
        """
        Args:
            char_to_index:
            char_embed_size: char embeddings dim
            hidden_size: lstm reccurent dim
            dropout: dropout probability
            batch_first: batch first option
        """

        super(Char_RNN, self).__init__()

        self.char_to_index = char_to_index
        self.char_embed_size = char_embed_size
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.output_size = output_size
        self.batch_first = batch_first
        self.padding_index = self.char_to_index['__PADDING__']
        self.cuda_flag = cuda_flag

        self.char_encoder = nn.Embedding(len(self.char_to_index),
                                         self.char_embed_size,
                                         sparse=True,
                                         padding_idx=self.padding_index)
        torch.nn.init.xavier_uniform_(self.char_encoder.weight.data)

        self.char_rnn = AugmentedLstm(
            input_size=self.char_embed_size,
            hidden_size=self.hidden_size,
            go_forward=True,
            recurrent_dropout_probability=self.dropout,
            use_highway=False,
            use_input_projection_bias=False)

        self.char_rnn.state_linearity.bias.data.fill_(0.0)
        self.var_drop = InputVariationalDropout(self.dropout)
        self.w_atten = nn.Linear(self.hidden_size, 1, bias=False)
        self.w_atten.weight.data.fill_(0.0)
        self.char_projection = nn.Linear(self.hidden_size * 2,
                                         self.output_size,
                                         bias=True)
        self.char_projection.weight.data.fill_(0.0)
        self.char_projection.bias.data.fill_(0.0)
        self.drp = nn.Dropout(self.dropout)
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 pseqlevelenc: Seq2SeqEncoder,
                 qaseqlevelenc: Seq2SeqEncoder,
                 choicelevelenc: Seq2SeqEncoder,
                 cartesian_attn: SeqAttnMat,
                 pcattnmat: SeqAttnMat,
                 gate_qdep_penc: GatedEncoding,
                 qdep_penc_rnn: Seq2SeqEncoder,
                 mfa_enc: GatedMultifactorSelfAttnEnc,
                 mfa_rnn: Seq2SeqEncoder,
                 pqaattnmat: SeqAttnMat,
                 cqaattnmat: SeqAttnMat,
                 initializer: InitializerApplicator,
                 dropout: float = 0.3,
                 is_qdep_penc: bool = True,
                 is_mfa_enc: bool = True,
                 with_knowledge: bool = True,
                 is_qac_ap: bool = True,
                 shared_rnn: bool = True) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder
        self._pseqlevel_enc = pseqlevelenc
        self._qaseqlevel_enc = qaseqlevelenc
        self._cseqlevel_enc = choicelevelenc

        self._cart_attn = cartesian_attn
        self._pqaattnmat = pqaattnmat
        self._pcattnmat = pcattnmat
        self._cqaattnmat = cqaattnmat

        self._gate_qdep_penc = gate_qdep_penc
        self._qdep_penc_rnn = qdep_penc_rnn
        self._multifactor_attn = mfa_enc
        self._mfarnn = mfa_rnn

        self._with_knowledge = with_knowledge
        self._qac_ap = is_qac_ap
        if not self._with_knowledge:
            if not self._qac_ap:
                raise AssertionError
        self._is_qdep_penc = is_qdep_penc
        self._is_mfa_enc = is_mfa_enc
        self._shared_rnn = shared_rnn

        self._variational_dropout = InputVariationalDropout(dropout)

        self._num_labels = vocab.get_vocab_size(namespace="labels")
        self._auc = Auc()
        self._loss = torch.nn.BCELoss()
        initializer(self)
Example #10
    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int,
        recurrent_dropout_probability: float = 0.0,
        layer_dropout_probability: float = 0.0,
        use_highway: bool = True,
    ) -> None:
        super().__init__()

        # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = True

        layers = []
        lstm_input_size = input_size
        for layer_index in range(num_layers):

            forward_layer = AugmentedLstm(
                lstm_input_size,
                hidden_size,
                go_forward=True,
                recurrent_dropout_probability=recurrent_dropout_probability,
                use_highway=use_highway,
                use_input_projection_bias=False,
            )
            backward_layer = AugmentedLstm(
                lstm_input_size,
                hidden_size,
                go_forward=False,
                recurrent_dropout_probability=recurrent_dropout_probability,
                use_highway=use_highway,
                use_input_projection_bias=False,
            )

            lstm_input_size = hidden_size * 2
            self.add_module("forward_layer_{}".format(layer_index),
                            forward_layer)
            self.add_module("backward_layer_{}".format(layer_index),
                            backward_layer)
            layers.append([forward_layer, backward_layer])
        self.lstm_layers = layers
        self.layer_dropout = InputVariationalDropout(layer_dropout_probability)
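
Only the constructor appears above; in use the module consumes a PackedSequence, concatenates each layer's forward and backward outputs, and applies the layer dropout between layers but not after the last one. A simplified sketch under those assumptions, ignoring initial/final hidden-state handling:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

def stacked_bilstm_forward(lstm_layers, layer_dropout, inputs):
    """Sketch of the stacked pass; `inputs` is a PackedSequence."""
    output = inputs
    for i, (forward_layer, backward_layer) in enumerate(lstm_layers):
        fwd, _ = forward_layer(output)
        bwd, _ = backward_layer(output)
        fwd, lengths = pad_packed_sequence(fwd, batch_first=True)
        bwd, _ = pad_packed_sequence(bwd, batch_first=True)
        padded = torch.cat([fwd, bwd], dim=-1)       # (batch, seq_len, 2 * hidden)
        if i < len(lstm_layers) - 1:
            padded = layer_dropout(padded)           # variational dropout between layers
        output = pack_padded_sequence(padded, lengths, batch_first=True)
    return output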
Example #11
    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 num_layers: int,
                 layer_dropout_probability: float = 0.0,
                 use_residual: bool = True,
                 use_residual_projection: bool = False) -> None:
        super(ResidualBidirectionalLstm, self).__init__()

        # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = True
        self.use_residual = use_residual
        self.use_residual_projection = use_residual_projection

        layers = []
        lstm_input_size = input_size
        for layer_index in range(num_layers):

            layer = nn.LSTM(lstm_input_size,
                            hidden_size,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

            if use_residual and layer_index < (self.num_layers - 1):
                if use_residual_projection or lstm_input_size != hidden_size * 2:
                    residual_projection = nn.Linear(lstm_input_size,
                                                    hidden_size * 2,
                                                    bias=False)
                else:
                    residual_projection = nn.Identity()
                self.add_module('res_proj_{}'.format(layer_index),
                                residual_projection)

            lstm_input_size = hidden_size * 2
            self.add_module('layer_{}'.format(layer_index), layer)
            layers.append(layer)

        self.lstm_layers = layers
        self.layer_dropout = InputVariationalDropout(layer_dropout_probability)
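
The forward pass is not shown; the constructor suggests that each layer's output is summed with a (possibly projected) copy of its input, with variational dropout applied between layers. A method sketch under that assumption, for padded (batch, seq_len, dim) inputs and with hidden-state handling omitted:

    def forward(self, inputs):
        # Sketch only; assumes padded inputs of shape (batch, seq_len, input_size).
        output = inputs
        for i, layer in enumerate(self.lstm_layers):
            layer_out, _ = layer(output)                       # (batch, seq_len, 2 * hidden)
            if self.use_residual and i < self.num_layers - 1:
                residual = getattr(self, 'res_proj_{}'.format(i))(output)
                layer_out = layer_out + residual               # residual connection
            if i < self.num_layers - 1:
                layer_out = self.layer_dropout(layer_out)      # dropout between layers only
            output = layer_out
        return output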
Example #12
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        embedding_dim: int,
        max_mention_length: int,
        max_embeddings: int,
        variational_dropout_rate: float = 0.0,
        dropout_rate: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super(EntityNLMDiscriminator, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._embedding_dim = embedding_dim
        self._max_mention_length = max_mention_length
        self._max_embeddings = max_embeddings

        self._state: Optional[StateDict] = None

        # Input variational dropout
        self._variational_dropout = InputVariationalDropout(
            variational_dropout_rate)
        self._dropout = torch.nn.Dropout(dropout_rate)

        # For entity type prediction
        self._entity_type_projection = torch.nn.Linear(
            in_features=embedding_dim, out_features=2, bias=False)
        self._dynamic_embeddings = DynamicEmbedding(
            embedding_dim=embedding_dim, max_embeddings=max_embeddings)

        # For mention length prediction
        self._mention_length_projection = torch.nn.Linear(
            in_features=2 * embedding_dim, out_features=max_mention_length)

        self._entity_type_accuracy = CategoricalAccuracy()
        self._entity_id_accuracy = CategoricalAccuracy()
        self._mention_length_accuracy = CategoricalAccuracy()

        initializer(self)
Example #13
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        attnpool: AttnPooling,
        output_ffl: FeedForward,
        initializer: InitializerApplicator,
        dropout: float = 0.3,
    ) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder

        self._variational_dropout = InputVariationalDropout(dropout)
        self._attn_pool = attnpool
        self._output_ffl = output_ffl

        self._num_labels = vocab.get_vocab_size(namespace="labels")
        self._auc = Auc()
        self._loss = torch.nn.BCELoss()
        initializer(self)
Example #14
    def __init__(self, word_vectors, hidden_size, drop_prob=0.):
        super(Encoder, self).__init__()

        self.input_size_query = 300
        self.input_size_doc = 300
        self.embed_size = 300
        self.drop_prob = drop_prob

        # Word embedding: glove
        self.glove_embedding = nn.Embedding.from_pretrained(word_vectors)

        # two Bi-LSTM
        self.bilstm1 = RNNEncoder(input_size=self.input_size_doc,
                                  hidden_size=hidden_size,
                                  drop_prob=drop_prob,
                                  num_layers=1)
        self.bilstm2 = RNNEncoder(input_size=self.input_size_query,
                                  hidden_size=hidden_size,
                                  drop_prob=drop_prob,
                                  num_layers=1)

        # dropout mar 13 YL
        self._variational_dropout = InputVariationalDropout(drop_prob)
Example #15
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 dropout: float = None,
                 tie_embeddings: bool = True,
                 num_samples: int = None,
                 use_variational_dropout: bool = False):
        super().__init__(vocab)

        self._embedder = embedder
        self._contextualizer = contextualizer
        self._context_dim = contextualizer.get_output_dim()

        if not dropout:
            self._dropout = lambda x: x
        elif use_variational_dropout:
            self._dropout = InputVariationalDropout(dropout)
        else:
            self._dropout = Dropout(dropout)

        vocab_size = self.vocab.get_vocab_size()
        padding_index = self.vocab.get_token_index(DEFAULT_PADDING_TOKEN)
        if num_samples:
            self._softmax_loss = SampledSoftmaxLoss(vocab_size,
                                                    self._context_dim,
                                                    num_samples)
        else:
            self._softmax_loss = SoftmaxLoss(vocab_size, self._context_dim,
                                             padding_index)

        self._tie_embeddings = tie_embeddings
        if self._tie_embeddings:
            embedder_children = dict(self._embedder.named_children())
            word_embedder = embedder_children["token_embedder_tokens"]
            assert (self._softmax_loss.softmax_w.size()
                    == word_embedder.weight.size())
            self._softmax_loss.softmax_w = word_embedder.weight
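
Weight tying here simply aliases the softmax weight matrix to the token-embedding matrix, so both ends of the model train a single tensor; the assert guards the shape match. A minimal illustration with hypothetical sizes:

import torch

emb = torch.nn.Embedding(num_embeddings=1000, embedding_dim=64)
softmax_w = torch.nn.Linear(64, 1000, bias=False)

assert softmax_w.weight.shape == emb.weight.shape  # both are (1000, 64)
softmax_w.weight = emb.weight                      # now a single shared Parameter
assert softmax_w.weight.data_ptr() == emb.weight.data_ptr()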
Example #16
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        embedding_dim: int,
        max_mention_length: int,
        max_embeddings: int,
        tie_weights: bool,
        variational_dropout_rate: float = 0.0,
        dropout_rate: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super(EntityNLM, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._embedding_dim = embedding_dim
        self._max_mention_length = max_mention_length
        self._max_embeddings = max_embeddings
        self._tie_weights = tie_weights
        self._variational_dropout_rate = variational_dropout_rate
        self._dropout_rate = dropout_rate

        self._state: Optional[StateDict] = None

        # Input variational dropout
        self._variational_dropout = InputVariationalDropout(
            variational_dropout_rate)
        self._dropout = torch.nn.Dropout(dropout_rate)

        # For entity type prediction
        self._entity_type_projection = torch.nn.Linear(
            in_features=embedding_dim, out_features=2, bias=False)
        self._dynamic_embeddings = DynamicEmbedding(
            embedding_dim=embedding_dim, max_embeddings=max_embeddings)

        # For mention length prediction
        self._mention_length_projection = torch.nn.Linear(
            in_features=2 * embedding_dim, out_features=max_mention_length)

        # For next word prediction
        self._dummy_context_embedding = Parameter(
            F.normalize(torch.randn(1, embedding_dim)))  # TODO: Maybe squeeze
        self._entity_output_projection = torch.nn.Linear(
            in_features=embedding_dim, out_features=embedding_dim, bias=False)
        self._context_output_projection = torch.nn.Linear(
            in_features=embedding_dim, out_features=embedding_dim, bias=False)
        self._vocab_projection = torch.nn.Linear(
            in_features=embedding_dim,
            out_features=vocab.get_vocab_size('tokens'))
        if tie_weights:
            self._vocab_projection.weight = self._text_field_embedder._token_embedders[
                'tokens'].weight  # pylint: disable=W0212

        # self._perplexity = Perplexity()
        # self._unknown_penalized_perplexity = UnknownPenalizedPerplexity(self.vocab)
        self._entity_type_accuracy = CategoricalAccuracy()
        self._entity_id_accuracy = CategoricalAccuracy()
        self._mention_length_accuracy = CategoricalAccuracy()

        initializer(self)
Example #17
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            phrase_layer: Seq2SeqEncoder,
            residual_encoder: Seq2SeqEncoder,
            ctx_q_encoder: ContextualizedQuestionEncoder,  # used to get the m_t representations
            span_start_encoder: Seq2SeqEncoder,
            span_end_encoder: Seq2SeqEncoder,
            initializer: InitializerApplicator,
            dropout: float = 0.2,
            num_context_answers: int = 0,
            marker_embedding_dim: int = 10,
            max_span_length: int = 30) -> None:
        super().__init__(vocab)
        print('INIT MODEL')
        self._num_context_answers = num_context_answers
        self._max_span_length = max_span_length
        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._marker_embedding_dim = marker_embedding_dim
        self._encoding_dim = phrase_layer.get_output_dim()

        self._train_coref_module = True

        # combine memory with question
        max_turn_length = 12

        self._ctx_q_encoder = ctx_q_encoder

        self._matrix_attention = LinearMatrixAttention(self._encoding_dim,
                                                       self._encoding_dim,
                                                       'x,y,x*y')
        self._merge_atten = TimeDistributed(
            torch.nn.Linear(self._encoding_dim * 4, self._encoding_dim))

        self._residual_encoder = residual_encoder

        if num_context_answers > 0:
            self._question_num_marker = torch.nn.Embedding(
                max_turn_length, marker_embedding_dim)
            self._prev_ans_marker = torch.nn.Embedding(
                (num_context_answers * 4) + 1, marker_embedding_dim)

        if self._ctx_q_encoder.use_ling:
            pos_tags = self.vocab.get_vocab_size('pos_tags')
            self._pos_emb = torch.nn.Embedding(pos_tags, marker_embedding_dim)

        self._self_attention = LinearMatrixAttention(self._encoding_dim,
                                                     self._encoding_dim,
                                                     'x,y,x*y')

        self._followup_lin = torch.nn.Linear(self._encoding_dim, 3)
        self._merge_self_attention = TimeDistributed(
            torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._span_yesno_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 3))
        self._span_followup_predictor = TimeDistributed(self._followup_lin)

        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_yesno_accuracy = CategoricalAccuracy()
        self._span_followup_accuracy = CategoricalAccuracy()

        self._span_gt_yesno_accuracy = CategoricalAccuracy()
        self._span_gt_followup_accuracy = CategoricalAccuracy()

        self._span_accuracy = BooleanAccuracy()
        self._official_f1 = Average()
        self._variational_dropout = InputVariationalDropout(dropout)
Example #18
    def __init__(self, vocab: Vocabulary,
                 text_encoder: Seq2SeqEncoder,
                 word_embedder: TextFieldEmbedder,
                 enable_training_log: bool = False,
                 inp_drop_rate: float = 0.2,
                 out_drop_rate: float = 0.2,
                 loss_weights: List = (0.2, 0.4, 0.4),
                 super_mode: str = 'before',
                 backbone: str = 'unet',
                 unet_down_channel: int = 256,
                 feature_sel: int = 127):
        super(UnifiedFollowUp, self).__init__(vocab)
        self.text_encoder = text_encoder
        self.word_embedder = word_embedder

        """
        Define model arch choices
        """
        self.backbone = backbone

        # input dropout
        if inp_drop_rate > 0:
            self.var_inp_dropout = InputVariationalDropout(p=inp_drop_rate)
        else:
            self.var_inp_dropout = lambda x: x
        # output dropout
        if out_drop_rate > 0:
            self.var_out_dropout = InputVariationalDropout(p=out_drop_rate)
        else:
            self.var_out_dropout = lambda x: x

        self.hidden_size = text_encoder.get_output_dim() // 2 if text_encoder.is_bidirectional() \
            else text_encoder.get_output_dim()

        self.output_size = text_encoder.get_output_dim()

        # ele -> element wise multiply
        # dot -> dot product
        # cos -> cosine similarity
        # emb_dot -> embedding dot product
        # emb_cos -> embedding cosine similarity
        # linear -> linear similarity
        # bilinear -> bilinear similarity

        sel_arr = "{0:07b}".format(int(feature_sel))
        nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

        self.segment_choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']
        # If expanding bi-directional outputs, forward/backward are treated as two channels.
        self.expand_bidir = False

        self.similar_function = ModuleDict({
            'ele': ElementWiseMatrixAttention(),
            'dot': DotProductMatrixAttention(),
            'cos': CosineMatrixAttention(),
            'emb_dot': DotProductMatrixAttention(),
            'emb_cos': CosineMatrixAttention(),
            'bilinear': BilinearMatrixAttention(matrix_1_dim=self.output_size, matrix_2_dim=self.output_size),
            'linear': LinearMatrixAttention(tensor_1_dim=self.output_size, tensor_2_dim=self.output_size)
        })

        self.attn_channel = 0
        for choice in self.segment_choices:
            if choice == 'ele':
                self.attn_channel += self.output_size
            elif choice in ['dot', 'cos', 'emb_dot', 'emb_cos', 'bilinear', 'linear']:
                if self.expand_bidir:
                    self.attn_channel += 2
                else:
                    self.attn_channel += 1

        self.class_mapping: Dict[str, int] = get_class_mapping(super_mode=super_mode)

        # There are two backbone choices here: one is an MLP, the other is a UNet.
        if self.backbone == 'unet':
            self.segmentation_net = AttentionUNet(input_channels=self.attn_channel,
                                                  class_number=len(self.class_mapping.keys()),
                                                  down_channel=unet_down_channel)
        else:
            raise Exception("Currently we do not support other architectures.")

        class_zero_weight = loss_weights[0]
        class_one_weight = loss_weights[1]

        self.register_buffer('weight_tensor', torch.tensor([class_zero_weight, class_one_weight,
                                                            1 - class_zero_weight - class_one_weight]))
        self.loss = nn.CrossEntropyLoss(ignore_index=-1,
                                        weight=self.weight_tensor)

        # initialize metrics measurement
        self.metrics = {'ROUGE': BatchAverage(),
                        '_ROUGE1': BatchAverage(),
                        '_ROUGE2': BatchAverage(),
                        # TODO: You can speed up the code by disabling BLEU,
                        #  since the corpus-based BLEU metric is time-consuming.
                        'BLEU': CorpusBLEUMetric(),
                        'EM': BatchAverage(),
                        'F1': FScoreMetric(prefix="1"),
                        'F2': FScoreMetric(prefix="2"),
                        'F3': FScoreMetric(prefix="3")}

        parameter_num = count_parameters(self)
        print(parameter_num)

        self.min_width = 8
        self.min_height = 8
        self.enable_training_log = enable_training_log
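
feature_sel is decoded as a 7-bit mask over the seven similarity choices, most significant bit first: 127 ('1111111') enables all of them, while a hypothetical value of 80 ('1010000') would enable only 'ele' and 'cos'. A small standalone check of that decoding:

nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

def decode_feature_sel(feature_sel: int):
    sel_arr = "{0:07b}".format(int(feature_sel))
    return [nni_choices[i] for i in range(7) if sel_arr[i] == '1']

print(decode_feature_sel(127))  # all seven similarity channels
print(decode_feature_sel(80))   # ['ele', 'cos']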
Example #19
    def __init__(self, n_relations: int, conf: Dict,
                 input_batchers: Dict[str, InputBatch], use_cuda: bool):
        super(BiaffineParser, self).__init__()
        self.n_relations = n_relations
        self.conf = conf
        self.use_cuda = use_cuda
        self.use_mst_decoding_for_validation = conf[
            'use_mst_decoding_for_validation']

        input_layers = {}
        for i, c in enumerate(conf['input']):
            if c['type'] == 'embeddings':
                if 'pretrained' in c:
                    embs = load_embedding_txt(c['pretrained'], c['has_header'])
                    logger.info('loaded {0} embedding entries.'.format(
                        len(embs[0])))
                else:
                    embs = None
                name = c['name']
                mapping = input_batchers[name].mapping
                layer = Embeddings(name,
                                   c['dim'],
                                   mapping,
                                   fix_emb=c['fixed'],
                                   embs=embs,
                                   normalize=c.get('normalize', False))
                logger.info('embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], layer.n_V, layer.n_d))
                input_layers[name] = layer

            elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
                name = c['name']
                mapping = input_batchers[name].mapping
                embeddings = Embeddings('{0}_ch_emb'.format(name),
                                        c['dim'],
                                        mapping,
                                        fix_emb=False,
                                        embs=None,
                                        normalize=False)
                logger.info('character embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], embeddings.n_V, embeddings.n_d))
                if c['type'] == 'lstm_encoder':
                    layer = LstmTokenEmbedder(name, c['dim'], embeddings,
                                              conf['dropout'], use_cuda)
                elif c['type'] == 'cnn_encoder':
                    layer = ConvTokenEmbedder(name, c['dim'], embeddings,
                                              c['filters'],
                                              c.get('n_highway', 1),
                                              c.get('activation',
                                                    'relu'), use_cuda)
                else:
                    raise ValueError('Unknown type: {}'.format(c['type']))
                input_layers[name] = layer

            elif c['type'] == 'elmo':
                name = c['name']
                layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
                input_layers[name] = layer

            else:
                raise ValueError('{} unknown input layer'.format(c['type']))

        self.input_layers = torch.nn.ModuleDict(input_layers)

        input_encoders = []
        input_dim = 0
        for i, c in enumerate(conf['input_encoder']):
            input_info = {
                name: [
                    entry['dim'] for entry in conf['input']
                    if entry['name'] == name
                ][0]
                for name in c['input']
            }

            if c['type'] == 'affine':
                input_encoder = AffineTransformInputEncoder(
                    input_info, c['dim'], use_cuda)
            elif c['type'] == 'sum':
                input_encoder = SummationInputEncoder(input_info, use_cuda)
            elif c['type'] == 'concat':
                input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
            else:
                raise ValueError('{} unknown input encoder'.format(c['type']))

            input_dim += input_encoder.get_output_dim()
            input_encoders.append(input_encoder)

        self.input_encoders = torch.nn.ModuleList(input_encoders)

        c = conf['context_encoder']
        if c['type'] == 'stacked_bidirectional_lstm_dozat':
            self.encoder = PytorchSeq2SeqWrapper(
                InputDropoutedStackedBidirectionalLstm(
                    DozatLstmCell,
                    num_layers=c['num_layers'],
                    input_size=input_dim,
                    hidden_size=c['hidden_dim'],
                    recurrent_dropout_probability=c[
                        'recurrent_dropout_probability'],
                    layer_dropout_probability=c['layer_dropout_probability'],
                    activation=Activation.by_name("leaky_relu")()),
                stateful=False)
        elif c['type'] == 'stacked_bidirectional_lstm_ma':
            self.encoder = PytorchSeq2SeqWrapper(
                InputDropoutedStackedBidirectionalLstm(
                    MaLstmCell,
                    num_layers=c['num_layers'],
                    input_size=input_dim,
                    hidden_size=c['hidden_dim'],
                    recurrent_dropout_probability=c[
                        'recurrent_dropout_probability'],
                    layer_dropout_probability=c['layer_dropout_probability'],
                    activation=Activation.by_name("tanh")()),
                stateful=False)
        elif c['type'] == 'stacked_bidirectional_lstm':
            self.encoder = PytorchSeq2SeqWrapper(
                StackedBidirectionalLstm(
                    num_layers=c['num_layers'],
                    input_size=input_dim,
                    hidden_size=c['hidden_dim'],
                    recurrent_dropout_probability=c[
                        'recurrent_dropout_probability'],
                    layer_dropout_probability=c['layer_dropout_probability']),
                stateful=False)
        else:
            self.encoder = DummyContextEncoder()

        encoder_dim = self.encoder.get_output_dim()
        c = conf['biaffine_parser']
        self.arc_representation_dim = arc_representation_dim = c[
            'arc_representation_dim']
        self.tag_representation_dim = tag_representation_dim = c[
            'tag_representation_dim']

        self.head_sentinel_ = torch.nn.Parameter(
            torch.randn([1, 1, encoder_dim]))

        self.head_arc_feedforward = FeedForward(encoder_dim, 1,
                                                arc_representation_dim,
                                                Activation.by_name("elu")())
        self.child_arc_feedforward = FeedForward(encoder_dim, 1,
                                                 arc_representation_dim,
                                                 Activation.by_name("elu")())

        self.head_tag_feedforward = FeedForward(encoder_dim, 1,
                                                tag_representation_dim,
                                                Activation.by_name("elu")())
        self.child_tag_feedforward = FeedForward(encoder_dim, 1,
                                                 tag_representation_dim,
                                                 Activation.by_name("elu")())

        arc_attention_version = c.get('arc_attention_version', 'v1')
        if arc_attention_version == 'v2':
            self.arc_attention = BilinearMatrixAttentionV2(
                arc_representation_dim,
                arc_representation_dim,
                use_input_biases=True)
        else:
            self.arc_attention = BilinearMatrixAttention(
                arc_representation_dim,
                arc_representation_dim,
                use_input_biases=True)

        self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                             tag_representation_dim,
                                             n_relations)

        self.input_dropout_ = torch.nn.Dropout2d(p=conf['dropout'])
        self.dropout_ = InputVariationalDropout(p=conf['dropout'])

        self.input_encoding_timer = TimeRecoder()
        self.context_encoding_timer = TimeRecoder()
        self.classification_timer = TimeRecoder()
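
For context, the arc attention implements the Dozat-and-Manning biaffine score s(i, j) = head_i^T U child_j; with use_input_biases=True a constant 1 is appended to each vector, so the bilinear form also covers the linear and bias terms. A plain-PyTorch sketch of that scoring rule (an illustration, not the AllenNLP class):

import torch

def biaffine_arc_scores(head, child, U):
    # head, child: (batch, seq_len, arc_dim); U: (arc_dim + 1, arc_dim + 1).
    ones = head.new_ones(head.size(0), head.size(1), 1)
    head = torch.cat([head, ones], dim=-1)    # append the bias feature
    child = torch.cat([child, ones], dim=-1)
    # scores[b, i, j] = head[b, i] @ U @ child[b, j]
    return torch.einsum('bid,de,bje->bij', head, U, child)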
Example #20
    def __init__(self, conf: Dict,
                 input_batchers: Dict[str, Union[WordBatch, CharacterBatch]],
                 n_class: int, use_cuda: bool):
        super(SeqLabelModel, self).__init__()
        self.n_class = n_class
        self.use_cuda = use_cuda
        self.input_dropout = torch.nn.Dropout2d(p=conf["dropout"])
        self.dropout = InputVariationalDropout(p=conf['dropout'])

        input_layers = {}
        for i, c in enumerate(conf['input']):
            if c['type'] == 'embeddings':
                if 'pretrained' in c:
                    embs = load_embedding_txt(c['pretrained'], c['has_header'])
                    logger.info('loaded {0} embedding entries.'.format(
                        len(embs[0])))
                else:
                    embs = None
                name = c['name']
                mapping = input_batchers[name].mapping
                layer = Embeddings(c['dim'],
                                   mapping,
                                   fix_emb=c['fixed'],
                                   embs=embs,
                                   normalize=c.get('normalize', False),
                                   input_field_name=name)
                logger.info('embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], layer.n_V, layer.n_d))
                input_layers[name] = layer

            elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
                name = c['name']
                mapping = input_batchers[name].mapping
                embeddings = Embeddings(
                    c['dim'],
                    mapping,
                    fix_emb=False,
                    embs=None,
                    normalize=False,
                    input_field_name='{0}_ch_emb'.format(name))
                logger.info('character embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], embeddings.n_V, embeddings.n_d))
                if c['type'] == 'lstm_encoder':
                    layer = LstmTokenEmbedder(c['dim'],
                                              embeddings,
                                              conf['dropout'],
                                              use_cuda,
                                              input_field_name=name)
                elif c['type'] == 'cnn_encoder':
                    layer = ConvTokenEmbedder(c['dim'],
                                              embeddings,
                                              c['filters'],
                                              c.get('n_highway', 1),
                                              c.get('activation', 'relu'),
                                              use_cuda,
                                              input_field_name=name)
                else:
                    raise ValueError('Unknown type: {}'.format(c['type']))
                input_layers[name] = layer

            elif c['type'] == 'elmo':
                name = c['name']
                layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
                input_layers[name] = layer

            else:
                raise ValueError('{} unknown input layer'.format(c['type']))

        self.input_layers = torch.nn.ModuleDict(input_layers)
        input_encoders = []
        input_dim = 0
        for i, c in enumerate(conf['input_encoder']):
            input_info = {
                name: self.input_layers[name].get_output_dim()
                for name in c['input']
            }

            if c['type'] == 'affine':
                input_encoder = AffineTransformInputEncoder(
                    input_info, c['dim'], use_cuda)
            elif c['type'] == 'sum':
                input_encoder = SummationInputEncoder(input_info, use_cuda)
            elif c['type'] == 'concat':
                input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
            else:
                raise ValueError('{} unknown input encoder'.format(c['type']))

            input_dim += input_encoder.get_output_dim()
            input_encoders.append(input_encoder)

        self.input_encoders = torch.nn.ModuleList(input_encoders)

        encoder_name = conf['encoder']['type'].lower()
        if encoder_name == 'stacked_bidirectional_lstm':
            lstm = StackedBidirectionalLstm(
                input_size=input_dim,
                hidden_size=conf['encoder']['hidden_dim'],
                num_layers=conf['encoder']['n_layers'],
                recurrent_dropout_probability=conf['dropout'],
                layer_dropout_probability=conf['dropout'],
                use_highway=conf['encoder'].get('use_highway', True))
            self.encoder = PytorchSeq2SeqWrapper(lstm, stateful=False)
            encoded_input_dim = self.encoder.get_output_dim()
        elif encoder_name == 'project':
            self.encoder = ProjectedEncoder(input_dim,
                                            conf['encoder']['hidden_dim'],
                                            dropout=conf['dropout'])
            encoded_input_dim = self.encoder.get_output_dim()
        elif encoder_name == 'dummy':
            self.encoder = DummyEncoder()
            encoded_input_dim = input_dim
        else:
            raise ValueError('Unknown input encoder: {}'.format(encoder_name))

        if conf["classifier"]["type"].lower() == 'crf':
            self.classify_layer = CRFLayer(encoded_input_dim, n_class,
                                           use_cuda)
        else:
            self.classify_layer = ClassifyLayer(encoded_input_dim, n_class,
                                                use_cuda)

        self.encode_time = 0
        self.emb_time = 0
        self.classify_time = 0
Example #21
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 label_namespace: str = "labels",
                 encoder: Optional[Seq2SeqEncoder] = None,
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics

        embedding_output_dim = self.text_field_embedder.get_output_dim()

        if dropout is not None:
            self.dropout = torch.nn.Dropout(dropout)
            self.variational_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        elif encoder is not None:
            output_dim = self.encoder.get_output_dim()
        else:
            output_dim = embedding_output_dim
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))

        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)

        if encoder is not None:
            check_dimensions_match(embedding_output_dim,
                                   encoder.get_input_dim(),
                                   "text field embedding dim",
                                   "encoder input dim")
        if feedforward is not None and encoder is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   "encoder output dim",
                                   "feedforward input dim")
        elif feedforward is not None and encoder is None:
            check_dimensions_match(embedding_output_dim,
                                   feedforward.get_input_dim(),
                                   "text field output dim",
                                   "feedforward input dim")
        initializer(self)
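
Only the constructor is shown; at prediction time the pieces are presumably chained as embed -> variational dropout -> encoder -> dropout -> optional feedforward -> per-token tag projection. A hypothetical sketch of that pipeline (the actual AllenNLP forward also handles the loss and metrics):

from allennlp.nn import util

def tag_logits(self, tokens):
    mask = util.get_text_field_mask(tokens)
    embedded = self.text_field_embedder(tokens)              # (batch, seq_len, emb_dim)
    if self.dropout is not None:
        embedded = self.variational_dropout(embedded)        # same mask at every timestep
    encoded = self.encoder(embedded, mask) if self.encoder is not None else embedded
    if self.dropout is not None:
        encoded = self.dropout(encoded)
    if self._feedforward is not None:
        encoded = self._feedforward(encoded)
    return self.tag_projection_layer(encoded)                # (batch, seq_len, num_tags)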
Example #22
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 label_namespace: str = "labels",
                 encoder: Optional[Seq2VecEncoder] = None,
                 seq_encoder: Optional[Seq2SeqEncoder] = None,
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 incl_neutral: Optional[bool] = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_labels = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder

        self.seq_encoder = seq_encoder
        if self.seq_encoder is not None:
            self.attention_vector = Parameter(torch.Tensor(self.seq_encoder.get_output_dim()))
            self.attention_layer = DotProductAttention(normalize=True)
    
        embedding_output_dim = self.text_field_embedder.get_output_dim()
        
        if dropout is not None:
            self.dropout = torch.nn.Dropout(dropout)
            self.variational_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        elif encoder is not None:
            output_dim = self.encoder.get_output_dim()
        elif seq_encoder is not None:
            output_dim = self.seq_encoder.get_output_dim()
        else:
            output_dim = embedding_output_dim
        # We have to create a tag projection layer for each label in the
        # multi-label classifier.
        self._tag_projection_layers: Any = []
        for k in range(self.num_labels):
            tag_projection_layer = Linear(output_dim, 1)
            self.add_module(f'tag_projection_layer_{k}', tag_projection_layer)
            self._tag_projection_layers.append(tag_projection_layer)
        self.output_activation = torch.nn.Sigmoid()
        self.loss_criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
        
        self.incl_neutral = incl_neutral
        self.metrics = {"jaccard_index": JaccardIndex(self.incl_neutral)}
        if encoder is not None:
            check_dimensions_match(embedding_output_dim, encoder.get_input_dim(),
                                   "text field embedding dim", "encoder input dim")
        if feedforward is not None and encoder is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        elif feedforward is not None and encoder is None:
            check_dimensions_match(embedding_output_dim, feedforward.get_input_dim(),
                                   "text field output dim", "feedforward input dim")
        if self.seq_encoder is not None:
            self.reset_parameters()
        initializer(self)
Example #23
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 pos_tag_embedding: Embedding = None,
                 pos_tag_loss: Optional[float] = None,
                 label_namespace: str = "labels",
                 encoder: Optional[Seq2SeqEncoder] = None,
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 crf: bool = True,
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        if pos_tag_loss is not None or pos_tag_embedding is not None:
            pos_tag_err = (f"Model uses POS tags but the Vocabulary {vocab} "
                           "does not contain `pos_tags` namespace")
            if 'pos_tags' not in vocab._token_to_index:
                raise ConfigurationError(pos_tag_err)
            elif not len(vocab._token_to_index['pos_tags']):
                raise ConfigurationError(pos_tag_err)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.pos_tag_embedding = pos_tag_embedding
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics

        embedding_output_dim = self.text_field_embedder.get_output_dim()
        if self.pos_tag_embedding is not None:
            embedding_output_dim += self.pos_tag_embedding.get_output_dim()

        if dropout is not None:
            self.dropout = torch.nn.Dropout(dropout)
            self.variational_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            # Keep the attribute defined so later code can test it safely.
            self.variational_dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        elif encoder is not None:
            output_dim = self.encoder.get_output_dim()
        else:
            output_dim = embedding_output_dim
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))
        self.pos_tag_loss = pos_tag_loss
        if self.pos_tag_loss:
            self.num_pos_tags = self.vocab.get_vocab_size("pos_tags")
            self.pos_tag_projection_layer = TimeDistributed(
                Linear(output_dim, self.num_pos_tags))
            self.pos_crf = None
            if crf:
                self.pos_crf = ConditionalRandomField(self.num_pos_tags, None,
                                                      False)

        # If constrain_crf_decoding and calculate_span_f1 are not provided
        # (i.e., they are None), default them to True when label_encoding is
        # given and to False otherwise.
        if crf:
            if constrain_crf_decoding is None:
                constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding and crf:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None
        if crf:
            self.include_start_end_transitions = include_start_end_transitions
            self.crf = ConditionalRandomField(
                self.num_tags,
                constraints,
                include_start_end_transitions=include_start_end_transitions)
        else:
            self.crf = None

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
        # If also performing POS tagging, keep track of POS accuracy as well.
        if self.pos_tag_loss:
            self.metrics['POS_accuracy'] = CategoricalAccuracy()

        if encoder is not None:
            check_dimensions_match(embedding_output_dim,
                                   encoder.get_input_dim(),
                                   "text field embedding dim",
                                   "encoder input dim")
        if feedforward is not None and encoder is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   "encoder output dim",
                                   "feedforward input dim")
        elif feedforward is not None and encoder is None:
            check_dimensions_match(embedding_output_dim,
                                   feedforward.get_input_dim(),
                                   "text field output dim",
                                   "feedforward input dim")
        initializer(self)
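
A hedged sketch of how the CRF built above is usually exercised at train and decode time (assumed names: `logits` of shape (batch, seq_len, num_tags), gold `tags`, and a boolean `mask`; mask handling in the fallback branch is omitted, and this is not the model's own forward/decode code):

if self.crf is not None:
    log_likelihood = self.crf(logits, tags, mask)      # training objective is -log_likelihood
    best_paths = self.crf.viterbi_tags(logits, mask)   # Viterbi decoding under allowed_transitions
    predicted_tags = [path for path, score in best_paths]
else:
    predicted_tags = logits.argmax(dim=-1)             # plain per-token argmax fallback
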
Ejemplo n.º 24
0
    def __init__(self, **kwargs):
        super().__init__()

        content_id_size = kwargs["content_id_size"]
        content_id_dim = kwargs["content_id_dim"]

        self.content_id_emb = nn.Embedding(num_embeddings=content_id_size,
                                           embedding_dim=content_id_dim,
                                           padding_idx=0)

        if kwargs["emb_dropout"] > 0:
            self.emb_dropout = InputVariationalDropout(p=kwargs["emb_dropout"])

        feature_dim = kwargs["feature_dim"] + kwargs["content_id_dim"]
        if not kwargs.get("no_prev_ans", False):
            feature_dim += 1
        if "lstm_in_dim" in kwargs and kwargs["lstm_in_dim"] != feature_dim:
            lstm_in_dim = kwargs["lstm_in_dim"]
            self.lstm_in_proj = nn.Linear(in_features=feature_dim,
                                          out_features=lstm_in_dim,
                                          bias=True)
        else:
            lstm_in_dim = feature_dim

        lstm_hidden_dim = kwargs["lstm_hidden_dim"]
        lstm_num_layers = kwargs["lstm_num_layers"]
        lstm_dropout = kwargs["lstm_dropout"]

        self.encoder_type = kwargs.get("encoder_type", "vanilla_lstm")
        if self.encoder_type == "vanilla_lstm":
            self.encoder = nn.LSTM(input_size=lstm_in_dim,
                                   hidden_size=lstm_hidden_dim,
                                   bidirectional=False,
                                   batch_first=True,
                                   num_layers=lstm_num_layers,
                                   dropout=lstm_dropout)
        elif self.encoder_type == "GRU":
            self.encoder = nn.GRU(input_size=lstm_in_dim,
                                  hidden_size=lstm_hidden_dim,
                                  bidirectional=False,
                                  batch_first=True,
                                  num_layers=lstm_num_layers,
                                  dropout=lstm_dropout)
        elif self.encoder_type == "attention":
            self.encoder = nn.MultiheadAttention(embed_dim=lstm_in_dim,
                                                 num_heads=lstm_num_layers,
                                                 dropout=lstm_dropout)
        elif self.encoder_type == "augmented_lstm":
            self.encoder = StackedAugmentedLSTM(
                input_size=lstm_in_dim,
                hidden_size=lstm_hidden_dim,
                num_layers=lstm_num_layers,
                recurrent_dropout_probability=lstm_dropout,
                use_highway=kwargs.get("lstm_use_highway", True))

        if kwargs.get("layer_norm", False):
            self.layer_norm = nn.LayerNorm(lstm_hidden_dim)

        if kwargs["output_dropout"] > 0:
            self.output_dropout = InputVariationalDropout(
                p=kwargs["output_dropout"])

        if kwargs.get("highway_connection", False):
            self.highway_H = nn.Linear(in_features=lstm_in_dim,
                                       out_features=lstm_hidden_dim)
            self.highway_C = nn.Linear(in_features=lstm_in_dim,
                                       out_features=lstm_hidden_dim)

        hidden2logit_num_layers = kwargs.get("hidden2logit_num_layers", 1)
        self.hidden2logit = []

        in_features = lstm_hidden_dim
        for i in range(hidden2logit_num_layers):
            if i == hidden2logit_num_layers - 1:
                self.hidden2logit.append(
                    nn.Linear(in_features=in_features, out_features=1))
            else:
                self.hidden2logit.append(
                    nn.Linear(in_features=in_features,
                              out_features=in_features // 2))
                self.hidden2logit.append(nn.ReLU())
                self.hidden2logit.append(
                    nn.Dropout(p=kwargs["output_dropout"]))
                in_features = in_features // 2
        self.hidden2logit = nn.Sequential(*self.hidden2logit)

        if "content_adj_mat" in kwargs and "smoothness_alpha" in kwargs:
            # normalize adjacency weight with w_ij/(sqrt(d_i)*sqrt(d_j))
            adjacency_mat = kwargs["content_adj_mat"]
            del kwargs["content_adj_mat"]

            degree_mat = torch.clamp(adjacency_mat.sum(dim=1), 1e-8)
            inv_degree_mat = torch.diag(torch.pow(degree_mat, -0.5))
            self.content_id_adj = inv_degree_mat @ adjacency_mat @ inv_degree_mat

            self.smoothness_alpha = kwargs["smoothness_alpha"]

            self.criterion = self.BCE_logit_emb_smooth_loss
        else:
            self.criterion = self.__class__.binary_cross_entropy_with_logits

        self.hparams = kwargs
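
The comment above builds the symmetrically normalized adjacency D^-1/2 A D^-1/2; as a hedged illustration (an assumption about the general technique, not the author's BCE_logit_emb_smooth_loss), `smoothness_alpha` typically scales a graph-Laplacian penalty that pulls the embeddings of connected content ids together:

# Sketch: 0.5 * sum_ij A[i, j] * ||e_i - e_j||^2 == trace(E^T L E),
# with L = D - A the Laplacian of the normalized adjacency.
E = self.content_id_emb.weight                  # (num_contents, content_id_dim)
A = self.content_id_adj
L = torch.diag(A.sum(dim=1)) - A
smoothness = torch.trace(E.t() @ L @ E)
loss = bce_loss + self.smoothness_alpha * smoothness   # bce_loss assumed computed elsewhere
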
Ejemplo n.º 25
0
    def __init__(self, vocab: Vocabulary,
                 char_embedder: TextFieldEmbedder,
                 word_embedder: TextFieldEmbedder,
                 tokens_encoder: Seq2SeqEncoder,
                 model_args,
                 inp_drop_rate: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """
        :param vocab: vocabulary from train and dev dataset
        :param char_embedder: character embedding + cnn encoder
        :param word_embedder: word embedding
        :param tokens_encoder: Bi-LSTM backbone for split
        :param model_args: model arguments
        :param inp_drop_rate: input dropout rate
        """
        super(FollowUpSnippetModel, self).__init__(vocab, regularizer)

        self.tokens_encoder = tokens_encoder

        self.projection_layer = torch.nn.Linear(
            in_features=word_embedder.get_output_dim() + 1 + char_embedder.get_output_dim(),
            out_features=self.tokens_encoder.get_input_dim(),
            bias=False)

        # integer to mark field, 0 or 1
        self.num_classes = 2
        self.num_conflicts = 2

        self._non_linear = torch.nn.PReLU()

        self.hidden_size = int(self.tokens_encoder.get_output_dim() / 2)

        self.policy_net = PolicyNet(self.tokens_encoder.get_output_dim() * 3,
                                    self.num_classes)

        self.token_field_embedding = word_embedder
        self.char_field_embedding = char_embedder

        self._scaled_value = 1.0
        self._self_attention = CosineMatrixAttention()

        self.margin_loss = MarginRankingLoss(margin=model_args.margin)

        # calculate span similarity
        self.cosine_similar = CosineSimilarity(dim=0)

        if inp_drop_rate > 0:
            self._variational_dropout = InputVariationalDropout(p=inp_drop_rate)
        else:
            # Identity keeps the call site uniform when input dropout is disabled.
            self._variational_dropout = torch.nn.Identity()

        self.metrics = {
            "bleu": BLEUScore(),
            "reward": RewardScore(),
            "symbol": SymbolScore(),
            "reward_var": RewardScore(),
            "overall": RewardScore()
        }

        initializer(self)
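
Several of these constructors fall back to an identity when the dropout rate is zero; for reference, here is a self-contained sketch of what input variational dropout does differently from plain nn.Dropout: one mask is sampled per sequence and feature dimension and reused at every time step (an illustration of the technique under assumed shapes, not the library's exact source):

import torch

def variational_dropout(x: torch.Tensor, p: float, training: bool) -> torch.Tensor:
    # x: (batch, seq_len, dim). Sample one (batch, dim) mask and broadcast it
    # over seq_len, so the same units are dropped at every time step.
    if not training or p == 0.0:
        return x
    mask = torch.nn.functional.dropout(x.new_ones(x.size(0), x.size(-1)), p, training)
    return x * mask.unsqueeze(1)
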
Ejemplo n.º 26
0
    def __init__(self,
                 word_to_index,
                 ext_word_to_index,
                 char_to_index,
                 char_embed_size,
                 pos_embed_size,
                 char_lstm_hidden_size,
                 word_embed_size,
                 ext_embed_tensor,
                 num_layers,
                 hidden_size,
                 dropout,
                 r_dropout,
                 mlp_arc_size,
                 mlp_label_size,
                 pos_to_index,
                 xpos_to_index,
                 rel_to_index,
                 cuda_flag,
                 batch_first=True):
        """
        Args:
            word_to_index:
            char_to_index:
            char_embed_size: word embeddings dim
            pos_embed_size: pos/xpos embeddings dim
            char_lstm_hidden_size: char LSTM reccurent dim
            word_embed_size: word embeddings dim
            num_layers: Bi-LSTM  of layers
            hidden_size: Bi-lstm reccurent dim
            dropout: dropout probability
            r_dropout: dropout probability for reccurent units (Gal & Grahami)
            mlp_arc_size: arc mlp hidden dim
            mlp_label_size: label mlp hidden dim
            pos_to_index:
            xpos_to_index:
            rel_to_index:
            cuda_flag:
            batch_first: batch first option
        """

        super(RNN, self).__init__()

        self.word_to_index = word_to_index
        self.ext_word_to_index = ext_word_to_index
        self.char_to_index = char_to_index
        self.char_embed_size = char_embed_size
        self.pos_embed_size = pos_embed_size
        self.char_lstm_hidden_size = char_lstm_hidden_size
        self.word_embed_size = word_embed_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.r_dropout = r_dropout
        self.mlp_arc_size = mlp_arc_size
        self.mlp_label_size = mlp_label_size
        self.pos_to_index = pos_to_index
        self.xpos_to_index = xpos_to_index
        self.rel_to_index = rel_to_index
        self.rels = len(self.rel_to_index)
        self.cuda_flag = cuda_flag
        self.batch_first = batch_first

        self.word_encoder = nn.Embedding(
            len(self.word_to_index),
            self.word_embed_size,
            sparse=True,
            padding_idx=self.word_to_index['__PADDING__'])
        torch.nn.init.xavier_uniform_(self.word_encoder.weight.data)

        self.pos_encoder = nn.Embedding(
            len(self.pos_to_index),
            self.pos_embed_size,
            sparse=True,
            padding_idx=self.pos_to_index['__PADDING__'])
        torch.nn.init.xavier_uniform_(self.pos_encoder.weight.data)

        self.xpos_encoder = nn.Embedding(
            len(self.xpos_to_index),
            self.pos_embed_size,
            sparse=True,
            padding_idx=self.xpos_to_index['__PADDING__'])
        torch.nn.init.xavier_uniform_(self.xpos_encoder.weight.data)

        self.ROOT = nn.Parameter(
            torch.zeros(self.word_embed_size + self.pos_embed_size))
        torch.nn.init.normal_(self.ROOT, 0, 0.001)
        self.rnn = StackedBidirectionalLstm(
            input_size=self.word_embed_size + self.pos_embed_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            recurrent_dropout_probability=self.r_dropout,
            use_highway=False)

        self.char_embeds = Char_RNN(self.char_to_index, self.char_embed_size,
                                    self.char_lstm_hidden_size,
                                    self.word_embed_size, self.dropout,
                                    self.cuda_flag)
        self.fasttext_embs = Fasttext(ext_embed_tensor, self.word_embed_size,
                                      self.ext_word_to_index['__PADDING__'],
                                      self.cuda_flag)

        # Set forget bias to zero
        for layer_index in range(self.num_layers):
            getattr(self.rnn, "forward_layer_{}".format(
                layer_index)).state_linearity.bias.data.fill_(0.0)
            getattr(self.rnn, "backward_layer_{}".format(
                layer_index)).state_linearity.bias.data.fill_(0.0)

        self.var_drop = InputVariationalDropout(self.dropout)

        self.arc_head = nn.Linear(2 * self.hidden_size,
                                  self.mlp_arc_size,
                                  bias=True)
        self.arc_dep = nn.Linear(2 * self.hidden_size,
                                 self.mlp_arc_size,
                                 bias=True)
        torch.nn.init.orthogonal_(self.arc_head.weight.data,
                                  gain=np.sqrt(2 / (1 + 0.1**2)))
        torch.nn.init.orthogonal_(self.arc_dep.weight.data,
                                  gain=np.sqrt(2 / (1 + 0.1**2)))
        self.arc_head.bias.data.fill_(0.0)
        self.arc_dep.bias.data.fill_(0.0)

        self.label_head = nn.Linear(2 * self.hidden_size,
                                    self.mlp_label_size,
                                    bias=True)
        self.label_dep = nn.Linear(2 * self.hidden_size,
                                   self.mlp_label_size,
                                   bias=True)
        torch.nn.init.orthogonal_(self.label_head.weight.data,
                                  gain=np.sqrt(2 / (1 + 0.1**2)))
        torch.nn.init.orthogonal_(self.label_dep.weight.data,
                                  gain=np.sqrt(2 / (1 + 0.1**2)))
        self.label_head.bias.data.fill_(0.0)
        self.label_dep.bias.data.fill_(0.0)

        #Add biaffine layers
        #self.arc_biaffine = Biaffine(self.mlp_arc_size, self.mlp_arc_size, 1, bias=(True, False, False))
        #self.label_biaffine = Biaffine(self.mlp_label_size, self.mlp_label_size,self.rels,bias=(True, True, True))

        self.arc_biaf = nn.Parameter(
            torch.zeros(self.mlp_arc_size, self.mlp_arc_size))
        self.arc_head_aff = nn.Linear(self.mlp_arc_size, 1, bias=False)
        self.arc_head_aff.weight.data.fill_(0.0)

        self.label_biaf = nn.Parameter(
            torch.zeros(self.mlp_label_size, self.mlp_label_size, self.rels))
        self.label_head_aff = nn.Linear(self.mlp_label_size,
                                        self.rels,
                                        bias=False)
        self.label_head_aff.weight.data.fill_(0.0)
        self.label_dep_aff = nn.Linear(self.mlp_label_size,
                                       self.rels,
                                       bias=False)
        self.label_dep_aff.weight.data.fill_(0.0)
        self.label_bias = nn.Parameter(torch.zeros(self.rels))

        self.Relu = nn.LeakyReLU(0.1)

        self.param_group_sparse = []
        self.param_group_dense = []
        for name, param in self.named_parameters():
            if ((name == "word_encoder.weight")
                    or (name == "char_embeds.char_encoder.weight")
                    or (name == "pos_encoder.weight")
                    or (name == "xpos_encoder.weight")):
                print("Sparse:", name)
                self.param_group_sparse.append(param)
            else:
                self.param_group_dense.append(param)

        self.param_group_sparse = iter(self.param_group_sparse)
        self.param_group_dense = iter(self.param_group_dense)
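
A hedged sketch of how the biaffine pieces above are commonly combined into arc scores in a Dozat-and-Manning-style parser (`H_arc_head` and `H_arc_dep` are assumed (batch, seq_len, mlp_arc_size) outputs of arc_head/arc_dep after the LeakyReLU; this is not the model's own scoring code):

# score[b, h, d] = head_h . W . dep_d + w . head_h   (bilinear term + head bias term)
bilinear = H_arc_head @ self.arc_biaf @ H_arc_dep.transpose(1, 2)   # (batch, seq, seq)
arc_scores = bilinear + self.arc_head_aff(H_arc_head)               # broadcast over dependents

The sparse/dense parameter split at the end would typically feed two optimizers, for example torch.optim.SparseAdam for the sparse embedding tables and a dense optimizer such as Adam for everything else.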