Example #1
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 layer_norm: Optional[MaskedLayerNorm] = None,
                 dropout: float = None,
                 loss_scale: Union[float, str] = 1.0,
                 remove_bos_eos: bool = True) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder
        self._layer_norm = layer_norm or (lambda x: x)

        if not contextualizer.is_bidirectional():
            raise ConfigurationError("contextualizer must be bidirectional")

        self._contextualizer = contextualizer
        # The dimension for making predictions just in the forward
        # (or backward) direction.
        self._forward_dim = contextualizer.get_output_dim() // 2

        # TODO(joelgrus): Allow SampledSoftmaxLoss here by configuration
        self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                          embedding_dim=self._forward_dim)

        self.register_buffer('_last_average_loss', torch.zeros(1))

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        self._loss_scale = loss_scale
        self._remove_bos_eos = remove_bos_eos
Example #2
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 vocab: Vocabulary,
                 positive_label: int = 1) -> None:
        super().__init__(vocab)
        # We need the embeddings to convert word IDs to their vector representations
        self.word_embeddings = word_embeddings

        self.encoder = encoder

        # After converting a sequence of vectors to a single vector, we feed it into
        # a fully-connected linear layer to reduce the dimension to the total number of labels.
        self.linear = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # Monitor the metrics: accuracy, plus precision, recall, and F1 for the positive label (e.g., 4 = very positive)
        self.accuracy = CategoricalAccuracy()
        self.f1_measure = F1Measure(positive_label)

        # We use the cross entropy loss because this is a classification task.
        # Note that PyTorch's CrossEntropyLoss combines log-softmax and negative log-likelihood,
        # which makes it unnecessary to add a separate softmax layer.
        self.loss_function = torch.nn.CrossEntropyLoss()

        self.W = nn.Parameter(
            torch.zeros(size=(2 * encoder.get_output_dim(), 1)))
        nn.init.xavier_uniform_(self.W.data)
        self.LeakyReLU = torch.nn.LeakyReLU(0.1)
Example #3
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 text_encoder: Seq2SeqEncoder,
                 relation_encoder: Seq2VecEncoder,
                 vocab: Vocabulary,
                 encoder_dropout: float = 0.5) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)

        self.text_encoder = text_encoder
        self.text_attn = LinearAttention(
            input_dim=text_encoder.get_output_dim())

        self.relation_encoder = relation_encoder
        self.relation_attn = BilinearAttention(
            vector_dim=text_encoder.get_output_dim(),
            matrix_dim=relation_encoder.get_output_dim())

        hidden_dim = (text_encoder.get_output_dim() +
                      relation_encoder.get_output_dim())
        self.output = torch.nn.Linear(in_features=hidden_dim, out_features=1)
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder = None,
                 dropout: float = 0.0,
                 num_samples: int = None,
                 sparse_embeddings: bool = False,
                 bidirectional: bool = False,
                 initializer=InitializerApplicator(),
                 **kwargs) -> None:
        super().__init__(vocab, **kwargs)
        self._text_field_embedder = text_field_embedder
        self._contextualizer = contextualizer
        self._bidirectional = bidirectional

        if self._bidirectional:
            self._forward_dim = contextualizer.get_output_dim() // 2
        else:
            self._forward_dim = contextualizer.get_output_dim()

        self._softmax_loss = SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                         embedding_dim=self._forward_dim)

        self._perplexity = Perplexity()

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        if initializer is not None:
            initializer(self)
Example #5
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        contextualizer: Seq2SeqEncoder,
        dropout: float = None,
        num_samples: int = None,
        sparse_embeddings: bool = False,
        bidirectional: bool = False,
        initializer: InitializerApplicator = None,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)
        self._text_field_embedder = text_field_embedder

        if contextualizer.is_bidirectional() is not bidirectional:
            raise ConfigurationError(
                "Bidirectionality of contextualizer must match bidirectionality of "
                "language model. "
                f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
                f"language model bidirectional: {bidirectional}")

        self._contextualizer = contextualizer
        self._bidirectional = bidirectional

        # The dimension for making predictions just in the forward
        # (or backward) direction.
        if self._bidirectional:
            self._forward_dim = contextualizer.get_output_dim() // 2
        else:
            self._forward_dim = contextualizer.get_output_dim()

        if num_samples is not None:
            self._softmax_loss = SampledSoftmaxLoss(
                num_words=vocab.get_vocab_size("transactions"),
                embedding_dim=self._forward_dim,
                num_samples=num_samples,
                sparse=sparse_embeddings,
            )
        else:
            self._softmax_loss = SoftmaxLoss(
                num_words=vocab.get_vocab_size("transactions"),
                embedding_dim=self._forward_dim,
            )

        # This buffer is now unused and exists only for backwards compatibility reasons.
        self.register_buffer("_last_average_loss", torch.zeros(1))

        self._perplexity = Perplexity()

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        if initializer is not None:
            initializer(self)
Example #6
    def __init__(
        self,
        highway_encoder: Seq2SeqEncoder,
        transform_gate_encoder: Seq2SeqEncoder,
        carry_gate_encoder: Optional[Seq2SeqEncoder] = None,
        projection: bool = True,
    ) -> None:
        stateful = highway_encoder.stateful or transform_gate_encoder.stateful
        check_dimensions_match(
            highway_encoder.get_input_dim(),
            transform_gate_encoder.get_input_dim(),
            "highway_encoder input dim",
            "transform_gate_encoder input dim",
        )
        if carry_gate_encoder is not None:
            stateful = stateful or carry_gate_encoder.stateful
            check_dimensions_match(
                highway_encoder.get_input_dim(),
                carry_gate_encoder.get_input_dim(),
                "highway_encoder input dim",
                "carry_gate_encoder input dim",
            )

        super().__init__(stateful=stateful)

        self._input_dim = highway_encoder.get_input_dim()
        self._highway_encoder = highway_encoder
        self._transform_gate_encoder = transform_gate_encoder
        self._carry_gate_encoder = carry_gate_encoder
        self._highway_projection: Optional[torch.nn.Module] = None
        self._transform_gate_projection: Optional[torch.nn.Module] = None
        self._carry_gate_projection: Optional[torch.nn.Module] = None
        if projection:
            self._highway_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    highway_encoder.get_output_dim(),
                    highway_encoder.get_input_dim(),
                ))
            self._transform_gate_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    transform_gate_encoder.get_output_dim(),
                    transform_gate_encoder.get_input_dim(),
                ), )
            if carry_gate_encoder is not None:
                self._carry_gate_projection = TimeDistributed(  # type: ignore
                    torch.nn.Linear(
                        carry_gate_encoder.get_output_dim(),
                        carry_gate_encoder.get_input_dim(),
                    ), )
        else:
            assert highway_encoder.get_output_dim() in (self._input_dim, 1)
            assert transform_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
            if carry_gate_encoder is not None:
                assert carry_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
Example #7
 def __init__(self, word_embeddings: TextFieldEmbedder,
              encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.dbg_ctr = 0
     self.word_embeddings = word_embeddings
     self.encoder = encoder.cuda(CUDA_DEVICE)
     self.hidden2tag = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels')).cuda(CUDA_DEVICE)
     self.accuracy = CategoricalAccuracy()
     self.blank_index = vocab.get_token_index("_")
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 dropout: float = None,
                 loss_scale: Union[float, str] = 1.0,
                 num_samples: int = None,
                 sparse_embeddings: bool = False,
                 bidirectional: bool = False,
                 initializer: InitializerApplicator = None) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder

        if contextualizer.is_bidirectional() is not bidirectional:
            raise ConfigurationError(
                "Bidirectionality of contextualizer must match bidirectionality of "
                "language model. "
                f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
                f"language model bidirectional: {bidirectional}")

        self._contextualizer = contextualizer
        self._bidirectional = bidirectional

        # The dimension for making predictions just in the forward
        # (or backward) direction.
        if self._bidirectional:
            self._forward_dim = contextualizer.get_output_dim() // 2
        else:
            self._forward_dim = contextualizer.get_output_dim()

        # TODO(joelgrus): more sampled softmax configuration options, as needed.
        if num_samples is not None:
            self._softmax_loss = SampledSoftmaxLoss(
                num_words=vocab.get_vocab_size(),
                embedding_dim=self._forward_dim,
                num_samples=num_samples,
                sparse=sparse_embeddings)
        else:
            self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                              embedding_dim=self._forward_dim)

        # TODO(brendanr): Output perplexity here. e^loss
        self.register_buffer('_last_average_loss', torch.zeros(1))

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        self._loss_scale = loss_scale
        if initializer is not None:
            initializer(self)
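
The `get_output_dim() // 2` computation above relies on the fact that a bidirectional Seq2SeqEncoder reports the concatenated forward+backward size. A minimal sketch of that relationship, assuming AllenNLP's PytorchSeq2SeqWrapper and arbitrary dimensions (not taken from any example above):

import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

# A bidirectional LSTM wrapped as a Seq2SeqEncoder.
contextualizer = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=16, hidden_size=32,
                  batch_first=True, bidirectional=True))

assert contextualizer.is_bidirectional()
assert contextualizer.get_output_dim() == 64          # forward + backward states
forward_dim = contextualizer.get_output_dim() // 2    # 32: per-direction prediction size
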
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 dropout: float = None,
                 num_samples: int = None,
                 sparse_embeddings: bool = False,
                 bidirectional: bool = False,
                 initializer: InitializerApplicator = None) -> None:
        super().__init__(vocab)
        self._text_field_embedder = text_field_embedder

        if contextualizer.is_bidirectional() is not bidirectional:
            raise ConfigurationError(
                    "Bidirectionality of contextualizer must match bidirectionality of "
                    "language model. "
                    f"Contextualizer bidirectional: {contextualizer.is_bidirectional()}, "
                    f"language model bidirectional: {bidirectional}")

        self._contextualizer = contextualizer
        self._bidirectional = bidirectional

        # The dimension for making predictions just in the forward
        # (or backward) direction.
        if self._bidirectional:
            self._forward_dim = contextualizer.get_output_dim() // 2
        else:
            self._forward_dim = contextualizer.get_output_dim()

        # TODO(joelgrus): more sampled softmax configuration options, as needed.
        if num_samples is not None:
            self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                    embedding_dim=self._forward_dim,
                                                    num_samples=num_samples,
                                                    sparse=sparse_embeddings)
        else:
            self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                              embedding_dim=self._forward_dim)

        # TODO(brendanr): Output perplexity here. e^loss
        self.register_buffer('_last_average_loss', torch.zeros(1))

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = lambda x: x

        if initializer is not None:
            initializer(self)
Example #10
 def __init__(
     self,
     #### The embedding layer is specified as an AllenNLP <code>TextFieldEmbedder</code>
     #### which represents a general way of turning tokens into tensors.
     #### (Here we know that we want to represent each unique word with a learned tensor,
     #### but using the general class allows us to easily experiment with different types
     #### of embeddings, for example <a href = "https://allennlp.org/elmo">ELMo</a>.)
     word_embeddings: TextFieldEmbedder,
     #### Similarly, the encoder is specified as a general <code>Seq2SeqEncoder</code>
     #### even though we know we want to use an LSTM. Again, this makes it easy to
     #### experiment with other sequence encoders, for example a Transformer.
     encoder: Seq2SeqEncoder,
     #### Every AllenNLP model also expects a <code>Vocabulary</code>,
     #### which contains the namespaced mappings of tokens to indices and labels to indices.
     vocab: Vocabulary
 ) -> None:
     #### Notice that we have to pass the vocab to the base class constructor.
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     #### The feed forward layer is not passed in as a parameter, but is constructed by us.
     #### Notice that it looks at the encoder to find the correct input dimension and looks
     #### at the vocabulary (and, in particular, at the label -> index mapping) to find the correct output dimension.
     self.hidden2tag = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels'))
     #### The last thing to notice is that we also instantiate a
     #### <code>CategoricalAccuracy</code> metric, which we'll use to track accuracy
     #### during each training and validation epoch.
     self.accuracy = CategoricalAccuracy()
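
For context, here is a hypothetical way to assemble the pieces this annotated constructor expects; the embedding size, hidden size, and the LstmTagger class name are illustrative assumptions rather than details taken from the example:

import torch
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

vocab = Vocabulary()  # in practice, built from the training instances
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=50)          # 50 is an arbitrary choice
word_embeddings = BasicTextFieldEmbedder({'tokens': token_embedding})
encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=50, hidden_size=25, batch_first=True))
# tagger = LstmTagger(word_embeddings, encoder, vocab)  # "LstmTagger" stands for the model class above
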
Example #11
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:

        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        self.vocab = vocab
        self.label_vocab = vocab.get_index_to_token_vocabulary(
            namespace='labels')

        inf_vec = torch.Tensor([float('-inf')] * encoder.get_input_dim())
        self.class_avgs = [
            inf_vec.clone() for i in range(len(self.label_vocab))
        ]

        self.accuracy = CategoricalAccuracy()
        self.debug = False

        if self.debug:
            print("===MODEL DEBUG===")
            print(
                "Number of embeddings:",
                self.word_embeddings._token_embedders['tokens'].num_embeddings)
            # print("Token embedders:", self.word_embeddings._token_embedders)
            # print("Embedding weights", self.word_embeddings._token_embedders['tokens'].weight)
            print("vocab:", vocab)
            print("===MODEL DEBUG===")
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 dropout: float = 0.1,
                 ff_dim: int = 100):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder

        assert self.embedder.get_output_dim() == self.encoder.get_input_dim()

        self.feedforward = FeedForward(
            encoder.get_output_dim(),
            1,
            hidden_dims=ff_dim,
            activations=Activation.by_name('relu')(),
            dropout=dropout)
        self.out = torch.nn.Linear(
            in_features=self.feedforward.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))

        self.f1 = FBetaMeasure(average='micro')
        self.accuracy = CategoricalAccuracy()
        self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')
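
Several examples here build a ConditionalRandomField on top of per-token logits. As a hedged sketch of typical usage (an assumption about how such a CRF head is scored and decoded, not code from Example #12): the CRF returns the log-likelihood of the gold tag sequence, and viterbi_tags() produces the best tag path at prediction time.

import torch
from allennlp.modules import ConditionalRandomField

def crf_loss_and_decode(crf: ConditionalRandomField,
                        logits: torch.Tensor,
                        mask: torch.Tensor,
                        tags: torch.Tensor = None) -> dict:
    """Score the gold tags (if given) and Viterbi-decode the best tag sequence."""
    output = {'logits': logits}
    if tags is not None:
        # The CRF returns the log-likelihood of the gold path; negate it for a loss.
        output['loss'] = -crf(logits, tags, mask)
    best_paths = crf.viterbi_tags(logits, mask)   # list of (tag_ids, viterbi_score) pairs
    output['tags'] = [tag_ids for tag_ids, _score in best_paths]
    return output
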
Example #13
 def __init__(
     self,
     vocab: Vocabulary,
     embedder: TextFieldEmbedder,
     encoder: Seq2SeqEncoder,
     dropout: float = 0.2,
     decoder_hidden_dim: int = 128,
     decoder_ff_dim: int = 128,
     decoder_num_layers: int = 1,
     decoder_num_heads: int = 4,
     teacher_forcing: float = 1.0,
     num_teacher_forcing_steps: int = None,
     num_tags: int = 2,
     label_smoothing: float = None,
 ):
     super().__init__(vocab)
      # teacher_forcing is the probability of forcing the correct (gold) answer at each decoding step.
     self.embedder = embedder
     self.encoder = encoder
     self.laserdecoder = LaserDecoder(
         hidden_dim=decoder_hidden_dim,
         encoder_dim=encoder.get_output_dim(),
         num_layers=decoder_num_layers,
         ff_dim=decoder_ff_dim,
         num_heads=decoder_num_heads,
         num_classes=num_tags,
     )
     self.dropout = torch.nn.Dropout(dropout)
     self.accuracy = CategoricalAccuracy()
     self.f1 = F1Measure(1)
     self.teacher_forcing = teacher_forcing
     self.num_tf_steps = num_teacher_forcing_steps
     self.cur_tf_steps = 0
     self.label_smoothing = label_smoothing
Example #14
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 sentence_encoder: Seq2SeqEncoder,
                 document_encoder: Seq2SeqEncoder,
                 vocab: Vocabulary,
                 encoder_dropout: float = 0.0) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        if encoder_dropout > 0:
            self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)
        else:
            self.encoder_dropout = lambda x: x

        self.sentence_encoder = sentence_encoder

        self.sentence_attn = LinearSelfAttention(
            input_dim=self.sentence_encoder.get_output_dim(), bias=True)

        self.document_encoder = document_encoder
        self.document_attn = LinearSelfAttention(
            input_dim=self.document_encoder.get_output_dim(), bias=True)

        self.output = torch.nn.Linear(
            in_features=document_encoder.get_output_dim(), out_features=1)
Example #15
 def __init__(self, embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder,
              vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     self.linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                   out_features=vocab.get_vocab_size('pos'))
     self.accuracy = CategoricalAccuracy()
Example #16
    def __init__(self, 
                 vocab: Vocabulary,
                 bert_embedder: Optional[PretrainedBertEmbedder] = None,
                 encoder: Optional[Seq2SeqEncoder] = None,
                 dropout: Optional[float] = None,
                 use_crf: bool = True) -> None:
        super().__init__(vocab)

        if bert_embedder:
            self.use_bert = True
            self.bert_embedder = bert_embedder
        else:
            self.use_bert = False
            self.basic_embedder = BasicTextFieldEmbedder({
                "tokens": Embedding(vocab.get_vocab_size(namespace="tokens"), 1024)
            })
            self.rnn = Seq2SeqEncoder.from_params(Params({     
                "type": "lstm",
                "input_size": 1024,
                "hidden_size": 512,
                "bidirectional": True,
                "batch_first": True
            }))

        self.encoder = encoder

        if encoder:
            hidden2tag_in_dim = encoder.get_output_dim()
        else:
            hidden2tag_in_dim = bert_embedder.get_output_dim()
        self.hidden2tag = TimeDistributed(torch.nn.Linear(
            in_features=hidden2tag_in_dim,
            out_features=vocab.get_vocab_size("labels")))
        
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        
        self.use_crf = use_crf
        if use_crf:
            crf_constraints = allowed_transitions(
                constraint_type="BIO",
                labels=vocab.get_index_to_token_vocabulary("labels")
            )
            self.crf = ConditionalRandomField(
                num_tags=vocab.get_vocab_size("labels"),
                constraints=crf_constraints,
                include_start_end_transitions=True
            )
        
        self.f1 = SpanBasedF1Measure(vocab, 
                                     tag_namespace="labels",
                                     ignore_classes=["news/type","negation",
                                                     "demonstrative_reference",
                                                     "timer/noun","timer/attributes"],
                                     label_encoding="BIO")
Example #17
def build_decoder(task, d_inp, vocab, embedder, args):
    ''' Build a task specific decoder '''
    rnn = s2s_e.by_name('lstm').from_params(
        Params({'input_size': embedder.get_output_dim(),
                'hidden_size': args.d_hid_dec,
                'num_layers': args.n_layers_dec, 'bidirectional': False}))
    decoder = SentenceEncoder(vocab, embedder, 0, rnn)
    hid2voc = nn.Linear(args.d_hid_dec, args.max_word_v_size)
    return decoder, hid2voc
Example #18
 def __init__(self, word_embeddings: TextFieldEmbedder,
              encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     self.hidden2tag = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels'))
     self.accuracy = CategoricalAccuracy()
Example #19
 def __init__(self, word_embeddings: TextFieldEmbedder,
              encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     self.hidden2tag = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels'))
     self._f1_metric = SpanBasedF1Measure(
          vocab, 'labels')  # SpanBasedF1Measure: evaluation metric for NER
Example #20
    def test_stacked_bidirectional_lstm_can_build_from_params(self):
        params = Params({"type": "stacked_bidirectional_lstm",
                         "input_size": 5,
                         "hidden_size": 9,
                         "num_layers": 3})
        encoder = Seq2SeqEncoder.from_params(params)

        assert encoder.get_input_dim() == 5
        assert encoder.get_output_dim() == 18
        assert encoder.is_bidirectional()
Example #22
 def __init__(self,
              word_embeddings: TextFieldEmbedder,
              encoder: Seq2SeqEncoder,
              vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=vocab.get_vocab_size('labels'))
     self.accuracy = CategoricalAccuracy()
Example #23
 def __init__(self, embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder,
              vocab: Vocabulary) -> None:
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     self.hidden2labels = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels'))
     self.accuracy = CategoricalAccuracy()
     self.f1 = SpanBasedF1Measure(vocab, tag_namespace='labels')
Example #24
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        contextualizer: Seq2SeqEncoder,
        hparams: Dict,
    ) -> None:
        super().__init__(vocab)
        self.text_field_embedder = text_field_embedder

        self.contextualizer = contextualizer
        self.bidirectional = contextualizer.is_bidirectional()

        if self.bidirectional:
            self.forward_dim = contextualizer.get_output_dim() // 2
        else:
            self.forward_dim = contextualizer.get_output_dim()

        dropout = hparams["dropout"]
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = lambda x: x

        self.hidden2chord = torch.nn.Sequential(
            torch.nn.Linear(self.forward_dim, hparams["fc_hidden_dim"]),
            torch.nn.ReLU(True),
            torch.nn.Linear(hparams["fc_hidden_dim"], vocab.get_vocab_size()),
        )
        self.perplexity = PerplexityCustom()
        self.accuracy = CategoricalAccuracy()
        self.real_loss = Average()

        self.similarity_matrix = hparams["similarity_matrix"]
        self.training_mode = hparams["training_mode"]

        self.T_initial = hparams["T_initial"]
        self.T = self.T_initial
        self.decay_rate = hparams["decay_rate"]

        self.batches_per_epoch = hparams["batches_per_epoch"]
        self.epoch = 0
        self.batch_counter = 0
Example #25
    def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder) -> None:
        super().__init__(vocab)

        self._embedder = embedder
        self._encoder = encoder
        self._classifier = nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        self.f1 = SpanBasedF1Measure(vocab, 'labels')
Example #26
    def __init__(
        self,
        word_embeddings: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        vocab: Vocabulary,
        dropout: float = 0.5,
        n_linear_layers=1,
    ) -> None:
        """

        :param word_embeddings: the embeddings to start with
        :param encoder:  the seq2seq transformer of embeddings can be LSTM for example
        :param vocab: dataset input and output vocabulary
        """

        super(BaseTextClassifier, self).__init__(vocab)

        self.word_embeddings = word_embeddings

        self.encoder = encoder

        # Representations: this is the layer just above the final layer and its non-linearity (hidden[-1]).
        # It is used to calculate the FID score and similar metrics, which is why we expose it as the
        # self.representations class attribute.
        self.representations = self.encoder

        if n_linear_layers > 0:
            extra_hiddens = []
            for k in range(n_linear_layers):
                extra_hiddens += [
                    nn.Linear(self.encoder.get_output_dim(),
                              self.encoder.get_output_dim()),
                    nn.ReLU(True)
                ]
            self.extra_hiddens = nn.Sequential(*extra_hiddens)
        else:
            self.extra_hiddens = None

        self.hidden2label = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # self.accuracy = CategoricalAccuracy()
        self.criterion = CrossEntropyLoss()

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "hinge-loss": Loss(HingeEmbeddingLoss()),
            "huber-loss": Loss(SmoothL1Loss()),
            "cross-entropy-loss": Loss(CrossEntropyLoss()),
            "confidence": Confidence()
        }

        self.dropout = nn.Dropout(dropout)
Example #27
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 forward_segmental_contextualizer: Seq2SeqEncoder,
                 backward_segmental_contextualizer: Seq2SeqEncoder,
                 label_feature_dim: int,
                 softmax_projection_dim: int,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 num_samples: int = None,
                 sparse_embeddings: bool = False,
                 bidirectional: bool = True,
                 initializer: InitializerApplicator = None) -> None:
        super().__init__(vocab=vocab,
                         text_field_embedder=text_field_embedder,
                         contextualizer=contextualizer,
                         dropout=dropout,
                         num_samples=num_samples,
                         sparse_embeddings=sparse_embeddings,
                         bidirectional=bidirectional,
                         initializer=initializer)
        self._forward_segmental_contextualizer = forward_segmental_contextualizer
        self._backward_segmental_contextualizer = backward_segmental_contextualizer

        if num_samples is not None:
            self._softmax_loss = SampledSoftmaxLoss(num_words=vocab.get_vocab_size(),
                                                    embedding_dim=softmax_projection_dim,
                                                    num_samples=num_samples,
                                                    sparse=sparse_embeddings)
        else:
            self._softmax_loss = _SoftmaxLoss(num_words=vocab.get_vocab_size(),
                                              embedding_dim=softmax_projection_dim)

        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        self.label_feature_embedding = Embedding(self.num_classes, label_feature_dim)

        self._forward_dim = contextualizer.get_output_dim() // 2 + \
                            forward_segmental_contextualizer.get_output_dim() // 2 + \
                            label_feature_dim
        self.projection_layer = TimeDistributed(Linear(self._forward_dim, softmax_projection_dim))
Example #28
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 sequence_encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.sequence_encoder = sequence_encoder

        # Fully connected layer from sequence encoding to tags
        self.fc = torch.nn.Linear(
            in_features=sequence_encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        self.accuracy = CategoricalAccuracy()
Example #29
 def __init__(self, word_embeddings: TextFieldEmbedder,
              encoder: Seq2SeqEncoder, vocab: Vocabulary,
              num_categories: int) -> None:
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     self.hidden2tag = torch.nn.Linear(
         in_features=encoder.get_output_dim(),
         out_features=vocab.get_vocab_size('labels'))
     self.accuracy = CategoricalAccuracy()
     self.num_categories = num_categories
     self.fms = [F1Measure(i) for i in range(1, self.num_categories + 1)]
Example #30
def build_decoder(task, d_inp, vocab, embedder, args):
    """ Build a task specific decoder """
    rnn = s2s_e.by_name("lstm").from_params(
        Params({
            "input_size": embedder.get_output_dim(),
            "hidden_size": args.s2s["d_hid_dec"],
            "num_layers": args.s2s["n_layers_dec"],
            "bidirectional": False,
        }))
    decoder = SentenceEncoder(vocab, embedder, 0, rnn)
    hid2voc = nn.Linear(args.s2s["d_hid_dec"], args.max_word_v_size)
    return decoder, hid2voc
Example #31
 def build_pair_attn(d_in, use_attn, d_hid_attn):
     ''' Build the pair model '''
     if not use_attn:
         pair_attn = None
     else:
         d_inp_model = 2 * d_in
         modeling_layer = s2s_e.by_name('lstm').from_params(
             Params({'input_size': d_inp_model, 'hidden_size': d_hid_attn,
                     'num_layers': 1, 'bidirectional': True}))
         pair_attn = AttnPairEncoder(vocab, modeling_layer,
                                     dropout=params["dropout"])
     return pair_attn
Example #32
    def __init__(
            self,
            vocab: Vocabulary,
            embedder: TextFieldEmbedder,
            encoder: Seq2SeqEncoder,  # you pass in the encoder with its layers here: LSTM, etc.
    ):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder

        num_labels = vocab.get_vocab_size("tokens")  # get from the "tokens" namespace
        self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
Example #33
 def build_pair_attn(d_in, d_hid_attn):
     """ Build the pair model """
     d_inp_model = 2 * d_in
     modeling_layer = s2s_e.by_name("lstm").from_params(
         Params({
             "input_size": d_inp_model,
             "hidden_size": d_hid_attn,
             "num_layers": 1,
             "bidirectional": True,
         }))
     pair_attn = AttnPairEncoder(model.vocab,
                                 modeling_layer,
                                 dropout=params["dropout"])
     return pair_attn
Example #34
 def __init__(self,
              #### The embedding layer is specified as an AllenNLP <code>TextFieldEmbedder</code> which represents a general way of turning tokens into tensors. (Here we know that we want to represent each unique word with a learned tensor, but using the general class allows us to easily experiment with different types of embeddings, for example <a href = "https://allennlp.org/elmo">ELMo</a>.)
              word_embeddings: TextFieldEmbedder,
              #### Similarly, the encoder is specified as a general <code>Seq2SeqEncoder</code> even though we know we want to use an LSTM. Again, this makes it easy to experiment with other sequence encoders, for example a Transformer.
              encoder: Seq2SeqEncoder,
              #### Every AllenNLP model also expects a <code>Vocabulary</code>, which contains the namespaced mappings of tokens to indices and labels to indices.
              vocab: Vocabulary) -> None:
     #### Notice that we have to pass the vocab to the base class constructor.
     super().__init__(vocab)
     self.word_embeddings = word_embeddings
     self.encoder = encoder
     #### The feed forward layer is not passed in as a parameter, but is constructed by us. Notice that it looks at the encoder to find the correct input dimension and looks at the vocabulary (and, in particular, at the label -> index mapping) to find the correct output dimension.
     self.hidden2tag = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=vocab.get_vocab_size('labels'))
     #### The last thing to notice is that we also instantiate a <code>CategoricalAccuracy</code> metric, which we'll use to track accuracy during each training and validation epoch.
     self.accuracy = CategoricalAccuracy()
Example #35
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build the model according to the arguments.

    args:
        - args: experiment config object with the attributes used below
        - vocab (Vocab): vocabulary
        - pretrained_embs: pretrained word embeddings to use
        - tasks: tasks to build classifiers for

    returns:
        - model (MultiTaskModel): the assembled multi-task model
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(vocab.get_vocab_size('tokens'), d_word, weight=word_embs,
                              trainable=train_embs,
                              padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle elmo and cove
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH, weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) if "words" in token_embedder \
                            else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_phrase,
                                                             'hidden_size': d_hid_phrase,
                                                             'num_layers': args.n_layers_enc,
                                                             'bidirectional': True}))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, text_field_embedder) # maybe should take in CoVe/ELMO?
        pair_encoder = None # model will just run sent_encoder on both inputs
    else: # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab, text_field_embedder, n_layers_highway,
                                           phrase_layer, dropout=args.dropout,
                                           cove_layer=cove_layer, elmo_layer=elmo)
    d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
    if args.pair_enc == 'simple': # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
        pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder, n_layers_highway,
                                           phrase_layer, cove_layer=cove_layer, elmo_layer=elmo,
                                           dropout=args.dropout)
        d_pair = d_single
    elif args.pair_enc == 'attn':
        log.info("\tUsing attention!")
        d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        d_hid_model = d_hid_phrase # make it as large as the original sentence encoding
        modeling_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_model,
                                                                   'hidden_size': d_hid_model,
                                                                   'num_layers':  1,
                                                                   'bidirectional': True}))
        pair_encoder = HeadlessPairAttnEncoder(vocab, text_field_embedder, n_layers_highway,
                                               phrase_layer, DotProductSimilarity(), modeling_layer,
                                               cove_layer=cove_layer, elmo_layer=elmo,
                                               deep_elmo=args.deep_elmo,
                                               dropout=args.dropout)
        d_pair = 2 * d_hid_phrase
        # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
        #                = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model