    # Custom vocab_to_cache logic requires a from_params implementation.
    @classmethod
    def from_params(cls, vocab, params):  # type: ignore
        # pylint: disable=arguments-differ
        params.add_file_to_archive(u'options_file')
        params.add_file_to_archive(u'weight_file')
        options_file = params.pop(u'options_file')
        weight_file = params.pop(u'weight_file')
        requires_grad = params.pop(u'requires_grad', False)
        do_layer_norm = params.pop_bool(u'do_layer_norm', False)
        dropout = params.pop_float(u"dropout", 0.5)
        namespace_to_cache = params.pop(u"namespace_to_cache", None)
        if namespace_to_cache is not None:
            vocab_to_cache = list(
                vocab.get_token_to_index_vocabulary(namespace_to_cache).keys())
        else:
            vocab_to_cache = None
        projection_dim = params.pop_int(u"projection_dim", None)
        params.assert_empty(cls.__name__)
        return cls(options_file=options_file,
                   weight_file=weight_file,
                   do_layer_norm=do_layer_norm,
                   dropout=dropout,
                   requires_grad=requires_grad,
                   projection_dim=projection_dim,
                   vocab_to_cache=vocab_to_cache)


ElmoTokenEmbedder = TokenEmbedder.register(u"elmo_token_embedder")(
    ElmoTokenEmbedder)
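
# The line above applies ``TokenEmbedder.register`` as a plain function call
# rather than a decorator (the ``u""`` literals and expanded decorators
# suggest this source went through a Python-2 compatibility conversion).
# A minimal standalone sketch of the registry pattern this relies on --
# not AllenNLP's actual ``Registrable`` implementation:

class SketchRegistrable(object):
    _registry = {}

    @classmethod
    def register(cls, name):
        def add_subclass_to_registry(subclass):
            # Remember the class under its configuration name, then hand the
            # class back unchanged (the decorator contract).
            cls._registry[name] = subclass
            return subclass
        return add_subclass_to_registry

    @classmethod
    def by_name(cls, name):
        return cls._registry[name]


class SketchEmbedder(SketchRegistrable):
    pass


# Equivalent to decorating SketchEmbedder with @SketchRegistrable.register("sketch_embedder").
# AllenNLP's real ``Registrable`` base class provides the same register/by_name pair.
SketchEmbedder = SketchRegistrable.register("sketch_embedder")(SketchEmbedder)
assert SketchRegistrable.by_name("sketch_embedder") is SketchEmbedder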
Example #2
        else:
            weight = None

        return cls(num_embeddings=num_embeddings,
                   embedding_dim=embedding_dim,
                   projection_dim=projection_dim,
                   weight=weight,
                   padding_index=padding_index,
                   trainable=trainable,
                   max_norm=max_norm,
                   norm_type=norm_type,
                   scale_grad_by_freq=scale_grad_by_freq,
                   sparse=sparse)


Embedding = TokenEmbedder.register(u"embedding")(Embedding)
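
# The keyword arguments forwarded in the call above (``padding_index``,
# ``max_norm``, ``norm_type``, ``scale_grad_by_freq``, ``sparse``) mirror the
# parameters of ``torch.nn.Embedding``, while ``weight`` supplies the matrix
# and ``trainable`` decides whether it receives gradients.  A plain PyTorch
# sketch of the same construction, with made-up sizes:

import torch

num_embeddings, embedding_dim = 100, 50
pretrained_weight = torch.randn(num_embeddings, embedding_dim)

sketch_embedding = torch.nn.Embedding(num_embeddings=num_embeddings,
                                      embedding_dim=embedding_dim,
                                      padding_idx=0,  # ``padding_index`` above
                                      max_norm=None,
                                      norm_type=2.0,
                                      scale_grad_by_freq=False,
                                      sparse=False)
# Load the pretrained matrix and freeze it, the equivalent of ``trainable=False``.
sketch_embedding.weight.data.copy_(pretrained_weight)
sketch_embedding.weight.requires_grad = False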


def _read_pretrained_embeddings_file(file_uri,
                                     embedding_dim,
                                     vocab,
                                     namespace=u"tokens"):
    u"""
    Returns an embedding matrix for the given vocabulary using the pretrained embeddings
    contained in the given file. Embeddings for tokens not found in the pretrained embedding file
    are randomly initialized using a normal distribution with mean and standard deviation equal to
    those of the pretrained embeddings.

    We support two file formats:

        * text format - utf-8 encoded text file with space separated fields: [word] [dim 1] [dim 2] ...
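
# A simplified sketch of the behaviour the docstring above describes: tokens
# missing from the pretrained file receive vectors drawn from a normal
# distribution whose mean and standard deviation match the pretrained
# embeddings.  Illustrative only -- not the body of
# ``_read_pretrained_embeddings_file``; ``token_to_pretrained_vector`` stands
# in for whatever the file parser returns.

import torch

def sketch_embedding_matrix(token_to_index, token_to_pretrained_vector, embedding_dim):
    pretrained = torch.stack([torch.tensor(vector)
                              for vector in token_to_pretrained_vector.values()])
    mean, std = pretrained.mean().item(), pretrained.std().item()
    # Initialise every row from the pretrained statistics, then overwrite the
    # rows of tokens that were actually found in the pretrained file.
    matrix = torch.empty(len(token_to_index), embedding_dim).normal_(mean, std)
    for token, index in token_to_index.items():
        if token in token_to_pretrained_vector:
            matrix[index] = torch.tensor(token_to_pretrained_vector[token])
    return matrix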
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

    def get_output_dim(self):
        return self._encoder._module.get_output_dim()  # pylint: disable=protected-access

    def forward(self, token_characters):  # pylint: disable=arguments-differ
        mask = (token_characters != 0).long()
        return self._dropout(
            self._encoder(self._embedding(token_characters), mask))

    # The setdefault requires a custom from_params
    @classmethod
    def from_params(cls, vocab, params):  # type: ignore
        # pylint: disable=arguments-differ
        embedding_params = params.pop(u"embedding")
        # Embedding.from_params() uses "tokens" as the default namespace, but we need to change
        # that to be "token_characters" by default.
        embedding_params.setdefault(u"vocab_namespace", u"token_characters")
        embedding = Embedding.from_params(vocab, embedding_params)
        encoder_params = params.pop(u"encoder")
        encoder = Seq2VecEncoder.from_params(encoder_params)
        dropout = params.pop_float(u"dropout", 0.0)
        params.assert_empty(cls.__name__)
        return cls(embedding, encoder, dropout)


TokenCharactersEncoder = TokenEmbedder.register(u"character_encoding")(
    TokenCharactersEncoder)
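
# Shape-only sketch of the character-encoding flow in ``forward`` above:
# ``token_characters`` is assumed to be a (batch_size, num_tokens,
# num_characters) tensor of character ids with 0 as padding, and a masked
# mean over characters stands in for the real ``Seq2VecEncoder``.
# Illustrative only -- not the AllenNLP modules themselves.

import torch

batch_size, num_tokens, num_characters, char_dim = 2, 5, 8, 16
token_characters = torch.randint(0, 30, (batch_size, num_tokens, num_characters))

char_embedding = torch.nn.Embedding(30, char_dim, padding_idx=0)
mask = (token_characters != 0).float()            # (2, 5, 8)
embedded = char_embedding(token_characters)       # (2, 5, 8, 16)

# Pool over the character dimension, ignoring padding, to produce one vector
# per token -- the role the Seq2VecEncoder plays in the module above.
summed = (embedded * mask.unsqueeze(-1)).sum(dim=2)  # (2, 5, 16)
token_vectors = summed / mask.sum(dim=2, keepdim=True).clamp(min=1.0)
assert token_vectors.shape == (batch_size, num_tokens, char_dim)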
        positional_encodings = get_range_vector(
            num_timesteps, device=get_device_of(inputs)) + vocab_size

        # Combine the inputs with positional encodings
        batch_tensor = torch.stack(
            [
                inputs,  # (batch_size, num_timesteps)
                positional_encodings.expand(batch_size, num_timesteps)
            ],
            dim=-1)

        byte_pairs_mask = inputs != 0

        # Embeddings is num_output_layers x (batch_size, num_timesteps, embedding_dim)
        layer_activations = self._transformer(batch_tensor)

        # Output of scalar_mix is (batch_size, num_timesteps, embedding_dim)
        mix = self._scalar_mix(layer_activations, byte_pairs_mask)

        # These embeddings are one per byte-pair, but we want one per original _word_.
        # So we choose the embedding corresponding to the last byte pair for each word,
        # which is captured by the ``offsets`` input.
        range_vector = get_range_vector(batch_size,
                                        device=get_device_of(mix)).unsqueeze(1)
        last_byte_pair_embeddings = mix[range_vector, offsets]

        return last_byte_pair_embeddings


OpenaiTransformerEmbedder = TokenEmbedder.register(
    u"openai_transformer_embedder")(OpenaiTransformerEmbedder)