Example 1
    class Config(DocModel.Config):
        class ByteModelInput(DocModel.Config.ModelInput):
            token_bytes: ByteTokenTensorizer.Config = ByteTokenTensorizer.Config()

        inputs: ByteModelInput = ByteModelInput()
        byte_embedding: CharacterEmbedding.Config = CharacterEmbedding.Config()
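
The `token_bytes` field name declared in `ByteModelInput` becomes the key of the matching entry in the `tensorizers` dict that `create_embedding` receives (see Example 3). As a minimal sketch, this config could also be built directly in code, as below; `ByteDocModel` is a hypothetical name for the model class owning this Config, and `column` is the `ByteTokenTensorizer.Config` field referenced in Example 4:

    # Hedged sketch: ByteDocModel is a placeholder for the owning model class.
    config = ByteDocModel.Config(
        inputs=ByteDocModel.Config.ByteModelInput(
            token_bytes=ByteTokenTensorizer.Config(column="text"),
        ),
        byte_embedding=CharacterEmbedding.Config(embed_dim=32),
    )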
Example 2
    class Config(WordTaggingModel.Config):
        class ByteModelInput(WordTaggingModel.Config.ModelInput):
            # We should support characters as well, but CharacterTokenTensorizer
            # does not support adding characters to vocab yet.
            tokens: ByteTokenTensorizer.Config = ByteTokenTensorizer.Config()

        inputs: ByteModelInput = ByteModelInput()
        embedding: CharacterEmbedding.Config = CharacterEmbedding.Config()
Example 3
    @classmethod
    def create_embedding(cls, config, tensorizers):
        # Build the byte-level CharacterEmbedding: the vocabulary size comes
        # from the byte tensorizer's fixed NUM_BYTES, the remaining dimensions
        # from the embedding config.
        return CharacterEmbedding(
            tensorizers["token_bytes"].NUM_BYTES,
            config.embedding.embed_dim,
            config.embedding.cnn.kernel_num,
            config.embedding.cnn.kernel_sizes,
            config.embedding.highway_layers,
            config.embedding.projection_dim,
        )
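
A hedged usage sketch for the classmethod above, assuming a Config like Example 1's with an `inputs.token_bytes` field: the tensorizer is instantiated through PyText's generic `Component.from_config` pattern, and `ByteModel` is a hypothetical owner class:

    # Hedged sketch: build the byte tensorizer from its config, then let
    # create_embedding wire its NUM_BYTES vocab size into CharacterEmbedding.
    tensorizers = {
        "token_bytes": ByteTokenTensorizer.from_config(config.inputs.token_bytes)
    }
    byte_embedding = ByteModel.create_embedding(config, tensorizers)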
Example 4
    @classmethod
    def create_embedding(cls, config, tensorizers: Dict[str, Tensorizer]):
        word_tensorizer = config.inputs.tokens
        byte_tensorizer = config.inputs.token_bytes
        # Both tensorizers must read from the same raw text column.
        assert word_tensorizer.column == byte_tensorizer.column

        word_embedding = create_module(config.embedding,
                                       tensorizer=tensorizers["tokens"])
        byte_embedding = CharacterEmbedding(
            ByteTokenTensorizer.NUM_BYTES,
            config.byte_embedding.embed_dim,
            config.byte_embedding.cnn.kernel_num,
            config.byte_embedding.cnn.kernel_sizes,
            config.byte_embedding.highway_layers,
            config.byte_embedding.projection_dim,
        )
        return EmbeddingList([word_embedding, byte_embedding], concat=True)
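
With `concat=True`, `EmbeddingList` concatenates its sub-embeddings along the feature dimension, so the combined width is the sum of the word and byte embedding widths. A small sketch of that invariant, assuming the usual PyText convention that embedding modules expose an `embedding_dim` attribute:

    # Hedged sketch: embedding_dim on these modules is an assumption here.
    combined = EmbeddingList([word_embedding, byte_embedding], concat=True)
    assert combined.embedding_dim == (
        word_embedding.embedding_dim + byte_embedding.embedding_dim
    )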