Example #1
0
# type: ignore
import abc
import unittest

from src import logging, preprocessing
from src.text_encoder import (
    SubWordTextEncoder,
    TextEncoder,
    WordNoFilterTextEncoder,
    WordTextEncoder,
)

logger = logging.create_logger(__name__)

# Tiny corpus wrapped with start/end tokens — presumably shared test fixtures
# for the encoder tests below (confirm against the rest of the file).
CORPUS = preprocessing.add_start_end_token([
    "a against",
    "battle",
    "pandemy covid-19 not easy",
])
# Arbitrary vocabulary size used as a fixture value.
A_VOCAB_SIZE = 258
# Path to an existing training text file (relative to the repo root).
ANY_TEXT_FILE = "data/train.lang1"
# Short sample sentence to run through an encoder.
SAMPLE = "a pandemy"


class TextEncoderTest(abc.ABC):
    """Shared save/load round-trip test for TextEncoder implementations.

    Concrete subclasses (expected to also inherit from unittest.TestCase,
    which provides ``assertEqual``) must implement ``create_encoder()`` and
    ``load_encoder(file_name)``.
    """

    def test_can_save_load(self):
        """Saving an encoder and loading it back preserves the vocab size."""
        encoder = self.create_encoder()
        # Fixed typo in the scratch path: "text_ecoder" -> "text_encoder".
        file_name = "/tmp/text_encoder"

        encoder.save_to_file(file_name)
        loaded_encoder = self.load_encoder(file_name)

        self.assertEqual(loaded_encoder.vocab_size, encoder.vocab_size)
Example #2
0
import tensorflow as tf
from tensorflow.keras import layers

from src.logging import create_logger
from src.model import base
from src.model.transformer import Encoder, _create_padding_mask

# Model identifier; passed to the base Model constructor in DemiBERT.__init__.
NAME = "demi-bert"
# Module-level logger from the project's logging helper.
logger = create_logger(__name__)


class DemiBERT(base.Model):
    """D E M I B E R T."""
    def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, max_pe,
                 rate):
        """Build the DemiBERT encoder stack.

        Args:
            num_layers: Number of transformer encoder layers.
            d_model: Model/embedding dimension; also the output size of the
                final dense projection.
            num_heads: Number of attention heads per layer.
            dff: Size of the pointwise feed-forward hidden layer
                (presumably — forwarded as ``dff`` to Encoder; confirm there).
            vocab_size: Input vocabulary size for the token embedding.
            max_pe: Maximum position for the positional-encoding table.
            rate: Dropout rate.
        """
        # NAME is already a plain string; the original wrapped it in a
        # redundant f-string (f"{NAME}").
        super().__init__(NAME)

        self.encoder = Encoder(
            num_layers=num_layers,
            d_model=d_model,
            num_heads=num_heads,
            dff=dff,
            input_vocab_size=vocab_size,
            maximum_position_encoding=max_pe,
            rate=rate,
        )
        # Projection applied on top of the encoder output (see call()).
        self.dense = layers.Dense(d_model)
        self._embedding_size = d_model

    def call(self, x: tf.Tensor, training=False):