def transform_to_sequences(
         self,
         preprocessed_articles: Articles) -> List[List[Optional[Any]]]:
     """Transform articles content to a padded vector of length "max_article_length"."""
     matrix = self.tokenizer.texts_to_sequences(
         preprocessed_articles.title_and_summary())
     matrix = keras.preprocessing.sequence.pad_sequences(
         matrix, value=0, padding='post', maxlen=self.max_article_length)
     return matrix
    def __init__(self, articles: Articles, max_article_length: int):
        """Fit a tokenizer on the articles and precompute their padded sequences.

        Args:
            articles: Articles whose "title + summary" texts are tokenized.
            max_article_length: Fixed length to which every sequence is padded.
        """
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(articles.title_and_summary())

        self.tokenizer = tokenizer
        self.max_article_length: int = max_article_length
        self.sequences = self.transform_to_sequences(articles)
        # Index 0 is reserved for padding, hence the extra vocabulary slot.
        self.voc_size = 1 + len(tokenizer.word_index)
        self.document_count = tokenizer.document_count