Example #1
    def test_training_summarizer_transformer(self):
        summarizer_transformer = SummarizerTransformer(num_heads=1,
                                                       num_layers=1,
                                                       feed_forward_dim=20,
                                                       embedding_size=10,
                                                       dropout_rate=0,
                                                       max_prediction_len=3)
        summarizer_transformer.init_model(preprocessor=self.preprocessor,
                                          vectorizer=self.vectorizer,
                                          embedding_weights_encoder=None,
                                          embedding_weights_decoder=None)
        loss_transformer = 0
        train_step = summarizer_transformer.new_train_step(
            loss_function=self.loss_func, batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in self.dataset.take(-1):
                loss_transformer = train_step(source_seq, target_seq)
                print(str(loss_transformer))

        self.assertAlmostEqual(1.3421446084976196, float(loss_transformer), 5)
        output_transformer = summarizer_transformer.predict_vectors('a c', '')
        expected_first_logits = np.array(
            [-0.514366, 1.416978, -0.679771, -0.488442, -0.022602])
        np.testing.assert_allclose(expected_first_logits,
                                   output_transformer['logits'][0],
                                   atol=1e-6)
        self.assertEqual('<start> a c <end>',
                         output_transformer['preprocessed_text'][0])
        self.assertEqual('c c c', output_transformer['predicted_text'])
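Note: this test relies on fixtures (self.preprocessor, self.vectorizer, self.dataset, self.loss_func) created outside the snippet. A minimal setUp sketch that would provide them, modeled on the explicit setup shown in Example #3 below (the exact fixture code in the real test class may differ):

    def setUp(self):
        # Tiny toy corpus and whitespace tokenizers, as in Example #3.
        self.preprocessor = Preprocessor()
        tokenizer_encoder = KerasTokenizer(lower=False, filters='')
        tokenizer_decoder = KerasTokenizer(lower=False, filters='')
        tokenizer_encoder.fit(['a b c <start> <end>'])
        tokenizer_decoder.fit(['c d <start> <end>'])
        self.vectorizer = Vectorizer(tokenizer_encoder=tokenizer_encoder,
                                     tokenizer_decoder=tokenizer_decoder,
                                     max_output_len=3)
        self.loss_func = masked_crossentropy
        data = [('a b', 'c'), ('a b c', 'd')]
        data_prep = [self.preprocessor(d) for d in data]
        data_vecs = [self.vectorizer(d) for d in data_prep]
        # Batches of size 2 over the two toy samples.
        self.dataset = DatasetGenerator(2)(lambda: data_vecs)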
Example #2
    def test_serde_happy_path(self) -> None:
        preprocessor = Preprocessor()
        tokenizer = KerasTokenizer(oov_token='<unk>')
        tokenizer.fit([
            'a b c {} {}'.format(preprocessor.start_token,
                                 preprocessor.end_token)
        ])
        vectorizer = Vectorizer(tokenizer, tokenizer)
        summarizer = SummarizerTransformer(num_layers=1,
                                           num_heads=2,
                                           max_prediction_len=3,
                                           embedding_size=10,
                                           embedding_encoder_trainable=False)
        summarizer.init_model(preprocessor=preprocessor, vectorizer=vectorizer)

        # we need at least one train step to initialize the weights
        train_step = summarizer.new_train_step(masked_crossentropy,
                                               batch_size=1,
                                               apply_gradients=True)
        train_seq = tf.convert_to_tensor(np.array([[1, 1, 1]]), dtype=tf.int32)
        train_step(train_seq, train_seq)

        save_dir = os.path.join(self.temp_dir, 'summarizer_serde_happy_path')
        summarizer.save(save_dir)
        summarizer_loaded = SummarizerTransformer.load(save_dir)
        self.assertEqual(1, summarizer_loaded.num_layers)
        self.assertEqual(2, summarizer_loaded.num_heads)
        self.assertEqual(3, summarizer_loaded.max_prediction_len)
        self.assertEqual(10, summarizer_loaded.embedding_size)
        self.assertIsNotNone(summarizer_loaded.preprocessor)
        self.assertIsNotNone(summarizer_loaded.vectorizer)
        self.assertIsNotNone(summarizer_loaded.transformer)
        self.assertFalse(
            summarizer_loaded.transformer.encoder.embedding.trainable)
        self.assertTrue(
            summarizer_loaded.transformer.decoder.embedding.trainable)
        self.assertIsNotNone(summarizer_loaded.optimizer)

        pred = summarizer.predict_vectors('a c', '')
        pred_loaded = summarizer_loaded.predict_vectors('a c', '')
        np.testing.assert_almost_equal(pred['logits'],
                                       pred_loaded['logits'],
                                       decimal=6)
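Outside the test harness, the same serialization round trip boils down to a few calls; a minimal sketch (the directory path below is just an example):

# Persist a trained summarizer and restore it later.
summarizer.save('/tmp/headliner_transformer')
restored = SummarizerTransformer.load('/tmp/headliner_transformer')
# The restored model produces the same logits as the original.
prediction = restored.predict_vectors('a c', '')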
Example #3
    def test_training(self) -> None:
        data = [('a b', 'c'), ('a b c', 'd')]
        tokenizer_encoder = KerasTokenizer(lower=False, filters='')
        tokenizer_decoder = KerasTokenizer(lower=False, filters='')
        tokenizer_encoder.fit(['a b c <start> <end>'])
        tokenizer_decoder.fit(['c d <start> <end>'])
        vectorizer = Vectorizer(tokenizer_encoder=tokenizer_encoder,
                                tokenizer_decoder=tokenizer_decoder,
                                max_output_len=3)
        preprocessor = Preprocessor()
        batch_generator = DatasetGenerator(2)
        data_prep = [preprocessor(d) for d in data]
        data_vecs = [vectorizer(d) for d in data_prep]
        dataset = batch_generator(lambda: data_vecs)

        summarizer_transformer = SummarizerTransformer(num_heads=1,
                                                       num_layers=1,
                                                       feed_forward_dim=20,
                                                       embedding_size=10,
                                                       dropout_rate=0,
                                                       max_prediction_len=3)

        summarizer_transformer.init_model(preprocessor=preprocessor,
                                          vectorizer=vectorizer,
                                          embedding_weights_encoder=None,
                                          embedding_weights_decoder=None)

        summarizer_attention = SummarizerAttention(lstm_size=10,
                                                   embedding_size=10)

        summarizer_attention.init_model(preprocessor=preprocessor,
                                        vectorizer=vectorizer,
                                        embedding_weights_encoder=None,
                                        embedding_weights_decoder=None)

        summarizer = SummarizerBasic(lstm_size=10, embedding_size=10)

        summarizer.init_model(preprocessor=preprocessor,
                              vectorizer=vectorizer,
                              embedding_weights_encoder=None,
                              embedding_weights_decoder=None)

        loss_func = masked_crossentropy

        loss_attention = 0
        train_step = summarizer_attention.new_train_step(
            loss_function=loss_func, batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss_attention = train_step(source_seq, target_seq)
                print(str(loss_attention))

        self.assertAlmostEqual(1.5810251235961914, float(loss_attention), 10)
        output_attention = summarizer_attention.predict_vectors('a c', '')
        expected_first_logits = np.array(
            [-0.069454, 0.00272, 0.007199, -0.039547, 0.014357])
        np.testing.assert_allclose(expected_first_logits,
                                   output_attention['logits'][0],
                                   atol=1e-6)
        self.assertEqual('a c', output_attention['preprocessed_text'][0])
        self.assertEqual('<end>', output_attention['predicted_text'])

        loss = 0
        train_step = summarizer.new_train_step(loss_function=loss_func,
                                               batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss = train_step(source_seq, target_seq)

        self.assertAlmostEqual(1.5771859884262085, float(loss), 10)
        output = summarizer.predict_vectors('a c', '')
        expected_first_logits = np.array(
            [-0.03838864, 0.01226684, 0.01055636, -0.05209339, 0.02549592])
        np.testing.assert_allclose(expected_first_logits,
                                   output['logits'][0],
                                   atol=1e-6)
        self.assertEqual('a c', output['preprocessed_text'][0])
        self.assertEqual('<end>', output['predicted_text'])

        loss_transformer = 0
        train_step = summarizer_transformer.new_train_step(
            loss_function=loss_func, batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss_transformer = train_step(source_seq, target_seq)
                print(str(loss_transformer))

        self.assertAlmostEqual(1.2841172218322754, float(loss_transformer), 10)
        output_transformer = summarizer_transformer.predict_vectors('a c', '')

        expected_first_logits = np.array(
            [0.094787, 0.516092, 1.165521, 0.271338, 0.670318])
        np.testing.assert_allclose(expected_first_logits,
                                   output_transformer['logits'][0],
                                   atol=1e-6)
        self.assertEqual('a c', output_transformer['preprocessed_text'][0])
        self.assertEqual('d <end>', output_transformer['predicted_text'])
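For reference, predict_vectors returns a dict; the keys exercised in these tests are 'logits', 'preprocessed_text' and 'predicted_text'. A quick inspection sketch, reusing the trained summarizer_transformer from above:

output = summarizer_transformer.predict_vectors('a c', '')
print(output['preprocessed_text'][0])  # preprocessed input text
print(output['predicted_text'])        # decoded output string
print(output['logits'][0])             # logits of the first decoding step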
Example #4
import streamlit as st

from headliner.model.summarizer_transformer import SummarizerTransformer
from headliner.model.summarizer_attention import SummarizerAttention

summarizer_transformer = SummarizerTransformer.load('model/transformer')
summarizer_attention = SummarizerAttention.load('model/attention')

st.title('English-German Translator')
st.markdown('''
This is a demo showcasing our [Headliner package](). In particular, we trained
a simple seq2seq model on an English-German dataset. We didn't train it for very long,
so the model does not perform well, but that was not our main goal anyway. To create
the app, we use [Streamlit](https://streamlit.io/), a new open-source framework that
lets users create apps for machine learning projects very easily.
''')
text = st.text_input(label='Type in some English words.',
                     value='I really like you.')
st.write('(transformer) {}'.format(summarizer_transformer.predict(text)))
st.write('(attention) {}'.format(summarizer_attention.predict(text)))
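To try the app locally, save the snippet above as a script (for example translator_app.py, a name chosen here only for illustration) next to the model/transformer and model/attention directories, and launch it with Streamlit's standard CLI: streamlit run translator_app.py.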
Example #5
    def test_training(self) -> None:
        data = [('a b', 'c'), ('a b c', 'd')]
        tokenizer_encoder = KerasTokenizer(lower=False, filters='')
        tokenizer_decoder = KerasTokenizer(lower=False, filters='')
        tokenizer_encoder.fit(['a b c <start> <end>'])
        tokenizer_decoder.fit(['c d <start> <end>'])
        vectorizer = Vectorizer(tokenizer_encoder=tokenizer_encoder,
                                tokenizer_decoder=tokenizer_decoder,
                                max_output_len=3)
        preprocessor = Preprocessor()
        batch_generator = DatasetGenerator(2)
        data_prep = [preprocessor(d) for d in data]
        data_vecs = [vectorizer(d) for d in data_prep]
        dataset = batch_generator(lambda: data_vecs)

        summarizer_transformer = SummarizerTransformer(num_heads=1,
                                                       num_layers=1,
                                                       feed_forward_dim=20,
                                                       embedding_size=10,
                                                       dropout_rate=0,
                                                       max_prediction_len=3)

        summarizer_transformer.init_model(preprocessor=preprocessor,
                                          vectorizer=vectorizer,
                                          embedding_weights_encoder=None,
                                          embedding_weights_decoder=None)

        summarizer_attention = SummarizerAttention(lstm_size=10,
                                                   embedding_size=10)

        summarizer_attention.init_model(preprocessor=preprocessor,
                                        vectorizer=vectorizer,
                                        embedding_weights_encoder=None,
                                        embedding_weights_decoder=None)

        summarizer = SummarizerBasic(lstm_size=10, embedding_size=10)

        summarizer.init_model(preprocessor=preprocessor,
                              vectorizer=vectorizer,
                              embedding_weights_encoder=None,
                              embedding_weights_decoder=None)

        loss_func = masked_crossentropy

        loss_attention = 0
        train_step = summarizer_attention.new_train_step(
            loss_function=loss_func, batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss_attention = train_step(source_seq, target_seq)
                print(str(loss_attention))

        self.assertAlmostEqual(1.577033519744873, float(loss_attention), 5)
        output_attention = summarizer_attention.predict_vectors('a c', '')
        expected_first_logits = np.array(
            [-0.077805, 0.012667, 0.021359, -0.04872, 0.014989])
        np.testing.assert_allclose(expected_first_logits,
                                   output_attention['logits'][0],
                                   atol=1e-6)
        self.assertEqual('<start> a c <end>',
                         output_attention['preprocessed_text'][0])
        self.assertEqual('d <end>', output_attention['predicted_text'])

        loss = 0
        train_step = summarizer.new_train_step(loss_function=loss_func,
                                               batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss = train_step(source_seq, target_seq)

        self.assertAlmostEqual(1.5713274478912354, float(loss), 5)
        output = summarizer.predict_vectors('a c', '')
        expected_first_logits = np.array(
            [-0.051753, 0.013869, 0.010337, -0.073727, 0.033059])
        np.testing.assert_allclose(expected_first_logits,
                                   output['logits'][0],
                                   atol=1e-6)
        self.assertEqual('<start> a c <end>', output['preprocessed_text'][0])
        self.assertEqual('<end>', output['predicted_text'])

        loss_transformer = 0
        train_step = summarizer_transformer.new_train_step(
            loss_function=loss_func, batch_size=2)
        for _ in range(10):
            for source_seq, target_seq in dataset.take(-1):
                loss_transformer = train_step(source_seq, target_seq)
                print(str(loss_transformer))

        self.assertAlmostEqual(1.175953984260559, float(loss_transformer), 5)
        output_transformer = summarizer_transformer.predict_vectors('a c', '')

        expected_first_logits = np.array(
            [-0.197903, 0.884185, 1.147212, 0.318798, 0.97936])
        np.testing.assert_allclose(expected_first_logits,
                                   output_transformer['logits'][0],
                                   atol=1e-6)
        self.assertEqual('<start> a c <end>',
                         output_transformer['preprocessed_text'][0])
        self.assertEqual('d <end>', output_transformer['predicted_text'])
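Every training loop above passes masked_crossentropy as the loss function (its import is not shown in these snippets). As an illustration only, and not the library's actual implementation, a masked loss of this kind typically looks like the following in TensorFlow, with the padding token id 0 excluded from the average:

import tensorflow as tf

def masked_crossentropy_sketch(y_true, y_pred):
    # Per-token sparse categorical crossentropy on raw logits.
    crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    loss = crossentropy(y_true, y_pred)
    # Ignore padded positions (token id 0) when averaging.
    mask = tf.cast(tf.not_equal(y_true, 0), loss.dtype)
    loss = loss * mask
    return tf.reduce_sum(loss) / tf.reduce_sum(mask)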