Example #1
import tensorflow as tf

from nalp.corpus import TextCorpus
from nalp.datasets import LanguageModelingDataset
from nalp.encoders import IntegerEncoder
from nalp.models.generators import LSTMGenerator

# Creating a character TextCorpus from file
corpus = TextCorpus(from_file='data/text/chapter1_harry.txt', corpus_type='char')

# Creating an IntegerEncoder
encoder = IntegerEncoder()

# Learns the encoding based on the TextCorpus dictionary and reverse dictionary
encoder.learn(corpus.vocab_index, corpus.index_vocab)

# Applies the learned encoding to the corpus tokens
encoded_tokens = encoder.encode(corpus.tokens)

# Creating Language Modeling Dataset
dataset = LanguageModelingDataset(encoded_tokens, max_contiguous_pad_length=10, batch_size=64)

# Creating the LSTM
lstm = LSTMGenerator(encoder=encoder, vocab_size=corpus.vocab_size, embedding_size=256, hidden_size=512)

# As NALP's LSTM generators are stateful, the model must be built with a fixed batch size
lstm.build((64, None))

# Compiling the LSTM
lstm.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
             loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
             metrics=[tf.metrics.SparseCategoricalAccuracy(name='accuracy')])

# Fitting the LSTM on the dataset batches (the epoch count is illustrative)
lstm.fit(dataset.batches, epochs=100)
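
After fitting, the trained weights can be saved and new text sampled. A minimal sketch of those steps, assuming an illustrative trained/lstm checkpoint path and that the LSTMGenerator exposes a generate_temperature_sampling helper (the sampling method name is an assumption about NALP's generator utilities, not confirmed API):

# Saving the trained weights (standard Keras API; the path is illustrative)
lstm.save_weights('trained/lstm', save_format='tf')

# Sampling artificial text from the trained model
# (generate_temperature_sampling is assumed; temperature controls randomness)
text = lstm.generate_temperature_sampling(start='Mr.', max_length=100, temperature=0.5)

# The corpus is character-level, so the sampled tokens are joined without spaces
print(''.join(text))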
Example #2
from nalp.corpus import TextCorpus
from nalp.datasets import LanguageModelingDataset
from nalp.encoders import IntegerEncoder

# Creating a character TextCorpus from file
corpus = TextCorpus(from_file='data/text/chapter1_harry.txt',
                    corpus_type='char')

# Creating an IntegerEncoder, learning encoding and encoding tokens
encoder = IntegerEncoder()
encoder.learn(corpus.vocab_index, corpus.index_vocab)
encoded_tokens = encoder.encode(corpus.tokens)

# Creating Language Modeling Dataset
dataset = LanguageModelingDataset(encoded_tokens,
                                  max_contiguous_pad_length=10,
                                  batch_size=1,
                                  shuffle=True)

# Iterating over one batch
for input_batch, target_batch in dataset.batches.take(1):
    # For every input and target inside the batch
    for x, y in zip(input_batch, target_batch):
        # Transforms the tensor to numpy and decodes it
        print(encoder.decode(x.numpy()), encoder.decode(y.numpy()))
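
The decoded pairs above follow the usual language-modeling convention: each target sequence is its input sequence shifted one token to the right. A small sanity check under that assumption (the shift-by-one behavior is inferred from the example output, not quoted from NALP's documentation):

import numpy as np

# Taking a single batch and checking the shift-by-one relation
for input_batch, target_batch in dataset.batches.take(1):
    x = input_batch[0].numpy()
    y = target_batch[0].numpy()

    # Every target token should equal the next input token
    assert np.array_equal(x[1:], y[:-1])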
Example #3
import tensorflow as tf

from nalp.corpus import TextCorpus
from nalp.encoders import IntegerEncoder
from nalp.models import SeqGAN

# When generating artificial text, make sure
# to use the same data, classes and parameters
# as the pre-trained network

# Creating a character TextCorpus from file
corpus = TextCorpus(from_file='data/text/chapter1_harry.txt',
                    corpus_type='char')

# Creating an IntegerEncoder
encoder = IntegerEncoder()

# Learns the encoding based on the TextCorpus dictionary and reverse dictionary
encoder.learn(corpus.vocab_index, corpus.index_vocab)

# Creating the SeqGAN
seqgan = SeqGAN(encoder=encoder,
                vocab_size=corpus.vocab_size,
                max_length=10,
                embedding_size=256,
                hidden_size=512,
                n_filters=(64, 128, 256),
                filters_size=(3, 5, 5),
                dropout_rate=0.25,
                temperature=1)
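
The comment above stresses reusing the pre-training setup, but the example stops before actually restoring it. A minimal sketch of the remaining steps, assuming an illustrative trained/seqgan checkpoint path and that the SeqGAN wrapper exposes its generator as the G attribute with a generate_temperature_sampling helper (both names are assumptions rather than confirmed API):

# Loading the pre-trained weights (the path is illustrative)
seqgan.load_weights('trained/seqgan').expect_partial()

# Building the generator with a unitary batch size for sampling
seqgan.G.build((1, None))

# Sampling artificial text from the generator
text = seqgan.G.generate_temperature_sampling(start='Mr.', max_length=100, temperature=0.5)

# The corpus is character-level, so tokens are joined without spaces
print(''.join(text))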