import tensorflow as tf

from nalp.corpus import TextCorpus
from nalp.datasets import LanguageModelingDataset
from nalp.encoders import IntegerEncoder
from nalp.models.generators import LSTMGenerator

# Example script: reads a character-level corpus, integer-encodes its tokens,
# wraps them in a language modeling dataset and sets up an LSTM generator.

# The same batch size is used by the dataset and by `lstm.build` below, so it
# is named once to keep both places in agreement.
BATCH_SIZE = 64

# Creating a character TextCorpus from file
corpus = TextCorpus(
    from_file='data/text/chapter1_harry.txt',
    corpus_type='char',
)

# Creating an IntegerEncoder
encoder = IntegerEncoder()

# Learns the encoding based on the TextCorpus dictionary and reverse dictionary
encoder.learn(corpus.vocab_index, corpus.index_vocab)

# Applies the encoding on new data
encoded_tokens = encoder.encode(corpus.tokens)

# Creating Language Modeling Dataset
dataset = LanguageModelingDataset(
    encoded_tokens,
    max_length=10,
    batch_size=BATCH_SIZE,
)

# Creating the LSTM
lstm = LSTMGenerator(
    encoder=encoder,
    vocab_size=corpus.vocab_size,
    embedding_size=256,
    hidden_size=512,
)

# As NALP's LSTMs are stateful, we need to build it with a fixed batch size
lstm.build((BATCH_SIZE, None))

# Compiling the LSTM
# NOTE(review): in this excerpt the call is cut off right after the `loss`
# argument; it is closed here only so that the script parses. Any further
# arguments and the steps that follow are not visible -- restore them from
# the complete script.
lstm.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
)
from nalp.corpus import TextCorpus
from nalp.datasets import LanguageModelingDataset
from nalp.encoders import IntegerEncoder

# Example script: builds a language modeling dataset from a character-level
# corpus and prints the decoded input/target pairs of a single batch.

# Character-level corpus loaded from file
corpus = TextCorpus(
    from_file='data/text/chapter1_harry.txt',
    corpus_type='char',
)

# Integer encoder: learn the encoding from the corpus dictionaries, then
# encode the corpus tokens
encoder = IntegerEncoder()
encoder.learn(corpus.vocab_index, corpus.index_vocab)
encoded_tokens = encoder.encode(corpus.tokens)

# Language modeling dataset over the encoded tokens
dataset = LanguageModelingDataset(
    encoded_tokens,
    max_contiguous_pad_length=10,
    batch_size=1,
    shuffle=True,
)

# Only one batch is taken from the dataset
for inputs, targets in dataset.batches.take(1):
    # Walk the batch sample by sample, pairing each input with its target
    for sample, label in zip(inputs, targets):
        # Tensors are converted to numpy before being decoded
        decoded_input = encoder.decode(sample.numpy())
        decoded_target = encoder.decode(label.numpy())
        print(decoded_input, decoded_target)
import tensorflow as tf

from nalp.corpus import TextCorpus
from nalp.encoders import IntegerEncoder
from nalp.models import SeqGAN

# NOTE: when generating artificial text, the data, classes and parameters
# used here must be the same as those of the pre-trained network.

# Character-level corpus loaded from file
corpus = TextCorpus(
    from_file='data/text/chapter1_harry.txt',
    corpus_type='char',
)

# Integer encoder whose encoding is learned from the corpus dictionary and
# reverse dictionary
encoder = IntegerEncoder()
encoder.learn(corpus.vocab_index, corpus.index_vocab)

# SeqGAN model, one keyword argument per line for easier comparison with the
# pre-trained configuration
seqgan = SeqGAN(
    encoder=encoder,
    vocab_size=corpus.vocab_size,
    max_length=10,
    embedding_size=256,
    hidden_size=512,
    n_filters=(64, 128, 256),
    filters_size=(3, 5, 5),
    dropout_rate=0.25,
    temperature=1,
)