def __init__(self, **kwargs):
    """Build the full chatbot training stack: data pipeline, shared
    embedding, encoder/decoder models, and their Adam optimizers.

    Keyword Args:
        pretrained_embedding (bool): if True, load a saved embedding
            state dict (default False).
        pretrained_embedding_file: state dict for the embedding layer.
        pretrained_enc_dec (bool): if True, load saved encoder/decoder
            state dicts (default False).
        pretrained_enc_file: encoder state dict.
        pretrained_dec_file: decoder state dict.
        model_name (str): checkpoint/model label (default 'cb_model').
        attention_type (str): Luong attention score method (default 'dot').
        hidden_size (int): RNN hidden size and embedding dim (default 500).
        enc_nr_layers (int): encoder GRU layers (default 2).
        dec_nr_layers (int): decoder GRU layers (default 2).
        dropout (float): dropout probability (default 0.1).
        batch_size (int): training batch size (default 64).
        clip (float): gradient clipping threshold (default 50.0).
        teacher_forcing_ratio (float): default 1.0.
        lr (float): base learning rate (default 0.0001).
        decoder_learning_ratio (float): decoder LR multiplier (default 5.0).
        nr_iterations (int): training iterations (default 4000).
        print_every (int): logging interval (default 1).
        save_every (int): checkpoint interval (default 500).
    """
    # Prepare the raw corpus, clean it, and wrap vocabulary + sentence
    # pairs in the project's DataLoader.
    dp = DataPreprocessor()
    file_name_formatted = dp.write_to_file()
    dc = DataCleaner(file_name_formatted)
    dc.clean_data_pipeline().trim_rare_words()
    self.data_loader = DataLoader(dc.vocabulary, dc.pairs)
    self.dp = dp
    self.dc = dc

    # Optional pretrained weights.
    load_embedding = kwargs.get('pretrained_embedding', False)
    embedding_file = kwargs.get('pretrained_embedding_file', None)
    load_enc_dec = kwargs.get('pretrained_enc_dec', False)
    load_enc_file = kwargs.get('pretrained_enc_file', None)
    load_dec_file = kwargs.get('pretrained_dec_file', None)

    # Model / training hyper-parameters.
    self.model_name = kwargs.get('model_name', 'cb_model')
    attn_model = kwargs.get('attention_type', 'dot')
    self.hidden_size = kwargs.get('hidden_size', 500)
    self.encoder_nr_layers = kwargs.get('enc_nr_layers', 2)
    self.decoder_nr_layers = kwargs.get('dec_nr_layers', 2)
    dropout = kwargs.get('dropout', 0.1)
    self.batch_size = kwargs.get('batch_size', 64)
    self.clip = kwargs.get('clip', 50.0)
    self.teacher_forcing_ratio = kwargs.get('teacher_forcing_ratio', 1.0)
    self.learning_rate = kwargs.get('lr', 0.0001)
    self.decoder_learning_ratio = kwargs.get('decoder_learning_ratio', 5.0)
    self.nr_iteration = kwargs.get('nr_iterations', 4000)
    self.print_every = kwargs.get('print_every', 1)
    # Was hard-coded to 500; now overridable like every other
    # hyper-parameter, keeping the old value as the default.
    self.save_every = kwargs.get('save_every', 500)

    self.embedding = nn.Embedding(self.dc.vocabulary.num_words, self.hidden_size)
    if load_embedding:
        # NOTE(review): load_state_dict expects an already-loaded state
        # dict, not a file path, despite the kwarg name
        # 'pretrained_embedding_file' — confirm what callers pass.
        self.embedding.load_state_dict(embedding_file)

    # Initialize encoder & decoder models.
    encoder = EncoderRNN(self.hidden_size, self.embedding,
                         self.encoder_nr_layers, dropout)
    decoder = DecoderRNN(
        attn_model,
        self.embedding,
        self.hidden_size,
        self.dc.vocabulary.num_words,
        self.decoder_nr_layers,
        dropout,
    )
    if load_enc_dec:
        # Same caveat as above: these must be state dicts, not paths.
        encoder.load_state_dict(load_enc_file)
        decoder.load_state_dict(load_dec_file)

    # Move models to the configured device (CPU/GPU).
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    self.encoder = encoder
    self.decoder = decoder

    # Decoder trains with a larger LR (scaled by decoder_learning_ratio).
    self.encoder_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
    self.decoder_optimizer = optim.Adam(
        decoder.parameters(),
        lr=self.learning_rate * self.decoder_learning_ratio)
def main():
    """Primary entry point: build the seq2seq chatbot models and train them.

    Loads the prepared vocabulary and sentence pairs, constructs the shared
    embedding plus encoder/decoder, moves them to the configured device,
    and runs the training loop via trainIters. All hyper-parameters come
    from the module-level `params` object.
    """
    voc, pairs = loadPreparedData()
    print('Building encoder and decoder ...')
    # Shared word embeddings for encoder and decoder.
    # (Removed dead commented-out variant that sized the embedding by
    # hidden_size; this build uses params.embedding_size.)
    embedding = nn.Embedding(voc.num_words, params.embedding_size)
    # Initialize encoder & decoder models.
    # NOTE(review): EncoderRNN is called here as (embedding, hidden_size,
    # ...) while other call sites in this file use (hidden_size,
    # embedding, ...) — confirm against the class definition.
    encoder = EncoderRNN(embedding, params.hidden_size,
                         params.encoder_n_layers, params.dropout)
    decoder = LuongAttnDecoderRNN(params.attn_model, embedding,
                                  params.hidden_size, voc.num_words,
                                  params.decoder_n_layers, params.dropout)
    # Use appropriate device.
    encoder = encoder.to(params.device)
    decoder = decoder.to(params.device)
    print('Models built and ready to go!')
    # Ensure dropout layers are in train mode.
    encoder.train()
    decoder.train()
    # Initialize optimizers.
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=params.learning_rate)
    decoder_optimizer = optim.Adam(
        decoder.parameters(),
        lr=params.learning_rate * params.decoder_learning_ratio)
    # Run training iterations.
    print("Starting Training!")
    trainIters(voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, params.encoder_n_layers,
               params.decoder_n_layers, params.save_dir,
               params.n_iteration, params.batch_size, params.print_every,
               params.save_every, params.clip, params.corpus_name,
               load_filename=None)
def initialize_models(voc):
    """Construct the shared embedding plus encoder/decoder for *voc*.

    When c.LOAD_FILENAME is truthy, previously saved state dicts are
    loaded into the freshly built modules — these come from the
    module-level names embedding_sd / encoder_sd / decoder_sd, presumably
    populated from a checkpoint elsewhere in this file (verify before
    calling). Returns (embedding, encoder, decoder), all on c.DEVICE.
    """
    print('Building encoder and decoder ...')
    # Shared word-embedding table, sized by vocabulary and hidden size.
    word_vectors = nn.Embedding(voc.num_words, c.HIDDEN_SIZE)
    if c.LOAD_FILENAME:
        word_vectors.load_state_dict(embedding_sd)
    # Build the seq2seq pair around the shared embedding.
    enc = EncoderRNN(c.HIDDEN_SIZE, word_vectors,
                     c.ENCODER_N_LAYERS, c.DROPOUT)
    dec = LuongAttnDecoderRNN(c.ATTN_MODEL, word_vectors, c.HIDDEN_SIZE,
                              voc.num_words, c.DECODER_N_LAYERS, c.DROPOUT)
    if c.LOAD_FILENAME:
        enc.load_state_dict(encoder_sd)
        dec.load_state_dict(decoder_sd)
    # Use appropriate device.
    enc = enc.to(c.DEVICE)
    dec = dec.to(c.DEVICE)
    print('Models built and ready to go!')
    return word_vectors, enc, dec
# --- Restore a trained chatbot from `checkpoint` and chat interactively ---
# Pull every saved state dict out of the checkpoint mapping.
encoder_sd = checkpoint['en']
decoder_sd = checkpoint['de']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
# Restore the vocabulary object's full state in place.
voc.__dict__ = checkpoint['voc_dict']

# Rebuild the embedding table and load its trained weights.
embedding = nn.Embedding(voc.num_words, hidden_size)
embedding.load_state_dict(embedding_sd)

# Rebuild encoder/decoder around the shared embedding and restore
# their trained parameters, then move them to the target device.
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                              voc.num_words, decoder_n_layers, dropout)
encoder.load_state_dict(encoder_sd)
decoder.load_state_dict(decoder_sd)
encoder = encoder.to(device)
decoder = decoder.to(device)

# Inference only: put dropout layers in eval mode.
encoder.eval()
decoder.eval()

# Greedy decoding drives the interactive chat loop.
searcher = GreedySearchDecoder(encoder, decoder)
evaluateInput(encoder, decoder, searcher, voc)