def __init__(self, config: HiDDenConfiguration, noiser: Noiser):
    super(EncoderDecoder, self).__init__()
    self.encoder = Encoder(config)
    self.encoder = nn.DataParallel(self.encoder)
    self.encode_rnn = EncoderRNN(config)
    # self.encode_rnn = nn.DataParallel(self.encode_rnn)
    self.noiser = noiser
    self.decoder = Decoder(config)
    self.decoder = nn.DataParallel(self.decoder)
    self.decode_rnn = DecoderRNN(config)
    self.adversarial_decode_rnn = DecoderRNN(config)
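# A small illustrative sketch (not part of the original model): nn.DataParallel keeps the
# wrapped module under the `.module` attribute, which is what to use when saving or loading
# state dicts for the Encoder/Decoder wrapped above. The Linear layer is a stand-in for any
# of those components.
import torch.nn as nn

layer = nn.Linear(8, 8)
wrapped = nn.DataParallel(layer)
assert wrapped.module is layer           # the original module is still reachable
state = wrapped.module.state_dict()      # keys without the 'module.' prefix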
class Model(nn.Module):
    def __init__(self, cfg):
        super(Model, self).__init__()
        self.cfg = cfg
        self.embeddings_src = nn.Embedding(cfg.svoc.size, cfg.emb_src_size)  ### embeddings for encoder
        self.encoder = EncoderRNN(self.embeddings_src, self.cfg)
        if self.cfg.reuse_words:
            self.embeddings_tgt = self.embeddings_src  ### same embeddings for encoder and decoder
        else:
            self.embeddings_tgt = nn.Embedding(self.cfg.tvoc.size, self.cfg.emb_tgt_size)  ### new embeddings for decoder
        self.decoder = DecoderRNN_Attn(self.embeddings_tgt, self.cfg)
        sys.stderr.write('Initializing model pars\n')
        for param in self.encoder.parameters():
            param.data.uniform_(-0.08, 0.08)
        for param in self.decoder.parameters():
            param.data.uniform_(-0.08, 0.08)

    def forward(self, src_batch, tgt_batch, len_src_batch, len_tgt_batch, teacher_forcing=1.0):
        enc_outputs, enc_final = self.encoder(src_batch, len_src_batch)
        if self.cfg.par.beam_size > 1:
            dec_outputs, dec_output_words = self.decoder.beam_search(
                self.cfg, len_src_batch, self.cfg.par.max_tgt_len,
                enc_final, enc_outputs, teacher_forcing)
        else:
            dec_outputs, dec_output_words = self.decoder(
                tgt_batch, len_src_batch, len_tgt_batch,
                enc_final, enc_outputs, teacher_forcing)
        return dec_outputs, dec_output_words
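# A minimal usage sketch (not part of the original code): how a cfg object with the attributes
# referenced above (svoc.size, tvoc.size, emb_src_size, emb_tgt_size, reuse_words,
# par.beam_size, par.max_tgt_len) might be assembled for Model. The sizes are made-up values;
# EncoderRNN and DecoderRNN_Attn are assumed to be importable from the project's own modules.
from types import SimpleNamespace

cfg = SimpleNamespace(
    svoc=SimpleNamespace(size=32000),
    tvoc=SimpleNamespace(size=32000),
    emb_src_size=256,
    emb_tgt_size=256,
    reuse_words=True,                      # share source/target embeddings
    par=SimpleNamespace(beam_size=1,       # 1 => greedy decoding branch in forward()
                        max_tgt_len=50),
)
# model = Model(cfg)
# dec_outputs, dec_output_words = model(src_batch, tgt_batch, len_src, len_tgt, teacher_forcing=1.0)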
class Seq2Seq(nn.Module):
    @log_func
    def __init__(self, source_lang, target_lang, hps, training_hps, writer=None, searchengine=None):
        super(Seq2Seq, self).__init__()
        self.hps = hps
        self.writer = writer
        self.training_hps = training_hps
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.i_cont = 0
        self.encoder = EncoderRNN(source_lang.input_size(), self.hps, self.training_hps, writer=writer)
        self.decoder = DecoderRNN(target_lang.input_size(), target_lang.input_size(),
                                  self.hps, self.training_hps, writer=writer)
        self.max_length = self.training_hps.max_length
        # per-token loss, masked and normalized manually in forward()
        # (replaces the deprecated reduce=False / size_average=False arguments)
        self.criterion = nn.NLLLoss(reduction='none')
        if hps.tm_init:
            self.translationmemory = TranslationMemory(self, writer=writer, hps=hps, searchengine=searchengine)
        else:
            self.translationmemory = None

    @log_func
    def eval(self):
        return self.train(False)

    @log_func
    def train(self, mode=True):
        super(self.__class__, self).train(mode)
        if self.translationmemory is not None:
            self.translationmemory.train(mode)
        return self

    @log_func
    def translate(self, input_batch, mask, use_search=False):
        batch_size = input_batch.size()[0]
        encoder_outputs = self.encoder(input_batch)
        if use_search:
            assert self.translationmemory is not None, "No sample pairs for translation memory, did you want it?"
            self.translationmemory.fit(input_batch)
        hidden = None
        dec_input = Variable(torch.LongTensor([lang.BOS_TOKEN] * batch_size))
        if self.training_hps.use_cuda:
            dec_input = dec_input.cuda()
        translations = [[lang.BOS_TOKEN] for _ in range(batch_size)]
        converged = np.zeros(shape=(batch_size,))
        for i in range(self.max_length):
            if use_search:
                output, hidden, _ = self.decoder(dec_input, encoder_outputs, mask=mask, hidden=hidden,
                                                 translation_memory=self.translationmemory, position=i)
            else:
                output, hidden, _ = self.decoder(dec_input, encoder_outputs, mask=mask, hidden=hidden)
            _, output_idx = torch.max(output, -1)
            for j in range(batch_size):
                if translations[j][-1] != self.target_lang.get_eos():
                    translations[j].append(output_idx[j].data[0])
                else:
                    converged[j] = True
                if output_idx[j].data[0] == self.target_lang.get_eos():
                    converged[j] = True
            dec_input = Variable(torch.LongTensor([tr[-1] for tr in translations]))
            if self.training_hps.use_cuda:
                dec_input = dec_input.cuda()
            if np.all(converged):
                break
        if use_search:
            self.translationmemory.dump_logs(
                [tuple(map(self.target_lang.get_word, elem)) for elem in translations],
                "../dumped_translation_logs.pkl")
        return [' '.join(map(self.target_lang.get_word, elem)) for elem in translations]

    @log_func
    def forward(self, input_batch, mask, output_batch, out_mask, use_search=False):
        encoder_outputs = self.encoder(input_batch)
        if use_search:
            assert self.translationmemory is not None, "No sample pairs for translation memory, did you want it?"
            self.translationmemory.fit(input_batch)
        hidden = None
        loss = 0.0
        contexts = Variable(torch.zeros((out_mask.size()[0], out_mask.size()[1] - 1,
                                         (self.hps.enc_bidirectional + 1) * self.hps.enc_hidden_size)))
        # translate_to_all_loggers("out_mask s2s {}".format(out_mask.size()))
        for i in range(out_mask.size()[1] - 1):
            if use_search:
                output, hidden, context = self.decoder(output_batch[:, i], encoder_outputs, mask=mask, hidden=hidden,
                                                       translation_memory=self.translationmemory, position=i)
            else:
                output, hidden, context = self.decoder(output_batch[:, i], encoder_outputs, mask=mask, hidden=hidden)
            contexts[:, i, :] = context
            loss += (self.criterion(output, output_batch[:, i + 1]) * out_mask[:, i + 1]).sum()
        if not use_search:
            self.writer.add_scalar("normal/context",
                                   (contexts.max(1)[0] - contexts.min(1)[0]).sum(-1).mean(),
                                   self.i_cont)
            self.i_cont += 1
        loss /= out_mask.sum()
        return loss

    @log_func
    def get_hiddens_and_contexts(self, input_batch, mask, output_batch, out_mask):
        """
        input_batch: [B, T]
        encoder_outputs: [B, T, DE * HE]
        """
        encoder_outputs = self.encoder(input_batch)
        B, *_ = input_batch.shape
        hidden = None
        hiddens = Variable(torch.zeros((self.hps.dec_layers * (self.hps.dec_bidirectional + 1),
                                        out_mask.size()[1] - 1, B, self.hps.dec_hidden_size)))
        contexts = Variable(torch.zeros((B, out_mask.size()[1] - 1,
                                         (self.hps.enc_bidirectional + 1) * self.hps.enc_hidden_size)))
        if self.training_hps.use_cuda:
            hiddens = hiddens.cuda()
            contexts = contexts.cuda()
        for i in range(out_mask.size()[1] - 1):
            output, hidden, context = self.decoder(output_batch[:, i], encoder_outputs, mask=mask, hidden=hidden)
            hiddens[:, i, :, :] = hidden
            contexts[:, i, :] = context
        return hiddens, contexts

    def state_dict(self, destination=None, prefix='', keep_vars=False):
        destination = super(Seq2Seq, self).state_dict(destination, prefix, keep_vars)
        if self.translationmemory:
            self.translationmemory.state_dict(destination, prefix, keep_vars)
        return destination

    def load_state_dict(self, state_dict, strict=True):
        if self.translationmemory is not None:
            self.translationmemory.load_state_dict(state_dict)
            # print(state_dict)
            # del state_dict['translation_memory.M']
        super(Seq2Seq, self).load_state_dict(state_dict, strict)

    def cuda(self):
        if self.translationmemory:
            self.translationmemory = self.translationmemory.cuda()
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        return super(Seq2Seq, self).cuda()

    def cpu(self):
        if self.translationmemory is not None:  # guard added to mirror cuda() and avoid calling .cpu() on None
            self.translationmemory = self.translationmemory.cpu()
        self.encoder = self.encoder.cpu()
        self.decoder = self.decoder.cpu()
        return super(Seq2Seq, self).cpu()

    # def eval(self):
    #     self.train = False
    # def eval(self):
    #     self.train = True

    @staticmethod
    def get_default_hparams():
        return merge_hparams(EncoderRNN.get_default_hparams(),
                             DecoderRNN.get_default_hparams(),
                             TranslationMemory.get_default_hparams(),
                             lang.Lang.get_default_hparams())
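# A hedged sketch (not from the original project) of a single training step with the Seq2Seq
# class above. It assumes the batches are already numericalized LongTensors and that
# input_mask/output_mask are the padding masks forward() expects; the optimizer choice and
# clipping value are illustrative, not taken from the source.
import torch
import torch.optim as optim

def train_step(model, optimizer, input_batch, input_mask, output_batch, output_mask, clip=5.0):
    model.train()
    optimizer.zero_grad()
    loss = model(input_batch, input_mask, output_batch, output_mask)  # mask-normalized NLL
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    optimizer.step()
    return float(loss)

# optimizer = optim.Adam(model.parameters(), lr=1e-3)
# avg_loss = train_step(model, optimizer, src, src_mask, tgt, tgt_mask)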
def run_1tm(n_cluster, n_exp, ed, hs):
    embedding_dim = ed
    hidden_size = hs
    encoder_n_layers = 2
    decoder_n_layers = 2
    save_dir = os.path.join("..", "data", "save", "cluster" + str(n_cluster), "exp" + str(n_exp))
    option = 'story_fp_1tm'
    model_name = option + '_model_Attn_embedding_dim' + str(ed)
    attn_model = 'general'
    checkpoint_iter = 160000
    k = n_cluster
    loadFilename = os.path.join(save_dir, model_name,
                                '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
                                '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = [checkpoint['de' + str(i + 1)] for i in range(k)]
        # decoder_sd1 = checkpoint['de1']
        # decoder_sd2 = checkpoint['de2']
        # decoder_sd3 = checkpoint['de3']
        # decoder_sd4 = checkpoint['de4']
        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData_1tm(
            os.path.join("..", "data", "save", 'vocab.pickle'),
            os.path.join(save_dir, option + '.pickle'))
        # test_pairs = read_stories_from_xls(os.path.join('..', 'data', 'compare.xlsx'))
        # test_pairs = [random.choice(test_pairs) for _ in range(10)]
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')
        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, embedding_dim)
        embedding.load_state_dict(embedding_sd)
        # Initialize encoder & decoder models
        encoder = EncoderRNN(embedding_dim, hidden_size, embedding, encoder_n_layers)
        decoder = [LuongAttnDecoderRNN(attn_model, embedding, embedding_dim, hidden_size,
                                       voc.num_words, decoder_n_layers) for _ in range(k)]
        # decoder1 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # decoder2 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # decoder3 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # decoder4 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        encoder.load_state_dict(encoder_sd)
        [decoder[i].load_state_dict(decoder_sd[i]) for i in range(k)]
        # decoder1.load_state_dict(decoder_sd1)
        # decoder2.load_state_dict(decoder_sd2)
        # decoder3.load_state_dict(decoder_sd3)
        # decoder4.load_state_dict(decoder_sd4)
        # Use appropriate device
        encoder = encoder.to(device)
        [decoder[i].to(device) for i in range(k)]
        # decoder1 = decoder1.to(device)
        # decoder2 = decoder2.to(device)
        # decoder3 = decoder3.to(device)
        # decoder4 = decoder4.to(device)
        # Set dropout layers to eval mode
        encoder.eval()
        [decoder[i].eval() for i in range(k)]
        # decoder1.eval()
        # decoder2.eval()
        # decoder3.eval()
        # decoder4.eval()

        # Initialize search module
        searcher = GreedySearchDecoder_1tm(encoder, decoder, k)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        OMEG_score = 0
        OMEG_dis = 0
        SAFE_score = 0
        SAFE_dis = 0
        MOSES_score = 0
        MOSES_dis = 0
        num = 0
        # fps_list = read_predicted_fps(save_dir)
        for i, test_pair in enumerate(test_pairs):
            num += 1
            # try:
            OMEG_s, OMEG_d = evaluate_1tm(searcher, voc, test_pair[0], test_pair[1], k)
            # SAFE_s, SAFE_d = evaluate_SAFE(test_pair[0], test_pair[1])
            # MOSES_s, MOSES_d = evaluate_MOSES(test_pair[0], test_pair[1], fps_list[i])
            # s, d = evaluate_beam_pretrain(encoder, decoder, voc, test_pair[0], test_pair[3])
            OMEG_score += OMEG_s
            OMEG_dis += OMEG_d
            # SAFE_score += SAFE_s
            # SAFE_dis += SAFE_d
            # MOSES_score += MOSES_s
            # MOSES_dis += MOSES_d
            # except ZeroDivisionError:
            #     continue

        print('\n\n' + '-' * 40 + "cluster" + str(n_cluster),
              "exp" + str(n_exp) + '_embedding' + str(ed) + '_hidden' + str(hs) + '-' * 40)
        print('OMEG Total Precision: ' + str(OMEG_score / num))
        print('OMEG Total Recall: ' + str(OMEG_dis / num))
        # print('SAFE Total Precision: ' + str(SAFE_score / num))
        # print('SAFE Total Recall: ' + str(SAFE_dis / num))
        # print('MOSES Total Precision: ' + str(MOSES_score / num))
        # print('MOSES Total Recall: ' + str(MOSES_dis / num))
        print('-' * 40 + "cluster" + str(n_cluster),
              "exp" + str(n_exp) + '_embedding' + str(ed) + '_hidden' + str(hs) + '-' * 40 + '\n\n')
def run_pretrain():
    hidden_size = 100
    encoder_n_layers = 2
    decoder_n_layers = 2
    save_dir = os.path.join("..", "data", "save")
    option = 'story_fp'
    model_name = option + '_nd_model_Attn'
    attn_model = 'general'
    checkpoint_iter = 40000
    loadFilename = os.path.join(save_dir, model_name,
                                '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
                                '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData(os.path.join(save_dir, 'vocab.pickle'),
                                         os.path.join(save_dir, option + '.pickle'), option)
        # test_pairs = read_stories_from_xls(os.path.join('..', 'data', 'compare.xlsx'))
        # test_pairs = [random.choice(test_pairs) for _ in range(10)]
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')
        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, hidden_size)
        embedding.load_state_dict(embedding_sd)
        # Initialize encoder & decoder models
        encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
        # Use appropriate device
        encoder = encoder.to(device)
        decoder = decoder.to(device)
        # Set dropout layers to eval mode
        encoder.eval()
        decoder.eval()

        # Initialize search module
        searcher = GreedySearchDecoder_pretrain(encoder, decoder)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        score = 0
        dis = 0
        num = 0
        for test_pair in test_pairs:
            try:
                s, d = evaluate_pretain(searcher, voc, test_pair[0], test_pair[3])
                # s, d = evaluate_beam_pretrain(encoder, decoder, voc, test_pair[0], test_pair[3])
                score += s
                dis += d
                num += 1
            except ZeroDivisionError:
                continue
        print('Total BLEU score: ' + str(score / num))
        print('Total levenshtein distance: ' + str(dis / num))
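# A hedged aside (not from the original code): the try/except ZeroDivisionError above suggests
# the sentence-level BLEU inside evaluate_pretain can hit zero n-gram overlap. If NLTK is used
# for the metric, a smoothing function sidesteps that case entirely; the helper and example
# sentences below are illustrative only.
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def smoothed_bleu(reference_tokens, hypothesis_tokens):
    # method1 adds a small count to zero n-gram matches instead of producing a zero/undefined score
    return sentence_bleu([reference_tokens], hypothesis_tokens,
                         smoothing_function=SmoothingFunction().method1)

# smoothed_bleu("the user logs in".split(), "user logs in".split())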
def run():
    hidden_size = 100
    encoder_n_layers = 2
    decoder_n_layers = 2
    save_dir = os.path.join("..", "data", "save")
    option = 'story_fp'
    model_name = option + '_model_Attn'
    attn_model = 'general'
    checkpoint_iter = 120000
    loadFilename = os.path.join(save_dir, model_name,
                                '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
                                '{}_checkpoint.tar'.format(checkpoint_iter))

    # Load model if a loadFilename is provided
    if not os.path.isfile(loadFilename):
        print('Cannot find model file: ' + loadFilename)
    else:
        print('Loading model file: ' + loadFilename)
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        transfer_encoder_sd = checkpoint['transfer_en']
        # smr_encoder_sd = checkpoint['smr_en']
        # des_encoder_sd = checkpoint['des_en']
        # accp_encoder_sd = checkpoint['accp_en']
        fp_decoder_sd = checkpoint['fp_de']
        # encoder_optimizer_sd = checkpoint['en_opt']
        # decoder_optimizer_sd = checkpoint['de_opt']
        # embedding_sd = checkpoint['embedding']

        # prepare vocabulary and test data
        voc, _, test_pairs = prepareData(os.path.join(save_dir, 'vocab.pickle'),
                                         os.path.join(save_dir, option + '.pickle'), option)
        print("Read {!s} testing pairs".format(len(test_pairs)))

        print('Building encoder and decoder ...')
        # Initialize word embeddings
        embedding = torch.nn.Embedding(voc.num_words, hidden_size)
        # embedding.load_state_dict(embedding_sd)
        # Initialize encoder & decoder models
        # smr_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        smr_encoder = rnn.RNNModel('GRU', voc.num_words, 100, 100, 2)
        des_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        accp_encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers)
        fp_decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
        # fp_decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers)
        fp_decoder.load_state_dict(fp_decoder_sd)
        # transfer_encoder = TransferEncoder(smr_encoder, des_encoder, accp_encoder, fp_decoder.hidden_size)
        transfer_encoder = TransferEncoder(smr_encoder, smr_encoder, smr_encoder, fp_decoder.hidden_size)
        transfer_encoder.load_state_dict(transfer_encoder_sd)
        # smr_encoder.load_state_dict(smr_encoder_sd)
        # des_encoder.load_state_dict(des_encoder_sd)
        # accp_encoder.load_state_dict(accp_encoder_sd)
        # for name, para in encoder_decoder.named_parameters():
        #     print(name, ':', para)
        # encoder.load_state_dict(encoder_sd)
        # decoder.load_state_dict(decoder_sd)
        # Use appropriate device
        transfer_encoder = transfer_encoder.to(device)
        # smr_encoder = smr_encoder.to(device)
        # des_encoder = des_encoder.to(device)
        # accp_encoder = accp_encoder.to(device)
        fp_decoder = fp_decoder.to(device)
        # Set dropout layers to eval mode
        transfer_encoder.eval()
        # smr_encoder.train()
        # des_encoder.train()
        # accp_encoder.train()
        fp_decoder.eval()

        # Initialize search module
        searcher = GreedySearchDecoder(transfer_encoder, fp_decoder)

        print('Evaluating {!s} test pairs ...'.format(len(test_pairs)))
        score = 0
        dis = 0
        num = 0
        for test_pair in test_pairs:
            try:
                s, d = evaluate(searcher, voc, test_pair[0], test_pair[1], test_pair[2], test_pair[3])
                # score += evaluate_beam(transfer_encoder, fp_decoder, voc, test_pair[0], test_pair[1], test_pair[2], test_pair[3])
                score += s
                dis += d
                num += 1
            except ZeroDivisionError:
                continue
        print('Total BLEU score: ' + str(score / num))
        print('Total levenshtein distance: ' + str(dis / num))
        else:
            decoded_words.append(output_lang.index2word[topi.item()])
        decoder_input = topi.squeeze().detach()

    return decoded_words, decoder_attentions[:di + 1]


def evaluateRandomly(encoder, decoder, n=10):
    counter = 0
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        if output_sentence[:-6] == pair[1]:
            counter += 1
        print('<', output_sentence)
        print('')
    print("Correct Examples : {} out of {}".format(counter, n))


hidden_size = 256
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 100000, print_every=500, plot_every=1000)
evaluateRandomly(encoder1, attn_decoder1, n=1000)
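# A hedged aside (not in the original script): the slice output_sentence[:-6] above presumably
# strips a trailing ' <EOS>' marker before the exact-match comparison. An explicit strip is less
# brittle if the marker is ever absent or renamed; EOS_MARKER below is an assumed name, not
# taken from the source.
EOS_MARKER = ' <EOS>'

def strip_eos(sentence, marker=EOS_MARKER):
    # remove the marker only if it is actually there
    return sentence[:-len(marker)] if sentence.endswith(marker) else sentence

# if strip_eos(output_sentence) == pair[1]:
#     counter += 1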
style_tok_test_reverse = [[getword(input_lang, a1)] if i else [getword(input_lang, a2)]
                          for i in y_test_reverse]

attribute_train.append(torch.LongTensor(style_tok_train))
attribute_train.append(torch.LongTensor(style_tok_train_reverse))
attribute_valid.append(torch.LongTensor(style_tok_valid))
attribute_valid.append(torch.LongTensor(style_tok_valid_reverse))
attribute_test.append(torch.LongTensor(style_tok_test))
attribute_test.append(torch.LongTensor(style_tok_test_reverse))

label_train.append(torch.LongTensor(y_train))
label_train.append(torch.LongTensor(y_train_reverse))
label_valid.append(torch.LongTensor(y_valid))
label_valid.append(torch.LongTensor(y_valid_reverse))
label_test.append(torch.LongTensor(y_test))
label_test.append(torch.LongTensor(y_test_reverse))

print(input_lang.n_words)
# print(len(train_pairs))

encoder = EncoderRNN(input_lang.n_words, config['hidden_size'], config['num_layers'],
                     embedding_weights, config['embedding_dim'], config['dropout']).to(device)
decoder1 = DecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'],
                      config['num_layers'], embedding_weights, config['embedding_dim'],
                      config['dropout']).to(device)
# decoder1 = AttnDecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'], config['num_layers'], embedding_weights, config['embedding_dim'], config['dropout']).to(device)
decoder2 = AttnDecoderRNN(config['hidden_size'], input_lang.n_words, config['MAX_LENGTH'],
                          config['num_layers'], embedding_weights, config['embedding_dim'],
                          config['dropout']).to(device)

classifier = model = WordAttention(input_lang.n_words, tag_lang.n_words, dep_lang.n_words,
                                   config['embedding_dim'], config['tag_dim'], config['dep_dim'],
                                   config['hidden_size'], config['classifer_class_size'],
                                   config['num_layers'], config['dropout'],
                                   embedding_weights, config['structural'])
classifier.load_state_dict(torch.load(config['classifier_name'] + '.pt'))


def init_weights(m):
    for name, param in m.named_parameters():
        if name != 'embedding.weight':
            if 'weight' in name:
                # print(name)
                # print(param)
                # nn.init.uniform_(param.data, -0.1, 0.1)
                nn.init.normal_(param.data, mean=0, std=0.1)
            else:
                nn.init.constant_(param.data, 0)
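# A hedged usage note (the call sites are not shown in this snippet): because init_weights walks
# m.named_parameters(), it is meant to be called once per top-level module rather than via
# nn.Module.apply (which would re-run the loop for every submodule). Exactly which modules get
# re-initialized here is an assumption.
init_weights(encoder)
init_weights(decoder1)
init_weights(decoder2)
# the pretrained classifier keeps its loaded weights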
def run(j, i, k, ed, hs):
    save_dir = os.path.join("..", "data", "save", "cluster" + str(k), "exp" + str(j))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    option = 'story_fp_1tm'
    # if option not in ['story_summary', 'story_description', 'story_acceptance', 'fp', 'story_fp', 'story_fp_1tm']:
    #     raise ValueError(option, "is not an appropriate corpus type.")
    voc, pairs, _ = prepareData_1tm(os.path.join("..", "data", "save", 'vocab.pickle'),
                                    os.path.join(save_dir, option + '.pickle'))
    print("Read {!s} training pairs".format(len(pairs)))

    # Configure models
    model_name = option + '_model_Attn_embedding_dim' + str(ed)
    # attn_model = 'dot'
    attn_model = 'general'
    # attn_model = 'concat'
    hidden_size = hs
    embedding_dim = ed
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    checkpoint_iter = i * 40000
    n_iteration = checkpoint_iter + 40000
    if i == 0:
        loadFilename = None
    else:
        loadFilename = os.path.join(save_dir, model_name,
                                    '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
                                    '{}_checkpoint.tar'.format(checkpoint_iter))
    checkpoint_iteration = 0

    # Load model if a loadFilename is provided
    if loadFilename:
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        # checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd1 = checkpoint['de1']
        decoder_sd2 = checkpoint['de2']
        decoder_sd3 = checkpoint['de3']
        # decoder_sd4 = checkpoint['de4']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd1 = checkpoint['de_opt1']
        decoder_optimizer_sd2 = checkpoint['de_opt2']
        decoder_optimizer_sd3 = checkpoint['de_opt3']
        # decoder_optimizer_sd4 = checkpoint['de_opt4']
        embedding_sd = checkpoint['embedding']
        checkpoint_iteration = checkpoint['iteration']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = torch.nn.Embedding(voc.num_words, embedding_dim)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(embedding_dim, hidden_size, embedding, encoder_n_layers, dropout)
    decoder1 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim, hidden_size, voc.num_words, decoder_n_layers)
    decoder2 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim, hidden_size, voc.num_words, decoder_n_layers)
    decoder3 = LuongAttnDecoderRNN(attn_model, embedding, embedding_dim, hidden_size, voc.num_words, decoder_n_layers)
    # decoder4 = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers)
    # decoder = DecoderRNN(embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder1.load_state_dict(decoder_sd1)
        decoder2.load_state_dict(decoder_sd2)
        decoder3.load_state_dict(decoder_sd3)
        # decoder4.load_state_dict(decoder_sd4)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder1 = decoder1.to(device)
    decoder2 = decoder2.to(device)
    decoder3 = decoder3.to(device)
    # decoder4 = decoder4.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    teacher_forcing_ratio = 1.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    print_every = 100
    save_every = 40000

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder1.train()
    decoder2.train()
    decoder3.train()
    # decoder4.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer1 = optim.Adam(decoder1.parameters(), lr=learning_rate * decoder_learning_ratio)
    decoder_optimizer2 = optim.Adam(decoder2.parameters(), lr=learning_rate * decoder_learning_ratio)
    decoder_optimizer3 = optim.Adam(decoder3.parameters(), lr=learning_rate * decoder_learning_ratio)
    # decoder_optimizer4 = optim.Adam(decoder4.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer1.load_state_dict(decoder_optimizer_sd1)
        decoder_optimizer2.load_state_dict(decoder_optimizer_sd2)
        decoder_optimizer3.load_state_dict(decoder_optimizer_sd3)
        # decoder_optimizer4.load_state_dict(decoder_optimizer_sd4)

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder1, decoder2, decoder3,
               encoder_optimizer, decoder_optimizer1, decoder_optimizer2, decoder_optimizer3,
               embedding, encoder_n_layers, decoder_n_layers, hidden_size, save_dir,
               n_iteration, batch_size, print_every, save_every, teacher_forcing_ratio,
               clip, loadFilename, checkpoint_iter, checkpoint_iteration, option)
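# A hedged sketch of the checkpoint layout implied by the keys read above ('en', 'de1'..'de3',
# 'en_opt', 'de_opt1'..'de_opt3', 'embedding', 'iteration'). The actual saving happens inside
# trainIters (not shown here), so the helper name, directory naming, and any extra fields are
# assumptions, not the project's confirmed format.
import os
import torch

def save_checkpoint_sketch(directory, iteration, encoder, decoders, optimizers, embedding):
    os.makedirs(directory, exist_ok=True)
    payload = {
        'iteration': iteration,
        'en': encoder.state_dict(),
        'en_opt': optimizers[0].state_dict(),
        'embedding': embedding.state_dict(),
    }
    # decoders and their optimizers are stored under numbered keys, matching the load side
    for idx, (dec, dec_opt) in enumerate(zip(decoders, optimizers[1:]), start=1):
        payload['de{}'.format(idx)] = dec.state_dict()
        payload['de_opt{}'.format(idx)] = dec_opt.state_dict()
    torch.save(payload, os.path.join(directory, '{}_checkpoint.tar'.format(iteration)))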
from utils import *
from model.encoder import EncoderRNN
from model.decoder import DecoderRNN
from model.decoder_attn import AttnDecoderRNN
from test import *
from evaluate import *

hidden_size = 256
num_layers = 1

input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

encoder1 = EncoderRNN(input_lang.n_words, hidden_size, num_layers).to(device)
# decoder1 = DecoderRNN(hidden_size, output_lang.n_words).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, MAX_LENGTH, num_layers, dropout_p=0.1).to(device)

trainIters(encoder1, attn_decoder1, 10000, pairs, input_lang, output_lang, print_every=5000)
evaluateRandomly(encoder1, attn_decoder1, pairs, input_lang, output_lang)
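# A hedged follow-up (not in the original script): persisting the trained encoder/decoder so
# they can be reloaded without retraining. The file names are illustrative; reloading assumes
# the same hidden_size/num_layers used above.
torch.save(encoder1.state_dict(), 'encoder1.pt')
torch.save(attn_decoder1.state_dict(), 'attn_decoder1.pt')

# encoder1.load_state_dict(torch.load('encoder1.pt', map_location=device))
# attn_decoder1.load_state_dict(torch.load('attn_decoder1.pt', map_location=device))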