def evaluate(encoder, decoder, valid_set, lang,
             embedding_size, encoder_style=ENCODER_STYLE,
             iter_time=10, beam_size=1, verbose=True):
    """The evaluate procedure."""
    # Get evaluate data
    valid_iter = data_iter(valid_set, batch_size=1, shuffle=False)
    if use_cuda:
        encoder.cuda()
        decoder.cuda()

    for iteration in range(iter_time):
        # Get data
        data, idx_data = get_batch(next(valid_iter))
        rt, re, rm, summary = idx_data

        # For Encoding
        rt = Variable(torch.LongTensor(rt))
        re = Variable(torch.LongTensor(re))
        rm = Variable(torch.LongTensor(rm))

        # For Decoding
        summary = Variable(torch.LongTensor(summary))

        if use_cuda:
            rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

        # Get decoding words and attention matrix
        decoded_words, decoder_attentions = predictwords(
            rt, re, rm, encoder, decoder, lang, embedding_size,
            encoder_style, beam_size)

        res = ' '.join(decoded_words[:-1])
        if verbose:
            print(res)
        yield res

        # Compare to the original data
        triplets, gold_summary = data[0]
        print(triplets)
        for word in gold_summary:
            print(word, end=' ')
        print(' ')

        showAttention(triplets, decoded_words, decoder_attentions)
def evaluate(encoder, decoder, valid_set, lang,
             embedding_size, encoder_style=ENCODER_STYLE,
             epoch_time=EPOCH_TIME, beam_size=1, verbose=True):
    """The evaluate procedure."""
    # Get evaluate data
    valid_iter = data_iter(valid_set, batch_size=1, shuffle=True)
    if use_cuda:
        encoder.cuda()
        decoder.cuda()

    for iteration in range(epoch_time):
        # Get data
        data, idx_data = get_batch(next(valid_iter))
        rt, re, rm, summary = idx_data

        # For Encoding
        rt = Variable(torch.LongTensor(rt))
        re = Variable(torch.LongTensor(re))
        rm = Variable(torch.LongTensor(rm))

        # For Decoding
        summary = Variable(torch.LongTensor(summary))

        if use_cuda:
            rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

        # Get decoding words and attention matrix
        decoded_words, decoder_attentions = predictwords(
            rt, re, rm, summary, encoder, decoder, lang, embedding_size,
            encoder_style, beam_size)

        res = ' '.join(decoded_words[:-1])
        if verbose:
            print(res)
        yield res
def evaluate(encoder, decoder, valid_set, lang,
             embedding_size, encoder_style=ENCODER_STYLE,
             epoch_time=EPOCH_TIME, beam_size=1, verbose=True):
    """Evaluate the hierarchical model: decode one sample at a time and yield the text."""
    valid_iter = data_iter(valid_set, batch_size=1, shuffle=True)
    if use_cuda:
        encoder.cuda()
        decoder.cuda()

    for iteration in range(epoch_time):
        # Fetch one example and its index-encoded fields
        data, idx_data = get_batch(next(valid_iter))
        rt, re, rm, summary = idx_data

        # Wrap the encoder inputs and the reference summary as tensors
        rt = Variable(torch.LongTensor(rt))
        re = Variable(torch.LongTensor(re))
        rm = Variable(torch.LongTensor(rm))
        summary = Variable(torch.LongTensor(summary))

        if use_cuda:
            rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

        # Decode with the hierarchical model and keep the attention weights
        decoded_words, decoder_attentions = hierarchical_predictwords(
            rt, re, rm, summary, encoder, decoder, lang, embedding_size,
            encoder_style, beam_size)

        res = ' '.join(decoded_words[:-1])
        if verbose:
            print(res)
        yield res
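# Usage sketch (not from the original code): each evaluate() variant above is a
# generator, so a caller has to iterate it to trigger decoding. The helper name
# `collect_predictions` is hypothetical, and passing `langs['summary']` as the
# `lang` argument is an assumption about how predictwords() looks up words.
def collect_predictions(encoder, decoder, valid_set, langs, embedding_size):
    """Drain the evaluate() generator and return the decoded summaries."""
    return [generated
            for generated in evaluate(encoder, decoder, valid_set,
                                      langs['summary'], embedding_size,
                                      beam_size=1, verbose=False)]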
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS, save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE, use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate,
    #                                   lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
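# `addpaddings` is called throughout the training code but is not shown in this
# excerpt. A minimal sketch of what it presumably does (an assumption): right-pad
# every index sequence in the batch to the length of the longest one so the batch
# can be turned into a rectangular LongTensor. The padding index used here (2) is
# a guess and would have to match the vocabulary built by the Lang objects.
def addpaddings(sequences, pad_idx=2):
    """Right-pad a list of index lists to a common length."""
    max_len = max(len(seq) for seq in sequences)
    return [seq + [pad_idx] * (max_len - len(seq)) for seq in sequences]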
def train(train_set, langs, embedding_size=EMBEDDING_SIZE, learning_rate=LR,
          batch_size=BATCH_SIZE, get_loss=GET_LOSS, grad_clip=GRAD_CLIP,
          encoder_style=ENCODER_STYLE, decoder_style=DECODER_STYLE,
          to_copy=TOCOPY, epoch_time=EPOCH_TIME, layer_depth=LAYER_DEPTH,
          max_length=MAX_LENGTH, max_sentence=MAX_SENTENCES,
          save_model=SAVE_MODEL, output_file=OUTPUT_FILE,
          iter_num=iterNum, pretrain=PRETRAIN):
    """The training procedure."""
    # # Test arg parser (for debugging)
    # print("embedding_size={}, learning_rate={}, batch_size={}, get_loss={}, grad_clip={},"
    #       " encoder_style={}, decoder_style={}, max_length={},"
    #       " max_sentence={}, save_model={}, output_file={}, to_copy={},"
    #       " epoch={}, layer_depth={}, iter num={}, pretrain={}".format(
    #           embedding_size, learning_rate, batch_size, get_loss, grad_clip,
    #           encoder_style, decoder_style, max_length, max_sentence,
    #           save_model, output_file, to_copy, epoch_time, layer_depth,
    #           iter_num, pretrain))

    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb, n_layers=layer_depth)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPool(embedding_size, emb, n_layers=layer_depth)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb,
                        "n_layers": layer_depth}
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # Initialize the hierarchical encoder RNN (both global and local)
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb,
                        "n_layers": layer_depth}
        encoder = HierarchicalRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words,
                                      n_layers=layer_depth, copy=to_copy)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words,
                                 n_layers=layer_depth, copy=to_copy)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # loss_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
    #                             lr=learning_rate)

    # Load a pretrained model if one is specified
    use_model = None
    if pretrain is not None and iter_num is not None:
        use_model = ['./models/' + pretrain + '_' + s + '_' + str(iter_num)
                     for s in ['encoder', 'decoder', 'optim']]

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))
        print("Load Pretrain Model {}".format(use_model))
    else:
        print("Not using a pretrained model")

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size, langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, epoch_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings: if the model is hierarchical, pad between sentences.
            # If the batch_size is 1 we don't need sentence padding.
            if decoder_style == 'HierarchicalRNN' and batch_size != 1:
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Zero the gradient
            loss_optimizer.zero_grad()

            model.train()

            # Calculate the loss of a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) + list(model.decoder.parameters()),
                grad_clip)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(output_file, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(output_file, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(output_file, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
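# `load_model` is referenced above but not included in this excerpt. A minimal
# sketch of one plausible implementation (an assumption, not the project's actual
# helper): restore a state dict saved with torch.save(model.state_dict(), path),
# mapping storages to CPU so GPU-trained checkpoints also load on CPU-only machines.
import torch

def load_model(model, model_path):
    """Load saved parameters into `model` and return it."""
    state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    return model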
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS, save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE, decoder_style=DECODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    encoder, decoder, loss_optimizer, train_func = model_initialization(
        encoder_style, decoder_style, langs, embedding_size, learning_rate,
        use_model)

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, None, criterion,
                    embedding_size, langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            print(idx_data)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings: if the model is hierarchical, pad between sentences
            if decoder_style == 'HierarchicalRNN':
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # DEBUG:
            if torch.sum(rm == 3).item() == 0:
                print('skip')
                continue

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Zero the gradient
            loss_optimizer.zero_grad()

            model.train()

            # Calculate the loss of a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) + list(model.decoder.parameters()),
                GRAD_CLIP)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
def train(train_set, langs, embedding_size=600, learning_rate=0.01,
          iter_time=10, batch_size=32, get_loss=GET_LOSS, save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE, decoder_style=DECODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    # TODO: set up a choice for hierarchical or not
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, emb)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # Initialize the hierarchical encoder RNN (both global and local)
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalEncoderRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)
    # loss_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
    #                             lr=learning_rate)

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size, langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings: if the model is hierarchical, pad between sentences
            if decoder_style == 'HierarchicalRNN':
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # Zero the gradient
            loss_optimizer.zero_grad()

            model.train()

            # Calculate the loss of a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) + list(model.decoder.parameters()),
                GRAD_CLIP)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
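# `gettime(start)` is used in the progress messages above but is not part of this
# excerpt. A minimal sketch of a compatible helper (an assumption about its exact
# output format): report the wall-clock time elapsed since `start` in minutes and seconds.
import time

def gettime(start):
    """Return the elapsed time since `start` as an 'Mm Ss' string."""
    elapsed = time.time() - start
    minutes, seconds = divmod(int(elapsed), 60)
    return "{}m {}s".format(minutes, seconds)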
# Load the training pairs from the HDF5 file into NDArrays
train_input = nd.array(hf.get('input'))
train_label = nd.array(hf.get('label'))

net = SrCnn()
net.initialize(ctx=try_gpu())
if os.path.exists("srcnn.params"):
    net.load_parameters("srcnn.params")

ctx = try_gpu()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
print('training on', ctx)

loss = gloss.L2Loss()
for ep in range(epoch):
    train_l_sum, n, start = 0.0, 0, time.time()
    # batch_idxs = len(train_input) // batch_size
    for X, y in data_iter(batch_size, train_input, train_label):
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        # Convert NHWC batches to the NCHW layout the network expects
        X = nd.transpose(X, (0, 3, 1, 2))
        y = nd.transpose(y, (0, 3, 1, 2))
        with autograd.record():
            y_hat = net(X)
            l = loss(y_hat, y).sum()
        l.backward()
        trainer.step(batch_size)
        y = y.astype('float32')
        train_l_sum += l.asscalar()
        print(y.size)
        n += y.size
    print('epoch %d, loss %f' % (ep + 1, train_l_sum / n))
    net.save_parameters("srcnn.params")
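# `data_iter(batch_size, train_input, train_label)` is not defined in the Gluon
# snippet above. A minimal sketch of a compatible minibatch generator (an
# assumption, modelled on the common d2l-style helper): shuffle the example
# indices once per epoch and yield aligned (features, labels) slices.
import random
from mxnet import nd

def data_iter(batch_size, features, labels):
    """Yield shuffled (X, y) minibatches from two aligned NDArrays."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = nd.array(indices[i:min(i + batch_size, num_examples)])
        yield features.take(j), labels.take(j)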
def train(train_set, langs, embedding_size=EMBEDDING_SIZE, learning_rate=LR,
          batch_size=BATCH_SIZE, get_loss=GET_LOSS, grad_clip=GRAD_CLIP,
          encoder_style=ENCODER_STYLE, decoder_style=DECODER_STYLE,
          to_copy=TOCOPY, epoch_time=EPOCH_TIME, layer_depth=LAYER_DEPTH,
          max_length=MAX_LENGTH, max_sentence=MAX_SENTENCES,
          save_model=SAVE_MODEL, output_file=OUTPUT_FILE,
          iter_num=iterNum, pretrain=PRETRAIN):
    """The training procedure (always uses the hierarchical encoder)."""
    start = time.time()

    # Initialize the record embedding shared by the encoder
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    encoder_args = {"hidden_size": embedding_size, "local_embed": emb,
                    "n_layers": layer_depth}
    encoder = HierarchicalRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words,
                                      n_layers=layer_depth, copy=to_copy)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words,
                                 n_layers=layer_depth, copy=to_copy)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    loss_optimizer = optim.Adagrad(list(encoder.parameters()) + list(decoder.parameters()),
                                   lr=learning_rate, lr_decay=0, weight_decay=0)

    # Load a pretrained model if one is specified
    use_model = None
    if pretrain is not None and iter_num is not None:
        use_model = ['./models/' + pretrain + '_' + s + '_' + str(iter_num)
                     for s in ['encoder', 'decoder', 'optim']]

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))
        print("Load Pretrain Model {}".format(use_model))
    else:
        print("Not using a pretrained model")

    criterion = nn.NLLLoss()
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size, langs)

    total_loss = 0
    iteration = 0
    for epo in range(1, epoch_time + 1):
        print("Epoch #%d" % (epo))

        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Pad the records; pad summaries between sentences for the hierarchical decoder
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            if decoder_style == 'HierarchicalRNN' and batch_size != 1:
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = rt.cuda(), re.cuda(), rm.cuda(), summary.cuda()

            # One optimization step on the batch
            loss_optimizer.zero_grad()
            model.train()
            loss = sequenceloss(rt, re, rm, summary, model)
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) + list(model.decoder.parameters()),
                grad_clip)
            loss_optimizer.step()

            # Track the loss normalized by the target length
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item() / target_length
            else:
                total_loss += loss.data[0] / target_length

            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(output_file, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(output_file, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(output_file, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
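# End-to-end usage sketch (an assumption, not part of the original code): build the
# vocabularies and index-encoded datasets with whatever preprocessing the project
# provides (`loaddata` and `data2index` are hypothetical names for that step),
# run one of the train() variants above, then decode the validation set with
# one of the evaluate() generators.
def main():
    train_data, valid_data, langs = loaddata()     # hypothetical data loader
    train_set = data2index(train_data, langs)      # hypothetical indexer
    valid_set = data2index(valid_data, langs)

    encoder, decoder = train(train_set, langs)
    for generated in evaluate(encoder, decoder, valid_set,
                              langs['summary'], EMBEDDING_SIZE, verbose=True):
        pass  # each yielded string is one decoded summary

if __name__ == '__main__':
    main()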