                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)
    print(param.data[0:3])

# Initialize the encoder embeddings from pre-trained vectors and rescale them
_, _, norm_val = encoder.vectors_stats()
encoder.init_vectors(src.vocab.vectors)
# encoder.scale_vectors(0.08)
encoder.normalize_vectors(norm_val)
encoder.vectors_stats()
for param in seq2seq.parameters():
    print(param.data[0:3])

if torch.cuda.is_available():
    seq2seq.cuda()

# Optimizer and learning rate scheduler can be customized by
# explicitly constructing the objects and passing them to the trainer.
optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters(), lr=0.001), max_grad_norm=5)
# optimizer = Optimizer(torch.optim.SGD(seq2seq.parameters(), lr=0.01, momentum=0.9), max_grad_norm=5)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer.optimizer, step_size=10, gamma=0.5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer.optimizer, mode='min', factor=0.5, patience=5,
    verbose=True, threshold=0.0001, threshold_mode='rel')
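# The snippet above constructs a ReduceLROnPlateau scheduler but ends before wiring it up.
# A minimal sketch of the likely next step, assuming the pytorch-seq2seq Optimizer wrapper
# used here exposes set_scheduler() (as the commented example in the later snippet suggests);
# not part of the original code:
optimizer.set_scheduler(scheduler)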
                     hidden_size, bidirectional=bidirectional, rnn_cell='lstm',
                     variable_lengths=True)
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2,
                     dropout_p=0, use_attention=True, bidirectional=bidirectional,
                     rnn_cell='lstm', eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq = seq2seq.cuda()

for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

# train
t = SupervisedTrainer(loss=loss, batch_size=32,
                      checkpoint_every=50, print_every=10,
                      expt_dir=opt.expt_dir)
seq2seq = t.train(seq2seq, train, num_epochs=6, dev_data=dev)
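# SupervisedTrainer writes checkpoints to expt_dir every `checkpoint_every` steps.
# A minimal sketch of reloading the most recent checkpoint for later use, assuming the
# Checkpoint utility shipped with pytorch-seq2seq; not part of the original snippet:
from seq2seq.util.checkpoint import Checkpoint

latest_checkpoint_path = Checkpoint.get_latest_checkpoint(opt.expt_dir)
checkpoint = Checkpoint.load(latest_checkpoint_path)
seq2seq = checkpoint.model
input_vocab = checkpoint.input_vocab
output_vocab = checkpoint.output_vocab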
def train():
    # Prepare dataset with torchtext; source and target are segmented with jieba
    src = SourceField(sequential=True, tokenize=lambda x: [i for i in jieba.lcut(x)])
    tgt = TargetField(sequential=True, tokenize=lambda x: [i for i in jieba.lcut(x)])
    max_len = 50

    def len_filter(example):
        return len(example.src) <= max_len and len(example.tgt) <= max_len

    train = torchtext.data.TabularDataset(
        path=opt.train_path, format='csv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter)
    dev = torchtext.data.TabularDataset(
        path=opt.dev_path, format='csv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=len_filter)
    src.build_vocab(train, max_size=50000)
    tgt.build_vocab(train, max_size=50000)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
                          checkpoint_every=50, print_every=10,
                          expt_dir=opt.expt_dir)
    seq2seq = t.train(seq2seq, train, num_epochs=6, dev_data=dev,
                      optimizer=optimizer, teacher_forcing_ratio=0.5,
                      resume=opt.resume)

    predictor = Predictor(seq2seq, input_vocab, output_vocab)
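    # A minimal usage sketch for the Predictor built above (not in the original snippet),
    # assuming pytorch-seq2seq's Predictor.predict(), which takes a list of source tokens
    # and returns the decoded target tokens; the sample sentence is illustrative only:
    sample = jieba.lcut("今天天气怎么样")
    print(predictor.predict(sample))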
def offline_training(opt, target_file_path):
    # Prepare dataset with torchtext
    src = SourceField(tokenize=treebank_tokenizer)
    tgt = TargetField(tokenize=treebank_tokenizer)

    def sample_filter(sample):
        """Keep every example; placeholder filter for future use."""
        return True

    train = torchtext.data.TabularDataset(
        path=opt.train_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=sample_filter)
    dev = torchtext.data.TabularDataset(
        path=opt.dev_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=sample_filter)
    # test split (currently read from the dev file)
    test = torchtext.data.TabularDataset(
        path=opt.dev_path, format='tsv',
        fields=[('src', src), ('tgt', tgt)],
        filter_pred=sample_filter)
    src.build_vocab(train, max_size=opt.src_vocab_size)
    tgt.build_vocab(train, max_size=opt.tgt_vocab_size)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    if opt.loss == 'perplexity':
        loss = Perplexity(weight, pad)
    else:
        raise TypeError("unsupported loss: {}".format(opt.loss))

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             n_layers=opt.n_layers,
                             bidirectional=opt.bidirectional,
                             rnn_cell=opt.rnn_cell,
                             variable_lengths=True,
                             embedding=input_vocab.vectors if opt.use_pre_trained_embedding else None,
                             update_embedding=opt.update_embedding)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size * 2 if opt.bidirectional else opt.hidden_size,
                             sos_id=tgt.sos_id,
                             eos_id=tgt.eos_id,
                             n_layers=opt.n_layers,
                             rnn_cell=opt.rnn_cell,
                             bidirectional=opt.bidirectional,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             use_attention=opt.use_attention)
        seq2seq = Seq2seq(encoder=encoder, decoder=decoder)
        if opt.gpu >= 0 and torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    trainer = SupervisedTrainer(loss=loss, batch_size=opt.batch_size,
                                checkpoint_every=opt.checkpoint_every,
                                print_every=opt.print_every,
                                expt_dir=opt.expt_dir)
    seq2seq = trainer.train(model=seq2seq,
                            data=train,
                            num_epochs=opt.epochs,
                            resume=opt.resume,
                            dev_data=dev,
                            optimizer=optimizer,
                            teacher_forcing_ratio=opt.teacher_forcing_rate)
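    # The `test` split above is loaded but never used in this snippet. A minimal sketch of
    # scoring the trained model on it, assuming pytorch-seq2seq's Evaluator (whose evaluate()
    # returns the average loss and token accuracy); not part of the original code:
    from seq2seq.evaluator import Evaluator

    evaluator = Evaluator(loss=loss, batch_size=opt.batch_size)
    test_loss, accuracy = evaluator.evaluate(seq2seq, test)
    print("Test {}: {:.4f}, Accuracy: {:.4f}".format(loss.name, test_loss, accuracy))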