def initialize_model(opt, src, tgt, train):
    """Build source/target vocabularies from *train* and assemble a Seq2seq model.

    Args:
        opt: parsed options (vocab sizes, hidden size, dropout, rnn cell, ...).
        src, tgt: source/target fields providing build_vocab and eos/sos ids.
        train: training dataset used to build the vocabularies.

    Returns:
        (seq2seq, input_vocab, output_vocab)
    """
    # build vocabulary
    src.build_vocab(train, max_size=opt.src_vocab)
    tgt.build_vocab(train, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Initialize model
    hidden_size = opt.hidden_size
    # A bidirectional encoder concatenates forward/backward states, so the
    # decoder hidden size is doubled to match.
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
    encoder = EncoderRNN(len(src.vocab), opt.max_len, hidden_size,
                         opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab), opt.max_len, decoder_hidden_size,
                         dropout_p=opt.dropout_p_decoder,
                         n_layers=opt.n_layers,
                         attention_method=opt.attention_method,
                         full_focus=opt.full_focus,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)  # `device` is a module-level global — TODO confirm defined
    return seq2seq, input_vocab, output_vocab
def load(file_path, dataset):
    """Restore a trained encoder/decoder pair from *file_path* and wrap them
    in a greedy RL policy plus an environment over *dataset*.

    Relies on module-level globals: ``device``, ``hidden_size``,
    ``encoder_n_layers``, ``decoder_n_layers``, ``dropout``, ``attn_model``
    and ``loadFilename`` — TODO confirm they are defined at call time.

    Returns:
        (policy, env)
    """
    checkpoint = torch.load(file_path, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']
    voc = Voc(checkpoint['voc_dict']['name'])
    voc.__dict__ = checkpoint['voc_dict']  # restore full vocabulary state

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    # NOTE(review): guards on the global ``loadFilename``, not the
    # ``file_path`` argument actually loaded above — confirm intent.
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')
    # Inference mode: disables dropout for deterministic decoding.
    encoder.eval()
    decoder.eval()
    policy = RLGreedySearchDecoder(encoder, decoder, voc)
    env = Env(voc, dataset)
    return policy, env
def test_dropout_WITH_PROB_ZERO(self):
    """With dropout_p=0, two forward passes over the same input must match."""
    encoder = EncoderRNN(self.vocab_size, 50, 16, dropout_p=0)
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)
    outputs = []
    for _ in range(2):
        out, _ = encoder(self.input_var, self.lengths)
        outputs.append(out)
    self.assertTrue(torch.equal(outputs[0].data, outputs[1].data))
def chat_with_latest(savepath=SAVE_PATH):
    """Load the most recent chatbot checkpoint and start an interactive session.

    The hyper-parameters below must match those used at training time,
    otherwise the state dicts will fail to load.
    """
    model = load_latest_state_dict(savepath)
    attn_model = 'dot'
    # attn_model = 'general'
    # attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64  # NOTE(review): unused in this function
    voc = Voc(model['voc_dict']['name'])
    voc.__dict__ = model['voc_dict']  # restore full vocabulary state
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(model['en'])
    decoder.load_state_dict(model['de'])
    # NOTE(review): models are left in train mode (dropout active during
    # chat) — consider calling .eval(); confirm against sibling loaders.
    searcher = GreedySearchDecoder(encoder, decoder)
    evaluateInput(encoder, decoder, searcher, voc)
def test_pretrained_embedding(self):
    """EncoderRNN must adopt a supplied embedding matrix and freeze it."""
    dim = 16
    weights = torch.randn(self.vocab_size, dim)
    encoder = EncoderRNN(self.vocab_size, 50, dim,
                         embedding=weights,
                         update_embedding=False)
    self.assertTrue(torch.equal(weights, encoder.embedding.weight.data))
    self.assertFalse(encoder.embedding.weight.requires_grad)
def test_dropout_WITH_PROB_ZERO(self):
    """With dropout_p=0, repeated forward passes must be deterministic.

    Bug fix: the original asserted ``assertEqual(output1, output2)``; for
    multi-element tensors ``==`` is elementwise, so the comparison either
    raises ("truth value is ambiguous") or misbehaves. Compare tensor
    contents explicitly with ``torch.equal``, consistent with the other
    dropout tests in this file.
    """
    rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0)
    for param in rnn.parameters():
        param.data.uniform_(-1, 1)
    batch = [[1, 2, 3], [1, 2], [1]]
    output1, _ = rnn(batch)
    output2, _ = rnn(batch)
    self.assertTrue(torch.equal(output1.data, output2.data))
def train():
    """Train the adversarial discriminator against the latest seq2seq model.

    Uses module-level globals: ``device``, ``learning_rate``, ``batch_size``
    and ``SAVE_PATH_SEQ2SEQ`` — TODO confirm they are defined at import time.
    A checkpoint is written to *save_dir* after every epoch.
    """
    N_EPOCHS = 5
    output_size = 1  # single real/generated score per input
    save_dir = 'data/save/Adversarial_Discriminator/'
    attn_model = 'dot'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    seq2seqModel = load_latest_state_dict(savepath=SAVE_PATH_SEQ2SEQ)
    voc = Voc('name')
    voc.__dict__ = seq2seqModel['voc_dict']  # restore full vocabulary state

    embedding = nn.Embedding(voc.num_words, hidden_size)
    model = Adversarial_Discriminator(hidden_size, output_size, embedding)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()

    # Rebuild the pretrained seq2seq generator the discriminator trains against.
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(seq2seqModel['en'])
    decoder.load_state_dict(seq2seqModel['de'])
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    searcher = RLGreedySearchDecoder(encoder, decoder, voc)

    train_data = AlexaDataset('train.json', rare_word_threshold=3)  # sorry cornell
    train_data.trimPairsToVocab(voc)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_data = AlexaDataset('test_freq.json', rare_word_threshold=3)
    test_data.trimPairsToVocab(voc)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    for epoch in range(1, N_EPOCHS + 1):
        # Evaluate before each training pass.
        test_AdversarialDiscriminatorOnLatestSeq2Seq(model, searcher,
                                                     test_loader, voc)
        loss = trainAdversarialDiscriminatorOnLatestSeq2Seq(
            model, searcher, voc, train_loader, criterion, optimizer,
            embedding, save_dir, epoch)
        # NOTE(review): "% 1" saves every epoch; raise the modulus to thin out.
        if epoch % 1 == 0:
            torch.save(
                {
                    'iteration': epoch,
                    'model': model.state_dict(),
                    'opt': optimizer.state_dict(),
                    'loss': loss,
                    'voc_dict': voc.__dict__,
                    'embedding': embedding.state_dict()
                },
                os.path.join(save_dir, '{}_{}.tar'.format(epoch, 'epochs')))
def loadModel(hidden_size=hidden_size, encoder_n_layers=encoder_n_layers,
              decoder_n_layers=decoder_n_layers, dropout=dropout,
              attn_model=attn_model, learning_rate=learning_rate,
              decoder_learning_ratio=decoder_learning_ratio,
              directory=SAVE_PATH):
    """Rebuild encoder/decoder and their Adam optimizers from the latest
    checkpoint found in *directory*.

    Parameter defaults are captured from module-level globals at import time.

    Returns:
        (episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc)
    """
    state_dict = load_latest_state_dict(directory)
    episode = state_dict['iteration']
    encoder_sd = state_dict['en']
    decoder_sd = state_dict['de']
    encoder_optimizer_sd = state_dict['en_opt']
    decoder_optimizer_sd = state_dict['de_opt']
    embedding_sd = state_dict['embedding']
    voc = Voc('placeholder_name')
    voc.__dict__ = state_dict['voc_dict']  # restore full vocabulary state

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)
    embedding.to(device)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)
    # NOTE(review): compares against the *string* 'cuda'; if ``device`` is a
    # torch.device object this branch never runs — confirm.
    if device == 'cuda':  # If you have cuda, configure cuda to call
        # Move the restored optimizer state tensors onto the GPU.
        for state in encoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
        for state in decoder_optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()
    print('Optimizers built and ready to go!')
    return episode, encoder, decoder, encoder_optimizer, decoder_optimizer, voc
def init_model():
    """Restore a model from a checkpoint, or build a fresh speaker-aware
    seq2seq model with GloVe-initialized vocabularies.

    Uses module-level globals: ``args``, ``spk``, ``src``, ``tgt``, ``train``.

    Returns:
        (model, input_vocab, output_vocab)
    """
    if args.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(args.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         args.load_checkpoint)))
        checkpoint_path = os.path.join(args.expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       args.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        model = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:
        # build the vocabulary index and embedding
        spk.build_vocab(train, vectors="glove.6B.100d")
        src.build_vocab(train, max_size=args.vocab_size, vectors="glove.6B.100d")
        tgt.build_vocab(train, max_size=args.vocab_size, vectors="glove.6B.100d")
        input_vocab, output_vocab = src.vocab, tgt.vocab

        # Initialize model
        encoder = EncoderRNN(
            vocab_size=len(input_vocab),
            max_len=args.max_len,
            vectors=input_vocab.vectors if args.embedding else None,
            input_dropout_p=args.input_dropout_p,
            dropout_p=args.dropout_p,
            hidden_size=args.hidden_size,
            bidirectional=args.bidirectional,
            variable_lengths=True)
        decoder = SpkDecoderRNN(
            num_spk=args.num_spk,
            spk_embed_size=args.spk_embed_size,
            vocab_size=len(output_vocab),
            max_len=args.max_len,
            # a bidirectional encoder concatenates both directions
            hidden_size=args.hidden_size * 2 if args.bidirectional else args.hidden_size,
            dropout_p=args.dropout_p,
            input_dropout_p=args.input_dropout_p,
            # NOTE(review): the decoder reuses the *input* vocab vectors —
            # confirm this is intentional rather than output_vocab.vectors.
            vectors=input_vocab.vectors if args.embedding else None,
            use_attention=True,
            bidirectional=args.bidirectional,
            eos_id=tgt.eos_id,
            sos_id=tgt.sos_id)
        model = SpkSeq2seq(encoder, decoder)
        if torch.cuda.is_available():
            model.cuda()
        # Uniform init is applied only to a freshly built model.
        for param in model.parameters():
            param.data.uniform_(-0.08, 0.08)
    return model, input_vocab, output_vocab
def __init__(self, data_path, model_save_path, model_load_path,
             hidden_size=32, max_vocab=4000, device='cuda'):
    """Build train/dev TSV datasets under *data_path* and construct a
    bidirectional attention seq2seq model.

    NOTE(review): the *device* argument is stored but the ``.cuda()`` calls
    below are hard-coded, so CPU-only execution will fail — confirm intent.
    """
    self.src = SourceField()
    self.tgt = TargetField()
    self.max_length = 90  # maximum src/tgt length admitted by len_filter
    self.data_path = data_path
    self.model_save_path = model_save_path
    self.model_load_path = model_load_path

    def len_filter(example):
        # Keep only pairs where both sides fit within max_length.
        return len(example.src) <= self.max_length and len(
            example.tgt) <= self.max_length

    self.trainset = torchtext.data.TabularDataset(
        path=os.path.join(self.data_path, 'train'),
        format='tsv',
        fields=[('src', self.src), ('tgt', self.tgt)],
        filter_pred=len_filter)
    self.devset = torchtext.data.TabularDataset(
        path=os.path.join(self.data_path, 'eval'),
        format='tsv',
        fields=[('src', self.src), ('tgt', self.tgt)],
        filter_pred=len_filter)
    self.src.build_vocab(self.trainset, max_size=max_vocab)
    self.tgt.build_vocab(self.trainset, max_size=max_vocab)

    # Perplexity loss that ignores padding positions.
    weight = torch.ones(len(self.tgt.vocab))
    pad = self.tgt.vocab.stoi[self.tgt.pad_token]
    self.loss = Perplexity(weight, pad)
    self.loss.cuda()

    self.optimizer = None
    self.hidden_size = hidden_size
    self.bidirectional = True
    encoder = EncoderRNN(len(self.src.vocab), self.max_length,
                         self.hidden_size,
                         bidirectional=self.bidirectional,
                         variable_lengths=True)
    decoder = DecoderRNN(len(self.tgt.vocab), self.max_length,
                         # doubled to match the concatenated directions
                         self.hidden_size * 2 if self.bidirectional else self.hidden_size,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=self.bidirectional,
                         eos_id=self.tgt.eos_id,
                         sos_id=self.tgt.sos_id)
    self.device = device
    self.seq2seq = Seq2seq(encoder, decoder).cuda()
    for param in self.seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)
def __init__(self, args):
    """GRU listener that encodes short discrete messages into communication
    embeddings of size ``args.comm_embed_dim``."""
    super(IPComm_listener, self).__init__()
    embed_dim = args.comm_embed_dim
    self.vocab_size = 10  # fixed message alphabet size
    self.max_len = 5      # fixed message length
    self.hidden_size = embed_dim
    self.listener = EncoderRNN(self.vocab_size, self.max_len,
                               self.hidden_size, rnn_cell='gru')
    self.reconstruct_fc = nn.Linear(self.hidden_size, embed_dim)
    self.fc = nn.Linear(embed_dim, embed_dim)
def setUpClass(self):
    """Build a small eng-fra dataset plus an LSTM seq2seq fixture.

    NOTE(review): named ``setUpClass`` but declared with ``self`` and no
    visible @classmethod decorator — confirm how the suite invokes it.
    """
    self.test_wd = os.getcwd()
    self.dataset = Dataset(path=os.path.join(self.test_wd, 'tests/data/eng-fra.txt'),
                           src_max_len=50, tgt_max_len=50,
                           src_max_vocab=50000, tgt_max_vocab=50000)
    self.encoder = EncoderRNN(self.dataset.input_vocab, max_len=10,
                              hidden_size=10, rnn_cell='lstm')
    self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10,
                              hidden_size=10, rnn_cell='lstm')
    self.seq2seq = Seq2seq(self.encoder, self.decoder)
    if torch.cuda.is_available():
        self.seq2seq.cuda()
    # The mock model shares the very same encoder/decoder modules.
    self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)
    for param in self.seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)
def test_input_dropout_WITH_NON_ZERO_PROB(self):
    """With input_dropout_p>0, repeated passes should eventually differ."""
    encoder = EncoderRNN(self.vocab_size, 50, 16, input_dropout_p=0.5)
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)
    saw_difference = False
    for _ in range(50):
        out_a, _ = encoder(self.input_var, self.lengths)
        out_b, _ = encoder(self.input_var, self.lengths)
        if not torch.equal(out_a.data, out_b.data):
            saw_difference = True
            break
    self.assertTrue(saw_difference)
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With dropout_p>0, repeated forward passes should eventually differ.

    Bug fix: the original tested ``if output1 != output2`` — for tensors
    ``!=`` is elementwise, so using it as a branch condition raises
    "truth value is ambiguous". Compare with ``torch.equal`` instead,
    consistent with the other dropout tests in this file.
    """
    rnn = EncoderRNN(self.dataset.input_vocab, 50, 16, dropout_p=0.5)
    for param in rnn.parameters():
        param.data.uniform_(-1, 1)
    batch = [[1, 2, 3], [1, 2], [1]]
    equal = True
    for _ in range(50):
        output1, _ = rnn(batch)
        output2, _ = rnn(batch)
        if not torch.equal(output1.data, output2.data):
            equal = False
            break
    self.assertFalse(equal)
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With inter-layer dropout enabled, repeated passes should differ."""
    # n_layers=2 is essential: per the PyTorch docs, RNN dropout is only
    # applied *between* layers, so a single-layer RNN ignores dropout_p.
    encoder = EncoderRNN(self.vocab_size, 50, 16, n_layers=2, dropout_p=0.5)
    for weight in encoder.parameters():
        weight.data.uniform_(-1, 1)
    saw_difference = False
    for _ in range(50):
        out_a, _ = encoder(self.input_var, self.lengths)
        out_b, _ = encoder(self.input_var, self.lengths)
        if not torch.equal(out_a.data, out_b.data):
            saw_difference = True
            break
    self.assertTrue(saw_difference)
def setUpClass(self):
    """Build a Predictor over a tiny eng-fra TSV dataset.

    NOTE(review): named ``setUpClass`` but declared with ``self`` rather
    than as a @classmethod taking ``cls`` — confirm how the suite calls it.
    """
    test_path = os.path.dirname(os.path.realpath(__file__))
    src = SourceField()
    trg = TargetField()
    dataset = torchtext.data.TabularDataset(
        path=os.path.join(test_path, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', src), ('trg', trg)],
    )
    src.build_vocab(dataset)
    trg.build_vocab(dataset)
    encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
    # positional args after hidden size: sos_id, eos_id
    decoder = DecoderRNN(len(trg.vocab), 10, 10, trg.sos_id, trg.eos_id,
                         rnn_cell='lstm')
    seq2seq = Seq2seq(encoder, decoder)
    self.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
def main():
    """Load vocabulary and pickled training data, build an encoder/decoder
    pair, and run training.

    Fix: the vocabulary pickle was opened via ``pickle.load(open(...))``,
    leaking the file handle; both files are now read inside ``with`` blocks.
    """
    with open(f'{EMBEDDING_DIR}/vocab.pkl', 'rb') as vocab_file:
        vocabulary = pickle.load(vocab_file)
    print("Number of words in data set: %d" % len(vocabulary))
    embedding_matrix, vocab_to_index = map_vocab_to_embedding(vocabulary)
    hidden_size = 600
    encoder = EncoderRNN(embedding_matrix, hidden_size)
    decoder = DecoderRNN(embedding_matrix, hidden_size)
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
    with open(os.path.join(EMBEDDING_DIR, "train.pkl"), 'rb') as train_file:
        train_data = pickle.load(train_file)
    n_iters = 2000
    train(train_data, vocab_to_index, vocabulary, encoder, decoder, n_iters)
def __init__(
        self,
        vocabulary_size,
        embedding_size,
        hidden_state_size,
        start_label,
        end_label,
        pad_label,
        slk_parser,
        MAX_LENGTH=500,
        dropout_p=0.1,
        n_layer=3,
):
    """Encoder/decoder pair with copy and grammar-mask output heads.

    The bidirectional LSTM encoder uses ``hidden_state_size // 2`` per
    direction so the concatenated state matches the decoder's
    ``hidden_state_size``.

    NOTE(review): *slk_parser* is accepted but never stored here — confirm
    whether it is used elsewhere or dead.
    """
    super().__init__()
    self.embedding = nn.Embedding(vocabulary_size, embedding_size)
    self.sample = False
    self.dropout_p = dropout_p
    self.encoder = EncoderRNN(vocab_size=vocabulary_size,
                              max_len=MAX_LENGTH,
                              input_size=embedding_size,
                              hidden_size=hidden_state_size // 2,
                              n_layers=n_layer,
                              bidirectional=True,
                              rnn_cell='lstm',
                              input_dropout_p=self.dropout_p,
                              dropout_p=self.dropout_p,
                              variable_lengths=False,
                              embedding=None,
                              update_embedding=True)
    self.decoder = DecoderRNN(vocab_size=vocabulary_size,
                              max_len=MAX_LENGTH,
                              hidden_size=hidden_state_size,
                              sos_id=start_label,
                              eos_id=end_label,
                              n_layers=n_layer,
                              rnn_cell='lstm',
                              bidirectional=False,
                              input_dropout_p=self.dropout_p,
                              dropout_p=self.dropout_p,
                              use_attention=True)
    # Per-step scalar head deciding copy-vs-generate.
    self.is_copy_output = nn.Linear(hidden_state_size, 1)
    self.grammar_mask_output = MaskOutput(hidden_state_size, vocabulary_size)
    # (1, 1) tensor holding the start symbol fed to the decoder.
    # NOTE(review): torch.ones(...) * start_label produces a *float* tensor —
    # confirm downstream code casts it to long before any embedding lookup.
    self.decoder_start = torch.ones(1, 1) * start_label
    self.pad_label = pad_label
    self.MAX_LENGTH = MAX_LENGTH
    self.num_layers = n_layer
def setUp(self):
    """Build a tiny eng-fra LSTM seq2seq fixture with uniform init."""
    fixture_dir = os.path.dirname(os.path.realpath(__file__))
    source_field = SourceField()
    target_field = TargetField()
    self.dataset = torchtext.data.TabularDataset(
        path=os.path.join(fixture_dir, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', source_field), ('tgt', target_field)],
    )
    source_field.build_vocab(self.dataset)
    target_field.build_vocab(self.dataset)
    enc = EncoderRNN(len(source_field.vocab), 10, 10, rnn_cell='lstm')
    dec = DecoderRNN(len(target_field.vocab), 10, 10,
                     target_field.sos_id, target_field.eos_id,
                     rnn_cell='lstm')
    self.seq2seq = Seq2seq(enc, dec)
    for weight in self.seq2seq.parameters():
        weight.data.uniform_(-0.08, 0.08)
def setUpClass(self):
    """Build dataset + seq2seq fixtures and round-trip them through
    checkpoint save/load under ./checkpoints.

    NOTE(review): named ``setUpClass`` but declared with ``self`` and no
    visible @classmethod decorator — confirm how the suite invokes it.
    """
    self.test_wd = os.getcwd()
    self.dataset = Dataset(path=os.path.join(self.test_wd, 'tests/data/eng-fra.txt'),
                           src_max_len=50, tgt_max_len=50,
                           src_max_vocab=50000, tgt_max_vocab=50000)
    self.encoder = EncoderRNN(self.dataset.input_vocab, max_len=10,
                              hidden_size=10, rnn_cell='lstm')
    self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10,
                              hidden_size=10, rnn_cell='lstm')
    self.seq2seq = Seq2seq(self.encoder, self.decoder)
    # The mock model shares the very same encoder/decoder modules.
    self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)
    for param in self.seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)
    # Save the initialized model, then load it back into the mock to
    # exercise the checkpoint round-trip.
    if not os.path.exists(os.path.join(self.test_wd, 'checkpoints')):
        os.mkdir(os.path.join(self.test_wd, 'checkpoints'))
    self.seq2seq.save(os.path.join(self.test_wd, 'checkpoints'))
    self.mock_seq2seq.load(os.path.join(self.test_wd, 'checkpoints'))
def initialize_model(
    train,
    input_vocab,
    output_vocab,
    max_len=10,
    hidden_size=256,
    dropout_p=0.5,
    bidirectional=True,
    n_beam=5,
):
    """Assemble an attention Seq2seq model with its Adam optimizer and
    StepLR scheduler.

    ``n_beam`` is retained for a beam-search decoder that is currently
    disabled. Returns (seq2seq, optimizer, scheduler).
    """
    enc = EncoderRNN(
        len(input_vocab),
        max_len,
        hidden_size,
        bidirectional=bidirectional,
        variable_lengths=True,
    )
    decoder_hidden = hidden_size * (2 if bidirectional else 1)
    dec = DecoderRNN(
        len(output_vocab),
        max_len,
        decoder_hidden,
        dropout_p=dropout_p,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=train.tgt_field.eos_id,
        sos_id=train.tgt_field.sos_id,
    )
    # dec = TopKDecoder(dec, n_beam)  # beam search, currently disabled
    model = Seq2seq(enc, dec)
    if torch.cuda.is_available():
        model = model.cuda()
    for weight in model.parameters():
        weight.data.uniform_(-0.08, 0.08)

    # Optimization is customized by constructing the objects explicitly and
    # handing them to the trainer.
    optimizer = Optimizer(torch.optim.Adam(model.parameters()), max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)
    return model, optimizer, scheduler
def get_seq2seq():
    """Construct a Seq2seq from module-level configuration.

    Reads globals: ``args``, ``field``, ``bidirectional``, ``rnn_cell``,
    ``use_attention``, ``tied``, ``device``. With ``tied`` the encoder
    embedding shares the decoder's output weight matrix (weight tying),
    which is why the decoder must be built first.
    """
    decoder = DecoderRNN(len(field.vocab.stoi),
                         args.max_len,
                         # doubled to match a bidirectional encoder
                         args.hidden_size * 2 if bidirectional else args.hidden_size,
                         n_layers=args.n_layers,
                         rnn_cell=rnn_cell,
                         input_dropout_p=0.0,
                         dropout_p=0.0,
                         use_attention=use_attention,
                         bidirectional=bidirectional,
                         eos_id=field.vocab.stoi['<eos>'],
                         sos_id=field.vocab.stoi['<sos>']).to(device)
    if tied:
        # compatibility with the older code
        nn.init.normal_(decoder.out.weight)
    encoder = EncoderRNN(len(field.vocab.stoi),
                         args.max_len,
                         args.hidden_size,
                         input_dropout_p=0.0,
                         dropout_p=0.0,
                         n_layers=args.n_layers,
                         bidirectional=bidirectional,
                         rnn_cell=rnn_cell,
                         variable_lengths=True,
                         # share decoder output weights when tying
                         embedding=(decoder.out.weight if tied else None)).to(device)
    return Seq2seq(encoder, decoder)
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional,
                dropout, attention, init_value):
    """Build an LSTM seq2seq model plus a SupervisedTrainer, logging every
    hyper-parameter to EXPERIMENT.

    Returns:
        (seq2seq, trainer)
    """
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)

    # Perplexity loss that ignores padding positions.
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)

    encoder = EncoderRNN(len(src.vocab), MAX_LEN, hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    # NOTE(review): doubling the decoder hidden size for a bidirectional
    # encoder is commented out below — confirm `hidden_size` really is
    # correct here when bidirectional=True.
    decoder = DecoderRNN(
        len(tgt.vocab),
        MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)

    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)

    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
# Prepare loss: NLL over non-pad positions (Perplexity variant kept for reference).
# loss = Perplexity(weight, pad)
loss = NLLLoss(weight=weight, mask=pad, size_average=True)
if torch.cuda.is_available():
    loss.cuda()

seq2seq = None
optimizer = None
if not opt.resume:
    # Initialize model
    hidden_size = opt.word_lstm_dim
    bidirectional = opt.word_bidirect
    encoder = EncoderRNN(vocab_size=len(src.vocab),
                         max_len=max_len,
                         word_dim=opt.word_dim,
                         hidden_size=hidden_size,
                         input_dropout_p=opt.input_dropout,
                         bidirectional=bidirectional,
                         n_layers=1,
                         rnn_cell='gru',
                         variable_lengths=True)
    # Bug fix: the unidirectional fallback was the literal ``1``
    # ("... if bidirectional else 1"); the decoder hidden size must
    # equal the encoder hidden size in that case.
    decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                         max_len=max_len,
                         hidden_size=hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=opt.dropout,
                         use_attention=True,
                         bidirectional=bidirectional,
                         n_layers=1,
                         rnn_cell='gru',
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
# Prepare loss: perplexity over non-pad positions.
weight = torch.ones(len(tgt.vocab))
pad = tgt.vocab.stoi[tgt.pad_token]
loss = Perplexity(weight, pad)
if torch.cuda.is_available():
    loss.cuda()

seq2seq = None
optimizer = None
if not opt.resume:
    # Initialize model
    # hidden_size=128
    hidden_size = 300
    bidirectional = True
    encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                         bidirectional=bidirectional,
                         variable_lengths=True)
    # Bug fix: the unidirectional fallback was the literal ``1``
    # ("... if bidirectional else 1"); it must be the encoder hidden size.
    # Harmless today (bidirectional is hard-coded True) but wrong the
    # moment that flag changes.
    decoder = DecoderRNN(len(tgt.vocab), max_len,
                         hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=bidirectional,
                         eos_id=tgt.eos_id,
                         sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)
        print(param.data)  # debug: dump initialized weights

    encoder.vectors_stats()
    # encoder.init_vectors(src.vocab.vectors)
    # for param in seq2seq.parameters():
    #     print(param.data)
    if torch.cuda.is_available():
        seq2seq.cuda()
def run_training(opt, default_data_dir, num_epochs=100):
    """Train (or restore) a seq2seq model, then drop into an interactive
    beam-search prediction loop.

    Bug fixes vs. the original:
      * SupervisedTrainer was constructed with ``batch_size=num_epochs``;
        it now uses the intended ``batch_size``.
      * the accuracy log line printed ``dev_loss``; it now prints ``accuracy``.
      * ``raw_input`` (Python 2) -> ``input`` — this file uses Python 3
        syntax (e.g. f-strings) elsewhere, so ``raw_input`` was a NameError.
      * the per-option print passed "%s" placeholders as extra print args
        without formatting; it now interpolates them.
    """
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME,
                         opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir,
                                       Checkpoint.CHECKPOINT_DIR_NAME,
                                       opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:
        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50
        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')
        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            # keep non-empty pairs no longer than max_len on either side
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )
        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path,
                                         'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            # bidirectional encoder concatenates both directions
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)
            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()
            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

            # Optimizer and learning rate scheduler can be customized by
            # explicitly constructing the objects and pass to the trainer.
            optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                                  max_grad_norm=5)
            scheduler = StepLR(optimizer.optimizer, 1)
            optimizer.set_scheduler(scheduler)

        # train
        batch_size = 32
        checkpoint_every = num_epochs / 10
        print_every = num_epochs / 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every,
                          expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)
        logging.info("Starting training with the following Properties %s",
                     json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss,
                              batch_size=batch_size,  # was num_epochs (bug)
                              checkpoint_every=checkpoint_every,
                              print_every=print_every,
                              expt_dir=opt.expt_dir)
        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)
        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", accuracy)  # was dev_loss (bug)

    # Interactive prediction with a top-4 beam search wrapper.
    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))
    predictor = Predictor(beam_search, input_vocab, output_vocab)

    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
# NOTE(review): fragment — the trailing commented-out VecDecoderRNN call is
# truncated at the chunk boundary.
seq2seq = None
optimizer = None
if not opt.resume:
    # Initialize model
    hidden_size = config['encoder embed']
    # TODO is this ideal?
    feat_hidden_size = len(feats.vocab) // 2
    bidirectional = True
    encoder = EncoderRNN(
        len(src.vocab),
        feats.vocab,
        max_len,
        # TODO can we make these be different sizes?
        hidden_size,
        feat_hidden_size,
        # hidden_size,
        hidden_size,
        bidirectional=bidirectional,
        rnn_cell='LSTM',
        variable_lengths=True,
        n_layers=config['num layers']  #,
        # features=feats
    )
    # pdb.set_trace()
    # if config['use_vecs']:
    #     decoder = VecDecoderRNN(len(tgt.vocab),
    #                             max_len,
    #                             hidden_size * 2 if bidirectional else hidden_size,
    #                             dropout_p=float(config['dropout']),
    #                             use_attention=True,
    #                             bidirectional=bidirectional,
# NOTE(review): fragment — the DecoderRNN(...) call below is truncated at the
# chunk boundary (its argument list never closes here).
tgt_vocab = VocabField(tgt_vocab_list, vocab_size=opt.tgt_vocab_size,
                       sos_token="<SOS>", eos_token="<EOS>")
pad_id = tgt_vocab.word2idx[tgt_vocab.pad_token]

# Prepare loss: perplexity over non-pad positions.
weight = torch.ones(len(tgt_vocab.vocab))
loss = Perplexity(weight, pad_id)
loss.to(device)

# Initialize model
encoder = EncoderRNN(len(src_vocab.vocab), opt.max_src_length,
                     embedding_size=opt.embedding_size,
                     rnn_cell=opt.rnn_cell,
                     n_layers=opt.n_hidden_layer,
                     hidden_size=opt.hidden_size,
                     bidirectional=opt.bidirectional,
                     variable_lengths=False)
decoder = DecoderRNN(len(tgt_vocab.vocab), opt.max_tgt_length,
                     embedding_size=opt.embedding_size,
                     rnn_cell=opt.rnn_cell,
                     n_layers=opt.n_hidden_layer,
                     # doubled to match a bidirectional encoder
                     hidden_size=opt.hidden_size * 2 if opt.bidirectional else opt.hidden_size,
                     bidirectional=opt.bidirectional,
                     dropout_p=0.2,
                     use_attention=opt.use_attn,
                     eos_id=tgt_vocab.word2idx[tgt_vocab.eos_token],
# 檢查Constants是否有誤 assert EN.vocab.stoi[Constants.BOS_WORD] == Constants.BOS assert EN.vocab.stoi[Constants.EOS_WORD] == Constants.EOS assert EN.vocab.stoi[Constants.PAD_WORD] == Constants.PAD assert EN.vocab.stoi[Constants.UNK_WORD] == Constants.UNK # ---------- init model ---------- try: G = load_model(opt.load_G_from) except AttributeError: hidden_size = 512 bidirectional = True encoder = EncoderRNN(len(EN.vocab), opt.max_len, hidden_size, input_dropout_p=0, dropout_p=0, n_layers=1, bidirectional=bidirectional, variable_lengths=True, rnn_cell='gru') decoder = DecoderRNN(len(EN.vocab), opt.max_len, hidden_size * 2 if bidirectional else 1, n_layers=1, dropout_p=0.2, use_attention=True, bidirectional=bidirectional, rnn_cell='gru', eos_id=Constants.EOS, sos_id=Constants.BOS) G = Seq2seq(encoder, decoder) for param in G.parameters(): param.data.uniform_(-0.08, 0.08) try: D = load_model(opt.load_D_from) except AttributeError: D = BinaryClassifierCNN(len(EN.vocab), embed_dim=opt.embed_dim, num_kernel=opt.num_kernel, kernel_sizes=opt.kernel_sizes,
# Prepare loss: BLEU-based loss masked on padding positions.
weight = torch.ones(len(tgt.vocab))
pad = tgt.vocab.stoi[tgt.pad_token]
loss = BLEULoss(weight, pad, tgt)

seq2seq = None
optimizer = None
if not opt.resume:
    # Initialize model
    hidden_size = 100
    bidirectional = True
    encoder = EncoderRNN(
        len(src.vocab),
        max_len,
        hidden_size,
        embedding=src.vocab.vectors,  # initialize from pre-trained vectors
        bidirectional=bidirectional,
        variable_lengths=True,
        rnn_cell="lstm",
    )
    decoder = DecoderRNN(
        len(tgt.vocab),
        max_len,
        # doubled to match the bidirectional encoder
        hidden_size * 2 if bidirectional else hidden_size,
        # dropout_p=0.2,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id,
        rnn_cell="lstm",
    )