def _load_model(self):
    print 'Loading pretrained model'
    if self.config['model']['seq2seq'] == 'vanilla':
        print 'Loading Seq2Seq Vanilla model'
        self.model = Seq2Seq(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()
    elif self.config['model']['seq2seq'] == 'attention':
        print 'Loading Seq2Seq Attention model'
        self.model = Seq2SeqAttention(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            ctx_hidden_dim=self.config['model']['dim'],
            attention_mode='dot',
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()

    self.model.load_state_dict(torch.load(
        open(self.model_weights)
    ))
    print "Model loaded"
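# A minimal sketch (not from the original source) of the nested config and
# dictionaries that _load_model above assumes. Every key accessed in the method
# appears here; the concrete values are illustrative placeholders only.
config = {
    'model': {
        'seq2seq': 'attention',   # or 'vanilla'
        'dim_word_src': 512,
        'dim_word_trg': 512,
        'dim': 1024,
        'bidirectional': True,
        'n_layers_src': 2,
        'n_layers_trg': 1,
    },
    'data': {'batch_size': 80},
}
# src_dict / tgt_dict are assumed to be word -> id mappings containing '<pad>',
# and model_weights is assumed to be the path to a torch-serialized state_dict.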
vocab_length_source = len(sourceIndexer)
print("Source vocabulary size: {}".format(vocab_length_source))
vocab_length_target = len(targetIndexer)
print("Target vocabulary size: {}".format(vocab_length_target))

dev_data = list(read_dataset(dev_source_file, dev_target_file))
test_data = list(read_dataset(test_source_file, test_target_file))

num_layer = 1
embedding_size = 128
hidden_size = 64
att_size = 32

# start Dynet and define trainer
model = Seq2SeqAttention(vocab_length_source, vocab_length_target, targetIndexer,
                         targetDictionnary, num_layer, embedding_size,
                         hidden_size, att_size)


def train(train_data, log_writer):
    random.shuffle(train_data)
    train_words, train_loss = 0, 0.0
    start = time.time()

    for sent_id, sent in enumerate(train_data):
        input_sent, output_sent = sent[0], sent[1]
        loss = model.forward(input_sent, output_sent, mode="train")
        train_loss += loss.value()
        train_words += len(sent[1][1:])
        model.backward_and_update(loss)
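# A hedged usage sketch of driving the train() helper above for a few epochs.
# train_source_file / train_target_file, the log file name, and the epoch count
# are assumptions made by analogy with the dev/test files read above; they are
# not taken from the original script.
train_data = list(read_dataset(train_source_file, train_target_file))
with open("train.log", "wt") as log_writer:
    for epoch in range(10):
        train(train_data, log_writer)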
args.max_length = 25  # Maximum number of words in a title

print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# Load dataset
abstracts, titles = get_splitted_data()

args.src_vocab_size = 100
args.trg_vocab_size = 100
args.bidirectional = False

loss_criterion = nn.CrossEntropyLoss()
model = Seq2SeqAttention(args)
optimizer = optim.Adam(model.parameters(), lr=args.lr)

decoder_logit = model(input_lines_src, input_lines_trg)
optimizer.zero_grad()
loss = loss_criterion(decoder_logit.contiguous().view(-1, vocab_size),
                      output_lines_trg.view(-1))
losses.append(loss.data[0])
loss.backward()
optimizer.step()
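# Note: loss.data[0] above is the pre-0.4 PyTorch idiom for reading a scalar
# loss; on PyTorch >= 0.4 the equivalent call is loss.item(). A minimal sketch
# of the same update step in the newer style, everything else unchanged:
optimizer.zero_grad()
loss = loss_criterion(decoder_logit.contiguous().view(-1, vocab_size),
                      output_lines_trg.view(-1))
losses.append(loss.item())
loss.backward()
optimizer.step()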
        dropout=0.,
    )
    if torch.cuda.is_available():
        model.cuda()

elif config['model']['seq2seq'] == 'attention':
    model = Seq2SeqAttention(
        src_emb_dim=config['model']['dim_word_src'],
        trg_emb_dim=config['model']['dim_word_trg'],
        src_vocab_size=src_vocab_size,
        trg_vocab_size=trg_vocab_size,
        src_hidden_dim=config['model']['dim'],
        trg_hidden_dim=config['model']['dim'],
        ctx_hidden_dim=config['model']['dim'],
        attention_mode='dot',
        batch_size=batch_size,
        bidirectional=config['model']['bidirectional'],
        pad_token_src=src['word2id']['<pad>'],
        pad_token_trg=trg['word2id']['<pad>'],
        nlayers=config['model']['n_layers_src'],
        nlayers_trg=config['model']['n_layers_trg'],
        dropout=0.,
        attention_method=config['model']['attention_method'])
    if torch.cuda.is_available():
        model.cuda()

elif config['model']['seq2seq'] == 'fastattention':
    model = Seq2SeqFastAttention(
        src_emb_dim=config['model']['dim_word_src'],
print('Reading test data...')
BATCH_SIZE = 128
_, X_test = ptb(section='wsj_23', directory='data/', column=0)
_, y_test = ptb(section='wsj_23', directory='data/', column=1)
X_test, y_test = sort_by_len(X_test, y_test)
X_test_raw, _ = batch(X_test, batch_size=BATCH_SIZE, mask='<mask>')
y_test_raw, _ = batch(y_test, batch_size=BATCH_SIZE, mask='<mask>')
X_test_seq, word_to_n, n_to_word = text_to_sequence(X_test, in_vocab)
y_test_seq, _, _ = text_to_sequence(y_test, out_vocab)
X_test_seq, X_test_masks = batch(X_test_seq, batch_size=BATCH_SIZE, mask=len(in_vocab)-1)
y_test_seq, y_test_masks = batch(y_test_seq, batch_size=BATCH_SIZE, mask=len(in_vocab)-1)
print('Done.')

print('Building model...')
collection = dy.ParameterCollection()
seq2seq = Seq2SeqAttention(collection, len(in_vocab), len(out_vocab))
print('Done.')

print('Loading model...')
RUN = 'runs/baseline'
checkpoint = os.path.join(RUN, 'baseline.model')
print('Loading from %s.' % checkpoint)
collection.populate(checkpoint)
print('Done.')

print('Testing...')
loss = 0.
correct_toks = 0.
total_toks = 0.
test = open(os.path.join(RUN, 'test'), 'wt')
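# Hypothetical continuation sketch, not the original test loop: accumulate
# per-token accuracy over the batched test set built above. seq2seq.decode(...)
# is an assumed method name, and the batch/mask layout is assumed to be a list
# of batches of equal-length sequences; only the bookkeeping variables
# (correct_toks, total_toks) come from the snippet itself.
for X_batch, y_batch, mask_batch in zip(X_test_seq, y_test_seq, X_test_masks):
    dy.renew_cg()
    y_pred = seq2seq.decode(X_batch, mask_batch)   # assumed API
    for pred_sent, gold_sent, mask in zip(y_pred, y_batch, mask_batch):
        for p, g, m in zip(pred_sent, gold_sent, mask):
            if m:
                correct_toks += (p == g)
                total_toks += 1
print('Token accuracy: %f' % (correct_toks / total_toks))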
    'val': valid_data,
    'test': test_data,
})
data_batch = data_iterator.get_next()

source_vocab = training_data.source_vocab
target_vocab = training_data.target_vocab

model = Seq2SeqAttention(
    src_emb_dim=model_config.embdim,
    trg_emb_dim=model_config.embdim,
    src_vocab_size=source_vocab.size,
    trg_vocab_size=target_vocab.size,
    src_hidden_dim=model_config.dim,
    trg_hidden_dim=model_config.dim,
    ctx_hidden_dim=model_config.dim,
    attention_mode='dot',
    batch_size=data_config.training_data_hparams['batch_size'],
    bidirectional=model_config.bidir,
    pad_token_src=int(source_vocab.pad_token_id),
    pad_token_trg=int(target_vocab.pad_token_id),
    nlayers=model_config.nlayerssrc,
    nlayers_trg=model_config.nlayerstgt,
    dropout=train_config.dropout).to(device)

criterion_cross_entropy = nn.CrossEntropyLoss(
    ignore_index=int(target_vocab.pad_token_id))
criterion_bleu = mBLEU(train_config.max_order)

step = 0


def _load_model(epoch, step=None):