Example #1
    def _load_model(self):
        print('Loading pretrained model')
        if self.config['model']['seq2seq'] == 'vanilla':
            print('Loading Seq2Seq Vanilla model')

            self.model = Seq2Seq(
                src_emb_dim=self.config['model']['dim_word_src'],
                trg_emb_dim=self.config['model']['dim_word_trg'],
                src_vocab_size=len(self.src_dict),
                trg_vocab_size=len(self.tgt_dict),
                src_hidden_dim=self.config['model']['dim'],
                trg_hidden_dim=self.config['model']['dim'],
                batch_size=self.config['data']['batch_size'],
                bidirectional=self.config['model']['bidirectional'],
                pad_token_src=self.src_dict['<pad>'],
                pad_token_trg=self.tgt_dict['<pad>'],
                nlayers=self.config['model']['n_layers_src'],
                nlayers_trg=self.config['model']['n_layers_trg'],
                dropout=0.,
            ).cuda()

        elif self.config['model']['seq2seq'] == 'attention':
            print('Loading Seq2Seq Attention model')

            self.model = Seq2SeqAttention(
                src_emb_dim=self.config['model']['dim_word_src'],
                trg_emb_dim=self.config['model']['dim_word_trg'],
                src_vocab_size=len(self.src_dict),
                trg_vocab_size=len(self.tgt_dict),
                src_hidden_dim=self.config['model']['dim'],
                trg_hidden_dim=self.config['model']['dim'],
                ctx_hidden_dim=self.config['model']['dim'],
                attention_mode='dot',
                batch_size=self.config['data']['batch_size'],
                bidirectional=self.config['model']['bidirectional'],
                pad_token_src=self.src_dict['<pad>'],
                pad_token_trg=self.tgt_dict['<pad>'],
                nlayers=self.config['model']['n_layers_src'],
                nlayers_trg=self.config['model']['n_layers_trg'],
                dropout=0.,
            ).cuda()

        self.model.load_state_dict(torch.load(
            open(self.model_weights, 'rb')
        ))
        print("Model loaded")
Example #2
vocab_length_source = len(sourceIndexer)
print("Source vocabulary size: {}".format(vocab_length_source))
vocab_length_target = len(targetIndexer)
print("Target vocabulary size: {}".format(vocab_length_target))

dev_data = list(read_dataset(dev_source_file, dev_target_file))
test_data = list(read_dataset(test_source_file, test_target_file))

num_layer = 1
embedding_size = 128
hidden_size = 64
att_size = 32

# start DyNet and define trainer
model = Seq2SeqAttention(vocab_length_source, vocab_length_target,
                         targetIndexer, targetDictionnary, num_layer,
                         embedding_size, hidden_size, att_size)


def train(train_data, log_writer):
    random.shuffle(train_data)
    train_words, train_loss = 0, 0.0
    start = time.time()

    for sent_id, sent in enumerate(train_data):
        input_sent, output_sent = sent[0], sent[1]
        loss = model.forward(input_sent, output_sent, mode="train")
        train_loss += loss.value()
        train_words += len(sent[1][1:])
        model.backward_and_update(loss)
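The loop above accumulates train_loss and train_words, but the end-of-epoch reporting is cut off in this snippet. A minimal sketch of how those counters are usually turned into per-word loss and perplexity (the log format is an assumption, not from the original code):

import math
import time

# Average the summed loss over the number of target words, then report
# perplexity and elapsed wall-clock time for the epoch.
avg_loss = train_loss / max(train_words, 1)
elapsed = time.time() - start
log_writer.write("loss/word={:.4f} ppl={:.2f} time={:.1f}s\n".format(
    avg_loss, math.exp(avg_loss), elapsed))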
Example #3
args.max_length = 25            # Maximum number of words in a title


print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))


# Load dataset
abstracts, titles = get_splitted_data()

args.src_vocab_size = 100
args.trg_vocab_size = 100
args.bidirectional = False



loss_criterion = nn.CrossEntropyLoss()

model = Seq2SeqAttention(args)
optimizer = optim.Adam(model.parameters(), lr=args.lr)

decoder_logit = model(input_lines_src, input_lines_trg)
optimizer.zero_grad()

loss = loss_criterion(decoder_logit.contiguous().view(-1, vocab_size),
                      output_lines_trg.view(-1))
losses.append(loss.item())
loss.backward()
optimizer.step()
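Example #3 shows a single parameter update. A hedged sketch of the same step inside a batched training loop with gradient clipping; the train_batches iterator and the clipping value of 1.0 are assumptions, not part of the original snippet:

import torch

losses = []
for input_lines_src, input_lines_trg, output_lines_trg in train_batches:
    decoder_logit = model(input_lines_src, input_lines_trg)
    optimizer.zero_grad()
    loss = loss_criterion(
        decoder_logit.contiguous().view(-1, args.trg_vocab_size),
        output_lines_trg.view(-1))
    losses.append(loss.item())
    loss.backward()
    # Clip gradients to stabilize RNN training; 1.0 is an illustrative value.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()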
Example #4
        # (snippet starts mid-call: the tail of the 'vanilla' Seq2Seq constructor)
        dropout=0.,
    )
    if torch.cuda.is_available():
        model.cuda()

elif config['model']['seq2seq'] == 'attention':

    model = Seq2SeqAttention(
        src_emb_dim=config['model']['dim_word_src'],
        trg_emb_dim=config['model']['dim_word_trg'],
        src_vocab_size=src_vocab_size,
        trg_vocab_size=trg_vocab_size,
        src_hidden_dim=config['model']['dim'],
        trg_hidden_dim=config['model']['dim'],
        ctx_hidden_dim=config['model']['dim'],
        attention_mode='dot',
        batch_size=batch_size,
        bidirectional=config['model']['bidirectional'],
        pad_token_src=src['word2id']['<pad>'],
        pad_token_trg=trg['word2id']['<pad>'],
        nlayers=config['model']['n_layers_src'],
        nlayers_trg=config['model']['n_layers_trg'],
        dropout=0.,
        attention_method=config['model']['attention_method'])
    if torch.cuda.is_available():
        model.cuda()

elif config['model']['seq2seq'] == 'fastattention':

    model = Seq2SeqFastAttention(
        src_emb_dim=config['model']['dim_word_src'],
        # ... (remaining constructor arguments truncated in the source)

Example #5

    print('Reading test data...')
    BATCH_SIZE = 128
    _, X_test = ptb(section='wsj_23', directory='data/', column=0)
    _, y_test = ptb(section='wsj_23', directory='data/', column=1)
    X_test, y_test = sort_by_len(X_test, y_test)
    X_test_raw, _ = batch(X_test, batch_size=BATCH_SIZE, mask='<mask>') 
    y_test_raw, _ = batch(y_test, batch_size=BATCH_SIZE, mask='<mask>')
    X_test_seq, word_to_n, n_to_word = text_to_sequence(X_test, in_vocab)
    y_test_seq, _, _ = text_to_sequence(y_test, out_vocab)
    X_test_seq, X_test_masks = batch(X_test_seq, batch_size=BATCH_SIZE, mask=len(in_vocab)-1)
    y_test_seq, y_test_masks = batch(y_test_seq, batch_size=BATCH_SIZE, mask=len(in_vocab)-1)
    print('Done.')

    print('Building model...')
    collection = dy.ParameterCollection()
    seq2seq = Seq2SeqAttention(collection, len(in_vocab), len(out_vocab))
    print('Done.')

    print('Loading model...')
    RUN = 'runs/baseline'
    checkpoint = os.path.join(RUN, 'baseline.model')
    print('Loading from %s.' % checkpoint)
    collection.populate(checkpoint)
    print('Done.')

    print('Testing...')
    loss = 0.
    correct_toks = 0.
    total_toks = 0.

    test = open(os.path.join(RUN, 'test'), 'wt')
Example #6

        # (snippet starts mid-call: the tail of a data-iterator construction)
            'val': valid_data,
            'test': test_data,
        })
        data_batch = data_iterator.get_next()

        source_vocab = training_data.source_vocab
        target_vocab = training_data.target_vocab

        model = Seq2SeqAttention(
            src_emb_dim=model_config.embdim,
            trg_emb_dim=model_config.embdim,
            src_vocab_size=source_vocab.size,
            trg_vocab_size=target_vocab.size,
            src_hidden_dim=model_config.dim,
            trg_hidden_dim=model_config.dim,
            ctx_hidden_dim=model_config.dim,
            attention_mode='dot',
            batch_size=data_config.training_data_hparams['batch_size'],
            bidirectional=model_config.bidir,
            pad_token_src=int(source_vocab.pad_token_id),
            pad_token_trg=int(target_vocab.pad_token_id),
            nlayers=model_config.nlayerssrc,
            nlayers_trg=model_config.nlayerstgt,
            dropout=train_config.dropout).to(device)

    criterion_cross_entropy = nn.CrossEntropyLoss(
        ignore_index=int(target_vocab.pad_token_id))
    criterion_bleu = mBLEU(train_config.max_order)

    step = 0

    def _load_model(epoch, step=None):