Example #1
def _main():
    data_manager = DataManager()
    vocab_size = len(data_manager.word2ix)
    model = BiLSTM_CRF(device, vocab_size, data_manager.tag2ix, EMBEDDING_DIM, HIDDEN_DIM)
    model = model.to(device)

    train_set = NerDataset(data_manager.train_sents, data_manager.train_tags)
    dev_set = NerDataset(data_manager.dev_sents, data_manager.dev_tags)
    train_loader = DataLoader(train_set, batch_size=BATCH_SZ, shuffle=True)
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SZ, shuffle=False)  # no need to shuffle the dev set

    optimizer = optim.Adam(model.parameters(), lr=0.01)
    epoch_loss = []

    '''Optional sanity check before training:
    with torch.no_grad():
        precheck_sent = to_tensor(data_manager.train_sents[0])
        precheck_tag = to_tensor(data_manager.train_tags[0])
        print(precheck_tag)
        print(model(precheck_sent))'''

    for epoch in range(EPOCH_NUM):
        epoch_loss = []  # reset each epoch so the printed average is per-epoch, not cumulative
        for sents, tags, lengths in tqdm(train_loader):
            sents = sents.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            # print(lengths, sents.size(), tags.size())
            loss = model.neg_log_likelihood(sents, tags, lengths)

            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, ' epoch loss: ', sum(epoch_loss)/len(epoch_loss))
        save_model(model, epoch)
        eval(model, dev_loader)
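Example #1 calls save_model() without showing it. A minimal sketch, assuming checkpoints are simply written to a local directory (the directory and file naming below are placeholders, not the original code):

import os
import torch

def save_model(model, epoch, ckpt_dir='checkpoints'):
    # hypothetical helper: persist the weights after each epoch so training can be resumed later
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(ckpt_dir, 'bilstm_crf_epoch_{}.pt'.format(epoch)))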
Example #2
def train_and_val():
    embedding_dim = 100
    hidden_dim = 100
    model_load_path = None
    best_model_save_path = 'model/model_100_best_0223.pth'
    max_score = 0
    stop_epoch = 30
    unimprove_time = 0
    val_json_path = '/home/agwave/Data/resume/val_0222.json'
    val_pdf_dir = '/home/agwave/Data/resume/val_0222/'

    training_data = get_data_from_data_txt(TRAIN_WORD_TO_TAG_PATH)
    with open('supporting_document/train_word_to_tag_0223.json', 'r') as j:
        word_to_ix = json.load(j)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38}
    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    start_epoch = 0
    if model_load_path is not None:
        print('load model...')
        checkpoint = torch.load(model_load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    preliminary_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    print('preliminary score:', preliminary_score)

    for epoch in range(start_epoch, stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        if cur_epoch_score > max_score:
            unimprove_time = 0
            max_score = cur_epoch_score
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch
            }, best_model_save_path)
            print('save best model successfully.')
        else:
            # stop as soon as the validation score stops improving
            break
Example #3
def train_all_data():
    embedding_dim = 100
    hidden_dim = 100
    stop_epoch = 1
    model_1_epoch = 'model/model_1_epoch_lr0001.pth'

    training_data = get_data_from_data_txt(DATA_PERFECT_PATH)
    word_to_ix = get_word_to_ix(training_data, min_word_freq=1)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38, 'c-live': 39, 'c-proj': 40, 'c-woti': 41,
                 'c-post': 42, 'c-unv': 43, 'c-nati': 44, 'c-poli': 45, 'c-prti':46, 'c-comp': 47}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Make sure prepare_sequence from earlier in the LSTM section is loaded
    for epoch in range(stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch + 1)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, TRAIN_JSON_PATH, TRAIN_PDF_DIR)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        print()
        if epoch == stop_epoch - 1:  # range() never reaches stop_epoch itself, so save after the last epoch
            torch.save({
                'model_state_dict': model.state_dict()
            }, model_1_epoch)
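Example #3 relies on get_word_to_ix() to build the vocabulary, which is not shown. A rough reconstruction, assuming it maps every word that reaches min_word_freq to an index (the real helper may also reserve an <unk> entry for rare or unseen words):

from collections import Counter

def get_word_to_ix(training_data, min_word_freq=1):
    # count word occurrences over all training sentences (training_data yields (sentence, tags) pairs)
    counter = Counter(word for sentence, _ in training_data for word in sentence)
    word_to_ix = {}
    for word, freq in counter.items():
        if freq >= min_word_freq:
            word_to_ix[word] = len(word_to_ix)
    return word_to_ix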
Example #4
        sentence_char_lengths, sentence_char_position_map, str_words, unaligned_tags = \
            generate_mini_batch_input(train_set, mini_batch_idx, mappings, char_mode)

        if use_gpu:
            sentence_masks = sentence_masks.to(device)
            words = words.to(device)
            chars = chars.to(device)
            tags = tags.to(device)
            sentence_char_lengths = sentence_char_lengths.to(device)

        start_train = datetime.now()

        model.zero_grad()

        neg_log_likelihood = model.neg_log_likelihood(
            words, sentence_masks, tags, chars, sentence_char_lengths,
            sentence_char_position_map, device)
        neg_log_likelihood.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        end_train = datetime.now()
        train_time += (end_train - start_train).total_seconds()

        loss = neg_log_likelihood.data.item()
        log = "epoch {0} batch {1}/{2} loss {3}".format(
            epoch + 1, batch_i + 1, batch_count, loss)
        # print(log)
        full_logs.append(log)

    print("train_time:{}".format(train_time))
Example #5
                chars2_mask[i, :chars2_length[i]] = c
            chars2_mask = Variable(torch.LongTensor(chars2_mask))
        # else:
        #     chars2_mask = Variable(torch.LongTensor(0))
        #     chars2_length= []

        targets = torch.LongTensor(tags)
        caps = Variable(torch.LongTensor(data['caps']))
        gaz_ids = Variable(torch.LongTensor(data['gaz_ids']))
        # print (data['pos_ids'])
        pos_ids = Variable(torch.LongTensor(data['pos_ids']))
        pre_ids = Variable(torch.LongTensor(data['pre_ids']))
        suf_ids = Variable(torch.LongTensor(data['suf_ids']))
        if use_gpu:
            neg_log_likelihood = model.neg_log_likelihood(
                sentence_in.cuda(), targets.cuda(), chars2_mask.cuda(),
                caps.cuda(), chars2_length, d, gaz_ids.cuda(), pos_ids.cuda(),
                pre_ids.cuda(), suf_ids.cuda())
        else:
            neg_log_likelihood = model.neg_log_likelihood(
                sentence_in, targets, chars2_mask, caps, chars2_length, d,
                gaz_ids, pos_ids, pre_ids, suf_ids)
        loss += neg_log_likelihood.item() / len(data['words'])
        neg_log_likelihood.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        if count % plot_every == 0:
            loss /= plot_every
            print(count, ': ', loss)
            if losses == []:
                losses.append(loss)
Example #6
        # ######## char cnn
        if parameters['char_mode'] == 'CNN':
            d = {}
            chars2_length = [len(c) for c in chars2]
            char_maxl = max(chars2_length)
            chars2_mask = np.zeros((len(chars2_length), char_maxl),
                                   dtype='int')
            for i, c in enumerate(chars2):
                chars2_mask[i, :chars2_length[i]] = c
            chars2_mask = Variable(torch.LongTensor(chars2_mask))

        targets = torch.LongTensor(tags)
        caps = Variable(torch.LongTensor(data['caps']))
        if use_gpu:
            neg_log_likelihood = model.neg_log_likelihood(
                sentence_in.cuda(), targets.cuda(), chars2_mask.cuda(),
                caps.cuda(), chars2_length, d)
        else:
            neg_log_likelihood = model.neg_log_likelihood(
                sentence_in, targets, chars2_mask, caps, chars2_length, d)
        loss += neg_log_likelihood.item() / len(data['words'])
        neg_log_likelihood.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        if count % plot_every == 0:
            loss /= plot_every
            print(count, ': ', loss)
            if losses == []:
                losses.append(loss)
            losses.append(loss)
Example #7
# Make sure prepare_sequence from earlier in the LSTM section is loaded
for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is,
        # turn them into Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)  # sequence of word ids
        targets = torch.tensor([tag_to_ix[t] for t in tags],
                               dtype=torch.long)  # sequence of tag ids

        # Step 3. Run our forward pass.
        loss = model.neg_log_likelihood(sentence_in, targets)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step()
        loss.backward()
        optimizer.step()

# Check predictions after training
with torch.no_grad():
    # sequence of word ids
    precheck_sent = prepare_sequence(training_data[0][0], word_to_ix)
    precheck_tags = torch.tensor([tag_to_ix[t] for t in training_data[0][1]], dtype=torch.long)
    print('Predicted score and tag id sequence:')
    print(model(precheck_sent))
    print('Ground-truth tag id sequence:')
    print(precheck_tags)
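Several of the examples above call prepare_sequence() without defining it. The standard definition from the PyTorch BiLSTM-CRF tutorial is shown below as a reference sketch; individual examples may use a variant that handles out-of-vocabulary words:

import torch

def prepare_sequence(seq, to_ix):
    # convert a list of tokens into a LongTensor of vocabulary indices
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)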
Example #8
                       HIDDEN_DIM).to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    best_f1 = -1
    if args.do_train:
        for epoch in range(epochs):
            for i, batch in enumerate(train_data_loader):
                model.zero_grad()
                sents, labs, lens = batch
                sents = pad_sequence(sents, batch_first=True).to(device)
                labs = pad_sequence(labs, batch_first=True).to(device)
                lens = torch.tensor(lens).to(device)
                lens, idx = torch.sort(lens, descending=True)
                sents = sents[idx]
                labs = labs[idx]
                loss = model.neg_log_likelihood(sents, labs, lens)
                loss.backward()
                optimizer.step()
                score, preds = model(sents, lens)
                true_labs = [
                    seqid2text(labs[i, :l], ix_to_lab)
                    for i, l in enumerate(lens)
                ]
                pred_labs = [
                    seqid2text(preds[i, :l], ix_to_lab)
                    for i, l in enumerate(lens)
                ]
                acc = accuracy_score(true_labs, pred_labs)
                f1 = f1_score(true_labs, pred_labs)
                print(
                    "Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}, train f1 {:.4f} "
Example #9
def train(conf):
    train_sentences = load_sentences(conf.train_file, conf.zeros)
    dev_sentences = load_sentences(conf.dev_file, conf.zeros)
    test_sentences = load_sentences(conf.test_file, conf.zeros)

    dico_chars_train = char_mapping(train_sentences, conf.lower)[0]
    dico_chars, char_to_id, id_to_char = augment_with_pretrained(
        dico_chars_train.copy(), conf.emb_file,
        list(
            itertools.chain.from_iterable([[w[0] for w in s]
                                           for s in test_sentences])))
    _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 conf.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               conf.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                conf.lower)

    # load pretrained word embeddings
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(conf.emb_file, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == conf.embedding_dim + 1:
            all_word_embeds[s[0]] = np.array([float(i) for i in s[1:]])
    word_embeds_dict = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06),
                                         (len(char_to_id), conf.embedding_dim))
    for w in char_to_id:
        if w in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w.lower()]
    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

    train_manager = BatchManager(train_data, conf.batch_size)

    model = BiLSTM_CRF(conf, tag_to_id, char_to_id, word_embeds_dict)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=conf.learning_rate,
                                weight_decay=1e-4)
    n_epochs = conf.epochs
    dev_f1_ = 0
    for epoch in range(1, n_epochs + 1):
        print(f'train on epoch {epoch}')
        j = 1
        for batch in train_manager.iter_batch(shuffle=True):
            batch_loss = 0.0
            sentences = batch[1]
            tags = batch[-1]
            for i, index in enumerate(np.random.permutation(len(sentences))):
                model.zero_grad()
                sentence_in = sentences[index]
                tags_in = tags[index]
                loss = model.neg_log_likelihood(sentence_in, tags_in)
                loss.backward()
                optimizer.step()
                batch_loss += loss.item()
            print(
                f'[batch {j},batch size:{conf.batch_size}] On this batch loss: {batch_loss}'
            )
            j = j + 1
        print(f'Begin validating results on [epoch {epoch}] valid dataset ...')
        dev_results = get_predictions(model, dev_data, id_to_tag)
        dev_f1 = evaluate_ner(dev_results, conf)
        if dev_f1 > dev_f1_:
            dev_f1_ = dev_f1  # remember the best dev F1 so only improving checkpoints are saved
            torch.save(model, conf.model_file)
            print('save model success.')
        test_results = get_predictions(model, test_data, id_to_tag)
        test_f1 = evaluate_ner(test_results, conf)
        print(f'[epoch {epoch}] On test dataset f1: {test_f1:.3f}')
Example #10
        model.zero_grad()
        words = data['words']
        words1 = data['words1']
        words2 = data['words2']
        words3 = data['words3']
        tags = data['tags']

        words = Variable(torch.LongTensor(words))
        words1 = Variable(torch.LongTensor(words1))
        words2 = Variable(torch.LongTensor(words2))
        words3 = Variable(torch.LongTensor(words3))
        targets = torch.LongTensor(tags)

        if use_gpu:
            neg_log_likelihood = model.neg_log_likelihood(
                words.cuda(), words1.cuda(), words2.cuda(), words3.cuda(),
                targets.cuda())
        else:
            neg_log_likelihood = model.neg_log_likelihood(
                words, words1, words2, words3, targets)
        loss += neg_log_likelihood.item() / len(data['words'])
        neg_log_likelihood.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()

        if i % plot_every == 0:
            loss /= plot_every
            print("%i, cost average: %f, %i/%i epoch" %
                  (i, loss, epoch, n_epoch))
            if losses == []:
                losses.append(loss)

Example #11
model = BiLSTM_CRF(args, label2idx, weight, device).to(device)
optimizer = optim.Adam(filter(lambda p:p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.weight_decay)

best_f1 = 0.0
print('training on ',device)
for epoch in range(1):
    model.train()
    train_loss_sum = 0.0
    steps = 0
    for batch in train_iter:
        X, y = batch.TEXT, batch.LABEL
        X, y = X.to(device).long(), y.to(device).long()

        loss = model.neg_log_likelihood(X, y)

        optimizer.zero_grad()
        loss.backward()
        # gradient clipping (clip_grad_norm_ requires a max_norm; 5.0 matches the other examples here but is an assumed value)
        nn.utils.clip_grad_norm_(filter(lambda p: p.requires_grad, model.parameters()), max_norm=5.0)
        optimizer.step()

        train_loss_sum += loss.item()  # take a Python float so the computation graph is released
        steps += 1

        # periodic evaluation
        if steps % test_per_step == 0:
            model.eval()
            valid_loss_sum = 0.0
            m = 0
Example #12
            mix_data = train_data + dev_data
            data = mix_data[index]
            model.zero_grad()

            sentence_in = data['words']
            sentence_in = Variable(torch.LongTensor(sentence_in))
            tags = data['tags']
            semroles = data['semroles']

            targets = torch.LongTensor(tags)
            semroles = Variable(torch.LongTensor(semroles))
            ELMo = Variable(torch.FloatTensor(emlo_traddde[index]))
            Topic = Variable(torch.FloatTensor(embed_traddde[index]))

            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in.cuda(), targets.cuda(), semroles.cuda(),
                    Topic.cuda(), ELMo.cuda())
            else:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in, targets, semroles, Topic, ELMo)

            loss += neg_log_likelihood.item() / len(data['words'])
            neg_log_likelihood.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if count % plot_every == 0:
                loss /= plot_every
                print(count, ': ', loss)
                if losses == []:
                    losses.append(loss)
                losses.append(loss)
Example #13
    model.train(True)
    for epoch in range(1, n_epoch + 1):
        start_epoch_tim = time.time()
        epoch_costs = []
        for i, index in enumerate(np.random.permutation(len(train_data))):
            tr = time.time()
            count += 1
            data = train_data[index]
            model.zero_grad()
            sentence_in = data['words']
            tags = data['tags']
            sentence_in = Variable(torch.LongTensor(sentence_in))
            targets = torch.LongTensor(tags)
            caps = Variable(torch.LongTensor(data['caps']))
            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in.cuda(), targets.cuda(), caps.cuda())
            else:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in, targets, caps)
            loss += neg_log_likelihood.item() / len(data['words'])
            neg_log_likelihood.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if i % plot_every == 0:
                loss /= plot_every
                print("%i, cost average: %f, %i/%i epoch" %
                      (i, loss, epoch, n_epoch))
                if losses == []:
                    losses.append(loss)
                losses.append(loss)
Example #14
                        d[j] = a
                        continue
            chars2_length = [len(c) for c in chars2_sorted]
            char_maxl = max(chars2_length)
            chars2_mask = np.zeros((len(chars2_sorted), char_maxl), dtype = 'int')
            for a, c in enumerate(chars2_sorted):
                chars2_mask[a, : chars2_length[a]] = c

            # Transform list data into Variable(torch.LongTensor) form
            chars2_mask = Variable(torch.LongTensor(chars2_mask))
            sentence_in = Variable(torch.LongTensor(sentence_in))

            # sentence_character_ids = Variable(sentence_character_ids)
            targets = torch.LongTensor(tags)
            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in.cuda(), sentence_character_ids.cuda(),targets.cuda(), 
                    chars2_mask.cuda(), chars2_length, d)
            else:
                neg_log_likelihood = model.neg_log_likelihood(sentence_in, sentence_character_ids, targets, chars2_mask, chars2_length, d)

            loss += neg_log_likelihood.item() / len(data['words'])
            neg_log_likelihood.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if i % plot_every == 0:
                loss /= plot_every
                print("%i, cost average: %f, %i/%i epoch" %(i, loss, epoch, n_epoch))
                if losses == []:
                    losses.append(loss)
                losses.append(loss)
                text = '<p>' + '</p><p>'.join([str(l) for l in losses[-9:]]) + '</p>'
Example #15
    batch_generator = generate_conll_batches(dataset,
                                             batch_size=args.batch_size,
                                             device=args.device)
    train_loss_sum = 0
    train_batch_size = 0
    valid_loss_sum = 0
    valid_batch_size = 0

    running_loss = 0
    model.train()
    for batch_index, batch_dict in enumerate(batch_generator):
        optimizer.zero_grad()
        current_batch_size = len(batch_dict["token_vec"])

        batch_loss = model.neg_log_likelihood(batch_dict["token_vec"],
                                              batch_dict["tag_vec"],
                                              batch_dict["seq_len"])  # [b_s]
        # batch_loss = model(batch_dict["token_vec"],
        #                                           batch_dict["tag_vec"],
        #                                           batch_dict["seq_len"])  # [b_s]
        loss = batch_loss.mean()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=args.clip_max_norm)
        optimizer.step()
        train_loss_sum += batch_loss.sum().item()
        train_batch_size += current_batch_size
        running_loss = train_loss_sum / train_batch_size

        train_bar.set_postfix(loss=running_loss, epoch=epoch_index)
        train_bar.update()
Example #16
import torch.optim as optim
from dataset import Dataset
from model import BiLSTM_CRF

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

epochs = 100
dataset = Dataset()
train_loader = dataset.get_train_loader(1)
model = BiLSTM_CRF(dataset.get_vocab_size(), dataset.get_label_index_dict(),
                   128, 128)

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

model.train()
for epoch in range(epochs):
    for step, batch in enumerate(train_loader):  # avoid shadowing the built-in iter()
        sentence_in, targets = batch.line, batch.label

        sentence_in = sentence_in.permute([1, 0]).reshape(-1).contiguous()
        targets = targets.permute([1, 0]).reshape(-1).contiguous()

        model.zero_grad()
        loss = model.neg_log_likelihood(sentence_in.squeeze(-1),
                                        targets.squeeze(-1)) / len(sentence_in)

        loss.backward()
        optimizer.step()

        print("{}-{}: {:.5f}".format(epoch, iter, loss.item()))
Example #17
            context_bef = data['context_bef']
            context_aft = data['context_aft']

            targets = torch.LongTensor(tags)
            semroles = Variable(torch.LongTensor(semroles))
            context_bef = Variable(torch.LongTensor(context_bef))
            context_aft = Variable(torch.LongTensor(context_aft))
            ELMo = Variable(torch.FloatTensor(elmo_traddde[index]))
            ELMo_ConBef = Variable(
                torch.FloatTensor(elmo_conbef_traddde[index]))
            ELMo_ConAft = Variable(
                torch.FloatTensor(elmo_conaft_traddde[index]))

            if use_gpu:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in.cuda(), targets.cuda(), semroles.cuda(),
                    context_bef.cuda(), context_aft.cuda(), ELMo.cuda(),
                    ELMo_ConBef.cuda(), ELMo_ConAft.cuda())
            else:
                neg_log_likelihood = model.neg_log_likelihood(
                    sentence_in, targets, semroles, context_bef, context_aft,
                    ELMo, ELMo_ConBef, ELMo_ConAft)

            loss += neg_log_likelihood.item() / len(data['words'])
            neg_log_likelihood.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            if count % plot_every == 0:
                loss /= plot_every
                print(count, ': ', loss)
                if losses == []: