import gc
import random
import sys
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import visdom

# Project-specific helpers (SeqModel, batchify_with_label, predict_check,
# evaluate, evaluate_result, lr_decay, save_data_setting, logger) are assumed
# to be imported from this repo's own modules.


## Variant 1: per-instance SGD updates, with entity and gloss features.
def train(data, save_model_dir, seg=True):
    model = SeqModel(data)
    # print("finished built model.")
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # random.shuffle(data.index_data)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = 1  ## currently only batch size = 1 is supported
        train_num = len(data.index_data)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.index_data[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_entity, batch_gloss, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            loss, predict = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_entity, batch_gloss, batch_label, mask)
            # print(predict)
            print("Batch_Id:", batch_id, " Loss:", loss)
            loss.backward()
            optimizer.step()
            model.zero_grad()
            predict_check(predict, batch_label, mask)
            evaluate_result(predict, batch_label, mask)
            print("\n")
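# `predict_check` and `evaluate_result` are defined elsewhere in this repo.
# For reference, below is a minimal sketch of a masked token-accuracy check
# consistent with how predict_check is called in these loops: it takes
# (batch_size, seq_len) prediction, gold, and mask tensors and returns
# (right_token, total_token). This is an assumption for illustration, not the
# repo's exact implementation.
def predict_check_sketch(pred_variable, gold_variable, mask_variable):
    pred = pred_variable.cpu().data.numpy()
    gold = gold_variable.cpu().data.numpy()
    mask = mask_variable.cpu().data.numpy()
    overlap = (pred == gold) * mask  # correct predictions on real (non-pad) tokens
    right_token = overlap.sum()
    total_token = mask.sum()
    return right_token, total_token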
## Variant 2: lexicon (gaz) model with bigram and char features; evaluates on
## dev and test each epoch and checkpoints on the best dev score.
def train(data, save_model_dir, save_data_set, seg=True):
    print("Training model...")
    data.show_data_summary()
    # save_data_name = save_data_set
    # save_data_setting(data, save_data_name)
    model = SeqModel(data)
    print("Finished building model.")
    loss_function = nn.NLLLoss()
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = 10  ## losses accumulate until data.HP_batch_size instances have been seen, then the weights are updated
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            # print("gaz_list:", gaz_list)
            # exit(0)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_biword, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]  # pre-0.4 PyTorch idiom; use loss.item() on newer versions
            total_loss += loss.data[0]
            batch_loss += loss
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        gc.collect()
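# `lr_decay` is called by the variants above but defined elsewhere in this
# repo. A common schedule for this kind of helper (a hedged sketch, not
# necessarily the repo's exact formula) divides the initial learning rate by
# (1 + decay_rate * epoch) and writes it into every parameter group:
def lr_decay_sketch(optimizer, epoch, decay_rate, init_lr):
    lr = init_lr / (1 + decay_rate * epoch)
    print(" Learning rate is set as: %s" % lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer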
## Variant 3: same lexicon model as above, but processes one instance at a
## time (batch_size = 1) and accumulates the loss across data.HP_batch_size
## instances before each weight update. Python 2 print statements converted
## to the print() calls used by the other variants.
def train(data, save_model_dir, seg=True):
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    print("Finished building model.")
    loss_function = nn.NLLLoss()
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = 1  ## currently only batch size = 1 is supported: losses accumulate until data.HP_batch_size instances have been seen, then the weights are updated
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            # print("gaz_list:", gaz_list)
            # exit(0)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_biword, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]
            total_loss += loss.data[0]
            batch_loss += loss
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if seg:
            current_score = f
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        gc.collect()
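# `save_data_setting` persists the Data configuration next to the model (the
# ".dset" file above) so decoding can restore the same alphabets and
# hyperparameters. A hedged pickle-based sketch; the repo's helper and the
# exact fields it clears may differ:
import copy
import pickle

def save_data_setting_sketch(data, save_file):
    new_data = copy.deepcopy(data)
    # Drop large raw-text fields that can be rebuilt from the corpus files
    # (assumed attribute names, for illustration only).
    new_data.train_texts = []
    new_data.dev_texts = []
    new_data.test_texts = []
    with open(save_file, 'wb') as fp:
        pickle.dump(new_data, fp)
    print("Data setting saved to file: %s" % save_file)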
## Variant 4: character-level model (no gaz features), true mini-batches of
## data.HP_batch_size, optional gradient clipping, and visdom plots of the
## loss and of train/dev/test F-scores; checkpoints on the best test score.
def train(data, save_model_dir, seg=True):
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    # model = torch.nn.DataParallel(model, device_ids=[1, 2, 3, 0])  ## catner
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum)
    # optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    # best_dev = -1
    best_test = -1
    data.HP_iteration = 100
    vis = visdom.Visdom()
    losses = []
    all_F = [[0, 0, 0]]
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)  ### catner: update lr
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        # batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]
            total_loss += loss.data[0]
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                losses.append(sample_loss / 50.0)
                vis.line(np.array(losses), X=np.array([i for i in range(len(losses))]), win='loss', opts={'title': 'loss', 'legend': ['loss']})
                sample_loss = 0
            loss.backward()
            if data.HP_clip:
                # renamed clip_grad_norm_ in newer PyTorch
                torch.nn.utils.clip_grad_norm(model.parameters(), 50.0)
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f_train, _ = evaluate(data, model, "train")
        speed, acc, p, r, f_dev, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if seg:
            # current_score = f_dev
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f_dev))
        else:
            # current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        # if current_score > best_dev:
        #     if seg:
        #         print("Exceed previous best f score:", best_dev)
        #     else:
        #         print("Exceed previous best acc score:", best_dev)
        #     model_name = save_model_dir + '.' + str(idx) + ".model"
        #     torch.save(model.state_dict(), model_name)
        #     best_dev = current_score
        ## decode test
        speed, acc, p, r, f_test, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            current_score = f_test
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f_test))
        else:
            current_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        if current_score > best_test:
            if seg:
                print("Exceed previous best f score:", best_test)
            else:
                print("Exceed previous best acc score:", best_test)
            model_name = save_model_dir + '/model' + str(idx)
            torch.save(model.state_dict(), model_name)
            best_test = current_score
            # text mode ('w'), since we write str, not bytes
            with open(save_model_dir + '/eval' + str(idx) + ".txt", 'w') as fout:
                if seg:
                    fout.write("acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (acc, p, r, f_test))
                else:
                    fout.write("acc: %.4f" % acc)
        if seg:
            print("Current best f score:", best_test)
        else:
            print("Current best acc score:", best_test)
        all_F.append([f_train * 100.0, f_dev * 100.0, f_test * 100.0])
        Fwin = 'F-score of {train, dev, test}'
        vis.line(np.array(all_F), win=Fwin, X=np.array([i for i in range(len(all_F))]), opts={'title': Fwin, 'legend': ['train', 'dev', 'test']})
        gc.collect()
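# The visdom variant above assumes a visdom server is already running
# (typically started with `python -m visdom.server`, serving on port 8097);
# otherwise the vis.line() calls cannot render. A minimal standalone sketch
# of the same loss-plotting call, for reference:
def plot_losses_sketch(losses):
    vis = visdom.Visdom()
    vis.line(np.array(losses),
             X=np.arange(len(losses)),
             win='loss',
             opts={'title': 'loss', 'legend': ['loss']})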
## Variant 5: logger-based lexicon model with optional attention and
## word-to-char features; checkpoints on the best dev score (test decoding
## commented out). Redundant nested parentheses in the logger calls removed.
def train(data, save_model_dir, seg=True, ori_model_dir=None, use_attn=False, use_w2c=False):
    logger.info("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    # save_data_setting(data, save_data_name)
    model = SeqModel(data, use_attn=use_attn, use_w2c=use_w2c)
    # added: move the model to GPU when requested
    if data.HP_gpu:
        model = model.cuda()
    logger.info("Finished building model.")
    # model.load_state_dict(torch.load(ori_model_dir))
    loss_function = nn.NLLLoss()
    parameters = [p for p in model.parameters() if p.requires_grad]
    sgd_optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    # optimizer = optim.Adam(parameters)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        # idx = idx + 6
        epoch_start = time.time()
        temp_start = epoch_start
        logger.info("Epoch: %s/%s" % (idx, data.HP_iteration))
        # if idx < 5:
        #     optimizer = optim.Adam(parameters)
        # else:
        #     optimizer = lr_decay(sgd_optimizer, idx + 5, data.HP_lr_decay, data.HP_lr)
        optimizer = lr_decay(sgd_optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = 1  ## currently only batch size = 1 is supported: losses accumulate until data.HP_batch_size instances have been seen, then the weights are updated
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            # logger.info("gaz_list: %s" % gaz_list)
            # exit(0)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(gaz_list, batch_word, batch_biword, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            sample_loss += loss.data[0]
            total_loss += loss.data[0]
            batch_loss += loss
            # originally 500
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                logger.info("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        logger.info("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logger.info("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # exit(0)
        # continue
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish
        if seg:
            current_score = f
            logger.info("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            logger.info("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))
        if current_score > best_dev:
            if seg:
                logger.info("Exceed previous best f score: %s" % best_dev)
            else:
                logger.info("Exceed previous best acc score: %s" % best_dev)
        if current_score > best_dev or idx == data.HP_iteration - 1:
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            logger.info("model name: " + model_name)
            if idx > 3:
                save_data_setting(data, save_data_name + '.' + str(idx) + '.dset')
            best_dev = current_score
        ## decode test
        # speed, acc, p, r, f, _ = evaluate(data, model, "test")
        # test_finish = time.time()
        # test_cost = test_finish - dev_finish
        # if seg:
        #     logger.info("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f))
        # else:
        #     logger.info("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        gc.collect()
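# Several variants above (those with batch_size = 1) rely on gradient
# accumulation: per-instance losses are summed into batch_loss, and one
# optimizer step is taken once data.HP_batch_size instances have been seen.
# Distilled, the pattern looks like this (names mirror the loops above; a
# sketch for illustration, not repo code):
def accumulate_and_step_sketch(model, optimizer, instances, accum_size):
    batch_loss = 0
    for i, instance in enumerate(instances, start=1):
        loss, _ = model.neg_log_likelihood_loss(*instance)
        batch_loss += loss  # keep the graph alive; sum the per-instance losses
        if i % accum_size == 0:
            batch_loss.backward()  # one backward pass over the summed loss
            optimizer.step()
            model.zero_grad()
            batch_loss = 0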