Beispiel #1
0
def train(data, save_model_dir, seg=True):
    """Run SGD training of a ``SeqModel`` over ``data.index_data``.

    Every instance is processed on its own: the loss is back-propagated and
    the optimizer stepped immediately after each forward pass.

    Args:
        data: Data/config holder. Reads ``HP_lr``, ``HP_momentum``,
            ``HP_iteration``, ``HP_gpu`` and ``index_data``.
        save_model_dir: Not used by this function; kept for the interface.
        seg: Not used by this function; kept for the interface.
    """
    model = SeqModel(data)
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(trainable, lr=data.HP_lr, momentum=data.HP_momentum)
    # Only a batch size of one instance is supported here.
    batch_size = 1
    for epoch in range(data.HP_iteration):
        epoch_start = time.time()  # timestamp taken but not reported
        print("Epoch: %s/%s" %(epoch,data.HP_iteration))
        # Switch to training mode and clear stale gradients before the epoch.
        model.train()
        model.zero_grad()
        train_num = len(data.index_data)
        for batch_id, start in enumerate(range(0, train_num, batch_size)):
            stop = min(start + batch_size, train_num)
            instance = data.index_data[start:stop]
            if not instance:
                continue
            (gaz_list, batch_word, batch_entity, batch_gloss,
             batch_label, mask) = batchify_with_label(instance, data.HP_gpu)
            loss, predict = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_entity, batch_gloss,
                batch_label, mask)
            print("Batch_Id:",batch_id," Loss:",loss)
            loss.backward()
            optimizer.step()
            model.zero_grad()
            # Both helpers are called for their side effects; results are ignored.
            predict_check(predict, batch_label, mask)
            evaluate_result(predict, batch_label, mask)
        print("\n")
Beispiel #2
0
 def __init__(self, model_dir, dset_dir, gpu, seg):
     """Restore the data settings and a trained ``SeqModel`` from disk.

     Args:
         model_dir: Path of the saved model ``state_dict``.
         dset_dir: Path handed to ``load_data_setting``.
         gpu: Truthy to use the GPU; stored on ``data.HP_gpu``.
         seg: Not used here; kept for interface compatibility.
     """
     self.model_dir = model_dir
     self.dset_dir = dset_dir
     self.data = load_data_setting(dset_dir)
     self.data.HP_gpu = gpu
     self.model = SeqModel(self.data)
     # A checkpoint written on a GPU machine cannot be deserialized on a
     # CPU-only host unless its storages are remapped to CPU.
     map_location = None if gpu else (lambda storage, loc: storage)
     self.model.load_state_dict(
         torch.load(self.model_dir, map_location=map_location))
Beispiel #3
0
def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Load a saved ``SeqModel`` and decode the ``name`` split of ``data``.

    Args:
        model_dir: Path of the saved model ``state_dict``.
        data: Data/config holder; ``HP_gpu`` is overwritten with ``gpu``.
        name: Name of the data split, forwarded to ``evaluate``.
        gpu: Truthy to use the GPU.
        seg: If true, precision/recall/F are printed alongside accuracy.

    Returns:
        The prediction results returned by ``evaluate``.
    """
    data.HP_gpu = gpu
    # Parenthesised %-formatting prints the same text on Python 2 and 3.
    print("Load Model from file:  %s" % (model_dir,))
    model = SeqModel(data)
    # A model trained on GPU but loaded on CPU needs its storages remapped;
    # ``map_location`` belongs to ``torch.load``, not ``load_state_dict``.
    map_location = None if gpu else (lambda storage, loc: storage)
    model.load_state_dict(torch.load(model_dir, map_location=map_location))

    print("Decode %s data ..." % (name))
    start_time = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    end_time = time.time()
    time_cost = end_time - start_time
    if seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results
Beispiel #4
0
def load_model(model_dir, data, gpu):
    """Build a ``SeqModel`` for ``data`` and load its weights from ``model_dir``.

    Args:
        model_dir: Path of the saved model ``state_dict``.
        data: Data/config holder; ``HP_gpu`` is overwritten with ``gpu``.
        gpu: Truthy to use the GPU. When falsy the checkpoint's storages are
            remapped to CPU so a GPU-trained model can still be loaded.

    Returns:
        The ``SeqModel`` with the loaded weights.
    """
    data.HP_gpu = gpu
    # The original had a bare ``print`` followed by a dangling tuple, so the
    # message was never shown.
    print("Load Model from file:  %s" % (model_dir,))
    model = SeqModel(data)
    if not gpu:
        state_dict = torch.load(
            model_dir, map_location=lambda storage, loc: storage)
    else:
        state_dict = torch.load(model_dir)
    model.load_state_dict(state_dict)
    return model
Beispiel #5
0
def load_model_decode(save_dir, data, name, seg=True):
    """Load a saved ``SeqModel`` and decode the ``name`` split, logging the scores.

    Args:
        save_dir: Path of the saved model ``state_dict``.
        data: Data/config holder used to build the model and to evaluate.
        name: Name of the data split, forwarded to ``evaluate``.
        seg: If true, precision/recall/F are logged alongside accuracy.

    Returns:
        The prediction results returned by ``evaluate``.
    """
    logger.info("Load Model from file: " + save_dir)
    model = SeqModel(data)
    weights = torch.load(save_dir)
    model.load_state_dict(weights)
    logger.info(F"Decode {name} data ...")

    started = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    time_cost = time.time() - started

    if seg:
        summary = (
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        summary = ("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
                   (name, time_cost, speed, acc))
    logger.info(summary)
    return pred_results
Beispiel #6
0
def load_model_decode(save_dir, data):
    """Load a saved ``SeqModel`` and log its scores on the dev and test splits.

    Args:
        save_dir: Path of the saved model ``state_dict``.
        data: Data/config holder used to build the model and to evaluate.
    """
    logger.info("Load Model from file: " + save_dir)
    model = SeqModel(data)
    model.load_state_dict(torch.load(save_dir))

    # Dev is decoded first, then test; each split logs the same report line.
    announcements = (
        ('dev', "Decode dev data ..."),
        ('test', "Decode test data ..."),
    )
    for split, announcement in announcements:
        logger.info(announcement)
        started = time.time()
        speed, acc, p, r, f, pred_results = evaluate(data, model, split)
        time_cost = time.time() - started
        logger.info(("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(split, time_cost, speed, acc, p, r, f)))
Beispiel #7
0
def train(data, save_model_dir, seg=True):
    """Train a ``SeqModel`` with SGD, scoring dev and test after every epoch.

    The data settings are first saved to ``save_model_dir + ".dset"``. Each
    time the dev score improves, the model's ``state_dict`` is written to
    ``save_model_dir + "." + str(epoch) + ".model"``.

    Args:
        data: Data/config holder. Reads ``HP_lr``, ``HP_momentum``,
            ``HP_lr_decay``, ``HP_batch_size``, ``HP_gpu`` and ``train_Ids``.
            ``train_Ids`` is shuffled in place and ``HP_iteration`` is
            overwritten with 100.
        save_model_dir: Path prefix for the saved settings and checkpoints.
        seg: If true the dev F-score selects the best model, otherwise the
            dev accuracy does.
    """
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    print("finished built model.")
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        # Instances are fed one at a time; their losses are summed and the
        # weights updated once every data.HP_batch_size instances.
        batch_size = 1
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``loss.data[0]`` fails on 0-dim tensors in newer PyTorch; use
            # ``item()`` when available and fall back for old releases.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                sys.stdout.flush()
                sample_loss = 0
            # NOTE(review): instances after the last multiple of
            # HP_batch_size are accumulated but never back-propagated.
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score: %s" % (best_dev,))
            else:
                print("Exceed previous best acc score: %s" % (best_dev,))
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
Beispiel #8
0
def load_model_inference(model_dir, data, name, gpu, seg=True):
    """Load a saved ``SeqModel`` and run ``inference`` on ``data``.

    Args:
        model_dir: Path of the saved model ``state_dict``.
        data: Data/config holder; ``HP_gpu`` is overwritten with ``gpu``.
        name: Not used here; kept for interface compatibility.
        gpu: Truthy to use the GPU.
        seg: Not used here; kept for interface compatibility.

    Returns:
        Whatever ``inference(data, model)`` returns.
    """
    data.HP_gpu = gpu
    model = SeqModel(data)
    # Remap storages to CPU when no GPU is requested so that a checkpoint
    # saved on a GPU machine can still be loaded.
    map_location = None if gpu else (lambda storage, loc: storage)
    model.load_state_dict(torch.load(model_dir, map_location=map_location))
    return inference(data, model)
Beispiel #9
0
def train(data, save_model_dir, seg=True):
    """Train a ``SeqModel`` with mini-batch SGD and plot progress to visdom.

    After every epoch the model is scored on the train, dev and test splits.
    Whenever the test score improves, the ``state_dict`` is written to
    ``save_model_dir + '/model' + str(epoch)`` and the scores to
    ``save_model_dir + '/eval' + str(epoch) + '.txt'``.

    Args:
        data: Data/config holder. Reads ``HP_lr``, ``HP_momentum``,
            ``HP_lr_decay``, ``HP_batch_size``, ``HP_clip``, ``HP_gpu`` and
            ``train_Ids``. ``train_Ids`` is shuffled in place and
            ``HP_iteration`` is overwritten with 100.
        save_model_dir: Directory/prefix for the saved settings, checkpoints
            and evaluation files.
        seg: If true the test F-score selects the best model, otherwise the
            test accuracy does.
    """
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    model = SeqModel(data)
    optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum)
    best_test = -1
    data.HP_iteration = 100
    vis = visdom.Visdom()
    losses = []
    all_F = [[0, 0, 0]]
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``loss.data[0]`` fails on 0-dim tensors in newer PyTorch; use
            # ``item()`` when available and fall back for old releases.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
                sys.stdout.flush()
                losses.append(sample_loss / 50.0)
                vis.line(np.array(losses), X=np.arange(len(losses)),
                         win='loss', opts={'title': 'loss', 'legend': ['loss']})
                sample_loss = 0
            loss.backward()
            if data.HP_clip:
                torch.nn.utils.clip_grad_norm(model.parameters(), 50.0)
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s" % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f_train, _ = evaluate(data, model, "train")

        speed, acc, p, r, f_dev, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        # Measured from the end of training, so this also covers the
        # evaluation on the train split above.
        dev_cost = dev_finish - epoch_finish

        if seg:
            print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (dev_cost, speed, acc, p, r, f_dev))
        else:
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))

        ## decode test
        speed, acc, p, r, f_test, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            current_score = f_test
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (test_cost, speed, acc, p, r, f_test))
        else:
            current_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        # NOTE(review): the checkpoint is selected on the *test* score here,
        # not the dev score.
        if current_score > best_test:
            if seg:
                print("Exceed previous best f score: %s" % (best_test,))
            else:
                print("Exceed previous best acc score: %s" % (best_test,))
            model_name = save_model_dir + '/model' + str(idx)
            torch.save(model.state_dict(), model_name)
            best_test = current_score
            # Text mode: the scores are written as ``str``, which a binary
            # handle rejects on Python 3.
            with open(save_model_dir + '/eval' + str(idx) + ".txt", 'w') as eval_file:
                if seg:
                    eval_file.write("acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (acc, p, r, f_test))
                else:
                    eval_file.write("acc: %.4f" % acc)

        if seg:
            print("Current best f score: %s" % (best_test,))
        else:
            print("Current best acc score: %s" % (best_test,))

        all_F.append([f_train * 100.0, f_dev * 100.0, f_test * 100.0])
        Fwin = 'F-score of {train, dev, test}'
        vis.line(np.array(all_F), win=Fwin,
                 X=np.arange(len(all_F)),
                 opts={'title': Fwin, 'legend': ['train', 'dev', 'test']})
        gc.collect()
Beispiel #10
0
def train(data, save_model_dir, seg=True):
    """Train a ``SeqModel`` with mini-batch SGD, scoring dev after every epoch.

    Each time the dev score improves, the model's ``state_dict`` is written
    to ``save_model_dir + "." + str(epoch) + ".model"``.

    Args:
        data: Data/config holder. Reads ``HP_lr``, ``HP_momentum``,
            ``HP_lr_decay``, ``HP_batch_size``, ``HP_gpu`` and ``train_Ids``.
            ``train_Ids`` is shuffled in place and ``HP_iteration`` is
            overwritten with 100.
        save_model_dir: Path prefix for the saved checkpoints.
        seg: If true the dev F-score selects the best model, otherwise the
            dev accuracy does.
    """
    logger.info("Training model...")
    data.show_data_summary()
    model = SeqModel(data)
    logger.info("finished built model.")
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    #  start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        logger.info(("Epoch: %s/%s" % (idx, data.HP_iteration)))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        model.train()
        model.zero_grad()
        train_num = len(data.train_Ids)
        total_batch = train_num // data.HP_batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * data.HP_batch_size
            end = min((batch_id + 1) * data.HP_batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``loss.data[0]`` fails on 0-dim tensors in newer PyTorch; use
            # ``item()`` when available and fall back for old releases.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                logger.info((
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token)))
                sys.stdout.flush()
                sample_loss = 0
            # NOTE(review): a final partial batch (train_num not a multiple
            # of HP_batch_size) never satisfies this test, so its loss is
            # accumulated but not back-propagated.
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        logger.info(
            ("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
             (end, temp_cost, sample_loss, right_token, whole_token,
              (right_token + 0.) / whole_token)))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logger.info((
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss)))
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            logger.info((
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f)))
        else:
            current_score = acc
            logger.info(("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                         (dev_cost, speed, acc)))

        if current_score > best_dev:
            if seg:
                logger.info(F"Exceed previous best f score: {best_dev}")
            else:
                logger.info(F"Exceed previous best acc score: {best_dev}")
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        gc.collect()
Beispiel #11
0
def train(data,
          save_model_dir,
          seg=True,
          ori_model_dir=None,
          use_attn=False,
          use_w2c=False):
    """Train a ``SeqModel`` with SGD, scoring dev after every epoch.

    A checkpoint is written to ``save_model_dir + "." + str(epoch) + ".model"``
    whenever the dev score improves and also on the final epoch. From epoch
    index 4 onward the data settings are saved next to each checkpoint.

    Args:
        data: Data/config holder. Reads ``HP_gpu``, ``HP_lr``,
            ``HP_momentum``, ``HP_lr_decay``, ``HP_batch_size`` and
            ``train_Ids``. ``train_Ids`` is shuffled in place and
            ``HP_iteration`` is overwritten with 100.
        save_model_dir: Path prefix for the saved checkpoints and settings.
        seg: If true the dev F-score selects the best model, otherwise the
            dev accuracy does.
        ori_model_dir: Not used here; kept for interface compatibility.
        use_attn: Forwarded to ``SeqModel``.
        use_w2c: Forwarded to ``SeqModel``.
    """
    logger.info("Training model...")

    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    model = SeqModel(data, use_attn=use_attn, use_w2c=use_w2c)
    if data.HP_gpu:
        model = model.cuda()
    logger.info("finished built model.")

    parameters = [p for p in model.parameters() if p.requires_grad]
    sgd_optimizer = optim.SGD(parameters,
                              lr=data.HP_lr,
                              momentum=data.HP_momentum)
    best_dev = -1
    data.HP_iteration = 100
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        logger.info("Epoch: %s/%s" % (idx, data.HP_iteration))

        optimizer = lr_decay(sgd_optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        # Instances are fed one at a time; their losses are summed and the
        # weights updated once every data.HP_batch_size instances.
        batch_size = 1
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            loss, tag_seq = model.neg_log_likelihood_loss(
                gaz_list, batch_word, batch_biword, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # ``loss.data[0]`` fails on 0-dim tensors in newer PyTorch; use
            # ``item()`` when available and fall back for old releases.
            loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                logger.info(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))

                sys.stdout.flush()
                sample_loss = 0
            # NOTE(review): instances after the last multiple of
            # HP_batch_size are accumulated but never back-propagated.
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        logger.info(
            "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
            (end, temp_cost, sample_loss, right_token, whole_token,
             (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        logger.info(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            logger.info(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            logger.info("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                        (dev_cost, speed, acc))

        improved = current_score > best_dev
        if improved:
            # Pass the value as a logging argument; wrapping message and
            # value in one tuple logged the tuple's repr instead.
            if seg:
                logger.info("Exceed previous best f score: %s", best_dev)
            else:
                logger.info("Exceed previous best acc score: %s", best_dev)
        # The last epoch is always checkpointed, even without improvement.
        if improved or idx == data.HP_iteration - 1:
            model_name = save_model_dir + '.' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)

            logger.info("model name: " + model_name)

            if idx > 3:
                save_data_setting(data,
                                  save_data_name + '.' + str(idx) + '.dset')
            best_dev = current_score
        gc.collect()
Beispiel #12
0
def train(data, save_model_dir, save_dset_path, use_ple_lstm, seg=True, epochs=100, new_tag_scheme=False):
    """Train a ``SeqModel`` or ``PleSeqModel`` with Adam, scoring dev (and test) every epoch.

    Each time the dev score improves, the ``state_dict`` is written to
    ``save_model_dir + '/latticelstm' + str(epoch) + ".model"``. After the
    last epoch the best checkpoint is copied to ``save_model_dir + '/best.model'``.

    Relies on the module-level names ``device``, ``args`` and ``logger``.

    Args:
        data: Data/config holder. Reads ``HP_batch_size``, ``HP_gpu``,
            ``train_Ids``, the label alphabets and ``tagScheme``.
            ``train_Ids`` is shuffled in place and ``HP_iteration`` is
            overwritten with ``epochs``.
        save_model_dir: Directory that receives the checkpoints.
        save_dset_path: Path passed to ``save_data_setting``.
        use_ple_lstm: Forwarded to ``PleSeqModel`` when ``new_tag_scheme``.
        seg: If true the dev F-score selects the best model, otherwise the
            dev accuracy does.
        epochs: Number of training epochs.
        new_tag_scheme: If true, train ``PleSeqModel`` with the multi-task
            tagging scheme instead of ``SeqModel``.
    """

    def report(message):
        # Every evaluation line goes to both stdout and the logger.
        print(message)
        logger.info(message)

    print("Training model...")
    data.show_data_summary()
    save_data_setting(data, save_dset_path)
    if new_tag_scheme:  # use the multi-task tagging scheme
        model = PleSeqModel(data, use_ple_lstm=use_ple_lstm)
    else:
        model = SeqModel(data)
    model.to(device)
    print("finished built model.")
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, lr=1e-3, weight_decay=1e-5)
    best_dev = -1
    data.HP_iteration = epochs
    best_model_name = None
    ## start training
    best_test_prf1 = (0, 0, 0)
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print(("Epoch: %s/%s" % (idx, data.HP_iteration)))
        sample_loss = 0
        batch_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        # Instances are fed one at a time; their losses are summed and the
        # weights updated once every data.HP_batch_size instances.
        batch_size = 1
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            # The per-instance word/label text decoding that used to sit here
            # only fed commented-out debug prints, so it was dropped.
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, \
                batch_span_label, batch_attr_start_label, batch_attr_end_label \
                = batchify_with_label(instance, data.HP_gpu, data.span_label_alphabet.get_index('O'),
                                      data.attr_label_alphabet.get_index(ATTR_NULLKEY))

            if new_tag_scheme:
                loss, span_tag_seq, attr_start_output, attr_end_output \
                    = model.neg_log_likelihood_loss(gaz_list,
                                                    batch_word,
                                                    batch_biword,
                                                    batch_wordlen,
                                                    batch_char,
                                                    batch_charlen,
                                                    batch_charrecover,
                                                    batch_span_label,
                                                    batch_attr_start_label,
                                                    batch_attr_end_label,
                                                    mask)
                tag_seq = convert_attr_seq_to_ner_seq(attr_start_output, attr_end_output, data.label_alphabet,
                                                      data.attr_label_alphabet, data.tagScheme)
            else:
                loss, tag_seq = model.neg_log_likelihood_loss(gaz_list,
                                                              batch_word,
                                                              batch_biword,
                                                              batch_wordlen,
                                                              batch_char,
                                                              batch_charlen,
                                                              batch_charrecover,
                                                              batch_label,
                                                              mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            loss_value = loss.item()
            sample_loss += loss_value
            total_loss += loss_value
            batch_loss += loss

            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
                    end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token)))
                sys.stdout.flush()
                sample_loss = 0
            # NOTE(review): instances after the last multiple of
            # HP_batch_size are accumulated but never back-propagated.
            if end % data.HP_batch_size == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print(("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
            end, temp_cost, sample_loss, right_token, whole_token, (right_token + 0.) / whole_token)))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s" % (
            idx, epoch_cost, train_num / epoch_cost, total_loss)))
        speed, acc, p, r, f, _ = evaluate(data, model, "dev", new_tag_scheme)
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        current_score = f if seg else acc

        is_best_model = False
        if current_score > best_dev:
            if seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = save_model_dir + '/latticelstm' + str(idx) + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            best_model_name = model_name
            is_best_model = True
        logger.info("Epoch{}, Is best model: {}".format(idx, is_best_model))
        print("Is best model: {}".format(is_best_model))
        if seg:
            report("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                dev_cost, speed, acc, p, r, f))
        else:
            report("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" % (dev_cost, speed, acc))

        ## decode test
        if args.dataset != 'msra':
            speed, acc, p, r, f, _ = evaluate(data, model, "test", new_tag_scheme)
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            if seg:
                report("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                    test_cost, speed, acc, p, r, f))
            else:
                report("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" % (test_cost, speed, acc))
        # NOTE(review): when args.dataset == 'msra' the test split is not
        # evaluated, so p/r/f here are still the dev scores.
        if is_best_model and seg:
            best_test_prf1 = (p, r, f)
        gc.collect()
    if best_model_name:
        shutil.copy(best_model_name, save_model_dir + '/best.model')
        logger.info(f"copy {best_model_name} as best model")
        logger.info("Test results: p: %.4f, r: %.4f, f1: %.4f" % (best_test_prf1[0], best_test_prf1[1], best_test_prf1[2]))
Beispiel #13
0
# -*- coding: utf-8 -*-
Beispiel #14
0
def train(data, save_model_dir, seg=True):
    """Train ``model.examiner`` with a reward-weighted loss and dump metric curves.

    What the code below does, in order:

    * Prints the data summary and saves the data settings to
      ``save_model_dir + ".dset"``.
    * Builds ``SeqModel(data)`` and an SGD optimizer over
      ``model.examiner.parameters()`` only.
    * Overrides ``data.HP_iteration`` (5) and ``data.HP_lr`` (0.1), but runs a
      single epoch (``range(1)``).
    * Batches 0-14 are only registered via ``model.add_instance`` with a
      placeholder score; the model is evaluated on "test" at batch 14.
    * At batch 15 a warm-up runs: 10 rounds of two passes over earlier batches
      that back-propagate ``model.reinforment_supervised`` scores.
    * From batch 15 on, each batch is scored with ``model.crf_loss``, registered
      with ``model.add_instance``, and the examiner is updated with
      ``-selection_score * (dev_acc - running mean of earlier dev_acc)``.
    * Training stops once 100 evaluation points have been recorded.
    * ``[acc_list, p_list, r_list, f_list, map_list]`` is pickled to
      ``exp_list.pkl`` in the current working directory.

    Args:
        data: project data object; this function reads ``HP_lr``,
            ``HP_momentum``, ``HP_lr_decay``, ``HP_batch_size``, ``HP_gpu`` and
            ``train_Ids`` from it and overwrites ``HP_iteration`` / ``HP_lr``.
        save_model_dir: path prefix used for the ``.dset`` settings file.
        seg: when true the f score is used as the model-selection score,
            otherwise the accuracy.

    Returns:
        None. Results are printed and written to ``exp_list.pkl``.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Training model...")
    data.show_data_summary()
    save_data_name = save_model_dir + ".dset"
    save_data_setting(data, save_data_name)
    loss_function = nn.NLLLoss()
    model = SeqModel(data)
    #model=copy.deepcopy(premodel)
    # Only the examiner's parameters are optimized here.
    optimizer = optim.SGD(model.examiner.parameters(),
                          lr=data.HP_lr,
                          momentum=data.HP_momentum)
    best_dev = -1
    # NOTE: overrides whatever iteration count the caller configured; the
    # value is only used in the "Epoch: x/y" message because the loop below
    # runs range(1).
    data.HP_iteration = 5
    USE_CRF = True
    ## start training
    # Curves dumped at the end of the function.
    acc_list = []
    p_list = []
    r_list = []
    f_list = []
    map_list = []
    #random.seed(2)
    print("total", )
    # NOTE: overrides the configured learning rate after the optimizer was
    # built; lr_decay() below receives this value.
    data.HP_lr = 0.1
    for idx in range(1):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        total_rl_loss = 0
        total_ml_loss = 0
        total_num = 0.0
        total_reward = 0.0
        right_token_reform = 0
        whole_token_reform = 0
        #random.seed(2)
        #random.shuffle(data.train_Ids)
        #random.seed(seed_num)
        ## set model in train model
        model.examiner.train()
        model.examiner.zero_grad()
        model.topk = 5
        model.examiner.topk = 5
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        gamma = 0
        cnt = 0
        # click counts model.topk per processed batch and triggers an
        # evaluation each time it reaches 10; sum_click is the running total.
        click = 0
        sum_click = 0
        sum_p_at_5 = 0.0
        sum_p = 1.0
        #if idx==0:
        #    selected_data=[batch_id for batch_id in range(0,total_batch//1000)]
        tag_mask = None
        batch_ids = list(range(total_batch))
        for batch_idx in range(0, total_batch):
            # if end%500 == 0:
            #     temp_time = time.time()
            #     temp_cost = temp_time - temp_start
            #     temp_start = temp_time
            #     print("     Instance: %s; Time: %.2fs; loss: %.4f;"%(end, temp_cost, sample_loss))
            #     sys.stdout.flush()
            #     sample_loss = 0
            #updating the crf by selected position
            batch_id = batch_ids[batch_idx]

            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue

            update_once = False

            start_time = time.time()
            #selected_data.append(batch_id)

            if batch_id == 15:
                # One-off warm-up of the examiner on already registered
                # batches. NOTE: this rebinds `instance`, `start` and `end`;
                # the `batch_id >= 15` branch below recomputes them.

                for j in range(0, 10):
                    __tot = 0.0
                    for i in range(5, 15):
                        model.sample_train(0, i)
                        batch_id_temp = batch_ids[i]
                        start = batch_id_temp * batch_size
                        end = (batch_id_temp + 1) * batch_size
                        instance = data.train_Ids[start:end]

                        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                            instance, data.HP_gpu)
                        real_batch_label = batch_label
                        batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, batch_label,
                            mask)

                        #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)
                        _tag_mask = tag_mask

                        pos_mask, score = model.reinforment_supervised(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, real_batch_label,
                            tag_seq, tag_prob, mask)
                        __tot += score.sum()

                        score.sum().backward()
                        optimizer.step()
                        model.examiner.zero_grad()

                    # The accumulator is reset, so the "score" printed below
                    # only reflects the second (reverse) pass.
                    __tot = 0.0
                    for i in range(10, -1, -1):
                        print(i)
                        model.sample_train(i + 1, 15)
                        batch_id_temp = batch_ids[i]
                        start = batch_id_temp * batch_size
                        end = (batch_id_temp + 1) * batch_size
                        instance = data.train_Ids[start:end]

                        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                            instance, data.HP_gpu)
                        real_batch_label = batch_label
                        batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, batch_label,
                            mask)

                        #_pred_label, _gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)
                        _tag_mask = tag_mask

                        pos_mask, score = model.reinforment_supervised(
                            batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, real_batch_label,
                            tag_seq, tag_prob, mask)
                        __tot += score.sum()

                        score.sum().backward()
                        optimizer.step()
                        model.examiner.zero_grad()
                    print("score", __tot / 14)
                model.train()
            if batch_id >= 15:
                t = np.random.randint(0, len(model.X_train))
                # NOTE(review): np.random.rand() is never negative, so this
                # condition is always true and the replay branch in the
                # `else` below is currently unreachable.
                if np.random.rand() > -1 or model.tag_mask_list[t].sum(
                ).data[0] <= 5:
                    t = np.random.randint(len(model.X_train), total_batch)
                    #This is for seq choosing
                    #if batch_id>total_batch//100+100:
                    #    batch_id=batch_ids[batch_idx]
                    # tmin=-1
                    # for i in range(len(model.X_train),total_batch):
                    #     batch_id=batch_ids[i]
                    #     start = batch_id*batch_size
                    #     end = (batch_id+1)*batch_size
                    #     if end >train_num:
                    #         end = train_num
                    #     instance = data.train_Ids[start:end]
                    #     if len(instance)==0:
                    #         continue
                    #     batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask  = batchify_with_label(instance, data.HP_gpu)
                    #     batch_label,tag_seq,tag_mask,score,indices,scores_ref=model.crf_loss(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)
                    #     if tmin==-1 or (scores_ref.cpu().data[0]>=tmin):
                    #         tmin=scores_ref.cpu().data[0]
                    #         t=i
                    # temp=batch_ids[batch_idx]
                    # batch_ids[batch_idx]=batch_ids[t]
                    # batch_ids[t]=temp

                    # Re-derive the current batch (the warm-up above may have
                    # overwritten `instance`).
                    batch_id = batch_ids[batch_idx]
                    start = batch_id * batch_size
                    end = (batch_id + 1) * batch_size
                    if end > train_num:
                        end = train_num
                    instance = data.train_Ids[start:end]

                    batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                        instance, data.HP_gpu)
                    real_batch_label = batch_label
                    batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                        batch_word, batch_wordlen, batch_char, batch_charlen,
                        batch_charrecover, batch_label, mask)
                    model.add_instance(batch_word, batch_label, tag_mask,
                                       instance, scores_ref.data[0])

                    #pred_label, gold_label = recover_label(Variable(tag_seq.cuda()), real_batch_label.cuda(),mask.cuda(), data.label_alphabet, batch_wordrecover)

                    # u=False
                    # for x in pred_label:
                    #     if not gold_label==pred_label:
                    #         u=True
                    #         break
                    # #if u==True:
                    # print "mask", tag_mask
                    # print "gold", gold_label
                    # print "pred", pred_label

                else:
                    # tmin=model.scores_refs[t]
                    # for i in range(len(model.X_train)):
                    #     if model.scores_refs[i]<=tmin:
                    #         tmin=model.scores_refs[i]
                    #         t=i

                    # Replay a previously stored instance and refresh it.
                    instance = model.instances[t]
                    batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                        instance, data.HP_gpu)
                    real_batch_label = batch_label
                    batch_label, tag_seq, tag_prob, tag_mask, score, indices, scores_ref = model.crf_loss(
                        batch_word,
                        batch_wordlen,
                        batch_char,
                        batch_charlen,
                        batch_charrecover,
                        batch_label,
                        mask,
                        t=t)
                    model.readd_instance(batch_label, mask, tag_mask, t,
                                         scores_ref.data[0])

                print("score", score)
                #sum_p_at_5+=score
                sum_p += 1.0

                end_time = time.time()
                if click + 5 >= 10:
                    print("time", end_time - start_time)
            else:
                # First 15 batches: register the instance with a placeholder
                # score; tag_mask is still None unless set earlier.
                batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                    instance, data.HP_gpu)
                model.add_instance(batch_word, batch_label, tag_mask, instance,
                                   -100000.0)

            #print("Y_train",model.Y_train[-1])
            # if batch_id>=total_batch//100+15:
            #     for i in range(15):
            #         model.train()
            #         model.reevaluate_instance(mask)
            #print("loss",loss)
            #print(batch_wordlen)
            if batch_id < 15:
                if batch_id == 14:
                    model.train()
                    #print("Y_train",model.Y_train)
                    print(batch_ids)
                    speed, acc, p, r, f, _ = evaluate(data, model, "test")
                    print(len(model.Y_train))
                    print("after", acc)
                    print("Check", f)
                    acc_list.append(acc)
                    p_list.append(p)
                    r_list.append(r)
                    # NOTE: f_list records sum_click, not the f score.
                    f_list.append(sum_click)
                    sum_p_at_5 = 0.0
                    sum_p = 1.0
                # No examiner update for the first 15 batches.
                continue
            if batch_id == 15:
                model.train()
                #print("Y_train",model.Y_train)
                print(batch_ids)
                speed, acc, p, r, f, _ = evaluate(data, model, "test")
                print(len(model.Y_train))
                print("after", acc)
                print("Check", f)
                acc_list.append(acc)
                p_list.append(p)
                r_list.append(r)
                f_list.append(sum_click)
                sum_p_at_5 = 0.0
                sum_p = 1.0

            click += model.topk
            sum_click += model.topk

            #click+=batch_wordlen[0]
            #sum_click+=batch_wordlen[0]

            if click >= 10:
                # Record one evaluation point on the "test" split.
                model.train()
                speed, acc, p, r, f, _ = evaluate(data, model, "test")
                print("Step:", len(model.Y_train))
                print("after", acc)
                acc_list.append(acc)
                p_list.append(p)
                r_list.append(r)
                f_list.append(sum_click)
                sum_p_at_5 = 0.0
                sum_p = 1.0

                click -= 10
            instance_count += 1

            pos_mask, selection_score, select_reward = model.reinforment_reward(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, real_batch_label, tag_seq, tag_prob, mask)
            # reward_diff is only bound inside this branch; USE_CRF is
            # hard-coded to True above.
            if USE_CRF:
                start_time = time.time()
                t = np.random.randint(1, 10)
                #print("size",total_batch)
                speed, acc, p, r, f, _ = evaluate(data, model, "dev")
                end_time = time.time()
                # Baseline = mean dev accuracy of the previous steps
                # (0.0 on the first step).
                if total_num != 0:
                    ave_scores = total_reward / total_num
                else:
                    ave_scores = 0.0
                total_reward += acc
                total_num += 1

                # print(batch_label)
                sample_scores = torch.from_numpy(np.asarray([acc])).float()
                ave_scores = torch.from_numpy(np.asarray([ave_scores])).float()
                # idx comes from range(1), so the else branch never runs.
                if idx >= 0:
                    reward_diff = Variable(sample_scores - ave_scores,
                                           requires_grad=False)
                else:
                    reward_diff = select_reward
                # Moved to the GPU unconditionally (data.HP_gpu is not
                # consulted here).
                reward_diff = reward_diff.cuda()
            rl_loss = -selection_score  # B

            #if idx>=10:
            #print("rl_loss",rl_loss)
            print("reward", reward_diff)
            rl_loss = torch.mul(rl_loss,
                                reward_diff.expand_as(rl_loss))  #b_size

            #print("reward",reward_diff)
            #rl_loss = rl_loss.sum()

            rl_loss.backward()
            optimizer.step()
            model.examiner.zero_grad()
            # Stop after 100 recorded evaluation points.
            if len(p_list) >= 100:
                break
        # Leaving the epoch loop here also skips the end-of-epoch evaluation.
        if len(p_list) >= 100:
            break

        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("rl_loss", total_rl_loss)
        print("ml_loss", total_ml_loss)
        #print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        # NOTE(review): this scores the "test" split although the messages
        # below are labelled "Dev" -- confirm that this is intended.
        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            # Formatted as one string so the output is identical under the
            # Python 2 print statement and the Python 3 print function.
            if seg:
                print("Exceed previous best f score: %s" % (best_dev,))
            else:
                print("Exceed previous best acc score: %s" % (best_dev,))
            model_name = save_model_dir + '.' + str(idx) + ".model"
            #torch.save(model.state_dict(), model_name)
            best_dev = current_score
        ## decode test

        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if best_dev == current_score:
            best_ = test_cost, speed, acc, p, r, f
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
    # Binary mode is what pickle expects, and the context manager closes the
    # file even if dumping fails.
    with open("exp_list.pkl", "wb") as file_dump:
        pickle.dump([acc_list, p_list, r_list, f_list, map_list], file_dump)