예제 #1
0
def eval(sentence, predict, tags):
    """Return mean precision, recall and F1 of predicted vs. gold entities.

    ``calculate`` (defined elsewhere in the project) is run once on the
    predictions and once on the gold tags; the two results are compared
    index by index and the per-index scores are averaged with ``np.mean``.

    Args:
        sentence: Input passed through to ``calculate`` unchanged.
        predict: Predicted tags; ``predict.permute(1, 0)`` is applied before
            extraction, so an object with a two-axis ``permute`` is expected.
        tags: Gold tags, passed to ``calculate`` as given.

    Returns:
        ``(precision, recall, f1)``, each the mean over all indices.

    Note:
        The name shadows the built-in ``eval``; it is kept because callers
        refer to this function by that name.
    """
    entityres = calculate(sentence, predict.permute(1, 0), id2word, id2tag, [])
    entityall = calculate(sentence, tags, id2word, id2tag, [])

    acc, recall, f1 = [], [], []
    for i, predicted in enumerate(entityres):
        gold = entityall[i]
        hits = len([entity for entity in predicted if entity in gold])

        # An empty side scores 1.0: nothing predicted means nothing wrong,
        # nothing to find means nothing missed.
        # float() is applied to the numerator (not the quotient) so the ratio
        # stays fractional even where `/` is integer division.
        acc_ = float(hits) / len(predicted) if len(predicted) else 1.
        recall_ = float(hits) / len(gold) if len(gold) else 1.

        if acc_ + recall_ == 0.:
            f1_ = 0.
        else:
            f1_ = (2 * acc_ * recall_) / (acc_ + recall_)

        acc.append(acc_)
        recall.append(recall_)
        f1.append(f1_)
    return np.mean(acc), np.mean(recall), np.mean(f1)
예제 #2
0
def train(model, sess):
    """Train ``model`` in ``sess`` and report entity scores every second epoch.

    For each epoch every batch drawn from ``data_train`` is run through
    ``model.train_op``; on every 100th batch the fraction of positions where
    the Viterbi output equals the label is printed.  The checkpoint path is
    printed every epoch; on even-numbered epochs the checkpoint is written
    with ``saver`` and entity-level precision ("zhun"), recall ("zhao") and
    F-score ("f") are printed for the training and the test data.

    Relies on names defined elsewhere in the file: ``training_epochs``,
    ``batch_num``, ``batch_num_test``, ``batch_size``, ``data_train``,
    ``data_test``, ``saver``, ``calculate``, ``id2word`` and ``id2tag``.

    The print calls use a single pre-formatted argument so the output is the
    same whether the file runs under Python 2 or Python 3.

    Args:
        model: Object exposing ``input_data``, ``labels``,
            ``viterbi_sequence`` and ``train_op``.
        sess: Session whose ``run`` evaluates those with a feed dict.
    """
    for epoch in range(training_epochs):
        for batch in range(batch_num):
            x_batch, y_batch = data_train.next_batch(batch_size)

            feed_dict = {model.input_data: x_batch, model.labels: y_batch}
            pre, _ = sess.run([model.viterbi_sequence, model.train_op],
                              feed_dict)
            if batch % 100 == 0:
                n_rows, n_cols = len(y_batch), len(y_batch[0])
                acc = sum(1
                          for i in range(n_rows)
                          for j in range(n_cols)
                          if y_batch[i][j] == pre[i][j])
                print(float(acc) / (n_rows * n_cols))

        path_name = "./model/model" + str(epoch) + ".ckpt"
        print(path_name)
        if epoch % 2 == 0:
            saver.save(sess, path_name)
            print("model has been saved")
            _report_entity_scores(model, sess, data_train, batch_num, "train")
            _report_entity_scores(model, sess, data_test, batch_num_test,
                                  "test")


def _report_entity_scores(model, sess, dataset, n_batches, split_name):
    """Decode ``n_batches`` batches of ``dataset`` and print entity P/R/F."""
    entityres = []
    entityall = []
    for _ in range(n_batches):
        x_batch, y_batch = dataset.next_batch(batch_size)
        feed_dict = {model.input_data: x_batch, model.labels: y_batch}
        pre = sess.run([model.viterbi_sequence], feed_dict)[0]
        entityres = calculate(x_batch, pre, id2word, id2tag, entityres)
        entityall = calculate(x_batch, y_batch, id2word, id2tag, entityall)

    # Predicted entities that also occur among the gold entities.
    jiaoji = [i for i in entityres if i in entityall]

    # Always name the split, so a bare "zhun: 0" line is never ambiguous.
    print(split_name)
    if jiaoji:
        zhun = float(len(jiaoji)) / len(entityres)  # precision
        zhao = float(len(jiaoji)) / len(entityall)  # recall
        print("zhun: %s" % zhun)
        print("zhao: %s" % zhao)
        print("f: %s" % ((2 * zhun * zhao) / (zhun + zhao)))
    else:
        print("zhun: %s" % 0)
예제 #3
0
def train():
    """Train a ``BiLSTM_CRF`` model, score it after each epoch, then save it.

    One SGD step is taken per ``(sentence, tags)`` pair from ``x_train`` /
    ``y_train``.  After every epoch the model is run over ``x_test`` /
    ``y_test`` and entity-level precision, recall and F-score are printed.
    When all epochs are done the whole model object is written to
    ``./model/model.pkl`` with ``torch.save``.

    Relies on names defined elsewhere in the file: ``BiLSTM_CRF``,
    ``word2id``, ``tag2id``, ``id2word``, ``id2tag``, ``EMBEDDING_DIM``,
    ``HIDDEN_DIM``, ``EPOCHS``, ``calculate`` and the train/test data lists.
    """
    # Instantiate the model.
    model = BiLSTM_CRF(len(word2id) + 1, tag2id, EMBEDDING_DIM, HIDDEN_DIM)
    # Stochastic gradient descent.
    optimizer = optim.SGD(model.parameters(), lr=0.005, weight_decay=1e-4)

    for epoch in range(EPOCHS):
        # The evaluation below switches to eval mode; switch back each epoch.
        model.train()
        # `index` counts the steps taken within this epoch, starting at 1.
        for index, (sentence, tags) in enumerate(zip(x_train, y_train),
                                                 start=1):
            model.zero_grad()  # clear gradients left from the previous step
            sentence = torch.tensor(sentence, dtype=torch.long)
            tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence, tags)  # model's own loss
            loss.backward()
            optimizer.step()
            if index % 300 == 0:
                print("epoch", epoch, "batch", index)

        # Evaluate on the test data after each epoch.
        entityres = []  # entities extracted from the predictions
        entityall = []  # entities extracted from the gold tags
        model.eval()
        # No gradients are needed for scoring; skip autograd bookkeeping.
        with torch.no_grad():
            for sentence, tags in zip(x_test, y_test):
                sentence = torch.tensor(sentence, dtype=torch.long)
                tags = torch.tensor([tag2id[t] for t in tags],
                                    dtype=torch.long)
                score, predict = model(sentence)
                entityres = calculate(sentence, predict, id2word, id2tag,
                                      entityres)
                entityall = calculate(sentence, tags.numpy(), id2word, id2tag,
                                      entityall)

        # Predicted entities that also occur among the gold entities.
        jiaoji = [i for i in entityres if i in entityall]
        if jiaoji:
            zhun = float(len(jiaoji)) / len(entityres)  # precision
            zhao = float(len(jiaoji)) / len(entityall)  # recall
            print("test:")
            print("precision:", zhun)
            print("recall:", zhao)
            print("F:", (2 * zhun * zhao) / (zhun + zhao))
        else:
            print("precision:", 0)

    path_name = "./model/model.pkl"
    print(path_name)
    torch.save(model, path_name)
    print("model has been saved")
예제 #4
0
def test():
    """Load the saved model and print entity-level scores on the test data.

    The model object is read from ``./model/model.pkl`` and run over every
    ``(sentence, tags)`` pair of ``x_test`` / ``y_test``.  Precision ("zhun"),
    recall ("zhao") and F-score ("f") are printed.

    Relies on names defined elsewhere in the file: ``x_test``, ``y_test``,
    ``tag2id``, ``id2word``, ``id2tag`` and ``calculate``.

    NOTE(review): ``torch.load`` unpickles the file, which can execute
    arbitrary code; only load model files from a trusted source.
    """
    path_name = "./model/model.pkl"
    model = torch.load(path_name)
    model.eval()

    entityres = []  # entities extracted from the predictions
    entityall = []  # entities extracted from the gold tags
    # No gradients are needed for scoring; skip autograd bookkeeping.
    with torch.no_grad():
        for sentence, tags in zip(x_test, y_test):
            sentence = torch.tensor(sentence, dtype=torch.long)
            # Map the gold tags through tag2id exactly as train() does for
            # its per-epoch evaluation, so calculate() receives the same
            # form of input here as it does there.
            tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)
            score, predict = model(sentence)
            entityres = calculate(sentence, predict, id2word, id2tag,
                                  entityres)
            entityall = calculate(sentence, tags.numpy(), id2word, id2tag,
                                  entityall)

    # Predicted entities that also occur among the gold entities.
    jiaoji = [i for i in entityres if i in entityall]
    if jiaoji:
        zhun = float(len(jiaoji)) / len(entityres)  # precision
        zhao = float(len(jiaoji)) / len(entityall)  # recall
        print("test:")
        print("zhun:", zhun)
        print("zhao:", zhao)
        print("f:", (2 * zhun * zhao) / (zhun + zhao))
    else:
        print("zhun:", 0)
예제 #5
0
        # NOTE(review): this excerpt starts inside a training step; the
        # enclosing function and loop headers are not visible here.
        # Convert the sentence, and the tags mapped through tag2id, to long
        # tensors.
        sentence = torch.tensor(sentence, dtype=torch.long)
        tags = torch.tensor([tag2id[t] for t in tags], dtype=torch.long)

        # The loss is provided by the model's own neg_log_likelihood method.
        loss = model.neg_log_likelihood(sentence, tags)

        loss.backward()
        optimizer.step()
        # Progress line whenever index is a multiple of 300.
        if index % 300 == 0:
            print("epoch", epoch, "index", index)

        # Collect entities from the model output and from the y_test tags.
        entityres = []
        entityall = []
        for sentence, tags in zip(x_test, y_test):
            sentence = torch.tensor(sentence, dtype=torch.long)
            score, predict_tags = model(sentence)
            entityres = calculate(sentence, predict_tags, id2word, id2tag,
                                  entityres)
            # NOTE(review): tags go to calculate as read from y_test, without
            # the tag2id mapping used for training above - confirm that
            # calculate accepts them in this form.
            entityall = calculate(sentence, tags, id2word, id2tag, entityall)

        # Entities present in both lists.
        common_entity = [i for i in entityres if i in entityall]
        if len(common_entity) != 0:
            zhun = float(len(common_entity)) / len(entityres)  # precision
            zhao = float(len(common_entity)) / len(entityall)  # recall
            print("test:")
            # NOTE(review): the printed label "Percision" is a misspelling of
            # "Precision".
            print("Percision:", zhun)
            print("Recall:", zhao)
            print("F1:", (2 * zhun * zhao) / (zhun + zhao))
        else:
            print("Percision:", 0)

    # Save path built from the epoch number; this is an absolute,
    # machine-specific Windows path.
    path_name = "C:\\Users\\Administrator\\PycharmProjects\\ChineseNER\\save_model\\save_model" + str(
        epoch) + ".pkl"
예제 #6
0
                    # NOTE(review): this excerpt starts inside nested loops;
                    # the enclosing function and loop headers are not visible.
                    # Count positions where the label equals the prediction.
                    for j in range(len(y_batch[0])):
                        if y_batch[i][j] == pre[i][j]:
                            acc += 1
                # Matches divided by the number of positions in the batch
                # (Python 2 print statement).
                print float(acc) / (len(y_batch) * len(y_batch[0]))
        # Checkpoint path built from the epoch number.
        path_name = "./model/model" + str(epoch) + ".ckpt"
        print path_name
        saver.save(sess, path_name)
        print "model has been saved"
        # Collect entities from predictions and from labels over batch_num
        # batches drawn from data_train.
        entityres = []
        entityall = []
        for batch in range(batch_num):
            x_batch, y_batch = data_train.next_batch(batch_size)
            feed_dict = {input_data: x_batch, labels: y_batch}
            # sess.run is given a one-element list, so the result is indexed
            # with [0] on the next line.
            pre = sess.run([viterbi_sequence], feed_dict)
            pre = pre[0]
            entityres = calculate(x_batch, pre, id2word, id2tag, entityres)
            entityall = calculate(x_batch, y_batch, id2word, id2tag, entityall)
        # Entities present in both lists.
        jiaoji = [i for i in entityres if i in entityall]
        if len(jiaoji) != 0:
            zhun = float(len(jiaoji)) / len(entityres)  # precision
            zhao = float(len(jiaoji)) / len(entityall)  # recall
            print "train"
            print "zhun:", zhun
            print "zhao:", zhao
            print "f:", (2 * zhun * zhao) / (zhun + zhao)
        else:
            # No overlap: only this line is printed, without the "train" label.
            print "zhun:", 0

        # Reset the accumulators before looping over the test batches; the
        # loop body lies outside this excerpt.
        entityres = []
        entityall = []
        for batch in range(batch_num_test):
예제 #7
0
# Shuffled loaders over the validation and test datasets.
valLoader = DataLoader(dataset=validation, batch_size=batch_size, shuffle=True)
testLoader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)

# Per epoch: a training pass over trainLoader, then a validation pass.
# NOTE(review): the validation loop continues past the end of this excerpt.
for epoch in range(EPOCHS):
    train_loss = 0.0
    # Entity accumulators are reset once per epoch, so the metrics logged
    # below cover every batch seen so far in the epoch, not just the current
    # one.
    entityres = []
    entityall = []
    for i, data in enumerate(trainLoader):
        x, y = data
        # Swap the first two axes of x (x has three axes here).
        x = x.permute(1, 0, 2)
        #y = y.permute(1,0,2)
        optimizer.zero_grad()
        loss = model.neg_log_likelihood(x, y)
        # NOTE(review): loss is added as returned; if it is a tensor attached
        # to the autograd graph this keeps each batch's graph alive - consider
        # accumulating a detached value. TODO confirm.
        train_loss += loss
        # Extra forward pass to obtain predictions for the metrics, made
        # before the backward pass below.
        _, pred = model(x)
        entityres = calculate(x, pred, id2word, id2tag, entityres)
        entityall = calculate(x, y, id2word, id2tag, entityall)
        train_acc, train_recall, train_f1 = matrix(entityres, entityall)
        # Global step index across epochs. NOTE(review): the name `iter`
        # shadows the built-in of the same name.
        iter = epoch * len(trainLoader) + i
        writer.add_scalar("train_loss", train_loss, iter)
        writer.add_scalar("train_acc", train_acc, iter)
        writer.add_scalar("train_recall", train_recall, iter)
        writer.add_scalar("train_f1", train_f1, iter)
        loss.backward()
        optimizer.step()
    # Accumulators for the validation pass.
    val_loss = 0.0
    val_res = []
    val_all = []
    for i, data in enumerate(valLoader):
        x, y = data
        x = x.permute(1, 0, 2)