コード例 #1
0
def create_1000_case_test():
    """Encode every test text with the BERT service and save the vectors.

    The texts come from ``pre_deal.get_test_textVector()``. Each text is
    tokenized with NLTK; when it has more than 502 tokens, the text is cut
    at the character position where tokens 500 and 501 (joined by a single
    space) first occur, and both halves are encoded and concatenated along
    axis 0. Otherwise, or when that position cannot be located, the whole
    text is encoded once and padded with a ``(500, 768)`` block of zeros.

    The collected vectors are written to ``test_case_1000.npy`` in the
    current working directory. Nothing is returned.

    NOTE(review): this presumably expects the server to return one
    ``(500, 768)`` matrix per input text so that every entry ends up with
    the same shape -- confirm against the server configuration.
    """
    bc = BertClient(ip='222.19.197.230', port=5555, port_out=5556, check_version=False)
    test_text = pre_deal.get_test_textVector()
    # Stand-in for the missing second segment of texts that are not split.
    zero_vector = np.zeros((500, 768))

    li = []
    for text in test_text:
        tokens = tokenize.word_tokenize(text)

        # Only long texts are split; -1 means "encode as a single segment".
        index = -1
        if len(tokens) > 502:
            index = KMP.KMP_algorithm(text, tokens[500] + " " + tokens[501])

        if index != -1:
            # Two segments: everything before the split point and the rest.
            vector = bc.encode([text[0:index], text[index:]])
            ve = np.concatenate((vector[0], vector[1]), axis=0)
        else:
            # Short text, or split point not found: pad with zeros instead.
            vector = bc.encode([text])
            ve = np.concatenate((vector[0], zero_vector), axis=0)
        li.append(ve.tolist())

    li_vector = np.array(li)
    np.save("test_case_1000.npy", li_vector)
コード例 #2
0
ファイル: get_model_0378.py プロジェクト: daojiaxu/semeval_11
        # Record the smallest and largest value of l1 under the key taken
        # from characters 7..15 of the current label string.
        labels_tag[list_labels[j][7:16]].append(min(l1))
        labels_tag[list_labels[j][7:16]].append(max(l1))
        print("----------------------")
        # Select the token span: when l1 holds a single distinct value above
        # 400, take everything from that position to the end; otherwise take
        # the half-open range [min(l1), max(l1)).
        if (min(l1) == max(l1) and min(l1) > 400):
            li = texts_token[j][min(l1):]
            print(texts_token[j][min(l1):])
        else:
            li = texts_token[j][min(l1):max(l1)]
            print(texts_token[j][min(l1):max(l1)])

        list2 = [str(i) for i in li]  # Use a list comprehension to convert every element to str
        list3 = ' '.join(list2)  # Join the elements into one string, separated by single spaces
        if (list3 == ''):
            # Empty span: nothing to locate, nothing is written.
            pass
        else:
            # Look for the joined span inside the raw text; the comparisons
            # below treat -1 as "not found" (presumably KMP's miss value).
            a = KMP.KMP_algorithm(test_text[j], list3)
            if (a == -1):
                # Fallback: search for the first token of the span only.
                list_gai = str(texts_token[j][min(l1)])
                a = KMP.KMP_algorithm(test_text[j], list_gai)  # start position
                print(list_labels[j][7:16])
                print("值为:" + str(a))
                # End position = start position + length of the joined span.
                b = a + len(list3)
                print("结束值为:" + str(b))
                # str_1 = random.choice(list_tc)
                # NOTE(review): this branch only writes when the fallback
                # search also failed, in which case b == len(list3) - 1.
                # Confirm that emitting a range for a span that was never
                # located is intended, and that the found case is handled
                # outside this excerpt.
                if (a == -1):
                    # Written line: <label id> TAB <start> TAB <end>, where
                    # start is b - 100 clamped to 0.
                    if (b > 100):
                        f.write(list_labels[j][7:16] + '\t' + str(b - 100) +
                                '\t' + str(b) + '\n')
                    else:
                        f.write(list_labels[j][7:16] + '\t' + str(0) + '\t' +
                                str(b) + '\n')
コード例 #3
0
from nltk import tokenize
import KMP
from bert_serving.client import BertClient
import numpy as np

# Accumulates one encoded vector (as nested lists) per input text.
li = []
# Client for the remote BERT encoding service.
bc = BertClient(ip='222.19.197.230',
                port=5555,
                port_out=5556,
                check_version=False)
# NOTE(review): `pre_deal` is not among the imports visible above -- confirm
# it is imported, otherwise this line raises NameError.
labels_vector_dict, test_text = pre_deal.get_labels_vector()
# Zero block appended in place of a second segment when a text is not split.
zero_vector = np.zeros((500, 768))
for i in range(0, len(test_text)):
    x = tokenize.word_tokenize(test_text[i])
    # Texts with more than 502 tokens are candidates for splitting in two.
    if (len(x) > 502):
        # Character offset of the first occurrence of tokens 500 and 501
        # joined by a space; the check below treats -1 as "not found".
        index = KMP.KMP_algorithm(test_text[i], x[500] + " " + x[501])
        if (index != -1):
            # NOTE(review): `list` shadows the builtin for the rest of the
            # module.
            list = []
            sentence_1 = test_text[i][0:index]
            sentence_2 = test_text[i][index:]
            list.append(sentence_1)
            list.append(sentence_2)
            # Encode both halves and stack their vectors along axis 0.
            vector = bc.encode(list)
            ve = np.concatenate((vector[0], vector[1]), axis=0)
            li.append(ve.tolist())
        else:
            # Split point not found: encode the whole text and pad with zeros.
            list = []
            list.append(test_text[i])
            vector = bc.encode(list)
            ve = np.concatenate((vector[0], zero_vector), axis=0)
            li.append(ve.tolist())