def create_1000_case_test():
    """Encode each test text into a fixed-size (1000, 768) BERT embedding and
    save the stacked result to ``test_case_1000.npy``.

    Texts longer than 502 word tokens are split at (approximately) the
    500-token boundary and both halves are encoded (2 x 500 rows); shorter
    texts are encoded once and padded with a 500x768 zero block so every
    entry has exactly 1000 rows.

    Side effects: opens a connection to a remote bert-serving instance and
    writes ``test_case_1000.npy`` to the working directory. Returns None.
    """
    vectors = []
    bc = BertClient(ip='222.19.197.230', port=5555, port_out=5556, check_version=False)
    test_text = pre_deal.get_test_textVector()
    # Zero padding appended when a text fits in a single BERT segment.
    zero_vector = np.zeros((500, 768))

    def _encode_with_padding(text):
        # Encode one text as a single segment and pad to 1000 rows.
        vec = bc.encode([text])
        return np.concatenate((vec[0], zero_vector), axis=0)

    for text in test_text:
        tokens = tokenize.word_tokenize(text)
        if len(tokens) > 502:
            # Find the character offset of tokens 500-501 in the raw text so
            # we can split near the 500-token boundary. KMP returns -1 when
            # the two-token pattern does not occur verbatim in the raw text
            # (e.g. tokenization altered spacing/punctuation).
            split_at = KMP.KMP_algorithm(text, tokens[500] + " " + tokens[501])
            if split_at != -1:
                halves = [text[:split_at], text[split_at:]]
                vec = bc.encode(halves)
                combined = np.concatenate((vec[0], vec[1]), axis=0)
            else:
                # Split point not found: fall back to single-segment encode
                # (BERT truncates the overflow) plus zero padding.
                combined = _encode_with_padding(text)
        else:
            combined = _encode_with_padding(text)
        vectors.append(combined.tolist())

    np.save("test_case_1000.npy", np.array(vectors))
# NOTE(review): fragment — this runs inside a loop over index j that is not
# visible here; l1 (a collection of token indices), labels_tag, list_labels,
# texts_token, test_text and the open file handle f all come from the
# enclosing (unseen) scope. Presumably list_labels[j][7:16] is a label/
# document id — TODO confirm against the caller.
# Record the min/max token indices seen for this label id.
labels_tag[list_labels[j][7:16]].append(min(l1))
labels_tag[list_labels[j][7:16]].append(max(l1))
print("----------------------")
if (min(l1) == max(l1) and min(l1) > 400):
    # Degenerate span (single index) far into the text: take everything
    # from that index to the end instead of an empty slice.
    li = texts_token[j][min(l1):]
    print(texts_token[j][min(l1):])
else:
    li = texts_token[j][min(l1):max(l1)]
    print(texts_token[j][min(l1):max(l1)])
list2 = [str(i) for i in li]  # convert every element of the span to str
list3 = ' '.join(list2)  # join the elements into one space-separated string
if (list3 == ''):
    pass
else:
    # Locate the reconstructed span inside the raw text.
    a = KMP.KMP_algorithm(test_text[j], list3)
    if (a == -1):
        # Full span not found verbatim; retry with just the first token.
        list_gai = str(texts_token[j][min(l1)])
        a = KMP.KMP_algorithm(test_text[j], list_gai)
    # start position
    print(list_labels[j][7:16])
    print("值为:" + str(a))
    # End offset = start + span length. NOTE(review): if a is still -1 here,
    # b is len(list3) - 1 and the branch below writes a span anyway — looks
    # like a deliberate fallback, but verify.
    b = a + len(list3)
    print("结束值为:" + str(b))
    # str_1 = random.choice(list_tc)
    if (a == -1):
        # Span not found at all: emit a window of at most 100 ending at b.
        if (b > 100):
            f.write(list_labels[j][7:16] + '\t' + str(b - 100) + '\t' + str(b) + '\n')
        else:
            f.write(list_labels[j][7:16] + '\t' + str(0) + '\t' + str(b) + '\n')
from nltk import tokenize
import KMP
from bert_serving.client import BertClient
import numpy as np

# NOTE(review): fragment — this chunk is cut off mid-loop: the outer
# `if len(x) > 502` has no visible else branch and there is no save step,
# so texts of <= 502 tokens are presumably handled past the visible end.
# pre_deal is used below but not imported in the visible lines — confirm it
# is imported elsewhere in the file.
li = []  # accumulates one (1000, 768) embedding (as nested lists) per text
bc = BertClient(ip='222.19.197.230', port=5555, port_out=5556, check_version=False)
labels_vector_dict, test_text = pre_deal.get_labels_vector()
# Zero padding appended when a text fits in a single BERT segment.
zero_vector = np.zeros((500, 768))
for i in range(0, len(test_text)):
    x = tokenize.word_tokenize(test_text[i])
    if (len(x) > 502):
        # Find the character offset of tokens 500-501 in the raw text so the
        # text can be split near the 500-token boundary; -1 means the
        # two-token pattern does not occur verbatim in the raw text.
        index = KMP.KMP_algorithm(test_text[i], x[500] + " " + x[501])
        if (index != -1):
            list = []  # NOTE(review): shadows the builtin `list`
            sentence_1 = test_text[i][0:index]
            sentence_2 = test_text[i][index:]
            list.append(sentence_1)
            list.append(sentence_2)
            vector = bc.encode(list)
            # Stack both halves: 500 + 500 = 1000 rows.
            ve = np.concatenate((vector[0], vector[1]), axis=0)
            li.append(ve.tolist())
        else:
            # Split point not found: encode the whole (BERT-truncated) text
            # and pad with zeros to 1000 rows.
            list = []
            list.append(test_text[i])
            vector = bc.encode(list)
            ve = np.concatenate((vector[0], zero_vector), axis=0)
            li.append(ve.tolist())