# NOTE(review): a second ``read_data_use`` is defined later in this file; if
# both live at module scope, the later definition replaces this one.
def read_data_use(option, sen2id):
    """Read the sentences of ``option.use_data_path`` and encode them.

    Every line is stripped, lower-cased and split on whitespace; the token
    list is passed through ``sen2id`` and the collected results are handed
    to ``array_data`` together with ``option.num_steps`` and
    ``option.dict_size``.

    Args:
        option: Settings object; ``use_data_path``, ``num_steps`` and
            ``dict_size`` are read from it.
        sen2id: Callable applied to the token list of each line.

    Returns:
        The value produced by ``array_data`` for the encoded sentences.
    """
    path = option.use_data_path
    step_limit = option.num_steps
    vocab_size = option.dict_size
    with open(path) as source:
        encoded = [sen2id(raw.strip().lower().split()) for raw in source]
    return array_data(encoded, step_limit, vocab_size)  # sentence
def _keyword_token_indices(words, phrases):
    """Return the positions in ``words`` covered by exact phrase matches.

    For every start position and every phrase, the window of
    ``len(phrase.split())`` tokens is joined with single spaces and compared
    with the phrase; on equality every position of that window is emitted.
    Positions can repeat when matches overlap.
    """
    # Phrase widths do not depend on the start position, so compute them once.
    sized = [(phrase, len(phrase.split())) for phrase in phrases]
    hits = []
    for start in range(len(words)):
        for phrase, width in sized:
            if ' '.join(words[start:start + width]) == phrase:
                hits.extend(range(start, start + width))
    return hits


def read_data_use(option, sen2id):
    """Read sentences and build one keyword indicator vector per sentence.

    For each line of ``option.use_data_path``:

    * the line is stripped and, if it has more than 15 whitespace-separated
      tokens, cut down to the first 15;
    * ``Rake.run`` extracts keyword entries (only element 0 of each entry is
      used, as the phrase text) and the ZPar tagger tags the sentence;
    * a vector of length ``option.num_steps - 1`` is filled with 1 at every
      token position covered by an exact keyword-phrase match;
    * if ``option.keyword_pos == True`` the vector is refined by
      ``keyword_pos2sta_vec``; otherwise an all-zero vector is stored;
    * the lower-cased tokens are encoded with ``sen2id``.

    A line for which the tagger yields no tokens (e.g. a blank line) gets an
    empty ``pos`` tuple instead of raising ``IndexError``.

    Args:
        option: Settings object; ``use_data_path``, ``num_steps``,
            ``dict_size``, ``pos_path`` and ``keyword_pos`` are read.
        sen2id: Callable applied to the lower-cased token list of each line.

    Returns:
        A tuple ``(data_new, sta_vec_list)``: the result of ``array_data``
        on the encoded sentences, and the list of per-sentence vectors.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    vec_len = option.num_steps - 1
    max_words = 15  # sentences longer than this are cut to their first tokens

    time1 = time.time()
    rake = RAKE.Rake(RAKE.SmartStopList())
    # Keep the ZPar object bound to a local for as long as the tagger is used.
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    time2 = time.time()
    print("read data load time: ", time2 - time1)

    data = []
    sta_vec_list = []
    with open(file_name) as f:
        for line in f:
            text = line.strip()
            words = text.split()
            if len(words) > max_words:
                words = words[:max_words]
                text = ' '.join(words)

            sta_vec = list(np.zeros([vec_len]))
            keyword = rake.run(text)
            pos_list = tagger.tag_sentence(text).split()
            # Leading '/'-separated field of every tagged token. Building it
            # directly (instead of list(zip(*...))[0]) gives the same tuple
            # for non-empty input and () when there are no tokens.
            # NOTE(review): confirm that the field before the first '/' is
            # the one keyword_pos2sta_vec expects.
            pos = tuple(token.split('/')[0] for token in pos_list)

            if keyword != []:
                phrases = [entry[0] for entry in keyword]
                for ind in _keyword_token_indices(words, phrases):
                    # Positions beyond the vector length are ignored.
                    if ind < vec_len:
                        sta_vec[ind] = 1

            # Explicit comparison kept so non-bool option values behave as
            # they did before.
            if option.keyword_pos == True:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([vec_len])))
            data.append(sen2id(text.lower().split()))

    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
def read_data_use1(option, sen2id):
    """Verbose reader: encode sentences and print keyword diagnostics.

    For each line of ``option.use_data_path`` the raw line, the ``Rake.run``
    result, the matched token indices and the output of
    ``keyword_pos2sta_vec`` are printed. A vector of length
    ``option.num_steps - 1`` is set to 1 at the index of every keyword word
    found in the sentence. Because ``list.index`` is used, a word that
    occurs several times is always mapped to its first occurrence.

    If ``option.keyword_pos == True`` the vector refined by
    ``keyword_pos2sta_vec`` is stored, otherwise an all-zero vector.
    A line for which the tagger yields no tokens (e.g. a blank line) gets an
    empty ``pos`` tuple instead of raising ``IndexError``.

    Args:
        option: Settings object; ``use_data_path``, ``num_steps``,
            ``dict_size``, ``pos_path`` and ``keyword_pos`` are read.
        sen2id: Callable applied to the lower-cased token list of each line.

    Returns:
        A tuple ``(data_new, sta_vec_list)``: the result of ``array_data``
        on the encoded sentences, and the list of per-sentence vectors.
    """
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    vec_len = option.num_steps - 1

    rake = RAKE.Rake(RAKE.SmartStopList())
    # Keep the ZPar object bound to a local for as long as the tagger is used.
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()

    data = []
    sta_vec_list = []
    with open(file_name) as f:
        for line in f:
            print('sentence:' + line)
            text = line.strip()
            words = text.split()  # split once; reused for every keyword word

            sta_vec = list(np.zeros([vec_len]))
            keyword = rake.run(text)
            pos_list = tagger.tag_sentence(text).split()
            # Leading '/'-separated field of every tagged token; yields ()
            # rather than an IndexError when there are no tokens.
            # NOTE(review): confirm that the field before the first '/' is
            # the one keyword_pos2sta_vec expects.
            pos = tuple(token.split('/')[0] for token in pos_list)
            print(keyword)

            if keyword != []:
                # Only element 0 of each Rake entry (the phrase) is used.
                keyword = [entry[0] for entry in keyword]
                keyword_new = []
                for item in keyword:
                    tem1 = [words.index(x) for x in item.split() if x in words]
                    print('id', tem1)
                    keyword_new.extend(tem1)
                print(keyword_new)
                for ind in keyword_new:
                    # Positions beyond the vector length are ignored.
                    if ind < vec_len:
                        sta_vec[ind] = 1

            # Explicit comparison kept so non-bool option values behave as
            # they did before.
            if option.keyword_pos == True:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([vec_len])))
            print(keyword_pos2sta_vec(option, sta_vec, pos))
            data.append(sen2id(text.lower().split()))

    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector