def index_writer(self, file_path):
    """Load documents from a delimited text file, then call ``self.index()``.

    Every non-blank line of the file must hold exactly three fields
    separated by two tab characters: key, title and link. One ``Doc`` is
    created per line and appended to ``self.doc_list``.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not split into exactly
            three fields.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly so the whole file is never
        # held in memory at once (readlines() would build a full list).
        for line in f:
            record = line.strip()
            if not record:
                # A blank line (e.g. an empty trailing line at EOF) holds
                # no record; skipping it avoids an unpacking error.
                continue
            key, title, link = record.split('\t\t')
            doc = Doc()
            doc.add('key', key)
            doc.add('title', title)
            doc.add('link', link)
            self.doc_list.append(doc)
    # Called once, after every document has been collected.
    self.index()
def index_writer(self, file_path):
    """Load dictionary entries from a text file, then call ``self.index()``.

    Every non-blank line of the file must hold exactly four fields
    separated by ``///``: word, English, miandian and time. One ``Doc`` is
    created per line with the first three fields and appended to
    ``self.doc_list``.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not split into exactly
            four fields.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly so the whole file is never
        # held in memory at once (readlines() would build a full list).
        for line in f:
            record = line.strip()
            if not record:
                # A blank line (e.g. an empty trailing line at EOF) holds
                # no record; skipping it avoids an unpacking error.
                continue
            # The fourth field (time) is still required by the line format
            # but is currently not added to the document. The leading
            # underscore also avoids shadowing the stdlib ``time`` module.
            word, english, miandian, _time_field = record.split('///')
            doc = Doc()
            doc.add('word', word)
            doc.add('English', english)
            doc.add('miandian', miandian)
            self.doc_list.append(doc)
    # Called once, after every document has been collected.
    self.index()
def index_writer(self, file_path):
    """Load one document per file under a directory, then call ``self.index()``.

    The directory tree rooted at ``file_path`` is walked with ``os.walk``.
    The full text of every file must hold exactly three fields separated
    by two tab characters: key, title and context. One ``Doc`` is created
    per file and appended to ``self.doc_list``.

    Args:
        file_path: Root directory containing the UTF-8 encoded files.

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a file's content does not split into exactly
            three fields.
    """
    for dirpath, _dirnames, filenames in os.walk(file_path):
        for filename in filenames:
            # Join against dirpath (not the root) so files found in
            # subdirectories resolve correctly, and let os.path.join pick
            # the platform's separator instead of a hard-coded backslash.
            full_path = os.path.join(dirpath, filename)
            with open(full_path, 'r', encoding='utf-8') as f:
                # Read the file and split it into the keyword, the page
                # title and the content.
                key, title, context = f.read().split('\t\t')
            doc = Doc()
            doc.add('key', key)
            doc.add('title', title)
            doc.add('context', context)
            self.doc_list.append(doc)
    # Called once, after every document has been collected.
    self.index()