def prepare(text):
    """Reduce *text* to its ``alpha`` characters and rank them.

    The input is lower-cased, every character that is not a member of the
    module-level ``alpha`` is discarded, and the remaining characters are
    passed through ``frequency.letters`` and then ``frequency.top``.

    Returns a list holding the first element of each item produced by
    ``frequency.top`` (presumably the letters, most frequent first --
    confirm against the ``frequency`` module).
    """
    lowered = text.lower()
    kept = "".join(ch for ch in lowered if ch in alpha)
    ranked = frequency.top(frequency.letters(kept))
    return [entry[0] for entry in ranked]
# NOTE(review): the two statements below close a function whose header lies
# above this section; they are kept exactly as they were.
    out_put(leader[0])
    return leader[0]


# The real entry point starts below.
# with open(wen_dang, 'r', encoding='utf-8') as f:
with open(wen_dang, 'r', encoding='gbk') as f:
    # One list element per line of the input document.
    target_words = f.readlines()
# Substring that me_replace() strips out; rebound on every loop pass below.
need_pop = " "


def me_replace(str11):
    """Return *str11* with every occurrence of the global ``need_pop`` removed.

    ``need_pop`` is read at call time, so the result depends on whatever
    value the driver loop most recently assigned to it.
    """
    return "".join(str11.split(need_pop))


topx = frequency.top(target_words, medal=int(1/threshold))
for me_item in topx:
    need_pop = the_long_the_better(me_item, True)
    # Each time a longest phrase is found, strip that phrase from the
    # loaded lines before the next candidate is processed.
    if need_pop:
        target_words = list(map(me_replace, target_words))
# Print the time elapsed since ``start`` and the contents of ``out_put_list``
# (both are defined outside this section).
print(datetime.now() - start)
print(out_put_list)
# Disabled variant of the run above, left commented out:
# topx = frequency.top(wen_dang, "", medal=int(1 / threshold))
# print(datetime.now() - start)
# for me_item in topx:
#     the_long_the_better(me_item, False)
#
# print(out_put_list)