def case_content_wordcloud():
    """Build a word-cloud chart from the first-history text of every case.

    The ``content`` and ``title`` of each case's ``first_history`` are
    concatenated, punctuation/whitespace/digits are stripped, and the
    remaining text is segmented with ``jieba.cut_for_search``. Only tokens
    longer than two characters are counted; the 20 most frequent ones are
    handed to ``get_highchart_word_cloud``.

    Returns:
        Whatever ``get_highchart_word_cloud`` returns for the
        ``[{'name': ..., 'weight': ...}, ...]`` payload.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = []
    for case in Case.objects.all():
        # NOTE(review): assumes every case has a first_history whose content
        # and title are strings -- confirm against the model.
        history = case.first_history
        parts.append(history.content)
        parts.append(history.title)
    content = ''.join(parts)

    jieba.set_dictionary(str(settings.ROOT_DIR('static/dict.txt')))

    # Raw string: the regex escapes (\s, \d, ...) are not valid Python string
    # escapes and trigger warnings in a normal literal. The pattern itself
    # is unchanged.
    pattern = re.compile(
        r'[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?“”、~@#¥%……&*()(\d+)]+')
    content = pattern.sub("", content)

    counter = Counter(
        word for word in jieba.cut_for_search(content) if len(word) > 2
    )
    data = [
        {'name': word, 'weight': weight}
        for word, weight in counter.most_common(20)
    ]
    return get_highchart_word_cloud(data=data)
def case_content_wordcloud():
    """Build a word-cloud chart from the first-history text of every case.

    The ``content`` and ``title`` of each case's ``first_history`` are
    concatenated, punctuation/whitespace/digits are stripped, and the
    remaining text is segmented with ``jieba.cut_for_search``. Tokens found
    in the stop-word file are discarded. Two-character tokens and longer
    tokens are counted separately, and the 50 most frequent of each group
    (two-character group first) are handed to ``get_highchart_word_cloud``.

    Returns:
        Whatever ``get_highchart_word_cloud`` returns for the
        ``[{'name': ..., 'weight': ...}, ...]`` payload.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = []
    for case in Case.objects.all():
        # NOTE(review): assumes every case has a first_history whose content
        # and title are strings -- confirm against the model.
        history = case.first_history
        parts.append(history.content)
        parts.append(history.title)
    content = ''.join(parts)

    jieba.set_dictionary(str(settings.ROOT_DIR('static/jieba/dict.txt')))

    # One stop word per line; a set gives O(1) membership tests while
    # filtering tokens below.
    stop_path = str(settings.ROOT_DIR('static/jieba/stop.txt'))
    with open(stop_path, 'r', encoding='UTF-8') as stop_file:
        stop = {line.strip() for line in stop_file}

    # Raw string: the regex escapes (\s, \d, ...) are not valid Python string
    # escapes and trigger warnings in a normal literal. The pattern itself
    # is unchanged.
    pattern = re.compile(
        r'[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?“”、~@#¥%……&*()(\d+)]+')
    content = pattern.sub("", content)

    # Segment once and route each token to the matching counter; tokens are
    # visited in the same order as before, so tie-breaking in most_common()
    # is unaffected.
    counter_2 = Counter()
    counter_3 = Counter()
    for word in jieba.cut_for_search(content):
        length = len(word)
        if length < 2 or word in stop:
            continue
        if length == 2:
            counter_2[word] += 1
        else:
            counter_3[word] += 1

    data = [
        {'name': word, 'weight': weight}
        for counter in (counter_2, counter_3)
        for word, weight in counter.most_common(50)
    ]
    return get_highchart_word_cloud(data=data)