Esempio n. 1
0
def case_content_wordcloud():
    """Build a word-cloud chart from the text of every case.

    The ``content`` and ``title`` of each case's ``first_history`` are
    concatenated into one corpus, stripped of whitespace, punctuation and
    digits, and segmented with ``jieba.cut_for_search``.  Tokens longer than
    two characters are counted and the 20 most frequent ones are handed to
    ``get_highchart_word_cloud``.

    Returns:
        The value returned by ``get_highchart_word_cloud`` for a payload of
        ``{'name': word, 'weight': count}`` dicts.
    """
    # Collect the fragments and join once; repeated ``+=`` on a string
    # copies the whole accumulated text on every iteration.
    fragments = []
    for case in Case.objects.all():
        history = case.first_history
        fragments.append(history.content)
        fragments.append(history.title)
    content = ''.join(fragments)

    jieba.set_dictionary(str(settings.ROOT_DIR('static/dict.txt')))

    # Raw string: ``\s``, ``\!``, ``\/`` and ``\d`` are not valid *string*
    # escapes and trigger a SyntaxWarning on recent Python versions.  The
    # compiled regex matches exactly the same characters as before.
    pattern = re.compile(
        r'[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?“”、~@#¥%……&*()(\d+)]+')
    content = pattern.sub("", content)

    counter = Counter(
        word for word in jieba.cut_for_search(content) if len(word) > 2)
    data = [{
        'name': word,
        'weight': weight
    } for word, weight in counter.most_common(20)]

    return get_highchart_word_cloud(data=data)
Esempio n. 2
0
def case_content_wordcloud():
    """Build a word-cloud chart from the text of every case, minus stop words.

    The ``content`` and ``title`` of each case's ``first_history`` are
    concatenated into one corpus, stripped of whitespace, punctuation and
    digits, and segmented with ``jieba.cut_for_search``.  Tokens listed in
    ``static/jieba/stop.txt`` (one per line) are discarded.  Two-character
    tokens and longer tokens are counted separately; the 50 most frequent of
    each group, two-character group first, are handed to
    ``get_highchart_word_cloud``.

    Returns:
        The value returned by ``get_highchart_word_cloud`` for a payload of
        ``{'name': word, 'weight': count}`` dicts.
    """
    # Collect the fragments and join once; repeated ``+=`` on a string
    # copies the whole accumulated text on every iteration.
    fragments = []
    for case in Case.objects.all():
        history = case.first_history
        fragments.append(history.content)
        fragments.append(history.title)
    content = ''.join(fragments)

    jieba.set_dictionary(str(settings.ROOT_DIR('static/jieba/dict.txt')))

    # A set gives O(1) membership tests; the stop list is consulted once per
    # token produced by the segmenter.
    with open(str(settings.ROOT_DIR('static/jieba/stop.txt')),
              'r',
              encoding='UTF-8') as file:
        stop = {line.strip() for line in file}

    # Raw string: ``\s``, ``\!``, ``\/`` and ``\d`` are not valid *string*
    # escapes and trigger a SyntaxWarning on recent Python versions.  The
    # compiled regex matches exactly the same characters as before.
    pattern = re.compile(
        r'[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?“”、~@#¥%……&*()(\d+)]+')
    content = pattern.sub("", content)

    # Segment the corpus once and route each token to the matching counter.
    # Tokens are inserted in first-seen order, so ``most_common`` breaks
    # ties the same way as counting two separately filtered lists would.
    counter_2 = Counter()
    counter_3 = Counter()
    for word in jieba.cut_for_search(content):
        length = len(word)
        if length < 2 or word in stop:
            continue
        if length == 2:
            counter_2[word] += 1
        else:
            counter_3[word] += 1

    data = [{'name': word, 'weight': weight}
            for counter in (counter_2, counter_3)
            for word, weight in counter.most_common(50)]

    return get_highchart_word_cloud(data=data)