# 作者:田丰(FontTian)
# 创建时间:'2017/8/17'
import nltk
from ftools import cnlp

import matplotlib as mpl

# Point both matplotlib font families at the KaiTi typeface.
mpl.rcParams.update({
    u'font.sans-serif': [u'KaiTi'],
    u'font.serif': [u'KaiTi'],
})
# mpl.rcParams['axes.unicode_minus'] = False  # optional: avoids the minus sign '-' showing up as a box in saved figures

# Input locations: the text to analyse and the stop-word list.
text_path = u'/home/fonttian/Data/NLP/txt/lz.txt'
stopwords_path = u'/home/fonttian/Data/NLP/stopwords/CNENstopwords.txt'

# Build the ftools helper from both files with ReadText enabled, then
# register a user-defined word before the text is fetched.
fontsTools = cnlp.basis(
    textPath=text_path,
    stopwordsPath=stopwords_path,
    ReadText=True,
)
fontsTools.addUserWords([u'路明非'])

# Text handed back by the helper; this is what gets tokenized further down.
cutAndclearStr = fontsTools.getText()

# Banner for the NLTK demo.
print(u'nltk 文本分析基本操作')

# Show the NLTK calls this demo refers to, written as well-formed snippets.
print(u'nltk.word_tokenize(cutAndclearStr)')
print(u'nltk.FreqDist(tokenstr)')

# Tokenize the text obtained from the ftools helper.
# NOTE(review): FreqDist is announced above but not computed in this section.
tokenstr = nltk.word_tokenize(cutAndclearStr)

print(u'====== 其他基本指标并绘图 ======')

print(u'下面是nltk的其他类 ')
# -*- coding: utf-8 -*-
# 作者:田丰(FontTian)
# 创建时间:'2017/8/12'

from os import path
from scipy.misc import imread

from ftools import cnlp

# Directory containing this script, as an absolute path. A bare
# path.dirname(__file__) is '' when the script is started by a relative
# file name, and '' + '/txt/...' would then point at the filesystem root.
d = path.dirname(path.abspath(__file__))

text_path = path.join(d, 'txt', 'lztest.txt')  # text to analyse
stopwords_path = path.join(d, 'stopwords', 'CNENstopwords.txt')  # stop-word list

fontsTools = cnlp.basis(textPath=text_path, stopwordsPath=stopwords_path)
fontsTools.addUserWords([u'路明非'])  # register a user-defined word

font_path = path.join(d, 'Fonts', 'simkai.ttf')  # Chinese font file for the word cloud
back_coloring_path = path.join(d, 'img', 'lz1.jpg')  # background image

# Output image 1: shaped by the background image only.
imgname1 = path.join(d, 'WordCloudDefautColors.png')
# Output image 2: colours laid out according to the background image.
imgname2 = path.join(d, 'WordCloudColorsByImg.png')

# Load the background image that defines the word-cloud shape.
# back_coloring_path is already a full path, so no further join is needed.
# NOTE(review): scipy.misc.imread is not available in recent SciPy
# releases - confirm the SciPy version this script is expected to run on.
back_coloring = imread(back_coloring_path)

# 设置词云属性
wc = fontsTools.wordcloud(
    text=None,
    image_path=back_coloring_path,
    to_file_path=imgname2,
    show=True,
    font_path=font_path,  # 设置字体