import codecs
import logging

logger = logging.getLogger(__name__)

def printDanmu(dmlist, av):
    """Write each danmaku line to av<id>.txt, then run analyse() on the file."""
    filename = "av" + str(av) + ".txt"
    print("Loading...\n")
    try:
        with codecs.open(filename, 'w', "utf-8") as t:
            for dm in dmlist:
                t.write(dm.string + '\n')
        analyse(filename, av)  # analyse() is defined elsewhere in this project
    except Exception as e:
        logger.error('Failed to write danmaku file: ' + str(e))
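# For context, a minimal sketch of how dmlist might be produced; the endpoint
# and the BeautifulSoup parsing below are assumptions for illustration, not
# part of the original project. Each <d> element's .string is one danmaku
# line, which matches the dm.string access in printDanmu above.
import requests
from bs4 import BeautifulSoup

def fetch_danmu(cid):
    url = 'https://comment.bilibili.com/{}.xml'.format(cid)  # assumed endpoint
    resp = requests.get(url)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'xml')
    return soup.find_all('d')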
import os
import pickle

def total_analyse(data, dirname='beifen'):
    """Load every pickled scale under dirname and run analyse() at four levels."""
    for p in os.listdir(dirname):
        path = os.path.join(dirname, p)
        with open(path, 'rb') as f:
            scale = pickle.load(f)
        for i in range(4):
            print('* * * * ' * 6)
            analyse(data, scale, i)  # analyse() is defined elsewhere in this project
            print('* * * * ' * 6)
import json

def main():
    # func = input('''
    # 1: Calculate password;
    # 2: Analyse WeChat message.
    # ''')
    func = '2'
    if func == '1':
        IMEI = input('IMEI: ')  # 794825438204445
        UID = input('UID: ')    # -1193869337
        password = getPsw(IMEI, UID)  # 74ee691
        print(password)
    elif func == '2':
        knownType = checkType()   # checkType() and analyse() are defined elsewhere
        result = analyse(knownType)
        with open('results\\result.json', 'w', encoding='utf8') as j:
            json.dump(result, j, ensure_ascii=False)
        print('Result.json saved successfully.')
    else:
        print('Try again.')

main()
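# getPsw is not defined above. A minimal sketch consistent with the sample
# values in main() (IMEI 794825438204445, UID -1193869337 -> 74ee691):
# WeChat's EnMicroMsg.db key is commonly derived as the first seven hex
# characters of md5(IMEI + UID). This reconstruction is an assumption, not
# the original code.
import hashlib

def getPsw(imei, uid):
    digest = hashlib.md5((str(imei) + str(uid)).encode('utf-8')).hexdigest()
    return digest[:7]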
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType, ThemeType

def draw_word_cloud(cid_list, name):
    '''
    Draw word clouds.
    :param cid_list: list of video cids
    :param name: the person's name
    '''
    lst = analyse(cid_list)  # analyse() is defined elsewhere in this project
    words_count_list_TR = lst[0]
    words_count_list_TI = lst[1]
    word_cloud_TR = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.ROMA))
        .add("", words_count_list_TR, word_size_range=[20, 50], shape=SymbolType.RECT)
        .set_global_opts(title_opts=opts.TitleOpts(
            title="{} Word Cloud TOP50".format(name),
            subtitle="Keyword extraction based on the TextRank algorithm"))
        .render('{}_WordCloud_TR.html'.format(name))
    )
    word_cloud_TI = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        .add("", words_count_list_TI, word_size_range=[20, 100], shape=SymbolType.DIAMOND)
        .set_global_opts(title_opts=opts.TitleOpts(
            title="{} Word Cloud TOP50".format(name),
            subtitle="Keyword extraction based on the TF-IDF algorithm"))
        .render('{}_WordCloud_TI.html'.format(name))
    )
    print('====={} word clouds rendered====='.format(name))
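# Illustrative only: pyecharts' WordCloud.add takes a list of (word, weight)
# pairs, so the assumed contract of analyse(cid_list) above is two such lists
# (TextRank-ranked and TF-IDF-ranked). Hypothetical sample data:
sample_pairs = [('弹幕', 120), ('精彩', 95), ('up主', 60)]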
# coding:utf-8
import jieba
import jieba.analyse

def analyse():
    with open('Readhub.md', encoding='utf-8') as f:
        content = f.read()
    tags = jieba.analyse.extract_tags(content, topK=100)
    print(' '.join(tags))

if __name__ == '__main__':
    analyse()
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    jieba.load_userdict(userdictpath)
    # Load the stop words
    jieba.analyse.set_stop_words(stopwordspath)
    # Note: allowPOS must be a tuple; ('n') is just the string 'n'
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=('n',))
    # With weight values
    for tag in tags:
        print(tag[0] + ',' + str(tag[1]))
    print('===========================')
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=',')

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode='rt', encoding='utf-8') as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == '__main__':
    content_txt = get_content(r'D:\LagouJobInfo\lagou\details\网络爬虫')
    analyse(content_txt,
            'C:/Users/XuLu/PycharmProjects/LagouJob/stopwords.txt',
            'C:/Users/XuLu/PycharmProjects/LagouJob/userdict.txt')
# ... tail of an enclosing try/except block (its beginning is not shown) ...
        else:
            tongji[u'' + v] = int(n * 10000)
        finally:
            pass

# Set how many rows of data to analyse here; used together with the SQL statement
import time

for x in range(12, 100000, 100000):
    start = time.time()
    content = check_state(str(x))
    end = time.time()
    elapsed = end - start
    print('Database read time: ' + str(elapsed))
    start = time.time()
    analyse(content)
    end = time.time()
    elapsed = end - start
    print('Word-frequency extraction time: ' + str(elapsed))
    print('do a work')

# You can also print the result directly here:
# result = sorted(tongji.items(), key=lambda x: x[1], reverse=True)
# fre = json.dumps(result, ensure_ascii=False)

# Draw the word cloud
from wordcloud import WordCloud

wordcloud = WordCloud(
    font_path="simfang.ttf",
    background_color='White').generate_from_frequencies(tongji)
import matplotlib.pyplot as plt
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import jieba

# jieba.analyse is a submodule for keyword extraction, not a callable;
# for plain segmentation use jieba.cut, which returns a generator.
seg_list = jieba.cut('我来到北京清华大学')
print('/'.join(seg_list))
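# If keyword extraction was the intent of the original jieba.analyse call,
# extract_tags is the usual entry point; a minimal sketch (topK chosen
# arbitrarily for illustration):
import jieba.analyse

keywords = jieba.analyse.extract_tags('我来到北京清华大学', topK=5)
print('/'.join(keywords))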
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    jieba.load_userdict(userdictpath)
    # Load the stop words
    jieba.analyse.set_stop_words(stopwordspath)
    # Note: allowPOS must be a tuple; ('n') is just the string 'n'
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=('n',))
    # With weight values
    for tag in tags:
        print(tag[0] + ',' + str(tag[1]))
    print('===========================')
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=',')

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode='rt', encoding='utf-8') as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == '__main__':
    content_txt = get_content('D:/LagouJobInfo/lagou/details/计算机视觉')
    analyse(content_txt,
            'D:/Users/LucasX/PycharmProjects/LagouJob/stopwords.txt',
            'D:/Users/LucasX/PycharmProjects/LagouJob/userdict.txt')
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    # Note: allowPOS must be a tuple; ("n") is just the string "n"
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=("n",))
    # With weight values
    for tag in tags:
        print(tag[0] + " : " + str(tag[1]))
    print("===========================")
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=",")

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode="rt", encoding="utf-8") as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == "__main__":
    content_txt = get_content("D:/datamining")
    analyse(
        content_txt,
        "C:/Users/XuLu/PycharmProjects/LagouJob/stopwords.txt",
        "C:/Users/XuLu/PycharmProjects/LagouJob/userdict.txt",
    )