import codecs
import logging

logger = logging.getLogger(__name__)

def printDanmu(dmlist, av):
    """Write each danmaku line to av<id>.txt, then run analyse() on the file."""
    filename = "av" + str(av) + ".txt"
    print("Loading...\n")
    try:
        with codecs.open(filename, 'w', "utf-8") as t:
            for dm in dmlist:
                t.write(dm.string + '\n')
        analyse(filename, av)  # analyse() is defined elsewhere in this project
    except Exception as e:
        logger.error('Failed to write danmaku file: ' + str(e))
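# For context, a minimal sketch of how dmlist might be produced; the endpoint
# and the BeautifulSoup parsing below are assumptions for illustration, not
# part of the original project. Each <d> element's .string is one danmaku
# line, which matches the dm.string access in printDanmu above.
import requests
from bs4 import BeautifulSoup

def fetch_danmu(cid):
    url = 'https://comment.bilibili.com/{}.xml'.format(cid)  # assumed endpoint
    resp = requests.get(url)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'xml')
    return soup.find_all('d')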
import os
import pickle

def total_analyse(data, dirname='beifen'):
    """Load every pickled scale under dirname and run analyse() at four levels."""
    for p in os.listdir(dirname):
        path = os.path.join(dirname, p)
        with open(path, 'rb') as f:
            scale = pickle.load(f)
        for i in range(4):
            print('* * * * ' * 6)
            analyse(data, scale, i)  # analyse() is defined elsewhere in this project
            print('* * * * ' * 6)
import json

def main():
    # func = input('''
    # 1: Calculate password;
    # 2: Analyse WeChat message.
    # ''')
    func = '2'
    if func == '1':
        IMEI = input('IMEI: ')  # 794825438204445
        UID = input('UID: ')    # -1193869337
        password = getPsw(IMEI, UID)  # 74ee691
        print(password)
    elif func == '2':
        knownType = checkType()   # checkType() and analyse() are defined elsewhere
        result = analyse(knownType)
        with open('results\\result.json', 'w', encoding='utf8') as j:
            json.dump(result, j, ensure_ascii=False)
        print('Result.json saved successfully.')
    else:
        print('Try again.')

main()
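# getPsw is not defined above. A minimal sketch consistent with the sample
# values in main() (IMEI 794825438204445, UID -1193869337 -> 74ee691):
# WeChat's EnMicroMsg.db key is commonly derived as the first seven hex
# characters of md5(IMEI + UID). This reconstruction is an assumption, not
# the original code.
import hashlib

def getPsw(imei, uid):
    digest = hashlib.md5((str(imei) + str(uid)).encode('utf-8')).hexdigest()
    return digest[:7]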
from pyecharts import options as opts
from pyecharts.charts import WordCloud
from pyecharts.globals import SymbolType, ThemeType

def draw_word_cloud(cid_list, name):
    '''
    Draw word clouds.
    :param cid_list: list of video cids
    :param name: the person's name
    '''
    lst = analyse(cid_list)  # analyse() is defined elsewhere in this project
    words_count_list_TR = lst[0]
    words_count_list_TI = lst[1]
    word_cloud_TR = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.ROMA))
        .add("", words_count_list_TR, word_size_range=[20, 50], shape=SymbolType.RECT)
        .set_global_opts(title_opts=opts.TitleOpts(
            title="{} Word Cloud TOP50".format(name),
            subtitle="Keyword extraction based on the TextRank algorithm"))
        .render('{}_WordCloud_TR.html'.format(name))
    )
    word_cloud_TI = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        .add("", words_count_list_TI, word_size_range=[20, 100], shape=SymbolType.DIAMOND)
        .set_global_opts(title_opts=opts.TitleOpts(
            title="{} Word Cloud TOP50".format(name),
            subtitle="Keyword extraction based on the TF-IDF algorithm"))
        .render('{}_WordCloud_TI.html'.format(name))
    )
    print('====={} word clouds rendered====='.format(name))
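# Illustrative only: pyecharts' WordCloud.add takes a list of (word, weight)
# pairs, so the assumed contract of analyse(cid_list) above is two such lists
# (TextRank-ranked and TF-IDF-ranked). Hypothetical sample data:
sample_pairs = [('弹幕', 120), ('精彩', 95), ('up主', 60)]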
# coding:utf-8
import jieba
import jieba.analyse

def analyse():
    with open('Readhub.md', encoding='utf-8') as f:
        content = f.read()
    tags = jieba.analyse.extract_tags(content, topK=100)
    print(' '.join(tags))

if __name__ == '__main__':
    analyse()
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    jieba.load_userdict(userdictpath)
    # Load the stop words
    jieba.analyse.set_stop_words(stopwordspath)
    # Note: allowPOS must be a tuple; ('n') is just the string 'n'
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=('n',))
    # With weight values
    for tag in tags:
        print(tag[0] + ',' + str(tag[1]))
    print('===========================')
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=',')

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode='rt', encoding='utf-8') as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == '__main__':
    content_txt = get_content(r'D:\LagouJobInfo\lagou\details\网络爬虫')
    analyse(content_txt,
            'C:/Users/XuLu/PycharmProjects/LagouJob/stopwords.txt',
            'C:/Users/XuLu/PycharmProjects/LagouJob/userdict.txt')
# ... tail of an enclosing try/except block (its beginning is not shown) ...
        else:
            tongji[u'' + v] = int(n * 10000)
        finally:
            pass

# Set how many rows of data to analyse here; used together with the SQL statement
import time

for x in range(12, 100000, 100000):
    start = time.time()
    content = check_state(str(x))
    end = time.time()
    elapsed = end - start
    print('Database read time: ' + str(elapsed))
    start = time.time()
    analyse(content)
    end = time.time()
    elapsed = end - start
    print('Word-frequency extraction time: ' + str(elapsed))
    print('do a work')

# You can also print the result directly here:
# result = sorted(tongji.items(), key=lambda x: x[1], reverse=True)
# fre = json.dumps(result, ensure_ascii=False)

# Draw the word cloud
from wordcloud import WordCloud

wordcloud = WordCloud(
    font_path="simfang.ttf",
    background_color='White').generate_from_frequencies(tongji)
import matplotlib.pyplot as plt
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import jieba

# jieba.analyse is a submodule for keyword extraction, not a callable;
# for plain segmentation use jieba.cut, which returns a generator.
seg_list = jieba.cut('我来到北京清华大学')
print('/'.join(seg_list))
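# If keyword extraction was the intent of the original jieba.analyse call,
# extract_tags is the usual entry point; a minimal sketch (topK chosen
# arbitrarily for illustration):
import jieba.analyse

keywords = jieba.analyse.extract_tags('我来到北京清华大学', topK=5)
print('/'.join(keywords))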
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    jieba.load_userdict(userdictpath)
    # Load the stop words
    jieba.analyse.set_stop_words(stopwordspath)
    # Note: allowPOS must be a tuple; ('n') is just the string 'n'
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=('n',))
    # With weight values
    for tag in tags:
        print(tag[0] + ',' + str(tag[1]))
    print('===========================')
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=',')

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode='rt', encoding='utf-8') as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == '__main__':
    content_txt = get_content('D:/LagouJobInfo/lagou/details/计算机视觉')
    analyse(content_txt,
            'D:/Users/LucasX/PycharmProjects/LagouJob/stopwords.txt',
            'D:/Users/LucasX/PycharmProjects/LagouJob/userdict.txt')
import os
import jieba
import jieba.analyse

def analyse(strcontent, stopwordspath, userdictpath):
    # Note: allowPOS must be a tuple; ("n") is just the string "n"
    tags = jieba.analyse.extract_tags(strcontent, topK=20,
                                      withWeight=True, allowPOS=("n",))
    # With weight values
    for tag in tags:
        print(tag[0] + " : " + str(tag[1]))
    print("===========================")
    # Without weight values; only used to generate a TagCloud...
    for tag in tags:
        print(tag[0], end=",")

def get_content(txtdir):
    content = []
    for each_txt in os.listdir(txtdir):
        with open(txtdir + os.sep + each_txt, mode="rt", encoding="utf-8") as f:
            content.append("".join(f.readlines()))
    return "".join(content)

if __name__ == "__main__":
    content_txt = get_content("D:/datamining")
    analyse(
        content_txt,
        "C:/Users/XuLu/PycharmProjects/LagouJob/stopwords.txt",
        "C:/Users/XuLu/PycharmProjects/LagouJob/userdict.txt",
    )