Example #1
0
# Fetch a WeChat article, count its Chinese word frequencies and e-mail
# the statistics through the QQ SMTP server.
import getpass

import requests
import yagmail
from pyquery import PyQuery

from mymodule import stats_word

response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')
print(response.text)

# Keep only the text of the article body element.
document = PyQuery(response.text)
content = document('#js_content').text()
print(content)

# print() returns None, so the statistics must be stored first and printed
# afterwards; otherwise the mail below would be sent with None as its body.
result = stats_word.stats_text_cn(content, 100)
print(result)

sender = input('输入发件人邮箱:')
password = getpass.getpass('输入发件人邮箱密码:')
recipients = input('输入收件人邮箱:')

mail = yagmail.SMTP(sender, password, 'smtp.qq.com')
subject = '自学训练营学习1群 DAY11 samele0077'
# Send the statistics as plain text.
mail.send(to=recipients, subject=subject, contents=str(result))
import json  # imported by the original script; not used below

from mymodule import stats_word

# string = 123456789
# try:
#     stats_word.stats_text(string)
# except ValueError as error:
#     print(error)

# Absolute location of the poem collection on the author's machine.
TANG300_PATH = r'E:\xuefeng\selfteaching-python-camp\exercises\1901100277\Day 09\mymodule\tang300.json'

# Read the local file and run the word-frequency statistics on it.
# File I/O background: https://www.liaoxuefeng.com/wiki/1016959663602400/1017607179232640
# read() loads the whole file at once; for large files reading in parts
# is the better choice.
with open(TANG300_PATH, mode='r', encoding='utf-8') as poem_file:
    cn_text = poem_file.read()

word_stats = stats_word.stats_text_cn(cn_text)
print(word_stats)




Example #3
0
import json
from os import path

from mymodule import stats_word

if __name__ == '__main__':
    # tang300.json is looked up next to this script.
    file_path = path.join(path.dirname(path.abspath(__file__)), 'tang300.json')

    # Decode explicitly as UTF-8: without an encoding argument open() uses the
    # platform default, which can fail on Chinese text (e.g. on Windows).
    with open(file_path, 'r', encoding='utf-8') as f:
        entries = json.load(f)

    # Concatenate the 'contents' field of every entry (missing field -> '').
    # join() builds the string once instead of growing it with += in a loop.
    poems = ''.join(item.get('contents', '') for item in entries)

    print('统计中文词频:')
    print(stats_word.stats_text_cn(poems, 20))
Example #4
0
def test():
    """Run the article -> statistics -> image pipeline on a fixed URL.

    Calls get_article, stats_word.stats_text_cn (limit 10) and
    generate_image; these names, as well as path and cwd, must be provided
    by the surrounding module.
    """
    source_url = 'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA'
    text = get_article(source_url)
    top_words = stats_word.stats_text_cn(text, 10)
    # The output file is 'stats.png' under cwd.
    target = path.join(cwd, 'stats.png')
    generate_image(top_words, target)
Example #5
0
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import requests
from pylab import mpl
from pyquery import PyQuery

from mymodule import stats_word

response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')

# Extract the article body text from the downloaded page with pyquery.
document = PyQuery(response.text)
content = document('#js_content').text()

# Word-frequency statistics of the extracted text (limit 20).
# Keep the statistics object itself for plotting instead of converting it to
# a string and parsing it back with eval(), which is fragile and unsafe.
result_list = stats_word.stats_text_cn(content, 20)
# Text form of the statistics, kept under the original name.
result = str(result_list)

# --- Chart setup ---
# Font choice so that Chinese labels display correctly (macOS workaround).
plt.rcParams['font.family'] = ['Arial Unicode MS']
# Avoid the minus sign '-' being drawn as a box in saved figures.
mpl.rcParams['axes.unicode_minus'] = False
fig, ax = plt.subplots()
Example #6
0
import getpass

import requests
import yagmail
from pyquery import PyQuery

from mymodule import stats_word

r = requests.get("https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA")

# Text of the article body element.
content = PyQuery(r.text)('#js_content').text()

word_list = stats_word.stats_text_cn(content, 20)
# One "word: count" line per entry of the statistics.
mail_content = "".join("{}: {}\n".format(x[0], x[1]) for x in word_list)
print(mail_content)

# Sender credentials are prompted for; the recipient is fixed.
sender = input('输⼊发件⼈邮箱:')
password = getpass.getpass('输⼊发件⼈邮箱密码(可复制粘贴):')
recipients = '*****@*****.**'
# recipients = input('输⼊收件⼈邮箱:')
"""
sender = '*****@*****.**'
password = '******'
recipients = '*****@*****.**'
"""

# Send through the 163 SMTP server.
yag = yagmail.SMTP(user=sender, password=password, host='smtp.163.com')
yag.send(recipients, '自学训练营学习9群 D11 adamlu008', mail_content)
from mymodule import stats_word

# Hard-coded location of tang300.json on the author's machine.
path = r'd:\用户目录\我的文档\GitHub\selfteaching-python-camp\exercises\1901010161\d11\mymodule\tang300.json'
with open(path, mode='r', encoding='UTF-8') as poems:  # the file is decoded as UTF-8
    read_date = poems.read()

# Both the statistics call and the print stay inside the guard.
try:
    top_words = stats_word.stats_text_cn(read_date, 20)
    print('出现频率最高的前20个词: \n', top_words)
except ValueError:
    print('ValueError:type of argument is not string!')
#Filled with admiration for Yugong, the Emperor of Heavens ordered two mighty gods to carry the mountains away.
#'''

#text = [1,3,4,6,6] #using a list to check the exception handling. worked...

#from mymodule import stats_word # whole text words frequence stats (en + cn)
#try:
#    print('Day7全文词频统计结果: ', stats_word.stats_text(text))
#except ValueError:
#    print('Inappropriate argument value (of correct type)')
import json
import os

from mymodule import stats_word

# Day 9, step 1: read tang300.json, looked up next to this script.
# The encoding is given explicitly so the Chinese text is decoded the same
# way on every platform instead of relying on the locale default.
with open(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'tang300.json'),
        encoding='utf-8') as f:
    read_file = f.read()
# The with-statement closes the file; no explicit close() is needed.

# Day 9, step 2: call stats_text_cn() with the file content and the output
# limit, reporting a ValueError (the author's signal for a non-string
# argument) instead of crashing.
try:
    print('Day9top100词频统计结果: ', stats_word.stats_text_cn(read_file, 100))
except ValueError:
    print('Inappropriate argument value (of correct type)')
import json

from mymodule import stats_word

# Output limit passed to stats_text_cn().
count = 100

with open('tang300.json', 'r', encoding='utf-8') as file:
    text = file.read()

# Run the statistics once, inside the guard. The original printed from an
# unguarded call and then repeated the call in a try block, discarding the
# result, so the ValueError handler never protected the call that mattered.
try:
    print(stats_word.stats_text_cn(text, count))
except ValueError:
    print('Invalid string')
#import os
#import json

#text1 = 1

#while True:
#try:
#stats_word.stats_text(text1)
#break
#except ValueError:
#print("导入字符非法")
#raise

from mymodule import stats_word as sw

# tang300.json is opened relative to the current working directory.
with open('tang300.json', encoding='UTF-8') as poem:
    read_file = poem.read()
# The with-statement closes the file on exit; the former bare `poem.closed`
# expression only read an attribute and had no effect, so it was removed.

print('唐诗中词频前20的词和词频数:', sw.stats_text_cn(read_file, 20))
import getpass

import requests
import urllib3
import yagmail  # used to send the result by e-mail
from pyquery import PyQuery

from mymodule import stats_word as s  # project word-statistics helpers

# Silence urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

url = 'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA'
response = requests.get(url)  # plain GET request

# Extract the article text from the page.
document = PyQuery(response.text)
content = document('#js_content').text()

# Compute the statistics once, inside the guard. The original guarded only a
# duplicate call made after the mail had already been sent.
try:
    results = s.stats_text_cn(content, 100)
except ValueError:
    print("err:not string ,try again")
    raise SystemExit(1)

# Concatenate the string form of every entry into the mail body.
r_string = ''.join(str(i) for i in results)
print(r_string)

sender = input('输入发件人邮箱:')
# Nothing is echoed while the authorization code is typed; just enter it in one go.
password = getpass.getpass('请输入发件人邮箱密码(可复制粘贴):')
recipients = input('输入收件人邮箱:')

# Keep the SMTP client in `yag` (the original bound the return value of
# send() to that name) and send through the 163 server.
yag = yagmail.SMTP(sender, password, 'smtp.163.com')
yag.send(recipients, '自学训练营3群+zhangmmmin', r_string)

# Final console output of the raw statistics, as in the original script.
print(results)
year to travel back and forth once.
On the bank of the Yellow River dwelled an old man much respected for
his wisdom. When he saw their back-breaking labour, he ridiculed
Yugong saying,”Aren’t you foolish, my friend? You are very old now,
and with whatever remains of your waning strength, you won’t be able
to remove even a corner of the mountain.”
Yugong uttered a sigh and said,”A biased person like you will never
understand. You can’t even compare with the widow’s little boy!”
“Even if I were dead, there will still be my children, my
grandchildren, my great grandchildren, my great great grandchildren.
They descendants will go on forever. But these mountains will not
grow any taler. We shall level them one day!” he declared with
confidence.
The wise old man was totally silenced.
When the guardian gods of the mountains saw how determined Yugong and
his crew were, they were struck with fear and reported the incident
to the Emperor of Heavens.
Filled with admiration for Yugong, the Emperor of Heavens ordered two
mighty gods to carry the mountains away.
'''

from mymodule import stats_word

# Read the poem collection from the current working directory. The encoding
# is explicit so the Chinese text does not depend on the platform default.
with open('tang300.json', encoding='utf-8') as f:
    read_file = f.read()
# The with-statement already closed the file; the former bare `f.closed`
# expression had no effect and was removed.

# Print the statistics (limit 20); a ValueError raised by stats_text_cn is
# reported instead of aborting the script.
try:
    print('汉字字频最高的前20字统计结果:', stats_word.stats_text_cn(read_file, 20))
except ValueError as ve:
    print(ve)
Example #13
0
import getpass

import requests
import yagmail
from pyquery import PyQuery

from mymodule import stats_word

# Download the WeChat article and keep the text of its body element.
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')
content = PyQuery(response.text)('#js_content').text()

# Statistics with stats_text_cn's default limit, plus their text form,
# which is used as the mail body.
day11 = stats_word.stats_text_cn(content)
day11_1 = str(day11)

# Mail settings: 163 SMTP server; account, authorization code and recipient
# are entered interactively.
smtp = "smtp.163.com"
user = input('请输入你的邮箱:')
password = getpass.getpass('请输入发件人邮箱密码(可复制粘贴):')
recipient = input('请输入收件人邮箱:')

# Send the mail.
yag = yagmail.SMTP(user, password, smtp)
yag.send(recipient, '19100102 lipeer', day11_1)
Example #14
0
# This program uses the stats_word module to compute and print word
# frequencies for the text stored in tang300.json.
from mymodule import stats_word

# Explicit UTF-8 decoding keeps the read independent of the platform default.
# The with-statement closes the file, so the former no-op `a.closed`
# expression was dropped.
with open('tang300.json', encoding='utf-8') as a:
    filel = a.read()

# Call stats_text_cn from the stats_word module (limit 20).
print('汉字单字出现的结果如下:')
print(stats_word.stats_text_cn(filel, 20))
Example #15
0
from os import path

from mymodule import stats_word

# tang300.json is resolved relative to the folder containing this script.
script_dir = path.dirname(path.abspath(__file__))
file_path = path.join(script_dir, './tang300.json')

with open(file_path, mode='r', encoding='utf-8') as f:
    # Reading and counting both happen inside the try block, so a ValueError
    # from either step is reported with the same message.
    try:
        text = f.read()
        top_words = stats_word.stats_text_cn(text, 20)
        print('文件中汉字词频最高的前20个词:\n', top_words)
    except ValueError:
        print('ValueError: Oops! That was no valid string.')
Example #16
0
 def handler(msg):
     """Reply to msg with the word statistics of the article at msg.url.

     Uses get_article and stats_word from the surrounding module; the
     statistics (limit 100) are sent back in their str() form.
     """
     text = get_article(msg.url)
     top_words = stats_word.stats_text_cn(text, 100)
     reply_text = str(top_words)
     msg.reply(reply_text)
Example #17
0
def test():
    """Run the article -> statistics -> image pipeline on a fixed URL.

    Calls get_article, stats_word.stats_text_cn (limit 20) and
    generate_image; these names, as well as path and cwd, must be provided
    by the surrounding module.
    """
    source_url = 'https://mp.weixin.qq.com/s/gl6VLZ9KNoKFkk-Uh7Dabw'
    text = get_article(source_url)
    top_words = stats_word.stats_text_cn(text, 20)
    # The output file is 'stats.png' under cwd.
    target = path.join(cwd, 'stats.png')
    generate_image(top_words, target)
import matplotlib.pyplot as plt
import numpy as np
import requests
from pyquery import PyQuery as py

from mymodule import stats_word

# Request the page and keep its HTML text.
reponse = requests.get('https://mp.weixin.qq.com/s/_oFklhozwgz_1QnB_pLioA')
web_text = reponse.text
content = py(web_text)('#js_content').text()

# Statistics (limit 10) turned into a mapping; its keys become the bar
# labels and its values the bar lengths.
w_list = dict(stats_word.stats_text_cn(content, 10))
group_names = list(w_list)
group_data = tuple(w_list.values())

# One figure with a single axes object for a horizontal bar chart.
fig, ax = plt.subplots()
y_pos = np.arange(len(group_names))
ax.barh(y_pos, group_data, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(group_names)
# Flip the y axis so the first entry is drawn at the top.
ax.invert_yaxis()
ax.set_xlabel('词频')
ax.set_title('网页中TOP10中文词语')
plt.show()

# plt.savefig(r'wordsCnt.jpeg')  # save the picture
# msg.reply_image(r'wordsCnt.jpeg')  # reply with the picture
Example #19
0
import json

from mymodule import stats_word

text1 = {}

# Load the poem collection from the current working directory as UTF-8.
with open('tang300.json', encoding='utf-8') as poems:
    text = poems.read()

# Print the statistics (limit 20); a ValueError is printed instead of raised.
try:
    top_words = stats_word.stats_text_cn(text, 20)
    print("词频最高的前20个词", top_words)
except ValueError as ve:
    print(ve)
# NOTE(review): this fragment uses `requests` without importing it here and
# stops after creating the SMTP client (nothing is sent) — the beginning and
# end of the script appear to be missing; confirm against the full file.
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')

# Extract the article body from response.text with pyquery.
from pyquery import PyQuery
document = PyQuery(response.text)
content = document('#js_content').text()
#print(type(content))

# Run the word-frequency analysis from stats_word on the extracted text and
# convert the result to str.
from mymodule import stats_word
str_con = str(content)
#print(type(str_con))
#print(str_con)
#print('字频最高的前100字统计结果: ', stats_word.stats_text_cn(str_con))
# Known open issue (author's note): newline characters are counted as well;
# not solved yet, a regular expression may be the way to handle it.
str_result = str(stats_word.stats_text_cn(str_con))
#print(str_result)

# Log in to the mailbox and send mail; based on @slona-song's homework and issue #1057.
import yagmail
import getpass

#sender = input('发件邮箱:')
sender = '*****@*****.**'
password = getpass.getpass('邮箱授权密码:')  # enter the authorization code set when SMTP was enabled, NOT the mailbox login password
#recipients = input('收件邮箱')
recipients = '*****@*****.**'
sever = 'smtp.163.com'  # SMTP server to use

yag = yagmail.SMTP(sender, password, host=sever)
#contents = [str_result]
Example #21
0
'''Fetch a web page over the network, segment its Chinese text with a
word-segmentation tool, count word frequencies and send the result to a
given mailbox.'''
import getpass

import pyquery
import requests
import yagmail
from pyquery import PyQuery

from mymodule import stats_word

# Address of the page to analyse.
image_url = "https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA"
# Full HTTP response for that address.
response = requests.get(image_url)
# Text of the article body element.
content = pyquery.PyQuery(response.text)('#js_content').text()
statList = stats_word.stats_text_cn(content, 100)
# String forms of all entries, concatenated without a separator.
statstring = ''.join(map(str, statList))

sender = input('输入发件人邮箱:')
password = getpass.getpass('输入发件人邮箱密码(可复制粘贴):')
recipients = input('输入收件人邮箱:')

# Send through the QQ SMTP server.
yag = yagmail.SMTP(user=sender, password=password, host='smtp.qq.com')
mail_subject = '19100305 luokaiwen1022主题:张小龙微信公开课演讲稿中文词频前100名统计'
yag.send(recipients, mail_subject, statstring)
import mymodule
from mymodule import stats_word

# Step 1: read the JSON file's content from the current working directory.
# The encoding is explicit so the Chinese text is not decoded with the
# platform's default encoding.
with open('tang300.json', encoding='utf-8') as t:
    read_file = t.read()

# Step 2: call stats_text_cn() with the file content and the output limit,
# printing a ValueError (the author's signal for a non-string argument)
# instead of letting it abort the script.
try:
    print('100中文词频统计结果:', stats_word.stats_text_cn(read_file, 100))
except ValueError as ve:
    print(ve)
Example #23
0
import getpass
import json
import os
import sys

import requests  # was used below without being imported
import yagmail
from pyquery import PyQuery

from mymodule import stats_word  # was used below without being imported

r = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')
response = r.text

# The raw page source is analysed as-is; extracting the article body with
# PyQuery was left for later by the author.
result_list = stats_word.stats_text_cn(response)

# Mail body as text: the statistics object itself is not a string, so it is
# converted before being handed to yagmail.
result_str = str(result_list)

sender = input('发件邮箱:')
password = getpass.getpass('邮箱密码')
recipients = input('收件邮箱')

# NOTE(review): the sender, password and recipients entered above are not
# passed to yagmail. SMTP() without arguments presumably relies on locally
# stored yagmail credentials, and the recipient below is hard-coded —
# confirm the intended behaviour before wiring the prompts in.
yag = yagmail.SMTP()
contents = [result_str]
yag.send('*****@*****.**', '19100303 Luchen1471', contents)
Example #24
0
grow any taler. We shall level them one day!” he declared with
confidence.
The wise old man was totally silenced.
When the guardian gods of the mountains saw how determined Yugong and
his crew were, they were struck with fear and reported the incident
to the Emperor of Heavens.
Filled with admiration for Yugong, the Emperor of Heavens ordered two
mighty gods to carry the mountains away.
'''

import os

from mymodule import stats_word

chao = {}

# Day 9: read the file and rank its words.
# tang300.json is looked up next to this script and decoded explicitly as
# UTF-8 so the read does not depend on the platform default encoding.
with open(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     'tang300.json'),
        encoding='utf-8') as nine:
    read_data = nine.read()
# The with-statement closes the file; the former bare `nine.closed`
# expression had no effect and was removed, as was a try block whose body
# was only `pass`.

# stats_text_cn's result is used through most_common() here.
result = stats_word.stats_text_cn(read_data)
print(result.most_common(20))
Example #25
0
# _*_ coding:utf-8 _*_

from mymodule import stats_word as sw

# Read the poem collection from the current working directory as UTF-8.
with open('tang300.json', encoding='UTF-8') as poem:
    read_file = poem.read()
# The file is closed by the with-statement; the former bare `poem.closed`
# expression did nothing and was removed.

print('最多的20个词:', sw.stats_text_cn(read_file, 20))
# Workflow:
# 1. fetch the page content
# 2. run the word-frequency statistics written earlier
# 3. send the result by e-mail to the author's own mailbox
import json

from mymodule.stats_word import stats_text_cn
from mymodule.utils import request, send_email

text = request('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')
top_words = stats_text_cn(text, 100)
# ensure_ascii=False keeps non-ASCII characters unescaped in the JSON text.
res = json.dumps(top_words, ensure_ascii=False)
send_email(res)
print(res)
def text1_traceback():
    """Print stats_text_cn(data, 100); on any exception print it with its traceback.

    stats_word, data and traceback are taken from the enclosing module.
    """
    try:
        outcome = stats_word.stats_text_cn(data, 100)
        print(outcome)
    except Exception as exc:
        # Short error line first, then the full formatted traceback.
        print('text1_traceback =>', exc)
        trace_text = traceback.format_exc()
        print(trace_text)
import json

from mymodule import stats_word

# Raw string (r prefix) so the backslashes of the Windows path are not
# treated as escape sequences.
TANG300_PATH = r'C:\Users\CS-Mu\Documents\selfteaching-python-camp\exercises\1901010061\d09\tang300.json'
with open(TANG300_PATH, encoding='UTF-8') as poems:
    read_date = poems.read()

# Print the statistics (limit 20); a ValueError is printed instead of raised.
try:
    top_words = stats_word.stats_text_cn(read_date, 20)
    print('统计前20的词频数: \n', top_words)
except ValueError as err:
    print(err)

import json
from os import path

from mymodule import stats_word

# tang300.json is looked up next to this script.
file_path = path.join(path.dirname(path.abspath(__file__)), 'tang300.json')

with open(file_path, 'r', encoding='utf-8') as f:
    text = f.read()

# The statistics call now sits inside the try block. The original guarded
# only the bare expression `text`, which can never raise ValueError, while
# the real call ran unprotected.
try:
    print(stats_word.stats_text_cn(text, 100))
except ValueError:
    print("error:文本为非字符串")





Example #30
0
import getpass

import requests
import yagmail
from pyquery import PyQuery

from mymodule import stats_word

# Download the article and keep the text of its body element.
response = requests.get('https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA')
content = PyQuery(response.text)('#js_content').text()

# Statistics (limit 100) and their text form, which becomes the mail body.
output = stats_word.stats_text_cn(content, 100)
output_str = str(output)
print(output_str, type(output_str))

# Fixed mail settings first, then the interactively entered account data.
host = 'smtp.163.com'
subject = '19100401 Newonefromhere'
sender = input('输入发件人邮箱:')
password = getpass.getpass('输入发件人邮箱密码:')
recipients = input('输入收件人邮箱:')

yag = yagmail.SMTP(sender, password, host)
yag.send(to=recipients, subject=subject, contents=output_str)