-
Notifications
You must be signed in to change notification settings - Fork 0
/
kq.py
executable file
·112 lines (92 loc) · 2.91 KB
/
kq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/python
#-*- coding: utf-8 -*-
import os
os.chdir('/home/caozhzh/work/rss')
import re
import urllib
from xgoogle.BeautifulSoup import BeautifulSoup
from xgoogle.browser import Browser, BrowserError
from xgoogle.GeneralFetch import GeneralFetch
url = "http://blog.sina.com.cn/u/1696709200"
b = Browser()
page = b.get_page(url)
page = page.replace('<!–[if lte IE 6]>','')
page = page.replace('<![endif]–>','')
#print page
be = BeautifulSoup(page)
div = be.find('div', {'class': 'diywidget'})
txt = ''.join(div.findAll(text=True))
#print type(txt)
import feedparser
origin_feed = feedparser.parse('http://blog.sina.com.cn/rss/1696709200.xml')
from feedformatter import Feed
import time
import datetime
import uuid
# Create the feed
feed = Feed()
# Set the feed/channel level properties
feed.feed["title"] = u"孔青老师信息公告"
feed.feed["link"] = u"http://blog.sina.com.cn/u/1696709200"
feed.feed["author"] = u"自动获取工具"
feed.feed["description"] = u"此RSS是由自动获取新浪博客信息公告的工具产生的,作者无法保证正确性,请以孔青老师博客公布的信息为准"
import json
lastmd5=""
try:
with open('lastmd5.txt', 'r') as f:
lastmd5 = json.load(f)
except:
pass
notices=[]
try:
with open('notices.txt', 'r') as f:
notices = json.load(f)
except:
pass
import hashlib
md5=hashlib.md5()
md5.update(txt.encode('ascii', 'ignore'))
md5.digest()
#print lastmd5
#print md5.hexdigest()
if lastmd5 != md5.hexdigest() :
# Create an item
item = {}
tm = datetime.datetime.now()
item["title"] = tm.strftime("%a, %d %b %Y %H:%M:%S %Z") + u"获取到的新信息公告"
item["link"] = "http://blog.sina.com.cn/u/1696709200"
item["description"] = txt
item["pubDate"] = datetime.datetime.now().strftime('%a, %d %b %Y %H:%M:%S %Z')#timetuple
item["guid"] = str(uuid.uuid3(uuid.NAMESPACE_X500,str(tm)))
notices.insert(0, item)
with open('notices.txt', 'a+') as f:
json.dump( notices, f )
lastmd5 = md5.hexdigest()
with open('lastmd5.txt', 'w') as f:
json.dump(lastmd5, f)
# Add item to feed
# You can do this as many times as you like with multiple items
i = 0
while i < len(notices) :
feed.items.append(notices[i])
i = i + 1
i = 0
while ( i < len(origin_feed.entries) ):
str1=origin_feed.entries[i].published
str2=re.sub(r"[+-]([0-9])+", "", str1)
dt = datetime.datetime.strptime(str2, '%a, %d %b %Y %H:%M:%S ')
tm = dt.timetuple()
origin_feed.entries[i].published = tm
origin_feed.entries[i].updated = tm
feed.items.append(origin_feed.entries[i])
i = i + 1
# Print the feed to stdout in various formats
#print feed.format_rss1_string()
#print feed.format_rss2_string()
#print feed.format_atom_string()
# Save the feed to a file in various formats
#feed.format_rss1_file("example_feed_rss1.xml")
feed.format_rss2_file("rss2.xml")
import os
cmd = 'cp rss2.xml /var/www'
os.system(cmd)