This repository has been archived by the owner on Mar 25, 2023. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
newsAgent2.py
168 lines (138 loc) · 4.67 KB
/
newsAgent2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import html
import re
import textwrap
from nntplib import NNTP, decode_header
from urllib.request import urlopen
class NewsItem:
    """
    A single piece of news: a headline plus the text that goes with it.

    Both values are stored unchanged on the instance as ``title`` and
    ``body``.
    """

    def __init__(self, title, body):
        # Plain attribute storage; no validation or conversion is done.
        self.title, self.body = title, body
class NewsAgent:
    """
    Hub that pulls news items out of registered sources and pushes them to
    registered destinations.

    A source is any object with a ``get_items()`` method returning an
    iterable of items; a destination is any object with a
    ``receive_items(items)`` method.
    """

    def __init__(self):
        # Both registries start empty and are filled via add_* below.
        self.sources, self.destinations = [], []

    def add_source(self, source):
        """Register *source* as a provider of news items."""
        self.sources.append(source)

    def add_destination(self, destination):
        """Register *destination* as a consumer of news items."""
        self.destinations.append(destination)

    def distribute(self):
        """
        Gather every item from every source (in registration order) into one
        list, then pass that same list to each destination in turn.
        """
        collected = [
            item
            for provider in self.sources
            for item in provider.get_items()
        ]
        for consumer in self.destinations:
            consumer.receive_items(collected)
class PlainDestination:
    """
    News destination that writes every item to standard output as plain
    text.
    """

    @staticmethod
    def receive_items(items):
        """
        Print each item as three lines: its title, a row of dashes as wide
        as the title, and its body.
        """
        for entry in items:
            heading = entry.title
            print(heading)
            # Underline matches the title length character for character.
            underline = len(heading) * '-'
            print(underline)
            print(entry.body)
class NNTPSource:
    """
    A news source that retrieves news items from an NNTP group.
    """

    def __init__(self, servername, group, how_many):
        """
        Remember where to fetch from; no connection is made here.

        servername -- host name of the NNTP server
        group      -- name of the newsgroup to read
        how_many   -- number of most recent articles to retrieve
        """
        self.servername = servername
        self.group = group
        self.how_many = how_many

    def get_items(self):
        """
        Yield a NewsItem for each of the newest ``how_many`` articles in the
        group, oldest first.

        The title is the decoded subject header and the body is the article
        body decoded as Latin-1. Nothing is yielded (and, for a non-positive
        ``how_many``, no connection is opened) when there is nothing to
        fetch.

        The connection is held in a ``with`` block so it is shut down even
        if the consumer stops iterating early or an NNTP call raises.
        """
        if self.how_many <= 0:
            return
        with NNTP(self.servername) as server:
            _, _, first, last, _ = server.group(self.group)
            # Never ask for article numbers below the group's first one.
            start = max(first, last - self.how_many + 1)
            if start > last:
                # Empty group: the server reports last < first.
                return
            _, overviews = server.over((start, last))
            for article_id, overview in overviews:
                title = decode_header(overview['subject'])
                _, info = server.body(article_id)
                body = '\n'.join(line.decode('latin') for line in info.lines)
                yield NewsItem(title, body)
class SimpleWebSource:
    """
    A news source that extracts news items from a web page using regular
    expressions.
    """

    def __init__(self, url, title_pattern, body_pattern, encoding="utf-8"):
        """
        url           -- address of the page to download
        title_pattern -- regular expression whose matches are the titles
        body_pattern  -- regular expression whose matches are the bodies
        encoding      -- character encoding used to decode the page
        """
        self.url = url
        self.title_pattern = re.compile(title_pattern)
        self.body_pattern = re.compile(body_pattern)
        self.encoding = encoding

    def get_items(self):
        """
        Download the page and yield one NewsItem per (title, body) pair.

        Titles and bodies are matched independently with ``findall`` and
        paired up in order; surplus matches on either side are dropped by
        ``zip``. Each body is re-wrapped with ``textwrap.fill`` and gets a
        trailing newline.
        """
        # Close the HTTP response as soon as the page has been read instead
        # of leaving it to the garbage collector.
        with urlopen(self.url) as response:
            text = response.read().decode(self.encoding)
        titles = self.title_pattern.findall(text)
        bodies = self.body_pattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, textwrap.fill(body) + '\n')
class HTMLDestination:
    """
    A news destination that formats all its news items as HTML.
    """

    def __init__(self, filename):
        """
        filename -- path of the HTML file to (over)write
        """
        self.filename = filename

    def receive_items(self, items):
        """
        Write *items* to ``self.filename`` as a single HTML page.

        The page holds a linked index of all titles followed by one
        heading and one ``<pre>`` block per item. Items are numbered from 1
        and that number is used as the anchor id.

        Titles and bodies are HTML-escaped before being written, so markup
        in a title or body is shown literally rather than interpreted.
        """
        # The items are walked twice (index, then content); materialise them
        # so a generator argument does not come up empty on the second pass.
        items = list(items)
        # The page declares charset UTF-8, so write it as UTF-8 regardless of
        # the platform default. The with-block closes the file on any error.
        with open(self.filename, 'w', encoding='utf-8') as out:
            print('''<!DOCTYPE html>
<html lang="en">
<head>
<title>Today's News</title>
<meta charset="UTF-8">
<link rel="stylesheet" href="format.css" type="text/css"/>
</head>
<body>
<h1>Today's News</h1>
''', file=out)

            print('<ul>', file=out)
            for number, item in enumerate(items, start=1):
                print(' <li><a href="#{}">{}</a></li>'
                      .format(number, html.escape(str(item.title))), file=out)
            print('</ul>', file=out)

            for number, item in enumerate(items, start=1):
                print('<h2 id="{}">{}</h2>'
                      .format(number, html.escape(str(item.title))), file=out)
                print('<pre>{}</pre>'.format(html.escape(str(item.body))),
                      file=out)

            print("""
</body>
</html>
""", file=out)
def run_default_setup():
    """
    Build an agent with the stock sources and destinations and run it once.

    Sources: the Reuters world-news page (scraped with regular expressions)
    and the ten newest articles of comp.lang.python.announce. Destinations:
    plain text on standard output and the file ``news.html``. Edit to taste.
    """
    agent = NewsAgent()

    # Web source: headline links and the paragraph following each headline.
    agent.add_source(SimpleWebSource(
        'http://www.reuters.com/news/world',
        r'<h2><a href="[^"]*"\s*>(.*?)</a>',
        r'</h2><p>(.*?)</p>',
    ))

    # Usenet source: latest announcements from the Python announce group.
    agent.add_source(NNTPSource(
        'nntp.aioe.org',
        'comp.lang.python.announce',
        10,
    ))

    # Output goes both to the console and to an HTML file.
    for destination in (PlainDestination(), HTMLDestination('news.html')):
        agent.add_destination(destination)

    # Fetch everything and hand it to every destination.
    agent.distribute()
# Run the default configuration only when executed as a script, not on import.
if __name__ == '__main__':
    run_default_setup()