forked from hnrss/hnrss
/
rss.py
77 lines (62 loc) · 3.46 KB
/
rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import re
import time
from email.utils import formatdate
from xml.sax.saxutils import escape
from xml.sax.saxutils import unescape as sax_unescape

from lxml import etree
def unescape(s):
    """Decode numeric character references and basic XML/HTML entities.

    Numeric references, hexadecimal (``&#x2F;``) or decimal (``&#47;``), are
    replaced by the character they name.  Afterwards the named entities
    ``&lt;``, ``&gt;``, ``&quot;``, ``&apos;`` and ``&amp;`` are decoded.

    :param s: text that may contain escaped characters
    :returns: the decoded text
    """
    try:
        to_char = unichr  # Python 2: chr() is limited to single bytes
    except NameError:
        to_char = chr  # Python 3: chr() covers all of Unicode

    def deref_ncr(match):
        # '&#x2F;' -> '/'   and   '&#47;' -> '/'
        hex_digits, dec_digits = match.groups()
        try:
            if hex_digits is not None:
                return to_char(int(hex_digits, 16))
            return to_char(int(dec_digits))
        except (ValueError, OverflowError):
            # Not a representable code point: keep the reference verbatim
            # instead of failing the whole conversion.
            return match.group(0)

    s = re.sub(r'&#(?:[Xx]([A-Fa-f0-9]+)|([0-9]+));', deref_ncr, s)
    # sax_unescape only knows &lt;, &gt; and &amp; on its own.
    entities = {'&quot;': '"', '&apos;': "'"}
    return sax_unescape(s, entities)
class RSS(object):
    """Build an RSS 2.0 document from a search API response.

    ``api_response`` is a mapping whose ``'hits'`` entry is iterated to
    produce one ``<item>`` per hit.  Two further keys steer the output:

    * ``'link_to'`` -- ``'comments'`` makes each item link to the Hacker
      News discussion page; any other value links to the hit's own URL
      (falling back to the discussion page when the hit has none).
    * ``'description'`` -- when truthy, items without ``story_text`` get a
      short description pointing at the "other" URL.

    The whole tree is built in the constructor; call :meth:`response` to
    serialize it.
    """

    def __init__(self, api_response, title, link='https://news.ycombinator.com/'):
        """Create the channel header and immediately populate the items.

        :param api_response: mapping with a ``'hits'`` list (see class doc)
        :param title: text of the channel ``<title>``
        :param link: text of the channel ``<link>``
        """
        self.api_response = api_response

        self.rss_root = etree.Element('rss', version='2.0')
        self.rss_channel = etree.SubElement(self.rss_root, 'channel')

        self.add_element(self.rss_channel, 'title', title)
        self.add_element(self.rss_channel, 'link', link)
        self.add_element(self.rss_channel, 'description', 'Hacker News RSS')
        self.add_element(self.rss_channel, 'docs', 'https://edavis.github.io/hnrss/')
        self.add_element(self.rss_channel, 'generator', 'https://github.com/edavis/hnrss')
        self.add_element(self.rss_channel, 'lastBuildDate', self.generate_rfc2822())

        self.generate_body()

    @staticmethod
    def _link_html(label, url):
        """Return ``label: <a href="url">url</a>`` with *url* HTML-escaped.

        The URL comes from the API response, so ``&``, ``<``, ``>`` and ``"``
        are escaped to keep it from breaking out of the ``href`` attribute.
        """
        safe_url = escape(url, {'"': '&quot;'})
        return '%s: <a href="%s">%s</a>' % (label, safe_url, safe_url)

    def generate_body(self):
        """Append one ``<item>`` to the channel for every hit in the response."""
        # Loop-invariant settings; a missing key behaves like "not set".
        link_to = self.api_response.get('link_to')
        want_description = self.api_response.get('description')

        for hit in self.api_response['hits']:
            rss_item = etree.SubElement(self.rss_channel, 'item')
            hn_url = 'https://news.ycombinator.com/item?id=%s' % hit['objectID']
            tags = hit.get('_tags') or []

            if 'comment' in tags:
                # Comments get title/description only when both the parent
                # story title and the comment body are available.
                if hit.get('story_title') and hit.get('comment_text'):
                    self.add_element(rss_item, 'title', 'New comment by %s in "%s"' % (
                        hit.get('author'), hit.get('story_title')))
                    self.add_element(rss_item, 'description', unescape(hit.get('comment_text')))
            else:
                if hit.get('title'):
                    self.add_element(rss_item, 'title', hit.get('title'))

                if hit.get('story_text'):
                    self.add_element(rss_item, 'description', unescape(hit.get('story_text')))
                elif want_description and link_to == 'url':
                    # The item links to the article, so describe the comments page.
                    self.add_element(rss_item, 'description',
                                     self._link_html('Comments URL', hn_url))
                elif want_description and link_to == 'comments':
                    # The item links to the comments, so describe the article.
                    self.add_element(rss_item, 'description',
                                     self._link_html('Article URL', hit.get('url') or hn_url))

            self.add_element(rss_item, 'pubDate', self.generate_rfc2822(hit.get('created_at_i')))

            if link_to == 'comments':
                self.add_element(rss_item, 'link', hn_url)
            else:
                self.add_element(rss_item, 'link', hit.get('url') or hn_url)

            self.add_element(rss_item, 'author', hit.get('author'))

            if 'story' in tags or 'poll' in tags:
                self.add_element(rss_item, 'comments', hn_url)

            self.add_element(rss_item, 'guid', hn_url, isPermaLink='false')

    def response(self):
        """Serialize the feed.

        :returns: a ``(body, status, headers)`` tuple: the UTF-8 encoded XML
            document, the integer ``200`` and a ``Content-Type`` header dict
        """
        rss_xml = etree.tostring(
            self.rss_root, pretty_print=True, encoding='UTF-8', xml_declaration=True,
        )
        return (rss_xml, 200, {'Content-Type': 'text/xml'})

    def add_element(self, parent, tag, text, **attrs):
        """Create ``<tag>`` holding *text*, append it to *parent*, return it.

        :param parent: element that receives the new child
        :param tag: name of the new element
        :param text: text content of the new element
        :param attrs: XML attributes to set on the new element
        """
        el = etree.Element(tag, attrs)
        el.text = text
        parent.append(el)
        return el

    def generate_rfc2822(self, secs=None):
        """Format a timestamp as an RFC 2822 date in GMT.

        ``formatdate`` always emits English day and month names, whereas
        ``time.strftime('%a ... %b')`` follows the process locale.

        :param secs: seconds since the epoch; ``None`` means "now"
        :returns: e.g. ``'Thu, 01 Jan 1970 00:00:00 GMT'``
        """
        return formatdate(secs, usegmt=True)