forked from FaustXVI/poche-to-calibre
/
pocheToCalibre.py
60 lines (52 loc) · 2.02 KB
/
pocheToCalibre.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
class NYTimes(BasicNewsRecipe):
    """Recipe that logs in to a Poche instance and indexes its saved entries.

    Entries listed on ``index.php`` are grouped into feeds keyed by the text
    of each entry's ``reading-time`` link.
    """

    # Address of the hosted service; there, pages live under /u/<username>.
    _HOSTED_URL = 'http://app.inthepoche.com'

    # Root URL of the instance to read. Any value other than _HOSTED_URL is
    # used directly, without the per-user path component.
    appURL = _HOSTED_URL
    title = 'Poche'
    __author__ = 'Xavier Detant'
    description = 'Ma poche'
    needs_subscription = True
    # Both markers point at the element with id="article"; calibre uses them
    # to trim the downloaded page around that element.
    remove_tags_before = dict(id='article')
    remove_tags_after = dict(id='article')

    def get_browser(self):
        """Return a browser, logged in when a username and password are set.

        Returns:
            The browser obtained from ``BasicNewsRecipe.get_browser``; if
            credentials are available the ``loginform`` form has been
            filled in and submitted on it.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            if self.appURL == self._HOSTED_URL:
                br.open(self.appURL + '/u/' + self.username)
            else:
                br.open(self.appURL)
            br.select_form(name='loginform')
            br['login'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the feed list from the entries on the index page.

        Returns:
            A list of ``(feed_title, articles)`` tuples. Each article is a
            dict with ``title``, ``url``, ``date``, ``description`` and
            ``content`` keys. Entries without a ``reading-time`` label are
            filed under ``'Uncategorized'``.
        """
        if self.appURL == self._HOSTED_URL:
            baseURL = self.appURL + '/u/' + self.username + '/'
        else:
            # Accept a self-hosted URL with or without a trailing slash;
            # without one, 'index.php' would be glued onto the host name.
            baseURL = self.appURL.rstrip('/') + '/'
        soup = self.index_to_soup(baseURL + 'index.php')

        # Every article gets the same date stamp, so compute it once.
        pubdate = strftime('%a, %d %b')
        articles = {}
        for div in soup.findAll(True, attrs={'class': ['entrie']}):
            a = div.find('a', href=True)
            if a is None:
                continue
            key = self.tag_to_string(div.find(
                'a', attrs={'class': ['reading-time']}))
            url = baseURL + a['href']
            title = self.tag_to_string(a, use_alt=False)
            description = ''
            summary = div.find('p')
            if summary:
                description = self.tag_to_string(summary, use_alt=False)
            # tag_to_string gives back an empty string (not None) when the
            # reading-time link is missing, so test truthiness to make the
            # fallback reachable.
            feed = key or 'Uncategorized'
            articles.setdefault(feed, []).append(dict(
                title=title, url=url, date=pubdate,
                description=description, content=''))
        return list(articles.items())