/
pocket_podcast.py
executable file
·86 lines (68 loc) · 2.4 KB
/
pocket_podcast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/python3
from podcast import YamlPodcast
from pocketpy.pocket import retrieve
from pocketpy.tags import add_tags
import re
import json
import requests
import pathlib
from urllib.parse import urlsplit
def download_base_file(url, out_folder=pathlib.Path('/home/dlu/public_html/podcast/')):
if 'podtrac' in url:
new_url = url[url.rindex('/') + 1:]
split = urlsplit(new_url)
base = split.path
else:
split = urlsplit(url)
base = pathlib.Path(split.path).name
response = requests.get(url)
outfile = out_folder / base
with open(outfile, 'wb') as f:
f.write(response.content)
return base
class NPR:
    """Regular expressions used to find downloadable audio on npr.org pages."""

    # Searched for in an article URL to decide whether this class applies.
    URL_PATT = re.compile(r'npr\.org')
    # Captures the text of the first <title> element. Non-greedy so that a
    # second </title> on the same line (e.g. an inline SVG title) is not
    # swallowed into the captured text.
    T_PAT = re.compile(r'<title>(.*?)</title>')
    # Captures the href of the link that directly follows the
    # "audio-tool-download" list item.
    M_PAT = re.compile(r'<li class="audio-tool audio-tool-download">\s*<a href="([^"]*)"')
class WBUR:
    """Regular expressions used to find downloadable audio on wbur.org pages."""

    # Searched for in an article URL to decide whether this class applies.
    URL_PATT = re.compile(r'wbur\.org')
    # Captures the text of the first <title> element. Non-greedy so that a
    # second </title> on the same line (e.g. an inline SVG title) is not
    # swallowed into the captured text.
    T_PAT = re.compile(r'<title>(.*?)</title>')
    # Captures the href of the anchor carrying the "article-audio-dl" class
    # and the "Download the audio" title attribute, in that attribute order.
    M_PAT = re.compile(r'<a href="([^"]*)" class="article-audio-dl" title="Download the audio"')
class WESA:
    """Regular expressions used to find streamable audio on wesa.fm pages."""

    # Searched for in an article URL to decide whether this class applies.
    URL_PATT = re.compile(r'wesa\.fm')
    # Captures the title text that precedes the " | 90.5 WESA" suffix. The dot
    # in "90.5" is escaped so it only matches a literal period, and the
    # capture is non-greedy to match the title patterns of the other sites.
    T_PAT = re.compile(r'<title>(.*?) \| 90\.5 WESA</title>')
    # Captures the data-stream-url attribute of the <ps-stream-url> element
    # nested in the audio player div. re.DOTALL is kept from the original; it
    # has no effect because the pattern contains no '.' metacharacter.
    M_PAT = re.compile(r'<div class="ArtP-audioPlayer">\s*'
                       r'<ps-stream\s+[^>]+>\s*'
                       r'<ps-stream-url[^>]+data-stream-url="([^"]+)">',
                       re.DOTALL)
# Site-specific pattern holders; each is tried in order against a saved URL.
PATTERNS = [NPR, WBUR, WESA]

yaml = '/home/dlu/public_html/podcast/david_misc.yaml'
podcast = YamlPodcast(yaml)

# The credentials/config JSON lives in a ``.creds`` file next to this script.
parent_folder = pathlib.Path(__file__).parent
config_path = parent_folder / '.creds'
with open(config_path) as config_file:
    config = json.load(config_file)

try:
    for key, entry in retrieve(config, verbose=True).items():
        # Entries already tagged 'podcast' were handled by an earlier run.
        if 'podcast' in entry.get('tags', {}):
            continue

        # Prefer the resolved URL; fall back to the given one when the
        # resolved URL is missing or empty.
        url = entry.get('resolved_url') or entry.get('given_url')
        if not url:
            continue

        for pattern in PATTERNS:
            if not pattern.URL_PATT.search(url):
                continue

            # Timeout so one unresponsive site cannot stall the whole run.
            page = requests.get(url, timeout=60).text
            m = pattern.M_PAT.search(page)
            if not m:
                continue

            fn = download_base_file(m.group(1))

            # Use the page title when one is found, else the media URL.
            m2 = pattern.T_PAT.search(page)
            title = m2.group(1) if m2 else m.group(1)

            podcast.add_episode(title, fn, '')
            add_tags(config, [key], 'podcast')
            # One episode per entry: stop once a pattern has produced it.
            break
finally:
    # Always persist what was collected. Entries tagged above are skipped on
    # the next run, so losing them here on an error would drop them for good.
    podcast.write_to_file()