#!/usr/bin/env python3
""" rpmcache - cache downloaded packages for other machines in your lan
rpmcache is a caching proxy for rpm packages, to be used as proxy by
eg. dnf. It should also work for other (eg. deb) packages, the only
special files are metadata files (config option 'md_files', eg.
repomd.xml) that are fetched again after a configurable time (config
option 'md_keep', in minutes) to be able to get updates.
"""
import os
import sys
import time
import mimetypes
import uwsgi
import pycurl
# from StringIO import StringIO
# from cgi import parse_qs, escape
# just for testing/debugging
# from pprint import pformat
# Runtime configuration.  There is no config file: these module-level
# defaults are the only knobs, adjust them before deploying.
CONFIG = {
    'cache_dir': '/var/cache/rpmcache',
    'log_level': 3,  # 0 = silence, 4 = debug
    'use_color': False,  # colorized output for terminal
    'md_files': [  # list of metadata files
        'repomd.xml',  # main repo metadata file
        # Fedora uses $id-{filelists,primary}.xml.gz (==> needs cleanup)
        # Google doesn't use an id
        'filelists.xml.gz',  # needed for google-chrome repo
        'primary.xml.gz',  # needed for google-chrome repo
        # .deb based distros, probably incomplete
        'InRelease',  # file with hashes and other md_files
        'Packages.gz',  # package list
        'Packages.xz',  # package list
    ],
    'md_keep': 360,  # how many minutes to cache metadata files
}
def log(msg):
    """Print *msg* when it passes the configured verbosity.

    *msg* has the form '<level>:<text>' where level is one of
    'D'(ebug), 'I'(nfo), 'W'(arn) or 'E'(rror).  The message is shown
    only if CONFIG['log_level'] is at least the message's numeric
    level; ANSI colors are used when CONFIG['use_color'] is set.
    Returns True when the message was printed, False otherwise.
    """
    severity = {'D': 4, 'I': 3, 'W': 2, 'E': 1}
    ansi = {
        'D': '\033[01;30m', 'I': '\033[01;37m',
        'W': '\033[01;33m', 'E': '\033[01;31m',
        'off': '\033[00m'
    }
    level, _, text = msg.partition(':')
    if CONFIG['log_level'] < severity[level]:
        return False
    if CONFIG['use_color']:
        sys.stdout.write(ansi[level] + level + ':' + text +
                         ansi['off'] + '\n')
        sys.stdout.flush()
    else:
        print(msg)
    return True
# example request from dnf upgrade
# GET ftp://ftp.informatik.uni-frankfurt.de/pub/Mirrors/fedora\
# /updates/22/x86_64/repodata/repomd.xml
def localfile(url, cache_dir=None):
    """Map *url* to a local file path below the cache directory.

    The scheme is dropped; the host and the remaining path become the
    relative file path.  '.' and '..' components are discarded so a
    crafted URL cannot escape the cache directory (path traversal).

    cache_dir: base directory; defaults to CONFIG['cache_dir'].
    Returns the absolute path as a string.
    """
    if cache_dir is None:
        cache_dir = CONFIG['cache_dir']
    # url.split('/')[2:] drops '<scheme>:' and the empty '//' part,
    # keeping the host as the first path component.
    parts = [p for p in url.split('/')[2:] if p not in ('.', '..')]
    return '/'.join([cache_dir] + parts)
def get_url(url):
    """Download *url* into the cache directory.

    A uwsgi cache entry keyed by the url acts as a cross-worker lock so
    the same file is never downloaded twice in parallel; other workers
    poll until the downloading worker removes the entry.

    Returns the HTTP status code of the transfer (a waiter that only
    waited for another worker's download always reports 200).
    """
    mypid = os.getpid()
    uwsgi.lock()
    otherpid = uwsgi.cache_get(url)
    if otherpid:
        uwsgi.unlock()
        # Another worker is fetching this url: poll until its lock
        # entry disappears, then report success without re-downloading.
        # NOTE(review): assumes the other worker succeeded; a failed
        # download still yields 200 here.
        while otherpid:
            log('D: [%d] waiting for pid %s to download %s' %
                (mypid, otherpid, url))
            time.sleep(1)
            otherpid = uwsgi.cache_get(url)
        return 200
    uwsgi.cache_set(url, str(mypid))
    uwsgi.unlock()
    dest = localfile(url)
    log('D: [%d] downloading %s to %s' % (mypid, url, dest))
    try:
        curl = pycurl.Curl()
        curl.setopt(curl.URL, url)
        curl.setopt(curl.FOLLOWLOCATION, True)
        path = '/'.join(dest.split('/')[:-1])
        if not os.path.exists(path):
            # parallel download of rpms in subdir will create it right now
            try:
                os.makedirs(path)
            except OSError as e:
                # this catches duplicate creation (so just W not E)
                # TODO: need to bypass the open() on real errors
                # like permissions
                log('W: [%d] OS error(%d): %s' %
                    (mypid, e.errno, e.strerror))
        with open(dest, 'wb') as fil:
            curl.setopt(curl.WRITEFUNCTION, fil.write)
            curl.perform()
    finally:
        # Always release the lock entry, even when the transfer raised;
        # otherwise workers waiting on this url would spin forever.
        uwsgi.cache_del(url)
    return curl.getinfo(curl.HTTP_CODE)
def application(env, start_response):
    """WSGI entry point: serve a cached file, fetching it first if
    missing or (for metadata files) expired.

    env: WSGI environ dict; only REQUEST_URI, REMOTE_ADDR,
         HTTP_USER_AGENT and wsgi.file_wrapper are consulted.
    Returns an iterable of bytes per PEP 3333.
    """
    must_fetch = False
    url = env.get('REQUEST_URI')
    log("I: client %s (%s) GET %s" % (env.get('REMOTE_ADDR'),
                                      env.get('HTTP_USER_AGENT'),
                                      url))
    lfile = localfile(url)
    # metadata files must expire or we will never get updates again
    lfile_name = lfile.split('/')[-1]
    if lfile_name in CONFIG['md_files'] and os.path.exists(lfile):
        mtime = os.path.getmtime(lfile)
        # time.time() is portable; time.strftime('%s') is a glibc-only
        # extension and breaks on other platforms
        now = time.time()
        delta = mtime + 60 * CONFIG['md_keep'] - now
        if delta < 0:
            must_fetch = True
        log('D: %s mtime=%s now=%s md_keep=%s delta=%s '
            'must_fetch=%s' %
            (lfile_name, mtime, now,
             60 * float(CONFIG['md_keep']), delta, must_fetch))
    if os.path.isdir(lfile):  # a little safeguard for browser
        start_response('422 Unprocessable Entry',
                       [('Content-Type', 'text/plain')])
        # WSGI response bodies must be bytes, not str (PEP 3333)
        return [b'Directory listing not supported.\n'
                b'This is rpmcache, use a proxy= line '
                b'in dnf.conf to use it.']
    if not os.path.exists(lfile) or must_fetch:
        log("W: fetching %s" % lfile)
        response = get_url(url)
        if response == 404:
            log("E: response=%s for %s" % (response, url))
            start_response('404 NOT FOUND',
                           [('Content-Type', 'text/plain')])
            # remove the empty/bogus file the failed download left behind
            os.unlink(localfile(url))
            # TODO: try other mirror?
            return [b'Not found']
        else:
            log("D: response=%s" % response)
    else:
        log("I: cache hit %s" % lfile)
    (mime_type, encoding) = mimetypes.guess_type('file://' + lfile)
    log("D: mime_type=%s encoding=%s" % (mime_type, encoding))
    size = os.path.getsize(lfile)
    log("D: size=%s" % size)
    headers = [
        # fall back to a generic type instead of sending the bogus
        # literal string 'None' as Content-Type
        ('Content-Type', mime_type or 'application/octet-stream'),
        ('Content-Length', str(size)),
        ('Content-Disposition', 'attachment; filename=' +
         str(lfile_name)),
    ]
    if encoding:  # only advertise an encoding when one was detected
        headers.append(('Content-Encoding', encoding))
    start_response('200 OK', headers)
    log("I: send file %s" % lfile)
    fil = open(lfile, 'rb')
    if 'wsgi.file_wrapper' in env:
        return env['wsgi.file_wrapper'](fil, 4096)
    # sentinel must be b'': the file is opened in binary mode, so the
    # original str sentinel '' never matched and the iterator looped
    # forever yielding empty bytes
    return iter(lambda: fil.read(4096), b'')
if __name__ == '__main__':
    # Smoke-test entry point for running outside uwsgi.
    def start_response(status, headers):
        """Minimal start_response stand-in when run as a plain script."""
        print(status)
        print(headers)
    print(application({'REQUEST_URI': '/'}, start_response))