def fetch(self, pac, prefix=''):
    """Download one package through the mirror list attached to *pac*.

    The payload is grabbed into a temporary file and only moved into
    ``pac.localdir`` on success; the temporary file is always removed.

    pac    -- package object providing urllist, filename, project, localdir
    prefix -- text prepended to the progress-meter line

    Exits the process via ``sys.exit(1)`` when all mirrors fail, unless
    the cpio fallback applies (errno 256 with cpio enabled).
    """
    # for use by the failure callback
    self.curpac = pac
    MirrorGroup._join_url = join_url
    mg = MirrorGroup(self.gr, pac.urllist,
                     failure_callback=(self.failureReport, (), {}))

    if self.http_debug:
        print('\nURLs to try for package \'%s\':' % pac, file=sys.stderr)
        print('\n'.join(pac.urllist), file=sys.stderr)
        print(file=sys.stderr)

    # Create the temporary file *before* entering the try block so that
    # `tmpfile` is always bound when the finally clause runs.  Previously
    # it was created inside the try, so a failure in NamedTemporaryFile()
    # raised a NameError in `finally` that masked the original error.
    tmpfile = tempfile.NamedTemporaryFile(prefix='osc_build', delete=False)
    tmpfile.close()  # only the name is needed; urlgrab writes the file itself
    try:
        mg.urlgrab(pac.filename, filename=tmpfile.name,
                   text='%s(%s) %s' % (prefix, pac.project, pac.filename))
        self.move_package(tmpfile.name, pac.localdir, pac)
    except URLGrabError as e:
        # errno 256: urlgrabber exhausted every mirror -- fall back to the
        # cpio download method when that is enabled.
        if self.enable_cpio and e.errno == 256:
            self.__add_cpio(pac)
            return
        print()
        print('Error:', e.strerror, file=sys.stderr)
        print('Failed to retrieve %s from the following locations '
              '(in order):' % pac.filename, file=sys.stderr)
        print('\n'.join(pac.urllist), file=sys.stderr)
        sys.exit(1)
    finally:
        # Clean up unless move_package() already moved the file away.
        if os.path.exists(tmpfile.name):
            os.unlink(tmpfile.name)
class FailoverTests(TestCase):
    """Verify that a MirrorGroup transparently fails over past bad mirrors."""

    def setUp(self):
        self.g = URLGrabber()
        # bad mirrors come first, so the group must fail over to succeed
        fullmirrors = [base_mirror_url + m + '/'
                       for m in (bad_mirrors + good_mirrors)]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_simple_grab(self):
        """test that the MG fails over past a bad mirror"""
        # NOTE: tempfile.mktemp() is deprecated/insecure, but the name must
        # be handed to urlgrab which creates the file itself.
        filename = tempfile.mktemp()
        url = 'reference'
        elist = []

        def cb(e, elist=elist):
            elist.append(e)

        self.mg.urlgrab(url, filename, failure_callback=cb)

        # use a context manager so the handle is closed even on failure
        with open(filename) as fo:
            contents = fo.read()

        # first be sure that the first mirror failed and that the
        # callback was called
        self.assertEqual(len(elist), 1)

        # now be sure that the second mirror succeeded and the correct
        # data was returned
        self.assertEqual(contents, reference_data)
class BasicTests(TestCase):
    """Basic MirrorGroup operations against known-good mirrors only."""

    def setUp(self):
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_urlgrab(self):
        """MirrorGroup.urlgrab"""
        # NOTE: mktemp() is deprecated but the name is handed to urlgrab,
        # which creates the file itself.
        filename = tempfile.mktemp()
        url = 'short_reference'
        self.mg.urlgrab(url, filename)
        # context manager replaces manual open/read/close
        with open(filename) as fo:
            data = fo.read()
        self.assertEqual(data, short_reference_data)

    def test_urlread(self):
        """MirrorGroup.urlread"""
        url = 'short_reference'
        data = self.mg.urlread(url)
        self.assertEqual(data, short_reference_data)

    def test_urlopen(self):
        """MirrorGroup.urlopen"""
        url = 'short_reference'
        fo = self.mg.urlopen(url)
        # urlgrabber file objects are closed explicitly; ensure close on error
        try:
            data = fo.read()
        finally:
            fo.close()
        self.assertEqual(data, short_reference_data)
def fetch(self, pac, prefix=''):
    """Download one package from its mirror list (Python 2 code).

    The file is grabbed into a mkstemp temp file and moved into
    pac.localdir on success.  On errno 256 (mirrors exhausted) it falls
    back to the cpio download path if enabled; otherwise it reports the
    failure and exits the process.
    """
    # for use by the failure callback
    self.curpac = pac
    # attach the module-level join_url helper to the class before use
    MirrorGroup._join_url = join_url
    mg = MirrorGroup(self.gr, pac.urllist, failure_callback=(self.failureReport,(),{}))
    if self.http_debug:
        print >>sys.stderr, '\nURLs to try for package \'%s\':' % pac
        print >>sys.stderr, '\n'.join(pac.urllist)
        print >>sys.stderr
    # download into a temp file first; move_package() puts it in place
    (fd, tmpfile) = tempfile.mkstemp(prefix='osc_build')
    try:
        try:
            mg.urlgrab(pac.filename, filename = tmpfile, text = '%s(%s) %s' %(prefix, pac.project, pac.filename))
            self.move_package(tmpfile, pac.localdir, pac)
        except URLGrabError, e:
            # errno 256: every mirror failed -- try the cpio fallback
            if self.enable_cpio and e.errno == 256:
                self.__add_cpio(pac)
                return
            print
            print >>sys.stderr, 'Error:', e.strerror
            print >>sys.stderr, 'Failed to retrieve %s from the following locations (in order):' % pac.filename
            print >>sys.stderr, '\n'.join(pac.urllist)
            sys.exit(1)
    finally:
        # close the mkstemp fd and remove the temp file unless it was moved
        os.close(fd)
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)
def fetch(self, pac, prefix=''):
    """Grab a single package via its mirror list (Python 2 code).

    Downloads into a temp file, then hands it to move_package(); on
    errno 256 (all mirrors failed) optionally falls back to cpio
    download, otherwise prints the failed URL list and exits.
    """
    # for use by the failure callback
    self.curpac = pac
    # install the module-level join_url helper on the MirrorGroup class
    MirrorGroup._join_url = join_url
    mg = MirrorGroup(self.gr, pac.urllist,
                     failure_callback=(self.failureReport, (), {}))
    if self.http_debug:
        print >> sys.stderr, '\nURLs to try for package \'%s\':' % pac
        print >> sys.stderr, '\n'.join(pac.urllist)
        print >> sys.stderr
    # temp file receives the download; moved into place only on success
    (fd, tmpfile) = tempfile.mkstemp(prefix='osc_build')
    try:
        try:
            mg.urlgrab(pac.filename, filename=tmpfile,
                       text='%s(%s) %s' % (prefix, pac.project, pac.filename))
            self.move_package(tmpfile, pac.localdir, pac)
        except URLGrabError, e:
            # errno 256 == mirrors exhausted; use the cpio path if enabled
            if self.enable_cpio and e.errno == 256:
                self.__add_cpio(pac)
                return
            print
            print >> sys.stderr, 'Error:', e.strerror
            print >> sys.stderr, 'Failed to retrieve %s from the following locations (in order):' % pac.filename
            print >> sys.stderr, '\n'.join(pac.urllist)
            sys.exit(1)
    finally:
        # always release the mkstemp descriptor and delete leftovers
        os.close(fd)
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)
class HttpReplyCode(TestCase):
    """Check that the HTTP status of a failed request reaches the
    MirrorGroup failure callback (Python 2 code: ``thread`` module and the
    ``async`` keyword argument, which is reserved in Python 3)."""

    def setUp(self):
        # minimal single-connection HTTP server: always answers with
        # self.reply as the status line and sends no body
        def server():
            import socket
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(('localhost', 2000)); s.listen(1)
            while 1:
                c, a = s.accept()
                # consume the request until the blank line ending the headers
                while not c.recv(4096).endswith('\r\n\r\n'):
                    pass
                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                c.close()
        import thread
        self.reply = 503, "Busy"
        # NOTE(review): no shutdown path -- the server thread keeps running
        # and port 2000 stays bound after the test finishes.
        thread.start_new_thread(server, ())

        # failure callback records the HTTP code; returning {} keeps the
        # default failover action
        def failure(obj):
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://localhost:2000/'],
                              failure_callback = failure)

    def test_grab(self):
        # single download: grab raises, and the callback saw the 503
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503); del self.code
        # parallel download: failfunc collects errno 256 (mirrors exhausted)
        err = []
        self.mg.urlgrab('foo', async = True, failfunc = err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503); del self.code
class HttpReplyCode(TestCase):
    """HTTP tests against an in-process server: reply-code propagation,
    client-side range trimming, and cache-bypassing retry (mixed py2/py3
    era code: string socket data and ``async=True``, but ``except ... as``)."""

    def setUp(self):
        # start the server
        self.exit = False
        self.process = lambda data: None  # per-test hook to inspect requests
        def server():
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(LOCALPORT); s.listen(1)
            while 1:
                c, a = s.accept()
                # tearDown sets self.exit, then connects once to wake us
                if self.exit:
                    c.close(); break
                data = ''
                # NOTE(review): `data` is overwritten, not accumulated --
                # this relies on the header terminator arriving in the
                # final recv() chunk.
                while not data.endswith('\r\n\r\n'):
                    data = c.recv(4096)
                self.process(data)
                # status line from self.reply; body only when content is set
                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False
        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            # record the HTTP code of the failure; {} keeps default action
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server: raise the flag, then poke it awake
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(LOCALPORT); s.close() # wake it up
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, "Busy"
        self.content = None
        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503); del self.code
        # multi
        err = []
        self.mg.urlgrab('foo', async = True, failfunc = err.append)
        urlgrabber.grabber.parallel_wait()
        # errno 256: no more mirrors to try
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, "OK"
        self.content = 'ABCDEF'
        # no range specified
        data = self.mg.urlread('foo')
        self.assertEquals(data, 'ABCDEF')
        # range must be trimmed by the client, since the server ignores it
        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEquals(data, 'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'
        def process(data):
            # serve the fresh version only when the client asks to skip caches
            if 'Pragma:no-cache' in data:
                self.content = 'version2'
            else:
                self.content = 'version1'
        def checkfunc_read(obj):
            # reject the stale payload so urlgrabber retries with no-cache
            if obj.data == 'version1':
                raise URLGrabError(-1, 'Outdated version of foo')
        def checkfunc_grab(obj):
            with open('foo') as f:
                if f.read() == 'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')
        self.process = process
        self.reply = 200, "OK"
        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True
        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))
        # multi
        opts.checkfunc = checkfunc_grab
        self.mg.urlgrab('foo', async=True)
        try:
            urlgrabber.grabber.parallel_wait()
        except URLGrabError as e:
            self.fail(str(e))
class ActionTests(TestCase):
    """Exercise the MirrorGroup failure 'action' policies: the default,
    an instance-level default_action, a method-level default_action, and
    an action returned from a failure callback."""

    def setUp(self):
        self.snarfed_logs = []
        self.db = urlgrabber.mirror.DEBUG
        urlgrabber.mirror.DEBUG = FakeLogger()
        self.mirrors = ['a', 'b', 'c', 'd', 'e', 'f']
        # first two grab attempts fail, the third succeeds
        self.g = FakeGrabber([URLGrabError(3), URLGrabError(3), 'filename'])
        self.mg = MirrorGroup(self.g, self.mirrors)

    def tearDown(self):
        # restore the logger we replaced in setUp
        urlgrabber.mirror.DEBUG = self.db

    def test_defaults(self):
        'test default action policy'
        self.mg.urlgrab('somefile')
        expected_calls = [(m + '/' + 'somefile', None)
                          for m in self.mirrors[:3]]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR mirrors: [b c d e f] 0',
             'MAIN mirrors: [a b c d e f] 1',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR mirrors: [c d e f] 0',
             'MAIN mirrors: [a b c d e f] 2',
             'MIRROR: trying somefile -> c/somefile']
        # assertEquals was removed in Python 3.12; use assertEqual
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_instance_action(self):
        'test the effects of passed-in default_action'
        self.mg.default_action = {'remove_master': 1}
        self.mg.urlgrab('somefile')
        expected_calls = [(m + '/' + 'somefile', None)
                          for m in self.mirrors[:3]]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR mirrors: [b c d e f] 0',
             'MAIN mirrors: [b c d e f] 0',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR mirrors: [c d e f] 0',
             'MAIN mirrors: [c d e f] 0',
             'MIRROR: trying somefile -> c/somefile']
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_method_action(self):
        'test the effects of method-level default_action'
        self.mg.urlgrab('somefile', default_action={'remove_master': 1})
        expected_calls = [(m + '/' + 'somefile', None)
                          for m in self.mirrors[:3]]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR mirrors: [b c d e f] 0',
             'MAIN mirrors: [b c d e f] 0',
             'MIRROR: trying somefile -> b/somefile',
             'MIRROR: failed',
             'GR mirrors: [c d e f] 0',
             'MAIN mirrors: [c d e f] 0',
             'MIRROR: trying somefile -> c/somefile']
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def callback(self, e):
        # failure callback used by test_callback_action: abort immediately
        return {'fail': 1}

    def test_callback_action(self):
        'test the effects of a callback-returned action'
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile',
                          failure_callback=self.callback)
        # only the first mirror is ever tried
        expected_calls = [(m + '/' + 'somefile', None)
                          for m in self.mirrors[:1]]
        expected_logs = \
            ['MIRROR: trying somefile -> a/somefile',
             'MIRROR: failed',
             'GR mirrors: [b c d e f] 0',
             'MAIN mirrors: [a b c d e f] 1']
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)
class HttpReplyCode(TestCase):
    """Python 3 version of the in-process HTTP server tests: reply-code
    propagation, client-side range handling, and cache-bypassing retry."""

    def setUp(self):
        # start the server
        self.exit = False
        self.process = lambda data: None  # per-test hook to inspect requests
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 0))  # port 0: let the OS pick a free port
        s.listen(1)
        self.port = s.getsockname()[1]

        def server():
            while True:
                c, a = s.accept()
                # tearDown sets self.exit and connects once to wake us up
                if self.exit:
                    c.close()
                    break
                data = b''
                # NOTE(review): `data` is overwritten, not accumulated --
                # relies on the header terminator arriving in the last chunk
                while not data.endswith(b'\r\n\r\n'):
                    data = c.recv(4096)
                self.process(data)
                # status line from self.reply; a body only if content is set
                c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False

        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            # record the HTTP code of the failed attempt; returning {}
            # keeps the default failover action
            self.code = getattr(obj.exception, 'code', None)
            return {}

        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                              failure_callback=failure)

    def tearDown(self):
        # shut down the server: raise the exit flag, then poke it awake
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(('localhost', self.port))  # wake it up
        except ConnectionRefusedError:
            # already gone?
            pass
        s.close()
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, b'Busy'
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEqual(self.code, 503)
        del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async_=True, failfunc=err.append)
        urlgrabber.grabber.parallel_wait()
        # errno 256: no more mirrors to try
        self.assertEqual([e.exception.errno for e in err], [256])
        self.assertEqual(self.code, 503)
        del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, b'OK'
        self.content = b'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEqual(data, b'ABCDEF')

        # the client must trim the range itself, since the server ignores it
        data = self.mg.urlread('foo', range=(3, 5))
        self.assertEqual(data, b'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'

        def process(data):
            # serve the fresh version only when the client asks to skip caches
            if b'Pragma:no-cache' in data:
                self.content = b'version2'
            else:
                self.content = b'version1'

        def checkfunc_read(obj):
            # reject the stale payload so urlgrabber retries with no-cache
            if obj.data == b'version1':
                raise URLGrabError(-1, 'Outdated version of foo')
            elif obj.data != b'version2':
                self.fail('Unexpected file content')

        def checkfunc_grab(obj):
            with open('foo') as f:
                data = f.read()
                if data == 'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')
                elif data != 'version2':
                    self.fail('Unexpected file content')

        self.process = process
        self.reply = 200, b'OK'
        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True

        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))

        # multi
        opts.checkfunc = checkfunc_grab
        self.mg.urlgrab('foo', async_=True)
        try:
            urlgrabber.grabber.parallel_wait()
        except URLGrabError as e:
            self.fail(str(e))
class SourceDownloader(object):
    """Download source tarballs into SOURCE_CACHE_DIR, optionally through a
    mirror list (Python 2 code: ``raise X, msg`` / ``except X, e`` syntax)."""

    def __init__(self, pakfire, mirrors=None):
        # pakfire  -- owning pakfire instance (used for offline check)
        # mirrors  -- optional iterable of mirror base URLs
        self.pakfire = pakfire
        self.grabber = PakfireGrabber(
            self.pakfire,
            progress_obj = TextMeter(),
        )
        if mirrors:
            # wrap the grabber so downloads fail over across the mirrors
            self.grabber = MirrorGroup(self.grabber,
                [{ "mirror" : m.encode("utf-8") } for m in mirrors])

    def download(self, files):
        """Ensure every name in *files* exists in the source cache.

        Returns the list of absolute paths (already-cached + downloaded).
        Raises OfflineModeError in offline mode and DownloadError on a
        failed or empty download.
        """
        existant_files = []
        download_files = []
        for file in files:
            filename = os.path.join(SOURCE_CACHE_DIR, file)
            log.debug("Checking existance of %s..." % filename)
            # a zero-byte cached file counts as missing and is re-fetched
            if os.path.exists(filename) and os.path.getsize(filename):
                log.debug("...exists!")
                existant_files.append(filename)
            else:
                log.debug("...does not exist!")
                download_files.append(filename)
        if download_files:
            log.info(_("Downloading source files:"))
            if self.pakfire.offline:
                raise OfflineModeError, _("Cannot download source code in offline mode.")
            # Create source download directory.
            if not os.path.exists(SOURCE_CACHE_DIR):
                os.makedirs(SOURCE_CACHE_DIR)
            for filename in download_files:
                try:
                    self.grabber.urlgrab(os.path.basename(filename), filename=filename)
                except URLGrabError, e:
                    # Remove partly downloaded file.
                    try:
                        os.unlink(filename)
                    except OSError:
                        pass
                    raise DownloadError, "%s %s" % (os.path.basename(filename), e)
                # Check if the downloaded file was empty.
                if os.path.getsize(filename) == 0:
                    # Remove the file and raise an error.
                    os.unlink(filename)
                    raise DownloadError, _("Downloaded empty file: %s") \
                        % os.path.basename(filename)
            log.info("")
        return existant_files + download_files
#!/usr/bin/python3
"""Small smoke-test script: fetch one URL directly, then grab a file
through a two-mirror MirrorGroup."""
from urlgrabber import urlopen
from urlgrabber.grabber import URLGrabber
from urlgrabber.mirror import MirrorGroup

# plain single-URL fetch and dump
response = urlopen('http://localhost')
payload = response.read()
print(payload)

# fetch the same file name through a group of two mirrors
grabber = URLGrabber()
mirror_group = MirrorGroup(grabber, ['http://localhost2/', 'http://me.myself/'])
mirror_group.urlgrab('test.txt')
class HttpReplyCode(TestCase):
    """Reply-code and range tests with an in-process server (Python 2
    code: string socket data, ``thread`` module, ``async`` keyword arg)."""

    def setUp(self):
        # start the server
        self.exit = False
        def server():
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(LOCALPORT); s.listen(1)
            while 1:
                c, a = s.accept()
                # tearDown raises self.exit, then connects once to wake us
                if self.exit:
                    c.close(); break
                # consume the request headers
                while not c.recv(4096).endswith('\r\n\r\n'):
                    pass
                # status line from self.reply; body only if content is set
                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False
        thread.start_new_thread(server, ())

        # create grabber and mirror group objects
        def failure(obj):
            # record the HTTP code of the failure; {} keeps default action
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(LOCALPORT); s.close() # wake it up
        # NOTE(review): busy-waits until the server thread resets the flag;
        # a threading.Thread with join() would avoid spinning the CPU here.
        while self.exit: pass # poor man's join

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, "Busy"
        self.content = None
        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503); del self.code
        # multi
        err = []
        self.mg.urlgrab('foo', async = True, failfunc = err.append)
        urlgrabber.grabber.parallel_wait()
        # errno 256: no more mirrors to try
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, "OK"
        self.content = 'ABCDEF'
        # no range specified
        data = self.mg.urlread('foo')
        self.assertEquals(data, 'ABCDEF')
        # the client trims the range itself, since the server ignores it
        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEquals(data, 'DE')
class ActionTests(TestCase):
    """Exercise the MirrorGroup failure 'action' policies: the default,
    an instance-level default_action, a method-level default_action, and
    an action returned from a failure callback."""

    def setUp(self):
        self.snarfed_logs = []
        self.db = urlgrabber.mirror.DEBUG
        urlgrabber.mirror.DEBUG = FakeLogger()
        self.mirrors = ["a", "b", "c", "d", "e", "f"]
        # first two grab attempts fail, the third succeeds
        self.g = FakeGrabber([URLGrabError(3), URLGrabError(3), "filename"])
        self.mg = MirrorGroup(self.g, self.mirrors)

    def tearDown(self):
        # restore the logger we replaced in setUp
        urlgrabber.mirror.DEBUG = self.db

    def test_defaults(self):
        "test default action policy"
        self.mg.urlgrab("somefile")
        expected_calls = [(m + "/" + "somefile", None) for m in self.mirrors[:3]]
        expected_logs = [
            "MIRROR: trying somefile -> a/somefile",
            "MIRROR: failed",
            "GR mirrors: [b c d e f] 0",
            "MAIN mirrors: [a b c d e f] 1",
            "MIRROR: trying somefile -> b/somefile",
            "MIRROR: failed",
            "GR mirrors: [c d e f] 0",
            "MAIN mirrors: [a b c d e f] 2",
            "MIRROR: trying somefile -> c/somefile",
        ]
        # assertEquals was removed in Python 3.12; use assertEqual
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_instance_action(self):
        "test the effects of passed-in default_action"
        self.mg.default_action = {"remove_master": 1}
        self.mg.urlgrab("somefile")
        expected_calls = [(m + "/" + "somefile", None) for m in self.mirrors[:3]]
        expected_logs = [
            "MIRROR: trying somefile -> a/somefile",
            "MIRROR: failed",
            "GR mirrors: [b c d e f] 0",
            "MAIN mirrors: [b c d e f] 0",
            "MIRROR: trying somefile -> b/somefile",
            "MIRROR: failed",
            "GR mirrors: [c d e f] 0",
            "MAIN mirrors: [c d e f] 0",
            "MIRROR: trying somefile -> c/somefile",
        ]
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def test_method_action(self):
        "test the effects of method-level default_action"
        self.mg.urlgrab("somefile", default_action={"remove_master": 1})
        expected_calls = [(m + "/" + "somefile", None) for m in self.mirrors[:3]]
        expected_logs = [
            "MIRROR: trying somefile -> a/somefile",
            "MIRROR: failed",
            "GR mirrors: [b c d e f] 0",
            "MAIN mirrors: [b c d e f] 0",
            "MIRROR: trying somefile -> b/somefile",
            "MIRROR: failed",
            "GR mirrors: [c d e f] 0",
            "MAIN mirrors: [c d e f] 0",
            "MIRROR: trying somefile -> c/somefile",
        ]
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)

    def callback(self, e):
        # failure callback used by test_callback_action: abort immediately
        return {"fail": 1}

    def test_callback_action(self):
        "test the effects of a callback-returned action"
        self.assertRaises(URLGrabError, self.mg.urlgrab, "somefile",
                          failure_callback=self.callback)
        # only the first mirror is ever tried
        expected_calls = [(m + "/" + "somefile", None) for m in self.mirrors[:1]]
        expected_logs = [
            "MIRROR: trying somefile -> a/somefile",
            "MIRROR: failed",
            "GR mirrors: [b c d e f] 0",
            "MAIN mirrors: [a b c d e f] 1",
        ]
        self.assertEqual(self.g.calls, expected_calls)
        self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs)