Beispiel #1
0
class BasicTests(TestCase):
    """Exercise urlgrab/urlread/urlopen on a MirrorGroup built from
    ``good_mirrors``."""

    def setUp(self):
        """Create a URLGrabber and a MirrorGroup over all ``good_mirrors``."""
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_urlgrab(self):
        """MirrorGroup.urlgrab"""
        # Imported locally so this test does not depend on the file-level
        # import block providing ``os``.
        import os

        # mkstemp() creates the file atomically; mktemp() only returns a
        # name, which is race-prone and deprecated.
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            url = 'short_reference'
            self.mg.urlgrab(url, filename)

            with open(filename) as fo:
                data = fo.read()
        finally:
            # Do not leave the downloaded file behind in the temp directory.
            if os.path.exists(filename):
                os.unlink(filename)

        self.assertEqual(data, short_reference_data)

    def test_urlread(self):
        """MirrorGroup.urlread"""
        url = 'short_reference'
        data = self.mg.urlread(url)

        self.assertEqual(data, short_reference_data)

    def test_urlopen(self):
        """MirrorGroup.urlopen"""
        url = 'short_reference'
        fo = self.mg.urlopen(url)
        try:
            data = fo.read()
        finally:
            # Close the handle even if read() raises.
            fo.close()

        self.assertEqual(data, short_reference_data)
Beispiel #2
0
class CallbackTests(TestCase):
    """Check how MirrorGroup invokes a user-supplied failure callback."""

    def setUp(self):
        """Build a MirrorGroup whose list is ``bad_mirrors`` followed by
        ``good_mirrors``."""
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/'
                       for m in (bad_mirrors + good_mirrors)]
        if hasattr(urlgrabber.grabber, '_TH'):
            # test assumes mirrors are not re-ordered
            urlgrabber.grabber._TH.hosts.clear()
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_failure_callback(self):
        "test that MG executes the failure callback correctly"
        tricky_list = []

        def failure_callback(cb_obj, tl):
            # Record the stringified exception so it can be inspected below.
            tl.append(str(cb_obj.exception))

        # Callback given as a (function, args, kwargs) triple.
        self.mg.failure_callback = failure_callback, (tricky_list, ), {}
        data = self.mg.urlread('reference')
        self.assertEqual(data, reference_data)
        # Fail with a clear message instead of an IndexError if the callback
        # was never invoked.
        self.assertTrue(tricky_list, 'failure callback was never invoked')
        self.assertEqual(tricky_list[0][:25],
                         '[Errno 14] HTTP Error 403')

    def test_callback_reraise(self):
        "test that the callback can correctly re-raise the exception"

        def failure_callback(cb_obj):
            raise cb_obj.exception

        self.mg.failure_callback = failure_callback
        self.assertRaises(URLGrabError, self.mg.urlread, 'reference')
Beispiel #3
0
class BasicTests(TestCase):
    """Exercise urlgrab/urlread/urlopen on a MirrorGroup built from
    ``good_mirrors``."""

    def setUp(self):
        """Create a URLGrabber and a MirrorGroup over all ``good_mirrors``."""
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/' for m in good_mirrors]
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_urlgrab(self):
        """MirrorGroup.urlgrab"""
        # Imported locally so this test does not depend on the file-level
        # import block providing ``os``.
        import os

        # mkstemp() creates the file atomically; mktemp() only returns a
        # name, which is race-prone and deprecated.
        fd, filename = tempfile.mkstemp()
        os.close(fd)
        try:
            url = 'short_reference'
            self.mg.urlgrab(url, filename)

            with open(filename) as fo:
                data = fo.read()
        finally:
            # Do not leave the downloaded file behind in the temp directory.
            if os.path.exists(filename):
                os.unlink(filename)

        self.assertEqual(data, short_reference_data)

    def test_urlread(self):
        """MirrorGroup.urlread"""
        url = 'short_reference'
        data = self.mg.urlread(url)

        self.assertEqual(data, short_reference_data)

    def test_urlopen(self):
        """MirrorGroup.urlopen"""
        url = 'short_reference'
        fo = self.mg.urlopen(url)
        try:
            data = fo.read()
        finally:
            # Close the handle even if read() raises.
            fo.close()

        self.assertEqual(data, short_reference_data)
Beispiel #4
0
class CallbackTests(TestCase):
    """Check how MirrorGroup invokes a user-supplied failure callback."""

    def setUp(self):
        """Build a MirrorGroup whose list is ``bad_mirrors`` followed by
        ``good_mirrors``."""
        self.g = URLGrabber()
        fullmirrors = [base_mirror_url + m + '/'
                       for m in (bad_mirrors + good_mirrors)]
        if hasattr(urlgrabber.grabber, '_TH'):
            # test assumes mirrors are not re-ordered
            urlgrabber.grabber._TH.hosts.clear()
        self.mg = MirrorGroup(self.g, fullmirrors)

    def test_failure_callback(self):
        "test that MG executes the failure callback correctly"
        tricky_list = []

        def failure_callback(cb_obj, tl):
            # Record the stringified exception so it can be inspected below.
            tl.append(str(cb_obj.exception))

        # Callback given as a (function, args, kwargs) triple.
        self.mg.failure_callback = failure_callback, (tricky_list, ), {}
        data = self.mg.urlread('reference')
        # assertEqual reports both values on mismatch, unlike assertTrue(==).
        self.assertEqual(data, reference_data)
        # Fail with a clear message instead of an IndexError if the callback
        # was never invoked.
        self.assertTrue(tricky_list, 'failure callback was never invoked')
        self.assertEqual(tricky_list[0][:25], '[Errno 14] HTTP Error 404')

    def test_callback_reraise(self):
        "test that the callback can correctly re-raise the exception"

        def failure_callback(cb_obj):
            raise cb_obj.exception

        self.mg.failure_callback = failure_callback
        self.assertRaises(URLGrabError, self.mg.urlread, 'reference')
Beispiel #5
0
class HttpReplyCode(TestCase):
    def setUp(self):
        # start the server
        self.exit = False
        self.process = lambda data: None
        def server():
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(LOCALPORT); s.listen(1)
            while 1:
                c, a = s.accept()
                if self.exit: c.close(); break
                data = ''
                while not data.endswith('\r\n\r\n'):
                    data = c.recv(4096)
                self.process(data)
                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False
        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g  = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(LOCALPORT); s.close() # wake it up
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, "Busy"
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503); del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async = True, failfunc = err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, "OK"
        self.content = 'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEquals(data, 'ABCDEF')

        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEquals(data, 'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'
        def process(data):
            if 'Pragma:no-cache' in data:
                self.content = 'version2'
            else:
                self.content = 'version1'

        def checkfunc_read(obj):
            if obj.data == 'version1':
                raise URLGrabError(-1, 'Outdated version of foo')

        def checkfunc_grab(obj):
            with open('foo') as f:
                if f.read() == 'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')

        self.process = process
        self.reply = 200, "OK"

        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True

        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))

        # multi
        opts.checkfunc = checkfunc_grab
        self.mg.urlgrab('foo', async=True)
        try:
            urlgrabber.grabber.parallel_wait()
        except URLGrabError as e:
            self.fail(str(e))
Beispiel #6
0
class HttpReplyCode(TestCase):
    """Tests run against a tiny in-process HTTP server whose reply is
    controlled through ``self.reply`` and ``self.content``."""

    def setUp(self):
        """Start the server thread and build the grabber / mirror group."""
        # start the server
        self.exit = False
        self.process = lambda data: None

        listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Port 0 lets the OS pick a free port; read it back for the client.
        listener.bind(('localhost', 0))
        listener.listen(1)
        self.port = listener.getsockname()[1]

        def server():
            terminator = b'\r\n\r\n'
            while True:
                c, a = listener.accept()
                if self.exit:
                    c.close()
                    break
                # Accumulate the request: it may arrive in several chunks and
                # process() must see all of it.
                data = b''
                while not data.endswith(terminator):
                    chunk = c.recv(4096)
                    if not chunk:
                        # Peer closed the connection early; stop reading
                        # instead of spinning on empty reads.
                        break
                    data += chunk
                if not data.endswith(terminator):
                    c.close()
                    continue
                self.process(data)
                c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall(b'Content-Length: %d\r\n\r\n' %
                              len(self.content))
                    c.sendall(self.content)
                c.close()
            listener.close()
            self.exit = False

        self.thread = threading.Thread(target=server)
        self.thread.start()

        # create grabber and mirror group objects
        def failure(obj):
            # Remember the HTTP code carried by the exception, if any.
            self.code = getattr(obj.exception, 'code', None)
            return {}

        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                              failure_callback=failure)

    def tearDown(self):
        """Ask the server loop to exit, wake it up and join the thread."""
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(('localhost', self.port))  # wake it up
        except ConnectionRefusedError:
            # already gone?
            pass
        s.close()
        self.thread.join()

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, b'Busy'
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEqual(self.code, 503)
        del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async_=True, failfunc=err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEqual([e.exception.errno for e in err], [256])
        self.assertEqual(self.code, 503)
        del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, b'OK'
        self.content = b'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEqual(data, b'ABCDEF')

        data = self.mg.urlread('foo', range=(3, 5))
        self.assertEqual(data, b'DE')

    def test_retry_no_cache(self):
        'test bypassing proxy cache on failure'

        def process(data):
            # Serve the fresh version only when the request asks to bypass
            # caches.
            if b'Pragma:no-cache' in data:
                self.content = b'version2'
            else:
                self.content = b'version1'

        def checkfunc_read(obj):
            if obj.data == b'version1':
                raise URLGrabError(-1, 'Outdated version of foo')
            elif obj.data != b'version2':
                self.fail('Unexpected file content')

        def checkfunc_grab(obj):
            # Text-mode read, hence the str (not bytes) comparisons below.
            with open('foo') as f:
                data = f.read()
                if data == 'version1':
                    raise URLGrabError(-1, 'Outdated version of foo')
                elif data != 'version2':
                    self.fail('Unexpected file content')

        self.process = process
        self.reply = 200, b'OK'

        opts = self.g.opts
        opts.retry = 3
        opts.retry_no_cache = True

        # single
        opts.checkfunc = checkfunc_read
        try:
            self.mg.urlread('foo')
        except URLGrabError as e:
            self.fail(str(e))

        # multi
        opts.checkfunc = checkfunc_grab
        self.mg.urlgrab('foo', async_=True)
        try:
            urlgrabber.grabber.parallel_wait()
        except URLGrabError as e:
            self.fail(str(e))
Beispiel #7
0
class HttpReplyCode(TestCase):
    def setUp(self):
        # start the server
        self.exit = False
        def server():
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(LOCALPORT); s.listen(1)
            while 1:
                c, a = s.accept()
                if self.exit: c.close(); break
                while not c.recv(4096).endswith('\r\n\r\n'): pass
                c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                if self.content is not None:
                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
                    c.sendall(self.content)
                c.close()
            s.close()
            self.exit = False
        thread.start_new_thread(server, ())

        # create grabber and mirror group objects
        def failure(obj):
            self.code = getattr(obj.exception, 'code', None)
            return {}
        self.g  = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                              failure_callback = failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(LOCALPORT); s.close() # wake it up
        while self.exit: pass # poor man's join

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, "Busy"
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503); del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async = True, failfunc = err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503); del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, "OK"
        self.content = 'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEquals(data, 'ABCDEF')

        data = self.mg.urlread('foo', range = (3, 5))
        self.assertEquals(data, 'DE')
class HttpReplyCode(TestCase):
    def setUp(self):
        # start the server
        self.exit = False

        def server():
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(LOCALPORT)
            s.listen(1)
            while 1:
                c, a = s.accept()
                if self.exit:
                    c.close()
                    break
                ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n'
                while not c.recv(4096).endswith(ending_compat):
                    pass
                http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply
                c.sendall(http_compat if not six.PY3 else http_compat.
                          encode('utf-8'))
                if self.content is not None:
                    cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(
                        self.content)
                    c.sendall(cont_length_compat if not six.PY3 else
                              cont_length_compat.encode('utf-8'))
                    c.sendall(self.content if not six.PY3 else self.content.
                              encode('utf-8'))
                c.close()
            s.close()
            self.exit = False

        thread.start_new_thread(server, ())

        # create grabber and mirror group objects
        def failure(obj):
            self.code = getattr(obj.exception, 'code', None)
            return {}

        self.g = URLGrabber()
        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                              failure_callback=failure)

    def tearDown(self):
        # shut down the server
        self.exit = True
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(LOCALPORT)
        s.close()  # wake it up
        while self.exit:
            pass  # poor man's join

    def test_grab(self):
        'tests the propagation of HTTP reply code'
        self.reply = 503, "Busy"
        self.content = None

        # single
        self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
        self.assertEquals(self.code, 503)
        del self.code

        # multi
        err = []
        self.mg.urlgrab('foo', async=True, failfunc=err.append)
        urlgrabber.grabber.parallel_wait()
        self.assertEquals([e.exception.errno for e in err], [256])
        self.assertEquals(self.code, 503)
        del self.code

    def test_range(self):
        'test client-side processing of HTTP ranges'
        # server does not process ranges
        self.reply = 200, "OK"
        self.content = 'ABCDEF'

        # no range specified
        data = self.mg.urlread('foo')
        self.assertEquals(data, 'ABCDEF')

        data = self.mg.urlread('foo', range=(3, 5))
        self.assertEquals(data, 'DE')