def test_fetch():
    '''test URL fetching'''
    # a malformed URL comes back as an empty string
    assert utils.fetch_url('1234') == ''
    # an HTTP error status yields None
    assert utils.fetch_url('https://httpbin.org/status/404') is None
    # a gzip-prefixed payload still produces a decoded result
    assert utils.decode_response(b'\x1f\x8babcdef') is not None
    # certificate verification can be switched off
    assert utils.fetch_url('https://expired.badssl.com/', no_ssl=True) is not None
def test_download():
    '''test page download and command-line interface'''
    testargs = ['', '-v']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    # whitespace-only and oversized inputs are rejected
    assert cli.examine(' ', args) is None
    assert cli.examine('0' * int(10e7), args) is None
    # a link page yields no extractable text, a real HTML page does
    for target, extractable in (
        ('https://httpbin.org/links/2/2', False),
        ('https://httpbin.org/html', True),
    ):
        downloaded = utils.fetch_url(target)
        assert downloaded is not None
        result = cli.examine(downloaded, args, target)
        assert (result is not None) if extractable else (result is None)
    # multiprocessing over a single-domain URL store
    statuses = ('301', '304', '200', '300', '400', '500')
    domain_dict = {
        'httpbin.org': ['https://httpbin.org/status/' + code for code in statuses]
    }
    assert cli_utils.multi_threaded_processing(domain_dict, args, 0.25, None) is None
def test_download():
    '''test page download and command-line interface'''
    testargs = ['', '-v']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    # bogus inputs (missing, whitespace-only, oversized) are all rejected
    for bogus in (None, ' ', '0' * int(10e7)):
        assert cli.examine(bogus, args) is None
    # a link page yields no extractable text, a real HTML page does
    for target, extractable in (
        ('https://httpbin.org/links/2/2', False),
        ('https://httpbin.org/html', True),
    ):
        downloaded = utils.fetch_url(target)
        assert downloaded is not None
        result = cli.examine(downloaded, args, target)
        assert (result is not None) if extractable else (result is None)
    # multiprocessing: only the 301 redirect is handed back for retry
    statuses = ('301', '304', '200', '300', '400', '505')
    domain_dict = {
        'httpbin.org': ['https://httpbin.org/status/' + code for code in statuses]
    }
    expected = (['https://httpbin.org/status/301'], None)
    assert cli_utils.multi_threaded_processing(domain_dict, args, 0.25, None) == expected
    # test backoff algorithm
    testdict, backoffdict = {}, {}
    # no backoff entry: URL is drawn right away, sleep time unchanged
    testdict['test.org'] = ['http://test.org/1']
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 0) == ('http://test.org/1', {}, {}, 0)
    # past timestamp: entry is stale, URL drawn anyway
    testdict['test.org'] = ['http://test.org/1']
    backoffdict['test.org'] = datetime(2019, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 0) == ('http://test.org/1', {}, {}, 0)
    # past timestamp with non-zero counter: counter is carried over
    testdict['test.org'] = ['http://test.org/1']
    backoffdict['test.org'] = datetime(2019, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 3) == ('http://test.org/1', {}, {}, 3)
    # future timestamp: counter comes back reset to 0
    testdict['test.org'] = ['http://test.org/1']
    backoffdict['test.org'] = datetime(2030, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 3) == ('http://test.org/1', {}, {}, 0)
def test_download():
    '''test page download'''
    # an HTTP error status yields None
    assert utils.fetch_url('https://httpbin.org/status/404') is None
    # empty body and link list download fine but extract nothing;
    # a real HTML document produces a result
    for target, extractable in (
        ('https://httpbin.org/status/200', False),
        ('https://httpbin.org/links/2/2', False),
        ('https://httpbin.org/html', True),
    ):
        downloaded = utils.fetch_url(target)
        assert downloaded is not None
        result = cli.examine(downloaded, target, False, True)
        assert (result is not None) if extractable else (result is None)
def test_download():
    '''test page download and command-line interface'''
    testargs = ['', '-v']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    # bogus inputs (missing, whitespace-only, oversized) are all rejected
    for bogus in (None, ' ', '0' * int(10e7)):
        assert cli.examine(bogus, args) is None
    # a real HTML document produces a result
    url = 'https://httpbin.org/html'
    downloaded = utils.fetch_url(url)
    assert downloaded is not None
    assert cli.examine(downloaded, args, url) is not None
    # single/multiprocessing: base URL mapped to a list of paths
    paths = ['/status/301', '/status/304', '/status/200',
             '/status/300', '/status/400', '/status/505']
    domain_dict = {'https://httpbin.org': paths}
    args.archived = True
    args.config_file = os.path.join(RESOURCES_DIR, 'newsettings.cfg')
    config = use_config(filename=args.config_file)
    results = cli_utils.download_queue_processing(domain_dict, args, None, config)
    # five entries processed, nothing left over in the second slot
    assert len(results[0]) == 5 and results[1] is None
    # test backoff algorithm
    testdict, backoffdict = {}, {}
    # no backoff entry: URL is drawn right away, sleep time unchanged
    testdict['http://test.org'] = ['/1']
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 0) == ('http://test.org/1', {}, {}, 0)
    # past timestamp: entry is stale, URL drawn anyway
    # NOTE(review): backoff key is 'test.org' while the URL store key is
    # 'http://test.org' — presumably intentional, verify against cli_utils
    testdict['http://test.org'] = ['/1']
    backoffdict['test.org'] = datetime(2019, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 0) == ('http://test.org/1', {}, {}, 0)
    # past timestamp with non-zero counter: counter is carried over
    testdict['http://test.org'] = ['/1']
    backoffdict['test.org'] = datetime(2019, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 3) == ('http://test.org/1', {}, {}, 3)
    # future timestamp: counter comes back reset to 0
    testdict['http://test.org'] = ['/1']
    backoffdict['test.org'] = datetime(2030, 5, 18, 15, 17, 8, 132263)
    assert cli_utils.draw_backoff_url(testdict, backoffdict, 0, 3) == ('http://test.org/1', {}, {}, 0)
def test_download():
    '''test page download and command-line interface'''
    testargs = ['', '-v']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    # whitespace-only and oversized inputs are rejected
    assert cli.examine(' ', args) is None
    assert cli.examine('0' * int(10e7), args) is None
    # an HTTP error status yields None
    assert utils.fetch_url('https://httpbin.org/status/404') is None
    # empty body is discarded as too small
    url = 'https://httpbin.org/status/200'
    downloaded = utils.fetch_url(url)
    assert downloaded is None  # too small
    assert cli.examine(downloaded, args, url) is None
    # link page: downloads fine but extracts nothing
    url = 'https://httpbin.org/links/2/2'
    downloaded = utils.fetch_url(url)
    assert downloaded is not None
    assert cli.examine(downloaded, args, url) is None
    # real HTML document: extraction succeeds
    url = 'https://httpbin.org/html'
    downloaded = utils.fetch_url(url)
    assert downloaded is not None
    assert cli.examine(downloaded, args, url) is not None
def test_fetch():
    '''test URL fetching'''
    # basic cases: bogus URL, error status, raw gzip payload, expired certificate
    assert utils.fetch_url('1234') == ''
    assert utils.fetch_url('https://httpbin.org/status/404') is None
    assert utils.decode_response(b'\x1f\x8babcdef') is not None
    assert utils.fetch_url('https://expired.badssl.com/', no_ssl=True) is not None
    # no decoding
    assert utils.fetch_url('https://httpbin.org/status/200', decode=False) == ''
    # response object
    url = 'https://httpbin.org/encoding/utf8'
    response = utils._send_request(url, False, DEFAULT_CONFIG)
    myobject = utils._handle_response(url, response, False, DEFAULT_CONFIG)
    assert myobject.data.startswith(b'<h1>Unicode Demo</h1>')
    # straight handling of response object
    assert utils.load_html(response) is not None
    # nothing to see here
    assert extract(response, url=response.geturl(), config=ZERO_CONFIG) is None
    # user-agents rotation
    assert utils._parse_config(UA_CONFIG) == ['Firefox', 'Chrome']
    custom = utils._determine_headers(UA_CONFIG)
    assert custom['User-Agent'] in ('Chrome', 'Firefox')
def test_fetch():
    '''test URL fetching'''
    # a malformed URL comes back as an empty string
    assert utils.fetch_url('1234') == ''
    # an HTTP error status yields None
    assert utils.fetch_url('https://httpbin.org/status/404') is None