def test_http_error(status_code):
    """Expect ``download`` to raise ``HTTPError`` for a mocked error status.

    ``status_code`` is the HTTP status the mocked ``HOST`` answers with
    (presumably supplied by a parametrize decorator outside this block).
    """
    expected_error = requests.exceptions.HTTPError
    with requests_mock.Mocker() as http_mock:
        http_mock.get(HOST, status_code=status_code)
        with tempfile.TemporaryDirectory() as target_dir:
            with pytest.raises(expected_error):
                download(HOST, target_dir)
def test_connection_error():
    """Expect ``download`` to propagate a ``ConnectionError``.

    The mocked ``HOST`` is configured to raise
    ``requests.exceptions.ConnectionError`` on GET.
    """
    connection_error = requests.exceptions.ConnectionError
    with requests_mock.Mocker() as http_mock:
        http_mock.get(HOST, exc=connection_error)
        with tempfile.TemporaryDirectory() as target_dir:
            with pytest.raises(connection_error):
                download(HOST, target_dir)
def test_download():
    """Download a page and compare the saved file with a stored fixture.

    NOTE(review): no HTTP mock is set up inside this block, so unless one
    is installed elsewhere the request presumably goes to the real URL.
    """
    url = 'https://help.hexlet.io/ru'
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'fixtures/result.html',
    )
    with tempfile.TemporaryDirectory() as output_dir:
        download(url, output_dir)

        with open(fixture_path) as fixture_file:
            expected_result = fixture_file.read()

        saved_path = os.path.join(output_dir, 'help-hexlet-io-ru.html')
        with open(saved_path) as saved_file:
            result = saved_file.read()

        assert result == expected_result
def test_download_request_errors(
    url, mock_url, content_type, status_code, error_type
):
    """Expect ``download`` to raise ``error_type`` for a mocked response.

    ``mock_url`` is mocked to answer with the given ``Content-Type`` header
    and ``status_code``; ``download`` is then called with ``url``.
    """
    response_headers = {'Content-Type': content_type}
    with tempfile.TemporaryDirectory() as tmp_dir:
        with requests_mock.Mocker() as mocker:
            mocker.get(
                mock_url,
                headers=response_headers,
                status_code=status_code,
            )
            with pytest.raises(error_type):
                download(url, tmp_dir)
def test_loader():
    """Download a mocked page with three assets and verify the saved files.

    The page, an image, a stylesheet and a script are served from mocks
    built out of fixture files. The test then checks the name of the saved
    HTML file and that every asset stored in the ``some-ru_files`` folder
    matches its fixture.
    """
    url = f'{HOST}'
    img_url = f'{HOST}/assets/img.png'
    css_url = f'{HOST}/assets/application.css'
    js_url = f'{HOST}/assets/runtime.js'

    data = _read_file(FIXTURE_FOLDER, 'web_page.html')
    img_data = _read_file(FIXTURE_FOLDER, 'assets/img.png', mode='rb')
    css_data = _read_file(FIXTURE_FOLDER, 'assets/application.css', mode='r')
    js_data = _read_file(FIXTURE_FOLDER, 'assets/runtime.js', mode='r')

    with requests_mock.Mocker() as mock:
        mock.get(url, text=data)
        mock.get(img_url, content=img_data)
        mock.get(css_url, text=css_data)
        mock.get(js_url, text=js_data)

        with tempfile.TemporaryDirectory() as tempdir:
            file_path = download(url, tempdir)
            file_folder = os.path.join(tempdir, 'some-ru_files')

            # basename() honours the platform separator, unlike a manual
            # split on '/'.
            assert os.path.basename(file_path) == 'some-ru.html'
            assert _read_file(
                file_folder, 'some-ru-assets-img.png', mode='rb',
            ) == img_data
            assert _read_file(
                file_folder, 'some-ru-assets-application.css', mode='r',
            ) == css_data
            assert _read_file(
                file_folder, 'some-ru-assets-runtime.js', mode='r',
            ) == js_data
def test_loader(
    base_url,
    png_url,
    css_url,
    js_url,
    html,
    png,
    css,
    js,
    requests_mock,
    expected_html,
    expected_file_names,
):
    """Test download and save page."""
    with TemporaryDirectory() as tmpdirname:
        page_name = url.to_name(base_url)
        resources_dir_name = url.to_name(base_url, directory=True)

        # Serve the page as text and every resource as raw bytes.
        requests_mock.get(base_url, text=html)
        resources = ((png_url, png), (css_url, css), (js_url, js))
        for resource_url, payload in resources:
            requests_mock.get(resource_url, content=payload)

        saved_path = loader.download(base_url, tmpdirname)

        # Exactly two entries are expected in the output directory.
        assert len(os.listdir(tmpdirname)) == 2
        assert saved_path == os.path.join(tmpdirname, page_name)

        resources_dir = os.path.join(tmpdirname, resources_dir_name)
        assert set(os.listdir(resources_dir)) == expected_file_names

        with open(os.path.join(tmpdirname, page_name)) as page_file:
            actual_html = page_file.read()
        assert actual_html == expected_html
def test_download(
    url, mock_url, html_file_expect, content_type, data, expect_data, request
):
    """Download a mocked page and compare it with the expected HTML.

    ``data`` and ``expect_data`` are fixture names resolved through
    ``request.getfixturevalue``. Both documents are parsed with the
    ``lxml`` parser and compared in prettified form.
    """
    source_html = request.getfixturevalue(data)
    expected_html = request.getfixturevalue(expect_data)

    with tempfile.TemporaryDirectory() as tmp_dir:
        with requests_mock.Mocker() as mocker:
            mocker.get(
                mock_url,
                text=source_html,
                headers={'Content-Type': content_type},
            )
            result_path = download(url, tmp_dir)

        assert result_path == os.path.join(tmp_dir, html_file_expect)

        with open(result_path, 'r') as result_file:
            actual_soup = BeautifulSoup(result_file.read(), 'lxml')
        expected_soup = BeautifulSoup(expected_html, 'lxml')

        actual_pretty = actual_soup.prettify(formatter='html5')
        expected_pretty = expected_soup.prettify(formatter='html5')
        assert actual_pretty == expected_pretty
def main() -> None:  # noqa: WPS210, WPS213
    """Run a code."""
    args = cli.get_parser().parse_args()
    url, output, level = args.url, args.output, args.verbosity

    my_logging.setup(level)
    logging.debug(
        'The following arguments were introduced: {0}'.format(args),
    )

    exit_code = EXIT_FAILURE
    try:  # noqa: WPS229
        path_to_page = loader.download(url, output)
        exit_code = EXIT_SUCCES
    except (
        errors.DownloadDirectoryError,
        errors.DownloadFileError,
        errors.DownloadNetworkError,
        errors.DownloadError,
    ) as err:
        # Every handled error is reported the same way: the underlying
        # cause goes to the debug log, the message to the error log.
        logging.debug(str(err.__cause__), exc_info=True)
        logging.error(err.message)
    else:
        print(  # noqa: WPS421
            'Page loading completed successfully to {0}'.format(path_to_page),
        )

    sys.exit(exit_code)
def test_simple_download():
    """Download a mocked plain-text page and check the saved content.

    The saved file is expected to hold the mocked body followed by a
    newline.
    """
    page_url = 'http://test.com'
    with tempfile.TemporaryDirectory() as tmp_dir:
        with requests_mock.Mocker() as mocker:
            mocker.get(page_url, text='test_page_data')
            saved_path = download(page_url, tmp_dir)

        with open(saved_path, 'r') as saved_file:
            saved_text = saved_file.read()
        assert saved_text == 'test_page_data\n'
def test_response_with_error(base_url, code, requests_mock):
    """Test bad responses.

    The URL built from ``base_url`` and ``code`` is mocked to answer with
    status ``code``; ``loader.download`` must raise for it.
    """
    url = base_url.format(code)  # noqa:WPS442
    requests_mock.get(url, status_code=code)
    with TemporaryDirectory() as tmpdirname:
        with pytest.raises(Exception):  # noqa: PT011
            # Deliberately no ``assert`` here: a falsy return value would
            # raise AssertionError, which ``raises(Exception)`` accepts,
            # so the test would pass without ``download`` ever failing.
            loader.download(url, tmpdirname)
def test_download():
    """Download a mocked page; check the file name and the saved text.

    The saved content is compared with the body returned by a second GET
    against the same mocked URL.
    """
    mock_url = 'https://test_page.com/first_page'
    expected_name = 'test-page-com-first-page.html'

    with requests_mock.Mocker() as mock:
        mock.get(mock_url, text="test-page")
        with tempfile.TemporaryDirectory() as tempdir:
            file_path = download(mock_url, tempdir)

            saved_name = file_path.rsplit('/', 1)[-1]
            assert saved_name == expected_name

            page = requests.get(mock_url)
            assert read_file(file_path) == page.text
def main():
    """Download the requested page and print the path of the saved file.

    Any exception raised along the way is logged as an error and the
    process exits with status 1.
    """
    arguments = _parse_arguments()
    url, output, log_level = arguments
    setup_logger(log_level)

    try:
        saved_path = download(url, output)
        logging.info('Page was downloaded: %s', saved_path)
        print(saved_path)
    except Exception as error:
        logging.error(error)
        sys.exit(1)
def main():
    """Download the page given on the command line and report the result.

    On success the location returned by ``download`` is printed after a
    short banner. On failure the error is printed and the process exits
    with status 1.

    Raises:
        SystemExit: with code 1 when ``download`` raises an exception.
    """
    args = get_parser().parse_args()
    try:
        page = download(args.url, args.output)
    except Exception as error:
        print(error)
        # Stop here: ``page`` was never bound, so continuing would crash
        # with UnboundLocalError and print a misleading success banner.
        raise SystemExit(1)

    print()
    print('Page is completely loaded into:')
    print(page)
def test_bad_loader(requests_mock):
    """Test bad download.

    The mocked URL raises ``errors.DownloadError`` on GET;
    ``loader.download`` must raise and must leave the output directory
    empty.
    """
    invalid_url = 'https://badsite.com'
    requests_mock.get(invalid_url, exc=errors.DownloadError)
    with TemporaryDirectory() as tmpdirname:
        with pytest.raises(Exception):  # noqa: PT011
            # Deliberately no ``assert`` here: a falsy return value would
            # raise AssertionError, which ``raises(Exception)`` accepts.
            loader.download(invalid_url, tmpdirname)
        # Checked after the ``raises`` block so the assertion is reachable.
        assert not os.listdir(tmpdirname)
def test_download(test_html, expect_test_html):
    """Download a mocked page with resources and compare the saved HTML.

    The page URL serves ``test_html``; the resource URLs are mocked with
    default (empty) responses. The saved file must equal
    ``expect_test_html``.
    """
    page_url = 'http://test.com'
    resource_urls = (
        'http://test.com/assets/application.css',
        'http://test.com/courses',
        'http://test.com/assets/professions/nodejs.png',
        'http://test.com/packs/js/runtime.js',
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        with requests_mock.Mocker() as mocker:
            mocker.get(page_url, text=test_html)
            for resource_url in resource_urls:
                mocker.get(resource_url)
            result_path = download(page_url, tmp_dir)

        with open(result_path, 'r', encoding='utf-8') as saved_file:
            saved_page = saved_file.read()
        assert saved_page == expect_test_html
def test_download_page(requests_mock, tmpdir):
    """Download a mocked page into ``tmpdir`` and verify the saved file.

    The fixture is located relative to the current working directory.
    Here ``download`` is called with the directory first and the URL
    second. The saved text is compared with the fixture as sorted
    character lists, so the check ignores character order.
    """
    fixture_path = os.path.join(
        os.getcwd(), 'tests', 'fixtures', 'page.html',
    )
    page_url = 'https://ru.hexlet.io/courses'

    with open(fixture_path, 'r') as fixture_file:
        content = fixture_file.read()
    requests_mock.get(page_url, text=content)

    expected_path = tmpdir / 'ru-hexlet-io-courses.html'
    assert download(tmpdir, page_url) == expected_path

    with open(expected_path) as saved_file:
        assert sorted(saved_file.read()) == sorted(content)
def test_invalid_destination():
    """Expect ``download`` to fail for three unusable destinations.

    Checked in turn: a directory whose mode was changed to ``0o400``
    (``PermissionError``), a regular file used as the destination
    (``NotADirectoryError``), and the literal path ``'tempdir'``
    (``FileExistsError``).
    """
    with tempfile.TemporaryDirectory() as locked_dir:
        with pytest.raises(PermissionError):
            os.chmod(locked_dir, 0o400)
            download(HOST, locked_dir)

    with tempfile.NamedTemporaryFile() as plain_file:
        with pytest.raises(NotADirectoryError):
            download(HOST, plain_file.name)

    with pytest.raises(FileExistsError):
        download(HOST, 'tempdir')
def test_download_dir_not_exist():
    """Expect ``errors.SaveError`` when the target directory is missing.

    The destination is a ``temp`` sub-path of a fresh temporary directory
    that is never created.
    """
    site = 'test.com'
    with tempfile.TemporaryDirectory() as tmp_dir:
        missing_dir = os.path.join(tmp_dir, 'temp')
        with pytest.raises(errors.SaveError):
            download(site, missing_dir)
def test_download_hexlet(
    url,
    html_data,
    expect_html_data,
    expect_res_path,
    expect_page_file_name,
    text_res_urls,
    img_res_urls,
    img,
    text_expect_files,
    img_expect_files,
    request
):
    """Download a mocked page with resources and verify everything saved.

    Several arguments are fixture names that are resolved through
    ``request.getfixturevalue``. The test checks the returned page path,
    the resource directory, the content of each text and image resource,
    and finally the saved HTML against the expected HTML (both compared
    in prettified form).
    """
    # Resolve fixture names into their values.
    resolve = request.getfixturevalue
    page_html = resolve(html_data)
    expected_page_html = resolve(expect_html_data)
    text_urls = resolve(text_res_urls)
    image_urls = resolve(img_res_urls)
    image_bytes = resolve(img)
    expected_text_files = resolve(text_expect_files)
    expected_image_files = resolve(img_expect_files)

    expected_soup = BeautifulSoup(expected_page_html, 'html.parser')

    with tempfile.TemporaryDirectory() as tmp_dir:
        with requests_mock.Mocker() as mocker:
            # Text resources are served with the page HTML as their body,
            # image resources with the image bytes.
            mocker.get(url, text=page_html)
            for text_url in text_urls:
                mocker.get(text_url.strip(), text=page_html)
            for image_url in image_urls:
                mocker.get(image_url.strip(), content=image_bytes)

            result_path = download(url, tmp_dir)

        # The page file is saved under the expected path.
        assert result_path == os.path.join(tmp_dir, expect_page_file_name)

        # The resource directory exists.
        resources_dir = os.path.join(tmp_dir, expect_res_path)
        assert os.path.isdir(resources_dir)

        # Text resources were saved with the mocked content.
        for text_name in expected_text_files:
            text_path = os.path.join(resources_dir, text_name.strip())
            assert os.path.exists(text_path)
            with open(text_path) as text_file:
                assert page_html == text_file.read()

        # Binary resources were saved with the mocked bytes.
        for image_name in expected_image_files:
            image_path = os.path.join(resources_dir, image_name.strip())
            assert os.path.exists(image_path)
            with open(image_path, 'rb') as image_file:
                assert image_bytes == image_file.read()

        # The saved HTML matches the expected HTML.
        with open(result_path, 'r') as result_file:
            result_soup = BeautifulSoup(result_file.read(), 'html.parser')
        expected_pretty = expected_soup.prettify(formatter='html5')
        result_pretty = result_soup.prettify(formatter='html5')
        assert expected_pretty == result_pretty
def main():
    """Download a page and print the value returned by ``download``.

    The first two items of ``parse_args()`` are passed to ``download``
    in order.
    """
    parsed = parse_args()
    print(download(parsed[0], parsed[1]))
def test_invalid_url():
    """Expect ``MissingSchema`` for a URL given without a scheme."""
    schemeless_url = 'site.ru'
    expected_error = requests.exceptions.MissingSchema
    with tempfile.TemporaryDirectory() as target_dir:
        with pytest.raises(expected_error):
            download(schemeless_url, target_dir)
def main():
    """Parse the command line, download the page and print the result."""
    args = arg_parse().parse_args()
    saved_to = download(args.url, args.output)
    print(saved_to)