Esempio n. 1
0
def test_download():
    """Download a mocked page and compare every saved file with fixtures.

    The main page and each entry of ``ASSETS`` are served through
    ``requests_mock``. After ``download`` runs, the saved HTML and every
    saved asset are compared with the corresponding fixture files.
    """
    with tempfile.TemporaryDirectory() as temp_dir, \
            requests_mock.Mocker() as mock:
        # Serve the main HTML page from its fixture.
        page_fixture = os.path.join(FIXTURES_PATH, INPUT_FIXTURE)
        mock.get(URL, text=read_file(page_fixture))

        # Serve every asset from its fixture as raw bytes.
        for link, fixture_name, _ in ASSETS:
            asset_url = get_abs_link(page_url=URL, local_link=link)
            asset_bytes = read_file(
                file_path=os.path.join(FIXTURES_PATH, fixture_name),
                mode='rb',
            )
            mock.get(asset_url, content=asset_bytes)

        download(URL, temp_dir)

        # The saved HTML must match the expected fixture.
        expected_html = read_file(
            os.path.join(FIXTURES_PATH, EXPECTED_FIXTURE),
        )
        saved_html = read_file(os.path.join(temp_dir, EXPECTED_FILENAME))
        assert expected_html == saved_html

        # Every expected asset file must exist in the output directory.
        for _, _, saved_name in ASSETS:
            assert os.path.exists(f'{temp_dir}/{saved_name}')

        # Every saved asset must be byte-identical to its fixture.
        for _, fixture_name, saved_name in ASSETS:
            fixture_bytes = read_file(
                get_full_path(FIXTURES_PATH, fixture_name), mode='rb',
            )
            saved_bytes = read_file(
                get_full_path(temp_dir, saved_name), mode='rb',
            )
            assert fixture_bytes == saved_bytes
def test_not_dir_error(requests_mock):
    """Check that ``download`` raises ``NotADirectoryError`` for a file path.

    A regular file is created inside a temporary directory and passed to
    ``download`` as the output location.

    Args:
        requests_mock: mocker object used to register the response for
            ``URL``.
    """
    html = read_fixture("index.html")
    requests_mock.get(URL, text=html)
    with tempfile.TemporaryDirectory() as output:
        fd, tmp_path = tempfile.mkstemp(dir=os.path.abspath(output))
        # mkstemp returns an already-open OS-level handle; close it so the
        # descriptor is not leaked for the rest of the test session.
        os.close(fd)
        with pytest.raises(NotADirectoryError):
            download(URL, tmp_path)
def test_fs_permission_error(requests_mock):
    """Check that ``download`` raises ``PermissionError`` for a locked dir.

    The output directory's mode is replaced with ``stat.S_ENFMT`` before the
    download, and the original mode is restored afterwards so that removing
    the temporary directory does not depend on the interpreter resetting
    permissions itself.

    Args:
        requests_mock: mocker object used to register the response for
            ``URL``.
    """
    html = read_fixture("index.html")
    requests_mock.get(URL, text=html)
    with tempfile.TemporaryDirectory() as output:
        original_mode = stat.S_IMODE(os.stat(output).st_mode)
        os.chmod(output, stat.S_ENFMT)
        try:
            with pytest.raises(PermissionError):
                download(URL, output)
        finally:
            # Give the directory its permissions back before cleanup.
            os.chmod(output, original_mode)
Esempio n. 4
0
def test_download_permission_denied():
    """Expect ``errors.SavingError`` when the output directory is read-only.

    The directory mode is reduced to the owner-read bit only before
    ``download`` is called against the mocked ``URL``.
    """
    with TemporaryDirectory() as tempdir, requests_mock.Mocker() as mock:
        mock.get(URL, text="text")
        # Only the owner-read bit stays set on the output directory.
        os.chmod(tempdir, stat.S_IRUSR)
        with pytest.raises(errors.SavingError):
            download(URL, tempdir)
Esempio n. 5
0
def test_download_with_existing_name():
    """Expect ``errors.SavingError`` on a second download to the same dir.

    The first ``download`` call is expected to succeed; repeating it with
    the same URL and directory must raise.
    """
    with TemporaryDirectory() as tempdir, requests_mock.Mocker() as mock:
        mock.get(URL, text="text")
        # First run: populates the directory.
        download(URL, tempdir)
        # Second run: the same target is already present.
        with pytest.raises(errors.SavingError):
            download(URL, tempdir)
def test_http_errors(requests_mock, status):
    """Expect ``HTTPError`` when the page responds with an error status.

    Args:
        requests_mock: mocker object used to register the failing response.
        status: key into ``map_status_to_route``; it is also converted with
            ``int`` to obtain the response status code.
    """
    failing_url = urljoin(URL, map_status_to_route[status])
    requests_mock.get(failing_url, status_code=int(status))

    with tempfile.TemporaryDirectory() as output, \
            pytest.raises(requests.exceptions.HTTPError):
        download(failing_url, output)
Esempio n. 7
0
def test_connection_errors():
    """Expect connection-level exceptions to propagate out of ``download``.

    The mocked URL is configured to raise ``ConnectionError`` and then
    ``TimeoutError``; each must surface unchanged.
    """
    page_url = 'http://google.com'
    with tempfile.TemporaryDirectory() as tmpdirname, \
            requests_mock.Mocker() as response:
        for raised in (ConnectionError, TimeoutError):
            # Re-register the URL so the request raises the next exception.
            response.get(page_url, exc=raised)
            with pytest.raises(raised):
                download(page_url, tmpdirname)
Esempio n. 8
0
def test_exceptions(url):
    """Check the errors raised for a bad URL and for a bad output path.

    Args:
        url: address to download; presumably supplied by a fixture or
            parametrization defined outside this block.
    """
    # Wrong URL: no mock is registered in this section.
    with TemporaryDirectory() as tempdir, pytest.raises(ConnectionError):
        download(url, tempdir)

    # Wrong output path: the request itself is mocked and succeeds.
    with requests_mock.Mocker() as rmock:
        rmock.get(url, text="Test")
        with pytest.raises(PathAccessError):
            download(url, 'wrong')
Esempio n. 9
0
def test_iscorrect_img_name():
    """Check the file name given to a downloaded image.

    No mock is installed in this block, so ``download`` is called against
    the real address.
    """
    page_url = "https://page-loader.hexlet.repl.co"
    files_dir_name = "page-loader-hexlet-repl-co_files"
    image_name = "page-loader-hexlet-repl-co-assets-professions-nodejs.png"
    with tempfile.TemporaryDirectory() as tmpdir:
        download(page_url, tmpdir)
        files_dir = os.path.join(tmpdir, files_dir_name)
        image_path = os.path.join(tmpdir, files_dir, image_name)
        assert os.path.isfile(image_path)
Esempio n. 10
0
def test():
    """End-to-end check of ``download`` against mocked page and resources.

    Text and binary fixtures are registered from ``EXPECTED_CONTENT`` and
    ``EXPECTED_BINARY_CONTENT``; ``http://google.com`` answers with a 301
    redirect to the courses page. The saved page and each saved resource
    are then compared with their expected files.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        with requests_mock.Mocker() as response:
            for link, fixture_path in EXPECTED_CONTENT.items():
                with open(fixture_path, 'r') as fixture:
                    response.get(link, text=fixture.read())
            for link, fixture_path in EXPECTED_BINARY_CONTENT.items():
                with open(fixture_path, 'rb') as fixture:
                    response.get(link, content=fixture.read())
            response.get(
                'http://google.com',
                status_code=301,
                headers={'Location': 'https://ru.hexlet.io/courses'},
            )

            download('http://google.com', tmpdirname)

        # Main page.
        page_path = os.path.join(tmpdirname, 'ru-hexlet-io-courses.html')
        assert os.path.exists(page_path), 'page should exist'
        with open(page_path) as actual, open(EXPECTED_PAGE) as expected:
            assert actual.read() == expected.read(), 'page should be equal'

        files_dir = os.path.join(tmpdirname, 'ru-hexlet-io-courses_files/')

        # First image.
        assert content_exists(files_dir, 'ru-hexlet-io-images-python-icon.png'), 'all images should be downloaded'
        image_path = os.path.join(
            files_dir, 'ru-hexlet-io-images-python-icon.png',
        )
        with open(EXPECTED_IMAGE1, 'rb') as expected, \
                open(image_path, 'rb') as actual:
            assert expected.read() == actual.read(), 'images should be equal'

        # Second image.
        assert content_exists(files_dir, 'ru-hexlet-io-images-python-icon2.png'), 'all images should be downloaded'
        image_path = os.path.join(
            files_dir, 'ru-hexlet-io-images-python-icon2.png',
        )
        with open(EXPECTED_IMAGE2, 'rb') as expected, \
                open(image_path, 'rb') as actual:
            assert expected.read() == actual.read(), 'images should be equal'

        # Stylesheet (compared twice, under two different messages).
        assert content_exists(files_dir, 'ru-hexlet-io-assets-application.css'), 'links content should be downloaded'
        css_path = os.path.join(
            files_dir, 'ru-hexlet-io-assets-application.css',
        )
        with open(EXPECTED_APPLICATION_CSS, 'r') as expected, \
                open(css_path, 'r') as actual:
            assert actual.read() == expected.read(), 'downloaded files should be equal'

        with open(css_path) as actual, \
                open(EXPECTED_APPLICATION_CSS) as expected:
            assert actual.read() == expected.read(), 'application.css should be equal'

        # Linked HTML page stored among the resources.
        assert content_exists(files_dir, 'ru-hexlet-io-courses.html'), 'links content should be downloaded'
        linked_page_path = os.path.join(files_dir, 'ru-hexlet-io-courses.html')
        with open(linked_page_path) as actual, \
                open(EXPECTED_COURSES_HTML) as expected:
            assert actual.read() == expected.read(), 'courses.html should be equal'

        assert not content_exists(files_dir, 'ru-hexlet-io-cdn2-hexlet-io-assets-menu.css'), 'files from other host should not be downloaded'

        # Script.
        assert content_exists(files_dir, 'ru-hexlet-io-packs-js-runtime.js'), 'script content should be downloaded'
        script_path = os.path.join(
            files_dir, 'ru-hexlet-io-packs-js-runtime.js',
        )
        with open(script_path) as actual, \
                open(EXPECTED_RUNTIME_JS) as expected:
            assert actual.read() == expected.read(), 'runtime.js should be equal'
Esempio n. 11
0
def test_http_errors():
    """Check how ``download`` reacts to 404 responses.

    A 404 on a linked image must not prevent the page from being saved,
    while a 404 on the page itself must raise ``requests.HTTPError`` whose
    text mentions the status code.
    """
    page_url = 'http://google.com'
    with tempfile.TemporaryDirectory() as tmpdirname, \
            requests_mock.Mocker() as response:
        with open('tests/fixtures/error_test_page.html', 'r') as page_fixture:
            response.get(page_url, text=page_fixture.read())
        response.get(
            'http://google.com/images/python-icon.png', status_code=404,
        )
        download(page_url, tmpdirname)
        saved_page = os.path.join(tmpdirname, 'google-com.html')
        assert os.path.exists(saved_page), "broken file links shouldn't raise errors"

        # Now the page itself is missing.
        response.get(page_url, status_code=404)
        with pytest.raises(requests.HTTPError) as error:
            download(page_url, tmpdirname)
        assert '404' in str(error.value)
def test_page_loader(requests_mock):
    """Download a mocked page with resources and verify everything saved.

    Checks the returned path, the saved HTML, the number of files in the
    resources directory and the bytes of every saved resource.

    Args:
        requests_mock: mocker object used to register the page and its
            resources.
    """
    requests_mock.get(URL, text=read_fixture("index.html"))

    for fixture_name, resource in RESOURCES.items():
        resource_url = resource["url"]
        body = read_fixture(os.path.join("res", fixture_name), "rb")
        requests_mock.get(urljoin(URL, resource_url), content=body)

    with tempfile.TemporaryDirectory() as output:
        expected_html_path = os.path.join(output, HTML_NAME)
        resources_dir = os.path.join(output, RESOURCES_DIR_NAME)
        returned_path = download(URL, output)
        actual_html = read_file(expected_html_path)
        expected_html = read_fixture("expected-index.html")

        assert returned_path == expected_html_path
        assert actual_html == expected_html
        assert len(os.listdir(resources_dir)) == len(RESOURCES)

        for fixture_name, resource in RESOURCES.items():
            saved_path = os.path.join(resources_dir, resource["expected_name"])
            actual_bytes = read_file(saved_path, "rb")
            expected_bytes = read_fixture(
                os.path.join("res", fixture_name), "rb",
            )
            assert actual_bytes == expected_bytes
Esempio n. 13
0
def test_download_page(requests_mock):
    """Run ``download`` against a mocked page and its assets.

    The HTML fixture is served at ``BASE_URL`` and every entry of ``ASSETS``
    is served from its reference file. The block only checks that the
    output directory starts empty and that ``download`` completes.

    Args:
        requests_mock: mocker object used to register the responses.
    """
    html_file = get_file(FIXTURE_DIR / HTML_FILE_NAME)
    requests_mock.get(BASE_URL, text=html_file)

    for asset in ASSETS:
        asset_url = urljoin(BASE_URL, asset['url_path'])
        reference_fs_path = FIXTURE_DIR / ASSETS_DIR_NAME / asset['file_name']
        reference_content = get_file(reference_fs_path, 'rb')
        # The reference bytes are stored back on the shared ASSETS entry.
        asset['content'] = reference_content
        requests_mock.get(asset_url, content=reference_content)

    with TemporaryDirectory() as tmp_dir:
        assert not os.listdir(tmp_dir)

        download(BASE_URL, Path(tmp_dir).resolve())
Esempio n. 14
0
def test_response_error(requests_mock, code):
    """Expect ``HTTPError`` when the mocked URL answers with ``code``.

    Args:
        requests_mock: mocker object used to register the failing response.
        code: HTTP status code returned by the mocked URL.
    """
    failing_url = urljoin(BASE_URL, "not-real-url")
    requests_mock.get(failing_url, status_code=code)

    with TemporaryDirectory() as tmp_dir, \
            pytest.raises(requests.exceptions.HTTPError):
        assert download(failing_url, tmp_dir)
Esempio n. 15
0
def main():
    """Run the page-loader script and exit with a status code.

    Prints the value returned by ``download``. The process exits with 1
    when ``errors.AppInternalError`` is raised and with 0 otherwise.
    """
    url, output_path = get_arguments()
    try:
        print(download(url, output_path))
    except errors.AppInternalError:
        sys.exit(1)
    sys.exit(0)
Esempio n. 16
0
def main():
    """Download the requested page and print where it was saved.

    Any exception is logged at CRITICAL level and the process exits with
    status 1.
    """
    args = arg_parser.parse()
    try:
        logging.basicConfig(level=logging.INFO)
        logging.info('Start download')
        page_path = download(args.url, args.output)
        print('Path webpage: ', page_path)
    except Exception as error:
        logging.critical(error)
        sys.exit(1)
def main():
    """Download the requested page and report the resulting file path.

    Logging is configured from the parsed arguments first. Any exception
    is logged (message at ERROR, traceback at DEBUG) and the process exits
    with status 1.
    """
    args = get_parsed_args()
    setup(log_level=args.log_level, filename=args.file)
    try:
        file_path = download(args.url, args.output)
        print('The webpage had been successfully downloaded to\n', file_path)
    except Exception as e:
        logging.error(str(e))
        # The stray trailing comma that turned this call into a discarded
        # one-element tuple has been removed.
        logging.debug(str(e), exc_info=True)
        sys.exit(1)
def test_page_download(fixture_tempdir, **kwargs):
    """Compare the saved page with the prettified mocked response.

    Args:
        fixture_tempdir: directory passed to ``download`` as the output
            location.
        **kwargs: must contain the mocker object under the key ``'m'``.
    """
    page_url = 'mock://test.com'
    mocker = kwargs['m']
    mocker.get(page_url, text='test_text')

    saved_path = download(page_url, fixture_tempdir)
    soup = BeautifulSoup(requests.get(page_url).content, 'html.parser')

    with open(saved_path, 'r', encoding='utf-8') as saved_page:
        actual_text = saved_page.read()
    assert actual_text == soup.prettify(formatter='html5')
Esempio n. 19
0
def test_download():
    """Check the content and the file name of a downloaded page.

    ``download`` is expected to return a pair whose first item is a
    path-like object exposing ``read_text`` and ``name``.
    """
    page_url = "https://ru.hexlet.io/professions"
    body = "Test data"
    with requests_mock.Mocker() as rmock:
        rmock.get(page_url, text=body)
        with TemporaryDirectory() as tempdir:
            local_page, _ = download(page_url, tempdir)
            assert local_page.read_text(encoding='utf-8') == body
    # The name check only needs the path object, not the file itself.
    assert local_page.name == "ru-hexlet-io-professions.html"
Esempio n. 20
0
def main():
    """Download a page, optionally open it in a browser, print its path.

    Exits with status 1 when ``logging.PageLoaderError`` is raised by
    ``download`` and with status 0 otherwise.

    Raises:
        SystemExit: always, carrying the exit status.
    """
    args = parse_args()
    logging.configure_logger(args.loglevel)
    logger.debug("Program started with arguments: {}".format(args))
    try:
        page, resources = download(args.url, args.output, args.externals)
    except logging.PageLoaderError:
        # ``exit()`` is a helper injected by the ``site`` module and may be
        # absent (e.g. ``python -S``); raising SystemExit works everywhere.
        raise SystemExit(1)
    if args.x:
        webbrowser.open(page)
    print(page)
    raise SystemExit(0)
Esempio n. 21
0
def test_network_error(requests_mock):
    """Expect a connection error to propagate and leave no files behind.

    Args:
        requests_mock: mocker object configured to raise on the bad URL.
    """
    unreachable_url = "https://shm____oogle.com"
    expected_error = requests.exceptions.ConnectionError
    requests_mock.get(unreachable_url, exc=expected_error)

    with TemporaryDirectory() as tmp_dir:
        # The directory is empty before the attempt...
        assert not os.listdir(tmp_dir)

        with pytest.raises(expected_error):
            assert download(unreachable_url, tmp_dir)

        # ...and must still be empty after the failure.
        assert not os.listdir(tmp_dir)
def test_io_errors():
    """Expect a specific ``OSError`` subclass for each bad output path."""
    cases = (
        (NotADirectoryError, './tests/__init__.py'),
        (FileNotFoundError, 'non_existing_path/'),
        (PermissionError, '/sys'),
    )
    for expected_error, output_path in cases:
        with pytest.raises(expected_error):
            download(URL, output_path)
Esempio n. 23
0
def main():
    """Parse the command line, download the page and print the result.

    On success the value returned by ``download`` is printed and the
    process exits with status 0. On any exception a message is printed and
    the process exits with status 1; a ``requests`` connection error gets
    a dedicated message, every other exception is printed as-is.
    """
    parser = argparse.ArgumentParser(
        description="Download HTML-page into file"
    )
    parser.add_argument(
        "web_source",
        type=str, help="destination of resource to download"
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        # A space separates the two literals; without it the help text
        # read "HTML-page.(Default: 'cwd')".
        help="'path_to_file' - destination to download HTML-page. "
        "(Default: 'cwd')",
        default=os.getcwd()
    )
    # Parse once and reuse the namespace.
    args = parser.parse_args()
    ie_source = args.web_source
    output = args.output

    try:
        print(download(ie_source, output))

    except requests.exceptions.ConnectionError:
        print(f'Unable to connect to {ie_source}')
        sys.exit(1)

    # The previous per-type handlers (FileNotFoundError, PermissionError,
    # NotADirectoryError, ConnectionAbortedError, FileExistsError) all did
    # exactly this, so a single handler covers them.
    except Exception as e:
        print(e)
        sys.exit(1)

    else:
        sys.exit(0)
Esempio n. 24
0
def test_download_with_local_resources(
    open_with_local_resources,
    open_with_changed_paths,
    open_test_css,
    open_test_html,
    open_test_js,
    open_test_png,
):
    """Download a page with local resources and verify every saved file.

    Args:
        open_with_local_resources: body served for ``URL``.
        open_with_changed_paths: expected content of the saved HTML page.
        open_test_css: body served for, and expected in, the stylesheet.
        open_test_html: body served for, and expected in, the linked page.
        open_test_js: body served for, and expected in, the script.
        open_test_png: body served for, and expected in, the image.
    """
    responses = {
        URL: open_with_local_resources,
        "http://test.com/assets/application.css": open_test_css,
        "http://test.com/courses": open_test_html,
        "http://test.com/assets/professions/nodejs.png": open_test_png,
        "http://test.com/packs/js/runtime.js": open_test_js,
    }

    with TemporaryDirectory() as tempdir:
        with requests_mock.Mocker() as mock:
            for url, body in responses.items():
                # Bytes are served as raw content, anything else as text.
                body_kwarg = "content" if isinstance(body, bytes) else "text"
                mock.get(url, **{body_kwarg: body})
            received_html_path = download("http://test.com", tempdir)
            dir_path = os.path.join(tempdir, "test-com_files")
            received_resource_list = sorted(os.listdir(dir_path))

        assert received_html_path == f"{tempdir}/test-com.html"
        assert received_resource_list == [
            "test-com-assets-application.css",
            "test-com-assets-professions-nodejs.png",
            "test-com-courses.html",
            "test-com-packs-js-runtime.js",
        ]

        with open(received_html_path, "r") as saved:
            assert saved.read() == open_with_changed_paths

        css_path = f"{dir_path}/test-com-assets-application.css"
        with open(css_path, "r") as saved:
            assert saved.read() == open_test_css

        png_path = f"{dir_path}/test-com-assets-professions-nodejs.png"
        with open(png_path, "rb") as saved:
            assert saved.read() == open_test_png

        html_path = f"{dir_path}/test-com-courses.html"
        with open(html_path, "r") as saved:
            assert saved.read() == open_test_html

        js_path = f"{dir_path}/test-com-packs-js-runtime.js"
        with open(js_path, "r") as saved:
            assert saved.read() == open_test_js
Esempio n. 25
0
def main():
    """Download the requested page and print the resulting file path.

    Any exception is logged (message at ERROR, traceback at DEBUG) and the
    process exits with status 1.
    """
    args = cli.get_args_parser().parse_args()
    logging.setup(args.log_level, filename=args.log_file)

    try:
        path = download(args.url, args.output)
    except Exception as error:
        # NOTE(review): the module-level name ``logging`` is called with
        # ``setup`` above, so it is presumably a project helper; this local
        # import brings in a separate ``logging`` object under another name.
        import logging as logger
        message = str(error)
        logger.error(message)
        logger.debug(message, exc_info=True)
        sys.exit(1)

    print(f'Success! File path: {path}')
Esempio n. 26
0
def main():
    """Main page_loader script.

    Downloads the page named by the command-line arguments. On success the
    saved path is printed and the process exits with status 0; on any
    exception the error is printed and the process exits with status 1.
    """
    try:
        logger.info('Page-loader script started!')
        # Read the arguments once instead of calling get_args() per field.
        args = get_args()
        file_path = download(url=args.URL, path=args.output)
    except Exception as err:
        print(err)
        logger.info('Page-loader script finished.')
        sys.exit(1)
    else:
        logger.info('Page-loader script finished.')
        print(f'Output page had been saved to \'{file_path}\'')
        sys.exit(0)
Esempio n. 27
0
def test_resources_download(page, image, style, script):
    """Check that the page's local resources are saved next to it.

    Args:
        page: path-like fixture holding the HTML page.
        image: path-like fixture holding the image bytes.
        style: path-like fixture holding the stylesheet text.
        script: path-like fixture holding the script text.
    """
    address = "https://local.com"
    with requests_mock.Mocker() as rmock:
        rmock.get(address, text=page.read_text(encoding='utf-8'))
        rmock.get(f"{address}/assets/style.css", text=style.read_text())
        rmock.get(f"{address}/assets/script.js", text=script.read_text())
        rmock.get(f"{address}/assets/python.png", content=image.read_bytes())
        with TemporaryDirectory() as tempdir:
            page_path, resources_path = download(address, tempdir, False)

            # The saved page must differ from the source page.
            saved_html = page_path.read_text(encoding='utf-8')
            assert saved_html != page.read_text(encoding='utf-8')

            # Check resources exist in *_files directory.
            saved_names = [entry.name for entry in resources_path.iterdir()]
            assert len(saved_names) == 3
            for suffix in ('style.css', 'script.js', 'python.png'):
                assert f"local-com-assets-{suffix}" in saved_names
def test_page_loader(mocker):
    """
    Test page loader, including a resource with an over-long name.

    ``random.choices`` is patched to a fixed sequence so the expected name
    of the over-long resource can be written out in full.

    Args:
        mocker: object whose ``patch`` method replaces ``random.choices``.
    """
    mocker.patch('random.choices', return_value=list('h2iS4xh2iS'))

    with tempfile.TemporaryDirectory() as tempdir:
        expected_path_index_page = join(tempdir, 'test-com.html')
        with open(get_file_absolute_path('page.html')) as file_before, \
                requests_mock.Mocker() as mock:
            long_name = 'a' * (MAX_LENGTH_FILENAME + 45)
            mock.get(URL, text=file_before.read())
            long_link = '{url}/{link_name}LongLink'.format(
                url=URL,
                link_name=long_name,
            )
            mock.get(long_link, content=b'html')

            resource_routes = (
                ('{url}/images/python.png', b'png'),
                ('{url}/scripts/test.js', b'js'),
                ('{url}/courses', b'html'),
                ('{url}/styles/app.css', b'css'),
            )
            for template, body in resource_routes:
                mock.get(template.format(url=URL), content=body)

            path_index_page = download(URL, tempdir)
            assert expected_path_index_page == path_index_page

            resources_dir = join(tempdir, 'test-com_files')
            assert isdir(resources_dir)

            expected_files = (
                'test-com-{name}h2iS4xh2iS.html'.format(name=long_name[:231]),
                'test-com-images-python.png',
                'test-com-courses.html',
                'test-com-scripts-test.js',
                'test-com-styles-app.css',
            )
            for filename in expected_files:
                assert isfile(join(resources_dir, filename))

            with open(get_file_absolute_path('expected.html')) as file_exp, \
                    open(path_index_page) as file_tested:
                assert file_exp.read() == file_tested.read()
def fixture_source_mock(fixture_tempdir, **kwargs):
    """Register the mocked page with its resources and download it.

    Args:
        fixture_tempdir: directory passed to ``download`` as the output
            location.
        **kwargs: must contain the mocker object under the key ``'mock'``.

    Returns:
        The value returned by ``download`` for ``mock://test.com``.
    """
    mock_source = kwargs['mock']

    # (url, fixture path, served as bytes?) in registration order.
    served_fixtures = (
        ('mock://test.com', 'tests/fixtures/test.html', False),
        ('mock://test.com/haha.png', 'tests/fixtures/haha.png', True),
        ('mock://test.com/test1.css', 'tests/fixtures/test1.css', False),
        ('mock://test.com/test2.js', 'tests/fixtures/test2.js', False),
    )
    for url, fixture_path, is_binary in served_fixtures:
        if is_binary:
            with open(fixture_path, 'rb') as fixture:
                body = fixture.read()
            mock_source.get(url, content=body)
        else:
            with open(fixture_path, 'r', encoding='utf-8') as fixture:
                body = fixture.read()
            mock_source.get(url, text=body)

    # The linked page is served with an empty body.
    mock_source.get('mock://test.com/courses', text='')
    return download('mock://test.com', fixture_tempdir)
Esempio n. 30
0
def main():
    """Configure logging from the CLI arguments and download the page.

    ``args.verbose`` selects DEBUG or INFO; any other value falls back to
    WARNING. The process exits with ``EXIT_ERROR`` when
    ``errors.KnownError`` is raised and with ``EXIT_OK`` on success.
    """
    args = cli.set_parser()

    levels_by_name = {"debug": log.DEBUG, "info": log.INFO}
    set_logging(levels_by_name.get(args.verbose, log.WARNING))

    try:
        path = download(args.url, args.output)
    except errors.KnownError as error:
        log.error(f"{error}")
        sys.exit(EXIT_ERROR)

    print(f"\u2714 Page was successfully downloaded into '{path}'")
    log.info("Page with resources were successfully downloaded")
    sys.exit(EXIT_OK)