Example #1
0
def main():
    """Run the command-line entry point.

    Parses the arguments, passes the URL and output location to
    ``download`` and prints whatever it returns. Any exception is logged
    through the root logger and the process exits with status 1.
    """
    try:
        options = args.parse()
        outcome = download(options.url, options.output)
        print(outcome)
    except Exception as failure:
        logging.error(failure)
        sys.exit(1)
def main():
    """Run the page loader from the command line.

    Configures logging, reads the CLI arguments and prints the value
    returned by ``page.download``. On any exception the error is logged
    via the ``page_loader`` logger and the process exits with status 1.
    """
    logging.basicConfig(level=logging.INFO, format=FORMAT)
    log = logging.getLogger('page_loader')
    options = cli.get_args()
    try:
        outcome = page.download(options.url, options.output)
        print(outcome)
    except Exception as failure:
        message = 'Error: {0}'.format(failure)
        log.error(message)
        sys.exit(1)
Example #3
0
def test_download_exceptions(
    requests_mock,
    url: str,
    dir: str,
    file: str,
    rights,
    exception,
):
    """Check that ``page.download`` raises ``exception`` for a bad target.

    Args:
        requests_mock: fixture used to stub the GET request for ``url``.
        url: address whose response is mocked and then downloaded.
        dir: path component appended to the temporary directory.
        file: file name pre-created in the temporary directory when
            non-empty; also appended to the download target path.
        rights: permission mode applied to the temporary directory.
        exception: exception type the download is expected to raise.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        requests_mock.get(
            url,
            text='<script src="page-loader-test-_files/test">',
        )
        os.chmod(tmpdirname, rights)
        try:
            if file:
                open(os.path.join(tmpdirname, file), 'a').close()
            with pytest.raises(exception):
                page.download(url, os.path.join(tmpdirname, dir, file))
        finally:
            # Always restore the permissions, even when the check above
            # fails, so the temporary directory can still be cleaned up
            # and a cleanup error does not hide the real test failure.
            os.chmod(tmpdirname, ALLOWED_FILE_PERMISSION_CODE)
Example #4
0
def test_download(requests_mock):
    """Download the mocked page and compare the result with fixtures.

    The page and every asset listed in ``TEST_ASSETS`` are served through
    ``requests_mock``. The test then checks the saved HTML, the name of
    the HTML file, the list of saved asset files and the content of each
    asset against the expected fixtures.
    """
    page_markup = read_file(build_fixure_path(TEST_HTML), 'r')
    requests_mock.get(TEST_URL, text=page_markup)
    for relative_path in TEST_ASSETS:
        asset_payload = read_file(
            build_fixure_path(relative_path, FIXTURES_ASSETS_DIR),
        )
        asset_url = urljoin(TEST_URL, relative_path)
        requests_mock.get(asset_url, content=asset_payload)

    with tempfile.TemporaryDirectory() as workdir:
        saved_page_path = page.download(TEST_URL, workdir)
        actual_markup = read_file(saved_page_path, 'r')
        reference_markup = read_file(
            build_absolute_path(EXPECTED_HTML_PATH),
            'r',
        )
        assert actual_markup == reference_markup

        # The output directory must hold exactly two entries; after
        # sorting, the first is taken as the HTML file and the second as
        # the assets directory.
        html_name, assets_dir_name = sorted(os.listdir(workdir))
        assert html_name == EXPECTED_HTML_NAME

        produced_dir = os.path.join(workdir, assets_dir_name)
        produced_names = sorted(os.listdir(produced_dir))
        reference_names = sorted(os.listdir(
            build_absolute_path(EXPECTED_PATH, EXPECTED_ASSETS_DIR),
        ))
        assert produced_names == reference_names

        for asset_name in produced_names:
            produced = read_file(os.path.join(produced_dir, asset_name))
            reference = read_file(build_absolute_path(
                EXPECTED_PATH,
                EXPECTED_ASSETS_DIR,
                asset_name,
            ))
            assert produced == reference
Example #5
0
def test_get_data_exceptions(requests_mock, status_code):
    """Expect ``requests.HTTPError`` when the page answers ``status_code``."""
    requests_mock.get(TEST_URL, status_code=status_code, text='')
    with pytest.raises(requests.HTTPError), \
            tempfile.TemporaryDirectory() as workdir:
        page.download(TEST_URL, workdir)