    def test_download(self):
        """Test download method"""

        filename = 'download_test.txt'
        # standard download
        test_url = urljoin(self.wdir, filename)
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=self.temp_dir,
                                            logger=self.logger)
        scraper.download()
        self.assertTrue(os.path.isfile(os.path.join(self.temp_dir, filename)))
        # Compare original and downloaded file via md5 hash
        md5_original = create_md5(
            os.path.join(mhttpd.HERE, mhttpd.WDIR, filename))
        md5_downloaded = create_md5(os.path.join(self.temp_dir, filename))
        self.assertEqual(md5_original, md5_downloaded)

        # RequestException for a non-existent file
        test_url1 = urljoin(self.wdir, 'does_not_exist.html')
        scraper1 = mozdownload.DirectScraper(url=test_url1,
                                             destination=self.temp_dir,
                                             logger=self.logger)
        self.assertRaises(requests.exceptions.RequestException,
                          scraper1.download)

        # Cover retry_attempts and retry_delay
        test_url2 = urljoin(self.wdir, 'does_not_exist.html')
        scraper2 = mozdownload.DirectScraper(url=test_url2,
                                             destination=self.temp_dir,
                                             retry_attempts=3,
                                             retry_delay=1.0,
                                             logger=self.logger)
        self.assertRaises(requests.exceptions.RequestException,
                          scraper2.download)
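# The tests above and below rely on a create_md5() helper that is not shown in
# these snippets. A minimal sketch, assuming it simply returns the hex MD5
# digest of a file on disk (as mozdownload's test helpers appear to do):
import hashlib

def create_md5(path):
    """Return the hexadecimal MD5 digest of the file at `path`."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # read in chunks so large downloads do not need to fit into memory
        for chunk in iter(lambda: f.read(64 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()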
def test_destination_multiple_dir(httpd, tmpdir):
    """ensure that multiple non existing directories are created"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    destination = os.path.join(str(tmpdir), 'tmp1', 'tmp2', filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=destination)
    assert scraper.destination == destination
def test_retry_attempts(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir),
                                        retry_attempts=3,
                                        retry_delay=0.1)
    with pytest.raises(errors.NotFoundError):
        scraper.download()
def test_retry_attempts(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir),
                                        retry_attempts=3,
                                        retry_delay=0.1)
    with pytest.raises(requests.exceptions.RequestException):
        scraper.download()
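# A hedged sketch of an extra check one could add (not part of the original
# tests): if the scraper really pauses retry_delay seconds between attempts,
# a failing download with retries should take at least one such pause. Which
# exception gets raised depends on the mozdownload version, so both variants
# shown above are accepted here.
import time

def test_retry_delay_is_applied(httpd, tmpdir):
    retry_delay = 0.5
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir),
                                        retry_attempts=2,
                                        retry_delay=retry_delay)
    start = time.time()
    with pytest.raises((errors.NotFoundError,
                        requests.exceptions.RequestException)):
        scraper.download()
    # at least one pause between attempts should have elapsed
    assert time.time() - start >= retry_delay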
def test_compare_download(httpd, tmpdir):
    """Compare original and downloaded file via md5 hash"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=str(tmpdir))
    scraper.download()
    md5_original = create_md5(os.path.join(httpd.router.doc_root, filename))
    md5_downloaded = create_md5(os.path.join(str(tmpdir), filename))
    assert md5_original == md5_downloaded
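# httpd and tmpdir are pytest fixtures; tmpdir is built in, while httpd comes
# from the project's conftest and also exposes the served directory as
# httpd.router.doc_root (used above). A minimal sketch of a comparable fixture,
# assuming a plain standard-library HTTP server instead of the real test
# server (which additionally provides routes such as 'basic_auth'):
import threading
from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler

import pytest

class _LocalHttpd(object):
    def __init__(self, doc_root):
        handler = partial(SimpleHTTPRequestHandler, directory=doc_root)
        self.server = HTTPServer(('127.0.0.1', 0), handler)
        threading.Thread(target=self.server.serve_forever, daemon=True).start()

    def get_url(self):
        host, port = self.server.server_address
        return 'http://%s:%d/' % (host, port)

    def stop(self):
        self.server.shutdown()

@pytest.fixture
def httpd(tmp_path):
    # the served directory needs the download_test.txt file used by the tests
    (tmp_path / 'download_test.txt').write_text('some test content')
    server = _LocalHttpd(str(tmp_path))
    yield server
    server.stop()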
def test_valid_authentication(tmpdir):
    username = '******'
    password = '******'
    basic_auth_url = 'http://mozqa.com/data/mozqa.com/http_auth/basic/'
    scraper = mozdownload.DirectScraper(destination=str(tmpdir),
                                        url=basic_auth_url,
                                        username=username,
                                        password=password)
    scraper.download()
    assert os.path.isfile(os.path.join(str(tmpdir), 'mozqa.com'))
def test_valid_authentication(httpd, tmpdir):
    username = '******'
    password = '******'
    basic_auth_url = urljoin(httpd.get_url(), 'basic_auth')
    scraper = mozdownload.DirectScraper(destination=str(tmpdir),
                                        url=basic_auth_url,
                                        username=username,
                                        password=password)
    scraper.download()
    assert os.path.isfile(os.path.join(str(tmpdir), 'basic_auth'))
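# The local 'basic_auth' route exercised above comes from the test server and
# is not shown here. A minimal sketch of such an endpoint (hypothetical
# handler and credentials, standard library only): reply 401 until the request
# carries matching HTTP Basic credentials, then serve a small body.
import base64
from http.server import BaseHTTPRequestHandler

class BasicAuthHandler(BaseHTTPRequestHandler):
    # hypothetical credentials; the real test server defines its own
    expected = base64.b64encode(b'user:secret').decode()

    def do_GET(self):
        if self.headers.get('Authorization') == 'Basic ' + self.expected:
            body = b'authenticated'
            self.send_response(200)
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_response(401)
            self.send_header('WWW-Authenticate', 'Basic realm="test"')
            self.end_headers()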
    def test_authentication(self):
        """testing with basic authentication"""
        username = '******'
        password = '******'
        basic_auth_url = 'http://mozqa.com/data/mozqa.com/http_auth/basic/'

        # test with invalid authentication
        scraper = mozdownload.DirectScraper(destination=self.temp_dir,
                                            url=basic_auth_url,
                                            logger=self.logger)
        self.assertRaises(requests.exceptions.HTTPError, scraper.download)

        # test with valid authentication
        scraper = mozdownload.DirectScraper(destination=self.temp_dir,
                                            url=basic_auth_url,
                                            logger=self.logger,
                                            username=username,
                                            password=password)
        scraper.download()
        self.assertTrue(
            os.path.isfile(os.path.join(self.temp_dir, 'mozqa.com')))
    def test_optional_authentication(self):
        """testing with optional basic authentication"""
        optional_auth_url = 'https://webqa-ci.mozilla.com/'

        # URL offers optional authentication; no credentials specified
        scraper = mozdownload.DirectScraper(destination=self.temp_dir,
                                            url=optional_auth_url,
                                            logger=self.logger)
        scraper.download()
        self.assertTrue(
            os.path.isfile(os.path.join(self.temp_dir,
                                        'webqa-ci.mozilla.com')))
    def test_destination(self):
        """Test for various destination scenarios"""

        filename = 'download_test.txt'
        test_url = urljoin(self.wdir, filename)

        # destination is a directory
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=self.temp_dir,
                                            logger=self.logger)
        self.assertEqual(scraper.filename,
                         os.path.join(self.temp_dir, filename))

        # destination is a directory path including the filename
        destination = os.path.join(self.temp_dir, filename)
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=destination,
                                            logger=self.logger)
        self.assertEqual(scraper.filename, destination)

        # destination has only a filename
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=filename,
                                            logger=self.logger)
        self.assertEqual(scraper.filename, os.path.abspath(filename))

        # destination directory does not exist
        destination = os.path.join(self.temp_dir, 'temp_folder', filename)
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=destination,
                                            logger=self.logger)
        self.assertEqual(scraper.destination, destination)

        # ensure that multiple non-existent directories are created
        destination = os.path.join(self.temp_dir, 'tmp1', 'tmp2', filename)
        scraper = mozdownload.DirectScraper(url=test_url,
                                            destination=destination,
                                            logger=self.logger)
        self.assertEqual(scraper.destination, destination)
def test_invalid_authentication(httpd, tmpdir):
    basic_auth_url = urljoin(httpd.get_url(), 'basic_auth')
    scraper = mozdownload.DirectScraper(destination=str(tmpdir),
                                        url=basic_auth_url)
    with pytest.raises(requests.exceptions.HTTPError):
        scraper.download()
def test_url_not_found(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url, destination=str(tmpdir))
    with pytest.raises(errors.NotFoundError):
        scraper.download()
def test_standard_download(httpd, tmpdir):
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=str(tmpdir))
    scraper.download()
    assert os.path.isfile(os.path.join(str(tmpdir), filename))
def test_destination_as_filename_only(httpd):
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=filename)
    assert scraper.filename == os.path.abspath(filename)
def test_invalid_authentication(tmpdir):
    basic_auth_url = 'http://mozqa.com/data/mozqa.com/http_auth/basic/'
    scraper = mozdownload.DirectScraper(destination=str(tmpdir),
                                        url=basic_auth_url)
    with pytest.raises(requests.exceptions.HTTPError):
        scraper.download()
def test_url_not_found(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url, destination=str(tmpdir))
    with pytest.raises(requests.exceptions.RequestException):
        scraper.download()
def test_destination_as_path_with_filename(httpd, tmpdir):
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    destination = os.path.join(str(tmpdir), filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=destination)
    assert scraper.filename == destination
def test_destination_does_not_exist(httpd, tmpdir):
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    destination = os.path.join(str(tmpdir), 'temp_folder', filename)
    scraper = mozdownload.DirectScraper(url=test_url, destination=destination)
    assert scraper.destination == destination
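# For completeness, a sketch of the module-level imports the snippets above
# assume; exact import paths (notably for errors, create_md5 and the mhttpd
# test helper) may differ between mozdownload versions.
import os
from urllib.parse import urljoin  # assumed; the project may ship its own urljoin helper

import pytest
import requests

import mozdownload
from mozdownload import errors  # assumed source of the NotFoundError used above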