def test_download(self):
    """Test download method"""
    filename = 'download_test.txt'

    # standard download
    test_url = urljoin(self.wdir, 'download_test.txt')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        directory=self.temp_dir,
                                        version=None)
    scraper.download()
    self.assertTrue(os.path.isfile(os.path.join(self.temp_dir, filename)))

    # Compare original and downloaded file via md5 hash
    md5_original = create_md5(os.path.join(mhttpd.HERE, mhttpd.WDIR, filename))
    md5_downloaded = create_md5(os.path.join(self.temp_dir, filename))
    self.assertEqual(md5_original, md5_downloaded)

    # RequestException
    test_url1 = urljoin(self.wdir, 'does_not_exist.html')
    scraper1 = mozdownload.DirectScraper(url=test_url1,
                                         directory=self.temp_dir,
                                         version=None)
    self.assertRaises(requests.exceptions.RequestException,
                      scraper1.download)

    # Covering retry_attempts
    test_url2 = urljoin(self.wdir, 'does_not_exist.html')
    scraper2 = mozdownload.DirectScraper(url=test_url2,
                                         directory=self.temp_dir,
                                         version=None,
                                         retry_attempts=3,
                                         retry_delay=1.0)
    self.assertRaises(requests.exceptions.RequestException,
                      scraper2.download)

def test_download(self):
    """Test download method"""
    filename = 'download_test.txt'

    # standard download
    test_url = urljoin(self.wdir, filename)
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=self.temp_dir,
                                        logger=self.logger)
    scraper.download()
    self.assertTrue(os.path.isfile(os.path.join(self.temp_dir, filename)))

    # Compare original and downloaded file via md5 hash
    md5_original = create_md5(
        os.path.join(mhttpd.HERE, mhttpd.WDIR, filename))
    md5_downloaded = create_md5(os.path.join(self.temp_dir, filename))
    self.assertEqual(md5_original, md5_downloaded)

    # RequestException
    test_url1 = urljoin(self.wdir, 'does_not_exist.html')
    scraper1 = mozdownload.DirectScraper(url=test_url1,
                                         destination=self.temp_dir,
                                         logger=self.logger)
    self.assertRaises(requests.exceptions.RequestException,
                      scraper1.download)

    # Covering retry_attempts
    test_url2 = urljoin(self.wdir, 'does_not_exist.html')
    scraper2 = mozdownload.DirectScraper(url=test_url2,
                                         destination=self.temp_dir,
                                         retry_attempts=3,
                                         retry_delay=1.0,
                                         logger=self.logger)
    self.assertRaises(requests.exceptions.RequestException,
                      scraper2.download)

def get_latest_build_date(self):
    """Return date of latest available nightly build."""
    # Note: ('fennec') is just a parenthesized string; the membership test
    # needs a real tuple, hence the trailing comma.
    if self.application not in ('fennec',):
        url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
    else:
        url = urljoin(self.base_url, 'nightly',
                      'latest-%s-%s/' % (self.branch, self.platform))

    self.logger.info('Retrieving the build status file from %s' % url)

    parser = self._create_directory_parser(url)
    parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
    if not parser.entries:
        message = 'Status file for %s build cannot be found' % \
            self.platform_regex
        raise errors.NotFoundError(message, url)

    # Read status file for the platform, retrieve build id,
    # and convert to a date
    headers = {'Cache-Control': 'max-age=0'}

    r = self.session.get(url + parser.entries[-1], headers=headers)
    try:
        r.raise_for_status()
        return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
    finally:
        r.close()

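# A minimal sketch of the build-id conversion performed above: the first line
# of the status file holds a 14-digit build id, which strptime turns into a
# datetime. The sample value below is hypothetical.
from datetime import datetime

build_id = '20240101030201'
assert datetime.strptime(build_id, '%Y%m%d%H%M%S') == datetime(2024, 1, 1, 3, 2, 1)
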
def test_filter(httpd):
    """Testing the DirectoryParser filter method"""
    parser = DirectoryParser(
        urljoin(httpd.get_url(), 'directoryparser', 'filter/'))
    parser.entries.sort()

    # Get the contents of the folder - dirs and files
    folder_path = urljoin(httpd.router.doc_root, 'directoryparser', 'filter')
    contents = os.listdir(folder_path)
    contents.sort()
    assert parser.entries == contents

    # filter out files
    parser.entries = parser.filter(r'^\d+$')

    # Get only the subdirectories of the folder
    if six.PY2:
        dirs = os.walk(folder_path).next()[1]
    elif six.PY3:
        dirs = os.walk(folder_path).__next__()[1]
    dirs.sort()
    assert parser.entries == dirs

    # Test filter method with a function
    parser.entries = parser.filter(lambda x: x == dirs[0])
    assert parser.entries == [dirs[0]]

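# Design note: the built-in next() works under both Python 2 and 3, so the
# six.PY2/six.PY3 branch above could be collapsed into a single call; a
# self-contained sketch listing the subdirectories of the working directory:
import os

dirs = next(os.walk('.'))[1]
dirs.sort()
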
def test_names_with_spaces(self):
    parser = DirectoryParser(urljoin(self.wdir, 'directoryparser',
                                     'some spaces/'))

    # Get the contents of the folder - dirs and files
    folder_path = urljoin(mhttpd.HERE, mhttpd.WDIR, 'directoryparser',
                          'some spaces')
    contents = os.listdir(folder_path)
    contents.sort()

    self.assertEqual(parser.entries, contents)

def test_names_with_spaces(httpd):
    parser = DirectoryParser(urljoin(httpd.get_url(), 'directoryparser',
                                     'some spaces/'))
    parser.entries.sort()

    # Get the contents of the folder - dirs and files
    folder_path = urljoin(httpd.router.doc_root, 'directoryparser',
                          'some spaces')
    contents = os.listdir(folder_path)
    contents.sort()
    assert parser.entries == contents

def path_regex(self):
    """Return the regex for the path to the build folder."""
    try:
        path = '%s/' % urljoin(self.monthly_build_list_regex,
                               self.builds[self.build_index])
        if self.application in APPLICATIONS_MULTI_LOCALE \
                and self.locale != 'multi':
            path = '%s/' % urljoin(path, self.locale)
        return path
    except Exception:
        folder = urljoin(self.base_url, self.monthly_build_list_regex)
        raise errors.NotFoundError("Specified sub folder cannot be found",
                                   folder)

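# Note: the urljoin used throughout these snippets is mozdownload's own helper
# rather than urllib's two-argument urljoin; it accepts any number of path
# fragments. A sketch of the assumed behaviour, not the actual implementation:
def urljoin_sketch(*fragments):
    """Join URL fragments with single slashes (illustrative only)."""
    parts = [fragment.rstrip('/') for fragment in fragments[:-1]]
    parts.append(fragments[-1])
    return '/'.join(parts)

assert urljoin_sketch('http://example.com/', 'nightly', 'latest/') == \
    'http://example.com/nightly/latest/'
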
def test_implementation_error(httpd, tmpdir, attr):
    """test implementations available"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = DirectScraper(url=test_url, destination=str(tmpdir))
    with pytest.raises(errors.NotImplementedError):
        getattr(scraper, attr)

def test_destination_multiple_dir(httpd, tmpdir):
    """ensure that multiple non existing directories are created"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    destination = os.path.join(str(tmpdir), 'tmp1', 'tmp2', filename)
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=destination)
    assert scraper.destination == destination

def test_scraper(httpd, tmpdir, args, filename, url):
    """Testing various download scenarios for TinderboxScraper"""
    scraper = TinderboxScraper(destination=str(tmpdir),
                               base_url=httpd.get_url(),
                               **args)
    expected_filename = os.path.join(str(tmpdir), filename)
    assert scraper.filename == expected_filename
    assert unquote(scraper.url) == urljoin(httpd.get_url(), url)

def path_regex(self):
    """Return the regex for the path to the build folder."""
    if self.locale_build:
        return self.build_list_regex

    return '%s/' % urljoin(self.build_list_regex,
                           self.builds[self.build_index])

def test_latest_build(httpd, tmpdir, args, filename, url):
    """Testing various download scenarios for latest release candidate builds"""
    scraper = ReleaseCandidateScraper(destination=str(tmpdir),
                                      base_url=httpd.get_url(),
                                      **args)
    expected_filename = os.path.join(str(tmpdir), filename)
    assert scraper.filename == expected_filename
    assert urllib.unquote(scraper.url) == urljoin(httpd.get_url(), url)

def get_build_info(self):
    """Define additional build information."""
    ReleaseScraper.get_build_info(self)

    # Internally we access builds via index
    url = urljoin(self.base_url, self.candidate_build_list_regex)
    self.logger.info('Retrieving list of candidate builds from %s' % url)

    parser = self._create_directory_parser(url)
    if not parser.entries:
        message = 'Folder for specific candidate builds at %s has not ' \
                  'been found' % url
        raise errors.NotFoundError(message, url)

    self.show_matching_builds(parser.entries)
    self.builds = parser.entries
    self.build_index = len(parser.entries) - 1

    if self.build_number and \
            ('build%s' % self.build_number) in self.builds:
        self.builds = ['build%s' % self.build_number]
        self.build_index = 0
        self.logger.info('Selected build: build%s' % self.build_number)
    else:
        self.logger.info('Selected build: build%d' % (self.build_index + 1))

def test_scraper(httpd, tmpdir, args, filename, url):
    """Testing various download scenarios for TinderboxScraper"""
    scraper = TinderboxScraper(destination=str(tmpdir),
                               base_url=httpd.get_url(),
                               **args)
    expected_filename = os.path.join(str(tmpdir), filename)
    assert scraper.filename == expected_filename
    assert urllib.unquote(scraper.url) == urljoin(httpd.get_url(), url)

def test_candidate_scraper(self):
    for test in tests_candidate_scraper:
        scraper = mozdownload.ReleaseCandidateScraper(
            destination=self.temp_dir, logger=self.logger, **test['args'])
        if test.get('url'):
            self.assertEqual(urllib.unquote(scraper.url),
                             urljoin(BASE_URL, test['url']))

def get_build_info(self):
    """Define additional build information."""
    ReleaseScraper.get_build_info(self)

    # Internally we access builds via index
    url = urljoin(self.base_url, self.candidate_build_list_regex)
    self.logger.info('Retrieving list of candidate builds from %s' % url)

    parser = self._create_directory_parser(url)
    if not parser.entries:
        message = 'Folder for specific candidate builds at %s has not ' \
                  'been found' % url
        raise errors.NotFoundError(message, url)

    self.show_matching_builds(parser.entries)
    self.builds = parser.entries
    self.build_index = len(parser.entries) - 1

    if self.build_number and \
            ('build%s' % self.build_number) in self.builds:
        self.builds = ['build%s' % self.build_number]
        self.build_index = 0
        self.logger.info('Selected build: build%s' % self.build_number)
    else:
        self.logger.info('Selected build: %s' %
                         (parser.entries[self.build_index]))

def test_init(self):
    """Testing the basic functionality of the DirectoryParser Class"""
    # DirectoryParser returns output
    parser = DirectoryParser(self.wdir)

    # relies on the presence of other files in the directory
    # Checks if DirectoryParser lists the server entries
    self.assertNotEqual(parser.entries, [], "parser.entries were not listed")

    # path_regex to mozdownload -t release -p win32 -v latest
    testpath = urljoin(self.wdir, 'directoryparser/')
    parser1 = DirectoryParser(testpath)
    parser1.entries.sort()
    testdir = os.listdir(urljoin(mhttpd.HERE, 'data', 'directoryparser'))
    testdir.sort()
    self.assertEqual(parser1.entries, testdir)

def test_init(httpd):
    """Testing the basic functionality of the DirectoryParser Class"""
    # DirectoryParser returns output
    parser = DirectoryParser(httpd.get_url())

    # relies on the presence of other files in the directory
    # Checks if DirectoryParser lists the server entries
    assert parser.entries != [], "parser.entries were not listed"

    # path_regex to mozdownload -t release -p win32 -v latest
    testpath = urljoin(httpd.get_url(), 'directoryparser/')
    parser = DirectoryParser(testpath)
    parser.entries.sort()
    testdir = os.listdir(urljoin(httpd.router.doc_root, 'directoryparser'))
    testdir.sort()
    assert parser.entries == testdir

def path_regex(self):
    """Return the regex for the path to the build folder."""
    build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {
        'PLATFORM': self.platform_regex,
        'DEBUG': '-debug' if self.debug_build else ''}
    return urljoin(self.build_list_regex,
                   self.builds[self.build_index],
                   build_dir)

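# A quick illustration of the directory name built above; the platform value
# is hypothetical:
build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {'PLATFORM': 'linux64',
                                            'DEBUG': '-debug'}
assert build_dir == 'try-linux64-debug/'
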
def test_retry_attempts(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir),
                                        retry_attempts=3,
                                        retry_delay=0.1)
    with pytest.raises(requests.exceptions.RequestException):
        scraper.download()

def test_release_scraper(self):
    for test in tests_release_scraper:
        scraper = mozdownload.ReleaseScraper(destination=self.temp_dir,
                                             log_level='ERROR',
                                             **test['args'])
        if test.get('url'):
            self.assertEqual(urllib.unquote(scraper.url),
                             urljoin(BASE_URL, test['url']))

def test_retry_attempts(httpd, tmpdir):
    test_url = urljoin(httpd.get_url(), 'does_not_exist.html')
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir),
                                        retry_attempts=3,
                                        retry_delay=0.1)
    with pytest.raises(errors.NotFoundError):
        scraper.download()

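# A minimal sketch of the retry semantics these tests exercise, assuming the
# scraper simply sleeps between attempts and re-raises once the configured
# attempts are used up; download_with_retries is an illustrative helper, not
# part of mozdownload:
import time

def download_with_retries(do_download, retry_attempts=0, retry_delay=10.0):
    for attempt in range(retry_attempts + 1):
        try:
            return do_download()
        except Exception:
            if attempt == retry_attempts:
                raise
            time.sleep(retry_delay)
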
def __init__(self, destination=None, platform=None,
             application='firefox', locale=None, extension=None,
             username=None, password=None,
             retry_attempts=0, retry_delay=10.,
             is_stub_installer=False, timeout=None,
             logger=None, base_url=BASE_URL):
    """Create an instance of the generic scraper."""
    # Private properties for caching
    self._filename = None
    self._binary = None

    self.logger = logger or logging.getLogger(self.__module__)

    self.destination = destination or os.getcwd()

    if not locale:
        if application in APPLICATIONS_MULTI_LOCALE:
            self.locale = 'multi'
        else:
            self.locale = 'en-US'
    else:
        self.locale = locale

    self.locale_build = self.locale not in ('en-US', 'multi')

    self.platform = platform or self.detect_platform()

    self.session = requests.Session()
    if (username, password) != (None, None):
        self.session.auth = (username, password)

    self.retry_attempts = retry_attempts
    self.retry_delay = retry_delay
    self.is_stub_installer = is_stub_installer
    self.timeout_download = timeout
    # this is the timeout used in requests.get. Unlike "auth",
    # it does not work if we attach it on the session, so we handle
    # it independently.
    self.timeout_network = 60.

    # build the base URL
    self.application = application
    self.base_url = '%s/' % urljoin(
        base_url,
        APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application)
    )

    if extension:
        self.extension = extension
    else:
        if self.application in APPLICATIONS_MULTI_LOCALE and \
                self.platform in ('win32', 'win64'):
            # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
            self.extension = 'zip'
        else:
            self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]

    self._retry_check_404(self.get_build_info)

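# The timeout comment above deserves a concrete illustration: requests honours
# credentials attached to a Session, but a timeout has to be passed on each
# request. A sketch with a hypothetical URL and credentials:
import requests

session = requests.Session()
session.auth = ('user', 'secret')  # applied to every request on this session
response = session.get('http://example.com/build.txt', timeout=60.0)
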
def test_compare_download(httpd, tmpdir):
    """Compare original and downloaded file via md5 hash"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = mozdownload.DirectScraper(url=test_url,
                                        destination=str(tmpdir))
    scraper.download()

    md5_original = create_md5(os.path.join(httpd.router.doc_root, filename))
    md5_downloaded = create_md5(os.path.join(str(tmpdir), filename))
    assert md5_original == md5_downloaded

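# create_md5 is a small test helper; a sketch of what it presumably does,
# reading the file in chunks so large downloads do not fill memory
# (create_md5_sketch is an illustrative name, not the real helper):
import hashlib

def create_md5_sketch(path, chunk_size=8192):
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
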
def test_scraper(self):
    """Testing various download scenarios for DailyScraper"""
    for entry in tests:
        scraper = DailyScraper(directory=self.temp_dir,
                               base_url=self.wdir,
                               version=None,
                               log_level='ERROR',
                               **entry['args'])
        expected_target = os.path.join(self.temp_dir, entry['target'])
        self.assertEqual(scraper.target, expected_target)
        self.assertEqual(urllib.unquote(scraper.final_url),
                         urljoin(self.wdir, entry['target_url']))

def test_url_download(httpd, tmpdir):
    """test mozdownload direct url scraper"""
    filename = 'download_test.txt'
    test_url = urljoin(httpd.get_url(), filename)
    scraper = DirectScraper(url=test_url, destination=str(tmpdir))
    assert scraper.url == test_url
    assert scraper.filename == os.path.join(str(tmpdir), filename)

    scraper.download()
    assert os.path.isfile(os.path.join(str(tmpdir), scraper.filename))

def test_scraper(self):
    """Testing various download scenarios for ReleaseScraper"""
    for entry in tests:
        scraper = ReleaseScraper(destination=self.temp_dir,
                                 base_url=self.wdir,
                                 log_level='ERROR',
                                 **entry['args'])
        expected_target = os.path.join(self.temp_dir, entry['target'])
        self.assertEqual(scraper.target, expected_target)
        self.assertEqual(urllib.unquote(scraper.final_url),
                         urljoin(self.wdir, entry['target_url']))

def setUp(self):
    """Starts server that lists all files in the directory"""
    self.logger = mozlog.unstructured.getLogger(self.__class__.__name__)
    self.logger.setLevel('ERROR')

    self.httpd = mozhttpd.MozHttpd(port=8080,
                                   docroot=HERE,
                                   urlhandlers=[{'method': 'GET',
                                                 'path': '/hg/(.+)/json-pushes?',
                                                 'function': resource_get}])
    self.logger.debug("Serving '%s' at %s:%s" % (self.httpd.docroot,
                                                 self.httpd.host,
                                                 self.httpd.port))
    self.httpd.start(block=False)
    self.server_address = "http://%s:%s" % (self.httpd.host,
                                            self.httpd.port)
    self.wdir = urljoin(self.server_address, WDIR)
    self.hgdir = urljoin(self.server_address, "hg")

    # Create a temporary directory for potential downloads
    self.temp_dir = tempfile.mkdtemp()

def test_valid_authentication(httpd, tmpdir):
    username = '******'
    password = '******'
    basic_auth_url = urljoin(httpd.get_url(), 'basic_auth')
    scraper = mozdownload.DirectScraper(destination=str(tmpdir),
                                        url=basic_auth_url,
                                        username=username,
                                        password=password)
    scraper.download()
    assert os.path.isfile(os.path.join(str(tmpdir), 'basic_auth'))

def test_scraper(self):
    """Testing various download scenarios for TinderboxScraper"""
    for entry in tests:
        scraper = TinderboxScraper(directory=self.temp_dir,
                                   version=None,
                                   base_url=self.wdir,
                                   **entry['args'])
        expected_target = os.path.join(self.temp_dir, entry['target'])
        self.assertEqual(scraper.target, expected_target)
        self.assertEqual(urllib.unquote(scraper.final_url),
                         urljoin(self.wdir, entry['target_url']))

def test_scraper(httpd, tmpdir, args, filename, url, mocker):
    """Testing various download scenarios for TryScraper"""
    query_builds_by_revision = mocker.patch(
        'mozdownload.treeherder.Treeherder.query_builds_by_revision')
    query_builds_by_revision.return_value = [
        '/firefox/try-builds/[email protected]/try-foobar/'
    ]

    scraper = TryScraper(destination=str(tmpdir),
                         base_url=httpd.get_url(),
                         **args)
    expected_filename = os.path.join(str(tmpdir), filename)
    assert scraper.filename == expected_filename
    assert urllib.unquote(scraper.url) == urljoin(httpd.get_url(), url)

def test_scraper(self):
    """Testing various download scenarios for ReleaseScraper"""
    for entry in tests:
        scraper = ReleaseScraper(directory=self.temp_dir,
                                 base_url=self.wdir,
                                 log_level='ERROR',
                                 **entry['args'])
        expected_target = os.path.join(self.temp_dir, entry['target'])
        self.assertEqual(scraper.target, expected_target)
        self.assertEqual(urllib.unquote(scraper.final_url),
                         urljoin(self.wdir, entry['target_url']))

def test_scraper(httpd, tmpdir, args, filename, url, mocker):
    """Testing various download scenarios for TryScraper"""
    query_builds_by_revision = mocker.patch(
        'mozdownload.treeherder.Treeherder.query_builds_by_revision')
    query_builds_by_revision.return_value = [
        '/firefox/try-builds/[email protected]/try-foobar/'
    ]

    scraper = TryScraper(destination=str(tmpdir),
                         base_url=httpd.get_url(),
                         **args)
    expected_filename = os.path.join(str(tmpdir), filename)
    assert scraper.filename == expected_filename
    assert unquote(scraper.url) == urljoin(httpd.get_url(), url)

def test_scraper(self):
    """Testing various download scenarios for TinderboxScraper"""
    for entry in tests:
        scraper = TinderboxScraper(destination=self.temp_dir,
                                   base_url=self.wdir,
                                   logger=self.logger,
                                   **entry['args'])
        expected_filename = os.path.join(self.temp_dir, entry['filename'])
        self.assertEqual(scraper.filename, expected_filename)
        self.assertEqual(urllib.unquote(scraper.url),
                         urljoin(self.wdir, entry['url']))

def is_build_dir(self, folder_name):
    """Return whether or not the given dir contains a build."""
    # Cannot move up to base scraper due to parser.entries call in
    # get_build_info_for_index (see below)
    url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name)

    if self.application in APPLICATIONS_MULTI_LOCALE \
            and self.locale != 'multi':
        url = '%s/' % urljoin(url, self.locale)

    parser = self._create_directory_parser(url)

    pattern = re.compile(self.binary_regex, re.IGNORECASE)
    for entry in parser.entries:
        try:
            pattern.match(entry).group()
            return True
        except Exception:
            # No match, continue with next entry
            continue
    return False

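# Design note: re.match() returns None when there is no match, so the
# try/except above could be written as a plain truth test; a sketch with an
# illustrative helper name:
import re

def contains_build(entries, binary_regex):
    pattern = re.compile(binary_regex, re.IGNORECASE)
    return any(pattern.match(entry) for entry in entries)
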
def test_latest_build(self):
    """Testing various download scenarios for latest release candidate builds"""
    for entry in tests:
        scraper = ReleaseCandidateScraper(destination=self.temp_dir,
                                          base_url=self.wdir,
                                          logger=self.logger,
                                          **entry['args'])
        expected_filename = os.path.join(self.temp_dir, entry['filename'])
        self.assertEqual(scraper.filename, expected_filename)
        self.assertEqual(urllib.unquote(scraper.url),
                         urljoin(self.wdir, entry['url']))

def test_scraper(self):
    """Testing various download scenarios for ReleaseScraper"""
    for entry in tests:
        scraper = ReleaseScraper(destination=self.temp_dir,
                                 base_url=self.wdir,
                                 logger=self.logger,
                                 **entry['args'])
        expected_filename = os.path.join(self.temp_dir, entry['filename'])
        self.assertEqual(scraper.filename, expected_filename)
        self.assertEqual(urllib.unquote(scraper.url),
                         urljoin(self.wdir, entry['url']))

def test_filter(self):
    """Testing the DirectoryParser filter method"""
    parser = DirectoryParser(urljoin(self.wdir, 'directoryparser',
                                     'filter/'))

    # Get the contents of the folder - dirs and files
    folder_path = urljoin(mhttpd.HERE, mhttpd.WDIR, 'directoryparser',
                          'filter')
    contents = os.listdir(folder_path)
    contents.sort()
    self.assertEqual(parser.entries, contents)

    # filter out files
    parser.entries = parser.filter(r'^\d+$')

    # Get only the subdirectories of the folder
    dirs = os.walk(folder_path).next()[1]
    dirs.sort()
    self.assertEqual(parser.entries, dirs)

    # Test filter method with a function
    parser.entries = parser.filter(lambda x: x == dirs[0])
    self.assertEqual(parser.entries, [dirs[0]])

def test_filter(httpd):
    """Testing the DirectoryParser filter method"""
    parser = DirectoryParser(urljoin(httpd.get_url(), 'directoryparser',
                                     'filter/'))
    parser.entries.sort()

    # Get the contents of the folder - dirs and files
    folder_path = urljoin(httpd.router.doc_root, 'directoryparser', 'filter')
    contents = os.listdir(folder_path)
    contents.sort()
    assert parser.entries == contents

    # filter out files
    parser.entries = parser.filter(r'^\d+$')

    # Get only the subdirectories of the folder
    dirs = os.walk(folder_path).next()[1]
    dirs.sort()
    assert parser.entries == dirs

    # Test filter method with a function
    parser.entries = parser.filter(lambda x: x == dirs[0])
    assert parser.entries == [dirs[0]]

def query_versions(self, version=None):
    """Check specified version and resolve special values."""
    if version not in RELEASE_AND_CANDIDATE_LATEST_VERSIONS:
        return [version]

    url = urljoin(self.base_url, 'releases/')
    parser = self._create_directory_parser(url)
    if version:
        versions = parser.filter(
            RELEASE_AND_CANDIDATE_LATEST_VERSIONS[version])
        from distutils.version import LooseVersion
        versions.sort(key=LooseVersion)
        return [versions[-1]]
    else:
        return parser.entries

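# LooseVersion gives a numeric-aware sort, so '10.0' correctly ranks above
# '9.0.1' where a plain string sort would not; a small illustration with
# made-up version numbers:
from distutils.version import LooseVersion

versions = ['9.0.1', '10.0', '9.0']
versions.sort(key=LooseVersion)
assert versions == ['9.0', '9.0.1', '10.0']
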
def test_url_download(self):
    filename = 'download_test.txt'
    test_url = urljoin(self.wdir, filename)

    scraper = DirectScraper(url=test_url,
                            destination=self.temp_dir,
                            logger=self.logger)
    self.assertEqual(scraper.url, test_url)
    self.assertEqual(scraper.filename,
                     os.path.join(self.temp_dir, filename))

    for attr in ['binary', 'binary_regex', 'path', 'path_regex']:
        self.assertRaises(errors.NotImplementedError, getattr, scraper, attr)

    scraper.download()
    self.assertTrue(os.path.isfile(os.path.join(self.temp_dir,
                                                scraper.filename)))