def get_build_info(self, build_index=None):
    """Collect the build folders listed below the build list URL.

    The listing is narrowed to the folder named ``self.timestamp`` when a
    timestamp is set (otherwise to folder names made of digits only) and,
    when ``self.date`` is set, to the entries accepted by
    ``self.date_matches``.

    :param build_index: Index of the wanted build within the matching
        entries. Defaults to the last entry when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no entry is left after filtering.
    """
    url = '/'.join([self.base_url, self.build_list_regex])
    print('Retrieving list of builds from %s' % url)

    # A timestamp selects exactly one folder name; without it every
    # all-digit folder name is accepted.
    if self.timestamp:
        regex = '^' + self.timestamp + '$'
    else:
        regex = r'^\d+$'

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    parser.entries = parser.filter(regex)

    if self.date is not None:
        # Keep only the builds of the requested day.
        parser.entries = [entry for entry in parser.entries
                          if self.date_matches(entry)]

    if not parser.entries:
        raise NotFoundError('No builds have been found', url)

    # Without an explicit index the last build of the day is selected.
    last_index = len(parser.entries) - 1
    chosen_index = last_index if build_index is None else build_index
    return (parser.entries, chosen_index)
def get_build_info_for_date(self, date, has_time=False, build_index=None):
    """Return the build folders for ``date`` and the index of the chosen one.

    Folder names must look like ``<YYYY-MM-DD>-<digits->...<branch>`` with
    a ``-l10n`` suffix for every locale other than ``en-US``.

    :param date: ``datetime`` whose day (and, with ``has_time``, time of
        day) identifies the build.
    :param has_time: When true, the build is located through the
        ``HH-MM-SS`` part of ``date`` and ``build_index`` is ignored.
    :param build_index: Index into the folders of that day. Defaults to
        the last folder when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no folder exists for the day, or none
        matches the requested time (previously a bare ``IndexError``).
    """
    url = '/'.join([self.base_url, self.monthly_build_list_regex])
    print('Retrieving list of builds from %s' % url)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % {
        'DATE': date.strftime('%Y-%m-%d'),
        'BRANCH': self.branch,
        'L10N': '' if self.locale == 'en-US' else '-l10n'}
    parser.entries = parser.filter(regex)

    if not parser.entries:
        # Report the date that was actually searched for, not self.date.
        message = 'Folder for builds on %s has not been found' % \
            date.strftime('%Y-%m-%d')
        raise NotFoundError(message, url)

    if has_time:
        # If a time is included in the date, use it to determine the
        # build's index.
        regex = r'.*%s.*' % date.strftime('%H-%M-%S')
        timed_entries = parser.filter(regex)
        if not timed_entries:
            message = 'Folder for builds on %s has not been found' % \
                date.strftime('%Y-%m-%d-%H-%M-%S')
            raise NotFoundError(message, url)
        build_index = parser.entries.index(timed_entries[0])
    elif build_index is None:
        # If no index has been given, set it to the last build of the day.
        build_index = len(parser.entries) - 1

    return (parser.entries, build_index)
def get_build_info_for_index(self, build_index=None):
    """Return the matching build folders and the index of the chosen one.

    Only folder names made of digits are considered. When
    ``self.timestamp`` is set, the result is narrowed to the folder of
    that exact name; otherwise, when ``self.date`` is set, to the entries
    accepted by ``self.date_matches``.

    :param build_index: Index of the wanted build within the matching
        folders. Defaults to the last one when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no folder matches.
    """
    url = urljoin(self.base_url, self.build_list_regex)
    self.logger.info('Retrieving list of builds from %s' % url)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    builds = parser.filter(r'^\d+$')

    if self.timestamp:
        # The timestamp is the folder name, so at most one entry remains.
        builds = [self.timestamp] if self.timestamp in builds else []
    elif self.date:
        # Keep only the builds of the requested day.
        builds = [entry for entry in builds if self.date_matches(entry)]

    if not builds:
        raise NotFoundError('No builds have been found', url)

    self.show_matching_builds(builds)

    # Without an explicit index the last build of the day is selected.
    if build_index is None:
        build_index = len(builds) - 1
    return (builds, build_index)
def get_latest_build_date(self):
    """Return the date of the latest available nightly build.

    The ``latest-<branch>/`` folder (``latest-<branch>-<platform>/`` for
    fennec) is searched for a ``.txt`` status file matching
    ``self.platform_regex``; its first line is parsed as a
    ``%Y%m%d%H%M%S`` build id.

    :returns: ``datetime`` parsed from the build id.
    :raises errors.NotFoundError: If no status file is listed.
    """
    # Compare by equality: ``x not in ('fennec')`` tests against a plain
    # string (the parentheses do not create a tuple), i.e. it is a
    # substring check.
    if self.application != 'fennec':
        url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
    else:
        url = urljoin(self.base_url, 'nightly',
                      'latest-%s-%s/' % (self.branch, self.platform))

    self.logger.info('Retrieving the build status file from %s' % url)
    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
    if not parser.entries:
        message = 'Status file for %s build cannot be found' % \
            self.platform_regex
        raise errors.NotFoundError(message, url)

    # Read status file for the platform, retrieve build id,
    # and convert to a date. The header asks caches for a fresh copy.
    headers = {'Cache-Control': 'max-age=0'}
    r = requests.get(url + parser.entries[-1],
                     auth=self.authentication, headers=headers)
    try:
        r.raise_for_status()
        return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
    finally:
        r.close()
def __init__(self, branch='mozilla-central', build_id=None, date=None,
             build_number=None, *args, **kwargs):
    """Set up the scraper and resolve which build is requested.

    The build is identified, in order of precedence, by ``build_id``
    (``%Y%m%d%H%M%S``), by ``date`` (``%Y-%m-%d``) together with
    ``build_number``, or, when neither is given, by the build id read
    from the status file in the ``latest-<branch>`` folder.

    :param branch: Name of the branch to look builds up for.
    :param build_id: Build id including date and time.
    :param date: Day of the build, without time.
    :param build_number: Number of the build on that day; stored as
        ``build_number - 1`` in ``self.build_index``.
    :raises NotFoundError: If no status file is found when looking up the
        latest build.
    """
    Scraper.__init__(self, *args, **kwargs)

    self.branch = branch

    # Internally builds are addressed by index rather than by number.
    self.build_index = (None if build_number is None
                        else int(build_number) - 1)

    if date and not build_id:
        # Only a day is known: the build index picks the build of that day.
        self.date = datetime.strptime(date, '%Y-%m-%d')
        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, build_index=self.build_index)
        return

    if build_id:
        # '20111212042025' -> '2011-12-12 04:20:25'
        self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S')
    else:
        # Neither build id nor date: take the build id from the status
        # file of the latest build of the branch.
        url = '%s/nightly/latest-%s/' % (self.base_url, self.branch)
        print('Retrieving the build status file from %s' % url)

        parser = DirectoryParser(url, authentication=self.authentication,
                                 timeout=self.timeout_network)
        parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
        if not parser.entries:
            raise NotFoundError(
                'Status file for %s build cannot be found' %
                self.platform_regex, url)

        # The first line of the status file holds the build id.
        response = requests.get(url + parser.entries[-1],
                                auth=self.authentication,
                                headers={'Cache-Control': 'max-age=0'})
        response.raise_for_status()
        first_line = response.text.split('\n')[0]
        self.date = datetime.strptime(first_line, '%Y%m%d%H%M%S')

    # Both remaining paths know the exact time of the build.
    self.builds, self.build_index = self.get_build_info_for_date(
        self.date, has_time=True)
def get_build_info_for_date(self, date, build_index=None):
    """Return the build folders for ``date`` and the index of the chosen one.

    Folders are matched by day, branch and an optional ``-l10n`` suffix
    (for locales other than ``en-US`` and ``multi``), and kept only when
    ``self.is_build_dir`` accepts them. If ``date`` carries a time of day,
    the list is further narrowed to folders containing ``HH-MM-SS``.

    :param date: ``datetime`` identifying the day (and optionally the
        time) of the build.
    :param build_index: Index into the matching folders. Defaults to the
        last one when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no folder matches.
    """
    url = urljoin(self.base_url, self.monthly_build_list_regex)

    # A date counts as "having a time" unless it is exactly midnight.
    # Checked field by field because the truthiness of ``date.time()``
    # is always true on Python 3.5 and later.
    has_time = date is not None and any(
        (date.hour, date.minute, date.second, date.microsecond))

    self.logger.info('Retrieving list of builds from %s' % url)
    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)

    regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % {
        'DATE': date.strftime('%Y-%m-%d'),
        'BRANCH': self.branch,
        # ensure to select the correct subfolder for localized builds
        'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?'}
    parser.entries = parser.filter(regex)
    parser.entries = parser.filter(self.is_build_dir)

    if has_time:
        # If a time is included in the date, use it to narrow the list
        # down to the matching build.
        regex = r'.*%s.*' % date.strftime('%H-%M-%S')
        parser.entries = parser.filter(regex)

    if not parser.entries:
        date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
        # Report the date that was actually searched for, not self.date.
        message = 'Folder for builds on %s has not been found' % \
            date.strftime(date_format)
        raise NotFoundError(message, url)

    self.show_matching_builds(parser.entries)

    # If no index has been given, set it to the last build of the day.
    if build_index is None:
        build_index = len(parser.entries) - 1
    self.logger.info('Selected build: %s' % parser.entries[build_index])

    return (parser.entries, build_index)
def get_build_info(self):
    """Resolve the requested build and store it on the instance.

    Reads ``self.build_number``, ``self.build_id`` and ``self.date`` and
    sets ``self.build_index``, ``self.date`` and ``self.builds``. The
    build is identified by the build id if present, else by the date and
    build number, else by the build id found in the status file of the
    ``latest-<branch>`` folder.

    :raises NotFoundError: If no status file is found when looking up the
        latest build.
    """
    # Internally builds are addressed by index rather than by number.
    self.build_index = (None if self.build_number is None
                        else int(self.build_number) - 1)

    if self.date and not self.build_id:
        # Only a day is known: the build index picks the build of that day.
        self.date = datetime.strptime(self.date, '%Y-%m-%d')
        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, build_index=self.build_index)
        return

    if self.build_id:
        # '20111212042025' -> '2011-12-12 04:20:25'
        self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S')
    else:
        # Neither build id nor date: take the build id from the status
        # file of the latest build of the branch.
        url = '%s/nightly/latest-%s/' % (self.base_url, self.branch)
        self.logger.info('Retrieving the build status file from %s' % url)

        parser = DirectoryParser(url, authentication=self.authentication,
                                 timeout=self.timeout_network)
        parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
        if not parser.entries:
            raise NotFoundError(
                'Status file for %s build cannot be found' %
                self.platform_regex, url)

        # The first line of the status file holds the build id.
        response = requests.get(url + parser.entries[-1],
                                auth=self.authentication,
                                headers={'Cache-Control': 'max-age=0'})
        response.raise_for_status()
        first_line = response.text.split('\n')[0]
        self.date = datetime.strptime(first_line, '%Y%m%d%H%M%S')

    # Both remaining paths know the exact time of the build.
    self.builds, self.build_index = self.get_build_info_for_date(
        self.date, has_time=True)
def binary(self):
    """Return the name of the build.

    The listing at ``self.path`` is searched for the first entry matching
    ``self.binary_regex`` (case-insensitive); the matched text is cached
    in ``self._binary``. On failure the lookup is repeated after
    ``self.retry_delay`` seconds until ``self.retry_attempts`` attempts
    have been made.

    :returns: The cached or newly found binary name.
    :raises NotFoundError: If the folder is empty or holds no matching
        entry once the attempts are used up.
    :raises requests.exceptions.RequestException: If the request still
        fails on the final attempt.
    """
    attempt = 0

    while self._binary is None:
        attempt += 1
        try:
            # Retrieve all entries from the remote virtual folder
            parser = DirectoryParser(self.path,
                                     authentication=self.authentication,
                                     timeout=self.timeout_network)
            if not parser.entries:
                raise NotFoundError('No entries found', self.path)

            # Use the first matching directory entry. The match result is
            # tested explicitly instead of relying on a bare ``except``,
            # which would also swallow unrelated errors.
            pattern = re.compile(self.binary_regex, re.IGNORECASE)
            for entry in parser.entries:
                match = pattern.match(entry)
                if match:
                    self._binary = match.group()
                    break
            else:
                raise NotFoundError("Binary not found in folder",
                                    self.path)
        except (NotFoundError, requests.exceptions.RequestException) as e:
            if self.retry_attempts > 0:
                # Print only if multiple attempts are requested
                print("Build not found: '%s'" % e.message)
                print("Retrying... (attempt %s)" % attempt)
            if attempt >= self.retry_attempts:
                raise
            time.sleep(self.retry_delay)

    return self._binary
def get_build_info_for_index(self, build_index=None):
    """Return the build folders whose name ends in ``-<changeset>``.

    :param build_index: Accepted for interface compatibility; the first
        matching folder is always selected.
    :returns: Tuple ``(entries, 0)``.
    :raises errors.NotFoundError: If no folder matches the changeset.
    """
    url = urljoin(self.base_url, self.build_list_regex)
    self.logger.info('Retrieving list of builds from %s' % url)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    matches = parser.filter('.*-%s$' % self.changeset)
    if not matches:
        raise errors.NotFoundError('No builds have been found', url)

    self.show_matching_builds(matches)

    # The changeset identifies the build, so the first hit is used.
    selected = 0
    self.logger.info('Selected build: %s' % matches[selected])
    return (matches, selected)
def files(self):
    """Fetch the directory listing at ``self.base_url``, retrying on failure.

    The listing is requested until it is non-empty or until
    ``self.retry_attempts`` attempts have been made, sleeping
    ``self.retry_delay`` seconds between attempts.

    NOTE(review): the visible code only retrieves the listing and returns
    nothing; confirm whether further processing follows in the full file.

    :raises NotFoundError: If the listing is empty, or the server answered
        with 404, on the final attempt.
    :raises requests.exceptions.RequestException: For any other request
        failure on the final attempt.
    """
    attempt = 0
    parser = None

    while parser is None:
        attempt += 1
        try:
            # Retrieve all entries from the remote virtual folder. Keep
            # the result in a temporary so that an empty listing leaves
            # ``parser`` unset and the loop actually retries.
            listing = DirectoryParser(self.base_url, timeout=self.timeout)
            if not listing.entries:
                raise NotFoundError('No entries found', self.base_url)
            parser = listing
        except (NotFoundError, requests.exceptions.RequestException) as e:
            if self.retry_attempts > 0:
                # Wait only if multiple attempts are requested.
                time.sleep(self.retry_delay)

            if attempt >= self.retry_attempts:
                # ``response`` is missing on NotFoundError and may be None
                # on request errors raised before any reply was received.
                response = getattr(e, 'response', None)
                if response is not None and response.status_code == 404:
                    message = "Specified url has not been found"
                    raise NotFoundError(message, response.url)
                else:
                    raise
def __init__(self, branch='mozilla-central', build_id=None, date=None,
             build_number=None, *args, **kwargs):
    """Set up the scraper and resolve which build is requested.

    The build is identified, in order of precedence, by ``build_id``
    (``%Y%m%d%H%M%S``), by ``date`` (``%Y-%m-%d``) together with
    ``build_number``, or, when neither is given, by the build id read
    from the status file in the ``latest-<branch>`` folder.

    :param branch: Name of the branch to look builds up for.
    :param build_id: Build id including date and time.
    :param date: Day of the build, without time.
    :param build_number: Number of the build on that day; stored as
        ``build_number - 1`` in ``self.build_index``.
    :raises NotFoundException: If no status file is found when looking up
        the latest build.
    """
    Scraper.__init__(self, *args, **kwargs)

    self.branch = branch

    # Internally we access builds via index
    if build_number is not None:
        self.build_index = int(build_number) - 1
    else:
        self.build_index = None

    if build_id:
        # A build id has been specified. Split up its components so the
        # date and time can be extracted:
        # '20111212042025' -> '2011-12-12 04:20:25'
        self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S')
        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, has_time=True)

    elif date:
        # A date (without time) has been specified. Use its value and the
        # build index to find the requested build for that day.
        self.date = datetime.strptime(date, '%Y-%m-%d')
        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, build_index=self.build_index)

    else:
        # If no build id nor date have been specified the latest available
        # build of the given branch has to be identified. We also have to
        # retrieve the date of the build via its build id.
        url = '%s/nightly/latest-%s/' % (self.base_url, self.branch)

        print('Retrieving the build status file from %s' % url)
        parser = DirectoryParser(url)
        parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
        if not parser.entries:
            message = 'Status file for %s build cannot be found' % \
                self.platform_regex
            raise NotFoundException(message, url)

        # Read status file for the platform, retrieve build id, and
        # convert to a date. The handle is closed even if reading fails.
        status_file = url + parser.entries[-1]
        f = urllib.urlopen(status_file)
        try:
            first_line = f.readline().strip()
        finally:
            f.close()
        self.date = datetime.strptime(first_line, '%Y%m%d%H%M%S')
        self.builds, self.build_index = self.get_build_info_for_date(
            self.date, has_time=True)
def get_build_info_for_date(self, date, has_time=False, build_index=None):
    """Return the build folders for ``date`` and the index of the chosen one.

    Folders are matched by day, branch and an optional ``-l10n`` suffix
    (for locales other than ``en-US``), and kept only when
    ``self.is_build_dir`` accepts them. With ``has_time`` the list is
    further narrowed to folders containing the ``HH-MM-SS`` of ``date``.

    :param date: ``datetime`` identifying the day (and optionally the
        time) of the build.
    :param has_time: Whether the time of day in ``date`` is significant.
    :param build_index: Index into the matching folders. Defaults to the
        last one when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no folder matches.
    """
    url = urljoin(self.base_url, self.monthly_build_list_regex)

    self.logger.info('Retrieving list of builds from %s' % url)
    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)

    regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % {
        'DATE': date.strftime('%Y-%m-%d'),
        'BRANCH': self.branch,
        'L10N': '' if self.locale == 'en-US' else '(-l10n)?'}
    parser.entries = parser.filter(regex)
    parser.entries = parser.filter(self.is_build_dir)

    if has_time:
        # If a time is included in the date, use it to narrow the list
        # down to the matching build.
        regex = r'.*%s.*' % date.strftime('%H-%M-%S')
        parser.entries = parser.filter(regex)

    if not parser.entries:
        date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
        # Report the date that was actually searched for, not self.date.
        message = 'Folder for builds on %s has not been found' % \
            date.strftime(date_format)
        raise NotFoundError(message, url)

    self.show_matching_builds(parser.entries)

    # If no index has been given, set it to the last build of the day.
    if build_index is None:
        build_index = len(parser.entries) - 1

    return (parser.entries, build_index)
def get_latest_build_date(self):
    """Return the date of the latest available nightly build.

    The ``latest-<branch>/`` folder is searched for a ``.txt`` status file
    matching ``self.platform_regex``; the first line of the last matching
    file is parsed as a ``%Y%m%d%H%M%S`` build id.

    :returns: ``datetime`` parsed from the build id.
    :raises NotFoundError: If no status file is listed.
    """
    latest_dir = 'latest-%s/' % self.branch
    url = urljoin(self.base_url, 'nightly', latest_dir)
    self.logger.info('Retrieving the build status file from %s' % url)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    status_files = parser.filter(r'.*%s\.txt' % self.platform_regex)
    if not status_files:
        raise NotFoundError(
            'Status file for %s build cannot be found' %
            self.platform_regex, url)

    # Fetch the status file and convert the build id on its first line
    # into a date. The header asks caches for a fresh copy.
    response = requests.get(url + status_files[-1],
                            auth=self.authentication,
                            headers={'Cache-Control': 'max-age=0'})
    response.raise_for_status()

    build_id = response.text.split('\n')[0]
    return datetime.strptime(build_id, '%Y%m%d%H%M%S')
def get_build_info_for_index(self, build_index=None):
    """Return the matching build folders and the index of the chosen one.

    Only folder names made of digits are considered. When
    ``self.timestamp`` is set, the result is narrowed to the folder of
    that exact name; otherwise, when ``self.date`` is set, to the entries
    accepted by ``self.date_matches``. The selected folder is logged.

    :param build_index: Index of the wanted build within the matching
        folders. Defaults to the last one when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If no folder matches.
    """
    url = urljoin(self.base_url, self.build_list_regex)
    self.logger.info('Retrieving list of builds from %s' % url)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    builds = parser.filter(r'^\d+$')

    if self.timestamp:
        # The timestamp is the folder name, so at most one entry remains.
        builds = [self.timestamp] if self.timestamp in builds else []
    elif self.date:
        # Keep only the builds of the requested day.
        builds = [entry for entry in builds if self.date_matches(entry)]

    if not builds:
        raise NotFoundError('No builds have been found', url)

    self.show_matching_builds(builds)

    # Without an explicit index the last build of the day is selected.
    if build_index is None:
        build_index = len(builds) - 1
    self.logger.info('Selected build: %s' % builds[build_index])

    return (builds, build_index)
def get_build_info(self, build_index=None):
    """Collect the build folders listed below the build list URL.

    The listing is narrowed to the folder named ``self.timestamp`` when a
    timestamp is set (otherwise to folder names made of digits only) and,
    when ``self.date`` is set, to the entries accepted by
    ``self.date_matches``.

    :param build_index: Index of the wanted build within the matching
        entries. Defaults to the last entry when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundException: If no entry is left after filtering.
    """
    url = '/'.join([self.base_url, self.build_list_regex])
    print('Retrieving list of builds from %s' % url)

    # A timestamp selects exactly one folder name; without it every
    # all-digit folder name is accepted.
    if self.timestamp:
        regex = '^' + self.timestamp + '$'
    else:
        regex = r'^\d+$'

    parser = DirectoryParser(url)
    parser.entries = parser.filter(regex)

    if self.date is not None:
        # Keep only the builds of the requested day.
        parser.entries = [entry for entry in parser.entries
                          if self.date_matches(entry)]

    if not parser.entries:
        raise NotFoundException('No builds have been found', url)

    # Without an explicit index the last build of the day is selected.
    last_index = len(parser.entries) - 1
    chosen_index = last_index if build_index is None else build_index
    return (parser.entries, chosen_index)
def get_build_info_for_date(self, date, has_time=False, build_index=None):
    """Return the build folders for ``date`` and the index of the chosen one.

    Folder names must look like ``<YYYY-MM-DD>-<digits->...<branch>`` with
    a ``-l10n`` suffix for every locale other than ``en-US``.

    :param date: ``datetime`` whose day (and, with ``has_time``, time of
        day) identifies the build.
    :param has_time: When true, the build is located through the
        ``HH-MM-SS`` part of ``date`` and ``build_index`` is ignored.
    :param build_index: Index into the folders of that day. Defaults to
        the last folder when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundException: If no folder exists for the day, or none
        matches the requested time (previously a bare ``IndexError``).
    """
    url = '/'.join([self.base_url, self.monthly_build_list_regex])
    print('Retrieving list of builds from %s' % url)

    parser = DirectoryParser(url)
    regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % {
        'DATE': date.strftime('%Y-%m-%d'),
        'BRANCH': self.branch,
        'L10N': '' if self.locale == 'en-US' else '-l10n'}
    parser.entries = parser.filter(regex)

    if not parser.entries:
        # Report the date that was actually searched for, not self.date.
        message = 'Folder for builds on %s has not been found' % \
            date.strftime('%Y-%m-%d')
        raise NotFoundException(message, url)

    if has_time:
        # If a time is included in the date, use it to determine the
        # build's index.
        regex = r'.*%s.*' % date.strftime('%H-%M-%S')
        timed_entries = parser.filter(regex)
        if not timed_entries:
            message = 'Folder for builds on %s has not been found' % \
                date.strftime('%Y-%m-%d-%H-%M-%S')
            raise NotFoundException(message, url)
        build_index = parser.entries.index(timed_entries[0])
    elif build_index is None:
        # If no index has been given, set it to the last build of the day.
        build_index = len(parser.entries) - 1

    return (parser.entries, build_index)
def get_build_info_for_version(self, version, build_index=None):
    """Return the candidate build folders and the index of the chosen one.

    :param version: Not used by this method itself; the listing URL is
        built from ``self.candidate_build_list_regex``.
    :param build_index: Index of the wanted candidate build. Defaults to
        the last listed folder when omitted.
    :returns: Tuple ``(entries, build_index)``.
    :raises NotFoundError: If the listing is empty.
    """
    url = '/'.join([self.base_url, self.candidate_build_list_regex])

    print('Retrieving list of candidate builds from %s' % url)
    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)
    if not parser.entries:
        # The space before "been" was missing: the two adjacent literals
        # were concatenated into "has notbeen found".
        message = 'Folder for specific candidate builds at %s has not ' \
                  'been found' % url
        raise NotFoundError(message, url)

    # If no index has been given, set it to the last build of the given
    # version.
    if build_index is None:
        build_index = len(parser.entries) - 1

    return (parser.entries, build_index)
def binary(self):
    """Return the name of the build.

    The listing at ``self.path`` is searched for the first entry matching
    ``self.binary_regex`` (case-insensitive); the matched text is cached
    in ``self._binary``. On failure the lookup is repeated after
    ``self.retry_delay`` seconds until ``self.retry_attempts`` attempts
    have been made.

    :returns: The cached or newly found binary name.
    :raises NotFoundError: If the folder is empty, holds no matching
        entry, or the server answered with 404 on the final attempt.
    :raises requests.exceptions.RequestException: For any other request
        failure on the final attempt.
    """
    attempt = 0

    while self._binary is None:
        attempt += 1
        try:
            # Retrieve all entries from the remote virtual folder
            parser = DirectoryParser(self.path,
                                     authentication=self.authentication,
                                     timeout=self.timeout_network)
            if not parser.entries:
                raise NotFoundError('No entries found', self.path)

            # Use the first matching directory entry. The match result is
            # tested explicitly instead of relying on a bare ``except``,
            # which would also swallow unrelated errors.
            pattern = re.compile(self.binary_regex, re.IGNORECASE)
            for entry in parser.entries:
                match = pattern.match(entry)
                if match:
                    self._binary = match.group()
                    break
            else:
                raise NotFoundError("Binary not found in folder",
                                    self.path)
        except (NotFoundError, requests.exceptions.RequestException) as e:
            if self.retry_attempts > 0:
                # Log only if multiple attempts are requested
                self.logger.warning("Build not found: '%s'" % e.message)
                self.logger.info('Will retry in %s seconds...' %
                                 (self.retry_delay))
                time.sleep(self.retry_delay)
                self.logger.info("Retrying... (attempt %s)" % attempt)

            if attempt >= self.retry_attempts:
                # ``response`` is missing on NotFoundError and may be None
                # on request errors raised before any reply was received.
                response = getattr(e, 'response', None)
                if response is not None and response.status_code == 404:
                    message = "Specified build has not been found"
                    raise NotFoundError(message, self.path)
                else:
                    raise

    return self._binary
def is_build_dir(self, dir):
    """Return whether or not the given dir contains a build.

    The folder ``dir`` below the monthly build list is listed (descending
    into the ``self.locale`` subfolder for multi-locale applications
    unless the locale is ``multi``) and its entries are matched against
    ``self.binary_regex``, ignoring case.

    :param dir: Name of the folder to inspect.
    :returns: ``True`` if at least one entry matches, else ``False``.
    """
    url = urljoin(self.base_url, self.monthly_build_list_regex, dir)

    if self.application in MULTI_LOCALE_APPLICATIONS \
            and self.locale != 'multi':
        url = urljoin(url, self.locale)

    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)

    # Test the match result directly rather than calling ``.group()`` on
    # a possible ``None`` inside a bare ``except``.
    pattern = re.compile(self.binary_regex, re.IGNORECASE)
    return any(pattern.match(entry) for entry in parser.entries)
def get_build_info_for_date(self, date, build_index=None):
    """Return the build folders for ``date`` and the index of the chosen one.

    Folders are matched by day, branch, an optional ``-l10n`` suffix (for
    locales other than ``en-US`` and ``multi``) and, for fennec, a
    ``-<platform>`` suffix; they are kept only when ``self.is_build_dir``
    accepts them. If ``date`` carries a time of day, the list is further
    narrowed to folders containing ``HH-MM-SS``.

    :param date: ``datetime`` identifying the day (and optionally the
        time) of the build.
    :param build_index: Index into the matching folders. When omitted,
        the most recent folder accepted by ``self.is_build_dir`` is used,
        falling back to index 0.
    :returns: Tuple ``(entries, build_index)``.
    :raises errors.NotFoundError: If no folder matches.
    """
    url = urljoin(self.base_url, self.monthly_build_list_regex)

    # A date counts as "having a time" unless it is exactly midnight.
    # Checked field by field because the truthiness of ``date.time()``
    # is always true on Python 3.5 and later.
    has_time = date is not None and any(
        (date.hour, date.minute, date.second, date.microsecond))

    self.logger.info('Retrieving list of builds from %s' % url)
    parser = DirectoryParser(url, authentication=self.authentication,
                             timeout=self.timeout_network)

    regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % {
        'DATE': date.strftime('%Y-%m-%d'),
        'BRANCH': self.branch,
        # ensure to select the correct subfolder for localized builds
        'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?',
        # Compare by equality: ``x not in ('fennec')`` is a substring
        # test against a plain string, not tuple membership.
        'PLATFORM': '' if self.application != 'fennec'
                    else '-' + self.platform
    }
    parser.entries = parser.filter(regex)
    parser.entries = parser.filter(self.is_build_dir)

    if has_time:
        # If a time is included in the date, use it to narrow the list
        # down to the matching build.
        regex = r'.*%s.*' % date.strftime('%H-%M-%S')
        parser.entries = parser.filter(regex)

    if not parser.entries:
        date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
        # Report the date that was actually searched for, not self.date.
        message = 'Folder for builds on %s has not been found' % \
            date.strftime(date_format)
        raise errors.NotFoundError(message, url)

    self.show_matching_builds(parser.entries)

    # If no index has been given, set it to the last build of the day.
    if build_index is None:
        # Find the most recent non-empty entry, walking backwards and
        # stopping at index 0 at the latest.
        build_index = len(parser.entries)
        for build in reversed(parser.entries):
            build_index -= 1
            if not build_index or self.is_build_dir(build):
                break

    self.logger.info('Selected build: %s' % parser.entries[build_index])

    return (parser.entries, build_index)