class ArtifactCache(object):
    '''Fetch artifacts from URLS and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        """Prepare the cache directory and the size-bounded download manager."""
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        # Progress is reported in 5% buckets; -1 means nothing reported yet.
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        """Forward to the logging callback, when one was supplied."""
        if not self._log:
            return
        self._log(*args, **kwargs)

    def _cache_key(self, url):
        """Return the on-disk file name under which ``url`` is cached."""
        candidate = os.path.basename(url)
        try:
            # Use the file name from the url if it looks like a hash digest.
            if len(candidate) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(candidate)
            return candidate
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames. We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            digest = hashlib.sha256(url).hexdigest()[:16]
            # Strip query string and fragments.
            stripped = os.path.basename(urlparse.urlparse(url).path)
            return digest + '-' + stripped

    def fetch(self, url, force=False):
        """Download ``url`` into the cache and return the absolute local path."""
        fname = self._cache_key(url)
        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(
                logging.INFO, 'artifact', {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact', {'path': path},
                 'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                # No progress report until the total size is known.
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                bucket = int(percent / 5)
                # Only emit one log line per 5% bucket.
                if bucket == self._last_dl_update:
                    return
                self._last_dl_update = bucket
                self.log(
                    logging.INFO, 'artifact',
                    {'bytes_so_far': bytes_so_far,
                     'total_size': total_size,
                     'percent': percent},
                    'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)

            result = os.path.abspath(mozpath.join(self._cache_dir, fname))
            self.log(logging.INFO, 'artifact', {'path': result},
                     'Downloaded artifact to {path}')
            return result
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        """Remove every cached artifact, unless the cache is being skipped."""
        if self._skip_cache:
            self.log(logging.INFO, 'artifact', {},
                     'Skipping cache: ignoring clear_cache!')
            return
        self._persist_limit.remove_all()
class ArtifactCache(CacheManager):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        """Set up the cache directory and a size/count-bounded download manager.

        :param cache_dir: directory to download artifacts into.
        :param log: optional logging callback (passed through to CacheManager).
        :param skip_cache: when True, already-cached files are removed/ignored.
        """
        # TODO: instead of storing N artifact packages, store M megabytes.
        CacheManager.__init__(self, cache_dir, 'fetch', MAX_CACHED_ARTIFACTS,
                              cache_callback=self.delete_file,
                              log=log, skip_cache=skip_cache)
        self._cache_dir = cache_dir
        size_limit = 1024 * 1024 * 1024  # 1Gb in bytes.
        file_limit = 4  # But always keep at least 4 old artifacts around.
        persist_limit = PersistLimit(size_limit, file_limit)
        self._download_manager = DownloadManager(self._cache_dir,
                                                 persist_limit=persist_limit)
        # Progress is logged in 5% buckets; -1 means nothing logged yet.
        self._last_dl_update = -1

    def delete_file(self, key, value):
        """Best-effort removal of a cached artifact and its processed companion.

        Invoked by CacheManager when an entry is evicted; missing files are
        silently ignored since eviction must not fail.
        """
        try:
            os.remove(value)
            self.log(logging.INFO, 'artifact', {'filename': value},
                     'Purged artifact (unknown)')
        except (OSError, IOError):
            # The file may already be gone; eviction stays best-effort.
            pass

        try:
            os.remove(value + PROCESSED_SUFFIX)
            self.log(logging.INFO, 'artifact',
                     {'filename': value + PROCESSED_SUFFIX},
                     'Purged processed artifact (unknown)')
        except (OSError, IOError):
            pass

    @cachedmethod(operator.attrgetter('_cache'))
    def fetch(self, url, force=False):
        """Download ``url`` into the cache and return the absolute local path."""
        # We download to a temporary name like HASH[:16]-basename to
        # differentiate among URLs with the same basenames. We used to then
        # extract the build ID from the downloaded artifact and use it to make a
        # human readable unique name, but extracting build IDs is time consuming
        # (especially on Mac OS X, where we must mount a large DMG file).
        hash = hashlib.sha256(url).hexdigest()[:16]
        fname = hash + '-' + os.path.basename(url)
        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(
                logging.DEBUG, 'artifact', {'path': path},
                'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)

        self.log(logging.INFO, 'artifact', {'path': path},
                 'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                # Guard against an unknown or zero content length: the
                # callback can fire before the total size is known, and
                # dividing by it would raise ZeroDivisionError (the other
                # ArtifactCache variants in this file carry the same guard).
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                # Only emit one log line per 5% bucket.
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far,
                          'total_size': total_size,
                          'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            self.log(logging.INFO, 'artifact',
                     {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
                     'Downloaded artifact to {path}')
            return os.path.abspath(mozpath.join(self._cache_dir, fname))
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def print_last_item(self, args, sorted_kwargs, result):
        """Log the last fetched URL and the local files it produced."""
        url, = args
        self.log(logging.INFO, 'artifact', {'url': url},
                 'Last installed binaries from url {url}')
        self.log(logging.INFO, 'artifact', {'filename': result},
                 'Last installed binaries from local file (unknown)')
        self.log(logging.INFO, 'artifact',
                 {'filename': result + PROCESSED_SUFFIX},
                 'Last installed binaries from local processed file (unknown)')
class ArtifactCache(object):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None, skip_cache=False):
        # cache_dir: directory artifacts are downloaded into (created here,
        # flagged not-indexed so desktop search stays out of it).
        # log: optional logging callback; skip_cache: bypass/remove cached files.
        mkdir(cache_dir, not_indexed=True)
        self._cache_dir = cache_dir
        self._log = log
        self._skip_cache = skip_cache
        self._persist_limit = ArtifactPersistLimit(log)
        self._download_manager = DownloadManager(
            self._cache_dir, persist_limit=self._persist_limit)
        # Last 5%-bucket reported by the progress callback; -1 = none yet.
        self._last_dl_update = -1

    def log(self, *args, **kwargs):
        # Forward to the logging callback only when one was supplied.
        if self._log:
            self._log(*args, **kwargs)

    def fetch(self, url, force=False):
        # Download ``url`` into the cache directory and return the absolute
        # local path of the downloaded file.
        fname = os.path.basename(url)
        try:
            # Use the file name from the url if it looks like a hash digest.
            if len(fname) not in (32, 40, 56, 64, 96, 128):
                raise TypeError()
            binascii.unhexlify(fname)
        except TypeError:
            # We download to a temporary name like HASH[:16]-basename to
            # differentiate among URLs with the same basenames. We used to then
            # extract the build ID from the downloaded artifact and use it to make a
            # human readable unique name, but extracting build IDs is time consuming
            # (especially on Mac OS X, where we must mount a large DMG file).
            hash = hashlib.sha256(url).hexdigest()[:16]
            fname = hash + '-' + os.path.basename(url)
        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
        if self._skip_cache and os.path.exists(path):
            self.log(logging.DEBUG, 'artifact', {'path': path},
                     'Skipping cache: removing cached downloaded artifact {path}')
            os.remove(path)
        self.log(logging.INFO, 'artifact', {'path': path},
                 'Downloading to temporary location {path}')
        try:
            # NOTE(review): DownloadManager is defined elsewhere in this
            # module; download() appears to return None when the file is
            # already cached (see the else branch below) — confirm.
            dl = self._download_manager.download(url, fname)

            def download_progress(dl, bytes_so_far, total_size):
                # Skip reporting until the total size is known (avoids
                # dividing by zero).
                if not total_size:
                    return
                percent = (float(bytes_so_far) / total_size) * 100
                now = int(percent / 5)
                # Throttle logging to one line per 5% bucket.
                if now == self._last_dl_update:
                    return
                self._last_dl_update = now
                self.log(logging.INFO, 'artifact',
                         {'bytes_so_far': bytes_so_far,
                          'total_size': total_size,
                          'percent': percent},
                         'Downloading... {percent:02.1f} %')

            if dl:
                dl.set_progress(download_progress)
                dl.wait()
            else:
                # Avoid the file being removed if it was in the cache already.
                path = os.path.join(self._cache_dir, fname)
                self._persist_limit.register_file(path)
            self.log(logging.INFO, 'artifact',
                     {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
                     'Downloaded artifact to {path}')
            return os.path.abspath(mozpath.join(self._cache_dir, fname))
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def clear_cache(self):
        # Remove every cached artifact unless the cache is being skipped.
        if self._skip_cache:
            self.log(logging.DEBUG, 'artifact', {},
                     'Skipping cache: ignoring clear_cache!')
            return
        self._persist_limit.remove_all()